/* Perform various loop optimizations, including strength reduction.
   Copyright (C) 1987, 1988, 1989, 1991, 1992, 1993, 1994, 1995,
   1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */
/* This is the loop optimization pass of the compiler.
   It finds invariant computations within loops and moves them
   to the beginning of the loop.  Then it identifies basic and
   general induction variables.

   Basic induction variables (BIVs) are pseudo registers which are set within
   a loop only by incrementing or decrementing their value.  General induction
   variables (GIVs) are pseudo registers with a value which is a linear function
   of a basic induction variable.  BIVs are recognized by `basic_induction_var';
   GIVs by `general_induction_var'.

   Once induction variables are identified, strength reduction is applied to the
   general induction variables, and induction variable elimination is applied to
   the basic induction variables.

   It also finds cases where
   a register is set within the loop by zero-extending a narrower value
   and changes these to zero the entire register once before the loop
   and merely copy the low part within the loop.

   Most of the complexity is in heuristics to decide when it is worth
   while to do these things.  */
46 |
|
|
#include "config.h"
|
47 |
|
|
#include "system.h"
|
48 |
|
|
#include "coretypes.h"
|
49 |
|
|
#include "tm.h"
|
50 |
|
|
#include "rtl.h"
|
51 |
|
|
#include "tm_p.h"
|
52 |
|
|
#include "function.h"
|
53 |
|
|
#include "expr.h"
|
54 |
|
|
#include "hard-reg-set.h"
|
55 |
|
|
#include "basic-block.h"
|
56 |
|
|
#include "insn-config.h"
|
57 |
|
|
#include "regs.h"
|
58 |
|
|
#include "recog.h"
|
59 |
|
|
#include "flags.h"
|
60 |
|
|
#include "real.h"
|
61 |
|
|
#include "cselib.h"
|
62 |
|
|
#include "except.h"
|
63 |
|
|
#include "toplev.h"
|
64 |
|
|
#include "predict.h"
|
65 |
|
|
#include "insn-flags.h"
|
66 |
|
|
#include "optabs.h"
|
67 |
|
|
#include "cfgloop.h"
|
68 |
|
|
#include "ggc.h"
|
69 |
|
|
#include "timevar.h"
|
70 |
|
|
#include "tree-pass.h"
|
71 |
|
|
|
/* Get the loop info pointer of a loop, stored in the loop's AUX field.  */
#define LOOP_INFO(LOOP) ((struct loop_info *) (LOOP)->aux)

/* Get a pointer to the loop movables structure.  */
#define LOOP_MOVABLES(LOOP) (&LOOP_INFO (LOOP)->movables)

/* Get a pointer to the loop registers structure.  */
#define LOOP_REGS(LOOP) (&LOOP_INFO (LOOP)->regs)

/* Get a pointer to the loop induction variables structure.  */
#define LOOP_IVS(LOOP) (&LOOP_INFO (LOOP)->ivs)

/* Get the luid of an insn.  Catch the error of trying to reference the LUID
   of an insn added during loop, since these don't have LUIDs.  */

#define INSN_LUID(INSN) \
  (gcc_assert (INSN_UID (INSN) < max_uid_for_loop), uid_luid[INSN_UID (INSN)])

/* The luid of the first/last insn that sets register REGNO.  Registers
   whose setting insn was added after the luid table was built fall back
   to the conservative extremes 0 and INT_MAX respectively.  */
#define REGNO_FIRST_LUID(REGNO)			\
  (REGNO_FIRST_UID (REGNO) < max_uid_for_loop	\
	? uid_luid[REGNO_FIRST_UID (REGNO)]	\
	: 0)
#define REGNO_LAST_LUID(REGNO)			\
  (REGNO_LAST_UID (REGNO) < max_uid_for_loop	\
	? uid_luid[REGNO_LAST_UID (REGNO)]	\
	: INT_MAX)
/* A "basic induction variable" or biv is a pseudo reg that is set
   (within this loop) only by incrementing or decrementing it.  */
/* A "general induction variable" or giv is a pseudo reg whose
   value is a linear function of a biv.  */

/* Bivs are recognized by `basic_induction_var';
   Givs by `general_induction_var'.  */
/* An enum for the two different types of givs, those that are used
   as memory addresses and those that are calculated into registers.  */
enum g_types
{
  DEST_ADDR,			/* Giv used as a memory address.  */
  DEST_REG			/* Giv computed into a register.  */
};
/* A `struct induction' is created for every instruction that sets
   an induction variable (either a biv or a giv).  */

struct induction
{
  rtx insn;			/* The insn that sets a biv or giv */
  rtx new_reg;			/* New register, containing strength reduced
				   version of this giv.  */
  rtx src_reg;			/* Biv from which this giv is computed.
				   (If this is a biv, then this is the biv.) */
  enum g_types giv_type;	/* Indicate whether DEST_ADDR or DEST_REG */
  rtx dest_reg;			/* Destination register for insn: this is the
				   register which was the biv or giv.
				   For a biv, this equals src_reg.
				   For a DEST_ADDR type giv, this is 0.  */
  rtx *location;		/* Place in the insn where this giv occurs.
				   If GIV_TYPE is DEST_REG, this is 0.  */
				/* For a biv, this is the place where add_val
				   was found.  */
  enum machine_mode mode;	/* The mode of this biv or giv */
  rtx mem;			/* For DEST_ADDR, the memory object.  */
  rtx mult_val;			/* Multiplicative factor for src_reg.  */
  rtx add_val;			/* Additive constant for that product.  */
  int benefit;			/* Gain from eliminating this insn.  */
  rtx final_value;		/* If the giv is used outside the loop, and its
				   final value could be calculated, it is put
				   here, and the giv is made replaceable.  Set
				   the giv to this value before the loop.  */
  unsigned combined_with;	/* The number of givs this giv has been
				   combined with.  If nonzero, this giv
				   cannot combine with any other giv.  */
  unsigned replaceable : 1;	/* 1 if we can substitute the strength-reduced
				   variable for the original variable.
				   0 means, instead, that the value of the
				   new one must be copied into the old pseudo
				   reg each time the old one is set.  */
  unsigned not_replaceable : 1;	/* Used to prevent duplicating work.  This is
				   1 if we know that the giv definitely can
				   not be made replaceable, in which case we
				   don't bother checking the variable again
				   even if further info is available.
				   Both this and the above can be zero.  */
  unsigned ignore : 1;		/* 1 prohibits further processing of giv */
  unsigned always_computable : 1;/* 1 if this value is computable every
				    iteration.  */
  unsigned always_executed : 1; /* 1 if this set occurs each iteration.  */
  unsigned maybe_multiple : 1;	/* Only used for a biv and 1 if this biv
				   update may be done multiple times per
				   iteration.  */
  unsigned cant_derive : 1;	/* For giv's, 1 if this giv cannot derive
				   another giv.  This occurs in many cases
				   where a giv's lifetime spans an update to
				   a biv.  */
  unsigned maybe_dead : 1;	/* 1 if this giv might be dead.  In that case,
				   we won't use it to eliminate a biv, it
				   would probably lose.  */
  unsigned auto_inc_opt : 1;	/* 1 if this giv had its increment output next
				   to it to try to form an auto-inc address.  */
  unsigned shared : 1;		/* NOTE(review): undocumented here; appears to
				   mark a giv shared with others — confirm at
				   the sites that set it.  */
  unsigned no_const_addval : 1; /* 1 if add_val does not contain a const.  */
  int lifetime;			/* Length of life of this giv */
  rtx derive_adjustment;	/* If nonzero, is an adjustment to be
				   subtracted from add_val when this giv
				   derives another.  This occurs when the
				   giv spans a biv update by incrementation.  */
  rtx ext_dependent;		/* If nonzero, is a sign or zero extension
				   if a biv on which this giv is dependent.  */
  struct induction *next_iv;	/* For givs, links together all givs that are
				   based on the same biv.  For bivs, links
				   together all biv entries that refer to the
				   same biv register.  */
  struct induction *same;	/* For givs, if the giv has been combined with
				   another giv, this points to the base giv.
				   The base giv will have COMBINED_WITH nonzero.
				   For bivs, if the biv has the same LOCATION
				   than another biv, this points to the base
				   biv.  */
  struct induction *same_insn;	/* If there are multiple identical givs in
				   the same insn, then all but one have this
				   field set, and they all point to the giv
				   that doesn't have this field set.  */
  rtx last_use;			/* For a giv made from a biv increment, this is
				   a substitute for the lifetime information.  */
};
/* A `struct iv_class' is created for each biv.  */

struct iv_class
{
  unsigned int regno;		/* Pseudo reg which is the biv.  */
  int biv_count;		/* Number of insns setting this reg.  */
  struct induction *biv;	/* List of all insns that set this reg.  */
  int giv_count;		/* Number of DEST_REG givs computed from this
				   biv.  The resulting count is only used in
				   check_dbra_loop.  */
  struct induction *giv;	/* List of all insns that compute a giv
				   from this reg.  */
  int total_benefit;		/* Sum of BENEFITs of all those givs.  */
  rtx initial_value;		/* Value of reg at loop start.  */
  rtx initial_test;		/* Test performed on BIV before loop.  */
  rtx final_value;		/* Value of reg at loop end, if known.  */
  struct iv_class *next;	/* Links all class structures together.  */
  rtx init_insn;		/* insn which initializes biv, 0 if none.  */
  rtx init_set;			/* SET of INIT_INSN, if any.  */
  unsigned incremented : 1;	/* 1 if somewhere incremented/decremented */
  unsigned eliminable : 1;	/* 1 if plausible candidate for
				   elimination.  */
  unsigned nonneg : 1;		/* 1 if we added a REG_NONNEG note for
				   this.  */
  unsigned reversed : 1;	/* 1 if we reversed the loop that this
				   biv controls.  */
  unsigned all_reduced : 1;	/* 1 if all givs using this biv have
				   been reduced.  */
};
/* Definitions used by the basic induction variable discovery code.
   Classification of a register during biv discovery.  */
enum iv_mode
{
  UNKNOWN_INDUCT,		/* Not yet classified.  */
  BASIC_INDUCT,			/* Known to be a basic induction variable.  */
  NOT_BASIC_INDUCT,		/* Known not to be a basic induction variable.  */
  GENERAL_INDUCT		/* Known to be a general induction variable.  */
};
/* A `struct iv' is created for every register.  */

struct iv
{
  /* How the register has been classified (see enum iv_mode).  */
  enum iv_mode type;
  /* Classification-dependent payload: the iv_class record for a biv,
     or the induction record for a giv.  Which member is valid is
     determined by TYPE.  */
  union
  {
    struct iv_class *class;
    struct induction *info;
  } iv;
};
/* Accessors for the per-register induction-variable records kept in a
   struct loop_ivs; N is a register number.  */
#define REG_IV_TYPE(ivs, n) ivs->regs[n].type
#define REG_IV_INFO(ivs, n) ivs->regs[n].iv.info
#define REG_IV_CLASS(ivs, n) ivs->regs[n].iv.class
/* Induction variable information for one loop.  */
struct loop_ivs
{
  /* Indexed by register number, contains pointer to `struct
     iv' if register is an induction variable.  */
  struct iv *regs;

  /* Size of regs array.  */
  unsigned int n_regs;

  /* The head of a list which links together (via the next field)
     every iv class for the current loop.  */
  struct iv_class *list;
};
/* Record of one MEM accessed in a loop; see the `mems' array in
   struct loop_info.  */
typedef struct loop_mem_info
{
  rtx mem;			/* The MEM itself.  */
  rtx reg;			/* Corresponding pseudo, if any.  */
  int optimize;			/* Nonzero if we can optimize access to this MEM.  */
} loop_mem_info;
|
|
285 |
|
|
struct loop_reg
|
286 |
|
|
{
|
287 |
|
|
/* Number of times the reg is set during the loop being scanned.
|
288 |
|
|
During code motion, a negative value indicates a reg that has
|
289 |
|
|
been made a candidate; in particular -2 means that it is an
|
290 |
|
|
candidate that we know is equal to a constant and -1 means that
|
291 |
|
|
it is a candidate not known equal to a constant. After code
|
292 |
|
|
motion, regs moved have 0 (which is accurate now) while the
|
293 |
|
|
failed candidates have the original number of times set.
|
294 |
|
|
|
295 |
|
|
Therefore, at all times, == 0 indicates an invariant register;
|
296 |
|
|
< 0 a conditionally invariant one. */
|
297 |
|
|
int set_in_loop;
|
298 |
|
|
|
299 |
|
|
/* Original value of set_in_loop; same except that this value
|
300 |
|
|
is not set negative for a reg whose sets have been made candidates
|
301 |
|
|
and not set to 0 for a reg that is moved. */
|
302 |
|
|
int n_times_set;
|
303 |
|
|
|
304 |
|
|
/* Contains the insn in which a register was used if it was used
|
305 |
|
|
exactly once; contains const0_rtx if it was used more than once. */
|
306 |
|
|
rtx single_usage;
|
307 |
|
|
|
308 |
|
|
/* Nonzero indicates that the register cannot be moved or strength
|
309 |
|
|
reduced. */
|
310 |
|
|
char may_not_optimize;
|
311 |
|
|
|
312 |
|
|
/* Nonzero means reg N has already been moved out of one loop.
|
313 |
|
|
This reduces the desire to move it out of another. */
|
314 |
|
|
char moved_once;
|
315 |
|
|
};
|
316 |
|
|
|
317 |
|
|
|
318 |
|
|
struct loop_regs
|
319 |
|
|
{
|
320 |
|
|
int num; /* Number of regs used in table. */
|
321 |
|
|
int size; /* Size of table. */
|
322 |
|
|
struct loop_reg *array; /* Register usage info. array. */
|
323 |
|
|
int multiple_uses; /* Nonzero if a reg has multiple uses. */
|
324 |
|
|
};
|
325 |
|
|
|
326 |
|
|
|
327 |
|
|
|
328 |
|
|
struct loop_movables
|
329 |
|
|
{
|
330 |
|
|
/* Head of movable chain. */
|
331 |
|
|
struct movable *head;
|
332 |
|
|
/* Last movable in chain. */
|
333 |
|
|
struct movable *last;
|
334 |
|
|
};
|
335 |
|
|
|
336 |
|
|
|
337 |
|
|
/* Information pertaining to a loop. */
|
338 |
|
|
|
339 |
|
|
struct loop_info
|
340 |
|
|
{
|
341 |
|
|
/* Nonzero if there is a subroutine call in the current loop. */
|
342 |
|
|
int has_call;
|
343 |
|
|
/* Nonzero if there is a libcall in the current loop. */
|
344 |
|
|
int has_libcall;
|
345 |
|
|
/* Nonzero if there is a non constant call in the current loop. */
|
346 |
|
|
int has_nonconst_call;
|
347 |
|
|
/* Nonzero if there is a prefetch instruction in the current loop. */
|
348 |
|
|
int has_prefetch;
|
349 |
|
|
/* Nonzero if there is a volatile memory reference in the current
|
350 |
|
|
loop. */
|
351 |
|
|
int has_volatile;
|
352 |
|
|
/* Nonzero if there is a tablejump in the current loop. */
|
353 |
|
|
int has_tablejump;
|
354 |
|
|
/* Nonzero if there are ways to leave the loop other than falling
|
355 |
|
|
off the end. */
|
356 |
|
|
int has_multiple_exit_targets;
|
357 |
|
|
/* Nonzero if there is an indirect jump in the current function. */
|
358 |
|
|
int has_indirect_jump;
|
359 |
|
|
/* Register or constant initial loop value. */
|
360 |
|
|
rtx initial_value;
|
361 |
|
|
/* Register or constant value used for comparison test. */
|
362 |
|
|
rtx comparison_value;
|
363 |
|
|
/* Register or constant approximate final value. */
|
364 |
|
|
rtx final_value;
|
365 |
|
|
/* Register or constant initial loop value with term common to
|
366 |
|
|
final_value removed. */
|
367 |
|
|
rtx initial_equiv_value;
|
368 |
|
|
/* Register or constant final loop value with term common to
|
369 |
|
|
initial_value removed. */
|
370 |
|
|
rtx final_equiv_value;
|
371 |
|
|
/* Register corresponding to iteration variable. */
|
372 |
|
|
rtx iteration_var;
|
373 |
|
|
/* Constant loop increment. */
|
374 |
|
|
rtx increment;
|
375 |
|
|
enum rtx_code comparison_code;
|
376 |
|
|
/* Holds the number of loop iterations. It is zero if the number
|
377 |
|
|
could not be calculated. Must be unsigned since the number of
|
378 |
|
|
iterations can be as high as 2^wordsize - 1. For loops with a
|
379 |
|
|
wider iterator, this number will be zero if the number of loop
|
380 |
|
|
iterations is too large for an unsigned integer to hold. */
|
381 |
|
|
unsigned HOST_WIDE_INT n_iterations;
|
382 |
|
|
int used_count_register;
|
383 |
|
|
/* The loop iterator induction variable. */
|
384 |
|
|
struct iv_class *iv;
|
385 |
|
|
/* List of MEMs that are stored in this loop. */
|
386 |
|
|
rtx store_mems;
|
387 |
|
|
/* Array of MEMs that are used (read or written) in this loop, but
|
388 |
|
|
cannot be aliased by anything in this loop, except perhaps
|
389 |
|
|
themselves. In other words, if mems[i] is altered during
|
390 |
|
|
the loop, it is altered by an expression that is rtx_equal_p to
|
391 |
|
|
it. */
|
392 |
|
|
loop_mem_info *mems;
|
393 |
|
|
/* The index of the next available slot in MEMS. */
|
394 |
|
|
int mems_idx;
|
395 |
|
|
/* The number of elements allocated in MEMS. */
|
396 |
|
|
int mems_allocated;
|
397 |
|
|
/* Nonzero if we don't know what MEMs were changed in the current
|
398 |
|
|
loop. This happens if the loop contains a call (in which case
|
399 |
|
|
`has_call' will also be set) or if we store into more than
|
400 |
|
|
NUM_STORES MEMs. */
|
401 |
|
|
int unknown_address_altered;
|
402 |
|
|
/* The above doesn't count any readonly memory locations that are
|
403 |
|
|
stored. This does. */
|
404 |
|
|
int unknown_constant_address_altered;
|
405 |
|
|
/* Count of memory write instructions discovered in the loop. */
|
406 |
|
|
int num_mem_sets;
|
407 |
|
|
/* The insn where the first of these was found. */
|
408 |
|
|
rtx first_loop_store_insn;
|
409 |
|
|
/* The chain of movable insns in loop. */
|
410 |
|
|
struct loop_movables movables;
|
411 |
|
|
/* The registers used the in loop. */
|
412 |
|
|
struct loop_regs regs;
|
413 |
|
|
/* The induction variable information in loop. */
|
414 |
|
|
struct loop_ivs ivs;
|
415 |
|
|
/* Nonzero if call is in pre_header extended basic block. */
|
416 |
|
|
int pre_header_has_call;
|
417 |
|
|
};
|
418 |
|
|
|
/* Target-overridable parameters for the prefetch heuristics below.
   Not really meaningful values, but at least something.  */
#ifndef SIMULTANEOUS_PREFETCHES
#define SIMULTANEOUS_PREFETCHES 3
#endif
#ifndef PREFETCH_BLOCK
#define PREFETCH_BLOCK 32
#endif
#ifndef HAVE_prefetch
#define HAVE_prefetch 0
#define CODE_FOR_prefetch 0
#define gen_prefetch(a,b,c) (gcc_unreachable (), NULL_RTX)
#endif

/* Give up the prefetch optimizations once we exceed a given threshold.
   It is unlikely that we would be able to optimize something in a loop
   with so many detected prefetches.  */
#define MAX_PREFETCHES 100
/* The number of prefetch blocks that are beneficial to fetch at once before
   a loop with a known (and low) iteration count.  */
#define PREFETCH_BLOCKS_BEFORE_LOOP_MAX 6
/* For very tiny loops it is not worthwhile to prefetch even before the loop,
   since it is likely that the data are already in the cache.  */
#define PREFETCH_BLOCKS_BEFORE_LOOP_MIN 2

/* Parameterize some prefetch heuristics so they can be turned on and off
   easily for performance testing on new architectures.  These can be
   defined in target-dependent files.  */

/* Prefetch is worthwhile only when loads/stores are dense.  */
#ifndef PREFETCH_ONLY_DENSE_MEM
#define PREFETCH_ONLY_DENSE_MEM 1
#endif

/* Define what we mean by "dense" loads and stores; This value divided by 256
   is the minimum percentage of memory references that worth prefetching.  */
#ifndef PREFETCH_DENSE_MEM
#define PREFETCH_DENSE_MEM 220
#endif

/* Do not prefetch for a loop whose iteration count is known to be low.  */
#ifndef PREFETCH_NO_LOW_LOOPCNT
#define PREFETCH_NO_LOW_LOOPCNT 1
#endif

/* Define what we mean by a "low" iteration count.  */
#ifndef PREFETCH_LOW_LOOPCNT
#define PREFETCH_LOW_LOOPCNT 32
#endif

/* Do not prefetch for a loop that contains a function call; such a loop is
   probably not an internal loop.  */
#ifndef PREFETCH_NO_CALL
#define PREFETCH_NO_CALL 1
#endif

/* Do not prefetch accesses with an extreme stride.  */
#ifndef PREFETCH_NO_EXTREME_STRIDE
#define PREFETCH_NO_EXTREME_STRIDE 1
#endif

/* Define what we mean by an "extreme" stride.  */
#ifndef PREFETCH_EXTREME_STRIDE
#define PREFETCH_EXTREME_STRIDE 4096
#endif

/* Define a limit to how far apart indices can be and still be merged
   into a single prefetch.  */
#ifndef PREFETCH_EXTREME_DIFFERENCE
#define PREFETCH_EXTREME_DIFFERENCE 4096
#endif

/* Issue prefetch instructions before the loop to fetch data to be used
   in the first few loop iterations.  */
#ifndef PREFETCH_BEFORE_LOOP
#define PREFETCH_BEFORE_LOOP 1
#endif

/* Do not handle reversed order prefetches (negative stride).  */
#ifndef PREFETCH_NO_REVERSE_ORDER
#define PREFETCH_NO_REVERSE_ORDER 1
#endif

/* Prefetch even if the GIV is in conditional code.  */
#ifndef PREFETCH_CONDITIONAL
#define PREFETCH_CONDITIONAL 1
#endif
/* Length (in luids) of the live range of register REGNO inside the pass's
   luid numbering.  */
#define LOOP_REG_LIFETIME(LOOP, REGNO) \
((REGNO_LAST_LUID (REGNO) - REGNO_FIRST_LUID (REGNO)))

/* Nonzero if register REGNO is live outside LOOP, i.e. it is first set
   before the loop start or last used after the loop end.  */
#define LOOP_REG_GLOBAL_P(LOOP, REGNO) \
((REGNO_LAST_LUID (REGNO) > INSN_LUID ((LOOP)->end) \
 || REGNO_FIRST_LUID (REGNO) < INSN_LUID ((LOOP)->start)))

/* Number of hard registers covered by a set of REGNO in mode of SET_DEST;
   pseudos always count as one register.  */
#define LOOP_REGNO_NREGS(REGNO, SET_DEST) \
((REGNO) < FIRST_PSEUDO_REGISTER \
 ? (int) hard_regno_nregs[(REGNO)][GET_MODE (SET_DEST)] : 1)
/* Vector mapping INSN_UIDs to luids.
   The luids are like uids but increase monotonically always.
   We use them to see whether a jump comes from outside a given loop.  */

static int *uid_luid;

/* Indexed by INSN_UID, contains the ordinal giving the (innermost) loop
   number the insn is contained in.  */

static struct loop **uid_loop;

/* 1 + largest uid of any insn.  */

static int max_uid_for_loop;

/* Number of loops detected in current function.  Used as index to the
   next few tables.  */

static int max_loop_num;

/* Bound on pseudo register number before loop optimization.
   A pseudo has valid regscan info if its number is < max_reg_before_loop.  */
static unsigned int max_reg_before_loop;

/* The value to pass to the next call of reg_scan_update.  */
static int loop_max_reg;
/* During the analysis of a loop, a chain of `struct movable's
   is made to record all the movable insns found.
   Then the entire chain can be scanned to decide which to move.  */

struct movable
{
  rtx insn;			/* A movable insn */
  rtx set_src;			/* The expression this reg is set from.  */
  rtx set_dest;			/* The destination of this SET.  */
  rtx dependencies;		/* When INSN is libcall, this is an EXPR_LIST
				   of any registers used within the LIBCALL.  */
  int consec;			/* Number of consecutive following insns
				   that must be moved with this one.  */
  unsigned int regno;		/* The register it sets */
  short lifetime;		/* lifetime of that register;
				   may be adjusted when matching movables
				   that load the same value are found.  */
  short savings;		/* Number of insns we can move for this reg,
				   including other movables that force this
				   or match this one.  */
  ENUM_BITFIELD(machine_mode) savemode : 8;   /* Nonzero means it is a mode for
				   a low part that we should avoid changing when
				   clearing the rest of the reg.  */
  unsigned int cond : 1;	/* 1 if only conditionally movable */
  unsigned int force : 1;	/* 1 means MUST move this insn */
  unsigned int global : 1;	/* 1 means reg is live outside this loop */
		/* If PARTIAL is 1, GLOBAL means something different:
		   that the reg is live outside the range from where it is set
		   to the following label.  */
  unsigned int done : 1;	/* 1 inhibits further processing of this */

  unsigned int partial : 1;	/* 1 means this reg is used for zero-extending.
				   In particular, moving it does not make it
				   invariant.  */
  unsigned int move_insn : 1;	/* 1 means that we call emit_move_insn to
				   load SRC, rather than copying INSN.  */
  unsigned int move_insn_first:1;/* Same as above, if this is necessary for the
				    first insn of a consecutive sets group.  */
  unsigned int is_equiv : 1;	/* 1 means a REG_EQUIV is present on INSN.  */
  unsigned int insert_temp : 1; /* 1 means we copy to a new pseudo and replace
				   the original insn with a copy from that
				   pseudo, rather than deleting it.  */
  struct movable *match;	/* First entry for same value */
  struct movable *forces;	/* An insn that must be moved if this is */
  struct movable *next;		/* Next movable in the chain.  */
};
|
593 |
|
|
static FILE *loop_dump_stream;
|
594 |
|
|
|
595 |
|
|
/* Forward declarations. */
|
596 |
|
|
|
597 |
|
|
static void invalidate_loops_containing_label (rtx);
|
598 |
|
|
static void find_and_verify_loops (rtx, struct loops *);
|
599 |
|
|
static void mark_loop_jump (rtx, struct loop *);
|
600 |
|
|
static void prescan_loop (struct loop *);
|
601 |
|
|
static int reg_in_basic_block_p (rtx, rtx);
|
602 |
|
|
static int consec_sets_invariant_p (const struct loop *, rtx, int, rtx);
|
603 |
|
|
static int labels_in_range_p (rtx, int);
|
604 |
|
|
static void count_one_set (struct loop_regs *, rtx, rtx, rtx *);
|
605 |
|
|
static void note_addr_stored (rtx, rtx, void *);
|
606 |
|
|
static void note_set_pseudo_multiple_uses (rtx, rtx, void *);
|
607 |
|
|
static int loop_reg_used_before_p (const struct loop *, rtx, rtx);
|
608 |
|
|
static rtx find_regs_nested (rtx, rtx);
|
609 |
|
|
static void scan_loop (struct loop*, int);
|
610 |
|
|
#if 0
|
611 |
|
|
static void replace_call_address (rtx, rtx, rtx);
|
612 |
|
|
#endif
|
613 |
|
|
static rtx skip_consec_insns (rtx, int);
|
614 |
|
|
static int libcall_benefit (rtx);
|
615 |
|
|
static rtx libcall_other_reg (rtx, rtx);
|
616 |
|
|
static void record_excess_regs (rtx, rtx, rtx *);
|
617 |
|
|
static void ignore_some_movables (struct loop_movables *);
|
618 |
|
|
static void force_movables (struct loop_movables *);
|
619 |
|
|
static void combine_movables (struct loop_movables *, struct loop_regs *);
|
620 |
|
|
static int num_unmoved_movables (const struct loop *);
|
621 |
|
|
static int regs_match_p (rtx, rtx, struct loop_movables *);
|
622 |
|
|
static int rtx_equal_for_loop_p (rtx, rtx, struct loop_movables *,
|
623 |
|
|
struct loop_regs *);
|
624 |
|
|
static void add_label_notes (rtx, rtx);
|
625 |
|
|
static void move_movables (struct loop *loop, struct loop_movables *, int,
|
626 |
|
|
int);
|
627 |
|
|
static void loop_movables_add (struct loop_movables *, struct movable *);
|
628 |
|
|
static void loop_movables_free (struct loop_movables *);
|
629 |
|
|
static int count_nonfixed_reads (const struct loop *, rtx);
|
630 |
|
|
static void loop_bivs_find (struct loop *);
|
631 |
|
|
static void loop_bivs_init_find (struct loop *);
|
632 |
|
|
static void loop_bivs_check (struct loop *);
|
633 |
|
|
static void loop_givs_find (struct loop *);
|
634 |
|
|
static void loop_givs_check (struct loop *);
|
635 |
|
|
static int loop_biv_eliminable_p (struct loop *, struct iv_class *, int, int);
|
636 |
|
|
static int loop_giv_reduce_benefit (struct loop *, struct iv_class *,
|
637 |
|
|
struct induction *, rtx);
|
638 |
|
|
static void loop_givs_dead_check (struct loop *, struct iv_class *);
|
639 |
|
|
static void loop_givs_reduce (struct loop *, struct iv_class *);
|
640 |
|
|
static void loop_givs_rescan (struct loop *, struct iv_class *, rtx *);
|
641 |
|
|
static void loop_ivs_free (struct loop *);
|
642 |
|
|
static void strength_reduce (struct loop *, int);
|
643 |
|
|
static void find_single_use_in_loop (struct loop_regs *, rtx, rtx);
|
644 |
|
|
static int valid_initial_value_p (rtx, rtx, int, rtx);
|
645 |
|
|
static void find_mem_givs (const struct loop *, rtx, rtx, int, int);
|
646 |
|
|
static void record_biv (struct loop *, struct induction *, rtx, rtx, rtx,
|
647 |
|
|
rtx, rtx *, int, int);
|
648 |
|
|
static void check_final_value (const struct loop *, struct induction *);
|
649 |
|
|
static void loop_ivs_dump (const struct loop *, FILE *, int);
|
650 |
|
|
static void loop_iv_class_dump (const struct iv_class *, FILE *, int);
|
651 |
|
|
static void loop_biv_dump (const struct induction *, FILE *, int);
|
652 |
|
|
static void loop_giv_dump (const struct induction *, FILE *, int);
|
653 |
|
|
static void record_giv (const struct loop *, struct induction *, rtx, rtx,
|
654 |
|
|
rtx, rtx, rtx, rtx, int, enum g_types, int, int,
|
655 |
|
|
rtx *);
|
656 |
|
|
static void update_giv_derive (const struct loop *, rtx);
|
657 |
|
|
static HOST_WIDE_INT get_monotonic_increment (struct iv_class *);
|
658 |
|
|
static bool biased_biv_fits_mode_p (const struct loop *, struct iv_class *,
|
659 |
|
|
HOST_WIDE_INT, enum machine_mode,
|
660 |
|
|
unsigned HOST_WIDE_INT);
|
661 |
|
|
static bool biv_fits_mode_p (const struct loop *, struct iv_class *,
|
662 |
|
|
HOST_WIDE_INT, enum machine_mode, bool);
|
663 |
|
|
static bool extension_within_bounds_p (const struct loop *, struct iv_class *,
|
664 |
|
|
HOST_WIDE_INT, rtx);
|
665 |
|
|
static void check_ext_dependent_givs (const struct loop *, struct iv_class *);
|
666 |
|
|
static int basic_induction_var (const struct loop *, rtx, enum machine_mode,
|
667 |
|
|
rtx, rtx, rtx *, rtx *, rtx **);
|
668 |
|
|
static rtx simplify_giv_expr (const struct loop *, rtx, rtx *, int *);
|
669 |
|
|
static int general_induction_var (const struct loop *loop, rtx, rtx *, rtx *,
|
670 |
|
|
rtx *, rtx *, int, int *, enum machine_mode);
|
671 |
|
|
static int consec_sets_giv (const struct loop *, int, rtx, rtx, rtx, rtx *,
|
672 |
|
|
rtx *, rtx *, rtx *);
|
673 |
|
|
static int check_dbra_loop (struct loop *, int);
|
674 |
|
|
static rtx express_from_1 (rtx, rtx, rtx);
|
675 |
|
|
static rtx combine_givs_p (struct induction *, struct induction *);
|
676 |
|
|
static int cmp_combine_givs_stats (const void *, const void *);
|
677 |
|
|
static void combine_givs (struct loop_regs *, struct iv_class *);
|
678 |
|
|
static int product_cheap_p (rtx, rtx);
|
679 |
|
|
static int maybe_eliminate_biv (const struct loop *, struct iv_class *, int,
|
680 |
|
|
int, int);
|
681 |
|
|
static int maybe_eliminate_biv_1 (const struct loop *, rtx, rtx,
|
682 |
|
|
struct iv_class *, int, basic_block, rtx);
|
683 |
|
|
static int last_use_this_basic_block (rtx, rtx);
|
684 |
|
|
static void record_initial (rtx, rtx, void *);
|
685 |
|
|
static void update_reg_last_use (rtx, rtx);
|
686 |
|
|
static rtx next_insn_in_loop (const struct loop *, rtx);
|
687 |
|
|
static void loop_regs_scan (const struct loop *, int);
|
688 |
|
|
static int count_insns_in_loop (const struct loop *);
|
689 |
|
|
static int find_mem_in_note_1 (rtx *, void *);
|
690 |
|
|
static rtx find_mem_in_note (rtx);
|
691 |
|
|
static void load_mems (const struct loop *);
|
692 |
|
|
static int insert_loop_mem (rtx *, void *);
|
693 |
|
|
static int replace_loop_mem (rtx *, void *);
|
694 |
|
|
static void replace_loop_mems (rtx, rtx, rtx, int);
|
695 |
|
|
static int replace_loop_reg (rtx *, void *);
|
696 |
|
|
static void replace_loop_regs (rtx insn, rtx, rtx);
|
697 |
|
|
static void note_reg_stored (rtx, rtx, void *);
|
698 |
|
|
static void try_copy_prop (const struct loop *, rtx, unsigned int);
|
699 |
|
|
static void try_swap_copy_prop (const struct loop *, rtx, unsigned int);
|
700 |
|
|
static rtx check_insn_for_givs (struct loop *, rtx, int, int);
|
701 |
|
|
static rtx check_insn_for_bivs (struct loop *, rtx, int, int);
|
702 |
|
|
static rtx gen_add_mult (rtx, rtx, rtx, rtx);
|
703 |
|
|
static void loop_regs_update (const struct loop *, rtx);
|
704 |
|
|
static int iv_add_mult_cost (rtx, rtx, rtx, rtx);
|
705 |
|
|
static int loop_invariant_p (const struct loop *, rtx);
|
706 |
|
|
static rtx loop_insn_hoist (const struct loop *, rtx);
|
707 |
|
|
static void loop_iv_add_mult_emit_before (const struct loop *, rtx, rtx, rtx,
|
708 |
|
|
rtx, basic_block, rtx);
|
709 |
|
|
static rtx loop_insn_emit_before (const struct loop *, basic_block,
|
710 |
|
|
rtx, rtx);
|
711 |
|
|
static int loop_insn_first_p (rtx, rtx);
|
712 |
|
|
static rtx get_condition_for_loop (const struct loop *, rtx);
|
713 |
|
|
static void loop_iv_add_mult_sink (const struct loop *, rtx, rtx, rtx, rtx);
|
714 |
|
|
static void loop_iv_add_mult_hoist (const struct loop *, rtx, rtx, rtx, rtx);
|
715 |
|
|
static rtx extend_value_for_giv (struct induction *, rtx);
|
716 |
|
|
static rtx loop_insn_sink (const struct loop *, rtx);
|
717 |
|
|
|
718 |
|
|
static rtx loop_insn_emit_after (const struct loop *, basic_block, rtx, rtx);
|
719 |
|
|
static rtx loop_call_insn_emit_before (const struct loop *, basic_block,
|
720 |
|
|
rtx, rtx);
|
721 |
|
|
static rtx loop_call_insn_hoist (const struct loop *, rtx);
|
722 |
|
|
static rtx loop_insn_sink_or_swim (const struct loop *, rtx);
|
723 |
|
|
|
724 |
|
|
static void loop_dump_aux (const struct loop *, FILE *, int);
|
725 |
|
|
static void loop_delete_insns (rtx, rtx);
|
726 |
|
|
static HOST_WIDE_INT remove_constant_addition (rtx *);
|
727 |
|
|
static rtx gen_load_of_final_value (rtx, rtx);
|
728 |
|
|
void debug_ivs (const struct loop *);
|
729 |
|
|
void debug_iv_class (const struct iv_class *);
|
730 |
|
|
void debug_biv (const struct induction *);
|
731 |
|
|
void debug_giv (const struct induction *);
|
732 |
|
|
void debug_loop (const struct loop *);
|
733 |
|
|
void debug_loops (const struct loops *);
|
734 |
|
|
|
735 |
|
|
/* Bundle of arguments for the replace_loop_* walkers: replace each
   occurrence of MATCH with REPLACEMENT inside INSN.  */
typedef struct loop_replace_args
{
  rtx match;		/* Expression to search for.  */
  rtx replacement;	/* Expression substituted in its place.  */
  rtx insn;		/* Insn whose body/notes are being rewritten.  */
} loop_replace_args;
|
741 |
|
|
|
742 |
|
|
/* Nonzero iff INSN is between START and END, inclusive.  The uid test
   comes first: insns created after the luid tables were sized (uid >=
   max_uid_for_loop) have no valid luid, so they are conservatively
   treated as out of range rather than indexing past the tables.  */
#define INSN_IN_RANGE_P(INSN, START, END)	\
  (INSN_UID (INSN) < max_uid_for_loop		\
   && INSN_LUID (INSN) >= INSN_LUID (START)	\
   && INSN_LUID (INSN) <= INSN_LUID (END))
|
747 |
|
|
|
748 |
|
|
/* Indirect_jump_in_function is computed once per function. */
|
749 |
|
|
static int indirect_jump_in_function;
|
750 |
|
|
static int indirect_jump_in_function_p (rtx);
|
751 |
|
|
|
752 |
|
|
static int compute_luids (rtx, rtx, int);
|
753 |
|
|
|
754 |
|
|
static int biv_elimination_giv_has_0_offset (struct induction *,
|
755 |
|
|
struct induction *, rtx);
|
756 |
|
|
|
757 |
|
|
/* Benefit penalty, if a giv is not replaceable, i.e. must emit an insn to
|
758 |
|
|
copy the value of the strength reduced giv to its original register. */
|
759 |
|
|
static int copy_cost;
|
760 |
|
|
|
761 |
|
|
/* Cost of using a register, to normalize the benefits of a giv. */
|
762 |
|
|
static int reg_address_cost;
|
763 |
|
|
|
764 |
|
|
void
|
765 |
|
|
init_loop (void)
|
766 |
|
|
{
|
767 |
|
|
rtx reg = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
|
768 |
|
|
|
769 |
|
|
reg_address_cost = address_cost (reg, SImode);
|
770 |
|
|
|
771 |
|
|
copy_cost = COSTS_N_INSNS (1);
|
772 |
|
|
}
|
773 |
|
|
|
774 |
|
|
/* Compute the mapping from uids to luids.
|
775 |
|
|
LUIDs are numbers assigned to insns, like uids,
|
776 |
|
|
except that luids increase monotonically through the code.
|
777 |
|
|
Start at insn START and stop just before END. Assign LUIDs
|
778 |
|
|
starting with PREV_LUID + 1. Return the last assigned LUID + 1. */
|
779 |
|
|
static int
|
780 |
|
|
compute_luids (rtx start, rtx end, int prev_luid)
|
781 |
|
|
{
|
782 |
|
|
int i;
|
783 |
|
|
rtx insn;
|
784 |
|
|
|
785 |
|
|
for (insn = start, i = prev_luid; insn != end; insn = NEXT_INSN (insn))
|
786 |
|
|
{
|
787 |
|
|
if (INSN_UID (insn) >= max_uid_for_loop)
|
788 |
|
|
continue;
|
789 |
|
|
/* Don't assign luids to line-number NOTEs, so that the distance in
|
790 |
|
|
luids between two insns is not affected by -g. */
|
791 |
|
|
if (!NOTE_P (insn)
|
792 |
|
|
|| NOTE_LINE_NUMBER (insn) <= 0)
|
793 |
|
|
uid_luid[INSN_UID (insn)] = ++i;
|
794 |
|
|
else
|
795 |
|
|
/* Give a line number note the same luid as preceding insn. */
|
796 |
|
|
uid_luid[INSN_UID (insn)] = i;
|
797 |
|
|
}
|
798 |
|
|
return i + 1;
|
799 |
|
|
}
|
800 |
|
|
|
801 |
|
|
/* Entry point of this file.  Perform loop optimization
   on the current function.  F is the first insn of the function
   and DUMPFILE is a stream for output of a trace of actions taken
   (or 0 if none should be output).  FLAGS is forwarded unchanged to
   scan_loop for each loop; presumably a mask of LOOP_* pass options —
   confirm against the callers in toplev.c.  */

void
loop_optimize (rtx f, FILE *dumpfile, int flags)
{
  rtx insn;
  int i;
  struct loops loops_data;
  struct loops *loops = &loops_data;
  struct loop_info *loops_info;

  loop_dump_stream = dumpfile;

  init_recog_no_volatile ();

  max_reg_before_loop = max_reg_num ();
  loop_max_reg = max_reg_before_loop;

  regs_may_share = 0;

  /* Count the number of loops.  Each loop is delimited by a
     NOTE_INSN_LOOP_BEG note, so one pass over the insn stream
     suffices.  */

  max_loop_num = 0;
  for (insn = f; insn; insn = NEXT_INSN (insn))
    {
      if (NOTE_P (insn)
	  && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
	max_loop_num++;
    }

  /* Don't waste time if no loops.  */
  if (max_loop_num == 0)
    return;

  loops->num = max_loop_num;

  /* Get size to use for tables indexed by uids.
     Leave some space for labels allocated by find_and_verify_loops.  */
  max_uid_for_loop = get_max_uid () + 1 + max_loop_num * 32;

  uid_luid = xcalloc (max_uid_for_loop, sizeof (int));
  uid_loop = xcalloc (max_uid_for_loop, sizeof (struct loop *));

  /* Allocate storage for array of loops.  */
  loops->array = xcalloc (loops->num, sizeof (struct loop));

  /* Find and process each loop.
     First, find them, and record them in order of their beginnings.  */
  find_and_verify_loops (f, loops);

  /* Allocate and initialize auxiliary loop information.  */
  loops_info = xcalloc (loops->num, sizeof (struct loop_info));
  for (i = 0; i < (int) loops->num; i++)
    loops->array[i].aux = loops_info + i;

  /* Now find all register lifetimes.  This must be done after
     find_and_verify_loops, because it might reorder the insns in the
     function.  */
  reg_scan (f, max_reg_before_loop);

  /* This must occur after reg_scan so that registers created by gcse
     will have entries in the register tables.

     We could have added a call to reg_scan after gcse_main in toplev.c,
     but moving this call to init_alias_analysis is more efficient.  */
  init_alias_analysis ();

  /* See if we went too far.  Note that get_max_uid already returns
     one more that the maximum uid of all insn.  */
  gcc_assert (get_max_uid () <= max_uid_for_loop);
  /* Now reset it to the actual size we need.  See above.  */
  max_uid_for_loop = get_max_uid ();

  /* find_and_verify_loops has already called compute_luids, but it
     might have rearranged code afterwards, so we need to recompute
     the luids now.  */
  compute_luids (f, NULL_RTX, 0);

  /* Don't leave gaps in uid_luid for insns that have been
     deleted.  It is possible that the first or last insn
     using some register has been deleted by cross-jumping.
     Make sure that uid_luid for that former insn's uid
     points to the general area where that insn used to be.  */

  /* Seed uid_luid[0] with the first nonzero luid so the gap-filling
     loop below never propagates from an unset slot 0.  NOTE(review):
     if every entry were zero, the next loop would read uid_luid[-1]
     at i == 0; in practice at least one insn has a luid.  */
  for (i = 0; i < max_uid_for_loop; i++)
    {
      uid_luid[0] = uid_luid[i];
      if (uid_luid[0] != 0)
	break;
    }
  /* Fill each remaining gap with the luid of the nearest earlier
     insn that still has one.  */
  for (i = 0; i < max_uid_for_loop; i++)
    if (uid_luid[i] == 0)
      uid_luid[i] = uid_luid[i - 1];

  /* Determine if the function has indirect jump.  On some systems
     this prevents low overhead loop instructions from being used.  */
  indirect_jump_in_function = indirect_jump_in_function_p (f);

  /* Now scan the loops, last ones first, since this means inner ones are done
     before outer ones.  */
  for (i = max_loop_num - 1; i >= 0; i--)
    {
      struct loop *loop = &loops->array[i];

      /* Loops marked invalid by find_and_verify_loops, or with no
	 recorded end, are skipped entirely.  */
      if (! loop->invalid && loop->end)
	{
	  scan_loop (loop, flags);
	  ggc_collect ();
	}
    }

  end_alias_analysis ();

  /* Clean up.  */
  for (i = 0; i < (int) loops->num; i++)
    free (loops_info[i].mems);

  free (uid_luid);
  free (uid_loop);
  free (loops_info);
  free (loops->array);
}
|
925 |
|
|
|
926 |
|
|
/* Returns the next insn, in execution order, after INSN. START and
|
927 |
|
|
END are the NOTE_INSN_LOOP_BEG and NOTE_INSN_LOOP_END for the loop,
|
928 |
|
|
respectively. LOOP->TOP, if non-NULL, is the top of the loop in the
|
929 |
|
|
insn-stream; it is used with loops that are entered near the
|
930 |
|
|
bottom. */
|
931 |
|
|
|
932 |
|
|
static rtx
|
933 |
|
|
next_insn_in_loop (const struct loop *loop, rtx insn)
|
934 |
|
|
{
|
935 |
|
|
insn = NEXT_INSN (insn);
|
936 |
|
|
|
937 |
|
|
if (insn == loop->end)
|
938 |
|
|
{
|
939 |
|
|
if (loop->top)
|
940 |
|
|
/* Go to the top of the loop, and continue there. */
|
941 |
|
|
insn = loop->top;
|
942 |
|
|
else
|
943 |
|
|
/* We're done. */
|
944 |
|
|
insn = NULL_RTX;
|
945 |
|
|
}
|
946 |
|
|
|
947 |
|
|
if (insn == loop->scan_start)
|
948 |
|
|
/* We're done. */
|
949 |
|
|
insn = NULL_RTX;
|
950 |
|
|
|
951 |
|
|
return insn;
|
952 |
|
|
}
|
953 |
|
|
|
954 |
|
|
/* Find any register references hidden inside X and add them to
|
955 |
|
|
the dependency list DEPS. This is used to look inside CLOBBER (MEM
|
956 |
|
|
when checking whether a PARALLEL can be pulled out of a loop. */
|
957 |
|
|
|
958 |
|
|
static rtx
|
959 |
|
|
find_regs_nested (rtx deps, rtx x)
|
960 |
|
|
{
|
961 |
|
|
enum rtx_code code = GET_CODE (x);
|
962 |
|
|
if (code == REG)
|
963 |
|
|
deps = gen_rtx_EXPR_LIST (VOIDmode, x, deps);
|
964 |
|
|
else
|
965 |
|
|
{
|
966 |
|
|
const char *fmt = GET_RTX_FORMAT (code);
|
967 |
|
|
int i, j;
|
968 |
|
|
for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
|
969 |
|
|
{
|
970 |
|
|
if (fmt[i] == 'e')
|
971 |
|
|
deps = find_regs_nested (deps, XEXP (x, i));
|
972 |
|
|
else if (fmt[i] == 'E')
|
973 |
|
|
for (j = 0; j < XVECLEN (x, i); j++)
|
974 |
|
|
deps = find_regs_nested (deps, XVECEXP (x, i, j));
|
975 |
|
|
}
|
976 |
|
|
}
|
977 |
|
|
return deps;
|
978 |
|
|
}
|
979 |
|
|
|
980 |
|
|
/* Optimize one loop described by LOOP. */
|
981 |
|
|
|
982 |
|
|
/* ??? Could also move memory writes out of loops if the destination address
|
983 |
|
|
is invariant, the source is invariant, the memory write is not volatile,
|
984 |
|
|
and if we can prove that no read inside the loop can read this address
|
985 |
|
|
before the write occurs. If there is a read of this address after the
|
986 |
|
|
write, then we can also mark the memory read as invariant. */
|
987 |
|
|
|
988 |
|
|
static void
|
989 |
|
|
scan_loop (struct loop *loop, int flags)
|
990 |
|
|
{
|
991 |
|
|
struct loop_info *loop_info = LOOP_INFO (loop);
|
992 |
|
|
struct loop_regs *regs = LOOP_REGS (loop);
|
993 |
|
|
int i;
|
994 |
|
|
rtx loop_start = loop->start;
|
995 |
|
|
rtx loop_end = loop->end;
|
996 |
|
|
rtx p;
|
997 |
|
|
/* 1 if we are scanning insns that could be executed zero times. */
|
998 |
|
|
int maybe_never = 0;
|
999 |
|
|
/* 1 if we are scanning insns that might never be executed
|
1000 |
|
|
due to a subroutine call which might exit before they are reached. */
|
1001 |
|
|
int call_passed = 0;
|
1002 |
|
|
/* Number of insns in the loop. */
|
1003 |
|
|
int insn_count;
|
1004 |
|
|
int tem;
|
1005 |
|
|
rtx temp, update_start, update_end;
|
1006 |
|
|
/* The SET from an insn, if it is the only SET in the insn. */
|
1007 |
|
|
rtx set, set1;
|
1008 |
|
|
/* Chain describing insns movable in current loop. */
|
1009 |
|
|
struct loop_movables *movables = LOOP_MOVABLES (loop);
|
1010 |
|
|
/* Ratio of extra register life span we can justify
|
1011 |
|
|
for saving an instruction. More if loop doesn't call subroutines
|
1012 |
|
|
since in that case saving an insn makes more difference
|
1013 |
|
|
and more registers are available. */
|
1014 |
|
|
int threshold;
|
1015 |
|
|
int in_libcall;
|
1016 |
|
|
|
1017 |
|
|
loop->top = 0;
|
1018 |
|
|
|
1019 |
|
|
movables->head = 0;
|
1020 |
|
|
movables->last = 0;
|
1021 |
|
|
|
1022 |
|
|
/* Determine whether this loop starts with a jump down to a test at
|
1023 |
|
|
the end. This will occur for a small number of loops with a test
|
1024 |
|
|
that is too complex to duplicate in front of the loop.
|
1025 |
|
|
|
1026 |
|
|
We search for the first insn or label in the loop, skipping NOTEs.
|
1027 |
|
|
However, we must be careful not to skip past a NOTE_INSN_LOOP_BEG
|
1028 |
|
|
(because we might have a loop executed only once that contains a
|
1029 |
|
|
loop which starts with a jump to its exit test) or a NOTE_INSN_LOOP_END
|
1030 |
|
|
(in case we have a degenerate loop).
|
1031 |
|
|
|
1032 |
|
|
Note that if we mistakenly think that a loop is entered at the top
|
1033 |
|
|
when, in fact, it is entered at the exit test, the only effect will be
|
1034 |
|
|
slightly poorer optimization. Making the opposite error can generate
|
1035 |
|
|
incorrect code. Since very few loops now start with a jump to the
|
1036 |
|
|
exit test, the code here to detect that case is very conservative. */
|
1037 |
|
|
|
1038 |
|
|
for (p = NEXT_INSN (loop_start);
|
1039 |
|
|
p != loop_end
|
1040 |
|
|
&& !LABEL_P (p) && ! INSN_P (p)
|
1041 |
|
|
&& (!NOTE_P (p)
|
1042 |
|
|
|| (NOTE_LINE_NUMBER (p) != NOTE_INSN_LOOP_BEG
|
1043 |
|
|
&& NOTE_LINE_NUMBER (p) != NOTE_INSN_LOOP_END));
|
1044 |
|
|
p = NEXT_INSN (p))
|
1045 |
|
|
;
|
1046 |
|
|
|
1047 |
|
|
loop->scan_start = p;
|
1048 |
|
|
|
1049 |
|
|
/* If loop end is the end of the current function, then emit a
|
1050 |
|
|
NOTE_INSN_DELETED after loop_end and set loop->sink to the dummy
|
1051 |
|
|
note insn. This is the position we use when sinking insns out of
|
1052 |
|
|
the loop. */
|
1053 |
|
|
if (NEXT_INSN (loop->end) != 0)
|
1054 |
|
|
loop->sink = NEXT_INSN (loop->end);
|
1055 |
|
|
else
|
1056 |
|
|
loop->sink = emit_note_after (NOTE_INSN_DELETED, loop->end);
|
1057 |
|
|
|
1058 |
|
|
/* Set up variables describing this loop. */
|
1059 |
|
|
prescan_loop (loop);
|
1060 |
|
|
threshold = (loop_info->has_call ? 1 : 2) * (1 + n_non_fixed_regs);
|
1061 |
|
|
|
1062 |
|
|
/* If loop has a jump before the first label,
|
1063 |
|
|
the true entry is the target of that jump.
|
1064 |
|
|
Start scan from there.
|
1065 |
|
|
But record in LOOP->TOP the place where the end-test jumps
|
1066 |
|
|
back to so we can scan that after the end of the loop. */
|
1067 |
|
|
if (JUMP_P (p)
|
1068 |
|
|
/* Loop entry must be unconditional jump (and not a RETURN) */
|
1069 |
|
|
&& any_uncondjump_p (p)
|
1070 |
|
|
&& JUMP_LABEL (p) != 0
|
1071 |
|
|
/* Check to see whether the jump actually
|
1072 |
|
|
jumps out of the loop (meaning it's no loop).
|
1073 |
|
|
This case can happen for things like
|
1074 |
|
|
do {..} while (0). If this label was generated previously
|
1075 |
|
|
by loop, we can't tell anything about it and have to reject
|
1076 |
|
|
the loop. */
|
1077 |
|
|
&& INSN_IN_RANGE_P (JUMP_LABEL (p), loop_start, loop_end))
|
1078 |
|
|
{
|
1079 |
|
|
loop->top = next_label (loop->scan_start);
|
1080 |
|
|
loop->scan_start = JUMP_LABEL (p);
|
1081 |
|
|
}
|
1082 |
|
|
|
1083 |
|
|
/* If LOOP->SCAN_START was an insn created by loop, we don't know its luid
|
1084 |
|
|
as required by loop_reg_used_before_p. So skip such loops. (This
|
1085 |
|
|
test may never be true, but it's best to play it safe.)
|
1086 |
|
|
|
1087 |
|
|
Also, skip loops where we do not start scanning at a label. This
|
1088 |
|
|
test also rejects loops starting with a JUMP_INSN that failed the
|
1089 |
|
|
test above. */
|
1090 |
|
|
|
1091 |
|
|
if (INSN_UID (loop->scan_start) >= max_uid_for_loop
|
1092 |
|
|
|| !LABEL_P (loop->scan_start))
|
1093 |
|
|
{
|
1094 |
|
|
if (loop_dump_stream)
|
1095 |
|
|
fprintf (loop_dump_stream, "\nLoop from %d to %d is phony.\n\n",
|
1096 |
|
|
INSN_UID (loop_start), INSN_UID (loop_end));
|
1097 |
|
|
return;
|
1098 |
|
|
}
|
1099 |
|
|
|
1100 |
|
|
/* Allocate extra space for REGs that might be created by load_mems.
|
1101 |
|
|
We allocate a little extra slop as well, in the hopes that we
|
1102 |
|
|
won't have to reallocate the regs array. */
|
1103 |
|
|
loop_regs_scan (loop, loop_info->mems_idx + 16);
|
1104 |
|
|
insn_count = count_insns_in_loop (loop);
|
1105 |
|
|
|
1106 |
|
|
if (loop_dump_stream)
|
1107 |
|
|
fprintf (loop_dump_stream, "\nLoop from %d to %d: %d real insns.\n",
|
1108 |
|
|
INSN_UID (loop_start), INSN_UID (loop_end), insn_count);
|
1109 |
|
|
|
1110 |
|
|
/* Scan through the loop finding insns that are safe to move.
|
1111 |
|
|
Set REGS->ARRAY[I].SET_IN_LOOP negative for the reg I being set, so that
|
1112 |
|
|
this reg will be considered invariant for subsequent insns.
|
1113 |
|
|
We consider whether subsequent insns use the reg
|
1114 |
|
|
in deciding whether it is worth actually moving.
|
1115 |
|
|
|
1116 |
|
|
MAYBE_NEVER is nonzero if we have passed a conditional jump insn
|
1117 |
|
|
and therefore it is possible that the insns we are scanning
|
1118 |
|
|
would never be executed. At such times, we must make sure
|
1119 |
|
|
that it is safe to execute the insn once instead of zero times.
|
1120 |
|
|
When MAYBE_NEVER is 0, all insns will be executed at least once
|
1121 |
|
|
so that is not a problem. */
|
1122 |
|
|
|
1123 |
|
|
for (in_libcall = 0, p = next_insn_in_loop (loop, loop->scan_start);
|
1124 |
|
|
p != NULL_RTX;
|
1125 |
|
|
p = next_insn_in_loop (loop, p))
|
1126 |
|
|
{
|
1127 |
|
|
if (in_libcall && INSN_P (p) && find_reg_note (p, REG_RETVAL, NULL_RTX))
|
1128 |
|
|
in_libcall--;
|
1129 |
|
|
if (NONJUMP_INSN_P (p))
|
1130 |
|
|
{
|
1131 |
|
|
/* Do not scan past an optimization barrier. */
|
1132 |
|
|
if (GET_CODE (PATTERN (p)) == ASM_INPUT)
|
1133 |
|
|
break;
|
1134 |
|
|
temp = find_reg_note (p, REG_LIBCALL, NULL_RTX);
|
1135 |
|
|
if (temp)
|
1136 |
|
|
in_libcall++;
|
1137 |
|
|
if (! in_libcall
|
1138 |
|
|
&& (set = single_set (p))
|
1139 |
|
|
&& REG_P (SET_DEST (set))
|
1140 |
|
|
&& SET_DEST (set) != frame_pointer_rtx
|
1141 |
|
|
#ifdef PIC_OFFSET_TABLE_REG_CALL_CLOBBERED
|
1142 |
|
|
&& SET_DEST (set) != pic_offset_table_rtx
|
1143 |
|
|
#endif
|
1144 |
|
|
&& ! regs->array[REGNO (SET_DEST (set))].may_not_optimize)
|
1145 |
|
|
{
|
1146 |
|
|
int tem1 = 0;
|
1147 |
|
|
int tem2 = 0;
|
1148 |
|
|
int move_insn = 0;
|
1149 |
|
|
int insert_temp = 0;
|
1150 |
|
|
rtx src = SET_SRC (set);
|
1151 |
|
|
rtx dependencies = 0;
|
1152 |
|
|
|
1153 |
|
|
/* Figure out what to use as a source of this insn. If a
|
1154 |
|
|
REG_EQUIV note is given or if a REG_EQUAL note with a
|
1155 |
|
|
constant operand is specified, use it as the source and
|
1156 |
|
|
mark that we should move this insn by calling
|
1157 |
|
|
emit_move_insn rather that duplicating the insn.
|
1158 |
|
|
|
1159 |
|
|
Otherwise, only use the REG_EQUAL contents if a REG_RETVAL
|
1160 |
|
|
note is present. */
|
1161 |
|
|
temp = find_reg_note (p, REG_EQUIV, NULL_RTX);
|
1162 |
|
|
if (temp)
|
1163 |
|
|
src = XEXP (temp, 0), move_insn = 1;
|
1164 |
|
|
else
|
1165 |
|
|
{
|
1166 |
|
|
temp = find_reg_note (p, REG_EQUAL, NULL_RTX);
|
1167 |
|
|
if (temp && CONSTANT_P (XEXP (temp, 0)))
|
1168 |
|
|
src = XEXP (temp, 0), move_insn = 1;
|
1169 |
|
|
if (temp && find_reg_note (p, REG_RETVAL, NULL_RTX))
|
1170 |
|
|
{
|
1171 |
|
|
src = XEXP (temp, 0);
|
1172 |
|
|
/* A libcall block can use regs that don't appear in
|
1173 |
|
|
the equivalent expression. To move the libcall,
|
1174 |
|
|
we must move those regs too. */
|
1175 |
|
|
dependencies = libcall_other_reg (p, src);
|
1176 |
|
|
}
|
1177 |
|
|
}
|
1178 |
|
|
|
1179 |
|
|
/* For parallels, add any possible uses to the dependencies, as
|
1180 |
|
|
we can't move the insn without resolving them first.
|
1181 |
|
|
MEMs inside CLOBBERs may also reference registers; these
|
1182 |
|
|
count as implicit uses. */
|
1183 |
|
|
if (GET_CODE (PATTERN (p)) == PARALLEL)
|
1184 |
|
|
{
|
1185 |
|
|
for (i = 0; i < XVECLEN (PATTERN (p), 0); i++)
|
1186 |
|
|
{
|
1187 |
|
|
rtx x = XVECEXP (PATTERN (p), 0, i);
|
1188 |
|
|
if (GET_CODE (x) == USE)
|
1189 |
|
|
dependencies
|
1190 |
|
|
= gen_rtx_EXPR_LIST (VOIDmode, XEXP (x, 0),
|
1191 |
|
|
dependencies);
|
1192 |
|
|
else if (GET_CODE (x) == CLOBBER
|
1193 |
|
|
&& MEM_P (XEXP (x, 0)))
|
1194 |
|
|
dependencies = find_regs_nested (dependencies,
|
1195 |
|
|
XEXP (XEXP (x, 0), 0));
|
1196 |
|
|
}
|
1197 |
|
|
}
|
1198 |
|
|
|
1199 |
|
|
if (/* The register is used in basic blocks other
|
1200 |
|
|
than the one where it is set (meaning that
|
1201 |
|
|
something after this point in the loop might
|
1202 |
|
|
depend on its value before the set). */
|
1203 |
|
|
! reg_in_basic_block_p (p, SET_DEST (set))
|
1204 |
|
|
/* And the set is not guaranteed to be executed once
|
1205 |
|
|
the loop starts, or the value before the set is
|
1206 |
|
|
needed before the set occurs...
|
1207 |
|
|
|
1208 |
|
|
??? Note we have quadratic behavior here, mitigated
|
1209 |
|
|
by the fact that the previous test will often fail for
|
1210 |
|
|
large loops. Rather than re-scanning the entire loop
|
1211 |
|
|
each time for register usage, we should build tables
|
1212 |
|
|
of the register usage and use them here instead. */
|
1213 |
|
|
&& (maybe_never
|
1214 |
|
|
|| loop_reg_used_before_p (loop, set, p)))
|
1215 |
|
|
/* It is unsafe to move the set. However, it may be OK to
|
1216 |
|
|
move the source into a new pseudo, and substitute a
|
1217 |
|
|
reg-to-reg copy for the original insn.
|
1218 |
|
|
|
1219 |
|
|
This code used to consider it OK to move a set of a variable
|
1220 |
|
|
which was not created by the user and not used in an exit
|
1221 |
|
|
test.
|
1222 |
|
|
That behavior is incorrect and was removed. */
|
1223 |
|
|
insert_temp = 1;
|
1224 |
|
|
|
1225 |
|
|
/* Don't try to optimize a MODE_CC set with a constant
|
1226 |
|
|
source. It probably will be combined with a conditional
|
1227 |
|
|
jump. */
|
1228 |
|
|
if (GET_MODE_CLASS (GET_MODE (SET_DEST (set))) == MODE_CC
|
1229 |
|
|
&& CONSTANT_P (src))
|
1230 |
|
|
;
|
1231 |
|
|
/* Don't try to optimize a register that was made
|
1232 |
|
|
by loop-optimization for an inner loop.
|
1233 |
|
|
We don't know its life-span, so we can't compute
|
1234 |
|
|
the benefit. */
|
1235 |
|
|
else if (REGNO (SET_DEST (set)) >= max_reg_before_loop)
|
1236 |
|
|
;
|
1237 |
|
|
/* Don't move the source and add a reg-to-reg copy:
|
1238 |
|
|
- with -Os (this certainly increases size),
|
1239 |
|
|
- if the mode doesn't support copy operations (obviously),
|
1240 |
|
|
- if the source is already a reg (the motion will gain nothing),
|
1241 |
|
|
- if the source is a legitimate constant (likewise),
|
1242 |
|
|
- if the dest is a hard register (may be unrecognizable). */
|
1243 |
|
|
else if (insert_temp
|
1244 |
|
|
&& (optimize_size
|
1245 |
|
|
|| ! can_copy_p (GET_MODE (SET_SRC (set)))
|
1246 |
|
|
|| REG_P (SET_SRC (set))
|
1247 |
|
|
|| (CONSTANT_P (SET_SRC (set))
|
1248 |
|
|
&& LEGITIMATE_CONSTANT_P (SET_SRC (set)))
|
1249 |
|
|
|| REGNO (SET_DEST (set)) < FIRST_PSEUDO_REGISTER))
|
1250 |
|
|
;
|
1251 |
|
|
else if ((tem = loop_invariant_p (loop, src))
|
1252 |
|
|
&& (dependencies == 0
|
1253 |
|
|
|| (tem2
|
1254 |
|
|
= loop_invariant_p (loop, dependencies)) != 0)
|
1255 |
|
|
&& (regs->array[REGNO (SET_DEST (set))].set_in_loop == 1
|
1256 |
|
|
|| (tem1
|
1257 |
|
|
= consec_sets_invariant_p
|
1258 |
|
|
(loop, SET_DEST (set),
|
1259 |
|
|
regs->array[REGNO (SET_DEST (set))].set_in_loop,
|
1260 |
|
|
p)))
|
1261 |
|
|
/* If the insn can cause a trap (such as divide by zero),
|
1262 |
|
|
can't move it unless it's guaranteed to be executed
|
1263 |
|
|
once loop is entered. Even a function call might
|
1264 |
|
|
prevent the trap insn from being reached
|
1265 |
|
|
(since it might exit!) */
|
1266 |
|
|
&& ! ((maybe_never || call_passed)
|
1267 |
|
|
&& may_trap_p (src)))
|
1268 |
|
|
{
|
1269 |
|
|
struct movable *m;
|
1270 |
|
|
int regno = REGNO (SET_DEST (set));
|
1271 |
|
|
rtx user, user_set;
|
1272 |
|
|
|
1273 |
|
|
/* A potential lossage is where we have a case where two
|
1274 |
|
|
insns can be combined as long as they are both in the
|
1275 |
|
|
loop, but we move one of them outside the loop. For
|
1276 |
|
|
large loops, this can lose. The most common case of
|
1277 |
|
|
this is the address of a function being called.
|
1278 |
|
|
|
1279 |
|
|
Therefore, if this register is marked as being used
|
1280 |
|
|
exactly once if we are in a loop with calls
|
1281 |
|
|
(a "large loop"), see if we can replace the usage of
|
1282 |
|
|
this register with the source of this SET. If we can,
|
1283 |
|
|
delete this insn.
|
1284 |
|
|
|
1285 |
|
|
Don't do this if:
|
1286 |
|
|
(1) P has a REG_RETVAL note or
|
1287 |
|
|
(2) if we have SMALL_REGISTER_CLASSES and
|
1288 |
|
|
(a) SET_SRC is a hard register or
|
1289 |
|
|
(b) the destination of the user is a hard register. */
|
1290 |
|
|
|
1291 |
|
|
if (loop_info->has_call
|
1292 |
|
|
&& regno >= FIRST_PSEUDO_REGISTER
|
1293 |
|
|
&& (user = regs->array[regno].single_usage) != NULL
|
1294 |
|
|
&& user != const0_rtx
|
1295 |
|
|
&& REGNO_FIRST_UID (regno) == INSN_UID (p)
|
1296 |
|
|
&& REGNO_LAST_UID (regno) == INSN_UID (user)
|
1297 |
|
|
&& regs->array[regno].set_in_loop == 1
|
1298 |
|
|
&& GET_CODE (SET_SRC (set)) != ASM_OPERANDS
|
1299 |
|
|
&& ! side_effects_p (SET_SRC (set))
|
1300 |
|
|
&& ! find_reg_note (p, REG_RETVAL, NULL_RTX)
|
1301 |
|
|
&& (!SMALL_REGISTER_CLASSES
|
1302 |
|
|
|| !REG_P (SET_SRC (set))
|
1303 |
|
|
|| !HARD_REGISTER_P (SET_SRC (set)))
|
1304 |
|
|
&& (!SMALL_REGISTER_CLASSES
|
1305 |
|
|
|| !NONJUMP_INSN_P (user)
|
1306 |
|
|
|| !(user_set = single_set (user))
|
1307 |
|
|
|| !REG_P (SET_DEST (user_set))
|
1308 |
|
|
|| !HARD_REGISTER_P (SET_DEST (user_set)))
|
1309 |
|
|
/* This test is not redundant; SET_SRC (set) might be
|
1310 |
|
|
a call-clobbered register and the life of REGNO
|
1311 |
|
|
might span a call. */
|
1312 |
|
|
&& ! modified_between_p (SET_SRC (set), p, user)
|
1313 |
|
|
&& no_labels_between_p (p, user)
|
1314 |
|
|
&& validate_replace_rtx (SET_DEST (set),
|
1315 |
|
|
SET_SRC (set), user))
|
1316 |
|
|
{
|
1317 |
|
|
/* Replace any usage in a REG_EQUAL note. Must copy
|
1318 |
|
|
the new source, so that we don't get rtx sharing
|
1319 |
|
|
between the SET_SOURCE and REG_NOTES of insn p. */
|
1320 |
|
|
REG_NOTES (user)
|
1321 |
|
|
= replace_rtx (REG_NOTES (user), SET_DEST (set),
|
1322 |
|
|
copy_rtx (SET_SRC (set)));
|
1323 |
|
|
|
1324 |
|
|
delete_insn (p);
|
1325 |
|
|
for (i = 0; i < LOOP_REGNO_NREGS (regno, SET_DEST (set));
|
1326 |
|
|
i++)
|
1327 |
|
|
regs->array[regno+i].set_in_loop = 0;
|
1328 |
|
|
continue;
|
1329 |
|
|
}
|
1330 |
|
|
|
1331 |
|
|
m = xmalloc (sizeof (struct movable));
|
1332 |
|
|
m->next = 0;
|
1333 |
|
|
m->insn = p;
|
1334 |
|
|
m->set_src = src;
|
1335 |
|
|
m->dependencies = dependencies;
|
1336 |
|
|
m->set_dest = SET_DEST (set);
|
1337 |
|
|
m->force = 0;
|
1338 |
|
|
m->consec
|
1339 |
|
|
= regs->array[REGNO (SET_DEST (set))].set_in_loop - 1;
|
1340 |
|
|
m->done = 0;
|
1341 |
|
|
m->forces = 0;
|
1342 |
|
|
m->partial = 0;
|
1343 |
|
|
m->move_insn = move_insn;
|
1344 |
|
|
m->move_insn_first = 0;
|
1345 |
|
|
m->insert_temp = insert_temp;
|
1346 |
|
|
m->is_equiv = (find_reg_note (p, REG_EQUIV, NULL_RTX) != 0);
|
1347 |
|
|
m->savemode = VOIDmode;
|
1348 |
|
|
m->regno = regno;
|
1349 |
|
|
/* Set M->cond if either loop_invariant_p
|
1350 |
|
|
or consec_sets_invariant_p returned 2
|
1351 |
|
|
(only conditionally invariant). */
|
1352 |
|
|
m->cond = ((tem | tem1 | tem2) > 1);
|
1353 |
|
|
m->global = LOOP_REG_GLOBAL_P (loop, regno);
|
1354 |
|
|
m->match = 0;
|
1355 |
|
|
m->lifetime = LOOP_REG_LIFETIME (loop, regno);
|
1356 |
|
|
m->savings = regs->array[regno].n_times_set;
|
1357 |
|
|
if (find_reg_note (p, REG_RETVAL, NULL_RTX))
|
1358 |
|
|
m->savings += libcall_benefit (p);
|
1359 |
|
|
for (i = 0; i < LOOP_REGNO_NREGS (regno, SET_DEST (set)); i++)
|
1360 |
|
|
regs->array[regno+i].set_in_loop = move_insn ? -2 : -1;
|
1361 |
|
|
/* Add M to the end of the chain MOVABLES. */
|
1362 |
|
|
loop_movables_add (movables, m);
|
1363 |
|
|
|
1364 |
|
|
if (m->consec > 0)
|
1365 |
|
|
{
|
1366 |
|
|
/* It is possible for the first instruction to have a
|
1367 |
|
|
REG_EQUAL note but a non-invariant SET_SRC, so we must
|
1368 |
|
|
remember the status of the first instruction in case
|
1369 |
|
|
the last instruction doesn't have a REG_EQUAL note. */
|
1370 |
|
|
m->move_insn_first = m->move_insn;
|
1371 |
|
|
|
1372 |
|
|
/* Skip this insn, not checking REG_LIBCALL notes. */
|
1373 |
|
|
p = next_nonnote_insn (p);
|
1374 |
|
|
/* Skip the consecutive insns, if there are any. */
|
1375 |
|
|
p = skip_consec_insns (p, m->consec);
|
1376 |
|
|
/* Back up to the last insn of the consecutive group. */
|
1377 |
|
|
p = prev_nonnote_insn (p);
|
1378 |
|
|
|
1379 |
|
|
/* We must now reset m->move_insn, m->is_equiv, and
|
1380 |
|
|
possibly m->set_src to correspond to the effects of
|
1381 |
|
|
all the insns. */
|
1382 |
|
|
temp = find_reg_note (p, REG_EQUIV, NULL_RTX);
|
1383 |
|
|
if (temp)
|
1384 |
|
|
m->set_src = XEXP (temp, 0), m->move_insn = 1;
|
1385 |
|
|
else
|
1386 |
|
|
{
|
1387 |
|
|
temp = find_reg_note (p, REG_EQUAL, NULL_RTX);
|
1388 |
|
|
if (temp && CONSTANT_P (XEXP (temp, 0)))
|
1389 |
|
|
m->set_src = XEXP (temp, 0), m->move_insn = 1;
|
1390 |
|
|
else
|
1391 |
|
|
m->move_insn = 0;
|
1392 |
|
|
|
1393 |
|
|
}
|
1394 |
|
|
m->is_equiv
|
1395 |
|
|
= (find_reg_note (p, REG_EQUIV, NULL_RTX) != 0);
|
1396 |
|
|
}
|
1397 |
|
|
}
|
1398 |
|
|
/* If this register is always set within a STRICT_LOW_PART
|
1399 |
|
|
or set to zero, then its high bytes are constant.
|
1400 |
|
|
So clear them outside the loop and within the loop
|
1401 |
|
|
just load the low bytes.
|
1402 |
|
|
We must check that the machine has an instruction to do so.
|
1403 |
|
|
Also, if the value loaded into the register
|
1404 |
|
|
depends on the same register, this cannot be done. */
|
1405 |
|
|
else if (SET_SRC (set) == const0_rtx
|
1406 |
|
|
&& NONJUMP_INSN_P (NEXT_INSN (p))
|
1407 |
|
|
&& (set1 = single_set (NEXT_INSN (p)))
|
1408 |
|
|
&& GET_CODE (set1) == SET
|
1409 |
|
|
&& (GET_CODE (SET_DEST (set1)) == STRICT_LOW_PART)
|
1410 |
|
|
&& (GET_CODE (XEXP (SET_DEST (set1), 0)) == SUBREG)
|
1411 |
|
|
&& (SUBREG_REG (XEXP (SET_DEST (set1), 0))
|
1412 |
|
|
== SET_DEST (set))
|
1413 |
|
|
&& !reg_mentioned_p (SET_DEST (set), SET_SRC (set1)))
|
1414 |
|
|
{
|
1415 |
|
|
int regno = REGNO (SET_DEST (set));
|
1416 |
|
|
if (regs->array[regno].set_in_loop == 2)
|
1417 |
|
|
{
|
1418 |
|
|
struct movable *m;
|
1419 |
|
|
m = xmalloc (sizeof (struct movable));
|
1420 |
|
|
m->next = 0;
|
1421 |
|
|
m->insn = p;
|
1422 |
|
|
m->set_dest = SET_DEST (set);
|
1423 |
|
|
m->dependencies = 0;
|
1424 |
|
|
m->force = 0;
|
1425 |
|
|
m->consec = 0;
|
1426 |
|
|
m->done = 0;
|
1427 |
|
|
m->forces = 0;
|
1428 |
|
|
m->move_insn = 0;
|
1429 |
|
|
m->move_insn_first = 0;
|
1430 |
|
|
m->insert_temp = insert_temp;
|
1431 |
|
|
m->partial = 1;
|
1432 |
|
|
/* If the insn may not be executed on some cycles,
|
1433 |
|
|
we can't clear the whole reg; clear just high part.
|
1434 |
|
|
Not even if the reg is used only within this loop.
|
1435 |
|
|
Consider this:
|
1436 |
|
|
while (1)
|
1437 |
|
|
while (s != t) {
|
1438 |
|
|
if (foo ()) x = *s;
|
1439 |
|
|
use (x);
|
1440 |
|
|
}
|
1441 |
|
|
Clearing x before the inner loop could clobber a value
|
1442 |
|
|
being saved from the last time around the outer loop.
|
1443 |
|
|
However, if the reg is not used outside this loop
|
1444 |
|
|
and all uses of the register are in the same
|
1445 |
|
|
basic block as the store, there is no problem.
|
1446 |
|
|
|
1447 |
|
|
If this insn was made by loop, we don't know its
|
1448 |
|
|
INSN_LUID and hence must make a conservative
|
1449 |
|
|
assumption. */
|
1450 |
|
|
m->global = (INSN_UID (p) >= max_uid_for_loop
|
1451 |
|
|
|| LOOP_REG_GLOBAL_P (loop, regno)
|
1452 |
|
|
|| (labels_in_range_p
|
1453 |
|
|
(p, REGNO_FIRST_LUID (regno))));
|
1454 |
|
|
if (maybe_never && m->global)
|
1455 |
|
|
m->savemode = GET_MODE (SET_SRC (set1));
|
1456 |
|
|
else
|
1457 |
|
|
m->savemode = VOIDmode;
|
1458 |
|
|
m->regno = regno;
|
1459 |
|
|
m->cond = 0;
|
1460 |
|
|
m->match = 0;
|
1461 |
|
|
m->lifetime = LOOP_REG_LIFETIME (loop, regno);
|
1462 |
|
|
m->savings = 1;
|
1463 |
|
|
for (i = 0;
|
1464 |
|
|
i < LOOP_REGNO_NREGS (regno, SET_DEST (set));
|
1465 |
|
|
i++)
|
1466 |
|
|
regs->array[regno+i].set_in_loop = -1;
|
1467 |
|
|
/* Add M to the end of the chain MOVABLES. */
|
1468 |
|
|
loop_movables_add (movables, m);
|
1469 |
|
|
}
|
1470 |
|
|
}
|
1471 |
|
|
}
|
1472 |
|
|
}
|
1473 |
|
|
/* Past a call insn, we get to insns which might not be executed
|
1474 |
|
|
because the call might exit. This matters for insns that trap.
|
1475 |
|
|
Constant and pure call insns always return, so they don't count. */
|
1476 |
|
|
else if (CALL_P (p) && ! CONST_OR_PURE_CALL_P (p))
|
1477 |
|
|
call_passed = 1;
|
1478 |
|
|
/* Past a label or a jump, we get to insns for which we
|
1479 |
|
|
can't count on whether or how many times they will be
|
1480 |
|
|
executed during each iteration. Therefore, we can
|
1481 |
|
|
only move out sets of trivial variables
|
1482 |
|
|
(those not used after the loop). */
|
1483 |
|
|
/* Similar code appears twice in strength_reduce. */
|
1484 |
|
|
else if ((LABEL_P (p) || JUMP_P (p))
|
1485 |
|
|
/* If we enter the loop in the middle, and scan around to the
|
1486 |
|
|
beginning, don't set maybe_never for that. This must be an
|
1487 |
|
|
unconditional jump, otherwise the code at the top of the
|
1488 |
|
|
loop might never be executed. Unconditional jumps are
|
1489 |
|
|
followed by a barrier then the loop_end. */
|
1490 |
|
|
&& ! (JUMP_P (p) && JUMP_LABEL (p) == loop->top
|
1491 |
|
|
&& NEXT_INSN (NEXT_INSN (p)) == loop_end
|
1492 |
|
|
&& any_uncondjump_p (p)))
|
1493 |
|
|
maybe_never = 1;
|
1494 |
|
|
}
|
1495 |
|
|
|
1496 |
|
|
/* If one movable subsumes another, ignore that other. */
|
1497 |
|
|
|
1498 |
|
|
ignore_some_movables (movables);
|
1499 |
|
|
|
1500 |
|
|
/* For each movable insn, see if the reg that it loads
|
1501 |
|
|
leads when it dies right into another conditionally movable insn.
|
1502 |
|
|
If so, record that the second insn "forces" the first one,
|
1503 |
|
|
since the second can be moved only if the first is. */
|
1504 |
|
|
|
1505 |
|
|
force_movables (movables);
|
1506 |
|
|
|
1507 |
|
|
/* See if there are multiple movable insns that load the same value.
|
1508 |
|
|
If there are, make all but the first point at the first one
|
1509 |
|
|
through the `match' field, and add the priorities of them
|
1510 |
|
|
all together as the priority of the first. */
|
1511 |
|
|
|
1512 |
|
|
combine_movables (movables, regs);
|
1513 |
|
|
|
1514 |
|
|
/* Now consider each movable insn to decide whether it is worth moving.
|
1515 |
|
|
Store 0 in regs->array[I].set_in_loop for each reg I that is moved.
|
1516 |
|
|
|
1517 |
|
|
For machines with few registers this increases code size, so do not
|
1518 |
|
|
move moveables when optimizing for code size on such machines.
|
1519 |
|
|
(The 18 below is the value for i386.) */
|
1520 |
|
|
|
1521 |
|
|
if (!optimize_size
|
1522 |
|
|
|| (reg_class_size[GENERAL_REGS] > 18 && !loop_info->has_call))
|
1523 |
|
|
{
|
1524 |
|
|
move_movables (loop, movables, threshold, insn_count);
|
1525 |
|
|
|
1526 |
|
|
/* Recalculate regs->array if move_movables has created new
|
1527 |
|
|
registers. */
|
1528 |
|
|
if (max_reg_num () > regs->num)
|
1529 |
|
|
{
|
1530 |
|
|
loop_regs_scan (loop, 0);
|
1531 |
|
|
for (update_start = loop_start;
|
1532 |
|
|
PREV_INSN (update_start)
|
1533 |
|
|
&& !LABEL_P (PREV_INSN (update_start));
|
1534 |
|
|
update_start = PREV_INSN (update_start))
|
1535 |
|
|
;
|
1536 |
|
|
update_end = NEXT_INSN (loop_end);
|
1537 |
|
|
|
1538 |
|
|
reg_scan_update (update_start, update_end, loop_max_reg);
|
1539 |
|
|
loop_max_reg = max_reg_num ();
|
1540 |
|
|
}
|
1541 |
|
|
}
|
1542 |
|
|
|
1543 |
|
|
/* Now candidates that still are negative are those not moved.
|
1544 |
|
|
Change regs->array[I].set_in_loop to indicate that those are not actually
|
1545 |
|
|
invariant. */
|
1546 |
|
|
for (i = 0; i < regs->num; i++)
|
1547 |
|
|
if (regs->array[i].set_in_loop < 0)
|
1548 |
|
|
regs->array[i].set_in_loop = regs->array[i].n_times_set;
|
1549 |
|
|
|
1550 |
|
|
/* Now that we've moved some things out of the loop, we might be able to
|
1551 |
|
|
hoist even more memory references. */
|
1552 |
|
|
load_mems (loop);
|
1553 |
|
|
|
1554 |
|
|
/* Recalculate regs->array if load_mems has created new registers. */
|
1555 |
|
|
if (max_reg_num () > regs->num)
|
1556 |
|
|
loop_regs_scan (loop, 0);
|
1557 |
|
|
|
1558 |
|
|
for (update_start = loop_start;
|
1559 |
|
|
PREV_INSN (update_start)
|
1560 |
|
|
&& !LABEL_P (PREV_INSN (update_start));
|
1561 |
|
|
update_start = PREV_INSN (update_start))
|
1562 |
|
|
;
|
1563 |
|
|
update_end = NEXT_INSN (loop_end);
|
1564 |
|
|
|
1565 |
|
|
reg_scan_update (update_start, update_end, loop_max_reg);
|
1566 |
|
|
loop_max_reg = max_reg_num ();
|
1567 |
|
|
|
1568 |
|
|
if (flag_strength_reduce)
|
1569 |
|
|
{
|
1570 |
|
|
if (update_end && LABEL_P (update_end))
|
1571 |
|
|
/* Ensure our label doesn't go away. */
|
1572 |
|
|
LABEL_NUSES (update_end)++;
|
1573 |
|
|
|
1574 |
|
|
strength_reduce (loop, flags);
|
1575 |
|
|
|
1576 |
|
|
reg_scan_update (update_start, update_end, loop_max_reg);
|
1577 |
|
|
loop_max_reg = max_reg_num ();
|
1578 |
|
|
|
1579 |
|
|
if (update_end && LABEL_P (update_end)
|
1580 |
|
|
&& --LABEL_NUSES (update_end) == 0)
|
1581 |
|
|
delete_related_insns (update_end);
|
1582 |
|
|
}
|
1583 |
|
|
|
1584 |
|
|
|
1585 |
|
|
/* The movable information is required for strength reduction. */
|
1586 |
|
|
loop_movables_free (movables);
|
1587 |
|
|
|
1588 |
|
|
free (regs->array);
|
1589 |
|
|
regs->array = 0;
|
1590 |
|
|
regs->num = 0;
|
1591 |
|
|
}
|
1592 |
|
|
|
1593 |
|
|
/* Add elements to *OUTPUT to record all the pseudo-regs
   mentioned in IN_THIS but not mentioned in NOT_IN_THIS.

   Walks IN_THIS recursively; each qualifying pseudo is pushed onto
   the EXPR_LIST chained through *OUTPUT.  Hard registers are never
   recorded.  */

static void
record_excess_regs (rtx in_this, rtx not_in_this, rtx *output)
{
  enum rtx_code code;
  const char *fmt;
  int i;

  code = GET_CODE (in_this);

  switch (code)
    {
    /* Leaf codes that cannot contain a register: nothing to record.  */
    case PC:
    case CC0:
    case CONST_INT:
    case CONST_DOUBLE:
    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      return;

    case REG:
      /* Record only pseudo registers that do not also appear in
	 NOT_IN_THIS.  */
      if (REGNO (in_this) >= FIRST_PSEUDO_REGISTER
	  && ! reg_mentioned_p (in_this, not_in_this))
	*output = gen_rtx_EXPR_LIST (VOIDmode, in_this, *output);
      return;

    default:
      break;
    }

  /* Recurse into every rtx ('e') and rtx-vector ('E') operand.  */
  fmt = GET_RTX_FORMAT (code);
  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
    {
      int j;

      switch (fmt[i])
	{
	case 'E':
	  for (j = 0; j < XVECLEN (in_this, i); j++)
	    record_excess_regs (XVECEXP (in_this, i, j), not_in_this, output);
	  break;

	case 'e':
	  record_excess_regs (XEXP (in_this, i), not_in_this, output);
	  break;
	}
    }
}
|
1644 |
|
|
|
1645 |
|
|
/* Check what regs are referred to in the libcall block ending with INSN,
   aside from those mentioned in the equivalent value.
   If there are none, return 0.
   If there are one or more, return an EXPR_LIST containing all of them.

   INSN must carry a REG_RETVAL note (i.e. end a libcall sequence);
   EQUIV is the equivalent-value expression whose registers are to be
   excluded from the result.  */

static rtx
libcall_other_reg (rtx insn, rtx equiv)
{
  /* The REG_RETVAL note points back at the first insn of the libcall.  */
  rtx note = find_reg_note (insn, REG_RETVAL, NULL_RTX);
  rtx p = XEXP (note, 0);
  rtx output = 0;

  /* First, find all the regs used in the libcall block
     that are not mentioned as inputs to the result.  */

  while (p != insn)
    {
      if (INSN_P (p))
	record_excess_regs (PATTERN (p), equiv, &output);
      p = NEXT_INSN (p);
    }

  return output;
}
|
1669 |
|
|
|
1670 |
|
|
/* Return 1 if all uses of REG
   are between INSN and the end of the basic block.

   INSN must be the first use of REG (checked via REGNO_FIRST_UID);
   otherwise 0 is returned immediately.  */

static int
reg_in_basic_block_p (rtx insn, rtx reg)
{
  int regno = REGNO (reg);
  rtx p;

  if (REGNO_FIRST_UID (regno) != INSN_UID (insn))
    return 0;

  /* Search this basic block for the already recorded last use of the reg.  */
  for (p = insn; p; p = NEXT_INSN (p))
    {
      switch (GET_CODE (p))
	{
	case NOTE:
	  /* Notes don't end a basic block; keep scanning.  */
	  break;

	case INSN:
	case CALL_INSN:
	  /* Ordinary insn: if this is the last use, we win.  */
	  if (REGNO_LAST_UID (regno) == INSN_UID (p))
	    return 1;
	  break;

	case JUMP_INSN:
	  /* Jump insn: if this is the last use, we win.  */
	  if (REGNO_LAST_UID (regno) == INSN_UID (p))
	    return 1;
	  /* Otherwise, it's the end of the basic block, so we lose.  */
	  return 0;

	case CODE_LABEL:
	case BARRIER:
	  /* It's the end of the basic block, so we lose.  */
	  return 0;

	default:
	  break;
	}
    }

  /* The "last use" that was recorded can't be found after the first
     use.  This can happen when the last use was deleted while
     processing an inner loop, this inner loop was then completely
     unrolled, and the outer loop is always exited after the inner loop,
     so that everything after the first use becomes a single basic block.  */
  return 1;
}
|
1721 |
|
|
|
1722 |
|
|
/* Compute the benefit of eliminating the insns in the block whose
   last insn is LAST.  This may be a group of insns used to compute a
   value directly or can contain a library call.

   LAST must carry a REG_RETVAL note locating the start of the block.
   The returned count weights a CALL_INSN as 10 (an estimate of the
   insns executed inside the library routine) and every other real
   insn (excluding USE and CLOBBER patterns) as 1.  */

static int
libcall_benefit (rtx last)
{
  rtx insn;
  int benefit = 0;

  for (insn = XEXP (find_reg_note (last, REG_RETVAL, NULL_RTX), 0);
       insn != last; insn = NEXT_INSN (insn))
    {
      if (CALL_P (insn))
	benefit += 10;		/* Assume at least this many insns in a
				   library routine.  */
      else if (NONJUMP_INSN_P (insn)
	       && GET_CODE (PATTERN (insn)) != USE
	       && GET_CODE (PATTERN (insn)) != CLOBBER)
	benefit++;
    }

  return benefit;
}
|
1746 |
|
|
|
1747 |
|
|
/* Skip COUNT insns from INSN, counting library calls as 1 insn.

   Returns the insn reached after skipping.  Notes are never counted;
   an entire libcall sequence (identified by a REG_LIBCALL note on its
   first insn) counts as a single step.  */

static rtx
skip_consec_insns (rtx insn, int count)
{
  for (; count > 0; count--)
    {
      rtx temp;

      /* If first insn of libcall sequence, skip to end.  */
      /* Do this at start of loop, since INSN is guaranteed to
	 be an insn here.  */
      if (!NOTE_P (insn)
	  && (temp = find_reg_note (insn, REG_LIBCALL, NULL_RTX)))
	insn = XEXP (temp, 0);

      /* Advance past the current insn and any following notes.  */
      do
	insn = NEXT_INSN (insn);
      while (NOTE_P (insn));
    }

  return insn;
}
|
1770 |
|
|
|
1771 |
|
|
/* Ignore any movable whose insn falls within a libcall
   which is part of another movable.
   We make use of the fact that the movable for the libcall value
   was made later and so appears later on the chain.

   "Ignoring" a movable is done by setting its `done' flag, which
   excludes it from subsequent processing.  */

static void
ignore_some_movables (struct loop_movables *movables)
{
  struct movable *m, *m1;

  for (m = movables->head; m; m = m->next)
    {
      /* Is this a movable for the value of a libcall?  */
      rtx note = find_reg_note (m->insn, REG_RETVAL, NULL_RTX);
      if (note)
	{
	  rtx insn;
	  /* Check for earlier movables inside that range,
	     and mark them invalid.  We cannot use LUIDs here because
	     insns created by loop.c for prior loops don't have LUIDs.
	     Rather than reject all such insns from movables, we just
	     explicitly check each insn in the libcall (since invariant
	     libcalls aren't that common).  */
	  for (insn = XEXP (note, 0); insn != m->insn; insn = NEXT_INSN (insn))
	    for (m1 = movables->head; m1 != m; m1 = m1->next)
	      if (m1->insn == insn)
		m1->done = 1;
	}
    }
}
|
1801 |
|
|
|
1802 |
|
|
/* For each movable insn, see if the reg that it loads
   leads when it dies right into another conditionally movable insn.
   If so, record that the second insn "forces" the first one,
   since the second can be moved only if the first is.

   The forcing relationship is recorded through the `forces' field, and
   the lifetime/savings of the forced insn are propagated up the chain
   so the forcing insn is more likely to be moved.  */

static void
force_movables (struct loop_movables *movables)
{
  struct movable *m, *m1;

  for (m1 = movables->head; m1; m1 = m1->next)
    /* Omit this if moving just the (SET (REG) 0) of a zero-extend.  */
    if (!m1->partial && !m1->done)
      {
	int regno = m1->regno;

	/* Find the movable insn at which M1's register dies, if any.  */
	for (m = m1->next; m; m = m->next)
	  /* ??? Could this be a bug?  What if CSE caused the
	     register of M1 to be used after this insn?
	     Since CSE does not update regno_last_uid,
	     this insn M->insn might not be where it dies.
	     But very likely this doesn't matter; what matters is
	     that M's reg is computed from M1's reg.  */
	  if (INSN_UID (m->insn) == REGNO_LAST_UID (regno)
	      && !m->done)
	    break;

	/* Reject the candidate unless its source really is M1's dest.  */
	if (m != 0 && m->set_src == m1->set_dest
	    /* If m->consec, m->set_src isn't valid.  */
	    && m->consec == 0)
	  m = 0;

	/* Increase the priority of the moving the first insn
	   since it permits the second to be moved as well.
	   Likewise for insns already forced by the first insn.  */
	if (m != 0)
	  {
	    struct movable *m2;

	    m->forces = m1;
	    for (m2 = m1; m2; m2 = m2->forces)
	      {
		m2->lifetime += m->lifetime;
		m2->savings += m->savings;
	      }
	  }
      }
}
|
1848 |
|
|
|
1849 |
|
|
/* Find invariant expressions that are equal and can be combined into
   one register.

   Two passes: first, later movables that load the same invariant value
   as an earlier one are linked to it via `match'; second, registers
   used only for zero-extension (`partial') whose lifetimes do not
   overlap are combined per machine mode.  */

static void
combine_movables (struct loop_movables *movables, struct loop_regs *regs)
{
  struct movable *m;
  /* Scratch bitmap, indexed by regno: 1 if the reg already matches M.  */
  char *matched_regs = xmalloc (regs->num);
  enum machine_mode mode;

  /* Regs that are set more than once are not allowed to match
     or be matched.  I'm no longer sure why not.  */
  /* Only pseudo registers are allowed to match or be matched,
     since move_movables does not validate the change.  */
  /* Perhaps testing m->consec_sets would be more appropriate here?  */

  for (m = movables->head; m; m = m->next)
    if (m->match == 0 && regs->array[m->regno].n_times_set == 1
	&& m->regno >= FIRST_PSEUDO_REGISTER
	&& !m->insert_temp
	&& !m->partial)
      {
	struct movable *m1;
	int regno = m->regno;

	memset (matched_regs, 0, regs->num);
	matched_regs[regno] = 1;

	/* We want later insns to match the first one.  Don't make the first
	   one match any later ones.  So start this loop at m->next.  */
	for (m1 = m->next; m1; m1 = m1->next)
	  if (m != m1 && m1->match == 0
	      && !m1->insert_temp
	      && regs->array[m1->regno].n_times_set == 1
	      && m1->regno >= FIRST_PSEUDO_REGISTER
	      /* A reg used outside the loop mustn't be eliminated.  */
	      && !m1->global
	      /* A reg used for zero-extending mustn't be eliminated.  */
	      && !m1->partial
	      && (matched_regs[m1->regno]
		  ||
		  (GET_MODE (m->set_dest) == GET_MODE (m1->set_dest)
		   /* See if the source of M1 says it matches M.  */
		   && ((REG_P (m1->set_src)
			&& matched_regs[REGNO (m1->set_src)])
		       || rtx_equal_for_loop_p (m->set_src, m1->set_src,
						movables, regs))))
	      && ((m->dependencies == m1->dependencies)
		  || rtx_equal_p (m->dependencies, m1->dependencies)))
	    {
	      /* M1 duplicates M: fold its weight into M and retire it.  */
	      m->lifetime += m1->lifetime;
	      m->savings += m1->savings;
	      m1->done = 1;
	      m1->match = m;
	      matched_regs[m1->regno] = 1;
	    }
      }

  /* Now combine the regs used for zero-extension.
     This can be done for those not marked `global'
     provided their lives don't overlap.  */

  for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
       mode = GET_MODE_WIDER_MODE (mode))
    {
      struct movable *m0 = 0;

      /* Combine all the registers for extension from mode MODE.
	 Don't combine any that are used outside this loop.  */
      for (m = movables->head; m; m = m->next)
	if (m->partial && ! m->global
	    && mode == GET_MODE (SET_SRC (PATTERN (NEXT_INSN (m->insn)))))
	  {
	    struct movable *m1;

	    int first = REGNO_FIRST_LUID (m->regno);
	    int last = REGNO_LAST_LUID (m->regno);

	    if (m0 == 0)
	      {
		/* First one: don't check for overlap, just record it.  */
		m0 = m;
		continue;
	      }

	    /* Make sure they extend to the same mode.
	       (Almost always true.)  */
	    if (GET_MODE (m->set_dest) != GET_MODE (m0->set_dest))
	      continue;

	    /* We already have one: check for overlap with those
	       already combined together.  */
	    for (m1 = movables->head; m1 != m; m1 = m1->next)
	      if (m1 == m0 || (m1->partial && m1->match == m0))
		if (! (REGNO_FIRST_LUID (m1->regno) > last
		       || REGNO_LAST_LUID (m1->regno) < first))
		  goto overlap;

	    /* No overlap: we can combine this with the others.  */
	    m0->lifetime += m->lifetime;
	    m0->savings += m->savings;
	    m->done = 1;
	    m->match = m0;

	  overlap:
	    ;
	  }
    }

  /* Clean up.  */
  free (matched_regs);
}
|
1961 |
|
|
|
1962 |
|
|
/* Returns the number of movable instructions in LOOP that were not
|
1963 |
|
|
moved outside the loop. */
|
1964 |
|
|
|
1965 |
|
|
static int
|
1966 |
|
|
num_unmoved_movables (const struct loop *loop)
|
1967 |
|
|
{
|
1968 |
|
|
int num = 0;
|
1969 |
|
|
struct movable *m;
|
1970 |
|
|
|
1971 |
|
|
for (m = LOOP_MOVABLES (loop)->head; m; m = m->next)
|
1972 |
|
|
if (!m->done)
|
1973 |
|
|
++num;
|
1974 |
|
|
|
1975 |
|
|
return num;
|
1976 |
|
|
}
|
1977 |
|
|
|
1978 |
|
|
|
1979 |
|
|
/* Return 1 if regs X and Y will become the same if moved. */
|
1980 |
|
|
|
1981 |
|
|
static int
|
1982 |
|
|
regs_match_p (rtx x, rtx y, struct loop_movables *movables)
|
1983 |
|
|
{
|
1984 |
|
|
unsigned int xn = REGNO (x);
|
1985 |
|
|
unsigned int yn = REGNO (y);
|
1986 |
|
|
struct movable *mx, *my;
|
1987 |
|
|
|
1988 |
|
|
for (mx = movables->head; mx; mx = mx->next)
|
1989 |
|
|
if (mx->regno == xn)
|
1990 |
|
|
break;
|
1991 |
|
|
|
1992 |
|
|
for (my = movables->head; my; my = my->next)
|
1993 |
|
|
if (my->regno == yn)
|
1994 |
|
|
break;
|
1995 |
|
|
|
1996 |
|
|
return (mx && my
|
1997 |
|
|
&& ((mx->match == my->match && mx->match != 0)
|
1998 |
|
|
|| mx->match == my
|
1999 |
|
|
|| mx == my->match));
|
2000 |
|
|
}
|
2001 |
|
|
|
2002 |
|
|
/* Return 1 if X and Y are identical-looking rtx's.
   This is the Lisp function EQUAL for rtx arguments.

   If two registers are matching movables or a movable register and an
   equivalent constant, consider them equal.  */

static int
rtx_equal_for_loop_p (rtx x, rtx y, struct loop_movables *movables,
		      struct loop_regs *regs)
{
  int i;
  int j;
  struct movable *m;
  enum rtx_code code;
  const char *fmt;

  if (x == y)
    return 1;
  if (x == 0 || y == 0)
    return 0;

  code = GET_CODE (x);

  /* If we have a register and a constant, they may sometimes be
     equal.  */
  if (REG_P (x) && regs->array[REGNO (x)].set_in_loop == -2
      && CONSTANT_P (y))
    {
      for (m = movables->head; m; m = m->next)
	if (m->move_insn && m->regno == REGNO (x)
	    && rtx_equal_p (m->set_src, y))
	  return 1;
    }
  else if (REG_P (y) && regs->array[REGNO (y)].set_in_loop == -2
	   && CONSTANT_P (x))
    {
      for (m = movables->head; m; m = m->next)
	if (m->move_insn && m->regno == REGNO (y)
	    && rtx_equal_p (m->set_src, x))
	  return 1;
    }

  /* Otherwise, rtx's of different codes cannot be equal.  */
  if (code != GET_CODE (y))
    return 0;

  /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent.
     (REG:SI x) and (REG:HI x) are NOT equivalent.  */

  if (GET_MODE (x) != GET_MODE (y))
    return 0;

  /* These types of rtx's can be compared nonrecursively.  */
  switch (code)
    {
    case PC:
    case CC0:
    case CONST_INT:
    case CONST_DOUBLE:
      /* Pointer equality was already checked above; distinct objects of
	 these codes are considered unequal here.  */
      return 0;

    case REG:
      return (REGNO (x) == REGNO (y) || regs_match_p (x, y, movables));

    case LABEL_REF:
      return XEXP (x, 0) == XEXP (y, 0);
    case SYMBOL_REF:
      return XSTR (x, 0) == XSTR (y, 0);

    default:
      break;
    }

  /* Compare the elements.  If any pair of corresponding elements
     fail to match, return 0 for the whole things.  */

  fmt = GET_RTX_FORMAT (code);
  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
    {
      switch (fmt[i])
	{
	case 'w':
	  if (XWINT (x, i) != XWINT (y, i))
	    return 0;
	  break;

	case 'i':
	  if (XINT (x, i) != XINT (y, i))
	    return 0;
	  break;

	case 'E':
	  /* Two vectors must have the same length.  */
	  if (XVECLEN (x, i) != XVECLEN (y, i))
	    return 0;

	  /* And the corresponding elements must match.  */
	  for (j = 0; j < XVECLEN (x, i); j++)
	    if (rtx_equal_for_loop_p (XVECEXP (x, i, j), XVECEXP (y, i, j),
				      movables, regs) == 0)
	      return 0;
	  break;

	case 'e':
	  if (rtx_equal_for_loop_p (XEXP (x, i), XEXP (y, i), movables, regs)
	      == 0)
	    return 0;
	  break;

	case 's':
	  if (strcmp (XSTR (x, i), XSTR (y, i)))
	    return 0;
	  break;

	case 'u':
	  /* These are just backpointers, so they don't matter.  */
	  break;

	case '0':
	  break;

	  /* It is believed that rtx's at this level will never
	     contain anything but integers and other rtx's,
	     except for within LABEL_REFs and SYMBOL_REFs.  */
	default:
	  gcc_unreachable ();
	}
    }
  return 1;
}
|
2132 |
|
|
|
2133 |
|
|
/* If X contains any LABEL_REF's, add REG_LABEL notes for them to all
   insns in INSNS which use the reference.  LABEL_NUSES for CODE_LABEL
   references is incremented once for each added note.

   Nonlocal label references are skipped.  X is walked recursively.  */

static void
add_label_notes (rtx x, rtx insns)
{
  enum rtx_code code = GET_CODE (x);
  int i, j;
  const char *fmt;
  rtx insn;

  if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
    {
      /* This code used to ignore labels that referred to dispatch tables to
	 avoid flow generating (slightly) worse code.

	 We no longer ignore such label references (see LABEL_REF handling in
	 mark_jump_label for additional information).  */
      for (insn = insns; insn; insn = NEXT_INSN (insn))
	if (reg_mentioned_p (XEXP (x, 0), insn))
	  {
	    REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, XEXP (x, 0),
						  REG_NOTES (insn));
	    if (LABEL_P (XEXP (x, 0)))
	      LABEL_NUSES (XEXP (x, 0))++;
	  }
    }

  /* Recurse into every rtx ('e') and rtx-vector ('E') operand of X.  */
  fmt = GET_RTX_FORMAT (code);
  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'e')
	add_label_notes (XEXP (x, i), insns);
      else if (fmt[i] == 'E')
	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	  add_label_notes (XVECEXP (x, i, j), insns);
    }
}
|
2172 |
|
|
|
2173 |
|
|
/* Scan MOVABLES, and move the insns that deserve to be moved.
|
2174 |
|
|
If two matching movables are combined, replace one reg with the
|
2175 |
|
|
other throughout. */
|
2176 |
|
|
|
2177 |
|
|
static void
|
2178 |
|
|
move_movables (struct loop *loop, struct loop_movables *movables,
|
2179 |
|
|
int threshold, int insn_count)
|
2180 |
|
|
{
|
2181 |
|
|
struct loop_regs *regs = LOOP_REGS (loop);
|
2182 |
|
|
int nregs = regs->num;
|
2183 |
|
|
rtx new_start = 0;
|
2184 |
|
|
struct movable *m;
|
2185 |
|
|
rtx p;
|
2186 |
|
|
rtx loop_start = loop->start;
|
2187 |
|
|
rtx loop_end = loop->end;
|
2188 |
|
|
/* Map of pseudo-register replacements to handle combining
|
2189 |
|
|
when we move several insns that load the same value
|
2190 |
|
|
into different pseudo-registers. */
|
2191 |
|
|
rtx *reg_map = xcalloc (nregs, sizeof (rtx));
|
2192 |
|
|
char *already_moved = xcalloc (nregs, sizeof (char));
|
2193 |
|
|
|
2194 |
|
|
for (m = movables->head; m; m = m->next)
|
2195 |
|
|
{
|
2196 |
|
|
/* Describe this movable insn. */
|
2197 |
|
|
|
2198 |
|
|
if (loop_dump_stream)
|
2199 |
|
|
{
|
2200 |
|
|
fprintf (loop_dump_stream, "Insn %d: regno %d (life %d), ",
|
2201 |
|
|
INSN_UID (m->insn), m->regno, m->lifetime);
|
2202 |
|
|
if (m->consec > 0)
|
2203 |
|
|
fprintf (loop_dump_stream, "consec %d, ", m->consec);
|
2204 |
|
|
if (m->cond)
|
2205 |
|
|
fprintf (loop_dump_stream, "cond ");
|
2206 |
|
|
if (m->force)
|
2207 |
|
|
fprintf (loop_dump_stream, "force ");
|
2208 |
|
|
if (m->global)
|
2209 |
|
|
fprintf (loop_dump_stream, "global ");
|
2210 |
|
|
if (m->done)
|
2211 |
|
|
fprintf (loop_dump_stream, "done ");
|
2212 |
|
|
if (m->move_insn)
|
2213 |
|
|
fprintf (loop_dump_stream, "move-insn ");
|
2214 |
|
|
if (m->match)
|
2215 |
|
|
fprintf (loop_dump_stream, "matches %d ",
|
2216 |
|
|
INSN_UID (m->match->insn));
|
2217 |
|
|
if (m->forces)
|
2218 |
|
|
fprintf (loop_dump_stream, "forces %d ",
|
2219 |
|
|
INSN_UID (m->forces->insn));
|
2220 |
|
|
}
|
2221 |
|
|
|
2222 |
|
|
/* Ignore the insn if it's already done (it matched something else).
|
2223 |
|
|
Otherwise, see if it is now safe to move. */
|
2224 |
|
|
|
2225 |
|
|
if (!m->done
|
2226 |
|
|
&& (! m->cond
|
2227 |
|
|
|| (1 == loop_invariant_p (loop, m->set_src)
|
2228 |
|
|
&& (m->dependencies == 0
|
2229 |
|
|
|| 1 == loop_invariant_p (loop, m->dependencies))
|
2230 |
|
|
&& (m->consec == 0
|
2231 |
|
|
|| 1 == consec_sets_invariant_p (loop, m->set_dest,
|
2232 |
|
|
m->consec + 1,
|
2233 |
|
|
m->insn))))
|
2234 |
|
|
&& (! m->forces || m->forces->done))
|
2235 |
|
|
{
|
2236 |
|
|
int regno;
|
2237 |
|
|
rtx p;
|
2238 |
|
|
int savings = m->savings;
|
2239 |
|
|
|
2240 |
|
|
/* We have an insn that is safe to move.
|
2241 |
|
|
Compute its desirability. */
|
2242 |
|
|
|
2243 |
|
|
p = m->insn;
|
2244 |
|
|
regno = m->regno;
|
2245 |
|
|
|
2246 |
|
|
if (loop_dump_stream)
|
2247 |
|
|
fprintf (loop_dump_stream, "savings %d ", savings);
|
2248 |
|
|
|
2249 |
|
|
if (regs->array[regno].moved_once && loop_dump_stream)
|
2250 |
|
|
fprintf (loop_dump_stream, "halved since already moved ");
|
2251 |
|
|
|
2252 |
|
|
/* An insn MUST be moved if we already moved something else
|
2253 |
|
|
which is safe only if this one is moved too: that is,
|
2254 |
|
|
if already_moved[REGNO] is nonzero. */
|
2255 |
|
|
|
2256 |
|
|
/* An insn is desirable to move if the new lifetime of the
|
2257 |
|
|
register is no more than THRESHOLD times the old lifetime.
|
2258 |
|
|
If it's not desirable, it means the loop is so big
|
2259 |
|
|
that moving won't speed things up much,
|
2260 |
|
|
and it is liable to make register usage worse. */
|
2261 |
|
|
|
2262 |
|
|
/* It is also desirable to move if it can be moved at no
|
2263 |
|
|
extra cost because something else was already moved. */
|
2264 |
|
|
|
2265 |
|
|
if (already_moved[regno]
|
2266 |
|
|
|| (threshold * savings * m->lifetime) >=
|
2267 |
|
|
(regs->array[regno].moved_once ? insn_count * 2 : insn_count)
|
2268 |
|
|
|| (m->forces && m->forces->done
|
2269 |
|
|
&& regs->array[m->forces->regno].n_times_set == 1))
|
2270 |
|
|
{
|
2271 |
|
|
int count;
|
2272 |
|
|
struct movable *m1;
|
2273 |
|
|
rtx first = NULL_RTX;
|
2274 |
|
|
rtx newreg = NULL_RTX;
|
2275 |
|
|
|
2276 |
|
|
if (m->insert_temp)
|
2277 |
|
|
newreg = gen_reg_rtx (GET_MODE (m->set_dest));
|
2278 |
|
|
|
2279 |
|
|
/* Now move the insns that set the reg. */
|
2280 |
|
|
|
2281 |
|
|
if (m->partial && m->match)
|
2282 |
|
|
{
|
2283 |
|
|
rtx newpat, i1;
|
2284 |
|
|
rtx r1, r2;
|
2285 |
|
|
/* Find the end of this chain of matching regs.
|
2286 |
|
|
Thus, we load each reg in the chain from that one reg.
|
2287 |
|
|
And that reg is loaded with 0 directly,
|
2288 |
|
|
since it has ->match == 0. */
|
2289 |
|
|
for (m1 = m; m1->match; m1 = m1->match);
|
2290 |
|
|
newpat = gen_move_insn (SET_DEST (PATTERN (m->insn)),
|
2291 |
|
|
SET_DEST (PATTERN (m1->insn)));
|
2292 |
|
|
i1 = loop_insn_hoist (loop, newpat);
|
2293 |
|
|
|
2294 |
|
|
/* Mark the moved, invariant reg as being allowed to
|
2295 |
|
|
share a hard reg with the other matching invariant. */
|
2296 |
|
|
REG_NOTES (i1) = REG_NOTES (m->insn);
|
2297 |
|
|
r1 = SET_DEST (PATTERN (m->insn));
|
2298 |
|
|
r2 = SET_DEST (PATTERN (m1->insn));
|
2299 |
|
|
regs_may_share
|
2300 |
|
|
= gen_rtx_EXPR_LIST (VOIDmode, r1,
|
2301 |
|
|
gen_rtx_EXPR_LIST (VOIDmode, r2,
|
2302 |
|
|
regs_may_share));
|
2303 |
|
|
delete_insn (m->insn);
|
2304 |
|
|
|
2305 |
|
|
if (new_start == 0)
|
2306 |
|
|
new_start = i1;
|
2307 |
|
|
|
2308 |
|
|
if (loop_dump_stream)
|
2309 |
|
|
fprintf (loop_dump_stream, " moved to %d", INSN_UID (i1));
|
2310 |
|
|
}
|
2311 |
|
|
/* If we are to re-generate the item being moved with a
|
2312 |
|
|
new move insn, first delete what we have and then emit
|
2313 |
|
|
the move insn before the loop. */
|
2314 |
|
|
else if (m->move_insn)
|
2315 |
|
|
{
|
2316 |
|
|
rtx i1, temp, seq;
|
2317 |
|
|
|
2318 |
|
|
for (count = m->consec; count >= 0; count--)
|
2319 |
|
|
{
|
2320 |
|
|
if (!NOTE_P (p))
|
2321 |
|
|
{
|
2322 |
|
|
/* If this is the first insn of a library
|
2323 |
|
|
call sequence, something is very
|
2324 |
|
|
wrong. */
|
2325 |
|
|
gcc_assert (!find_reg_note
|
2326 |
|
|
(p, REG_LIBCALL, NULL_RTX));
|
2327 |
|
|
|
2328 |
|
|
/* If this is the last insn of a libcall
|
2329 |
|
|
sequence, then delete every insn in the
|
2330 |
|
|
sequence except the last. The last insn
|
2331 |
|
|
is handled in the normal manner. */
|
2332 |
|
|
temp = find_reg_note (p, REG_RETVAL, NULL_RTX);
|
2333 |
|
|
|
2334 |
|
|
if (temp)
|
2335 |
|
|
{
|
2336 |
|
|
temp = XEXP (temp, 0);
|
2337 |
|
|
while (temp != p)
|
2338 |
|
|
temp = delete_insn (temp);
|
2339 |
|
|
}
|
2340 |
|
|
}
|
2341 |
|
|
|
2342 |
|
|
temp = p;
|
2343 |
|
|
p = delete_insn (p);
|
2344 |
|
|
|
2345 |
|
|
/* simplify_giv_expr expects that it can walk the insns
|
2346 |
|
|
at m->insn forwards and see this old sequence we are
|
2347 |
|
|
tossing here. delete_insn does preserve the next
|
2348 |
|
|
pointers, but when we skip over a NOTE we must fix
|
2349 |
|
|
it up. Otherwise that code walks into the non-deleted
|
2350 |
|
|
insn stream. */
|
2351 |
|
|
while (p && NOTE_P (p))
|
2352 |
|
|
p = NEXT_INSN (temp) = NEXT_INSN (p);
|
2353 |
|
|
|
2354 |
|
|
if (m->insert_temp)
|
2355 |
|
|
{
|
2356 |
|
|
/* Replace the original insn with a move from
|
2357 |
|
|
our newly created temp. */
|
2358 |
|
|
start_sequence ();
|
2359 |
|
|
emit_move_insn (m->set_dest, newreg);
|
2360 |
|
|
seq = get_insns ();
|
2361 |
|
|
end_sequence ();
|
2362 |
|
|
emit_insn_before (seq, p);
|
2363 |
|
|
}
|
2364 |
|
|
}
|
2365 |
|
|
|
2366 |
|
|
start_sequence ();
|
2367 |
|
|
emit_move_insn (m->insert_temp ? newreg : m->set_dest,
|
2368 |
|
|
m->set_src);
|
2369 |
|
|
seq = get_insns ();
|
2370 |
|
|
end_sequence ();
|
2371 |
|
|
|
2372 |
|
|
add_label_notes (m->set_src, seq);
|
2373 |
|
|
|
2374 |
|
|
i1 = loop_insn_hoist (loop, seq);
|
2375 |
|
|
if (! find_reg_note (i1, REG_EQUAL, NULL_RTX))
|
2376 |
|
|
set_unique_reg_note (i1,
|
2377 |
|
|
m->is_equiv ? REG_EQUIV : REG_EQUAL,
|
2378 |
|
|
m->set_src);
|
2379 |
|
|
|
2380 |
|
|
if (loop_dump_stream)
|
2381 |
|
|
fprintf (loop_dump_stream, " moved to %d", INSN_UID (i1));
|
2382 |
|
|
|
2383 |
|
|
/* The more regs we move, the less we like moving them. */
|
2384 |
|
|
threshold -= 3;
|
2385 |
|
|
}
|
2386 |
|
|
else
|
2387 |
|
|
{
|
2388 |
|
|
for (count = m->consec; count >= 0; count--)
|
2389 |
|
|
{
|
2390 |
|
|
rtx i1, temp;
|
2391 |
|
|
|
2392 |
|
|
/* If first insn of libcall sequence, skip to end. */
|
2393 |
|
|
/* Do this at start of loop, since p is guaranteed to
|
2394 |
|
|
be an insn here. */
|
2395 |
|
|
if (!NOTE_P (p)
|
2396 |
|
|
&& (temp = find_reg_note (p, REG_LIBCALL, NULL_RTX)))
|
2397 |
|
|
p = XEXP (temp, 0);
|
2398 |
|
|
|
2399 |
|
|
/* If last insn of libcall sequence, move all
|
2400 |
|
|
insns except the last before the loop. The last
|
2401 |
|
|
insn is handled in the normal manner. */
|
2402 |
|
|
if (!NOTE_P (p)
|
2403 |
|
|
&& (temp = find_reg_note (p, REG_RETVAL, NULL_RTX)))
|
2404 |
|
|
{
|
2405 |
|
|
rtx fn_address = 0;
|
2406 |
|
|
rtx fn_reg = 0;
|
2407 |
|
|
rtx fn_address_insn = 0;
|
2408 |
|
|
|
2409 |
|
|
first = 0;
|
2410 |
|
|
for (temp = XEXP (temp, 0); temp != p;
|
2411 |
|
|
temp = NEXT_INSN (temp))
|
2412 |
|
|
{
|
2413 |
|
|
rtx body;
|
2414 |
|
|
rtx n;
|
2415 |
|
|
rtx next;
|
2416 |
|
|
|
2417 |
|
|
if (NOTE_P (temp))
|
2418 |
|
|
continue;
|
2419 |
|
|
|
2420 |
|
|
body = PATTERN (temp);
|
2421 |
|
|
|
2422 |
|
|
/* Find the next insn after TEMP,
|
2423 |
|
|
not counting USE or NOTE insns. */
|
2424 |
|
|
for (next = NEXT_INSN (temp); next != p;
|
2425 |
|
|
next = NEXT_INSN (next))
|
2426 |
|
|
if (! (NONJUMP_INSN_P (next)
|
2427 |
|
|
&& GET_CODE (PATTERN (next)) == USE)
|
2428 |
|
|
&& !NOTE_P (next))
|
2429 |
|
|
break;
|
2430 |
|
|
|
2431 |
|
|
/* If that is the call, this may be the insn
|
2432 |
|
|
that loads the function address.
|
2433 |
|
|
|
2434 |
|
|
Extract the function address from the insn
|
2435 |
|
|
that loads it into a register.
|
2436 |
|
|
If this insn was cse'd, we get incorrect code.
|
2437 |
|
|
|
2438 |
|
|
So emit a new move insn that copies the
|
2439 |
|
|
function address into the register that the
|
2440 |
|
|
call insn will use. flow.c will delete any
|
2441 |
|
|
redundant stores that we have created. */
|
2442 |
|
|
if (CALL_P (next)
|
2443 |
|
|
&& GET_CODE (body) == SET
|
2444 |
|
|
&& REG_P (SET_DEST (body))
|
2445 |
|
|
&& (n = find_reg_note (temp, REG_EQUAL,
|
2446 |
|
|
NULL_RTX)))
|
2447 |
|
|
{
|
2448 |
|
|
fn_reg = SET_SRC (body);
|
2449 |
|
|
if (!REG_P (fn_reg))
|
2450 |
|
|
fn_reg = SET_DEST (body);
|
2451 |
|
|
fn_address = XEXP (n, 0);
|
2452 |
|
|
fn_address_insn = temp;
|
2453 |
|
|
}
|
2454 |
|
|
/* We have the call insn.
|
2455 |
|
|
If it uses the register we suspect it might,
|
2456 |
|
|
load it with the correct address directly. */
|
2457 |
|
|
if (CALL_P (temp)
|
2458 |
|
|
&& fn_address != 0
|
2459 |
|
|
&& reg_referenced_p (fn_reg, body))
|
2460 |
|
|
loop_insn_emit_after (loop, 0, fn_address_insn,
|
2461 |
|
|
gen_move_insn
|
2462 |
|
|
(fn_reg, fn_address));
|
2463 |
|
|
|
2464 |
|
|
if (CALL_P (temp))
|
2465 |
|
|
{
|
2466 |
|
|
i1 = loop_call_insn_hoist (loop, body);
|
2467 |
|
|
/* Because the USAGE information potentially
|
2468 |
|
|
contains objects other than hard registers
|
2469 |
|
|
we need to copy it. */
|
2470 |
|
|
if (CALL_INSN_FUNCTION_USAGE (temp))
|
2471 |
|
|
CALL_INSN_FUNCTION_USAGE (i1)
|
2472 |
|
|
= copy_rtx (CALL_INSN_FUNCTION_USAGE (temp));
|
2473 |
|
|
}
|
2474 |
|
|
else
|
2475 |
|
|
i1 = loop_insn_hoist (loop, body);
|
2476 |
|
|
if (first == 0)
|
2477 |
|
|
first = i1;
|
2478 |
|
|
if (temp == fn_address_insn)
|
2479 |
|
|
fn_address_insn = i1;
|
2480 |
|
|
REG_NOTES (i1) = REG_NOTES (temp);
|
2481 |
|
|
REG_NOTES (temp) = NULL;
|
2482 |
|
|
delete_insn (temp);
|
2483 |
|
|
}
|
2484 |
|
|
if (new_start == 0)
|
2485 |
|
|
new_start = first;
|
2486 |
|
|
}
|
2487 |
|
|
if (m->savemode != VOIDmode)
|
2488 |
|
|
{
|
2489 |
|
|
/* P sets REG to zero; but we should clear only
|
2490 |
|
|
the bits that are not covered by the mode
|
2491 |
|
|
m->savemode. */
|
2492 |
|
|
rtx reg = m->set_dest;
|
2493 |
|
|
rtx sequence;
|
2494 |
|
|
rtx tem;
|
2495 |
|
|
|
2496 |
|
|
start_sequence ();
|
2497 |
|
|
tem = expand_simple_binop
|
2498 |
|
|
(GET_MODE (reg), AND, reg,
|
2499 |
|
|
GEN_INT ((((HOST_WIDE_INT) 1
|
2500 |
|
|
<< GET_MODE_BITSIZE (m->savemode)))
|
2501 |
|
|
- 1),
|
2502 |
|
|
reg, 1, OPTAB_LIB_WIDEN);
|
2503 |
|
|
gcc_assert (tem);
|
2504 |
|
|
if (tem != reg)
|
2505 |
|
|
emit_move_insn (reg, tem);
|
2506 |
|
|
sequence = get_insns ();
|
2507 |
|
|
end_sequence ();
|
2508 |
|
|
i1 = loop_insn_hoist (loop, sequence);
|
2509 |
|
|
}
|
2510 |
|
|
else if (CALL_P (p))
|
2511 |
|
|
{
|
2512 |
|
|
i1 = loop_call_insn_hoist (loop, PATTERN (p));
|
2513 |
|
|
/* Because the USAGE information potentially
|
2514 |
|
|
contains objects other than hard registers
|
2515 |
|
|
we need to copy it. */
|
2516 |
|
|
if (CALL_INSN_FUNCTION_USAGE (p))
|
2517 |
|
|
CALL_INSN_FUNCTION_USAGE (i1)
|
2518 |
|
|
= copy_rtx (CALL_INSN_FUNCTION_USAGE (p));
|
2519 |
|
|
}
|
2520 |
|
|
else if (count == m->consec && m->move_insn_first)
|
2521 |
|
|
{
|
2522 |
|
|
rtx seq;
|
2523 |
|
|
/* The SET_SRC might not be invariant, so we must
|
2524 |
|
|
use the REG_EQUAL note. */
|
2525 |
|
|
start_sequence ();
|
2526 |
|
|
emit_move_insn (m->insert_temp ? newreg : m->set_dest,
|
2527 |
|
|
m->set_src);
|
2528 |
|
|
seq = get_insns ();
|
2529 |
|
|
end_sequence ();
|
2530 |
|
|
|
2531 |
|
|
add_label_notes (m->set_src, seq);
|
2532 |
|
|
|
2533 |
|
|
i1 = loop_insn_hoist (loop, seq);
|
2534 |
|
|
if (! find_reg_note (i1, REG_EQUAL, NULL_RTX))
|
2535 |
|
|
set_unique_reg_note (i1, m->is_equiv ? REG_EQUIV
|
2536 |
|
|
: REG_EQUAL, m->set_src);
|
2537 |
|
|
}
|
2538 |
|
|
else if (m->insert_temp)
|
2539 |
|
|
{
|
2540 |
|
|
rtx *reg_map2 = xcalloc (REGNO (newreg),
|
2541 |
|
|
sizeof(rtx));
|
2542 |
|
|
reg_map2 [m->regno] = newreg;
|
2543 |
|
|
|
2544 |
|
|
i1 = loop_insn_hoist (loop, copy_rtx (PATTERN (p)));
|
2545 |
|
|
replace_regs (i1, reg_map2, REGNO (newreg), 1);
|
2546 |
|
|
free (reg_map2);
|
2547 |
|
|
}
|
2548 |
|
|
else
|
2549 |
|
|
i1 = loop_insn_hoist (loop, PATTERN (p));
|
2550 |
|
|
|
2551 |
|
|
if (REG_NOTES (i1) == 0)
|
2552 |
|
|
{
|
2553 |
|
|
REG_NOTES (i1) = REG_NOTES (p);
|
2554 |
|
|
REG_NOTES (p) = NULL;
|
2555 |
|
|
|
2556 |
|
|
/* If there is a REG_EQUAL note present whose value
|
2557 |
|
|
is not loop invariant, then delete it, since it
|
2558 |
|
|
may cause problems with later optimization passes.
|
2559 |
|
|
It is possible for cse to create such notes
|
2560 |
|
|
like this as a result of record_jump_cond. */
|
2561 |
|
|
|
2562 |
|
|
if ((temp = find_reg_note (i1, REG_EQUAL, NULL_RTX))
|
2563 |
|
|
&& ! loop_invariant_p (loop, XEXP (temp, 0)))
|
2564 |
|
|
remove_note (i1, temp);
|
2565 |
|
|
}
|
2566 |
|
|
|
2567 |
|
|
if (new_start == 0)
|
2568 |
|
|
new_start = i1;
|
2569 |
|
|
|
2570 |
|
|
if (loop_dump_stream)
|
2571 |
|
|
fprintf (loop_dump_stream, " moved to %d",
|
2572 |
|
|
INSN_UID (i1));
|
2573 |
|
|
|
2574 |
|
|
/* If library call, now fix the REG_NOTES that contain
|
2575 |
|
|
insn pointers, namely REG_LIBCALL on FIRST
|
2576 |
|
|
and REG_RETVAL on I1. */
|
2577 |
|
|
if ((temp = find_reg_note (i1, REG_RETVAL, NULL_RTX)))
|
2578 |
|
|
{
|
2579 |
|
|
XEXP (temp, 0) = first;
|
2580 |
|
|
temp = find_reg_note (first, REG_LIBCALL, NULL_RTX);
|
2581 |
|
|
XEXP (temp, 0) = i1;
|
2582 |
|
|
}
|
2583 |
|
|
|
2584 |
|
|
temp = p;
|
2585 |
|
|
delete_insn (p);
|
2586 |
|
|
p = NEXT_INSN (p);
|
2587 |
|
|
|
2588 |
|
|
/* simplify_giv_expr expects that it can walk the insns
|
2589 |
|
|
at m->insn forwards and see this old sequence we are
|
2590 |
|
|
tossing here. delete_insn does preserve the next
|
2591 |
|
|
pointers, but when we skip over a NOTE we must fix
|
2592 |
|
|
it up. Otherwise that code walks into the non-deleted
|
2593 |
|
|
insn stream. */
|
2594 |
|
|
while (p && NOTE_P (p))
|
2595 |
|
|
p = NEXT_INSN (temp) = NEXT_INSN (p);
|
2596 |
|
|
|
2597 |
|
|
if (m->insert_temp)
|
2598 |
|
|
{
|
2599 |
|
|
rtx seq;
|
2600 |
|
|
/* Replace the original insn with a move from
|
2601 |
|
|
our newly created temp. */
|
2602 |
|
|
start_sequence ();
|
2603 |
|
|
emit_move_insn (m->set_dest, newreg);
|
2604 |
|
|
seq = get_insns ();
|
2605 |
|
|
end_sequence ();
|
2606 |
|
|
emit_insn_before (seq, p);
|
2607 |
|
|
}
|
2608 |
|
|
}
|
2609 |
|
|
|
2610 |
|
|
/* The more regs we move, the less we like moving them. */
|
2611 |
|
|
threshold -= 3;
|
2612 |
|
|
}
|
2613 |
|
|
|
2614 |
|
|
m->done = 1;
|
2615 |
|
|
|
2616 |
|
|
if (!m->insert_temp)
|
2617 |
|
|
{
|
2618 |
|
|
/* Any other movable that loads the same register
|
2619 |
|
|
MUST be moved. */
|
2620 |
|
|
already_moved[regno] = 1;
|
2621 |
|
|
|
2622 |
|
|
/* This reg has been moved out of one loop. */
|
2623 |
|
|
regs->array[regno].moved_once = 1;
|
2624 |
|
|
|
2625 |
|
|
/* The reg set here is now invariant. */
|
2626 |
|
|
if (! m->partial)
|
2627 |
|
|
{
|
2628 |
|
|
int i;
|
2629 |
|
|
for (i = 0; i < LOOP_REGNO_NREGS (regno, m->set_dest); i++)
|
2630 |
|
|
regs->array[regno+i].set_in_loop = 0;
|
2631 |
|
|
}
|
2632 |
|
|
|
2633 |
|
|
/* Change the length-of-life info for the register
|
2634 |
|
|
to say it lives at least the full length of this loop.
|
2635 |
|
|
This will help guide optimizations in outer loops. */
|
2636 |
|
|
|
2637 |
|
|
if (REGNO_FIRST_LUID (regno) > INSN_LUID (loop_start))
|
2638 |
|
|
/* This is the old insn before all the moved insns.
|
2639 |
|
|
We can't use the moved insn because it is out of range
|
2640 |
|
|
in uid_luid. Only the old insns have luids. */
|
2641 |
|
|
REGNO_FIRST_UID (regno) = INSN_UID (loop_start);
|
2642 |
|
|
if (REGNO_LAST_LUID (regno) < INSN_LUID (loop_end))
|
2643 |
|
|
REGNO_LAST_UID (regno) = INSN_UID (loop_end);
|
2644 |
|
|
}
|
2645 |
|
|
|
2646 |
|
|
/* Combine with this moved insn any other matching movables. */
|
2647 |
|
|
|
2648 |
|
|
if (! m->partial)
|
2649 |
|
|
for (m1 = movables->head; m1; m1 = m1->next)
|
2650 |
|
|
if (m1->match == m)
|
2651 |
|
|
{
|
2652 |
|
|
rtx temp;
|
2653 |
|
|
|
2654 |
|
|
reg_map[m1->regno] = m->set_dest;
|
2655 |
|
|
|
2656 |
|
|
/* Get rid of the matching insn
|
2657 |
|
|
and prevent further processing of it. */
|
2658 |
|
|
m1->done = 1;
|
2659 |
|
|
|
2660 |
|
|
/* If library call, delete all insns. */
|
2661 |
|
|
if ((temp = find_reg_note (m1->insn, REG_RETVAL,
|
2662 |
|
|
NULL_RTX)))
|
2663 |
|
|
delete_insn_chain (XEXP (temp, 0), m1->insn);
|
2664 |
|
|
else
|
2665 |
|
|
delete_insn (m1->insn);
|
2666 |
|
|
|
2667 |
|
|
/* Any other movable that loads the same register
|
2668 |
|
|
MUST be moved. */
|
2669 |
|
|
already_moved[m1->regno] = 1;
|
2670 |
|
|
|
2671 |
|
|
/* The reg merged here is now invariant,
|
2672 |
|
|
if the reg it matches is invariant. */
|
2673 |
|
|
if (! m->partial)
|
2674 |
|
|
{
|
2675 |
|
|
int i;
|
2676 |
|
|
for (i = 0;
|
2677 |
|
|
i < LOOP_REGNO_NREGS (regno, m1->set_dest);
|
2678 |
|
|
i++)
|
2679 |
|
|
regs->array[m1->regno+i].set_in_loop = 0;
|
2680 |
|
|
}
|
2681 |
|
|
}
|
2682 |
|
|
}
|
2683 |
|
|
else if (loop_dump_stream)
|
2684 |
|
|
fprintf (loop_dump_stream, "not desirable");
|
2685 |
|
|
}
|
2686 |
|
|
else if (loop_dump_stream && !m->match)
|
2687 |
|
|
fprintf (loop_dump_stream, "not safe");
|
2688 |
|
|
|
2689 |
|
|
if (loop_dump_stream)
|
2690 |
|
|
fprintf (loop_dump_stream, "\n");
|
2691 |
|
|
}
|
2692 |
|
|
|
2693 |
|
|
if (new_start == 0)
|
2694 |
|
|
new_start = loop_start;
|
2695 |
|
|
|
2696 |
|
|
/* Go through all the instructions in the loop, making
|
2697 |
|
|
all the register substitutions scheduled in REG_MAP. */
|
2698 |
|
|
for (p = new_start; p != loop_end; p = NEXT_INSN (p))
|
2699 |
|
|
if (INSN_P (p))
|
2700 |
|
|
{
|
2701 |
|
|
replace_regs (PATTERN (p), reg_map, nregs, 0);
|
2702 |
|
|
replace_regs (REG_NOTES (p), reg_map, nregs, 0);
|
2703 |
|
|
INSN_CODE (p) = -1;
|
2704 |
|
|
}
|
2705 |
|
|
|
2706 |
|
|
/* Clean up. */
|
2707 |
|
|
free (reg_map);
|
2708 |
|
|
free (already_moved);
|
2709 |
|
|
}
|
2710 |
|
|
|
2711 |
|
|
|
2712 |
|
|
static void
|
2713 |
|
|
loop_movables_add (struct loop_movables *movables, struct movable *m)
|
2714 |
|
|
{
|
2715 |
|
|
if (movables->head == 0)
|
2716 |
|
|
movables->head = m;
|
2717 |
|
|
else
|
2718 |
|
|
movables->last->next = m;
|
2719 |
|
|
movables->last = m;
|
2720 |
|
|
}
|
2721 |
|
|
|
2722 |
|
|
|
2723 |
|
|
static void
|
2724 |
|
|
loop_movables_free (struct loop_movables *movables)
|
2725 |
|
|
{
|
2726 |
|
|
struct movable *m;
|
2727 |
|
|
struct movable *m_next;
|
2728 |
|
|
|
2729 |
|
|
for (m = movables->head; m; m = m_next)
|
2730 |
|
|
{
|
2731 |
|
|
m_next = m->next;
|
2732 |
|
|
free (m);
|
2733 |
|
|
}
|
2734 |
|
|
}
|
2735 |
|
|
|
2736 |
|
|
#if 0
|
2737 |
|
|
/* Scan X and replace the address of any MEM in it with ADDR.
|
2738 |
|
|
REG is the address that MEM should have before the replacement. */
|
2739 |
|
|
|
2740 |
|
|
static void
|
2741 |
|
|
replace_call_address (rtx x, rtx reg, rtx addr)
|
2742 |
|
|
{
|
2743 |
|
|
enum rtx_code code;
|
2744 |
|
|
int i;
|
2745 |
|
|
const char *fmt;
|
2746 |
|
|
|
2747 |
|
|
if (x == 0)
|
2748 |
|
|
return;
|
2749 |
|
|
code = GET_CODE (x);
|
2750 |
|
|
switch (code)
|
2751 |
|
|
{
|
2752 |
|
|
case PC:
|
2753 |
|
|
case CC0:
|
2754 |
|
|
case CONST_INT:
|
2755 |
|
|
case CONST_DOUBLE:
|
2756 |
|
|
case CONST:
|
2757 |
|
|
case SYMBOL_REF:
|
2758 |
|
|
case LABEL_REF:
|
2759 |
|
|
case REG:
|
2760 |
|
|
return;
|
2761 |
|
|
|
2762 |
|
|
case SET:
|
2763 |
|
|
/* Short cut for very common case. */
|
2764 |
|
|
replace_call_address (XEXP (x, 1), reg, addr);
|
2765 |
|
|
return;
|
2766 |
|
|
|
2767 |
|
|
case CALL:
|
2768 |
|
|
/* Short cut for very common case. */
|
2769 |
|
|
replace_call_address (XEXP (x, 0), reg, addr);
|
2770 |
|
|
return;
|
2771 |
|
|
|
2772 |
|
|
case MEM:
|
2773 |
|
|
/* If this MEM uses a reg other than the one we expected,
|
2774 |
|
|
something is wrong. */
|
2775 |
|
|
gcc_assert (XEXP (x, 0) == reg);
|
2776 |
|
|
XEXP (x, 0) = addr;
|
2777 |
|
|
return;
|
2778 |
|
|
|
2779 |
|
|
default:
|
2780 |
|
|
break;
|
2781 |
|
|
}
|
2782 |
|
|
|
2783 |
|
|
fmt = GET_RTX_FORMAT (code);
|
2784 |
|
|
for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
|
2785 |
|
|
{
|
2786 |
|
|
if (fmt[i] == 'e')
|
2787 |
|
|
replace_call_address (XEXP (x, i), reg, addr);
|
2788 |
|
|
else if (fmt[i] == 'E')
|
2789 |
|
|
{
|
2790 |
|
|
int j;
|
2791 |
|
|
for (j = 0; j < XVECLEN (x, i); j++)
|
2792 |
|
|
replace_call_address (XVECEXP (x, i, j), reg, addr);
|
2793 |
|
|
}
|
2794 |
|
|
}
|
2795 |
|
|
}
|
2796 |
|
|
#endif
|
2797 |
|
|
|
2798 |
|
|
/* Return the number of memory refs to addresses that vary
|
2799 |
|
|
in the rtx X. */
|
2800 |
|
|
|
2801 |
|
|
static int
|
2802 |
|
|
count_nonfixed_reads (const struct loop *loop, rtx x)
|
2803 |
|
|
{
|
2804 |
|
|
enum rtx_code code;
|
2805 |
|
|
int i;
|
2806 |
|
|
const char *fmt;
|
2807 |
|
|
int value;
|
2808 |
|
|
|
2809 |
|
|
if (x == 0)
|
2810 |
|
|
return 0;
|
2811 |
|
|
|
2812 |
|
|
code = GET_CODE (x);
|
2813 |
|
|
switch (code)
|
2814 |
|
|
{
|
2815 |
|
|
case PC:
|
2816 |
|
|
case CC0:
|
2817 |
|
|
case CONST_INT:
|
2818 |
|
|
case CONST_DOUBLE:
|
2819 |
|
|
case CONST:
|
2820 |
|
|
case SYMBOL_REF:
|
2821 |
|
|
case LABEL_REF:
|
2822 |
|
|
case REG:
|
2823 |
|
|
return 0;
|
2824 |
|
|
|
2825 |
|
|
case MEM:
|
2826 |
|
|
return ((loop_invariant_p (loop, XEXP (x, 0)) != 1)
|
2827 |
|
|
+ count_nonfixed_reads (loop, XEXP (x, 0)));
|
2828 |
|
|
|
2829 |
|
|
default:
|
2830 |
|
|
break;
|
2831 |
|
|
}
|
2832 |
|
|
|
2833 |
|
|
value = 0;
|
2834 |
|
|
fmt = GET_RTX_FORMAT (code);
|
2835 |
|
|
for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
|
2836 |
|
|
{
|
2837 |
|
|
if (fmt[i] == 'e')
|
2838 |
|
|
value += count_nonfixed_reads (loop, XEXP (x, i));
|
2839 |
|
|
if (fmt[i] == 'E')
|
2840 |
|
|
{
|
2841 |
|
|
int j;
|
2842 |
|
|
for (j = 0; j < XVECLEN (x, i); j++)
|
2843 |
|
|
value += count_nonfixed_reads (loop, XVECEXP (x, i, j));
|
2844 |
|
|
}
|
2845 |
|
|
}
|
2846 |
|
|
return value;
|
2847 |
|
|
}
|
2848 |
|
|
|
2849 |
|
|
/* Scan a loop setting the elements `loops_enclosed',
   `has_call', `has_nonconst_call', `has_volatile', `has_tablejump',
   `unknown_address_altered', `unknown_constant_address_altered', and
   `num_mem_sets' in LOOP.  Also, fill in the array `mems' and the
   list `store_mems' in LOOP.  */

static void
prescan_loop (struct loop *loop)
{
  /* NOTE(review): LEVEL is updated at loop-begin/end notes below but is
     never read afterwards in this function — apparently vestigial.  */
  int level = 1;
  rtx insn;
  struct loop_info *loop_info = LOOP_INFO (loop);
  rtx start = loop->start;
  rtx end = loop->end;
  /* The label after END.  Jumping here is just like falling off the
     end of the loop.  We use next_nonnote_insn instead of next_label
     as a hedge against the (pathological) case where some actual insn
     might end up between the two.  */
  rtx exit_target = next_nonnote_insn (end);

  /* Reset all the per-loop summary flags before scanning.  */
  loop_info->has_indirect_jump = indirect_jump_in_function;
  loop_info->pre_header_has_call = 0;
  loop_info->has_call = 0;
  loop_info->has_nonconst_call = 0;
  loop_info->has_prefetch = 0;
  loop_info->has_volatile = 0;
  loop_info->has_tablejump = 0;
  loop_info->has_multiple_exit_targets = 0;
  loop->level = 1;

  loop_info->unknown_address_altered = 0;
  loop_info->unknown_constant_address_altered = 0;
  loop_info->store_mems = NULL_RTX;
  loop_info->first_loop_store_insn = NULL_RTX;
  loop_info->mems_idx = 0;
  loop_info->num_mem_sets = 0;

  /* Walk backward from the loop start to the nearest label, looking
     for a call insn in the pre-header.  */
  for (insn = start; insn && !LABEL_P (insn);
       insn = PREV_INSN (insn))
    {
      if (CALL_P (insn))
	{
	  loop_info->pre_header_has_call = 1;
	  break;
	}
    }

  /* Main scan: every insn strictly inside the loop (after START,
     up to and including END's predecessor chain).  */
  for (insn = NEXT_INSN (start); insn != NEXT_INSN (end);
       insn = NEXT_INSN (insn))
    {
      switch (GET_CODE (insn))
	{
	case NOTE:
	  if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
	    {
	      ++level;
	      /* Count number of loops contained in this one.  */
	      loop->level++;
	    }
	  else if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_END)
	    --level;
	  break;

	case CALL_INSN:
	  /* A call that is neither const nor pure may write any
	     memory; a pure call may still read arbitrary memory.  */
	  if (! CONST_OR_PURE_CALL_P (insn))
	    {
	      loop_info->unknown_address_altered = 1;
	      loop_info->has_nonconst_call = 1;
	    }
	  else if (pure_call_p (insn))
	    loop_info->has_nonconst_call = 1;
	  loop_info->has_call = 1;
	  if (can_throw_internal (insn))
	    loop_info->has_multiple_exit_targets = 1;
	  break;

	case JUMP_INSN:
	  if (! loop_info->has_multiple_exit_targets)
	    {
	      rtx set = pc_set (insn);

	      if (set)
		{
		  rtx src = SET_SRC (set);
		  rtx label1, label2;

		  /* A conditional jump has up to two targets; an
		     unconditional one has a single target.  */
		  if (GET_CODE (src) == IF_THEN_ELSE)
		    {
		      label1 = XEXP (src, 1);
		      label2 = XEXP (src, 2);
		    }
		  else
		    {
		      label1 = src;
		      label2 = NULL_RTX;
		    }

		  /* Examine each target in turn (pc_rtx means
		     "fall through" and is not an exit).  */
		  do
		    {
		      if (label1 && label1 != pc_rtx)
			{
			  if (GET_CODE (label1) != LABEL_REF)
			    {
			      /* Something tricky.  */
			      loop_info->has_multiple_exit_targets = 1;
			      break;
			    }
			  else if (XEXP (label1, 0) != exit_target
				   && LABEL_OUTSIDE_LOOP_P (label1))
			    {
			      /* A jump outside the current loop.  */
			      loop_info->has_multiple_exit_targets = 1;
			      break;
			    }
			}

		      label1 = label2;
		      label2 = NULL_RTX;
		    }
		  while (label1);
		}
	      else
		{
		  /* A return, or something tricky.  */
		  loop_info->has_multiple_exit_targets = 1;
		}
	    }
	  /* Fall through.  */

	case INSN:
	  if (volatile_refs_p (PATTERN (insn)))
	    loop_info->has_volatile = 1;

	  /* Dispatch tables only reach here via the JUMP_INSN
	     fall-through above.  */
	  if (JUMP_P (insn)
	      && (GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
		  || GET_CODE (PATTERN (insn)) == ADDR_VEC))
	    loop_info->has_tablejump = 1;

	  /* Record every memory store; remember the first insn in the
	     loop that stores to memory.  */
	  note_stores (PATTERN (insn), note_addr_stored, loop_info);
	  if (! loop_info->first_loop_store_insn && loop_info->store_mems)
	    loop_info->first_loop_store_insn = insn;

	  if (flag_non_call_exceptions && can_throw_internal (insn))
	    loop_info->has_multiple_exit_targets = 1;
	  break;

	default:
	  break;
	}
    }

  /* Now, rescan the loop, setting up the LOOP_MEMS array.  */
  if (/* An exception thrown by a called function might land us
	 anywhere.  */
      ! loop_info->has_nonconst_call
      /* We don't want loads for MEMs moved to a location before the
	 one at which their stack memory becomes allocated.  (Note
	 that this is not a problem for malloc, etc., since those
	 require actual function calls.  */
      && ! current_function_calls_alloca
      /* There are ways to leave the loop other than falling off the
	 end.  */
      && ! loop_info->has_multiple_exit_targets)
    for (insn = NEXT_INSN (start); insn != NEXT_INSN (end);
	 insn = NEXT_INSN (insn))
      for_each_rtx (&insn, insert_loop_mem, loop_info);

  /* BLKmode MEMs are added to LOOP_STORE_MEM as necessary so
     that loop_invariant_p and load_mems can use true_dependence
     to determine what is really clobbered.  */
  if (loop_info->unknown_address_altered)
    {
      /* A wildcard BLKmode MEM that conflicts with everything.  */
      rtx mem = gen_rtx_MEM (BLKmode, const0_rtx);

      loop_info->store_mems
	= gen_rtx_EXPR_LIST (VOIDmode, mem, loop_info->store_mems);
    }
  if (loop_info->unknown_constant_address_altered)
    {
      /* Same wildcard, but restricted to read-only memory.  */
      rtx mem = gen_rtx_MEM (BLKmode, const0_rtx);
      MEM_READONLY_P (mem) = 1;
      loop_info->store_mems
	= gen_rtx_EXPR_LIST (VOIDmode, mem, loop_info->store_mems);
    }
}
|
3034 |
|
|
|
3035 |
|
|
/* Invalidate all loops containing LABEL. */
|
3036 |
|
|
|
3037 |
|
|
static void
|
3038 |
|
|
invalidate_loops_containing_label (rtx label)
|
3039 |
|
|
{
|
3040 |
|
|
struct loop *loop;
|
3041 |
|
|
for (loop = uid_loop[INSN_UID (label)]; loop; loop = loop->outer)
|
3042 |
|
|
loop->invalid = 1;
|
3043 |
|
|
}
|
3044 |
|
|
|
3045 |
|
|
/* Scan the function looking for loops. Record the start and end of each loop.
|
3046 |
|
|
Also mark as invalid loops any loops that contain a setjmp or are branched
|
3047 |
|
|
to from outside the loop. */
|
3048 |
|
|
|
3049 |
|
|
static void
|
3050 |
|
|
find_and_verify_loops (rtx f, struct loops *loops)
|
3051 |
|
|
{
|
3052 |
|
|
rtx insn;
|
3053 |
|
|
rtx label;
|
3054 |
|
|
int num_loops;
|
3055 |
|
|
struct loop *current_loop;
|
3056 |
|
|
struct loop *next_loop;
|
3057 |
|
|
struct loop *loop;
|
3058 |
|
|
|
3059 |
|
|
num_loops = loops->num;
|
3060 |
|
|
|
3061 |
|
|
compute_luids (f, NULL_RTX, 0);
|
3062 |
|
|
|
3063 |
|
|
/* If there are jumps to undefined labels,
|
3064 |
|
|
treat them as jumps out of any/all loops.
|
3065 |
|
|
This also avoids writing past end of tables when there are no loops. */
|
3066 |
|
|
uid_loop[0] = NULL;
|
3067 |
|
|
|
3068 |
|
|
/* Find boundaries of loops, mark which loops are contained within
|
3069 |
|
|
loops, and invalidate loops that have setjmp. */
|
3070 |
|
|
|
3071 |
|
|
num_loops = 0;
|
3072 |
|
|
current_loop = NULL;
|
3073 |
|
|
for (insn = f; insn; insn = NEXT_INSN (insn))
|
3074 |
|
|
{
|
3075 |
|
|
if (NOTE_P (insn))
|
3076 |
|
|
switch (NOTE_LINE_NUMBER (insn))
|
3077 |
|
|
{
|
3078 |
|
|
case NOTE_INSN_LOOP_BEG:
|
3079 |
|
|
next_loop = loops->array + num_loops;
|
3080 |
|
|
next_loop->num = num_loops;
|
3081 |
|
|
num_loops++;
|
3082 |
|
|
next_loop->start = insn;
|
3083 |
|
|
next_loop->outer = current_loop;
|
3084 |
|
|
current_loop = next_loop;
|
3085 |
|
|
break;
|
3086 |
|
|
|
3087 |
|
|
case NOTE_INSN_LOOP_END:
|
3088 |
|
|
gcc_assert (current_loop);
|
3089 |
|
|
|
3090 |
|
|
current_loop->end = insn;
|
3091 |
|
|
current_loop = current_loop->outer;
|
3092 |
|
|
break;
|
3093 |
|
|
|
3094 |
|
|
default:
|
3095 |
|
|
break;
|
3096 |
|
|
}
|
3097 |
|
|
|
3098 |
|
|
if (CALL_P (insn)
|
3099 |
|
|
&& find_reg_note (insn, REG_SETJMP, NULL))
|
3100 |
|
|
{
|
3101 |
|
|
/* In this case, we must invalidate our current loop and any
|
3102 |
|
|
enclosing loop. */
|
3103 |
|
|
for (loop = current_loop; loop; loop = loop->outer)
|
3104 |
|
|
{
|
3105 |
|
|
loop->invalid = 1;
|
3106 |
|
|
if (loop_dump_stream)
|
3107 |
|
|
fprintf (loop_dump_stream,
|
3108 |
|
|
"\nLoop at %d ignored due to setjmp.\n",
|
3109 |
|
|
INSN_UID (loop->start));
|
3110 |
|
|
}
|
3111 |
|
|
}
|
3112 |
|
|
|
3113 |
|
|
/* Note that this will mark the NOTE_INSN_LOOP_END note as being in the
|
3114 |
|
|
enclosing loop, but this doesn't matter. */
|
3115 |
|
|
uid_loop[INSN_UID (insn)] = current_loop;
|
3116 |
|
|
}
|
3117 |
|
|
|
3118 |
|
|
/* Any loop containing a label used in an initializer must be invalidated,
|
3119 |
|
|
because it can be jumped into from anywhere. */
|
3120 |
|
|
for (label = forced_labels; label; label = XEXP (label, 1))
|
3121 |
|
|
invalidate_loops_containing_label (XEXP (label, 0));
|
3122 |
|
|
|
3123 |
|
|
/* Any loop containing a label used for an exception handler must be
|
3124 |
|
|
invalidated, because it can be jumped into from anywhere. */
|
3125 |
|
|
for_each_eh_label (invalidate_loops_containing_label);
|
3126 |
|
|
|
3127 |
|
|
/* Now scan all insn's in the function. If any JUMP_INSN branches into a
|
3128 |
|
|
loop that it is not contained within, that loop is marked invalid.
|
3129 |
|
|
If any INSN or CALL_INSN uses a label's address, then the loop containing
|
3130 |
|
|
that label is marked invalid, because it could be jumped into from
|
3131 |
|
|
anywhere.
|
3132 |
|
|
|
3133 |
|
|
Also look for blocks of code ending in an unconditional branch that
|
3134 |
|
|
exits the loop. If such a block is surrounded by a conditional
|
3135 |
|
|
branch around the block, move the block elsewhere (see below) and
|
3136 |
|
|
invert the jump to point to the code block. This may eliminate a
|
3137 |
|
|
label in our loop and will simplify processing by both us and a
|
3138 |
|
|
possible second cse pass. */
|
3139 |
|
|
|
3140 |
|
|
for (insn = f; insn; insn = NEXT_INSN (insn))
|
3141 |
|
|
if (INSN_P (insn))
|
3142 |
|
|
{
|
3143 |
|
|
struct loop *this_loop = uid_loop[INSN_UID (insn)];
|
3144 |
|
|
|
3145 |
|
|
if (NONJUMP_INSN_P (insn) || CALL_P (insn))
|
3146 |
|
|
{
|
3147 |
|
|
rtx note = find_reg_note (insn, REG_LABEL, NULL_RTX);
|
3148 |
|
|
if (note)
|
3149 |
|
|
invalidate_loops_containing_label (XEXP (note, 0));
|
3150 |
|
|
}
|
3151 |
|
|
|
3152 |
|
|
if (!JUMP_P (insn))
|
3153 |
|
|
continue;
|
3154 |
|
|
|
3155 |
|
|
mark_loop_jump (PATTERN (insn), this_loop);
|
3156 |
|
|
|
3157 |
|
|
/* See if this is an unconditional branch outside the loop. */
|
3158 |
|
|
if (this_loop
|
3159 |
|
|
&& (GET_CODE (PATTERN (insn)) == RETURN
|
3160 |
|
|
|| (any_uncondjump_p (insn)
|
3161 |
|
|
&& onlyjump_p (insn)
|
3162 |
|
|
&& (uid_loop[INSN_UID (JUMP_LABEL (insn))]
|
3163 |
|
|
!= this_loop)))
|
3164 |
|
|
&& get_max_uid () < max_uid_for_loop)
|
3165 |
|
|
{
|
3166 |
|
|
rtx p;
|
3167 |
|
|
rtx our_next = next_real_insn (insn);
|
3168 |
|
|
rtx last_insn_to_move = NEXT_INSN (insn);
|
3169 |
|
|
struct loop *dest_loop;
|
3170 |
|
|
struct loop *outer_loop = NULL;
|
3171 |
|
|
|
3172 |
|
|
/* Go backwards until we reach the start of the loop, a label,
|
3173 |
|
|
or a JUMP_INSN. */
|
3174 |
|
|
for (p = PREV_INSN (insn);
|
3175 |
|
|
!LABEL_P (p)
|
3176 |
|
|
&& ! (NOTE_P (p)
|
3177 |
|
|
&& NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_BEG)
|
3178 |
|
|
&& !JUMP_P (p);
|
3179 |
|
|
p = PREV_INSN (p))
|
3180 |
|
|
;
|
3181 |
|
|
|
3182 |
|
|
/* Check for the case where we have a jump to an inner nested
|
3183 |
|
|
loop, and do not perform the optimization in that case. */
|
3184 |
|
|
|
3185 |
|
|
if (JUMP_LABEL (insn))
|
3186 |
|
|
{
|
3187 |
|
|
dest_loop = uid_loop[INSN_UID (JUMP_LABEL (insn))];
|
3188 |
|
|
if (dest_loop)
|
3189 |
|
|
{
|
3190 |
|
|
for (outer_loop = dest_loop; outer_loop;
|
3191 |
|
|
outer_loop = outer_loop->outer)
|
3192 |
|
|
if (outer_loop == this_loop)
|
3193 |
|
|
break;
|
3194 |
|
|
}
|
3195 |
|
|
}
|
3196 |
|
|
|
3197 |
|
|
/* Make sure that the target of P is within the current loop. */
|
3198 |
|
|
|
3199 |
|
|
if (JUMP_P (p) && JUMP_LABEL (p)
|
3200 |
|
|
&& uid_loop[INSN_UID (JUMP_LABEL (p))] != this_loop)
|
3201 |
|
|
outer_loop = this_loop;
|
3202 |
|
|
|
3203 |
|
|
/* If we stopped on a JUMP_INSN to the next insn after INSN,
|
3204 |
|
|
we have a block of code to try to move.
|
3205 |
|
|
|
3206 |
|
|
We look backward and then forward from the target of INSN
|
3207 |
|
|
to find a BARRIER at the same loop depth as the target.
|
3208 |
|
|
If we find such a BARRIER, we make a new label for the start
|
3209 |
|
|
of the block, invert the jump in P and point it to that label,
|
3210 |
|
|
and move the block of code to the spot we found. */
|
3211 |
|
|
|
3212 |
|
|
if (! outer_loop
|
3213 |
|
|
&& JUMP_P (p)
|
3214 |
|
|
&& JUMP_LABEL (p) != 0
|
3215 |
|
|
/* Just ignore jumps to labels that were never emitted.
|
3216 |
|
|
These always indicate compilation errors. */
|
3217 |
|
|
&& INSN_UID (JUMP_LABEL (p)) != 0
|
3218 |
|
|
&& any_condjump_p (p) && onlyjump_p (p)
|
3219 |
|
|
&& next_real_insn (JUMP_LABEL (p)) == our_next
|
3220 |
|
|
/* If it's not safe to move the sequence, then we
|
3221 |
|
|
mustn't try. */
|
3222 |
|
|
&& insns_safe_to_move_p (p, NEXT_INSN (insn),
|
3223 |
|
|
&last_insn_to_move))
|
3224 |
|
|
{
|
3225 |
|
|
rtx target
|
3226 |
|
|
= JUMP_LABEL (insn) ? JUMP_LABEL (insn) : get_last_insn ();
|
3227 |
|
|
struct loop *target_loop = uid_loop[INSN_UID (target)];
|
3228 |
|
|
rtx loc, loc2;
|
3229 |
|
|
rtx tmp;
|
3230 |
|
|
|
3231 |
|
|
/* Search for possible garbage past the conditional jumps
|
3232 |
|
|
and look for the last barrier. */
|
3233 |
|
|
for (tmp = last_insn_to_move;
|
3234 |
|
|
tmp && !LABEL_P (tmp); tmp = NEXT_INSN (tmp))
|
3235 |
|
|
if (BARRIER_P (tmp))
|
3236 |
|
|
last_insn_to_move = tmp;
|
3237 |
|
|
|
3238 |
|
|
for (loc = target; loc; loc = PREV_INSN (loc))
|
3239 |
|
|
if (BARRIER_P (loc)
|
3240 |
|
|
/* Don't move things inside a tablejump. */
|
3241 |
|
|
&& ((loc2 = next_nonnote_insn (loc)) == 0
|
3242 |
|
|
|| !LABEL_P (loc2)
|
3243 |
|
|
|| (loc2 = next_nonnote_insn (loc2)) == 0
|
3244 |
|
|
|| !JUMP_P (loc2)
|
3245 |
|
|
|| (GET_CODE (PATTERN (loc2)) != ADDR_VEC
|
3246 |
|
|
&& GET_CODE (PATTERN (loc2)) != ADDR_DIFF_VEC))
|
3247 |
|
|
&& uid_loop[INSN_UID (loc)] == target_loop)
|
3248 |
|
|
break;
|
3249 |
|
|
|
3250 |
|
|
if (loc == 0)
|
3251 |
|
|
for (loc = target; loc; loc = NEXT_INSN (loc))
|
3252 |
|
|
if (BARRIER_P (loc)
|
3253 |
|
|
/* Don't move things inside a tablejump. */
|
3254 |
|
|
&& ((loc2 = next_nonnote_insn (loc)) == 0
|
3255 |
|
|
|| !LABEL_P (loc2)
|
3256 |
|
|
|| (loc2 = next_nonnote_insn (loc2)) == 0
|
3257 |
|
|
|| !JUMP_P (loc2)
|
3258 |
|
|
|| (GET_CODE (PATTERN (loc2)) != ADDR_VEC
|
3259 |
|
|
&& GET_CODE (PATTERN (loc2)) != ADDR_DIFF_VEC))
|
3260 |
|
|
&& uid_loop[INSN_UID (loc)] == target_loop)
|
3261 |
|
|
break;
|
3262 |
|
|
|
3263 |
|
|
if (loc)
|
3264 |
|
|
{
|
3265 |
|
|
rtx cond_label = JUMP_LABEL (p);
|
3266 |
|
|
rtx new_label = get_label_after (p);
|
3267 |
|
|
|
3268 |
|
|
/* Ensure our label doesn't go away. */
|
3269 |
|
|
LABEL_NUSES (cond_label)++;
|
3270 |
|
|
|
3271 |
|
|
/* Verify that uid_loop is large enough and that
|
3272 |
|
|
we can invert P. */
|
3273 |
|
|
if (invert_jump (p, new_label, 1))
|
3274 |
|
|
{
|
3275 |
|
|
rtx q, r;
|
3276 |
|
|
bool only_notes;
|
3277 |
|
|
|
3278 |
|
|
/* If no suitable BARRIER was found, create a suitable
|
3279 |
|
|
one before TARGET. Since TARGET is a fall through
|
3280 |
|
|
path, we'll need to insert a jump around our block
|
3281 |
|
|
and add a BARRIER before TARGET.
|
3282 |
|
|
|
3283 |
|
|
This creates an extra unconditional jump outside
|
3284 |
|
|
the loop. However, the benefits of removing rarely
|
3285 |
|
|
executed instructions from inside the loop usually
|
3286 |
|
|
outweighs the cost of the extra unconditional jump
|
3287 |
|
|
outside the loop. */
|
3288 |
|
|
if (loc == 0)
|
3289 |
|
|
{
|
3290 |
|
|
rtx temp;
|
3291 |
|
|
|
3292 |
|
|
temp = gen_jump (JUMP_LABEL (insn));
|
3293 |
|
|
temp = emit_jump_insn_before (temp, target);
|
3294 |
|
|
JUMP_LABEL (temp) = JUMP_LABEL (insn);
|
3295 |
|
|
LABEL_NUSES (JUMP_LABEL (insn))++;
|
3296 |
|
|
loc = emit_barrier_before (target);
|
3297 |
|
|
}
|
3298 |
|
|
|
3299 |
|
|
/* Include the BARRIER after INSN and copy the
|
3300 |
|
|
block after LOC. */
|
3301 |
|
|
only_notes = squeeze_notes (&new_label,
|
3302 |
|
|
&last_insn_to_move);
|
3303 |
|
|
gcc_assert (!only_notes);
|
3304 |
|
|
|
3305 |
|
|
reorder_insns (new_label, last_insn_to_move, loc);
|
3306 |
|
|
|
3307 |
|
|
/* All those insns are now in TARGET_LOOP. */
|
3308 |
|
|
for (q = new_label;
|
3309 |
|
|
q != NEXT_INSN (last_insn_to_move);
|
3310 |
|
|
q = NEXT_INSN (q))
|
3311 |
|
|
uid_loop[INSN_UID (q)] = target_loop;
|
3312 |
|
|
|
3313 |
|
|
/* The label jumped to by INSN is no longer a loop
|
3314 |
|
|
exit. Unless INSN does not have a label (e.g.,
|
3315 |
|
|
it is a RETURN insn), search loop->exit_labels
|
3316 |
|
|
to find its label_ref, and remove it. Also turn
|
3317 |
|
|
off LABEL_OUTSIDE_LOOP_P bit. */
|
3318 |
|
|
if (JUMP_LABEL (insn))
|
3319 |
|
|
{
|
3320 |
|
|
for (q = 0, r = this_loop->exit_labels;
|
3321 |
|
|
r;
|
3322 |
|
|
q = r, r = LABEL_NEXTREF (r))
|
3323 |
|
|
if (XEXP (r, 0) == JUMP_LABEL (insn))
|
3324 |
|
|
{
|
3325 |
|
|
LABEL_OUTSIDE_LOOP_P (r) = 0;
|
3326 |
|
|
if (q)
|
3327 |
|
|
LABEL_NEXTREF (q) = LABEL_NEXTREF (r);
|
3328 |
|
|
else
|
3329 |
|
|
this_loop->exit_labels = LABEL_NEXTREF (r);
|
3330 |
|
|
break;
|
3331 |
|
|
}
|
3332 |
|
|
|
3333 |
|
|
for (loop = this_loop; loop && loop != target_loop;
|
3334 |
|
|
loop = loop->outer)
|
3335 |
|
|
loop->exit_count--;
|
3336 |
|
|
|
3337 |
|
|
/* If we didn't find it, then something is
|
3338 |
|
|
wrong. */
|
3339 |
|
|
gcc_assert (r);
|
3340 |
|
|
}
|
3341 |
|
|
|
3342 |
|
|
/* P is now a jump outside the loop, so it must be put
|
3343 |
|
|
in loop->exit_labels, and marked as such.
|
3344 |
|
|
The easiest way to do this is to just call
|
3345 |
|
|
mark_loop_jump again for P. */
|
3346 |
|
|
mark_loop_jump (PATTERN (p), this_loop);
|
3347 |
|
|
|
3348 |
|
|
/* If INSN now jumps to the insn after it,
|
3349 |
|
|
delete INSN. */
|
3350 |
|
|
if (JUMP_LABEL (insn) != 0
|
3351 |
|
|
&& (next_real_insn (JUMP_LABEL (insn))
|
3352 |
|
|
== next_real_insn (insn)))
|
3353 |
|
|
delete_related_insns (insn);
|
3354 |
|
|
}
|
3355 |
|
|
|
3356 |
|
|
/* Continue the loop after where the conditional
|
3357 |
|
|
branch used to jump, since the only branch insn
|
3358 |
|
|
in the block (if it still remains) is an inter-loop
|
3359 |
|
|
branch and hence needs no processing. */
|
3360 |
|
|
insn = NEXT_INSN (cond_label);
|
3361 |
|
|
|
3362 |
|
|
if (--LABEL_NUSES (cond_label) == 0)
|
3363 |
|
|
delete_related_insns (cond_label);
|
3364 |
|
|
|
3365 |
|
|
/* This loop will be continued with NEXT_INSN (insn). */
|
3366 |
|
|
insn = PREV_INSN (insn);
|
3367 |
|
|
}
|
3368 |
|
|
}
|
3369 |
|
|
}
|
3370 |
|
|
}
|
3371 |
|
|
}
|
3372 |
|
|
|
3373 |
|
|
/* If any label in X jumps to a loop different from LOOP_NUM and any of the
|
3374 |
|
|
loops it is contained in, mark the target loop invalid.
|
3375 |
|
|
|
3376 |
|
|
For speed, we assume that X is part of a pattern of a JUMP_INSN. */
|
3377 |
|
|
|
3378 |
|
|
static void
mark_loop_jump (rtx x, struct loop *loop)
{
  struct loop *dest_loop;
  struct loop *outer_loop;
  int i;

  /* Walk the jump pattern X recursively, looking for LABEL_REFs.  Every
     other code either cannot contain a label or delegates to a subpart
     that might.  */
  switch (GET_CODE (x))
    {
    case PC:
    case USE:
    case CLOBBER:
    case REG:
    case MEM:
    case CONST_INT:
    case CONST_DOUBLE:
    case RETURN:
      /* None of these can contain a label reference.  */
      return;

    case CONST:
      /* There could be a label reference in here.  */
      mark_loop_jump (XEXP (x, 0), loop);
      return;

    case PLUS:
    case MINUS:
    case MULT:
      mark_loop_jump (XEXP (x, 0), loop);
      mark_loop_jump (XEXP (x, 1), loop);
      return;

    case LO_SUM:
      /* This may refer to a LABEL_REF or SYMBOL_REF.  */
      mark_loop_jump (XEXP (x, 1), loop);
      return;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      mark_loop_jump (XEXP (x, 0), loop);
      return;

    case LABEL_REF:
      dest_loop = uid_loop[INSN_UID (XEXP (x, 0))];

      /* Link together all labels that branch outside the loop.  This
	 is used by final_[bg]iv_value and the loop unrolling code.  Also
	 mark this LABEL_REF so we know that this branch should predict
	 false.  */

      /* A check to make sure the label is not in an inner nested loop,
	 since this does not count as a loop exit.  After this loop,
	 OUTER_LOOP is nonzero iff the target label lies within LOOP.  */
      if (dest_loop)
	{
	  for (outer_loop = dest_loop; outer_loop;
	       outer_loop = outer_loop->outer)
	    if (outer_loop == loop)
	      break;
	}
      else
	outer_loop = NULL;

      if (loop && ! outer_loop)
	{
	  /* Target is outside LOOP: chain this LABEL_REF onto LOOP's
	     exit-label list and bump the exit count of every loop we
	     jump out of.  */
	  LABEL_OUTSIDE_LOOP_P (x) = 1;
	  LABEL_NEXTREF (x) = loop->exit_labels;
	  loop->exit_labels = x;

	  for (outer_loop = loop;
	       outer_loop && outer_loop != dest_loop;
	       outer_loop = outer_loop->outer)
	    outer_loop->exit_count++;
	}

      /* If this is inside a loop, but not in the current loop or one enclosed
	 by it, it invalidates at least one loop.  */

      if (! dest_loop)
	return;

      /* We must invalidate every nested loop containing the target of this
	 label, except those that also contain the jump insn.  */

      for (; dest_loop; dest_loop = dest_loop->outer)
	{
	  /* Stop when we reach a loop that also contains the jump insn.  */
	  for (outer_loop = loop; outer_loop; outer_loop = outer_loop->outer)
	    if (dest_loop == outer_loop)
	      return;

	  /* If we get here, we know we need to invalidate a loop.  */
	  if (loop_dump_stream && ! dest_loop->invalid)
	    fprintf (loop_dump_stream,
		     "\nLoop at %d ignored due to multiple entry points.\n",
		     INSN_UID (dest_loop->start));

	  dest_loop->invalid = 1;
	}
      return;

    case SET:
      /* If this is not setting pc, ignore.  */
      if (SET_DEST (x) == pc_rtx)
	mark_loop_jump (SET_SRC (x), loop);
      return;

    case IF_THEN_ELSE:
      /* Only the two arms can hold jump targets; the condition cannot.  */
      mark_loop_jump (XEXP (x, 1), loop);
      mark_loop_jump (XEXP (x, 2), loop);
      return;

    case PARALLEL:
    case ADDR_VEC:
      for (i = 0; i < XVECLEN (x, 0); i++)
	mark_loop_jump (XVECEXP (x, 0, i), loop);
      return;

    case ADDR_DIFF_VEC:
      /* The label vector of an ADDR_DIFF_VEC is operand 1.  */
      for (i = 0; i < XVECLEN (x, 1); i++)
	mark_loop_jump (XVECEXP (x, 1, i), loop);
      return;

    default:
      /* Strictly speaking this is not a jump into the loop, only a possible
	 jump out of the loop.  However, we have no way to link the destination
	 of this jump onto the list of exit labels.  To be safe we mark this
	 loop and any containing loops as invalid.  */
      if (loop)
	{
	  for (outer_loop = loop; outer_loop; outer_loop = outer_loop->outer)
	    {
	      if (loop_dump_stream && ! outer_loop->invalid)
		fprintf (loop_dump_stream,
			 "\nLoop at %d ignored due to unknown exit jump.\n",
			 INSN_UID (outer_loop->start));
	      outer_loop->invalid = 1;
	    }
	}
      return;
    }
}
|
3518 |
|
|
|
3519 |
|
|
/* Return nonzero if there is a label in the range from
|
3520 |
|
|
insn INSN to and including the insn whose luid is END
|
3521 |
|
|
INSN must have an assigned luid (i.e., it must not have
|
3522 |
|
|
been previously created by loop.c). */
|
3523 |
|
|
|
3524 |
|
|
static int
|
3525 |
|
|
labels_in_range_p (rtx insn, int end)
|
3526 |
|
|
{
|
3527 |
|
|
while (insn && INSN_LUID (insn) <= end)
|
3528 |
|
|
{
|
3529 |
|
|
if (LABEL_P (insn))
|
3530 |
|
|
return 1;
|
3531 |
|
|
insn = NEXT_INSN (insn);
|
3532 |
|
|
}
|
3533 |
|
|
|
3534 |
|
|
return 0;
|
3535 |
|
|
}
|
3536 |
|
|
|
3537 |
|
|
/* Record that a memory reference X is being set. */
|
3538 |
|
|
|
3539 |
|
|
static void
|
3540 |
|
|
note_addr_stored (rtx x, rtx y ATTRIBUTE_UNUSED,
|
3541 |
|
|
void *data ATTRIBUTE_UNUSED)
|
3542 |
|
|
{
|
3543 |
|
|
struct loop_info *loop_info = data;
|
3544 |
|
|
|
3545 |
|
|
if (x == 0 || !MEM_P (x))
|
3546 |
|
|
return;
|
3547 |
|
|
|
3548 |
|
|
/* Count number of memory writes.
|
3549 |
|
|
This affects heuristics in strength_reduce. */
|
3550 |
|
|
loop_info->num_mem_sets++;
|
3551 |
|
|
|
3552 |
|
|
/* BLKmode MEM means all memory is clobbered. */
|
3553 |
|
|
if (GET_MODE (x) == BLKmode)
|
3554 |
|
|
{
|
3555 |
|
|
if (MEM_READONLY_P (x))
|
3556 |
|
|
loop_info->unknown_constant_address_altered = 1;
|
3557 |
|
|
else
|
3558 |
|
|
loop_info->unknown_address_altered = 1;
|
3559 |
|
|
|
3560 |
|
|
return;
|
3561 |
|
|
}
|
3562 |
|
|
|
3563 |
|
|
loop_info->store_mems = gen_rtx_EXPR_LIST (VOIDmode, x,
|
3564 |
|
|
loop_info->store_mems);
|
3565 |
|
|
}
|
3566 |
|
|
|
3567 |
|
|
/* X is a value modified by an INSN that references a biv inside a loop
|
3568 |
|
|
exit test (i.e., X is somehow related to the value of the biv). If X
|
3569 |
|
|
is a pseudo that is used more than once, then the biv is (effectively)
|
3570 |
|
|
used more than once. DATA is a pointer to a loop_regs structure. */
|
3571 |
|
|
|
3572 |
|
|
static void
|
3573 |
|
|
note_set_pseudo_multiple_uses (rtx x, rtx y ATTRIBUTE_UNUSED, void *data)
|
3574 |
|
|
{
|
3575 |
|
|
struct loop_regs *regs = (struct loop_regs *) data;
|
3576 |
|
|
|
3577 |
|
|
if (x == 0)
|
3578 |
|
|
return;
|
3579 |
|
|
|
3580 |
|
|
while (GET_CODE (x) == STRICT_LOW_PART
|
3581 |
|
|
|| GET_CODE (x) == SIGN_EXTRACT
|
3582 |
|
|
|| GET_CODE (x) == ZERO_EXTRACT
|
3583 |
|
|
|| GET_CODE (x) == SUBREG)
|
3584 |
|
|
x = XEXP (x, 0);
|
3585 |
|
|
|
3586 |
|
|
if (!REG_P (x) || REGNO (x) < FIRST_PSEUDO_REGISTER)
|
3587 |
|
|
return;
|
3588 |
|
|
|
3589 |
|
|
/* If we do not have usage information, or if we know the register
|
3590 |
|
|
is used more than once, note that fact for check_dbra_loop. */
|
3591 |
|
|
if (REGNO (x) >= max_reg_before_loop
|
3592 |
|
|
|| ! regs->array[REGNO (x)].single_usage
|
3593 |
|
|
|| regs->array[REGNO (x)].single_usage == const0_rtx)
|
3594 |
|
|
regs->multiple_uses = 1;
|
3595 |
|
|
}
|
3596 |
|
|
|
3597 |
|
|
/* Return nonzero if the rtx X is invariant over the current loop.
|
3598 |
|
|
|
3599 |
|
|
The value is 2 if we refer to something only conditionally invariant.
|
3600 |
|
|
|
3601 |
|
|
A memory ref is invariant if it is not volatile and does not conflict
|
3602 |
|
|
with anything stored in `loop_info->store_mems'. */
|
3603 |
|
|
|
3604 |
|
|
static int
loop_invariant_p (const struct loop *loop, rtx x)
{
  struct loop_info *loop_info = LOOP_INFO (loop);
  struct loop_regs *regs = LOOP_REGS (loop);
  int i;
  enum rtx_code code;
  const char *fmt;
  int conditional = 0;	/* Set if any subexpression returned 2.  */
  rtx mem_list_entry;

  if (x == 0)
    return 1;
  code = GET_CODE (x);
  switch (code)
    {
    case CONST_INT:
    case CONST_DOUBLE:
    case SYMBOL_REF:
    case CONST:
      return 1;

    case LABEL_REF:
      return 1;

    case PC:
    case CC0:
    case UNSPEC_VOLATILE:
      return 0;

    case REG:
      /* The fixed frame/arg/PIC pointers are invariant unless a nonlocal
	 goto could clobber them.  */
      if ((x == frame_pointer_rtx || x == hard_frame_pointer_rtx
	   || x == arg_pointer_rtx || x == pic_offset_table_rtx)
	  && ! current_function_has_nonlocal_goto)
	return 1;

      /* A call-used hard reg is clobbered by any call in the loop.  */
      if (LOOP_INFO (loop)->has_call
	  && REGNO (x) < FIRST_PSEUDO_REGISTER && call_used_regs[REGNO (x)])
	return 0;

      /* Out-of-range regs can occur when we are called from unrolling.
	 These registers created by the unroller are set in the loop,
	 hence are never invariant.
	 Other out-of-range regs can be generated by load_mems; those that
	 are written to in the loop are not invariant, while those that are
	 not written to are invariant.  It would be easy for load_mems
	 to set n_times_set correctly for these registers, however, there
	 is no easy way to distinguish them from registers created by the
	 unroller.  */

      if (REGNO (x) >= (unsigned) regs->num)
	return 0;

      /* Negative set_in_loop marks a conditionally-invariant reg.  */
      if (regs->array[REGNO (x)].set_in_loop < 0)
	return 2;

      return regs->array[REGNO (x)].set_in_loop == 0;

    case MEM:
      /* Volatile memory references must be rejected.  Do this before
	 checking for read-only items, so that volatile read-only items
	 will be rejected also.  */
      if (MEM_VOLATILE_P (x))
	return 0;

      /* See if there is any dependence between a store and this load.  */
      mem_list_entry = loop_info->store_mems;
      while (mem_list_entry)
	{
	  if (true_dependence (XEXP (mem_list_entry, 0), VOIDmode,
			       x, rtx_varies_p))
	    return 0;

	  mem_list_entry = XEXP (mem_list_entry, 1);
	}

      /* It's not invalidated by a store in memory
	 but we must still verify the address is invariant.
	 Fall through to the generic subexpression walk below.  */
      break;

    case ASM_OPERANDS:
      /* Don't mess with insns declared volatile.  */
      if (MEM_VOLATILE_P (x))
	return 0;
      break;

    default:
      break;
    }

  /* Generic case: X is invariant iff all of its subexpressions are;
     it is conditionally invariant (2) if any subexpression is.  */
  fmt = GET_RTX_FORMAT (code);
  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'e')
	{
	  int tem = loop_invariant_p (loop, XEXP (x, i));
	  if (tem == 0)
	    return 0;
	  if (tem == 2)
	    conditional = 1;
	}
      else if (fmt[i] == 'E')
	{
	  int j;
	  for (j = 0; j < XVECLEN (x, i); j++)
	    {
	      int tem = loop_invariant_p (loop, XVECEXP (x, i, j));
	      if (tem == 0)
		return 0;
	      if (tem == 2)
		conditional = 1;
	    }

	}
    }

  return 1 + conditional;
}
|
3722 |
|
|
|
3723 |
|
|
/* Return nonzero if all the insns in the loop that set REG
|
3724 |
|
|
are INSN and the immediately following insns,
|
3725 |
|
|
and if each of those insns sets REG in an invariant way
|
3726 |
|
|
(not counting uses of REG in them).
|
3727 |
|
|
|
3728 |
|
|
The value is 2 if some of these insns are only conditionally invariant.
|
3729 |
|
|
|
3730 |
|
|
We assume that INSN itself is the first set of REG
|
3731 |
|
|
and that its source is invariant. */
|
3732 |
|
|
|
3733 |
|
|
static int
consec_sets_invariant_p (const struct loop *loop, rtx reg, int n_sets,
			 rtx insn)
{
  struct loop_regs *regs = LOOP_REGS (loop);
  rtx p = insn;
  unsigned int regno = REGNO (reg);
  rtx temp;
  /* Number of sets we have to insist on finding after INSN.  */
  int count = n_sets - 1;
  int old = regs->array[regno].set_in_loop;
  int value = 0;
  int this;

  /* If N_SETS hit the limit, we can't rely on its value.  */
  if (n_sets == 127)
    return 0;

  /* Temporarily pretend REG is not set in the loop, so that uses of REG
     within the sets we examine look invariant to loop_invariant_p; the
     saved value is restored on every exit path below.  */
  regs->array[regno].set_in_loop = 0;

  while (count > 0)
    {
      enum rtx_code code;
      rtx set;

      p = NEXT_INSN (p);
      code = GET_CODE (p);

      /* If library call, skip to end of it.  */
      if (code == INSN && (temp = find_reg_note (p, REG_LIBCALL, NULL_RTX)))
	p = XEXP (temp, 0);

      this = 0;
      if (code == INSN
	  && (set = single_set (p))
	  && REG_P (SET_DEST (set))
	  && REGNO (SET_DEST (set)) == regno)
	{
	  this = loop_invariant_p (loop, SET_SRC (set));
	  if (this != 0)
	    value |= this;
	  else if ((temp = find_reg_note (p, REG_EQUAL, NULL_RTX)))
	    {
	      /* If this is a libcall, then any invariant REG_EQUAL note is OK.
		 If this is an ordinary insn, then only CONSTANT_P REG_EQUAL
		 notes are OK.  */
	      this = (CONSTANT_P (XEXP (temp, 0))
		      || (find_reg_note (p, REG_RETVAL, NULL_RTX)
			  && loop_invariant_p (loop, XEXP (temp, 0))));
	      if (this != 0)
		value |= this;
	    }
	}
      /* Either this insn was one of the sets we were looking for, or it
	 must be an ignorable NOTE; anything else means failure.  */
      if (this != 0)
	count--;
      else if (code != NOTE)
	{
	  regs->array[regno].set_in_loop = old;
	  return 0;
	}
    }

  regs->array[regno].set_in_loop = old;
  /* If loop_invariant_p ever returned 2, we return 2.  */
  return 1 + (value & 2);
}
|
3799 |
|
|
|
3800 |
|
|
/* Look at all uses (not sets) of registers in X. For each, if it is
|
3801 |
|
|
the single use, set USAGE[REGNO] to INSN; if there was a previous use in
|
3802 |
|
|
a different insn, set USAGE[REGNO] to const0_rtx. */
|
3803 |
|
|
|
3804 |
|
|
static void
|
3805 |
|
|
find_single_use_in_loop (struct loop_regs *regs, rtx insn, rtx x)
|
3806 |
|
|
{
|
3807 |
|
|
enum rtx_code code = GET_CODE (x);
|
3808 |
|
|
const char *fmt = GET_RTX_FORMAT (code);
|
3809 |
|
|
int i, j;
|
3810 |
|
|
|
3811 |
|
|
if (code == REG)
|
3812 |
|
|
regs->array[REGNO (x)].single_usage
|
3813 |
|
|
= (regs->array[REGNO (x)].single_usage != 0
|
3814 |
|
|
&& regs->array[REGNO (x)].single_usage != insn)
|
3815 |
|
|
? const0_rtx : insn;
|
3816 |
|
|
|
3817 |
|
|
else if (code == SET)
|
3818 |
|
|
{
|
3819 |
|
|
/* Don't count SET_DEST if it is a REG; otherwise count things
|
3820 |
|
|
in SET_DEST because if a register is partially modified, it won't
|
3821 |
|
|
show up as a potential movable so we don't care how USAGE is set
|
3822 |
|
|
for it. */
|
3823 |
|
|
if (!REG_P (SET_DEST (x)))
|
3824 |
|
|
find_single_use_in_loop (regs, insn, SET_DEST (x));
|
3825 |
|
|
find_single_use_in_loop (regs, insn, SET_SRC (x));
|
3826 |
|
|
}
|
3827 |
|
|
else
|
3828 |
|
|
for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
|
3829 |
|
|
{
|
3830 |
|
|
if (fmt[i] == 'e' && XEXP (x, i) != 0)
|
3831 |
|
|
find_single_use_in_loop (regs, insn, XEXP (x, i));
|
3832 |
|
|
else if (fmt[i] == 'E')
|
3833 |
|
|
for (j = XVECLEN (x, i) - 1; j >= 0; j--)
|
3834 |
|
|
find_single_use_in_loop (regs, insn, XVECEXP (x, i, j));
|
3835 |
|
|
}
|
3836 |
|
|
}
|
3837 |
|
|
|
3838 |
|
|
/* Count and record any set in X which is contained in INSN. Update
|
3839 |
|
|
REGS->array[I].MAY_NOT_OPTIMIZE and LAST_SET for any register I set
|
3840 |
|
|
in X. */
|
3841 |
|
|
|
3842 |
|
|
static void
count_one_set (struct loop_regs *regs, rtx insn, rtx x, rtx *last_set)
{
  if (GET_CODE (x) == CLOBBER && REG_P (XEXP (x, 0)))
    /* Don't move a reg that has an explicit clobber.
       It's not worth the pain to try to do it correctly.  */
    regs->array[REGNO (XEXP (x, 0))].may_not_optimize = 1;

  if (GET_CODE (x) == SET || GET_CODE (x) == CLOBBER)
    {
      rtx dest = SET_DEST (x);
      /* Strip wrappers to find the register actually being stored.  */
      while (GET_CODE (dest) == SUBREG
	     || GET_CODE (dest) == ZERO_EXTRACT
	     || GET_CODE (dest) == STRICT_LOW_PART)
	dest = XEXP (dest, 0);
      if (REG_P (dest))
	{
	  int i;
	  int regno = REGNO (dest);
	  /* A multi-word value may span several hard regs; update the
	     bookkeeping for each of them.  NOTE(review): the two tests
	     below consult index REGNO while the updates use REGNO+I —
	     this looks like a latent inconsistency for I > 0, but it is
	     the historical behavior; confirm before changing.  */
	  for (i = 0; i < LOOP_REGNO_NREGS (regno, dest); i++)
	    {
	      /* If this is the first setting of this reg
		 in current basic block, and it was set before,
		 it must be set in two basic blocks, so it cannot
		 be moved out of the loop.  */
	      if (regs->array[regno].set_in_loop > 0
		  && last_set[regno] == 0)
		regs->array[regno+i].may_not_optimize = 1;
	      /* If this is not first setting in current basic block,
		 see if reg was used in between previous one and this.
		 If so, neither one can be moved.  */
	      if (last_set[regno] != 0
		  && reg_used_between_p (dest, last_set[regno], insn))
		regs->array[regno+i].may_not_optimize = 1;
	      /* set_in_loop saturates at 127 (see consec_sets_invariant_p,
		 which treats 127 as "unreliable").  */
	      if (regs->array[regno+i].set_in_loop < 127)
		++regs->array[regno+i].set_in_loop;
	      last_set[regno+i] = insn;
	    }
	}
    }
}
|
3883 |
|
|
|
3884 |
|
|
/* Given a loop that is bounded by LOOP->START and LOOP->END and that
|
3885 |
|
|
is entered at LOOP->SCAN_START, return 1 if the register set in SET
|
3886 |
|
|
contained in insn INSN is used by any insn that precedes INSN in
|
3887 |
|
|
cyclic order starting from the loop entry point.
|
3888 |
|
|
|
3889 |
|
|
We don't want to use INSN_LUID here because if we restrict INSN to those
|
3890 |
|
|
that have a valid INSN_LUID, it means we cannot move an invariant out
|
3891 |
|
|
from an inner loop past two loops. */
|
3892 |
|
|
|
3893 |
|
|
static int
|
3894 |
|
|
loop_reg_used_before_p (const struct loop *loop, rtx set, rtx insn)
|
3895 |
|
|
{
|
3896 |
|
|
rtx reg = SET_DEST (set);
|
3897 |
|
|
rtx p;
|
3898 |
|
|
|
3899 |
|
|
/* Scan forward checking for register usage. If we hit INSN, we
|
3900 |
|
|
are done. Otherwise, if we hit LOOP->END, wrap around to LOOP->START. */
|
3901 |
|
|
for (p = loop->scan_start; p != insn; p = NEXT_INSN (p))
|
3902 |
|
|
{
|
3903 |
|
|
if (INSN_P (p) && reg_overlap_mentioned_p (reg, PATTERN (p)))
|
3904 |
|
|
return 1;
|
3905 |
|
|
|
3906 |
|
|
if (p == loop->end)
|
3907 |
|
|
p = loop->start;
|
3908 |
|
|
}
|
3909 |
|
|
|
3910 |
|
|
return 0;
|
3911 |
|
|
}
|
3912 |
|
|
|
3913 |
|
|
|
3914 |
|
|
/* Information we collect about arrays that we might want to prefetch. */
|
3915 |
|
|
struct prefetch_info
{
  struct iv_class *class;	/* Class this prefetch is based on.  */
  struct induction *giv;	/* GIV this prefetch is based on.  */
  rtx base_address;		/* Start prefetching from this address plus
				   index.  */
  HOST_WIDE_INT index;		/* Byte offset added to BASE_ADDRESS.  */
  HOST_WIDE_INT stride;		/* Prefetch stride in bytes in each
				   iteration.  */
  unsigned int bytes_accessed;	/* Sum of sizes of all accesses to this
				   prefetch area in one iteration.  */
  unsigned int total_bytes;	/* Total bytes loop will access in this block.
				   This is set only for loops with known
				   iteration counts and is 0xffffffff
				   otherwise.  */
  int prefetch_in_loop;		/* Number of prefetch insns in loop.  */
  int prefetch_before_loop;	/* Number of prefetch insns before loop.  */
  unsigned int write : 1;	/* 1 for read/write prefetches.  */
};
|
3934 |
|
|
|
3935 |
|
|
/* Data used by check_store function. */
|
3936 |
|
|
struct check_store_data
{
  rtx mem_address;		/* Address to watch for in stored MEMs.  */
  int mem_write;		/* Set to 1 by check_store when a store to
				   MEM_ADDRESS is seen.  */
};
|
3941 |
|
|
|
3942 |
|
|
/* Forward declarations for the prefetch machinery defined below.  */
static void check_store (rtx, rtx, void *);
static void emit_prefetch_instructions (struct loop *);
static int rtx_equal_for_prefetch_p (rtx, rtx);
|
3945 |
|
|
|
3946 |
|
|
/* Set mem_write when mem_address is found. Used as callback to
|
3947 |
|
|
note_stores. */
|
3948 |
|
|
static void
|
3949 |
|
|
check_store (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
|
3950 |
|
|
{
|
3951 |
|
|
struct check_store_data *d = (struct check_store_data *) data;
|
3952 |
|
|
|
3953 |
|
|
if ((MEM_P (x)) && rtx_equal_p (d->mem_address, XEXP (x, 0)))
|
3954 |
|
|
d->mem_write = 1;
|
3955 |
|
|
}
|
3956 |
|
|
|
3957 |
|
|
/* Like rtx_equal_p, but attempts to swap commutative operands.  This is
   important to get some addresses combined.  Later more sophisticated
   transformations can be added when necessary.

   Returns 1 if X and Y are equivalent for prefetch-merging purposes,
   0 otherwise.

   ??? Same trick with swapping operand is done at several other places.
   It can be nice to develop some common way to handle this.  */

static int
rtx_equal_for_prefetch_p (rtx x, rtx y)
{
  int i;
  int j;
  enum rtx_code code = GET_CODE (x);
  const char *fmt;

  /* Identical rtx: trivially equal.  */
  if (x == y)
    return 1;
  if (code != GET_CODE (y))
    return 0;

  if (GET_MODE (x) != GET_MODE (y))
    return 0;

  switch (code)
    {
    /* Distinct non-shared constants / special codes compare unequal here:
       had they been equal, the X == Y pointer test above would have
       caught them (CONST_INTs and friends are shared).  */
    case PC:
    case CC0:
    case CONST_INT:
    case CONST_DOUBLE:
      return 0;

    case LABEL_REF:
      /* Labels compare by identity of the referenced CODE_LABEL.  */
      return XEXP (x, 0) == XEXP (y, 0);

    default:
      break;
    }

  /* For commutative operations, try both operand orders.  */
  if (COMMUTATIVE_ARITH_P (x))
    {
      return ((rtx_equal_for_prefetch_p (XEXP (x, 0), XEXP (y, 0))
	       && rtx_equal_for_prefetch_p (XEXP (x, 1), XEXP (y, 1)))
	      || (rtx_equal_for_prefetch_p (XEXP (x, 0), XEXP (y, 1))
		  && rtx_equal_for_prefetch_p (XEXP (x, 1), XEXP (y, 0))));
    }

  /* Compare the elements.  If any pair of corresponding elements fails to
     match, return 0 for the whole thing.  The format string describes the
     kind of each operand slot (see GET_RTX_FORMAT in the internals docs).  */

  fmt = GET_RTX_FORMAT (code);
  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
    {
      switch (fmt[i])
	{
	case 'w':	/* HOST_WIDE_INT operand.  */
	  if (XWINT (x, i) != XWINT (y, i))
	    return 0;
	  break;

	case 'i':	/* int operand.  */
	  if (XINT (x, i) != XINT (y, i))
	    return 0;
	  break;

	case 'E':	/* Vector of rtx.  */
	  /* Two vectors must have the same length.  */
	  if (XVECLEN (x, i) != XVECLEN (y, i))
	    return 0;

	  /* And the corresponding elements must match.  */
	  for (j = 0; j < XVECLEN (x, i); j++)
	    if (rtx_equal_for_prefetch_p (XVECEXP (x, i, j),
					  XVECEXP (y, i, j)) == 0)
	      return 0;
	  break;

	case 'e':	/* Sub-rtx: recurse.  */
	  if (rtx_equal_for_prefetch_p (XEXP (x, i), XEXP (y, i)) == 0)
	    return 0;
	  break;

	case 's':	/* String operand.  */
	  if (strcmp (XSTR (x, i), XSTR (y, i)))
	    return 0;
	  break;

	case 'u':
	  /* These are just backpointers, so they don't matter.  */
	  break;

	case '0':	/* Unused slot.  */
	  break;

	  /* It is believed that rtx's at this level will never
	     contain anything but integers and other rtx's,
	     except for within LABEL_REFs and SYMBOL_REFs.  */
	default:
	  gcc_unreachable ();
	}
    }
  return 1;
}
|
4059 |
|
|
|
4060 |
|
|
/* Remove constant addition value from the expression X (when present)
   and return it.  *X is updated in place to the expression with the
   constant stripped; the accumulated constant is the return value.
   Recurses through PLUS chains.  */

static HOST_WIDE_INT
remove_constant_addition (rtx *x)
{
  HOST_WIDE_INT addval = 0;
  rtx exp = *x;

  /* Avoid clobbering a shared CONST expression: peel off the constant
     without modifying EXP itself (CONST rtx may be shared across the IR).  */
  if (GET_CODE (exp) == CONST)
    {
      if (GET_CODE (XEXP (exp, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (exp, 0), 0)) == SYMBOL_REF
	  && GET_CODE (XEXP (XEXP (exp, 0), 1)) == CONST_INT)
	{
	  /* (const (plus (symbol_ref ...) (const_int N))) ->
	     keep the symbol, return N.  */
	  *x = XEXP (XEXP (exp, 0), 0);
	  return INTVAL (XEXP (XEXP (exp, 0), 1));
	}
      return 0;
    }

  if (GET_CODE (exp) == CONST_INT)
    {
      addval = INTVAL (exp);
      *x = const0_rtx;
    }

  /* For plus expression recurse on ourself.  */
  else if (GET_CODE (exp) == PLUS)
    {
      addval += remove_constant_addition (&XEXP (exp, 0));
      addval += remove_constant_addition (&XEXP (exp, 1));

      /* In case our parameter was constant, remove extra zero from the
	 expression.  */
      if (XEXP (exp, 0) == const0_rtx)
	*x = XEXP (exp, 1);
      else if (XEXP (exp, 1) == const0_rtx)
	*x = XEXP (exp, 0);
    }

  return addval;
}
|
4104 |
|
|
|
4105 |
|
|
/* Attempt to identify accesses to arrays that are most likely to cause cache
   misses, and emit prefetch instructions a few prefetch blocks forward.

   To detect the arrays we use the GIV information that was collected by the
   strength reduction pass.

   The prefetch instructions are generated after the GIV information is done
   and before the strength reduction process.  The new GIVs are injected into
   the strength reduction tables, so the prefetch addresses are optimized as
   well.

   GIVs are split into base address, stride, and constant addition values.
   GIVs with the same address, stride and close addition values are combined
   into a single prefetch.  Also writes to GIVs are detected, so that prefetch
   for write instructions can be used for the block we write to, on machines
   that support write prefetches.

   Several heuristics are used to determine when to prefetch.  They are
   controlled by defined symbols that can be overridden for each target.  */

static void
emit_prefetch_instructions (struct loop *loop)
{
  int num_prefetches = 0;		/* Entries used in info[].  */
  int num_real_prefetches = 0;		/* Prefetch insns emitted in loop.  */
  int num_real_write_prefetches = 0;
  int num_prefetches_before = 0;	/* Prefetch insns before the loop.  */
  int num_write_prefetches_before = 0;
  int ahead = 0;			/* Iterations to prefetch ahead.  */
  int i;
  struct iv_class *bl;
  struct induction *iv;
  struct prefetch_info info[MAX_PREFETCHES];
  struct loop_ivs *ivs = LOOP_IVS (loop);

  if (!HAVE_prefetch || PREFETCH_BLOCK == 0)
    return;

  /* Consider only loops w/o calls.  When a call is done, the loop is probably
     slow enough to read the memory.  */
  if (PREFETCH_NO_CALL && LOOP_INFO (loop)->has_call)
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream, "Prefetch: ignoring loop: has call.\n");

      return;
    }

  /* Don't prefetch in loops known to have few iterations.  */
  if (PREFETCH_NO_LOW_LOOPCNT
      && LOOP_INFO (loop)->n_iterations
      && LOOP_INFO (loop)->n_iterations <= PREFETCH_LOW_LOOPCNT)
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Prefetch: ignoring loop: not enough iterations.\n");
      return;
    }

  /* Search all induction variables and pick those interesting for the prefetch
     machinery.  */
  for (bl = ivs->list; bl; bl = bl->next)
    {
      struct induction *biv = bl->biv, *biv1;
      int basestride = 0;

      biv1 = biv;

      /* Expect all BIVs to be executed in each iteration.  This makes our
	 analysis more conservative.  */
      while (biv1)
	{
	  /* Discard non-constant additions that we can't handle well yet, and
	     BIVs that are executed multiple times; such BIVs ought to be
	     handled in the nested loop.  We accept not_every_iteration BIVs,
	     since these only result in larger strides and make our
	     heuristics more conservative.  */
	  if (GET_CODE (biv->add_val) != CONST_INT)
	    {
	      if (loop_dump_stream)
		{
		  fprintf (loop_dump_stream,
		    "Prefetch: ignoring biv %d: non-constant addition at insn %d:",
			   REGNO (biv->src_reg), INSN_UID (biv->insn));
		  print_rtl (loop_dump_stream, biv->add_val);
		  fprintf (loop_dump_stream, "\n");
		}
	      break;
	    }

	  if (biv->maybe_multiple)
	    {
	      if (loop_dump_stream)
		{
		  fprintf (loop_dump_stream,
			   "Prefetch: ignoring biv %d: maybe_multiple at insn %i:",
			   REGNO (biv->src_reg), INSN_UID (biv->insn));
		  print_rtl (loop_dump_stream, biv->add_val);
		  fprintf (loop_dump_stream, "\n");
		}
	      break;
	    }

	  basestride += INTVAL (biv1->add_val);
	  biv1 = biv1->next_iv;
	}

      /* BIV1 non-null means the while loop above broke out early (the BIV
	 was rejected); zero BASESTRIDE means the BIV never moves.  */
      if (biv1 || !basestride)
	continue;

      for (iv = bl->giv; iv; iv = iv->next_iv)
	{
	  rtx address;
	  rtx temp;
	  HOST_WIDE_INT index = 0;
	  int add = 1;		/* Becomes 0 if merged into existing entry.  */
	  HOST_WIDE_INT stride = 0;
	  int stride_sign = 1;
	  struct check_store_data d;
	  const char *ignore_reason = NULL;
	  int size = GET_MODE_SIZE (GET_MODE (iv));

	  /* See whether an induction variable is interesting to us and if
	     not, report the reason.  */
	  if (iv->giv_type != DEST_ADDR)
	    ignore_reason = "giv is not a destination address";

	  /* We are interested only in constant stride memory references
	     in order to be able to compute density easily.  */
	  else if (GET_CODE (iv->mult_val) != CONST_INT)
	    ignore_reason = "stride is not constant";

	  else
	    {
	      stride = INTVAL (iv->mult_val) * basestride;
	      if (stride < 0)
		{
		  stride = -stride;
		  stride_sign = -1;
		}

	      /* On some targets, reversed order prefetches are not
		 worthwhile.  */
	      if (PREFETCH_NO_REVERSE_ORDER && stride_sign < 0)
		ignore_reason = "reversed order stride";

	      /* Prefetch of accesses with an extreme stride might not be
		 worthwhile, either.  */
	      else if (PREFETCH_NO_EXTREME_STRIDE
		       && stride > PREFETCH_EXTREME_STRIDE)
		ignore_reason = "extreme stride";

	      /* Ignore GIVs with varying add values; we can't predict the
		 value for the next iteration.  */
	      else if (!loop_invariant_p (loop, iv->add_val))
		ignore_reason = "giv has varying add value";

	      /* Ignore GIVs in the nested loops; they ought to have been
		 handled already.  */
	      else if (iv->maybe_multiple)
		ignore_reason = "giv is in nested loop";
	    }

	  if (ignore_reason != NULL)
	    {
	      if (loop_dump_stream)
		fprintf (loop_dump_stream,
			 "Prefetch: ignoring giv at %d: %s.\n",
			 INSN_UID (iv->insn), ignore_reason);
	      continue;
	    }

	  /* Determine the pointer to the basic array we are examining.  It is
	     the sum of the BIV's initial value and the GIV's add_val.  */
	  address = copy_rtx (iv->add_val);
	  temp = copy_rtx (bl->initial_value);

	  address = simplify_gen_binary (PLUS, Pmode, temp, address);
	  index = remove_constant_addition (&address);

	  d.mem_write = 0;
	  d.mem_address = *iv->location;

	  /* When the GIV is not always executed, we might be better off by
	     not dirtying the cache pages.  */
	  if (PREFETCH_CONDITIONAL || iv->always_executed)
	    note_stores (PATTERN (iv->insn), check_store, &d);
	  else
	    {
	      if (loop_dump_stream)
		fprintf (loop_dump_stream, "Prefetch: Ignoring giv at %d: %s\n",
			 INSN_UID (iv->insn), "in conditional code.");
	      continue;
	    }

	  /* Attempt to find another prefetch to the same array and see if we
	     can merge this one.  */
	  for (i = 0; i < num_prefetches; i++)
	    if (rtx_equal_for_prefetch_p (address, info[i].base_address)
		&& stride == info[i].stride)
	      {
		/* In case both access same array (same location
		   just with small difference in constant indexes), merge
		   the prefetches.  Just do the later and the earlier will
		   get prefetched from previous iteration.
		   The artificial threshold should not be too small,
		   but also not bigger than small portion of memory usually
		   traversed by single loop.  */
		if (index >= info[i].index
		    && index - info[i].index < PREFETCH_EXTREME_DIFFERENCE)
		  {
		    info[i].write |= d.mem_write;
		    info[i].bytes_accessed += size;
		    info[i].index = index;
		    info[i].giv = iv;
		    info[i].class = bl;
		    /* NOTE(review): this stores ADDRESS into the unused
		       scratch slot info[num_prefetches] instead of info[i];
		       it looks like info[i].base_address was intended.
		       Harmless in practice since the scratch slot is fully
		       rewritten before being committed, and the merged
		       entry's base_address was already shown equivalent by
		       rtx_equal_for_prefetch_p -- confirm before changing.  */
		    info[num_prefetches].base_address = address;
		    add = 0;
		    break;
		  }

		if (index < info[i].index
		    && info[i].index - index < PREFETCH_EXTREME_DIFFERENCE)
		  {
		    info[i].write |= d.mem_write;
		    info[i].bytes_accessed += size;
		    add = 0;
		    break;
		  }
	      }

	  /* Merging failed.  */
	  if (add)
	    {
	      info[num_prefetches].giv = iv;
	      info[num_prefetches].class = bl;
	      info[num_prefetches].index = index;
	      info[num_prefetches].stride = stride;
	      info[num_prefetches].base_address = address;
	      info[num_prefetches].write = d.mem_write;
	      info[num_prefetches].bytes_accessed = size;
	      num_prefetches++;
	      if (num_prefetches >= MAX_PREFETCHES)
		{
		  if (loop_dump_stream)
		    fprintf (loop_dump_stream,
			     "Maximal number of prefetches exceeded.\n");
		  return;
		}
	    }
	}
    }

  for (i = 0; i < num_prefetches; i++)
    {
      int density;

      /* Attempt to calculate the total number of bytes fetched by all
	 iterations of the loop.  Avoid overflow.  */
      if (LOOP_INFO (loop)->n_iterations
	  && ((unsigned HOST_WIDE_INT) (0xffffffff / info[i].stride)
	      >= LOOP_INFO (loop)->n_iterations))
	info[i].total_bytes = info[i].stride * LOOP_INFO (loop)->n_iterations;
      else
	info[i].total_bytes = 0xffffffff;

      /* Percentage of the stride actually touched per iteration.  */
      density = info[i].bytes_accessed * 100 / info[i].stride;

      /* Prefetch might be worthwhile only when the loads/stores are dense.  */
      if (PREFETCH_ONLY_DENSE_MEM)
	if (density * 256 > PREFETCH_DENSE_MEM * 100
	    && (info[i].total_bytes / PREFETCH_BLOCK
		>= PREFETCH_BLOCKS_BEFORE_LOOP_MIN))
	  {
	    info[i].prefetch_before_loop = 1;
	    /* Only prefetch inside the loop too if the loop touches more
	       than we are willing to prefetch entirely up front.  */
	    info[i].prefetch_in_loop
	      = (info[i].total_bytes / PREFETCH_BLOCK
		 > PREFETCH_BLOCKS_BEFORE_LOOP_MAX);
	  }
	else
	  {
	    info[i].prefetch_in_loop = 0, info[i].prefetch_before_loop = 0;
	    if (loop_dump_stream)
	      fprintf (loop_dump_stream,
		       "Prefetch: ignoring giv at %d: %d%% density is too low.\n",
		       INSN_UID (info[i].giv->insn), density);
	  }
      else
	info[i].prefetch_in_loop = 1, info[i].prefetch_before_loop = 1;

      /* Find how many prefetch instructions we'll use within the loop.  */
      if (info[i].prefetch_in_loop != 0)
	{
	  info[i].prefetch_in_loop = ((info[i].stride + PREFETCH_BLOCK - 1)
				      / PREFETCH_BLOCK);
	  num_real_prefetches += info[i].prefetch_in_loop;
	  if (info[i].write)
	    num_real_write_prefetches += info[i].prefetch_in_loop;
	}
    }

  /* Determine how many iterations ahead to prefetch within the loop, based
     on how many prefetches we currently expect to do within the loop.  */
  if (num_real_prefetches != 0)
    {
      if ((ahead = SIMULTANEOUS_PREFETCHES / num_real_prefetches) == 0)
	{
	  if (loop_dump_stream)
	    fprintf (loop_dump_stream,
		     "Prefetch: ignoring prefetches within loop: ahead is zero; %d < %d\n",
		     SIMULTANEOUS_PREFETCHES, num_real_prefetches);
	  num_real_prefetches = 0, num_real_write_prefetches = 0;
	}
    }
  /* We'll also use AHEAD to determine how many prefetch instructions to
     emit before a loop, so don't leave it zero.  */
  if (ahead == 0)
    ahead = PREFETCH_BLOCKS_BEFORE_LOOP_MAX;

  for (i = 0; i < num_prefetches; i++)
    {
      /* Update if we've decided not to prefetch anything within the loop.  */
      if (num_real_prefetches == 0)
	info[i].prefetch_in_loop = 0;

      /* Find how many prefetch instructions we'll use before the loop.  */
      if (info[i].prefetch_before_loop != 0)
	{
	  int n = info[i].total_bytes / PREFETCH_BLOCK;
	  if (n > ahead)
	    n = ahead;
	  info[i].prefetch_before_loop = n;
	  num_prefetches_before += n;
	  if (info[i].write)
	    num_write_prefetches_before += n;
	}

      if (loop_dump_stream)
	{
	  if (info[i].prefetch_in_loop == 0
	      && info[i].prefetch_before_loop == 0)
	    continue;
	  fprintf (loop_dump_stream, "Prefetch insn: %d",
		   INSN_UID (info[i].giv->insn));
	  fprintf (loop_dump_stream,
		   "; in loop: %d; before: %d; %s\n",
		   info[i].prefetch_in_loop,
		   info[i].prefetch_before_loop,
		   info[i].write ? "read/write" : "read only");
	  fprintf (loop_dump_stream,
		   " density: %d%%; bytes_accessed: %u; total_bytes: %u\n",
		   (int) (info[i].bytes_accessed * 100 / info[i].stride),
		   info[i].bytes_accessed, info[i].total_bytes);
	  fprintf (loop_dump_stream, " index: " HOST_WIDE_INT_PRINT_DEC
		   "; stride: " HOST_WIDE_INT_PRINT_DEC "; address: ",
		   info[i].index, info[i].stride);
	  print_rtl (loop_dump_stream, info[i].base_address);
	  fprintf (loop_dump_stream, "\n");
	}
    }

  if (num_real_prefetches + num_prefetches_before > 0)
    {
      /* Record that this loop uses prefetch instructions.  */
      LOOP_INFO (loop)->has_prefetch = 1;

      if (loop_dump_stream)
	{
	  fprintf (loop_dump_stream, "Real prefetches needed within loop: %d (write: %d)\n",
		   num_real_prefetches, num_real_write_prefetches);
	  fprintf (loop_dump_stream, "Real prefetches needed before loop: %d (write: %d)\n",
		   num_prefetches_before, num_write_prefetches_before);
	}
    }

  /* Finally, emit the prefetch insns themselves.  */
  for (i = 0; i < num_prefetches; i++)
    {
      int y;

      for (y = 0; y < info[i].prefetch_in_loop; y++)
	{
	  rtx loc = copy_rtx (*info[i].giv->location);
	  rtx insn;
	  int bytes_ahead = PREFETCH_BLOCK * (ahead + y);
	  rtx before_insn = info[i].giv->insn;
	  rtx prev_insn = PREV_INSN (info[i].giv->insn);
	  rtx seq;

	  /* We can save some effort by offsetting the address on
	     architectures with offsettable memory references.  */
	  if (offsettable_address_p (0, VOIDmode, loc))
	    loc = plus_constant (loc, bytes_ahead);
	  else
	    {
	      rtx reg = gen_reg_rtx (Pmode);
	      loop_iv_add_mult_emit_before (loop, loc, const1_rtx,
					    GEN_INT (bytes_ahead), reg,
					    0, before_insn);
	      loc = reg;
	    }

	  start_sequence ();
	  /* Make sure the address operand is valid for prefetch.  */
	  if (! (*insn_data[(int)CODE_FOR_prefetch].operand[0].predicate)
	      (loc, insn_data[(int)CODE_FOR_prefetch].operand[0].mode))
	    loc = force_reg (Pmode, loc);
	  emit_insn (gen_prefetch (loc, GEN_INT (info[i].write),
				   GEN_INT (3)));
	  seq = get_insns ();
	  end_sequence ();
	  emit_insn_before (seq, before_insn);

	  /* Check all insns emitted and record the new GIV
	     information.  */
	  insn = NEXT_INSN (prev_insn);
	  while (insn != before_insn)
	    {
	      insn = check_insn_for_givs (loop, insn,
					  info[i].giv->always_executed,
					  info[i].giv->maybe_multiple);
	      insn = NEXT_INSN (insn);
	    }
	}

      if (PREFETCH_BEFORE_LOOP)
	{
	  /* Emit insns before the loop to fetch the first cache lines or,
	     if we're not prefetching within the loop, everything we expect
	     to need.  */
	  for (y = 0; y < info[i].prefetch_before_loop; y++)
	    {
	      rtx reg = gen_reg_rtx (Pmode);
	      rtx loop_start = loop->start;
	      rtx init_val = info[i].class->initial_value;
	      rtx add_val = simplify_gen_binary (PLUS, Pmode,
						 info[i].giv->add_val,
						 GEN_INT (y * PREFETCH_BLOCK));

	      /* Functions called by LOOP_IV_ADD_EMIT_BEFORE expect a
		 non-constant INIT_VAL to have the same mode as REG, which
		 in this case we know to be Pmode.  */
	      if (GET_MODE (init_val) != Pmode && !CONSTANT_P (init_val))
		{
		  rtx seq;

		  start_sequence ();
		  init_val = convert_to_mode (Pmode, init_val, 0);
		  seq = get_insns ();
		  end_sequence ();
		  loop_insn_emit_before (loop, 0, loop_start, seq);
		}
	      loop_iv_add_mult_emit_before (loop, init_val,
					    info[i].giv->mult_val,
					    add_val, reg, 0, loop_start);
	      emit_insn_before (gen_prefetch (reg, GEN_INT (info[i].write),
					      GEN_INT (3)),
				loop_start);
	    }
	}
    }

  return;
}
|
4568 |
|
|
|
4569 |
|
|
/* Communication with routines called via `note_stores'.  */

static rtx note_insn;

/* Dummy register to have nonzero DEST_REG for DEST_ADDR type givs.  */

static rtx addr_placeholder;
|
4576 |
|
|
|
4577 |
|
|
/* ??? Unfinished optimizations, and possible future optimizations,
|
4578 |
|
|
for the strength reduction code. */
|
4579 |
|
|
|
4580 |
|
|
/* ??? The interaction of biv elimination, and recognition of 'constant'
|
4581 |
|
|
bivs, may cause problems. */
|
4582 |
|
|
|
4583 |
|
|
/* ??? Add heuristics so that DEST_ADDR strength reduction does not cause
|
4584 |
|
|
performance problems.
|
4585 |
|
|
|
4586 |
|
|
Perhaps don't eliminate things that can be combined with an addressing
|
4587 |
|
|
mode. Find all givs that have the same biv, mult_val, and add_val;
|
4588 |
|
|
then for each giv, check to see if its only use dies in a following
|
4589 |
|
|
memory address. If so, generate a new memory address and check to see
|
4590 |
|
|
if it is valid. If it is valid, then store the modified memory address,
|
4591 |
|
|
otherwise, mark the giv as not done so that it will get its own iv. */
|
4592 |
|
|
|
4593 |
|
|
/* ??? Could try to optimize branches when it is known that a biv is always
|
4594 |
|
|
positive. */
|
4595 |
|
|
|
4596 |
|
|
/* ??? When replace a biv in a compare insn, we should replace with closest
|
4597 |
|
|
giv so that an optimized branch can still be recognized by the combiner,
|
4598 |
|
|
e.g. the VAX acb insn. */
|
4599 |
|
|
|
4600 |
|
|
/* ??? Many of the checks involving uid_luid could be simplified if regscan
|
4601 |
|
|
was rerun in loop_optimize whenever a register was added or moved.
|
4602 |
|
|
Also, some of the optimizations could be a little less conservative. */
|
4603 |
|
|
|
4604 |
|
|
/* Searches the insns between INSN and LOOP->END.  Returns 1 if there
   is a backward branch in that range that branches to somewhere between
   LOOP->START and INSN.  Returns 0 otherwise.  */

/* ??? This is quadratic algorithm.  Could be rewritten to be linear.
   In practice, this is not a problem, because this function is seldom called,
   and uses a negligible amount of CPU time on average.  */

static int
back_branch_in_range_p (const struct loop *loop, rtx insn)
{
  rtx p, q, target_insn;
  rtx loop_start = loop->start;
  rtx loop_end = loop->end;
  rtx orig_loop_end = loop->end;

  /* Stop before we get to the backward branch at the end of the loop.  */
  loop_end = prev_nonnote_insn (loop_end);
  if (BARRIER_P (loop_end))
    loop_end = PREV_INSN (loop_end);

  /* Check in case insn has been deleted, search forward for first non
     deleted insn following it.  */
  while (INSN_DELETED_P (insn))
    insn = NEXT_INSN (insn);

  /* Check for the case where insn is the last insn in the loop.  Deal
     with the case where INSN was a deleted loop test insn, in which case
     it will now be the NOTE_LOOP_END.  */
  if (insn == loop_end || insn == orig_loop_end)
    return 0;

  for (p = NEXT_INSN (insn); p != loop_end; p = NEXT_INSN (p))
    {
      if (JUMP_P (p))
	{
	  target_insn = JUMP_LABEL (p);

	  /* Search from loop_start to insn, to see if one of them is
	     the target_insn.  We can't use INSN_LUID comparisons here,
	     since insn may not have an LUID entry.  */
	  for (q = loop_start; q != insn; q = NEXT_INSN (q))
	    if (q == target_insn)
	      return 1;
	}
    }

  return 0;
}
|
4653 |
|
|
|
4654 |
|
|
/* Scan the loop body and call FNCALL for each insn.  In the addition to the
   LOOP and INSN parameters pass MAYBE_MULTIPLE and NOT_EVERY_ITERATION to the
   callback.

   NOT_EVERY_ITERATION is 1 if current insn is not known to be executed at
   least once for every loop iteration except for the last one.

   MAYBE_MULTIPLE is 1 if current insn may be executed more than once for every
   loop iteration.
 */
typedef rtx (*loop_insn_callback) (struct loop *, rtx, int, int);
static void
for_each_insn_in_loop (struct loop *loop, loop_insn_callback fncall)
{
  int not_every_iteration = 0;
  int maybe_multiple = 0;
  int past_loop_latch = 0;
  bool exit_test_is_entry = false;
  rtx p;

  /* If loop_scan_start points to the loop exit test, the loop body
     cannot be counted on running on every iteration, and we have to
     be wary of subversive use of gotos inside expression
     statements.  */
  if (prev_nonnote_insn (loop->scan_start) != prev_nonnote_insn (loop->start))
    {
      exit_test_is_entry = true;
      maybe_multiple = back_branch_in_range_p (loop, loop->scan_start);
    }

  /* Scan through loop and update NOT_EVERY_ITERATION and MAYBE_MULTIPLE.  */
  for (p = next_insn_in_loop (loop, loop->scan_start);
       p != NULL_RTX;
       p = next_insn_in_loop (loop, p))
    {
      p = fncall (loop, p, not_every_iteration, maybe_multiple);

      /* Past CODE_LABEL, we get to insns that may be executed multiple
	 times.  The only way we can be sure that they can't is if every
	 jump insn between here and the end of the loop either
	 returns, exits the loop, is a jump to a location that is still
	 behind the label, or is a jump to the loop start.  */

      if (LABEL_P (p))
	{
	  rtx insn = p;

	  maybe_multiple = 0;

	  /* Walk forward from the label to the scan start looking for a
	     jump that could re-enter code after the label.  */
	  while (1)
	    {
	      insn = NEXT_INSN (insn);
	      if (insn == loop->scan_start)
		break;
	      if (insn == loop->end)
		{
		  if (loop->top != 0)
		    insn = loop->top;	/* Wrap around in rotated loops.  */
		  else
		    break;
		  if (insn == loop->scan_start)
		    break;
		}

	      if (JUMP_P (insn)
		  && GET_CODE (PATTERN (insn)) != RETURN
		  && (!any_condjump_p (insn)
		      || (JUMP_LABEL (insn) != 0
			  && JUMP_LABEL (insn) != loop->scan_start
			  && !loop_insn_first_p (p, JUMP_LABEL (insn)))))
		{
		  maybe_multiple = 1;
		  break;
		}
	    }
	}

      /* Past a jump, we get to insns for which we can't count
	 on whether they will be executed during each iteration.  */
      /* This code appears twice in strength_reduce.  There is also similar
	 code in scan_loop.  */
      if (JUMP_P (p)
	  /* If we enter the loop in the middle, and scan around to the
	     beginning, don't set not_every_iteration for that.
	     This can be any kind of jump, since we want to know if insns
	     will be executed if the loop is executed.  */
	  && (exit_test_is_entry
	      || !(JUMP_LABEL (p) == loop->top
		   && ((NEXT_INSN (NEXT_INSN (p)) == loop->end
			&& any_uncondjump_p (p))
		       || (NEXT_INSN (p) == loop->end
			   && any_condjump_p (p))))))
	{
	  rtx label = 0;

	  /* If this is a jump outside the loop, then it also doesn't
	     matter.  Check to see if the target of this branch is on the
	     loop->exits_labels list.  */

	  for (label = loop->exit_labels; label; label = LABEL_NEXTREF (label))
	    if (XEXP (label, 0) == JUMP_LABEL (p))
	      break;

	  if (!label)
	    not_every_iteration = 1;
	}

      /* Note if we pass a loop latch.  If we do, then we can not clear
	 NOT_EVERY_ITERATION below when we pass the last CODE_LABEL in
	 a loop since a jump before the last CODE_LABEL may have started
	 a new loop iteration.

	 Note that LOOP_TOP is only set for rotated loops and we need
	 this check for all loops, so compare against the CODE_LABEL
	 which immediately follows LOOP_START.  */
      if (JUMP_P (p)
	  && JUMP_LABEL (p) == NEXT_INSN (loop->start))
	past_loop_latch = 1;

      /* Unlike in the code motion pass where MAYBE_NEVER indicates that
	 an insn may never be executed, NOT_EVERY_ITERATION indicates whether
	 or not an insn is known to be executed each iteration of the
	 loop, whether or not any iterations are known to occur.

	 Therefore, if we have just passed a label and have no more labels
	 between here and the test insn of the loop, and we have not passed
	 a jump to the top of the loop, then we know these insns will be
	 executed each iteration.  */

      if (not_every_iteration
	  && !past_loop_latch
	  && LABEL_P (p)
	  && no_labels_between_p (p, loop->end))
	not_every_iteration = 0;
    }
}
|
4790 |
|
|
|
4791 |
|
|
/* Find all basic induction variables (bivs) in LOOP and record them in
   LOOP_IVS (loop)->list, then prune candidates that do not qualify.

   A reg qualifies as a biv only if every set of it inside the loop was
   recognized by check_insn_for_bivs (so the recorded biv_count matches
   regs->n_times_set) and it is actually incremented somewhere.  */
static void
loop_bivs_find (struct loop *loop)
{
  struct loop_regs *regs = LOOP_REGS (loop);
  struct loop_ivs *ivs = LOOP_IVS (loop);
  /* Temporary list pointers for traversing ivs->list.  */
  struct iv_class *bl, **backbl;

  ivs->list = 0;

  /* First pass: let check_insn_for_bivs populate ivs->list.  */
  for_each_insn_in_loop (loop, check_insn_for_bivs);

  /* Scan ivs->list to remove all regs that proved not to be bivs.
     Make a sanity check against regs->n_times_set.
     BACKBL points at the link to the current element so that an
     element can be unlinked in place.  */
  for (backbl = &ivs->list, bl = *backbl; bl; bl = bl->next)
    {
      if (REG_IV_TYPE (ivs, bl->regno) != BASIC_INDUCT
	  /* Above happens if register modified by subreg, etc.  */
	  /* Make sure it is not recognized as a basic induction var: */
	  || regs->array[bl->regno].n_times_set != bl->biv_count
	  /* If never incremented, it is invariant that we decided not to
	     move.  So leave it alone.  */
	  || ! bl->incremented)
	{
	  if (loop_dump_stream)
	    fprintf (loop_dump_stream, "Biv %d: discarded, %s\n",
		     bl->regno,
		     (REG_IV_TYPE (ivs, bl->regno) != BASIC_INDUCT
		      ? "not induction variable"
		      : (! bl->incremented ? "never incremented"
			 : "count error")));

	  /* Demote the reg and unlink this class; BACKBL is left
	     unchanged so the next iteration re-tests the new *BACKBL.  */
	  REG_IV_TYPE (ivs, bl->regno) = NOT_BASIC_INDUCT;
	  *backbl = bl->next;
	}
      else
	{
	  backbl = &bl->next;

	  if (loop_dump_stream)
	    fprintf (loop_dump_stream, "Biv %d: verified\n", bl->regno);
	}
    }
}
|
4835 |
|
|
|
4836 |
|
|
|
4837 |
|
|
/* Determine how BIVS are initialized by looking through pre-header
   extended basic block.

   Walks backwards from LOOP->start until the first CODE_LABEL, recording
   every store (via record_initial) and any comparison against a biv that
   branches around the loop, which may reveal the biv's initial value or
   an initial test condition.  */
static void
loop_bivs_init_find (struct loop *loop)
{
  struct loop_ivs *ivs = LOOP_IVS (loop);
  /* Temporary list pointers for traversing ivs->list.  */
  struct iv_class *bl;
  int call_seen;
  rtx p;

  /* Find initial value for each biv by searching backwards from loop_start,
     halting at first label.  Also record any test condition.  */

  call_seen = 0;
  for (p = loop->start; p && !LABEL_P (p); p = PREV_INSN (p))
    {
      rtx test;

      /* note_insn is consulted by record_initial via note_stores.  */
      note_insn = p;

      /* Remember if a call is seen; valid_initial_value_p cares because
	 a call may clobber values.  */
      if (CALL_P (p))
	call_seen = 1;

      if (INSN_P (p))
	note_stores (PATTERN (p), record_initial, ivs);

      /* Record any test of a biv that branches around the loop if no store
	 between it and the start of loop.  We only care about tests with
	 constants and registers and only certain of those.  */
      if (JUMP_P (p)
	  && JUMP_LABEL (p) != 0
	  && next_real_insn (JUMP_LABEL (p)) == next_real_insn (loop->end)
	  && (test = get_condition_for_loop (loop, p)) != 0
	  && REG_P (XEXP (test, 0))
	  && REGNO (XEXP (test, 0)) < max_reg_before_loop
	  && (bl = REG_IV_CLASS (ivs, REGNO (XEXP (test, 0)))) != 0
	  && valid_initial_value_p (XEXP (test, 1), p, call_seen, loop->start)
	  && bl->init_insn == 0)
	{
	  /* If an NE test, we have an initial value!  The loop is only
	     entered when the reg does not equal XEXP (test, 1), i.e. it
	     holds that value on entry.  */
	  if (GET_CODE (test) == NE)
	    {
	      bl->init_insn = p;
	      bl->init_set = gen_rtx_SET (VOIDmode,
					  XEXP (test, 0), XEXP (test, 1));
	    }
	  else
	    bl->initial_test = test;
	}
    }
}
|
4889 |
|
|
|
4890 |
|
|
|
4891 |
|
|
/* Look at the each biv and see if we can say anything better about its
   initial value from any initializing insns set up above.  (This is done
   in two passes to avoid missing SETs in a PARALLEL.)

   For each biv with a recorded init_insn, prefer a constant from a
   REG_EQUAL/REG_EQUIV note over the raw SET_SRC, and accept it as
   bl->initial_value only if the mode matches and the value is still
   valid at loop entry.  */
static void
loop_bivs_check (struct loop *loop)
{
  struct loop_ivs *ivs = LOOP_IVS (loop);
  /* Temporary list pointers for traversing ivs->list.  */
  struct iv_class *bl;
  struct iv_class **backbl;

  for (backbl = &ivs->list; (bl = *backbl); backbl = &bl->next)
    {
      rtx src;
      rtx note;

      if (! bl->init_insn)
	continue;

      /* IF INIT_INSN has a REG_EQUAL or REG_EQUIV note and the value
	 is a constant, use the value of that.  */
      if (((note = find_reg_note (bl->init_insn, REG_EQUAL, 0)) != NULL
	   && CONSTANT_P (XEXP (note, 0)))
	  || ((note = find_reg_note (bl->init_insn, REG_EQUIV, 0)) != NULL
	      && CONSTANT_P (XEXP (note, 0))))
	src = XEXP (note, 0);
      else
	src = SET_SRC (bl->init_set);

      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Biv %d: initialized at insn %d: initial value ",
		 bl->regno, INSN_UID (bl->init_insn));

      /* VOIDmode is allowed because constants have VOIDmode.  */
      if ((GET_MODE (src) == GET_MODE (regno_reg_rtx[bl->regno])
	   || GET_MODE (src) == VOIDmode)
	  && valid_initial_value_p (src, bl->init_insn,
				    LOOP_INFO (loop)->pre_header_has_call,
				    loop->start))
	{
	  bl->initial_value = src;

	  if (loop_dump_stream)
	    {
	      print_simple_rtl (loop_dump_stream, src);
	      fputc ('\n', loop_dump_stream);
	    }
	}
      /* If we can't make it a giv,
	 let biv keep initial value of "itself".  */
      else if (loop_dump_stream)
	fprintf (loop_dump_stream, "is complex\n");
    }
}
|
4945 |
|
|
|
4946 |
|
|
|
4947 |
|
|
/* Search the loop for general induction variables. */
|
4948 |
|
|
|
4949 |
|
|
static void
|
4950 |
|
|
loop_givs_find (struct loop* loop)
|
4951 |
|
|
{
|
4952 |
|
|
for_each_insn_in_loop (loop, check_insn_for_givs);
|
4953 |
|
|
}
|
4954 |
|
|
|
4955 |
|
|
|
4956 |
|
|
/* For each giv for which we still don't know whether or not it is
|
4957 |
|
|
replaceable, check to see if it is replaceable because its final value
|
4958 |
|
|
can be calculated. */
|
4959 |
|
|
|
4960 |
|
|
static void
|
4961 |
|
|
loop_givs_check (struct loop *loop)
|
4962 |
|
|
{
|
4963 |
|
|
struct loop_ivs *ivs = LOOP_IVS (loop);
|
4964 |
|
|
struct iv_class *bl;
|
4965 |
|
|
|
4966 |
|
|
for (bl = ivs->list; bl; bl = bl->next)
|
4967 |
|
|
{
|
4968 |
|
|
struct induction *v;
|
4969 |
|
|
|
4970 |
|
|
for (v = bl->giv; v; v = v->next_iv)
|
4971 |
|
|
if (! v->replaceable && ! v->not_replaceable)
|
4972 |
|
|
check_final_value (loop, v);
|
4973 |
|
|
}
|
4974 |
|
|
}
|
4975 |
|
|
|
4976 |
|
|
/* Try to generate the simplest rtx for the expression
|
4977 |
|
|
(PLUS (MULT mult1 mult2) add1). This is used to calculate the initial
|
4978 |
|
|
value of giv's. */
|
4979 |
|
|
|
4980 |
|
|
static rtx
|
4981 |
|
|
fold_rtx_mult_add (rtx mult1, rtx mult2, rtx add1, enum machine_mode mode)
|
4982 |
|
|
{
|
4983 |
|
|
rtx temp, mult_res;
|
4984 |
|
|
rtx result;
|
4985 |
|
|
|
4986 |
|
|
/* The modes must all be the same. This should always be true. For now,
|
4987 |
|
|
check to make sure. */
|
4988 |
|
|
gcc_assert (GET_MODE (mult1) == mode || GET_MODE (mult1) == VOIDmode);
|
4989 |
|
|
gcc_assert (GET_MODE (mult2) == mode || GET_MODE (mult2) == VOIDmode);
|
4990 |
|
|
gcc_assert (GET_MODE (add1) == mode || GET_MODE (add1) == VOIDmode);
|
4991 |
|
|
|
4992 |
|
|
/* Ensure that if at least one of mult1/mult2 are constant, then mult2
|
4993 |
|
|
will be a constant. */
|
4994 |
|
|
if (GET_CODE (mult1) == CONST_INT)
|
4995 |
|
|
{
|
4996 |
|
|
temp = mult2;
|
4997 |
|
|
mult2 = mult1;
|
4998 |
|
|
mult1 = temp;
|
4999 |
|
|
}
|
5000 |
|
|
|
5001 |
|
|
mult_res = simplify_binary_operation (MULT, mode, mult1, mult2);
|
5002 |
|
|
if (! mult_res)
|
5003 |
|
|
mult_res = gen_rtx_MULT (mode, mult1, mult2);
|
5004 |
|
|
|
5005 |
|
|
/* Again, put the constant second. */
|
5006 |
|
|
if (GET_CODE (add1) == CONST_INT)
|
5007 |
|
|
{
|
5008 |
|
|
temp = add1;
|
5009 |
|
|
add1 = mult_res;
|
5010 |
|
|
mult_res = temp;
|
5011 |
|
|
}
|
5012 |
|
|
|
5013 |
|
|
result = simplify_binary_operation (PLUS, mode, add1, mult_res);
|
5014 |
|
|
if (! result)
|
5015 |
|
|
result = gen_rtx_PLUS (mode, add1, mult_res);
|
5016 |
|
|
|
5017 |
|
|
return result;
|
5018 |
|
|
}
|
5019 |
|
|
|
5020 |
|
|
/* Searches the list of induction struct's for the biv BL, to try to calculate
|
5021 |
|
|
the total increment value for one iteration of the loop as a constant.
|
5022 |
|
|
|
5023 |
|
|
Returns the increment value as an rtx, simplified as much as possible,
|
5024 |
|
|
if it can be calculated. Otherwise, returns 0. */
|
5025 |
|
|
|
5026 |
|
|
static rtx
|
5027 |
|
|
biv_total_increment (const struct iv_class *bl)
|
5028 |
|
|
{
|
5029 |
|
|
struct induction *v;
|
5030 |
|
|
rtx result;
|
5031 |
|
|
|
5032 |
|
|
/* For increment, must check every instruction that sets it. Each
|
5033 |
|
|
instruction must be executed only once each time through the loop.
|
5034 |
|
|
To verify this, we check that the insn is always executed, and that
|
5035 |
|
|
there are no backward branches after the insn that branch to before it.
|
5036 |
|
|
Also, the insn must have a mult_val of one (to make sure it really is
|
5037 |
|
|
an increment). */
|
5038 |
|
|
|
5039 |
|
|
result = const0_rtx;
|
5040 |
|
|
for (v = bl->biv; v; v = v->next_iv)
|
5041 |
|
|
{
|
5042 |
|
|
if (v->always_computable && v->mult_val == const1_rtx
|
5043 |
|
|
&& ! v->maybe_multiple
|
5044 |
|
|
&& SCALAR_INT_MODE_P (v->mode))
|
5045 |
|
|
{
|
5046 |
|
|
/* If we have already counted it, skip it. */
|
5047 |
|
|
if (v->same)
|
5048 |
|
|
continue;
|
5049 |
|
|
|
5050 |
|
|
result = fold_rtx_mult_add (result, const1_rtx, v->add_val, v->mode);
|
5051 |
|
|
}
|
5052 |
|
|
else
|
5053 |
|
|
return 0;
|
5054 |
|
|
}
|
5055 |
|
|
|
5056 |
|
|
return result;
|
5057 |
|
|
}
|
5058 |
|
|
|
5059 |
|
|
/* Try to prove that the register is dead after the loop exits.  Trace every
   loop exit looking for an insn that will always be executed, which sets
   the register to some value, and appears before the first use of the register
   is found.  If successful, then return 1, otherwise return 0.  */

/* ?? Could be made more intelligent in the handling of jumps, so that
   it can search past if statements and other similar structures.  */

static int
reg_dead_after_loop (const struct loop *loop, rtx reg)
{
  rtx insn, label;
  int jump_count = 0;
  int label_count = 0;

  /* In addition to checking all exits of this loop, we must also check
     all exits of inner nested loops that would exit this loop.  We don't
     have any way to identify those, so we just give up if there are any
     such inner loop exits.  */

  for (label = loop->exit_labels; label; label = LABEL_NEXTREF (label))
    label_count++;

  /* If the exit-label list does not account for every exit, some exit
     is unknown to us; give up conservatively.  */
  if (label_count != loop->exit_count)
    return 0;

  /* HACK: Must also search the loop fall through exit, create a label_ref
     here which points to the loop->end, and append the loop_number_exit_labels
     list to it.  */
  label = gen_rtx_LABEL_REF (Pmode, loop->end);
  LABEL_NEXTREF (label) = loop->exit_labels;

  for (; label; label = LABEL_NEXTREF (label))
    {
      /* Succeed if find an insn which sets the biv or if reach end of
	 function.  Fail if find an insn that uses the biv, or if come to
	 a conditional jump.  */

      insn = NEXT_INSN (XEXP (label, 0));
      while (insn)
	{
	  if (INSN_P (insn))
	    {
	      rtx set, note;

	      /* Any use of REG before a full set means it may be live.  */
	      if (reg_referenced_p (reg, PATTERN (insn)))
		return 0;

	      /* A REG_EQUAL/REG_EQUIV note mentioning REG also counts
		 as a use.  */
	      note = find_reg_equal_equiv_note (insn);
	      if (note && reg_overlap_mentioned_p (reg, XEXP (note, 0)))
		return 0;

	      /* A full set of REG kills the old value: this exit path
		 is safe, move on to the next exit label.  */
	      set = single_set (insn);
	      if (set && rtx_equal_p (SET_DEST (set), reg))
		break;

	      if (JUMP_P (insn))
		{
		  if (GET_CODE (PATTERN (insn)) == RETURN)
		    break;
		  else if (!any_uncondjump_p (insn)
			   /* Prevent infinite loop following infinite loops.  */
			   || jump_count++ > 20)
		    return 0;
		  else
		    /* Follow unconditional jumps; the NEXT_INSN below
		       then starts scanning just after the target label.  */
		    insn = JUMP_LABEL (insn);
		}
	    }

	  insn = NEXT_INSN (insn);
	}
    }

  /* Success, the register is dead on all loop exits.  */
  return 1;
}
|
5135 |
|
|
|
5136 |
|
|
/* Try to calculate the final value of the biv, the value it will have at
   the end of the loop.  If we can do it, return that value.

   Returns an rtx for the final value, const0_rtx when any value will do
   (reversed biv, or biv dead at all exits), or 0 on failure.  May emit
   insns after the loop to compute the value into a new pseudo.  */

static rtx
final_biv_value (const struct loop *loop, struct iv_class *bl)
{
  unsigned HOST_WIDE_INT n_iterations = LOOP_INFO (loop)->n_iterations;
  rtx increment, tem;

  /* ??? This only works for MODE_INT biv's.  Reject all others for now.  */

  if (GET_MODE_CLASS (bl->biv->mode) != MODE_INT)
    return 0;

  /* The final value for reversed bivs must be calculated differently than
     for ordinary bivs.  In this case, there is already an insn after the
     loop which sets this biv's final value (if necessary), and there are
     no other loop exits, so we can return any value.  */
  if (bl->reversed)
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Final biv value for %d, reversed biv.\n", bl->regno);

      return const0_rtx;
    }

  /* Try to calculate the final value as initial value + (number of iterations
     * increment).  For this to work, increment must be invariant, the only
     exit from the loop must be the fall through at the bottom (otherwise
     it may not have its final value when the loop exits), and the initial
     value of the biv must be invariant.  */

  if (n_iterations != 0
      && ! loop->exit_count
      && loop_invariant_p (loop, bl->initial_value))
    {
      increment = biv_total_increment (bl);

      if (increment && loop_invariant_p (loop, increment))
	{
	  /* Can calculate the loop exit value, emit insns after loop
	     end to calculate this value into a temporary register in
	     case it is needed later.  */

	  tem = gen_reg_rtx (bl->biv->mode);
	  record_base_value (REGNO (tem), bl->biv->add_val, 0);
	  /* Emit TEM = initial_value + increment * n_iterations after
	     the loop.  */
	  loop_iv_add_mult_sink (loop, increment, GEN_INT (n_iterations),
				 bl->initial_value, tem);

	  if (loop_dump_stream)
	    fprintf (loop_dump_stream,
		     "Final biv value for %d, calculated.\n", bl->regno);

	  return tem;
	}
    }

  /* Check to see if the biv is dead at all loop exits.  */
  if (reg_dead_after_loop (loop, bl->biv->src_reg))
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Final biv value for %d, biv dead after loop exit.\n",
		 bl->regno);

      return const0_rtx;
    }

  return 0;
}
|
5207 |
|
|
|
5208 |
|
|
/* Return nonzero if it is possible to eliminate the biv BL provided
   all givs are reduced.  This is possible if either the reg is not
   used outside the loop, or we can compute what its final value will
   be.

   THRESHOLD and INSN_COUNT are passed through to maybe_eliminate_biv.
   As a side effect, may set bl->final_value.  */

static int
loop_biv_eliminable_p (struct loop *loop, struct iv_class *bl,
		       int threshold, int insn_count)
{
  /* For architectures with a decrement_and_branch_until_zero insn,
     don't do this if we put a REG_NONNEG note on the endtest for this
     biv.  */

#ifdef HAVE_decrement_and_branch_until_zero
  if (bl->nonneg)
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Cannot eliminate nonneg biv %d.\n", bl->regno);
      return 0;
    }
#endif

  /* Check that biv is used outside loop or if it has a final value.
     Compare against bl->init_insn rather than loop->start.  We aren't
     concerned with any uses of the biv between init_insn and
     loop->start since these won't be affected by the value of the biv
     elsewhere in the function, so long as init_insn doesn't use the
     biv itself.  */

  /* Note: the assignment to bl->final_value inside this condition is
     intentional; the final value is cached for later use.  */
  if ((REGNO_LAST_LUID (bl->regno) < INSN_LUID (loop->end)
       && bl->init_insn
       && INSN_UID (bl->init_insn) < max_uid_for_loop
       && REGNO_FIRST_LUID (bl->regno) >= INSN_LUID (bl->init_insn)
       && ! reg_mentioned_p (bl->biv->dest_reg, SET_SRC (bl->init_set)))
      || (bl->final_value = final_biv_value (loop, bl)))
    return maybe_eliminate_biv (loop, bl, 0, threshold, insn_count);

  if (loop_dump_stream)
    {
      fprintf (loop_dump_stream,
	       "Cannot eliminate biv %d.\n",
	       bl->regno);
      fprintf (loop_dump_stream,
	       "First use: insn %d, last use: insn %d.\n",
	       REGNO_FIRST_UID (bl->regno),
	       REGNO_LAST_UID (bl->regno));
    }
  return 0;
}
|
5258 |
|
|
|
5259 |
|
|
|
5260 |
|
|
/* Reduce each giv of BL that we have decided to reduce.

   For each reducible giv: allocate a new reduced register, possibly mark
   it for auto-increment optimization, emit an add at every biv increment
   site to keep the reduced register in step, and emit initialization code
   at the loop start.  */

static void
loop_givs_reduce (struct loop *loop, struct iv_class *bl)
{
  struct induction *v;

  for (v = bl->giv; v; v = v->next_iv)
    {
      struct induction *tv;
      if (! v->ignore && v->same == 0)
	{
	  int auto_inc_opt = 0;

	  /* If the code for derived givs immediately below has already
	     allocated a new_reg, we must keep it.  */
	  if (! v->new_reg)
	    v->new_reg = gen_reg_rtx (v->mode);

#ifdef AUTO_INC_DEC
	  /* If the target has auto-increment addressing modes, and
	     this is an address giv, then try to put the increment
	     immediately after its use, so that flow can create an
	     auto-increment addressing mode.  */
	  /* Don't do this for loops entered at the bottom, to avoid
	     this invalid transformation:
	     jmp L;		->	jmp L;
	     TOP:			TOP:
	     use giv			use giv
	     L:				inc giv
	     inc biv			L:
	     test biv			test giv
	     cbr TOP			cbr TOP
	  */
	  if (v->giv_type == DEST_ADDR && bl->biv_count == 1
	      && bl->biv->always_executed && ! bl->biv->maybe_multiple
	      /* We don't handle reversed biv's because bl->biv->insn
		 does not have a valid INSN_LUID.  */
	      && ! bl->reversed
	      && v->always_executed && ! v->maybe_multiple
	      && INSN_UID (v->insn) < max_uid_for_loop
	      && !loop->top)
	    {
	      /* If other giv's have been combined with this one, then
		 this will work only if all uses of the other giv's occur
		 before this giv's insn.  This is difficult to check.

		 We simplify this by looking for the common case where
		 there is one DEST_REG giv, and this giv's insn is the
		 last use of the dest_reg of that DEST_REG giv.  If the
		 increment occurs after the address giv, then we can
		 perform the optimization.  (Otherwise, the increment
		 would have to go before other_giv, and we would not be
		 able to combine it with the address giv to get an
		 auto-inc address.)  */
	      if (v->combined_with)
		{
		  struct induction *other_giv = 0;

		  for (tv = bl->giv; tv; tv = tv->next_iv)
		    if (tv->same == v)
		      {
			if (other_giv)
			  break;
			else
			  other_giv = tv;
		      }
		  /* TV nonzero here means the inner loop hit a second
		     combined giv and bailed out; only one is handled.  */
		  if (! tv && other_giv
		      && REGNO (other_giv->dest_reg) < max_reg_before_loop
		      && (REGNO_LAST_UID (REGNO (other_giv->dest_reg))
			  == INSN_UID (v->insn))
		      && INSN_LUID (v->insn) < INSN_LUID (bl->biv->insn))
		    auto_inc_opt = 1;
		}
	      /* Check for case where increment is before the address
		 giv.  Do this test in "loop order".  */
	      else if ((INSN_LUID (v->insn) > INSN_LUID (bl->biv->insn)
			&& (INSN_LUID (v->insn) < INSN_LUID (loop->scan_start)
			    || (INSN_LUID (bl->biv->insn)
				> INSN_LUID (loop->scan_start))))
		       || (INSN_LUID (v->insn) < INSN_LUID (loop->scan_start)
			   && (INSN_LUID (loop->scan_start)
			       < INSN_LUID (bl->biv->insn))))
		auto_inc_opt = -1;
	      else
		auto_inc_opt = 1;

#ifdef HAVE_cc0
	      {
		rtx prev;

		/* We can't put an insn immediately after one setting
		   cc0, or immediately before one using cc0.  */
		if ((auto_inc_opt == 1 && sets_cc0_p (PATTERN (v->insn)))
		    || (auto_inc_opt == -1
			&& (prev = prev_nonnote_insn (v->insn)) != 0
			&& INSN_P (prev)
			&& sets_cc0_p (PATTERN (prev))))
		  auto_inc_opt = 0;
	      }
#endif

	      if (auto_inc_opt)
		v->auto_inc_opt = 1;
	    }
#endif

	  /* For each place where the biv is incremented, add an insn
	     to increment the new, reduced reg for the giv.  */
	  for (tv = bl->biv; tv; tv = tv->next_iv)
	    {
	      rtx insert_before;

	      /* Skip if location is the same as a previous one.  */
	      if (tv->same)
		continue;
	      if (! auto_inc_opt)
		insert_before = NEXT_INSN (tv->insn);
	      else if (auto_inc_opt == 1)
		insert_before = NEXT_INSN (v->insn);
	      else
		insert_before = v->insn;

	      if (tv->mult_val == const1_rtx)
		loop_iv_add_mult_emit_before (loop, tv->add_val, v->mult_val,
					      v->new_reg, v->new_reg,
					      0, insert_before);
	      else /* tv->mult_val == const0_rtx */
		/* A multiply is acceptable here
		   since this is presumed to be seldom executed.  */
		loop_iv_add_mult_emit_before (loop, tv->add_val, v->mult_val,
					      v->add_val, v->new_reg,
					      0, insert_before);
	    }

	  /* Add code at loop start to initialize giv's reduced reg.  */

	  loop_iv_add_mult_hoist (loop,
				  extend_value_for_giv (v, bl->initial_value),
				  v->mult_val, v->add_val, v->new_reg);
	}
    }
}
|
5403 |
|
|
|
5404 |
|
|
|
5405 |
|
|
/* Check for givs whose first use is their definition and whose
|
5406 |
|
|
last use is the definition of another giv. If so, it is likely
|
5407 |
|
|
dead and should not be used to derive another giv nor to
|
5408 |
|
|
eliminate a biv. */
|
5409 |
|
|
|
5410 |
|
|
static void
|
5411 |
|
|
loop_givs_dead_check (struct loop *loop ATTRIBUTE_UNUSED, struct iv_class *bl)
|
5412 |
|
|
{
|
5413 |
|
|
struct induction *v;
|
5414 |
|
|
|
5415 |
|
|
for (v = bl->giv; v; v = v->next_iv)
|
5416 |
|
|
{
|
5417 |
|
|
if (v->ignore
|
5418 |
|
|
|| (v->same && v->same->ignore))
|
5419 |
|
|
continue;
|
5420 |
|
|
|
5421 |
|
|
if (v->giv_type == DEST_REG
|
5422 |
|
|
&& REGNO_FIRST_UID (REGNO (v->dest_reg)) == INSN_UID (v->insn))
|
5423 |
|
|
{
|
5424 |
|
|
struct induction *v1;
|
5425 |
|
|
|
5426 |
|
|
for (v1 = bl->giv; v1; v1 = v1->next_iv)
|
5427 |
|
|
if (REGNO_LAST_UID (REGNO (v->dest_reg)) == INSN_UID (v1->insn))
|
5428 |
|
|
v->maybe_dead = 1;
|
5429 |
|
|
}
|
5430 |
|
|
}
|
5431 |
|
|
}
|
5432 |
|
|
|
5433 |
|
|
|
5434 |
|
|
/* Rescan the givs of BL after reduction: propagate combined givs'
   replacements, transfer pointer/alignment information to the new
   registers, and rewrite each giv's use site to refer to the reduced
   register.  REG_MAP collects dest_reg -> new_reg substitutions for
   replaceable givs; non-replaceable ones get an explicit copy insn.  */
static void
loop_givs_rescan (struct loop *loop, struct iv_class *bl, rtx *reg_map)
{
  struct induction *v;

  for (v = bl->giv; v; v = v->next_iv)
    {
      /* If the giv this was combined with is ignored, ignore it too.  */
      if (v->same && v->same->ignore)
	v->ignore = 1;

      if (v->ignore)
	continue;

      /* Update expression if this was combined, in case other giv was
	 replaced.  */
      if (v->same)
	v->new_reg = replace_rtx (v->new_reg,
				  v->same->dest_reg, v->same->new_reg);

      /* See if this register is known to be a pointer to something.  If
	 so, see if we can find the alignment.  First see if there is a
	 destination register that is a pointer.  If so, this shares the
	 alignment too.  Next see if we can deduce anything from the
	 computational information.  If not, and this is a DEST_ADDR
	 giv, at least we know that it's a pointer, though we don't know
	 the alignment.  */
      if (REG_P (v->new_reg)
	  && v->giv_type == DEST_REG
	  && REG_POINTER (v->dest_reg))
	mark_reg_pointer (v->new_reg,
			  REGNO_POINTER_ALIGN (REGNO (v->dest_reg)));
      else if (REG_P (v->new_reg)
	       && REG_POINTER (v->src_reg))
	{
	  unsigned int align = REGNO_POINTER_ALIGN (REGNO (v->src_reg));

	  /* The alignment survives only if the constant offset added
	     is a multiple of it.  */
	  if (align == 0
	      || GET_CODE (v->add_val) != CONST_INT
	      || INTVAL (v->add_val) % (align / BITS_PER_UNIT) != 0)
	    align = 0;

	  mark_reg_pointer (v->new_reg, align);
	}
      else if (REG_P (v->new_reg)
	       && REG_P (v->add_val)
	       && REG_POINTER (v->add_val))
	{
	  unsigned int align = REGNO_POINTER_ALIGN (REGNO (v->add_val));

	  if (align == 0 || GET_CODE (v->mult_val) != CONST_INT
	      || INTVAL (v->mult_val) % (align / BITS_PER_UNIT) != 0)
	    align = 0;

	  mark_reg_pointer (v->new_reg, align);
	}
      else if (REG_P (v->new_reg) && v->giv_type == DEST_ADDR)
	mark_reg_pointer (v->new_reg, 0);

      if (v->giv_type == DEST_ADDR)
	{
	  /* Store reduced reg as the address in the memref where we found
	     this giv.  */
	  if (validate_change_maybe_volatile (v->insn, v->location,
					      v->new_reg))
	    /* Yay, it worked!  */;
	  /* Not replaceable; emit an insn to set the original
	     giv reg from the reduced giv.  */
	  else if (REG_P (*v->location))
	    {
	      rtx tem;
	      start_sequence ();
	      tem = force_operand (v->new_reg, *v->location);
	      if (tem != *v->location)
		emit_move_insn (*v->location, tem);
	      tem = get_insns ();
	      end_sequence ();
	      loop_insn_emit_before (loop, 0, v->insn, tem);
	    }
	  else if (GET_CODE (*v->location) == PLUS
		   && REG_P (XEXP (*v->location, 0))
		   && CONSTANT_P (XEXP (*v->location, 1)))
	    {
	      /* The address is reg + const; set the reg to
		 new_reg - const so the whole address equals new_reg.  */
	      rtx tem;
	      start_sequence ();
	      tem = expand_simple_binop (GET_MODE (*v->location), MINUS,
					 v->new_reg, XEXP (*v->location, 1),
					 NULL_RTX, 0, OPTAB_LIB_WIDEN);
	      emit_move_insn (XEXP (*v->location, 0), tem);
	      tem = get_insns ();
	      end_sequence ();
	      loop_insn_emit_before (loop, 0, v->insn, tem);
	    }
	  else
	    {
	      /* If it wasn't a reg, create a pseudo and use that.  */
	      rtx reg, seq;
	      start_sequence ();
	      reg = force_reg (v->mode, *v->location);
	      if (validate_change_maybe_volatile (v->insn, v->location, reg))
		{
		  seq = get_insns ();
		  end_sequence ();
		  loop_insn_emit_before (loop, 0, v->insn, seq);
		}
	      else
		{
		  end_sequence ();
		  if (loop_dump_stream)
		    fprintf (loop_dump_stream,
			     "unable to reduce iv in insn %d\n",
			     INSN_UID (v->insn));
		  /* Mark the whole class not fully reduced so biv
		     elimination won't assume this giv was handled.  */
		  bl->all_reduced = 0;
		  v->ignore = 1;
		  continue;
		}
	    }
	}
      else if (v->replaceable)
	{
	  reg_map[REGNO (v->dest_reg)] = v->new_reg;
	}
      else
	{
	  rtx original_insn = v->insn;
	  rtx note;

	  /* Not replaceable; emit an insn to set the original giv reg from
	     the reduced giv, same as above.  */
	  v->insn = loop_insn_emit_after (loop, 0, original_insn,
					  gen_move_insn (v->dest_reg,
							 v->new_reg));

	  /* The original insn may have a REG_EQUAL note.  This note is
	     now incorrect and may result in invalid substitutions later.
	     The original insn is dead, but may be part of a libcall
	     sequence, which doesn't seem worth the bother of handling.  */
	  note = find_reg_note (original_insn, REG_EQUAL, NULL_RTX);
	  if (note)
	    remove_note (original_insn, note);
	}

      /* When a loop is reversed, givs which depend on the reversed
	 biv, and which are live outside the loop, must be set to their
	 correct final value.  This insn is only needed if the giv is
	 not replaceable.  The correct final value is the same as the
	 value that the giv starts the reversed loop with.  */
      if (bl->reversed && ! v->replaceable)
	loop_iv_add_mult_sink (loop,
			       extend_value_for_giv (v, bl->initial_value),
			       v->mult_val, v->add_val, v->dest_reg);
      else if (v->final_value)
	loop_insn_sink_or_swim (loop,
				gen_load_of_final_value (v->dest_reg,
							 v->final_value));

      if (loop_dump_stream)
	{
	  fprintf (loop_dump_stream, "giv at %d reduced to ",
		   INSN_UID (v->insn));
	  print_simple_rtl (loop_dump_stream, v->new_reg);
	  fprintf (loop_dump_stream, "\n");
	}
    }
}
|
5598 |
|
|
|
5599 |
|
|
|
5600 |
|
|
static int
|
5601 |
|
|
loop_giv_reduce_benefit (struct loop *loop ATTRIBUTE_UNUSED,
|
5602 |
|
|
struct iv_class *bl, struct induction *v,
|
5603 |
|
|
rtx test_reg)
|
5604 |
|
|
{
|
5605 |
|
|
int add_cost;
|
5606 |
|
|
int benefit;
|
5607 |
|
|
|
5608 |
|
|
benefit = v->benefit;
|
5609 |
|
|
PUT_MODE (test_reg, v->mode);
|
5610 |
|
|
add_cost = iv_add_mult_cost (bl->biv->add_val, v->mult_val,
|
5611 |
|
|
test_reg, test_reg);
|
5612 |
|
|
|
5613 |
|
|
/* Reduce benefit if not replaceable, since we will insert a
|
5614 |
|
|
move-insn to replace the insn that calculates this giv. Don't do
|
5615 |
|
|
this unless the giv is a user variable, since it will often be
|
5616 |
|
|
marked non-replaceable because of the duplication of the exit
|
5617 |
|
|
code outside the loop. In such a case, the copies we insert are
|
5618 |
|
|
dead and will be deleted. So they don't have a cost. Similar
|
5619 |
|
|
situations exist. */
|
5620 |
|
|
/* ??? The new final_[bg]iv_value code does a much better job of
|
5621 |
|
|
finding replaceable giv's, and hence this code may no longer be
|
5622 |
|
|
necessary. */
|
5623 |
|
|
if (! v->replaceable && ! bl->eliminable
|
5624 |
|
|
&& REG_USERVAR_P (v->dest_reg))
|
5625 |
|
|
benefit -= copy_cost;
|
5626 |
|
|
|
5627 |
|
|
/* Decrease the benefit to count the add-insns that we will insert
|
5628 |
|
|
to increment the reduced reg for the giv. ??? This can
|
5629 |
|
|
overestimate the run-time cost of the additional insns, e.g. if
|
5630 |
|
|
there are multiple basic blocks that increment the biv, but only
|
5631 |
|
|
one of these blocks is executed during each iteration. There is
|
5632 |
|
|
no good way to detect cases like this with the current structure
|
5633 |
|
|
of the loop optimizer. This code is more accurate for
|
5634 |
|
|
determining code size than run-time benefits. */
|
5635 |
|
|
benefit -= add_cost * bl->biv_count;
|
5636 |
|
|
|
5637 |
|
|
/* Decide whether to strength-reduce this giv or to leave the code
|
5638 |
|
|
unchanged (recompute it from the biv each time it is used). This
|
5639 |
|
|
decision can be made independently for each giv. */
|
5640 |
|
|
|
5641 |
|
|
#ifdef AUTO_INC_DEC
|
5642 |
|
|
/* Attempt to guess whether autoincrement will handle some of the
|
5643 |
|
|
new add insns; if so, increase BENEFIT (undo the subtraction of
|
5644 |
|
|
add_cost that was done above). */
|
5645 |
|
|
if (v->giv_type == DEST_ADDR
|
5646 |
|
|
/* Increasing the benefit is risky, since this is only a guess.
|
5647 |
|
|
Avoid increasing register pressure in cases where there would
|
5648 |
|
|
be no other benefit from reducing this giv. */
|
5649 |
|
|
&& benefit > 0
|
5650 |
|
|
&& GET_CODE (v->mult_val) == CONST_INT)
|
5651 |
|
|
{
|
5652 |
|
|
int size = GET_MODE_SIZE (GET_MODE (v->mem));
|
5653 |
|
|
|
5654 |
|
|
if (HAVE_POST_INCREMENT
|
5655 |
|
|
&& INTVAL (v->mult_val) == size)
|
5656 |
|
|
benefit += add_cost * bl->biv_count;
|
5657 |
|
|
else if (HAVE_PRE_INCREMENT
|
5658 |
|
|
&& INTVAL (v->mult_val) == size)
|
5659 |
|
|
benefit += add_cost * bl->biv_count;
|
5660 |
|
|
else if (HAVE_POST_DECREMENT
|
5661 |
|
|
&& -INTVAL (v->mult_val) == size)
|
5662 |
|
|
benefit += add_cost * bl->biv_count;
|
5663 |
|
|
else if (HAVE_PRE_DECREMENT
|
5664 |
|
|
&& -INTVAL (v->mult_val) == size)
|
5665 |
|
|
benefit += add_cost * bl->biv_count;
|
5666 |
|
|
}
|
5667 |
|
|
#endif
|
5668 |
|
|
|
5669 |
|
|
return benefit;
|
5670 |
|
|
}
|
5671 |
|
|
|
5672 |
|
|
|
5673 |
|
|
/* Free IV structures for LOOP. */
|
5674 |
|
|
|
5675 |
|
|
static void
|
5676 |
|
|
loop_ivs_free (struct loop *loop)
|
5677 |
|
|
{
|
5678 |
|
|
struct loop_ivs *ivs = LOOP_IVS (loop);
|
5679 |
|
|
struct iv_class *iv = ivs->list;
|
5680 |
|
|
|
5681 |
|
|
free (ivs->regs);
|
5682 |
|
|
|
5683 |
|
|
while (iv)
|
5684 |
|
|
{
|
5685 |
|
|
struct iv_class *next = iv->next;
|
5686 |
|
|
struct induction *induction;
|
5687 |
|
|
struct induction *next_induction;
|
5688 |
|
|
|
5689 |
|
|
for (induction = iv->biv; induction; induction = next_induction)
|
5690 |
|
|
{
|
5691 |
|
|
next_induction = induction->next_iv;
|
5692 |
|
|
free (induction);
|
5693 |
|
|
}
|
5694 |
|
|
for (induction = iv->giv; induction; induction = next_induction)
|
5695 |
|
|
{
|
5696 |
|
|
next_induction = induction->next_iv;
|
5697 |
|
|
free (induction);
|
5698 |
|
|
}
|
5699 |
|
|
|
5700 |
|
|
free (iv);
|
5701 |
|
|
iv = next;
|
5702 |
|
|
}
|
5703 |
|
|
}
|
5704 |
|
|
|
5705 |
|
|
/* Look back before LOOP->START for the insn that sets REG and return
|
5706 |
|
|
the equivalent constant if there is a REG_EQUAL note otherwise just
|
5707 |
|
|
the SET_SRC of REG. */
|
5708 |
|
|
|
5709 |
|
|
static rtx
|
5710 |
|
|
loop_find_equiv_value (const struct loop *loop, rtx reg)
|
5711 |
|
|
{
|
5712 |
|
|
rtx loop_start = loop->start;
|
5713 |
|
|
rtx insn, set;
|
5714 |
|
|
rtx ret;
|
5715 |
|
|
|
5716 |
|
|
ret = reg;
|
5717 |
|
|
for (insn = PREV_INSN (loop_start); insn; insn = PREV_INSN (insn))
|
5718 |
|
|
{
|
5719 |
|
|
if (LABEL_P (insn))
|
5720 |
|
|
break;
|
5721 |
|
|
|
5722 |
|
|
else if (INSN_P (insn) && reg_set_p (reg, insn))
|
5723 |
|
|
{
|
5724 |
|
|
/* We found the last insn before the loop that sets the register.
|
5725 |
|
|
If it sets the entire register, and has a REG_EQUAL note,
|
5726 |
|
|
then use the value of the REG_EQUAL note. */
|
5727 |
|
|
if ((set = single_set (insn))
|
5728 |
|
|
&& (SET_DEST (set) == reg))
|
5729 |
|
|
{
|
5730 |
|
|
rtx note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
|
5731 |
|
|
|
5732 |
|
|
/* Only use the REG_EQUAL note if it is a constant.
|
5733 |
|
|
Other things, divide in particular, will cause
|
5734 |
|
|
problems later if we use them. */
|
5735 |
|
|
if (note && GET_CODE (XEXP (note, 0)) != EXPR_LIST
|
5736 |
|
|
&& CONSTANT_P (XEXP (note, 0)))
|
5737 |
|
|
ret = XEXP (note, 0);
|
5738 |
|
|
else
|
5739 |
|
|
ret = SET_SRC (set);
|
5740 |
|
|
|
5741 |
|
|
/* We cannot do this if it changes between the
|
5742 |
|
|
assignment and loop start though. */
|
5743 |
|
|
if (modified_between_p (ret, insn, loop_start))
|
5744 |
|
|
ret = reg;
|
5745 |
|
|
}
|
5746 |
|
|
break;
|
5747 |
|
|
}
|
5748 |
|
|
}
|
5749 |
|
|
return ret;
|
5750 |
|
|
}
|
5751 |
|
|
|
5752 |
|
|
/* Find and return register term common to both expressions OP0 and
|
5753 |
|
|
OP1 or NULL_RTX if no such term exists. Each expression must be a
|
5754 |
|
|
REG or a PLUS of a REG. */
|
5755 |
|
|
|
5756 |
|
|
static rtx
|
5757 |
|
|
find_common_reg_term (rtx op0, rtx op1)
|
5758 |
|
|
{
|
5759 |
|
|
if ((REG_P (op0) || GET_CODE (op0) == PLUS)
|
5760 |
|
|
&& (REG_P (op1) || GET_CODE (op1) == PLUS))
|
5761 |
|
|
{
|
5762 |
|
|
rtx op00;
|
5763 |
|
|
rtx op01;
|
5764 |
|
|
rtx op10;
|
5765 |
|
|
rtx op11;
|
5766 |
|
|
|
5767 |
|
|
if (GET_CODE (op0) == PLUS)
|
5768 |
|
|
op01 = XEXP (op0, 1), op00 = XEXP (op0, 0);
|
5769 |
|
|
else
|
5770 |
|
|
op01 = const0_rtx, op00 = op0;
|
5771 |
|
|
|
5772 |
|
|
if (GET_CODE (op1) == PLUS)
|
5773 |
|
|
op11 = XEXP (op1, 1), op10 = XEXP (op1, 0);
|
5774 |
|
|
else
|
5775 |
|
|
op11 = const0_rtx, op10 = op1;
|
5776 |
|
|
|
5777 |
|
|
/* Find and return common register term if present. */
|
5778 |
|
|
if (REG_P (op00) && (op00 == op10 || op00 == op11))
|
5779 |
|
|
return op00;
|
5780 |
|
|
else if (REG_P (op01) && (op01 == op10 || op01 == op11))
|
5781 |
|
|
return op01;
|
5782 |
|
|
}
|
5783 |
|
|
|
5784 |
|
|
/* No common register term found. */
|
5785 |
|
|
return NULL_RTX;
|
5786 |
|
|
}
|
5787 |
|
|
|
5788 |
|
|
/* Determine the loop iterator and calculate the number of loop
|
5789 |
|
|
iterations. Returns the exact number of loop iterations if it can
|
5790 |
|
|
be calculated, otherwise returns zero. */
|
5791 |
|
|
|
5792 |
|
|
static unsigned HOST_WIDE_INT
loop_iterations (struct loop *loop)
{
  struct loop_info *loop_info = LOOP_INFO (loop);
  struct loop_ivs *ivs = LOOP_IVS (loop);
  rtx comparison, comparison_value;
  rtx iteration_var, initial_value, increment, final_value;
  enum rtx_code comparison_code;
  HOST_WIDE_INT inc;
  unsigned HOST_WIDE_INT abs_inc;
  unsigned HOST_WIDE_INT abs_diff;
  int off_by_one;
  int increment_dir;
  int unsigned_p, compare_dir, final_larger;
  rtx last_loop_insn;
  struct iv_class *bl;

  /* Reset all outputs first; every early return below leaves
     LOOP_INFO in this cleared state (except where noted).  */
  loop_info->n_iterations = 0;
  loop_info->initial_value = 0;
  loop_info->initial_equiv_value = 0;
  loop_info->comparison_value = 0;
  loop_info->final_value = 0;
  loop_info->final_equiv_value = 0;
  loop_info->increment = 0;
  loop_info->iteration_var = 0;
  loop_info->iv = 0;

  /* We used to use prev_nonnote_insn here, but that fails because it might
     accidentally get the branch for a contained loop if the branch for this
     loop was deleted.  We can only trust branches immediately before the
     loop_end.  */
  last_loop_insn = PREV_INSN (loop->end);

  /* ??? We should probably try harder to find the jump insn
     at the end of the loop.  The following code assumes that
     the last loop insn is a jump to the top of the loop.  */
  if (!JUMP_P (last_loop_insn))
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Loop iterations: No final conditional branch found.\n");
      return 0;
    }

  /* If there is a more than a single jump to the top of the loop
     we cannot (easily) determine the iteration count.  */
  if (LABEL_NUSES (JUMP_LABEL (last_loop_insn)) > 1)
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Loop iterations: Loop has multiple back edges.\n");
      return 0;
    }

  /* Find the iteration variable.  If the last insn is a conditional
     branch, and the insn before tests a register value, make that the
     iteration variable.  */

  comparison = get_condition_for_loop (loop, last_loop_insn);
  if (comparison == 0)
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Loop iterations: No final comparison found.\n");
      return 0;
    }

  /* ??? Get_condition may switch position of induction variable and
     invariant register when it canonicalizes the comparison.  */

  comparison_code = GET_CODE (comparison);
  iteration_var = XEXP (comparison, 0);
  comparison_value = XEXP (comparison, 1);

  if (!REG_P (iteration_var))
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Loop iterations: Comparison not against register.\n");
      return 0;
    }

  /* The only new registers that are created before loop iterations
     are givs made from biv increments or registers created by
     load_mems.  In the latter case, it is possible that try_copy_prop
     will propagate a new pseudo into the old iteration register but
     this will be marked by having the REG_USERVAR_P bit set.  */

  gcc_assert ((unsigned) REGNO (iteration_var) < ivs->n_regs
	      || REG_USERVAR_P (iteration_var));

  /* Determine the initial value of the iteration variable, and the amount
     that it is incremented each loop.  Use the tables constructed by
     the strength reduction pass to calculate these values.  */

  /* Clear the result values, in case no answer can be found.  */
  initial_value = 0;
  increment = 0;

  /* The iteration variable can be either a giv or a biv.  Check to see
     which it is, and compute the variable's initial value, and increment
     value if possible.  */

  /* If this is a new register, can't handle it since we don't have any
     reg_iv_type entry for it.  */
  if ((unsigned) REGNO (iteration_var) >= ivs->n_regs)
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Loop iterations: No reg_iv_type entry for iteration var.\n");
      return 0;
    }

  /* Reject iteration variables larger than the host wide int size, since they
     could result in a number of iterations greater than the range of our
     `unsigned HOST_WIDE_INT' variable loop_info->n_iterations.  */
  else if ((GET_MODE_BITSIZE (GET_MODE (iteration_var))
	    > HOST_BITS_PER_WIDE_INT))
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Loop iterations: Iteration var rejected because mode too large.\n");
      return 0;
    }
  else if (GET_MODE_CLASS (GET_MODE (iteration_var)) != MODE_INT)
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Loop iterations: Iteration var not an integer.\n");
      return 0;
    }

  /* Try swapping the comparison to identify a suitable iv.  */
  if (REG_IV_TYPE (ivs, REGNO (iteration_var)) != BASIC_INDUCT
      && REG_IV_TYPE (ivs, REGNO (iteration_var)) != GENERAL_INDUCT
      && REG_P (comparison_value)
      && REGNO (comparison_value) < ivs->n_regs)
    {
      rtx temp = comparison_value;
      comparison_code = swap_condition (comparison_code);
      comparison_value = iteration_var;
      iteration_var = temp;
    }

  if (REG_IV_TYPE (ivs, REGNO (iteration_var)) == BASIC_INDUCT)
    {
      gcc_assert (REGNO (iteration_var) < ivs->n_regs);

      /* Grab initial value, only useful if it is a constant.  */
      bl = REG_IV_CLASS (ivs, REGNO (iteration_var));
      initial_value = bl->initial_value;
      if (!bl->biv->always_executed || bl->biv->maybe_multiple)
	{
	  if (loop_dump_stream)
	    fprintf (loop_dump_stream,
		     "Loop iterations: Basic induction var not set once in each iteration.\n");
	  return 0;
	}

      increment = biv_total_increment (bl);
    }
  else if (REG_IV_TYPE (ivs, REGNO (iteration_var)) == GENERAL_INDUCT)
    {
      HOST_WIDE_INT offset = 0;
      struct induction *v = REG_IV_INFO (ivs, REGNO (iteration_var));
      rtx biv_initial_value;

      gcc_assert (REGNO (v->src_reg) < ivs->n_regs);

      if (!v->always_executed || v->maybe_multiple)
	{
	  if (loop_dump_stream)
	    fprintf (loop_dump_stream,
		     "Loop iterations: General induction var not set once in each iteration.\n");
	  return 0;
	}

      bl = REG_IV_CLASS (ivs, REGNO (v->src_reg));

      /* Increment value is mult_val times the increment value of the biv.  */

      increment = biv_total_increment (bl);
      if (increment)
	{
	  struct induction *biv_inc;

	  increment = fold_rtx_mult_add (v->mult_val,
					 extend_value_for_giv (v, increment),
					 const0_rtx, v->mode);
	  /* The caller assumes that one full increment has occurred at the
	     first loop test.  But that's not true when the biv is incremented
	     after the giv is set (which is the usual case), e.g.:
	     i = 6; do {;} while (i++ < 9) .
	     Therefore, we bias the initial value by subtracting the amount of
	     the increment that occurs between the giv set and the giv test.  */
	  for (biv_inc = bl->biv; biv_inc; biv_inc = biv_inc->next_iv)
	    {
	      if (loop_insn_first_p (v->insn, biv_inc->insn))
		{
		  if (REG_P (biv_inc->add_val))
		    {
		      if (loop_dump_stream)
			fprintf (loop_dump_stream,
				 "Loop iterations: Basic induction var add_val is REG %d.\n",
				 REGNO (biv_inc->add_val));
		      return 0;
		    }

		  /* If we have already counted it, skip it.  */
		  if (biv_inc->same)
		    continue;

		  offset -= INTVAL (biv_inc->add_val);
		}
	    }
	}
      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Loop iterations: Giv iterator, initial value bias %ld.\n",
		 (long) offset);

      /* Initial value is mult_val times the biv's initial value plus
	 add_val.  Only useful if it is a constant.  */
      biv_initial_value = extend_value_for_giv (v, bl->initial_value);
      initial_value
	= fold_rtx_mult_add (v->mult_val,
			     plus_constant (biv_initial_value, offset),
			     v->add_val, v->mode);
    }
  else
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Loop iterations: Not basic or general induction var.\n");
      return 0;
    }

  if (initial_value == 0)
    return 0;

  /* Classify the comparison: COMPARE_DIR is the direction the iv must
     move for the loop to terminate, OFF_BY_ONE adjusts for inclusive
     (LE/GE) bounds, UNSIGNED_P selects unsigned interpretation.  */
  unsigned_p = 0;
  off_by_one = 0;
  switch (comparison_code)
    {
    case LEU:
      unsigned_p = 1;
      /* Fall through.  */
    case LE:
      compare_dir = 1;
      off_by_one = 1;
      break;
    case GEU:
      unsigned_p = 1;
      /* Fall through.  */
    case GE:
      compare_dir = -1;
      off_by_one = -1;
      break;
    case EQ:
      /* Cannot determine loop iterations with this case.  */
      compare_dir = 0;
      break;
    case LTU:
      unsigned_p = 1;
      /* Fall through.  */
    case LT:
      compare_dir = 1;
      break;
    case GTU:
      unsigned_p = 1;
      /* Fall through.  */
    case GT:
      compare_dir = -1;
      break;
    case NE:
      compare_dir = 0;
      break;
    default:
      gcc_unreachable ();
    }

  /* If the comparison value is an invariant register, then try to find
     its value from the insns before the start of the loop.  */

  final_value = comparison_value;
  if (REG_P (comparison_value)
      && loop_invariant_p (loop, comparison_value))
    {
      final_value = loop_find_equiv_value (loop, comparison_value);

      /* If we don't get an invariant final value, we are better
	 off with the original register.  */
      if (! loop_invariant_p (loop, final_value))
	final_value = comparison_value;
    }

  /* Calculate the approximate final value of the induction variable
     (on the last successful iteration).  The exact final value
     depends on the branch operator, and increment sign.  It will be
     wrong if the iteration variable is not incremented by one each
     time through the loop and (comparison_value + off_by_one -
     initial_value) % increment != 0.
     ??? Note that the final_value may overflow and thus final_larger
     will be bogus.  A potentially infinite loop will be classified
     as immediate, e.g. for (i = 0x7ffffff0; i <= 0x7fffffff; i++)  */
  if (off_by_one)
    final_value = plus_constant (final_value, off_by_one);

  /* Save the calculated values describing this loop's bounds, in case
     precondition_loop_p will need them later.  These values can not be
     recalculated inside precondition_loop_p because strength reduction
     optimizations may obscure the loop's structure.

     These values are only required by precondition_loop_p and insert_bct
     whenever the number of iterations cannot be computed at compile time.
     Only the difference between final_value and initial_value is
     important.  Note that final_value is only approximate.  */
  loop_info->initial_value = initial_value;
  loop_info->comparison_value = comparison_value;
  loop_info->final_value = plus_constant (comparison_value, off_by_one);
  loop_info->increment = increment;
  loop_info->iteration_var = iteration_var;
  loop_info->comparison_code = comparison_code;
  loop_info->iv = bl;

  /* Try to determine the iteration count for loops such
     as (for i = init; i < init + const; i++).  When running the
     loop optimization twice, the first pass often converts simple
     loops into this form.  */

  if (REG_P (initial_value))
    {
      rtx reg1;
      rtx reg2;
      rtx const2;

      reg1 = initial_value;
      if (GET_CODE (final_value) == PLUS)
	reg2 = XEXP (final_value, 0), const2 = XEXP (final_value, 1);
      else
	reg2 = final_value, const2 = const0_rtx;

      /* Check for initial_value = reg1, final_value = reg2 + const2,
	 where reg1 != reg2.  */
      if (REG_P (reg2) && reg2 != reg1)
	{
	  rtx temp;

	  /* Find what reg1 is equivalent to.  Hopefully it will
	     either be reg2 or reg2 plus a constant.  */
	  temp = loop_find_equiv_value (loop, reg1);

	  if (find_common_reg_term (temp, reg2))
	    initial_value = temp;
	  else if (loop_invariant_p (loop, reg2))
	    {
	      /* Find what reg2 is equivalent to.  Hopefully it will
		 either be reg1 or reg1 plus a constant.  Let's ignore
		 the latter case for now since it is not so common.  */
	      temp = loop_find_equiv_value (loop, reg2);

	      if (temp == loop_info->iteration_var)
		temp = initial_value;
	      if (temp == reg1)
		final_value = (const2 == const0_rtx)
		  ? reg1 : gen_rtx_PLUS (GET_MODE (reg1), reg1, const2);
	    }
	}
    }

  loop_info->initial_equiv_value = initial_value;
  loop_info->final_equiv_value = final_value;

  /* For EQ comparison loops, we don't have a valid final value.
     Check this now so that we won't leave an invalid value if we
     return early for any other reason.  */
  if (comparison_code == EQ)
    loop_info->final_equiv_value = loop_info->final_value = 0;

  if (increment == 0)
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Loop iterations: Increment value can't be calculated.\n");
      return 0;
    }

  if (GET_CODE (increment) != CONST_INT)
    {
      /* If we have a REG, check to see if REG holds a constant value.  */
      /* ??? Other RTL, such as (neg (reg)) is possible here, but it isn't
	 clear if it is worthwhile to try to handle such RTL.  */
      if (REG_P (increment) || GET_CODE (increment) == SUBREG)
	increment = loop_find_equiv_value (loop, increment);

      if (GET_CODE (increment) != CONST_INT)
	{
	  if (loop_dump_stream)
	    {
	      fprintf (loop_dump_stream,
		       "Loop iterations: Increment value not constant ");
	      print_simple_rtl (loop_dump_stream, increment);
	      fprintf (loop_dump_stream, ".\n");
	    }
	  return 0;
	}
      loop_info->increment = increment;
    }

  if (GET_CODE (initial_value) != CONST_INT)
    {
      if (loop_dump_stream)
	{
	  fprintf (loop_dump_stream,
		   "Loop iterations: Initial value not constant ");
	  print_simple_rtl (loop_dump_stream, initial_value);
	  fprintf (loop_dump_stream, ".\n");
	}
      return 0;
    }
  else if (GET_CODE (final_value) != CONST_INT)
    {
      if (loop_dump_stream)
	{
	  fprintf (loop_dump_stream,
		   "Loop iterations: Final value not constant ");
	  print_simple_rtl (loop_dump_stream, final_value);
	  fprintf (loop_dump_stream, ".\n");
	}
      return 0;
    }
  else if (comparison_code == EQ)
    {
      rtx inc_once;

      if (loop_dump_stream)
	fprintf (loop_dump_stream, "Loop iterations: EQ comparison loop.\n");

      inc_once = gen_int_mode (INTVAL (initial_value) + INTVAL (increment),
			       GET_MODE (iteration_var));

      if (inc_once == final_value)
	{
	  /* The iterator value once through the loop is equal to the
	     comparison value.  Either we have an infinite loop, or
	     we'll loop twice.  */
	  if (increment == const0_rtx)
	    return 0;
	  loop_info->n_iterations = 2;
	}
      else
	loop_info->n_iterations = 1;

      if (GET_CODE (loop_info->initial_value) == CONST_INT)
	loop_info->final_value
	  = gen_int_mode ((INTVAL (loop_info->initial_value)
			   + loop_info->n_iterations * INTVAL (increment)),
			  GET_MODE (iteration_var));
      else
	loop_info->final_value
	  = plus_constant (loop_info->initial_value,
			   loop_info->n_iterations * INTVAL (increment));
      loop_info->final_equiv_value
	= gen_int_mode ((INTVAL (initial_value)
			 + loop_info->n_iterations * INTVAL (increment)),
			GET_MODE (iteration_var));
      return loop_info->n_iterations;
    }

  /* Final_larger is 1 if final larger, 0 if they are equal, otherwise -1.  */
  if (unsigned_p)
    final_larger
      = ((unsigned HOST_WIDE_INT) INTVAL (final_value)
	 > (unsigned HOST_WIDE_INT) INTVAL (initial_value))
	- ((unsigned HOST_WIDE_INT) INTVAL (final_value)
	   < (unsigned HOST_WIDE_INT) INTVAL (initial_value));
  else
    final_larger = (INTVAL (final_value) > INTVAL (initial_value))
      - (INTVAL (final_value) < INTVAL (initial_value));

  if (INTVAL (increment) > 0)
    increment_dir = 1;
  else if (INTVAL (increment) == 0)
    increment_dir = 0;
  else
    increment_dir = -1;

  /* There are 27 different cases: compare_dir = -1, 0, 1;
     final_larger = -1, 0, 1; increment_dir = -1, 0, 1.
     There are 4 normal cases, 4 reverse cases (where the iteration variable
     will overflow before the loop exits), 4 infinite loop cases, and 15
     immediate exit (0 or 1 iteration depending on loop type) cases.
     Only try to optimize the normal cases.  */

  /* (compare_dir/final_larger/increment_dir)
     Normal cases: (0/-1/-1), (0/1/1), (-1/-1/-1), (1/1/1)
     Reverse cases: (0/-1/1), (0/1/-1), (-1/-1/1), (1/1/-1)
     Infinite loops: (0/-1/0), (0/1/0), (-1/-1/0), (1/1/0)
     Immediate exit: (0/0/X), (-1/0/X), (-1/1/X), (1/0/X), (1/-1/X) */

  /* ?? If the meaning of reverse loops (where the iteration variable
     will overflow before the loop exits) is undefined, then could
     eliminate all of these special checks, and just always assume
     the loops are normal/immediate/infinite.  Note that this means
     the sign of increment_dir does not have to be known.  Also,
     since it does not really hurt if immediate exit loops or infinite loops
     are optimized, then that case could be ignored also, and hence all
     loops can be optimized.

     According to ANSI Spec, the reverse loop case result is undefined,
     because the action on overflow is undefined.

     See also the special test for NE loops below.  */

  if (final_larger == increment_dir && final_larger != 0
      && (final_larger == compare_dir || compare_dir == 0))
    /* Normal case.  */
    ;
  else
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream, "Loop iterations: Not normal loop.\n");
      return 0;
    }

  /* Calculate the number of iterations, final_value is only an approximation,
     so correct for that.  Note that abs_diff and n_iterations are
     unsigned, because they can be as large as 2^n - 1.  */

  inc = INTVAL (increment);
  gcc_assert (inc);
  if (inc > 0)
    {
      abs_diff = INTVAL (final_value) - INTVAL (initial_value);
      abs_inc = inc;
    }
  else
    {
      abs_diff = INTVAL (initial_value) - INTVAL (final_value);
      abs_inc = -inc;
    }

  /* Given that iteration_var is going to iterate over its own mode,
     not HOST_WIDE_INT, disregard higher bits that might have come
     into the picture due to sign extension of initial and final
     values.  */
  abs_diff &= ((unsigned HOST_WIDE_INT) 1
	       << (GET_MODE_BITSIZE (GET_MODE (iteration_var)) - 1)
	       << 1) - 1;

  /* For NE tests, make sure that the iteration variable won't miss
     the final value.  If abs_diff mod abs_incr is not zero, then the
     iteration variable will overflow before the loop exits, and we
     can not calculate the number of iterations.  */
  if (compare_dir == 0 && (abs_diff % abs_inc) != 0)
    return 0;

  /* Note that the number of iterations could be calculated using
     (abs_diff + abs_inc - 1) / abs_inc, provided care was taken to
     handle potential overflow of the summation.  */
  loop_info->n_iterations = abs_diff / abs_inc + ((abs_diff % abs_inc) != 0);
  return loop_info->n_iterations;
}
|
6351 |
|
|
|
6352 |
|
|
/* Perform strength reduction and induction variable elimination.
|
6353 |
|
|
|
6354 |
|
|
Pseudo registers created during this function will be beyond the
|
6355 |
|
|
last valid index in several tables including
|
6356 |
|
|
REGS->ARRAY[I].N_TIMES_SET and REGNO_LAST_UID. This does not cause a
|
6357 |
|
|
problem here, because the added registers cannot be givs outside of
|
6358 |
|
|
their loop, and hence will never be reconsidered. But scan_loop
|
6359 |
|
|
must check regnos to make sure they are in bounds. */
|
6360 |
|
|
|
6361 |
|
|
static void
|
6362 |
|
|
strength_reduce (struct loop *loop, int flags)
|
6363 |
|
|
{
|
6364 |
|
|
struct loop_info *loop_info = LOOP_INFO (loop);
|
6365 |
|
|
struct loop_regs *regs = LOOP_REGS (loop);
|
6366 |
|
|
struct loop_ivs *ivs = LOOP_IVS (loop);
|
6367 |
|
|
rtx p;
|
6368 |
|
|
/* Temporary list pointer for traversing ivs->list. */
|
6369 |
|
|
struct iv_class *bl;
|
6370 |
|
|
/* Ratio of extra register life span we can justify
|
6371 |
|
|
for saving an instruction. More if loop doesn't call subroutines
|
6372 |
|
|
since in that case saving an insn makes more difference
|
6373 |
|
|
and more registers are available. */
|
6374 |
|
|
/* ??? could set this to last value of threshold in move_movables */
|
6375 |
|
|
int threshold = (loop_info->has_call ? 1 : 2) * (3 + n_non_fixed_regs);
|
6376 |
|
|
/* Map of pseudo-register replacements. */
|
6377 |
|
|
rtx *reg_map = NULL;
|
6378 |
|
|
int reg_map_size;
|
6379 |
|
|
rtx test_reg = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
|
6380 |
|
|
int insn_count = count_insns_in_loop (loop);
|
6381 |
|
|
|
6382 |
|
|
addr_placeholder = gen_reg_rtx (Pmode);
|
6383 |
|
|
|
6384 |
|
|
ivs->n_regs = max_reg_before_loop;
|
6385 |
|
|
ivs->regs = xcalloc (ivs->n_regs, sizeof (struct iv));
|
6386 |
|
|
|
6387 |
|
|
/* Find all BIVs in loop. */
|
6388 |
|
|
loop_bivs_find (loop);
|
6389 |
|
|
|
6390 |
|
|
/* Exit if there are no bivs. */
|
6391 |
|
|
if (! ivs->list)
|
6392 |
|
|
{
|
6393 |
|
|
loop_ivs_free (loop);
|
6394 |
|
|
return;
|
6395 |
|
|
}
|
6396 |
|
|
|
6397 |
|
|
/* Determine how BIVS are initialized by looking through pre-header
|
6398 |
|
|
extended basic block. */
|
6399 |
|
|
loop_bivs_init_find (loop);
|
6400 |
|
|
|
6401 |
|
|
/* Look at the each biv and see if we can say anything better about its
|
6402 |
|
|
initial value from any initializing insns set up above. */
|
6403 |
|
|
loop_bivs_check (loop);
|
6404 |
|
|
|
6405 |
|
|
/* Search the loop for general induction variables. */
|
6406 |
|
|
loop_givs_find (loop);
|
6407 |
|
|
|
6408 |
|
|
/* Try to calculate and save the number of loop iterations. This is
|
6409 |
|
|
set to zero if the actual number can not be calculated. This must
|
6410 |
|
|
be called after all giv's have been identified, since otherwise it may
|
6411 |
|
|
fail if the iteration variable is a giv. */
|
6412 |
|
|
loop_iterations (loop);
|
6413 |
|
|
|
6414 |
|
|
#ifdef HAVE_prefetch
|
6415 |
|
|
if (flags & LOOP_PREFETCH)
|
6416 |
|
|
emit_prefetch_instructions (loop);
|
6417 |
|
|
#endif
|
6418 |
|
|
|
6419 |
|
|
/* Now for each giv for which we still don't know whether or not it is
|
6420 |
|
|
replaceable, check to see if it is replaceable because its final value
|
6421 |
|
|
can be calculated. This must be done after loop_iterations is called,
|
6422 |
|
|
so that final_giv_value will work correctly. */
|
6423 |
|
|
loop_givs_check (loop);
|
6424 |
|
|
|
6425 |
|
|
/* Try to prove that the loop counter variable (if any) is always
|
6426 |
|
|
nonnegative; if so, record that fact with a REG_NONNEG note
|
6427 |
|
|
so that "decrement and branch until zero" insn can be used. */
|
6428 |
|
|
check_dbra_loop (loop, insn_count);
|
6429 |
|
|
|
6430 |
|
|
/* Create reg_map to hold substitutions for replaceable giv regs.
|
6431 |
|
|
Some givs might have been made from biv increments, so look at
|
6432 |
|
|
ivs->reg_iv_type for a suitable size. */
|
6433 |
|
|
reg_map_size = ivs->n_regs;
|
6434 |
|
|
reg_map = xcalloc (reg_map_size, sizeof (rtx));
|
6435 |
|
|
|
6436 |
|
|
/* Examine each iv class for feasibility of strength reduction/induction
|
6437 |
|
|
variable elimination. */
|
6438 |
|
|
|
6439 |
|
|
for (bl = ivs->list; bl; bl = bl->next)
|
6440 |
|
|
{
|
6441 |
|
|
struct induction *v;
|
6442 |
|
|
int benefit;
|
6443 |
|
|
|
6444 |
|
|
/* Test whether it will be possible to eliminate this biv
|
6445 |
|
|
provided all givs are reduced. */
|
6446 |
|
|
bl->eliminable = loop_biv_eliminable_p (loop, bl, threshold, insn_count);
|
6447 |
|
|
|
6448 |
|
|
/* This will be true at the end, if all givs which depend on this
|
6449 |
|
|
biv have been strength reduced.
|
6450 |
|
|
We can't (currently) eliminate the biv unless this is so. */
|
6451 |
|
|
bl->all_reduced = 1;
|
6452 |
|
|
|
6453 |
|
|
/* Check each extension dependent giv in this class to see if its
|
6454 |
|
|
root biv is safe from wrapping in the interior mode. */
|
6455 |
|
|
check_ext_dependent_givs (loop, bl);
|
6456 |
|
|
|
6457 |
|
|
/* Combine all giv's for this iv_class. */
|
6458 |
|
|
combine_givs (regs, bl);
|
6459 |
|
|
|
6460 |
|
|
for (v = bl->giv; v; v = v->next_iv)
|
6461 |
|
|
{
|
6462 |
|
|
struct induction *tv;
|
6463 |
|
|
|
6464 |
|
|
if (v->ignore || v->same)
|
6465 |
|
|
continue;
|
6466 |
|
|
|
6467 |
|
|
benefit = loop_giv_reduce_benefit (loop, bl, v, test_reg);
|
6468 |
|
|
|
6469 |
|
|
/* If an insn is not to be strength reduced, then set its ignore
|
6470 |
|
|
flag, and clear bl->all_reduced. */
|
6471 |
|
|
|
6472 |
|
|
/* A giv that depends on a reversed biv must be reduced if it is
|
6473 |
|
|
used after the loop exit, otherwise, it would have the wrong
|
6474 |
|
|
value after the loop exit. To make it simple, just reduce all
|
6475 |
|
|
of such giv's whether or not we know they are used after the loop
|
6476 |
|
|
exit. */
|
6477 |
|
|
|
6478 |
|
|
if (v->lifetime * threshold * benefit < insn_count
|
6479 |
|
|
&& ! bl->reversed)
|
6480 |
|
|
{
|
6481 |
|
|
if (loop_dump_stream)
|
6482 |
|
|
fprintf (loop_dump_stream,
|
6483 |
|
|
"giv of insn %d not worth while, %d vs %d.\n",
|
6484 |
|
|
INSN_UID (v->insn),
|
6485 |
|
|
v->lifetime * threshold * benefit, insn_count);
|
6486 |
|
|
v->ignore = 1;
|
6487 |
|
|
bl->all_reduced = 0;
|
6488 |
|
|
}
|
6489 |
|
|
else if (!v->always_computable
|
6490 |
|
|
&& (may_trap_or_fault_p (v->add_val)
|
6491 |
|
|
|| may_trap_or_fault_p (v->mult_val)))
|
6492 |
|
|
{
|
6493 |
|
|
if (loop_dump_stream)
|
6494 |
|
|
fprintf (loop_dump_stream,
|
6495 |
|
|
"giv of insn %d: not always computable.\n",
|
6496 |
|
|
INSN_UID (v->insn));
|
6497 |
|
|
v->ignore = 1;
|
6498 |
|
|
bl->all_reduced = 0;
|
6499 |
|
|
}
|
6500 |
|
|
else
|
6501 |
|
|
{
|
6502 |
|
|
/* Check that we can increment the reduced giv without a
|
6503 |
|
|
multiply insn. If not, reject it. */
|
6504 |
|
|
|
6505 |
|
|
for (tv = bl->biv; tv; tv = tv->next_iv)
|
6506 |
|
|
if (tv->mult_val == const1_rtx
|
6507 |
|
|
&& ! product_cheap_p (tv->add_val, v->mult_val))
|
6508 |
|
|
{
|
6509 |
|
|
if (loop_dump_stream)
|
6510 |
|
|
fprintf (loop_dump_stream,
|
6511 |
|
|
"giv of insn %d: would need a multiply.\n",
|
6512 |
|
|
INSN_UID (v->insn));
|
6513 |
|
|
v->ignore = 1;
|
6514 |
|
|
bl->all_reduced = 0;
|
6515 |
|
|
break;
|
6516 |
|
|
}
|
6517 |
|
|
}
|
6518 |
|
|
}
|
6519 |
|
|
|
6520 |
|
|
/* Check for givs whose first use is their definition and whose
|
6521 |
|
|
last use is the definition of another giv. If so, it is likely
|
6522 |
|
|
dead and should not be used to derive another giv nor to
|
6523 |
|
|
eliminate a biv. */
|
6524 |
|
|
loop_givs_dead_check (loop, bl);
|
6525 |
|
|
|
6526 |
|
|
/* Reduce each giv that we decided to reduce. */
|
6527 |
|
|
loop_givs_reduce (loop, bl);
|
6528 |
|
|
|
6529 |
|
|
/* Rescan all givs. If a giv is the same as a giv not reduced, mark it
|
6530 |
|
|
as not reduced.
|
6531 |
|
|
|
6532 |
|
|
For each giv register that can be reduced now: if replaceable,
|
6533 |
|
|
substitute reduced reg wherever the old giv occurs;
|
6534 |
|
|
else add new move insn "giv_reg = reduced_reg". */
|
6535 |
|
|
loop_givs_rescan (loop, bl, reg_map);
|
6536 |
|
|
|
6537 |
|
|
/* All the givs based on the biv bl have been reduced if they
|
6538 |
|
|
merit it. */
|
6539 |
|
|
|
6540 |
|
|
/* For each giv not marked as maybe dead that has been combined with a
|
6541 |
|
|
second giv, clear any "maybe dead" mark on that second giv.
|
6542 |
|
|
v->new_reg will either be or refer to the register of the giv it
|
6543 |
|
|
combined with.
|
6544 |
|
|
|
6545 |
|
|
Doing this clearing avoids problems in biv elimination where
|
6546 |
|
|
a giv's new_reg is a complex value that can't be put in the
|
6547 |
|
|
insn but the giv combined with (with a reg as new_reg) is
|
6548 |
|
|
marked maybe_dead. Since the register will be used in either
|
6549 |
|
|
case, we'd prefer it be used from the simpler giv. */
|
6550 |
|
|
|
6551 |
|
|
for (v = bl->giv; v; v = v->next_iv)
|
6552 |
|
|
if (! v->maybe_dead && v->same)
|
6553 |
|
|
v->same->maybe_dead = 0;
|
6554 |
|
|
|
6555 |
|
|
/* Try to eliminate the biv, if it is a candidate.
|
6556 |
|
|
This won't work if ! bl->all_reduced,
|
6557 |
|
|
since the givs we planned to use might not have been reduced.
|
6558 |
|
|
|
6559 |
|
|
We have to be careful that we didn't initially think we could
|
6560 |
|
|
eliminate this biv because of a giv that we now think may be
|
6561 |
|
|
dead and shouldn't be used as a biv replacement.
|
6562 |
|
|
|
6563 |
|
|
Also, there is the possibility that we may have a giv that looks
|
6564 |
|
|
like it can be used to eliminate a biv, but the resulting insn
|
6565 |
|
|
isn't valid. This can happen, for example, on the 88k, where a
|
6566 |
|
|
JUMP_INSN can compare a register only with zero. Attempts to
|
6567 |
|
|
replace it with a compare with a constant will fail.
|
6568 |
|
|
|
6569 |
|
|
Note that in cases where this call fails, we may have replaced some
|
6570 |
|
|
of the occurrences of the biv with a giv, but no harm was done in
|
6571 |
|
|
doing so in the rare cases where it can occur. */
|
6572 |
|
|
|
6573 |
|
|
if (bl->all_reduced == 1 && bl->eliminable
|
6574 |
|
|
&& maybe_eliminate_biv (loop, bl, 1, threshold, insn_count))
|
6575 |
|
|
{
|
6576 |
|
|
/* ?? If we created a new test to bypass the loop entirely,
|
6577 |
|
|
or otherwise drop straight in, based on this test, then
|
6578 |
|
|
we might want to rewrite it also. This way some later
|
6579 |
|
|
pass has more hope of removing the initialization of this
|
6580 |
|
|
biv entirely. */
|
6581 |
|
|
|
6582 |
|
|
/* If final_value != 0, then the biv may be used after loop end
|
6583 |
|
|
and we must emit an insn to set it just in case.
|
6584 |
|
|
|
6585 |
|
|
Reversed bivs already have an insn after the loop setting their
|
6586 |
|
|
value, so we don't need another one. We can't calculate the
|
6587 |
|
|
proper final value for such a biv here anyways. */
|
6588 |
|
|
if (bl->final_value && ! bl->reversed)
|
6589 |
|
|
loop_insn_sink_or_swim (loop,
|
6590 |
|
|
gen_load_of_final_value (bl->biv->dest_reg,
|
6591 |
|
|
bl->final_value));
|
6592 |
|
|
|
6593 |
|
|
if (loop_dump_stream)
|
6594 |
|
|
fprintf (loop_dump_stream, "Reg %d: biv eliminated\n",
|
6595 |
|
|
bl->regno);
|
6596 |
|
|
}
|
6597 |
|
|
/* See above note wrt final_value. But since we couldn't eliminate
|
6598 |
|
|
the biv, we must set the value after the loop instead of before. */
|
6599 |
|
|
else if (bl->final_value && ! bl->reversed)
|
6600 |
|
|
loop_insn_sink (loop, gen_load_of_final_value (bl->biv->dest_reg,
|
6601 |
|
|
bl->final_value));
|
6602 |
|
|
}
|
6603 |
|
|
|
6604 |
|
|
/* Go through all the instructions in the loop, making all the
|
6605 |
|
|
register substitutions scheduled in REG_MAP. */
|
6606 |
|
|
|
6607 |
|
|
for (p = loop->start; p != loop->end; p = NEXT_INSN (p))
|
6608 |
|
|
if (INSN_P (p))
|
6609 |
|
|
{
|
6610 |
|
|
replace_regs (PATTERN (p), reg_map, reg_map_size, 0);
|
6611 |
|
|
replace_regs (REG_NOTES (p), reg_map, reg_map_size, 0);
|
6612 |
|
|
INSN_CODE (p) = -1;
|
6613 |
|
|
}
|
6614 |
|
|
|
6615 |
|
|
if (loop_dump_stream)
|
6616 |
|
|
fprintf (loop_dump_stream, "\n");
|
6617 |
|
|
|
6618 |
|
|
loop_ivs_free (loop);
|
6619 |
|
|
if (reg_map)
|
6620 |
|
|
free (reg_map);
|
6621 |
|
|
}
|
6622 |
|
|
|
6623 |
|
|
/* Record all basic induction variables calculated in the insn.  */
|
6624 |
|
|
static rtx
|
6625 |
|
|
check_insn_for_bivs (struct loop *loop, rtx p, int not_every_iteration,
|
6626 |
|
|
int maybe_multiple)
|
6627 |
|
|
{
|
6628 |
|
|
struct loop_ivs *ivs = LOOP_IVS (loop);
|
6629 |
|
|
rtx set;
|
6630 |
|
|
rtx dest_reg;
|
6631 |
|
|
rtx inc_val;
|
6632 |
|
|
rtx mult_val;
|
6633 |
|
|
rtx *location;
|
6634 |
|
|
|
6635 |
|
|
if (NONJUMP_INSN_P (p)
|
6636 |
|
|
&& (set = single_set (p))
|
6637 |
|
|
&& REG_P (SET_DEST (set)))
|
6638 |
|
|
{
|
6639 |
|
|
dest_reg = SET_DEST (set);
|
6640 |
|
|
if (REGNO (dest_reg) < max_reg_before_loop
|
6641 |
|
|
&& REGNO (dest_reg) >= FIRST_PSEUDO_REGISTER
|
6642 |
|
|
&& REG_IV_TYPE (ivs, REGNO (dest_reg)) != NOT_BASIC_INDUCT)
|
6643 |
|
|
{
|
6644 |
|
|
if (basic_induction_var (loop, SET_SRC (set),
|
6645 |
|
|
GET_MODE (SET_SRC (set)),
|
6646 |
|
|
dest_reg, p, &inc_val, &mult_val,
|
6647 |
|
|
&location))
|
6648 |
|
|
{
|
6649 |
|
|
/* It is a possible basic induction variable.
|
6650 |
|
|
Create and initialize an induction structure for it. */
|
6651 |
|
|
|
6652 |
|
|
struct induction *v = xmalloc (sizeof (struct induction));
|
6653 |
|
|
|
6654 |
|
|
record_biv (loop, v, p, dest_reg, inc_val, mult_val, location,
|
6655 |
|
|
not_every_iteration, maybe_multiple);
|
6656 |
|
|
REG_IV_TYPE (ivs, REGNO (dest_reg)) = BASIC_INDUCT;
|
6657 |
|
|
}
|
6658 |
|
|
else if (REGNO (dest_reg) < ivs->n_regs)
|
6659 |
|
|
REG_IV_TYPE (ivs, REGNO (dest_reg)) = NOT_BASIC_INDUCT;
|
6660 |
|
|
}
|
6661 |
|
|
}
|
6662 |
|
|
return p;
|
6663 |
|
|
}
|
6664 |
|
|
|
6665 |
|
|
/* Record all givs calculated in the insn.
|
6666 |
|
|
A register is a giv if: it is only set once, it is a function of a
|
6667 |
|
|
biv and a constant (or invariant), and it is not a biv. */
|
6668 |
|
|
static rtx
check_insn_for_givs (struct loop *loop, rtx p, int not_every_iteration,
		     int maybe_multiple)
{
  struct loop_regs *regs = LOOP_REGS (loop);

  rtx set;
  /* Look for a general induction variable in a register.  */
  if (NONJUMP_INSN_P (p)
      && (set = single_set (p))
      && REG_P (SET_DEST (set))
      && ! regs->array[REGNO (SET_DEST (set))].may_not_optimize)
    {
      rtx src_reg;
      rtx dest_reg;
      rtx add_val;
      rtx mult_val;
      rtx ext_val;
      int benefit;
      rtx regnote = 0;
      rtx last_consec_insn;

      dest_reg = SET_DEST (set);
      /* Hard registers are never tracked as givs; give up on this insn.  */
      if (REGNO (dest_reg) < FIRST_PSEUDO_REGISTER)
	return p;

      /* NOTE: the condition below relies on short-circuit order.  BENEFIT,
	 SRC_REG, ADD_VAL, MULT_VAL and EXT_VAL are set as side effects of
	 general_induction_var / consec_sets_giv, and REGNOTE is assigned
	 inside the condition itself.  */
      if (/* SET_SRC is a giv.  */
	  (general_induction_var (loop, SET_SRC (set), &src_reg, &add_val,
				  &mult_val, &ext_val, 0, &benefit, VOIDmode)
	   /* Equivalent expression is a giv.  */
	   || ((regnote = find_reg_note (p, REG_EQUAL, NULL_RTX))
	       && general_induction_var (loop, XEXP (regnote, 0), &src_reg,
					 &add_val, &mult_val, &ext_val, 0,
					 &benefit, VOIDmode)))
	  /* Don't try to handle any regs made by loop optimization.
	     We have nothing on them in regno_first_uid, etc.  */
	  && REGNO (dest_reg) < max_reg_before_loop
	  /* Don't recognize a BASIC_INDUCT_VAR here.  */
	  && dest_reg != src_reg
	  /* This must be the only place where the register is set.  */
	  && (regs->array[REGNO (dest_reg)].n_times_set == 1
	      /* or all sets must be consecutive and make a giv.  */
	      || (benefit = consec_sets_giv (loop, benefit, p,
					     src_reg, dest_reg,
					     &add_val, &mult_val, &ext_val,
					     &last_consec_insn))))
	{
	  struct induction *v = xmalloc (sizeof (struct induction));

	  /* If this is a library call, increase benefit.  */
	  if (find_reg_note (p, REG_RETVAL, NULL_RTX))
	    benefit += libcall_benefit (p);

	  /* Skip the consecutive insns, if there are any.  P is advanced
	     here so that the caller resumes scanning after them.  */
	  if (regs->array[REGNO (dest_reg)].n_times_set != 1)
	    p = last_consec_insn;

	  record_giv (loop, v, p, src_reg, dest_reg, mult_val, add_val,
		      ext_val, benefit, DEST_REG, not_every_iteration,
		      maybe_multiple, (rtx*) 0);

	}
    }

  /* Look for givs which are memory addresses.  */
  if (NONJUMP_INSN_P (p))
    find_mem_givs (loop, PATTERN (p), p, not_every_iteration,
		   maybe_multiple);

  /* Update the status of whether giv can derive other givs.  This can
     change when we pass a label or an insn that updates a biv.  */
  if (INSN_P (p) || LABEL_P (p))
    update_giv_derive (loop, p);
  return p;
}
|
6743 |
|
|
|
6744 |
|
|
/* Return 1 if X is a valid source for an initial value (or as value being
|
6745 |
|
|
compared against in an initial test).
|
6746 |
|
|
|
6747 |
|
|
X must be either a register or constant and must not be clobbered between
|
6748 |
|
|
the current insn and the start of the loop.
|
6749 |
|
|
|
6750 |
|
|
INSN is the insn containing X. */
|
6751 |
|
|
|
6752 |
|
|
static int
|
6753 |
|
|
valid_initial_value_p (rtx x, rtx insn, int call_seen, rtx loop_start)
|
6754 |
|
|
{
|
6755 |
|
|
if (CONSTANT_P (x))
|
6756 |
|
|
return 1;
|
6757 |
|
|
|
6758 |
|
|
/* Only consider pseudos we know about initialized in insns whose luids
|
6759 |
|
|
we know. */
|
6760 |
|
|
if (!REG_P (x)
|
6761 |
|
|
|| REGNO (x) >= max_reg_before_loop)
|
6762 |
|
|
return 0;
|
6763 |
|
|
|
6764 |
|
|
/* Don't use call-clobbered registers across a call which clobbers it. On
|
6765 |
|
|
some machines, don't use any hard registers at all. */
|
6766 |
|
|
if (REGNO (x) < FIRST_PSEUDO_REGISTER
|
6767 |
|
|
&& (SMALL_REGISTER_CLASSES
|
6768 |
|
|
|| (call_seen && call_used_regs[REGNO (x)])))
|
6769 |
|
|
return 0;
|
6770 |
|
|
|
6771 |
|
|
/* Don't use registers that have been clobbered before the start of the
|
6772 |
|
|
loop. */
|
6773 |
|
|
if (reg_set_between_p (x, insn, loop_start))
|
6774 |
|
|
return 0;
|
6775 |
|
|
|
6776 |
|
|
return 1;
|
6777 |
|
|
}
|
6778 |
|
|
|
6779 |
|
|
/* Scan X for memory refs and check each memory address
|
6780 |
|
|
as a possible giv. INSN is the insn whose pattern X comes from.
|
6781 |
|
|
NOT_EVERY_ITERATION is 1 if the insn might not be executed during
|
6782 |
|
|
every loop iteration. MAYBE_MULTIPLE is 1 if the insn might be executed
|
6783 |
|
|
more than once in each loop iteration. */
|
6784 |
|
|
|
6785 |
|
|
static void
|
6786 |
|
|
find_mem_givs (const struct loop *loop, rtx x, rtx insn,
|
6787 |
|
|
int not_every_iteration, int maybe_multiple)
|
6788 |
|
|
{
|
6789 |
|
|
int i, j;
|
6790 |
|
|
enum rtx_code code;
|
6791 |
|
|
const char *fmt;
|
6792 |
|
|
|
6793 |
|
|
if (x == 0)
|
6794 |
|
|
return;
|
6795 |
|
|
|
6796 |
|
|
code = GET_CODE (x);
|
6797 |
|
|
switch (code)
|
6798 |
|
|
{
|
6799 |
|
|
case REG:
|
6800 |
|
|
case CONST_INT:
|
6801 |
|
|
case CONST:
|
6802 |
|
|
case CONST_DOUBLE:
|
6803 |
|
|
case SYMBOL_REF:
|
6804 |
|
|
case LABEL_REF:
|
6805 |
|
|
case PC:
|
6806 |
|
|
case CC0:
|
6807 |
|
|
case ADDR_VEC:
|
6808 |
|
|
case ADDR_DIFF_VEC:
|
6809 |
|
|
case USE:
|
6810 |
|
|
case CLOBBER:
|
6811 |
|
|
return;
|
6812 |
|
|
|
6813 |
|
|
case MEM:
|
6814 |
|
|
{
|
6815 |
|
|
rtx src_reg;
|
6816 |
|
|
rtx add_val;
|
6817 |
|
|
rtx mult_val;
|
6818 |
|
|
rtx ext_val;
|
6819 |
|
|
int benefit;
|
6820 |
|
|
|
6821 |
|
|
/* This code used to disable creating GIVs with mult_val == 1 and
|
6822 |
|
|
add_val == 0. However, this leads to lost optimizations when
|
6823 |
|
|
it comes time to combine a set of related DEST_ADDR GIVs, since
|
6824 |
|
|
this one would not be seen. */
|
6825 |
|
|
|
6826 |
|
|
if (general_induction_var (loop, XEXP (x, 0), &src_reg, &add_val,
|
6827 |
|
|
&mult_val, &ext_val, 1, &benefit,
|
6828 |
|
|
GET_MODE (x)))
|
6829 |
|
|
{
|
6830 |
|
|
/* Found one; record it. */
|
6831 |
|
|
struct induction *v = xmalloc (sizeof (struct induction));
|
6832 |
|
|
|
6833 |
|
|
record_giv (loop, v, insn, src_reg, addr_placeholder, mult_val,
|
6834 |
|
|
add_val, ext_val, benefit, DEST_ADDR,
|
6835 |
|
|
not_every_iteration, maybe_multiple, &XEXP (x, 0));
|
6836 |
|
|
|
6837 |
|
|
v->mem = x;
|
6838 |
|
|
}
|
6839 |
|
|
}
|
6840 |
|
|
return;
|
6841 |
|
|
|
6842 |
|
|
default:
|
6843 |
|
|
break;
|
6844 |
|
|
}
|
6845 |
|
|
|
6846 |
|
|
/* Recursively scan the subexpressions for other mem refs. */
|
6847 |
|
|
|
6848 |
|
|
fmt = GET_RTX_FORMAT (code);
|
6849 |
|
|
for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
|
6850 |
|
|
if (fmt[i] == 'e')
|
6851 |
|
|
find_mem_givs (loop, XEXP (x, i), insn, not_every_iteration,
|
6852 |
|
|
maybe_multiple);
|
6853 |
|
|
else if (fmt[i] == 'E')
|
6854 |
|
|
for (j = 0; j < XVECLEN (x, i); j++)
|
6855 |
|
|
find_mem_givs (loop, XVECEXP (x, i, j), insn, not_every_iteration,
|
6856 |
|
|
maybe_multiple);
|
6857 |
|
|
}
|
6858 |
|
|
|
6859 |
|
|
/* Fill in the data about one biv update.
|
6860 |
|
|
V is the `struct induction' in which we record the biv. (It is
|
6861 |
|
|
allocated by the caller, with alloca.)
|
6862 |
|
|
INSN is the insn that sets it.
|
6863 |
|
|
DEST_REG is the biv's reg.
|
6864 |
|
|
|
6865 |
|
|
MULT_VAL is const1_rtx if the biv is being incremented here, in which case
|
6866 |
|
|
INC_VAL is the increment. Otherwise, MULT_VAL is const0_rtx and the biv is
|
6867 |
|
|
being set to INC_VAL.
|
6868 |
|
|
|
6869 |
|
|
   NOT_EVERY_ITERATION is nonzero if this biv update is not known to be
|
6870 |
|
|
executed every iteration; MAYBE_MULTIPLE is nonzero if this biv update
|
6871 |
|
|
can be executed more than once per iteration. If MAYBE_MULTIPLE
|
6872 |
|
|
and NOT_EVERY_ITERATION are both zero, we know that the biv update is
|
6873 |
|
|
executed exactly once per iteration. */
|
6874 |
|
|
|
6875 |
|
|
static void
|
6876 |
|
|
record_biv (struct loop *loop, struct induction *v, rtx insn, rtx dest_reg,
|
6877 |
|
|
rtx inc_val, rtx mult_val, rtx *location,
|
6878 |
|
|
int not_every_iteration, int maybe_multiple)
|
6879 |
|
|
{
|
6880 |
|
|
struct loop_ivs *ivs = LOOP_IVS (loop);
|
6881 |
|
|
struct iv_class *bl;
|
6882 |
|
|
|
6883 |
|
|
v->insn = insn;
|
6884 |
|
|
v->src_reg = dest_reg;
|
6885 |
|
|
v->dest_reg = dest_reg;
|
6886 |
|
|
v->mult_val = mult_val;
|
6887 |
|
|
v->add_val = inc_val;
|
6888 |
|
|
v->ext_dependent = NULL_RTX;
|
6889 |
|
|
v->location = location;
|
6890 |
|
|
v->mode = GET_MODE (dest_reg);
|
6891 |
|
|
v->always_computable = ! not_every_iteration;
|
6892 |
|
|
v->always_executed = ! not_every_iteration;
|
6893 |
|
|
v->maybe_multiple = maybe_multiple;
|
6894 |
|
|
v->same = 0;
|
6895 |
|
|
|
6896 |
|
|
/* Add this to the reg's iv_class, creating a class
|
6897 |
|
|
if this is the first incrementation of the reg. */
|
6898 |
|
|
|
6899 |
|
|
bl = REG_IV_CLASS (ivs, REGNO (dest_reg));
|
6900 |
|
|
if (bl == 0)
|
6901 |
|
|
{
|
6902 |
|
|
/* Create and initialize new iv_class. */
|
6903 |
|
|
|
6904 |
|
|
bl = xmalloc (sizeof (struct iv_class));
|
6905 |
|
|
|
6906 |
|
|
bl->regno = REGNO (dest_reg);
|
6907 |
|
|
bl->biv = 0;
|
6908 |
|
|
bl->giv = 0;
|
6909 |
|
|
bl->biv_count = 0;
|
6910 |
|
|
bl->giv_count = 0;
|
6911 |
|
|
|
6912 |
|
|
/* Set initial value to the reg itself. */
|
6913 |
|
|
bl->initial_value = dest_reg;
|
6914 |
|
|
bl->final_value = 0;
|
6915 |
|
|
/* We haven't seen the initializing insn yet. */
|
6916 |
|
|
bl->init_insn = 0;
|
6917 |
|
|
bl->init_set = 0;
|
6918 |
|
|
bl->initial_test = 0;
|
6919 |
|
|
bl->incremented = 0;
|
6920 |
|
|
bl->eliminable = 0;
|
6921 |
|
|
bl->nonneg = 0;
|
6922 |
|
|
bl->reversed = 0;
|
6923 |
|
|
bl->total_benefit = 0;
|
6924 |
|
|
|
6925 |
|
|
/* Add this class to ivs->list. */
|
6926 |
|
|
bl->next = ivs->list;
|
6927 |
|
|
ivs->list = bl;
|
6928 |
|
|
|
6929 |
|
|
/* Put it in the array of biv register classes. */
|
6930 |
|
|
REG_IV_CLASS (ivs, REGNO (dest_reg)) = bl;
|
6931 |
|
|
}
|
6932 |
|
|
else
|
6933 |
|
|
{
|
6934 |
|
|
/* Check if location is the same as a previous one. */
|
6935 |
|
|
struct induction *induction;
|
6936 |
|
|
for (induction = bl->biv; induction; induction = induction->next_iv)
|
6937 |
|
|
if (location == induction->location)
|
6938 |
|
|
{
|
6939 |
|
|
v->same = induction;
|
6940 |
|
|
break;
|
6941 |
|
|
}
|
6942 |
|
|
}
|
6943 |
|
|
|
6944 |
|
|
/* Update IV_CLASS entry for this biv. */
|
6945 |
|
|
v->next_iv = bl->biv;
|
6946 |
|
|
bl->biv = v;
|
6947 |
|
|
bl->biv_count++;
|
6948 |
|
|
if (mult_val == const1_rtx)
|
6949 |
|
|
bl->incremented = 1;
|
6950 |
|
|
|
6951 |
|
|
if (loop_dump_stream)
|
6952 |
|
|
loop_biv_dump (v, loop_dump_stream, 0);
|
6953 |
|
|
}
|
6954 |
|
|
|
6955 |
|
|
/* Fill in the data about one giv.
|
6956 |
|
|
V is the `struct induction' in which we record the giv. (It is
|
6957 |
|
|
allocated by the caller, with alloca.)
|
6958 |
|
|
INSN is the insn that sets it.
|
6959 |
|
|
BENEFIT estimates the savings from deleting this insn.
|
6960 |
|
|
TYPE is DEST_REG or DEST_ADDR; it says whether the giv is computed
|
6961 |
|
|
into a register or is used as a memory address.
|
6962 |
|
|
|
6963 |
|
|
SRC_REG is the biv reg which the giv is computed from.
|
6964 |
|
|
DEST_REG is the giv's reg (if the giv is stored in a reg).
|
6965 |
|
|
MULT_VAL and ADD_VAL are the coefficients used to compute the giv.
|
6966 |
|
|
LOCATION points to the place where this giv's value appears in INSN. */
|
6967 |
|
|
|
6968 |
|
|
static void
|
6969 |
|
|
record_giv (const struct loop *loop, struct induction *v, rtx insn,
|
6970 |
|
|
rtx src_reg, rtx dest_reg, rtx mult_val, rtx add_val,
|
6971 |
|
|
rtx ext_val, int benefit, enum g_types type,
|
6972 |
|
|
int not_every_iteration, int maybe_multiple, rtx *location)
|
6973 |
|
|
{
|
6974 |
|
|
struct loop_ivs *ivs = LOOP_IVS (loop);
|
6975 |
|
|
struct induction *b;
|
6976 |
|
|
struct iv_class *bl;
|
6977 |
|
|
rtx set = single_set (insn);
|
6978 |
|
|
rtx temp;
|
6979 |
|
|
|
6980 |
|
|
/* Attempt to prove constantness of the values. Don't let simplify_rtx
|
6981 |
|
|
undo the MULT canonicalization that we performed earlier. */
|
6982 |
|
|
temp = simplify_rtx (add_val);
|
6983 |
|
|
if (temp
|
6984 |
|
|
&& ! (GET_CODE (add_val) == MULT
|
6985 |
|
|
&& GET_CODE (temp) == ASHIFT))
|
6986 |
|
|
add_val = temp;
|
6987 |
|
|
|
6988 |
|
|
v->insn = insn;
|
6989 |
|
|
v->src_reg = src_reg;
|
6990 |
|
|
v->giv_type = type;
|
6991 |
|
|
v->dest_reg = dest_reg;
|
6992 |
|
|
v->mult_val = mult_val;
|
6993 |
|
|
v->add_val = add_val;
|
6994 |
|
|
v->ext_dependent = ext_val;
|
6995 |
|
|
v->benefit = benefit;
|
6996 |
|
|
v->location = location;
|
6997 |
|
|
v->cant_derive = 0;
|
6998 |
|
|
v->combined_with = 0;
|
6999 |
|
|
v->maybe_multiple = maybe_multiple;
|
7000 |
|
|
v->maybe_dead = 0;
|
7001 |
|
|
v->derive_adjustment = 0;
|
7002 |
|
|
v->same = 0;
|
7003 |
|
|
v->ignore = 0;
|
7004 |
|
|
v->new_reg = 0;
|
7005 |
|
|
v->final_value = 0;
|
7006 |
|
|
v->same_insn = 0;
|
7007 |
|
|
v->auto_inc_opt = 0;
|
7008 |
|
|
v->shared = 0;
|
7009 |
|
|
|
7010 |
|
|
/* The v->always_computable field is used in update_giv_derive, to
|
7011 |
|
|
determine whether a giv can be used to derive another giv. For a
|
7012 |
|
|
DEST_REG giv, INSN computes a new value for the giv, so its value
|
7013 |
|
|
isn't computable if INSN insn't executed every iteration.
|
7014 |
|
|
However, for a DEST_ADDR giv, INSN merely uses the value of the giv;
|
7015 |
|
|
it does not compute a new value. Hence the value is always computable
|
7016 |
|
|
regardless of whether INSN is executed each iteration. */
|
7017 |
|
|
|
7018 |
|
|
if (type == DEST_ADDR)
|
7019 |
|
|
v->always_computable = 1;
|
7020 |
|
|
else
|
7021 |
|
|
v->always_computable = ! not_every_iteration;
|
7022 |
|
|
|
7023 |
|
|
v->always_executed = ! not_every_iteration;
|
7024 |
|
|
|
7025 |
|
|
if (type == DEST_ADDR)
|
7026 |
|
|
{
|
7027 |
|
|
v->mode = GET_MODE (*location);
|
7028 |
|
|
v->lifetime = 1;
|
7029 |
|
|
}
|
7030 |
|
|
else /* type == DEST_REG */
|
7031 |
|
|
{
|
7032 |
|
|
v->mode = GET_MODE (SET_DEST (set));
|
7033 |
|
|
|
7034 |
|
|
v->lifetime = LOOP_REG_LIFETIME (loop, REGNO (dest_reg));
|
7035 |
|
|
|
7036 |
|
|
/* If the lifetime is zero, it means that this register is
|
7037 |
|
|
really a dead store. So mark this as a giv that can be
|
7038 |
|
|
ignored. This will not prevent the biv from being eliminated. */
|
7039 |
|
|
if (v->lifetime == 0)
|
7040 |
|
|
v->ignore = 1;
|
7041 |
|
|
|
7042 |
|
|
REG_IV_TYPE (ivs, REGNO (dest_reg)) = GENERAL_INDUCT;
|
7043 |
|
|
REG_IV_INFO (ivs, REGNO (dest_reg)) = v;
|
7044 |
|
|
}
|
7045 |
|
|
|
7046 |
|
|
/* Add the giv to the class of givs computed from one biv. */
|
7047 |
|
|
|
7048 |
|
|
bl = REG_IV_CLASS (ivs, REGNO (src_reg));
|
7049 |
|
|
gcc_assert (bl);
|
7050 |
|
|
v->next_iv = bl->giv;
|
7051 |
|
|
bl->giv = v;
|
7052 |
|
|
|
7053 |
|
|
/* Don't count DEST_ADDR. This is supposed to count the number of
|
7054 |
|
|
insns that calculate givs. */
|
7055 |
|
|
if (type == DEST_REG)
|
7056 |
|
|
bl->giv_count++;
|
7057 |
|
|
bl->total_benefit += benefit;
|
7058 |
|
|
|
7059 |
|
|
if (type == DEST_ADDR)
|
7060 |
|
|
{
|
7061 |
|
|
v->replaceable = 1;
|
7062 |
|
|
v->not_replaceable = 0;
|
7063 |
|
|
}
|
7064 |
|
|
else
|
7065 |
|
|
{
|
7066 |
|
|
/* The giv can be replaced outright by the reduced register only if all
|
7067 |
|
|
of the following conditions are true:
|
7068 |
|
|
- the insn that sets the giv is always executed on any iteration
|
7069 |
|
|
on which the giv is used at all
|
7070 |
|
|
(there are two ways to deduce this:
|
7071 |
|
|
either the insn is executed on every iteration,
|
7072 |
|
|
or all uses follow that insn in the same basic block),
|
7073 |
|
|
- the giv is not used outside the loop
|
7074 |
|
|
- no assignments to the biv occur during the giv's lifetime. */
|
7075 |
|
|
|
7076 |
|
|
if (REGNO_FIRST_UID (REGNO (dest_reg)) == INSN_UID (insn)
|
7077 |
|
|
/* Previous line always fails if INSN was moved by loop opt. */
|
7078 |
|
|
&& REGNO_LAST_LUID (REGNO (dest_reg))
|
7079 |
|
|
< INSN_LUID (loop->end)
|
7080 |
|
|
&& (! not_every_iteration
|
7081 |
|
|
|| last_use_this_basic_block (dest_reg, insn)))
|
7082 |
|
|
{
|
7083 |
|
|
/* Now check that there are no assignments to the biv within the
|
7084 |
|
|
giv's lifetime. This requires two separate checks. */
|
7085 |
|
|
|
7086 |
|
|
/* Check each biv update, and fail if any are between the first
|
7087 |
|
|
and last use of the giv.
|
7088 |
|
|
|
7089 |
|
|
If this loop contains an inner loop that was unrolled, then
|
7090 |
|
|
the insn modifying the biv may have been emitted by the loop
|
7091 |
|
|
unrolling code, and hence does not have a valid luid. Just
|
7092 |
|
|
mark the biv as not replaceable in this case. It is not very
|
7093 |
|
|
useful as a biv, because it is used in two different loops.
|
7094 |
|
|
It is very unlikely that we would be able to optimize the giv
|
7095 |
|
|
using this biv anyways. */
|
7096 |
|
|
|
7097 |
|
|
v->replaceable = 1;
|
7098 |
|
|
v->not_replaceable = 0;
|
7099 |
|
|
for (b = bl->biv; b; b = b->next_iv)
|
7100 |
|
|
{
|
7101 |
|
|
if (INSN_UID (b->insn) >= max_uid_for_loop
|
7102 |
|
|
|| ((INSN_LUID (b->insn)
|
7103 |
|
|
>= REGNO_FIRST_LUID (REGNO (dest_reg)))
|
7104 |
|
|
&& (INSN_LUID (b->insn)
|
7105 |
|
|
<= REGNO_LAST_LUID (REGNO (dest_reg)))))
|
7106 |
|
|
{
|
7107 |
|
|
v->replaceable = 0;
|
7108 |
|
|
v->not_replaceable = 1;
|
7109 |
|
|
break;
|
7110 |
|
|
}
|
7111 |
|
|
}
|
7112 |
|
|
|
7113 |
|
|
/* If there are any backwards branches that go from after the
|
7114 |
|
|
biv update to before it, then this giv is not replaceable. */
|
7115 |
|
|
if (v->replaceable)
|
7116 |
|
|
for (b = bl->biv; b; b = b->next_iv)
|
7117 |
|
|
if (back_branch_in_range_p (loop, b->insn))
|
7118 |
|
|
{
|
7119 |
|
|
v->replaceable = 0;
|
7120 |
|
|
v->not_replaceable = 1;
|
7121 |
|
|
break;
|
7122 |
|
|
}
|
7123 |
|
|
}
|
7124 |
|
|
else
|
7125 |
|
|
{
|
7126 |
|
|
/* May still be replaceable, we don't have enough info here to
|
7127 |
|
|
decide. */
|
7128 |
|
|
v->replaceable = 0;
|
7129 |
|
|
v->not_replaceable = 0;
|
7130 |
|
|
}
|
7131 |
|
|
}
|
7132 |
|
|
|
7133 |
|
|
/* Record whether the add_val contains a const_int, for later use by
|
7134 |
|
|
combine_givs. */
|
7135 |
|
|
{
|
7136 |
|
|
rtx tem = add_val;
|
7137 |
|
|
|
7138 |
|
|
v->no_const_addval = 1;
|
7139 |
|
|
if (tem == const0_rtx)
|
7140 |
|
|
;
|
7141 |
|
|
else if (CONSTANT_P (add_val))
|
7142 |
|
|
v->no_const_addval = 0;
|
7143 |
|
|
if (GET_CODE (tem) == PLUS)
|
7144 |
|
|
{
|
7145 |
|
|
while (1)
|
7146 |
|
|
{
|
7147 |
|
|
if (GET_CODE (XEXP (tem, 0)) == PLUS)
|
7148 |
|
|
tem = XEXP (tem, 0);
|
7149 |
|
|
else if (GET_CODE (XEXP (tem, 1)) == PLUS)
|
7150 |
|
|
tem = XEXP (tem, 1);
|
7151 |
|
|
else
|
7152 |
|
|
break;
|
7153 |
|
|
}
|
7154 |
|
|
if (CONSTANT_P (XEXP (tem, 1)))
|
7155 |
|
|
v->no_const_addval = 0;
|
7156 |
|
|
}
|
7157 |
|
|
}
|
7158 |
|
|
|
7159 |
|
|
if (loop_dump_stream)
|
7160 |
|
|
loop_giv_dump (v, loop_dump_stream, 0);
|
7161 |
|
|
}
|
7162 |
|
|
|
7163 |
|
|
/* Try to calculate the final value of the giv, the value it will have at
   the end of the loop.  If we can do it, return that value.  Otherwise
   return 0, meaning the final value could not be computed.  */

static rtx
final_giv_value (const struct loop *loop, struct induction *v)
{
  struct loop_ivs *ivs = LOOP_IVS (loop);
  struct iv_class *bl;
  rtx insn;
  rtx increment, tem;
  rtx seq;
  rtx loop_end = loop->end;
  unsigned HOST_WIDE_INT n_iterations = LOOP_INFO (loop)->n_iterations;

  /* Look up the class of the biv this giv is computed from.  */
  bl = REG_IV_CLASS (ivs, REGNO (v->src_reg));

  /* The final value for givs which depend on reversed bivs must be calculated
     differently than for ordinary givs.  In this case, there is already an
     insn after the loop which sets this giv's final value (if necessary),
     and there are no other loop exits, so we can return any value.  */
  if (bl->reversed)
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Final giv value for %d, depends on reversed biv\n",
		 REGNO (v->dest_reg));
      return const0_rtx;
    }

  /* Try to calculate the final value as a function of the biv it depends
     upon.  The only exit from the loop must be the fall through at the bottom
     and the insn that sets the giv must be executed on every iteration
     (otherwise the giv may not have its final value when the loop exits).  */

  /* ??? Can calculate the final giv value by subtracting off the
     extra biv increments times the giv's mult_val.  The loop must have
     only one exit for this to work, but the loop iteration count does not
     need to be known.  */

  if (n_iterations != 0
      && ! loop->exit_count
      && v->always_executed)
    {
      /* ?? It is tempting to use the biv's value here since these insns will
	 be put after the loop, and hence the biv will have its final value
	 then.  However, this fails if the biv is subsequently eliminated.
	 Perhaps determine whether biv's are eliminable before trying to
	 determine whether giv's are replaceable so that we can use the
	 biv value here if it is not eliminable.  */

      /* We are emitting code after the end of the loop, so we must make
	 sure that bl->initial_value is still valid then.  It will still
	 be valid if it is invariant.  */

      increment = biv_total_increment (bl);

      if (increment && loop_invariant_p (loop, increment)
	  && loop_invariant_p (loop, bl->initial_value))
	{
	  /* Can calculate the loop exit value of its biv as
	     (n_iterations * increment) + initial_value */

	  /* The loop exit value of the giv is then
	     (final_biv_value - extra increments) * mult_val + add_val.
	     The extra increments are any increments to the biv which
	     occur in the loop after the giv's value is calculated.
	     We must search from the insn that sets the giv to the end
	     of the loop to calculate this value.  */

	  /* Put the final biv value in tem.  */
	  tem = gen_reg_rtx (v->mode);
	  record_base_value (REGNO (tem), bl->biv->add_val, 0);
	  loop_iv_add_mult_sink (loop, extend_value_for_giv (v, increment),
				 GEN_INT (n_iterations),
				 extend_value_for_giv (v, bl->initial_value),
				 tem);

	  /* Subtract off extra increments as we find them.  Each biv
	     update between the giv's insn and the loop end is one such
	     extra increment; the subtraction insns are emitted after the
	     loop (sunk).  */
	  for (insn = NEXT_INSN (v->insn); insn != loop_end;
	       insn = NEXT_INSN (insn))
	    {
	      struct induction *biv;

	      for (biv = bl->biv; biv; biv = biv->next_iv)
		if (biv->insn == insn)
		  {
		    start_sequence ();
		    tem = expand_simple_binop (GET_MODE (tem), MINUS, tem,
					       biv->add_val, NULL_RTX, 0,
					       OPTAB_LIB_WIDEN);
		    seq = get_insns ();
		    end_sequence ();
		    loop_insn_sink (loop, seq);
		  }
	    }

	  /* Now calculate the giv's final value.  */
	  loop_iv_add_mult_sink (loop, tem, v->mult_val, v->add_val, tem);

	  if (loop_dump_stream)
	    fprintf (loop_dump_stream,
		     "Final giv value for %d, calc from biv's value.\n",
		     REGNO (v->dest_reg));

	  return tem;
	}
    }

  /* Replaceable giv's should never reach here.  */
  gcc_assert (!v->replaceable);

  /* Check to see if the biv is dead at all loop exits.  If so, nothing
     after the loop can observe the giv, so any value will do.  */
  if (reg_dead_after_loop (loop, v->dest_reg))
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Final giv value for %d, giv dead after loop exit.\n",
		 REGNO (v->dest_reg));

      return const0_rtx;
    }

  /* Could not determine a final value.  */
  return 0;
}
|
7287 |
|
|
|
7288 |
|
|
/* All this does is determine whether a giv can be made replaceable because
   its final value can be calculated.  This code can not be part of record_giv
   above, because final_giv_value requires that the number of loop iterations
   be known, and that can not be accurately calculated until after all givs
   have been identified.  */

static void
check_final_value (const struct loop *loop, struct induction *v)
{
  rtx final_value = 0;

  /* DEST_ADDR givs will never reach here, because they are always marked
     replaceable above in record_giv.  */

  /* The giv can be replaced outright by the reduced register only if all
     of the following conditions are true:
     - the insn that sets the giv is always executed on any iteration
       on which the giv is used at all
       (there are two ways to deduce this:
	either the insn is executed on every iteration,
	or all uses follow that insn in the same basic block),
     - its final value can be calculated (this condition is different
       than the one above in record_giv)
     - it's not used before it's set
     - no assignments to the biv occur during the giv's lifetime.  */

#if 0
  /* This is only called now when replaceable is known to be false.  */
  /* Clear replaceable, so that it won't confuse final_giv_value.  */
  v->replaceable = 0;
#endif

  if ((final_value = final_giv_value (loop, v))
      && (v->always_executed
	  || last_use_this_basic_block (v->dest_reg, v->insn)))
    {
      int biv_increment_seen = 0, before_giv_insn = 0;
      rtx p = v->insn;
      rtx last_giv_use;

      /* Tentatively mark replaceable; the scans below may veto this.  */
      v->replaceable = 1;
      v->not_replaceable = 0;

      /* When trying to determine whether or not a biv increment occurs
	 during the lifetime of the giv, we can ignore uses of the variable
	 outside the loop because final_value is true.  Hence we can not
	 use regno_last_uid and regno_first_uid as above in record_giv.  */

      /* Search the loop to determine whether any assignments to the
	 biv occur during the giv's lifetime.  Start with the insn
	 that sets the giv, and search around the loop until we come
	 back to that insn again.

	 Also fail if there is a jump within the giv's lifetime that jumps
	 to somewhere outside the lifetime but still within the loop.  This
	 catches spaghetti code where the execution order is not linear, and
	 hence the above test fails.  Here we assume that the giv lifetime
	 does not extend from one iteration of the loop to the next, so as
	 to make the test easier.  Since the lifetime isn't known yet,
	 this requires two loops.  See also record_giv above.  */

      last_giv_use = v->insn;

      while (1)
	{
	  p = NEXT_INSN (p);
	  /* Wrap around from the loop end back to its start so the scan
	     covers the whole loop body exactly once.  */
	  if (p == loop->end)
	    {
	      before_giv_insn = 1;
	      p = NEXT_INSN (loop->start);
	    }
	  if (p == v->insn)
	    break;

	  if (INSN_P (p))
	    {
	      /* It is possible for the BIV increment to use the GIV if we
		 have a cycle.  Thus we must be sure to check each insn for
		 both BIV and GIV uses, and we must check for BIV uses
		 first.  */

	      if (! biv_increment_seen
		  && reg_set_p (v->src_reg, PATTERN (p)))
		biv_increment_seen = 1;

	      if (reg_mentioned_p (v->dest_reg, PATTERN (p)))
		{
		  /* A giv use after a biv update, or before the giv's
		     setting insn (wrapped scan), makes it unreplaceable.  */
		  if (biv_increment_seen || before_giv_insn)
		    {
		      v->replaceable = 0;
		      v->not_replaceable = 1;
		      break;
		    }
		  last_giv_use = p;
		}
	    }
	}

      /* Now that the lifetime of the giv is known, check for branches
	 from within the lifetime to outside the lifetime if it is still
	 replaceable.  */

      if (v->replaceable)
	{
	  p = v->insn;
	  while (1)
	    {
	      p = NEXT_INSN (p);
	      if (p == loop->end)
		p = NEXT_INSN (loop->start);
	      if (p == last_giv_use)
		break;

	      /* A jump whose target lies between the loop start and the
		 giv's insn, or between the last giv use and the loop end,
		 leaves the giv's lifetime while staying in the loop.  */
	      if (JUMP_P (p) && JUMP_LABEL (p)
		  && LABEL_NAME (JUMP_LABEL (p))
		  && ((loop_insn_first_p (JUMP_LABEL (p), v->insn)
		       && loop_insn_first_p (loop->start, JUMP_LABEL (p)))
		      || (loop_insn_first_p (last_giv_use, JUMP_LABEL (p))
			  && loop_insn_first_p (JUMP_LABEL (p), loop->end))))
		{
		  v->replaceable = 0;
		  v->not_replaceable = 1;

		  if (loop_dump_stream)
		    fprintf (loop_dump_stream,
			     "Found branch outside giv lifetime.\n");

		  break;
		}
	    }
	}

      /* If it is replaceable, then save the final value.  */
      if (v->replaceable)
	v->final_value = final_value;
    }

  if (loop_dump_stream && v->replaceable)
    fprintf (loop_dump_stream, "Insn %d: giv reg %d final_value replaceable\n",
	     INSN_UID (v->insn), REGNO (v->dest_reg));
}
|
7429 |
|
|
|
7430 |
|
|
/* Update the status of whether a giv can derive other givs.

   We need to do something special if there is or may be an update to the biv
   between the time the giv is defined and the time it is used to derive
   another giv.

   In addition, a giv that is only conditionally set is not allowed to
   derive another giv once a label has been passed.

   The cases we look at are when a label or an update to a biv is passed.
   P is the insn being scanned (a label, a jump, or a biv update).  */

static void
update_giv_derive (const struct loop *loop, rtx p)
{
  struct loop_ivs *ivs = LOOP_IVS (loop);
  struct iv_class *bl;
  struct induction *biv, *giv;
  rtx tem;
  int dummy;

  /* Search all IV classes, then all bivs, and finally all givs.

     There are three cases we are concerned with.  First we have the situation
     of a giv that is only updated conditionally.  In that case, it may not
     derive any givs after a label is passed.

     The second case is when a biv update occurs, or may occur, after the
     definition of a giv.  For certain biv updates (see below) that are
     known to occur between the giv definition and use, we can adjust the
     giv definition.  For others, or when the biv update is conditional,
     we must prevent the giv from deriving any other givs.  There are two
     sub-cases within this case.

     If this is a label, we are concerned with any biv update that is done
     conditionally, since it may be done after the giv is defined followed by
     a branch here (actually, we need to pass both a jump and a label, but
     this extra tracking doesn't seem worth it).

     If this is a jump, we are concerned about any biv update that may be
     executed multiple times.  We are actually only concerned about
     backward jumps, but it is probably not worth performing the test
     on the jump again here.

     If this is a biv update, we must adjust the giv status to show that a
     subsequent biv update was performed.  If this adjustment cannot be done,
     the giv cannot derive further givs.  */

  for (bl = ivs->list; bl; bl = bl->next)
    for (biv = bl->biv; biv; biv = biv->next_iv)
      if (LABEL_P (p) || JUMP_P (p)
	  || biv->insn == p)
	{
	  /* Skip if location is the same as a previous one.  */
	  if (biv->same)
	    continue;

	  for (giv = bl->giv; giv; giv = giv->next_iv)
	    {
	      /* If cant_derive is already true, there is no point in
		 checking all of these conditions again.  */
	      if (giv->cant_derive)
		continue;

	      /* If this giv is conditionally set and we have passed a label,
		 it cannot derive anything.  */
	      if (LABEL_P (p) && ! giv->always_computable)
		giv->cant_derive = 1;

	      /* Skip givs that have mult_val == 0, since
		 they are really invariants.  Also skip those that are
		 replaceable, since we know their lifetime doesn't contain
		 any biv update.  */
	      else if (giv->mult_val == const0_rtx || giv->replaceable)
		continue;

	      /* The only way we can allow this giv to derive another
		 is if this is a biv increment and we can form the product
		 of biv->add_val and giv->mult_val.  In this case, we will
		 be able to compute a compensation.  */
	      else if (biv->insn == p)
		{
		  rtx ext_val_dummy;

		  tem = 0;
		  /* Only simple (mult_val == 1) biv increments can be
		     compensated for.  */
		  if (biv->mult_val == const1_rtx)
		    tem = simplify_giv_expr (loop,
					     gen_rtx_MULT (giv->mode,
							   biv->add_val,
							   giv->mult_val),
					     &ext_val_dummy, &dummy);

		  /* Fold the new compensation into any adjustment already
		     accumulated from earlier biv updates.  */
		  if (tem && giv->derive_adjustment)
		    tem = simplify_giv_expr
		      (loop,
		       gen_rtx_PLUS (giv->mode, tem, giv->derive_adjustment),
		       &ext_val_dummy, &dummy);

		  if (tem)
		    giv->derive_adjustment = tem;
		  else
		    giv->cant_derive = 1;
		}
	      else if ((LABEL_P (p) && ! biv->always_computable)
		       || (JUMP_P (p) && biv->maybe_multiple))
		giv->cant_derive = 1;
	    }
	}
}
|
7538 |
|
|
|
7539 |
|
|
/* Check whether an insn is an increment legitimate for a basic induction var.
   X is the source of insn P, or a part of it.
   MODE is the mode in which X should be interpreted.

   DEST_REG is the putative biv, also the destination of the insn.
   We accept patterns of these forms:
     REG = REG + INVARIANT (includes REG = REG - CONSTANT)
     REG = INVARIANT + REG

   If X is suitable, we return 1, set *MULT_VAL to CONST1_RTX,
   store the additive term into *INC_VAL, and store the place where
   we found the additive term into *LOCATION.

   If X is an assignment of an invariant into DEST_REG, we set
   *MULT_VAL to CONST0_RTX, and store the invariant into *INC_VAL.

   We also want to detect a BIV when it corresponds to a variable
   whose mode was promoted.  In that case, an increment
   of the variable may be a PLUS that adds a SUBREG of that variable to
   an invariant and then sign- or zero-extends the result of the PLUS
   into the variable.

   Most GIVs in such cases will be in the promoted mode, since that is
   probably the natural computation mode (and almost certainly the mode
   used for addresses) on the machine.  So we view the pseudo-reg containing
   the variable as the BIV, as if it were simply incremented.

   Note that treating the entire pseudo as a BIV will result in making
   simple increments to any GIVs based on it.  However, if the variable
   overflows in its declared mode but not its promoted mode, the result will
   be incorrect.  This is acceptable if the variable is signed, since
   overflows in such cases are undefined, but not if it is unsigned, since
   those overflows are defined.  So we only check for SIGN_EXTEND and
   not ZERO_EXTEND.

   If we cannot find a biv, we return 0.  */

static int
basic_induction_var (const struct loop *loop, rtx x, enum machine_mode mode,
		     rtx dest_reg, rtx p, rtx *inc_val, rtx *mult_val,
		     rtx **location)
{
  enum rtx_code code;
  rtx *argp, arg;
  rtx insn, set = 0, last, inc;

  code = GET_CODE (x);
  *location = NULL;
  switch (code)
    {
    case PLUS:
      /* Find which operand is DEST_REG (possibly wrapped in a promoted
	 SUBREG); the other operand is the candidate increment.  */
      if (rtx_equal_p (XEXP (x, 0), dest_reg)
	  || (GET_CODE (XEXP (x, 0)) == SUBREG
	      && SUBREG_PROMOTED_VAR_P (XEXP (x, 0))
	      && SUBREG_REG (XEXP (x, 0)) == dest_reg))
	{
	  argp = &XEXP (x, 1);
	}
      else if (rtx_equal_p (XEXP (x, 1), dest_reg)
	       || (GET_CODE (XEXP (x, 1)) == SUBREG
		   && SUBREG_PROMOTED_VAR_P (XEXP (x, 1))
		   && SUBREG_REG (XEXP (x, 1)) == dest_reg))
	{
	  argp = &XEXP (x, 0);
	}
      else
	return 0;

      arg = *argp;
      if (loop_invariant_p (loop, arg) != 1)
	return 0;

      /* convert_modes can emit new instructions, e.g. when arg is a loop
	 invariant MEM and dest_reg has a different mode.
	 These instructions would be emitted after the end of the function
	 and then *inc_val would be an uninitialized pseudo.
	 Detect this and bail in this case.
	 Other alternatives to solve this can be introducing a convert_modes
	 variant which is allowed to fail but not allowed to emit new
	 instructions, emit these instructions before loop start and let
	 it be garbage collected if *inc_val is never used or saving the
	 *inc_val initialization sequence generated here and when *inc_val
	 is going to be actually used, emit it at some suitable place.  */
      last = get_last_insn ();
      inc = convert_modes (GET_MODE (dest_reg), GET_MODE (x), arg, 0);
      if (get_last_insn () != last)
	{
	  delete_insns_since (last);
	  return 0;
	}

      *inc_val = inc;
      *mult_val = const1_rtx;
      *location = argp;
      return 1;

    case SUBREG:
      /* If what's inside the SUBREG is a BIV, then the SUBREG.  This will
	 handle addition of promoted variables.
	 ??? The comment at the start of this function is wrong: promoted
	 variable increments don't look like it says they do.  */
      return basic_induction_var (loop, SUBREG_REG (x),
				  GET_MODE (SUBREG_REG (x)),
				  dest_reg, p, inc_val, mult_val, location);

    case REG:
      /* If this register is assigned in a previous insn, look at its
	 source, but don't go outside the loop or past a label.  */

      /* If this sets a register to itself, we would repeat any previous
	 biv increment if we applied this strategy blindly.  */
      if (rtx_equal_p (dest_reg, x))
	return 0;

      insn = p;
      while (1)
	{
	  rtx dest;
	  /* Step backwards, stopping at the loop-begin note or the start
	     of the insn chain.  */
	  do
	    {
	      insn = PREV_INSN (insn);
	    }
	  while (insn && NOTE_P (insn)
		 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_BEG);

	  if (!insn)
	    break;
	  set = single_set (insn);
	  if (set == 0)
	    break;
	  dest = SET_DEST (set);
	  if (dest == x
	      || (GET_CODE (dest) == SUBREG
		  && (GET_MODE_SIZE (GET_MODE (dest)) <= UNITS_PER_WORD)
		  && (GET_MODE_CLASS (GET_MODE (dest)) == MODE_INT)
		  && SUBREG_REG (dest) == x))
	    return basic_induction_var (loop, SET_SRC (set),
					(GET_MODE (SET_SRC (set)) == VOIDmode
					 ? GET_MODE (x)
					 : GET_MODE (SET_SRC (set))),
					dest_reg, insn,
					inc_val, mult_val, location);

	  /* If this insn sets only part of X, it cannot be the defining
	     insn we are looking for; keep scanning unless the partial
	     store does target X.  */
	  while (GET_CODE (dest) == SUBREG
		 || GET_CODE (dest) == ZERO_EXTRACT
		 || GET_CODE (dest) == STRICT_LOW_PART)
	    dest = XEXP (dest, 0);
	  if (dest == x)
	    break;
	}
      /* Fall through.  */

      /* Can accept constant setting of biv only when inside inner most loop.
	 Otherwise, a biv of an inner loop may be incorrectly recognized
	 as a biv of the outer loop,
	 causing code to be moved INTO the inner loop.  */
    case MEM:
      if (loop_invariant_p (loop, x) != 1)
	return 0;
      /* FALLTHRU: an invariant MEM is handled like a constant.  */
    case CONST_INT:
    case SYMBOL_REF:
    case CONST:
      /* convert_modes dies if we try to convert to or from CCmode, so just
	 exclude that case.  It is very unlikely that a condition code value
	 would be a useful iterator anyways.  convert_modes dies if we try to
	 convert a float mode to non-float or vice versa too.  */
      if (loop->level == 1
	  && GET_MODE_CLASS (mode) == GET_MODE_CLASS (GET_MODE (dest_reg))
	  && GET_MODE_CLASS (mode) != MODE_CC)
	{
	  /* Possible bug here?  Perhaps we don't know the mode of X.  */
	  last = get_last_insn ();
	  inc = convert_modes (GET_MODE (dest_reg), mode, x, 0);
	  if (get_last_insn () != last)
	    {
	      delete_insns_since (last);
	      return 0;
	    }

	  *inc_val = inc;
	  *mult_val = const0_rtx;
	  return 1;
	}
      else
	return 0;

    case SIGN_EXTEND:
      /* Ignore this BIV if signed arithmetic overflow is defined.  */
      if (flag_wrapv)
	return 0;
      return basic_induction_var (loop, XEXP (x, 0), GET_MODE (XEXP (x, 0)),
				  dest_reg, p, inc_val, mult_val, location);

    case ASHIFTRT:
      /* Similar, since this can be a sign extension.  Skip back over any
	 notes to find the previous real insn.  */
      for (insn = PREV_INSN (p);
	   (insn && NOTE_P (insn)
	    && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_BEG);
	   insn = PREV_INSN (insn))
	;

      if (insn)
	set = single_set (insn);

      /* Recognize (ashiftrt (ashift Y N) N), which together act as a
	 sign extension of Y.  */
      if (! rtx_equal_p (dest_reg, XEXP (x, 0))
	  && set && SET_DEST (set) == XEXP (x, 0)
	  && GET_CODE (XEXP (x, 1)) == CONST_INT
	  && INTVAL (XEXP (x, 1)) >= 0
	  && GET_CODE (SET_SRC (set)) == ASHIFT
	  && XEXP (x, 1) == XEXP (SET_SRC (set), 1))
	return basic_induction_var (loop, XEXP (SET_SRC (set), 0),
				    GET_MODE (XEXP (x, 0)),
				    dest_reg, insn, inc_val, mult_val,
				    location);
      return 0;

    default:
      return 0;
    }
}
|
7759 |
|
|
|
7760 |
|
|
/* A general induction variable (giv) is any quantity that is a linear
|
7761 |
|
|
function of a basic induction variable,
|
7762 |
|
|
i.e. giv = biv * mult_val + add_val.
|
7763 |
|
|
The coefficients can be any loop invariant quantity.
|
7764 |
|
|
A giv need not be computed directly from the biv;
|
7765 |
|
|
it can be computed by way of other givs. */
|
7766 |
|
|
|
7767 |
|
|
/* Determine whether X computes a giv.
|
7768 |
|
|
If it does, return a nonzero value
|
7769 |
|
|
which is the benefit from eliminating the computation of X;
|
7770 |
|
|
set *SRC_REG to the register of the biv that it is computed from;
|
7771 |
|
|
set *ADD_VAL and *MULT_VAL to the coefficients,
|
7772 |
|
|
such that the value of X is biv * mult + add; */
|
7773 |
|
|
|
7774 |
|
|
static int
|
7775 |
|
|
general_induction_var (const struct loop *loop, rtx x, rtx *src_reg,
|
7776 |
|
|
rtx *add_val, rtx *mult_val, rtx *ext_val,
|
7777 |
|
|
int is_addr, int *pbenefit,
|
7778 |
|
|
enum machine_mode addr_mode)
|
7779 |
|
|
{
|
7780 |
|
|
struct loop_ivs *ivs = LOOP_IVS (loop);
|
7781 |
|
|
rtx orig_x = x;
|
7782 |
|
|
|
7783 |
|
|
/* If this is an invariant, forget it, it isn't a giv. */
|
7784 |
|
|
if (loop_invariant_p (loop, x) == 1)
|
7785 |
|
|
return 0;
|
7786 |
|
|
|
7787 |
|
|
*pbenefit = 0;
|
7788 |
|
|
*ext_val = NULL_RTX;
|
7789 |
|
|
x = simplify_giv_expr (loop, x, ext_val, pbenefit);
|
7790 |
|
|
if (x == 0)
|
7791 |
|
|
return 0;
|
7792 |
|
|
|
7793 |
|
|
switch (GET_CODE (x))
|
7794 |
|
|
{
|
7795 |
|
|
case USE:
|
7796 |
|
|
case CONST_INT:
|
7797 |
|
|
/* Since this is now an invariant and wasn't before, it must be a giv
|
7798 |
|
|
with MULT_VAL == 0. It doesn't matter which BIV we associate this
|
7799 |
|
|
with. */
|
7800 |
|
|
*src_reg = ivs->list->biv->dest_reg;
|
7801 |
|
|
*mult_val = const0_rtx;
|
7802 |
|
|
*add_val = x;
|
7803 |
|
|
break;
|
7804 |
|
|
|
7805 |
|
|
case REG:
|
7806 |
|
|
/* This is equivalent to a BIV. */
|
7807 |
|
|
*src_reg = x;
|
7808 |
|
|
*mult_val = const1_rtx;
|
7809 |
|
|
*add_val = const0_rtx;
|
7810 |
|
|
break;
|
7811 |
|
|
|
7812 |
|
|
case PLUS:
|
7813 |
|
|
/* Either (plus (biv) (invar)) or
|
7814 |
|
|
(plus (mult (biv) (invar_1)) (invar_2)). */
|
7815 |
|
|
if (GET_CODE (XEXP (x, 0)) == MULT)
|
7816 |
|
|
{
|
7817 |
|
|
*src_reg = XEXP (XEXP (x, 0), 0);
|
7818 |
|
|
*mult_val = XEXP (XEXP (x, 0), 1);
|
7819 |
|
|
}
|
7820 |
|
|
else
|
7821 |
|
|
{
|
7822 |
|
|
*src_reg = XEXP (x, 0);
|
7823 |
|
|
*mult_val = const1_rtx;
|
7824 |
|
|
}
|
7825 |
|
|
*add_val = XEXP (x, 1);
|
7826 |
|
|
break;
|
7827 |
|
|
|
7828 |
|
|
case MULT:
|
7829 |
|
|
/* ADD_VAL is zero. */
|
7830 |
|
|
*src_reg = XEXP (x, 0);
|
7831 |
|
|
*mult_val = XEXP (x, 1);
|
7832 |
|
|
*add_val = const0_rtx;
|
7833 |
|
|
break;
|
7834 |
|
|
|
7835 |
|
|
default:
|
7836 |
|
|
gcc_unreachable ();
|
7837 |
|
|
}
|
7838 |
|
|
|
7839 |
|
|
/* Remove any enclosing USE from ADD_VAL and MULT_VAL (there will be
|
7840 |
|
|
unless they are CONST_INT). */
|
7841 |
|
|
if (GET_CODE (*add_val) == USE)
|
7842 |
|
|
*add_val = XEXP (*add_val, 0);
|
7843 |
|
|
if (GET_CODE (*mult_val) == USE)
|
7844 |
|
|
*mult_val = XEXP (*mult_val, 0);
|
7845 |
|
|
|
7846 |
|
|
if (is_addr)
|
7847 |
|
|
*pbenefit += address_cost (orig_x, addr_mode) - reg_address_cost;
|
7848 |
|
|
else
|
7849 |
|
|
*pbenefit += rtx_cost (orig_x, SET);
|
7850 |
|
|
|
7851 |
|
|
/* Always return true if this is a giv so it will be detected as such,
|
7852 |
|
|
even if the benefit is zero or negative. This allows elimination
|
7853 |
|
|
of bivs that might otherwise not be eliminated. */
|
7854 |
|
|
return 1;
|
7855 |
|
|
}
|
7856 |
|
|
|
7857 |
|
|
/* Given an expression, X, try to form it as a linear function of a biv.
|
7858 |
|
|
We will canonicalize it to be of the form
|
7859 |
|
|
(plus (mult (BIV) (invar_1))
|
7860 |
|
|
(invar_2))
|
7861 |
|
|
with possible degeneracies.
|
7862 |
|
|
|
7863 |
|
|
The invariant expressions must each be of a form that can be used as a
|
7864 |
|
|
machine operand. We surround then with a USE rtx (a hack, but localized
|
7865 |
|
|
and certainly unambiguous!) if not a CONST_INT for simplicity in this
|
7866 |
|
|
routine; it is the caller's responsibility to strip them.
|
7867 |
|
|
|
7868 |
|
|
If no such canonicalization is possible (i.e., two biv's are used or an
|
7869 |
|
|
expression that is neither invariant nor a biv or giv), this routine
|
7870 |
|
|
returns 0.
|
7871 |
|
|
|
7872 |
|
|
For a nonzero return, the result will have a code of CONST_INT, USE,
|
7873 |
|
|
REG (for a BIV), PLUS, or MULT. No other codes will occur.
|
7874 |
|
|
|
7875 |
|
|
*BENEFIT will be incremented by the benefit of any sub-giv encountered. */
|
7876 |
|
|
|
7877 |
|
|
static rtx sge_plus (enum machine_mode, rtx, rtx);
|
7878 |
|
|
static rtx sge_plus_constant (rtx, rtx);
|
7879 |
|
|
|
7880 |
|
|
static rtx
|
7881 |
|
|
simplify_giv_expr (const struct loop *loop, rtx x, rtx *ext_val, int *benefit)
|
7882 |
|
|
{
|
7883 |
|
|
struct loop_ivs *ivs = LOOP_IVS (loop);
|
7884 |
|
|
struct loop_regs *regs = LOOP_REGS (loop);
|
7885 |
|
|
enum machine_mode mode = GET_MODE (x);
|
7886 |
|
|
rtx arg0, arg1;
|
7887 |
|
|
rtx tem;
|
7888 |
|
|
|
7889 |
|
|
/* If this is not an integer mode, or if we cannot do arithmetic in this
|
7890 |
|
|
mode, this can't be a giv. */
|
7891 |
|
|
if (mode != VOIDmode
|
7892 |
|
|
&& (GET_MODE_CLASS (mode) != MODE_INT
|
7893 |
|
|
|| GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT))
|
7894 |
|
|
return NULL_RTX;
|
7895 |
|
|
|
7896 |
|
|
switch (GET_CODE (x))
|
7897 |
|
|
{
|
7898 |
|
|
case PLUS:
|
7899 |
|
|
arg0 = simplify_giv_expr (loop, XEXP (x, 0), ext_val, benefit);
|
7900 |
|
|
arg1 = simplify_giv_expr (loop, XEXP (x, 1), ext_val, benefit);
|
7901 |
|
|
if (arg0 == 0 || arg1 == 0)
|
7902 |
|
|
return NULL_RTX;
|
7903 |
|
|
|
7904 |
|
|
/* Put constant last, CONST_INT last if both constant. */
|
7905 |
|
|
if ((GET_CODE (arg0) == USE
|
7906 |
|
|
|| GET_CODE (arg0) == CONST_INT)
|
7907 |
|
|
&& ! ((GET_CODE (arg0) == USE
|
7908 |
|
|
&& GET_CODE (arg1) == USE)
|
7909 |
|
|
|| GET_CODE (arg1) == CONST_INT))
|
7910 |
|
|
tem = arg0, arg0 = arg1, arg1 = tem;
|
7911 |
|
|
|
7912 |
|
|
/* Handle addition of zero, then addition of an invariant. */
|
7913 |
|
|
if (arg1 == const0_rtx)
|
7914 |
|
|
return arg0;
|
7915 |
|
|
else if (GET_CODE (arg1) == CONST_INT || GET_CODE (arg1) == USE)
|
7916 |
|
|
switch (GET_CODE (arg0))
|
7917 |
|
|
{
|
7918 |
|
|
case CONST_INT:
|
7919 |
|
|
case USE:
|
7920 |
|
|
/* Adding two invariants must result in an invariant, so enclose
|
7921 |
|
|
addition operation inside a USE and return it. */
|
7922 |
|
|
if (GET_CODE (arg0) == USE)
|
7923 |
|
|
arg0 = XEXP (arg0, 0);
|
7924 |
|
|
if (GET_CODE (arg1) == USE)
|
7925 |
|
|
arg1 = XEXP (arg1, 0);
|
7926 |
|
|
|
7927 |
|
|
if (GET_CODE (arg0) == CONST_INT)
|
7928 |
|
|
tem = arg0, arg0 = arg1, arg1 = tem;
|
7929 |
|
|
if (GET_CODE (arg1) == CONST_INT)
|
7930 |
|
|
tem = sge_plus_constant (arg0, arg1);
|
7931 |
|
|
else
|
7932 |
|
|
tem = sge_plus (mode, arg0, arg1);
|
7933 |
|
|
|
7934 |
|
|
if (GET_CODE (tem) != CONST_INT)
|
7935 |
|
|
tem = gen_rtx_USE (mode, tem);
|
7936 |
|
|
return tem;
|
7937 |
|
|
|
7938 |
|
|
case REG:
|
7939 |
|
|
case MULT:
|
7940 |
|
|
/* biv + invar or mult + invar. Return sum. */
|
7941 |
|
|
return gen_rtx_PLUS (mode, arg0, arg1);
|
7942 |
|
|
|
7943 |
|
|
case PLUS:
|
7944 |
|
|
/* (a + invar_1) + invar_2. Associate. */
|
7945 |
|
|
return
|
7946 |
|
|
simplify_giv_expr (loop,
|
7947 |
|
|
gen_rtx_PLUS (mode,
|
7948 |
|
|
XEXP (arg0, 0),
|
7949 |
|
|
gen_rtx_PLUS (mode,
|
7950 |
|
|
XEXP (arg0, 1),
|
7951 |
|
|
arg1)),
|
7952 |
|
|
ext_val, benefit);
|
7953 |
|
|
|
7954 |
|
|
default:
|
7955 |
|
|
gcc_unreachable ();
|
7956 |
|
|
}
|
7957 |
|
|
|
7958 |
|
|
/* Each argument must be either REG, PLUS, or MULT. Convert REG to
|
7959 |
|
|
MULT to reduce cases. */
|
7960 |
|
|
if (REG_P (arg0))
|
7961 |
|
|
arg0 = gen_rtx_MULT (mode, arg0, const1_rtx);
|
7962 |
|
|
if (REG_P (arg1))
|
7963 |
|
|
arg1 = gen_rtx_MULT (mode, arg1, const1_rtx);
|
7964 |
|
|
|
7965 |
|
|
/* Now have PLUS + PLUS, PLUS + MULT, MULT + PLUS, or MULT + MULT.
|
7966 |
|
|
Put a MULT first, leaving PLUS + PLUS, MULT + PLUS, or MULT + MULT.
|
7967 |
|
|
Recurse to associate the second PLUS. */
|
7968 |
|
|
if (GET_CODE (arg1) == MULT)
|
7969 |
|
|
tem = arg0, arg0 = arg1, arg1 = tem;
|
7970 |
|
|
|
7971 |
|
|
if (GET_CODE (arg1) == PLUS)
|
7972 |
|
|
return
|
7973 |
|
|
simplify_giv_expr (loop,
|
7974 |
|
|
gen_rtx_PLUS (mode,
|
7975 |
|
|
gen_rtx_PLUS (mode, arg0,
|
7976 |
|
|
XEXP (arg1, 0)),
|
7977 |
|
|
XEXP (arg1, 1)),
|
7978 |
|
|
ext_val, benefit);
|
7979 |
|
|
|
7980 |
|
|
/* Now must have MULT + MULT. Distribute if same biv, else not giv. */
|
7981 |
|
|
if (GET_CODE (arg0) != MULT || GET_CODE (arg1) != MULT)
|
7982 |
|
|
return NULL_RTX;
|
7983 |
|
|
|
7984 |
|
|
if (!rtx_equal_p (arg0, arg1))
|
7985 |
|
|
return NULL_RTX;
|
7986 |
|
|
|
7987 |
|
|
return simplify_giv_expr (loop,
|
7988 |
|
|
gen_rtx_MULT (mode,
|
7989 |
|
|
XEXP (arg0, 0),
|
7990 |
|
|
gen_rtx_PLUS (mode,
|
7991 |
|
|
XEXP (arg0, 1),
|
7992 |
|
|
XEXP (arg1, 1))),
|
7993 |
|
|
ext_val, benefit);
|
7994 |
|
|
|
7995 |
|
|
case MINUS:
|
7996 |
|
|
/* Handle "a - b" as "a + b * (-1)". */
|
7997 |
|
|
return simplify_giv_expr (loop,
|
7998 |
|
|
gen_rtx_PLUS (mode,
|
7999 |
|
|
XEXP (x, 0),
|
8000 |
|
|
gen_rtx_MULT (mode,
|
8001 |
|
|
XEXP (x, 1),
|
8002 |
|
|
constm1_rtx)),
|
8003 |
|
|
ext_val, benefit);
|
8004 |
|
|
|
8005 |
|
|
case MULT:
|
8006 |
|
|
arg0 = simplify_giv_expr (loop, XEXP (x, 0), ext_val, benefit);
|
8007 |
|
|
arg1 = simplify_giv_expr (loop, XEXP (x, 1), ext_val, benefit);
|
8008 |
|
|
if (arg0 == 0 || arg1 == 0)
|
8009 |
|
|
return NULL_RTX;
|
8010 |
|
|
|
8011 |
|
|
/* Put constant last, CONST_INT last if both constant. */
|
8012 |
|
|
if ((GET_CODE (arg0) == USE || GET_CODE (arg0) == CONST_INT)
|
8013 |
|
|
&& GET_CODE (arg1) != CONST_INT)
|
8014 |
|
|
tem = arg0, arg0 = arg1, arg1 = tem;
|
8015 |
|
|
|
8016 |
|
|
/* If second argument is not now constant, not giv. */
|
8017 |
|
|
if (GET_CODE (arg1) != USE && GET_CODE (arg1) != CONST_INT)
|
8018 |
|
|
return NULL_RTX;
|
8019 |
|
|
|
8020 |
|
|
/* Handle multiply by 0 or 1. */
|
8021 |
|
|
if (arg1 == const0_rtx)
|
8022 |
|
|
return const0_rtx;
|
8023 |
|
|
|
8024 |
|
|
else if (arg1 == const1_rtx)
|
8025 |
|
|
return arg0;
|
8026 |
|
|
|
8027 |
|
|
switch (GET_CODE (arg0))
|
8028 |
|
|
{
|
8029 |
|
|
case REG:
|
8030 |
|
|
/* biv * invar. Done. */
|
8031 |
|
|
return gen_rtx_MULT (mode, arg0, arg1);
|
8032 |
|
|
|
8033 |
|
|
case CONST_INT:
|
8034 |
|
|
/* Product of two constants. */
|
8035 |
|
|
return GEN_INT (INTVAL (arg0) * INTVAL (arg1));
|
8036 |
|
|
|
8037 |
|
|
case USE:
|
8038 |
|
|
/* invar * invar is a giv, but attempt to simplify it somehow. */
|
8039 |
|
|
if (GET_CODE (arg1) != CONST_INT)
|
8040 |
|
|
return NULL_RTX;
|
8041 |
|
|
|
8042 |
|
|
arg0 = XEXP (arg0, 0);
|
8043 |
|
|
if (GET_CODE (arg0) == MULT)
|
8044 |
|
|
{
|
8045 |
|
|
/* (invar_0 * invar_1) * invar_2. Associate. */
|
8046 |
|
|
return simplify_giv_expr (loop,
|
8047 |
|
|
gen_rtx_MULT (mode,
|
8048 |
|
|
XEXP (arg0, 0),
|
8049 |
|
|
gen_rtx_MULT (mode,
|
8050 |
|
|
XEXP (arg0,
|
8051 |
|
|
1),
|
8052 |
|
|
arg1)),
|
8053 |
|
|
ext_val, benefit);
|
8054 |
|
|
}
|
8055 |
|
|
/* Propagate the MULT expressions to the innermost nodes. */
|
8056 |
|
|
else if (GET_CODE (arg0) == PLUS)
|
8057 |
|
|
{
|
8058 |
|
|
/* (invar_0 + invar_1) * invar_2. Distribute. */
|
8059 |
|
|
return simplify_giv_expr (loop,
|
8060 |
|
|
gen_rtx_PLUS (mode,
|
8061 |
|
|
gen_rtx_MULT (mode,
|
8062 |
|
|
XEXP (arg0,
|
8063 |
|
|
0),
|
8064 |
|
|
arg1),
|
8065 |
|
|
gen_rtx_MULT (mode,
|
8066 |
|
|
XEXP (arg0,
|
8067 |
|
|
1),
|
8068 |
|
|
arg1)),
|
8069 |
|
|
ext_val, benefit);
|
8070 |
|
|
}
|
8071 |
|
|
return gen_rtx_USE (mode, gen_rtx_MULT (mode, arg0, arg1));
|
8072 |
|
|
|
8073 |
|
|
case MULT:
|
8074 |
|
|
/* (a * invar_1) * invar_2. Associate. */
|
8075 |
|
|
return simplify_giv_expr (loop,
|
8076 |
|
|
gen_rtx_MULT (mode,
|
8077 |
|
|
XEXP (arg0, 0),
|
8078 |
|
|
gen_rtx_MULT (mode,
|
8079 |
|
|
XEXP (arg0, 1),
|
8080 |
|
|
arg1)),
|
8081 |
|
|
ext_val, benefit);
|
8082 |
|
|
|
8083 |
|
|
case PLUS:
|
8084 |
|
|
/* (a + invar_1) * invar_2. Distribute. */
|
8085 |
|
|
return simplify_giv_expr (loop,
|
8086 |
|
|
gen_rtx_PLUS (mode,
|
8087 |
|
|
gen_rtx_MULT (mode,
|
8088 |
|
|
XEXP (arg0, 0),
|
8089 |
|
|
arg1),
|
8090 |
|
|
gen_rtx_MULT (mode,
|
8091 |
|
|
XEXP (arg0, 1),
|
8092 |
|
|
arg1)),
|
8093 |
|
|
ext_val, benefit);
|
8094 |
|
|
|
8095 |
|
|
default:
|
8096 |
|
|
gcc_unreachable ();
|
8097 |
|
|
}
|
8098 |
|
|
|
8099 |
|
|
case ASHIFT:
|
8100 |
|
|
/* Shift by constant is multiply by power of two. */
|
8101 |
|
|
if (GET_CODE (XEXP (x, 1)) != CONST_INT)
|
8102 |
|
|
return 0;
|
8103 |
|
|
|
8104 |
|
|
return
|
8105 |
|
|
simplify_giv_expr (loop,
|
8106 |
|
|
gen_rtx_MULT (mode,
|
8107 |
|
|
XEXP (x, 0),
|
8108 |
|
|
GEN_INT ((HOST_WIDE_INT) 1
|
8109 |
|
|
<< INTVAL (XEXP (x, 1)))),
|
8110 |
|
|
ext_val, benefit);
|
8111 |
|
|
|
8112 |
|
|
case NEG:
|
8113 |
|
|
/* "-a" is "a * (-1)" */
|
8114 |
|
|
return simplify_giv_expr (loop,
|
8115 |
|
|
gen_rtx_MULT (mode, XEXP (x, 0), constm1_rtx),
|
8116 |
|
|
ext_val, benefit);
|
8117 |
|
|
|
8118 |
|
|
case NOT:
|
8119 |
|
|
/* "~a" is "-a - 1". Silly, but easy. */
|
8120 |
|
|
return simplify_giv_expr (loop,
|
8121 |
|
|
gen_rtx_MINUS (mode,
|
8122 |
|
|
gen_rtx_NEG (mode, XEXP (x, 0)),
|
8123 |
|
|
const1_rtx),
|
8124 |
|
|
ext_val, benefit);
|
8125 |
|
|
|
8126 |
|
|
case USE:
|
8127 |
|
|
/* Already in proper form for invariant. */
|
8128 |
|
|
return x;
|
8129 |
|
|
|
8130 |
|
|
case SIGN_EXTEND:
|
8131 |
|
|
case ZERO_EXTEND:
|
8132 |
|
|
case TRUNCATE:
|
8133 |
|
|
/* Conditionally recognize extensions of simple IVs. After we've
|
8134 |
|
|
computed loop traversal counts and verified the range of the
|
8135 |
|
|
source IV, we'll reevaluate this as a GIV. */
|
8136 |
|
|
if (*ext_val == NULL_RTX)
|
8137 |
|
|
{
|
8138 |
|
|
arg0 = simplify_giv_expr (loop, XEXP (x, 0), ext_val, benefit);
|
8139 |
|
|
if (arg0 && *ext_val == NULL_RTX && REG_P (arg0))
|
8140 |
|
|
{
|
8141 |
|
|
*ext_val = gen_rtx_fmt_e (GET_CODE (x), mode, arg0);
|
8142 |
|
|
return arg0;
|
8143 |
|
|
}
|
8144 |
|
|
}
|
8145 |
|
|
goto do_default;
|
8146 |
|
|
|
8147 |
|
|
case REG:
|
8148 |
|
|
/* If this is a new register, we can't deal with it. */
|
8149 |
|
|
if (REGNO (x) >= max_reg_before_loop)
|
8150 |
|
|
return 0;
|
8151 |
|
|
|
8152 |
|
|
/* Check for biv or giv. */
|
8153 |
|
|
switch (REG_IV_TYPE (ivs, REGNO (x)))
|
8154 |
|
|
{
|
8155 |
|
|
case BASIC_INDUCT:
|
8156 |
|
|
return x;
|
8157 |
|
|
case GENERAL_INDUCT:
|
8158 |
|
|
{
|
8159 |
|
|
struct induction *v = REG_IV_INFO (ivs, REGNO (x));
|
8160 |
|
|
|
8161 |
|
|
/* Form expression from giv and add benefit. Ensure this giv
|
8162 |
|
|
can derive another and subtract any needed adjustment if so. */
|
8163 |
|
|
|
8164 |
|
|
/* Increasing the benefit here is risky. The only case in which it
|
8165 |
|
|
is arguably correct is if this is the only use of V. In other
|
8166 |
|
|
cases, this will artificially inflate the benefit of the current
|
8167 |
|
|
giv, and lead to suboptimal code. Thus, it is disabled, since
|
8168 |
|
|
potentially not reducing an only marginally beneficial giv is
|
8169 |
|
|
less harmful than reducing many givs that are not really
|
8170 |
|
|
beneficial. */
|
8171 |
|
|
{
|
8172 |
|
|
rtx single_use = regs->array[REGNO (x)].single_usage;
|
8173 |
|
|
if (single_use && single_use != const0_rtx)
|
8174 |
|
|
*benefit += v->benefit;
|
8175 |
|
|
}
|
8176 |
|
|
|
8177 |
|
|
if (v->cant_derive)
|
8178 |
|
|
return 0;
|
8179 |
|
|
|
8180 |
|
|
tem = gen_rtx_PLUS (mode, gen_rtx_MULT (mode,
|
8181 |
|
|
v->src_reg, v->mult_val),
|
8182 |
|
|
v->add_val);
|
8183 |
|
|
|
8184 |
|
|
if (v->derive_adjustment)
|
8185 |
|
|
tem = gen_rtx_MINUS (mode, tem, v->derive_adjustment);
|
8186 |
|
|
arg0 = simplify_giv_expr (loop, tem, ext_val, benefit);
|
8187 |
|
|
if (*ext_val)
|
8188 |
|
|
{
|
8189 |
|
|
if (!v->ext_dependent)
|
8190 |
|
|
return arg0;
|
8191 |
|
|
}
|
8192 |
|
|
else
|
8193 |
|
|
{
|
8194 |
|
|
*ext_val = v->ext_dependent;
|
8195 |
|
|
return arg0;
|
8196 |
|
|
}
|
8197 |
|
|
return 0;
|
8198 |
|
|
}
|
8199 |
|
|
|
8200 |
|
|
default:
|
8201 |
|
|
do_default:
|
8202 |
|
|
/* If it isn't an induction variable, and it is invariant, we
|
8203 |
|
|
may be able to simplify things further by looking through
|
8204 |
|
|
the bits we just moved outside the loop. */
|
8205 |
|
|
if (loop_invariant_p (loop, x) == 1)
|
8206 |
|
|
{
|
8207 |
|
|
struct movable *m;
|
8208 |
|
|
struct loop_movables *movables = LOOP_MOVABLES (loop);
|
8209 |
|
|
|
8210 |
|
|
for (m = movables->head; m; m = m->next)
|
8211 |
|
|
if (rtx_equal_p (x, m->set_dest))
|
8212 |
|
|
{
|
8213 |
|
|
/* Ok, we found a match. Substitute and simplify. */
|
8214 |
|
|
|
8215 |
|
|
/* If we match another movable, we must use that, as
|
8216 |
|
|
this one is going away. */
|
8217 |
|
|
if (m->match)
|
8218 |
|
|
return simplify_giv_expr (loop, m->match->set_dest,
|
8219 |
|
|
ext_val, benefit);
|
8220 |
|
|
|
8221 |
|
|
/* If consec is nonzero, this is a member of a group of
|
8222 |
|
|
instructions that were moved together. We handle this
|
8223 |
|
|
case only to the point of seeking to the last insn and
|
8224 |
|
|
looking for a REG_EQUAL. Fail if we don't find one. */
|
8225 |
|
|
if (m->consec != 0)
|
8226 |
|
|
{
|
8227 |
|
|
int i = m->consec;
|
8228 |
|
|
tem = m->insn;
|
8229 |
|
|
do
|
8230 |
|
|
{
|
8231 |
|
|
tem = NEXT_INSN (tem);
|
8232 |
|
|
}
|
8233 |
|
|
while (--i > 0);
|
8234 |
|
|
|
8235 |
|
|
tem = find_reg_note (tem, REG_EQUAL, NULL_RTX);
|
8236 |
|
|
if (tem)
|
8237 |
|
|
tem = XEXP (tem, 0);
|
8238 |
|
|
}
|
8239 |
|
|
else
|
8240 |
|
|
{
|
8241 |
|
|
tem = single_set (m->insn);
|
8242 |
|
|
if (tem)
|
8243 |
|
|
tem = SET_SRC (tem);
|
8244 |
|
|
}
|
8245 |
|
|
|
8246 |
|
|
if (tem)
|
8247 |
|
|
{
|
8248 |
|
|
/* What we are most interested in is pointer
|
8249 |
|
|
arithmetic on invariants -- only take
|
8250 |
|
|
patterns we may be able to do something with. */
|
8251 |
|
|
if (GET_CODE (tem) == PLUS
|
8252 |
|
|
|| GET_CODE (tem) == MULT
|
8253 |
|
|
|| GET_CODE (tem) == ASHIFT
|
8254 |
|
|
|| GET_CODE (tem) == CONST_INT
|
8255 |
|
|
|| GET_CODE (tem) == SYMBOL_REF)
|
8256 |
|
|
{
|
8257 |
|
|
tem = simplify_giv_expr (loop, tem, ext_val,
|
8258 |
|
|
benefit);
|
8259 |
|
|
if (tem)
|
8260 |
|
|
return tem;
|
8261 |
|
|
}
|
8262 |
|
|
else if (GET_CODE (tem) == CONST
|
8263 |
|
|
&& GET_CODE (XEXP (tem, 0)) == PLUS
|
8264 |
|
|
&& GET_CODE (XEXP (XEXP (tem, 0), 0)) == SYMBOL_REF
|
8265 |
|
|
&& GET_CODE (XEXP (XEXP (tem, 0), 1)) == CONST_INT)
|
8266 |
|
|
{
|
8267 |
|
|
tem = simplify_giv_expr (loop, XEXP (tem, 0),
|
8268 |
|
|
ext_val, benefit);
|
8269 |
|
|
if (tem)
|
8270 |
|
|
return tem;
|
8271 |
|
|
}
|
8272 |
|
|
}
|
8273 |
|
|
break;
|
8274 |
|
|
}
|
8275 |
|
|
}
|
8276 |
|
|
break;
|
8277 |
|
|
}
|
8278 |
|
|
|
8279 |
|
|
/* Fall through to general case. */
|
8280 |
|
|
default:
|
8281 |
|
|
/* If invariant, return as USE (unless CONST_INT).
|
8282 |
|
|
Otherwise, not giv. */
|
8283 |
|
|
if (GET_CODE (x) == USE)
|
8284 |
|
|
x = XEXP (x, 0);
|
8285 |
|
|
|
8286 |
|
|
if (loop_invariant_p (loop, x) == 1)
|
8287 |
|
|
{
|
8288 |
|
|
if (GET_CODE (x) == CONST_INT)
|
8289 |
|
|
return x;
|
8290 |
|
|
if (GET_CODE (x) == CONST
|
8291 |
|
|
&& GET_CODE (XEXP (x, 0)) == PLUS
|
8292 |
|
|
&& GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
|
8293 |
|
|
&& GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
|
8294 |
|
|
x = XEXP (x, 0);
|
8295 |
|
|
return gen_rtx_USE (mode, x);
|
8296 |
|
|
}
|
8297 |
|
|
else
|
8298 |
|
|
return 0;
|
8299 |
|
|
}
|
8300 |
|
|
}
|
8301 |
|
|
|
8302 |
|
|
/* This routine folds invariants such that there is only ever one
|
8303 |
|
|
CONST_INT in the summation. It is only used by simplify_giv_expr. */
|
8304 |
|
|
|
8305 |
|
|
static rtx
|
8306 |
|
|
sge_plus_constant (rtx x, rtx c)
|
8307 |
|
|
{
|
8308 |
|
|
if (GET_CODE (x) == CONST_INT)
|
8309 |
|
|
return GEN_INT (INTVAL (x) + INTVAL (c));
|
8310 |
|
|
else if (GET_CODE (x) != PLUS)
|
8311 |
|
|
return gen_rtx_PLUS (GET_MODE (x), x, c);
|
8312 |
|
|
else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
|
8313 |
|
|
{
|
8314 |
|
|
return gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
|
8315 |
|
|
GEN_INT (INTVAL (XEXP (x, 1)) + INTVAL (c)));
|
8316 |
|
|
}
|
8317 |
|
|
else if (GET_CODE (XEXP (x, 0)) == PLUS
|
8318 |
|
|
|| GET_CODE (XEXP (x, 1)) != PLUS)
|
8319 |
|
|
{
|
8320 |
|
|
return gen_rtx_PLUS (GET_MODE (x),
|
8321 |
|
|
sge_plus_constant (XEXP (x, 0), c), XEXP (x, 1));
|
8322 |
|
|
}
|
8323 |
|
|
else
|
8324 |
|
|
{
|
8325 |
|
|
return gen_rtx_PLUS (GET_MODE (x),
|
8326 |
|
|
sge_plus_constant (XEXP (x, 1), c), XEXP (x, 0));
|
8327 |
|
|
}
|
8328 |
|
|
}
|
8329 |
|
|
|
8330 |
|
|
static rtx
|
8331 |
|
|
sge_plus (enum machine_mode mode, rtx x, rtx y)
|
8332 |
|
|
{
|
8333 |
|
|
while (GET_CODE (y) == PLUS)
|
8334 |
|
|
{
|
8335 |
|
|
rtx a = XEXP (y, 0);
|
8336 |
|
|
if (GET_CODE (a) == CONST_INT)
|
8337 |
|
|
x = sge_plus_constant (x, a);
|
8338 |
|
|
else
|
8339 |
|
|
x = gen_rtx_PLUS (mode, x, a);
|
8340 |
|
|
y = XEXP (y, 1);
|
8341 |
|
|
}
|
8342 |
|
|
if (GET_CODE (y) == CONST_INT)
|
8343 |
|
|
x = sge_plus_constant (x, y);
|
8344 |
|
|
else
|
8345 |
|
|
x = gen_rtx_PLUS (mode, x, y);
|
8346 |
|
|
return x;
|
8347 |
|
|
}
|
8348 |
|
|
|
8349 |
|
|
/* Help detect a giv that is calculated by several consecutive insns;
|
8350 |
|
|
for example,
|
8351 |
|
|
giv = biv * M
|
8352 |
|
|
giv = giv + A
|
8353 |
|
|
The caller has already identified the first insn P as having a giv as dest;
|
8354 |
|
|
we check that all other insns that set the same register follow
|
8355 |
|
|
immediately after P, that they alter nothing else,
|
8356 |
|
|
and that the result of the last is still a giv.
|
8357 |
|
|
|
8358 |
|
|
The value is 0 if the reg set in P is not really a giv.
|
8359 |
|
|
Otherwise, the value is the amount gained by eliminating
|
8360 |
|
|
all the consecutive insns that compute the value.
|
8361 |
|
|
|
8362 |
|
|
FIRST_BENEFIT is the amount gained by eliminating the first insn, P.
|
8363 |
|
|
SRC_REG is the reg of the biv; DEST_REG is the reg of the giv.
|
8364 |
|
|
|
8365 |
|
|
The coefficients of the ultimate giv value are stored in
|
8366 |
|
|
*MULT_VAL and *ADD_VAL. */
|
8367 |
|
|
|
8368 |
|
|
static int
consec_sets_giv (const struct loop *loop, int first_benefit, rtx p,
		 rtx src_reg, rtx dest_reg, rtx *add_val, rtx *mult_val,
		 rtx *ext_val, rtx *last_consec_insn)
{
  struct loop_ivs *ivs = LOOP_IVS (loop);
  struct loop_regs *regs = LOOP_REGS (loop);
  int count;			/* Remaining sets of DEST_REG to account for.  */
  enum rtx_code code;
  int benefit;
  rtx temp;
  rtx set;

  /* Indicate that this is a giv so that we can update the value produced in
     each insn of the multi-insn sequence.

     This induction structure will be used only by the call to
     general_induction_var below, so we can allocate it on our stack.
     If this is a giv, our caller will replace the induct var entry with
     a new induction structure.  */
  struct induction *v;

  /* Bail out if DEST_REG is already classified as an induction variable;
     we only handle the case where we are discovering it now.  */
  if (REG_IV_TYPE (ivs, REGNO (dest_reg)) != UNKNOWN_INDUCT)
    return 0;

  v = alloca (sizeof (struct induction));
  v->src_reg = src_reg;
  v->mult_val = *mult_val;
  v->add_val = *add_val;
  v->benefit = first_benefit;
  v->cant_derive = 0;
  v->derive_adjustment = 0;
  v->ext_dependent = NULL_RTX;

  /* Temporarily register DEST_REG as a giv so that the recursive
     general_induction_var calls below see the partial value.  This is
     undone on every exit path before returning.  */
  REG_IV_TYPE (ivs, REGNO (dest_reg)) = GENERAL_INDUCT;
  REG_IV_INFO (ivs, REGNO (dest_reg)) = v;

  /* The first set was already analyzed by the caller, hence the -1.  */
  count = regs->array[REGNO (dest_reg)].n_times_set - 1;

  while (count > 0)
    {
      p = NEXT_INSN (p);
      code = GET_CODE (p);

      /* If libcall, skip to end of call sequence.  */
      if (code == INSN && (temp = find_reg_note (p, REG_LIBCALL, NULL_RTX)))
	p = XEXP (temp, 0);

      /* Accept another set of DEST_REG only if its source (or REG_EQUAL
	 note) is itself a giv based on the same biv register.  */
      if (code == INSN
	  && (set = single_set (p))
	  && REG_P (SET_DEST (set))
	  && SET_DEST (set) == dest_reg
	  && (general_induction_var (loop, SET_SRC (set), &src_reg,
				     add_val, mult_val, ext_val, 0,
				     &benefit, VOIDmode)
	      /* Giv created by equivalent expression.  */
	      || ((temp = find_reg_note (p, REG_EQUAL, NULL_RTX))
		  && general_induction_var (loop, XEXP (temp, 0), &src_reg,
					    add_val, mult_val, ext_val, 0,
					    &benefit, VOIDmode)))
	  && src_reg == v->src_reg)
	{
	  if (find_reg_note (p, REG_RETVAL, NULL_RTX))
	    benefit += libcall_benefit (p);

	  /* Accumulate the latest coefficients and benefit.  */
	  count--;
	  v->mult_val = *mult_val;
	  v->add_val = *add_val;
	  v->benefit += benefit;
	}
      else if (code != NOTE)
	{
	  /* Allow insns that set something other than this giv to a
	     constant.  Such insns are needed on machines which cannot
	     include long constants and should not disqualify a giv.  */
	  if (code == INSN
	      && (set = single_set (p))
	      && SET_DEST (set) != dest_reg
	      && CONSTANT_P (SET_SRC (set)))
	    continue;

	  /* Any other intervening insn disqualifies the giv; undo the
	     temporary classification before failing.  */
	  REG_IV_TYPE (ivs, REGNO (dest_reg)) = UNKNOWN_INDUCT;
	  return 0;
	}
    }

  /* Success: restore the IV table (the caller installs the real entry)
     and report the final insn of the sequence and the total benefit.  */
  REG_IV_TYPE (ivs, REGNO (dest_reg)) = UNKNOWN_INDUCT;
  *last_consec_insn = p;
  return v->benefit;
}
|
8458 |
|
|
|
8459 |
|
|
/* Return an rtx, if any, that expresses giv G2 as a function of the register
|
8460 |
|
|
represented by G1. If no such expression can be found, or it is clear that
|
8461 |
|
|
it cannot possibly be a valid address, 0 is returned.
|
8462 |
|
|
|
8463 |
|
|
To perform the computation, we note that
|
8464 |
|
|
G1 = x * v + a and
|
8465 |
|
|
G2 = y * v + b
|
8466 |
|
|
where `v' is the biv.
|
8467 |
|
|
|
8468 |
|
|
So G2 = (y/b) * G1 + (b - a*y/x).
|
8469 |
|
|
|
8470 |
|
|
Note that MULT = y/x.
|
8471 |
|
|
|
8472 |
|
|
Update: A and B are now allowed to be additive expressions such that
|
8473 |
|
|
B contains all variables in A. That is, computing B-A will not require
|
8474 |
|
|
subtracting variables. */
|
8475 |
|
|
|
8476 |
|
|
/* Subroutine of express_from: try to compute B - A*MULT, where A and B are
   the addends of two givs and MULT is a CONST_INT (possibly const0_rtx or
   const1_rtx).  Returns NULL_RTX when the difference cannot be formed
   without subtracting registers.  */

static rtx
express_from_1 (rtx a, rtx b, rtx mult)
{
  /* If MULT is zero, then A*MULT is zero, and our expression is B.  */

  if (mult == const0_rtx)
    return b;

  /* If MULT is not 1, we cannot handle A with non-constants, since we
     would then be required to subtract multiples of the registers in A.
     This is theoretically possible, and may even apply to some Fortran
     constructs, but it is a lot of work and we do not attempt it here.  */

  if (mult != const1_rtx && GET_CODE (a) != CONST_INT)
    return NULL_RTX;

  /* In general these structures are sorted top to bottom (down the PLUS
     chain), but not left to right across the PLUS.  If B is a higher
     order giv than A, we can strip one level and recurse.  If A is higher
     order, we'll eventually bail out, but won't know that until the end.
     If they are the same, we'll strip one level around this loop.  */

  while (GET_CODE (a) == PLUS && GET_CODE (b) == PLUS)
    {
      rtx ra, rb, oa, ob, tmp;

      /* Normalize so that RA/RB are the non-PLUS operands and OA/OB hold
	 the rest of the respective chains (if either side has a nested
	 PLUS, swap it into the "other" slot).  */
      ra = XEXP (a, 0), oa = XEXP (a, 1);
      if (GET_CODE (ra) == PLUS)
	tmp = ra, ra = oa, oa = tmp;

      rb = XEXP (b, 0), ob = XEXP (b, 1);
      if (GET_CODE (rb) == PLUS)
	tmp = rb, rb = ob, ob = tmp;

      if (rtx_equal_p (ra, rb))
	/* We matched: remove one reg completely.  */
	a = oa, b = ob;
      else if (GET_CODE (ob) != PLUS && rtx_equal_p (ra, ob))
	/* An alternate match.  */
	a = oa, b = rb;
      else if (GET_CODE (oa) != PLUS && rtx_equal_p (oa, rb))
	/* An alternate match.  */
	a = ra, b = ob;
      else
	{
	  /* Indicates an extra register in B.  Strip one level from B and
	     recurse, hoping B was the higher order expression.  */
	  ob = express_from_1 (a, ob, mult);
	  if (ob == NULL_RTX)
	    return NULL_RTX;
	  return gen_rtx_PLUS (GET_MODE (b), rb, ob);
	}
    }

  /* Here we are at the last level of A, go through the cases hoping to
     get rid of everything but a constant.  */

  if (GET_CODE (a) == PLUS)
    {
      rtx ra, oa;

      /* A still has two terms; one of them must equal B, and the other
	 must be a constant for the subtraction to succeed.  */
      ra = XEXP (a, 0), oa = XEXP (a, 1);
      if (rtx_equal_p (oa, b))
	oa = ra;
      else if (!rtx_equal_p (ra, b))
	return NULL_RTX;

      if (GET_CODE (oa) != CONST_INT)
	return NULL_RTX;

      return GEN_INT (-INTVAL (oa) * INTVAL (mult));
    }
  else if (GET_CODE (a) == CONST_INT)
    {
      return plus_constant (b, -INTVAL (a) * INTVAL (mult));
    }
  else if (CONSTANT_P (a))
    {
      /* Some constants (e.g. CONST_INT) carry VOIDmode; prefer B's mode
	 when it has one.  */
      enum machine_mode mode_a = GET_MODE (a);
      enum machine_mode mode_b = GET_MODE (b);
      enum machine_mode mode = mode_b == VOIDmode ? mode_a : mode_b;
      return simplify_gen_binary (MINUS, mode, b, a);
    }
  else if (GET_CODE (b) == PLUS)
    {
      /* A matches one operand of B; the remainder is the difference.
	 (MULT is known to be const1_rtx here, per the check above.)  */
      if (rtx_equal_p (a, XEXP (b, 0)))
	return XEXP (b, 1);
      else if (rtx_equal_p (a, XEXP (b, 1)))
	return XEXP (b, 0);
      else
	return NULL_RTX;
    }
  else if (rtx_equal_p (a, b))
    return const0_rtx;

  return NULL_RTX;
}
|
8573 |
|
|
|
8574 |
|
|
/* Express giv G2 as a linear function of G1's destination register,
   i.e. return an rtx of the form G1->dest_reg * MULT + ADD, or NULL_RTX
   if no such expression can be formed.  */

static rtx
express_from (struct induction *g1, struct induction *g2)
{
  rtx mult, add;

  /* The value that G1 will be multiplied by must be a constant integer.  Also,
     the only chance we have of getting a valid address is if b*c/a (see above
     for notation) is also an integer.  */
  if (GET_CODE (g1->mult_val) == CONST_INT
      && GET_CODE (g2->mult_val) == CONST_INT)
    {
      /* Guard against division by zero, the INT_MIN / -1 overflow case,
	 and non-integral ratios.  */
      if (g1->mult_val == const0_rtx
	  || (g1->mult_val == constm1_rtx
	      && INTVAL (g2->mult_val)
		 == (HOST_WIDE_INT) 1 << (HOST_BITS_PER_WIDE_INT - 1))
	  || INTVAL (g2->mult_val) % INTVAL (g1->mult_val) != 0)
	return NULL_RTX;
      mult = GEN_INT (INTVAL (g2->mult_val) / INTVAL (g1->mult_val));
    }
  else if (rtx_equal_p (g1->mult_val, g2->mult_val))
    mult = const1_rtx;
  else
    {
      /* ??? Find out if the one is a multiple of the other?  */
      return NULL_RTX;
    }

  add = express_from_1 (g1->add_val, g2->add_val, mult);
  if (add == NULL_RTX)
    {
      /* Failed.  If we've got a multiplication factor between G1 and G2,
	 scale G1's addend and try again.  */
      if (INTVAL (mult) > 1)
	{
	  rtx g1_add_val = g1->add_val;
	  /* Fold the scale into an existing constant multiplier when
	     possible, otherwise wrap the addend in a new MULT.  */
	  if (GET_CODE (g1_add_val) == MULT
	      && GET_CODE (XEXP (g1_add_val, 1)) == CONST_INT)
	    {
	      HOST_WIDE_INT m;
	      m = INTVAL (mult) * INTVAL (XEXP (g1_add_val, 1));
	      g1_add_val = gen_rtx_MULT (GET_MODE (g1_add_val),
					 XEXP (g1_add_val, 0), GEN_INT (m));
	    }
	  else
	    {
	      g1_add_val = gen_rtx_MULT (GET_MODE (g1_add_val), g1_add_val,
					 mult);
	    }

	  add = express_from_1 (g1_add_val, g2->add_val, const1_rtx);
	}
    }
  if (add == NULL_RTX)
    return NULL_RTX;

  /* Form simplified final result.  */
  if (mult == const0_rtx)
    return add;
  else if (mult == const1_rtx)
    mult = g1->dest_reg;
  else
    mult = gen_rtx_MULT (g2->mode, g1->dest_reg, mult);

  if (add == const0_rtx)
    return mult;
  else
    {
      /* Canonicalize so that a constant term in ADD ends up as the
	 outermost addend.  */
      if (GET_CODE (add) == PLUS
	  && CONSTANT_P (XEXP (add, 1)))
	{
	  rtx tem = XEXP (add, 1);
	  mult = gen_rtx_PLUS (g2->mode, mult, XEXP (add, 0));
	  add = tem;
	}

      return gen_rtx_PLUS (g2->mode, mult, add);
    }
}
|
8652 |
|
|
|
8653 |
|
|
/* Return an rtx, if any, that expresses giv G2 as a function of the register
|
8654 |
|
|
represented by G1. This indicates that G2 should be combined with G1 and
|
8655 |
|
|
that G2 can use (either directly or via an address expression) a register
|
8656 |
|
|
used to represent G1. */
|
8657 |
|
|
|
8658 |
|
|
static rtx
|
8659 |
|
|
combine_givs_p (struct induction *g1, struct induction *g2)
|
8660 |
|
|
{
|
8661 |
|
|
rtx comb, ret;
|
8662 |
|
|
|
8663 |
|
|
/* With the introduction of ext dependent givs, we must care for modes.
|
8664 |
|
|
G2 must not use a wider mode than G1. */
|
8665 |
|
|
if (GET_MODE_SIZE (g1->mode) < GET_MODE_SIZE (g2->mode))
|
8666 |
|
|
return NULL_RTX;
|
8667 |
|
|
|
8668 |
|
|
ret = comb = express_from (g1, g2);
|
8669 |
|
|
if (comb == NULL_RTX)
|
8670 |
|
|
return NULL_RTX;
|
8671 |
|
|
if (g1->mode != g2->mode)
|
8672 |
|
|
ret = gen_lowpart (g2->mode, comb);
|
8673 |
|
|
|
8674 |
|
|
/* If these givs are identical, they can be combined. We use the results
|
8675 |
|
|
of express_from because the addends are not in a canonical form, so
|
8676 |
|
|
rtx_equal_p is a weaker test. */
|
8677 |
|
|
/* But don't combine a DEST_REG giv with a DEST_ADDR giv; we want the
|
8678 |
|
|
combination to be the other way round. */
|
8679 |
|
|
if (comb == g1->dest_reg
|
8680 |
|
|
&& (g1->giv_type == DEST_REG || g2->giv_type == DEST_ADDR))
|
8681 |
|
|
{
|
8682 |
|
|
return ret;
|
8683 |
|
|
}
|
8684 |
|
|
|
8685 |
|
|
/* If G2 can be expressed as a function of G1 and that function is valid
|
8686 |
|
|
as an address and no more expensive than using a register for G2,
|
8687 |
|
|
the expression of G2 in terms of G1 can be used. */
|
8688 |
|
|
if (ret != NULL_RTX
|
8689 |
|
|
&& g2->giv_type == DEST_ADDR
|
8690 |
|
|
&& memory_address_p (GET_MODE (g2->mem), ret))
|
8691 |
|
|
return ret;
|
8692 |
|
|
|
8693 |
|
|
return NULL_RTX;
|
8694 |
|
|
}
|
8695 |
|
|
|
8696 |
|
|
/* See if BL is monotonic and has a constant per-iteration increment.
|
8697 |
|
|
Return the increment if so, otherwise return 0. */
|
8698 |
|
|
|
8699 |
|
|
static HOST_WIDE_INT
|
8700 |
|
|
get_monotonic_increment (struct iv_class *bl)
|
8701 |
|
|
{
|
8702 |
|
|
struct induction *v;
|
8703 |
|
|
rtx incr;
|
8704 |
|
|
|
8705 |
|
|
/* Get the total increment and check that it is constant. */
|
8706 |
|
|
incr = biv_total_increment (bl);
|
8707 |
|
|
if (incr == 0 || GET_CODE (incr) != CONST_INT)
|
8708 |
|
|
return 0;
|
8709 |
|
|
|
8710 |
|
|
for (v = bl->biv; v != 0; v = v->next_iv)
|
8711 |
|
|
{
|
8712 |
|
|
if (GET_CODE (v->add_val) != CONST_INT)
|
8713 |
|
|
return 0;
|
8714 |
|
|
|
8715 |
|
|
if (INTVAL (v->add_val) < 0 && INTVAL (incr) >= 0)
|
8716 |
|
|
return 0;
|
8717 |
|
|
|
8718 |
|
|
if (INTVAL (v->add_val) > 0 && INTVAL (incr) <= 0)
|
8719 |
|
|
return 0;
|
8720 |
|
|
}
|
8721 |
|
|
return INTVAL (incr);
|
8722 |
|
|
}
|
8723 |
|
|
|
8724 |
|
|
|
8725 |
|
|
/* Subroutine of biv_fits_mode_p.  Return true if biv BL, when biased by
   BIAS, will never exceed the unsigned range of MODE.  LOOP is the loop
   to which the biv belongs and INCR is its per-iteration increment.  */

static bool
biased_biv_fits_mode_p (const struct loop *loop, struct iv_class *bl,
			HOST_WIDE_INT incr, enum machine_mode mode,
			unsigned HOST_WIDE_INT bias)
{
  unsigned HOST_WIDE_INT initial, maximum, span, delta;

  /* We need to be able to manipulate MODE-size constants.  */
  if (HOST_BITS_PER_WIDE_INT < GET_MODE_BITSIZE (mode))
    return false;

  /* The number of loop iterations must be constant.  */
  if (LOOP_INFO (loop)->n_iterations == 0)
    return false;

  /* So must the biv's initial value.  */
  if (bl->initial_value == 0 || GET_CODE (bl->initial_value) != CONST_INT)
    return false;

  /* All arithmetic below is done in unsigned HOST_WIDE_INT, so any
     wrap-around in this addition is well-defined and intentional.  */
  initial = bias + INTVAL (bl->initial_value);
  maximum = GET_MODE_MASK (mode);

  /* Make sure that the initial value is within range.  */
  if (initial > maximum)
    return false;

  /* Set up DELTA and SPAN such that the number of iterations * DELTA
     (calculated to arbitrary precision) must be <= SPAN.  */
  if (incr < 0)
    {
      /* Decrementing biv: it may travel at most INITIAL steps before
	 dropping below zero.  */
      delta = -incr;
      span = initial;
    }
  else
    {
      delta = incr;
      /* Handle the special case in which MAXIMUM is the largest
	 unsigned HOST_WIDE_INT and INITIAL is 0.  */
      if (maximum + 1 == initial)
	span = LOOP_INFO (loop)->n_iterations * delta;
      else
	span = maximum + 1 - initial;
    }

  /* Division sidesteps overflow that "n_iterations * delta" could incur.  */
  return (span / LOOP_INFO (loop)->n_iterations >= delta);
}
|
8774 |
|
|
|
8775 |
|
|
|
8776 |
|
|
/* Return true if biv BL will never exceed the bounds of MODE. LOOP is
|
8777 |
|
|
the loop to which BL belongs and INCR is its per-iteration increment.
|
8778 |
|
|
UNSIGNEDP is true if the biv should be treated as unsigned. */
|
8779 |
|
|
|
8780 |
|
|
static bool
|
8781 |
|
|
biv_fits_mode_p (const struct loop *loop, struct iv_class *bl,
|
8782 |
|
|
HOST_WIDE_INT incr, enum machine_mode mode, bool unsignedp)
|
8783 |
|
|
{
|
8784 |
|
|
struct loop_info *loop_info;
|
8785 |
|
|
unsigned HOST_WIDE_INT bias;
|
8786 |
|
|
|
8787 |
|
|
/* A biv's value will always be limited to its natural mode.
|
8788 |
|
|
Larger modes will observe the same wrap-around. */
|
8789 |
|
|
if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (bl->biv->src_reg)))
|
8790 |
|
|
mode = GET_MODE (bl->biv->src_reg);
|
8791 |
|
|
|
8792 |
|
|
loop_info = LOOP_INFO (loop);
|
8793 |
|
|
|
8794 |
|
|
bias = (unsignedp ? 0 : (GET_MODE_MASK (mode) >> 1) + 1);
|
8795 |
|
|
if (biased_biv_fits_mode_p (loop, bl, incr, mode, bias))
|
8796 |
|
|
return true;
|
8797 |
|
|
|
8798 |
|
|
if (mode == GET_MODE (bl->biv->src_reg)
|
8799 |
|
|
&& bl->biv->src_reg == loop_info->iteration_var
|
8800 |
|
|
&& loop_info->comparison_value
|
8801 |
|
|
&& loop_invariant_p (loop, loop_info->comparison_value))
|
8802 |
|
|
{
|
8803 |
|
|
/* If the increment is +1, and the exit test is a <, the BIV
|
8804 |
|
|
cannot overflow. (For <=, we have the problematic case that
|
8805 |
|
|
the comparison value might be the maximum value of the range.) */
|
8806 |
|
|
if (incr == 1)
|
8807 |
|
|
{
|
8808 |
|
|
if (loop_info->comparison_code == LT)
|
8809 |
|
|
return true;
|
8810 |
|
|
if (loop_info->comparison_code == LTU && unsignedp)
|
8811 |
|
|
return true;
|
8812 |
|
|
}
|
8813 |
|
|
|
8814 |
|
|
/* Likewise for increment -1 and exit test >. */
|
8815 |
|
|
if (incr == -1)
|
8816 |
|
|
{
|
8817 |
|
|
if (loop_info->comparison_code == GT)
|
8818 |
|
|
return true;
|
8819 |
|
|
if (loop_info->comparison_code == GTU && unsignedp)
|
8820 |
|
|
return true;
|
8821 |
|
|
}
|
8822 |
|
|
}
|
8823 |
|
|
return false;
|
8824 |
|
|
}
|
8825 |
|
|
|
8826 |
|
|
|
8827 |
|
|
/* Return false iff it is provable that biv BL plus BIAS will not wrap
   at any point in its update sequence.  Note that at the rtl level we
   may not have information about the signedness of BL; in that case,
   check for both signed and unsigned overflow.  */

static bool
biased_biv_may_wrap_p (const struct loop *loop, struct iv_class *bl,
		       unsigned HOST_WIDE_INT bias)
{
  HOST_WIDE_INT incr;
  bool check_signed, check_unsigned;
  enum machine_mode mode;

  /* If the increment is not monotonic, we'd have to check separately
     at each increment step.  Not Worth It.  */
  incr = get_monotonic_increment (bl);
  if (incr == 0)
    return true;

  /* If this biv is the loop iteration variable, then we may be able to
     deduce a sign based on the loop condition.  */
  /* ??? This is not 100% reliable; consider an unsigned biv that is cast
     to signed for the comparison.  However, this same bug appears all
     through loop.c.  */
  check_signed = check_unsigned = true;
  if (bl->biv->src_reg == LOOP_INFO (loop)->iteration_var)
    {
      switch (LOOP_INFO (loop)->comparison_code)
	{
	/* An unsigned exit comparison implies the biv is used unsigned,
	   so the signed check can be skipped (and vice versa below).  */
	case GTU: case GEU: case LTU: case LEU:
	  check_signed = false;
	  break;
	case GT: case GE: case LT: case LE:
	  check_unsigned = false;
	  break;
	default:
	  break;
	}
    }

  mode = GET_MODE (bl->biv->src_reg);

  if (check_unsigned
      && !biased_biv_fits_mode_p (loop, bl, incr, mode, bias))
    return true;

  if (check_signed)
    {
      /* Shift the bias so the signed range maps onto the unsigned
	 range that biased_biv_fits_mode_p verifies.  */
      bias += (GET_MODE_MASK (mode) >> 1) + 1;
      if (!biased_biv_fits_mode_p (loop, bl, incr, mode, bias))
	return true;
    }

  return false;
}
|
8882 |
|
|
|
8883 |
|
|
|
8884 |
|
|
/* Given that X is an extension or truncation of BL, return true
|
8885 |
|
|
if it is unaffected by overflow. LOOP is the loop to which
|
8886 |
|
|
BL belongs and INCR is its per-iteration increment. */
|
8887 |
|
|
|
8888 |
|
|
static bool
|
8889 |
|
|
extension_within_bounds_p (const struct loop *loop, struct iv_class *bl,
|
8890 |
|
|
HOST_WIDE_INT incr, rtx x)
|
8891 |
|
|
{
|
8892 |
|
|
enum machine_mode mode;
|
8893 |
|
|
bool signedp, unsignedp;
|
8894 |
|
|
|
8895 |
|
|
switch (GET_CODE (x))
|
8896 |
|
|
{
|
8897 |
|
|
case SIGN_EXTEND:
|
8898 |
|
|
case ZERO_EXTEND:
|
8899 |
|
|
mode = GET_MODE (XEXP (x, 0));
|
8900 |
|
|
signedp = (GET_CODE (x) == SIGN_EXTEND);
|
8901 |
|
|
unsignedp = (GET_CODE (x) == ZERO_EXTEND);
|
8902 |
|
|
break;
|
8903 |
|
|
|
8904 |
|
|
case TRUNCATE:
|
8905 |
|
|
/* We don't know whether this value is being used as signed
|
8906 |
|
|
or unsigned, so check the conditions for both. */
|
8907 |
|
|
mode = GET_MODE (x);
|
8908 |
|
|
signedp = unsignedp = true;
|
8909 |
|
|
break;
|
8910 |
|
|
|
8911 |
|
|
default:
|
8912 |
|
|
gcc_unreachable ();
|
8913 |
|
|
}
|
8914 |
|
|
|
8915 |
|
|
return ((!signedp || biv_fits_mode_p (loop, bl, incr, mode, false))
|
8916 |
|
|
&& (!unsignedp || biv_fits_mode_p (loop, bl, incr, mode, true)));
|
8917 |
|
|
}
|
8918 |
|
|
|
8919 |
|
|
|
8920 |
|
|
/* Check each extension dependent giv in this class to see if its
|
8921 |
|
|
root biv is safe from wrapping in the interior mode, which would
|
8922 |
|
|
make the giv illegal. */
|
8923 |
|
|
|
8924 |
|
|
static void
|
8925 |
|
|
check_ext_dependent_givs (const struct loop *loop, struct iv_class *bl)
|
8926 |
|
|
{
|
8927 |
|
|
struct induction *v;
|
8928 |
|
|
HOST_WIDE_INT incr;
|
8929 |
|
|
|
8930 |
|
|
incr = get_monotonic_increment (bl);
|
8931 |
|
|
|
8932 |
|
|
/* Invalidate givs that fail the tests. */
|
8933 |
|
|
for (v = bl->giv; v; v = v->next_iv)
|
8934 |
|
|
if (v->ext_dependent)
|
8935 |
|
|
{
|
8936 |
|
|
if (incr != 0
|
8937 |
|
|
&& extension_within_bounds_p (loop, bl, incr, v->ext_dependent))
|
8938 |
|
|
{
|
8939 |
|
|
if (loop_dump_stream)
|
8940 |
|
|
fprintf (loop_dump_stream,
|
8941 |
|
|
"Verified ext dependent giv at %d of reg %d\n",
|
8942 |
|
|
INSN_UID (v->insn), bl->regno);
|
8943 |
|
|
}
|
8944 |
|
|
else
|
8945 |
|
|
{
|
8946 |
|
|
if (loop_dump_stream)
|
8947 |
|
|
fprintf (loop_dump_stream,
|
8948 |
|
|
"Failed ext dependent giv at %d\n",
|
8949 |
|
|
INSN_UID (v->insn));
|
8950 |
|
|
|
8951 |
|
|
v->ignore = 1;
|
8952 |
|
|
bl->all_reduced = 0;
|
8953 |
|
|
}
|
8954 |
|
|
}
|
8955 |
|
|
}
|
8956 |
|
|
|
8957 |
|
|
/* Generate a version of VALUE in a mode appropriate for initializing V. */
|
8958 |
|
|
|
8959 |
|
|
static rtx
|
8960 |
|
|
extend_value_for_giv (struct induction *v, rtx value)
|
8961 |
|
|
{
|
8962 |
|
|
rtx ext_dep = v->ext_dependent;
|
8963 |
|
|
|
8964 |
|
|
if (! ext_dep)
|
8965 |
|
|
return value;
|
8966 |
|
|
|
8967 |
|
|
/* Recall that check_ext_dependent_givs verified that the known bounds
|
8968 |
|
|
of a biv did not overflow or wrap with respect to the extension for
|
8969 |
|
|
the giv. Therefore, constants need no additional adjustment. */
|
8970 |
|
|
if (CONSTANT_P (value) && GET_MODE (value) == VOIDmode)
|
8971 |
|
|
return value;
|
8972 |
|
|
|
8973 |
|
|
/* Otherwise, we must adjust the value to compensate for the
|
8974 |
|
|
differing modes of the biv and the giv. */
|
8975 |
|
|
return gen_rtx_fmt_e (GET_CODE (ext_dep), GET_MODE (ext_dep), value);
|
8976 |
|
|
}
|
8977 |
|
|
|
8978 |
|
|
/* Per-giv bookkeeping used when sorting candidates for combination.  */
struct combine_givs_stats
{
  int giv_number;	/* Index into the giv_array built by combine_givs.  */
  int total_benefit;	/* Accumulated benefit of combining into this giv.  */
};

/* qsort comparator: order by decreasing total_benefit, breaking ties by
   increasing giv_number so the sort is stable.

   Note: the historical "y - x" subtraction idiom is avoided here because
   total_benefit values are accumulated sums and the difference of two
   ints can overflow, which is undefined behavior and can produce an
   inconsistent ordering.  */

static int
cmp_combine_givs_stats (const void *xp, const void *yp)
{
  const struct combine_givs_stats * const x =
    (const struct combine_givs_stats *) xp;
  const struct combine_givs_stats * const y =
    (const struct combine_givs_stats *) yp;

  /* Larger benefit sorts earlier.  */
  if (x->total_benefit != y->total_benefit)
    return y->total_benefit > x->total_benefit ? 1 : -1;

  /* Stabilize the sort.  giv_number values are small array indices,
     so this subtraction cannot overflow.  */
  return x->giv_number - y->giv_number;
}
|
8998 |
|
|
|
8999 |
|
|
/* Check all pairs of givs for iv_class BL and see if any can be combined with
   any other.  If so, point SAME to the giv combined with and set NEW_REG to
   be an expression (in terms of the other giv's DEST_REG) equivalent to the
   giv.  Also, update BENEFIT and related fields for cost/benefit analysis.  */

static void
combine_givs (struct loop_regs *regs, struct iv_class *bl)
{
  /* Additional benefit to add for being combined multiple times.  */
  const int extra_benefit = 3;

  struct induction *g1, *g2, **giv_array;
  int i, j, k, giv_count;
  struct combine_givs_stats *stats;
  rtx *can_combine;	/* giv_count x giv_count matrix; [i][j] nonzero means
			   G_j can be expressed in terms of G_i.  */

  /* Count givs, because bl->giv_count is incorrect here.  */
  giv_count = 0;
  for (g1 = bl->giv; g1; g1 = g1->next_iv)
    if (!g1->ignore)
      giv_count++;

  /* Flatten the non-ignored givs into an array for O(1) indexing.  */
  giv_array = alloca (giv_count * sizeof (struct induction *));
  i = 0;
  for (g1 = bl->giv; g1; g1 = g1->next_iv)
    if (!g1->ignore)
      giv_array[i++] = g1;

  stats = xcalloc (giv_count, sizeof (*stats));
  can_combine = xcalloc (giv_count, giv_count * sizeof (rtx));

  /* First pass: record every feasible pairwise combination and the total
     benefit each giv would gain by absorbing all its potential mates.  */
  for (i = 0; i < giv_count; i++)
    {
      int this_benefit;
      rtx single_use;

      g1 = giv_array[i];
      stats[i].giv_number = i;

      /* If a DEST_REG GIV is used only once, do not allow it to combine
	 with anything, for in doing so we will gain nothing that cannot
	 be had by simply letting the GIV with which we would have combined
	 to be reduced on its own.  The lossage shows up in particular with
	 DEST_ADDR targets on hosts with reg+reg addressing, though it can
	 be seen elsewhere as well.  */
      if (g1->giv_type == DEST_REG
	  && (single_use = regs->array[REGNO (g1->dest_reg)].single_usage)
	  && single_use != const0_rtx)
	continue;

      this_benefit = g1->benefit;
      /* Add an additional weight for zero addends.  */
      if (g1->no_const_addval)
	this_benefit += 1;

      for (j = 0; j < giv_count; j++)
	{
	  rtx this_combine;

	  g2 = giv_array[j];
	  if (g1 != g2
	      && (this_combine = combine_givs_p (g1, g2)) != NULL_RTX)
	    {
	      can_combine[i * giv_count + j] = this_combine;
	      this_benefit += g2->benefit + extra_benefit;
	    }
	}
      stats[i].total_benefit = this_benefit;
    }

  /* Iterate, combining until we can't.  Each successful combination
     re-sorts the candidates, since benefits change as givs are taken.  */
restart:
  qsort (stats, giv_count, sizeof (*stats), cmp_combine_givs_stats);

  if (loop_dump_stream)
    {
      fprintf (loop_dump_stream, "Sorted combine statistics:\n");
      for (k = 0; k < giv_count; k++)
	{
	  g1 = giv_array[stats[k].giv_number];
	  if (!g1->combined_with && !g1->same)
	    fprintf (loop_dump_stream, " {%d, %d}",
		     INSN_UID (giv_array[stats[k].giv_number]->insn),
		     stats[k].total_benefit);
	}
      putc ('\n', loop_dump_stream);
    }

  /* Second pass: greedily combine, taking candidates in order of
     decreasing total benefit.  */
  for (k = 0; k < giv_count; k++)
    {
      int g1_add_benefit = 0;

      i = stats[k].giv_number;
      g1 = giv_array[i];

      /* If it has already been combined, skip.  */
      if (g1->combined_with || g1->same)
	continue;

      for (j = 0; j < giv_count; j++)
	{
	  g2 = giv_array[j];
	  if (g1 != g2 && can_combine[i * giv_count + j]
	      /* If it has already been combined, skip.  */
	      && ! g2->same && ! g2->combined_with)
	    {
	      int l;

	      /* G2 is now expressed in terms of G1.  */
	      g2->new_reg = can_combine[i * giv_count + j];
	      g2->same = g1;
	      /* For destination, we now may replace by mem expression instead
		 of register.  This changes the costs considerably, so add the
		 compensation.  */
	      if (g2->giv_type == DEST_ADDR)
		g2->benefit = (g2->benefit + reg_address_cost
			       - address_cost (g2->new_reg,
					       GET_MODE (g2->mem)));
	      g1->combined_with++;
	      g1->lifetime += g2->lifetime;

	      g1_add_benefit += g2->benefit;

	      /* ??? The new final_[bg]iv_value code does a much better job
		 of finding replaceable giv's, and hence this code may no
		 longer be necessary.  */
	      if (! g2->replaceable && REG_USERVAR_P (g2->dest_reg))
		g1_add_benefit -= copy_cost;

	      /* To help optimize the next set of combinations, remove
		 this giv from the benefits of other potential mates.  */
	      for (l = 0; l < giv_count; ++l)
		{
		  int m = stats[l].giv_number;
		  if (can_combine[m * giv_count + j])
		    stats[l].total_benefit -= g2->benefit + extra_benefit;
		}

	      if (loop_dump_stream)
		fprintf (loop_dump_stream,
			 "giv at %d combined with giv at %d; new benefit %d + %d, lifetime %d\n",
			 INSN_UID (g2->insn), INSN_UID (g1->insn),
			 g1->benefit, g1_add_benefit, g1->lifetime);
	    }
	}

      /* To help optimize the next set of combinations, remove
	 this giv from the benefits of other potential mates.  */
      if (g1->combined_with)
	{
	  for (j = 0; j < giv_count; ++j)
	    {
	      int m = stats[j].giv_number;
	      if (can_combine[m * giv_count + i])
		stats[j].total_benefit -= g1->benefit + extra_benefit;
	    }

	  g1->benefit += g1_add_benefit;

	  /* We've finished with this giv, and everything it touched.
	     Restart the combination so that proper weights for the
	     rest of the givs are properly taken into account.  */
	  /* ??? Ideally we would compact the arrays at this point, so
	     as to not cover old ground.  But sanely compacting
	     can_combine is tricky.  */
	  goto restart;
	}
    }

  /* Clean up.  */
  free (stats);
  free (can_combine);
}
|
9171 |
|
|
|
9172 |
|
|
/* Generate sequence for REG = B * M + A. B is the initial value of
|
9173 |
|
|
the basic induction variable, M a multiplicative constant, A an
|
9174 |
|
|
additive constant and REG the destination register. */
|
9175 |
|
|
|
9176 |
|
|
static rtx
|
9177 |
|
|
gen_add_mult (rtx b, rtx m, rtx a, rtx reg)
|
9178 |
|
|
{
|
9179 |
|
|
rtx seq;
|
9180 |
|
|
rtx result;
|
9181 |
|
|
|
9182 |
|
|
start_sequence ();
|
9183 |
|
|
/* Use unsigned arithmetic. */
|
9184 |
|
|
result = expand_mult_add (b, reg, m, a, GET_MODE (reg), 1);
|
9185 |
|
|
if (reg != result)
|
9186 |
|
|
emit_move_insn (reg, result);
|
9187 |
|
|
seq = get_insns ();
|
9188 |
|
|
end_sequence ();
|
9189 |
|
|
|
9190 |
|
|
return seq;
|
9191 |
|
|
}
|
9192 |
|
|
|
9193 |
|
|
|
9194 |
|
|
/* Update registers created in insn sequence SEQ. */
|
9195 |
|
|
|
9196 |
|
|
static void
|
9197 |
|
|
loop_regs_update (const struct loop *loop ATTRIBUTE_UNUSED, rtx seq)
|
9198 |
|
|
{
|
9199 |
|
|
rtx insn;
|
9200 |
|
|
|
9201 |
|
|
/* Update register info for alias analysis. */
|
9202 |
|
|
|
9203 |
|
|
insn = seq;
|
9204 |
|
|
while (insn != NULL_RTX)
|
9205 |
|
|
{
|
9206 |
|
|
rtx set = single_set (insn);
|
9207 |
|
|
|
9208 |
|
|
if (set && REG_P (SET_DEST (set)))
|
9209 |
|
|
record_base_value (REGNO (SET_DEST (set)), SET_SRC (set), 0);
|
9210 |
|
|
|
9211 |
|
|
insn = NEXT_INSN (insn);
|
9212 |
|
|
}
|
9213 |
|
|
}
|
9214 |
|
|
|
9215 |
|
|
|
9216 |
|
|
/* EMIT code before BEFORE_BB/BEFORE_INSN to set REG = B * M + A. B
|
9217 |
|
|
is the initial value of the basic induction variable, M a
|
9218 |
|
|
multiplicative constant, A an additive constant and REG the
|
9219 |
|
|
destination register. */
|
9220 |
|
|
|
9221 |
|
|
static void
|
9222 |
|
|
loop_iv_add_mult_emit_before (const struct loop *loop, rtx b, rtx m, rtx a,
|
9223 |
|
|
rtx reg, basic_block before_bb, rtx before_insn)
|
9224 |
|
|
{
|
9225 |
|
|
rtx seq;
|
9226 |
|
|
|
9227 |
|
|
if (! before_insn)
|
9228 |
|
|
{
|
9229 |
|
|
loop_iv_add_mult_hoist (loop, b, m, a, reg);
|
9230 |
|
|
return;
|
9231 |
|
|
}
|
9232 |
|
|
|
9233 |
|
|
/* Use copy_rtx to prevent unexpected sharing of these rtx. */
|
9234 |
|
|
seq = gen_add_mult (copy_rtx (b), copy_rtx (m), copy_rtx (a), reg);
|
9235 |
|
|
|
9236 |
|
|
/* Increase the lifetime of any invariants moved further in code. */
|
9237 |
|
|
update_reg_last_use (a, before_insn);
|
9238 |
|
|
update_reg_last_use (b, before_insn);
|
9239 |
|
|
update_reg_last_use (m, before_insn);
|
9240 |
|
|
|
9241 |
|
|
/* It is possible that the expansion created lots of new registers.
|
9242 |
|
|
Iterate over the sequence we just created and record them all. We
|
9243 |
|
|
must do this before inserting the sequence. */
|
9244 |
|
|
loop_regs_update (loop, seq);
|
9245 |
|
|
|
9246 |
|
|
loop_insn_emit_before (loop, before_bb, before_insn, seq);
|
9247 |
|
|
}
|
9248 |
|
|
|
9249 |
|
|
|
9250 |
|
|
/* Emit insns in loop pre-header to set REG = B * M + A. B is the
|
9251 |
|
|
initial value of the basic induction variable, M a multiplicative
|
9252 |
|
|
constant, A an additive constant and REG the destination
|
9253 |
|
|
register. */
|
9254 |
|
|
|
9255 |
|
|
static void
|
9256 |
|
|
loop_iv_add_mult_sink (const struct loop *loop, rtx b, rtx m, rtx a, rtx reg)
|
9257 |
|
|
{
|
9258 |
|
|
rtx seq;
|
9259 |
|
|
|
9260 |
|
|
/* Use copy_rtx to prevent unexpected sharing of these rtx. */
|
9261 |
|
|
seq = gen_add_mult (copy_rtx (b), copy_rtx (m), copy_rtx (a), reg);
|
9262 |
|
|
|
9263 |
|
|
/* Increase the lifetime of any invariants moved further in code.
|
9264 |
|
|
???? Is this really necessary? */
|
9265 |
|
|
update_reg_last_use (a, loop->sink);
|
9266 |
|
|
update_reg_last_use (b, loop->sink);
|
9267 |
|
|
update_reg_last_use (m, loop->sink);
|
9268 |
|
|
|
9269 |
|
|
/* It is possible that the expansion created lots of new registers.
|
9270 |
|
|
Iterate over the sequence we just created and record them all. We
|
9271 |
|
|
must do this before inserting the sequence. */
|
9272 |
|
|
loop_regs_update (loop, seq);
|
9273 |
|
|
|
9274 |
|
|
loop_insn_sink (loop, seq);
|
9275 |
|
|
}
|
9276 |
|
|
|
9277 |
|
|
|
9278 |
|
|
/* Emit insns after loop to set REG = B * M + A. B is the initial
|
9279 |
|
|
value of the basic induction variable, M a multiplicative constant,
|
9280 |
|
|
A an additive constant and REG the destination register. */
|
9281 |
|
|
|
9282 |
|
|
static void
|
9283 |
|
|
loop_iv_add_mult_hoist (const struct loop *loop, rtx b, rtx m, rtx a, rtx reg)
|
9284 |
|
|
{
|
9285 |
|
|
rtx seq;
|
9286 |
|
|
|
9287 |
|
|
/* Use copy_rtx to prevent unexpected sharing of these rtx. */
|
9288 |
|
|
seq = gen_add_mult (copy_rtx (b), copy_rtx (m), copy_rtx (a), reg);
|
9289 |
|
|
|
9290 |
|
|
/* It is possible that the expansion created lots of new registers.
|
9291 |
|
|
Iterate over the sequence we just created and record them all. We
|
9292 |
|
|
must do this before inserting the sequence. */
|
9293 |
|
|
loop_regs_update (loop, seq);
|
9294 |
|
|
|
9295 |
|
|
loop_insn_hoist (loop, seq);
|
9296 |
|
|
}
|
9297 |
|
|
|
9298 |
|
|
|
9299 |
|
|
|
9300 |
|
|
/* Similar to gen_add_mult, but compute cost rather than generating
|
9301 |
|
|
sequence. */
|
9302 |
|
|
|
9303 |
|
|
static int
|
9304 |
|
|
iv_add_mult_cost (rtx b, rtx m, rtx a, rtx reg)
|
9305 |
|
|
{
|
9306 |
|
|
int cost = 0;
|
9307 |
|
|
rtx last, result;
|
9308 |
|
|
|
9309 |
|
|
start_sequence ();
|
9310 |
|
|
result = expand_mult_add (b, reg, m, a, GET_MODE (reg), 1);
|
9311 |
|
|
if (reg != result)
|
9312 |
|
|
emit_move_insn (reg, result);
|
9313 |
|
|
last = get_last_insn ();
|
9314 |
|
|
while (last)
|
9315 |
|
|
{
|
9316 |
|
|
rtx t = single_set (last);
|
9317 |
|
|
if (t)
|
9318 |
|
|
cost += rtx_cost (SET_SRC (t), SET);
|
9319 |
|
|
last = PREV_INSN (last);
|
9320 |
|
|
}
|
9321 |
|
|
end_sequence ();
|
9322 |
|
|
return cost;
|
9323 |
|
|
}
|
9324 |
|
|
|
9325 |
|
|
/* Test whether A * B can be computed without
   an actual multiply insn.  Value is 1 if so.

   ??? This function stinks because it generates a ton of wasted RTL
   ??? and as a result fragments GC memory to no end.  There are other
   ??? places in the compiler which are invoked a lot and do the same
   ??? thing, generate wasted RTL just to see if something is possible.  */

static int
product_cheap_p (rtx a, rtx b)
{
  rtx tmp;
  int win, n_insns;

  /* If only one is constant, make it B.  */
  if (GET_CODE (a) == CONST_INT)
    tmp = a, a = b, b = tmp;

  /* If first constant, both constant, so don't need multiply.  */
  if (GET_CODE (a) == CONST_INT)
    return 1;

  /* If second not constant, neither is constant, so would need multiply.  */
  if (GET_CODE (b) != CONST_INT)
    return 0;

  /* One operand is constant, so might not need multiply insn.  Generate the
     code for the multiply and see if a call or multiply, or long sequence
     of insns is generated.  */

  start_sequence ();
  expand_mult (GET_MODE (a), a, b, NULL_RTX, 1);
  tmp = get_insns ();
  end_sequence ();

  win = 1;
  if (tmp == NULL_RTX)
    /* Empty sequence: the product needed no code at all.  */
    ;
  else if (INSN_P (tmp))
    {
      /* A chain of insns: cheap only if it is short and contains no
	 MULT (and no jumps or calls).  */
      n_insns = 0;
      while (tmp != NULL_RTX)
	{
	  rtx next = NEXT_INSN (tmp);

	  if (++n_insns > 3
	      /* Jumps and calls disqualify the sequence.  */
	      || !NONJUMP_INSN_P (tmp)
	      /* So does an actual multiply, plain...  */
	      || (GET_CODE (PATTERN (tmp)) == SET
		  && GET_CODE (SET_SRC (PATTERN (tmp))) == MULT)
	      /* ...or inside a PARALLEL (e.g. with a clobber).  */
	      || (GET_CODE (PATTERN (tmp)) == PARALLEL
		  && GET_CODE (XVECEXP (PATTERN (tmp), 0, 0)) == SET
		  && GET_CODE (SET_SRC (XVECEXP (PATTERN (tmp), 0, 0))) == MULT))
	    {
	      win = 0;
	      break;
	    }

	  tmp = next;
	}
    }
  else if (GET_CODE (tmp) == SET
	   && GET_CODE (SET_SRC (tmp)) == MULT)
    /* A bare SET pattern performing a multiply.  */
    win = 0;
  else if (GET_CODE (tmp) == PARALLEL
	   && GET_CODE (XVECEXP (tmp, 0, 0)) == SET
	   && GET_CODE (SET_SRC (XVECEXP (tmp, 0, 0))) == MULT)
    /* A bare PARALLEL pattern whose first element multiplies.  */
    win = 0;

  return win;
}
|
9395 |
|
|
|
9396 |
|
|
/* Check to see if loop can be terminated by a "decrement and branch until
|
9397 |
|
|
zero" instruction. If so, add a REG_NONNEG note to the branch insn if so.
|
9398 |
|
|
Also try reversing an increment loop to a decrement loop
|
9399 |
|
|
to see if the optimization can be performed.
|
9400 |
|
|
Value is nonzero if optimization was performed. */
|
9401 |
|
|
|
9402 |
|
|
/* This is useful even if the architecture doesn't have such an insn,
|
9403 |
|
|
because it might change a loops which increments from 0 to n to a loop
|
9404 |
|
|
which decrements from n to 0. A loop that decrements to zero is usually
|
9405 |
|
|
faster than one that increments from zero. */
|
9406 |
|
|
|
9407 |
|
|
/* ??? This could be rewritten to use some of the loop unrolling procedures,
|
9408 |
|
|
such as approx_final_value, biv_total_increment, loop_iterations, and
|
9409 |
|
|
final_[bg]iv_value. */
|
9410 |
|
|
|
9411 |
|
|
static int
|
9412 |
|
|
check_dbra_loop (struct loop *loop, int insn_count)
|
9413 |
|
|
{
|
9414 |
|
|
struct loop_info *loop_info = LOOP_INFO (loop);
|
9415 |
|
|
struct loop_regs *regs = LOOP_REGS (loop);
|
9416 |
|
|
struct loop_ivs *ivs = LOOP_IVS (loop);
|
9417 |
|
|
struct iv_class *bl;
|
9418 |
|
|
rtx reg;
|
9419 |
|
|
enum machine_mode mode;
|
9420 |
|
|
rtx jump_label;
|
9421 |
|
|
rtx final_value;
|
9422 |
|
|
rtx start_value;
|
9423 |
|
|
rtx new_add_val;
|
9424 |
|
|
rtx comparison;
|
9425 |
|
|
rtx before_comparison;
|
9426 |
|
|
rtx p;
|
9427 |
|
|
rtx jump;
|
9428 |
|
|
rtx first_compare;
|
9429 |
|
|
int compare_and_branch;
|
9430 |
|
|
rtx loop_start = loop->start;
|
9431 |
|
|
rtx loop_end = loop->end;
|
9432 |
|
|
|
9433 |
|
|
/* If last insn is a conditional branch, and the insn before tests a
|
9434 |
|
|
register value, try to optimize it. Otherwise, we can't do anything. */
|
9435 |
|
|
|
9436 |
|
|
jump = PREV_INSN (loop_end);
|
9437 |
|
|
comparison = get_condition_for_loop (loop, jump);
|
9438 |
|
|
if (comparison == 0)
|
9439 |
|
|
return 0;
|
9440 |
|
|
if (!onlyjump_p (jump))
|
9441 |
|
|
return 0;
|
9442 |
|
|
|
9443 |
|
|
/* Try to compute whether the compare/branch at the loop end is one or
|
9444 |
|
|
two instructions. */
|
9445 |
|
|
get_condition (jump, &first_compare, false, true);
|
9446 |
|
|
if (first_compare == jump)
|
9447 |
|
|
compare_and_branch = 1;
|
9448 |
|
|
else if (first_compare == prev_nonnote_insn (jump))
|
9449 |
|
|
compare_and_branch = 2;
|
9450 |
|
|
else
|
9451 |
|
|
return 0;
|
9452 |
|
|
|
9453 |
|
|
{
|
9454 |
|
|
/* If more than one condition is present to control the loop, then
|
9455 |
|
|
do not proceed, as this function does not know how to rewrite
|
9456 |
|
|
loop tests with more than one condition.
|
9457 |
|
|
|
9458 |
|
|
Look backwards from the first insn in the last comparison
|
9459 |
|
|
sequence and see if we've got another comparison sequence. */
|
9460 |
|
|
|
9461 |
|
|
rtx jump1;
|
9462 |
|
|
if ((jump1 = prev_nonnote_insn (first_compare))
|
9463 |
|
|
&& JUMP_P (jump1))
|
9464 |
|
|
return 0;
|
9465 |
|
|
}
|
9466 |
|
|
|
9467 |
|
|
/* Check all of the bivs to see if the compare uses one of them.
|
9468 |
|
|
Skip biv's set more than once because we can't guarantee that
|
9469 |
|
|
it will be zero on the last iteration. Also skip if the biv is
|
9470 |
|
|
used between its update and the test insn. */
|
9471 |
|
|
|
9472 |
|
|
for (bl = ivs->list; bl; bl = bl->next)
|
9473 |
|
|
{
|
9474 |
|
|
if (bl->biv_count == 1
|
9475 |
|
|
&& ! bl->biv->maybe_multiple
|
9476 |
|
|
&& bl->biv->dest_reg == XEXP (comparison, 0)
|
9477 |
|
|
&& ! reg_used_between_p (regno_reg_rtx[bl->regno], bl->biv->insn,
|
9478 |
|
|
first_compare))
|
9479 |
|
|
break;
|
9480 |
|
|
}
|
9481 |
|
|
|
9482 |
|
|
/* Try swapping the comparison to identify a suitable biv. */
|
9483 |
|
|
if (!bl)
|
9484 |
|
|
for (bl = ivs->list; bl; bl = bl->next)
|
9485 |
|
|
if (bl->biv_count == 1
|
9486 |
|
|
&& ! bl->biv->maybe_multiple
|
9487 |
|
|
&& bl->biv->dest_reg == XEXP (comparison, 1)
|
9488 |
|
|
&& ! reg_used_between_p (regno_reg_rtx[bl->regno], bl->biv->insn,
|
9489 |
|
|
first_compare))
|
9490 |
|
|
{
|
9491 |
|
|
comparison = gen_rtx_fmt_ee (swap_condition (GET_CODE (comparison)),
|
9492 |
|
|
VOIDmode,
|
9493 |
|
|
XEXP (comparison, 1),
|
9494 |
|
|
XEXP (comparison, 0));
|
9495 |
|
|
break;
|
9496 |
|
|
}
|
9497 |
|
|
|
9498 |
|
|
if (! bl)
|
9499 |
|
|
return 0;
|
9500 |
|
|
|
9501 |
|
|
/* Look for the case where the basic induction variable is always
|
9502 |
|
|
nonnegative, and equals zero on the last iteration.
|
9503 |
|
|
In this case, add a reg_note REG_NONNEG, which allows the
|
9504 |
|
|
m68k DBRA instruction to be used. */
|
9505 |
|
|
|
9506 |
|
|
if (((GET_CODE (comparison) == GT && XEXP (comparison, 1) == constm1_rtx)
|
9507 |
|
|
|| (GET_CODE (comparison) == NE && XEXP (comparison, 1) == const0_rtx))
|
9508 |
|
|
&& GET_CODE (bl->biv->add_val) == CONST_INT
|
9509 |
|
|
&& INTVAL (bl->biv->add_val) < 0)
|
9510 |
|
|
{
|
9511 |
|
|
/* Initial value must be greater than 0,
|
9512 |
|
|
init_val % -dec_value == 0 to ensure that it equals zero on
|
9513 |
|
|
the last iteration */
|
9514 |
|
|
|
9515 |
|
|
if (GET_CODE (bl->initial_value) == CONST_INT
|
9516 |
|
|
&& INTVAL (bl->initial_value) > 0
|
9517 |
|
|
&& (INTVAL (bl->initial_value)
|
9518 |
|
|
% (-INTVAL (bl->biv->add_val))) == 0)
|
9519 |
|
|
{
|
9520 |
|
|
/* Register always nonnegative, add REG_NOTE to branch. */
|
9521 |
|
|
if (! find_reg_note (jump, REG_NONNEG, NULL_RTX))
|
9522 |
|
|
REG_NOTES (jump)
|
9523 |
|
|
= gen_rtx_EXPR_LIST (REG_NONNEG, bl->biv->dest_reg,
|
9524 |
|
|
REG_NOTES (jump));
|
9525 |
|
|
bl->nonneg = 1;
|
9526 |
|
|
|
9527 |
|
|
return 1;
|
9528 |
|
|
}
|
9529 |
|
|
|
9530 |
|
|
/* If the decrement is 1 and the value was tested as >= 0 before
|
9531 |
|
|
the loop, then we can safely optimize. */
|
9532 |
|
|
for (p = loop_start; p; p = PREV_INSN (p))
|
9533 |
|
|
{
|
9534 |
|
|
if (LABEL_P (p))
|
9535 |
|
|
break;
|
9536 |
|
|
if (!JUMP_P (p))
|
9537 |
|
|
continue;
|
9538 |
|
|
|
9539 |
|
|
before_comparison = get_condition_for_loop (loop, p);
|
9540 |
|
|
if (before_comparison
|
9541 |
|
|
&& XEXP (before_comparison, 0) == bl->biv->dest_reg
|
9542 |
|
|
&& (GET_CODE (before_comparison) == LT
|
9543 |
|
|
|| GET_CODE (before_comparison) == LTU)
|
9544 |
|
|
&& XEXP (before_comparison, 1) == const0_rtx
|
9545 |
|
|
&& ! reg_set_between_p (bl->biv->dest_reg, p, loop_start)
|
9546 |
|
|
&& INTVAL (bl->biv->add_val) == -1)
|
9547 |
|
|
{
|
9548 |
|
|
if (! find_reg_note (jump, REG_NONNEG, NULL_RTX))
|
9549 |
|
|
REG_NOTES (jump)
|
9550 |
|
|
= gen_rtx_EXPR_LIST (REG_NONNEG, bl->biv->dest_reg,
|
9551 |
|
|
REG_NOTES (jump));
|
9552 |
|
|
bl->nonneg = 1;
|
9553 |
|
|
|
9554 |
|
|
return 1;
|
9555 |
|
|
}
|
9556 |
|
|
}
|
9557 |
|
|
}
|
9558 |
|
|
else if (GET_CODE (bl->biv->add_val) == CONST_INT
|
9559 |
|
|
&& INTVAL (bl->biv->add_val) > 0)
|
9560 |
|
|
{
|
9561 |
|
|
/* Try to change inc to dec, so can apply above optimization. */
|
9562 |
|
|
/* Can do this if:
|
9563 |
|
|
all registers modified are induction variables or invariant,
|
9564 |
|
|
all memory references have non-overlapping addresses
|
9565 |
|
|
(obviously true if only one write)
|
9566 |
|
|
allow 2 insns for the compare/jump at the end of the loop. */
|
9567 |
|
|
/* Also, we must avoid any instructions which use both the reversed
|
9568 |
|
|
biv and another biv. Such instructions will fail if the loop is
|
9569 |
|
|
reversed. We meet this condition by requiring that either
|
9570 |
|
|
no_use_except_counting is true, or else that there is only
|
9571 |
|
|
one biv. */
|
9572 |
|
|
int num_nonfixed_reads = 0;
|
9573 |
|
|
/* 1 if the iteration var is used only to count iterations. */
|
9574 |
|
|
int no_use_except_counting = 0;
|
9575 |
|
|
/* 1 if the loop has no memory store, or it has a single memory store
|
9576 |
|
|
which is reversible. */
|
9577 |
|
|
int reversible_mem_store = 1;
|
9578 |
|
|
|
9579 |
|
|
if (bl->giv_count == 0
|
9580 |
|
|
&& !loop->exit_count
|
9581 |
|
|
&& !loop_info->has_multiple_exit_targets)
|
9582 |
|
|
{
|
9583 |
|
|
rtx bivreg = regno_reg_rtx[bl->regno];
|
9584 |
|
|
struct iv_class *blt;
|
9585 |
|
|
|
9586 |
|
|
/* If there are no givs for this biv, and the only exit is the
|
9587 |
|
|
fall through at the end of the loop, then
|
9588 |
|
|
see if perhaps there are no uses except to count. */
|
9589 |
|
|
no_use_except_counting = 1;
|
9590 |
|
|
for (p = loop_start; p != loop_end; p = NEXT_INSN (p))
|
9591 |
|
|
if (INSN_P (p))
|
9592 |
|
|
{
|
9593 |
|
|
rtx set = single_set (p);
|
9594 |
|
|
|
9595 |
|
|
if (set && REG_P (SET_DEST (set))
|
9596 |
|
|
&& REGNO (SET_DEST (set)) == bl->regno)
|
9597 |
|
|
/* An insn that sets the biv is okay. */
|
9598 |
|
|
;
|
9599 |
|
|
else if (!reg_mentioned_p (bivreg, PATTERN (p)))
|
9600 |
|
|
/* An insn that doesn't mention the biv is okay. */
|
9601 |
|
|
;
|
9602 |
|
|
else if (p == prev_nonnote_insn (prev_nonnote_insn (loop_end))
|
9603 |
|
|
|| p == prev_nonnote_insn (loop_end))
|
9604 |
|
|
{
|
9605 |
|
|
/* If either of these insns uses the biv and sets a pseudo
|
9606 |
|
|
that has more than one usage, then the biv has uses
|
9607 |
|
|
other than counting since it's used to derive a value
|
9608 |
|
|
that is used more than one time. */
|
9609 |
|
|
note_stores (PATTERN (p), note_set_pseudo_multiple_uses,
|
9610 |
|
|
regs);
|
9611 |
|
|
if (regs->multiple_uses)
|
9612 |
|
|
{
|
9613 |
|
|
no_use_except_counting = 0;
|
9614 |
|
|
break;
|
9615 |
|
|
}
|
9616 |
|
|
}
|
9617 |
|
|
else
|
9618 |
|
|
{
|
9619 |
|
|
no_use_except_counting = 0;
|
9620 |
|
|
break;
|
9621 |
|
|
}
|
9622 |
|
|
}
|
9623 |
|
|
|
9624 |
|
|
/* A biv has uses besides counting if it is used to set
|
9625 |
|
|
another biv. */
|
9626 |
|
|
for (blt = ivs->list; blt; blt = blt->next)
|
9627 |
|
|
if (blt->init_set
|
9628 |
|
|
&& reg_mentioned_p (bivreg, SET_SRC (blt->init_set)))
|
9629 |
|
|
{
|
9630 |
|
|
no_use_except_counting = 0;
|
9631 |
|
|
break;
|
9632 |
|
|
}
|
9633 |
|
|
}
|
9634 |
|
|
|
9635 |
|
|
if (no_use_except_counting)
|
9636 |
|
|
/* No need to worry about MEMs. */
|
9637 |
|
|
;
|
9638 |
|
|
else if (loop_info->num_mem_sets <= 1)
|
9639 |
|
|
{
|
9640 |
|
|
for (p = loop_start; p != loop_end; p = NEXT_INSN (p))
|
9641 |
|
|
if (INSN_P (p))
|
9642 |
|
|
num_nonfixed_reads += count_nonfixed_reads (loop, PATTERN (p));
|
9643 |
|
|
|
9644 |
|
|
/* If the loop has a single store, and the destination address is
|
9645 |
|
|
invariant, then we can't reverse the loop, because this address
|
9646 |
|
|
might then have the wrong value at loop exit.
|
9647 |
|
|
This would work if the source was invariant also, however, in that
|
9648 |
|
|
case, the insn should have been moved out of the loop. */
|
9649 |
|
|
|
9650 |
|
|
if (loop_info->num_mem_sets == 1)
|
9651 |
|
|
{
|
9652 |
|
|
struct induction *v;
|
9653 |
|
|
|
9654 |
|
|
/* If we could prove that each of the memory locations
|
9655 |
|
|
written to was different, then we could reverse the
|
9656 |
|
|
store -- but we don't presently have any way of
|
9657 |
|
|
knowing that. */
|
9658 |
|
|
reversible_mem_store = 0;
|
9659 |
|
|
|
9660 |
|
|
/* If the store depends on a register that is set after the
|
9661 |
|
|
store, it depends on the initial value, and is thus not
|
9662 |
|
|
reversible. */
|
9663 |
|
|
for (v = bl->giv; reversible_mem_store && v; v = v->next_iv)
|
9664 |
|
|
{
|
9665 |
|
|
if (v->giv_type == DEST_REG
|
9666 |
|
|
&& reg_mentioned_p (v->dest_reg,
|
9667 |
|
|
PATTERN (loop_info->first_loop_store_insn))
|
9668 |
|
|
&& loop_insn_first_p (loop_info->first_loop_store_insn,
|
9669 |
|
|
v->insn))
|
9670 |
|
|
reversible_mem_store = 0;
|
9671 |
|
|
}
|
9672 |
|
|
}
|
9673 |
|
|
}
|
9674 |
|
|
else
|
9675 |
|
|
return 0;
|
9676 |
|
|
|
9677 |
|
|
/* This code only acts for innermost loops. Also it simplifies
|
9678 |
|
|
the memory address check by only reversing loops with
|
9679 |
|
|
zero or one memory access.
|
9680 |
|
|
Two memory accesses could involve parts of the same array,
|
9681 |
|
|
and that can't be reversed.
|
9682 |
|
|
If the biv is used only for counting, than we don't need to worry
|
9683 |
|
|
about all these things. */
|
9684 |
|
|
|
9685 |
|
|
if ((num_nonfixed_reads <= 1
|
9686 |
|
|
&& ! loop_info->has_nonconst_call
|
9687 |
|
|
&& ! loop_info->has_prefetch
|
9688 |
|
|
&& ! loop_info->has_volatile
|
9689 |
|
|
&& reversible_mem_store
|
9690 |
|
|
&& (bl->giv_count + bl->biv_count + loop_info->num_mem_sets
|
9691 |
|
|
+ num_unmoved_movables (loop) + compare_and_branch == insn_count)
|
9692 |
|
|
&& (bl == ivs->list && bl->next == 0))
|
9693 |
|
|
|| (no_use_except_counting && ! loop_info->has_prefetch))
|
9694 |
|
|
{
|
9695 |
|
|
rtx tem;
|
9696 |
|
|
|
9697 |
|
|
/* Loop can be reversed. */
|
9698 |
|
|
if (loop_dump_stream)
|
9699 |
|
|
fprintf (loop_dump_stream, "Can reverse loop\n");
|
9700 |
|
|
|
9701 |
|
|
/* Now check other conditions:
|
9702 |
|
|
|
9703 |
|
|
The increment must be a constant, as must the initial value,
|
9704 |
|
|
and the comparison code must be LT.
|
9705 |
|
|
|
9706 |
|
|
This test can probably be improved since +/- 1 in the constant
|
9707 |
|
|
can be obtained by changing LT to LE and vice versa; this is
|
9708 |
|
|
confusing. */
|
9709 |
|
|
|
9710 |
|
|
if (comparison
|
9711 |
|
|
/* for constants, LE gets turned into LT */
|
9712 |
|
|
&& (GET_CODE (comparison) == LT
|
9713 |
|
|
|| (GET_CODE (comparison) == LE
|
9714 |
|
|
&& no_use_except_counting)
|
9715 |
|
|
|| GET_CODE (comparison) == LTU))
|
9716 |
|
|
{
|
9717 |
|
|
HOST_WIDE_INT add_val, add_adjust, comparison_val = 0;
|
9718 |
|
|
rtx initial_value, comparison_value;
|
9719 |
|
|
int nonneg = 0;
|
9720 |
|
|
enum rtx_code cmp_code;
|
9721 |
|
|
int comparison_const_width;
|
9722 |
|
|
unsigned HOST_WIDE_INT comparison_sign_mask;
|
9723 |
|
|
bool keep_first_compare;
|
9724 |
|
|
|
9725 |
|
|
add_val = INTVAL (bl->biv->add_val);
|
9726 |
|
|
comparison_value = XEXP (comparison, 1);
|
9727 |
|
|
if (GET_MODE (comparison_value) == VOIDmode)
|
9728 |
|
|
comparison_const_width
|
9729 |
|
|
= GET_MODE_BITSIZE (GET_MODE (XEXP (comparison, 0)));
|
9730 |
|
|
else
|
9731 |
|
|
comparison_const_width
|
9732 |
|
|
= GET_MODE_BITSIZE (GET_MODE (comparison_value));
|
9733 |
|
|
if (comparison_const_width > HOST_BITS_PER_WIDE_INT)
|
9734 |
|
|
comparison_const_width = HOST_BITS_PER_WIDE_INT;
|
9735 |
|
|
comparison_sign_mask
|
9736 |
|
|
= (unsigned HOST_WIDE_INT) 1 << (comparison_const_width - 1);
|
9737 |
|
|
|
9738 |
|
|
/* If the comparison value is not a loop invariant, then we
|
9739 |
|
|
can not reverse this loop.
|
9740 |
|
|
|
9741 |
|
|
??? If the insns which initialize the comparison value as
|
9742 |
|
|
a whole compute an invariant result, then we could move
|
9743 |
|
|
them out of the loop and proceed with loop reversal. */
|
9744 |
|
|
if (! loop_invariant_p (loop, comparison_value))
|
9745 |
|
|
return 0;
|
9746 |
|
|
|
9747 |
|
|
if (GET_CODE (comparison_value) == CONST_INT)
|
9748 |
|
|
comparison_val = INTVAL (comparison_value);
|
9749 |
|
|
initial_value = bl->initial_value;
|
9750 |
|
|
|
9751 |
|
|
/* Normalize the initial value if it is an integer and
|
9752 |
|
|
has no other use except as a counter. This will allow
|
9753 |
|
|
a few more loops to be reversed. */
|
9754 |
|
|
if (no_use_except_counting
|
9755 |
|
|
&& GET_CODE (comparison_value) == CONST_INT
|
9756 |
|
|
&& GET_CODE (initial_value) == CONST_INT)
|
9757 |
|
|
{
|
9758 |
|
|
comparison_val = comparison_val - INTVAL (bl->initial_value);
|
9759 |
|
|
/* The code below requires comparison_val to be a multiple
|
9760 |
|
|
of add_val in order to do the loop reversal, so
|
9761 |
|
|
round up comparison_val to a multiple of add_val.
|
9762 |
|
|
Since comparison_value is constant, we know that the
|
9763 |
|
|
current comparison code is LT. */
|
9764 |
|
|
comparison_val = comparison_val + add_val - 1;
|
9765 |
|
|
comparison_val
|
9766 |
|
|
-= (unsigned HOST_WIDE_INT) comparison_val % add_val;
|
9767 |
|
|
/* We postpone overflow checks for COMPARISON_VAL here;
|
9768 |
|
|
even if there is an overflow, we might still be able to
|
9769 |
|
|
reverse the loop, if converting the loop exit test to
|
9770 |
|
|
NE is possible. */
|
9771 |
|
|
initial_value = const0_rtx;
|
9772 |
|
|
}
|
9773 |
|
|
|
9774 |
|
|
/* First check if we can do a vanilla loop reversal. */
|
9775 |
|
|
if (initial_value == const0_rtx
|
9776 |
|
|
&& GET_CODE (comparison_value) == CONST_INT
|
9777 |
|
|
/* Now do postponed overflow checks on COMPARISON_VAL. */
|
9778 |
|
|
&& ! (((comparison_val - add_val) ^ INTVAL (comparison_value))
|
9779 |
|
|
& comparison_sign_mask))
|
9780 |
|
|
{
|
9781 |
|
|
/* Register will always be nonnegative, with value
|
9782 |
|
|
|
9783 |
|
|
add_adjust = add_val;
|
9784 |
|
|
nonneg = 1;
|
9785 |
|
|
cmp_code = GE;
|
9786 |
|
|
}
|
9787 |
|
|
else
|
9788 |
|
|
return 0;
|
9789 |
|
|
|
9790 |
|
|
if (GET_CODE (comparison) == LE)
|
9791 |
|
|
add_adjust -= add_val;
|
9792 |
|
|
|
9793 |
|
|
/* If the initial value is not zero, or if the comparison
|
9794 |
|
|
value is not an exact multiple of the increment, then we
|
9795 |
|
|
can not reverse this loop. */
|
9796 |
|
|
if (initial_value == const0_rtx
|
9797 |
|
|
&& GET_CODE (comparison_value) == CONST_INT)
|
9798 |
|
|
{
|
9799 |
|
|
if (((unsigned HOST_WIDE_INT) comparison_val % add_val) != 0)
|
9800 |
|
|
return 0;
|
9801 |
|
|
}
|
9802 |
|
|
else
|
9803 |
|
|
{
|
9804 |
|
|
if (! no_use_except_counting || add_val != 1)
|
9805 |
|
|
return 0;
|
9806 |
|
|
}
|
9807 |
|
|
|
9808 |
|
|
final_value = comparison_value;
|
9809 |
|
|
|
9810 |
|
|
/* Reset these in case we normalized the initial value
|
9811 |
|
|
and comparison value above. */
|
9812 |
|
|
if (GET_CODE (comparison_value) == CONST_INT
|
9813 |
|
|
&& GET_CODE (initial_value) == CONST_INT)
|
9814 |
|
|
{
|
9815 |
|
|
comparison_value = GEN_INT (comparison_val);
|
9816 |
|
|
final_value
|
9817 |
|
|
= GEN_INT (comparison_val + INTVAL (bl->initial_value));
|
9818 |
|
|
}
|
9819 |
|
|
bl->initial_value = initial_value;
|
9820 |
|
|
|
9821 |
|
|
/* Save some info needed to produce the new insns. */
|
9822 |
|
|
reg = bl->biv->dest_reg;
|
9823 |
|
|
mode = GET_MODE (reg);
|
9824 |
|
|
jump_label = condjump_label (PREV_INSN (loop_end));
|
9825 |
|
|
new_add_val = GEN_INT (-INTVAL (bl->biv->add_val));
|
9826 |
|
|
|
9827 |
|
|
/* Set start_value; if this is not a CONST_INT, we need
|
9828 |
|
|
to generate a SUB.
|
9829 |
|
|
Initialize biv to start_value before loop start.
|
9830 |
|
|
The old initializing insn will be deleted as a
|
9831 |
|
|
dead store by flow.c. */
|
9832 |
|
|
if (initial_value == const0_rtx
|
9833 |
|
|
&& GET_CODE (comparison_value) == CONST_INT)
|
9834 |
|
|
{
|
9835 |
|
|
start_value
|
9836 |
|
|
= gen_int_mode (comparison_val - add_adjust, mode);
|
9837 |
|
|
loop_insn_hoist (loop, gen_move_insn (reg, start_value));
|
9838 |
|
|
}
|
9839 |
|
|
else if (GET_CODE (initial_value) == CONST_INT)
|
9840 |
|
|
{
|
9841 |
|
|
rtx offset = GEN_INT (-INTVAL (initial_value) - add_adjust);
|
9842 |
|
|
rtx add_insn = gen_add3_insn (reg, comparison_value, offset);
|
9843 |
|
|
|
9844 |
|
|
if (add_insn == 0)
|
9845 |
|
|
return 0;
|
9846 |
|
|
|
9847 |
|
|
start_value
|
9848 |
|
|
= gen_rtx_PLUS (mode, comparison_value, offset);
|
9849 |
|
|
loop_insn_hoist (loop, add_insn);
|
9850 |
|
|
if (GET_CODE (comparison) == LE)
|
9851 |
|
|
final_value = gen_rtx_PLUS (mode, comparison_value,
|
9852 |
|
|
GEN_INT (add_val));
|
9853 |
|
|
}
|
9854 |
|
|
else if (! add_adjust)
|
9855 |
|
|
{
|
9856 |
|
|
rtx sub_insn = gen_sub3_insn (reg, comparison_value,
|
9857 |
|
|
initial_value);
|
9858 |
|
|
|
9859 |
|
|
if (sub_insn == 0)
|
9860 |
|
|
return 0;
|
9861 |
|
|
start_value
|
9862 |
|
|
= gen_rtx_MINUS (mode, comparison_value, initial_value);
|
9863 |
|
|
loop_insn_hoist (loop, sub_insn);
|
9864 |
|
|
}
|
9865 |
|
|
else
|
9866 |
|
|
/* We could handle the other cases too, but it'll be
|
9867 |
|
|
better to have a testcase first. */
|
9868 |
|
|
return 0;
|
9869 |
|
|
|
9870 |
|
|
/* We may not have a single insn which can increment a reg, so
|
9871 |
|
|
create a sequence to hold all the insns from expand_inc. */
|
9872 |
|
|
start_sequence ();
|
9873 |
|
|
expand_inc (reg, new_add_val);
|
9874 |
|
|
tem = get_insns ();
|
9875 |
|
|
end_sequence ();
|
9876 |
|
|
|
9877 |
|
|
p = loop_insn_emit_before (loop, 0, bl->biv->insn, tem);
|
9878 |
|
|
delete_insn (bl->biv->insn);
|
9879 |
|
|
|
9880 |
|
|
/* Update biv info to reflect its new status. */
|
9881 |
|
|
bl->biv->insn = p;
|
9882 |
|
|
bl->initial_value = start_value;
|
9883 |
|
|
bl->biv->add_val = new_add_val;
|
9884 |
|
|
|
9885 |
|
|
/* Update loop info. */
|
9886 |
|
|
loop_info->initial_value = reg;
|
9887 |
|
|
loop_info->initial_equiv_value = reg;
|
9888 |
|
|
loop_info->final_value = const0_rtx;
|
9889 |
|
|
loop_info->final_equiv_value = const0_rtx;
|
9890 |
|
|
loop_info->comparison_value = const0_rtx;
|
9891 |
|
|
loop_info->comparison_code = cmp_code;
|
9892 |
|
|
loop_info->increment = new_add_val;
|
9893 |
|
|
|
9894 |
|
|
/* Inc LABEL_NUSES so that delete_insn will
|
9895 |
|
|
not delete the label. */
|
9896 |
|
|
LABEL_NUSES (XEXP (jump_label, 0))++;
|
9897 |
|
|
|
9898 |
|
|
/* If we have a separate comparison insn that does more
|
9899 |
|
|
than just set cc0, the result of the comparison might
|
9900 |
|
|
be used outside the loop. */
|
9901 |
|
|
keep_first_compare = (compare_and_branch == 2
|
9902 |
|
|
#ifdef HAVE_CC0
|
9903 |
|
|
&& sets_cc0_p (first_compare) <= 0
|
9904 |
|
|
#endif
|
9905 |
|
|
);
|
9906 |
|
|
|
9907 |
|
|
/* Emit an insn after the end of the loop to set the biv's
|
9908 |
|
|
proper exit value if it is used anywhere outside the loop. */
|
9909 |
|
|
if (keep_first_compare
|
9910 |
|
|
|| (REGNO_LAST_UID (bl->regno) != INSN_UID (first_compare))
|
9911 |
|
|
|| ! bl->init_insn
|
9912 |
|
|
|| REGNO_FIRST_UID (bl->regno) != INSN_UID (bl->init_insn))
|
9913 |
|
|
loop_insn_sink (loop, gen_load_of_final_value (reg, final_value));
|
9914 |
|
|
|
9915 |
|
|
if (keep_first_compare)
|
9916 |
|
|
loop_insn_sink (loop, PATTERN (first_compare));
|
9917 |
|
|
|
9918 |
|
|
/* Delete compare/branch at end of loop. */
|
9919 |
|
|
delete_related_insns (PREV_INSN (loop_end));
|
9920 |
|
|
if (compare_and_branch == 2)
|
9921 |
|
|
delete_related_insns (first_compare);
|
9922 |
|
|
|
9923 |
|
|
/* Add new compare/branch insn at end of loop. */
|
9924 |
|
|
start_sequence ();
|
9925 |
|
|
emit_cmp_and_jump_insns (reg, const0_rtx, cmp_code, NULL_RTX,
|
9926 |
|
|
mode, 0,
|
9927 |
|
|
XEXP (jump_label, 0));
|
9928 |
|
|
tem = get_insns ();
|
9929 |
|
|
end_sequence ();
|
9930 |
|
|
emit_jump_insn_before (tem, loop_end);
|
9931 |
|
|
|
9932 |
|
|
for (tem = PREV_INSN (loop_end);
|
9933 |
|
|
tem && !JUMP_P (tem);
|
9934 |
|
|
tem = PREV_INSN (tem))
|
9935 |
|
|
;
|
9936 |
|
|
|
9937 |
|
|
if (tem)
|
9938 |
|
|
JUMP_LABEL (tem) = XEXP (jump_label, 0);
|
9939 |
|
|
|
9940 |
|
|
if (nonneg)
|
9941 |
|
|
{
|
9942 |
|
|
if (tem)
|
9943 |
|
|
{
|
9944 |
|
|
/* Increment of LABEL_NUSES done above. */
|
9945 |
|
|
/* Register is now always nonnegative,
|
9946 |
|
|
so add REG_NONNEG note to the branch. */
|
9947 |
|
|
REG_NOTES (tem) = gen_rtx_EXPR_LIST (REG_NONNEG, reg,
|
9948 |
|
|
REG_NOTES (tem));
|
9949 |
|
|
}
|
9950 |
|
|
bl->nonneg = 1;
|
9951 |
|
|
}
|
9952 |
|
|
|
9953 |
|
|
/* No insn may reference both the reversed and another biv or it
|
9954 |
|
|
will fail (see comment near the top of the loop reversal
|
9955 |
|
|
code).
|
9956 |
|
|
Earlier on, we have verified that the biv has no use except
|
9957 |
|
|
counting, or it is the only biv in this function.
|
9958 |
|
|
However, the code that computes no_use_except_counting does
|
9959 |
|
|
not verify reg notes. It's possible to have an insn that
|
9960 |
|
|
references another biv, and has a REG_EQUAL note with an
|
9961 |
|
|
expression based on the reversed biv. To avoid this case,
|
9962 |
|
|
remove all REG_EQUAL notes based on the reversed biv
|
9963 |
|
|
here. */
|
9964 |
|
|
for (p = loop_start; p != loop_end; p = NEXT_INSN (p))
|
9965 |
|
|
if (INSN_P (p))
|
9966 |
|
|
{
|
9967 |
|
|
rtx *pnote;
|
9968 |
|
|
rtx set = single_set (p);
|
9969 |
|
|
/* If this is a set of a GIV based on the reversed biv, any
|
9970 |
|
|
REG_EQUAL notes should still be correct. */
|
9971 |
|
|
if (! set
|
9972 |
|
|
|| !REG_P (SET_DEST (set))
|
9973 |
|
|
|| (size_t) REGNO (SET_DEST (set)) >= ivs->n_regs
|
9974 |
|
|
|| REG_IV_TYPE (ivs, REGNO (SET_DEST (set))) != GENERAL_INDUCT
|
9975 |
|
|
|| REG_IV_INFO (ivs, REGNO (SET_DEST (set)))->src_reg != bl->biv->src_reg)
|
9976 |
|
|
for (pnote = ®_NOTES (p); *pnote;)
|
9977 |
|
|
{
|
9978 |
|
|
if (REG_NOTE_KIND (*pnote) == REG_EQUAL
|
9979 |
|
|
&& reg_mentioned_p (regno_reg_rtx[bl->regno],
|
9980 |
|
|
XEXP (*pnote, 0)))
|
9981 |
|
|
*pnote = XEXP (*pnote, 1);
|
9982 |
|
|
else
|
9983 |
|
|
pnote = &XEXP (*pnote, 1);
|
9984 |
|
|
}
|
9985 |
|
|
}
|
9986 |
|
|
|
9987 |
|
|
/* Mark that this biv has been reversed. Each giv which depends
|
9988 |
|
|
on this biv, and which is also live past the end of the loop
|
9989 |
|
|
will have to be fixed up. */
|
9990 |
|
|
|
9991 |
|
|
bl->reversed = 1;
|
9992 |
|
|
|
9993 |
|
|
if (loop_dump_stream)
|
9994 |
|
|
{
|
9995 |
|
|
fprintf (loop_dump_stream, "Reversed loop");
|
9996 |
|
|
if (bl->nonneg)
|
9997 |
|
|
fprintf (loop_dump_stream, " and added reg_nonneg\n");
|
9998 |
|
|
else
|
9999 |
|
|
fprintf (loop_dump_stream, "\n");
|
10000 |
|
|
}
|
10001 |
|
|
|
10002 |
|
|
return 1;
|
10003 |
|
|
}
|
10004 |
|
|
}
|
10005 |
|
|
}
|
10006 |
|
|
|
10007 |
|
|
return 0;
|
10008 |
|
|
}
|
10009 |
|
|
|
10010 |
|
|
/* Verify whether the biv BL appears to be eliminable,
   based on the insns in the loop that refer to it.

   If ELIMINATE_P is nonzero, actually do the elimination.

   THRESHOLD and INSN_COUNT are from loop_optimize and are used to
   determine whether invariant insns should be placed inside or at the
   start of the loop.

   Returns 1 if every use of the biv inside the loop either sets it,
   defines a giv, or could be rewritten by maybe_eliminate_biv_1;
   returns 0 as soon as a blocking use is found.  */

static int
maybe_eliminate_biv (const struct loop *loop, struct iv_class *bl,
                     int eliminate_p, int threshold, int insn_count)
{
  struct loop_ivs *ivs = LOOP_IVS (loop);
  rtx reg = bl->biv->dest_reg;
  rtx p;

  /* Scan all insns in the loop, stopping if we find one that uses the
     biv in a way that we cannot eliminate.  */

  for (p = loop->start; p != loop->end; p = NEXT_INSN (p))
    {
      enum rtx_code code = GET_CODE (p);
      basic_block where_bb = 0;
      /* When the loop is small relative to THRESHOLD, replacement insns
         may be hoisted to the loop start (WHERE_INSN == 0); otherwise
         they are emitted just before the insn being examined.  */
      rtx where_insn = threshold >= insn_count ? 0 : p;
      rtx note;

      /* If this is a libcall that sets a giv, skip ahead to its end.  */
      if (INSN_P (p))
        {
          note = find_reg_note (p, REG_LIBCALL, NULL_RTX);

          if (note)
            {
              /* The REG_LIBCALL note points at the last insn of the
                 libcall sequence; if that insn sets a giv derived from
                 this biv, the whole sequence will be rewritten when the
                 giv is reduced, so jump over it.  */
              rtx last = XEXP (note, 0);
              rtx set = single_set (last);

              if (set && REG_P (SET_DEST (set)))
                {
                  unsigned int regno = REGNO (SET_DEST (set));

                  if (regno < ivs->n_regs
                      && REG_IV_TYPE (ivs, regno) == GENERAL_INDUCT
                      && REG_IV_INFO (ivs, regno)->src_reg == bl->biv->src_reg)
                    p = last;
                }
            }
        }

      /* Closely examine the insn if the biv is mentioned.  */
      if ((code == INSN || code == JUMP_INSN || code == CALL_INSN)
          && reg_mentioned_p (reg, PATTERN (p))
          && ! maybe_eliminate_biv_1 (loop, PATTERN (p), p, bl,
                                      eliminate_p, where_bb, where_insn))
        {
          if (loop_dump_stream)
            fprintf (loop_dump_stream,
                     "Cannot eliminate biv %d: biv used in insn %d.\n",
                     bl->regno, INSN_UID (p));
          break;
        }

      /* If we are eliminating, kill REG_EQUAL notes mentioning the biv.  */
      if (eliminate_p
          && (note = find_reg_note (p, REG_EQUAL, NULL_RTX)) != NULL_RTX
          && reg_mentioned_p (reg, XEXP (note, 0)))
        remove_note (p, note);
    }

  /* P == loop->end means the scan above completed without finding a
     blocking use of the biv.  */
  if (p == loop->end)
    {
      if (loop_dump_stream)
        fprintf (loop_dump_stream, "biv %d %s eliminated.\n",
                 bl->regno, eliminate_p ? "was" : "can be");
      return 1;
    }

  return 0;
}
|
10089 |
|
|
|
10090 |
|
|
/* INSN and REFERENCE are instructions in the same insn chain.
|
10091 |
|
|
Return nonzero if INSN is first. */
|
10092 |
|
|
|
10093 |
|
|
static int
|
10094 |
|
|
loop_insn_first_p (rtx insn, rtx reference)
|
10095 |
|
|
{
|
10096 |
|
|
rtx p, q;
|
10097 |
|
|
|
10098 |
|
|
for (p = insn, q = reference;;)
|
10099 |
|
|
{
|
10100 |
|
|
/* Start with test for not first so that INSN == REFERENCE yields not
|
10101 |
|
|
first. */
|
10102 |
|
|
if (q == insn || ! p)
|
10103 |
|
|
return 0;
|
10104 |
|
|
if (p == reference || ! q)
|
10105 |
|
|
return 1;
|
10106 |
|
|
|
10107 |
|
|
/* Either of P or Q might be a NOTE. Notes have the same LUID as the
|
10108 |
|
|
previous insn, hence the <= comparison below does not work if
|
10109 |
|
|
P is a note. */
|
10110 |
|
|
if (INSN_UID (p) < max_uid_for_loop
|
10111 |
|
|
&& INSN_UID (q) < max_uid_for_loop
|
10112 |
|
|
&& !NOTE_P (p))
|
10113 |
|
|
return INSN_LUID (p) <= INSN_LUID (q);
|
10114 |
|
|
|
10115 |
|
|
if (INSN_UID (p) >= max_uid_for_loop
|
10116 |
|
|
|| NOTE_P (p))
|
10117 |
|
|
p = NEXT_INSN (p);
|
10118 |
|
|
if (INSN_UID (q) >= max_uid_for_loop)
|
10119 |
|
|
q = NEXT_INSN (q);
|
10120 |
|
|
}
|
10121 |
|
|
}
|
10122 |
|
|
|
10123 |
|
|
/* We are trying to eliminate BIV in INSN using GIV. Return nonzero if
|
10124 |
|
|
the offset that we have to take into account due to auto-increment /
|
10125 |
|
|
div derivation is zero. */
|
10126 |
|
|
static int
|
10127 |
|
|
biv_elimination_giv_has_0_offset (struct induction *biv,
|
10128 |
|
|
struct induction *giv, rtx insn)
|
10129 |
|
|
{
|
10130 |
|
|
/* If the giv V had the auto-inc address optimization applied
|
10131 |
|
|
to it, and INSN occurs between the giv insn and the biv
|
10132 |
|
|
insn, then we'd have to adjust the value used here.
|
10133 |
|
|
This is rare, so we don't bother to make this possible. */
|
10134 |
|
|
if (giv->auto_inc_opt
|
10135 |
|
|
&& ((loop_insn_first_p (giv->insn, insn)
|
10136 |
|
|
&& loop_insn_first_p (insn, biv->insn))
|
10137 |
|
|
|| (loop_insn_first_p (biv->insn, insn)
|
10138 |
|
|
&& loop_insn_first_p (insn, giv->insn))))
|
10139 |
|
|
return 0;
|
10140 |
|
|
|
10141 |
|
|
return 1;
|
10142 |
|
|
}
|
10143 |
|
|
|
10144 |
|
|
/* If BL appears in X (part of the pattern of INSN), see if we can
   eliminate its use.  If so, return 1.  If not, return 0.

   If BIV does not appear in X, return 1.

   If ELIMINATE_P is nonzero, actually do the elimination.
   WHERE_INSN/WHERE_BB indicate where extra insns should be added.
   Depending on how many items have been moved out of the loop, it
   will either be before INSN (when WHERE_INSN is nonzero) or at the
   start of the loop (when WHERE_INSN is zero).  */

static int
maybe_eliminate_biv_1 (const struct loop *loop, rtx x, rtx insn,
                       struct iv_class *bl, int eliminate_p,
                       basic_block where_bb, rtx where_insn)
{
  enum rtx_code code = GET_CODE (x);
  rtx reg = bl->biv->dest_reg;
  enum machine_mode mode = GET_MODE (reg);
  struct induction *v;
  rtx arg, tem;
#ifdef HAVE_cc0
  rtx new;  /* Replacement expression for the SET_SRC being rewritten.  */
#endif
  int arg_operand;
  const char *fmt;
  int i, j;

  switch (code)
    {
    case REG:
      /* If we haven't already been able to do something with this BIV,
         we can't eliminate it.  */
      if (x == reg)
        return 0;
      return 1;

    case SET:
      /* If this sets the BIV, it is not a problem.  */
      if (SET_DEST (x) == reg)
        return 1;

      /* If this is an insn that defines a giv, it is also ok because
         it will go away when the giv is reduced.  */
      for (v = bl->giv; v; v = v->next_iv)
        if (v->giv_type == DEST_REG && SET_DEST (x) == v->dest_reg)
          return 1;

#ifdef HAVE_cc0
      if (SET_DEST (x) == cc0_rtx && SET_SRC (x) == reg)
        {
          /* Can replace with any giv that was reduced and
             that has (MULT_VAL != 0) and (ADD_VAL == 0).
             Require a constant for MULT_VAL, so we know it's nonzero.
             ??? We disable this optimization to avoid potential
             overflows.  */

          for (v = bl->giv; v; v = v->next_iv)
            if (GET_CODE (v->mult_val) == CONST_INT && v->mult_val != const0_rtx
                && v->add_val == const0_rtx
                && ! v->ignore && ! v->maybe_dead && v->always_computable
                && v->mode == mode
                && 0)  /* Deliberately disabled -- see the ??? comment above.  */
              {
                if (! biv_elimination_giv_has_0_offset (bl->biv, v, insn))
                  continue;

                if (! eliminate_p)
                  return 1;

                /* If the giv has the opposite direction of change,
                   then reverse the comparison.  */
                if (INTVAL (v->mult_val) < 0)
                  new = gen_rtx_COMPARE (GET_MODE (v->new_reg),
                                         const0_rtx, v->new_reg);
                else
                  new = v->new_reg;

                /* We can probably test that giv's reduced reg.  */
                if (validate_change (insn, &SET_SRC (x), new, 0))
                  return 1;
              }

          /* Look for a giv with (MULT_VAL != 0) and (ADD_VAL != 0);
             replace test insn with a compare insn (cmp REDUCED_GIV ADD_VAL).
             Require a constant for MULT_VAL, so we know it's nonzero.
             ??? Do this only if ADD_VAL is a pointer to avoid a potential
             overflow problem.  */

          for (v = bl->giv; v; v = v->next_iv)
            if (GET_CODE (v->mult_val) == CONST_INT
                && v->mult_val != const0_rtx
                && ! v->ignore && ! v->maybe_dead && v->always_computable
                && v->mode == mode
                && (GET_CODE (v->add_val) == SYMBOL_REF
                    || GET_CODE (v->add_val) == LABEL_REF
                    || GET_CODE (v->add_val) == CONST
                    || (REG_P (v->add_val)
                        && REG_POINTER (v->add_val))))
              {
                if (! biv_elimination_giv_has_0_offset (bl->biv, v, insn))
                  continue;

                if (! eliminate_p)
                  return 1;

                /* If the giv has the opposite direction of change,
                   then reverse the comparison.  */
                if (INTVAL (v->mult_val) < 0)
                  new = gen_rtx_COMPARE (VOIDmode, copy_rtx (v->add_val),
                                         v->new_reg);
                else
                  new = gen_rtx_COMPARE (VOIDmode, v->new_reg,
                                         copy_rtx (v->add_val));

                /* Replace biv with the giv's reduced register.  */
                update_reg_last_use (v->add_val, insn);
                if (validate_change (insn, &SET_SRC (PATTERN (insn)), new, 0))
                  return 1;

                /* Insn doesn't support that constant or invariant.  Copy it
                   into a register (it will be a loop invariant.)  */
                tem = gen_reg_rtx (GET_MODE (v->new_reg));

                loop_insn_emit_before (loop, 0, where_insn,
                                       gen_move_insn (tem,
                                                      copy_rtx (v->add_val)));

                /* Substitute the new register for its invariant value in
                   the compare expression.  */
                XEXP (new, (INTVAL (v->mult_val) < 0) ? 0 : 1) = tem;
                if (validate_change (insn, &SET_SRC (PATTERN (insn)), new, 0))
                  return 1;
              }
        }
#endif
      break;

    case COMPARE:
    case EQ:  case NE:
    case GT:  case GE:  case GTU:  case GEU:
    case LT:  case LE:  case LTU:  case LEU:
      /* See if either argument is the biv.  */
      if (XEXP (x, 0) == reg)
        arg = XEXP (x, 1), arg_operand = 1;
      else if (XEXP (x, 1) == reg)
        arg = XEXP (x, 0), arg_operand = 0;
      else
        break;

      if (GET_CODE (arg) != CONST_INT)
        return 0;

      /* Unless we're dealing with an equality comparison, if we can't
         determine that the original biv doesn't wrap, then we must not
         apply the transformation.  */
      /* ??? Actually, what we must do is verify that the transformed
         giv doesn't wrap.  But the general case of this transformation
         was disabled long ago due to wrapping problems, and there's no
         point reviving it this close to end-of-life for loop.c.  The
         only case still enabled is known (via the check on add_val) to
         be pointer arithmetic, which in theory never overflows for
         valid programs.  */
      /* Without lifetime analysis, we don't know how COMPARE will be
         used, so we must assume the worst.  */
      if (code != EQ && code != NE
          && biased_biv_may_wrap_p (loop, bl, INTVAL (arg)))
        return 0;

      /* Try to replace with any giv that has constant positive mult_val
         and a pointer add_val.  */
      for (v = bl->giv; v; v = v->next_iv)
        if (GET_CODE (v->mult_val) == CONST_INT
            && INTVAL (v->mult_val) > 0
            && (GET_CODE (v->add_val) == SYMBOL_REF
                || GET_CODE (v->add_val) == LABEL_REF
                || GET_CODE (v->add_val) == CONST
                || (REG_P (v->add_val) && REG_POINTER (v->add_val)))
            && ! v->ignore && ! v->maybe_dead && v->always_computable
            && v->mode == mode)
          {
            if (! biv_elimination_giv_has_0_offset (bl->biv, v, insn))
              continue;

            if (! eliminate_p)
              return 1;

            /* Replace biv with the giv's reduced reg.  The two
               validate_change calls below are queued (in_group == 1)
               and committed together by apply_change_group.  */
            validate_change (insn, &XEXP (x, 1 - arg_operand), v->new_reg, 1);

            /* Load the value into a register.  */
            tem = gen_reg_rtx (mode);
            loop_iv_add_mult_emit_before (loop, arg, v->mult_val, v->add_val,
                                          tem, where_bb, where_insn);

            validate_change (insn, &XEXP (x, arg_operand), tem, 1);

            if (apply_change_group ())
              return 1;
          }

      /* If we get here, the biv can't be eliminated.  */
      return 0;

    case MEM:
      /* If this address is a DEST_ADDR giv, it doesn't matter if the
         biv is used in it, since it will be replaced.  */
      for (v = bl->giv; v; v = v->next_iv)
        if (v->giv_type == DEST_ADDR && v->location == &XEXP (x, 0))
          return 1;
      break;

    default:
      break;
    }

  /* See if any subexpression fails elimination.  */
  fmt = GET_RTX_FORMAT (code);
  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
    {
      switch (fmt[i])
        {
        case 'e':
          if (! maybe_eliminate_biv_1 (loop, XEXP (x, i), insn, bl,
                                       eliminate_p, where_bb, where_insn))
            return 0;
          break;

        case 'E':
          for (j = XVECLEN (x, i) - 1; j >= 0; j--)
            if (! maybe_eliminate_biv_1 (loop, XVECEXP (x, i, j), insn, bl,
                                         eliminate_p, where_bb, where_insn))
              return 0;
          break;
        }
    }

  return 1;
}
|
10383 |
|
|
|
10384 |
|
|
/* Return nonzero if the last use of REG
|
10385 |
|
|
is in an insn following INSN in the same basic block. */
|
10386 |
|
|
|
10387 |
|
|
static int
|
10388 |
|
|
last_use_this_basic_block (rtx reg, rtx insn)
|
10389 |
|
|
{
|
10390 |
|
|
rtx n;
|
10391 |
|
|
for (n = insn;
|
10392 |
|
|
n && !LABEL_P (n) && !JUMP_P (n);
|
10393 |
|
|
n = NEXT_INSN (n))
|
10394 |
|
|
{
|
10395 |
|
|
if (REGNO_LAST_UID (REGNO (reg)) == INSN_UID (n))
|
10396 |
|
|
return 1;
|
10397 |
|
|
}
|
10398 |
|
|
return 0;
|
10399 |
|
|
}
|
10400 |
|
|
|
10401 |
|
|
/* Called via `note_stores' to record the initial value of a biv. Here we
|
10402 |
|
|
just record the location of the set and process it later. */
|
10403 |
|
|
|
10404 |
|
|
static void
|
10405 |
|
|
record_initial (rtx dest, rtx set, void *data ATTRIBUTE_UNUSED)
|
10406 |
|
|
{
|
10407 |
|
|
struct loop_ivs *ivs = (struct loop_ivs *) data;
|
10408 |
|
|
struct iv_class *bl;
|
10409 |
|
|
|
10410 |
|
|
if (!REG_P (dest)
|
10411 |
|
|
|| REGNO (dest) >= ivs->n_regs
|
10412 |
|
|
|| REG_IV_TYPE (ivs, REGNO (dest)) != BASIC_INDUCT)
|
10413 |
|
|
return;
|
10414 |
|
|
|
10415 |
|
|
bl = REG_IV_CLASS (ivs, REGNO (dest));
|
10416 |
|
|
|
10417 |
|
|
/* If this is the first set found, record it. */
|
10418 |
|
|
if (bl->init_insn == 0)
|
10419 |
|
|
{
|
10420 |
|
|
bl->init_insn = note_insn;
|
10421 |
|
|
bl->init_set = set;
|
10422 |
|
|
}
|
10423 |
|
|
}
|
10424 |
|
|
|
10425 |
|
|
/* If any of the registers in X are "old" and currently have a last use earlier
|
10426 |
|
|
than INSN, update them to have a last use of INSN. Their actual last use
|
10427 |
|
|
will be the previous insn but it will not have a valid uid_luid so we can't
|
10428 |
|
|
use it. X must be a source expression only. */
|
10429 |
|
|
|
10430 |
|
|
static void
|
10431 |
|
|
update_reg_last_use (rtx x, rtx insn)
|
10432 |
|
|
{
|
10433 |
|
|
/* Check for the case where INSN does not have a valid luid. In this case,
|
10434 |
|
|
there is no need to modify the regno_last_uid, as this can only happen
|
10435 |
|
|
when code is inserted after the loop_end to set a pseudo's final value,
|
10436 |
|
|
and hence this insn will never be the last use of x.
|
10437 |
|
|
???? This comment is not correct. See for example loop_givs_reduce.
|
10438 |
|
|
This may insert an insn before another new insn. */
|
10439 |
|
|
if (REG_P (x) && REGNO (x) < max_reg_before_loop
|
10440 |
|
|
&& INSN_UID (insn) < max_uid_for_loop
|
10441 |
|
|
&& REGNO_LAST_LUID (REGNO (x)) < INSN_LUID (insn))
|
10442 |
|
|
{
|
10443 |
|
|
REGNO_LAST_UID (REGNO (x)) = INSN_UID (insn);
|
10444 |
|
|
}
|
10445 |
|
|
else
|
10446 |
|
|
{
|
10447 |
|
|
int i, j;
|
10448 |
|
|
const char *fmt = GET_RTX_FORMAT (GET_CODE (x));
|
10449 |
|
|
for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
|
10450 |
|
|
{
|
10451 |
|
|
if (fmt[i] == 'e')
|
10452 |
|
|
update_reg_last_use (XEXP (x, i), insn);
|
10453 |
|
|
else if (fmt[i] == 'E')
|
10454 |
|
|
for (j = XVECLEN (x, i) - 1; j >= 0; j--)
|
10455 |
|
|
update_reg_last_use (XVECEXP (x, i, j), insn);
|
10456 |
|
|
}
|
10457 |
|
|
}
|
10458 |
|
|
}
|
10459 |
|
|
|
10460 |
|
|
/* Similar to rtlanal.c:get_condition, except that we also put an
|
10461 |
|
|
invariant last unless both operands are invariants. */
|
10462 |
|
|
|
10463 |
|
|
static rtx
|
10464 |
|
|
get_condition_for_loop (const struct loop *loop, rtx x)
|
10465 |
|
|
{
|
10466 |
|
|
rtx comparison = get_condition (x, (rtx*) 0, false, true);
|
10467 |
|
|
|
10468 |
|
|
if (comparison == 0
|
10469 |
|
|
|| ! loop_invariant_p (loop, XEXP (comparison, 0))
|
10470 |
|
|
|| loop_invariant_p (loop, XEXP (comparison, 1)))
|
10471 |
|
|
return comparison;
|
10472 |
|
|
|
10473 |
|
|
return gen_rtx_fmt_ee (swap_condition (GET_CODE (comparison)), VOIDmode,
|
10474 |
|
|
XEXP (comparison, 1), XEXP (comparison, 0));
|
10475 |
|
|
}
|
10476 |
|
|
|
10477 |
|
|
/* Scan the function and determine whether it has indirect (computed) jumps.
|
10478 |
|
|
|
10479 |
|
|
This is taken mostly from flow.c; similar code exists elsewhere
|
10480 |
|
|
in the compiler. It may be useful to put this into rtlanal.c. */
|
10481 |
|
|
static int
|
10482 |
|
|
indirect_jump_in_function_p (rtx start)
|
10483 |
|
|
{
|
10484 |
|
|
rtx insn;
|
10485 |
|
|
|
10486 |
|
|
for (insn = start; insn; insn = NEXT_INSN (insn))
|
10487 |
|
|
if (computed_jump_p (insn))
|
10488 |
|
|
return 1;
|
10489 |
|
|
|
10490 |
|
|
return 0;
|
10491 |
|
|
}
|
10492 |
|
|
|
10493 |
|
|
/* Add MEM to the LOOP_MEMS array, if appropriate. See the
|
10494 |
|
|
documentation for LOOP_MEMS for the definition of `appropriate'.
|
10495 |
|
|
This function is called from prescan_loop via for_each_rtx. */
|
10496 |
|
|
|
10497 |
|
|
static int
|
10498 |
|
|
insert_loop_mem (rtx *mem, void *data ATTRIBUTE_UNUSED)
|
10499 |
|
|
{
|
10500 |
|
|
struct loop_info *loop_info = data;
|
10501 |
|
|
int i;
|
10502 |
|
|
rtx m = *mem;
|
10503 |
|
|
|
10504 |
|
|
if (m == NULL_RTX)
|
10505 |
|
|
return 0;
|
10506 |
|
|
|
10507 |
|
|
switch (GET_CODE (m))
|
10508 |
|
|
{
|
10509 |
|
|
case MEM:
|
10510 |
|
|
break;
|
10511 |
|
|
|
10512 |
|
|
case CLOBBER:
|
10513 |
|
|
/* We're not interested in MEMs that are only clobbered. */
|
10514 |
|
|
return -1;
|
10515 |
|
|
|
10516 |
|
|
case CONST_DOUBLE:
|
10517 |
|
|
/* We're not interested in the MEM associated with a
|
10518 |
|
|
CONST_DOUBLE, so there's no need to traverse into this. */
|
10519 |
|
|
return -1;
|
10520 |
|
|
|
10521 |
|
|
case EXPR_LIST:
|
10522 |
|
|
/* We're not interested in any MEMs that only appear in notes. */
|
10523 |
|
|
return -1;
|
10524 |
|
|
|
10525 |
|
|
default:
|
10526 |
|
|
/* This is not a MEM. */
|
10527 |
|
|
return 0;
|
10528 |
|
|
}
|
10529 |
|
|
|
10530 |
|
|
/* See if we've already seen this MEM. */
|
10531 |
|
|
for (i = 0; i < loop_info->mems_idx; ++i)
|
10532 |
|
|
if (rtx_equal_p (m, loop_info->mems[i].mem))
|
10533 |
|
|
{
|
10534 |
|
|
if (MEM_VOLATILE_P (m) && !MEM_VOLATILE_P (loop_info->mems[i].mem))
|
10535 |
|
|
loop_info->mems[i].mem = m;
|
10536 |
|
|
if (GET_MODE (m) != GET_MODE (loop_info->mems[i].mem))
|
10537 |
|
|
/* The modes of the two memory accesses are different. If
|
10538 |
|
|
this happens, something tricky is going on, and we just
|
10539 |
|
|
don't optimize accesses to this MEM. */
|
10540 |
|
|
loop_info->mems[i].optimize = 0;
|
10541 |
|
|
|
10542 |
|
|
return 0;
|
10543 |
|
|
}
|
10544 |
|
|
|
10545 |
|
|
/* Resize the array, if necessary. */
|
10546 |
|
|
if (loop_info->mems_idx == loop_info->mems_allocated)
|
10547 |
|
|
{
|
10548 |
|
|
if (loop_info->mems_allocated != 0)
|
10549 |
|
|
loop_info->mems_allocated *= 2;
|
10550 |
|
|
else
|
10551 |
|
|
loop_info->mems_allocated = 32;
|
10552 |
|
|
|
10553 |
|
|
loop_info->mems = xrealloc (loop_info->mems,
|
10554 |
|
|
loop_info->mems_allocated * sizeof (loop_mem_info));
|
10555 |
|
|
}
|
10556 |
|
|
|
10557 |
|
|
/* Actually insert the MEM. */
|
10558 |
|
|
loop_info->mems[loop_info->mems_idx].mem = m;
|
10559 |
|
|
/* We can't hoist this MEM out of the loop if it's a BLKmode MEM
|
10560 |
|
|
because we can't put it in a register. We still store it in the
|
10561 |
|
|
table, though, so that if we see the same address later, but in a
|
10562 |
|
|
non-BLK mode, we'll not think we can optimize it at that point. */
|
10563 |
|
|
loop_info->mems[loop_info->mems_idx].optimize = (GET_MODE (m) != BLKmode);
|
10564 |
|
|
loop_info->mems[loop_info->mems_idx].reg = NULL_RTX;
|
10565 |
|
|
++loop_info->mems_idx;
|
10566 |
|
|
|
10567 |
|
|
return 0;
|
10568 |
|
|
}
|
10569 |
|
|
|
10570 |
|
|
|
10571 |
|
|
/* Allocate REGS->ARRAY or reallocate it if it is too small.

   Increment REGS->ARRAY[I].SET_IN_LOOP at the index I of each
   register that is modified by an insn between FROM and TO.  If the
   value of an element of REGS->array[I].SET_IN_LOOP becomes 127 or
   more, stop incrementing it, to avoid overflow.

   Store in REGS->ARRAY[I].SINGLE_USAGE the single insn in which
   register I is used, if it is only used once.  Otherwise, it is set
   to 0 (for no uses) or const0_rtx for more than one use.  This
   parameter may be zero, in which case this processing is not done.

   Set REGS->ARRAY[I].MAY_NOT_OPTIMIZE nonzero if we should not
   optimize register I.

   EXTRA_SIZE is slack added when growing the array so that a few new
   pseudos can be created later without another reallocation.  */

static void
loop_regs_scan (const struct loop *loop, int extra_size)
{
  struct loop_regs *regs = LOOP_REGS (loop);
  int old_nregs;
  /* last_set[n] is nonzero iff reg n has been set in the current
     basic block.  In that case, it is the insn that last set reg n.  */
  rtx *last_set;
  rtx insn;
  int i;

  old_nregs = regs->num;
  regs->num = max_reg_num ();

  /* Grow the regs array if not allocated or too small.  */
  if (regs->num >= regs->size)
    {
      regs->size = regs->num + extra_size;

      regs->array = xrealloc (regs->array, regs->size * sizeof (*regs->array));

      /* Zero the new elements.  */
      memset (regs->array + old_nregs, 0,
	      (regs->size - old_nregs) * sizeof (*regs->array));
    }

  /* Clear previously scanned fields but do not clear n_times_set.  */
  for (i = 0; i < old_nregs; i++)
    {
      regs->array[i].set_in_loop = 0;
      regs->array[i].may_not_optimize = 0;
      regs->array[i].single_usage = NULL_RTX;
    }

  last_set = xcalloc (regs->num, sizeof (rtx));

  /* Scan the loop, recording register usage.  */
  for (insn = loop->top ? loop->top : loop->start; insn != loop->end;
       insn = NEXT_INSN (insn))
    {
      if (INSN_P (insn))
	{
	  /* Record registers that have exactly one use.  */
	  find_single_use_in_loop (regs, insn, PATTERN (insn));

	  /* Include uses in REG_EQUAL notes.  */
	  if (REG_NOTES (insn))
	    find_single_use_in_loop (regs, insn, REG_NOTES (insn));

	  /* Count sets: a single SET/CLOBBER directly, or each element
	     of a PARALLEL individually.  */
	  if (GET_CODE (PATTERN (insn)) == SET
	      || GET_CODE (PATTERN (insn)) == CLOBBER)
	    count_one_set (regs, insn, PATTERN (insn), last_set);
	  else if (GET_CODE (PATTERN (insn)) == PARALLEL)
	    {
	      int i;
	      for (i = XVECLEN (PATTERN (insn), 0) - 1; i >= 0; i--)
		count_one_set (regs, insn, XVECEXP (PATTERN (insn), 0, i),
			       last_set);
	    }
	}

      /* A label or jump ends the current basic block, so forget all
	 "last set in this block" information.  */
      if (LABEL_P (insn) || JUMP_P (insn))
	memset (last_set, 0, regs->num * sizeof (rtx));

      /* Invalidate all registers used for function argument passing.
	 We check rtx_varies_p for the same reason as below, to allow
	 optimizing PIC calculations.  */
      if (CALL_P (insn))
	{
	  rtx link;
	  for (link = CALL_INSN_FUNCTION_USAGE (insn);
	       link;
	       link = XEXP (link, 1))
	    {
	      rtx op, reg;

	      if (GET_CODE (op = XEXP (link, 0)) == USE
		  && REG_P (reg = XEXP (op, 0))
		  && rtx_varies_p (reg, 1))
		regs->array[REGNO (reg)].may_not_optimize = 1;
	    }
	}
    }

  /* Invalidate all hard registers clobbered by calls.  With one exception:
     a call-clobbered PIC register is still function-invariant for our
     purposes, since we can hoist any PIC calculations out of the loop.
     Thus the call to rtx_varies_p.  */
  if (LOOP_INFO (loop)->has_call)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (regs_invalidated_by_call, i)
	  && rtx_varies_p (regno_reg_rtx[i], 1))
	{
	  regs->array[i].may_not_optimize = 1;
	  regs->array[i].set_in_loop = 1;
	}

#ifdef AVOID_CCMODE_COPIES
  /* Don't try to move insns which set CC registers if we should not
     create CCmode register copies.  */
  for (i = regs->num - 1; i >= FIRST_PSEUDO_REGISTER; i--)
    if (GET_MODE_CLASS (GET_MODE (regno_reg_rtx[i])) == MODE_CC)
      regs->array[i].may_not_optimize = 1;
#endif

  /* Set regs->array[I].n_times_set for the new registers.  */
  for (i = old_nregs; i < regs->num; i++)
    regs->array[i].n_times_set = regs->array[i].set_in_loop;

  free (last_set);
}
|
10697 |
|
|
|
10698 |
|
|
/* Returns the number of real INSNs in the LOOP. */
|
10699 |
|
|
|
10700 |
|
|
static int
|
10701 |
|
|
count_insns_in_loop (const struct loop *loop)
|
10702 |
|
|
{
|
10703 |
|
|
int count = 0;
|
10704 |
|
|
rtx insn;
|
10705 |
|
|
|
10706 |
|
|
for (insn = loop->top ? loop->top : loop->start; insn != loop->end;
|
10707 |
|
|
insn = NEXT_INSN (insn))
|
10708 |
|
|
if (INSN_P (insn))
|
10709 |
|
|
++count;
|
10710 |
|
|
|
10711 |
|
|
return count;
|
10712 |
|
|
}
|
10713 |
|
|
|
10714 |
|
|
/* Move MEMs into registers for the duration of the loop.

   For each suitable entry in LOOP_INFO->mems a new pseudo ("shadow
   register") is created; all references inside the loop are rewritten
   to use it, the pseudo is initialized before the loop, and — if the
   MEM was written — stored back after the loop.  Entries whose
   hoisting turns out to be unsafe get their `optimize' flag cleared
   and are left untouched.  */

static void
load_mems (const struct loop *loop)
{
  struct loop_info *loop_info = LOOP_INFO (loop);
  struct loop_regs *regs = LOOP_REGS (loop);
  int maybe_never = 0;
  int i;
  rtx p, prev_ebb_head;
  rtx label = NULL_RTX;
  rtx end_label;
  /* Nonzero if the next instruction may never be executed.  */
  int next_maybe_never = 0;
  unsigned int last_max_reg = max_reg_num ();

  if (loop_info->mems_idx == 0)
    return;

  /* We cannot use next_label here because it skips over normal insns.  */
  end_label = next_nonnote_insn (loop->end);
  if (end_label && !LABEL_P (end_label))
    end_label = NULL_RTX;

  /* Check to see if it's possible that some instructions in the loop are
     never executed.  Also check if there is a goto out of the loop other
     than right after the end of the loop.  */
  for (p = next_insn_in_loop (loop, loop->scan_start);
       p != NULL_RTX;
       p = next_insn_in_loop (loop, p))
    {
      if (LABEL_P (p))
	maybe_never = 1;
      else if (JUMP_P (p)
	       /* If we enter the loop in the middle, and scan
		  around to the beginning, don't set maybe_never
		  for that.  This must be an unconditional jump,
		  otherwise the code at the top of the loop might
		  never be executed.  Unconditional jumps are
		  followed a by barrier then loop end.  */
	       && ! (JUMP_P (p)
		     && JUMP_LABEL (p) == loop->top
		     && NEXT_INSN (NEXT_INSN (p)) == loop->end
		     && any_uncondjump_p (p)))
	{
	  /* If this is a jump outside of the loop but not right
	     after the end of the loop, we would have to emit new fixup
	     sequences for each such label.  */
	  if (/* If we can't tell where control might go when this
		 JUMP_INSN is executed, we must be conservative.  */
	      !JUMP_LABEL (p)
	      || (JUMP_LABEL (p) != end_label
		  && (INSN_UID (JUMP_LABEL (p)) >= max_uid_for_loop
		      || INSN_LUID (JUMP_LABEL (p)) < INSN_LUID (loop->start)
		      || INSN_LUID (JUMP_LABEL (p)) > INSN_LUID (loop->end))))
	    return;

	  if (!any_condjump_p (p))
	    /* Something complicated.  */
	    maybe_never = 1;
	  else
	    /* If there are any more instructions in the loop, they
	       might not be reached.  */
	    next_maybe_never = 1;
	}
      else if (next_maybe_never)
	maybe_never = 1;
    }

  /* Find start of the extended basic block that enters the loop.  */
  for (p = loop->start;
       PREV_INSN (p) && !LABEL_P (p);
       p = PREV_INSN (p))
    ;
  prev_ebb_head = p;

  cselib_init (true);

  /* Build table of mems that get set to constant values before the
     loop.  */
  for (; p != loop->start; p = NEXT_INSN (p))
    cselib_process_insn (p);

  /* Actually move the MEMs.  */
  for (i = 0; i < loop_info->mems_idx; ++i)
    {
      regset_head load_copies;
      regset_head store_copies;
      int written = 0;
      rtx reg;
      rtx mem = loop_info->mems[i].mem;
      rtx mem_list_entry;

      if (MEM_VOLATILE_P (mem)
	  || loop_invariant_p (loop, XEXP (mem, 0)) != 1)
	/* There's no telling whether or not MEM is modified.  */
	loop_info->mems[i].optimize = 0;

      /* Go through the MEMs written to in the loop to see if this
	 one is aliased by one of them.  */
      mem_list_entry = loop_info->store_mems;
      while (mem_list_entry)
	{
	  if (rtx_equal_p (mem, XEXP (mem_list_entry, 0)))
	    written = 1;
	  else if (true_dependence (XEXP (mem_list_entry, 0), VOIDmode,
				    mem, rtx_varies_p))
	    {
	      /* MEM is indeed aliased by this store.  */
	      loop_info->mems[i].optimize = 0;
	      break;
	    }
	  mem_list_entry = XEXP (mem_list_entry, 1);
	}

      /* -ffloat-store forbids keeping written FP values in registers.  */
      if (flag_float_store && written
	  && GET_MODE_CLASS (GET_MODE (mem)) == MODE_FLOAT)
	loop_info->mems[i].optimize = 0;

      /* If this MEM is written to, we must be sure that there
	 are no reads from another MEM that aliases this one.  */
      if (loop_info->mems[i].optimize && written)
	{
	  int j;

	  for (j = 0; j < loop_info->mems_idx; ++j)
	    {
	      if (j == i)
		continue;
	      else if (true_dependence (mem,
					VOIDmode,
					loop_info->mems[j].mem,
					rtx_varies_p))
		{
		  /* It's not safe to hoist loop_info->mems[i] out of
		     the loop because writes to it might not be
		     seen by reads from loop_info->mems[j].  */
		  loop_info->mems[i].optimize = 0;
		  break;
		}
	    }
	}

      if (maybe_never && may_trap_p (mem))
	/* We can't access the MEM outside the loop; it might
	   cause a trap that wouldn't have happened otherwise.  */
	loop_info->mems[i].optimize = 0;

      if (!loop_info->mems[i].optimize)
	/* We thought we were going to lift this MEM out of the
	   loop, but later discovered that we could not.  */
	continue;

      INIT_REG_SET (&load_copies);
      INIT_REG_SET (&store_copies);

      /* Allocate a pseudo for this MEM.  We set REG_USERVAR_P in
	 order to keep scan_loop from moving stores to this MEM
	 out of the loop just because this REG is neither a
	 user-variable nor used in the loop test.  */
      reg = gen_reg_rtx (GET_MODE (mem));
      REG_USERVAR_P (reg) = 1;
      loop_info->mems[i].reg = reg;

      /* Now, replace all references to the MEM with the
	 corresponding pseudos.  NOTE: maybe_never is reused here for
	 a per-MEM scan; its pre-loop value is no longer needed.  */
      maybe_never = 0;
      for (p = next_insn_in_loop (loop, loop->scan_start);
	   p != NULL_RTX;
	   p = next_insn_in_loop (loop, p))
	{
	  if (INSN_P (p))
	    {
	      rtx set;

	      set = single_set (p);

	      /* See if this copies the mem into a register that isn't
		 modified afterwards.  We'll try to do copy propagation
		 a little further on.  */
	      if (set
		  /* @@@ This test is _way_ too conservative.  */
		  && ! maybe_never
		  && REG_P (SET_DEST (set))
		  && REGNO (SET_DEST (set)) >= FIRST_PSEUDO_REGISTER
		  && REGNO (SET_DEST (set)) < last_max_reg
		  && regs->array[REGNO (SET_DEST (set))].n_times_set == 1
		  && rtx_equal_p (SET_SRC (set), mem))
		SET_REGNO_REG_SET (&load_copies, REGNO (SET_DEST (set)));

	      /* See if this copies the mem from a register that isn't
		 modified afterwards.  We'll try to remove the
		 redundant copy later on by doing a little register
		 renaming and copy propagation.   This will help
		 to untangle things for the BIV detection code.  */
	      if (set
		  && ! maybe_never
		  && REG_P (SET_SRC (set))
		  && REGNO (SET_SRC (set)) >= FIRST_PSEUDO_REGISTER
		  && REGNO (SET_SRC (set)) < last_max_reg
		  && regs->array[REGNO (SET_SRC (set))].n_times_set == 1
		  && rtx_equal_p (SET_DEST (set), mem))
		SET_REGNO_REG_SET (&store_copies, REGNO (SET_SRC (set)));

	      /* If this is a call which uses / clobbers this memory
		 location, we must not change the interface here.  */
	      if (CALL_P (p)
		  && reg_mentioned_p (loop_info->mems[i].mem,
				      CALL_INSN_FUNCTION_USAGE (p)))
		{
		  cancel_changes (0);
		  loop_info->mems[i].optimize = 0;
		  break;
		}
	      else
		/* Replace the memory reference with the shadow register.  */
		replace_loop_mems (p, loop_info->mems[i].mem,
				   loop_info->mems[i].reg, written);
	    }

	  if (LABEL_P (p)
	      || JUMP_P (p))
	    maybe_never = 1;
	}

      if (! loop_info->mems[i].optimize)
	; /* We found we couldn't do the replacement, so do nothing.  */
      else if (! apply_change_group ())
	/* We couldn't replace all occurrences of the MEM.  */
	loop_info->mems[i].optimize = 0;
      else
	{
	  /* Load the memory immediately before LOOP->START, which is
	     the NOTE_LOOP_BEG.  */
	  cselib_val *e = cselib_lookup (mem, VOIDmode, 0);
	  rtx set;
	  rtx best = mem;
	  unsigned j;
	  struct elt_loc_list *const_equiv = 0;
	  reg_set_iterator rsi;

	  if (e)
	    {
	      struct elt_loc_list *equiv;
	      struct elt_loc_list *best_equiv = 0;
	      for (equiv = e->locs; equiv; equiv = equiv->next)
		{
		  if (CONSTANT_P (equiv->loc))
		    const_equiv = equiv;
		  else if (REG_P (equiv->loc)
			   /* Extending hard register lifetimes causes crash
			      on SRC targets.  Doing so on non-SRC is
			      probably also not good idea, since we most
			      probably have pseudoregister equivalence as
			      well.  */
			   && REGNO (equiv->loc) >= FIRST_PSEUDO_REGISTER)
		    best_equiv = equiv;
		}
	      /* Use the constant equivalence if that is cheap enough.  */
	      if (! best_equiv)
		best_equiv = const_equiv;
	      else if (const_equiv
		       && (rtx_cost (const_equiv->loc, SET)
			   <= rtx_cost (best_equiv->loc, SET)))
		{
		  best_equiv = const_equiv;
		  const_equiv = 0;
		}

	      /* If best_equiv is nonzero, we know that MEM is set to a
		 constant or register before the loop.  We will use this
		 knowledge to initialize the shadow register with that
		 constant or reg rather than by loading from MEM.  */
	      if (best_equiv)
		best = copy_rtx (best_equiv->loc);
	    }

	  set = gen_move_insn (reg, best);
	  set = loop_insn_hoist (loop, set);
	  /* If we initialized from a register, extend its recorded last
	     use to cover the new initializing insn.  */
	  if (REG_P (best))
	    {
	      for (p = prev_ebb_head; p != loop->start; p = NEXT_INSN (p))
		if (REGNO_LAST_UID (REGNO (best)) == INSN_UID (p))
		  {
		    REGNO_LAST_UID (REGNO (best)) = INSN_UID (set);
		    break;
		  }
	    }

	  if (const_equiv)
	    set_unique_reg_note (set, REG_EQUAL, copy_rtx (const_equiv->loc));

	  if (written)
	    {
	      if (label == NULL_RTX)
		{
		  label = gen_label_rtx ();
		  emit_label_after (label, loop->end);
		}

	      /* Store the memory immediately after END, which is
		 the NOTE_LOOP_END.  */
	      set = gen_move_insn (copy_rtx (mem), reg);
	      loop_insn_emit_after (loop, 0, label, set);
	    }

	  if (loop_dump_stream)
	    {
	      fprintf (loop_dump_stream, "Hoisted regno %d %s from ",
		       REGNO (reg), (written ? "r/w" : "r/o"));
	      print_rtl (loop_dump_stream, mem);
	      fputc ('\n', loop_dump_stream);
	    }

	  /* Attempt a bit of copy propagation.  This helps untangle the
	     data flow, and enables {basic,general}_induction_var to find
	     more bivs/givs.  */
	  EXECUTE_IF_SET_IN_REG_SET
	    (&load_copies, FIRST_PSEUDO_REGISTER, j, rsi)
	    {
	      try_copy_prop (loop, reg, j);
	    }
	  CLEAR_REG_SET (&load_copies);

	  EXECUTE_IF_SET_IN_REG_SET
	    (&store_copies, FIRST_PSEUDO_REGISTER, j, rsi)
	    {
	      try_swap_copy_prop (loop, reg, j);
	    }
	  CLEAR_REG_SET (&store_copies);
	}
    }

  /* Now, we need to replace all references to the previous exit
     label with the new one.  */
  if (label != NULL_RTX && end_label != NULL_RTX)
    for (p = loop->start; p != loop->end; p = NEXT_INSN (p))
      if (JUMP_P (p) && JUMP_LABEL (p) == end_label)
	redirect_jump (p, label, false);

  cselib_finish ();
}
|
11056 |
|
|
|
11057 |
|
|
/* For communication between note_reg_stored and its caller.  */
struct note_reg_stored_arg
{
  int set_seen;		/* Nonzero once a store to REG has been seen.  */
  rtx reg;		/* The register to watch for stores.  */
};
|
11063 |
|
|
|
11064 |
|
|
/* Called via note_stores, record in SET_SEEN whether X, which is written,
|
11065 |
|
|
is equal to ARG. */
|
11066 |
|
|
static void
|
11067 |
|
|
note_reg_stored (rtx x, rtx setter ATTRIBUTE_UNUSED, void *arg)
|
11068 |
|
|
{
|
11069 |
|
|
struct note_reg_stored_arg *t = (struct note_reg_stored_arg *) arg;
|
11070 |
|
|
if (t->reg == x)
|
11071 |
|
|
t->set_seen = 1;
|
11072 |
|
|
}
|
11073 |
|
|
|
11074 |
|
|
/* Try to replace every occurrence of pseudo REGNO with REPLACEMENT.
   There must be exactly one insn that sets this pseudo; it will be
   deleted if all replacements succeed and we can prove that the register
   is not used after the loop.

   LOOP is the loop being scanned.  Replacements are queued via
   validate_change-style machinery and committed (or discarded) by the
   final apply_change_group call.  */

static void
try_copy_prop (const struct loop *loop, rtx replacement, unsigned int regno)
{
  /* This is the reg that we are copying from.  */
  rtx reg_rtx = regno_reg_rtx[regno];
  rtx init_insn = 0;
  rtx insn;
  /* These help keep track of whether we replaced all uses of the reg.  */
  int replaced_last = 0;
  int store_is_first = 0;

  for (insn = next_insn_in_loop (loop, loop->scan_start);
       insn != NULL_RTX;
       insn = next_insn_in_loop (loop, insn))
    {
      rtx set;

      /* Only substitute within one extended basic block from the initializing
         insn.  */
      if (LABEL_P (insn) && init_insn)
	break;

      if (! INSN_P (insn))
	continue;

      /* Is this the initializing insn?  */
      set = single_set (insn);
      if (set
	  && REG_P (SET_DEST (set))
	  && REGNO (SET_DEST (set)) == regno)
	{
	  /* The caller guarantees a single setting insn, so we must not
	     have found one already.  */
	  gcc_assert (!init_insn);

	  init_insn = insn;
	  if (REGNO_FIRST_UID (regno) == INSN_UID (insn))
	    store_is_first = 1;
	}

      /* Only substitute after seeing the initializing insn.  */
      if (init_insn && insn != init_insn)
	{
	  struct note_reg_stored_arg arg;

	  replace_loop_regs (insn, reg_rtx, replacement);
	  if (REGNO_LAST_UID (regno) == INSN_UID (insn))
	    replaced_last = 1;

	  /* Stop replacing when REPLACEMENT is modified.  */
	  arg.reg = replacement;
	  arg.set_seen = 0;
	  note_stores (PATTERN (insn), note_reg_stored, &arg);
	  if (arg.set_seen)
	    {
	      rtx note = find_reg_note (insn, REG_EQUAL, NULL);

	      /* It is possible that we've turned previously valid REG_EQUAL to
	         invalid, as we change the REGNO to REPLACEMENT and unlike REGNO,
	         REPLACEMENT is modified, we get different meaning.  */
	      if (note && reg_mentioned_p (replacement, XEXP (note, 0)))
		remove_note (insn, note);
	      break;
	    }
	}
    }
  gcc_assert (init_insn);
  if (apply_change_group ())
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream, "  Replaced reg %d", regno);
      /* If the initializing store was the first use and we replaced the
	 last use, the register is dead after the loop and the
	 initializing insn (or its whole libcall sequence) can go.  */
      if (store_is_first && replaced_last)
	{
	  rtx first;
	  rtx retval_note;

	  /* Assume we're just deleting INIT_INSN.  */
	  first = init_insn;
	  /* Look for REG_RETVAL note.  If we're deleting the end of
	     the libcall sequence, the whole sequence can go.  */
	  retval_note = find_reg_note (init_insn, REG_RETVAL, NULL_RTX);
	  /* If we found a REG_RETVAL note, find the first instruction
	     in the sequence.  */
	  if (retval_note)
	    first = XEXP (retval_note, 0);

	  /* Delete the instructions.  */
	  loop_delete_insns (first, init_insn);
	}
      if (loop_dump_stream)
	fprintf (loop_dump_stream, ".\n");
    }
}
|
11170 |
|
|
|
11171 |
|
|
/* Replace all the instructions from FIRST up to and including LAST
|
11172 |
|
|
with NOTE_INSN_DELETED notes. */
|
11173 |
|
|
|
11174 |
|
|
static void
|
11175 |
|
|
loop_delete_insns (rtx first, rtx last)
|
11176 |
|
|
{
|
11177 |
|
|
while (1)
|
11178 |
|
|
{
|
11179 |
|
|
if (loop_dump_stream)
|
11180 |
|
|
fprintf (loop_dump_stream, ", deleting init_insn (%d)",
|
11181 |
|
|
INSN_UID (first));
|
11182 |
|
|
delete_insn (first);
|
11183 |
|
|
|
11184 |
|
|
/* If this was the LAST instructions we're supposed to delete,
|
11185 |
|
|
we're done. */
|
11186 |
|
|
if (first == last)
|
11187 |
|
|
break;
|
11188 |
|
|
|
11189 |
|
|
first = NEXT_INSN (first);
|
11190 |
|
|
}
|
11191 |
|
|
}
|
11192 |
|
|
|
11193 |
|
|
/* Try to replace occurrences of pseudo REGNO with REPLACEMENT within
|
11194 |
|
|
loop LOOP if the order of the sets of these registers can be
|
11195 |
|
|
swapped. There must be exactly one insn within the loop that sets
|
11196 |
|
|
this pseudo followed immediately by a move insn that sets
|
11197 |
|
|
REPLACEMENT with REGNO. */
|
11198 |
|
|
static void
|
11199 |
|
|
try_swap_copy_prop (const struct loop *loop, rtx replacement,
|
11200 |
|
|
unsigned int regno)
|
11201 |
|
|
{
|
11202 |
|
|
rtx insn;
|
11203 |
|
|
rtx set = NULL_RTX;
|
11204 |
|
|
unsigned int new_regno;
|
11205 |
|
|
|
11206 |
|
|
new_regno = REGNO (replacement);
|
11207 |
|
|
|
11208 |
|
|
for (insn = next_insn_in_loop (loop, loop->scan_start);
|
11209 |
|
|
insn != NULL_RTX;
|
11210 |
|
|
insn = next_insn_in_loop (loop, insn))
|
11211 |
|
|
{
|
11212 |
|
|
/* Search for the insn that copies REGNO to NEW_REGNO? */
|
11213 |
|
|
if (INSN_P (insn)
|
11214 |
|
|
&& (set = single_set (insn))
|
11215 |
|
|
&& REG_P (SET_DEST (set))
|
11216 |
|
|
&& REGNO (SET_DEST (set)) == new_regno
|
11217 |
|
|
&& REG_P (SET_SRC (set))
|
11218 |
|
|
&& REGNO (SET_SRC (set)) == regno)
|
11219 |
|
|
break;
|
11220 |
|
|
}
|
11221 |
|
|
|
11222 |
|
|
if (insn != NULL_RTX)
|
11223 |
|
|
{
|
11224 |
|
|
rtx prev_insn;
|
11225 |
|
|
rtx prev_set;
|
11226 |
|
|
|
11227 |
|
|
/* Some DEF-USE info would come in handy here to make this
|
11228 |
|
|
function more general. For now, just check the previous insn
|
11229 |
|
|
which is the most likely candidate for setting REGNO. */
|
11230 |
|
|
|
11231 |
|
|
prev_insn = PREV_INSN (insn);
|
11232 |
|
|
|
11233 |
|
|
if (INSN_P (insn)
|
11234 |
|
|
&& (prev_set = single_set (prev_insn))
|
11235 |
|
|
&& REG_P (SET_DEST (prev_set))
|
11236 |
|
|
&& REGNO (SET_DEST (prev_set)) == regno)
|
11237 |
|
|
{
|
11238 |
|
|
/* We have:
|
11239 |
|
|
(set (reg regno) (expr))
|
11240 |
|
|
(set (reg new_regno) (reg regno))
|
11241 |
|
|
|
11242 |
|
|
so try converting this to:
|
11243 |
|
|
(set (reg new_regno) (expr))
|
11244 |
|
|
(set (reg regno) (reg new_regno))
|
11245 |
|
|
|
11246 |
|
|
The former construct is often generated when a global
|
11247 |
|
|
variable used for an induction variable is shadowed by a
|
11248 |
|
|
register (NEW_REGNO). The latter construct improves the
|
11249 |
|
|
chances of GIV replacement and BIV elimination. */
|
11250 |
|
|
|
11251 |
|
|
validate_change (prev_insn, &SET_DEST (prev_set),
|
11252 |
|
|
replacement, 1);
|
11253 |
|
|
validate_change (insn, &SET_DEST (set),
|
11254 |
|
|
SET_SRC (set), 1);
|
11255 |
|
|
validate_change (insn, &SET_SRC (set),
|
11256 |
|
|
replacement, 1);
|
11257 |
|
|
|
11258 |
|
|
if (apply_change_group ())
|
11259 |
|
|
{
|
11260 |
|
|
if (loop_dump_stream)
|
11261 |
|
|
fprintf (loop_dump_stream,
|
11262 |
|
|
" Swapped set of reg %d at %d with reg %d at %d.\n",
|
11263 |
|
|
regno, INSN_UID (insn),
|
11264 |
|
|
new_regno, INSN_UID (prev_insn));
|
11265 |
|
|
|
11266 |
|
|
/* Update first use of REGNO. */
|
11267 |
|
|
if (REGNO_FIRST_UID (regno) == INSN_UID (prev_insn))
|
11268 |
|
|
REGNO_FIRST_UID (regno) = INSN_UID (insn);
|
11269 |
|
|
|
11270 |
|
|
/* Now perform copy propagation to hopefully
|
11271 |
|
|
remove all uses of REGNO within the loop. */
|
11272 |
|
|
try_copy_prop (loop, replacement, regno);
|
11273 |
|
|
}
|
11274 |
|
|
}
|
11275 |
|
|
}
|
11276 |
|
|
}
|
11277 |
|
|
|
11278 |
|
|
/* Worker function for find_mem_in_note, called via for_each_rtx. */
|
11279 |
|
|
|
11280 |
|
|
static int
|
11281 |
|
|
find_mem_in_note_1 (rtx *x, void *data)
|
11282 |
|
|
{
|
11283 |
|
|
if (*x != NULL_RTX && MEM_P (*x))
|
11284 |
|
|
{
|
11285 |
|
|
rtx *res = (rtx *) data;
|
11286 |
|
|
*res = *x;
|
11287 |
|
|
return 1;
|
11288 |
|
|
}
|
11289 |
|
|
return 0;
|
11290 |
|
|
}
|
11291 |
|
|
|
11292 |
|
|
/* Returns the first MEM found in NOTE by depth-first search. */
|
11293 |
|
|
|
11294 |
|
|
static rtx
|
11295 |
|
|
find_mem_in_note (rtx note)
|
11296 |
|
|
{
|
11297 |
|
|
if (note && for_each_rtx (¬e, find_mem_in_note_1, ¬e))
|
11298 |
|
|
return note;
|
11299 |
|
|
return NULL_RTX;
|
11300 |
|
|
}
|
11301 |
|
|
|
11302 |
|
|
/* Replace MEM with its associated pseudo register. This function is
|
11303 |
|
|
called from load_mems via for_each_rtx. DATA is actually a pointer
|
11304 |
|
|
to a structure describing the instruction currently being scanned
|
11305 |
|
|
and the MEM we are currently replacing. */
|
11306 |
|
|
|
11307 |
|
|
static int
|
11308 |
|
|
replace_loop_mem (rtx *mem, void *data)
|
11309 |
|
|
{
|
11310 |
|
|
loop_replace_args *args = (loop_replace_args *) data;
|
11311 |
|
|
rtx m = *mem;
|
11312 |
|
|
|
11313 |
|
|
if (m == NULL_RTX)
|
11314 |
|
|
return 0;
|
11315 |
|
|
|
11316 |
|
|
switch (GET_CODE (m))
|
11317 |
|
|
{
|
11318 |
|
|
case MEM:
|
11319 |
|
|
break;
|
11320 |
|
|
|
11321 |
|
|
case CONST_DOUBLE:
|
11322 |
|
|
/* We're not interested in the MEM associated with a
|
11323 |
|
|
CONST_DOUBLE, so there's no need to traverse into one. */
|
11324 |
|
|
return -1;
|
11325 |
|
|
|
11326 |
|
|
default:
|
11327 |
|
|
/* This is not a MEM. */
|
11328 |
|
|
return 0;
|
11329 |
|
|
}
|
11330 |
|
|
|
11331 |
|
|
if (!rtx_equal_p (args->match, m))
|
11332 |
|
|
/* This is not the MEM we are currently replacing. */
|
11333 |
|
|
return 0;
|
11334 |
|
|
|
11335 |
|
|
/* Actually replace the MEM. */
|
11336 |
|
|
validate_change (args->insn, mem, args->replacement, 1);
|
11337 |
|
|
|
11338 |
|
|
return 0;
|
11339 |
|
|
}
|
11340 |
|
|
|
11341 |
|
|
static void
|
11342 |
|
|
replace_loop_mems (rtx insn, rtx mem, rtx reg, int written)
|
11343 |
|
|
{
|
11344 |
|
|
loop_replace_args args;
|
11345 |
|
|
|
11346 |
|
|
args.insn = insn;
|
11347 |
|
|
args.match = mem;
|
11348 |
|
|
args.replacement = reg;
|
11349 |
|
|
|
11350 |
|
|
for_each_rtx (&insn, replace_loop_mem, &args);
|
11351 |
|
|
|
11352 |
|
|
/* If we hoist a mem write out of the loop, then REG_EQUAL
|
11353 |
|
|
notes referring to the mem are no longer valid. */
|
11354 |
|
|
if (written)
|
11355 |
|
|
{
|
11356 |
|
|
rtx note, sub;
|
11357 |
|
|
rtx *link;
|
11358 |
|
|
|
11359 |
|
|
for (link = ®_NOTES (insn); (note = *link); link = &XEXP (note, 1))
|
11360 |
|
|
{
|
11361 |
|
|
if (REG_NOTE_KIND (note) == REG_EQUAL
|
11362 |
|
|
&& (sub = find_mem_in_note (note))
|
11363 |
|
|
&& true_dependence (mem, VOIDmode, sub, rtx_varies_p))
|
11364 |
|
|
{
|
11365 |
|
|
/* Remove the note. */
|
11366 |
|
|
validate_change (NULL_RTX, link, XEXP (note, 1), 1);
|
11367 |
|
|
break;
|
11368 |
|
|
}
|
11369 |
|
|
}
|
11370 |
|
|
}
|
11371 |
|
|
}
|
11372 |
|
|
|
11373 |
|
|
/* Replace one register with another. Called through for_each_rtx; PX points
|
11374 |
|
|
to the rtx being scanned. DATA is actually a pointer to
|
11375 |
|
|
a structure of arguments. */
|
11376 |
|
|
|
11377 |
|
|
static int
|
11378 |
|
|
replace_loop_reg (rtx *px, void *data)
|
11379 |
|
|
{
|
11380 |
|
|
rtx x = *px;
|
11381 |
|
|
loop_replace_args *args = (loop_replace_args *) data;
|
11382 |
|
|
|
11383 |
|
|
if (x == NULL_RTX)
|
11384 |
|
|
return 0;
|
11385 |
|
|
|
11386 |
|
|
if (x == args->match)
|
11387 |
|
|
validate_change (args->insn, px, args->replacement, 1);
|
11388 |
|
|
|
11389 |
|
|
return 0;
|
11390 |
|
|
}
|
11391 |
|
|
|
11392 |
|
|
static void
|
11393 |
|
|
replace_loop_regs (rtx insn, rtx reg, rtx replacement)
|
11394 |
|
|
{
|
11395 |
|
|
loop_replace_args args;
|
11396 |
|
|
|
11397 |
|
|
args.insn = insn;
|
11398 |
|
|
args.match = reg;
|
11399 |
|
|
args.replacement = replacement;
|
11400 |
|
|
|
11401 |
|
|
for_each_rtx (&insn, replace_loop_reg, &args);
|
11402 |
|
|
}
|
11403 |
|
|
|
11404 |
|
|
/* Emit insn for PATTERN after WHERE_INSN in basic block WHERE_BB
|
11405 |
|
|
(ignored in the interim). */
|
11406 |
|
|
|
11407 |
|
|
static rtx
|
11408 |
|
|
loop_insn_emit_after (const struct loop *loop ATTRIBUTE_UNUSED,
|
11409 |
|
|
basic_block where_bb ATTRIBUTE_UNUSED, rtx where_insn,
|
11410 |
|
|
rtx pattern)
|
11411 |
|
|
{
|
11412 |
|
|
return emit_insn_after (pattern, where_insn);
|
11413 |
|
|
}
|
11414 |
|
|
|
11415 |
|
|
|
11416 |
|
|
/* If WHERE_INSN is nonzero emit insn for PATTERN before WHERE_INSN
|
11417 |
|
|
in basic block WHERE_BB (ignored in the interim) within the loop
|
11418 |
|
|
otherwise hoist PATTERN into the loop pre-header. */
|
11419 |
|
|
|
11420 |
|
|
static rtx
|
11421 |
|
|
loop_insn_emit_before (const struct loop *loop,
|
11422 |
|
|
basic_block where_bb ATTRIBUTE_UNUSED,
|
11423 |
|
|
rtx where_insn, rtx pattern)
|
11424 |
|
|
{
|
11425 |
|
|
if (! where_insn)
|
11426 |
|
|
return loop_insn_hoist (loop, pattern);
|
11427 |
|
|
return emit_insn_before (pattern, where_insn);
|
11428 |
|
|
}
|
11429 |
|
|
|
11430 |
|
|
|
11431 |
|
|
/* Emit call insn for PATTERN before WHERE_INSN in basic block
|
11432 |
|
|
WHERE_BB (ignored in the interim) within the loop. */
|
11433 |
|
|
|
11434 |
|
|
static rtx
|
11435 |
|
|
loop_call_insn_emit_before (const struct loop *loop ATTRIBUTE_UNUSED,
|
11436 |
|
|
basic_block where_bb ATTRIBUTE_UNUSED,
|
11437 |
|
|
rtx where_insn, rtx pattern)
|
11438 |
|
|
{
|
11439 |
|
|
return emit_call_insn_before (pattern, where_insn);
|
11440 |
|
|
}
|
11441 |
|
|
|
11442 |
|
|
|
11443 |
|
|
/* Hoist insn for PATTERN into the loop pre-header. */
|
11444 |
|
|
|
11445 |
|
|
static rtx
|
11446 |
|
|
loop_insn_hoist (const struct loop *loop, rtx pattern)
|
11447 |
|
|
{
|
11448 |
|
|
return loop_insn_emit_before (loop, 0, loop->start, pattern);
|
11449 |
|
|
}
|
11450 |
|
|
|
11451 |
|
|
|
11452 |
|
|
/* Hoist call insn for PATTERN into the loop pre-header. */
|
11453 |
|
|
|
11454 |
|
|
static rtx
|
11455 |
|
|
loop_call_insn_hoist (const struct loop *loop, rtx pattern)
|
11456 |
|
|
{
|
11457 |
|
|
return loop_call_insn_emit_before (loop, 0, loop->start, pattern);
|
11458 |
|
|
}
|
11459 |
|
|
|
11460 |
|
|
|
11461 |
|
|
/* Sink insn for PATTERN after the loop end. */
|
11462 |
|
|
|
11463 |
|
|
static rtx
|
11464 |
|
|
loop_insn_sink (const struct loop *loop, rtx pattern)
|
11465 |
|
|
{
|
11466 |
|
|
return loop_insn_emit_before (loop, 0, loop->sink, pattern);
|
11467 |
|
|
}
|
11468 |
|
|
|
11469 |
|
|
/* bl->final_value can be either general_operand or PLUS of general_operand
|
11470 |
|
|
and constant. Emit sequence of instructions to load it into REG. */
|
11471 |
|
|
static rtx
|
11472 |
|
|
gen_load_of_final_value (rtx reg, rtx final_value)
|
11473 |
|
|
{
|
11474 |
|
|
rtx seq;
|
11475 |
|
|
start_sequence ();
|
11476 |
|
|
final_value = force_operand (final_value, reg);
|
11477 |
|
|
if (final_value != reg)
|
11478 |
|
|
emit_move_insn (reg, final_value);
|
11479 |
|
|
seq = get_insns ();
|
11480 |
|
|
end_sequence ();
|
11481 |
|
|
return seq;
|
11482 |
|
|
}
|
11483 |
|
|
|
11484 |
|
|
/* If the loop has multiple exits, emit insn for PATTERN before the
|
11485 |
|
|
loop to ensure that it will always be executed no matter how the
|
11486 |
|
|
loop exits. Otherwise, emit the insn for PATTERN after the loop,
|
11487 |
|
|
since this is slightly more efficient. */
|
11488 |
|
|
|
11489 |
|
|
static rtx
|
11490 |
|
|
loop_insn_sink_or_swim (const struct loop *loop, rtx pattern)
|
11491 |
|
|
{
|
11492 |
|
|
if (loop->exit_count)
|
11493 |
|
|
return loop_insn_hoist (loop, pattern);
|
11494 |
|
|
else
|
11495 |
|
|
return loop_insn_sink (loop, pattern);
|
11496 |
|
|
}
|
11497 |
|
|
|
11498 |
|
|
static void
|
11499 |
|
|
loop_ivs_dump (const struct loop *loop, FILE *file, int verbose)
|
11500 |
|
|
{
|
11501 |
|
|
struct iv_class *bl;
|
11502 |
|
|
int iv_num = 0;
|
11503 |
|
|
|
11504 |
|
|
if (! loop || ! file)
|
11505 |
|
|
return;
|
11506 |
|
|
|
11507 |
|
|
for (bl = LOOP_IVS (loop)->list; bl; bl = bl->next)
|
11508 |
|
|
iv_num++;
|
11509 |
|
|
|
11510 |
|
|
fprintf (file, "Loop %d: %d IV classes\n", loop->num, iv_num);
|
11511 |
|
|
|
11512 |
|
|
for (bl = LOOP_IVS (loop)->list; bl; bl = bl->next)
|
11513 |
|
|
{
|
11514 |
|
|
loop_iv_class_dump (bl, file, verbose);
|
11515 |
|
|
fputc ('\n', file);
|
11516 |
|
|
}
|
11517 |
|
|
}
|
11518 |
|
|
|
11519 |
|
|
|
11520 |
|
|
/* Dump IV class BL (one biv and its dependent givs) to FILE.
   VERBOSE is currently unused.  Null BL or FILE is a no-op.  */

static void
loop_iv_class_dump (const struct iv_class *bl, FILE *file,
		    int verbose ATTRIBUTE_UNUSED)
{
  struct induction *v;
  rtx incr;
  int i;

  if (! bl || ! file)
    return;

  fprintf (file, "IV class for reg %d, benefit %d\n",
	   bl->regno, bl->total_benefit);

  /* Initialization: the insn, and any known initial value/test.  */
  fprintf (file, " Init insn %d", INSN_UID (bl->init_insn));
  if (bl->initial_value)
    {
      fprintf (file, ", init val: ");
      print_simple_rtl (file, bl->initial_value);
    }
  if (bl->initial_test)
    {
      fprintf (file, ", init test: ");
      print_simple_rtl (file, bl->initial_test);
    }
  fputc ('\n', file);

  if (bl->final_value)
    {
      fprintf (file, " Final val: ");
      print_simple_rtl (file, bl->final_value);
      fputc ('\n', file);
    }

  /* Total increment over one iteration, if it can be computed.  */
  if ((incr = biv_total_increment (bl)))
    {
      fprintf (file, " Total increment: ");
      print_simple_rtl (file, incr);
      fputc ('\n', file);
    }

  /* List the increments.  */
  for (i = 0, v = bl->biv; v; v = v->next_iv, i++)
    {
      fprintf (file, " Inc%d: insn %d, incr: ", i, INSN_UID (v->insn));
      print_simple_rtl (file, v->add_val);
      fputc ('\n', file);
    }

  /* List the givs.  */
  for (i = 0, v = bl->giv; v; v = v->next_iv, i++)
    {
      fprintf (file, " Giv%d: insn %d, benefit %d, ",
	       i, INSN_UID (v->insn), v->benefit);
      /* DEST_ADDR givs have a memory address; others dump the set.  */
      if (v->giv_type == DEST_ADDR)
	print_simple_rtl (file, v->mem);
      else
	print_simple_rtl (file, single_set (v->insn));
      fputc ('\n', file);
    }
}
|
11581 |
|
|
|
11582 |
|
|
|
11583 |
|
|
static void
|
11584 |
|
|
loop_biv_dump (const struct induction *v, FILE *file, int verbose)
|
11585 |
|
|
{
|
11586 |
|
|
if (! v || ! file)
|
11587 |
|
|
return;
|
11588 |
|
|
|
11589 |
|
|
fprintf (file,
|
11590 |
|
|
"Biv %d: insn %d",
|
11591 |
|
|
REGNO (v->dest_reg), INSN_UID (v->insn));
|
11592 |
|
|
fprintf (file, " const ");
|
11593 |
|
|
print_simple_rtl (file, v->add_val);
|
11594 |
|
|
|
11595 |
|
|
if (verbose && v->final_value)
|
11596 |
|
|
{
|
11597 |
|
|
fputc ('\n', file);
|
11598 |
|
|
fprintf (file, " final ");
|
11599 |
|
|
print_simple_rtl (file, v->final_value);
|
11600 |
|
|
}
|
11601 |
|
|
|
11602 |
|
|
fputc ('\n', file);
|
11603 |
|
|
}
|
11604 |
|
|
|
11605 |
|
|
|
11606 |
|
|
/* Dump general induction variable V to FILE.  With VERBOSE, also show
   the final value when one is known.  Null V or FILE is a no-op.  */

static void
loop_giv_dump (const struct induction *v, FILE *file, int verbose)
{
  if (! v || ! file)
    return;

  /* Identify the giv: either it sets a register, or it is a
     DEST_ADDR giv embedded in a memory address.  */
  if (v->giv_type == DEST_REG)
    fprintf (file, "Giv %d: insn %d",
	     REGNO (v->dest_reg), INSN_UID (v->insn));
  else
    fprintf (file, "Dest address: insn %d",
	     INSN_UID (v->insn));

  fprintf (file, " src reg %d benefit %d",
	   REGNO (v->src_reg), v->benefit);
  fprintf (file, " lifetime %d",
	   v->lifetime);

  if (v->replaceable)
    fprintf (file, " replaceable");

  /* "ncav" = no constant add value.  */
  if (v->no_const_addval)
    fprintf (file, " ncav");

  /* If the giv's value depends on a sign/zero extension or a
     truncation, say which kind.  */
  if (v->ext_dependent)
    {
      switch (GET_CODE (v->ext_dependent))
	{
	case SIGN_EXTEND:
	  fprintf (file, " ext se");
	  break;
	case ZERO_EXTEND:
	  fprintf (file, " ext ze");
	  break;
	case TRUNCATE:
	  fprintf (file, " ext tr");
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  fputc ('\n', file);
  fprintf (file, " mult ");
  print_simple_rtl (file, v->mult_val);

  fputc ('\n', file);
  fprintf (file, " add ");
  print_simple_rtl (file, v->add_val);

  /* Final value is only interesting in verbose dumps.  */
  if (verbose && v->final_value)
    {
      fputc ('\n', file);
      fprintf (file, " final ");
      print_simple_rtl (file, v->final_value);
    }

  fputc ('\n', file);
}
|
11665 |
|
|
|
11666 |
|
|
|
11667 |
|
|
/* Call this function from the debugger to dump LOOP's induction
   variables to stderr.  */

void
debug_ivs (const struct loop *loop)
{
  loop_ivs_dump (loop, stderr, 1);
}
|
11672 |
|
|
|
11673 |
|
|
|
11674 |
|
|
/* Call this function from the debugger to dump IV class BL to
   stderr.  */

void
debug_iv_class (const struct iv_class *bl)
{
  loop_iv_class_dump (bl, stderr, 1);
}
|
11679 |
|
|
|
11680 |
|
|
|
11681 |
|
|
/* Call this function from the debugger to dump biv V to stderr.  */

void
debug_biv (const struct induction *v)
{
  loop_biv_dump (v, stderr, 1);
}
|
11686 |
|
|
|
11687 |
|
|
|
11688 |
|
|
/* Call this function from the debugger to dump giv V to stderr.  */

void
debug_giv (const struct induction *v)
{
  loop_giv_dump (v, stderr, 1);
}
|
11693 |
|
|
|
11694 |
|
|
|
11695 |
|
|
/* Basic block number containing INSN, or -1 when INSN is null or has
   no block assigned.  */
#define LOOP_BLOCK_NUM_1(INSN) \
((INSN) ? (BLOCK_FOR_INSN (INSN) ? BLOCK_NUM (INSN) : - 1) : -1)

/* The notes do not have an assigned block, so look at the next insn.  */
#define LOOP_BLOCK_NUM(INSN) \
((INSN) ? (NOTE_P (INSN) \
	   ? LOOP_BLOCK_NUM_1 (next_nonnote_insn (INSN)) \
	   : LOOP_BLOCK_NUM_1 (INSN)) \
 : -1)

/* INSN_UID that tolerates a null INSN, mapping it to -1.  */
#define LOOP_INSN_UID(INSN) ((INSN) ? INSN_UID (INSN) : -1)
|
11706 |
|
|
|
11707 |
|
|
/* Extra per-loop dump information, used as the callback for
   flow_loop_dump / flow_loops_dump.  Reports disagreements between
   the loop structure and the NOTE_INSN_LOOP_BEG/END notes, then
   prints the loop's key insns and exit labels.  */

static void
loop_dump_aux (const struct loop *loop, FILE *file,
	       int verbose ATTRIBUTE_UNUSED)
{
  rtx label;

  if (! loop || ! file || !BB_HEAD (loop->first))
    return;

  /* Print diagnostics to compare our concept of a loop with
     what the loop notes say.  */
  if (! PREV_INSN (BB_HEAD (loop->first))
      || !NOTE_P (PREV_INSN (BB_HEAD (loop->first)))
      || NOTE_LINE_NUMBER (PREV_INSN (BB_HEAD (loop->first)))
      != NOTE_INSN_LOOP_BEG)
    fprintf (file, ";; No NOTE_INSN_LOOP_BEG at %d\n",
	     INSN_UID (PREV_INSN (BB_HEAD (loop->first))));
  if (! NEXT_INSN (BB_END (loop->last))
      || !NOTE_P (NEXT_INSN (BB_END (loop->last)))
      || NOTE_LINE_NUMBER (NEXT_INSN (BB_END (loop->last)))
      != NOTE_INSN_LOOP_END)
    fprintf (file, ";; No NOTE_INSN_LOOP_END at %d\n",
	     INSN_UID (NEXT_INSN (BB_END (loop->last))));

  if (loop->start)
    {
      /* Each insn is shown as "block (uid)"; -1 means unknown.  */
      fprintf (file,
	       ";; start %d (%d), end %d (%d)\n",
	       LOOP_BLOCK_NUM (loop->start),
	       LOOP_INSN_UID (loop->start),
	       LOOP_BLOCK_NUM (loop->end),
	       LOOP_INSN_UID (loop->end));
      fprintf (file, ";; top %d (%d), scan start %d (%d)\n",
	       LOOP_BLOCK_NUM (loop->top),
	       LOOP_INSN_UID (loop->top),
	       LOOP_BLOCK_NUM (loop->scan_start),
	       LOOP_INSN_UID (loop->scan_start));
      fprintf (file, ";; exit_count %d", loop->exit_count);
      if (loop->exit_count)
	{
	  fputs (", labels:", file);
	  for (label = loop->exit_labels; label; label = LABEL_NEXTREF (label))
	    {
	      fprintf (file, " %d ",
		       LOOP_INSN_UID (XEXP (label, 0)));
	    }
	}
      fputs ("\n", file);
    }
}
|
11757 |
|
|
|
11758 |
|
|
/* Call this function from the debugger to dump LOOP.  Dumps to
   stderr, including the loop-specific detail from loop_dump_aux.  */

void
debug_loop (const struct loop *loop)
{
  flow_loop_dump (loop, stderr, loop_dump_aux, 1);
}
|
11765 |
|
|
|
11766 |
|
|
/* Call this function from the debugger to dump LOOPS.  Dumps every
   loop in the tree to stderr via loop_dump_aux.  */

void
debug_loops (const struct loops *loops)
{
  flow_loops_dump (loops, stderr, loop_dump_aux, 1);
}
|
11773 |
|
|
|
11774 |
|
|
static bool
|
11775 |
|
|
gate_handle_loop_optimize (void)
|
11776 |
|
|
{
|
11777 |
|
|
return (optimize > 0 && flag_loop_optimize);
|
11778 |
|
|
}
|
11779 |
|
|
|
11780 |
|
|
/* Move constant computations out of loops.  Execute function for the
   old loop-optimization pass: runs loop_optimize (twice when
   -frerun-loop-opt), with dead-insn cleanup between and after, then
   rebuilds the basic blocks.  */
static void
rest_of_handle_loop_optimize (void)
{
  int do_prefetch;

  /* CFG is no longer maintained up-to-date.  */
  free_bb_for_insn ();
  profile_status = PROFILE_ABSENT;

  /* Only the final loop_optimize call gets the prefetch flag.  */
  do_prefetch = flag_prefetch_loop_arrays ? LOOP_PREFETCH : 0;

  if (flag_rerun_loop_opt)
    {
      cleanup_barriers ();

      /* We only want to perform unrolling once.  */
      loop_optimize (get_insns (), dump_file, 0);

      /* The first call to loop_optimize makes some instructions
	 trivially dead.  We delete those instructions now in the
	 hope that doing so will make the heuristics in loop work
	 better and possibly speed up compilation.  */
      delete_trivially_dead_insns (get_insns (), max_reg_num ());

      /* The regscan pass is currently necessary as the alias
	 analysis code depends on this information.  */
      reg_scan (get_insns (), max_reg_num ());
    }
  cleanup_barriers ();
  loop_optimize (get_insns (), dump_file, do_prefetch);

  /* Loop can create trivially dead instructions.  */
  delete_trivially_dead_insns (get_insns (), max_reg_num ());
  /* Recompute the CFG that was discarded at the top.  */
  find_basic_blocks (get_insns ());
}
|
11816 |
|
|
|
11817 |
|
|
/* Pass descriptor for the old RTL loop optimizer, gated on
   -floop-optimize at -O1 and above.  */
struct tree_opt_pass pass_loop_optimize =
{
  "old-loop",                           /* name */
  gate_handle_loop_optimize,            /* gate */
  rest_of_handle_loop_optimize,         /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_LOOP,                              /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_dump_func |
  TODO_ggc_collect,                     /* todo_flags_finish */
  'L'                                   /* letter */
};
|
11834 |
|
|
|
11835 |
|
|
|