| 1 |
280 |
jeremybenn |
/* RTL-based forward propagation pass for GNU compiler.
|
| 2 |
|
|
Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
|
| 3 |
|
|
Free Software Foundation, Inc.
|
| 4 |
|
|
Contributed by Paolo Bonzini and Steven Bosscher.
|
| 5 |
|
|
|
| 6 |
|
|
This file is part of GCC.
|
| 7 |
|
|
|
| 8 |
|
|
GCC is free software; you can redistribute it and/or modify it under
|
| 9 |
|
|
the terms of the GNU General Public License as published by the Free
|
| 10 |
|
|
Software Foundation; either version 3, or (at your option) any later
|
| 11 |
|
|
version.
|
| 12 |
|
|
|
| 13 |
|
|
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
| 14 |
|
|
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
| 15 |
|
|
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
| 16 |
|
|
for more details.
|
| 17 |
|
|
|
| 18 |
|
|
You should have received a copy of the GNU General Public License
|
| 19 |
|
|
along with GCC; see the file COPYING3. If not see
|
| 20 |
|
|
<http://www.gnu.org/licenses/>. */
|
| 21 |
|
|
|
| 22 |
|
|
#include "config.h"
|
| 23 |
|
|
#include "system.h"
|
| 24 |
|
|
#include "coretypes.h"
|
| 25 |
|
|
#include "tm.h"
|
| 26 |
|
|
#include "toplev.h"
|
| 27 |
|
|
|
| 28 |
|
|
#include "timevar.h"
|
| 29 |
|
|
#include "rtl.h"
|
| 30 |
|
|
#include "tm_p.h"
|
| 31 |
|
|
#include "emit-rtl.h"
|
| 32 |
|
|
#include "insn-config.h"
|
| 33 |
|
|
#include "recog.h"
|
| 34 |
|
|
#include "flags.h"
|
| 35 |
|
|
#include "obstack.h"
|
| 36 |
|
|
#include "basic-block.h"
|
| 37 |
|
|
#include "output.h"
|
| 38 |
|
|
#include "df.h"
|
| 39 |
|
|
#include "target.h"
|
| 40 |
|
|
#include "cfgloop.h"
|
| 41 |
|
|
#include "tree-pass.h"
|
| 42 |
|
|
#include "domwalk.h"
|
| 43 |
|
|
|
| 44 |
|
|
|
| 45 |
|
|
/* This pass does simple forward propagation and simplification when an
|
| 46 |
|
|
operand of an insn can only come from a single def. This pass uses
|
| 47 |
|
|
df.c, so it is global. However, we only do limited analysis of
|
| 48 |
|
|
available expressions.
|
| 49 |
|
|
|
| 50 |
|
|
1) The pass tries to propagate the source of the def into the use,
|
| 51 |
|
|
and checks if the result is independent of the substituted value.
|
| 52 |
|
|
For example, the high word of a (zero_extend:DI (reg:SI M)) is always
|
| 53 |
|
|
zero, independent of the source register.
|
| 54 |
|
|
|
| 55 |
|
|
In particular, we propagate constants into the use site. Sometimes
|
| 56 |
|
|
RTL expansion did not put the constant in the same insn on purpose,
|
| 57 |
|
|
to satisfy a predicate, and the result will fail to be recognized;
|
| 58 |
|
|
but this happens rarely and in this case we can still create a
|
| 59 |
|
|
REG_EQUAL note. For multi-word operations, this
|
| 60 |
|
|
|
| 61 |
|
|
(set (subreg:SI (reg:DI 120) 0) (const_int 0))
|
| 62 |
|
|
(set (subreg:SI (reg:DI 120) 4) (const_int -1))
|
| 63 |
|
|
(set (subreg:SI (reg:DI 122) 0)
|
| 64 |
|
|
(ior:SI (subreg:SI (reg:DI 119) 0) (subreg:SI (reg:DI 120) 0)))
|
| 65 |
|
|
(set (subreg:SI (reg:DI 122) 4)
|
| 66 |
|
|
(ior:SI (subreg:SI (reg:DI 119) 4) (subreg:SI (reg:DI 120) 4)))
|
| 67 |
|
|
|
| 68 |
|
|
can be simplified to the much simpler
|
| 69 |
|
|
|
| 70 |
|
|
(set (subreg:SI (reg:DI 122) 0) (subreg:SI (reg:DI 119)))
|
| 71 |
|
|
(set (subreg:SI (reg:DI 122) 4) (const_int -1))
|
| 72 |
|
|
|
| 73 |
|
|
This particular propagation is also effective at putting together
|
| 74 |
|
|
complex addressing modes. We are more aggressive inside MEMs, in
|
| 75 |
|
|
that all definitions are propagated if the use is in a MEM; if the
|
| 76 |
|
|
result is a valid memory address we check address_cost to decide
|
| 77 |
|
|
whether the substitution is worthwhile.
|
| 78 |
|
|
|
| 79 |
|
|
2) The pass propagates register copies. This is not as effective as
|
| 80 |
|
|
the copy propagation done by CSE's canon_reg, which works by walking
|
| 81 |
|
|
the instruction chain, it can help the other transformations.
|
| 82 |
|
|
|
| 83 |
|
|
We should consider removing this optimization, and instead reorder the
|
| 84 |
|
|
RTL passes, because GCSE does this transformation too. With some luck,
|
| 85 |
|
|
the CSE pass at the end of rest_of_handle_gcse could also go away.
|
| 86 |
|
|
|
| 87 |
|
|
3) The pass looks for paradoxical subregs that are actually unnecessary.
|
| 88 |
|
|
Things like this:
|
| 89 |
|
|
|
| 90 |
|
|
(set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
|
| 91 |
|
|
(set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
|
| 92 |
|
|
(set (reg:SI 122) (plus:SI (subreg:SI (reg:QI 120) 0)
|
| 93 |
|
|
(subreg:SI (reg:QI 121) 0)))
|
| 94 |
|
|
|
| 95 |
|
|
are very common on machines that can only do word-sized operations.
|
| 96 |
|
|
For each use of a paradoxical subreg (subreg:WIDER (reg:NARROW N) 0),
|
| 97 |
|
|
if it has a single def and it is (subreg:NARROW (reg:WIDE M) 0),
|
| 98 |
|
|
we can replace the paradoxical subreg with simply (reg:WIDE M). The
|
| 99 |
|
|
above will simplify this to
|
| 100 |
|
|
|
| 101 |
|
|
(set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
|
| 102 |
|
|
(set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
|
| 103 |
|
|
(set (reg:SI 122) (plus:SI (reg:SI 118) (reg:SI 119)))
|
| 104 |
|
|
|
| 105 |
|
|
where the first two insns are now dead.
|
| 106 |
|
|
|
| 107 |
|
|
We used to use reaching definitions to find which uses have a
|
| 108 |
|
|
single reaching definition (sounds obvious...), but this is too
|
| 109 |
|
|
complex a problem in nasty testcases like PR33928. Now we use the
|
| 110 |
|
|
multiple definitions problem in df-problems.c. The similarity
|
| 111 |
|
|
between that problem and SSA form creation is taken further, in
|
| 112 |
|
|
that fwprop does a dominator walk to create its chains; however,
|
| 113 |
|
|
instead of creating a PHI function where multiple definitions meet
|
| 114 |
|
|
I just punt and record only singleton use-def chains, which is
|
| 115 |
|
|
all that is needed by fwprop. */
|
| 116 |
|
|
|
| 117 |
|
|
|
| 118 |
|
|
static int num_changes;
|
| 119 |
|
|
|
| 120 |
|
|
DEF_VEC_P(df_ref);
|
| 121 |
|
|
DEF_VEC_ALLOC_P(df_ref,heap);
|
| 122 |
|
|
static VEC(df_ref,heap) *use_def_ref;
|
| 123 |
|
|
static VEC(df_ref,heap) *reg_defs;
|
| 124 |
|
|
static VEC(df_ref,heap) *reg_defs_stack;
|
| 125 |
|
|
|
| 126 |
|
|
/* The MD bitmaps are trimmed to include only live registers to cut
|
| 127 |
|
|
memory usage on testcases like insn-recog.c. Track live registers
|
| 128 |
|
|
in the basic block and do not perform forward propagation if the
|
| 129 |
|
|
destination is a dead pseudo occurring in a note. */
|
| 130 |
|
|
static bitmap local_md;
|
| 131 |
|
|
static bitmap local_lr;
|
| 132 |
|
|
|
| 133 |
|
|
/* Return the only def in USE's use-def chain, or NULL if there is
|
| 134 |
|
|
more than one def in the chain. */
|
| 135 |
|
|
|
| 136 |
|
|
static inline df_ref
|
| 137 |
|
|
get_def_for_use (df_ref use)
|
| 138 |
|
|
{
|
| 139 |
|
|
return VEC_index (df_ref, use_def_ref, DF_REF_ID (use));
|
| 140 |
|
|
}
|
| 141 |
|
|
|
| 142 |
|
|
|
| 143 |
|
|
/* Update the reg_defs vector with non-partial definitions in DEF_REC.
|
| 144 |
|
|
TOP_FLAG says which artificials uses should be used, when DEF_REC
|
| 145 |
|
|
is an artificial def vector. LOCAL_MD is modified as after a
|
| 146 |
|
|
df_md_simulate_* function; we do more or less the same processing
|
| 147 |
|
|
done there, so we do not use those functions. */
|
| 148 |
|
|
|
| 149 |
|
|
#define DF_MD_GEN_FLAGS \
|
| 150 |
|
|
(DF_REF_PARTIAL | DF_REF_CONDITIONAL | DF_REF_MAY_CLOBBER)
|
| 151 |
|
|
|
| 152 |
|
|
static void
|
| 153 |
|
|
process_defs (df_ref *def_rec, int top_flag)
|
| 154 |
|
|
{
|
| 155 |
|
|
df_ref def;
|
| 156 |
|
|
while ((def = *def_rec++) != NULL)
|
| 157 |
|
|
{
|
| 158 |
|
|
df_ref curr_def = VEC_index (df_ref, reg_defs, DF_REF_REGNO (def));
|
| 159 |
|
|
unsigned int dregno;
|
| 160 |
|
|
|
| 161 |
|
|
if ((DF_REF_FLAGS (def) & DF_REF_AT_TOP) != top_flag)
|
| 162 |
|
|
continue;
|
| 163 |
|
|
|
| 164 |
|
|
dregno = DF_REF_REGNO (def);
|
| 165 |
|
|
if (curr_def)
|
| 166 |
|
|
VEC_safe_push (df_ref, heap, reg_defs_stack, curr_def);
|
| 167 |
|
|
else
|
| 168 |
|
|
{
|
| 169 |
|
|
/* Do not store anything if "transitioning" from NULL to NULL. But
|
| 170 |
|
|
otherwise, push a special entry on the stack to tell the
|
| 171 |
|
|
leave_block callback that the entry in reg_defs was NULL. */
|
| 172 |
|
|
if (DF_REF_FLAGS (def) & DF_MD_GEN_FLAGS)
|
| 173 |
|
|
;
|
| 174 |
|
|
else
|
| 175 |
|
|
VEC_safe_push (df_ref, heap, reg_defs_stack, def);
|
| 176 |
|
|
}
|
| 177 |
|
|
|
| 178 |
|
|
if (DF_REF_FLAGS (def) & DF_MD_GEN_FLAGS)
|
| 179 |
|
|
{
|
| 180 |
|
|
bitmap_set_bit (local_md, dregno);
|
| 181 |
|
|
VEC_replace (df_ref, reg_defs, dregno, NULL);
|
| 182 |
|
|
}
|
| 183 |
|
|
else
|
| 184 |
|
|
{
|
| 185 |
|
|
bitmap_clear_bit (local_md, dregno);
|
| 186 |
|
|
VEC_replace (df_ref, reg_defs, dregno, def);
|
| 187 |
|
|
}
|
| 188 |
|
|
}
|
| 189 |
|
|
}
|
| 190 |
|
|
|
| 191 |
|
|
|
| 192 |
|
|
/* Fill the use_def_ref vector with values for the uses in USE_REC,
|
| 193 |
|
|
taking reaching definitions info from LOCAL_MD and REG_DEFS.
|
| 194 |
|
|
TOP_FLAG says which artificials uses should be used, when USE_REC
|
| 195 |
|
|
is an artificial use vector. */
|
| 196 |
|
|
|
| 197 |
|
|
static void
|
| 198 |
|
|
process_uses (df_ref *use_rec, int top_flag)
|
| 199 |
|
|
{
|
| 200 |
|
|
df_ref use;
|
| 201 |
|
|
while ((use = *use_rec++) != NULL)
|
| 202 |
|
|
if ((DF_REF_FLAGS (use) & DF_REF_AT_TOP) == top_flag)
|
| 203 |
|
|
{
|
| 204 |
|
|
unsigned int uregno = DF_REF_REGNO (use);
|
| 205 |
|
|
if (VEC_index (df_ref, reg_defs, uregno)
|
| 206 |
|
|
&& !bitmap_bit_p (local_md, uregno)
|
| 207 |
|
|
&& bitmap_bit_p (local_lr, uregno))
|
| 208 |
|
|
VEC_replace (df_ref, use_def_ref, DF_REF_ID (use),
|
| 209 |
|
|
VEC_index (df_ref, reg_defs, uregno));
|
| 210 |
|
|
}
|
| 211 |
|
|
}
|
| 212 |
|
|
|
| 213 |
|
|
|
| 214 |
|
|
static void
|
| 215 |
|
|
single_def_use_enter_block (struct dom_walk_data *walk_data ATTRIBUTE_UNUSED,
|
| 216 |
|
|
basic_block bb)
|
| 217 |
|
|
{
|
| 218 |
|
|
int bb_index = bb->index;
|
| 219 |
|
|
struct df_md_bb_info *md_bb_info = df_md_get_bb_info (bb_index);
|
| 220 |
|
|
struct df_lr_bb_info *lr_bb_info = df_lr_get_bb_info (bb_index);
|
| 221 |
|
|
rtx insn;
|
| 222 |
|
|
|
| 223 |
|
|
bitmap_copy (local_md, md_bb_info->in);
|
| 224 |
|
|
bitmap_copy (local_lr, lr_bb_info->in);
|
| 225 |
|
|
|
| 226 |
|
|
/* Push a marker for the leave_block callback. */
|
| 227 |
|
|
VEC_safe_push (df_ref, heap, reg_defs_stack, NULL);
|
| 228 |
|
|
|
| 229 |
|
|
process_uses (df_get_artificial_uses (bb_index), DF_REF_AT_TOP);
|
| 230 |
|
|
process_defs (df_get_artificial_defs (bb_index), DF_REF_AT_TOP);
|
| 231 |
|
|
df_simulate_initialize_forwards (bb, local_lr);
|
| 232 |
|
|
|
| 233 |
|
|
FOR_BB_INSNS (bb, insn)
|
| 234 |
|
|
if (INSN_P (insn))
|
| 235 |
|
|
{
|
| 236 |
|
|
unsigned int uid = INSN_UID (insn);
|
| 237 |
|
|
process_uses (DF_INSN_UID_USES (uid), 0);
|
| 238 |
|
|
process_uses (DF_INSN_UID_EQ_USES (uid), 0);
|
| 239 |
|
|
process_defs (DF_INSN_UID_DEFS (uid), 0);
|
| 240 |
|
|
df_simulate_one_insn_forwards (bb, insn, local_lr);
|
| 241 |
|
|
}
|
| 242 |
|
|
|
| 243 |
|
|
process_uses (df_get_artificial_uses (bb_index), 0);
|
| 244 |
|
|
process_defs (df_get_artificial_defs (bb_index), 0);
|
| 245 |
|
|
}
|
| 246 |
|
|
|
| 247 |
|
|
/* Pop the definitions created in this basic block when leaving its
|
| 248 |
|
|
dominated parts. */
|
| 249 |
|
|
|
| 250 |
|
|
static void
|
| 251 |
|
|
single_def_use_leave_block (struct dom_walk_data *walk_data ATTRIBUTE_UNUSED,
|
| 252 |
|
|
basic_block bb ATTRIBUTE_UNUSED)
|
| 253 |
|
|
{
|
| 254 |
|
|
df_ref saved_def;
|
| 255 |
|
|
while ((saved_def = VEC_pop (df_ref, reg_defs_stack)) != NULL)
|
| 256 |
|
|
{
|
| 257 |
|
|
unsigned int dregno = DF_REF_REGNO (saved_def);
|
| 258 |
|
|
|
| 259 |
|
|
/* See also process_defs. */
|
| 260 |
|
|
if (saved_def == VEC_index (df_ref, reg_defs, dregno))
|
| 261 |
|
|
VEC_replace (df_ref, reg_defs, dregno, NULL);
|
| 262 |
|
|
else
|
| 263 |
|
|
VEC_replace (df_ref, reg_defs, dregno, saved_def);
|
| 264 |
|
|
}
|
| 265 |
|
|
}
|
| 266 |
|
|
|
| 267 |
|
|
|
| 268 |
|
|
/* Build a vector holding the reaching definitions of uses reached by a
|
| 269 |
|
|
single dominating definition. */
|
| 270 |
|
|
|
| 271 |
|
|
static void
|
| 272 |
|
|
build_single_def_use_links (void)
|
| 273 |
|
|
{
|
| 274 |
|
|
struct dom_walk_data walk_data;
|
| 275 |
|
|
|
| 276 |
|
|
/* We use the multiple definitions problem to compute our restricted
|
| 277 |
|
|
use-def chains. */
|
| 278 |
|
|
df_set_flags (DF_EQ_NOTES);
|
| 279 |
|
|
df_md_add_problem ();
|
| 280 |
|
|
df_note_add_problem ();
|
| 281 |
|
|
df_analyze ();
|
| 282 |
|
|
df_maybe_reorganize_use_refs (DF_REF_ORDER_BY_INSN_WITH_NOTES);
|
| 283 |
|
|
|
| 284 |
|
|
use_def_ref = VEC_alloc (df_ref, heap, DF_USES_TABLE_SIZE ());
|
| 285 |
|
|
VEC_safe_grow_cleared (df_ref, heap, use_def_ref, DF_USES_TABLE_SIZE ());
|
| 286 |
|
|
|
| 287 |
|
|
reg_defs = VEC_alloc (df_ref, heap, max_reg_num ());
|
| 288 |
|
|
VEC_safe_grow_cleared (df_ref, heap, reg_defs, max_reg_num ());
|
| 289 |
|
|
|
| 290 |
|
|
reg_defs_stack = VEC_alloc (df_ref, heap, n_basic_blocks * 10);
|
| 291 |
|
|
local_md = BITMAP_ALLOC (NULL);
|
| 292 |
|
|
local_lr = BITMAP_ALLOC (NULL);
|
| 293 |
|
|
|
| 294 |
|
|
/* Walk the dominator tree looking for single reaching definitions
|
| 295 |
|
|
dominating the uses. This is similar to how SSA form is built. */
|
| 296 |
|
|
walk_data.dom_direction = CDI_DOMINATORS;
|
| 297 |
|
|
walk_data.initialize_block_local_data = NULL;
|
| 298 |
|
|
walk_data.before_dom_children = single_def_use_enter_block;
|
| 299 |
|
|
walk_data.after_dom_children = single_def_use_leave_block;
|
| 300 |
|
|
|
| 301 |
|
|
init_walk_dominator_tree (&walk_data);
|
| 302 |
|
|
walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
|
| 303 |
|
|
fini_walk_dominator_tree (&walk_data);
|
| 304 |
|
|
|
| 305 |
|
|
BITMAP_FREE (local_lr);
|
| 306 |
|
|
BITMAP_FREE (local_md);
|
| 307 |
|
|
VEC_free (df_ref, heap, reg_defs);
|
| 308 |
|
|
VEC_free (df_ref, heap, reg_defs_stack);
|
| 309 |
|
|
}
|
| 310 |
|
|
|
| 311 |
|
|
|
| 312 |
|
|
/* Do not try to replace constant addresses or addresses of local and
|
| 313 |
|
|
argument slots. These MEM expressions are made only once and inserted
|
| 314 |
|
|
in many instructions, as well as being used to control symbol table
|
| 315 |
|
|
output. It is not safe to clobber them.
|
| 316 |
|
|
|
| 317 |
|
|
There are some uncommon cases where the address is already in a register
|
| 318 |
|
|
for some reason, but we cannot take advantage of that because we have
|
| 319 |
|
|
no easy way to unshare the MEM. In addition, looking up all stack
|
| 320 |
|
|
addresses is costly. */
|
| 321 |
|
|
|
| 322 |
|
|
static bool
|
| 323 |
|
|
can_simplify_addr (rtx addr)
|
| 324 |
|
|
{
|
| 325 |
|
|
rtx reg;
|
| 326 |
|
|
|
| 327 |
|
|
if (CONSTANT_ADDRESS_P (addr))
|
| 328 |
|
|
return false;
|
| 329 |
|
|
|
| 330 |
|
|
if (GET_CODE (addr) == PLUS)
|
| 331 |
|
|
reg = XEXP (addr, 0);
|
| 332 |
|
|
else
|
| 333 |
|
|
reg = addr;
|
| 334 |
|
|
|
| 335 |
|
|
return (!REG_P (reg)
|
| 336 |
|
|
|| (REGNO (reg) != FRAME_POINTER_REGNUM
|
| 337 |
|
|
&& REGNO (reg) != HARD_FRAME_POINTER_REGNUM
|
| 338 |
|
|
&& REGNO (reg) != ARG_POINTER_REGNUM));
|
| 339 |
|
|
}
|
| 340 |
|
|
|
| 341 |
|
|
/* Returns a canonical version of X for the address, from the point of view,
|
| 342 |
|
|
that all multiplications are represented as MULT instead of the multiply
|
| 343 |
|
|
by a power of 2 being represented as ASHIFT.
|
| 344 |
|
|
|
| 345 |
|
|
Every ASHIFT we find has been made by simplify_gen_binary and was not
|
| 346 |
|
|
there before, so it is not shared. So we can do this in place. */
|
| 347 |
|
|
|
| 348 |
|
|
static void
|
| 349 |
|
|
canonicalize_address (rtx x)
|
| 350 |
|
|
{
|
| 351 |
|
|
for (;;)
|
| 352 |
|
|
switch (GET_CODE (x))
|
| 353 |
|
|
{
|
| 354 |
|
|
case ASHIFT:
|
| 355 |
|
|
if (CONST_INT_P (XEXP (x, 1))
|
| 356 |
|
|
&& INTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (GET_MODE (x))
|
| 357 |
|
|
&& INTVAL (XEXP (x, 1)) >= 0)
|
| 358 |
|
|
{
|
| 359 |
|
|
HOST_WIDE_INT shift = INTVAL (XEXP (x, 1));
|
| 360 |
|
|
PUT_CODE (x, MULT);
|
| 361 |
|
|
XEXP (x, 1) = gen_int_mode ((HOST_WIDE_INT) 1 << shift,
|
| 362 |
|
|
GET_MODE (x));
|
| 363 |
|
|
}
|
| 364 |
|
|
|
| 365 |
|
|
x = XEXP (x, 0);
|
| 366 |
|
|
break;
|
| 367 |
|
|
|
| 368 |
|
|
case PLUS:
|
| 369 |
|
|
if (GET_CODE (XEXP (x, 0)) == PLUS
|
| 370 |
|
|
|| GET_CODE (XEXP (x, 0)) == ASHIFT
|
| 371 |
|
|
|| GET_CODE (XEXP (x, 0)) == CONST)
|
| 372 |
|
|
canonicalize_address (XEXP (x, 0));
|
| 373 |
|
|
|
| 374 |
|
|
x = XEXP (x, 1);
|
| 375 |
|
|
break;
|
| 376 |
|
|
|
| 377 |
|
|
case CONST:
|
| 378 |
|
|
x = XEXP (x, 0);
|
| 379 |
|
|
break;
|
| 380 |
|
|
|
| 381 |
|
|
default:
|
| 382 |
|
|
return;
|
| 383 |
|
|
}
|
| 384 |
|
|
}
|
| 385 |
|
|
|
| 386 |
|
|
/* OLD is a memory address. Return whether it is good to use NEW instead,
|
| 387 |
|
|
for a memory access in the given MODE. */
|
| 388 |
|
|
|
| 389 |
|
|
static bool
|
| 390 |
|
|
should_replace_address (rtx old_rtx, rtx new_rtx, enum machine_mode mode,
|
| 391 |
|
|
addr_space_t as, bool speed)
|
| 392 |
|
|
{
|
| 393 |
|
|
int gain;
|
| 394 |
|
|
|
| 395 |
|
|
if (rtx_equal_p (old_rtx, new_rtx)
|
| 396 |
|
|
|| !memory_address_addr_space_p (mode, new_rtx, as))
|
| 397 |
|
|
return false;
|
| 398 |
|
|
|
| 399 |
|
|
/* Copy propagation is always ok. */
|
| 400 |
|
|
if (REG_P (old_rtx) && REG_P (new_rtx))
|
| 401 |
|
|
return true;
|
| 402 |
|
|
|
| 403 |
|
|
/* Prefer the new address if it is less expensive. */
|
| 404 |
|
|
gain = (address_cost (old_rtx, mode, as, speed)
|
| 405 |
|
|
- address_cost (new_rtx, mode, as, speed));
|
| 406 |
|
|
|
| 407 |
|
|
/* If the addresses have equivalent cost, prefer the new address
|
| 408 |
|
|
if it has the highest `rtx_cost'. That has the potential of
|
| 409 |
|
|
eliminating the most insns without additional costs, and it
|
| 410 |
|
|
is the same that cse.c used to do. */
|
| 411 |
|
|
if (gain == 0)
|
| 412 |
|
|
gain = rtx_cost (new_rtx, SET, speed) - rtx_cost (old_rtx, SET, speed);
|
| 413 |
|
|
|
| 414 |
|
|
return (gain > 0);
|
| 415 |
|
|
}
|
| 416 |
|
|
|
| 417 |
|
|
|
| 418 |
|
|
/* Flags for the last parameter of propagate_rtx_1. */
|
| 419 |
|
|
|
| 420 |
|
|
enum {
|
| 421 |
|
|
/* If PR_CAN_APPEAR is true, propagate_rtx_1 always returns true;
|
| 422 |
|
|
if it is false, propagate_rtx_1 returns false if, for at least
|
| 423 |
|
|
one occurrence OLD, it failed to collapse the result to a constant.
|
| 424 |
|
|
For example, (mult:M (reg:M A) (minus:M (reg:M B) (reg:M A))) may
|
| 425 |
|
|
collapse to zero if replacing (reg:M B) with (reg:M A).
|
| 426 |
|
|
|
| 427 |
|
|
PR_CAN_APPEAR is disregarded inside MEMs: in that case,
|
| 428 |
|
|
propagate_rtx_1 just tries to make cheaper and valid memory
|
| 429 |
|
|
addresses. */
|
| 430 |
|
|
PR_CAN_APPEAR = 1,
|
| 431 |
|
|
|
| 432 |
|
|
/* If PR_HANDLE_MEM is not set, propagate_rtx_1 won't attempt any replacement
|
| 433 |
|
|
outside memory addresses. This is needed because propagate_rtx_1 does
|
| 434 |
|
|
not do any analysis on memory; thus it is very conservative and in general
|
| 435 |
|
|
it will fail if non-read-only MEMs are found in the source expression.
|
| 436 |
|
|
|
| 437 |
|
|
PR_HANDLE_MEM is set when the source of the propagation was not
|
| 438 |
|
|
another MEM. Then, it is safe not to treat non-read-only MEMs as
|
| 439 |
|
|
``opaque'' objects. */
|
| 440 |
|
|
PR_HANDLE_MEM = 2,
|
| 441 |
|
|
|
| 442 |
|
|
/* Set when costs should be optimized for speed. */
|
| 443 |
|
|
PR_OPTIMIZE_FOR_SPEED = 4
|
| 444 |
|
|
};
|
| 445 |
|
|
|
| 446 |
|
|
|
| 447 |
|
|
/* Replace all occurrences of OLD in *PX with NEW and try to simplify the
|
| 448 |
|
|
resulting expression. Replace *PX with a new RTL expression if an
|
| 449 |
|
|
occurrence of OLD was found.
|
| 450 |
|
|
|
| 451 |
|
|
This is only a wrapper around simplify-rtx.c: do not add any pattern
|
| 452 |
|
|
matching code here. (The sole exception is the handling of LO_SUM, but
|
| 453 |
|
|
that is because there is no simplify_gen_* function for LO_SUM). */
|
| 454 |
|
|
|
| 455 |
|
|
static bool
|
| 456 |
|
|
propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags)
|
| 457 |
|
|
{
|
| 458 |
|
|
rtx x = *px, tem = NULL_RTX, op0, op1, op2;
|
| 459 |
|
|
enum rtx_code code = GET_CODE (x);
|
| 460 |
|
|
enum machine_mode mode = GET_MODE (x);
|
| 461 |
|
|
enum machine_mode op_mode;
|
| 462 |
|
|
bool can_appear = (flags & PR_CAN_APPEAR) != 0;
|
| 463 |
|
|
bool valid_ops = true;
|
| 464 |
|
|
|
| 465 |
|
|
if (!(flags & PR_HANDLE_MEM) && MEM_P (x) && !MEM_READONLY_P (x))
|
| 466 |
|
|
{
|
| 467 |
|
|
/* If unsafe, change MEMs to CLOBBERs or SCRATCHes (to preserve whether
|
| 468 |
|
|
they have side effects or not). */
|
| 469 |
|
|
*px = (side_effects_p (x)
|
| 470 |
|
|
? gen_rtx_CLOBBER (GET_MODE (x), const0_rtx)
|
| 471 |
|
|
: gen_rtx_SCRATCH (GET_MODE (x)));
|
| 472 |
|
|
return false;
|
| 473 |
|
|
}
|
| 474 |
|
|
|
| 475 |
|
|
/* If X is OLD_RTX, return NEW_RTX. But not if replacing only within an
|
| 476 |
|
|
address, and we are *not* inside one. */
|
| 477 |
|
|
if (x == old_rtx)
|
| 478 |
|
|
{
|
| 479 |
|
|
*px = new_rtx;
|
| 480 |
|
|
return can_appear;
|
| 481 |
|
|
}
|
| 482 |
|
|
|
| 483 |
|
|
/* If this is an expression, try recursive substitution. */
|
| 484 |
|
|
switch (GET_RTX_CLASS (code))
|
| 485 |
|
|
{
|
| 486 |
|
|
case RTX_UNARY:
|
| 487 |
|
|
op0 = XEXP (x, 0);
|
| 488 |
|
|
op_mode = GET_MODE (op0);
|
| 489 |
|
|
valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
|
| 490 |
|
|
if (op0 == XEXP (x, 0))
|
| 491 |
|
|
return true;
|
| 492 |
|
|
tem = simplify_gen_unary (code, mode, op0, op_mode);
|
| 493 |
|
|
break;
|
| 494 |
|
|
|
| 495 |
|
|
case RTX_BIN_ARITH:
|
| 496 |
|
|
case RTX_COMM_ARITH:
|
| 497 |
|
|
op0 = XEXP (x, 0);
|
| 498 |
|
|
op1 = XEXP (x, 1);
|
| 499 |
|
|
valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
|
| 500 |
|
|
valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
|
| 501 |
|
|
if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
|
| 502 |
|
|
return true;
|
| 503 |
|
|
tem = simplify_gen_binary (code, mode, op0, op1);
|
| 504 |
|
|
break;
|
| 505 |
|
|
|
| 506 |
|
|
case RTX_COMPARE:
|
| 507 |
|
|
case RTX_COMM_COMPARE:
|
| 508 |
|
|
op0 = XEXP (x, 0);
|
| 509 |
|
|
op1 = XEXP (x, 1);
|
| 510 |
|
|
op_mode = GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
|
| 511 |
|
|
valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
|
| 512 |
|
|
valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
|
| 513 |
|
|
if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
|
| 514 |
|
|
return true;
|
| 515 |
|
|
tem = simplify_gen_relational (code, mode, op_mode, op0, op1);
|
| 516 |
|
|
break;
|
| 517 |
|
|
|
| 518 |
|
|
case RTX_TERNARY:
|
| 519 |
|
|
case RTX_BITFIELD_OPS:
|
| 520 |
|
|
op0 = XEXP (x, 0);
|
| 521 |
|
|
op1 = XEXP (x, 1);
|
| 522 |
|
|
op2 = XEXP (x, 2);
|
| 523 |
|
|
op_mode = GET_MODE (op0);
|
| 524 |
|
|
valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
|
| 525 |
|
|
valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
|
| 526 |
|
|
valid_ops &= propagate_rtx_1 (&op2, old_rtx, new_rtx, flags);
|
| 527 |
|
|
if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1) && op2 == XEXP (x, 2))
|
| 528 |
|
|
return true;
|
| 529 |
|
|
if (op_mode == VOIDmode)
|
| 530 |
|
|
op_mode = GET_MODE (op0);
|
| 531 |
|
|
tem = simplify_gen_ternary (code, mode, op_mode, op0, op1, op2);
|
| 532 |
|
|
break;
|
| 533 |
|
|
|
| 534 |
|
|
case RTX_EXTRA:
|
| 535 |
|
|
/* The only case we try to handle is a SUBREG. */
|
| 536 |
|
|
if (code == SUBREG)
|
| 537 |
|
|
{
|
| 538 |
|
|
op0 = XEXP (x, 0);
|
| 539 |
|
|
valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
|
| 540 |
|
|
if (op0 == XEXP (x, 0))
|
| 541 |
|
|
return true;
|
| 542 |
|
|
tem = simplify_gen_subreg (mode, op0, GET_MODE (SUBREG_REG (x)),
|
| 543 |
|
|
SUBREG_BYTE (x));
|
| 544 |
|
|
}
|
| 545 |
|
|
break;
|
| 546 |
|
|
|
| 547 |
|
|
case RTX_OBJ:
|
| 548 |
|
|
if (code == MEM && x != new_rtx)
|
| 549 |
|
|
{
|
| 550 |
|
|
rtx new_op0;
|
| 551 |
|
|
op0 = XEXP (x, 0);
|
| 552 |
|
|
|
| 553 |
|
|
/* There are some addresses that we cannot work on. */
|
| 554 |
|
|
if (!can_simplify_addr (op0))
|
| 555 |
|
|
return true;
|
| 556 |
|
|
|
| 557 |
|
|
op0 = new_op0 = targetm.delegitimize_address (op0);
|
| 558 |
|
|
valid_ops &= propagate_rtx_1 (&new_op0, old_rtx, new_rtx,
|
| 559 |
|
|
flags | PR_CAN_APPEAR);
|
| 560 |
|
|
|
| 561 |
|
|
/* Dismiss transformation that we do not want to carry on. */
|
| 562 |
|
|
if (!valid_ops
|
| 563 |
|
|
|| new_op0 == op0
|
| 564 |
|
|
|| !(GET_MODE (new_op0) == GET_MODE (op0)
|
| 565 |
|
|
|| GET_MODE (new_op0) == VOIDmode))
|
| 566 |
|
|
return true;
|
| 567 |
|
|
|
| 568 |
|
|
canonicalize_address (new_op0);
|
| 569 |
|
|
|
| 570 |
|
|
/* Copy propagations are always ok. Otherwise check the costs. */
|
| 571 |
|
|
if (!(REG_P (old_rtx) && REG_P (new_rtx))
|
| 572 |
|
|
&& !should_replace_address (op0, new_op0, GET_MODE (x),
|
| 573 |
|
|
MEM_ADDR_SPACE (x),
|
| 574 |
|
|
flags & PR_OPTIMIZE_FOR_SPEED))
|
| 575 |
|
|
return true;
|
| 576 |
|
|
|
| 577 |
|
|
tem = replace_equiv_address_nv (x, new_op0);
|
| 578 |
|
|
}
|
| 579 |
|
|
|
| 580 |
|
|
else if (code == LO_SUM)
|
| 581 |
|
|
{
|
| 582 |
|
|
op0 = XEXP (x, 0);
|
| 583 |
|
|
op1 = XEXP (x, 1);
|
| 584 |
|
|
|
| 585 |
|
|
/* The only simplification we do attempts to remove references to op0
|
| 586 |
|
|
or make it constant -- in both cases, op0's invalidity will not
|
| 587 |
|
|
make the result invalid. */
|
| 588 |
|
|
propagate_rtx_1 (&op0, old_rtx, new_rtx, flags | PR_CAN_APPEAR);
|
| 589 |
|
|
valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
|
| 590 |
|
|
if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
|
| 591 |
|
|
return true;
|
| 592 |
|
|
|
| 593 |
|
|
/* (lo_sum (high x) x) -> x */
|
| 594 |
|
|
if (GET_CODE (op0) == HIGH && rtx_equal_p (XEXP (op0, 0), op1))
|
| 595 |
|
|
tem = op1;
|
| 596 |
|
|
else
|
| 597 |
|
|
tem = gen_rtx_LO_SUM (mode, op0, op1);
|
| 598 |
|
|
|
| 599 |
|
|
/* OP1 is likely not a legitimate address, otherwise there would have
|
| 600 |
|
|
been no LO_SUM. We want it to disappear if it is invalid, return
|
| 601 |
|
|
false in that case. */
|
| 602 |
|
|
return memory_address_p (mode, tem);
|
| 603 |
|
|
}
|
| 604 |
|
|
|
| 605 |
|
|
else if (code == REG)
|
| 606 |
|
|
{
|
| 607 |
|
|
if (rtx_equal_p (x, old_rtx))
|
| 608 |
|
|
{
|
| 609 |
|
|
*px = new_rtx;
|
| 610 |
|
|
return can_appear;
|
| 611 |
|
|
}
|
| 612 |
|
|
}
|
| 613 |
|
|
break;
|
| 614 |
|
|
|
| 615 |
|
|
default:
|
| 616 |
|
|
break;
|
| 617 |
|
|
}
|
| 618 |
|
|
|
| 619 |
|
|
/* No change, no trouble. */
|
| 620 |
|
|
if (tem == NULL_RTX)
|
| 621 |
|
|
return true;
|
| 622 |
|
|
|
| 623 |
|
|
*px = tem;
|
| 624 |
|
|
|
| 625 |
|
|
/* The replacement we made so far is valid, if all of the recursive
|
| 626 |
|
|
replacements were valid, or we could simplify everything to
|
| 627 |
|
|
a constant. */
|
| 628 |
|
|
return valid_ops || can_appear || CONSTANT_P (tem);
|
| 629 |
|
|
}
|
| 630 |
|
|
|
| 631 |
|
|
|
| 632 |
|
|
/* for_each_rtx traversal function that returns 1 if BODY points to
|
| 633 |
|
|
a non-constant mem. */
|
| 634 |
|
|
|
| 635 |
|
|
static int
|
| 636 |
|
|
varying_mem_p (rtx *body, void *data ATTRIBUTE_UNUSED)
|
| 637 |
|
|
{
|
| 638 |
|
|
rtx x = *body;
|
| 639 |
|
|
return MEM_P (x) && !MEM_READONLY_P (x);
|
| 640 |
|
|
}
|
| 641 |
|
|
|
| 642 |
|
|
|
| 643 |
|
|
/* Replace all occurrences of OLD in X with NEW and try to simplify the
|
| 644 |
|
|
resulting expression (in mode MODE). Return a new expression if it is
|
| 645 |
|
|
a constant, otherwise X.
|
| 646 |
|
|
|
| 647 |
|
|
Simplifications where occurrences of NEW collapse to a constant are always
|
| 648 |
|
|
accepted. All simplifications are accepted if NEW is a pseudo too.
|
| 649 |
|
|
Otherwise, we accept simplifications that have a lower or equal cost. */
|
| 650 |
|
|
|
| 651 |
|
|
static rtx
|
| 652 |
|
|
propagate_rtx (rtx x, enum machine_mode mode, rtx old_rtx, rtx new_rtx,
|
| 653 |
|
|
bool speed)
|
| 654 |
|
|
{
|
| 655 |
|
|
rtx tem;
|
| 656 |
|
|
bool collapsed;
|
| 657 |
|
|
int flags;
|
| 658 |
|
|
|
| 659 |
|
|
if (REG_P (new_rtx) && REGNO (new_rtx) < FIRST_PSEUDO_REGISTER)
|
| 660 |
|
|
return NULL_RTX;
|
| 661 |
|
|
|
| 662 |
|
|
flags = 0;
|
| 663 |
|
|
if (REG_P (new_rtx) || CONSTANT_P (new_rtx))
|
| 664 |
|
|
flags |= PR_CAN_APPEAR;
|
| 665 |
|
|
if (!for_each_rtx (&new_rtx, varying_mem_p, NULL))
|
| 666 |
|
|
flags |= PR_HANDLE_MEM;
|
| 667 |
|
|
|
| 668 |
|
|
if (speed)
|
| 669 |
|
|
flags |= PR_OPTIMIZE_FOR_SPEED;
|
| 670 |
|
|
|
| 671 |
|
|
tem = x;
|
| 672 |
|
|
collapsed = propagate_rtx_1 (&tem, old_rtx, copy_rtx (new_rtx), flags);
|
| 673 |
|
|
if (tem == x || !collapsed)
|
| 674 |
|
|
return NULL_RTX;
|
| 675 |
|
|
|
| 676 |
|
|
/* gen_lowpart_common will not be able to process VOIDmode entities other
|
| 677 |
|
|
than CONST_INTs. */
|
| 678 |
|
|
if (GET_MODE (tem) == VOIDmode && !CONST_INT_P (tem))
|
| 679 |
|
|
return NULL_RTX;
|
| 680 |
|
|
|
| 681 |
|
|
if (GET_MODE (tem) == VOIDmode)
|
| 682 |
|
|
tem = rtl_hooks.gen_lowpart_no_emit (mode, tem);
|
| 683 |
|
|
else
|
| 684 |
|
|
gcc_assert (GET_MODE (tem) == mode);
|
| 685 |
|
|
|
| 686 |
|
|
return tem;
|
| 687 |
|
|
}
|
| 688 |
|
|
|
| 689 |
|
|
|
| 690 |
|
|
|
| 691 |
|
|
|
| 692 |
|
|
/* Return true if the register from reference REF is killed
|
| 693 |
|
|
between FROM to (but not including) TO. */
|
| 694 |
|
|
|
| 695 |
|
|
static bool
|
| 696 |
|
|
local_ref_killed_between_p (df_ref ref, rtx from, rtx to)
|
| 697 |
|
|
{
|
| 698 |
|
|
rtx insn;
|
| 699 |
|
|
|
| 700 |
|
|
for (insn = from; insn != to; insn = NEXT_INSN (insn))
|
| 701 |
|
|
{
|
| 702 |
|
|
df_ref *def_rec;
|
| 703 |
|
|
if (!INSN_P (insn))
|
| 704 |
|
|
continue;
|
| 705 |
|
|
|
| 706 |
|
|
for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
|
| 707 |
|
|
{
|
| 708 |
|
|
df_ref def = *def_rec;
|
| 709 |
|
|
if (DF_REF_REGNO (ref) == DF_REF_REGNO (def))
|
| 710 |
|
|
return true;
|
| 711 |
|
|
}
|
| 712 |
|
|
}
|
| 713 |
|
|
return false;
|
| 714 |
|
|
}
|
| 715 |
|
|
|
| 716 |
|
|
|
| 717 |
|
|
/* Check if the given DEF is available in INSN. This would require full
|
| 718 |
|
|
computation of available expressions; we check only restricted conditions:
|
| 719 |
|
|
- if DEF is the sole definition of its register, go ahead;
|
| 720 |
|
|
- in the same basic block, we check for no definitions killing the
|
| 721 |
|
|
definition of DEF_INSN;
|
| 722 |
|
|
- if USE's basic block has DEF's basic block as the sole predecessor,
|
| 723 |
|
|
we check if the definition is killed after DEF_INSN or before
|
| 724 |
|
|
TARGET_INSN insn, in their respective basic blocks. */
|
| 725 |
|
|
static bool
|
| 726 |
|
|
use_killed_between (df_ref use, rtx def_insn, rtx target_insn)
|
| 727 |
|
|
{
|
| 728 |
|
|
basic_block def_bb = BLOCK_FOR_INSN (def_insn);
|
| 729 |
|
|
basic_block target_bb = BLOCK_FOR_INSN (target_insn);
|
| 730 |
|
|
int regno;
|
| 731 |
|
|
df_ref def;
|
| 732 |
|
|
|
| 733 |
|
|
/* We used to have a def reaching a use that is _before_ the def,
|
| 734 |
|
|
with the def not dominating the use even though the use and def
|
| 735 |
|
|
are in the same basic block, when a register may be used
|
| 736 |
|
|
uninitialized in a loop. This should not happen anymore since
|
| 737 |
|
|
we do not use reaching definitions, but still we test for such
|
| 738 |
|
|
cases and assume that DEF is not available. */
|
| 739 |
|
|
if (def_bb == target_bb
|
| 740 |
|
|
? DF_INSN_LUID (def_insn) >= DF_INSN_LUID (target_insn)
|
| 741 |
|
|
: !dominated_by_p (CDI_DOMINATORS, target_bb, def_bb))
|
| 742 |
|
|
return true;
|
| 743 |
|
|
|
| 744 |
|
|
/* Check if the reg in USE has only one definition. We already
|
| 745 |
|
|
know that this definition reaches use, or we wouldn't be here.
|
| 746 |
|
|
However, this is invalid for hard registers because if they are
|
| 747 |
|
|
live at the beginning of the function it does not mean that we
|
| 748 |
|
|
have an uninitialized access. */
|
| 749 |
|
|
regno = DF_REF_REGNO (use);
|
| 750 |
|
|
def = DF_REG_DEF_CHAIN (regno);
|
| 751 |
|
|
if (def
|
| 752 |
|
|
&& DF_REF_NEXT_REG (def) == NULL
|
| 753 |
|
|
&& regno >= FIRST_PSEUDO_REGISTER)
|
| 754 |
|
|
return false;
|
| 755 |
|
|
|
| 756 |
|
|
/* Check locally if we are in the same basic block. */
|
| 757 |
|
|
if (def_bb == target_bb)
|
| 758 |
|
|
return local_ref_killed_between_p (use, def_insn, target_insn);
|
| 759 |
|
|
|
| 760 |
|
|
/* Finally, if DEF_BB is the sole predecessor of TARGET_BB. */
|
| 761 |
|
|
if (single_pred_p (target_bb)
|
| 762 |
|
|
&& single_pred (target_bb) == def_bb)
|
| 763 |
|
|
{
|
| 764 |
|
|
df_ref x;
|
| 765 |
|
|
|
| 766 |
|
|
/* See if USE is killed between DEF_INSN and the last insn in the
|
| 767 |
|
|
basic block containing DEF_INSN. */
|
| 768 |
|
|
x = df_bb_regno_last_def_find (def_bb, regno);
|
| 769 |
|
|
if (x && DF_INSN_LUID (DF_REF_INSN (x)) >= DF_INSN_LUID (def_insn))
|
| 770 |
|
|
return true;
|
| 771 |
|
|
|
| 772 |
|
|
/* See if USE is killed between TARGET_INSN and the first insn in the
|
| 773 |
|
|
basic block containing TARGET_INSN. */
|
| 774 |
|
|
x = df_bb_regno_first_def_find (target_bb, regno);
|
| 775 |
|
|
if (x && DF_INSN_LUID (DF_REF_INSN (x)) < DF_INSN_LUID (target_insn))
|
| 776 |
|
|
return true;
|
| 777 |
|
|
|
| 778 |
|
|
return false;
|
| 779 |
|
|
}
|
| 780 |
|
|
|
| 781 |
|
|
/* Otherwise assume the worst case. */
|
| 782 |
|
|
return true;
|
| 783 |
|
|
}
|
| 784 |
|
|
|
| 785 |
|
|
|
| 786 |
|
|
/* Check if all uses in DEF_INSN can be used in TARGET_INSN. This
|
| 787 |
|
|
would require full computation of available expressions;
|
| 788 |
|
|
we check only restricted conditions, see use_killed_between. */
|
| 789 |
|
|
static bool
|
| 790 |
|
|
all_uses_available_at (rtx def_insn, rtx target_insn)
|
| 791 |
|
|
{
|
| 792 |
|
|
df_ref *use_rec;
|
| 793 |
|
|
struct df_insn_info *insn_info = DF_INSN_INFO_GET (def_insn);
|
| 794 |
|
|
rtx def_set = single_set (def_insn);
|
| 795 |
|
|
|
| 796 |
|
|
gcc_assert (def_set);
|
| 797 |
|
|
|
| 798 |
|
|
/* If target_insn comes right after def_insn, which is very common
|
| 799 |
|
|
for addresses, we can use a quicker test. */
|
| 800 |
|
|
if (NEXT_INSN (def_insn) == target_insn
|
| 801 |
|
|
&& REG_P (SET_DEST (def_set)))
|
| 802 |
|
|
{
|
| 803 |
|
|
rtx def_reg = SET_DEST (def_set);
|
| 804 |
|
|
|
| 805 |
|
|
/* If the insn uses the reg that it defines, the substitution is
|
| 806 |
|
|
invalid. */
|
| 807 |
|
|
for (use_rec = DF_INSN_INFO_USES (insn_info); *use_rec; use_rec++)
|
| 808 |
|
|
{
|
| 809 |
|
|
df_ref use = *use_rec;
|
| 810 |
|
|
if (rtx_equal_p (DF_REF_REG (use), def_reg))
|
| 811 |
|
|
return false;
|
| 812 |
|
|
}
|
| 813 |
|
|
for (use_rec = DF_INSN_INFO_EQ_USES (insn_info); *use_rec; use_rec++)
|
| 814 |
|
|
{
|
| 815 |
|
|
df_ref use = *use_rec;
|
| 816 |
|
|
if (rtx_equal_p (DF_REF_REG (use), def_reg))
|
| 817 |
|
|
return false;
|
| 818 |
|
|
}
|
| 819 |
|
|
}
|
| 820 |
|
|
else
|
| 821 |
|
|
{
|
| 822 |
|
|
rtx def_reg = REG_P (SET_DEST (def_set)) ? SET_DEST (def_set) : NULL_RTX;
|
| 823 |
|
|
|
| 824 |
|
|
/* Look at all the uses of DEF_INSN, and see if they are not
|
| 825 |
|
|
killed between DEF_INSN and TARGET_INSN. */
|
| 826 |
|
|
for (use_rec = DF_INSN_INFO_USES (insn_info); *use_rec; use_rec++)
|
| 827 |
|
|
{
|
| 828 |
|
|
df_ref use = *use_rec;
|
| 829 |
|
|
if (def_reg && rtx_equal_p (DF_REF_REG (use), def_reg))
|
| 830 |
|
|
return false;
|
| 831 |
|
|
if (use_killed_between (use, def_insn, target_insn))
|
| 832 |
|
|
return false;
|
| 833 |
|
|
}
|
| 834 |
|
|
for (use_rec = DF_INSN_INFO_EQ_USES (insn_info); *use_rec; use_rec++)
|
| 835 |
|
|
{
|
| 836 |
|
|
df_ref use = *use_rec;
|
| 837 |
|
|
if (def_reg && rtx_equal_p (DF_REF_REG (use), def_reg))
|
| 838 |
|
|
return false;
|
| 839 |
|
|
if (use_killed_between (use, def_insn, target_insn))
|
| 840 |
|
|
return false;
|
| 841 |
|
|
}
|
| 842 |
|
|
}
|
| 843 |
|
|
|
| 844 |
|
|
return true;
|
| 845 |
|
|
}
|
| 846 |
|
|
|
| 847 |
|
|
|
| 848 |
|
|
struct find_occurrence_data
|
| 849 |
|
|
{
|
| 850 |
|
|
rtx find;
|
| 851 |
|
|
rtx *retval;
|
| 852 |
|
|
};
|
| 853 |
|
|
|
| 854 |
|
|
/* Callback for for_each_rtx, used in find_occurrence.
|
| 855 |
|
|
See if PX is the rtx we have to find. Return 1 to stop for_each_rtx
|
| 856 |
|
|
if successful, or 0 to continue traversing otherwise. */
|
| 857 |
|
|
|
| 858 |
|
|
static int
|
| 859 |
|
|
find_occurrence_callback (rtx *px, void *data)
|
| 860 |
|
|
{
|
| 861 |
|
|
struct find_occurrence_data *fod = (struct find_occurrence_data *) data;
|
| 862 |
|
|
rtx x = *px;
|
| 863 |
|
|
rtx find = fod->find;
|
| 864 |
|
|
|
| 865 |
|
|
if (x == find)
|
| 866 |
|
|
{
|
| 867 |
|
|
fod->retval = px;
|
| 868 |
|
|
return 1;
|
| 869 |
|
|
}
|
| 870 |
|
|
|
| 871 |
|
|
return 0;
|
| 872 |
|
|
}
|
| 873 |
|
|
|
| 874 |
|
|
/* Return a pointer to one of the occurrences of register FIND in *PX. */
|
| 875 |
|
|
|
| 876 |
|
|
static rtx *
|
| 877 |
|
|
find_occurrence (rtx *px, rtx find)
|
| 878 |
|
|
{
|
| 879 |
|
|
struct find_occurrence_data data;
|
| 880 |
|
|
|
| 881 |
|
|
gcc_assert (REG_P (find)
|
| 882 |
|
|
|| (GET_CODE (find) == SUBREG
|
| 883 |
|
|
&& REG_P (SUBREG_REG (find))));
|
| 884 |
|
|
|
| 885 |
|
|
data.find = find;
|
| 886 |
|
|
data.retval = NULL;
|
| 887 |
|
|
for_each_rtx (px, find_occurrence_callback, &data);
|
| 888 |
|
|
return data.retval;
|
| 889 |
|
|
}
|
| 890 |
|
|
|
| 891 |
|
|
|
| 892 |
|
|
/* Inside INSN, the expression rooted at *LOC has been changed, moving some
|
| 893 |
|
|
uses from USE_VEC. Find those that are present, and create new items
|
| 894 |
|
|
in the data flow object of the pass. Mark any new uses as having the
|
| 895 |
|
|
given TYPE. */
|
| 896 |
|
|
static void
|
| 897 |
|
|
update_df (rtx insn, rtx *loc, df_ref *use_rec, enum df_ref_type type,
|
| 898 |
|
|
int new_flags)
|
| 899 |
|
|
{
|
| 900 |
|
|
bool changed = false;
|
| 901 |
|
|
|
| 902 |
|
|
/* Add a use for the registers that were propagated. */
|
| 903 |
|
|
while (*use_rec)
|
| 904 |
|
|
{
|
| 905 |
|
|
df_ref use = *use_rec;
|
| 906 |
|
|
df_ref orig_use = use, new_use;
|
| 907 |
|
|
int width = -1;
|
| 908 |
|
|
int offset = -1;
|
| 909 |
|
|
enum machine_mode mode = VOIDmode;
|
| 910 |
|
|
rtx *new_loc = find_occurrence (loc, DF_REF_REG (orig_use));
|
| 911 |
|
|
use_rec++;
|
| 912 |
|
|
|
| 913 |
|
|
if (!new_loc)
|
| 914 |
|
|
continue;
|
| 915 |
|
|
|
| 916 |
|
|
if (DF_REF_FLAGS_IS_SET (orig_use, DF_REF_SIGN_EXTRACT | DF_REF_ZERO_EXTRACT))
|
| 917 |
|
|
{
|
| 918 |
|
|
width = DF_REF_EXTRACT_WIDTH (orig_use);
|
| 919 |
|
|
offset = DF_REF_EXTRACT_OFFSET (orig_use);
|
| 920 |
|
|
mode = DF_REF_EXTRACT_MODE (orig_use);
|
| 921 |
|
|
}
|
| 922 |
|
|
|
| 923 |
|
|
/* Add a new insn use. Use the original type, because it says if the
|
| 924 |
|
|
use was within a MEM. */
|
| 925 |
|
|
new_use = df_ref_create (DF_REF_REG (orig_use), new_loc,
|
| 926 |
|
|
insn, BLOCK_FOR_INSN (insn),
|
| 927 |
|
|
type, DF_REF_FLAGS (orig_use) | new_flags,
|
| 928 |
|
|
width, offset, mode);
|
| 929 |
|
|
|
| 930 |
|
|
/* Set up the use-def chain. */
|
| 931 |
|
|
gcc_assert (DF_REF_ID (new_use) == (int) VEC_length (df_ref, use_def_ref));
|
| 932 |
|
|
VEC_safe_push (df_ref, heap, use_def_ref, get_def_for_use (orig_use));
|
| 933 |
|
|
changed = true;
|
| 934 |
|
|
}
|
| 935 |
|
|
if (changed)
|
| 936 |
|
|
df_insn_rescan (insn);
|
| 937 |
|
|
}
|
| 938 |
|
|
|
| 939 |
|
|
|
| 940 |
|
|
/* Try substituting NEW into LOC, which originated from forward propagation
|
| 941 |
|
|
of USE's value from DEF_INSN. SET_REG_EQUAL says whether we are
|
| 942 |
|
|
substituting the whole SET_SRC, so we can set a REG_EQUAL note if the
|
| 943 |
|
|
new insn is not recognized. Return whether the substitution was
|
| 944 |
|
|
performed. */
|
| 945 |
|
|
|
| 946 |
|
|
static bool
|
| 947 |
|
|
try_fwprop_subst (df_ref use, rtx *loc, rtx new_rtx, rtx def_insn, bool set_reg_equal)
|
| 948 |
|
|
{
|
| 949 |
|
|
rtx insn = DF_REF_INSN (use);
|
| 950 |
|
|
enum df_ref_type type = DF_REF_TYPE (use);
|
| 951 |
|
|
int flags = DF_REF_FLAGS (use);
|
| 952 |
|
|
rtx set = single_set (insn);
|
| 953 |
|
|
bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
|
| 954 |
|
|
int old_cost = 0;
|
| 955 |
|
|
bool ok;
|
| 956 |
|
|
|
| 957 |
|
|
/* forward_propagate_subreg may be operating on an instruction with
|
| 958 |
|
|
multiple sets. If so, assume the cost of the new instruction is
|
| 959 |
|
|
not greater than the old one. */
|
| 960 |
|
|
if (set)
|
| 961 |
|
|
old_cost = rtx_cost (SET_SRC (set), SET, speed);
|
| 962 |
|
|
if (dump_file)
|
| 963 |
|
|
{
|
| 964 |
|
|
fprintf (dump_file, "\nIn insn %d, replacing\n ", INSN_UID (insn));
|
| 965 |
|
|
print_inline_rtx (dump_file, *loc, 2);
|
| 966 |
|
|
fprintf (dump_file, "\n with ");
|
| 967 |
|
|
print_inline_rtx (dump_file, new_rtx, 2);
|
| 968 |
|
|
fprintf (dump_file, "\n");
|
| 969 |
|
|
}
|
| 970 |
|
|
|
| 971 |
|
|
validate_unshare_change (insn, loc, new_rtx, true);
|
| 972 |
|
|
if (!verify_changes (0))
|
| 973 |
|
|
{
|
| 974 |
|
|
if (dump_file)
|
| 975 |
|
|
fprintf (dump_file, "Changes to insn %d not recognized\n",
|
| 976 |
|
|
INSN_UID (insn));
|
| 977 |
|
|
ok = false;
|
| 978 |
|
|
}
|
| 979 |
|
|
|
| 980 |
|
|
else if (DF_REF_TYPE (use) == DF_REF_REG_USE
|
| 981 |
|
|
&& set
|
| 982 |
|
|
&& rtx_cost (SET_SRC (set), SET, speed) > old_cost)
|
| 983 |
|
|
{
|
| 984 |
|
|
if (dump_file)
|
| 985 |
|
|
fprintf (dump_file, "Changes to insn %d not profitable\n",
|
| 986 |
|
|
INSN_UID (insn));
|
| 987 |
|
|
ok = false;
|
| 988 |
|
|
}
|
| 989 |
|
|
|
| 990 |
|
|
else
|
| 991 |
|
|
{
|
| 992 |
|
|
if (dump_file)
|
| 993 |
|
|
fprintf (dump_file, "Changed insn %d\n", INSN_UID (insn));
|
| 994 |
|
|
ok = true;
|
| 995 |
|
|
}
|
| 996 |
|
|
|
| 997 |
|
|
if (ok)
|
| 998 |
|
|
{
|
| 999 |
|
|
confirm_change_group ();
|
| 1000 |
|
|
num_changes++;
|
| 1001 |
|
|
|
| 1002 |
|
|
df_ref_remove (use);
|
| 1003 |
|
|
if (!CONSTANT_P (new_rtx))
|
| 1004 |
|
|
{
|
| 1005 |
|
|
struct df_insn_info *insn_info = DF_INSN_INFO_GET (def_insn);
|
| 1006 |
|
|
update_df (insn, loc, DF_INSN_INFO_USES (insn_info), type, flags);
|
| 1007 |
|
|
update_df (insn, loc, DF_INSN_INFO_EQ_USES (insn_info), type, flags);
|
| 1008 |
|
|
}
|
| 1009 |
|
|
}
|
| 1010 |
|
|
else
|
| 1011 |
|
|
{
|
| 1012 |
|
|
cancel_changes (0);
|
| 1013 |
|
|
|
| 1014 |
|
|
/* Can also record a simplified value in a REG_EQUAL note,
|
| 1015 |
|
|
making a new one if one does not already exist. */
|
| 1016 |
|
|
if (set_reg_equal)
|
| 1017 |
|
|
{
|
| 1018 |
|
|
if (dump_file)
|
| 1019 |
|
|
fprintf (dump_file, " Setting REG_EQUAL note\n");
|
| 1020 |
|
|
|
| 1021 |
|
|
set_unique_reg_note (insn, REG_EQUAL, copy_rtx (new_rtx));
|
| 1022 |
|
|
|
| 1023 |
|
|
/* ??? Is this still necessary if we add the note through
|
| 1024 |
|
|
set_unique_reg_note? */
|
| 1025 |
|
|
if (!CONSTANT_P (new_rtx))
|
| 1026 |
|
|
{
|
| 1027 |
|
|
struct df_insn_info *insn_info = DF_INSN_INFO_GET (def_insn);
|
| 1028 |
|
|
update_df (insn, loc, DF_INSN_INFO_USES (insn_info),
|
| 1029 |
|
|
type, DF_REF_IN_NOTE);
|
| 1030 |
|
|
update_df (insn, loc, DF_INSN_INFO_EQ_USES (insn_info),
|
| 1031 |
|
|
type, DF_REF_IN_NOTE);
|
| 1032 |
|
|
}
|
| 1033 |
|
|
}
|
| 1034 |
|
|
}
|
| 1035 |
|
|
|
| 1036 |
|
|
return ok;
|
| 1037 |
|
|
}
|
| 1038 |
|
|
|
| 1039 |
|
|
/* For the given single_set INSN, containing SRC known to be a
|
| 1040 |
|
|
ZERO_EXTEND or SIGN_EXTEND of a register, return true if INSN
|
| 1041 |
|
|
is redundant due to the register being set by a LOAD_EXTEND_OP
|
| 1042 |
|
|
load from memory. */
|
| 1043 |
|
|
|
| 1044 |
|
|
static bool
|
| 1045 |
|
|
free_load_extend (rtx src, rtx insn)
|
| 1046 |
|
|
{
|
| 1047 |
|
|
rtx reg;
|
| 1048 |
|
|
df_ref *use_vec;
|
| 1049 |
|
|
df_ref use = 0, def;
|
| 1050 |
|
|
|
| 1051 |
|
|
reg = XEXP (src, 0);
|
| 1052 |
|
|
#ifdef LOAD_EXTEND_OP
|
| 1053 |
|
|
if (LOAD_EXTEND_OP (GET_MODE (reg)) != GET_CODE (src))
|
| 1054 |
|
|
#endif
|
| 1055 |
|
|
return false;
|
| 1056 |
|
|
|
| 1057 |
|
|
for (use_vec = DF_INSN_USES (insn); *use_vec; use_vec++)
|
| 1058 |
|
|
{
|
| 1059 |
|
|
use = *use_vec;
|
| 1060 |
|
|
|
| 1061 |
|
|
if (!DF_REF_IS_ARTIFICIAL (use)
|
| 1062 |
|
|
&& DF_REF_TYPE (use) == DF_REF_REG_USE
|
| 1063 |
|
|
&& DF_REF_REG (use) == reg)
|
| 1064 |
|
|
break;
|
| 1065 |
|
|
}
|
| 1066 |
|
|
if (!use)
|
| 1067 |
|
|
return false;
|
| 1068 |
|
|
|
| 1069 |
|
|
def = get_def_for_use (use);
|
| 1070 |
|
|
if (!def)
|
| 1071 |
|
|
return false;
|
| 1072 |
|
|
|
| 1073 |
|
|
if (DF_REF_IS_ARTIFICIAL (def))
|
| 1074 |
|
|
return false;
|
| 1075 |
|
|
|
| 1076 |
|
|
if (NONJUMP_INSN_P (DF_REF_INSN (def)))
|
| 1077 |
|
|
{
|
| 1078 |
|
|
rtx patt = PATTERN (DF_REF_INSN (def));
|
| 1079 |
|
|
|
| 1080 |
|
|
if (GET_CODE (patt) == SET
|
| 1081 |
|
|
&& GET_CODE (SET_SRC (patt)) == MEM
|
| 1082 |
|
|
&& rtx_equal_p (SET_DEST (patt), reg))
|
| 1083 |
|
|
return true;
|
| 1084 |
|
|
}
|
| 1085 |
|
|
return false;
|
| 1086 |
|
|
}
|
| 1087 |
|
|
|
| 1088 |
|
|
/* If USE is a subreg, see if it can be replaced by a pseudo. */
|
| 1089 |
|
|
|
| 1090 |
|
|
static bool
|
| 1091 |
|
|
forward_propagate_subreg (df_ref use, rtx def_insn, rtx def_set)
|
| 1092 |
|
|
{
|
| 1093 |
|
|
rtx use_reg = DF_REF_REG (use);
|
| 1094 |
|
|
rtx use_insn, src;
|
| 1095 |
|
|
|
| 1096 |
|
|
/* Only consider subregs... */
|
| 1097 |
|
|
enum machine_mode use_mode = GET_MODE (use_reg);
|
| 1098 |
|
|
if (GET_CODE (use_reg) != SUBREG
|
| 1099 |
|
|
|| !REG_P (SET_DEST (def_set)))
|
| 1100 |
|
|
return false;
|
| 1101 |
|
|
|
| 1102 |
|
|
/* If this is a paradoxical SUBREG... */
|
| 1103 |
|
|
if (GET_MODE_SIZE (use_mode)
|
| 1104 |
|
|
> GET_MODE_SIZE (GET_MODE (SUBREG_REG (use_reg))))
|
| 1105 |
|
|
{
|
| 1106 |
|
|
/* If this is a paradoxical SUBREG, we have no idea what value the
|
| 1107 |
|
|
extra bits would have. However, if the operand is equivalent to
|
| 1108 |
|
|
a SUBREG whose operand is the same as our mode, and all the modes
|
| 1109 |
|
|
are within a word, we can just use the inner operand because
|
| 1110 |
|
|
these SUBREGs just say how to treat the register. */
|
| 1111 |
|
|
use_insn = DF_REF_INSN (use);
|
| 1112 |
|
|
src = SET_SRC (def_set);
|
| 1113 |
|
|
if (GET_CODE (src) == SUBREG
|
| 1114 |
|
|
&& REG_P (SUBREG_REG (src))
|
| 1115 |
|
|
&& GET_MODE (SUBREG_REG (src)) == use_mode
|
| 1116 |
|
|
&& subreg_lowpart_p (src)
|
| 1117 |
|
|
&& all_uses_available_at (def_insn, use_insn))
|
| 1118 |
|
|
return try_fwprop_subst (use, DF_REF_LOC (use), SUBREG_REG (src),
|
| 1119 |
|
|
def_insn, false);
|
| 1120 |
|
|
}
|
| 1121 |
|
|
|
| 1122 |
|
|
/* If this is a SUBREG of a ZERO_EXTEND or SIGN_EXTEND, and the SUBREG
|
| 1123 |
|
|
is the low part of the reg being extended then just use the inner
|
| 1124 |
|
|
operand. Don't do this if the ZERO_EXTEND or SIGN_EXTEND insn will
|
| 1125 |
|
|
be removed due to it matching a LOAD_EXTEND_OP load from memory. */
|
| 1126 |
|
|
else if (subreg_lowpart_p (use_reg))
|
| 1127 |
|
|
{
|
| 1128 |
|
|
use_insn = DF_REF_INSN (use);
|
| 1129 |
|
|
src = SET_SRC (def_set);
|
| 1130 |
|
|
if ((GET_CODE (src) == ZERO_EXTEND
|
| 1131 |
|
|
|| GET_CODE (src) == SIGN_EXTEND)
|
| 1132 |
|
|
&& REG_P (XEXP (src, 0))
|
| 1133 |
|
|
&& GET_MODE (XEXP (src, 0)) == use_mode
|
| 1134 |
|
|
&& !free_load_extend (src, def_insn)
|
| 1135 |
|
|
&& all_uses_available_at (def_insn, use_insn))
|
| 1136 |
|
|
return try_fwprop_subst (use, DF_REF_LOC (use), XEXP (src, 0),
|
| 1137 |
|
|
def_insn, false);
|
| 1138 |
|
|
}
|
| 1139 |
|
|
|
| 1140 |
|
|
return false;
|
| 1141 |
|
|
}
|
| 1142 |
|
|
|
| 1143 |
|
|
/* Try to replace USE with SRC (defined in DEF_INSN) in __asm. */
|
| 1144 |
|
|
|
| 1145 |
|
|
static bool
|
| 1146 |
|
|
forward_propagate_asm (df_ref use, rtx def_insn, rtx def_set, rtx reg)
|
| 1147 |
|
|
{
|
| 1148 |
|
|
rtx use_insn = DF_REF_INSN (use), src, use_pat, asm_operands, new_rtx, *loc;
|
| 1149 |
|
|
int speed_p, i;
|
| 1150 |
|
|
df_ref *use_vec;
|
| 1151 |
|
|
|
| 1152 |
|
|
gcc_assert ((DF_REF_FLAGS (use) & DF_REF_IN_NOTE) == 0);
|
| 1153 |
|
|
|
| 1154 |
|
|
src = SET_SRC (def_set);
|
| 1155 |
|
|
use_pat = PATTERN (use_insn);
|
| 1156 |
|
|
|
| 1157 |
|
|
/* In __asm don't replace if src might need more registers than
|
| 1158 |
|
|
reg, as that could increase register pressure on the __asm. */
|
| 1159 |
|
|
use_vec = DF_INSN_USES (def_insn);
|
| 1160 |
|
|
if (use_vec[0] && use_vec[1])
|
| 1161 |
|
|
return false;
|
| 1162 |
|
|
|
| 1163 |
|
|
speed_p = optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn));
|
| 1164 |
|
|
asm_operands = NULL_RTX;
|
| 1165 |
|
|
switch (GET_CODE (use_pat))
|
| 1166 |
|
|
{
|
| 1167 |
|
|
case ASM_OPERANDS:
|
| 1168 |
|
|
asm_operands = use_pat;
|
| 1169 |
|
|
break;
|
| 1170 |
|
|
case SET:
|
| 1171 |
|
|
if (MEM_P (SET_DEST (use_pat)))
|
| 1172 |
|
|
{
|
| 1173 |
|
|
loc = &SET_DEST (use_pat);
|
| 1174 |
|
|
new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg, src, speed_p);
|
| 1175 |
|
|
if (new_rtx)
|
| 1176 |
|
|
validate_unshare_change (use_insn, loc, new_rtx, true);
|
| 1177 |
|
|
}
|
| 1178 |
|
|
asm_operands = SET_SRC (use_pat);
|
| 1179 |
|
|
break;
|
| 1180 |
|
|
case PARALLEL:
|
| 1181 |
|
|
for (i = 0; i < XVECLEN (use_pat, 0); i++)
|
| 1182 |
|
|
if (GET_CODE (XVECEXP (use_pat, 0, i)) == SET)
|
| 1183 |
|
|
{
|
| 1184 |
|
|
if (MEM_P (SET_DEST (XVECEXP (use_pat, 0, i))))
|
| 1185 |
|
|
{
|
| 1186 |
|
|
loc = &SET_DEST (XVECEXP (use_pat, 0, i));
|
| 1187 |
|
|
new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg,
|
| 1188 |
|
|
src, speed_p);
|
| 1189 |
|
|
if (new_rtx)
|
| 1190 |
|
|
validate_unshare_change (use_insn, loc, new_rtx, true);
|
| 1191 |
|
|
}
|
| 1192 |
|
|
asm_operands = SET_SRC (XVECEXP (use_pat, 0, i));
|
| 1193 |
|
|
}
|
| 1194 |
|
|
else if (GET_CODE (XVECEXP (use_pat, 0, i)) == ASM_OPERANDS)
|
| 1195 |
|
|
asm_operands = XVECEXP (use_pat, 0, i);
|
| 1196 |
|
|
break;
|
| 1197 |
|
|
default:
|
| 1198 |
|
|
gcc_unreachable ();
|
| 1199 |
|
|
}
|
| 1200 |
|
|
|
| 1201 |
|
|
gcc_assert (asm_operands && GET_CODE (asm_operands) == ASM_OPERANDS);
|
| 1202 |
|
|
for (i = 0; i < ASM_OPERANDS_INPUT_LENGTH (asm_operands); i++)
|
| 1203 |
|
|
{
|
| 1204 |
|
|
loc = &ASM_OPERANDS_INPUT (asm_operands, i);
|
| 1205 |
|
|
new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg, src, speed_p);
|
| 1206 |
|
|
if (new_rtx)
|
| 1207 |
|
|
validate_unshare_change (use_insn, loc, new_rtx, true);
|
| 1208 |
|
|
}
|
| 1209 |
|
|
|
| 1210 |
|
|
if (num_changes_pending () == 0 || !apply_change_group ())
|
| 1211 |
|
|
return false;
|
| 1212 |
|
|
|
| 1213 |
|
|
num_changes++;
|
| 1214 |
|
|
return true;
|
| 1215 |
|
|
}
|
| 1216 |
|
|
|
| 1217 |
|
|
/* Try to replace USE with SRC (defined in DEF_INSN) and simplify the
|
| 1218 |
|
|
result. */
|
| 1219 |
|
|
|
| 1220 |
|
|
static bool
|
| 1221 |
|
|
forward_propagate_and_simplify (df_ref use, rtx def_insn, rtx def_set)
|
| 1222 |
|
|
{
|
| 1223 |
|
|
rtx use_insn = DF_REF_INSN (use);
|
| 1224 |
|
|
rtx use_set = single_set (use_insn);
|
| 1225 |
|
|
rtx src, reg, new_rtx, *loc;
|
| 1226 |
|
|
bool set_reg_equal;
|
| 1227 |
|
|
enum machine_mode mode;
|
| 1228 |
|
|
int asm_use = -1;
|
| 1229 |
|
|
|
| 1230 |
|
|
if (INSN_CODE (use_insn) < 0)
|
| 1231 |
|
|
asm_use = asm_noperands (PATTERN (use_insn));
|
| 1232 |
|
|
|
| 1233 |
|
|
if (!use_set && asm_use < 0 && !DEBUG_INSN_P (use_insn))
|
| 1234 |
|
|
return false;
|
| 1235 |
|
|
|
| 1236 |
|
|
/* Do not propagate into PC, CC0, etc. */
|
| 1237 |
|
|
if (use_set && GET_MODE (SET_DEST (use_set)) == VOIDmode)
|
| 1238 |
|
|
return false;
|
| 1239 |
|
|
|
| 1240 |
|
|
/* If def and use are subreg, check if they match. */
|
| 1241 |
|
|
reg = DF_REF_REG (use);
|
| 1242 |
|
|
if (GET_CODE (reg) == SUBREG
|
| 1243 |
|
|
&& GET_CODE (SET_DEST (def_set)) == SUBREG
|
| 1244 |
|
|
&& (SUBREG_BYTE (SET_DEST (def_set)) != SUBREG_BYTE (reg)
|
| 1245 |
|
|
|| GET_MODE (SET_DEST (def_set)) != GET_MODE (reg)))
|
| 1246 |
|
|
return false;
|
| 1247 |
|
|
|
| 1248 |
|
|
/* Check if the def had a subreg, but the use has the whole reg. */
|
| 1249 |
|
|
if (REG_P (reg) && GET_CODE (SET_DEST (def_set)) == SUBREG)
|
| 1250 |
|
|
return false;
|
| 1251 |
|
|
|
| 1252 |
|
|
/* Check if the use has a subreg, but the def had the whole reg. Unlike the
|
| 1253 |
|
|
previous case, the optimization is possible and often useful indeed. */
|
| 1254 |
|
|
if (GET_CODE (reg) == SUBREG && REG_P (SET_DEST (def_set)))
|
| 1255 |
|
|
reg = SUBREG_REG (reg);
|
| 1256 |
|
|
|
| 1257 |
|
|
/* Check if the substitution is valid (last, because it's the most
|
| 1258 |
|
|
expensive check!). */
|
| 1259 |
|
|
src = SET_SRC (def_set);
|
| 1260 |
|
|
if (!CONSTANT_P (src) && !all_uses_available_at (def_insn, use_insn))
|
| 1261 |
|
|
return false;
|
| 1262 |
|
|
|
| 1263 |
|
|
/* Check if the def is loading something from the constant pool; in this
|
| 1264 |
|
|
case we would undo optimization such as compress_float_constant.
|
| 1265 |
|
|
Still, we can set a REG_EQUAL note. */
|
| 1266 |
|
|
if (MEM_P (src) && MEM_READONLY_P (src))
|
| 1267 |
|
|
{
|
| 1268 |
|
|
rtx x = avoid_constant_pool_reference (src);
|
| 1269 |
|
|
if (x != src && use_set)
|
| 1270 |
|
|
{
|
| 1271 |
|
|
rtx note = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
|
| 1272 |
|
|
rtx old_rtx = note ? XEXP (note, 0) : SET_SRC (use_set);
|
| 1273 |
|
|
rtx new_rtx = simplify_replace_rtx (old_rtx, src, x);
|
| 1274 |
|
|
if (old_rtx != new_rtx)
|
| 1275 |
|
|
set_unique_reg_note (use_insn, REG_EQUAL, copy_rtx (new_rtx));
|
| 1276 |
|
|
}
|
| 1277 |
|
|
return false;
|
| 1278 |
|
|
}
|
| 1279 |
|
|
|
| 1280 |
|
|
if (asm_use >= 0)
|
| 1281 |
|
|
return forward_propagate_asm (use, def_insn, def_set, reg);
|
| 1282 |
|
|
|
| 1283 |
|
|
/* Else try simplifying. */
|
| 1284 |
|
|
|
| 1285 |
|
|
if (DF_REF_TYPE (use) == DF_REF_REG_MEM_STORE)
|
| 1286 |
|
|
{
|
| 1287 |
|
|
loc = &SET_DEST (use_set);
|
| 1288 |
|
|
set_reg_equal = false;
|
| 1289 |
|
|
}
|
| 1290 |
|
|
else if (!use_set)
|
| 1291 |
|
|
{
|
| 1292 |
|
|
loc = &INSN_VAR_LOCATION_LOC (use_insn);
|
| 1293 |
|
|
set_reg_equal = false;
|
| 1294 |
|
|
}
|
| 1295 |
|
|
else
|
| 1296 |
|
|
{
|
| 1297 |
|
|
rtx note = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
|
| 1298 |
|
|
if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE)
|
| 1299 |
|
|
loc = &XEXP (note, 0);
|
| 1300 |
|
|
else
|
| 1301 |
|
|
loc = &SET_SRC (use_set);
|
| 1302 |
|
|
|
| 1303 |
|
|
/* Do not replace an existing REG_EQUAL note if the insn is not
|
| 1304 |
|
|
recognized. Either we're already replacing in the note, or
|
| 1305 |
|
|
we'll separately try plugging the definition in the note and
|
| 1306 |
|
|
simplifying. */
|
| 1307 |
|
|
set_reg_equal = (note == NULL_RTX);
|
| 1308 |
|
|
}
|
| 1309 |
|
|
|
| 1310 |
|
|
if (GET_MODE (*loc) == VOIDmode)
|
| 1311 |
|
|
mode = GET_MODE (SET_DEST (use_set));
|
| 1312 |
|
|
else
|
| 1313 |
|
|
mode = GET_MODE (*loc);
|
| 1314 |
|
|
|
| 1315 |
|
|
new_rtx = propagate_rtx (*loc, mode, reg, src,
|
| 1316 |
|
|
optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn)));
|
| 1317 |
|
|
|
| 1318 |
|
|
if (!new_rtx)
|
| 1319 |
|
|
return false;
|
| 1320 |
|
|
|
| 1321 |
|
|
return try_fwprop_subst (use, loc, new_rtx, def_insn, set_reg_equal);
|
| 1322 |
|
|
}
|
| 1323 |
|
|
|
| 1324 |
|
|
|
| 1325 |
|
|
/* Given a use USE of an insn, if it has a single reaching
|
| 1326 |
|
|
definition, try to forward propagate it into that insn. */
|
| 1327 |
|
|
|
| 1328 |
|
|
static void
|
| 1329 |
|
|
forward_propagate_into (df_ref use)
|
| 1330 |
|
|
{
|
| 1331 |
|
|
df_ref def;
|
| 1332 |
|
|
rtx def_insn, def_set, use_insn;
|
| 1333 |
|
|
rtx parent;
|
| 1334 |
|
|
|
| 1335 |
|
|
if (DF_REF_FLAGS (use) & DF_REF_READ_WRITE)
|
| 1336 |
|
|
return;
|
| 1337 |
|
|
if (DF_REF_IS_ARTIFICIAL (use))
|
| 1338 |
|
|
return;
|
| 1339 |
|
|
|
| 1340 |
|
|
/* Only consider uses that have a single definition. */
|
| 1341 |
|
|
def = get_def_for_use (use);
|
| 1342 |
|
|
if (!def)
|
| 1343 |
|
|
return;
|
| 1344 |
|
|
if (DF_REF_FLAGS (def) & DF_REF_READ_WRITE)
|
| 1345 |
|
|
return;
|
| 1346 |
|
|
if (DF_REF_IS_ARTIFICIAL (def))
|
| 1347 |
|
|
return;
|
| 1348 |
|
|
|
| 1349 |
|
|
/* Do not propagate loop invariant definitions inside the loop. */
|
| 1350 |
|
|
if (DF_REF_BB (def)->loop_father != DF_REF_BB (use)->loop_father)
|
| 1351 |
|
|
return;
|
| 1352 |
|
|
|
| 1353 |
|
|
/* Check if the use is still present in the insn! */
|
| 1354 |
|
|
use_insn = DF_REF_INSN (use);
|
| 1355 |
|
|
if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE)
|
| 1356 |
|
|
parent = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
|
| 1357 |
|
|
else
|
| 1358 |
|
|
parent = PATTERN (use_insn);
|
| 1359 |
|
|
|
| 1360 |
|
|
if (!reg_mentioned_p (DF_REF_REG (use), parent))
|
| 1361 |
|
|
return;
|
| 1362 |
|
|
|
| 1363 |
|
|
def_insn = DF_REF_INSN (def);
|
| 1364 |
|
|
if (multiple_sets (def_insn))
|
| 1365 |
|
|
return;
|
| 1366 |
|
|
def_set = single_set (def_insn);
|
| 1367 |
|
|
if (!def_set)
|
| 1368 |
|
|
return;
|
| 1369 |
|
|
|
| 1370 |
|
|
/* Only try one kind of propagation. If two are possible, we'll
|
| 1371 |
|
|
do it on the following iterations. */
|
| 1372 |
|
|
if (!forward_propagate_and_simplify (use, def_insn, def_set))
|
| 1373 |
|
|
forward_propagate_subreg (use, def_insn, def_set);
|
| 1374 |
|
|
}
|
| 1375 |
|
|
|
| 1376 |
|
|
|
| 1377 |
|
|
static void
|
| 1378 |
|
|
fwprop_init (void)
|
| 1379 |
|
|
{
|
| 1380 |
|
|
num_changes = 0;
|
| 1381 |
|
|
calculate_dominance_info (CDI_DOMINATORS);
|
| 1382 |
|
|
|
| 1383 |
|
|
/* We do not always want to propagate into loops, so we have to find
|
| 1384 |
|
|
loops and be careful about them. But we have to call flow_loops_find
|
| 1385 |
|
|
before df_analyze, because flow_loops_find may introduce new jump
|
| 1386 |
|
|
insns (sadly) if we are not working in cfglayout mode. */
|
| 1387 |
|
|
loop_optimizer_init (0);
|
| 1388 |
|
|
|
| 1389 |
|
|
build_single_def_use_links ();
|
| 1390 |
|
|
df_set_flags (DF_DEFER_INSN_RESCAN);
|
| 1391 |
|
|
}
|
| 1392 |
|
|
|
| 1393 |
|
|
static void
|
| 1394 |
|
|
fwprop_done (void)
|
| 1395 |
|
|
{
|
| 1396 |
|
|
loop_optimizer_finalize ();
|
| 1397 |
|
|
|
| 1398 |
|
|
VEC_free (df_ref, heap, use_def_ref);
|
| 1399 |
|
|
free_dominance_info (CDI_DOMINATORS);
|
| 1400 |
|
|
cleanup_cfg (0);
|
| 1401 |
|
|
delete_trivially_dead_insns (get_insns (), max_reg_num ());
|
| 1402 |
|
|
|
| 1403 |
|
|
if (dump_file)
|
| 1404 |
|
|
fprintf (dump_file,
|
| 1405 |
|
|
"\nNumber of successful forward propagations: %d\n\n",
|
| 1406 |
|
|
num_changes);
|
| 1407 |
|
|
}
|
| 1408 |
|
|
|
| 1409 |
|
|
|
| 1410 |
|
|
/* Main entry point. */
|
| 1411 |
|
|
|
| 1412 |
|
|
static bool
|
| 1413 |
|
|
gate_fwprop (void)
|
| 1414 |
|
|
{
|
| 1415 |
|
|
return optimize > 0 && flag_forward_propagate;
|
| 1416 |
|
|
}
|
| 1417 |
|
|
|
| 1418 |
|
|
static unsigned int
|
| 1419 |
|
|
fwprop (void)
|
| 1420 |
|
|
{
|
| 1421 |
|
|
unsigned i;
|
| 1422 |
|
|
|
| 1423 |
|
|
fwprop_init ();
|
| 1424 |
|
|
|
| 1425 |
|
|
/* Go through all the uses. update_df will create new ones at the
|
| 1426 |
|
|
end, and we'll go through them as well.
|
| 1427 |
|
|
|
| 1428 |
|
|
Do not forward propagate addresses into loops until after unrolling.
|
| 1429 |
|
|
CSE did so because it was able to fix its own mess, but we are not. */
|
| 1430 |
|
|
|
| 1431 |
|
|
for (i = 0; i < DF_USES_TABLE_SIZE (); i++)
|
| 1432 |
|
|
{
|
| 1433 |
|
|
df_ref use = DF_USES_GET (i);
|
| 1434 |
|
|
if (use)
|
| 1435 |
|
|
if (DF_REF_TYPE (use) == DF_REF_REG_USE
|
| 1436 |
|
|
|| DF_REF_BB (use)->loop_father == NULL
|
| 1437 |
|
|
/* The outer most loop is not really a loop. */
|
| 1438 |
|
|
|| loop_outer (DF_REF_BB (use)->loop_father) == NULL)
|
| 1439 |
|
|
forward_propagate_into (use);
|
| 1440 |
|
|
}
|
| 1441 |
|
|
|
| 1442 |
|
|
fwprop_done ();
|
| 1443 |
|
|
return 0;
|
| 1444 |
|
|
}
|
| 1445 |
|
|
|
| 1446 |
|
|
struct rtl_opt_pass pass_rtl_fwprop =
|
| 1447 |
|
|
{
|
| 1448 |
|
|
{
|
| 1449 |
|
|
RTL_PASS,
|
| 1450 |
|
|
"fwprop1", /* name */
|
| 1451 |
|
|
gate_fwprop, /* gate */
|
| 1452 |
|
|
fwprop, /* execute */
|
| 1453 |
|
|
NULL, /* sub */
|
| 1454 |
|
|
NULL, /* next */
|
| 1455 |
|
|
0, /* static_pass_number */
|
| 1456 |
|
|
TV_FWPROP, /* tv_id */
|
| 1457 |
|
|
0, /* properties_required */
|
| 1458 |
|
|
0, /* properties_provided */
|
| 1459 |
|
|
0, /* properties_destroyed */
|
| 1460 |
|
|
0, /* todo_flags_start */
|
| 1461 |
|
|
TODO_df_finish | TODO_verify_rtl_sharing |
|
| 1462 |
|
|
TODO_dump_func /* todo_flags_finish */
|
| 1463 |
|
|
}
|
| 1464 |
|
|
};
|
| 1465 |
|
|
|
| 1466 |
|
|
static unsigned int
|
| 1467 |
|
|
fwprop_addr (void)
|
| 1468 |
|
|
{
|
| 1469 |
|
|
unsigned i;
|
| 1470 |
|
|
fwprop_init ();
|
| 1471 |
|
|
|
| 1472 |
|
|
/* Go through all the uses. update_df will create new ones at the
|
| 1473 |
|
|
end, and we'll go through them as well. */
|
| 1474 |
|
|
for (i = 0; i < DF_USES_TABLE_SIZE (); i++)
|
| 1475 |
|
|
{
|
| 1476 |
|
|
df_ref use = DF_USES_GET (i);
|
| 1477 |
|
|
if (use)
|
| 1478 |
|
|
if (DF_REF_TYPE (use) != DF_REF_REG_USE
|
| 1479 |
|
|
&& DF_REF_BB (use)->loop_father != NULL
|
| 1480 |
|
|
/* The outer most loop is not really a loop. */
|
| 1481 |
|
|
&& loop_outer (DF_REF_BB (use)->loop_father) != NULL)
|
| 1482 |
|
|
forward_propagate_into (use);
|
| 1483 |
|
|
}
|
| 1484 |
|
|
|
| 1485 |
|
|
fwprop_done ();
|
| 1486 |
|
|
|
| 1487 |
|
|
return 0;
|
| 1488 |
|
|
}
|
| 1489 |
|
|
|
| 1490 |
|
|
struct rtl_opt_pass pass_rtl_fwprop_addr =
|
| 1491 |
|
|
{
|
| 1492 |
|
|
{
|
| 1493 |
|
|
RTL_PASS,
|
| 1494 |
|
|
"fwprop2", /* name */
|
| 1495 |
|
|
gate_fwprop, /* gate */
|
| 1496 |
|
|
fwprop_addr, /* execute */
|
| 1497 |
|
|
NULL, /* sub */
|
| 1498 |
|
|
NULL, /* next */
|
| 1499 |
|
|
0, /* static_pass_number */
|
| 1500 |
|
|
TV_FWPROP, /* tv_id */
|
| 1501 |
|
|
0, /* properties_required */
|
| 1502 |
|
|
0, /* properties_provided */
|
| 1503 |
|
|
0, /* properties_destroyed */
|
| 1504 |
|
|
0, /* todo_flags_start */
|
| 1505 |
|
|
TODO_df_finish | TODO_verify_rtl_sharing |
|
| 1506 |
|
|
TODO_dump_func /* todo_flags_finish */
|
| 1507 |
|
|
}
|
| 1508 |
|
|
};
|