1 |
38 |
|
/* Definitions of target machine for GNU compiler.
|
2 |
|
|
Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
|
3 |
|
|
Free Software Foundation, Inc.
|
4 |
|
|
Contributed by James E. Wilson <wilson@cygnus.com> and
|
5 |
|
|
David Mosberger <davidm@hpl.hp.com>.
|
6 |
|
|
|
7 |
|
|
This file is part of GCC.
|
8 |
|
|
|
9 |
|
|
GCC is free software; you can redistribute it and/or modify
|
10 |
|
|
it under the terms of the GNU General Public License as published by
|
11 |
|
|
the Free Software Foundation; either version 3, or (at your option)
|
12 |
|
|
any later version.
|
13 |
|
|
|
14 |
|
|
GCC is distributed in the hope that it will be useful,
|
15 |
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16 |
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17 |
|
|
GNU General Public License for more details.
|
18 |
|
|
|
19 |
|
|
You should have received a copy of the GNU General Public License
|
20 |
|
|
along with GCC; see the file COPYING3. If not see
|
21 |
|
|
<http://www.gnu.org/licenses/>. */
|
22 |
|
|
|
23 |
|
|
#include "config.h"
|
24 |
|
|
#include "system.h"
|
25 |
|
|
#include "coretypes.h"
|
26 |
|
|
#include "tm.h"
|
27 |
|
|
#include "rtl.h"
|
28 |
|
|
#include "tree.h"
|
29 |
|
|
#include "regs.h"
|
30 |
|
|
#include "hard-reg-set.h"
|
31 |
|
|
#include "real.h"
|
32 |
|
|
#include "insn-config.h"
|
33 |
|
|
#include "conditions.h"
|
34 |
|
|
#include "output.h"
|
35 |
|
|
#include "insn-attr.h"
|
36 |
|
|
#include "flags.h"
|
37 |
|
|
#include "recog.h"
|
38 |
|
|
#include "expr.h"
|
39 |
|
|
#include "optabs.h"
|
40 |
|
|
#include "except.h"
|
41 |
|
|
#include "function.h"
|
42 |
|
|
#include "ggc.h"
|
43 |
|
|
#include "basic-block.h"
|
44 |
|
|
#include "toplev.h"
|
45 |
|
|
#include "sched-int.h"
|
46 |
|
|
#include "timevar.h"
|
47 |
|
|
#include "target.h"
|
48 |
|
|
#include "target-def.h"
|
49 |
|
|
#include "tm_p.h"
|
50 |
|
|
#include "hashtab.h"
|
51 |
|
|
#include "langhooks.h"
|
52 |
|
|
#include "cfglayout.h"
|
53 |
|
|
#include "tree-gimple.h"
|
54 |
|
|
#include "intl.h"
|
55 |
|
|
#include "debug.h"
|
56 |
|
|
#include "params.h"
|
57 |
|
|
|
58 |
|
|
/* This is used for communication between ASM_OUTPUT_LABEL and
|
59 |
|
|
ASM_OUTPUT_LABELREF. */
|
60 |
|
|
int ia64_asm_output_label = 0;
|
61 |
|
|
|
62 |
|
|
/* Define the information needed to generate branch and scc insns. This is
|
63 |
|
|
stored from the compare operation. */
|
64 |
|
|
struct rtx_def * ia64_compare_op0;
|
65 |
|
|
struct rtx_def * ia64_compare_op1;
|
66 |
|
|
|
67 |
|
|
/* Register names for ia64_expand_prologue. */
|
68 |
|
|
static const char * const ia64_reg_numbers[96] =
|
69 |
|
|
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
|
70 |
|
|
"r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
|
71 |
|
|
"r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
|
72 |
|
|
"r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
|
73 |
|
|
"r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
|
74 |
|
|
"r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
|
75 |
|
|
"r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
|
76 |
|
|
"r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
|
77 |
|
|
"r96", "r97", "r98", "r99", "r100","r101","r102","r103",
|
78 |
|
|
"r104","r105","r106","r107","r108","r109","r110","r111",
|
79 |
|
|
"r112","r113","r114","r115","r116","r117","r118","r119",
|
80 |
|
|
"r120","r121","r122","r123","r124","r125","r126","r127"};
|
81 |
|
|
|
82 |
|
|
/* ??? These strings could be shared with REGISTER_NAMES. */
|
83 |
|
|
static const char * const ia64_input_reg_names[8] =
|
84 |
|
|
{ "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
|
85 |
|
|
|
86 |
|
|
/* ??? These strings could be shared with REGISTER_NAMES. */
|
87 |
|
|
static const char * const ia64_local_reg_names[80] =
|
88 |
|
|
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
|
89 |
|
|
"loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
|
90 |
|
|
"loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
|
91 |
|
|
"loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
|
92 |
|
|
"loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
|
93 |
|
|
"loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
|
94 |
|
|
"loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
|
95 |
|
|
"loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
|
96 |
|
|
"loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
|
97 |
|
|
"loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
|
98 |
|
|
|
99 |
|
|
/* ??? These strings could be shared with REGISTER_NAMES. */
|
100 |
|
|
static const char * const ia64_output_reg_names[8] =
|
101 |
|
|
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
|
102 |
|
|
|
103 |
|
|
/* Which cpu are we scheduling for. */
|
104 |
|
|
enum processor_type ia64_tune = PROCESSOR_ITANIUM2;
|
105 |
|
|
|
106 |
|
|
/* Determines whether we run our final scheduling pass or not. We always
|
107 |
|
|
avoid the normal second scheduling pass. */
|
108 |
|
|
static int ia64_flag_schedule_insns2;
|
109 |
|
|
|
110 |
|
|
/* Determines whether we run variable tracking in machine dependent
|
111 |
|
|
reorganization. */
|
112 |
|
|
static int ia64_flag_var_tracking;
|
113 |
|
|
|
114 |
|
|
/* Variables which are this size or smaller are put in the sdata/sbss
|
115 |
|
|
sections. */
|
116 |
|
|
|
117 |
|
|
unsigned int ia64_section_threshold;
|
118 |
|
|
|
119 |
|
|
/* The following variable is used by the DFA insn scheduler. The value is
|
120 |
|
|
TRUE if we do insn bundling instead of insn scheduling. */
|
121 |
|
|
int bundling_p = 0;
|
122 |
|
|
|
123 |
|
|
/* Structure to be filled in by ia64_compute_frame_size with register
|
124 |
|
|
save masks and offsets for the current function. */
|
125 |
|
|
|
126 |
|
|
struct ia64_frame_info
|
127 |
|
|
{
|
128 |
|
|
HOST_WIDE_INT total_size; /* size of the stack frame, not including
|
129 |
|
|
the caller's scratch area. */
|
130 |
|
|
HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */
|
131 |
|
|
HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */
|
132 |
|
|
HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */
|
133 |
|
|
HARD_REG_SET mask; /* mask of saved registers. */
|
134 |
|
|
unsigned int gr_used_mask; /* mask of registers in use as gr spill
|
135 |
|
|
registers or long-term scratches. */
|
136 |
|
|
int n_spilled; /* number of spilled registers. */
|
137 |
|
|
int reg_fp; /* register for fp. */
|
138 |
|
|
int reg_save_b0; /* save register for b0. */
|
139 |
|
|
int reg_save_pr; /* save register for prs. */
|
140 |
|
|
int reg_save_ar_pfs; /* save register for ar.pfs. */
|
141 |
|
|
int reg_save_ar_unat; /* save register for ar.unat. */
|
142 |
|
|
int reg_save_ar_lc; /* save register for ar.lc. */
|
143 |
|
|
int reg_save_gp; /* save register for gp. */
|
144 |
|
|
int n_input_regs; /* number of input registers used. */
|
145 |
|
|
int n_local_regs; /* number of local registers used. */
|
146 |
|
|
int n_output_regs; /* number of output registers used. */
|
147 |
|
|
int n_rotate_regs; /* number of rotating registers used. */
|
148 |
|
|
|
149 |
|
|
char need_regstk; /* true if a .regstk directive needed. */
|
150 |
|
|
char initialized; /* true if the data is finalized. */
|
151 |
|
|
};
|
152 |
|
|
|
153 |
|
|
/* Current frame information calculated by ia64_compute_frame_size. */
|
154 |
|
|
static struct ia64_frame_info current_frame_info;
|
155 |
|
|
|
156 |
|
|
static int ia64_first_cycle_multipass_dfa_lookahead (void);
|
157 |
|
|
static void ia64_dependencies_evaluation_hook (rtx, rtx);
|
158 |
|
|
static void ia64_init_dfa_pre_cycle_insn (void);
|
159 |
|
|
static rtx ia64_dfa_pre_cycle_insn (void);
|
160 |
|
|
static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
|
161 |
|
|
static bool ia64_first_cycle_multipass_dfa_lookahead_guard_spec (rtx);
|
162 |
|
|
static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
|
163 |
|
|
static void ia64_h_i_d_extended (void);
|
164 |
|
|
static int ia64_mode_to_int (enum machine_mode);
|
165 |
|
|
static void ia64_set_sched_flags (spec_info_t);
|
166 |
|
|
static int ia64_speculate_insn (rtx, ds_t, rtx *);
|
167 |
|
|
static rtx ia64_gen_spec_insn (rtx, ds_t, int, bool, bool);
|
168 |
|
|
static bool ia64_needs_block_p (rtx);
|
169 |
|
|
static rtx ia64_gen_check (rtx, rtx, bool);
|
170 |
|
|
static int ia64_spec_check_p (rtx);
|
171 |
|
|
static int ia64_spec_check_src_p (rtx);
|
172 |
|
|
static rtx gen_tls_get_addr (void);
|
173 |
|
|
static rtx gen_thread_pointer (void);
|
174 |
|
|
static int find_gr_spill (int);
|
175 |
|
|
static int next_scratch_gr_reg (void);
|
176 |
|
|
static void mark_reg_gr_used_mask (rtx, void *);
|
177 |
|
|
static void ia64_compute_frame_size (HOST_WIDE_INT);
|
178 |
|
|
static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
|
179 |
|
|
static void finish_spill_pointers (void);
|
180 |
|
|
static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
|
181 |
|
|
static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
|
182 |
|
|
static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
|
183 |
|
|
static rtx gen_movdi_x (rtx, rtx, rtx);
|
184 |
|
|
static rtx gen_fr_spill_x (rtx, rtx, rtx);
|
185 |
|
|
static rtx gen_fr_restore_x (rtx, rtx, rtx);
|
186 |
|
|
|
187 |
|
|
static enum machine_mode hfa_element_mode (tree, bool);
|
188 |
|
|
static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
|
189 |
|
|
tree, int *, int);
|
190 |
|
|
static int ia64_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
|
191 |
|
|
tree, bool);
|
192 |
|
|
static bool ia64_function_ok_for_sibcall (tree, tree);
|
193 |
|
|
static bool ia64_return_in_memory (tree, tree);
|
194 |
|
|
static bool ia64_rtx_costs (rtx, int, int, int *);
|
195 |
|
|
static void fix_range (const char *);
|
196 |
|
|
static bool ia64_handle_option (size_t, const char *, int);
|
197 |
|
|
static struct machine_function * ia64_init_machine_status (void);
|
198 |
|
|
static void emit_insn_group_barriers (FILE *);
|
199 |
|
|
static void emit_all_insn_group_barriers (FILE *);
|
200 |
|
|
static void final_emit_insn_group_barriers (FILE *);
|
201 |
|
|
static void emit_predicate_relation_info (void);
|
202 |
|
|
static void ia64_reorg (void);
|
203 |
|
|
static bool ia64_in_small_data_p (tree);
|
204 |
|
|
static void process_epilogue (FILE *, rtx, bool, bool);
|
205 |
|
|
static int process_set (FILE *, rtx, rtx, bool, bool);
|
206 |
|
|
|
207 |
|
|
static bool ia64_assemble_integer (rtx, unsigned int, int);
|
208 |
|
|
static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
|
209 |
|
|
static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
|
210 |
|
|
static void ia64_output_function_end_prologue (FILE *);
|
211 |
|
|
|
212 |
|
|
static int ia64_issue_rate (void);
|
213 |
|
|
static int ia64_adjust_cost_2 (rtx, int, rtx, int);
|
214 |
|
|
static void ia64_sched_init (FILE *, int, int);
|
215 |
|
|
static void ia64_sched_init_global (FILE *, int, int);
|
216 |
|
|
static void ia64_sched_finish_global (FILE *, int);
|
217 |
|
|
static void ia64_sched_finish (FILE *, int);
|
218 |
|
|
static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
|
219 |
|
|
static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
|
220 |
|
|
static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
|
221 |
|
|
static int ia64_variable_issue (FILE *, int, rtx, int);
|
222 |
|
|
|
223 |
|
|
static struct bundle_state *get_free_bundle_state (void);
|
224 |
|
|
static void free_bundle_state (struct bundle_state *);
|
225 |
|
|
static void initiate_bundle_states (void);
|
226 |
|
|
static void finish_bundle_states (void);
|
227 |
|
|
static unsigned bundle_state_hash (const void *);
|
228 |
|
|
static int bundle_state_eq_p (const void *, const void *);
|
229 |
|
|
static int insert_bundle_state (struct bundle_state *);
|
230 |
|
|
static void initiate_bundle_state_table (void);
|
231 |
|
|
static void finish_bundle_state_table (void);
|
232 |
|
|
static int try_issue_nops (struct bundle_state *, int);
|
233 |
|
|
static int try_issue_insn (struct bundle_state *, rtx);
|
234 |
|
|
static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
|
235 |
|
|
static int get_max_pos (state_t);
|
236 |
|
|
static int get_template (state_t, int);
|
237 |
|
|
|
238 |
|
|
static rtx get_next_important_insn (rtx, rtx);
|
239 |
|
|
static void bundling (FILE *, int, rtx, rtx);
|
240 |
|
|
|
241 |
|
|
static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
|
242 |
|
|
HOST_WIDE_INT, tree);
|
243 |
|
|
static void ia64_file_start (void);
|
244 |
|
|
|
245 |
|
|
static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
|
246 |
|
|
static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
|
247 |
|
|
static section *ia64_select_rtx_section (enum machine_mode, rtx,
|
248 |
|
|
unsigned HOST_WIDE_INT);
|
249 |
|
|
static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
|
250 |
|
|
ATTRIBUTE_UNUSED;
|
251 |
|
|
static unsigned int ia64_section_type_flags (tree, const char *, int);
|
252 |
|
|
static void ia64_hpux_add_extern_decl (tree decl)
|
253 |
|
|
ATTRIBUTE_UNUSED;
|
254 |
|
|
static void ia64_hpux_file_end (void)
|
255 |
|
|
ATTRIBUTE_UNUSED;
|
256 |
|
|
static void ia64_init_libfuncs (void)
|
257 |
|
|
ATTRIBUTE_UNUSED;
|
258 |
|
|
static void ia64_hpux_init_libfuncs (void)
|
259 |
|
|
ATTRIBUTE_UNUSED;
|
260 |
|
|
static void ia64_sysv4_init_libfuncs (void)
|
261 |
|
|
ATTRIBUTE_UNUSED;
|
262 |
|
|
static void ia64_vms_init_libfuncs (void)
|
263 |
|
|
ATTRIBUTE_UNUSED;
|
264 |
|
|
|
265 |
|
|
static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
|
266 |
|
|
static void ia64_encode_section_info (tree, rtx, int);
|
267 |
|
|
static rtx ia64_struct_value_rtx (tree, int);
|
268 |
|
|
static tree ia64_gimplify_va_arg (tree, tree, tree *, tree *);
|
269 |
|
|
static bool ia64_scalar_mode_supported_p (enum machine_mode mode);
|
270 |
|
|
static bool ia64_vector_mode_supported_p (enum machine_mode mode);
|
271 |
|
|
static bool ia64_cannot_force_const_mem (rtx);
|
272 |
|
|
static const char *ia64_mangle_fundamental_type (tree);
|
273 |
|
|
static const char *ia64_invalid_conversion (tree, tree);
|
274 |
|
|
static const char *ia64_invalid_unary_op (int, tree);
|
275 |
|
|
static const char *ia64_invalid_binary_op (int, tree, tree);
|
276 |
|
|
|
277 |
|
|
/* Table of valid machine attributes. */
|
278 |
|
|
static const struct attribute_spec ia64_attribute_table[] =
|
279 |
|
|
{
|
280 |
|
|
/* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
|
281 |
|
|
{ "syscall_linkage", 0, 0, false, true, true, NULL },
|
282 |
|
|
{ "model", 1, 1, true, false, false, ia64_handle_model_attribute },
|
283 |
|
|
{ NULL, 0, 0, false, false, false, NULL }
|
284 |
|
|
};
|
285 |
|
|
|
286 |
|
|
/* Initialize the GCC target structure. */
|
287 |
|
|
#undef TARGET_ATTRIBUTE_TABLE
|
288 |
|
|
#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
|
289 |
|
|
|
290 |
|
|
#undef TARGET_INIT_BUILTINS
|
291 |
|
|
#define TARGET_INIT_BUILTINS ia64_init_builtins
|
292 |
|
|
|
293 |
|
|
#undef TARGET_EXPAND_BUILTIN
|
294 |
|
|
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin
|
295 |
|
|
|
296 |
|
|
#undef TARGET_ASM_BYTE_OP
|
297 |
|
|
#define TARGET_ASM_BYTE_OP "\tdata1\t"
|
298 |
|
|
#undef TARGET_ASM_ALIGNED_HI_OP
|
299 |
|
|
#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
|
300 |
|
|
#undef TARGET_ASM_ALIGNED_SI_OP
|
301 |
|
|
#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
|
302 |
|
|
#undef TARGET_ASM_ALIGNED_DI_OP
|
303 |
|
|
#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
|
304 |
|
|
#undef TARGET_ASM_UNALIGNED_HI_OP
|
305 |
|
|
#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
|
306 |
|
|
#undef TARGET_ASM_UNALIGNED_SI_OP
|
307 |
|
|
#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
|
308 |
|
|
#undef TARGET_ASM_UNALIGNED_DI_OP
|
309 |
|
|
#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
|
310 |
|
|
#undef TARGET_ASM_INTEGER
|
311 |
|
|
#define TARGET_ASM_INTEGER ia64_assemble_integer
|
312 |
|
|
|
313 |
|
|
#undef TARGET_ASM_FUNCTION_PROLOGUE
|
314 |
|
|
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
|
315 |
|
|
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
|
316 |
|
|
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
|
317 |
|
|
#undef TARGET_ASM_FUNCTION_EPILOGUE
|
318 |
|
|
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
|
319 |
|
|
|
320 |
|
|
#undef TARGET_IN_SMALL_DATA_P
|
321 |
|
|
#define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
|
322 |
|
|
|
323 |
|
|
#undef TARGET_SCHED_ADJUST_COST_2
|
324 |
|
|
#define TARGET_SCHED_ADJUST_COST_2 ia64_adjust_cost_2
|
325 |
|
|
#undef TARGET_SCHED_ISSUE_RATE
|
326 |
|
|
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
|
327 |
|
|
#undef TARGET_SCHED_VARIABLE_ISSUE
|
328 |
|
|
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
|
329 |
|
|
#undef TARGET_SCHED_INIT
|
330 |
|
|
#define TARGET_SCHED_INIT ia64_sched_init
|
331 |
|
|
#undef TARGET_SCHED_FINISH
|
332 |
|
|
#define TARGET_SCHED_FINISH ia64_sched_finish
|
333 |
|
|
#undef TARGET_SCHED_INIT_GLOBAL
|
334 |
|
|
#define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
|
335 |
|
|
#undef TARGET_SCHED_FINISH_GLOBAL
|
336 |
|
|
#define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
|
337 |
|
|
#undef TARGET_SCHED_REORDER
|
338 |
|
|
#define TARGET_SCHED_REORDER ia64_sched_reorder
|
339 |
|
|
#undef TARGET_SCHED_REORDER2
|
340 |
|
|
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2
|
341 |
|
|
|
342 |
|
|
#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
|
343 |
|
|
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
|
344 |
|
|
|
345 |
|
|
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
|
346 |
|
|
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
|
347 |
|
|
|
348 |
|
|
#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
|
349 |
|
|
#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
|
350 |
|
|
#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
|
351 |
|
|
#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
|
352 |
|
|
|
353 |
|
|
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
|
354 |
|
|
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
|
355 |
|
|
ia64_first_cycle_multipass_dfa_lookahead_guard
|
356 |
|
|
|
357 |
|
|
#undef TARGET_SCHED_DFA_NEW_CYCLE
|
358 |
|
|
#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
|
359 |
|
|
|
360 |
|
|
#undef TARGET_SCHED_H_I_D_EXTENDED
|
361 |
|
|
#define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended
|
362 |
|
|
|
363 |
|
|
#undef TARGET_SCHED_SET_SCHED_FLAGS
|
364 |
|
|
#define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags
|
365 |
|
|
|
366 |
|
|
#undef TARGET_SCHED_SPECULATE_INSN
|
367 |
|
|
#define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn
|
368 |
|
|
|
369 |
|
|
#undef TARGET_SCHED_NEEDS_BLOCK_P
|
370 |
|
|
#define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p
|
371 |
|
|
|
372 |
|
|
#undef TARGET_SCHED_GEN_CHECK
|
373 |
|
|
#define TARGET_SCHED_GEN_CHECK ia64_gen_check
|
374 |
|
|
|
375 |
|
|
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC
|
376 |
|
|
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC\
|
377 |
|
|
ia64_first_cycle_multipass_dfa_lookahead_guard_spec
|
378 |
|
|
|
379 |
|
|
#undef TARGET_FUNCTION_OK_FOR_SIBCALL
|
380 |
|
|
#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
|
381 |
|
|
#undef TARGET_ARG_PARTIAL_BYTES
|
382 |
|
|
#define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
|
383 |
|
|
|
384 |
|
|
#undef TARGET_ASM_OUTPUT_MI_THUNK
|
385 |
|
|
#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
|
386 |
|
|
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
|
387 |
|
|
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
|
388 |
|
|
|
389 |
|
|
#undef TARGET_ASM_FILE_START
|
390 |
|
|
#define TARGET_ASM_FILE_START ia64_file_start
|
391 |
|
|
|
392 |
|
|
#undef TARGET_RTX_COSTS
|
393 |
|
|
#define TARGET_RTX_COSTS ia64_rtx_costs
|
394 |
|
|
#undef TARGET_ADDRESS_COST
|
395 |
|
|
#define TARGET_ADDRESS_COST hook_int_rtx_0
|
396 |
|
|
|
397 |
|
|
#undef TARGET_MACHINE_DEPENDENT_REORG
|
398 |
|
|
#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
|
399 |
|
|
|
400 |
|
|
#undef TARGET_ENCODE_SECTION_INFO
|
401 |
|
|
#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
|
402 |
|
|
|
403 |
|
|
#undef TARGET_SECTION_TYPE_FLAGS
|
404 |
|
|
#define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags
|
405 |
|
|
|
406 |
|
|
#ifdef HAVE_AS_TLS
|
407 |
|
|
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
|
408 |
|
|
#define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
|
409 |
|
|
#endif
|
410 |
|
|
|
411 |
|
|
/* ??? ABI doesn't allow us to define this. */
|
412 |
|
|
#if 0
|
413 |
|
|
#undef TARGET_PROMOTE_FUNCTION_ARGS
|
414 |
|
|
#define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_tree_true
|
415 |
|
|
#endif
|
416 |
|
|
|
417 |
|
|
/* ??? ABI doesn't allow us to define this. */
|
418 |
|
|
#if 0
|
419 |
|
|
#undef TARGET_PROMOTE_FUNCTION_RETURN
|
420 |
|
|
#define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
|
421 |
|
|
#endif
|
422 |
|
|
|
423 |
|
|
/* ??? Investigate. */
|
424 |
|
|
#if 0
|
425 |
|
|
#undef TARGET_PROMOTE_PROTOTYPES
|
426 |
|
|
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
|
427 |
|
|
#endif
|
428 |
|
|
|
429 |
|
|
#undef TARGET_STRUCT_VALUE_RTX
|
430 |
|
|
#define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
|
431 |
|
|
#undef TARGET_RETURN_IN_MEMORY
|
432 |
|
|
#define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
|
433 |
|
|
#undef TARGET_SETUP_INCOMING_VARARGS
|
434 |
|
|
#define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
|
435 |
|
|
#undef TARGET_STRICT_ARGUMENT_NAMING
|
436 |
|
|
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
|
437 |
|
|
#undef TARGET_MUST_PASS_IN_STACK
|
438 |
|
|
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
|
439 |
|
|
|
440 |
|
|
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
|
441 |
|
|
#define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
|
442 |
|
|
|
443 |
|
|
#undef TARGET_UNWIND_EMIT
|
444 |
|
|
#define TARGET_UNWIND_EMIT process_for_unwind_directive
|
445 |
|
|
|
446 |
|
|
#undef TARGET_SCALAR_MODE_SUPPORTED_P
|
447 |
|
|
#define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
|
448 |
|
|
#undef TARGET_VECTOR_MODE_SUPPORTED_P
|
449 |
|
|
#define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
|
450 |
|
|
|
451 |
|
|
/* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
|
452 |
|
|
in an order different from the specified program order. */
|
453 |
|
|
#undef TARGET_RELAXED_ORDERING
|
454 |
|
|
#define TARGET_RELAXED_ORDERING true
|
455 |
|
|
|
456 |
|
|
#undef TARGET_DEFAULT_TARGET_FLAGS
|
457 |
|
|
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
|
458 |
|
|
#undef TARGET_HANDLE_OPTION
|
459 |
|
|
#define TARGET_HANDLE_OPTION ia64_handle_option
|
460 |
|
|
|
461 |
|
|
#undef TARGET_CANNOT_FORCE_CONST_MEM
|
462 |
|
|
#define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
|
463 |
|
|
|
464 |
|
|
#undef TARGET_MANGLE_FUNDAMENTAL_TYPE
|
465 |
|
|
#define TARGET_MANGLE_FUNDAMENTAL_TYPE ia64_mangle_fundamental_type
|
466 |
|
|
|
467 |
|
|
#undef TARGET_INVALID_CONVERSION
|
468 |
|
|
#define TARGET_INVALID_CONVERSION ia64_invalid_conversion
|
469 |
|
|
#undef TARGET_INVALID_UNARY_OP
|
470 |
|
|
#define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
|
471 |
|
|
#undef TARGET_INVALID_BINARY_OP
|
472 |
|
|
#define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op
|
473 |
|
|
|
474 |
|
|
struct gcc_target targetm = TARGET_INITIALIZER;
|
475 |
|
|
|
476 |
|
|
typedef enum
|
477 |
|
|
{
|
478 |
|
|
ADDR_AREA_NORMAL, /* normal address area */
|
479 |
|
|
ADDR_AREA_SMALL /* addressable by "addl" (-2MB < addr < 2MB) */
|
480 |
|
|
}
|
481 |
|
|
ia64_addr_area;
|
482 |
|
|
|
483 |
|
|
static GTY(()) tree small_ident1;
|
484 |
|
|
static GTY(()) tree small_ident2;
|
485 |
|
|
|
486 |
|
|
static void
|
487 |
|
|
init_idents (void)
|
488 |
|
|
{
|
489 |
|
|
if (small_ident1 == 0)
|
490 |
|
|
{
|
491 |
|
|
small_ident1 = get_identifier ("small");
|
492 |
|
|
small_ident2 = get_identifier ("__small__");
|
493 |
|
|
}
|
494 |
|
|
}
|
495 |
|
|
|
496 |
|
|
/* Retrieve the address area that has been chosen for the given decl. */
|
497 |
|
|
|
498 |
|
|
static ia64_addr_area
|
499 |
|
|
ia64_get_addr_area (tree decl)
|
500 |
|
|
{
|
501 |
|
|
tree model_attr;
|
502 |
|
|
|
503 |
|
|
model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
|
504 |
|
|
if (model_attr)
|
505 |
|
|
{
|
506 |
|
|
tree id;
|
507 |
|
|
|
508 |
|
|
init_idents ();
|
509 |
|
|
id = TREE_VALUE (TREE_VALUE (model_attr));
|
510 |
|
|
if (id == small_ident1 || id == small_ident2)
|
511 |
|
|
return ADDR_AREA_SMALL;
|
512 |
|
|
}
|
513 |
|
|
return ADDR_AREA_NORMAL;
|
514 |
|
|
}
|
515 |
|
|
|
516 |
|
|
/* Handler for the "model" machine attribute (see ia64_attribute_table).
   NODE points to the decl the attribute is attached to, NAME is the
   attribute identifier, ARGS its argument list.  Sets *NO_ADD_ATTRS
   when the attribute must be discarded (bad argument, wrong decl kind,
   or a conflict with a previously chosen address area).  Always
   returns NULL_TREE per the attribute-handler convention.  */

static tree
ia64_handle_model_attribute (tree *node, tree name, tree args,
			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  ia64_addr_area addr_area = ADDR_AREA_NORMAL;
  ia64_addr_area area;
  tree arg, decl = *node;

  init_idents ();
  /* Only "small"/"__small__" are valid arguments; anything else is
     diagnosed but compilation continues.  */
  arg = TREE_VALUE (args);
  if (arg == small_ident1 || arg == small_ident2)
    {
      addr_area = ADDR_AREA_SMALL;
    }
  else
    {
      warning (OPT_Wattributes, "invalid argument of %qs attribute",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  switch (TREE_CODE (decl))
    {
    case VAR_DECL:
      /* Address areas make no sense for automatic (stack) variables.  */
      if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
	   == FUNCTION_DECL)
	  && !TREE_STATIC (decl))
	{
	  error ("%Jan address area attribute cannot be specified for "
		 "local variables", decl);
	  *no_add_attrs = true;
	}
      /* Reject a model that disagrees with an earlier declaration.  */
      area = ia64_get_addr_area (decl);
      if (area != ADDR_AREA_NORMAL && addr_area != area)
	{
	  error ("address area of %q+D conflicts with previous "
		 "declaration", decl);
	  *no_add_attrs = true;
	}
      break;

    case FUNCTION_DECL:
      error ("%Jaddress area attribute cannot be specified for functions",
	     decl);
      *no_add_attrs = true;
      break;

    default:
      /* Types and other decls: attribute is silently dropped with a
	 warning, matching the generic attribute machinery's style.  */
      warning (OPT_Wattributes, "%qs attribute ignored",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
      break;
    }

  return NULL_TREE;
}
|
572 |
|
|
|
573 |
|
|
static void
|
574 |
|
|
ia64_encode_addr_area (tree decl, rtx symbol)
|
575 |
|
|
{
|
576 |
|
|
int flags;
|
577 |
|
|
|
578 |
|
|
flags = SYMBOL_REF_FLAGS (symbol);
|
579 |
|
|
switch (ia64_get_addr_area (decl))
|
580 |
|
|
{
|
581 |
|
|
case ADDR_AREA_NORMAL: break;
|
582 |
|
|
case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
|
583 |
|
|
default: gcc_unreachable ();
|
584 |
|
|
}
|
585 |
|
|
SYMBOL_REF_FLAGS (symbol) = flags;
|
586 |
|
|
}
|
587 |
|
|
|
588 |
|
|
static void
|
589 |
|
|
ia64_encode_section_info (tree decl, rtx rtl, int first)
|
590 |
|
|
{
|
591 |
|
|
default_encode_section_info (decl, rtl, first);
|
592 |
|
|
|
593 |
|
|
/* Careful not to prod global register variables. */
|
594 |
|
|
if (TREE_CODE (decl) == VAR_DECL
|
595 |
|
|
&& GET_CODE (DECL_RTL (decl)) == MEM
|
596 |
|
|
&& GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
|
597 |
|
|
&& (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
|
598 |
|
|
ia64_encode_addr_area (decl, XEXP (rtl, 0));
|
599 |
|
|
}
|
600 |
|
|
|
601 |
|
|
/* Implement CONST_OK_FOR_LETTER_P. */
|
602 |
|
|
|
603 |
|
|
bool
|
604 |
|
|
ia64_const_ok_for_letter_p (HOST_WIDE_INT value, char c)
|
605 |
|
|
{
|
606 |
|
|
switch (c)
|
607 |
|
|
{
|
608 |
|
|
case 'I':
|
609 |
|
|
return CONST_OK_FOR_I (value);
|
610 |
|
|
case 'J':
|
611 |
|
|
return CONST_OK_FOR_J (value);
|
612 |
|
|
case 'K':
|
613 |
|
|
return CONST_OK_FOR_K (value);
|
614 |
|
|
case 'L':
|
615 |
|
|
return CONST_OK_FOR_L (value);
|
616 |
|
|
case 'M':
|
617 |
|
|
return CONST_OK_FOR_M (value);
|
618 |
|
|
case 'N':
|
619 |
|
|
return CONST_OK_FOR_N (value);
|
620 |
|
|
case 'O':
|
621 |
|
|
return CONST_OK_FOR_O (value);
|
622 |
|
|
case 'P':
|
623 |
|
|
return CONST_OK_FOR_P (value);
|
624 |
|
|
default:
|
625 |
|
|
return false;
|
626 |
|
|
}
|
627 |
|
|
}
|
628 |
|
|
|
629 |
|
|
/* Implement CONST_DOUBLE_OK_FOR_LETTER_P. */
|
630 |
|
|
|
631 |
|
|
bool
|
632 |
|
|
ia64_const_double_ok_for_letter_p (rtx value, char c)
|
633 |
|
|
{
|
634 |
|
|
switch (c)
|
635 |
|
|
{
|
636 |
|
|
case 'G':
|
637 |
|
|
return CONST_DOUBLE_OK_FOR_G (value);
|
638 |
|
|
default:
|
639 |
|
|
return false;
|
640 |
|
|
}
|
641 |
|
|
}
|
642 |
|
|
|
643 |
|
|
/* Implement EXTRA_CONSTRAINT. */
|
644 |
|
|
|
645 |
|
|
/* Implement EXTRA_CONSTRAINT.  VALUE is the operand being tested and C
   the machine-specific constraint letter; return true when VALUE
   satisfies constraint C.  Unknown letters never match.  */

bool
ia64_extra_constraint (rtx value, char c)
{
  switch (c)
    {
    case 'Q':
      /* Non-volatile memory for FP_REG loads/stores.  */
      return memory_operand(value, VOIDmode) && !MEM_VOLATILE_P (value);

    case 'R':
      /* 1..4 for shladd arguments.  */
      return (GET_CODE (value) == CONST_INT
	      && INTVAL (value) >= 1 && INTVAL (value) <= 4);

    case 'S':
      /* Non-post-inc memory for asms and other unsavory creatures.  */
      return (GET_CODE (value) == MEM
	      && GET_RTX_CLASS (GET_CODE (XEXP (value, 0))) != RTX_AUTOINC
	      && (reload_in_progress || memory_operand (value, VOIDmode)));

    case 'T':
      /* Symbol ref to small-address-area.  */
      return small_addr_symbolic_operand (value, VOIDmode);

    case 'U':
      /* Vector zero.  */
      return value == CONST0_RTX (GET_MODE (value));

    case 'W':
      /* An integer vector, such that conversion to an integer yields a
	 value appropriate for an integer 'J' constraint.  */
      if (GET_CODE (value) == CONST_VECTOR
	  && GET_MODE_CLASS (GET_MODE (value)) == MODE_VECTOR_INT)
	{
	  value = simplify_subreg (DImode, value, GET_MODE (value), 0);
	  return ia64_const_ok_for_letter_p (INTVAL (value), 'J');
	}
      return false;

    case 'Y':
      /* A V2SF vector containing elements that satisfy 'G'.  */
      return
	(GET_CODE (value) == CONST_VECTOR
	 && GET_MODE (value) == V2SFmode
	 && ia64_const_double_ok_for_letter_p (XVECEXP (value, 0, 0), 'G')
	 && ia64_const_double_ok_for_letter_p (XVECEXP (value, 0, 1), 'G'));

    default:
      return false;
    }
}
|
696 |
|
|
|
697 |
|
|
/* Return 1 if the operands of a move are ok. */
|
698 |
|
|
|
699 |
|
|
int
|
700 |
|
|
ia64_move_ok (rtx dst, rtx src)
|
701 |
|
|
{
|
702 |
|
|
/* If we're under init_recog_no_volatile, we'll not be able to use
|
703 |
|
|
memory_operand. So check the code directly and don't worry about
|
704 |
|
|
the validity of the underlying address, which should have been
|
705 |
|
|
checked elsewhere anyway. */
|
706 |
|
|
if (GET_CODE (dst) != MEM)
|
707 |
|
|
return 1;
|
708 |
|
|
if (GET_CODE (src) == MEM)
|
709 |
|
|
return 0;
|
710 |
|
|
if (register_operand (src, VOIDmode))
|
711 |
|
|
return 1;
|
712 |
|
|
|
713 |
|
|
/* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0. */
|
714 |
|
|
if (INTEGRAL_MODE_P (GET_MODE (dst)))
|
715 |
|
|
return src == const0_rtx;
|
716 |
|
|
else
|
717 |
|
|
return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
|
718 |
|
|
}
|
719 |
|
|
|
720 |
|
|
/* Return 1 if the operands are ok for a floating point load pair. */
|
721 |
|
|
|
722 |
|
|
int
|
723 |
|
|
ia64_load_pair_ok (rtx dst, rtx src)
|
724 |
|
|
{
|
725 |
|
|
if (GET_CODE (dst) != REG || !FP_REGNO_P (REGNO (dst)))
|
726 |
|
|
return 0;
|
727 |
|
|
if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
|
728 |
|
|
return 0;
|
729 |
|
|
switch (GET_CODE (XEXP (src, 0)))
|
730 |
|
|
{
|
731 |
|
|
case REG:
|
732 |
|
|
case POST_INC:
|
733 |
|
|
break;
|
734 |
|
|
case POST_DEC:
|
735 |
|
|
return 0;
|
736 |
|
|
case POST_MODIFY:
|
737 |
|
|
{
|
738 |
|
|
rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);
|
739 |
|
|
|
740 |
|
|
if (GET_CODE (adjust) != CONST_INT
|
741 |
|
|
|| INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
|
742 |
|
|
return 0;
|
743 |
|
|
}
|
744 |
|
|
break;
|
745 |
|
|
default:
|
746 |
|
|
abort ();
|
747 |
|
|
}
|
748 |
|
|
return 1;
|
749 |
|
|
}
|
750 |
|
|
|
751 |
|
|
int
|
752 |
|
|
addp4_optimize_ok (rtx op1, rtx op2)
|
753 |
|
|
{
|
754 |
|
|
return (basereg_operand (op1, GET_MODE(op1)) !=
|
755 |
|
|
basereg_operand (op2, GET_MODE(op2)));
|
756 |
|
|
}
|
757 |
|
|
|
758 |
|
|
/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
|
759 |
|
|
Return the length of the field, or <= 0 on failure. */
|
760 |
|
|
|
761 |
|
|
int
|
762 |
|
|
ia64_depz_field_mask (rtx rop, rtx rshift)
|
763 |
|
|
{
|
764 |
|
|
unsigned HOST_WIDE_INT op = INTVAL (rop);
|
765 |
|
|
unsigned HOST_WIDE_INT shift = INTVAL (rshift);
|
766 |
|
|
|
767 |
|
|
/* Get rid of the zero bits we're shifting in. */
|
768 |
|
|
op >>= shift;
|
769 |
|
|
|
770 |
|
|
/* We must now have a solid block of 1's at bit 0. */
|
771 |
|
|
return exact_log2 (op + 1);
|
772 |
|
|
}
|
773 |
|
|
|
774 |
|
|
/* Return the TLS model to use for ADDR. */
|
775 |
|
|
|
776 |
|
|
static enum tls_model
|
777 |
|
|
tls_symbolic_operand_type (rtx addr)
|
778 |
|
|
{
|
779 |
|
|
enum tls_model tls_kind = 0;
|
780 |
|
|
|
781 |
|
|
if (GET_CODE (addr) == CONST)
|
782 |
|
|
{
|
783 |
|
|
if (GET_CODE (XEXP (addr, 0)) == PLUS
|
784 |
|
|
&& GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
|
785 |
|
|
tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
|
786 |
|
|
}
|
787 |
|
|
else if (GET_CODE (addr) == SYMBOL_REF)
|
788 |
|
|
tls_kind = SYMBOL_REF_TLS_MODEL (addr);
|
789 |
|
|
|
790 |
|
|
return tls_kind;
|
791 |
|
|
}
|
792 |
|
|
|
793 |
|
|
/* Return true if X is a constant that is valid for some immediate
   field in an instruction.  */

bool
ia64_legitimate_constant_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case LABEL_REF:
      return true;

    case CONST_DOUBLE:
      /* A VOIDmode CONST_DOUBLE (an integer constant) is always OK;
	 a true FP constant must satisfy the "G" constraint.  */
      if (GET_MODE (x) == VOIDmode)
	return true;
      return CONST_DOUBLE_OK_FOR_G (x);

    case CONST:
    case SYMBOL_REF:
      /* ??? Short term workaround for PR 28490.  We must make the code here
	 match the code in ia64_expand_move and move_operand, even though they
	 are both technically wrong.  */
      if (tls_symbolic_operand_type (x) == 0)
	{
	  HOST_WIDE_INT addend = 0;
	  rtx op = x;

	  /* Split off any constant offset so we can test the bare symbol.  */
	  if (GET_CODE (op) == CONST
	      && GET_CODE (XEXP (op, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
	    {
	      addend = INTVAL (XEXP (XEXP (op, 0), 1));
	      op = XEXP (XEXP (op, 0), 0);
	    }

	  if (any_offset_symbol_operand (op, GET_MODE (op))
	      || function_operand (op, GET_MODE (op)))
	    return true;
	  /* Symbols that only allow aligned offsets require the low
	     14 bits of the addend to be zero.  */
	  if (aligned_offset_symbol_operand (op, GET_MODE (op)))
	    return (addend & 0x3fff) == 0;
	  return false;
	}
      /* TLS symbols are never legitimate constants.  */
      return false;

    case CONST_VECTOR:
      {
	enum machine_mode mode = GET_MODE (x);

	if (mode == V2SFmode)
	  return ia64_extra_constraint (x, 'Y');

	/* Integer vectors of at most 8 bytes are OK.  */
	return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
		&& GET_MODE_SIZE (mode) <= 8);
      }

    default:
      return false;
    }
}
|
852 |
|
|
|
853 |
|
|
/* Don't allow TLS addresses to get spilled to memory. */
|
854 |
|
|
|
855 |
|
|
static bool
|
856 |
|
|
ia64_cannot_force_const_mem (rtx x)
|
857 |
|
|
{
|
858 |
|
|
return tls_symbolic_operand_type (x) != 0;
|
859 |
|
|
}
|
860 |
|
|
|
861 |
|
|
/* Expand a symbolic constant load.  Return true if a sequence was
   emitted, or false if the caller should fall back to an ordinary
   move (TARGET_NO_PIC, or a small-address symbol).  */

bool
ia64_expand_load_address (rtx dest, rtx src)
{
  gcc_assert (GET_CODE (dest) == REG);

  /* ILP32 mode still loads 64-bits of data from the GOT.  This avoids
     having to pointer-extend the value afterward.  Other forms of address
     computation below are also more natural to compute as 64-bit quantities.
     If we've been given an SImode destination register, change it.  */
  if (GET_MODE (dest) != Pmode)
    dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest), 0);

  if (TARGET_NO_PIC)
    return false;
  if (small_addr_symbolic_operand (src, VOIDmode))
    return false;

  if (TARGET_AUTO_PIC)
    emit_insn (gen_load_gprel64 (dest, src));
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
    emit_insn (gen_load_fptr (dest, src));
  else if (sdata_symbolic_operand (src, VOIDmode))
    emit_insn (gen_load_gprel (dest, src));
  else
    {
      HOST_WIDE_INT addend = 0;
      rtx tmp;

      /* We did split constant offsets in ia64_expand_move, and we did try
	 to keep them split in move_operand, but we also allowed reload to
	 rematerialize arbitrary constants rather than spill the value to
	 the stack and reload it.  So we have to be prepared here to split
	 them apart again.  */
      if (GET_CODE (src) == CONST)
	{
	  HOST_WIDE_INT hi, lo;

	  /* Split the offset into a sign-extended 14-bit low part and
	     the remaining high part.  */
	  hi = INTVAL (XEXP (XEXP (src, 0), 1));
	  lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
	  hi = hi - lo;

	  if (lo != 0)
	    {
	      addend = lo;
	      src = plus_constant (XEXP (XEXP (src, 0), 0), hi);
	    }
	}

      /* Standard HIGH/LO_SUM pair relative to the GP.  */
      tmp = gen_rtx_HIGH (Pmode, src);
      tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));

      tmp = gen_rtx_LO_SUM (Pmode, dest, src);
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));

      /* Add back any low offset split off above.  */
      if (addend)
	{
	  tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
	  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
	}
    }

  return true;
}
|
927 |
|
|
|
928 |
|
|
static GTY(()) rtx gen_tls_tga;
|
929 |
|
|
static rtx
|
930 |
|
|
gen_tls_get_addr (void)
|
931 |
|
|
{
|
932 |
|
|
if (!gen_tls_tga)
|
933 |
|
|
gen_tls_tga = init_one_libfunc ("__tls_get_addr");
|
934 |
|
|
return gen_tls_tga;
|
935 |
|
|
}
|
936 |
|
|
|
937 |
|
|
static GTY(()) rtx thread_pointer_rtx;
|
938 |
|
|
static rtx
|
939 |
|
|
gen_thread_pointer (void)
|
940 |
|
|
{
|
941 |
|
|
if (!thread_pointer_rtx)
|
942 |
|
|
thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
|
943 |
|
|
return thread_pointer_rtx;
|
944 |
|
|
}
|
945 |
|
|
|
946 |
|
|
/* Expand a move of TLS symbol OP1 (with constant offset ADDEND already
   split off; ORIG_OP1 is the unsplit original) into OP0, according to
   TLS_KIND.  Return NULL_RTX if the result landed directly in the
   original destination, otherwise the register holding the result.  */

static rtx
ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
			 rtx orig_op1, HOST_WIDE_INT addend)
{
  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
  rtx orig_op0 = op0;
  HOST_WIDE_INT addend_lo, addend_hi;

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      /* Call __tls_get_addr (dtpmod, dtprel) via a libcall block.  */
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtpmod (tga_op1, op1));

      tga_op2 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtprel (tga_op2, op1));

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
					 LCT_CONST, Pmode, 2, tga_op1,
					 Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      if (GET_MODE (op0) != Pmode)
	op0 = tga_ret;
      emit_libcall_block (insns, op0, tga_ret, op1);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      /* ??? This isn't the completely proper way to do local-dynamic
	 If the call to __tls_get_addr is used only by a single symbol,
	 then we should (somehow) move the dtprel to the second arg
	 to avoid the extra add.  */
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtpmod (tga_op1, op1));

      /* Second argument is zero: the call yields the module base.  */
      tga_op2 = const0_rtx;

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
					 LCT_CONST, Pmode, 2, tga_op1,
					 Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
				UNSPEC_LD_BASE);
      tmp = gen_reg_rtx (Pmode);
      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);

      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);
      if (TARGET_TLS64)
	{
	  /* 64-bit dtprel: load it and add to the module base.  */
	  emit_insn (gen_load_dtprel (op0, op1));
	  emit_insn (gen_adddi3 (op0, tmp, op0));
	}
      else
	emit_insn (gen_add_dtprel (op0, op1, tmp));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      /* Fold the high part of the addend into the tprel load; keep the
	 sign-extended 14-bit low part for the final add below.  */
      addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
      addend_hi = addend - addend_lo;

      op1 = plus_constant (op1, addend_hi);
      addend = addend_lo;

      tmp = gen_reg_rtx (Pmode);
      emit_insn (gen_load_tprel (tmp, op1));

      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);
      emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);

      /* Use the unsplit symbol+offset; the addend is folded back in.  */
      op1 = orig_op1;
      addend = 0;
      if (TARGET_TLS64)
	{
	  emit_insn (gen_load_tprel (op0, op1));
	  emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
	}
      else
	emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
      break;

    default:
      gcc_unreachable ();
    }

  /* Apply any remaining low offset.  */
  if (addend)
    op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
			       orig_op0, 1, OPTAB_DIRECT);
  if (orig_op0 == op0)
    return NULL_RTX;
  if (GET_MODE (orig_op0) == Pmode)
    return op0;
  return gen_lowpart (GET_MODE (orig_op0), op0);
}
|
1055 |
|
|
|
1056 |
|
|
/* Expand a move from OP1 into OP0.  Return the RTX the caller should
   use as the move source, or NULL_RTX if the move has already been
   emitted in full.  NOTE: per PR 28490, the symbol/offset handling
   here must stay in sync with ia64_legitimate_constant_p and
   move_operand.  */

rtx
ia64_expand_move (rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);

  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
    op1 = force_reg (mode, op1);

  if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
    {
      HOST_WIDE_INT addend = 0;
      enum tls_model tls_kind;
      rtx sym = op1;

      /* Split a CONST (PLUS (symbol, offset)) into symbol and addend.  */
      if (GET_CODE (op1) == CONST
	  && GET_CODE (XEXP (op1, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
	{
	  addend = INTVAL (XEXP (XEXP (op1, 0), 1));
	  sym = XEXP (XEXP (op1, 0), 0);
	}

      tls_kind = tls_symbolic_operand_type (sym);
      if (tls_kind)
	return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);

      if (any_offset_symbol_operand (sym, mode))
	/* Any offset is fine; keep op1 (symbol+offset) intact.  */
	addend = 0;
      else if (aligned_offset_symbol_operand (sym, mode))
	{
	  HOST_WIDE_INT addend_lo, addend_hi;

	  /* Keep the aligned high part with the symbol; the
	     sign-extended 14-bit low part is added separately.  */
	  addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
	  addend_hi = addend - addend_lo;

	  if (addend_lo != 0)
	    {
	      op1 = plus_constant (sym, addend_hi);
	      addend = addend_lo;
	    }
	  else
	    addend = 0;
	}
      else
	/* The symbol admits no offset at all; add the whole addend
	   separately below.  */
	op1 = sym;

      if (reload_completed)
	{
	  /* We really should have taken care of this offset earlier.  */
	  gcc_assert (addend == 0);
	  if (ia64_expand_load_address (op0, op1))
	    return NULL_RTX;
	}

      if (addend)
	{
	  rtx subtarget = no_new_pseudos ? op0 : gen_reg_rtx (mode);

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget, op1));

	  op1 = expand_simple_binop (mode, PLUS, subtarget,
				     GEN_INT (addend), op0, 1, OPTAB_DIRECT);
	  if (op0 == op1)
	    return NULL_RTX;
	}
    }

  return op1;
}
|
1125 |
|
|
|
1126 |
|
|
/* Split a move from OP1 to OP0 conditional on COND. */
|
1127 |
|
|
|
1128 |
|
|
void
|
1129 |
|
|
ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
|
1130 |
|
|
{
|
1131 |
|
|
rtx insn, first = get_last_insn ();
|
1132 |
|
|
|
1133 |
|
|
emit_move_insn (op0, op1);
|
1134 |
|
|
|
1135 |
|
|
for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
|
1136 |
|
|
if (INSN_P (insn))
|
1137 |
|
|
PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
|
1138 |
|
|
PATTERN (insn));
|
1139 |
|
|
}
|
1140 |
|
|
|
1141 |
|
|
/* Split a post-reload TImode or TFmode reference into two DImode
   components.  This is made extra difficult by the fact that we do
   not get any scratch registers to work with, because reload cannot
   be prevented from giving us a scratch that overlaps the register
   pair involved.  So instead, when addressing memory, we tweak the
   pointer register up and back down with POST_INCs.  Or up and not
   back down when we can get away with it.

   REVERSED is true when the loads must be done in reversed order
   (high word first) for correctness.  DEAD is true when the pointer
   dies with the second insn we generate and therefore the second
   address must not carry a postmodify.

   May return an insn which is to be emitted after the moves.  */

static rtx
ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
{
  rtx fixup = 0;

  switch (GET_CODE (in))
    {
    case REG:
      /* out[0] is emitted first; REVERSED selects which half of the
	 register pair that is.  */
      out[reversed] = gen_rtx_REG (DImode, REGNO (in));
      out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
      break;

    case CONST_INT:
    case CONST_DOUBLE:
      /* Cannot occur reversed.  */
      gcc_assert (!reversed);

      if (GET_MODE (in) != TFmode)
	split_double (in, &out[0], &out[1]);
      else
	/* split_double does not understand how to split a TFmode
	   quantity into a pair of DImode constants.  */
	{
	  REAL_VALUE_TYPE r;
	  unsigned HOST_WIDE_INT p[2];
	  long l[4];  /* TFmode is 128 bits */

	  REAL_VALUE_FROM_CONST_DOUBLE (r, in);
	  real_to_target (l, &r, TFmode);

	  /* Reassemble the four 32-bit target words into two 64-bit
	     host words in memory order.  */
	  if (FLOAT_WORDS_BIG_ENDIAN)
	    {
	      p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
	      p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
	    }
	  else
	    {
	      p[0] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
	      p[1] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
	    }
	  out[0] = GEN_INT (p[0]);
	  out[1] = GEN_INT (p[1]);
	}
      break;

    case MEM:
      {
	rtx base = XEXP (in, 0);
	rtx offset;

	switch (GET_CODE (base))
	  {
	  case REG:
	    if (!reversed)
	      {
		/* Bump the pointer up by 8 for the second access and,
		   unless it is dead, back down again.  */
		out[0] = adjust_automodify_address
		  (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
		out[1] = adjust_automodify_address
		  (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
	      }
	    else
	      {
		/* Reversal requires a pre-increment, which can only
		   be done as a separate insn.  */
		emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
		out[0] = adjust_automodify_address
		  (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
		out[1] = adjust_address (in, DImode, 0);
	      }
	    break;

	  case POST_INC:
	    gcc_assert (!reversed && !dead);

	    /* Just do the increment in two steps.  */
	    out[0] = adjust_automodify_address (in, DImode, 0, 0);
	    out[1] = adjust_automodify_address (in, DImode, 0, 8);
	    break;

	  case POST_DEC:
	    gcc_assert (!reversed && !dead);

	    /* Add 8, subtract 24.  */
	    base = XEXP (base, 0);
	    out[0] = adjust_automodify_address
	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
	    out[1] = adjust_automodify_address
	      (in, DImode,
	       gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),
	       8);
	    break;

	  case POST_MODIFY:
	    gcc_assert (!reversed && !dead);

	    /* Extract and adjust the modification.  This case is
	       trickier than the others, because we might have an
	       index register, or we might have a combined offset that
	       doesn't fit a signed 9-bit displacement field.  We can
	       assume the incoming expression is already legitimate.  */
	    offset = XEXP (base, 1);
	    base = XEXP (base, 0);

	    out[0] = adjust_automodify_address
	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);

	    if (GET_CODE (XEXP (offset, 1)) == REG)
	      {
		/* Can't adjust the postmodify to match.  Emit the
		   original, then a separate addition insn.  */
		out[1] = adjust_automodify_address (in, DImode, 0, 8);
		fixup = gen_adddi3 (base, base, GEN_INT (-8));
	      }
	    else
	      {
		gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
		if (INTVAL (XEXP (offset, 1)) < -256 + 8)
		  {
		    /* Again the postmodify cannot be made to match,
		       but in this case it's more efficient to get rid
		       of the postmodify entirely and fix up with an
		       add insn.  */
		    out[1] = adjust_automodify_address (in, DImode, base, 8);
		    fixup = gen_adddi3
		      (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
		  }
		else
		  {
		    /* Combined offset still fits in the displacement field.
		       (We cannot overflow it at the high end.)  */
		    out[1] = adjust_automodify_address
		      (in, DImode, gen_rtx_POST_MODIFY
		       (Pmode, base, gen_rtx_PLUS
			(Pmode, base,
			 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
		       8);
		  }
	      }
	    break;

	  default:
	    gcc_unreachable ();
	  }
	break;
      }

    default:
      gcc_unreachable ();
    }

  return fixup;
}
|
1308 |
|
|
|
1309 |
|
|
/* Split a TImode or TFmode move instruction after reload.
   This is used by *movtf_internal and *movti_internal.  */
void
ia64_split_tmode_move (rtx operands[])
{
  rtx in[2], out[2], insn;
  rtx fixup[2];
  bool dead = false;
  bool reversed = false;

  /* It is possible for reload to decide to overwrite a pointer with
     the value it points to.  In that case we have to do the loads in
     the appropriate order so that the pointer is not destroyed too
     early.  Also we must not generate a postmodify for that second
     load, or rws_access_regno will die.  */
  if (GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (operands[0], operands[1]))
    {
      rtx base = XEXP (operands[1], 0);
      while (GET_CODE (base) != REG)
	base = XEXP (base, 0);

      if (REGNO (base) == REGNO (operands[0]))
	reversed = true;
      dead = true;
    }
  /* Another reason to do the moves in reversed order is if the first
     element of the target register pair is also the second element of
     the source register pair.  */
  if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
    reversed = true;

  fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
  fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);

/* Attach a REG_INC note when EXP uses an autoincrement address, so the
   pointer register's modification is recorded on INSN.  */
#define MAYBE_ADD_REG_INC_NOTE(INSN, EXP)				\
  if (GET_CODE (EXP) == MEM						\
      && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY			\
	  || GET_CODE (XEXP (EXP, 0)) == POST_INC			\
	  || GET_CODE (XEXP (EXP, 0)) == POST_DEC))			\
    REG_NOTES (INSN) = gen_rtx_EXPR_LIST (REG_INC,			\
					  XEXP (XEXP (EXP, 0), 0),	\
					  REG_NOTES (INSN))

  insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[0]);

  insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[1]);

  /* Emit any compensation adds requested by ia64_split_tmode.  */
  if (fixup[0])
    emit_insn (fixup[0]);
  if (fixup[1])
    emit_insn (fixup[1]);

#undef MAYBE_ADD_REG_INC_NOTE
}
|
1369 |
|
|
|
1370 |
|
|
/* ??? Fixing GR->FR XFmode moves during reload is hard. You need to go
|
1371 |
|
|
through memory plus an extra GR scratch register. Except that you can
|
1372 |
|
|
either get the first from SECONDARY_MEMORY_NEEDED or the second from
|
1373 |
|
|
SECONDARY_RELOAD_CLASS, but not both.
|
1374 |
|
|
|
1375 |
|
|
We got into problems in the first place by allowing a construct like
|
1376 |
|
|
(subreg:XF (reg:TI)), which we got from a union containing a long double.
|
1377 |
|
|
This solution attempts to prevent this situation from occurring. When
|
1378 |
|
|
we see something like the above, we spill the inner register to memory. */
|
1379 |
|
|
|
1380 |
|
|
static rtx
|
1381 |
|
|
spill_xfmode_rfmode_operand (rtx in, int force, enum machine_mode mode)
|
1382 |
|
|
{
|
1383 |
|
|
if (GET_CODE (in) == SUBREG
|
1384 |
|
|
&& GET_MODE (SUBREG_REG (in)) == TImode
|
1385 |
|
|
&& GET_CODE (SUBREG_REG (in)) == REG)
|
1386 |
|
|
{
|
1387 |
|
|
rtx memt = assign_stack_temp (TImode, 16, 0);
|
1388 |
|
|
emit_move_insn (memt, SUBREG_REG (in));
|
1389 |
|
|
return adjust_address (memt, mode, 0);
|
1390 |
|
|
}
|
1391 |
|
|
else if (force && GET_CODE (in) == REG)
|
1392 |
|
|
{
|
1393 |
|
|
rtx memx = assign_stack_temp (mode, 16, 0);
|
1394 |
|
|
emit_move_insn (memx, in);
|
1395 |
|
|
return memx;
|
1396 |
|
|
}
|
1397 |
|
|
else
|
1398 |
|
|
return in;
|
1399 |
|
|
}
|
1400 |
|
|
|
1401 |
|
|
/* Expand the movxf or movrf pattern (MODE says which) with the given
   OPERANDS, returning true if the pattern should then invoke
   DONE.  */

bool
ia64_expand_movxf_movrf (enum machine_mode mode, rtx operands[])
{
  rtx op0 = operands[0];

  if (GET_CODE (op0) == SUBREG)
    op0 = SUBREG_REG (op0);

  /* We must support XFmode loads into general registers for stdarg/vararg,
     unprototyped calls, and a rare case where a long double is passed as
     an argument after a float HFA fills the FP registers.  We split them into
     DImode loads for convenience.  We also need to support XFmode stores
     for the last case.  This case does not happen for stdarg/vararg routines,
     because we do a block store to memory of unnamed arguments.  */

  if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
    {
      rtx out[2];

      /* We're hoping to transform everything that deals with XFmode
	 quantities and GR registers early in the compiler.  */
      gcc_assert (!no_new_pseudos);

      /* Struct to register can just use TImode instead.  */
      if ((GET_CODE (operands[1]) == SUBREG
	   && GET_MODE (SUBREG_REG (operands[1])) == TImode)
	  || (GET_CODE (operands[1]) == REG
	      && GR_REGNO_P (REGNO (operands[1]))))
	{
	  rtx op1 = operands[1];

	  if (GET_CODE (op1) == SUBREG)
	    op1 = SUBREG_REG (op1);
	  else
	    op1 = gen_rtx_REG (TImode, REGNO (op1));

	  emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
	  return true;
	}

      if (GET_CODE (operands[1]) == CONST_DOUBLE)
	{
	  /* Don't word-swap when reading in the constant.  */
	  emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
			  operand_subword (operands[1], WORDS_BIG_ENDIAN,
					   0, mode));
	  emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
			  operand_subword (operands[1], !WORDS_BIG_ENDIAN,
					   0, mode));
	  return true;
	}

      /* If the quantity is in a register not known to be GR, spill it.  */
      if (register_operand (operands[1], mode))
	operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);

      gcc_assert (GET_CODE (operands[1]) == MEM);

      /* Don't word-swap when reading in the value.  */
      out[0] = gen_rtx_REG (DImode, REGNO (op0));
      out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);

      emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
      emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
      return true;
    }

  if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
    {
      /* We're hoping to transform everything that deals with XFmode
	 quantities and GR registers early in the compiler.  */
      gcc_assert (!no_new_pseudos);

      /* Op0 can't be a GR_REG here, as that case is handled above.
	 If op0 is a register, then we spill op1, so that we now have a
	 MEM operand.  This requires creating an XFmode subreg of a TImode reg
	 to force the spill.  */
      if (register_operand (operands[0], mode))
	{
	  rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
	  op1 = gen_rtx_SUBREG (mode, op1, 0);
	  operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
	}

      else
	{
	  rtx in[2];

	  gcc_assert (GET_CODE (operands[0]) == MEM);

	  /* Don't word-swap when writing out the value.  */
	  in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
	  in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);

	  emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
	  emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
	  return true;
	}
    }

  if (!reload_in_progress && !reload_completed)
    {
      operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);

      /* A TImode destination takes the value through a TImode view of
	 memory rather than a direct mode-changing move.  */
      if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
	{
	  rtx memt, memx, in = operands[1];
	  if (CONSTANT_P (in))
	    in = validize_mem (force_const_mem (mode, in));
	  if (GET_CODE (in) == MEM)
	    memt = adjust_address (in, TImode, 0);
	  else
	    {
	      memt = assign_stack_temp (TImode, 16, 0);
	      memx = adjust_address (memt, mode, 0);
	      emit_move_insn (memx, in);
	    }
	  emit_move_insn (op0, memt);
	  return true;
	}

      if (!ia64_move_ok (operands[0], operands[1]))
	operands[1] = force_reg (mode, operands[1]);
    }

  /* Fall through to the pattern's own move.  */
  return false;
}
|
1532 |
|
|
|
1533 |
|
|
/* Emit comparison instruction if necessary, returning the expression
   that holds the compare result in the proper mode.  The operands are
   taken from ia64_compare_op0/ia64_compare_op1.  */

static GTY(()) rtx cmptf_libfunc;

rtx
ia64_expand_compare (enum rtx_code code, enum machine_mode mode)
{
  rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
  rtx cmp;

  /* If we have a BImode input, then we already have a compare result, and
     do not need to emit another comparison.  */
  if (GET_MODE (op0) == BImode)
    {
      gcc_assert ((code == NE || code == EQ) && op1 == const0_rtx);
      cmp = op0;
    }
  /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
     magic number as its third argument, that indicates what to do.
     The return value is an integer to be compared against zero.  */
  else if (GET_MODE (op0) == TFmode)
    {
      enum qfcmp_magic {
	QCMP_INV = 1,	/* Raise FP_INVALID on SNaN as a side effect.  */
	QCMP_UNORD = 2,
	QCMP_EQ = 4,
	QCMP_LT = 8,
	QCMP_GT = 16
      } magic;
      enum rtx_code ncode;
      rtx ret, insns;

      gcc_assert (cmptf_libfunc && GET_MODE (op1) == TFmode);
      switch (code)
	{
	  /* 1 = equal, 0 = not equal.  Equality operators do
	     not raise FP_INVALID when given an SNaN operand.  */
	case EQ:        magic = QCMP_EQ;                  ncode = NE; break;
	case NE:        magic = QCMP_EQ;                  ncode = EQ; break;
	  /* isunordered() from C99.  */
	case UNORDERED: magic = QCMP_UNORD;               ncode = NE; break;
	case ORDERED:   magic = QCMP_UNORD;               ncode = EQ; break;
	  /* Relational operators raise FP_INVALID when given
	     an SNaN operand.  */
	case LT:        magic = QCMP_LT        |QCMP_INV; ncode = NE; break;
	case LE:        magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
	case GT:        magic = QCMP_GT        |QCMP_INV; ncode = NE; break;
	case GE:        magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
	  /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
	     Expanders for buneq etc. would have to be added to ia64.md
	     for this to be useful.  */
	default: gcc_unreachable ();
	}

      start_sequence ();

      ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
				     op0, TFmode, op1, TFmode,
				     GEN_INT (magic), DImode);
      /* Reduce the library result to a BImode predicate register.  */
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
			      gen_rtx_fmt_ee (ncode, BImode,
					      ret, const0_rtx)));

      insns = get_insns ();
      end_sequence ();

      emit_libcall_block (insns, cmp, cmp,
			  gen_rtx_fmt_ee (code, BImode, op0, op1));
      code = NE;
    }
  else
    {
      /* Ordinary case: emit the compare into a fresh BImode register.  */
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
			      gen_rtx_fmt_ee (code, BImode, op0, op1)));
      code = NE;
    }

  /* The caller tests the BImode result against zero.  */
  return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
}
|
1615 |
|
|
|
1616 |
|
|
/* Generate an integral vector comparison.  Return true if the condition has
   been reversed, and so the sense of the comparison should be inverted.

   DEST receives the (possibly reversed) comparison result; OP0 and OP1 are
   the operands.  Only EQ, GT and GTU exist as machine operations, so every
   other code is rewritten in terms of those three.  */

static bool
ia64_expand_vecint_compare (enum rtx_code code, enum machine_mode mode,
			    rtx dest, rtx op0, rtx op1)
{
  bool negate = false;
  rtx x;

  /* Canonicalize the comparison to EQ, GT, GTU.  */
  switch (code)
    {
    case EQ:
    case GT:
    case GTU:
      break;

    /* NE/LE/LEU are the negations of EQ/GT/GTU: reverse and tell the
       caller to invert the sense of the result.  */
    case NE:
    case LE:
    case LEU:
      code = reverse_condition (code);
      negate = true;
      break;

    /* GE/GEU reverse to LT/LTU, which fall through to the operand-swap
       below to become GT/GTU.  */
    case GE:
    case GEU:
      code = reverse_condition (code);
      negate = true;
      /* FALLTHRU */

    case LT:
    case LTU:
      code = swap_condition (code);
      x = op0, op0 = op1, op1 = x;
      break;

    default:
      gcc_unreachable ();
    }

  /* Unsigned parallel compare is not supported by the hardware.  Play some
     tricks to turn this into a signed comparison against 0.  */
  if (code == GTU)
    {
      switch (mode)
	{
	case V2SImode:
	  {
	    rtx t1, t2, mask;

	    /* Perform a parallel modulo subtraction.  */
	    t1 = gen_reg_rtx (V2SImode);
	    emit_insn (gen_subv2si3 (t1, op0, op1));

	    /* Extract the original sign bit of op0.  */
	    mask = GEN_INT (-0x80000000);
	    mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask));
	    mask = force_reg (V2SImode, mask);
	    t2 = gen_reg_rtx (V2SImode);
	    emit_insn (gen_andv2si3 (t2, op0, mask));

	    /* XOR it back into the result of the subtraction.  This results
	       in the sign bit set iff we saw unsigned underflow.  */
	    x = gen_reg_rtx (V2SImode);
	    emit_insn (gen_xorv2si3 (x, t1, t2));

	    /* The underflow indicator is now the sign bit, so a signed
	       compare against zero finishes the job.  */
	    code = GT;
	    op0 = x;
	    op1 = CONST0_RTX (mode);
	  }
	  break;

	case V8QImode:
	case V4HImode:
	  /* Perform a parallel unsigned saturating subtraction.  The
	     result is zero exactly when op0 <=u op1, so testing for EQ 0
	     and negating yields op0 >u op1.  */
	  x = gen_reg_rtx (mode);
	  emit_insn (gen_rtx_SET (VOIDmode, x,
				  gen_rtx_US_MINUS (mode, op0, op1)));

	  code = EQ;
	  op0 = x;
	  op1 = CONST0_RTX (mode);
	  negate = !negate;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* Emit the canonicalized comparison into DEST.  */
  x = gen_rtx_fmt_ee (code, mode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  return negate;
}
|
1712 |
|
|
|
1713 |
|
|
/* Emit an integral vector conditional move.

   operands[0] = destination
   operands[1] = value if true, operands[2] = value if false
   operands[3] = comparison rtx; operands[4], operands[5] = its operands.

   The result is built with AND/ANDCM/IOR masking off the comparison
   result, since there is no real vector cmov instruction.  */

void
ia64_expand_vecint_cmov (rtx operands[])
{
  enum machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[3]);
  bool negate;
  rtx cmp, x, ot, of;

  cmp = gen_reg_rtx (mode);
  negate = ia64_expand_vecint_compare (code, mode, cmp,
				       operands[4], operands[5]);

  /* If the comparison was reversed, swap the true/false values by
     index arithmetic: 1+negate / 2-negate picks the other operand.  */
  ot = operands[1+negate];
  of = operands[2-negate];

  if (ot == CONST0_RTX (mode))
    {
      if (of == CONST0_RTX (mode))
	{
	  /* Both arms are zero; the result is trivially zero.  */
	  emit_move_insn (operands[0], ot);
	  return;
	}

      /* True arm is zero: result = ~cmp & false-arm.  */
      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, of);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
    }
  else if (of == CONST0_RTX (mode))
    {
      /* False arm is zero: result = cmp & true-arm.  */
      x = gen_rtx_AND (mode, cmp, ot);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
    }
  else
    {
      /* General case: (cmp & true-arm) | (~cmp & false-arm).  */
      rtx t, f;

      t = gen_reg_rtx (mode);
      x = gen_rtx_AND (mode, cmp, operands[1+negate]);
      emit_insn (gen_rtx_SET (VOIDmode, t, x));

      f = gen_reg_rtx (mode);
      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, operands[2-negate]);
      emit_insn (gen_rtx_SET (VOIDmode, f, x));

      x = gen_rtx_IOR (mode, t, f);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
    }
}
|
1764 |
|
|
|
1765 |
|
|
/* Emit an integral vector min or max operation.  Return true if all done.

   Returning false tells the caller the named pattern should handle the
   case directly (the hardware has dedicated instructions for it).  */

bool
ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
			   rtx operands[])
{
  rtx xops[6];

  /* These four combinations are supported directly.  */
  if (mode == V8QImode && (code == UMIN || code == UMAX))
    return false;
  if (mode == V4HImode && (code == SMIN || code == SMAX))
    return false;

  /* This combination can be implemented with only saturating subtraction.  */
  if (mode == V4HImode && code == UMAX)
    {
      rtx x, tmp = gen_reg_rtx (mode);

      /* umax (a, b) == (a -us b) + b, since the saturating subtraction
	 yields 0 when a <=u b and a - b otherwise.  */
      x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
      emit_insn (gen_rtx_SET (VOIDmode, tmp, x));

      emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
      return true;
    }

  /* Everything else implemented via vector comparisons.  Set up the
     six-operand array ia64_expand_vecint_cmov expects: dest, true-arm,
     false-arm, comparison, and the comparison's two operands.  */
  xops[0] = operands[0];
  xops[4] = xops[1] = operands[1];
  xops[5] = xops[2] = operands[2];

  switch (code)
    {
    case UMIN:
      code = LTU;
      break;
    case UMAX:
      code = GTU;
      break;
    case SMIN:
      code = LT;
      break;
    case SMAX:
      code = GT;
      break;
    default:
      gcc_unreachable ();
    }
  xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);

  ia64_expand_vecint_cmov (xops);
  return true;
}
|
1818 |
|
|
|
1819 |
|
|
/* Emit an integral vector widening sum operations.

   operands[0] = wide-mode accumulator result
   operands[1] = narrow-mode vector to widen and sum in
   operands[2] = wide-mode addend
   UNSIGNEDP selects zero- vs sign-extension of the elements.  */

void
ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
{
  rtx l, h, x, s;
  enum machine_mode wmode, mode;
  rtx (*unpack_l) (rtx, rtx, rtx);
  rtx (*unpack_h) (rtx, rtx, rtx);
  rtx (*plus) (rtx, rtx, rtx);

  wmode = GET_MODE (operands[0]);
  mode = GET_MODE (operands[1]);

  /* Pick the unpack and add generators for the element width.  */
  switch (mode)
    {
    case V8QImode:
      unpack_l = gen_unpack1_l;
      unpack_h = gen_unpack1_h;
      plus = gen_addv4hi3;
      break;
    case V4HImode:
      unpack_l = gen_unpack2_l;
      unpack_h = gen_unpack2_h;
      plus = gen_addv2si3;
      break;
    default:
      gcc_unreachable ();
    }

  /* Fill in x with the sign extension of each element in op1.  */
  if (unsignedp)
    x = CONST0_RTX (mode);
  else
    {
      bool neg;

      x = gen_reg_rtx (mode);

      /* op1 < 0 yields all-ones per negative element — exactly the
	 high half needed to sign-extend when interleaved below.  */
      neg = ia64_expand_vecint_compare (LT, mode, x, operands[1],
					CONST0_RTX (mode));
      gcc_assert (!neg);
    }

  l = gen_reg_rtx (wmode);
  h = gen_reg_rtx (wmode);
  s = gen_reg_rtx (wmode);

  /* Interleave the elements with their extension bits to produce the
     low and high widened halves, then accumulate both into the sum.  */
  emit_insn (unpack_l (gen_lowpart (mode, l), operands[1], x));
  emit_insn (unpack_h (gen_lowpart (mode, h), operands[1], x));
  emit_insn (plus (s, l, operands[2]));
  emit_insn (plus (operands[0], h, s));
}
|
1872 |
|
|
|
1873 |
|
|
/* Emit a signed or unsigned V8QI dot product operation.

   operands[0] = V2SI result, operands[1], operands[2] = V8QI inputs,
   operands[3] = V2SI accumulator.  The inputs are widened to V4HI,
   multiplied pairwise, and the partial products summed into the
   accumulator.  */

void
ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp)
{
  rtx l1, l2, h1, h2, x1, x2, p1, p2, p3, p4, s1, s2, s3;

  /* Fill in x1 and x2 with the sign extension of each element.  */
  if (unsignedp)
    x1 = x2 = CONST0_RTX (V8QImode);
  else
    {
      bool neg;

      x1 = gen_reg_rtx (V8QImode);
      x2 = gen_reg_rtx (V8QImode);

      /* A LT-zero compare produces all-ones lanes for negative
	 elements, which serve as the sign-extension bits.  */
      neg = ia64_expand_vecint_compare (LT, V8QImode, x1, operands[1],
					CONST0_RTX (V8QImode));
      gcc_assert (!neg);
      neg = ia64_expand_vecint_compare (LT, V8QImode, x2, operands[2],
					CONST0_RTX (V8QImode));
      gcc_assert (!neg);
    }

  /* Widen both inputs to V4HI low and high halves.  */
  l1 = gen_reg_rtx (V4HImode);
  l2 = gen_reg_rtx (V4HImode);
  h1 = gen_reg_rtx (V4HImode);
  h2 = gen_reg_rtx (V4HImode);

  emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l1), operands[1], x1));
  emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l2), operands[2], x2));
  emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h1), operands[1], x1));
  emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h2), operands[2], x2));

  /* Parallel multiplies producing four V2SI partial products.  */
  p1 = gen_reg_rtx (V2SImode);
  p2 = gen_reg_rtx (V2SImode);
  p3 = gen_reg_rtx (V2SImode);
  p4 = gen_reg_rtx (V2SImode);
  emit_insn (gen_pmpy2_r (p1, l1, l2));
  emit_insn (gen_pmpy2_l (p2, l1, l2));
  emit_insn (gen_pmpy2_r (p3, h1, h2));
  emit_insn (gen_pmpy2_l (p4, h1, h2));

  /* Reduce the partial products and fold in the accumulator.  */
  s1 = gen_reg_rtx (V2SImode);
  s2 = gen_reg_rtx (V2SImode);
  s3 = gen_reg_rtx (V2SImode);
  emit_insn (gen_addv2si3 (s1, p1, p2));
  emit_insn (gen_addv2si3 (s2, p3, p4));
  emit_insn (gen_addv2si3 (s3, s1, operands[3]));
  emit_insn (gen_addv2si3 (operands[0], s2, s3));
}
|
1925 |
|
|
|
1926 |
|
|
/* Emit the appropriate sequence for a call. */
|
1927 |
|
|
|
1928 |
|
|
void
|
1929 |
|
|
ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
|
1930 |
|
|
int sibcall_p)
|
1931 |
|
|
{
|
1932 |
|
|
rtx insn, b0;
|
1933 |
|
|
|
1934 |
|
|
addr = XEXP (addr, 0);
|
1935 |
|
|
addr = convert_memory_address (DImode, addr);
|
1936 |
|
|
b0 = gen_rtx_REG (DImode, R_BR (0));
|
1937 |
|
|
|
1938 |
|
|
/* ??? Should do this for functions known to bind local too. */
|
1939 |
|
|
if (TARGET_NO_PIC || TARGET_AUTO_PIC)
|
1940 |
|
|
{
|
1941 |
|
|
if (sibcall_p)
|
1942 |
|
|
insn = gen_sibcall_nogp (addr);
|
1943 |
|
|
else if (! retval)
|
1944 |
|
|
insn = gen_call_nogp (addr, b0);
|
1945 |
|
|
else
|
1946 |
|
|
insn = gen_call_value_nogp (retval, addr, b0);
|
1947 |
|
|
insn = emit_call_insn (insn);
|
1948 |
|
|
}
|
1949 |
|
|
else
|
1950 |
|
|
{
|
1951 |
|
|
if (sibcall_p)
|
1952 |
|
|
insn = gen_sibcall_gp (addr);
|
1953 |
|
|
else if (! retval)
|
1954 |
|
|
insn = gen_call_gp (addr, b0);
|
1955 |
|
|
else
|
1956 |
|
|
insn = gen_call_value_gp (retval, addr, b0);
|
1957 |
|
|
insn = emit_call_insn (insn);
|
1958 |
|
|
|
1959 |
|
|
use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
|
1960 |
|
|
}
|
1961 |
|
|
|
1962 |
|
|
if (sibcall_p)
|
1963 |
|
|
use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
|
1964 |
|
|
}
|
1965 |
|
|
|
1966 |
|
|
/* Reload the GP register (pic_offset_table_rtx) after a call that may
   have clobbered it, either from the general register it was saved in
   or from its memory save slot in the frame.  */

void
ia64_reload_gp (void)
{
  rtx tmp;

  if (current_frame_info.reg_save_gp)
    /* GP was saved in a general register; restore from there.  */
    tmp = gen_rtx_REG (DImode, current_frame_info.reg_save_gp);
  else
    {
      HOST_WIDE_INT offset;

      /* Compute the address of GP's memory save slot, relative to
	 either the frame pointer or the stack pointer.  */
      offset = (current_frame_info.spill_cfa_off
	        + current_frame_info.spill_size);
      if (frame_pointer_needed)
	{
	  tmp = hard_frame_pointer_rtx;
	  offset = -offset;
	}
      else
	{
	  tmp = stack_pointer_rtx;
	  offset = current_frame_info.total_size - offset;
	}

      /* Form base + offset in pic_offset_table_rtx itself, using one
	 add if the offset fits an immediate, two insns otherwise.  */
      if (CONST_OK_FOR_I (offset))
	emit_insn (gen_adddi3 (pic_offset_table_rtx,
			       tmp, GEN_INT (offset)));
      else
	{
	  emit_move_insn (pic_offset_table_rtx, GEN_INT (offset));
	  emit_insn (gen_adddi3 (pic_offset_table_rtx,
				 pic_offset_table_rtx, tmp));
	}

      tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
    }

  emit_move_insn (pic_offset_table_rtx, tmp);
}
|
2005 |
|
|
|
2006 |
|
|
/* Split a call into the final indirect-call form, handling calls
   through function descriptors and the subsequent GP reload.

   RETVAL/ADDR/RETADDR describe the call; SCRATCH_R and SCRATCH_B are a
   general and a branch scratch register; NORETURN_P and SIBCALL_P
   describe the call site.  */

void
ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
		 rtx scratch_b, int noreturn_p, int sibcall_p)
{
  rtx insn;
  bool is_desc = false;

  /* If we find we're calling through a register, then we're actually
     calling through a descriptor, so load up the values.  */
  if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
    {
      rtx tmp;
      bool addr_dead_p;

      /* ??? We are currently constrained to *not* use peep2, because
	 we can legitimately change the global lifetime of the GP
	 (in the form of killing where previously live).  This is
	 because a call through a descriptor doesn't use the previous
	 value of the GP, while a direct call does, and we do not
	 commit to either form until the split here.

	 That said, this means that we lack precise life info for
	 whether ADDR is dead after this call.  This is not terribly
	 important, since we can fix things up essentially for free
	 with the POST_DEC below, but it's nice to not use it when we
	 can immediately tell it's not necessary.  */
      addr_dead_p = ((noreturn_p || sibcall_p
		      || TEST_HARD_REG_BIT (regs_invalidated_by_call,
					    REGNO (addr)))
		     && !FUNCTION_ARG_REGNO_P (REGNO (addr)));

      /* Load the code address into scratch_b.  */
      tmp = gen_rtx_POST_INC (Pmode, addr);
      tmp = gen_rtx_MEM (Pmode, tmp);
      emit_move_insn (scratch_r, tmp);
      emit_move_insn (scratch_b, scratch_r);

      /* Load the GP address.  If ADDR is not dead here, then we must
	 revert the change made above via the POST_INCREMENT.  */
      if (!addr_dead_p)
	tmp = gen_rtx_POST_DEC (Pmode, addr);
      else
	tmp = addr;
      tmp = gen_rtx_MEM (Pmode, tmp);
      emit_move_insn (pic_offset_table_rtx, tmp);

      is_desc = true;
      addr = scratch_b;
    }

  /* Emit the actual (possibly value-returning, possibly sibling) call.  */
  if (sibcall_p)
    insn = gen_sibcall_nogp (addr);
  else if (retval)
    insn = gen_call_value_nogp (retval, addr, retaddr);
  else
    insn = gen_call_nogp (addr, retaddr);
  emit_call_insn (insn);

  /* A descriptor call clobbered GP above; otherwise GP only needs
     reloading when it isn't compile-time constant.  No reload is needed
     if control never returns here.  */
  if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
    ia64_reload_gp ();
}
|
2067 |
|
|
|
2068 |
|
|
/* Expand an atomic operation.  We want to perform MEM <CODE>= VAL atomically.

   This differs from the generic code in that we know about the zero-extending
   properties of cmpxchg, and the zero-extending requirements of ar.ccv.  We
   also know that ld.acq+cmpxchg.rel equals a full barrier.

   The loop we want to generate looks like

	cmp_reg = mem;
      label:
        old_reg = cmp_reg;
	new_reg = cmp_reg op val;
	cmp_reg = compare-and-swap(mem, old_reg, new_reg)
	if (cmp_reg != old_reg)
	  goto label;

   Note that we only do the plain load from memory once.  Subsequent
   iterations use the value loaded by the compare-and-swap pattern.

   OLD_DST/NEW_DST, when non-null, receive the pre-/post-operation
   values.  CODE == NOT denotes NAND (~(mem & val)).  */

void
ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
		       rtx old_dst, rtx new_dst)
{
  enum machine_mode mode = GET_MODE (mem);
  rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
  enum insn_code icode;

  /* Special case for using fetchadd.  */
  if ((mode == SImode || mode == DImode)
      && (code == PLUS || code == MINUS)
      && fetchadd_operand (val, mode))
    {
      /* Subtraction becomes addition of the negated immediate.  */
      if (code == MINUS)
	val = GEN_INT (-INTVAL (val));

      if (!old_dst)
        old_dst = gen_reg_rtx (mode);

      emit_insn (gen_memory_barrier ());

      if (mode == SImode)
	icode = CODE_FOR_fetchadd_acq_si;
      else
	icode = CODE_FOR_fetchadd_acq_di;
      emit_insn (GEN_FCN (icode) (old_dst, mem, val));

      /* fetchadd returns the old value; recompute the new one if the
	 caller wants it.  */
      if (new_dst)
	{
	  new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
					 true, OPTAB_WIDEN);
	  if (new_reg != new_dst)
	    emit_move_insn (new_dst, new_reg);
	}
      return;
    }

  /* Because of the volatile mem read, we get an ld.acq, which is the
     front half of the full barrier.  The end half is the cmpxchg.rel.  */
  gcc_assert (MEM_VOLATILE_P (mem));

  old_reg = gen_reg_rtx (DImode);
  cmp_reg = gen_reg_rtx (DImode);
  label = gen_label_rtx ();

  /* Sub-DImode values are zero-extended into DImode registers, matching
     the zero-extending behavior of cmpxchg and the requirement on
     ar.ccv.  */
  if (mode != DImode)
    {
      val = simplify_gen_subreg (DImode, val, mode, 0);
      emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
    }
  else
    emit_move_insn (cmp_reg, mem);

  emit_label (label);

  /* ar.ccv holds the value cmpxchg compares against.  */
  ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
  emit_move_insn (old_reg, cmp_reg);
  emit_move_insn (ar_ccv, cmp_reg);

  if (old_dst)
    emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));

  /* Compute new_reg = cmp_reg CODE val; NAND is expressed as NOT then
     AND.  */
  new_reg = cmp_reg;
  if (code == NOT)
    {
      new_reg = expand_simple_unop (DImode, NOT, new_reg, NULL_RTX, true);
      code = AND;
    }
  new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
				 true, OPTAB_DIRECT);

  if (mode != DImode)
    new_reg = gen_lowpart (mode, new_reg);
  if (new_dst)
    emit_move_insn (new_dst, new_reg);

  switch (mode)
    {
    case QImode:  icode = CODE_FOR_cmpxchg_rel_qi;  break;
    case HImode:  icode = CODE_FOR_cmpxchg_rel_hi;  break;
    case SImode:  icode = CODE_FOR_cmpxchg_rel_si;  break;
    case DImode:  icode = CODE_FOR_cmpxchg_rel_di;  break;
    default:
      gcc_unreachable ();
    }

  emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));

  /* Retry if another thread changed MEM between the load and the
     cmpxchg; cmp_reg now holds the value freshly read by cmpxchg.  */
  emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
}
|
2177 |
|
|
|
2178 |
|
|
/* Begin the assembly file.  */

static void
ia64_file_start (void)
{
  /* Variable tracking should be run after all optimizations which change order
     of insns.  It also needs a valid CFG.  This can't be done in
     ia64_override_options, because flag_var_tracking is finalized after
     that.  */
  ia64_flag_var_tracking = flag_var_tracking;
  flag_var_tracking = 0;

  default_file_start ();
  emit_safe_across_calls ();
}
|
2193 |
|
|
|
2194 |
|
|
void
|
2195 |
|
|
emit_safe_across_calls (void)
|
2196 |
|
|
{
|
2197 |
|
|
unsigned int rs, re;
|
2198 |
|
|
int out_state;
|
2199 |
|
|
|
2200 |
|
|
rs = 1;
|
2201 |
|
|
out_state = 0;
|
2202 |
|
|
while (1)
|
2203 |
|
|
{
|
2204 |
|
|
while (rs < 64 && call_used_regs[PR_REG (rs)])
|
2205 |
|
|
rs++;
|
2206 |
|
|
if (rs >= 64)
|
2207 |
|
|
break;
|
2208 |
|
|
for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
|
2209 |
|
|
continue;
|
2210 |
|
|
if (out_state == 0)
|
2211 |
|
|
{
|
2212 |
|
|
fputs ("\t.pred.safe_across_calls ", asm_out_file);
|
2213 |
|
|
out_state = 1;
|
2214 |
|
|
}
|
2215 |
|
|
else
|
2216 |
|
|
fputc (',', asm_out_file);
|
2217 |
|
|
if (re == rs + 1)
|
2218 |
|
|
fprintf (asm_out_file, "p%u", rs);
|
2219 |
|
|
else
|
2220 |
|
|
fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
|
2221 |
|
|
rs = re + 1;
|
2222 |
|
|
}
|
2223 |
|
|
if (out_state)
|
2224 |
|
|
fputc ('\n', asm_out_file);
|
2225 |
|
|
}
|
2226 |
|
|
|
2227 |
|
|
/* Helper function for ia64_compute_frame_size: find an appropriate general
|
2228 |
|
|
register to spill some special register to. SPECIAL_SPILL_MASK contains
|
2229 |
|
|
bits in GR0 to GR31 that have already been allocated by this routine.
|
2230 |
|
|
TRY_LOCALS is true if we should attempt to locate a local regnum. */
|
2231 |
|
|
|
2232 |
|
|
static int
|
2233 |
|
|
find_gr_spill (int try_locals)
|
2234 |
|
|
{
|
2235 |
|
|
int regno;
|
2236 |
|
|
|
2237 |
|
|
/* If this is a leaf function, first try an otherwise unused
|
2238 |
|
|
call-clobbered register. */
|
2239 |
|
|
if (current_function_is_leaf)
|
2240 |
|
|
{
|
2241 |
|
|
for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
|
2242 |
|
|
if (! regs_ever_live[regno]
|
2243 |
|
|
&& call_used_regs[regno]
|
2244 |
|
|
&& ! fixed_regs[regno]
|
2245 |
|
|
&& ! global_regs[regno]
|
2246 |
|
|
&& ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
|
2247 |
|
|
{
|
2248 |
|
|
current_frame_info.gr_used_mask |= 1 << regno;
|
2249 |
|
|
return regno;
|
2250 |
|
|
}
|
2251 |
|
|
}
|
2252 |
|
|
|
2253 |
|
|
if (try_locals)
|
2254 |
|
|
{
|
2255 |
|
|
regno = current_frame_info.n_local_regs;
|
2256 |
|
|
/* If there is a frame pointer, then we can't use loc79, because
|
2257 |
|
|
that is HARD_FRAME_POINTER_REGNUM. In particular, see the
|
2258 |
|
|
reg_name switching code in ia64_expand_prologue. */
|
2259 |
|
|
if (regno < (80 - frame_pointer_needed))
|
2260 |
|
|
{
|
2261 |
|
|
current_frame_info.n_local_regs = regno + 1;
|
2262 |
|
|
return LOC_REG (0) + regno;
|
2263 |
|
|
}
|
2264 |
|
|
}
|
2265 |
|
|
|
2266 |
|
|
/* Failed to find a general register to spill to. Must use stack. */
|
2267 |
|
|
return 0;
|
2268 |
|
|
}
|
2269 |
|
|
|
2270 |
|
|
/* In order to make for nice schedules, we try to allocate every temporary
|
2271 |
|
|
to a different register. We must of course stay away from call-saved,
|
2272 |
|
|
fixed, and global registers. We must also stay away from registers
|
2273 |
|
|
allocated in current_frame_info.gr_used_mask, since those include regs
|
2274 |
|
|
used all through the prologue.
|
2275 |
|
|
|
2276 |
|
|
Any register allocated here must be used immediately. The idea is to
|
2277 |
|
|
aid scheduling, not to solve data flow problems. */
|
2278 |
|
|
|
2279 |
|
|
static int last_scratch_gr_reg;
|
2280 |
|
|
|
2281 |
|
|
static int
|
2282 |
|
|
next_scratch_gr_reg (void)
|
2283 |
|
|
{
|
2284 |
|
|
int i, regno;
|
2285 |
|
|
|
2286 |
|
|
for (i = 0; i < 32; ++i)
|
2287 |
|
|
{
|
2288 |
|
|
regno = (last_scratch_gr_reg + i + 1) & 31;
|
2289 |
|
|
if (call_used_regs[regno]
|
2290 |
|
|
&& ! fixed_regs[regno]
|
2291 |
|
|
&& ! global_regs[regno]
|
2292 |
|
|
&& ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
|
2293 |
|
|
{
|
2294 |
|
|
last_scratch_gr_reg = regno;
|
2295 |
|
|
return regno;
|
2296 |
|
|
}
|
2297 |
|
|
}
|
2298 |
|
|
|
2299 |
|
|
/* There must be _something_ available. */
|
2300 |
|
|
gcc_unreachable ();
|
2301 |
|
|
}
|
2302 |
|
|
|
2303 |
|
|
/* Helper function for ia64_compute_frame_size, called through
|
2304 |
|
|
diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
|
2305 |
|
|
|
2306 |
|
|
static void
|
2307 |
|
|
mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
|
2308 |
|
|
{
|
2309 |
|
|
unsigned int regno = REGNO (reg);
|
2310 |
|
|
if (regno < 32)
|
2311 |
|
|
{
|
2312 |
|
|
unsigned int i, n = hard_regno_nregs[regno][GET_MODE (reg)];
|
2313 |
|
|
for (i = 0; i < n; ++i)
|
2314 |
|
|
current_frame_info.gr_used_mask |= 1 << (regno + i);
|
2315 |
|
|
}
|
2316 |
|
|
}
|
2317 |
|
|
|
2318 |
|
|
/* Returns the number of bytes offset between the frame pointer and the stack
|
2319 |
|
|
pointer for the current function. SIZE is the number of bytes of space
|
2320 |
|
|
needed for local variables. */
|
2321 |
|
|
|
2322 |
|
|
static void
|
2323 |
|
|
ia64_compute_frame_size (HOST_WIDE_INT size)
|
2324 |
|
|
{
|
2325 |
|
|
HOST_WIDE_INT total_size;
|
2326 |
|
|
HOST_WIDE_INT spill_size = 0;
|
2327 |
|
|
HOST_WIDE_INT extra_spill_size = 0;
|
2328 |
|
|
HOST_WIDE_INT pretend_args_size;
|
2329 |
|
|
HARD_REG_SET mask;
|
2330 |
|
|
int n_spilled = 0;
|
2331 |
|
|
int spilled_gr_p = 0;
|
2332 |
|
|
int spilled_fr_p = 0;
|
2333 |
|
|
unsigned int regno;
|
2334 |
|
|
int i;
|
2335 |
|
|
|
2336 |
|
|
if (current_frame_info.initialized)
|
2337 |
|
|
return;
|
2338 |
|
|
|
2339 |
|
|
  memset (&current_frame_info, 0, sizeof current_frame_info);
|
2340 |
|
|
CLEAR_HARD_REG_SET (mask);
|
2341 |
|
|
|
2342 |
|
|
/* Don't allocate scratches to the return register. */
|
2343 |
|
|
diddle_return_value (mark_reg_gr_used_mask, NULL);
|
2344 |
|
|
|
2345 |
|
|
/* Don't allocate scratches to the EH scratch registers. */
|
2346 |
|
|
if (cfun->machine->ia64_eh_epilogue_sp)
|
2347 |
|
|
mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
|
2348 |
|
|
if (cfun->machine->ia64_eh_epilogue_bsp)
|
2349 |
|
|
mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
|
2350 |
|
|
|
2351 |
|
|
/* Find the size of the register stack frame. We have only 80 local
|
2352 |
|
|
registers, because we reserve 8 for the inputs and 8 for the
|
2353 |
|
|
outputs. */
|
2354 |
|
|
|
2355 |
|
|
/* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
|
2356 |
|
|
since we'll be adjusting that down later. */
|
2357 |
|
|
regno = LOC_REG (78) + ! frame_pointer_needed;
|
2358 |
|
|
for (; regno >= LOC_REG (0); regno--)
|
2359 |
|
|
if (regs_ever_live[regno])
|
2360 |
|
|
break;
|
2361 |
|
|
current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
|
2362 |
|
|
|
2363 |
|
|
/* For functions marked with the syscall_linkage attribute, we must mark
|
2364 |
|
|
all eight input registers as in use, so that locals aren't visible to
|
2365 |
|
|
the caller. */
|
2366 |
|
|
|
2367 |
|
|
if (cfun->machine->n_varargs > 0
|
2368 |
|
|
|| lookup_attribute ("syscall_linkage",
|
2369 |
|
|
TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
|
2370 |
|
|
current_frame_info.n_input_regs = 8;
|
2371 |
|
|
else
|
2372 |
|
|
{
|
2373 |
|
|
for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
|
2374 |
|
|
if (regs_ever_live[regno])
|
2375 |
|
|
break;
|
2376 |
|
|
current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
|
2377 |
|
|
}
|
2378 |
|
|
|
2379 |
|
|
for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
|
2380 |
|
|
if (regs_ever_live[regno])
|
2381 |
|
|
break;
|
2382 |
|
|
i = regno - OUT_REG (0) + 1;
|
2383 |
|
|
|
2384 |
|
|
#ifndef PROFILE_HOOK
|
2385 |
|
|
/* When -p profiling, we need one output register for the mcount argument.
|
2386 |
|
|
Likewise for -a profiling for the bb_init_func argument. For -ax
|
2387 |
|
|
profiling, we need two output registers for the two bb_init_trace_func
|
2388 |
|
|
arguments. */
|
2389 |
|
|
if (current_function_profile)
|
2390 |
|
|
i = MAX (i, 1);
|
2391 |
|
|
#endif
|
2392 |
|
|
current_frame_info.n_output_regs = i;
|
2393 |
|
|
|
2394 |
|
|
/* ??? No rotating register support yet. */
|
2395 |
|
|
current_frame_info.n_rotate_regs = 0;
|
2396 |
|
|
|
2397 |
|
|
/* Discover which registers need spilling, and how much room that
|
2398 |
|
|
will take. Begin with floating point and general registers,
|
2399 |
|
|
which will always wind up on the stack. */
|
2400 |
|
|
|
2401 |
|
|
for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
|
2402 |
|
|
if (regs_ever_live[regno] && ! call_used_regs[regno])
|
2403 |
|
|
{
|
2404 |
|
|
SET_HARD_REG_BIT (mask, regno);
|
2405 |
|
|
spill_size += 16;
|
2406 |
|
|
n_spilled += 1;
|
2407 |
|
|
spilled_fr_p = 1;
|
2408 |
|
|
}
|
2409 |
|
|
|
2410 |
|
|
for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
|
2411 |
|
|
if (regs_ever_live[regno] && ! call_used_regs[regno])
|
2412 |
|
|
{
|
2413 |
|
|
SET_HARD_REG_BIT (mask, regno);
|
2414 |
|
|
spill_size += 8;
|
2415 |
|
|
n_spilled += 1;
|
2416 |
|
|
spilled_gr_p = 1;
|
2417 |
|
|
}
|
2418 |
|
|
|
2419 |
|
|
for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
|
2420 |
|
|
if (regs_ever_live[regno] && ! call_used_regs[regno])
|
2421 |
|
|
{
|
2422 |
|
|
SET_HARD_REG_BIT (mask, regno);
|
2423 |
|
|
spill_size += 8;
|
2424 |
|
|
n_spilled += 1;
|
2425 |
|
|
}
|
2426 |
|
|
|
2427 |
|
|
/* Now come all special registers that might get saved in other
|
2428 |
|
|
general registers. */
|
2429 |
|
|
|
2430 |
|
|
if (frame_pointer_needed)
|
2431 |
|
|
{
|
2432 |
|
|
current_frame_info.reg_fp = find_gr_spill (1);
|
2433 |
|
|
/* If we did not get a register, then we take LOC79. This is guaranteed
|
2434 |
|
|
to be free, even if regs_ever_live is already set, because this is
|
2435 |
|
|
HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
|
2436 |
|
|
as we don't count loc79 above. */
|
2437 |
|
|
if (current_frame_info.reg_fp == 0)
|
2438 |
|
|
{
|
2439 |
|
|
current_frame_info.reg_fp = LOC_REG (79);
|
2440 |
|
|
current_frame_info.n_local_regs++;
|
2441 |
|
|
}
|
2442 |
|
|
}
|
2443 |
|
|
|
2444 |
|
|
if (! current_function_is_leaf)
|
2445 |
|
|
{
|
2446 |
|
|
/* Emit a save of BR0 if we call other functions. Do this even
|
2447 |
|
|
if this function doesn't return, as EH depends on this to be
|
2448 |
|
|
able to unwind the stack. */
|
2449 |
|
|
SET_HARD_REG_BIT (mask, BR_REG (0));
|
2450 |
|
|
|
2451 |
|
|
current_frame_info.reg_save_b0 = find_gr_spill (1);
|
2452 |
|
|
if (current_frame_info.reg_save_b0 == 0)
|
2453 |
|
|
{
|
2454 |
|
|
extra_spill_size += 8;
|
2455 |
|
|
n_spilled += 1;
|
2456 |
|
|
}
|
2457 |
|
|
|
2458 |
|
|
/* Similarly for ar.pfs. */
|
2459 |
|
|
SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
|
2460 |
|
|
current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
|
2461 |
|
|
if (current_frame_info.reg_save_ar_pfs == 0)
|
2462 |
|
|
{
|
2463 |
|
|
extra_spill_size += 8;
|
2464 |
|
|
n_spilled += 1;
|
2465 |
|
|
}
|
2466 |
|
|
|
2467 |
|
|
/* Similarly for gp. Note that if we're calling setjmp, the stacked
|
2468 |
|
|
registers are clobbered, so we fall back to the stack. */
|
2469 |
|
|
current_frame_info.reg_save_gp
|
2470 |
|
|
= (current_function_calls_setjmp ? 0 : find_gr_spill (1));
|
2471 |
|
|
if (current_frame_info.reg_save_gp == 0)
|
2472 |
|
|
{
|
2473 |
|
|
SET_HARD_REG_BIT (mask, GR_REG (1));
|
2474 |
|
|
spill_size += 8;
|
2475 |
|
|
n_spilled += 1;
|
2476 |
|
|
}
|
2477 |
|
|
}
|
2478 |
|
|
else
|
2479 |
|
|
{
|
2480 |
|
|
if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
|
2481 |
|
|
{
|
2482 |
|
|
SET_HARD_REG_BIT (mask, BR_REG (0));
|
2483 |
|
|
extra_spill_size += 8;
|
2484 |
|
|
n_spilled += 1;
|
2485 |
|
|
}
|
2486 |
|
|
|
2487 |
|
|
if (regs_ever_live[AR_PFS_REGNUM])
|
2488 |
|
|
{
|
2489 |
|
|
SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
|
2490 |
|
|
current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
|
2491 |
|
|
if (current_frame_info.reg_save_ar_pfs == 0)
|
2492 |
|
|
{
|
2493 |
|
|
extra_spill_size += 8;
|
2494 |
|
|
n_spilled += 1;
|
2495 |
|
|
}
|
2496 |
|
|
}
|
2497 |
|
|
}
|
2498 |
|
|
|
2499 |
|
|
/* Unwind descriptor hackery: things are most efficient if we allocate
|
2500 |
|
|
consecutive GR save registers for RP, PFS, FP in that order. However,
|
2501 |
|
|
it is absolutely critical that FP get the only hard register that's
|
2502 |
|
|
guaranteed to be free, so we allocated it first. If all three did
|
2503 |
|
|
happen to be allocated hard regs, and are consecutive, rearrange them
|
2504 |
|
|
into the preferred order now. */
|
2505 |
|
|
if (current_frame_info.reg_fp != 0
|
2506 |
|
|
&& current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
|
2507 |
|
|
&& current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
|
2508 |
|
|
{
|
2509 |
|
|
current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
|
2510 |
|
|
current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
|
2511 |
|
|
current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
|
2512 |
|
|
}
|
2513 |
|
|
|
2514 |
|
|
/* See if we need to store the predicate register block. */
|
2515 |
|
|
for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
|
2516 |
|
|
if (regs_ever_live[regno] && ! call_used_regs[regno])
|
2517 |
|
|
break;
|
2518 |
|
|
if (regno <= PR_REG (63))
|
2519 |
|
|
{
|
2520 |
|
|
SET_HARD_REG_BIT (mask, PR_REG (0));
|
2521 |
|
|
current_frame_info.reg_save_pr = find_gr_spill (1);
|
2522 |
|
|
if (current_frame_info.reg_save_pr == 0)
|
2523 |
|
|
{
|
2524 |
|
|
extra_spill_size += 8;
|
2525 |
|
|
n_spilled += 1;
|
2526 |
|
|
}
|
2527 |
|
|
|
2528 |
|
|
/* ??? Mark them all as used so that register renaming and such
|
2529 |
|
|
are free to use them. */
|
2530 |
|
|
for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
|
2531 |
|
|
regs_ever_live[regno] = 1;
|
2532 |
|
|
}
|
2533 |
|
|
|
2534 |
|
|
/* If we're forced to use st8.spill, we're forced to save and restore
|
2535 |
|
|
ar.unat as well. The check for existing liveness allows inline asm
|
2536 |
|
|
to touch ar.unat. */
|
2537 |
|
|
if (spilled_gr_p || cfun->machine->n_varargs
|
2538 |
|
|
|| regs_ever_live[AR_UNAT_REGNUM])
|
2539 |
|
|
{
|
2540 |
|
|
regs_ever_live[AR_UNAT_REGNUM] = 1;
|
2541 |
|
|
SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
|
2542 |
|
|
current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
|
2543 |
|
|
if (current_frame_info.reg_save_ar_unat == 0)
|
2544 |
|
|
{
|
2545 |
|
|
extra_spill_size += 8;
|
2546 |
|
|
n_spilled += 1;
|
2547 |
|
|
}
|
2548 |
|
|
}
|
2549 |
|
|
|
2550 |
|
|
if (regs_ever_live[AR_LC_REGNUM])
|
2551 |
|
|
{
|
2552 |
|
|
SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
|
2553 |
|
|
current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
|
2554 |
|
|
if (current_frame_info.reg_save_ar_lc == 0)
|
2555 |
|
|
{
|
2556 |
|
|
extra_spill_size += 8;
|
2557 |
|
|
n_spilled += 1;
|
2558 |
|
|
}
|
2559 |
|
|
}
|
2560 |
|
|
|
2561 |
|
|
/* If we have an odd number of words of pretend arguments written to
|
2562 |
|
|
the stack, then the FR save area will be unaligned. We round the
|
2563 |
|
|
size of this area up to keep things 16 byte aligned. */
|
2564 |
|
|
if (spilled_fr_p)
|
2565 |
|
|
pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
|
2566 |
|
|
else
|
2567 |
|
|
pretend_args_size = current_function_pretend_args_size;
|
2568 |
|
|
|
2569 |
|
|
total_size = (spill_size + extra_spill_size + size + pretend_args_size
|
2570 |
|
|
+ current_function_outgoing_args_size);
|
2571 |
|
|
total_size = IA64_STACK_ALIGN (total_size);
|
2572 |
|
|
|
2573 |
|
|
/* We always use the 16-byte scratch area provided by the caller, but
|
2574 |
|
|
if we are a leaf function, there's no one to which we need to provide
|
2575 |
|
|
a scratch area. */
|
2576 |
|
|
if (current_function_is_leaf)
|
2577 |
|
|
total_size = MAX (0, total_size - 16);
|
2578 |
|
|
|
2579 |
|
|
current_frame_info.total_size = total_size;
|
2580 |
|
|
current_frame_info.spill_cfa_off = pretend_args_size - 16;
|
2581 |
|
|
current_frame_info.spill_size = spill_size;
|
2582 |
|
|
current_frame_info.extra_spill_size = extra_spill_size;
|
2583 |
|
|
COPY_HARD_REG_SET (current_frame_info.mask, mask);
|
2584 |
|
|
current_frame_info.n_spilled = n_spilled;
|
2585 |
|
|
current_frame_info.initialized = reload_completed;
|
2586 |
|
|
}
|
2587 |
|
|
|
2588 |
|
|
/* Compute the initial difference between the specified pair of registers. */
|
2589 |
|
|
|
2590 |
|
|
HOST_WIDE_INT
|
2591 |
|
|
ia64_initial_elimination_offset (int from, int to)
|
2592 |
|
|
{
|
2593 |
|
|
HOST_WIDE_INT offset;
|
2594 |
|
|
|
2595 |
|
|
ia64_compute_frame_size (get_frame_size ());
|
2596 |
|
|
switch (from)
|
2597 |
|
|
{
|
2598 |
|
|
case FRAME_POINTER_REGNUM:
|
2599 |
|
|
switch (to)
|
2600 |
|
|
{
|
2601 |
|
|
case HARD_FRAME_POINTER_REGNUM:
|
2602 |
|
|
if (current_function_is_leaf)
|
2603 |
|
|
offset = -current_frame_info.total_size;
|
2604 |
|
|
else
|
2605 |
|
|
offset = -(current_frame_info.total_size
|
2606 |
|
|
- current_function_outgoing_args_size - 16);
|
2607 |
|
|
break;
|
2608 |
|
|
|
2609 |
|
|
case STACK_POINTER_REGNUM:
|
2610 |
|
|
if (current_function_is_leaf)
|
2611 |
|
|
offset = 0;
|
2612 |
|
|
else
|
2613 |
|
|
offset = 16 + current_function_outgoing_args_size;
|
2614 |
|
|
break;
|
2615 |
|
|
|
2616 |
|
|
default:
|
2617 |
|
|
gcc_unreachable ();
|
2618 |
|
|
}
|
2619 |
|
|
break;
|
2620 |
|
|
|
2621 |
|
|
case ARG_POINTER_REGNUM:
|
2622 |
|
|
/* Arguments start above the 16 byte save area, unless stdarg
|
2623 |
|
|
in which case we store through the 16 byte save area. */
|
2624 |
|
|
switch (to)
|
2625 |
|
|
{
|
2626 |
|
|
case HARD_FRAME_POINTER_REGNUM:
|
2627 |
|
|
offset = 16 - current_function_pretend_args_size;
|
2628 |
|
|
break;
|
2629 |
|
|
|
2630 |
|
|
case STACK_POINTER_REGNUM:
|
2631 |
|
|
offset = (current_frame_info.total_size
|
2632 |
|
|
+ 16 - current_function_pretend_args_size);
|
2633 |
|
|
break;
|
2634 |
|
|
|
2635 |
|
|
default:
|
2636 |
|
|
gcc_unreachable ();
|
2637 |
|
|
}
|
2638 |
|
|
break;
|
2639 |
|
|
|
2640 |
|
|
default:
|
2641 |
|
|
gcc_unreachable ();
|
2642 |
|
|
}
|
2643 |
|
|
|
2644 |
|
|
return offset;
|
2645 |
|
|
}
|
2646 |
|
|
|
2647 |
|
|
/* If there are more than a trivial number of register spills, we use
|
2648 |
|
|
two interleaved iterators so that we can get two memory references
|
2649 |
|
|
per insn group.
|
2650 |
|
|
|
2651 |
|
|
In order to simplify things in the prologue and epilogue expanders,
|
2652 |
|
|
we use helper functions to fix up the memory references after the
|
2653 |
|
|
fact with the appropriate offsets to a POST_MODIFY memory mode.
|
2654 |
|
|
The following data structure tracks the state of the two iterators
|
2655 |
|
|
while insns are being emitted. */
|
2656 |
|
|
|
2657 |
|
|
/* State shared between setup_spill_pointers, spill_restore_mem, do_spill,
   and do_restore while spill/fill insns are being emitted.  Two iterator
   slots are kept so that consecutive memory references can alternate
   between base registers.  */

struct spill_fill_data
{
  rtx init_after;	/* point at which to emit initializations */
  rtx init_reg[2];	/* initial base register */
  rtx iter_reg[2];	/* the iterator registers */
  rtx *prev_addr[2];	/* address of last memory use */
  rtx prev_insn[2];	/* the insn corresponding to prev_addr */
  HOST_WIDE_INT prev_off[2];	/* last offset */
  int n_iter;		/* number of iterators in use */
  int next_iter;	/* next iterator to use */
  unsigned int save_gr_used_mask;  /* gr_used_mask saved by
				      setup_spill_pointers, restored by
				      finish_spill_pointers */
};

static struct spill_fill_data spill_fill_data;
|
2671 |
|
|
|
2672 |
|
|
static void
|
2673 |
|
|
setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
|
2674 |
|
|
{
|
2675 |
|
|
int i;
|
2676 |
|
|
|
2677 |
|
|
spill_fill_data.init_after = get_last_insn ();
|
2678 |
|
|
spill_fill_data.init_reg[0] = init_reg;
|
2679 |
|
|
spill_fill_data.init_reg[1] = init_reg;
|
2680 |
|
|
spill_fill_data.prev_addr[0] = NULL;
|
2681 |
|
|
spill_fill_data.prev_addr[1] = NULL;
|
2682 |
|
|
spill_fill_data.prev_insn[0] = NULL;
|
2683 |
|
|
spill_fill_data.prev_insn[1] = NULL;
|
2684 |
|
|
spill_fill_data.prev_off[0] = cfa_off;
|
2685 |
|
|
spill_fill_data.prev_off[1] = cfa_off;
|
2686 |
|
|
spill_fill_data.next_iter = 0;
|
2687 |
|
|
spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
|
2688 |
|
|
|
2689 |
|
|
spill_fill_data.n_iter = 1 + (n_spills > 2);
|
2690 |
|
|
for (i = 0; i < spill_fill_data.n_iter; ++i)
|
2691 |
|
|
{
|
2692 |
|
|
int regno = next_scratch_gr_reg ();
|
2693 |
|
|
spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
|
2694 |
|
|
current_frame_info.gr_used_mask |= 1 << regno;
|
2695 |
|
|
}
|
2696 |
|
|
}
|
2697 |
|
|
|
2698 |
|
|
/* Release the iterator scratch registers allocated by
   setup_spill_pointers by restoring the saved gr_used_mask.  */
static void
finish_spill_pointers (void)
{
  current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
}
|
2703 |
|
|
|
2704 |
|
|
/* Return a MEM through which REG can be spilled to / filled from the
   save-area slot at CFA offset CFA_OFF, using the current iterator.
   The previous memory reference on this iterator is retroactively
   rewritten into a POST_MODIFY where possible, so that the iterator
   register advances for free; otherwise explicit address arithmetic
   is emitted.  Advances next_iter on exit.  */
static rtx
spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
{
  int iter = spill_fill_data.next_iter;
  /* Distance from the last slot referenced via this iterator.  */
  HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
  rtx disp_rtx = GEN_INT (disp);
  rtx mem;

  if (spill_fill_data.prev_addr[iter])
    {
      /* If DISP fits the post-increment immediate range, patch the
	 previous insn's address into a POST_MODIFY and note the
	 auto-increment so flow analysis sees the side effect.  */
      if (CONST_OK_FOR_N (disp))
	{
	  *spill_fill_data.prev_addr[iter]
	    = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
				   gen_rtx_PLUS (DImode,
						 spill_fill_data.iter_reg[iter],
						 disp_rtx));
	  REG_NOTES (spill_fill_data.prev_insn[iter])
	    = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
				 REG_NOTES (spill_fill_data.prev_insn[iter]));
	}
      else
	{
	  /* ??? Could use register post_modify for loads.  */
	  /* Displacement too large for post-modify: advance the
	     iterator with an explicit add, materializing DISP in a
	     scratch register if it won't fit an add immediate.  */
	  if (! CONST_OK_FOR_I (disp))
	    {
	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
	      emit_move_insn (tmp, disp_rtx);
	      disp_rtx = tmp;
	    }
	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
				 spill_fill_data.iter_reg[iter], disp_rtx));
	}
    }
  /* Micro-optimization: if we've created a frame pointer, it's at
     CFA 0, which may allow the real iterator to be initialized lower,
     slightly increasing parallelism.  Also, if there are few saves
     it may eliminate the iterator entirely.  */
  else if (disp == 0
	   && spill_fill_data.init_reg[iter] == stack_pointer_rtx
	   && frame_pointer_needed)
    {
      mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
      set_mem_alias_set (mem, get_varargs_alias_set ());
      return mem;
    }
  else
    {
      /* First use of this iterator: initialize the iterator register
	 from the init register (plus DISP if nonzero).  */
      rtx seq, insn;

      if (disp == 0)
	seq = gen_movdi (spill_fill_data.iter_reg[iter],
			 spill_fill_data.init_reg[iter]);
      else
	{
	  start_sequence ();

	  if (! CONST_OK_FOR_I (disp))
	    {
	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
	      emit_move_insn (tmp, disp_rtx);
	      disp_rtx = tmp;
	    }

	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
				 spill_fill_data.init_reg[iter],
				 disp_rtx));

	  seq = get_insns ();
	  end_sequence ();
	}

      /* Careful for being the first insn in a sequence.  */
      if (spill_fill_data.init_after)
	insn = emit_insn_after (seq, spill_fill_data.init_after);
      else
	{
	  rtx first = get_insns ();
	  if (first)
	    insn = emit_insn_before (seq, first);
	  else
	    insn = emit_insn (seq);
	}
      spill_fill_data.init_after = insn;

      /* If DISP is 0, we may or may not have a further adjustment
	 afterward.  If we do, then the load/store insn may be modified
	 to be a post-modify.  If we don't, then this copy may be
	 eliminated by copyprop_hardreg_forward, which makes this
	 insn garbage, which runs afoul of the sanity check in
	 propagate_one_insn.  So mark this insn as legal to delete.  */
      if (disp == 0)
	REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
					     REG_NOTES (insn));
    }

  mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);

  /* ??? Not all of the spills are for varargs, but some of them are.
     The rest of the spills belong in an alias set of their own.  But
     it doesn't actually hurt to include them here.  */
  set_mem_alias_set (mem, get_varargs_alias_set ());

  /* Record this reference so the next call can turn it into a
     POST_MODIFY, then rotate to the other iterator.  */
  spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
  spill_fill_data.prev_off[iter] = cfa_off;

  if (++iter >= spill_fill_data.n_iter)
    iter = 0;
  spill_fill_data.next_iter = iter;

  return mem;
}
|
2816 |
|
|
|
2817 |
|
|
/* Emit a spill of REG into the save-area slot at CFA offset CFA_OFF,
   using MOVE_FN to generate the store.  If FRAME_REG is non-null, the
   insn is marked frame-related and given a REG_FRAME_RELATED_EXPR note
   that describes the save as a plain base+offset store, since the
   unwinder cannot follow the interleaved post-modify iterators.  */
static void
do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
	  rtx frame_reg)
{
  /* Read the iterator index before spill_restore_mem advances it.  */
  int iter = spill_fill_data.next_iter;
  rtx mem, insn;

  mem = spill_restore_mem (reg, cfa_off);
  insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
  spill_fill_data.prev_insn[iter] = insn;

  if (frame_reg)
    {
      rtx base;
      HOST_WIDE_INT off;

      RTX_FRAME_RELATED_P (insn) = 1;

      /* Don't even pretend that the unwind code can intuit its way
	 through a pair of interleaved post_modify iterators.  Just
	 provide the correct answer.  */

      if (frame_pointer_needed)
	{
	  base = hard_frame_pointer_rtx;
	  off = - cfa_off;
	}
      else
	{
	  base = stack_pointer_rtx;
	  off = current_frame_info.total_size - cfa_off;
	}

      REG_NOTES (insn)
	= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
			     gen_rtx_SET (VOIDmode,
					  gen_rtx_MEM (GET_MODE (reg),
						       plus_constant (base, off)),
					  frame_reg),
			     REG_NOTES (insn));
    }
}
|
2859 |
|
|
|
2860 |
|
|
static void
|
2861 |
|
|
do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
|
2862 |
|
|
{
|
2863 |
|
|
int iter = spill_fill_data.next_iter;
|
2864 |
|
|
rtx insn;
|
2865 |
|
|
|
2866 |
|
|
insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
|
2867 |
|
|
GEN_INT (cfa_off)));
|
2868 |
|
|
spill_fill_data.prev_insn[iter] = insn;
|
2869 |
|
|
}
|
2870 |
|
|
|
2871 |
|
|
/* Wrapper functions that discard the CONST_INT spill offset.  These
|
2872 |
|
|
exist so that we can give gr_spill/gr_fill the offset they need and
|
2873 |
|
|
use a consistent function interface. */
|
2874 |
|
|
|
2875 |
|
|
/* Wrapper around gen_movdi that ignores the spill OFFSET operand, so it
   can be passed as the MOVE_FN argument of do_spill/do_restore.  */
static rtx
gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
{
  return gen_movdi (dest, src);
}
|
2880 |
|
|
|
2881 |
|
|
/* Wrapper around gen_fr_spill that ignores the spill OFFSET operand, so
   it can be passed as the MOVE_FN argument of do_spill.  */
static rtx
gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
{
  return gen_fr_spill (dest, src);
}
|
2886 |
|
|
|
2887 |
|
|
/* Wrapper around gen_fr_restore that ignores the spill OFFSET operand,
   so it can be passed as the MOVE_FN argument of do_restore.  */
static rtx
gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
{
  return gen_fr_restore (dest, src);
}
|
2892 |
|
|
|
2893 |
|
|
/* Called after register allocation to add any instructions needed for the
|
2894 |
|
|
prologue. Using a prologue insn is favored compared to putting all of the
|
2895 |
|
|
instructions in output_function_prologue(), since it allows the scheduler
|
2896 |
|
|
to intermix instructions with the saves of the caller saved registers. In
|
2897 |
|
|
some cases, it might be necessary to emit a barrier instruction as the last
|
2898 |
|
|
insn to prevent such scheduling.
|
2899 |
|
|
|
2900 |
|
|
Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
|
2901 |
|
|
so that the debug info generation code can handle them properly.
|
2902 |
|
|
|
2903 |
|
|
The register save area is laid out like so:
|
2904 |
|
|
cfa+16
|
2905 |
|
|
[ varargs spill area ]
|
2906 |
|
|
[ fr register spill area ]
|
2907 |
|
|
[ br register spill area ]
|
2908 |
|
|
[ ar register spill area ]
|
2909 |
|
|
[ pr register spill area ]
|
2910 |
|
|
[ gr register spill area ] */
|
2911 |
|
|
|
2912 |
|
|
/* ??? Get inefficient code when the frame size is larger than can fit in an
|
2913 |
|
|
adds instruction. */
|
2914 |
|
|
|
2915 |
|
|
void
|
2916 |
|
|
ia64_expand_prologue (void)
|
2917 |
|
|
{
|
2918 |
|
|
rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
|
2919 |
|
|
int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
|
2920 |
|
|
rtx reg, alt_reg;
|
2921 |
|
|
|
2922 |
|
|
ia64_compute_frame_size (get_frame_size ());
|
2923 |
|
|
last_scratch_gr_reg = 15;
|
2924 |
|
|
|
2925 |
|
|
/* If there is no epilogue, then we don't need some prologue insns.
|
2926 |
|
|
We need to avoid emitting the dead prologue insns, because flow
|
2927 |
|
|
will complain about them. */
|
2928 |
|
|
if (optimize)
|
2929 |
|
|
{
|
2930 |
|
|
edge e;
|
2931 |
|
|
edge_iterator ei;
|
2932 |
|
|
|
2933 |
|
|
FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
|
2934 |
|
|
if ((e->flags & EDGE_FAKE) == 0
|
2935 |
|
|
&& (e->flags & EDGE_FALLTHRU) != 0)
|
2936 |
|
|
break;
|
2937 |
|
|
epilogue_p = (e != NULL);
|
2938 |
|
|
}
|
2939 |
|
|
else
|
2940 |
|
|
epilogue_p = 1;
|
2941 |
|
|
|
2942 |
|
|
/* Set the local, input, and output register names. We need to do this
|
2943 |
|
|
for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
|
2944 |
|
|
half. If we use in/loc/out register names, then we get assembler errors
|
2945 |
|
|
in crtn.S because there is no alloc insn or regstk directive in there. */
|
2946 |
|
|
if (! TARGET_REG_NAMES)
|
2947 |
|
|
{
|
2948 |
|
|
int inputs = current_frame_info.n_input_regs;
|
2949 |
|
|
int locals = current_frame_info.n_local_regs;
|
2950 |
|
|
int outputs = current_frame_info.n_output_regs;
|
2951 |
|
|
|
2952 |
|
|
for (i = 0; i < inputs; i++)
|
2953 |
|
|
reg_names[IN_REG (i)] = ia64_reg_numbers[i];
|
2954 |
|
|
for (i = 0; i < locals; i++)
|
2955 |
|
|
reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
|
2956 |
|
|
for (i = 0; i < outputs; i++)
|
2957 |
|
|
reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
|
2958 |
|
|
}
|
2959 |
|
|
|
2960 |
|
|
/* Set the frame pointer register name. The regnum is logically loc79,
|
2961 |
|
|
but of course we'll not have allocated that many locals. Rather than
|
2962 |
|
|
worrying about renumbering the existing rtxs, we adjust the name. */
|
2963 |
|
|
/* ??? This code means that we can never use one local register when
|
2964 |
|
|
there is a frame pointer. loc79 gets wasted in this case, as it is
|
2965 |
|
|
renamed to a register that will never be used. See also the try_locals
|
2966 |
|
|
code in find_gr_spill. */
|
2967 |
|
|
if (current_frame_info.reg_fp)
|
2968 |
|
|
{
|
2969 |
|
|
const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
|
2970 |
|
|
reg_names[HARD_FRAME_POINTER_REGNUM]
|
2971 |
|
|
= reg_names[current_frame_info.reg_fp];
|
2972 |
|
|
reg_names[current_frame_info.reg_fp] = tmp;
|
2973 |
|
|
}
|
2974 |
|
|
|
2975 |
|
|
/* We don't need an alloc instruction if we've used no outputs or locals. */
|
2976 |
|
|
if (current_frame_info.n_local_regs == 0
|
2977 |
|
|
&& current_frame_info.n_output_regs == 0
|
2978 |
|
|
&& current_frame_info.n_input_regs <= current_function_args_info.int_regs
|
2979 |
|
|
&& !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
|
2980 |
|
|
{
|
2981 |
|
|
/* If there is no alloc, but there are input registers used, then we
|
2982 |
|
|
need a .regstk directive. */
|
2983 |
|
|
current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
|
2984 |
|
|
ar_pfs_save_reg = NULL_RTX;
|
2985 |
|
|
}
|
2986 |
|
|
else
|
2987 |
|
|
{
|
2988 |
|
|
current_frame_info.need_regstk = 0;
|
2989 |
|
|
|
2990 |
|
|
if (current_frame_info.reg_save_ar_pfs)
|
2991 |
|
|
regno = current_frame_info.reg_save_ar_pfs;
|
2992 |
|
|
else
|
2993 |
|
|
regno = next_scratch_gr_reg ();
|
2994 |
|
|
ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
|
2995 |
|
|
|
2996 |
|
|
insn = emit_insn (gen_alloc (ar_pfs_save_reg,
|
2997 |
|
|
GEN_INT (current_frame_info.n_input_regs),
|
2998 |
|
|
GEN_INT (current_frame_info.n_local_regs),
|
2999 |
|
|
GEN_INT (current_frame_info.n_output_regs),
|
3000 |
|
|
GEN_INT (current_frame_info.n_rotate_regs)));
|
3001 |
|
|
RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
|
3002 |
|
|
}
|
3003 |
|
|
|
3004 |
|
|
/* Set up frame pointer, stack pointer, and spill iterators. */
|
3005 |
|
|
|
3006 |
|
|
n_varargs = cfun->machine->n_varargs;
|
3007 |
|
|
setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
|
3008 |
|
|
stack_pointer_rtx, 0);
|
3009 |
|
|
|
3010 |
|
|
if (frame_pointer_needed)
|
3011 |
|
|
{
|
3012 |
|
|
insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
|
3013 |
|
|
RTX_FRAME_RELATED_P (insn) = 1;
|
3014 |
|
|
}
|
3015 |
|
|
|
3016 |
|
|
if (current_frame_info.total_size != 0)
|
3017 |
|
|
{
|
3018 |
|
|
rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
|
3019 |
|
|
rtx offset;
|
3020 |
|
|
|
3021 |
|
|
if (CONST_OK_FOR_I (- current_frame_info.total_size))
|
3022 |
|
|
offset = frame_size_rtx;
|
3023 |
|
|
else
|
3024 |
|
|
{
|
3025 |
|
|
regno = next_scratch_gr_reg ();
|
3026 |
|
|
offset = gen_rtx_REG (DImode, regno);
|
3027 |
|
|
emit_move_insn (offset, frame_size_rtx);
|
3028 |
|
|
}
|
3029 |
|
|
|
3030 |
|
|
insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
|
3031 |
|
|
stack_pointer_rtx, offset));
|
3032 |
|
|
|
3033 |
|
|
if (! frame_pointer_needed)
|
3034 |
|
|
{
|
3035 |
|
|
RTX_FRAME_RELATED_P (insn) = 1;
|
3036 |
|
|
if (GET_CODE (offset) != CONST_INT)
|
3037 |
|
|
{
|
3038 |
|
|
REG_NOTES (insn)
|
3039 |
|
|
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
|
3040 |
|
|
gen_rtx_SET (VOIDmode,
|
3041 |
|
|
stack_pointer_rtx,
|
3042 |
|
|
gen_rtx_PLUS (DImode,
|
3043 |
|
|
stack_pointer_rtx,
|
3044 |
|
|
frame_size_rtx)),
|
3045 |
|
|
REG_NOTES (insn));
|
3046 |
|
|
}
|
3047 |
|
|
}
|
3048 |
|
|
|
3049 |
|
|
/* ??? At this point we must generate a magic insn that appears to
|
3050 |
|
|
modify the stack pointer, the frame pointer, and all spill
|
3051 |
|
|
iterators. This would allow the most scheduling freedom. For
|
3052 |
|
|
now, just hard stop. */
|
3053 |
|
|
emit_insn (gen_blockage ());
|
3054 |
|
|
}
|
3055 |
|
|
|
3056 |
|
|
/* Must copy out ar.unat before doing any integer spills. */
|
3057 |
|
|
if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
|
3058 |
|
|
{
|
3059 |
|
|
if (current_frame_info.reg_save_ar_unat)
|
3060 |
|
|
ar_unat_save_reg
|
3061 |
|
|
= gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
|
3062 |
|
|
else
|
3063 |
|
|
{
|
3064 |
|
|
alt_regno = next_scratch_gr_reg ();
|
3065 |
|
|
ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
|
3066 |
|
|
current_frame_info.gr_used_mask |= 1 << alt_regno;
|
3067 |
|
|
}
|
3068 |
|
|
|
3069 |
|
|
reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
|
3070 |
|
|
insn = emit_move_insn (ar_unat_save_reg, reg);
|
3071 |
|
|
RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
|
3072 |
|
|
|
3073 |
|
|
/* Even if we're not going to generate an epilogue, we still
|
3074 |
|
|
need to save the register so that EH works. */
|
3075 |
|
|
if (! epilogue_p && current_frame_info.reg_save_ar_unat)
|
3076 |
|
|
emit_insn (gen_prologue_use (ar_unat_save_reg));
|
3077 |
|
|
}
|
3078 |
|
|
else
|
3079 |
|
|
ar_unat_save_reg = NULL_RTX;
|
3080 |
|
|
|
3081 |
|
|
/* Spill all varargs registers. Do this before spilling any GR registers,
|
3082 |
|
|
since we want the UNAT bits for the GR registers to override the UNAT
|
3083 |
|
|
bits from varargs, which we don't care about. */
|
3084 |
|
|
|
3085 |
|
|
cfa_off = -16;
|
3086 |
|
|
for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
|
3087 |
|
|
{
|
3088 |
|
|
reg = gen_rtx_REG (DImode, regno);
|
3089 |
|
|
do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
|
3090 |
|
|
}
|
3091 |
|
|
|
3092 |
|
|
/* Locate the bottom of the register save area. */
|
3093 |
|
|
cfa_off = (current_frame_info.spill_cfa_off
|
3094 |
|
|
+ current_frame_info.spill_size
|
3095 |
|
|
+ current_frame_info.extra_spill_size);
|
3096 |
|
|
|
3097 |
|
|
/* Save the predicate register block either in a register or in memory. */
|
3098 |
|
|
if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
|
3099 |
|
|
{
|
3100 |
|
|
reg = gen_rtx_REG (DImode, PR_REG (0));
|
3101 |
|
|
if (current_frame_info.reg_save_pr != 0)
|
3102 |
|
|
{
|
3103 |
|
|
alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
|
3104 |
|
|
insn = emit_move_insn (alt_reg, reg);
|
3105 |
|
|
|
3106 |
|
|
/* ??? Denote pr spill/fill by a DImode move that modifies all
|
3107 |
|
|
64 hard registers. */
|
3108 |
|
|
RTX_FRAME_RELATED_P (insn) = 1;
|
3109 |
|
|
REG_NOTES (insn)
|
3110 |
|
|
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
|
3111 |
|
|
gen_rtx_SET (VOIDmode, alt_reg, reg),
|
3112 |
|
|
REG_NOTES (insn));
|
3113 |
|
|
|
3114 |
|
|
/* Even if we're not going to generate an epilogue, we still
|
3115 |
|
|
need to save the register so that EH works. */
|
3116 |
|
|
if (! epilogue_p)
|
3117 |
|
|
emit_insn (gen_prologue_use (alt_reg));
|
3118 |
|
|
}
|
3119 |
|
|
else
|
3120 |
|
|
{
|
3121 |
|
|
alt_regno = next_scratch_gr_reg ();
|
3122 |
|
|
alt_reg = gen_rtx_REG (DImode, alt_regno);
|
3123 |
|
|
insn = emit_move_insn (alt_reg, reg);
|
3124 |
|
|
do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
|
3125 |
|
|
cfa_off -= 8;
|
3126 |
|
|
}
|
3127 |
|
|
}
|
3128 |
|
|
|
3129 |
|
|
/* Handle AR regs in numerical order. All of them get special handling. */
|
3130 |
|
|
if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
|
3131 |
|
|
&& current_frame_info.reg_save_ar_unat == 0)
|
3132 |
|
|
{
|
3133 |
|
|
reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
|
3134 |
|
|
do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
|
3135 |
|
|
cfa_off -= 8;
|
3136 |
|
|
}
|
3137 |
|
|
|
3138 |
|
|
/* The alloc insn already copied ar.pfs into a general register. The
|
3139 |
|
|
only thing we have to do now is copy that register to a stack slot
|
3140 |
|
|
if we'd not allocated a local register for the job. */
|
3141 |
|
|
if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
|
3142 |
|
|
&& current_frame_info.reg_save_ar_pfs == 0)
|
3143 |
|
|
{
|
3144 |
|
|
reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
|
3145 |
|
|
do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
|
3146 |
|
|
cfa_off -= 8;
|
3147 |
|
|
}
|
3148 |
|
|
|
3149 |
|
|
if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
|
3150 |
|
|
{
|
3151 |
|
|
reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
|
3152 |
|
|
if (current_frame_info.reg_save_ar_lc != 0)
|
3153 |
|
|
{
|
3154 |
|
|
alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
|
3155 |
|
|
insn = emit_move_insn (alt_reg, reg);
|
3156 |
|
|
RTX_FRAME_RELATED_P (insn) = 1;
|
3157 |
|
|
|
3158 |
|
|
/* Even if we're not going to generate an epilogue, we still
|
3159 |
|
|
need to save the register so that EH works. */
|
3160 |
|
|
if (! epilogue_p)
|
3161 |
|
|
emit_insn (gen_prologue_use (alt_reg));
|
3162 |
|
|
}
|
3163 |
|
|
else
|
3164 |
|
|
{
|
3165 |
|
|
alt_regno = next_scratch_gr_reg ();
|
3166 |
|
|
alt_reg = gen_rtx_REG (DImode, alt_regno);
|
3167 |
|
|
emit_move_insn (alt_reg, reg);
|
3168 |
|
|
do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
|
3169 |
|
|
cfa_off -= 8;
|
3170 |
|
|
}
|
3171 |
|
|
}
|
3172 |
|
|
|
3173 |
|
|
/* Save the return pointer. */
|
3174 |
|
|
if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
|
3175 |
|
|
{
|
3176 |
|
|
reg = gen_rtx_REG (DImode, BR_REG (0));
|
3177 |
|
|
if (current_frame_info.reg_save_b0 != 0)
|
3178 |
|
|
{
|
3179 |
|
|
alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
|
3180 |
|
|
insn = emit_move_insn (alt_reg, reg);
|
3181 |
|
|
RTX_FRAME_RELATED_P (insn) = 1;
|
3182 |
|
|
|
3183 |
|
|
/* Even if we're not going to generate an epilogue, we still
|
3184 |
|
|
need to save the register so that EH works. */
|
3185 |
|
|
if (! epilogue_p)
|
3186 |
|
|
emit_insn (gen_prologue_use (alt_reg));
|
3187 |
|
|
}
|
3188 |
|
|
else
|
3189 |
|
|
{
|
3190 |
|
|
alt_regno = next_scratch_gr_reg ();
|
3191 |
|
|
alt_reg = gen_rtx_REG (DImode, alt_regno);
|
3192 |
|
|
emit_move_insn (alt_reg, reg);
|
3193 |
|
|
do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
|
3194 |
|
|
cfa_off -= 8;
|
3195 |
|
|
}
|
3196 |
|
|
}
|
3197 |
|
|
|
3198 |
|
|
if (current_frame_info.reg_save_gp)
|
3199 |
|
|
{
|
3200 |
|
|
insn = emit_move_insn (gen_rtx_REG (DImode,
|
3201 |
|
|
current_frame_info.reg_save_gp),
|
3202 |
|
|
pic_offset_table_rtx);
|
3203 |
|
|
/* We don't know for sure yet if this is actually needed, since
|
3204 |
|
|
we've not split the PIC call patterns. If all of the calls
|
3205 |
|
|
are indirect, and not followed by any uses of the gp, then
|
3206 |
|
|
this save is dead. Allow it to go away. */
|
3207 |
|
|
REG_NOTES (insn)
|
3208 |
|
|
= gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, REG_NOTES (insn));
|
3209 |
|
|
}
|
3210 |
|
|
|
3211 |
|
|
/* We should now be at the base of the gr/br/fr spill area. */
|
3212 |
|
|
gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
|
3213 |
|
|
+ current_frame_info.spill_size));
|
3214 |
|
|
|
3215 |
|
|
/* Spill all general registers. */
|
3216 |
|
|
for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
|
3217 |
|
|
if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
|
3218 |
|
|
{
|
3219 |
|
|
reg = gen_rtx_REG (DImode, regno);
|
3220 |
|
|
do_spill (gen_gr_spill, reg, cfa_off, reg);
|
3221 |
|
|
cfa_off -= 8;
|
3222 |
|
|
}
|
3223 |
|
|
|
3224 |
|
|
/* Spill the rest of the BR registers. */
|
3225 |
|
|
for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
|
3226 |
|
|
if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
|
3227 |
|
|
{
|
3228 |
|
|
alt_regno = next_scratch_gr_reg ();
|
3229 |
|
|
alt_reg = gen_rtx_REG (DImode, alt_regno);
|
3230 |
|
|
reg = gen_rtx_REG (DImode, regno);
|
3231 |
|
|
emit_move_insn (alt_reg, reg);
|
3232 |
|
|
do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
|
3233 |
|
|
cfa_off -= 8;
|
3234 |
|
|
}
|
3235 |
|
|
|
3236 |
|
|
/* Align the frame and spill all FR registers. */
|
3237 |
|
|
for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
|
3238 |
|
|
if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
|
3239 |
|
|
{
|
3240 |
|
|
gcc_assert (!(cfa_off & 15));
|
3241 |
|
|
reg = gen_rtx_REG (XFmode, regno);
|
3242 |
|
|
do_spill (gen_fr_spill_x, reg, cfa_off, reg);
|
3243 |
|
|
cfa_off -= 16;
|
3244 |
|
|
}
|
3245 |
|
|
|
3246 |
|
|
gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
|
3247 |
|
|
|
3248 |
|
|
finish_spill_pointers ();
|
3249 |
|
|
}
|
3250 |
|
|
|
3251 |
|
|
/* Called after register allocation to add any instructions needed for the
   epilogue.  Using an epilogue insn is favored compared to putting all of the
   instructions in output_function_prologue(), since it allows the scheduler
   to intermix instructions with the saves of the caller saved registers.  In
   some cases, it might be necessary to emit a barrier instruction as the last
   insn to prevent such scheduling.

   SIBCALL_P is nonzero when this epilogue precedes a sibling call rather
   than a br.ret; in that case no return insn is emitted, and an alloc is
   issued to keep the input registers live across the call.

   Restores mirror the spills done in ia64_expand_prologue: CFA_OFF walks
   down from the top of the register save area and must be kept in exact
   sync with the prologue's spill order.  */

void
ia64_expand_epilogue (int sibcall_p)
{
  rtx insn, reg, alt_reg, ar_unat_save_reg;
  int regno, alt_regno, cfa_off;

  ia64_compute_frame_size (get_frame_size ());

  /* If there is a frame pointer, then we use it instead of the stack
     pointer, so that the stack pointer does not need to be valid when
     the epilogue starts.  See EXIT_IGNORE_STACK.  */
  if (frame_pointer_needed)
    setup_spill_pointers (current_frame_info.n_spilled,
			  hard_frame_pointer_rtx, 0);
  else
    setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
			  current_frame_info.total_size);

  if (current_frame_info.total_size != 0)
    {
      /* ??? At this point we must generate a magic insn that appears to
	 modify the spill iterators and the frame pointer.  This would
	 allow the most scheduling freedom.  For now, just hard stop.  */
      emit_insn (gen_blockage ());
    }

  /* Locate the bottom of the register save area.  */
  cfa_off = (current_frame_info.spill_cfa_off
	     + current_frame_info.spill_size
	     + current_frame_info.extra_spill_size);

  /* Restore the predicate registers.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
    {
      /* Either the PR block was kept in a local register, or it must be
	 reloaded from its stack slot into a scratch GR first.  */
      if (current_frame_info.reg_save_pr != 0)
	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  do_restore (gen_movdi_x, alt_reg, cfa_off);
	  cfa_off -= 8;
	}
      reg = gen_rtx_REG (DImode, PR_REG (0));
      emit_move_insn (reg, alt_reg);
    }

  /* Restore the application registers.  */

  /* Load the saved unat from the stack, but do not restore it until
     after the GRs have been restored.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
    {
      if (current_frame_info.reg_save_ar_unat != 0)
        ar_unat_save_reg
	  = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
	  /* Reserve the scratch register so later restores don't reuse
	     it before ar.unat is written back below.  */
	  current_frame_info.gr_used_mask |= 1 << alt_regno;
	  do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
	  cfa_off -= 8;
	}
    }
  else
    ar_unat_save_reg = NULL_RTX;

  if (current_frame_info.reg_save_ar_pfs != 0)
    {
      alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      emit_move_insn (reg, alt_reg);
    }
  else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
    {
      alt_regno = next_scratch_gr_reg ();
      alt_reg = gen_rtx_REG (DImode, alt_regno);
      do_restore (gen_movdi_x, alt_reg, cfa_off);
      cfa_off -= 8;
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      emit_move_insn (reg, alt_reg);
    }

  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
    {
      if (current_frame_info.reg_save_ar_lc != 0)
	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  do_restore (gen_movdi_x, alt_reg, cfa_off);
	  cfa_off -= 8;
	}
      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
      emit_move_insn (reg, alt_reg);
    }

  /* Restore the return pointer.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
    {
      if (current_frame_info.reg_save_b0 != 0)
	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  do_restore (gen_movdi_x, alt_reg, cfa_off);
	  cfa_off -= 8;
	}
      reg = gen_rtx_REG (DImode, BR_REG (0));
      emit_move_insn (reg, alt_reg);
    }

  /* We should now be at the base of the gr/br/fr spill area.  */
  gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
			  + current_frame_info.spill_size));

  /* The GP may be stored on the stack in the prologue, but it's
     never restored in the epilogue.  Skip the stack slot.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
    cfa_off -= 8;

  /* Restore all general registers.  */
  for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	reg = gen_rtx_REG (DImode, regno);
	do_restore (gen_gr_restore, reg, cfa_off);
	cfa_off -= 8;
      }

  /* Restore the branch registers.  */
  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	/* BR registers cannot be loaded directly; bounce through a
	   scratch GR.  */
	alt_regno = next_scratch_gr_reg ();
	alt_reg = gen_rtx_REG (DImode, alt_regno);
	do_restore (gen_movdi_x, alt_reg, cfa_off);
	cfa_off -= 8;
	reg = gen_rtx_REG (DImode, regno);
	emit_move_insn (reg, alt_reg);
      }

  /* Restore floating point registers.  */
  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	/* FR spills occupy 16-byte-aligned 16-byte slots.  */
	gcc_assert (!(cfa_off & 15));
	reg = gen_rtx_REG (XFmode, regno);
	do_restore (gen_fr_restore_x, reg, cfa_off);
	cfa_off -= 16;
      }

  /* Restore ar.unat for real.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
    {
      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      emit_move_insn (reg, ar_unat_save_reg);
    }

  gcc_assert (cfa_off == current_frame_info.spill_cfa_off);

  finish_spill_pointers ();

  if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
    {
      /* ??? At this point we must generate a magic insn that appears to
	 modify the spill iterators, the stack pointer, and the frame
	 pointer.  This would allow the most scheduling freedom.  For now,
	 just hard stop.  */
      emit_insn (gen_blockage ());
    }

  if (cfun->machine->ia64_eh_epilogue_sp)
    emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
  else if (frame_pointer_needed)
    {
      insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else if (current_frame_info.total_size)
    {
      rtx offset, frame_size_rtx;

      frame_size_rtx = GEN_INT (current_frame_info.total_size);
      /* Use the immediate form when the frame size fits in the add
	 immediate field; otherwise materialize it in a scratch GR.  */
      if (CONST_OK_FOR_I (current_frame_info.total_size))
	offset = frame_size_rtx;
      else
	{
	  regno = next_scratch_gr_reg ();
	  offset = gen_rtx_REG (DImode, regno);
	  emit_move_insn (offset, frame_size_rtx);
	}

      insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
				    offset));

      RTX_FRAME_RELATED_P (insn) = 1;
      if (GET_CODE (offset) != CONST_INT)
	{
	  /* The add went through a register, so attach a note telling
	     the unwinder what the sp adjustment really was.  */
	  REG_NOTES (insn)
	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
				 gen_rtx_SET (VOIDmode,
					      stack_pointer_rtx,
					      gen_rtx_PLUS (DImode,
							    stack_pointer_rtx,
							    frame_size_rtx)),
				 REG_NOTES (insn));
	}
    }

  if (cfun->machine->ia64_eh_epilogue_bsp)
    emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));

  if (! sibcall_p)
    emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
  else
    {
      int fp = GR_REG (2);
      /* We need a throw away register here, r0 and r1 are reserved, so r2 is the
	 first available call clobbered register.  If there was a frame_pointer
	 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
	 so we have to make sure we're using the string "r2" when emitting
	 the register name for the assembler.  */
      if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
	fp = HARD_FRAME_POINTER_REGNUM;

      /* We must emit an alloc to force the input registers to become output
	 registers.  Otherwise, if the callee tries to pass its parameters
	 through to another call without an intervening alloc, then these
	 values get lost.  */
      /* ??? We don't need to preserve all input registers.  We only need to
	 preserve those input registers used as arguments to the sibling call.
	 It is unclear how to compute that number here.  */
      if (current_frame_info.n_input_regs != 0)
	{
	  rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
	  insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
				       const0_rtx, const0_rtx,
				       n_inputs, const0_rtx));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
|
3504 |
|
|
|
3505 |
|
|
/* Return 1 if br.ret can do all the work required to return from a
|
3506 |
|
|
function. */
|
3507 |
|
|
|
3508 |
|
|
int
|
3509 |
|
|
ia64_direct_return (void)
|
3510 |
|
|
{
|
3511 |
|
|
if (reload_completed && ! frame_pointer_needed)
|
3512 |
|
|
{
|
3513 |
|
|
ia64_compute_frame_size (get_frame_size ());
|
3514 |
|
|
|
3515 |
|
|
return (current_frame_info.total_size == 0
|
3516 |
|
|
&& current_frame_info.n_spilled == 0
|
3517 |
|
|
&& current_frame_info.reg_save_b0 == 0
|
3518 |
|
|
&& current_frame_info.reg_save_pr == 0
|
3519 |
|
|
&& current_frame_info.reg_save_ar_pfs == 0
|
3520 |
|
|
&& current_frame_info.reg_save_ar_unat == 0
|
3521 |
|
|
&& current_frame_info.reg_save_ar_lc == 0);
|
3522 |
|
|
}
|
3523 |
|
|
return 0;
|
3524 |
|
|
}
|
3525 |
|
|
|
3526 |
|
|
/* Return the magic cookie that we use to hold the return address
|
3527 |
|
|
during early compilation. */
|
3528 |
|
|
|
3529 |
|
|
rtx
|
3530 |
|
|
ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
|
3531 |
|
|
{
|
3532 |
|
|
if (count != 0)
|
3533 |
|
|
return NULL;
|
3534 |
|
|
return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
|
3535 |
|
|
}
|
3536 |
|
|
|
3537 |
|
|
/* Split this value after reload, now that we know where the return
   address is saved.  DEST receives the return address; it may also be
   used as a scratch to form the address of BR0's stack slot.  */

void
ia64_split_return_addr_rtx (rtx dest)
{
  rtx src;

  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
    {
      if (current_frame_info.reg_save_b0 != 0)
	/* BR0 was saved in a local GR; read it from there.  */
	src = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
      else
	{
	  HOST_WIDE_INT off;
	  unsigned int regno;

	  /* Compute offset from CFA for BR0.  */
	  /* ??? Must be kept in sync with ia64_expand_prologue.  */
	  off = (current_frame_info.spill_cfa_off
		 + current_frame_info.spill_size);
	  /* Skip past the GR spill slots, which the prologue stores
	     before BR0.  */
	  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
	    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
	      off -= 8;

	  /* Convert CFA offset to a register based offset.  */
	  if (frame_pointer_needed)
	    src = hard_frame_pointer_rtx;
	  else
	    {
	      src = stack_pointer_rtx;
	      off += current_frame_info.total_size;
	    }

	  /* Load address into scratch register.  Use the immediate
	     add form when OFF fits; otherwise materialize OFF in DEST
	     first.  */
	  if (CONST_OK_FOR_I (off))
	    emit_insn (gen_adddi3 (dest, src, GEN_INT (off)));
	  else
	    {
	      emit_move_insn (dest, GEN_INT (off));
	      emit_insn (gen_adddi3 (dest, src, dest));
	    }

	  src = gen_rtx_MEM (Pmode, dest);
	}
    }
  else
    /* BR0 was never saved; it still holds the return address.  */
    src = gen_rtx_REG (DImode, BR_REG (0));

  emit_move_insn (dest, src);
}
|
3588 |
|
|
|
3589 |
|
|
int
|
3590 |
|
|
ia64_hard_regno_rename_ok (int from, int to)
|
3591 |
|
|
{
|
3592 |
|
|
/* Don't clobber any of the registers we reserved for the prologue. */
|
3593 |
|
|
if (to == current_frame_info.reg_fp
|
3594 |
|
|
|| to == current_frame_info.reg_save_b0
|
3595 |
|
|
|| to == current_frame_info.reg_save_pr
|
3596 |
|
|
|| to == current_frame_info.reg_save_ar_pfs
|
3597 |
|
|
|| to == current_frame_info.reg_save_ar_unat
|
3598 |
|
|
|| to == current_frame_info.reg_save_ar_lc)
|
3599 |
|
|
return 0;
|
3600 |
|
|
|
3601 |
|
|
if (from == current_frame_info.reg_fp
|
3602 |
|
|
|| from == current_frame_info.reg_save_b0
|
3603 |
|
|
|| from == current_frame_info.reg_save_pr
|
3604 |
|
|
|| from == current_frame_info.reg_save_ar_pfs
|
3605 |
|
|
|| from == current_frame_info.reg_save_ar_unat
|
3606 |
|
|
|| from == current_frame_info.reg_save_ar_lc)
|
3607 |
|
|
return 0;
|
3608 |
|
|
|
3609 |
|
|
/* Don't use output registers outside the register frame. */
|
3610 |
|
|
if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
|
3611 |
|
|
return 0;
|
3612 |
|
|
|
3613 |
|
|
/* Retain even/oddness on predicate register pairs. */
|
3614 |
|
|
if (PR_REGNO_P (from) && PR_REGNO_P (to))
|
3615 |
|
|
return (from & 1) == (to & 1);
|
3616 |
|
|
|
3617 |
|
|
return 1;
|
3618 |
|
|
}
|
3619 |
|
|
|
3620 |
|
|
/* Target hook for assembling integer objects.  Handle word-sized
   aligned objects and detect the cases when @fptr is needed.

   A pointer-sized reference to a function symbol must go through the
   @fptr relocation (so the linker builds a function descriptor) unless
   PIC is disabled; everything else defers to the default hook.  */

static bool
ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  if (size == POINTER_SIZE / BITS_PER_UNIT
      && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
      && GET_CODE (x) == SYMBOL_REF
      && SYMBOL_REF_FUNCTION_P (x))
    {
      /* Indexed by [aligned][32-bit]: pick data8/data4 and the .ua
	 (unaligned) variant as appropriate.  */
      static const char * const directive[2][2] = {
	  /* 64-bit pointer */  /* 32-bit pointer */
	{ "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("},	/* unaligned */
	{ "\tdata8\t@fptr(", "\tdata4\t@fptr("}	/* aligned */
      };
      fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
      output_addr_const (asm_out_file, x);
      fputs (")\n", asm_out_file);
      return true;
    }
  return default_assemble_integer (x, size, aligned_p);
}
|
3643 |
|
|
|
3644 |
|
|
/* Emit the function prologue.

   Outputs the .regstk directive when the register stack frame is
   nonstandard, then (when unwind info is wanted) the .prologue and
   .spill unwind directives.  The .prologue mask encodes which of
   rp/ar.pfs/fp/pr were saved in consecutive GRs starting at GRSAVE:
   bit 8 = b0, bit 4 = ar.pfs, bit 2 = fp, bit 1 = pr.  A bit is only
   set while the save registers remain consecutive (grsave_prev
   tracking); otherwise the short form cannot describe the layout and
   a bare .prologue is emitted instead.  */

static void
ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  int mask, grsave, grsave_prev;

  if (current_frame_info.need_regstk)
    fprintf (file, "\t.regstk %d, %d, %d, %d\n",
	     current_frame_info.n_input_regs,
	     current_frame_info.n_local_regs,
	     current_frame_info.n_output_regs,
	     current_frame_info.n_rotate_regs);

  /* Unwind directives are only needed for unwind tables or non-SJLJ
     exception handling.  */
  if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
    return;

  /* Emit the .prologue directive.  */

  mask = 0;
  grsave = grsave_prev = 0;
  if (current_frame_info.reg_save_b0 != 0)
    {
      mask |= 8;
      grsave = grsave_prev = current_frame_info.reg_save_b0;
    }
  if (current_frame_info.reg_save_ar_pfs != 0
      && (grsave_prev == 0
	  || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
    {
      mask |= 4;
      if (grsave_prev == 0)
	grsave = current_frame_info.reg_save_ar_pfs;
      grsave_prev = current_frame_info.reg_save_ar_pfs;
    }
  if (current_frame_info.reg_fp != 0
      && (grsave_prev == 0
	  || current_frame_info.reg_fp == grsave_prev + 1))
    {
      mask |= 2;
      if (grsave_prev == 0)
	grsave = HARD_FRAME_POINTER_REGNUM;
      grsave_prev = current_frame_info.reg_fp;
    }
  if (current_frame_info.reg_save_pr != 0
      && (grsave_prev == 0
	  || current_frame_info.reg_save_pr == grsave_prev + 1))
    {
      mask |= 1;
      if (grsave_prev == 0)
	grsave = current_frame_info.reg_save_pr;
    }

  /* The two-operand .prologue form is a GNU as extension.  */
  if (mask && TARGET_GNU_AS)
    fprintf (file, "\t.prologue %d, %d\n", mask,
	     ia64_dbx_register_number (grsave));
  else
    fputs ("\t.prologue\n", file);

  /* Emit a .spill directive, if necessary, to relocate the base of
     the register spill area.  */
  if (current_frame_info.spill_cfa_off != -16)
    fprintf (file, "\t.spill %ld\n",
	     (long) (current_frame_info.spill_cfa_off
		     + current_frame_info.spill_size));
}
|
3710 |
|
|
|
3711 |
|
|
/* Emit the .body directive at the scheduled end of the prologue. */
|
3712 |
|
|
|
3713 |
|
|
static void
|
3714 |
|
|
ia64_output_function_end_prologue (FILE *file)
|
3715 |
|
|
{
|
3716 |
|
|
if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
|
3717 |
|
|
return;
|
3718 |
|
|
|
3719 |
|
|
fputs ("\t.body\n", file);
|
3720 |
|
|
}
|
3721 |
|
|
|
3722 |
|
|
/* Emit the function epilogue.

   Emits no code; instead undoes per-function global state: swaps back
   the reg_names entries for the renamed frame pointer, restores the
   in/loc/out register names when -mno-register-names overrode them,
   and marks current_frame_info stale for the next function.  */

static void
ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  int i;

  /* Undo the frame-pointer / reg_fp name swap performed for this
     function (see ia64_expand_prologue).  */
  if (current_frame_info.reg_fp)
    {
      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
      reg_names[HARD_FRAME_POINTER_REGNUM]
	= reg_names[current_frame_info.reg_fp];
      reg_names[current_frame_info.reg_fp] = tmp;
    }
  /* Restore the canonical in/loc/out names that this function's
     register frame had overridden.  */
  if (! TARGET_REG_NAMES)
    {
      for (i = 0; i < current_frame_info.n_input_regs; i++)
	reg_names[IN_REG (i)] = ia64_input_reg_names[i];
      for (i = 0; i < current_frame_info.n_local_regs; i++)
	reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
      for (i = 0; i < current_frame_info.n_output_regs; i++)
	reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
    }

  /* Force recomputation of the frame info for the next function.  */
  current_frame_info.initialized = 0;
}
|
3749 |
|
|
|
3750 |
|
|
int
|
3751 |
|
|
ia64_dbx_register_number (int regno)
|
3752 |
|
|
{
|
3753 |
|
|
/* In ia64_expand_prologue we quite literally renamed the frame pointer
|
3754 |
|
|
from its home at loc79 to something inside the register frame. We
|
3755 |
|
|
must perform the same renumbering here for the debug info. */
|
3756 |
|
|
if (current_frame_info.reg_fp)
|
3757 |
|
|
{
|
3758 |
|
|
if (regno == HARD_FRAME_POINTER_REGNUM)
|
3759 |
|
|
regno = current_frame_info.reg_fp;
|
3760 |
|
|
else if (regno == current_frame_info.reg_fp)
|
3761 |
|
|
regno = HARD_FRAME_POINTER_REGNUM;
|
3762 |
|
|
}
|
3763 |
|
|
|
3764 |
|
|
if (IN_REGNO_P (regno))
|
3765 |
|
|
return 32 + regno - IN_REG (0);
|
3766 |
|
|
else if (LOC_REGNO_P (regno))
|
3767 |
|
|
return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
|
3768 |
|
|
else if (OUT_REGNO_P (regno))
|
3769 |
|
|
return (32 + current_frame_info.n_input_regs
|
3770 |
|
|
+ current_frame_info.n_local_regs + regno - OUT_REG (0));
|
3771 |
|
|
else
|
3772 |
|
|
return regno;
|
3773 |
|
|
}
|
3774 |
|
|
|
3775 |
|
|
/* Emit RTL to initialize the trampoline at ADDR so it calls FNADDR
   with STATIC_CHAIN.  The trampoline is four 8-byte words: a fake
   function descriptor (__ia64_trampoline, ADDR+16) followed by the
   real target descriptor address and the static chain value, which
   __ia64_trampoline loads at run time.  */

void
ia64_initialize_trampoline (rtx addr, rtx fnaddr, rtx static_chain)
{
  rtx addr_reg, eight = GEN_INT (8);

  /* The Intel assembler requires that the global __ia64_trampoline symbol
     be declared explicitly */
  if (!TARGET_GNU_AS)
    {
      /* Only emit the declaration once per translation unit.  */
      static bool declared_ia64_trampoline = false;

      if (!declared_ia64_trampoline)
	{
	  declared_ia64_trampoline = true;
	  (*targetm.asm_out.globalize_label) (asm_out_file,
					      "__ia64_trampoline");
	}
    }

  /* Make sure addresses are Pmode even if we are in ILP32 mode. */
  addr = convert_memory_address (Pmode, addr);
  fnaddr = convert_memory_address (Pmode, fnaddr);
  static_chain = convert_memory_address (Pmode, static_chain);

  /* Load up our iterator.  */
  addr_reg = gen_reg_rtx (Pmode);
  emit_move_insn (addr_reg, addr);

  /* The first two words are the fake descriptor:
     __ia64_trampoline, ADDR+16.  */
  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
		  gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));

  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
		  copy_to_reg (plus_constant (addr, 16)));
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));

  /* The third word is the target descriptor.  */
  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));

  /* The fourth word is the static chain.  */
  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
}
|
3820 |
|
|
|
3821 |
|
|
/* Do any needed setup for a variadic function. CUM has not been updated
|
3822 |
|
|
for the last named argument which has type TYPE and mode MODE.
|
3823 |
|
|
|
3824 |
|
|
We generate the actual spill instructions during prologue generation. */
|
3825 |
|
|
|
3826 |
|
|
static void
|
3827 |
|
|
ia64_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
|
3828 |
|
|
tree type, int * pretend_size,
|
3829 |
|
|
int second_time ATTRIBUTE_UNUSED)
|
3830 |
|
|
{
|
3831 |
|
|
CUMULATIVE_ARGS next_cum = *cum;
|
3832 |
|
|
|
3833 |
|
|
/* Skip the current argument. */
|
3834 |
|
|
ia64_function_arg_advance (&next_cum, mode, type, 1);
|
3835 |
|
|
|
3836 |
|
|
if (next_cum.words < MAX_ARGUMENT_SLOTS)
|
3837 |
|
|
{
|
3838 |
|
|
int n = MAX_ARGUMENT_SLOTS - next_cum.words;
|
3839 |
|
|
*pretend_size = n * UNITS_PER_WORD;
|
3840 |
|
|
cfun->machine->n_varargs = n;
|
3841 |
|
|
}
|
3842 |
|
|
}
|
3843 |
|
|
|
3844 |
|
|
/* Check whether TYPE is a homogeneous floating point aggregate.  If
   it is, return the mode of the floating point type that appears
   in all leafs.  If it is not, return VOIDmode.

   An aggregate is a homogeneous floating point aggregate is if all
   fields/elements in it have the same floating point type (e.g,
   SFmode).  128-bit quad-precision floats are excluded.

   Variable sized aggregates should never arrive here, since we should
   have already decided to pass them by reference.  Top-level zero-sized
   aggregates are excluded because our parallels crash the middle-end.

   NESTED is true when TYPE is a component of an enclosing aggregate
   (so bare REAL_TYPEs count as leaves only in that case).  */

static enum machine_mode
hfa_element_mode (tree type, bool nested)
{
  enum machine_mode element_mode = VOIDmode;
  enum machine_mode mode;
  enum tree_code code = TREE_CODE (type);
  int know_element_mode = 0;	/* set once the first FP leaf is seen */
  tree t;

  /* Reject top-level zero-sized aggregates (see comment above).  */
  if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
    return VOIDmode;

  switch (code)
    {
    case VOID_TYPE:	case INTEGER_TYPE:	case ENUMERAL_TYPE:
    case BOOLEAN_TYPE:	case POINTER_TYPE:
    case OFFSET_TYPE:	case REFERENCE_TYPE:	case METHOD_TYPE:
    case LANG_TYPE:		case FUNCTION_TYPE:
      return VOIDmode;

      /* Fortran complex types are supposed to be HFAs, so we need to handle
	 gcc's COMPLEX_TYPEs as HFAs.  We need to exclude the integral complex
	 types though.  */
    case COMPLEX_TYPE:
      if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
	  && TYPE_MODE (type) != TCmode)
	return GET_MODE_INNER (TYPE_MODE (type));
      else
	return VOIDmode;

    case REAL_TYPE:
      /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
	 mode if this is contained within an aggregate.  */
      if (nested && TYPE_MODE (type) != TFmode)
	return TYPE_MODE (type);
      else
	return VOIDmode;

    case ARRAY_TYPE:
      /* An array is homogeneous iff its element type is.  */
      return hfa_element_mode (TREE_TYPE (type), 1);

    case RECORD_TYPE:
    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      /* Every FIELD_DECL must recursively resolve to the same FP mode.  */
      for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
	{
	  if (TREE_CODE (t) != FIELD_DECL)
	    continue;

	  mode = hfa_element_mode (TREE_TYPE (t), 1);
	  if (know_element_mode)
	    {
	      if (mode != element_mode)
		return VOIDmode;
	    }
	  else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
	    return VOIDmode;
	  else
	    {
	      know_element_mode = 1;
	      element_mode = mode;
	    }
	}
      return element_mode;

    default:
      /* If we reach here, we probably have some front-end specific type
	 that the backend doesn't know about.  This can happen via the
	 aggregate_value_p call in init_function_start.  All we can do is
	 ignore unknown tree types.  */
      return VOIDmode;
    }

  /* Not reached: every switch case returns.  */
  return VOIDmode;
}
|
3931 |
|
|
|
3932 |
|
|
/* Return the number of words required to hold a quantity of TYPE and MODE
|
3933 |
|
|
when passed as an argument. */
|
3934 |
|
|
static int
|
3935 |
|
|
ia64_function_arg_words (tree type, enum machine_mode mode)
|
3936 |
|
|
{
|
3937 |
|
|
int words;
|
3938 |
|
|
|
3939 |
|
|
if (mode == BLKmode)
|
3940 |
|
|
words = int_size_in_bytes (type);
|
3941 |
|
|
else
|
3942 |
|
|
words = GET_MODE_SIZE (mode);
|
3943 |
|
|
|
3944 |
|
|
return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */
|
3945 |
|
|
}
|
3946 |
|
|
|
3947 |
|
|
/* Return the number of registers that should be skipped so the current
|
3948 |
|
|
argument (described by TYPE and WORDS) will be properly aligned.
|
3949 |
|
|
|
3950 |
|
|
Integer and float arguments larger than 8 bytes start at the next
|
3951 |
|
|
even boundary. Aggregates larger than 8 bytes start at the next
|
3952 |
|
|
even boundary if the aggregate has 16 byte alignment. Note that
|
3953 |
|
|
in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
|
3954 |
|
|
but are still to be aligned in registers.
|
3955 |
|
|
|
3956 |
|
|
??? The ABI does not specify how to handle aggregates with
|
3957 |
|
|
alignment from 9 to 15 bytes, or greater than 16. We handle them
|
3958 |
|
|
all as if they had 16 byte alignment. Such aggregates can occur
|
3959 |
|
|
only if gcc extensions are used. */
|
3960 |
|
|
static int
|
3961 |
|
|
ia64_function_arg_offset (CUMULATIVE_ARGS *cum, tree type, int words)
|
3962 |
|
|
{
|
3963 |
|
|
if ((cum->words & 1) == 0)
|
3964 |
|
|
return 0;
|
3965 |
|
|
|
3966 |
|
|
if (type
|
3967 |
|
|
&& TREE_CODE (type) != INTEGER_TYPE
|
3968 |
|
|
&& TREE_CODE (type) != REAL_TYPE)
|
3969 |
|
|
return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
|
3970 |
|
|
else
|
3971 |
|
|
return words > 1;
|
3972 |
|
|
}
|
3973 |
|
|
|
3974 |
|
|
/* Return rtx for register where argument is passed, or zero if it is passed
|
3975 |
|
|
on the stack. */
|
3976 |
|
|
/* ??? 128-bit quad-precision floats are always passed in general
|
3977 |
|
|
registers. */
|
3978 |
|
|
|
3979 |
|
|
rtx
ia64_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
		   int named, int incoming)
{
  /* INCOMING selects the callee-side (GR) vs caller-side (AR output)
     register window view of the same argument slots.  */
  int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
  int words = ia64_function_arg_words (type, mode);
  int offset = ia64_function_arg_offset (cum, type, words);
  enum machine_mode hfa_mode = VOIDmode;

  /* If all argument slots are used, then it must go on the stack.  */
  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
    return 0;

  /* Check for and handle homogeneous FP aggregates.  */
  if (type)
    hfa_mode = hfa_element_mode (type, 0);

  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
     and unprototyped hfas are passed specially.  */
  if (hfa_mode != VOIDmode && (! cum->prototype || named))
    {
      rtx loc[16];
      int i = 0;
      int fp_regs = cum->fp_regs;
      int int_regs = cum->words + offset;
      int hfa_size = GET_MODE_SIZE (hfa_mode);
      int byte_size;
      int args_byte_size;

      /* If prototyped, pass it in FR regs then GR regs.
	 If not prototyped, pass it in both FR and GR regs.

	 If this is an SFmode aggregate, then it is possible to run out of
	 FR regs while GR regs are still left.  In that case, we pass the
	 remaining part in the GR regs.  */

      /* Fill the FP regs.  We do this always.  We stop if we reach the end
	 of the argument, the last FP register, or the last argument slot.  */

      byte_size = ((mode == BLKmode)
		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
      args_byte_size = int_regs * UNITS_PER_WORD;
      offset = 0;
      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
	{
	  /* One EXPR_LIST per HFA element: (reg:hfa_mode FRn) at OFFSET.  */
	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
							      + fp_regs)),
				      GEN_INT (offset));
	  offset += hfa_size;
	  args_byte_size += hfa_size;
	  fp_regs++;
	}

      /* If no prototype, then the whole thing must go in GR regs.  */
      if (! cum->prototype)
	offset = 0;
      /* If this is an SFmode aggregate, then we might have some left over
	 that needs to go in GR regs.  */
      else if (byte_size != offset)
	int_regs += offset / UNITS_PER_WORD;

      /* Fill in the GR regs.  We must use DImode here, not the hfa mode.  */

      for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
	{
	  enum machine_mode gr_mode = DImode;
	  unsigned int gr_size;

	  /* If we have an odd 4 byte hunk because we ran out of FR regs,
	     then this goes in a GR reg left adjusted/little endian, right
	     adjusted/big endian.  */
	  /* ??? Currently this is handled wrong, because 4-byte hunks are
	     always right adjusted/little endian.  */
	  if (offset & 0x4)
	    gr_mode = SImode;
	  /* If we have an even 4 byte hunk because the aggregate is a
	     multiple of 4 bytes in size, then this goes in a GR reg right
	     adjusted/little endian.  */
	  else if (byte_size - offset == 4)
	    gr_mode = SImode;

	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (gr_mode, (basereg
							     + int_regs)),
				      GEN_INT (offset));

	  gr_size = GET_MODE_SIZE (gr_mode);
	  offset += gr_size;
	  /* Advance int_regs by however many whole words this hunk
	     consumed; a partial word advances only once it is filled.  */
	  if (gr_size == UNITS_PER_WORD
	      || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
	    int_regs++;
	  else if (gr_size > UNITS_PER_WORD)
	    int_regs += gr_size / UNITS_PER_WORD;
	}
      return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
    }

  /* Integral and aggregates go in general registers.  If we have run out of
     FR registers, then FP values must also go in general registers.  This can
     happen when we have a SFmode HFA.  */
  else if (mode == TFmode || mode == TCmode
	   || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
    {
      int byte_size = ((mode == BLKmode)
		       ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
      /* Sub-word big-endian aggregates need an explicit PARALLEL so the
	 bytes land in the right part of the register.  */
      if (BYTES_BIG_ENDIAN
	  && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
	  && byte_size < UNITS_PER_WORD
	  && byte_size > 0)
	{
	  rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
					  gen_rtx_REG (DImode,
						       (basereg + cum->words
							+ offset)),
					  const0_rtx);
	  return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
	}
      else
	return gen_rtx_REG (mode, basereg + cum->words + offset);

    }

  /* If there is a prototype, then FP values go in a FR register when
     named, and in a GR register when unnamed.  */
  else if (cum->prototype)
    {
      if (named)
	return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
      /* In big-endian mode, an anonymous SFmode value must be represented
	 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
	 the value into the high half of the general register.  */
      else if (BYTES_BIG_ENDIAN && mode == SFmode)
	return gen_rtx_PARALLEL (mode,
		 gen_rtvec (1,
		   gen_rtx_EXPR_LIST (VOIDmode,
		     gen_rtx_REG (DImode, basereg + cum->words + offset),
				      const0_rtx)));
      else
	return gen_rtx_REG (mode, basereg + cum->words + offset);
    }
  /* If there is no prototype, then FP values go in both FR and GR
     registers.  */
  else
    {
      /* See comment above.  */
      enum machine_mode inner_mode =
	(BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;

      rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (mode, (FR_ARG_FIRST
							  + cum->fp_regs)),
				      const0_rtx);
      rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (inner_mode,
						   (basereg + cum->words
						    + offset)),
				      const0_rtx);

      return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
    }
}
|
4142 |
|
|
|
4143 |
|
|
/* Return number of bytes, at the beginning of the argument, that must be
|
4144 |
|
|
put in registers. 0 is the argument is entirely in registers or entirely
|
4145 |
|
|
in memory. */
|
4146 |
|
|
|
4147 |
|
|
static int
|
4148 |
|
|
ia64_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
|
4149 |
|
|
tree type, bool named ATTRIBUTE_UNUSED)
|
4150 |
|
|
{
|
4151 |
|
|
int words = ia64_function_arg_words (type, mode);
|
4152 |
|
|
int offset = ia64_function_arg_offset (cum, type, words);
|
4153 |
|
|
|
4154 |
|
|
/* If all argument slots are used, then it must go on the stack. */
|
4155 |
|
|
if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
|
4156 |
|
|
return 0;
|
4157 |
|
|
|
4158 |
|
|
/* It doesn't matter whether the argument goes in FR or GR regs. If
|
4159 |
|
|
it fits within the 8 argument slots, then it goes entirely in
|
4160 |
|
|
registers. If it extends past the last argument slot, then the rest
|
4161 |
|
|
goes on the stack. */
|
4162 |
|
|
|
4163 |
|
|
if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
|
4164 |
|
|
return 0;
|
4165 |
|
|
|
4166 |
|
|
return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
|
4167 |
|
|
}
|
4168 |
|
|
|
4169 |
|
|
/* Update CUM to point after this argument. This is patterned after
|
4170 |
|
|
ia64_function_arg. */
|
4171 |
|
|
|
4172 |
|
|
void
ia64_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
			   tree type, int named)
{
  int words = ia64_function_arg_words (type, mode);
  int offset = ia64_function_arg_offset (cum, type, words);
  enum machine_mode hfa_mode = VOIDmode;

  /* If all arg slots are already full, then there is nothing to do.  */
  if (cum->words >= MAX_ARGUMENT_SLOTS)
    return;

  cum->words += words + offset;

  /* Check for and handle homogeneous FP aggregates.  */
  if (type)
    hfa_mode = hfa_element_mode (type, 0);

  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
     and unprototyped hfas are passed specially.  */
  if (hfa_mode != VOIDmode && (! cum->prototype || named))
    {
      int fp_regs = cum->fp_regs;
      /* This is the original value of cum->words + offset.  */
      int int_regs = cum->words - words;
      int hfa_size = GET_MODE_SIZE (hfa_mode);
      int byte_size;
      int args_byte_size;

      /* If prototyped, pass it in FR regs then GR regs.
	 If not prototyped, pass it in both FR and GR regs.

	 If this is an SFmode aggregate, then it is possible to run out of
	 FR regs while GR regs are still left.  In that case, we pass the
	 remaining part in the GR regs.  */

      /* Fill the FP regs.  We do this always.  We stop if we reach the end
	 of the argument, the last FP register, or the last argument slot.
	 This loop mirrors the FP-filling loop in ia64_function_arg, but
	 only counts register consumption instead of building rtxes.  */

      byte_size = ((mode == BLKmode)
		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
      args_byte_size = int_regs * UNITS_PER_WORD;
      offset = 0;
      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
	{
	  offset += hfa_size;
	  args_byte_size += hfa_size;
	  fp_regs++;
	}

      cum->fp_regs = fp_regs;
    }

  /* Integral and aggregates go in general registers.  So do TFmode FP values.
     If we have run out of FR registers, then other FP values must also go in
     general registers.  This can happen when we have a SFmode HFA.  */
  else if (mode == TFmode || mode == TCmode
	   || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
    cum->int_regs = cum->words;

  /* If there is a prototype, then FP values go in a FR register when
     named, and in a GR register when unnamed.  */
  else if (cum->prototype)
    {
      if (! named)
	cum->int_regs = cum->words;
      else
	/* ??? Complex types should not reach here.  */
	cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
    }
  /* If there is no prototype, then FP values go in both FR and GR
     registers.  */
  else
    {
      /* ??? Complex types should not reach here.  */
      cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
      cum->int_regs = cum->words;
    }
}
|
4252 |
|
|
|
4253 |
|
|
/* Arguments with alignment larger than 8 bytes start at the next even
|
4254 |
|
|
boundary. On ILP32 HPUX, TFmode arguments start on next even boundary
|
4255 |
|
|
even though their normal alignment is 8 bytes. See ia64_function_arg. */
|
4256 |
|
|
|
4257 |
|
|
int
|
4258 |
|
|
ia64_function_arg_boundary (enum machine_mode mode, tree type)
|
4259 |
|
|
{
|
4260 |
|
|
|
4261 |
|
|
if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
|
4262 |
|
|
return PARM_BOUNDARY * 2;
|
4263 |
|
|
|
4264 |
|
|
if (type)
|
4265 |
|
|
{
|
4266 |
|
|
if (TYPE_ALIGN (type) > PARM_BOUNDARY)
|
4267 |
|
|
return PARM_BOUNDARY * 2;
|
4268 |
|
|
else
|
4269 |
|
|
return PARM_BOUNDARY;
|
4270 |
|
|
}
|
4271 |
|
|
|
4272 |
|
|
if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
|
4273 |
|
|
return PARM_BOUNDARY * 2;
|
4274 |
|
|
else
|
4275 |
|
|
return PARM_BOUNDARY;
|
4276 |
|
|
}
|
4277 |
|
|
|
4278 |
|
|
/* True if it is OK to do sibling call optimization for the specified
|
4279 |
|
|
call expression EXP. DECL will be the called function, or NULL if
|
4280 |
|
|
this is an indirect call. */
|
4281 |
|
|
static bool
|
4282 |
|
|
ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
|
4283 |
|
|
{
|
4284 |
|
|
/* We can't perform a sibcall if the current function has the syscall_linkage
|
4285 |
|
|
attribute. */
|
4286 |
|
|
if (lookup_attribute ("syscall_linkage",
|
4287 |
|
|
TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
|
4288 |
|
|
return false;
|
4289 |
|
|
|
4290 |
|
|
/* We must always return with our current GP. This means we can
|
4291 |
|
|
only sibcall to functions defined in the current module. */
|
4292 |
|
|
return decl && (*targetm.binds_local_p) (decl);
|
4293 |
|
|
}
|
4294 |
|
|
|
4295 |
|
|
|
4296 |
|
|
/* Implement va_arg. */
|
4297 |
|
|
|
4298 |
|
|
static tree
ia64_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
{
  /* Variable sized types are passed by reference.  */
  if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
    {
      tree ptrtype = build_pointer_type (type);
      tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
      return build_va_arg_indirect_ref (addr);
    }

  /* Aggregate arguments with alignment larger than 8 bytes start at
     the next even boundary.  Integer and floating point arguments
     do so if they are larger than 8 bytes, whether or not they are
     also aligned larger than 8 bytes.  */
  if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
      ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
    {
      /* Emit valist = (valist + (2*UNITS_PER_WORD - 1)) & -(2*UNITS_PER_WORD),
	 i.e. round valist up to the next even-word boundary, before the
	 standard expansion fetches the value.  */
      tree t = build2 (PLUS_EXPR, TREE_TYPE (valist), valist,
		       build_int_cst (NULL_TREE, 2 * UNITS_PER_WORD - 1));
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
		  build_int_cst (NULL_TREE, -2 * UNITS_PER_WORD));
      t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
      gimplify_and_add (t, pre_p);
    }

  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}
|
4326 |
|
|
|
4327 |
|
|
/* Return 1 if function return value returned in memory. Return 0 if it is
|
4328 |
|
|
in a register. */
|
4329 |
|
|
|
4330 |
|
|
static bool
|
4331 |
|
|
ia64_return_in_memory (tree valtype, tree fntype ATTRIBUTE_UNUSED)
|
4332 |
|
|
{
|
4333 |
|
|
enum machine_mode mode;
|
4334 |
|
|
enum machine_mode hfa_mode;
|
4335 |
|
|
HOST_WIDE_INT byte_size;
|
4336 |
|
|
|
4337 |
|
|
mode = TYPE_MODE (valtype);
|
4338 |
|
|
byte_size = GET_MODE_SIZE (mode);
|
4339 |
|
|
if (mode == BLKmode)
|
4340 |
|
|
{
|
4341 |
|
|
byte_size = int_size_in_bytes (valtype);
|
4342 |
|
|
if (byte_size < 0)
|
4343 |
|
|
return true;
|
4344 |
|
|
}
|
4345 |
|
|
|
4346 |
|
|
/* Hfa's with up to 8 elements are returned in the FP argument registers. */
|
4347 |
|
|
|
4348 |
|
|
hfa_mode = hfa_element_mode (valtype, 0);
|
4349 |
|
|
if (hfa_mode != VOIDmode)
|
4350 |
|
|
{
|
4351 |
|
|
int hfa_size = GET_MODE_SIZE (hfa_mode);
|
4352 |
|
|
|
4353 |
|
|
if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
|
4354 |
|
|
return true;
|
4355 |
|
|
else
|
4356 |
|
|
return false;
|
4357 |
|
|
}
|
4358 |
|
|
else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
|
4359 |
|
|
return true;
|
4360 |
|
|
else
|
4361 |
|
|
return false;
|
4362 |
|
|
}
|
4363 |
|
|
|
4364 |
|
|
/* Return rtx for register that holds the function return value. */
|
4365 |
|
|
|
4366 |
|
|
rtx
ia64_function_value (tree valtype, tree func ATTRIBUTE_UNUSED)
{
  enum machine_mode mode;
  enum machine_mode hfa_mode;

  mode = TYPE_MODE (valtype);
  hfa_mode = hfa_element_mode (valtype, 0);

  /* Homogeneous FP aggregates are returned element-by-element in
     successive FP registers starting at FR_ARG_FIRST.  */
  if (hfa_mode != VOIDmode)
    {
      rtx loc[8];
      int i;
      int hfa_size;
      int byte_size;
      int offset;

      hfa_size = GET_MODE_SIZE (hfa_mode);
      byte_size = ((mode == BLKmode)
		   ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
      offset = 0;
      for (i = 0; offset < byte_size; i++)
	{
	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
		      gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
				      GEN_INT (offset));
	  offset += hfa_size;
	}
      return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
    }
  /* Non-TF/TC float scalars come back in the first FP register.  */
  else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
    return gen_rtx_REG (mode, FR_ARG_FIRST);
  else
    {
      bool need_parallel = false;

      /* In big-endian mode, we need to manage the layout of aggregates
	 in the registers so that we get the bits properly aligned in
	 the highpart of the registers.  */
      if (BYTES_BIG_ENDIAN
	  && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
	need_parallel = true;

      /* Something like struct S { long double x; char a[0] } is not an
	 HFA structure, and therefore doesn't go in fp registers.  But
	 the middle-end will give it XFmode anyway, and XFmode values
	 don't normally fit in integer registers.  So we need to smuggle
	 the value inside a parallel.  */
      else if (mode == XFmode || mode == XCmode || mode == RFmode)
	need_parallel = true;

      if (need_parallel)
	{
	  rtx loc[8];
	  int offset;
	  int bytesize;
	  int i;

	  offset = 0;
	  bytesize = int_size_in_bytes (valtype);
	  /* An empty PARALLEL is invalid here, but the return value
	     doesn't matter for empty structs.  */
	  if (bytesize == 0)
	    return gen_rtx_REG (mode, GR_RET_FIRST);
	  /* Spread the value across successive DImode GR return regs.  */
	  for (i = 0; offset < bytesize; i++)
	    {
	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
					  gen_rtx_REG (DImode,
						       GR_RET_FIRST + i),
					  GEN_INT (offset));
	      offset += UNITS_PER_WORD;
	    }
	  return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
	}

      return gen_rtx_REG (mode, GR_RET_FIRST);
    }
}
|
4444 |
|
|
|
4445 |
|
|
/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
|
4446 |
|
|
We need to emit DTP-relative relocations. */
|
4447 |
|
|
|
4448 |
|
|
static void
|
4449 |
|
|
ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
|
4450 |
|
|
{
|
4451 |
|
|
gcc_assert (size == 4 || size == 8);
|
4452 |
|
|
if (size == 4)
|
4453 |
|
|
fputs ("\tdata4.ua\t@dtprel(", file);
|
4454 |
|
|
else
|
4455 |
|
|
fputs ("\tdata8.ua\t@dtprel(", file);
|
4456 |
|
|
output_addr_const (file, x);
|
4457 |
|
|
fputs (")", file);
|
4458 |
|
|
}
|
4459 |
|
|
|
4460 |
|
|
/* Print a memory address as an operand to reference that memory location. */
|
4461 |
|
|
|
4462 |
|
|
/* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
|
4463 |
|
|
also call this from ia64_print_operand for memory addresses. */
|
4464 |
|
|
|
4465 |
|
|
void
ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
			    rtx address ATTRIBUTE_UNUSED)
{
  /* Intentionally empty: per the comment preceding this function, this
     hook is reached only for 'a' operands, and emitting nothing is the
     desired behavior.  */
}
|
4470 |
|
|
|
4471 |
|
|
/* Print an operand to an assembler instruction.
|
4472 |
|
|
C Swap and print a comparison operator.
|
4473 |
|
|
D Print an FP comparison operator.
|
4474 |
|
|
E Print 32 - constant, for SImode shifts as extract.
|
4475 |
|
|
e Print 64 - constant, for DImode rotates.
|
4476 |
|
|
F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
|
4477 |
|
|
a floating point register emitted normally.
|
4478 |
|
|
I Invert a predicate register by adding 1.
|
4479 |
|
|
J Select the proper predicate register for a condition.
|
4480 |
|
|
j Select the inverse predicate register for a condition.
|
4481 |
|
|
O Append .acq for volatile load.
|
4482 |
|
|
P Postincrement of a MEM.
|
4483 |
|
|
Q Append .rel for volatile store.
|
4484 |
|
|
S Shift amount for shladd instruction.
|
4485 |
|
|
T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
|
4486 |
|
|
for Intel assembler.
|
4487 |
|
|
U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
|
4488 |
|
|
for Intel assembler.
|
4489 |
|
|
X A pair of floating point registers.
|
4490 |
|
|
r Print register name, or constant 0 as r0. HP compatibility for
|
4491 |
|
|
Linux kernel.
|
4492 |
|
|
v Print vector constant value as an 8-byte integer value. */
|
4493 |
|
|
|
4494 |
|
|
void
ia64_print_operand (FILE * file, rtx x, int code)
{
  const char *str;

  /* First switch: handle the operand-modifier CODE letters documented
     in the comment above.  CODE == 0 and 'v' fall through to the
     generic printing switch below; most cases print and return.  */
  switch (code)
    {
    case 0:
      /* Handled below.  */
      break;

    case 'C':
      /* Swap and print a comparison operator.  */
      {
	enum rtx_code c = swap_condition (GET_CODE (x));
	fputs (GET_RTX_NAME (c), file);
	return;
      }

    case 'D':
      /* Print an FP comparison operator; NE/UNORDERED/ORDERED have
	 special spellings, everything else uses the rtx name.  */
      switch (GET_CODE (x))
	{
	case NE:
	  str = "neq";
	  break;
	case UNORDERED:
	  str = "unord";
	  break;
	case ORDERED:
	  str = "ord";
	  break;
	default:
	  str = GET_RTX_NAME (GET_CODE (x));
	  break;
	}
      fputs (str, file);
      return;

    case 'E':
      /* 32 - constant, for SImode shifts as extract.  */
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
      return;

    case 'e':
      /* 64 - constant, for DImode rotates.  */
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
      return;

    case 'F':
      /* FP constant 0.0 as f0, 1.0 as f1, otherwise an FP register.  */
      if (x == CONST0_RTX (GET_MODE (x)))
	str = reg_names [FR_REG (0)];
      else if (x == CONST1_RTX (GET_MODE (x)))
	str = reg_names [FR_REG (1)];
      else
	{
	  gcc_assert (GET_CODE (x) == REG);
	  str = reg_names [REGNO (x)];
	}
      fputs (str, file);
      return;

    case 'I':
      /* Invert a predicate register by adding 1.  */
      fputs (reg_names [REGNO (x) + 1], file);
      return;

    case 'J':
    case 'j':
      /* Proper ('J') or inverse ('j') predicate register for a
	 condition; EQ selects the complementary register.  */
      {
	unsigned int regno = REGNO (XEXP (x, 0));
	if (GET_CODE (x) == EQ)
	  regno += 1;
	if (code == 'j')
	  regno ^= 1;
	fputs (reg_names [regno], file);
      }
      return;

    case 'O':
      /* Append .acq for volatile load.  */
      if (MEM_VOLATILE_P (x))
	fputs(".acq", file);
      return;

    case 'P':
      /* Postincrement of a MEM: emit ", <amount>" or ", <reg>".  */
      {
	HOST_WIDE_INT value;

	switch (GET_CODE (XEXP (x, 0)))
	  {
	  default:
	    return;

	  case POST_MODIFY:
	    x = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    if (GET_CODE (x) == CONST_INT)
	      value = INTVAL (x);
	    else
	      {
		gcc_assert (GET_CODE (x) == REG);
		fprintf (file, ", %s", reg_names[REGNO (x)]);
		return;
	      }
	    break;

	  case POST_INC:
	    value = GET_MODE_SIZE (GET_MODE (x));
	    break;

	  case POST_DEC:
	    value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
	    break;
	  }

	fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
	return;
      }

    case 'Q':
      /* Append .rel for volatile store.  */
      if (MEM_VOLATILE_P (x))
	fputs(".rel", file);
      return;

    case 'S':
      /* Shift amount for shladd instruction.  */
      fprintf (file, "%d", exact_log2 (INTVAL (x)));
      return;

    case 'T':
      /* 8-bit sign-extended constant as a 32-bit unsigned number, for
	 the Intel assembler only; GNU as falls through to default.  */
      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
	{
	  fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
	  return;
	}
      break;

    case 'U':
      /* 8-bit sign-extended constant as a 64-bit unsigned number, for
	 the Intel assembler only.  */
      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
	{
	  const char *prefix = "0x";
	  if (INTVAL (x) & 0x80000000)
	    {
	      /* Emit the sign-extension high word by hand.  */
	      fprintf (file, "0xffffffff");
	      prefix = "";
	    }
	  fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
	  return;
	}
      break;

    case 'X':
      /* A pair of floating point registers.  */
      {
	unsigned int regno = REGNO (x);
	fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]);
      }
      return;

    case 'r':
      /* If this operand is the constant zero, write it as register zero.
	 Any register, zero, or CONST_INT value is OK here.  */
      if (GET_CODE (x) == REG)
	fputs (reg_names[REGNO (x)], file);
      else if (x == CONST0_RTX (GET_MODE (x)))
	fputs ("r0", file);
      else if (GET_CODE (x) == CONST_INT)
	output_addr_const (file, x);
      else
	output_operand_lossage ("invalid %%r value");
      return;

    case 'v':
      /* Vector constant as an 8-byte integer; reduced to DImode and
	 printed by the generic switch below.  */
      gcc_assert (GET_CODE (x) == CONST_VECTOR);
      x = simplify_subreg (DImode, x, GET_MODE (x), 0);
      break;

    case '+':
      {
	const char *which;

	/* For conditional branches, returns or calls, substitute
	   sptk, dptk, dpnt, or spnt for %s.  */
	x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
	if (x)
	  {
	    int pred_val = INTVAL (XEXP (x, 0));

	    /* Guess top and bottom 10% statically predicted.  */
	    if (pred_val < REG_BR_PROB_BASE / 50
		&& br_prob_note_reliable_p (x))
	      which = ".spnt";
	    else if (pred_val < REG_BR_PROB_BASE / 2)
	      which = ".dpnt";
	    else if (pred_val < REG_BR_PROB_BASE / 100 * 98
		     || !br_prob_note_reliable_p (x))
	      which = ".dptk";
	    else
	      which = ".sptk";
	  }
	else if (GET_CODE (current_output_insn) == CALL_INSN)
	  which = ".sptk";
	else
	  which = ".dptk";

	fputs (which, file);
	return;
      }

    case ',':
      /* Emit the "(pN) " qualifier for a predicated insn, if any.  */
      x = current_insn_predicate;
      if (x)
	{
	  unsigned int regno = REGNO (XEXP (x, 0));
	  if (GET_CODE (x) == EQ)
	    regno += 1;
	  fprintf (file, "(%s) ", reg_names [regno]);
	}
      return;

    default:
      output_operand_lossage ("ia64_print_operand: unknown code");
      return;
    }

  /* Generic printing for CODE 0 (and 'v' after reduction): registers,
     memory operands, and address constants.  */
  switch (GET_CODE (x))
    {
    /* This happens for the spill/restore instructions.  */
    case POST_INC:
    case POST_DEC:
    case POST_MODIFY:
      x = XEXP (x, 0);
      /* ... fall through ...  */

    case REG:
      fputs (reg_names [REGNO (x)], file);
      break;

    case MEM:
      {
	rtx addr = XEXP (x, 0);
	if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
	  addr = XEXP (addr, 0);
	fprintf (file, "[%s]", reg_names [REGNO (addr)]);
	break;
      }

    default:
      output_addr_const (file, x);
      break;
    }

  return;
}
|
4740 |
|
|
|
4741 |
|
|
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */
/* ??? This is incomplete.  */

static bool
ia64_rtx_costs (rtx x, int code, int outer_code, int *total)
{
  switch (code)
    {
    case CONST_INT:
      /* The cost of an integer constant depends on context: different
	 instruction formats accept different immediate ranges, which
	 the CONST_OK_FOR_* constraint macros encode.  */
      switch (outer_code)
	{
	case SET:
	  /* Free if it fits the move-immediate constraint (J);
	     otherwise it costs a separate instruction.  */
	  *total = CONST_OK_FOR_J (INTVAL (x)) ? 0 : COSTS_N_INSNS (1);
	  return true;
	case PLUS:
	  if (CONST_OK_FOR_I (INTVAL (x)))
	    *total = 0;
	  else if (CONST_OK_FOR_J (INTVAL (x)))
	    *total = 1;
	  else
	    *total = COSTS_N_INSNS (1);
	  return true;
	default:
	  if (CONST_OK_FOR_K (INTVAL (x)) || CONST_OK_FOR_L (INTVAL (x)))
	    *total = 0;
	  else
	    *total = COSTS_N_INSNS (1);
	  return true;
	}

    case CONST_DOUBLE:
      *total = COSTS_N_INSNS (1);
      return true;

    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      /* Symbolic addresses are materialized with a short sequence;
	 charge three insns.  */
      *total = COSTS_N_INSNS (3);
      return true;

    case MULT:
      /* For multiplies wider than HImode, we have to go to the FPU,
	 which normally involves copies.  Plus there's the latency
	 of the multiply itself, and the latency of the instructions to
	 transfer integer regs to FP regs.  */
      /* ??? Check for FP mode.  */
      if (GET_MODE_SIZE (GET_MODE (x)) > 2)
	*total = COSTS_N_INSNS (10);
      else
	*total = COSTS_N_INSNS (2);
      return true;

    case PLUS:
    case MINUS:
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (1);
      return true;

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      /* We make divide expensive, so that divide-by-constant will be
	 optimized to a multiply.  */
      *total = COSTS_N_INSNS (60);
      return true;

    default:
      /* Unknown code: let the caller recurse into subexpressions.  */
      return false;
    }
}
|
4816 |
|
|
|
4817 |
|
|
/* Calculate the cost of moving data from a register in class FROM to
   one in class TO, using MODE.  */

int
ia64_register_move_cost (enum machine_mode mode, enum reg_class from,
			 enum reg_class to)
{
  /* ADDL_REGS is the same as GR_REGS for movement purposes.  */
  if (to == ADDL_REGS)
    to = GR_REGS;
  if (from == ADDL_REGS)
    from = GR_REGS;

  /* All costs are symmetric, so reduce cases by putting the
     lower number class as the destination.  */
  if (from < to)
    {
      enum reg_class tmp = to;
      to = from, from = tmp;
    }

  /* Moving from FR<->GR in XFmode must be more expensive than 2,
     so that we get secondary memory reloads.  Between FR_REGS,
     we have to make this at least as expensive as MEMORY_MOVE_COST
     to avoid spectacularly poor register class preferencing.  */
  if (mode == XFmode || mode == RFmode)
    {
      if (to != GR_REGS || from != GR_REGS)
	return MEMORY_MOVE_COST (mode, to, 0);
      else
	return 3;
    }

  switch (to)
    {
    case PR_REGS:
      /* Moving between PR registers takes two insns.  */
      if (from == PR_REGS)
	return 3;
      /* Moving between PR and anything but GR is impossible.  */
      if (from != GR_REGS)
	return MEMORY_MOVE_COST (mode, to, 0);
      break;

    case BR_REGS:
      /* Moving between BR and anything but GR is impossible.  */
      if (from != GR_REGS && from != GR_AND_BR_REGS)
	return MEMORY_MOVE_COST (mode, to, 0);
      break;

    case AR_I_REGS:
    case AR_M_REGS:
      /* Moving between AR and anything but GR is impossible.  */
      if (from != GR_REGS)
	return MEMORY_MOVE_COST (mode, to, 0);
      break;

    case GR_REGS:
    case FR_REGS:
    case FP_REGS:
    case GR_AND_FR_REGS:
    case GR_AND_BR_REGS:
    case ALL_REGS:
      break;

    default:
      gcc_unreachable ();
    }

  /* Default inter-class move cost.  */
  return 2;
}
|
4888 |
|
|
|
4889 |
|
|
/* Implement PREFERRED_RELOAD_CLASS. Place additional restrictions on CLASS
|
4890 |
|
|
to use when copying X into that class. */
|
4891 |
|
|
|
4892 |
|
|
enum reg_class
|
4893 |
|
|
ia64_preferred_reload_class (rtx x, enum reg_class class)
|
4894 |
|
|
{
|
4895 |
|
|
switch (class)
|
4896 |
|
|
{
|
4897 |
|
|
case FR_REGS:
|
4898 |
|
|
case FP_REGS:
|
4899 |
|
|
/* Don't allow volatile mem reloads into floating point registers.
|
4900 |
|
|
This is defined to force reload to choose the r/m case instead
|
4901 |
|
|
of the f/f case when reloading (set (reg fX) (mem/v)). */
|
4902 |
|
|
if (MEM_P (x) && MEM_VOLATILE_P (x))
|
4903 |
|
|
return NO_REGS;
|
4904 |
|
|
|
4905 |
|
|
/* Force all unrecognized constants into the constant pool. */
|
4906 |
|
|
if (CONSTANT_P (x))
|
4907 |
|
|
return NO_REGS;
|
4908 |
|
|
break;
|
4909 |
|
|
|
4910 |
|
|
case AR_M_REGS:
|
4911 |
|
|
case AR_I_REGS:
|
4912 |
|
|
if (!OBJECT_P (x))
|
4913 |
|
|
return NO_REGS;
|
4914 |
|
|
break;
|
4915 |
|
|
|
4916 |
|
|
default:
|
4917 |
|
|
break;
|
4918 |
|
|
}
|
4919 |
|
|
|
4920 |
|
|
return class;
|
4921 |
|
|
}
|
4922 |
|
|
|
4923 |
|
|
/* This function returns the register class required for a secondary
   register when copying between one of the registers in CLASS, and X,
   using MODE.  A return value of NO_REGS means that no secondary register
   is required.  */

enum reg_class
ia64_secondary_reload_class (enum reg_class class,
			     enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  /* -1 means X is not (a subreg of) a hard register.  */
  int regno = -1;

  if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
    regno = true_regnum (x);

  switch (class)
    {
    case BR_REGS:
    case AR_M_REGS:
    case AR_I_REGS:
      /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
	 interaction.  We end up with two pseudos with overlapping lifetimes
	 both of which are equiv to the same constant, and both which need
	 to be in BR_REGS.  This seems to be a cse bug.  cse_basic_block_end
	 changes depending on the path length, which means the qty_first_reg
	 check in make_regs_eqv can give different answers at different times.
	 At some point I'll probably need a reload_indi pattern to handle
	 this.

	 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
	 wound up with a FP register from GR_AND_FR_REGS.  Extend that to all
	 non-general registers for good measure.  */
      if (regno >= 0 && ! GENERAL_REGNO_P (regno))
	return GR_REGS;

      /* This is needed if a pseudo used as a call_operand gets spilled to a
	 stack slot.  */
      if (GET_CODE (x) == MEM)
	return GR_REGS;
      break;

    case FR_REGS:
    case FP_REGS:
      /* Need to go through general registers to get to other class regs.  */
      if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
	return GR_REGS;

      /* This can happen when a paradoxical subreg is an operand to the
	 muldi3 pattern.  */
      /* ??? This shouldn't be necessary after instruction scheduling is
	 enabled, because paradoxical subregs are not accepted by
	 register_operand when INSN_SCHEDULING is defined.  Or alternatively,
	 stop the paradoxical subreg stupidity in the *_operand functions
	 in recog.c.  */
      if (GET_CODE (x) == MEM
	  && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
	      || GET_MODE (x) == QImode))
	return GR_REGS;

      /* This can happen because of the ior/and/etc patterns that accept FP
	 registers as operands.  If the third operand is a constant, then it
	 needs to be reloaded into a FP register.  */
      if (GET_CODE (x) == CONST_INT)
	return GR_REGS;

      /* This can happen because of register elimination in a muldi3 insn.
	 E.g. `26107 * (unsigned long)&u'.  */
      if (GET_CODE (x) == PLUS)
	return GR_REGS;
      break;

    case PR_REGS:
      /* ??? This happens if we cse/gcse a BImode value across a call,
	 and the function has a nonlocal goto.  This is because global
	 does not allocate call crossing pseudos to hard registers when
	 current_function_has_nonlocal_goto is true.  This is relatively
	 common for C++ programs that use exceptions.  To reproduce,
	 return NO_REGS and compile libstdc++.  */
      if (GET_CODE (x) == MEM)
	return GR_REGS;

      /* This can happen when we take a BImode subreg of a DImode value,
	 and that DImode value winds up in some non-GR register.  */
      if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
	return GR_REGS;
      break;

    default:
      break;
    }

  return NO_REGS;
}
|
5015 |
|
|
|
5016 |
|
|
|
5017 |
|
|
/* Emit text to declare externally defined variables and functions, because
   the Intel assembler does not support undefined externals.  */

void
ia64_asm_output_external (FILE *file, tree decl, const char *name)
{
  int save_referenced;

  /* GNU as does not need anything here, but the HP linker does need
     something for external functions.  */

  /* Note: strstr (name, prefix) == name is a prefix test — true only
     when NAME begins with "__builtin_".  */
  if (TARGET_GNU_AS
      && (!TARGET_HPUX_LD
	  || TREE_CODE (decl) != FUNCTION_DECL
	  || strstr (name, "__builtin_") == name))
    return;

  /* ??? The Intel assembler creates a reference that needs to be satisfied by
     the linker when we do this, so we need to be careful not to do this for
     builtin functions which have no library equivalent.  Unfortunately, we
     can't tell here whether or not a function will actually be called by
     expand_expr, so we pull in library functions even if we may not need
     them later.  */
  if (! strcmp (name, "__builtin_next_arg")
      || ! strcmp (name, "alloca")
      || ! strcmp (name, "__builtin_constant_p")
      || ! strcmp (name, "__builtin_args_info"))
    return;

  if (TARGET_HPUX_LD)
    ia64_hpux_add_extern_decl (decl);
  else
    {
      /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
	 restore it.  */
      save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
      if (TREE_CODE (decl) == FUNCTION_DECL)
	ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
      (*targetm.asm_out.globalize_label) (file, name);
      TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
    }
}
|
5059 |
|
|
|
5060 |
|
|
/* Parse the -mfixed-range= option string. */
|
5061 |
|
|
|
5062 |
|
|
static void
|
5063 |
|
|
fix_range (const char *const_str)
|
5064 |
|
|
{
|
5065 |
|
|
int i, first, last;
|
5066 |
|
|
char *str, *dash, *comma;
|
5067 |
|
|
|
5068 |
|
|
/* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
|
5069 |
|
|
REG2 are either register names or register numbers. The effect
|
5070 |
|
|
of this option is to mark the registers in the range from REG1 to
|
5071 |
|
|
REG2 as ``fixed'' so they won't be used by the compiler. This is
|
5072 |
|
|
used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
|
5073 |
|
|
|
5074 |
|
|
i = strlen (const_str);
|
5075 |
|
|
str = (char *) alloca (i + 1);
|
5076 |
|
|
memcpy (str, const_str, i + 1);
|
5077 |
|
|
|
5078 |
|
|
while (1)
|
5079 |
|
|
{
|
5080 |
|
|
dash = strchr (str, '-');
|
5081 |
|
|
if (!dash)
|
5082 |
|
|
{
|
5083 |
|
|
warning (0, "value of -mfixed-range must have form REG1-REG2");
|
5084 |
|
|
return;
|
5085 |
|
|
}
|
5086 |
|
|
*dash = '\0';
|
5087 |
|
|
|
5088 |
|
|
comma = strchr (dash + 1, ',');
|
5089 |
|
|
if (comma)
|
5090 |
|
|
*comma = '\0';
|
5091 |
|
|
|
5092 |
|
|
first = decode_reg_name (str);
|
5093 |
|
|
if (first < 0)
|
5094 |
|
|
{
|
5095 |
|
|
warning (0, "unknown register name: %s", str);
|
5096 |
|
|
return;
|
5097 |
|
|
}
|
5098 |
|
|
|
5099 |
|
|
last = decode_reg_name (dash + 1);
|
5100 |
|
|
if (last < 0)
|
5101 |
|
|
{
|
5102 |
|
|
warning (0, "unknown register name: %s", dash + 1);
|
5103 |
|
|
return;
|
5104 |
|
|
}
|
5105 |
|
|
|
5106 |
|
|
*dash = '-';
|
5107 |
|
|
|
5108 |
|
|
if (first > last)
|
5109 |
|
|
{
|
5110 |
|
|
warning (0, "%s-%s is an empty range", str, dash + 1);
|
5111 |
|
|
return;
|
5112 |
|
|
}
|
5113 |
|
|
|
5114 |
|
|
for (i = first; i <= last; ++i)
|
5115 |
|
|
fixed_regs[i] = call_used_regs[i] = 1;
|
5116 |
|
|
|
5117 |
|
|
if (!comma)
|
5118 |
|
|
break;
|
5119 |
|
|
|
5120 |
|
|
*comma = ',';
|
5121 |
|
|
str = comma + 1;
|
5122 |
|
|
}
|
5123 |
|
|
}
|
5124 |
|
|
|
5125 |
|
|
/* Implement TARGET_HANDLE_OPTION.  CODE identifies the option, ARG its
   string argument, VALUE its integer argument.  Returns true to indicate
   the option was handled (errors are reported via error ()).  */

static bool
ia64_handle_option (size_t code, const char *arg, int value)
{
  switch (code)
    {
    case OPT_mfixed_range_:
      fix_range (arg);
      return true;

    case OPT_mtls_size_:
      /* Only 14-, 22-, and 64-bit TLS offsets are accepted.  */
      if (value != 14 && value != 22 && value != 64)
	error ("bad value %<%s%> for -mtls-size= switch", arg);
      return true;

    case OPT_mtune_:
      {
	/* Map processor names (and their nicknames) to tuning targets.  */
	static struct pta
	  {
	    const char *name;		/* processor name or nickname.  */
	    enum processor_type processor;
	  }
	const processor_alias_table[] =
	  {
	    {"itanium", PROCESSOR_ITANIUM},
	    {"itanium1", PROCESSOR_ITANIUM},
	    {"merced", PROCESSOR_ITANIUM},
	    {"itanium2", PROCESSOR_ITANIUM2},
	    {"mckinley", PROCESSOR_ITANIUM2},
	  };
	int const pta_size = ARRAY_SIZE (processor_alias_table);
	int i;

	for (i = 0; i < pta_size; i++)
	  if (!strcmp (arg, processor_alias_table[i].name))
	    {
	      ia64_tune = processor_alias_table[i].processor;
	      break;
	    }
	if (i == pta_size)
	  error ("bad value %<%s%> for -mtune= switch", arg);
	return true;
      }

    default:
      /* Options without special handling here are still accepted.  */
      return true;
    }
}
|
5174 |
|
|
|
5175 |
|
|
/* Implement OVERRIDE_OPTIONS.  Finalize option-derived state after all
   command-line options have been processed.  */

void
ia64_override_options (void)
{
  /* Auto-pic addressing implies a constant gp.  */
  if (TARGET_AUTO_PIC)
    target_flags |= MASK_CONST_GP;

  if (TARGET_INLINE_SQRT == INL_MIN_LAT)
    {
      warning (0, "not yet implemented: latency-optimized inline square root");
      /* Fall back to the max-throughput expansion instead.  */
      TARGET_INLINE_SQRT = INL_MAX_THR;
    }

  /* Save the post-reload scheduling flag and suppress the generic pass;
     presumably the port reschedules on its own later — TODO confirm
     against the machine-dependent reorg pass.  */
  ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
  flag_schedule_insns_after_reload = 0;

  /* Small-data section threshold: -G value if given, else the default.  */
  ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;

  init_machine_status = ia64_init_machine_status;
}
|
5196 |
|
|
|
5197 |
|
|
/* Allocate a zero-initialized, garbage-collected machine_function record;
   installed as init_machine_status by ia64_override_options.  */

static struct machine_function *
ia64_init_machine_status (void)
{
  return ggc_alloc_cleared (sizeof (struct machine_function));
}
|
5202 |
|
|
|
5203 |
|
|
static enum attr_itanium_class ia64_safe_itanium_class (rtx);
|
5204 |
|
|
static enum attr_type ia64_safe_type (rtx);
|
5205 |
|
|
|
5206 |
|
|
static enum attr_itanium_class
|
5207 |
|
|
ia64_safe_itanium_class (rtx insn)
|
5208 |
|
|
{
|
5209 |
|
|
if (recog_memoized (insn) >= 0)
|
5210 |
|
|
return get_attr_itanium_class (insn);
|
5211 |
|
|
else
|
5212 |
|
|
return ITANIUM_CLASS_UNKNOWN;
|
5213 |
|
|
}
|
5214 |
|
|
|
5215 |
|
|
static enum attr_type
|
5216 |
|
|
ia64_safe_type (rtx insn)
|
5217 |
|
|
{
|
5218 |
|
|
if (recog_memoized (insn) >= 0)
|
5219 |
|
|
return get_attr_type (insn);
|
5220 |
|
|
else
|
5221 |
|
|
return TYPE_UNKNOWN;
|
5222 |
|
|
}
|
5223 |
|
|
|
5224 |
|
|
/* The following collection of routines emit instruction group stop bits as
|
5225 |
|
|
necessary to avoid dependencies. */
|
5226 |
|
|
|
5227 |
|
|
/* Need to track some additional registers as far as serialization is
|
5228 |
|
|
concerned so we can properly handle br.call and br.ret. We could
|
5229 |
|
|
make these registers visible to gcc, but since these registers are
|
5230 |
|
|
never explicitly used in gcc generated code, it seems wasteful to
|
5231 |
|
|
do so (plus it would make the call and return patterns needlessly
|
5232 |
|
|
complex). */
|
5233 |
|
|
#define REG_RP (BR_REG (0))
|
5234 |
|
|
#define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
|
5235 |
|
|
/* This is used for volatile asms which may require a stop bit immediately
|
5236 |
|
|
before and after them. */
|
5237 |
|
|
#define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
|
5238 |
|
|
#define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
|
5239 |
|
|
#define NUM_REGS (AR_UNAT_BIT_0 + 64)
|
5240 |
|
|
|
5241 |
|
|
/* For each register, we keep track of how it has been written in the
|
5242 |
|
|
current instruction group.
|
5243 |
|
|
|
5244 |
|
|
If a register is written unconditionally (no qualifying predicate),
|
5245 |
|
|
WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
|
5246 |
|
|
|
5247 |
|
|
If a register is written if its qualifying predicate P is true, we
|
5248 |
|
|
set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
|
5249 |
|
|
may be written again by the complement of P (P^1) and when this happens,
|
5250 |
|
|
WRITE_COUNT gets set to 2.
|
5251 |
|
|
|
5252 |
|
|
The result of this is that whenever an insn attempts to write a register
|
5253 |
|
|
whose WRITE_COUNT is two, we need to issue an insn group barrier first.
|
5254 |
|
|
|
5255 |
|
|
If a predicate register is written by a floating-point insn, we set
|
5256 |
|
|
WRITTEN_BY_FP to true.
|
5257 |
|
|
|
5258 |
|
|
If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
|
5259 |
|
|
to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
|
5260 |
|
|
|
5261 |
|
|
struct reg_write_state
{
  unsigned int write_count : 2;		/* 0 = not yet written; 1 = written
					   under one predicate; 2 = written
					   unconditionally or under both
					   complementary predicates.  */
  unsigned int first_pred : 16;		/* Predicate regno of the first
					   write; meaningful only while
					   write_count == 1.  */
  unsigned int written_by_fp : 1;	/* Written by a floating-point insn.  */
  unsigned int written_by_and : 1;	/* Written by an AND.ORCM insn.  */
  unsigned int written_by_or : 1;	/* Written by an OR.ANDCM insn.  */
};
|
5269 |
|
|
|
5270 |
|
|
/* Cumulative info for the current instruction group. */
|
5271 |
|
|
struct reg_write_state rws_sum[NUM_REGS];
|
5272 |
|
|
/* Info for the current instruction. This gets copied to rws_sum after a
|
5273 |
|
|
stop bit is emitted. */
|
5274 |
|
|
struct reg_write_state rws_insn[NUM_REGS];
|
5275 |
|
|
|
5276 |
|
|
/* Indicates whether this is the first instruction after a stop bit,
|
5277 |
|
|
in which case we don't need another stop bit. Without this,
|
5278 |
|
|
ia64_variable_issue will die when scheduling an alloc. */
|
5279 |
|
|
static int first_instruction;
|
5280 |
|
|
|
5281 |
|
|
/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
|
5282 |
|
|
RTL for one instruction. */
|
5283 |
|
|
struct reg_flags
{
  unsigned int is_write : 1;	/* Is register being written?  */
  unsigned int is_fp : 1;	/* Is register used as part of an fp op?  */
  unsigned int is_branch : 1;	/* Is register used as part of a branch?  */
  unsigned int is_and : 1;	/* Is register used as part of and.orcm?  */
  unsigned int is_or : 1;	/* Is register used as part of or.andcm?  */
  unsigned int is_sibcall : 1;	/* Nonzero for a sibling call, zero for a
				   normal call.  */
};
|
5292 |
|
|
|
5293 |
|
|
static void rws_update (struct reg_write_state *, int, struct reg_flags, int);
|
5294 |
|
|
static int rws_access_regno (int, struct reg_flags, int);
|
5295 |
|
|
static int rws_access_reg (rtx, struct reg_flags, int);
|
5296 |
|
|
static void update_set_flags (rtx, struct reg_flags *);
|
5297 |
|
|
static int set_src_needs_barrier (rtx, struct reg_flags, int);
|
5298 |
|
|
static int rtx_needs_barrier (rtx, struct reg_flags, int);
|
5299 |
|
|
static void init_insn_group_barriers (void);
|
5300 |
|
|
static int group_barrier_needed (rtx);
|
5301 |
|
|
static int safe_group_barrier_needed (rtx);
|
5302 |
|
|
|
5303 |
|
|
/* Update *RWS for REGNO, which is being written by the current instruction,
   with predicate PRED, and associated register flags in FLAGS.  PRED == 0
   means the write is unconditional.  */

static void
rws_update (struct reg_write_state *rws, int regno, struct reg_flags flags, int pred)
{
  /* A predicated write bumps the count; an unconditional write saturates
     it at 2 immediately.  */
  if (pred)
    rws[regno].write_count++;
  else
    rws[regno].write_count = 2;
  rws[regno].written_by_fp |= flags.is_fp;
  /* ??? Not tracking and/or across differing predicates.  */
  rws[regno].written_by_and = flags.is_and;
  rws[regno].written_by_or = flags.is_or;
  rws[regno].first_pred = pred;
}
|
5319 |
|
|
|
5320 |
|
|
/* Handle an access to register REGNO of type FLAGS using predicate register
   PRED.  Update rws_insn and rws_sum arrays.  Return 1 if this access creates
   a dependency with an earlier instruction in the same group.  */

static int
rws_access_regno (int regno, struct reg_flags flags, int pred)
{
  int need_barrier = 0;

  gcc_assert (regno < NUM_REGS);

  /* The and.orcm/or.andcm exceptions only apply to predicate regs.  */
  if (! PR_REGNO_P (regno))
    flags.is_and = flags.is_or = 0;

  if (flags.is_write)
    {
      int write_count;

      /* One insn writes same reg multiple times?  */
      gcc_assert (!rws_insn[regno].write_count);

      /* Update info for current instruction.  */
      rws_update (rws_insn, regno, flags, pred);
      write_count = rws_sum[regno].write_count;

      switch (write_count)
	{
	case 0:
	  /* The register has not been written yet.  */
	  rws_update (rws_sum, regno, flags, pred);
	  break;

	case 1:
	  /* The register has been written via a predicate.  If this is
	     not a complementary predicate, then we need a barrier.  */
	  /* ??? This assumes that P and P+1 are always complementary
	     predicates for P even.  */
	  if (flags.is_and && rws_sum[regno].written_by_and)
	    ;
	  else if (flags.is_or && rws_sum[regno].written_by_or)
	    ;
	  else if ((rws_sum[regno].first_pred ^ 1) != pred)
	    need_barrier = 1;
	  rws_update (rws_sum, regno, flags, pred);
	  break;

	case 2:
	  /* The register has been unconditionally written already.  We
	     need a barrier.  */
	  if (flags.is_and && rws_sum[regno].written_by_and)
	    ;
	  else if (flags.is_or && rws_sum[regno].written_by_or)
	    ;
	  else
	    need_barrier = 1;
	  rws_sum[regno].written_by_and = flags.is_and;
	  rws_sum[regno].written_by_or = flags.is_or;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      /* A read: check for RAW hazards against earlier writes.  */
      if (flags.is_branch)
	{
	  /* Branches have several RAW exceptions that allow to avoid
	     barriers.  */

	  if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
	    /* RAW dependencies on branch regs are permissible as long
	       as the writer is a non-branch instruction.  Since we
	       never generate code that uses a branch register written
	       by a branch instruction, handling this case is
	       easy.  */
	    return 0;

	  if (REGNO_REG_CLASS (regno) == PR_REGS
	      && ! rws_sum[regno].written_by_fp)
	    /* The predicates of a branch are available within the
	       same insn group as long as the predicate was written by
	       something other than a floating-point instruction.  */
	    return 0;
	}

      if (flags.is_and && rws_sum[regno].written_by_and)
	return 0;
      if (flags.is_or && rws_sum[regno].written_by_or)
	return 0;

      switch (rws_sum[regno].write_count)
	{
	case 0:
	  /* The register has not been written yet.  */
	  break;

	case 1:
	  /* The register has been written via a predicate.  If this is
	     not a complementary predicate, then we need a barrier.  */
	  /* ??? This assumes that P and P+1 are always complementary
	     predicates for P even.  */
	  if ((rws_sum[regno].first_pred ^ 1) != pred)
	    need_barrier = 1;
	  break;

	case 2:
	  /* The register has been unconditionally written already.  We
	     need a barrier.  */
	  need_barrier = 1;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  return need_barrier;
}
|
5439 |
|
|
|
5440 |
|
|
static int
|
5441 |
|
|
rws_access_reg (rtx reg, struct reg_flags flags, int pred)
|
5442 |
|
|
{
|
5443 |
|
|
int regno = REGNO (reg);
|
5444 |
|
|
int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
|
5445 |
|
|
|
5446 |
|
|
if (n == 1)
|
5447 |
|
|
return rws_access_regno (regno, flags, pred);
|
5448 |
|
|
else
|
5449 |
|
|
{
|
5450 |
|
|
int need_barrier = 0;
|
5451 |
|
|
while (--n >= 0)
|
5452 |
|
|
need_barrier |= rws_access_regno (regno + n, flags, pred);
|
5453 |
|
|
return need_barrier;
|
5454 |
|
|
}
|
5455 |
|
|
}
|
5456 |
|
|
|
5457 |
|
|
/* Examine X, which is a SET rtx, and update the flags, the predicate, and
   the condition, stored in *PFLAGS, *PPRED and *PCOND.  */

static void
update_set_flags (rtx x, struct reg_flags *pflags)
{
  rtx src = SET_SRC (x);

  switch (GET_CODE (src))
    {
    case CALL:
      /* Calls carry no flag information of interest here.  */
      return;

    case IF_THEN_ELSE:
      /* There are four cases here:
	 (1) The destination is (pc), in which case this is a branch,
	 nothing here applies.
	 (2) The destination is ar.lc, in which case this is a
	 doloop_end_internal,
	 (3) The destination is an fp register, in which case this is
	 an fselect instruction.
	 (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
	 this is a check load.
	 In all cases, nothing we do in this function applies.  */
      return;

    default:
      if (COMPARISON_P (src)
	  && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
	/* Set pflags->is_fp to 1 so that we know we're dealing
	   with a floating point comparison when processing the
	   destination of the SET.  */
	pflags->is_fp = 1;

      /* Discover if this is a parallel comparison.  We only handle
	 and.orcm and or.andcm at present, since we must retain a
	 strict inverse on the predicate pair.  */
      else if (GET_CODE (src) == AND)
	pflags->is_and = 1;
      else if (GET_CODE (src) == IOR)
	pflags->is_or = 1;

      break;
    }
}
|
5502 |
|
|
|
5503 |
|
|
/* Subroutine of rtx_needs_barrier; this function determines whether the
   source of a given SET rtx found in X needs a barrier.  FLAGS and PRED
   are as in rtx_needs_barrier.  COND is an rtx that holds the condition
   for this insn.  */

static int
set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
{
  int need_barrier = 0;
  rtx dst;
  rtx src = SET_SRC (x);

  if (GET_CODE (src) == CALL)
    /* We don't need to worry about the result registers that
       get written by subroutine call.  */
    return rtx_needs_barrier (src, flags, pred);
  else if (SET_DEST (x) == pc_rtx)
    {
      /* X is a conditional branch.  */
      /* ??? This seems redundant, as the caller sets this bit for
	 all JUMP_INSNs.  */
      if (!ia64_spec_check_src_p (src))
	flags.is_branch = 1;
      return rtx_needs_barrier (src, flags, pred);
    }

  if (ia64_spec_check_src_p (src))
    /* Avoid checking one register twice (in condition
       and in 'then' section) for ldc pattern.  */
    {
      gcc_assert (REG_P (XEXP (src, 2)));
      need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);

      /* We process MEM below.  */
      src = XEXP (src, 1);
    }

  need_barrier |= rtx_needs_barrier (src, flags, pred);

  dst = SET_DEST (x);
  if (GET_CODE (dst) == ZERO_EXTRACT)
    {
      /* A bit-field store also reads its position and width operands.  */
      need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
      need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
    }
  return need_barrier;
}
|
5550 |
|
|
|
5551 |
|
|
/* Handle an access to rtx X of type FLAGS using predicate register
   PRED.  Return 1 if this access creates a dependency with an earlier
   instruction in the same group.  */

static int
rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
{
  int i, j;
  int is_complemented = 0;
  int need_barrier = 0;
  const char *format_ptr;
  struct reg_flags new_flags;
  rtx cond;

  if (! x)
    return 0;

  /* Start from the caller's flags; individual cases flip is_write as
     they descend into read vs. write positions of X.  */
  new_flags = flags;

  switch (GET_CODE (x))
    {
    case SET:
      update_set_flags (x, &new_flags);
      need_barrier = set_src_needs_barrier (x, new_flags, pred);
      if (GET_CODE (SET_SRC (x)) != CALL)
	{
	  /* The destination is written; CALL results are handled by
	     set_src_needs_barrier itself.  */
	  new_flags.is_write = 1;
	  need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
	}
      break;

    case CALL:
      new_flags.is_write = 0;
      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);

      /* Avoid multiple register writes, in case this is a pattern with
	 multiple CALL rtx.  This avoids a failure in rws_access_reg.  */
      if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
	{
	  new_flags.is_write = 1;
	  need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
	  need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
	  need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
	}
      break;

    case COND_EXEC:
      /* X is a predicated instruction.  */

      cond = COND_EXEC_TEST (x);
      gcc_assert (!pred);
      need_barrier = rtx_needs_barrier (cond, flags, 0);

      /* An EQ test means the instruction executes when the predicate
	 is false, i.e. under the complemented predicate register.  */
      if (GET_CODE (cond) == EQ)
	is_complemented = 1;
      cond = XEXP (cond, 0);
      gcc_assert (GET_CODE (cond) == REG
		  && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
      pred = REGNO (cond);
      /* Complemented predicates live at the odd register number
	 following the base predicate.  */
      if (is_complemented)
	++pred;

      need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
      return need_barrier;

    case CLOBBER:
    case USE:
      /* Clobber & use are for earlier compiler-phases only.  */
      break;

    case ASM_OPERANDS:
    case ASM_INPUT:
      /* We always emit stop bits for traditional asms.  We emit stop bits
	 for volatile extended asms if TARGET_VOL_ASM_STOP is true.  */
      if (GET_CODE (x) != ASM_OPERANDS
	  || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
	{
	  /* Avoid writing the register multiple times if we have multiple
	     asm outputs.  This avoids a failure in rws_access_reg.  */
	  if (! rws_insn[REG_VOLATILE].write_count)
	    {
	      new_flags.is_write = 1;
	      rws_access_regno (REG_VOLATILE, new_flags, pred);
	    }
	  return 1;
	}

      /* For all ASM_OPERANDS, we must traverse the vector of input operands.
	 We cannot just fall through here since then we would be confused
	 by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
	 traditional asms unlike their normal usage.  */

      for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
	if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
	  need_barrier = 1;
      break;

    case PARALLEL:
      /* First pass: process sources of every element.  */
      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
	{
	  rtx pat = XVECEXP (x, 0, i);
	  switch (GET_CODE (pat))
	    {
	    case SET:
	      update_set_flags (pat, &new_flags);
	      need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
	      break;

	    case USE:
	    case CALL:
	    case ASM_OPERANDS:
	      need_barrier |= rtx_needs_barrier (pat, flags, pred);
	      break;

	    case CLOBBER:
	    case RETURN:
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}
      /* Second pass: process destinations, after all sources were seen.  */
      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
	{
	  rtx pat = XVECEXP (x, 0, i);
	  if (GET_CODE (pat) == SET)
	    {
	      if (GET_CODE (SET_SRC (pat)) != CALL)
		{
		  new_flags.is_write = 1;
		  need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
						     pred);
		}
	    }
	  else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
	    need_barrier |= rtx_needs_barrier (pat, flags, pred);
	}
      break;

    case SUBREG:
      need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
      break;
    case REG:
      if (REGNO (x) == AR_UNAT_REGNUM)
	{
	  /* ar.unat stands for all 64 of its NaT bits; record an access
	     to each of them.  */
	  for (i = 0; i < 64; ++i)
	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
	}
      else
	need_barrier = rws_access_reg (x, flags, pred);
      break;

    case MEM:
      /* Find the regs used in memory address computation.  */
      new_flags.is_write = 0;
      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
      break;

    case CONST_INT: case CONST_DOUBLE: case CONST_VECTOR:
    case SYMBOL_REF: case LABEL_REF: case CONST:
      break;

      /* Operators with side-effects.  */
    case POST_INC: case POST_DEC:
      gcc_assert (GET_CODE (XEXP (x, 0)) == REG);

      /* The base register is both read and written.  */
      new_flags.is_write = 0;
      need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
      new_flags.is_write = 1;
      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
      break;

    case POST_MODIFY:
      gcc_assert (GET_CODE (XEXP (x, 0)) == REG);

      new_flags.is_write = 0;
      need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
      new_flags.is_write = 1;
      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
      break;

      /* Handle common unary and binary ops for efficiency.  */
    case COMPARE:  case PLUS:    case MINUS:   case MULT:      case DIV:
    case MOD:      case UDIV:    case UMOD:    case AND:       case IOR:
    case XOR:      case ASHIFT:  case ROTATE:  case ASHIFTRT:  case LSHIFTRT:
    case ROTATERT: case SMIN:    case SMAX:    case UMIN:      case UMAX:
    case NE:       case EQ:      case GE:      case GT:        case LE:
    case LT:       case GEU:     case GTU:     case LEU:       case LTU:
      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
      break;

    case NEG:      case NOT:            case SIGN_EXTEND:   case ZERO_EXTEND:
    case TRUNCATE: case FLOAT_EXTEND:   case FLOAT_TRUNCATE: case FLOAT:
    case FIX:      case UNSIGNED_FLOAT: case UNSIGNED_FIX:   case ABS:
    case SQRT:     case FFS:            case POPCOUNT:
      need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
      break;

    case VEC_SELECT:
      /* VEC_SELECT's second argument is a PARALLEL with integers that
	 describe the elements selected.  On ia64, those integers are
	 always constants.  Avoid walking the PARALLEL so that we don't
	 get confused with "normal" parallels and then die.  */
      need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
      break;

    case UNSPEC:
      switch (XINT (x, 1))
	{
	case UNSPEC_LTOFF_DTPMOD:
	case UNSPEC_LTOFF_DTPREL:
	case UNSPEC_DTPREL:
	case UNSPEC_LTOFF_TPREL:
	case UNSPEC_TPREL:
	case UNSPEC_PRED_REL_MUTEX:
	case UNSPEC_PIC_CALL:
	case UNSPEC_MF:
	case UNSPEC_FETCHADD_ACQ:
	case UNSPEC_BSP_VALUE:
	case UNSPEC_FLUSHRS:
	case UNSPEC_BUNDLE_SELECTOR:
	  break;

	case UNSPEC_GR_SPILL:
	case UNSPEC_GR_RESTORE:
	  {
	    /* Operand 1 is the spill-slot offset; each 8-byte slot maps
	       to one of the 64 ar.unat NaT bits.  */
	    HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
	    HOST_WIDE_INT bit = (offset >> 3) & 63;

	    need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
	    new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
					      new_flags, pred);
	    break;
	  }

	case UNSPEC_FR_SPILL:
	case UNSPEC_FR_RESTORE:
	case UNSPEC_GETF_EXP:
	case UNSPEC_SETF_EXP:
	case UNSPEC_ADDP4:
	case UNSPEC_FR_SQRT_RECIP_APPROX:
	case UNSPEC_LDA:
	case UNSPEC_LDS:
	case UNSPEC_LDSA:
	case UNSPEC_CHKACLR:
	case UNSPEC_CHKS:
	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
	  break;

	case UNSPEC_FR_RECIP_APPROX:
	case UNSPEC_SHRP:
	case UNSPEC_COPYSIGN:
	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
	  break;

	case UNSPEC_CMPXCHG_ACQ:
	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    case UNSPEC_VOLATILE:
      switch (XINT (x, 1))
	{
	case UNSPECV_ALLOC:
	  /* Alloc must always be the first instruction of a group.
	     We force this by always returning true.  */
	  /* ??? We might get better scheduling if we explicitly check for
	     input/local/output register dependencies, and modify the
	     scheduler so that alloc is always reordered to the start of
	     the current group.  We could then eliminate all of the
	     first_instruction code.  */
	  rws_access_regno (AR_PFS_REGNUM, flags, pred);

	  new_flags.is_write = 1;
	  rws_access_regno (REG_AR_CFM, new_flags, pred);
	  return 1;

	case UNSPECV_SET_BSP:
	  need_barrier = 1;
	  break;

	case UNSPECV_BLOCKAGE:
	case UNSPECV_INSN_GROUP_BARRIER:
	case UNSPECV_BREAK:
	case UNSPECV_PSAC_ALL:
	case UNSPECV_PSAC_NORMAL:
	  return 0;

	default:
	  gcc_unreachable ();
	}
      break;

    case RETURN:
      new_flags.is_write = 0;
      need_barrier = rws_access_regno (REG_RP, flags, pred);
      need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);

      new_flags.is_write = 1;
      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
      need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
      break;

    default:
      /* Generic fallback: walk every sub-rtx according to its format.  */
      format_ptr = GET_RTX_FORMAT (GET_CODE (x));
      for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
	switch (format_ptr[i])
	  {
	  case '0':	/* unused field */
	  case 'i':	/* integer */
	  case 'n':	/* note */
	  case 'w':	/* wide integer */
	  case 's':	/* pointer to string */
	  case 'S':	/* optional pointer to string */
	    break;

	  case 'e':
	    if (rtx_needs_barrier (XEXP (x, i), flags, pred))
	      need_barrier = 1;
	    break;

	  case 'E':
	    for (j = XVECLEN (x, i) - 1; j >= 0; --j)
	      if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
		need_barrier = 1;
	    break;

	  default:
	    gcc_unreachable ();
	  }
      break;
    }
  return need_barrier;
}
|
5894 |
|
|
|
5895 |
|
|
/* Clear out the state for group_barrier_needed at the start of a
|
5896 |
|
|
sequence of insns. */
|
5897 |
|
|
|
5898 |
|
|
static void
|
5899 |
|
|
init_insn_group_barriers (void)
|
5900 |
|
|
{
|
5901 |
|
|
memset (rws_sum, 0, sizeof (rws_sum));
|
5902 |
|
|
first_instruction = 1;
|
5903 |
|
|
}
|
5904 |
|
|
|
5905 |
|
|
/* Given the current state, determine whether a group barrier (a stop bit) is
   necessary before INSN.  Return nonzero if so.  This modifies the state to
   include the effects of INSN as a side-effect.  */

static int
group_barrier_needed (rtx insn)
{
  rtx pat;
  int need_barrier = 0;
  struct reg_flags flags;

  memset (&flags, 0, sizeof (flags));
  switch (GET_CODE (insn))
    {
    case NOTE:
      break;

    case BARRIER:
      /* A barrier doesn't imply an instruction group boundary.  */
      break;

    case CODE_LABEL:
      /* Control can reach a label from multiple places, so conservatively
	 restart the per-insn state and force a stop bit.  */
      memset (rws_insn, 0, sizeof (rws_insn));
      return 1;

    case CALL_INSN:
      flags.is_branch = 1;
      flags.is_sibcall = SIBLING_CALL_P (insn);
      memset (rws_insn, 0, sizeof (rws_insn));

      /* Don't bundle a call following another call.  */
      if ((pat = prev_active_insn (insn))
	  && GET_CODE (pat) == CALL_INSN)
	{
	  need_barrier = 1;
	  break;
	}

      need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
      break;

    case JUMP_INSN:
      if (!ia64_spec_check_p (insn))
	flags.is_branch = 1;

      /* Don't bundle a jump following a call.  */
      if ((pat = prev_active_insn (insn))
	  && GET_CODE (pat) == CALL_INSN)
	{
	  need_barrier = 1;
	  break;
	}
      /* FALLTHRU */

    case INSN:
      if (GET_CODE (PATTERN (insn)) == USE
	  || GET_CODE (PATTERN (insn)) == CLOBBER)
	/* Don't care about USE and CLOBBER "insns"---those are used to
	   indicate to the optimizer that it shouldn't get rid of
	   certain operations.  */
	break;

      pat = PATTERN (insn);

      /* Ug.  Hack hacks hacked elsewhere.  */
      switch (recog_memoized (insn))
	{
	  /* We play dependency tricks with the epilogue in order
	     to get proper schedules.  Undo this for dv analysis.  */
	case CODE_FOR_epilogue_deallocate_stack:
	case CODE_FOR_prologue_allocate_stack:
	  pat = XVECEXP (pat, 0, 0);
	  break;

	  /* The pattern we use for br.cloop confuses the code above.
	     The second element of the vector is representative.  */
	case CODE_FOR_doloop_end_internal:
	  pat = XVECEXP (pat, 0, 1);
	  break;

	  /* Doesn't generate code.  */
	case CODE_FOR_pred_rel_mutex:
	case CODE_FOR_prologue_use:
	  return 0;

	default:
	  break;
	}

      memset (rws_insn, 0, sizeof (rws_insn));
      need_barrier = rtx_needs_barrier (pat, flags, 0);

      /* Check to see if the previous instruction was a volatile
	 asm.  */
      if (! need_barrier)
	need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
      break;

    default:
      gcc_unreachable ();
    }

  /* The very first real insn of a sequence never needs a barrier:
     there is nothing before it to conflict with.  */
  if (first_instruction && INSN_P (insn)
      && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
      && GET_CODE (PATTERN (insn)) != USE
      && GET_CODE (PATTERN (insn)) != CLOBBER)
    {
      need_barrier = 0;
      first_instruction = 0;
    }

  return need_barrier;
}
|
6018 |
|
|
|
6019 |
|
|
/* Like group_barrier_needed, but do not clobber the current state. */
|
6020 |
|
|
|
6021 |
|
|
static int
|
6022 |
|
|
safe_group_barrier_needed (rtx insn)
|
6023 |
|
|
{
|
6024 |
|
|
struct reg_write_state rws_saved[NUM_REGS];
|
6025 |
|
|
int saved_first_instruction;
|
6026 |
|
|
int t;
|
6027 |
|
|
|
6028 |
|
|
memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
|
6029 |
|
|
saved_first_instruction = first_instruction;
|
6030 |
|
|
|
6031 |
|
|
t = group_barrier_needed (insn);
|
6032 |
|
|
|
6033 |
|
|
memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
|
6034 |
|
|
first_instruction = saved_first_instruction;
|
6035 |
|
|
|
6036 |
|
|
return t;
|
6037 |
|
|
}
|
6038 |
|
|
|
6039 |
|
|
/* Scan the current function and insert stop bits as necessary to
   eliminate dependencies.  This function assumes that a final
   instruction scheduling pass has been run which has already
   inserted most of the necessary stop bits.  This function only
   inserts new ones at basic block boundaries, since these are
   invisible to the scheduler.  */

static void
emit_insn_group_barriers (FILE *dump)
{
  rtx insn;
  /* Most recent label/basic-block note before which a stop bit may be
     emitted; zero when no insns have been seen since the last one.  */
  rtx last_label = 0;
  int insns_since_last_label = 0;

  init_insn_group_barriers ();

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (GET_CODE (insn) == CODE_LABEL)
	{
	  if (insns_since_last_label)
	    last_label = insn;
	  insns_since_last_label = 0;
	}
      else if (GET_CODE (insn) == NOTE
	       && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
	{
	  if (insns_since_last_label)
	    last_label = insn;
	  insns_since_last_label = 0;
	}
      else if (GET_CODE (insn) == INSN
	       && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
	       && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
	{
	  /* An existing stop bit: reset the tracking state.  */
	  init_insn_group_barriers ();
	  last_label = 0;
	}
      else if (INSN_P (insn))
	{
	  insns_since_last_label = 1;

	  if (group_barrier_needed (insn))
	    {
	      if (last_label)
		{
		  if (dump)
		    fprintf (dump, "Emitting stop before label %d\n",
			     INSN_UID (last_label));
		  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
		  /* Re-scan from the label so the insns between it and
		     the current point are accounted against the fresh
		     group state.  */
		  insn = last_label;

		  init_insn_group_barriers ();
		  last_label = 0;
		}
	    }
	}
    }
}
|
6098 |
|
|
|
6099 |
|
|
/* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
   This function has to emit all necessary group barriers.  */

static void
emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
{
  rtx insn;

  init_insn_group_barriers ();

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (GET_CODE (insn) == BARRIER)
	{
	  rtx last = prev_active_insn (insn);

	  if (! last)
	    continue;
	  /* Step over a jump table to reach the jump itself.  */
	  if (GET_CODE (last) == JUMP_INSN
	      && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
	    last = prev_active_insn (last);
	  if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
	    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);

	  init_insn_group_barriers ();
	}
      else if (INSN_P (insn))
	{
	  if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
	    init_insn_group_barriers ();
	  else if (group_barrier_needed (insn))
	    {
	      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
	      init_insn_group_barriers ();
	      /* Replay INSN against the fresh state so its own register
		 accesses are recorded for the new group.  */
	      group_barrier_needed (insn);
	    }
	}
    }
}
|
6138 |
|
|
|
6139 |
|
|
|
6140 |
|
|
|
6141 |
|
|
/* Instruction scheduling support.  */

#define NR_BUNDLES 10

/* A list of names of all available bundles.  */

static const char *bundle_name [NR_BUNDLES] =
{
  ".mii",
  ".mmi",
  ".mfi",
  ".mmf",
#if NR_BUNDLES == 10
  ".bbb",
  ".mbb",
#endif
  ".mib",
  ".mmb",
  ".mfb",
  ".mlx"
};

/* Nonzero if we should insert stop bits into the schedule.  */

int ia64_final_schedule = 0;

/* Codes of the corresponding queried units: */

static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;

static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;

static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;

/* The following variable value is an insn group barrier.  */

static rtx dfa_stop_insn;

/* The following variable value is the last issued insn.  */

static rtx last_scheduled_insn;

/* The following variable value is size of the DFA state.  */

static size_t dfa_state_size;

/* The following variable value is pointer to a DFA state used as
   temporary variable.  */

static state_t temp_dfa_state = NULL;

/* The following variable value is DFA state after issuing the last
   insn.  */

static state_t prev_cycle_state = NULL;

/* The following array element values are TRUE if the corresponding
   insn requires to add stop bits before it.  */

static char *stops_p = NULL;

/* The following array element values are ZERO for non-speculative
   instructions and hold corresponding speculation check number for
   speculative instructions.  */
static int *spec_check_no = NULL;

/* Size of spec_check_no array.  */
static int max_uid = 0;

/* The following variable is used to set up the mentioned above array.  */

static int stop_before_p = 0;

/* The following variable value is length of the arrays `clocks' and
   `add_cycles'.  */

static int clocks_length;

/* The following array element values are cycles on which the
   corresponding insn will be issued.  The array is used only for
   Itanium1.  */

static int *clocks;

/* The following array element values are numbers of cycles that should
   be added to improve insn scheduling for MM_insns for Itanium1.  */

static int *add_cycles;

/* The following variable value is number of data speculations in progress.  */
static int pending_data_specs = 0;

/* Forward declarations for helpers defined later in this file.  */
static rtx ia64_single_set (rtx);
static void ia64_emit_insn_before (rtx, rtx);
|
6237 |
|
|
|
6238 |
|
|
/* Map a bundle number to its pseudo-op.  B must be a valid index into
   bundle_name, i.e. in the range [0, NR_BUNDLES).  */

const char *
get_bundle_name (int b)
{
  return bundle_name[b];
}
|
6245 |
|
|
|
6246 |
|
|
|
6247 |
|
|
/* Return the maximum number of instructions a cpu can issue.  */

static int
ia64_issue_rate (void)
{
  /* Up to six instructions (two bundles of three) per clock cycle.  */
  return 6;
}
|
6254 |
|
|
|
6255 |
|
|
/* Helper function - like single_set, but look inside COND_EXEC.  */

static rtx
ia64_single_set (rtx insn)
{
  rtx x = PATTERN (insn), ret;
  if (GET_CODE (x) == COND_EXEC)
    x = COND_EXEC_CODE (x);
  if (GET_CODE (x) == SET)
    return x;

  /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
     Although they are not classical single set, the second set is there just
     to protect it from moving past FP-relative stack accesses.  */
  switch (recog_memoized (insn))
    {
    case CODE_FOR_prologue_allocate_stack:
    case CODE_FOR_epilogue_deallocate_stack:
      /* The first element of the PARALLEL is the set of interest.  */
      ret = XVECEXP (x, 0, 0);
      break;

    default:
      ret = single_set_2 (insn, x);
      break;
    }

  return ret;
}
|
6283 |
|
|
|
6284 |
|
|
/* Adjust the cost of a scheduling dependency.
|
6285 |
|
|
Return the new cost of a dependency of type DEP_TYPE or INSN on DEP_INSN.
|
6286 |
|
|
COST is the current cost. */
|
6287 |
|
|
|
6288 |
|
|
static int
|
6289 |
|
|
ia64_adjust_cost_2 (rtx insn, int dep_type1, rtx dep_insn, int cost)
|
6290 |
|
|
{
|
6291 |
|
|
enum reg_note dep_type = (enum reg_note) dep_type1;
|
6292 |
|
|
enum attr_itanium_class dep_class;
|
6293 |
|
|
enum attr_itanium_class insn_class;
|
6294 |
|
|
|
6295 |
|
|
if (dep_type != REG_DEP_OUTPUT)
|
6296 |
|
|
return cost;
|
6297 |
|
|
|
6298 |
|
|
insn_class = ia64_safe_itanium_class (insn);
|
6299 |
|
|
dep_class = ia64_safe_itanium_class (dep_insn);
|
6300 |
|
|
if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
|
6301 |
|
|
|| insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
|
6302 |
|
|
return 0;
|
6303 |
|
|
|
6304 |
|
|
return cost;
|
6305 |
|
|
}
|
6306 |
|
|
|
6307 |
|
|
/* Like emit_insn_before, but skip cycle_display notes.
   ??? When cycle display notes are implemented, update this.  */

static void
ia64_emit_insn_before (rtx insn, rtx before)
{
  /* Currently a plain pass-through; kept as a hook for cycle-display
     handling.  */
  emit_insn_before (insn, before);
}
|
6315 |
|
|
|
6316 |
|
|
/* The following function marks insns who produce addresses for load
   and store insns.  Such insns will be placed into M slots because it
   decrease latency time for Itanium1 (see function
   `ia64_produce_address_p' and the DFA descriptions).  */

static void
ia64_dependencies_evaluation_hook (rtx head, rtx tail)
{
  rtx insn, link, next, next_tail;

  /* Before reload, which_alternative is not set, which means that
     ia64_safe_itanium_class will produce wrong results for (at least)
     move instructions.  */
  if (!reload_completed)
    return;

  next_tail = NEXT_INSN (tail);
  /* Clear the mark (the `call' bit is reused as a flag here) on every
     insn in the region first.  */
  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      insn->call = 0;
  /* Then mark each integer ALU insn that feeds the address of a
     dependent load or store.  */
  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
    if (INSN_P (insn)
	&& ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
      {
	for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1))
	  {
	    enum attr_itanium_class c;

	    if (REG_NOTE_KIND (link) != REG_DEP_TRUE)
	      continue;
	    next = XEXP (link, 0);
	    c = ia64_safe_itanium_class (next);
	    if ((c == ITANIUM_CLASS_ST
		 || c == ITANIUM_CLASS_STF)
		&& ia64_st_address_bypass_p (insn, next))
	      break;
	    else if ((c == ITANIUM_CLASS_LD
		      || c == ITANIUM_CLASS_FLD
		      || c == ITANIUM_CLASS_FLDP)
		     && ia64_ld_address_bypass_p (insn, next))
	      break;
	  }
	/* Nonzero iff the loop above found an address bypass.  */
	insn->call = link != 0;
      }
}
|
6361 |
|
|
|
6362 |
|
|
/* We're beginning a new block.  Initialize data structures as necessary.  */

static void
ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
		 int sched_verbose ATTRIBUTE_UNUSED,
		 int max_ready ATTRIBUTE_UNUSED)
{
#ifdef ENABLE_CHECKING
  rtx insn;

  /* After reload no insn in the region may be part of a sched group.  */
  if (reload_completed)
    for (insn = NEXT_INSN (current_sched_info->prev_head);
	 insn != current_sched_info->next_tail;
	 insn = NEXT_INSN (insn))
      gcc_assert (!SCHED_GROUP_P (insn));
#endif
  last_scheduled_insn = NULL_RTX;
  init_insn_group_barriers ();
}
|
6381 |
|
|
|
6382 |
|
|
/* We're beginning a scheduling pass.  Check assertion.  */

static void
ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
			int sched_verbose ATTRIBUTE_UNUSED,
			int max_ready ATTRIBUTE_UNUSED)
{
  /* No data speculation may be left over from a previous pass.  */
  gcc_assert (!pending_data_specs);
}
|
6391 |
|
|
|
6392 |
|
|
/* Scheduling pass is now finished. Free/reset static variable. */
|
6393 |
|
|
static void
|
6394 |
|
|
ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
|
6395 |
|
|
int sched_verbose ATTRIBUTE_UNUSED)
|
6396 |
|
|
{
|
6397 |
|
|
free (spec_check_no);
|
6398 |
|
|
spec_check_no = 0;
|
6399 |
|
|
max_uid = 0;
|
6400 |
|
|
}
|
6401 |
|
|
|
6402 |
|
|
/* We are about to begin issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.

   READY/PN_READY describe the ready list (best candidates at the end).
   REORDER_TYPE is 0 when called from ia64_sched_reorder (start of cycle)
   and 1 when called from ia64_sched_reorder2 (after each issue).
   Returns nonzero if the scheduler may issue insns this cycle.  */

static int
ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
			int *pn_ready, int clock_var ATTRIBUTE_UNUSED,
			int reorder_type)
{
  int n_asms;
  int n_ready = *pn_ready;
  rtx *e_ready = ready + n_ready;
  rtx *insnp;

  if (sched_verbose)
    fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);

  if (reorder_type == 0)
    {
      /* First, move all USEs, CLOBBERs and other crud out of the way.  */
      n_asms = 0;
      for (insnp = ready; insnp < e_ready; insnp++)
	if (insnp < e_ready)
	  {
	    rtx insn = *insnp;
	    enum attr_type t = ia64_safe_type (insn);
	    if (t == TYPE_UNKNOWN)
	      {
		if (GET_CODE (PATTERN (insn)) == ASM_INPUT
		    || asm_noperands (PATTERN (insn)) >= 0)
		  {
		    /* Asm statement: swap it toward the front (lowest
		       priority end) of the ready list.  */
		    rtx lowest = ready[n_asms];
		    ready[n_asms] = insn;
		    *insnp = lowest;
		    n_asms++;
		  }
		else
		  {
		    /* USE/CLOBBER-like insn: move it to the back so it
		       is issued immediately, and let scheduling go on.  */
		    rtx highest = ready[n_ready - 1];
		    ready[n_ready - 1] = insn;
		    *insnp = highest;
		    return 1;
		  }
	      }
	  }

      if (n_asms < n_ready)
	{
	  /* Some normal insns to process.  Skip the asms.  */
	  ready += n_asms;
	  n_ready -= n_asms;
	}
      else if (n_ready > 0)
	return 1;
    }

  if (ia64_final_schedule)
    {
      int deleted = 0;
      int nr_need_stop = 0;

      /* Count the ready insns that would require a stop bit (group
	 barrier) before they could issue.  */
      for (insnp = ready; insnp < e_ready; insnp++)
	if (safe_group_barrier_needed (*insnp))
	  nr_need_stop++;

      if (reorder_type == 1 && n_ready == nr_need_stop)
	return 0;
      if (reorder_type == 0)
	return 1;
      insnp = e_ready;
      /* Move down everything that needs a stop bit, preserving
	 relative order.  */
      while (insnp-- > ready + deleted)
	while (insnp >= ready + deleted)
	  {
	    rtx insn = *insnp;
	    if (! safe_group_barrier_needed (insn))
	      break;
	    /* Rotate INSN to the front; everything before it shifts up
	       by one slot.  */
	    memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
	    *ready = insn;
	    deleted++;
	  }
      n_ready -= deleted;
      ready += deleted;
    }

  return 1;
}
|
6489 |
|
|
|
6490 |
|
|
/* We are about to begin issuing insns for this clock cycle.  Override
   the default sort algorithm to better slot instructions.
   Implements the TARGET_SCHED_REORDER hook by delegating to
   ia64_dfa_sched_reorder with REORDER_TYPE 0 (start of cycle).  */

static int
ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
		    int clock_var)
{
  return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
				 pn_ready, clock_var, 0);
}
|
6500 |
|
|
|
6501 |
|
|
/* Like ia64_sched_reorder, but called after issuing each insn.
   Override the default sort algorithm to better slot instructions.  */

static int
ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
		     int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
		     int *pn_ready, int clock_var)
{
  /* On Itanium 1, record the cycle on which the last insn was issued;
     the clocks[] array is consulted by ia64_dfa_new_cycle to model
     MM-insn bypass latencies.  */
  if (ia64_tune == PROCESSOR_ITANIUM && reload_completed && last_scheduled_insn)
    clocks [INSN_UID (last_scheduled_insn)] = clock_var;
  return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
				 clock_var, 1);
}
|
6514 |
|
|
|
6515 |
|
|
/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.
   Also maintains speculation and group-barrier bookkeeping as each insn
   is issued (TARGET_SCHED_VARIABLE_ISSUE hook).  */

static int
ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
		     int sched_verbose ATTRIBUTE_UNUSED,
		     rtx insn ATTRIBUTE_UNUSED,
		     int can_issue_more ATTRIBUTE_UNUSED)
{
  if (current_sched_info->flags & DO_SPECULATION)
    /* Modulo scheduling does not extend h_i_d when emitting
       new instructions.  Deal with it.  */
    {
      /* Track how many data-speculative loads are in flight: a ld.a/ld.sa
	 opens an ALAT entry, its check closes it.  */
      if (DONE_SPEC (insn) & BEGIN_DATA)
	pending_data_specs++;
      if (CHECK_SPEC (insn) & BEGIN_DATA)
	pending_data_specs--;
    }

  last_scheduled_insn = insn;
  /* Remember the DFA state at the start of this cycle so that
     ia64_dfa_new_cycle can replay it with a stop bit inserted.  */
  memcpy (prev_cycle_state, curr_state, dfa_state_size);
  if (reload_completed)
    {
      /* The reorder hooks should already have forced a stop bit where
	 one was needed, so this must not trigger here.  */
      int needed = group_barrier_needed (insn);

      gcc_assert (!needed);
      if (GET_CODE (insn) == CALL_INSN)
	init_insn_group_barriers ();
      stops_p [INSN_UID (insn)] = stop_before_p;
      stop_before_p = 0;
    }
  return 1;
}
|
6548 |
|
|
|
6549 |
|
|
/* We are choosing insn from the ready queue. Return nonzero if INSN
|
6550 |
|
|
can be chosen. */
|
6551 |
|
|
|
6552 |
|
|
static int
|
6553 |
|
|
ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
|
6554 |
|
|
{
|
6555 |
|
|
gcc_assert (insn && INSN_P (insn));
|
6556 |
|
|
return ((!reload_completed
|
6557 |
|
|
|| !safe_group_barrier_needed (insn))
|
6558 |
|
|
&& ia64_first_cycle_multipass_dfa_lookahead_guard_spec (insn));
|
6559 |
|
|
}
|
6560 |
|
|
|
6561 |
|
|
/* We are choosing insn from the ready queue. Return nonzero if INSN
|
6562 |
|
|
can be chosen. */
|
6563 |
|
|
|
6564 |
|
|
static bool
|
6565 |
|
|
ia64_first_cycle_multipass_dfa_lookahead_guard_spec (rtx insn)
|
6566 |
|
|
{
|
6567 |
|
|
gcc_assert (insn && INSN_P (insn));
|
6568 |
|
|
/* Size of ALAT is 32. As far as we perform conservative data speculation,
|
6569 |
|
|
we keep ALAT half-empty. */
|
6570 |
|
|
return (pending_data_specs < 16
|
6571 |
|
|
|| !(TODO_SPEC (insn) & BEGIN_DATA));
|
6572 |
|
|
}
|
6573 |
|
|
|
6574 |
|
|
/* The following variable value is a pseudo-insn used by the DFA insn
   scheduler to change the DFA state when the simulated clock is
   increased.  */

static rtx dfa_pre_cycle_insn;
|
6579 |
|
|
|
6580 |
|
|
/* We are about to begin issuing INSN.  Return nonzero if we cannot
   issue it on the given cycle CLOCK, and zero if we should not sort
   the ready queue on the next clock start (TARGET_SCHED_DFA_NEW_CYCLE
   hook).  LAST_CLOCK is the cycle of the previously issued insn.  */

static int
ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
		    int clock, int *sort_p)
{
  int setup_clocks_p = FALSE;

  gcc_assert (insn && INSN_P (insn));
  /* A stop bit is required before INSN if it needs a group barrier, or
     if the previously scheduled insn was a call or an asm.  */
  if ((reload_completed && safe_group_barrier_needed (insn))
      || (last_scheduled_insn
	  && (GET_CODE (last_scheduled_insn) == CALL_INSN
	      || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
	      || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
    {
      init_insn_group_barriers ();
      if (verbose && dump)
	fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
		 last_clock == clock ? " + cycle advance" : "");
      stop_before_p = 1;
      if (last_clock == clock)
	{
	  /* Same cycle: feed the stop pseudo-insn to the DFA and refuse
	     to issue INSN this cycle.  */
	  state_transition (curr_state, dfa_stop_insn);
	  if (TARGET_EARLY_STOP_BITS)
	    *sort_p = (last_scheduled_insn == NULL_RTX
		       || GET_CODE (last_scheduled_insn) != CALL_INSN);
	  else
	    *sort_p = 0;
	  return 1;
	}
      else if (reload_completed)
	setup_clocks_p = TRUE;
      /* The clock has already advanced: rebuild the DFA state to
	 account for the stop bit.  After an asm we know nothing, so
	 start from a clean state.  */
      if (GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
	  || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)
	state_reset (curr_state);
      else
	{
	  memcpy (curr_state, prev_cycle_state, dfa_state_size);
	  state_transition (curr_state, dfa_stop_insn);
	  state_transition (curr_state, dfa_pre_cycle_insn);
	  state_transition (curr_state, NULL);
	}
    }
  else if (reload_completed)
    setup_clocks_p = TRUE;
  /* On Itanium 1, model the extra latency an insn pays when it depends
     on a recent MM multiply/shift: record the shortfall in add_cycles[].  */
  if (setup_clocks_p && ia64_tune == PROCESSOR_ITANIUM
      && GET_CODE (PATTERN (insn)) != ASM_INPUT
      && asm_noperands (PATTERN (insn)) < 0)
    {
      enum attr_itanium_class c = ia64_safe_itanium_class (insn);

      if (c != ITANIUM_CLASS_MMMUL && c != ITANIUM_CLASS_MMSHF)
	{
	  rtx link;
	  int d = -1;

	  /* Find the closest (smallest distance) MMMUL/MMSHF producer
	     issued fewer than 4 cycles ago.  */
	  for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
	    if (REG_NOTE_KIND (link) == 0)
	      {
		enum attr_itanium_class dep_class;
		rtx dep_insn = XEXP (link, 0);

		dep_class = ia64_safe_itanium_class (dep_insn);
		if ((dep_class == ITANIUM_CLASS_MMMUL
		     || dep_class == ITANIUM_CLASS_MMSHF)
		    && last_clock - clocks [INSN_UID (dep_insn)] < 4
		    && (d < 0
			|| last_clock - clocks [INSN_UID (dep_insn)] < d))
		  d = last_clock - clocks [INSN_UID (dep_insn)];
	      }
	  if (d >= 0)
	    add_cycles [INSN_UID (insn)] = 3 - d;
	}
    }
  return 0;
}
|
6658 |
|
|
|
6659 |
|
|
/* Implement targetm.sched.h_i_d_extended hook.
   Extend internal data structures when the scheduler creates new insns
   (so every per-uid array stays large enough for get_max_uid ()).  */
static void
ia64_h_i_d_extended (void)
{
  if (current_sched_info->flags & DO_SPECULATION)
    {
      int new_max_uid = get_max_uid () + 1;

      /* Grow the per-uid speculation-check index array, zeroing the
	 newly added tail.  */
      spec_check_no = xrecalloc (spec_check_no, new_max_uid,
				 max_uid, sizeof (*spec_check_no));
      max_uid = new_max_uid;
    }

  if (stops_p != NULL)
    {
      int new_clocks_length = get_max_uid () + 1;

      stops_p = xrecalloc (stops_p, new_clocks_length, clocks_length, 1);

      /* clocks[] and add_cycles[] are only maintained for Itanium 1
	 latency modeling; see ia64_dfa_new_cycle.  */
      if (ia64_tune == PROCESSOR_ITANIUM)
	{
	  clocks = xrecalloc (clocks, new_clocks_length, clocks_length,
			      sizeof (int));
	  add_cycles = xrecalloc (add_cycles, new_clocks_length, clocks_length,
				  sizeof (int));
	}

      clocks_length = new_clocks_length;
    }
}
|
6690 |
|
|
|
6691 |
|
|
/* Constants that help mapping 'enum machine_mode' to int.
   See ia64_mode_to_int for the concrete mode -> index mapping.  */
enum SPEC_MODES
  {
    /* The mode has no speculative load support.  */
    SPEC_MODE_INVALID = -1,
    /* First valid index (BImode).  */
    SPEC_MODE_FIRST = 0,
    /* Range of indices whose loads may appear under ZERO_EXTEND
       (QImode .. SImode).  */
    SPEC_MODE_FOR_EXTEND_FIRST = 1,
    SPEC_MODE_FOR_EXTEND_LAST = 3,
    /* Last valid index.  */
    SPEC_MODE_LAST = 8
  };
|
6700 |
|
|
|
6701 |
|
|
/* Return the speculation-table index of MODE (see enum SPEC_MODES),
   or SPEC_MODE_INVALID if loads of MODE cannot be speculated.  */
static int
ia64_mode_to_int (enum machine_mode mode)
{
  switch (mode)
    {
    case BImode: return 0; /* SPEC_MODE_FIRST  */
    case QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST  */
    case HImode: return 2;
    case SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST  */
    case DImode: return 4;
    case SFmode: return 5;
    case DFmode: return 6;
    case XFmode: return 7;
    case TImode:
      /* ??? This mode needs testing.  Bypasses for ldfp8 instruction are not
	 mentioned in itanium[12].md.  Predicate fp_register_operand also
	 needs to be defined.  Bottom line: better disable for now.  */
      return SPEC_MODE_INVALID;
    default:     return SPEC_MODE_INVALID;
    }
}
|
6723 |
|
|
|
6724 |
|
|
/* Provide information about speculation capabilities
   (TARGET_SCHED_SET_SCHED_FLAGS hook).  Fills SPEC_INFO and updates
   current_sched_info->flags according to the -msched-* options.  */
static void
ia64_set_sched_flags (spec_info_t spec_info)
{
  unsigned int *flags = &(current_sched_info->flags);

  /* Speculation is only done for regional and EBB scheduling.  */
  if (*flags & SCHED_RGN
      || *flags & SCHED_EBB)
    {
      int mask = 0;

      /* Data speculation: "br" flags control the first (pre-reload)
	 scheduling pass, "ar" flags the second (post-reload) one.  */
      if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0)
	  || (mflag_sched_ar_data_spec && reload_completed))
	{
	  mask |= BEGIN_DATA;

	  if ((mflag_sched_br_in_data_spec && !reload_completed)
	      || (mflag_sched_ar_in_data_spec && reload_completed))
	    mask |= BE_IN_DATA;
	}

      if (mflag_sched_control_spec)
	{
	  mask |= BEGIN_CONTROL;

	  if (mflag_sched_in_control_spec)
	    mask |= BE_IN_CONTROL;
	}

      gcc_assert (*flags & USE_GLAT);

      if (mask)
	{
	  *flags |= USE_DEPS_LIST | DETACH_LIFE_INFO | DO_SPECULATION;

	  spec_info->mask = mask;
	  spec_info->flags = 0;

	  if ((mask & DATA_SPEC) && mflag_sched_prefer_non_data_spec_insns)
	    spec_info->flags |= PREFER_NON_DATA_SPEC;

	  if ((mask & CONTROL_SPEC)
	      && mflag_sched_prefer_non_control_spec_insns)
	    spec_info->flags |= PREFER_NON_CONTROL_SPEC;

	  /* Route the speculation dump either into the scheduler dump
	     file or to stderr.  */
	  if (mflag_sched_spec_verbose)
	    {
	      if (sched_verbose >= 1)
		spec_info->dump = sched_dump;
	      else
		spec_info->dump = stderr;
	    }
	  else
	    spec_info->dump = 0;

	  if (mflag_sched_count_spec_in_critical_path)
	    spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH;
	}
    }
}
|
6784 |
|
|
|
6785 |
|
|
/* Implement targetm.sched.speculate_insn hook.
   Check if the INSN can be TS speculative.
   If 'no' - return -1.
   If 'yes' - generate speculative pattern in the NEW_PAT and return 1.
   If current pattern of the INSN already provides TS speculation, return 0.  */
static int
ia64_speculate_insn (rtx insn, ds_t ts, rtx *new_pat)
{
  rtx pat, reg, mem, mem_reg;
  int mode_no, gen_p = 1;
  bool extend_p;

  /* Only BEGIN_DATA/BEGIN_CONTROL speculation is handled here.  */
  gcc_assert (!(ts & ~BEGIN_SPEC) && ts);

  pat = PATTERN (insn);

  /* Look through a guarding predicate.  */
  if (GET_CODE (pat) == COND_EXEC)
    pat = COND_EXEC_CODE (pat);

  /* This should be a SET ...  */
  if (GET_CODE (pat) != SET)
    return -1;

  reg = SET_DEST (pat);
  /* ... to the general/fp register ...  */
  if (!REG_P (reg) || !(GR_REGNO_P (REGNO (reg)) || FP_REGNO_P (REGNO (reg))))
    return -1;

  /* ... from the mem ...  */
  mem = SET_SRC (pat);

  /* ... that can, possibly, be a zero_extend ...  */
  if (GET_CODE (mem) == ZERO_EXTEND)
    {
      mem = XEXP (mem, 0);
      extend_p = true;
    }
  else
    extend_p = false;

  /* ... or a speculative load.  */
  if (GET_CODE (mem) == UNSPEC)
    {
      int code;

      code = XINT (mem, 1);
      if (code != UNSPEC_LDA && code != UNSPEC_LDS && code != UNSPEC_LDSA)
	return -1;

      /* If the existing speculation already covers TS, we only need to
	 compute the check index, not a new pattern.  */
      if ((code == UNSPEC_LDA && !(ts & BEGIN_CONTROL))
	  || (code == UNSPEC_LDS && !(ts & BEGIN_DATA))
	  || code == UNSPEC_LDSA)
	gen_p = 0;

      mem = XVECEXP (mem, 0, 0);
      gcc_assert (MEM_P (mem));
    }

  /* Source should be a mem ...  */
  if (!MEM_P (mem))
    return -1;

  /* ... addressed by a register.  */
  mem_reg = XEXP (mem, 0);
  if (!REG_P (mem_reg))
    return -1;

  /* We should use MEM's mode since REG's mode in presence of ZERO_EXTEND
     will always be DImode.  */
  mode_no = ia64_mode_to_int (GET_MODE (mem));

  if (mode_no == SPEC_MODE_INVALID
      || (extend_p
	  && !(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
	       && mode_no <= SPEC_MODE_FOR_EXTEND_LAST)))
    return -1;

  extract_insn_cached (insn);
  gcc_assert (reg == recog_data.operand[0] && mem == recog_data.operand[1]);

  *new_pat = ia64_gen_spec_insn (insn, ts, mode_no, gen_p != 0, extend_p);

  return gen_p;
}
|
6869 |
|
|
|
6870 |
|
|
/* Layout constants for the speculative-load pattern tables used by
   ia64_gen_spec_insn and ia64_gen_check.  */
enum
  {
    /* Offset to reach ZERO_EXTEND patterns.  */
    SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1,
    /* Number of patterns for each speculation mode.  */
    SPEC_N = (SPEC_MODE_LAST
              + SPEC_MODE_FOR_EXTEND_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 2)
  };

enum SPEC_GEN_LD_MAP
  {
    /* Offset to ld.a patterns.  */
    SPEC_GEN_A = 0 * SPEC_N,
    /* Offset to ld.s patterns.  */
    SPEC_GEN_S = 1 * SPEC_N,
    /* Offset to ld.sa patterns.  */
    SPEC_GEN_SA = 2 * SPEC_N,
    /* Offset to ld.sa patterns.  For these patterns the corresponding ld.c
       will mutate to chk.s.  */
    SPEC_GEN_SA_FOR_S = 3 * SPEC_N
  };

/* These offsets are used to get (4 * SPEC_N), i.e. the chk.a check range,
   from either the ld.a or the ld.sa pattern index.  */
enum SPEC_GEN_CHECK_OFFSET
  {
    SPEC_GEN_CHKA_FOR_A_OFFSET = 4 * SPEC_N - SPEC_GEN_A,
    SPEC_GEN_CHKA_FOR_SA_OFFSET = 4 * SPEC_N - SPEC_GEN_SA
  };
|
6898 |
|
|
|
6899 |
|
|
/* If GEN_P is true, calculate the index of needed speculation check and return
   speculative pattern for INSN with speculative mode TS, machine mode
   MODE_NO and with ZERO_EXTEND (if EXTEND_P is true).
   If GEN_P is false, just calculate the index of needed speculation check.  */
static rtx
ia64_gen_spec_insn (rtx insn, ds_t ts, int mode_no, bool gen_p, bool extend_p)
{
  rtx pat, new_pat;
  int load_no;
  int shift = 0;

  /* Pattern generators, laid out in four SPEC_N-sized groups that match
     enum SPEC_GEN_LD_MAP: ld.a, ld.s, ld.sa, ld.sa-for-s.  Within each
     group: 9 plain modes followed by 3 zero-extend variants.  The order
     must not change - load_no indexes directly into it.  */
  static rtx (* const gen_load[]) (rtx, rtx) = {
    gen_movbi_advanced,
    gen_movqi_advanced,
    gen_movhi_advanced,
    gen_movsi_advanced,
    gen_movdi_advanced,
    gen_movsf_advanced,
    gen_movdf_advanced,
    gen_movxf_advanced,
    gen_movti_advanced,
    gen_zero_extendqidi2_advanced,
    gen_zero_extendhidi2_advanced,
    gen_zero_extendsidi2_advanced,

    gen_movbi_speculative,
    gen_movqi_speculative,
    gen_movhi_speculative,
    gen_movsi_speculative,
    gen_movdi_speculative,
    gen_movsf_speculative,
    gen_movdf_speculative,
    gen_movxf_speculative,
    gen_movti_speculative,
    gen_zero_extendqidi2_speculative,
    gen_zero_extendhidi2_speculative,
    gen_zero_extendsidi2_speculative,

    gen_movbi_speculative_advanced,
    gen_movqi_speculative_advanced,
    gen_movhi_speculative_advanced,
    gen_movsi_speculative_advanced,
    gen_movdi_speculative_advanced,
    gen_movsf_speculative_advanced,
    gen_movdf_speculative_advanced,
    gen_movxf_speculative_advanced,
    gen_movti_speculative_advanced,
    gen_zero_extendqidi2_speculative_advanced,
    gen_zero_extendhidi2_speculative_advanced,
    gen_zero_extendsidi2_speculative_advanced,

    gen_movbi_speculative_advanced,
    gen_movqi_speculative_advanced,
    gen_movhi_speculative_advanced,
    gen_movsi_speculative_advanced,
    gen_movdi_speculative_advanced,
    gen_movsf_speculative_advanced,
    gen_movdf_speculative_advanced,
    gen_movxf_speculative_advanced,
    gen_movti_speculative_advanced,
    gen_zero_extendqidi2_speculative_advanced,
    gen_zero_extendhidi2_speculative_advanced,
    gen_zero_extendsidi2_speculative_advanced
  };

  load_no = extend_p ? mode_no + SPEC_GEN_EXTEND_OFFSET : mode_no;

  if (ts & BEGIN_DATA)
    {
      /* We don't need recovery because even if this is ld.sa
	 ALAT entry will be allocated only if NAT bit is set to zero.
	 So it is enough to use ld.c here.  */

      if (ts & BEGIN_CONTROL)
	{
	  load_no += SPEC_GEN_SA;

	  if (!mflag_sched_ldc)
	    shift = SPEC_GEN_CHKA_FOR_SA_OFFSET;
	}
      else
	{
	  load_no += SPEC_GEN_A;

	  if (!mflag_sched_ldc)
	    shift = SPEC_GEN_CHKA_FOR_A_OFFSET;
	}
    }
  else if (ts & BEGIN_CONTROL)
    {
      /* ld.sa can be used instead of ld.s to avoid basic block splitting.  */
      if (!mflag_control_ldc)
	load_no += SPEC_GEN_S;
      else
	{
	  gcc_assert (mflag_sched_ldc);
	  load_no += SPEC_GEN_SA_FOR_S;
	}
    }
  else
    gcc_unreachable ();

  /* Set the desired check index.  We add '1', because zero element in this
     array means, that instruction with such uid is non-speculative.  */
  spec_check_no[INSN_UID (insn)] = load_no + shift + 1;

  if (!gen_p)
    return 0;

  new_pat = gen_load[load_no] (copy_rtx (recog_data.operand[0]),
			       copy_rtx (recog_data.operand[1]));

  pat = PATTERN (insn);
  if (GET_CODE (pat) == COND_EXEC)
    /* Re-apply the original guarding predicate.  */
    new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx
				 (COND_EXEC_TEST (pat)), new_pat);

  return new_pat;
}
|
7018 |
|
|
|
7019 |
|
|
/* Offset to branchy checks in the gen_check[] table of ia64_gen_check
   (applied when an ld.c mutates to a branchy recovery check).  */
enum { SPEC_GEN_CHECK_MUTATION_OFFSET = 5 * SPEC_N };
|
7021 |
|
|
|
7022 |
|
|
/* Return nonzero, if INSN needs branchy recovery check. */
|
7023 |
|
|
static bool
|
7024 |
|
|
ia64_needs_block_p (rtx insn)
|
7025 |
|
|
{
|
7026 |
|
|
int check_no;
|
7027 |
|
|
|
7028 |
|
|
check_no = spec_check_no[INSN_UID(insn)] - 1;
|
7029 |
|
|
gcc_assert (0 <= check_no && check_no < SPEC_GEN_CHECK_MUTATION_OFFSET);
|
7030 |
|
|
|
7031 |
|
|
return ((SPEC_GEN_S <= check_no && check_no < SPEC_GEN_S + SPEC_N)
|
7032 |
|
|
|| (4 * SPEC_N <= check_no && check_no < 4 * SPEC_N + SPEC_N));
|
7033 |
|
|
}
|
7034 |
|
|
|
7035 |
|
|
/* Generate (or regenerate, if (MUTATE_P)) recovery check for INSN.
   If (LABEL != 0 || MUTATE_P), generate branchy recovery check.
   Otherwise, generate a simple check.  */
static rtx
ia64_gen_check (rtx insn, rtx label, bool mutate_p)
{
  rtx op1, pat, check_pat;

  /* Check-pattern generators, indexed by (spec_check_no - 1).  The layout
     parallels gen_load[] in ia64_gen_spec_insn: six SPEC_N-sized groups
     (ld.c for ld.a, chk.s, ld.c for ld.sa, ld.c for ld.sa-for-s, chk.a,
     then the mutation groups).  The order must not change.  */
  static rtx (* const gen_check[]) (rtx, rtx) = {
    gen_movbi_clr,
    gen_movqi_clr,
    gen_movhi_clr,
    gen_movsi_clr,
    gen_movdi_clr,
    gen_movsf_clr,
    gen_movdf_clr,
    gen_movxf_clr,
    gen_movti_clr,
    gen_zero_extendqidi2_clr,
    gen_zero_extendhidi2_clr,
    gen_zero_extendsidi2_clr,

    gen_speculation_check_bi,
    gen_speculation_check_qi,
    gen_speculation_check_hi,
    gen_speculation_check_si,
    gen_speculation_check_di,
    gen_speculation_check_sf,
    gen_speculation_check_df,
    gen_speculation_check_xf,
    gen_speculation_check_ti,
    gen_speculation_check_di,
    gen_speculation_check_di,
    gen_speculation_check_di,

    gen_movbi_clr,
    gen_movqi_clr,
    gen_movhi_clr,
    gen_movsi_clr,
    gen_movdi_clr,
    gen_movsf_clr,
    gen_movdf_clr,
    gen_movxf_clr,
    gen_movti_clr,
    gen_zero_extendqidi2_clr,
    gen_zero_extendhidi2_clr,
    gen_zero_extendsidi2_clr,

    gen_movbi_clr,
    gen_movqi_clr,
    gen_movhi_clr,
    gen_movsi_clr,
    gen_movdi_clr,
    gen_movsf_clr,
    gen_movdf_clr,
    gen_movxf_clr,
    gen_movti_clr,
    gen_zero_extendqidi2_clr,
    gen_zero_extendhidi2_clr,
    gen_zero_extendsidi2_clr,

    gen_advanced_load_check_clr_bi,
    gen_advanced_load_check_clr_qi,
    gen_advanced_load_check_clr_hi,
    gen_advanced_load_check_clr_si,
    gen_advanced_load_check_clr_di,
    gen_advanced_load_check_clr_sf,
    gen_advanced_load_check_clr_df,
    gen_advanced_load_check_clr_xf,
    gen_advanced_load_check_clr_ti,
    gen_advanced_load_check_clr_di,
    gen_advanced_load_check_clr_di,
    gen_advanced_load_check_clr_di,

    /* Following checks are generated during mutation.  */
    gen_advanced_load_check_clr_bi,
    gen_advanced_load_check_clr_qi,
    gen_advanced_load_check_clr_hi,
    gen_advanced_load_check_clr_si,
    gen_advanced_load_check_clr_di,
    gen_advanced_load_check_clr_sf,
    gen_advanced_load_check_clr_df,
    gen_advanced_load_check_clr_xf,
    gen_advanced_load_check_clr_ti,
    gen_advanced_load_check_clr_di,
    gen_advanced_load_check_clr_di,
    gen_advanced_load_check_clr_di,

    0,0,0,0,0,0,0,0,0,0,0,0,

    gen_advanced_load_check_clr_bi,
    gen_advanced_load_check_clr_qi,
    gen_advanced_load_check_clr_hi,
    gen_advanced_load_check_clr_si,
    gen_advanced_load_check_clr_di,
    gen_advanced_load_check_clr_sf,
    gen_advanced_load_check_clr_df,
    gen_advanced_load_check_clr_xf,
    gen_advanced_load_check_clr_ti,
    gen_advanced_load_check_clr_di,
    gen_advanced_load_check_clr_di,
    gen_advanced_load_check_clr_di,

    gen_speculation_check_bi,
    gen_speculation_check_qi,
    gen_speculation_check_hi,
    gen_speculation_check_si,
    gen_speculation_check_di,
    gen_speculation_check_sf,
    gen_speculation_check_df,
    gen_speculation_check_xf,
    gen_speculation_check_ti,
    gen_speculation_check_di,
    gen_speculation_check_di,
    gen_speculation_check_di
  };

  extract_insn_cached (insn);

  if (label)
    {
      /* Branchy check: second operand is the recovery-block label.  */
      gcc_assert (mutate_p || ia64_needs_block_p (insn));
      op1 = label;
    }
  else
    {
      /* Simple check: second operand is the original memory source.  */
      gcc_assert (!mutate_p && !ia64_needs_block_p (insn));
      op1 = copy_rtx (recog_data.operand[1]);
    }

  if (mutate_p)
    /* INSN is ld.c.
       Find the speculation check number by searching for original
       speculative load in the RESOLVED_DEPS list of INSN.
       As long as patterns are unique for each instruction, this can be
       accomplished by matching ORIG_PAT fields.  */
    {
      rtx link;
      int check_no = 0;
      rtx orig_pat = ORIG_PAT (insn);

      for (link = RESOLVED_DEPS (insn); link; link = XEXP (link, 1))
	{
	  rtx x = XEXP (link, 0);

	  if (ORIG_PAT (x) == orig_pat)
	    check_no = spec_check_no[INSN_UID (x)];
	}
      gcc_assert (check_no);

      spec_check_no[INSN_UID (insn)] = (check_no
					+ SPEC_GEN_CHECK_MUTATION_OFFSET);
    }

  check_pat = (gen_check[spec_check_no[INSN_UID (insn)] - 1]
	       (copy_rtx (recog_data.operand[0]), op1));

  pat = PATTERN (insn);
  if (GET_CODE (pat) == COND_EXEC)
    /* Re-apply the original guarding predicate.  */
    check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
				   check_pat);

  return check_pat;
}
|
7199 |
|
|
|
7200 |
|
|
/* Return nonzero, if X is branchy recovery check. */
|
7201 |
|
|
static int
|
7202 |
|
|
ia64_spec_check_p (rtx x)
|
7203 |
|
|
{
|
7204 |
|
|
x = PATTERN (x);
|
7205 |
|
|
if (GET_CODE (x) == COND_EXEC)
|
7206 |
|
|
x = COND_EXEC_CODE (x);
|
7207 |
|
|
if (GET_CODE (x) == SET)
|
7208 |
|
|
return ia64_spec_check_src_p (SET_SRC (x));
|
7209 |
|
|
return 0;
|
7210 |
|
|
}
|
7211 |
|
|
|
7212 |
|
|
/* Return nonzero, if SRC belongs to recovery check. */
|
7213 |
|
|
static int
|
7214 |
|
|
ia64_spec_check_src_p (rtx src)
|
7215 |
|
|
{
|
7216 |
|
|
if (GET_CODE (src) == IF_THEN_ELSE)
|
7217 |
|
|
{
|
7218 |
|
|
rtx t;
|
7219 |
|
|
|
7220 |
|
|
t = XEXP (src, 0);
|
7221 |
|
|
if (GET_CODE (t) == NE)
|
7222 |
|
|
{
|
7223 |
|
|
t = XEXP (t, 0);
|
7224 |
|
|
|
7225 |
|
|
if (GET_CODE (t) == UNSPEC)
|
7226 |
|
|
{
|
7227 |
|
|
int code;
|
7228 |
|
|
|
7229 |
|
|
code = XINT (t, 1);
|
7230 |
|
|
|
7231 |
|
|
if (code == UNSPEC_CHKACLR
|
7232 |
|
|
|| code == UNSPEC_CHKS
|
7233 |
|
|
|| code == UNSPEC_LDCCLR)
|
7234 |
|
|
{
|
7235 |
|
|
gcc_assert (code != 0);
|
7236 |
|
|
return code;
|
7237 |
|
|
}
|
7238 |
|
|
}
|
7239 |
|
|
}
|
7240 |
|
|
}
|
7241 |
|
|
return 0;
|
7242 |
|
|
}
|
7243 |
|
|
|
7244 |
|
|
|
7245 |
|
|
/* The following page contains abstract data `bundle states' which are
   used for bundling insns (inserting nops and template generation).  */

/* The following describes state of insn bundling.  */

struct bundle_state
{
  /* Unique bundle state number to identify them in the debugging
     output  */
  int unique_num;
  rtx insn;     /* corresponding insn, NULL for the 1st and the last state  */
  /* number nops before and after the insn  */
  short before_nops_num, after_nops_num;
  int insn_num; /* insn number (0 - for initial state, 1 - for the 1st
		   insn  */
  int cost;     /* cost of the state in cycles  */
  int accumulated_insns_num; /* number of all previous insns including
				nops.  L is considered as 2 insns  */
  int branch_deviation; /* deviation of previous branches from 3rd slots  */
  struct bundle_state *next;  /* next state with the same insn_num  */
  struct bundle_state *originator; /* originator (previous insn state)  */
  /* All bundle states are in the following chain.  */
  struct bundle_state *allocated_states_chain;
  /* The DFA State after issuing the insn and the nops.  */
  state_t dfa_state;
};
|
7271 |
|
|
|
7272 |
|
|
/* The following maps insn number to the corresponding bundle state.  */

static struct bundle_state **index_to_bundle_states;

/* The unique number of the next bundle state.  */

static int bundle_states_num;

/* All allocated bundle states are in the following chain.  */

static struct bundle_state *allocated_bundle_states_chain;

/* All allocated but not used bundle states are in the following
   chain.  */

static struct bundle_state *free_bundle_state_chain;
|
7288 |
|
|
|
7289 |
|
|
|
7290 |
|
|
/* The following function returns a free bundle state. */
|
7291 |
|
|
|
7292 |
|
|
static struct bundle_state *
|
7293 |
|
|
get_free_bundle_state (void)
|
7294 |
|
|
{
|
7295 |
|
|
struct bundle_state *result;
|
7296 |
|
|
|
7297 |
|
|
if (free_bundle_state_chain != NULL)
|
7298 |
|
|
{
|
7299 |
|
|
result = free_bundle_state_chain;
|
7300 |
|
|
free_bundle_state_chain = result->next;
|
7301 |
|
|
}
|
7302 |
|
|
else
|
7303 |
|
|
{
|
7304 |
|
|
result = xmalloc (sizeof (struct bundle_state));
|
7305 |
|
|
result->dfa_state = xmalloc (dfa_state_size);
|
7306 |
|
|
result->allocated_states_chain = allocated_bundle_states_chain;
|
7307 |
|
|
allocated_bundle_states_chain = result;
|
7308 |
|
|
}
|
7309 |
|
|
result->unique_num = bundle_states_num++;
|
7310 |
|
|
return result;
|
7311 |
|
|
|
7312 |
|
|
}
|
7313 |
|
|
|
7314 |
|
|
/* Return STATE to the free list so a later get_free_bundle_state call
   can reuse it.  The underlying memory (including dfa_state) is not
   released here; that happens in finish_bundle_states.  */

static void
free_bundle_state (struct bundle_state *state)
{
  /* Push STATE onto the head of the free list.  */
  state->next = free_bundle_state_chain;
  free_bundle_state_chain = state;
}
|
7322 |
|
|
|
7323 |
|
|
/* Start work with abstract data `bundle states'. */
|
7324 |
|
|
|
7325 |
|
|
static void
|
7326 |
|
|
initiate_bundle_states (void)
|
7327 |
|
|
{
|
7328 |
|
|
bundle_states_num = 0;
|
7329 |
|
|
free_bundle_state_chain = NULL;
|
7330 |
|
|
allocated_bundle_states_chain = NULL;
|
7331 |
|
|
}
|
7332 |
|
|
|
7333 |
|
|
/* Finish work with abstract data `bundle states'. */
|
7334 |
|
|
|
7335 |
|
|
static void
|
7336 |
|
|
finish_bundle_states (void)
|
7337 |
|
|
{
|
7338 |
|
|
struct bundle_state *curr_state, *next_state;
|
7339 |
|
|
|
7340 |
|
|
for (curr_state = allocated_bundle_states_chain;
|
7341 |
|
|
curr_state != NULL;
|
7342 |
|
|
curr_state = next_state)
|
7343 |
|
|
{
|
7344 |
|
|
next_state = curr_state->allocated_states_chain;
|
7345 |
|
|
free (curr_state->dfa_state);
|
7346 |
|
|
free (curr_state);
|
7347 |
|
|
}
|
7348 |
|
|
}
|
7349 |
|
|
|
7350 |
|
|
/* Hash table of the bundle states.  The key is dfa_state and insn_num
   of the bundle states (see bundle_state_hash and bundle_state_eq_p).  */

static htab_t bundle_state_table;
|
7354 |
|
|
|
7355 |
|
|
/* The function returns hash of BUNDLE_STATE. */
|
7356 |
|
|
|
7357 |
|
|
static unsigned
|
7358 |
|
|
bundle_state_hash (const void *bundle_state)
|
7359 |
|
|
{
|
7360 |
|
|
const struct bundle_state *state = (struct bundle_state *) bundle_state;
|
7361 |
|
|
unsigned result, i;
|
7362 |
|
|
|
7363 |
|
|
for (result = i = 0; i < dfa_state_size; i++)
|
7364 |
|
|
result += (((unsigned char *) state->dfa_state) [i]
|
7365 |
|
|
<< ((i % CHAR_BIT) * 3 + CHAR_BIT));
|
7366 |
|
|
return result + state->insn_num;
|
7367 |
|
|
}
|
7368 |
|
|
|
7369 |
|
|
/* The function returns nonzero if the bundle state keys are equal. */
|
7370 |
|
|
|
7371 |
|
|
static int
|
7372 |
|
|
bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
|
7373 |
|
|
{
|
7374 |
|
|
const struct bundle_state * state1 = (struct bundle_state *) bundle_state_1;
|
7375 |
|
|
const struct bundle_state * state2 = (struct bundle_state *) bundle_state_2;
|
7376 |
|
|
|
7377 |
|
|
return (state1->insn_num == state2->insn_num
|
7378 |
|
|
&& memcmp (state1->dfa_state, state2->dfa_state,
|
7379 |
|
|
dfa_state_size) == 0);
|
7380 |
|
|
}
|
7381 |
|
|
|
7382 |
|
|
/* The function inserts BUNDLE_STATE into the hash table.  The
   function returns nonzero if the bundle has been inserted into the
   table.  The table contains the best bundle state with a given key
   (dfa_state + insn_num): best means lowest cost, then fewest
   accumulated insns, then smallest branch deviation.  */

static int
insert_bundle_state (struct bundle_state *bundle_state)
{
  void **entry_ptr;

  entry_ptr = htab_find_slot (bundle_state_table, bundle_state, 1);
  if (*entry_ptr == NULL)
    {
      /* First state with this key: link it into the per-insn-number
	 chain and claim the hash slot.  */
      bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
      index_to_bundle_states [bundle_state->insn_num] = bundle_state;
      *entry_ptr = (void *) bundle_state;
      return TRUE;
    }
  /* A state with this key already exists.  Keep whichever of the two
     is better: lower cost wins; ties are broken by fewer accumulated
     insns, then by smaller branch deviation.  */
  else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
	   || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
	       && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
		   > bundle_state->accumulated_insns_num
		   || (((struct bundle_state *)
			*entry_ptr)->accumulated_insns_num
		       == bundle_state->accumulated_insns_num
		       && ((struct bundle_state *)
			   *entry_ptr)->branch_deviation
		       > bundle_state->branch_deviation))))

    {
      struct bundle_state temp;

      /* The new state is better.  Swap the *contents* of the two
	 structures rather than the pointers, so the hash slot and the
	 index_to_bundle_states chain keep pointing at the same node
	 (only its `next' link is preserved from the old resident).  */
      temp = *(struct bundle_state *) *entry_ptr;
      *(struct bundle_state *) *entry_ptr = *bundle_state;
      ((struct bundle_state *) *entry_ptr)->next = temp.next;
      *bundle_state = temp;
    }
  /* The caller's pointer was not inserted (its contents may have been
     swapped into the table, but the node itself is free to recycle).  */
  return FALSE;
}
|
7420 |
|
|
|
7421 |
|
|
/* Start work with the hash table. */
|
7422 |
|
|
|
7423 |
|
|
static void
|
7424 |
|
|
initiate_bundle_state_table (void)
|
7425 |
|
|
{
|
7426 |
|
|
bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
|
7427 |
|
|
(htab_del) 0);
|
7428 |
|
|
}
|
7429 |
|
|
|
7430 |
|
|
/* Finish work with the hash table.  The bundle states themselves are
   not freed here; they are released by finish_bundle_states via the
   allocated-states chain.  */

static void
finish_bundle_state_table (void)
{
  htab_delete (bundle_state_table);
}
|
7437 |
|
|
|
7438 |
|
|
|
7439 |
|
|
|
7440 |
|
|
/* The following variable is an insn `nop' used to check bundle states
   with different numbers of inserted nops.  */

static rtx ia64_nop;
|
7444 |
|
|
|
7445 |
|
|
/* The following function tries to issue NOPS_NUM nops for the current
|
7446 |
|
|
state without advancing processor cycle. If it failed, the
|
7447 |
|
|
function returns FALSE and frees the current state. */
|
7448 |
|
|
|
7449 |
|
|
static int
|
7450 |
|
|
try_issue_nops (struct bundle_state *curr_state, int nops_num)
|
7451 |
|
|
{
|
7452 |
|
|
int i;
|
7453 |
|
|
|
7454 |
|
|
for (i = 0; i < nops_num; i++)
|
7455 |
|
|
if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
|
7456 |
|
|
{
|
7457 |
|
|
free_bundle_state (curr_state);
|
7458 |
|
|
return FALSE;
|
7459 |
|
|
}
|
7460 |
|
|
return TRUE;
|
7461 |
|
|
}
|
7462 |
|
|
|
7463 |
|
|
/* The following function tries to issue INSN for the current
|
7464 |
|
|
state without advancing processor cycle. If it failed, the
|
7465 |
|
|
function returns FALSE and frees the current state. */
|
7466 |
|
|
|
7467 |
|
|
static int
|
7468 |
|
|
try_issue_insn (struct bundle_state *curr_state, rtx insn)
|
7469 |
|
|
{
|
7470 |
|
|
if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
|
7471 |
|
|
{
|
7472 |
|
|
free_bundle_state (curr_state);
|
7473 |
|
|
return FALSE;
|
7474 |
|
|
}
|
7475 |
|
|
return TRUE;
|
7476 |
|
|
}
|
7477 |
|
|
|
7478 |
|
|
/* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
   starting with ORIGINATOR without advancing the processor cycle.  If
   TRY_BUNDLE_END_P is TRUE, the function also/only (if
   ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the bundle.
   If it was successful, the function creates a new bundle state and
   inserts it into the hash table and into `index_to_bundle_states'.  */

static void
issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
		     rtx insn, int try_bundle_end_p, int only_bundle_end_p)
{
  struct bundle_state *curr_state;

  /* Clone ORIGINATOR into a fresh state for this decision path.  */
  curr_state = get_free_bundle_state ();
  memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
  curr_state->insn = insn;
  curr_state->insn_num = originator->insn_num + 1;
  curr_state->cost = originator->cost;
  curr_state->originator = originator;
  curr_state->before_nops_num = before_nops_num;
  curr_state->after_nops_num = 0;
  curr_state->accumulated_insns_num
    = originator->accumulated_insns_num + before_nops_num;
  curr_state->branch_deviation = originator->branch_deviation;
  gcc_assert (insn);
  if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
    {
      /* Group barriers never start a new cycle here.  */
      gcc_assert (GET_MODE (insn) != TImode);
      if (!try_issue_nops (curr_state, before_nops_num))
	return;
      if (!try_issue_insn (curr_state, insn))
	return;
      /* Probe (on a scratch copy) whether a cycle advance would be
	 needed; reject mid-bundle stops in that case.  */
      memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
      if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
	  && curr_state->accumulated_insns_num % 3 != 0)
	{
	  free_bundle_state (curr_state);
	  return;
	}
    }
  else if (GET_MODE (insn) != TImode)
    {
      /* Insn continues the current cycle.  */
      if (!try_issue_nops (curr_state, before_nops_num))
	return;
      if (!try_issue_insn (curr_state, insn))
	return;
      curr_state->accumulated_insns_num++;
      /* Asm insns always start a cycle, so they cannot appear here.  */
      gcc_assert (GET_CODE (PATTERN (insn)) != ASM_INPUT
		  && asm_noperands (PATTERN (insn)) < 0);

      if (ia64_safe_type (insn) == TYPE_L)
	/* An L-type (long) insn occupies two slots.  */
	curr_state->accumulated_insns_num++;
    }
  else
    {
      /* Insn starts a new cycle (TImode mark from scheduling).  */
      /* If this is an insn that must be first in a group, then don't allow
	 nops to be emitted before it.  Currently, alloc is the only such
	 supported instruction.  */
      /* ??? The bundling automatons should handle this for us, but they do
	 not yet have support for the first_insn attribute.  */
      if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
	{
	  free_bundle_state (curr_state);
	  return;
	}

      /* Advance the DFA to the next cycle, which costs one cycle.  */
      state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
      state_transition (curr_state->dfa_state, NULL);
      curr_state->cost++;
      if (!try_issue_nops (curr_state, before_nops_num))
	return;
      if (!try_issue_insn (curr_state, insn))
	return;
      curr_state->accumulated_insns_num++;
      if (GET_CODE (PATTERN (insn)) == ASM_INPUT
	  || asm_noperands (PATTERN (insn)) >= 0)
	{
	  /* Finish bundle containing asm insn.  */
	  curr_state->after_nops_num
	    = 3 - curr_state->accumulated_insns_num % 3;
	  curr_state->accumulated_insns_num
	    += 3 - curr_state->accumulated_insns_num % 3;
	}
      else if (ia64_safe_type (insn) == TYPE_L)
	/* An L-type (long) insn occupies two slots.  */
	curr_state->accumulated_insns_num++;
    }
  if (ia64_safe_type (insn) == TYPE_B)
    /* Track how far each branch insn is from the preferred 3rd slot.  */
    curr_state->branch_deviation
      += 2 - (curr_state->accumulated_insns_num - 1) % 3;
  if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
    {
      if (!only_bundle_end_p && insert_bundle_state (curr_state))
	{
	  /* The not-fully-filled variant went into the table, so make
	     a deep copy (preserving the copy's own dfa_state buffer
	     and chain link) for the bundle-end variant below.  */
	  state_t dfa_state;
	  struct bundle_state *curr_state1;
	  struct bundle_state *allocated_states_chain;

	  curr_state1 = get_free_bundle_state ();
	  dfa_state = curr_state1->dfa_state;
	  allocated_states_chain = curr_state1->allocated_states_chain;
	  *curr_state1 = *curr_state;
	  curr_state1->dfa_state = dfa_state;
	  curr_state1->allocated_states_chain = allocated_states_chain;
	  memcpy (curr_state1->dfa_state, curr_state->dfa_state,
		  dfa_state_size);
	  curr_state = curr_state1;
	}
      /* Pad the bundle with nops up to the next bundle boundary.  */
      if (!try_issue_nops (curr_state,
			   3 - curr_state->accumulated_insns_num % 3))
	return;
      curr_state->after_nops_num
	= 3 - curr_state->accumulated_insns_num % 3;
      curr_state->accumulated_insns_num
	+= 3 - curr_state->accumulated_insns_num % 3;
    }
  if (!insert_bundle_state (curr_state))
    /* A better state with the same key already exists; recycle.  */
    free_bundle_state (curr_state);
  return;
}
|
7597 |
|
|
|
7598 |
|
|
/* The following function returns the position in the two-bundle window
   for the given STATE, checking slot-position units from highest to
   lowest.  Returns 0 when no position unit is reserved.  */

static int
get_max_pos (state_t state)
{
  if (cpu_unit_reservation_p (state, pos_6))
    return 6;
  else if (cpu_unit_reservation_p (state, pos_5))
    return 5;
  else if (cpu_unit_reservation_p (state, pos_4))
    return 4;
  else if (cpu_unit_reservation_p (state, pos_3))
    return 3;
  else if (cpu_unit_reservation_p (state, pos_2))
    return 2;
  else if (cpu_unit_reservation_p (state, pos_1))
    return 1;
  else
    return 0;
}
|
7619 |
|
|
|
7620 |
|
|
/* The function returns the code of a possible template for the given
   position and state.  The function should be called only with 2 values
   of position equal to 3 or 6.  We avoid generating F NOPs by putting
   templates containing F insns at the end of the template search
   because of an undocumented anomaly in McKinley-derived cores which can
   cause stalls if an F-unit insn (including a NOP) is issued within a
   six-cycle window after reading certain application registers (such
   as ar.bsp).  Furthermore, power considerations also argue against
   the use of F-unit instructions unless they're really needed.  */

static int
get_template (state_t state, int pos)
{
  switch (pos)
    {
      /* Position 3: first bundle of the window.  Returned codes are
	 indexes into the bundle-template table; F-containing templates
	 (_0mmf_, _0mfi_, _0mfb_) are tested last, per the comment
	 above.  */
    case 3:
      if (cpu_unit_reservation_p (state, _0mmi_))
	return 1;
      else if (cpu_unit_reservation_p (state, _0mii_))
	return 0;
      else if (cpu_unit_reservation_p (state, _0mmb_))
	return 7;
      else if (cpu_unit_reservation_p (state, _0mib_))
	return 6;
      else if (cpu_unit_reservation_p (state, _0mbb_))
	return 5;
      else if (cpu_unit_reservation_p (state, _0bbb_))
	return 4;
      else if (cpu_unit_reservation_p (state, _0mmf_))
	return 3;
      else if (cpu_unit_reservation_p (state, _0mfi_))
	return 2;
      else if (cpu_unit_reservation_p (state, _0mfb_))
	return 8;
      else if (cpu_unit_reservation_p (state, _0mlx_))
	return 9;
      else
	gcc_unreachable ();
      /* Position 6: second bundle of the window; same template order
	 using the _1xxx_ units.  */
    case 6:
      if (cpu_unit_reservation_p (state, _1mmi_))
	return 1;
      else if (cpu_unit_reservation_p (state, _1mii_))
	return 0;
      else if (cpu_unit_reservation_p (state, _1mmb_))
	return 7;
      else if (cpu_unit_reservation_p (state, _1mib_))
	return 6;
      else if (cpu_unit_reservation_p (state, _1mbb_))
	return 5;
      else if (cpu_unit_reservation_p (state, _1bbb_))
	return 4;
      else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
	return 3;
      else if (cpu_unit_reservation_p (state, _1mfi_))
	return 2;
      else if (cpu_unit_reservation_p (state, _1mfb_))
	return 8;
      else if (cpu_unit_reservation_p (state, _1mlx_))
	return 9;
      else
	gcc_unreachable ();
    default:
      gcc_unreachable ();
    }
}
|
7685 |
|
|
|
7686 |
|
|
/* The following function returns an insn important for insn bundling
|
7687 |
|
|
followed by INSN and before TAIL. */
|
7688 |
|
|
|
7689 |
|
|
static rtx
|
7690 |
|
|
get_next_important_insn (rtx insn, rtx tail)
|
7691 |
|
|
{
|
7692 |
|
|
for (; insn && insn != tail; insn = NEXT_INSN (insn))
|
7693 |
|
|
if (INSN_P (insn)
|
7694 |
|
|
&& ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
|
7695 |
|
|
&& GET_CODE (PATTERN (insn)) != USE
|
7696 |
|
|
&& GET_CODE (PATTERN (insn)) != CLOBBER)
|
7697 |
|
|
return insn;
|
7698 |
|
|
return NULL_RTX;
|
7699 |
|
|
}
|
7700 |
|
|
|
7701 |
|
|
/* Add a bundle selector TEMPLATE0 before INSN.  For .mbb/.bbb
   templates (codes 4 and 5), additionally propagate REG_EH_REGION
   notes from calls in the bundle onto the following nops when unwind
   info is needed.  */

static void
ia64_add_bundle_selector_before (int template0, rtx insn)
{
  rtx b = gen_bundle_selector (GEN_INT (template0));

  ia64_emit_insn_before (b, insn);
#if NR_BUNDLES == 10
  if ((template0 == 4 || template0 == 5)
      && (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
    {
      int i;
      rtx note = NULL_RTX;

      /* In .mbb and .bbb bundles, check if CALL_INSN isn't in the
	 first or second slot.  If it is and has REG_EH_NOTE set, copy it
	 to following nops, as br.call sets rp to the address of following
	 bundle and therefore an EH region end must be on a bundle
	 boundary.  */
      insn = PREV_INSN (insn);
      /* Walk the three slots of the bundle just selected.  */
      for (i = 0; i < 3; i++)
	{
	  /* Skip empty insns; they occupy no slot.  */
	  do
	    insn = next_active_insn (insn);
	  while (GET_CODE (insn) == INSN
		 && get_attr_empty (insn) == EMPTY_YES);
	  if (GET_CODE (insn) == CALL_INSN)
	    /* Remember the call's EH note (if any) for the nops that
	       follow it in this bundle.  */
	    note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
	  else if (note)
	    {
	      int code;

	      /* Only nops may follow a call within the bundle here.  */
	      gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
			  || code == CODE_FOR_nop_b);
	      if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
		/* The nop already carries an EH note; stop copying.  */
		note = NULL_RTX;
	      else
		REG_NOTES (insn)
		  = gen_rtx_EXPR_LIST (REG_EH_REGION, XEXP (note, 0),
				       REG_NOTES (insn));
	    }
	}
    }
#endif
}
|
7747 |
|
|
|
7748 |
|
|
/* The following function does insn bundling. Bundling means
|
7749 |
|
|
inserting templates and nop insns to fit insn groups into permitted
|
7750 |
|
|
templates. Instruction scheduling uses NDFA (non-deterministic
|
7751 |
|
|
finite automata) encoding informations about the templates and the
|
7752 |
|
|
inserted nops. Nondeterminism of the automata permits follows
|
7753 |
|
|
all possible insn sequences very fast.
|
7754 |
|
|
|
7755 |
|
|
Unfortunately it is not possible to get information about inserting
|
7756 |
|
|
nop insns and used templates from the automata states. The
|
7757 |
|
|
automata only says that we can issue an insn possibly inserting
|
7758 |
|
|
some nops before it and using some template. Therefore insn
|
7759 |
|
|
bundling in this function is implemented by using DFA
|
7760 |
|
|
(deterministic finite automata). We follow all possible insn
|
7761 |
|
|
sequences by inserting 0-2 nops (that is what the NDFA describe for
|
7762 |
|
|
insn scheduling) before/after each insn being bundled. We know the
|
7763 |
|
|
start of simulated processor cycle from insn scheduling (insn
|
7764 |
|
|
starting a new cycle has TImode).
|
7765 |
|
|
|
7766 |
|
|
Simple implementation of insn bundling would create enormous
|
7767 |
|
|
number of possible insn sequences satisfying information about new
|
7768 |
|
|
cycle ticks taken from the insn scheduling. To make the algorithm
|
7769 |
|
|
practical we use dynamic programming. Each decision (about
|
7770 |
|
|
inserting nops and implicitly about previous decisions) is described
|
7771 |
|
|
by structure bundle_state (see above). If we generate the same
|
7772 |
|
|
bundle state (key is automaton state after issuing the insns and
|
7773 |
|
|
nops for it), we reuse already generated one. As consequence we
|
7774 |
|
|
reject some decisions which cannot improve the solution and
|
7775 |
|
|
reduce memory for the algorithm.
|
7776 |
|
|
|
7777 |
|
|
When we reach the end of EBB (extended basic block), we choose the
|
7778 |
|
|
best sequence and then, moving back in EBB, insert templates for
|
7779 |
|
|
the best alternative. The templates are taken from querying
|
7780 |
|
|
automaton state for each insn in chosen bundle states.
|
7781 |
|
|
|
7782 |
|
|
So the algorithm makes two (forward and backward) passes through
|
7783 |
|
|
EBB. There is an additional forward pass through EBB for Itanium1
|
7784 |
|
|
processor. This pass inserts more nops to make dependency between
|
7785 |
|
|
a producer insn and MMMUL/MMSHF at least 4 cycles long. */
|
7786 |
|
|
|
7787 |
|
|
static void
|
7788 |
|
|
bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
|
7789 |
|
|
{
|
7790 |
|
|
struct bundle_state *curr_state, *next_state, *best_state;
|
7791 |
|
|
rtx insn, next_insn;
|
7792 |
|
|
int insn_num;
|
7793 |
|
|
int i, bundle_end_p, only_bundle_end_p, asm_p;
|
7794 |
|
|
int pos = 0, max_pos, template0, template1;
|
7795 |
|
|
rtx b;
|
7796 |
|
|
rtx nop;
|
7797 |
|
|
enum attr_type type;
|
7798 |
|
|
|
7799 |
|
|
insn_num = 0;
|
7800 |
|
|
/* Count insns in the EBB. */
|
7801 |
|
|
for (insn = NEXT_INSN (prev_head_insn);
|
7802 |
|
|
insn && insn != tail;
|
7803 |
|
|
insn = NEXT_INSN (insn))
|
7804 |
|
|
if (INSN_P (insn))
|
7805 |
|
|
insn_num++;
|
7806 |
|
|
if (insn_num == 0)
|
7807 |
|
|
return;
|
7808 |
|
|
bundling_p = 1;
|
7809 |
|
|
dfa_clean_insn_cache ();
|
7810 |
|
|
initiate_bundle_state_table ();
|
7811 |
|
|
index_to_bundle_states = xmalloc ((insn_num + 2)
|
7812 |
|
|
* sizeof (struct bundle_state *));
|
7813 |
|
|
/* First (forward) pass -- generation of bundle states. */
|
7814 |
|
|
curr_state = get_free_bundle_state ();
|
7815 |
|
|
curr_state->insn = NULL;
|
7816 |
|
|
curr_state->before_nops_num = 0;
|
7817 |
|
|
curr_state->after_nops_num = 0;
|
7818 |
|
|
curr_state->insn_num = 0;
|
7819 |
|
|
curr_state->cost = 0;
|
7820 |
|
|
curr_state->accumulated_insns_num = 0;
|
7821 |
|
|
curr_state->branch_deviation = 0;
|
7822 |
|
|
curr_state->next = NULL;
|
7823 |
|
|
curr_state->originator = NULL;
|
7824 |
|
|
state_reset (curr_state->dfa_state);
|
7825 |
|
|
index_to_bundle_states [0] = curr_state;
|
7826 |
|
|
insn_num = 0;
|
7827 |
|
|
/* Shift cycle mark if it is put on insn which could be ignored. */
|
7828 |
|
|
for (insn = NEXT_INSN (prev_head_insn);
|
7829 |
|
|
insn != tail;
|
7830 |
|
|
insn = NEXT_INSN (insn))
|
7831 |
|
|
if (INSN_P (insn)
|
7832 |
|
|
&& (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
|
7833 |
|
|
|| GET_CODE (PATTERN (insn)) == USE
|
7834 |
|
|
|| GET_CODE (PATTERN (insn)) == CLOBBER)
|
7835 |
|
|
&& GET_MODE (insn) == TImode)
|
7836 |
|
|
{
|
7837 |
|
|
PUT_MODE (insn, VOIDmode);
|
7838 |
|
|
for (next_insn = NEXT_INSN (insn);
|
7839 |
|
|
next_insn != tail;
|
7840 |
|
|
next_insn = NEXT_INSN (next_insn))
|
7841 |
|
|
if (INSN_P (next_insn)
|
7842 |
|
|
&& ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
|
7843 |
|
|
&& GET_CODE (PATTERN (next_insn)) != USE
|
7844 |
|
|
&& GET_CODE (PATTERN (next_insn)) != CLOBBER)
|
7845 |
|
|
{
|
7846 |
|
|
PUT_MODE (next_insn, TImode);
|
7847 |
|
|
break;
|
7848 |
|
|
}
|
7849 |
|
|
}
|
7850 |
|
|
/* Forward pass: generation of bundle states. */
|
7851 |
|
|
for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
|
7852 |
|
|
insn != NULL_RTX;
|
7853 |
|
|
insn = next_insn)
|
7854 |
|
|
{
|
7855 |
|
|
gcc_assert (INSN_P (insn)
|
7856 |
|
|
&& ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
|
7857 |
|
|
&& GET_CODE (PATTERN (insn)) != USE
|
7858 |
|
|
&& GET_CODE (PATTERN (insn)) != CLOBBER);
|
7859 |
|
|
type = ia64_safe_type (insn);
|
7860 |
|
|
next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
|
7861 |
|
|
insn_num++;
|
7862 |
|
|
index_to_bundle_states [insn_num] = NULL;
|
7863 |
|
|
for (curr_state = index_to_bundle_states [insn_num - 1];
|
7864 |
|
|
curr_state != NULL;
|
7865 |
|
|
curr_state = next_state)
|
7866 |
|
|
{
|
7867 |
|
|
pos = curr_state->accumulated_insns_num % 3;
|
7868 |
|
|
next_state = curr_state->next;
|
7869 |
|
|
/* We must fill up the current bundle in order to start a
|
7870 |
|
|
subsequent asm insn in a new bundle. Asm insn is always
|
7871 |
|
|
placed in a separate bundle. */
|
7872 |
|
|
only_bundle_end_p
|
7873 |
|
|
= (next_insn != NULL_RTX
|
7874 |
|
|
&& INSN_CODE (insn) == CODE_FOR_insn_group_barrier
|
7875 |
|
|
&& ia64_safe_type (next_insn) == TYPE_UNKNOWN);
|
7876 |
|
|
/* We may fill up the current bundle if it is the cycle end
|
7877 |
|
|
without a group barrier. */
|
7878 |
|
|
bundle_end_p
|
7879 |
|
|
= (only_bundle_end_p || next_insn == NULL_RTX
|
7880 |
|
|
|| (GET_MODE (next_insn) == TImode
|
7881 |
|
|
&& INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
|
7882 |
|
|
if (type == TYPE_F || type == TYPE_B || type == TYPE_L
|
7883 |
|
|
|| type == TYPE_S
|
7884 |
|
|
/* We need to insert 2 nops for cases like M_MII. To
|
7885 |
|
|
guarantee issuing all insns on the same cycle for
|
7886 |
|
|
Itanium 1, we need to issue 2 nops after the first M
|
7887 |
|
|
insn (MnnMII where n is a nop insn). */
|
7888 |
|
|
|| ((type == TYPE_M || type == TYPE_A)
|
7889 |
|
|
&& ia64_tune == PROCESSOR_ITANIUM
|
7890 |
|
|
&& !bundle_end_p && pos == 1))
|
7891 |
|
|
issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
|
7892 |
|
|
only_bundle_end_p);
|
7893 |
|
|
issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
|
7894 |
|
|
only_bundle_end_p);
|
7895 |
|
|
issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
|
7896 |
|
|
only_bundle_end_p);
|
7897 |
|
|
}
|
7898 |
|
|
gcc_assert (index_to_bundle_states [insn_num]);
|
7899 |
|
|
for (curr_state = index_to_bundle_states [insn_num];
|
7900 |
|
|
curr_state != NULL;
|
7901 |
|
|
curr_state = curr_state->next)
|
7902 |
|
|
if (verbose >= 2 && dump)
|
7903 |
|
|
{
|
7904 |
|
|
/* This structure is taken from generated code of the
|
7905 |
|
|
pipeline hazard recognizer (see file insn-attrtab.c).
|
7906 |
|
|
Please don't forget to change the structure if a new
|
7907 |
|
|
automaton is added to .md file. */
|
7908 |
|
|
struct DFA_chip
|
7909 |
|
|
{
|
7910 |
|
|
unsigned short one_automaton_state;
|
7911 |
|
|
unsigned short oneb_automaton_state;
|
7912 |
|
|
unsigned short two_automaton_state;
|
7913 |
|
|
unsigned short twob_automaton_state;
|
7914 |
|
|
};
|
7915 |
|
|
|
7916 |
|
|
fprintf
|
7917 |
|
|
(dump,
|
7918 |
|
|
"// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
|
7919 |
|
|
curr_state->unique_num,
|
7920 |
|
|
(curr_state->originator == NULL
|
7921 |
|
|
? -1 : curr_state->originator->unique_num),
|
7922 |
|
|
curr_state->cost,
|
7923 |
|
|
curr_state->before_nops_num, curr_state->after_nops_num,
|
7924 |
|
|
curr_state->accumulated_insns_num, curr_state->branch_deviation,
|
7925 |
|
|
(ia64_tune == PROCESSOR_ITANIUM
|
7926 |
|
|
? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
|
7927 |
|
|
: ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
|
7928 |
|
|
INSN_UID (insn));
|
7929 |
|
|
}
|
7930 |
|
|
}
|
7931 |
|
|
|
7932 |
|
|
/* We should find a solution because the 2nd insn scheduling has
|
7933 |
|
|
found one. */
|
7934 |
|
|
gcc_assert (index_to_bundle_states [insn_num]);
|
7935 |
|
|
/* Find a state corresponding to the best insn sequence. */
|
7936 |
|
|
best_state = NULL;
|
7937 |
|
|
for (curr_state = index_to_bundle_states [insn_num];
|
7938 |
|
|
curr_state != NULL;
|
7939 |
|
|
curr_state = curr_state->next)
|
7940 |
|
|
/* We are just looking at the states with fully filled up last
|
7941 |
|
|
bundle. The first we prefer insn sequences with minimal cost
|
7942 |
|
|
then with minimal inserted nops and finally with branch insns
|
7943 |
|
|
placed in the 3rd slots. */
|
7944 |
|
|
if (curr_state->accumulated_insns_num % 3 == 0
|
7945 |
|
|
&& (best_state == NULL || best_state->cost > curr_state->cost
|
7946 |
|
|
|| (best_state->cost == curr_state->cost
|
7947 |
|
|
&& (curr_state->accumulated_insns_num
|
7948 |
|
|
< best_state->accumulated_insns_num
|
7949 |
|
|
|| (curr_state->accumulated_insns_num
|
7950 |
|
|
== best_state->accumulated_insns_num
|
7951 |
|
|
&& curr_state->branch_deviation
|
7952 |
|
|
< best_state->branch_deviation)))))
|
7953 |
|
|
best_state = curr_state;
|
7954 |
|
|
/* Second (backward) pass: adding nops and templates. */
|
7955 |
|
|
insn_num = best_state->before_nops_num;
|
7956 |
|
|
template0 = template1 = -1;
|
7957 |
|
|
for (curr_state = best_state;
|
7958 |
|
|
curr_state->originator != NULL;
|
7959 |
|
|
curr_state = curr_state->originator)
|
7960 |
|
|
{
|
7961 |
|
|
insn = curr_state->insn;
|
7962 |
|
|
asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
|
7963 |
|
|
|| asm_noperands (PATTERN (insn)) >= 0);
|
7964 |
|
|
insn_num++;
|
7965 |
|
|
if (verbose >= 2 && dump)
|
7966 |
|
|
{
|
7967 |
|
|
struct DFA_chip
|
7968 |
|
|
{
|
7969 |
|
|
unsigned short one_automaton_state;
|
7970 |
|
|
unsigned short oneb_automaton_state;
|
7971 |
|
|
unsigned short two_automaton_state;
|
7972 |
|
|
unsigned short twob_automaton_state;
|
7973 |
|
|
};
|
7974 |
|
|
|
7975 |
|
|
fprintf
|
7976 |
|
|
(dump,
|
7977 |
|
|
"// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
|
7978 |
|
|
curr_state->unique_num,
|
7979 |
|
|
(curr_state->originator == NULL
|
7980 |
|
|
? -1 : curr_state->originator->unique_num),
|
7981 |
|
|
curr_state->cost,
|
7982 |
|
|
curr_state->before_nops_num, curr_state->after_nops_num,
|
7983 |
|
|
curr_state->accumulated_insns_num, curr_state->branch_deviation,
|
7984 |
|
|
(ia64_tune == PROCESSOR_ITANIUM
|
7985 |
|
|
? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
|
7986 |
|
|
: ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
|
7987 |
|
|
INSN_UID (insn));
|
7988 |
|
|
}
|
7989 |
|
|
/* Find the position in the current bundle window. The window can
|
7990 |
|
|
contain at most two bundles. Two bundle window means that
|
7991 |
|
|
the processor will make two bundle rotation. */
|
7992 |
|
|
max_pos = get_max_pos (curr_state->dfa_state);
|
7993 |
|
|
if (max_pos == 6
|
7994 |
|
|
/* The following (negative template number) means that the
|
7995 |
|
|
processor did one bundle rotation. */
|
7996 |
|
|
|| (max_pos == 3 && template0 < 0))
|
7997 |
|
|
{
|
7998 |
|
|
/* We are at the end of the window -- find template(s) for
|
7999 |
|
|
its bundle(s). */
|
8000 |
|
|
pos = max_pos;
|
8001 |
|
|
if (max_pos == 3)
|
8002 |
|
|
template0 = get_template (curr_state->dfa_state, 3);
|
8003 |
|
|
else
|
8004 |
|
|
{
|
8005 |
|
|
template1 = get_template (curr_state->dfa_state, 3);
|
8006 |
|
|
template0 = get_template (curr_state->dfa_state, 6);
|
8007 |
|
|
}
|
8008 |
|
|
}
|
8009 |
|
|
if (max_pos > 3 && template1 < 0)
|
8010 |
|
|
/* It may happen when we have the stop inside a bundle. */
|
8011 |
|
|
{
|
8012 |
|
|
gcc_assert (pos <= 3);
|
8013 |
|
|
template1 = get_template (curr_state->dfa_state, 3);
|
8014 |
|
|
pos += 3;
|
8015 |
|
|
}
|
8016 |
|
|
if (!asm_p)
|
8017 |
|
|
/* Emit nops after the current insn. */
|
8018 |
|
|
for (i = 0; i < curr_state->after_nops_num; i++)
|
8019 |
|
|
{
|
8020 |
|
|
nop = gen_nop ();
|
8021 |
|
|
emit_insn_after (nop, insn);
|
8022 |
|
|
pos--;
|
8023 |
|
|
gcc_assert (pos >= 0);
|
8024 |
|
|
if (pos % 3 == 0)
|
8025 |
|
|
{
|
8026 |
|
|
/* We are at the start of a bundle: emit the template
|
8027 |
|
|
(it should be defined). */
|
8028 |
|
|
gcc_assert (template0 >= 0);
|
8029 |
|
|
ia64_add_bundle_selector_before (template0, nop);
|
8030 |
|
|
/* If we have two bundle window, we make one bundle
|
8031 |
|
|
rotation. Otherwise template0 will be undefined
|
8032 |
|
|
(negative value). */
|
8033 |
|
|
template0 = template1;
|
8034 |
|
|
template1 = -1;
|
8035 |
|
|
}
|
8036 |
|
|
}
|
8037 |
|
|
/* Move the position backward in the window. Group barrier has
|
8038 |
|
|
no slot. Asm insn takes all bundle. */
|
8039 |
|
|
if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
|
8040 |
|
|
&& GET_CODE (PATTERN (insn)) != ASM_INPUT
|
8041 |
|
|
&& asm_noperands (PATTERN (insn)) < 0)
|
8042 |
|
|
pos--;
|
8043 |
|
|
/* Long insn takes 2 slots. */
|
8044 |
|
|
if (ia64_safe_type (insn) == TYPE_L)
|
8045 |
|
|
pos--;
|
8046 |
|
|
gcc_assert (pos >= 0);
|
8047 |
|
|
if (pos % 3 == 0
|
8048 |
|
|
&& INSN_CODE (insn) != CODE_FOR_insn_group_barrier
|
8049 |
|
|
&& GET_CODE (PATTERN (insn)) != ASM_INPUT
|
8050 |
|
|
&& asm_noperands (PATTERN (insn)) < 0)
|
8051 |
|
|
{
|
8052 |
|
|
/* The current insn is at the bundle start: emit the
|
8053 |
|
|
template. */
|
8054 |
|
|
gcc_assert (template0 >= 0);
|
8055 |
|
|
ia64_add_bundle_selector_before (template0, insn);
|
8056 |
|
|
b = PREV_INSN (insn);
|
8057 |
|
|
insn = b;
|
8058 |
|
|
/* See comment above in analogous place for emitting nops
|
8059 |
|
|
after the insn. */
|
8060 |
|
|
template0 = template1;
|
8061 |
|
|
template1 = -1;
|
8062 |
|
|
}
|
8063 |
|
|
/* Emit nops after the current insn. */
|
8064 |
|
|
for (i = 0; i < curr_state->before_nops_num; i++)
|
8065 |
|
|
{
|
8066 |
|
|
nop = gen_nop ();
|
8067 |
|
|
ia64_emit_insn_before (nop, insn);
|
8068 |
|
|
nop = PREV_INSN (insn);
|
8069 |
|
|
insn = nop;
|
8070 |
|
|
pos--;
|
8071 |
|
|
gcc_assert (pos >= 0);
|
8072 |
|
|
if (pos % 3 == 0)
|
8073 |
|
|
{
|
8074 |
|
|
/* See comment above in analogous place for emitting nops
|
8075 |
|
|
after the insn. */
|
8076 |
|
|
gcc_assert (template0 >= 0);
|
8077 |
|
|
ia64_add_bundle_selector_before (template0, insn);
|
8078 |
|
|
b = PREV_INSN (insn);
|
8079 |
|
|
insn = b;
|
8080 |
|
|
template0 = template1;
|
8081 |
|
|
template1 = -1;
|
8082 |
|
|
}
|
8083 |
|
|
}
|
8084 |
|
|
}
|
8085 |
|
|
if (ia64_tune == PROCESSOR_ITANIUM)
|
8086 |
|
|
/* Insert additional cycles for MM-insns (MMMUL and MMSHF).
|
8087 |
|
|
Itanium1 has a strange design, if the distance between an insn
|
8088 |
|
|
and dependent MM-insn is less 4 then we have a 6 additional
|
8089 |
|
|
cycles stall. So we make the distance equal to 4 cycles if it
|
8090 |
|
|
is less. */
|
8091 |
|
|
for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
|
8092 |
|
|
insn != NULL_RTX;
|
8093 |
|
|
insn = next_insn)
|
8094 |
|
|
{
|
8095 |
|
|
gcc_assert (INSN_P (insn)
|
8096 |
|
|
&& ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
|
8097 |
|
|
&& GET_CODE (PATTERN (insn)) != USE
|
8098 |
|
|
&& GET_CODE (PATTERN (insn)) != CLOBBER);
|
8099 |
|
|
next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
|
8100 |
|
|
if (INSN_UID (insn) < clocks_length && add_cycles [INSN_UID (insn)])
|
8101 |
|
|
/* We found a MM-insn which needs additional cycles. */
|
8102 |
|
|
{
|
8103 |
|
|
rtx last;
|
8104 |
|
|
int i, j, n;
|
8105 |
|
|
int pred_stop_p;
|
8106 |
|
|
|
8107 |
|
|
/* Now we are searching for a template of the bundle in
|
8108 |
|
|
which the MM-insn is placed and the position of the
|
8109 |
|
|
insn in the bundle (0, 1, 2). Also we are searching
|
8110 |
|
|
for that there is a stop before the insn. */
|
8111 |
|
|
last = prev_active_insn (insn);
|
8112 |
|
|
pred_stop_p = recog_memoized (last) == CODE_FOR_insn_group_barrier;
|
8113 |
|
|
if (pred_stop_p)
|
8114 |
|
|
last = prev_active_insn (last);
|
8115 |
|
|
n = 0;
|
8116 |
|
|
for (;; last = prev_active_insn (last))
|
8117 |
|
|
if (recog_memoized (last) == CODE_FOR_bundle_selector)
|
8118 |
|
|
{
|
8119 |
|
|
template0 = XINT (XVECEXP (PATTERN (last), 0, 0), 0);
|
8120 |
|
|
if (template0 == 9)
|
8121 |
|
|
/* The insn is in MLX bundle. Change the template
|
8122 |
|
|
onto MFI because we will add nops before the
|
8123 |
|
|
insn. It simplifies subsequent code a lot. */
|
8124 |
|
|
PATTERN (last)
|
8125 |
|
|
= gen_bundle_selector (const2_rtx); /* -> MFI */
|
8126 |
|
|
break;
|
8127 |
|
|
}
|
8128 |
|
|
else if (recog_memoized (last) != CODE_FOR_insn_group_barrier
|
8129 |
|
|
&& (ia64_safe_itanium_class (last)
|
8130 |
|
|
!= ITANIUM_CLASS_IGNORE))
|
8131 |
|
|
n++;
|
8132 |
|
|
/* Some check of correctness: the stop is not at the
|
8133 |
|
|
bundle start, there are no more 3 insns in the bundle,
|
8134 |
|
|
and the MM-insn is not at the start of bundle with
|
8135 |
|
|
template MLX. */
|
8136 |
|
|
gcc_assert ((!pred_stop_p || n)
|
8137 |
|
|
&& n <= 2
|
8138 |
|
|
&& (template0 != 9 || !n));
|
8139 |
|
|
/* Put nops after the insn in the bundle. */
|
8140 |
|
|
for (j = 3 - n; j > 0; j --)
|
8141 |
|
|
ia64_emit_insn_before (gen_nop (), insn);
|
8142 |
|
|
/* It takes into account that we will add more N nops
|
8143 |
|
|
before the insn lately -- please see code below. */
|
8144 |
|
|
add_cycles [INSN_UID (insn)]--;
|
8145 |
|
|
if (!pred_stop_p || add_cycles [INSN_UID (insn)])
|
8146 |
|
|
ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
|
8147 |
|
|
insn);
|
8148 |
|
|
if (pred_stop_p)
|
8149 |
|
|
add_cycles [INSN_UID (insn)]--;
|
8150 |
|
|
for (i = add_cycles [INSN_UID (insn)]; i > 0; i--)
|
8151 |
|
|
{
|
8152 |
|
|
/* Insert "MII;" template. */
|
8153 |
|
|
ia64_emit_insn_before (gen_bundle_selector (const0_rtx),
|
8154 |
|
|
insn);
|
8155 |
|
|
ia64_emit_insn_before (gen_nop (), insn);
|
8156 |
|
|
ia64_emit_insn_before (gen_nop (), insn);
|
8157 |
|
|
if (i > 1)
|
8158 |
|
|
{
|
8159 |
|
|
/* To decrease code size, we use "MI;I;"
|
8160 |
|
|
template. */
|
8161 |
|
|
ia64_emit_insn_before
|
8162 |
|
|
(gen_insn_group_barrier (GEN_INT (3)), insn);
|
8163 |
|
|
i--;
|
8164 |
|
|
}
|
8165 |
|
|
ia64_emit_insn_before (gen_nop (), insn);
|
8166 |
|
|
ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
|
8167 |
|
|
insn);
|
8168 |
|
|
}
|
8169 |
|
|
/* Put the MM-insn in the same slot of a bundle with the
|
8170 |
|
|
same template as the original one. */
|
8171 |
|
|
ia64_add_bundle_selector_before (template0, insn);
|
8172 |
|
|
/* To put the insn in the same slot, add necessary number
|
8173 |
|
|
of nops. */
|
8174 |
|
|
for (j = n; j > 0; j --)
|
8175 |
|
|
ia64_emit_insn_before (gen_nop (), insn);
|
8176 |
|
|
/* Put the stop if the original bundle had it. */
|
8177 |
|
|
if (pred_stop_p)
|
8178 |
|
|
ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
|
8179 |
|
|
insn);
|
8180 |
|
|
}
|
8181 |
|
|
}
|
8182 |
|
|
free (index_to_bundle_states);
|
8183 |
|
|
finish_bundle_state_table ();
|
8184 |
|
|
bundling_p = 0;
|
8185 |
|
|
dfa_clean_insn_cache ();
|
8186 |
|
|
}
|
8187 |
|
|
|
8188 |
|
|
/* The following function is called at the end of scheduling BB or
|
8189 |
|
|
EBB. After reload, it inserts stop bits and does insn bundling. */
|
8190 |
|
|
|
8191 |
|
|
static void
|
8192 |
|
|
ia64_sched_finish (FILE *dump, int sched_verbose)
|
8193 |
|
|
{
|
8194 |
|
|
if (sched_verbose)
|
8195 |
|
|
fprintf (dump, "// Finishing schedule.\n");
|
8196 |
|
|
if (!reload_completed)
|
8197 |
|
|
return;
|
8198 |
|
|
if (reload_completed)
|
8199 |
|
|
{
|
8200 |
|
|
final_emit_insn_group_barriers (dump);
|
8201 |
|
|
bundling (dump, sched_verbose, current_sched_info->prev_head,
|
8202 |
|
|
current_sched_info->next_tail);
|
8203 |
|
|
if (sched_verbose && dump)
|
8204 |
|
|
fprintf (dump, "// finishing %d-%d\n",
|
8205 |
|
|
INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
|
8206 |
|
|
INSN_UID (PREV_INSN (current_sched_info->next_tail)));
|
8207 |
|
|
|
8208 |
|
|
return;
|
8209 |
|
|
}
|
8210 |
|
|
}
|
8211 |
|
|
|
8212 |
|
|
/* The following function inserts stop bits in scheduled BB or EBB. */
|
8213 |
|
|
|
8214 |
|
|
static void
|
8215 |
|
|
final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
|
8216 |
|
|
{
|
8217 |
|
|
rtx insn;
|
8218 |
|
|
int need_barrier_p = 0;
|
8219 |
|
|
rtx prev_insn = NULL_RTX;
|
8220 |
|
|
|
8221 |
|
|
init_insn_group_barriers ();
|
8222 |
|
|
|
8223 |
|
|
for (insn = NEXT_INSN (current_sched_info->prev_head);
|
8224 |
|
|
insn != current_sched_info->next_tail;
|
8225 |
|
|
insn = NEXT_INSN (insn))
|
8226 |
|
|
{
|
8227 |
|
|
if (GET_CODE (insn) == BARRIER)
|
8228 |
|
|
{
|
8229 |
|
|
rtx last = prev_active_insn (insn);
|
8230 |
|
|
|
8231 |
|
|
if (! last)
|
8232 |
|
|
continue;
|
8233 |
|
|
if (GET_CODE (last) == JUMP_INSN
|
8234 |
|
|
&& GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
|
8235 |
|
|
last = prev_active_insn (last);
|
8236 |
|
|
if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
|
8237 |
|
|
emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
|
8238 |
|
|
|
8239 |
|
|
init_insn_group_barriers ();
|
8240 |
|
|
need_barrier_p = 0;
|
8241 |
|
|
prev_insn = NULL_RTX;
|
8242 |
|
|
}
|
8243 |
|
|
else if (INSN_P (insn))
|
8244 |
|
|
{
|
8245 |
|
|
if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
|
8246 |
|
|
{
|
8247 |
|
|
init_insn_group_barriers ();
|
8248 |
|
|
need_barrier_p = 0;
|
8249 |
|
|
prev_insn = NULL_RTX;
|
8250 |
|
|
}
|
8251 |
|
|
else if (need_barrier_p || group_barrier_needed (insn))
|
8252 |
|
|
{
|
8253 |
|
|
if (TARGET_EARLY_STOP_BITS)
|
8254 |
|
|
{
|
8255 |
|
|
rtx last;
|
8256 |
|
|
|
8257 |
|
|
for (last = insn;
|
8258 |
|
|
last != current_sched_info->prev_head;
|
8259 |
|
|
last = PREV_INSN (last))
|
8260 |
|
|
if (INSN_P (last) && GET_MODE (last) == TImode
|
8261 |
|
|
&& stops_p [INSN_UID (last)])
|
8262 |
|
|
break;
|
8263 |
|
|
if (last == current_sched_info->prev_head)
|
8264 |
|
|
last = insn;
|
8265 |
|
|
last = prev_active_insn (last);
|
8266 |
|
|
if (last
|
8267 |
|
|
&& recog_memoized (last) != CODE_FOR_insn_group_barrier)
|
8268 |
|
|
emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
|
8269 |
|
|
last);
|
8270 |
|
|
init_insn_group_barriers ();
|
8271 |
|
|
for (last = NEXT_INSN (last);
|
8272 |
|
|
last != insn;
|
8273 |
|
|
last = NEXT_INSN (last))
|
8274 |
|
|
if (INSN_P (last))
|
8275 |
|
|
group_barrier_needed (last);
|
8276 |
|
|
}
|
8277 |
|
|
else
|
8278 |
|
|
{
|
8279 |
|
|
emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
|
8280 |
|
|
insn);
|
8281 |
|
|
init_insn_group_barriers ();
|
8282 |
|
|
}
|
8283 |
|
|
group_barrier_needed (insn);
|
8284 |
|
|
prev_insn = NULL_RTX;
|
8285 |
|
|
}
|
8286 |
|
|
else if (recog_memoized (insn) >= 0)
|
8287 |
|
|
prev_insn = insn;
|
8288 |
|
|
need_barrier_p = (GET_CODE (insn) == CALL_INSN
|
8289 |
|
|
|| GET_CODE (PATTERN (insn)) == ASM_INPUT
|
8290 |
|
|
|| asm_noperands (PATTERN (insn)) >= 0);
|
8291 |
|
|
}
|
8292 |
|
|
}
|
8293 |
|
|
}
|
8294 |
|
|
|
8295 |
|
|
|
8296 |
|
|
|
8297 |
|
|
/* If the following function returns TRUE, we will use the DFA
|
8298 |
|
|
insn scheduler. */
|
8299 |
|
|
|
8300 |
|
|
static int
|
8301 |
|
|
ia64_first_cycle_multipass_dfa_lookahead (void)
|
8302 |
|
|
{
|
8303 |
|
|
return (reload_completed ? 6 : 4);
|
8304 |
|
|
}
|
8305 |
|
|
|
8306 |
|
|
/* The following function initiates variable `dfa_pre_cycle_insn'.  */

static void
ia64_init_dfa_pre_cycle_insn (void)
{
  /* Lazily allocate the two DFA state scratch buffers; state_size ()
     reports how many bytes one automaton state occupies.  */
  if (temp_dfa_state == NULL)
    {
      dfa_state_size = state_size ();
      temp_dfa_state = xmalloc (dfa_state_size);
      prev_cycle_state = xmalloc (dfa_state_size);
    }
  /* Build the pseudo insn the DFA scheduler issues at each cycle
     start; detach it from any insn chain.  */
  dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
  PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
  /* Prime the recog cache so later INSN_CODE lookups are cheap.  */
  recog_memoized (dfa_pre_cycle_insn);
  /* Likewise build a detached stop-bit insn for the scheduler.  */
  dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
  PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
  recog_memoized (dfa_stop_insn);
}
|
8324 |
|
|
|
8325 |
|
|
/* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
   used by the DFA insn scheduler.  */

static rtx
ia64_dfa_pre_cycle_insn (void)
{
  /* Built once by ia64_init_dfa_pre_cycle_insn.  */
  return dfa_pre_cycle_insn;
}
|
8333 |
|
|
|
8334 |
|
|
/* The following function returns TRUE if PRODUCER (of type ilog or
   ld) produces address for CONSUMER (of type st or stf).  */

int
ia64_st_address_bypass_p (rtx producer, rtx consumer)
{
  rtx dest, reg, mem;

  gcc_assert (producer && consumer);
  /* The producer must be a single set whose destination is a register
     (possibly wrapped in a SUBREG).  */
  dest = ia64_single_set (producer);
  gcc_assert (dest);
  reg = SET_DEST (dest);
  gcc_assert (reg);
  if (GET_CODE (reg) == SUBREG)
    reg = SUBREG_REG (reg);
  gcc_assert (GET_CODE (reg) == REG);

  /* The consumer must be a single set storing to memory.  The bypass
     applies exactly when the produced register is used inside the
     store's address expression.  */
  dest = ia64_single_set (consumer);
  gcc_assert (dest);
  mem = SET_DEST (dest);
  gcc_assert (mem && GET_CODE (mem) == MEM);
  return reg_mentioned_p (reg, mem);
}
|
8357 |
|
|
|
8358 |
|
|
/* The following function returns TRUE if PRODUCER (of type ilog or
   ld) produces address for CONSUMER (of type ld or fld).  */

int
ia64_ld_address_bypass_p (rtx producer, rtx consumer)
{
  rtx dest, src, reg, mem;

  gcc_assert (producer && consumer);
  /* The producer must be a single set whose destination is a register
     (possibly wrapped in a SUBREG).  */
  dest = ia64_single_set (producer);
  gcc_assert (dest);
  reg = SET_DEST (dest);
  gcc_assert (reg);
  if (GET_CODE (reg) == SUBREG)
    reg = SUBREG_REG (reg);
  gcc_assert (GET_CODE (reg) == REG);

  src = ia64_single_set (consumer);
  gcc_assert (src);
  mem = SET_SRC (src);
  gcc_assert (mem);

  /* Peel off the wrappers the various load forms put around the
     memory operand: a plain UNSPEC, or an IF_THEN_ELSE for check
     loads (ld.c, recognized by UNSPEC_LDCCLR in the condition).  */
  if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
    mem = XVECEXP (mem, 0, 0);
  else if (GET_CODE (mem) == IF_THEN_ELSE)
    /* ??? Is this bypass necessary for ld.c? */
    {
      gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR);
      mem = XEXP (mem, 1);
    }

  /* Strip mode-changing wrappers around the actual memory access.  */
  while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
    mem = XEXP (mem, 0);

  /* Advanced/speculative load forms wrap the MEM in one more UNSPEC;
     only the three known codes are expected here.  */
  if (GET_CODE (mem) == UNSPEC)
    {
      int c = XINT (mem, 1);

      gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDSA);
      mem = XVECEXP (mem, 0, 0);
    }

  /* Note that LO_SUM is used for GOT loads.  */
  gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);

  /* The bypass applies exactly when the produced register is used in
     the load's address expression.  */
  return reg_mentioned_p (reg, mem);
}
|
8405 |
|
|
|
8406 |
|
|
/* The following function returns TRUE if INSN produces address for a
   load/store insn.  We will place such insns into M slot because it
   decreases its latency time.  */

int
ia64_produce_address_p (rtx insn)
{
  /* NOTE(review): the insn's `call' bit appears to be reused here as a
     scratch flag marking address producers -- it must be set somewhere
     else in this file's scheduling code; confirm against the code that
     writes it before relying on this.  */
  return insn->call;
}
|
8415 |
|
|
|
8416 |
|
|
|
8417 |
|
|
/* Emit pseudo-ops for the assembler to describe predicate relations.
   At present this assumes that we only consider predicate pairs to
   be mutex, and that the assembler can deduce proper values from
   straight-line code.  */

static void
emit_predicate_relation_info (void)
{
  basic_block bb;

  FOR_EACH_BB_REVERSE (bb)
    {
      int r;
      rtx head = BB_HEAD (bb);

      /* We only need such notes at code labels.  */
      if (GET_CODE (head) != CODE_LABEL)
	continue;
      /* Skip the basic-block note, if present, so the directives are
	 inserted after it.  */
      if (GET_CODE (NEXT_INSN (head)) == NOTE
	  && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
	head = NEXT_INSN (head);

      /* Skip p0, which may be thought to be live due to (reg:DI p0)
	 grabbing the entire block of predicate registers.  */
      for (r = PR_REG (2); r < PR_REG (64); r += 2)
	if (REGNO_REG_SET_P (bb->il.rtl->global_live_at_start, r))
	  {
	    /* Emit a mutex directive for each live predicate register
	       pair, chaining the insertions after one another.  */
	    rtx p = gen_rtx_REG (BImode, r);
	    rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
	    /* Keep BB_END valid when inserting at the block's end.  */
	    if (head == BB_END (bb))
	      BB_END (bb) = n;
	    head = n;
	  }
    }

  /* Look for conditional calls that do not return, and protect predicate
     relations around them.  Otherwise the assembler will assume the call
     returns, and complain about uses of call-clobbered predicates after
     the call.  */
  FOR_EACH_BB_REVERSE (bb)
    {
      rtx insn = BB_HEAD (bb);

      while (1)
	{
	  if (GET_CODE (insn) == CALL_INSN
	      && GET_CODE (PATTERN (insn)) == COND_EXEC
	      && find_reg_note (insn, REG_NORETURN, NULL_RTX))
	    {
	      /* Bracket the call: declare everything safe across it
		 before, and restore the normal assumption after.  */
	      rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
	      rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
	      /* Keep the block boundaries pointing at the new
		 first/last insns.  */
	      if (BB_HEAD (bb) == insn)
		BB_HEAD (bb) = b;
	      if (BB_END (bb) == insn)
		BB_END (bb) = a;
	    }

	  if (insn == BB_END (bb))
	    break;
	  insn = NEXT_INSN (insn);
	}
    }
}
|
8480 |
|
|
|
8481 |
|
|
/* Perform machine dependent operations on the rtl chain INSNS.  */

static void
ia64_reorg (void)
{
  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();

  /* If optimizing, we'll have split before scheduling.  */
  if (optimize == 0)
    split_all_insns (0);

  /* ??? update_life_info_in_dirty_blocks fails to terminate during
     non-optimizing bootstrap.  */
  update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES, PROP_DEATH_NOTES);

  if (optimize && ia64_flag_schedule_insns2)
    {
      /* Second scheduling pass: schedule extended basic blocks with
	 bundling, then insert stop bits.  */
      timevar_push (TV_SCHED2);
      ia64_final_schedule = 1;

      initiate_bundle_states ();
      /* A detached nop insn reused by the bundler.  */
      ia64_nop = make_insn_raw (gen_nop ());
      PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
      recog_memoized (ia64_nop);
      /* Per-insn-UID side tables used during scheduling.  */
      clocks_length = get_max_uid () + 1;
      stops_p = xcalloc (1, clocks_length);
      if (ia64_tune == PROCESSOR_ITANIUM)
	{
	  /* Only Itanium1 tuning needs the clock/extra-cycle tables
	     (used for the MM-insn stall workaround).  */
	  clocks = xcalloc (clocks_length, sizeof (int));
	  add_cycles = xcalloc (clocks_length, sizeof (int));
	}
      /* Look up the DFA automaton unit codes for the issue slots and
	 the bundle types; the names differ per tuning model.  */
      if (ia64_tune == PROCESSOR_ITANIUM2)
	{
	  pos_1 = get_cpu_unit_code ("2_1");
	  pos_2 = get_cpu_unit_code ("2_2");
	  pos_3 = get_cpu_unit_code ("2_3");
	  pos_4 = get_cpu_unit_code ("2_4");
	  pos_5 = get_cpu_unit_code ("2_5");
	  pos_6 = get_cpu_unit_code ("2_6");
	  _0mii_ = get_cpu_unit_code ("2b_0mii.");
	  _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
	  _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
	  _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
	  _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
	  _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
	  _0mib_ = get_cpu_unit_code ("2b_0mib.");
	  _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
	  _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
	  _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
	  _1mii_ = get_cpu_unit_code ("2b_1mii.");
	  _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
	  _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
	  _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
	  _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
	  _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
	  _1mib_ = get_cpu_unit_code ("2b_1mib.");
	  _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
	  _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
	  _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
	}
      else
	{
	  pos_1 = get_cpu_unit_code ("1_1");
	  pos_2 = get_cpu_unit_code ("1_2");
	  pos_3 = get_cpu_unit_code ("1_3");
	  pos_4 = get_cpu_unit_code ("1_4");
	  pos_5 = get_cpu_unit_code ("1_5");
	  pos_6 = get_cpu_unit_code ("1_6");
	  _0mii_ = get_cpu_unit_code ("1b_0mii.");
	  _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
	  _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
	  _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
	  _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
	  _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
	  _0mib_ = get_cpu_unit_code ("1b_0mib.");
	  _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
	  _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
	  _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
	  _1mii_ = get_cpu_unit_code ("1b_1mii.");
	  _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
	  _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
	  _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
	  _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
	  _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
	  _1mib_ = get_cpu_unit_code ("1b_1mib.");
	  _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
	  _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
	  _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
	}
      schedule_ebbs ();
      finish_bundle_states ();
      if (ia64_tune == PROCESSOR_ITANIUM)
	{
	  free (add_cycles);
	  free (clocks);
	}
      free (stops_p);
      stops_p = NULL;
      emit_insn_group_barriers (dump_file);

      ia64_final_schedule = 0;
      timevar_pop (TV_SCHED2);
    }
  else
    emit_all_insn_group_barriers (dump_file);

  /* A call must not be the last instruction in a function, so that the
     return address is still within the function, so that unwinding works
     properly.  Note that IA-64 differs from dwarf2 on this point.  */
  if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
    {
      rtx insn;
      int saw_stop = 0;

      insn = get_last_insn ();
      if (! INSN_P (insn))
	insn = prev_active_insn (insn);
      /* Skip over insns that expand to nothing.  */
      while (GET_CODE (insn) == INSN && get_attr_empty (insn) == EMPTY_YES)
	{
	  /* Remember whether we saw a stop bit among the empty insns,
	     so we don't emit a second one below.  */
	  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
	      && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
	    saw_stop = 1;
	  insn = prev_active_insn (insn);
	}
      if (GET_CODE (insn) == CALL_INSN)
	{
	  /* Pad the function with a stop, a break, and another stop so
	     the call is not the last instruction.  */
	  if (! saw_stop)
	    emit_insn (gen_insn_group_barrier (GEN_INT (3)));
	  emit_insn (gen_break_f ());
	  emit_insn (gen_insn_group_barrier (GEN_INT (3)));
	}
    }

  emit_predicate_relation_info ();

  if (ia64_flag_var_tracking)
    {
      timevar_push (TV_VAR_TRACKING);
      variable_tracking_main ();
      timevar_pop (TV_VAR_TRACKING);
    }
}
|
8626 |
|
|
|
8627 |
|
|
/* Return true if REGNO is used by the epilogue.  (Presumably called
   via the EPILOGUE_USES target macro -- confirm in ia64.h.)  */

int
ia64_epilogue_uses (int regno)
{
  switch (regno)
    {
    case R_GR (1):
      /* With a call to a function in another module, we will write a new
	 value to "gp".  After returning from such a call, we need to make
	 sure the function restores the original gp-value, even if the
	 function itself does not use the gp anymore.  */
      return !(TARGET_AUTO_PIC || TARGET_NO_PIC);

    case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
    case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
      /* For functions defined with the syscall_linkage attribute, all
	 input registers are marked as live at all function exits.  This
	 prevents the register allocator from using the input registers,
	 which in turn makes it possible to restart a system call after
	 an interrupt without having to save/restore the input registers.
	 This also prevents kernel data from leaking to application code.  */
      return lookup_attribute ("syscall_linkage",
	   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;

    case R_BR (0):
      /* Conditional return patterns can't represent the use of `b0' as
	 the return address, so we force the value live this way.  */
      return 1;

    case AR_PFS_REGNUM:
      /* Likewise for ar.pfs, which is used by br.ret.  */
      return 1;

    default:
      return 0;
    }
}
|
8665 |
|
|
|
8666 |
|
|
/* Return true if REGNO is used by the frame unwinder. */
|
8667 |
|
|
|
8668 |
|
|
int
|
8669 |
|
|
ia64_eh_uses (int regno)
|
8670 |
|
|
{
|
8671 |
|
|
if (! reload_completed)
|
8672 |
|
|
return 0;
|
8673 |
|
|
|
8674 |
|
|
if (current_frame_info.reg_save_b0
|
8675 |
|
|
&& regno == current_frame_info.reg_save_b0)
|
8676 |
|
|
return 1;
|
8677 |
|
|
if (current_frame_info.reg_save_pr
|
8678 |
|
|
&& regno == current_frame_info.reg_save_pr)
|
8679 |
|
|
return 1;
|
8680 |
|
|
if (current_frame_info.reg_save_ar_pfs
|
8681 |
|
|
&& regno == current_frame_info.reg_save_ar_pfs)
|
8682 |
|
|
return 1;
|
8683 |
|
|
if (current_frame_info.reg_save_ar_unat
|
8684 |
|
|
&& regno == current_frame_info.reg_save_ar_unat)
|
8685 |
|
|
return 1;
|
8686 |
|
|
if (current_frame_info.reg_save_ar_lc
|
8687 |
|
|
&& regno == current_frame_info.reg_save_ar_lc)
|
8688 |
|
|
return 1;
|
8689 |
|
|
|
8690 |
|
|
return 0;
|
8691 |
|
|
}
|
8692 |
|
|
|
8693 |
|
|
/* Return true if this goes in small data/bss. */
|
8694 |
|
|
|
8695 |
|
|
/* ??? We could also support own long data here. Generating movl/add/ld8
|
8696 |
|
|
instead of addl,ld8/ld8. This makes the code bigger, but should make the
|
8697 |
|
|
code faster because there is one less load. This also includes incomplete
|
8698 |
|
|
types which can't go in sdata/sbss. */
|
8699 |
|
|
|
8700 |
|
|
static bool
|
8701 |
|
|
ia64_in_small_data_p (tree exp)
|
8702 |
|
|
{
|
8703 |
|
|
if (TARGET_NO_SDATA)
|
8704 |
|
|
return false;
|
8705 |
|
|
|
8706 |
|
|
/* We want to merge strings, so we never consider them small data. */
|
8707 |
|
|
if (TREE_CODE (exp) == STRING_CST)
|
8708 |
|
|
return false;
|
8709 |
|
|
|
8710 |
|
|
/* Functions are never small data. */
|
8711 |
|
|
if (TREE_CODE (exp) == FUNCTION_DECL)
|
8712 |
|
|
return false;
|
8713 |
|
|
|
8714 |
|
|
if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
|
8715 |
|
|
{
|
8716 |
|
|
const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
|
8717 |
|
|
|
8718 |
|
|
if (strcmp (section, ".sdata") == 0
|
8719 |
|
|
|| strncmp (section, ".sdata.", 7) == 0
|
8720 |
|
|
|| strncmp (section, ".gnu.linkonce.s.", 16) == 0
|
8721 |
|
|
|| strcmp (section, ".sbss") == 0
|
8722 |
|
|
|| strncmp (section, ".sbss.", 6) == 0
|
8723 |
|
|
|| strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
|
8724 |
|
|
return true;
|
8725 |
|
|
}
|
8726 |
|
|
else
|
8727 |
|
|
{
|
8728 |
|
|
HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
|
8729 |
|
|
|
8730 |
|
|
/* If this is an incomplete type with size 0, then we can't put it
|
8731 |
|
|
in sdata because it might be too big when completed. */
|
8732 |
|
|
if (size > 0 && size <= ia64_section_threshold)
|
8733 |
|
|
return true;
|
8734 |
|
|
}
|
8735 |
|
|
|
8736 |
|
|
return false;
|
8737 |
|
|
}
|
8738 |
|
|
|
8739 |
|
|
/* Output assembly directives for prologue regions.  */

/* True when the block being processed is the last basic block of the
   function.  (The previous comment, "The current basic block number",
   no longer matched this boolean.  NOTE(review): the variable is only
   read here, in process_epilogue -- confirm where it is set.)  */

static bool last_block;

/* True if we need a copy_state command at the start of the next block.  */

static bool need_copy_state;

/* Buffer size for artificial debug labels; 30 bytes is ample for
   "L" plus a decimal label number.  */
#ifndef MAX_ARTIFICIAL_LABEL_BYTES
# define MAX_ARTIFICIAL_LABEL_BYTES 30
#endif
|
8752 |
|
|
|
8753 |
|
|
/* Emit a debugging label after a call-frame-related insn.  We'd
   rather output the label right away, but we'd have to output it
   after, not before, the instruction, and the instruction has not
   been output yet.  So we emit the label after the insn, delete it to
   avoid introducing basic blocks, and mark it as preserved, such that
   it is still output, given that it is referenced in debug info.
   Returns a freshly allocated label string; the caller owns it.  */

static const char *
ia64_emit_deleted_label_after_insn (rtx insn)
{
  char label[MAX_ARTIFICIAL_LABEL_BYTES];
  rtx lb = gen_label_rtx ();
  rtx label_insn = emit_label_after (lb, insn);

  /* Keep the label in the output even though the insn is deleted.  */
  LABEL_PRESERVE_P (lb) = 1;

  delete_insn (label_insn);

  ASM_GENERATE_INTERNAL_LABEL (label, "L", CODE_LABEL_NUMBER (label_insn));

  return xstrdup (label);
}
|
8775 |
|
|
|
8776 |
|
|
/* Define the CFA after INSN with the steady-state definition.  */

static void
ia64_dwarf2out_def_steady_cfa (rtx insn)
{
  /* The CFA is expressed relative to the hard frame pointer when one
     is in use, otherwise relative to the stack pointer.  */
  rtx fp = frame_pointer_needed
    ? hard_frame_pointer_rtx
    : stack_pointer_rtx;

  /* Anchor the CFA definition at an artificial label placed right
     after INSN.  */
  dwarf2out_def_cfa
    (ia64_emit_deleted_label_after_insn (insn),
     REGNO (fp),
     ia64_initial_elimination_offset
     (REGNO (arg_pointer_rtx), REGNO (fp))
     + ARG_POINTER_CFA_OFFSET (current_function_decl));
}
|
8792 |
|
|
|
8793 |
|
|
/* The generic dwarf2 frame debug info generator does not define a
   separate region for the very end of the epilogue, so refrain from
   doing so in the IA64-specific code as well.  */

#define IA64_CHANGE_CFA_IN_EPILOGUE 0

/* The function emits unwind directives for the start of an epilogue.
   UNWIND selects whether the assembler unwind directives are printed;
   FRAME selects whether the dwarf2 CFA definition is emitted.  */

static void
process_epilogue (FILE *asm_out_file, rtx insn, bool unwind, bool frame)
{
  /* If this isn't the last block of the function, then we need to label the
     current state, and copy it back in at the start of the next block.  */

  if (!last_block)
    {
      if (unwind)
	fprintf (asm_out_file, "\t.label_state %d\n",
		 ++cfun->machine->state_num);
      /* Ask the next block's processing to emit a .copy_state.  */
      need_copy_state = true;
    }

  if (unwind)
    fprintf (asm_out_file, "\t.restore sp\n");
  /* Dead with IA64_CHANGE_CFA_IN_EPILOGUE == 0; kept so the CFA
     update can be re-enabled by flipping the macro.  */
  if (IA64_CHANGE_CFA_IN_EPILOGUE && frame)
    dwarf2out_def_cfa (ia64_emit_deleted_label_after_insn (insn),
		       STACK_POINTER_REGNUM, INCOMING_FRAME_SP_OFFSET);
}
|
8821 |
|
|
|
8822 |
|
|
/* This function processes a SET pattern looking for specific patterns
   which result in emitting an assembly directive required for unwinding.
   Returns nonzero when PAT was recognized and handled.  PAT is a SET
   taken from INSN; UNWIND/FRAME gate the two kinds of output as in
   process_epilogue.  */

static int
process_set (FILE *asm_out_file, rtx pat, rtx insn, bool unwind, bool frame)
{
  rtx src = SET_SRC (pat);
  rtx dest = SET_DEST (pat);
  int src_regno, dest_regno;

  /* Look for the ALLOC insn.  */
  if (GET_CODE (src) == UNSPEC_VOLATILE
      && XINT (src, 1) == UNSPECV_ALLOC
      && GET_CODE (dest) == REG)
    {
      dest_regno = REGNO (dest);

      /* If this is the final destination for ar.pfs, then this must
	 be the alloc in the prologue.  */
      if (dest_regno == current_frame_info.reg_save_ar_pfs)
	{
	  if (unwind)
	    fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
		     ia64_dbx_register_number (dest_regno));
	}
      else
	{
	  /* This must be an alloc before a sibcall.  We must drop the
	     old frame info.  The easiest way to drop the old frame
	     info is to ensure we had a ".restore sp" directive
	     followed by a new prologue.  If the procedure doesn't
	     have a memory-stack frame, we'll issue a dummy ".restore
	     sp" now.  */
	  if (current_frame_info.total_size == 0 && !frame_pointer_needed)
	    /* if haven't done process_epilogue() yet, do it now */
	    process_epilogue (asm_out_file, insn, unwind, frame);
	  if (unwind)
	    fprintf (asm_out_file, "\t.prologue\n");
	}
      return 1;
    }

  /* Look for SP = ....  */
  if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
    {
      if (GET_CODE (src) == PLUS)
	{
	  rtx op0 = XEXP (src, 0);
	  rtx op1 = XEXP (src, 1);

	  gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);

	  /* A negative adjustment allocates the frame; a positive one
	     (or a copy from the hard FP below) deallocates it.  */
	  if (INTVAL (op1) < 0)
	    {
	      gcc_assert (!frame_pointer_needed);
	      if (unwind)
		fprintf (asm_out_file, "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
			 -INTVAL (op1));
	      if (frame)
		ia64_dwarf2out_def_steady_cfa (insn);
	    }
	  else
	    process_epilogue (asm_out_file, insn, unwind, frame);
	}
      else
	{
	  gcc_assert (GET_CODE (src) == REG
		      && REGNO (src) == HARD_FRAME_POINTER_REGNUM);
	  process_epilogue (asm_out_file, insn, unwind, frame);
	}

      return 1;
    }

  /* Register move we need to look at.  */
  if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
    {
      src_regno = REGNO (src);
      dest_regno = REGNO (dest);

      switch (src_regno)
	{
	case BR_REG (0):
	  /* Saving return address pointer.  */
	  gcc_assert (dest_regno == current_frame_info.reg_save_b0);
	  if (unwind)
	    fprintf (asm_out_file, "\t.save rp, r%d\n",
		     ia64_dbx_register_number (dest_regno));
	  return 1;

	case PR_REG (0):
	  gcc_assert (dest_regno == current_frame_info.reg_save_pr);
	  if (unwind)
	    fprintf (asm_out_file, "\t.save pr, r%d\n",
		     ia64_dbx_register_number (dest_regno));
	  return 1;

	case AR_UNAT_REGNUM:
	  gcc_assert (dest_regno == current_frame_info.reg_save_ar_unat);
	  if (unwind)
	    fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
		     ia64_dbx_register_number (dest_regno));
	  return 1;

	case AR_LC_REGNUM:
	  gcc_assert (dest_regno == current_frame_info.reg_save_ar_lc);
	  if (unwind)
	    fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
		     ia64_dbx_register_number (dest_regno));
	  return 1;

	case STACK_POINTER_REGNUM:
	  /* SP copied into the hard FP: frame pointer establishment.  */
	  gcc_assert (dest_regno == HARD_FRAME_POINTER_REGNUM
		      && frame_pointer_needed);
	  if (unwind)
	    fprintf (asm_out_file, "\t.vframe r%d\n",
		     ia64_dbx_register_number (dest_regno));
	  if (frame)
	    ia64_dwarf2out_def_steady_cfa (insn);
	  return 1;

	default:
	  /* Everything else should indicate being stored to memory.  */
	  gcc_unreachable ();
	}
    }

  /* Memory store we need to look at.  */
  if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
    {
      long off;
      rtx base;
      const char *saveop;

      /* Decompose the address into BASE (a register) plus OFF.  */
      if (GET_CODE (XEXP (dest, 0)) == REG)
	{
	  base = XEXP (dest, 0);
	  off = 0;
	}
      else
	{
	  gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
		      && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
	  base = XEXP (XEXP (dest, 0), 0);
	  off = INTVAL (XEXP (XEXP (dest, 0), 1));
	}

      /* FP-relative saves use .savepsp (offset negated); SP-relative
	 ones use .savesp.  */
      if (base == hard_frame_pointer_rtx)
	{
	  saveop = ".savepsp";
	  off = - off;
	}
      else
	{
	  gcc_assert (base == stack_pointer_rtx);
	  saveop = ".savesp";
	}

      src_regno = REGNO (src);
      switch (src_regno)
	{
	case BR_REG (0):
	  gcc_assert (!current_frame_info.reg_save_b0);
	  if (unwind)
	    fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
	  return 1;

	case PR_REG (0):
	  gcc_assert (!current_frame_info.reg_save_pr);
	  if (unwind)
	    fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
	  return 1;

	case AR_LC_REGNUM:
	  gcc_assert (!current_frame_info.reg_save_ar_lc);
	  if (unwind)
	    fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
	  return 1;

	case AR_PFS_REGNUM:
	  gcc_assert (!current_frame_info.reg_save_ar_pfs);
	  if (unwind)
	    fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
	  return 1;

	case AR_UNAT_REGNUM:
	  gcc_assert (!current_frame_info.reg_save_ar_unat);
	  if (unwind)
	    fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
	  return 1;

	case GR_REG (4):
	case GR_REG (5):
	case GR_REG (6):
	case GR_REG (7):
	  if (unwind)
	    fprintf (asm_out_file, "\t.save.g 0x%x\n",
		     1 << (src_regno - GR_REG (4)));
	  return 1;

	case BR_REG (1):
	case BR_REG (2):
	case BR_REG (3):
	case BR_REG (4):
	case BR_REG (5):
	  if (unwind)
	    fprintf (asm_out_file, "\t.save.b 0x%x\n",
		     1 << (src_regno - BR_REG (1)));
	  return 1;

	case FR_REG (2):
	case FR_REG (3):
	case FR_REG (4):
	case FR_REG (5):
	  if (unwind)
	    fprintf (asm_out_file, "\t.save.f 0x%x\n",
		     1 << (src_regno - FR_REG (2)));
	  return 1;

	case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
	case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
	case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
	case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
	  /* NOTE(review): the FR_REG (12) bias maps f16 onto bit 4 of
	     the .save.gf frmask — presumably bits 0-3 are f2-f5;
	     confirm against the unwind directive specification.  */
	  if (unwind)
	    fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
		     1 << (src_regno - FR_REG (12)));
	  return 1;

	default:
	  return 0;
	}
    }

  return 0;
}
|
9057 |
|
|
|
9058 |
|
|
|
9059 |
|
|
/* This function looks at a single insn and emits any directives
   required to unwind this insn.  Unwind directives are emitted when
   either .IA-64 unwind tables (UNWIND) or dwarf2 frame info (FRAME)
   is wanted; otherwise the function does nothing.  */

void
process_for_unwind_directive (FILE *asm_out_file, rtx insn)
{
  bool unwind = (flag_unwind_tables
		 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS));
  bool frame = dwarf2out_do_frame ();

  if (unwind || frame)
    {
      rtx pat;

      /* Basic-block notes are where we track whether we have entered
	 the last block, and where a pending .copy_state is emitted.  */
      if (GET_CODE (insn) == NOTE
	  && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
	{
	  last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;

	  /* Restore unwind state from immediately before the epilogue.  */
	  if (need_copy_state)
	    {
	      if (unwind)
		{
		  fprintf (asm_out_file, "\t.body\n");
		  fprintf (asm_out_file, "\t.copy_state %d\n",
			   cfun->machine->state_num);
		}
	      /* Dead while IA64_CHANGE_CFA_IN_EPILOGUE is 0.  */
	      if (IA64_CHANGE_CFA_IN_EPILOGUE && frame)
		ia64_dwarf2out_def_steady_cfa (insn);
	      need_copy_state = false;
	    }
	}

      /* Only frame-related real insns need directives.  */
      if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
	return;

      /* Prefer the REG_FRAME_RELATED_EXPR note over the raw pattern
	 when one is attached.  */
      pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
      if (pat)
	pat = XEXP (pat, 0);
      else
	pat = PATTERN (insn);

      switch (GET_CODE (pat))
	{
	case SET:
	  process_set (asm_out_file, pat, insn, unwind, frame);
	  break;

	case PARALLEL:
	  {
	    int par_index;
	    int limit = XVECLEN (pat, 0);
	    /* Handle every SET inside the PARALLEL.  */
	    for (par_index = 0; par_index < limit; par_index++)
	      {
		rtx x = XVECEXP (pat, 0, par_index);
		if (GET_CODE (x) == SET)
		  process_set (asm_out_file, x, insn, unwind, frame);
	      }
	    break;
	  }

	default:
	  gcc_unreachable ();
	}
    }
}
|
9125 |
|
|
|
9126 |
|
|
|
9127 |
|
|
/* Function codes for the IA-64 machine-specific builtins.  Passed to
   def_builtin in ia64_init_builtins and dispatched on (as FCODE) in
   ia64_expand_builtin.  */
enum ia64_builtins
{
  IA64_BUILTIN_BSP,	/* __builtin_ia64_bsp: read the RSE backing-store pointer.  */
  IA64_BUILTIN_FLUSHRS	/* __builtin_ia64_flushrs: emit a flushrs insn.  */
};
|
9132 |
|
|
|
9133 |
|
|
/* Register the IA-64 specific scalar types (__fpreg, __float80,
   __float128) with the front end and declare the machine builtins.  */
void
ia64_init_builtins (void)
{
  tree fpreg_type;
  tree float80_type;

  /* The __fpreg type.  */
  fpreg_type = make_node (REAL_TYPE);
  TYPE_PRECISION (fpreg_type) = 82;
  layout_type (fpreg_type);
  (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");

  /* The __float80 type.  */
  float80_type = make_node (REAL_TYPE);
  TYPE_PRECISION (float80_type) = 80;
  layout_type (float80_type);
  (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");

  /* The __float128 type.  */
  if (!TARGET_HPUX)
    {
      tree float128_type = make_node (REAL_TYPE);
      TYPE_PRECISION (float128_type) = 128;
      layout_type (float128_type);
      (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
    }
  else
    /* Under HPUX, this is a synonym for "long double".  */
    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
					       "__float128");

/* Local helper to shorten the builtin_function calls below; undefined
   again at the end of this function.  */
#define def_builtin(name, type, code)					\
  lang_hooks.builtin_function ((name), (type), (code), BUILT_IN_MD,	\
			       NULL, NULL_TREE)

  def_builtin ("__builtin_ia64_bsp",
	       build_function_type (ptr_type_node, void_list_node),
	       IA64_BUILTIN_BSP);

  def_builtin ("__builtin_ia64_flushrs",
	       build_function_type (void_type_node, void_list_node),
	       IA64_BUILTIN_FLUSHRS);

#undef def_builtin
}
|
9178 |
|
|
|
9179 |
|
|
/* Expand a call to one of the machine-specific builtins declared in
   ia64_init_builtins.  EXP is the CALL_EXPR; TARGET is a suggested
   place for the result.  Returns the result rtx, or NULL_RTX for an
   unrecognized function code.  */
rtx
ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		     enum machine_mode mode ATTRIBUTE_UNUSED,
		     int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case IA64_BUILTIN_BSP:
      /* Result must end up in a DImode register.  */
      if (! target || ! register_operand (target, DImode))
	target = gen_reg_rtx (DImode);
      emit_insn (gen_bsp_value (target));
#ifdef POINTERS_EXTEND_UNSIGNED
      /* Narrow/extend to the pointer mode (matters for ILP32).  */
      target = convert_memory_address (ptr_mode, target);
#endif
      return target;

    case IA64_BUILTIN_FLUSHRS:
      emit_insn (gen_flushrs ());
      return const0_rtx;

    default:
      break;
    }

  return NULL_RTX;
}
|
9208 |
|
|
|
9209 |
|
|
/* For the HP-UX IA64 aggregate parameters are passed stored in the
   most significant bits of the stack slot.  */

enum direction
ia64_hpux_function_arg_padding (enum machine_mode mode, tree type)
{
  /* Exception to normal case for structures/unions/etc.  */

  /* Aggregates smaller than a word pad upward, i.e. the data sits in
     the most significant end of the slot.  */
  if (type && AGGREGATE_TYPE_P (type)
      && int_size_in_bytes (type) < UNITS_PER_WORD)
    return upward;

  /* Fall back to the default.  */
  return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
}
|
9224 |
|
|
|
9225 |
|
|
/* Linked list of all external functions that are to be emitted by GCC.
   We output the name if and only if TREE_SYMBOL_REFERENCED is set in
   order to avoid putting out names that are never really used.  */

struct extern_func_list GTY(())
{
  struct extern_func_list *next;	/* Next list entry, or NULL.  */
  tree decl;				/* FUNCTION_DECL of the external.  */
};

/* Head of the list; GTY(()) roots it for the garbage collector.  */
static GTY(()) struct extern_func_list *extern_func_head;
|
9236 |
|
|
|
9237 |
|
|
static void
|
9238 |
|
|
ia64_hpux_add_extern_decl (tree decl)
|
9239 |
|
|
{
|
9240 |
|
|
struct extern_func_list *p = ggc_alloc (sizeof (struct extern_func_list));
|
9241 |
|
|
|
9242 |
|
|
p->decl = decl;
|
9243 |
|
|
p->next = extern_func_head;
|
9244 |
|
|
extern_func_head = p;
|
9245 |
|
|
}
|
9246 |
|
|
|
9247 |
|
|
/* Print out the list of used global functions.  Walks the list built
   by ia64_hpux_add_extern_decl and emits a globalize plus a .type
   "function" directive for each symbol that was actually referenced
   but not yet written out.  */

static void
ia64_hpux_file_end (void)
{
  struct extern_func_list *p;

  for (p = extern_func_head; p; p = p->next)
    {
      tree decl = p->decl;
      tree id = DECL_ASSEMBLER_NAME (decl);

      gcc_assert (id);

      /* Skip symbols already emitted and symbols never referenced.  */
      if (!TREE_ASM_WRITTEN (decl) && TREE_SYMBOL_REFERENCED (id))
	{
	  const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);

	  /* Mark as written so we never emit the directive twice.  */
	  TREE_ASM_WRITTEN (decl) = 1;
	  (*targetm.asm_out.globalize_label) (asm_out_file, name);
	  fputs (TYPE_ASM_OP, asm_out_file);
	  assemble_name (asm_out_file, name);
	  fprintf (asm_out_file, "," TYPE_OPERAND_FMT "\n", "function");
	}
    }

  /* Drop the list; the GC may reclaim the nodes.  */
  extern_func_head = 0;
}
|
9275 |
|
|
|
9276 |
|
|
/* Set SImode div/mod functions, init_integral_libfuncs only initializes
   modes of word_mode and larger.  Rename the TFmode libfuncs using the
   HPUX conventions.  __divtf3 is used for XFmode.  We need to keep it for
   backward compatibility.  */

static void
ia64_init_libfuncs (void)
{
  /* SImode integer division/modulus helpers.  */
  set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
  set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
  set_optab_libfunc (smod_optab, SImode, "__modsi3");
  set_optab_libfunc (umod_optab, SImode, "__umodsi3");

  /* TFmode arithmetic, HPUX-style _U_Qf* names.  */
  set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
  set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
  set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
  set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
  set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");

  /* Float <-> float conversions involving TFmode.  */
  set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
  set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
  set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
  set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
  set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
  set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");

  /* TFmode -> integer conversions.  */
  set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
  set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
  set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad");
  set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
  set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");

  /* Integer -> TFmode conversions.  */
  set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
  set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad");
  /* HP-UX 11.23 libc does not have a function for unsigned
     SImode-to-TFmode conversion.  */
  set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
}
|
9315 |
|
|
|
9316 |
|
|
/* Rename all the TFmode libfuncs using the HPUX conventions.  */

static void
ia64_hpux_init_libfuncs (void)
{
  /* Start from the common IA-64 set, then apply HP-UX overrides.  */
  ia64_init_libfuncs ();

  /* The HP SI millicode division and mod functions expect DI arguments.
     By turning them off completely we avoid using both libgcc and the
     non-standard millicode routines and use the HP DI millicode routines
     instead.  */

  set_optab_libfunc (sdiv_optab, SImode, 0);
  set_optab_libfunc (udiv_optab, SImode, 0);
  set_optab_libfunc (smod_optab, SImode, 0);
  set_optab_libfunc (umod_optab, SImode, 0);

  set_optab_libfunc (sdiv_optab, DImode, "__milli_divI");
  set_optab_libfunc (udiv_optab, DImode, "__milli_divU");
  set_optab_libfunc (smod_optab, DImode, "__milli_remI");
  set_optab_libfunc (umod_optab, DImode, "__milli_remU");

  /* HP-UX libc has TF min/max/abs routines in it.  */
  set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
  set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
  set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");

  /* ia64_expand_compare uses this.  */
  cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");

  /* These should never be used.  */
  set_optab_libfunc (eq_optab, TFmode, 0);
  set_optab_libfunc (ne_optab, TFmode, 0);
  set_optab_libfunc (gt_optab, TFmode, 0);
  set_optab_libfunc (ge_optab, TFmode, 0);
  set_optab_libfunc (lt_optab, TFmode, 0);
  set_optab_libfunc (le_optab, TFmode, 0);
}
|
9354 |
|
|
|
9355 |
|
|
/* Rename the division and modulus functions in VMS.  All eight
   integer div/mod entry points are redirected to the OTS$ routines.  */

static void
ia64_vms_init_libfuncs (void)
{
  set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
  set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
  set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
  set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
  set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
  set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
  set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
  set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
}
|
9369 |
|
|
|
9370 |
|
|
/* Rename the TFmode libfuncs available from soft-fp in glibc using
   the HPUX conventions.  */

static void
ia64_sysv4_init_libfuncs (void)
{
  /* Start from the common IA-64 set, then add the comparisons.  */
  ia64_init_libfuncs ();

  /* These functions are not part of the HPUX TFmode interface.  We
     use them instead of _U_Qfcmp, which doesn't work the way we
     expect.  */
  set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
  set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
  set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
  set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
  set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
  set_optab_libfunc (le_optab, TFmode, "_U_Qfle");

  /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
     glibc doesn't have them.  */
}
|
9391 |
|
|
|
9392 |
|
|
/* For HPUX, it is illegal to have relocations in shared segments.  */

static int
ia64_hpux_reloc_rw_mask (void)
{
  /* Bit 0 forces sections with relocations to local data read-write;
     bit 1 does the same for global relocations.  Set both.  */
  return 1 | 2;
}
|
9399 |
|
|
|
9400 |
|
|
/* For others, relax this so that relocations to local data goes in
|
9401 |
|
|
read-only segments, but we still cannot allow global relocations
|
9402 |
|
|
in read-only segments. */
|
9403 |
|
|
|
9404 |
|
|
static int
|
9405 |
|
|
ia64_reloc_rw_mask (void)
|
9406 |
|
|
{
|
9407 |
|
|
return flag_pic ? 3 : 2;
|
9408 |
|
|
}
|
9409 |
|
|
|
9410 |
|
|
/* Return the section to use for X.  The only special thing we do here
   is to honor small data.  */

static section *
ia64_select_rtx_section (enum machine_mode mode, rtx x,
			 unsigned HOST_WIDE_INT align)
{
  /* Objects no larger than the small-data threshold go in .sdata,
     unless small data is disabled on the command line.  */
  if (GET_MODE_SIZE (mode) > 0
      && GET_MODE_SIZE (mode) <= ia64_section_threshold
      && !TARGET_NO_SDATA)
    return sdata_section;
  else
    return default_elf_select_rtx_section (mode, x, align);
}
|
9424 |
|
|
|
9425 |
|
|
/* Compute section flags for NAME.  Any section whose name marks it as
   small data or small bss (including the .gnu.linkonce variants) gets
   SECTION_SMALL added on top of the default flags.  */
static unsigned int
ia64_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags = 0;

  if (strcmp (name, ".sdata") == 0
      || strncmp (name, ".sdata.", 7) == 0
      || strncmp (name, ".gnu.linkonce.s.", 16) == 0
      || strncmp (name, ".sdata2.", 8) == 0
      || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
      || strcmp (name, ".sbss") == 0
      || strncmp (name, ".sbss.", 6) == 0
      || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
    flags = SECTION_SMALL;

  flags |= default_section_type_flags (decl, name, reloc);
  return flags;
}
|
9443 |
|
|
|
9444 |
|
|
/* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
   structure type and that the address of that type should be passed
   in out0, rather than in r8.  */

static bool
ia64_struct_retval_addr_is_first_parm_p (tree fntype)
{
  tree ret_type = TREE_TYPE (fntype);

  /* The Itanium C++ ABI requires that out0, rather than r8, be used
     as the structure return address parameter, if the return value
     type has a non-trivial copy constructor or destructor.  It is not
     clear if this same convention should be used for other
     programming languages.  Until G++ 3.4, we incorrectly used r8 for
     these return values.  */
  /* BLKmode + TREE_ADDRESSABLE is how such C++ return types look by
     this point; the lang_hooks.name check restricts this to G++.  */
  return (abi_version_at_least (2)
	  && ret_type
	  && TYPE_MODE (ret_type) == BLKmode
	  && TREE_ADDRESSABLE (ret_type)
	  && strcmp (lang_hooks.name, "GNU C++") == 0);
}
|
9465 |
|
|
|
9466 |
|
|
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
		      HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
		      tree function)
{
  rtx this, insn, funexp;
  unsigned int this_parmno;
  unsigned int this_regno;

  /* Pretend we are past reload so the RTL we emit is in its final,
     splittable form.  Reset at the end of the function.  */
  reload_completed = 1;
  epilogue_completed = 1;
  no_new_pseudos = 1;
  reset_block_changes ();

  /* Set things up as ia64_expand_prologue might.  */
  last_scratch_gr_reg = 15;

  memset (&current_frame_info, 0, sizeof (current_frame_info));
  current_frame_info.spill_cfa_off = -16;
  current_frame_info.n_input_regs = 1;
  current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);

  /* Mark the end of the (empty) prologue.  */
  emit_note (NOTE_INSN_PROLOGUE_END);

  /* Figure out whether "this" will be the first parameter (the
     typical case) or the second parameter (as happens when the
     virtual function returns certain class objects).  */
  this_parmno
    = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
       ? 1 : 0);
  this_regno = IN_REG (this_parmno);
  if (!TARGET_REG_NAMES)
    reg_names[this_regno] = ia64_reg_numbers[this_parmno];

  this = gen_rtx_REG (Pmode, this_regno);
  if (TARGET_ILP32)
    {
      /* In ILP32 the incoming pointer is 32 bits; widen it, folding
	 DELTA into the extension when it fits an I-type immediate.  */
      rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
      REG_POINTER (tmp) = 1;
      if (delta && CONST_OK_FOR_I (delta))
	{
	  emit_insn (gen_ptr_extend_plus_imm (this, tmp, GEN_INT (delta)));
	  delta = 0;
	}
      else
	emit_insn (gen_ptr_extend (this, tmp));
    }

  /* Apply the constant offset, if required.  */
  if (delta)
    {
      rtx delta_rtx = GEN_INT (delta);

      if (!CONST_OK_FOR_I (delta))
	{
	  /* Too big for an add immediate; materialize in r2.  */
	  rtx tmp = gen_rtx_REG (Pmode, 2);
	  emit_move_insn (tmp, delta_rtx);
	  delta_rtx = tmp;
	}
      emit_insn (gen_adddi3 (this, this, delta_rtx));
    }

  /* Apply the offset from the vtable, if required.  */
  if (vcall_offset)
    {
      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
      rtx tmp = gen_rtx_REG (Pmode, 2);

      if (TARGET_ILP32)
	{
	  /* Load the 32-bit vtable pointer and widen it; fold the
	     offset into the extension when it fits.  */
	  rtx t = gen_rtx_REG (ptr_mode, 2);
	  REG_POINTER (t) = 1;
	  emit_move_insn (t, gen_rtx_MEM (ptr_mode, this));
	  if (CONST_OK_FOR_I (vcall_offset))
	    {
	      emit_insn (gen_ptr_extend_plus_imm (tmp, t,
						  vcall_offset_rtx));
	      vcall_offset = 0;
	    }
	  else
	    emit_insn (gen_ptr_extend (tmp, t));
	}
      else
	emit_move_insn (tmp, gen_rtx_MEM (Pmode, this));

      if (vcall_offset)
	{
	  if (!CONST_OK_FOR_J (vcall_offset))
	    {
	      rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
	      emit_move_insn (tmp2, vcall_offset_rtx);
	      vcall_offset_rtx = tmp2;
	    }
	  emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
	}

      /* Load the adjustment word from the vtable and add it in.  */
      if (TARGET_ILP32)
	emit_move_insn (gen_rtx_REG (ptr_mode, 2),
			gen_rtx_MEM (ptr_mode, tmp));
      else
	emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));

      emit_insn (gen_adddi3 (this, this, tmp));
    }

  /* Generate a tail call to the target function.  */
  if (! TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
  insn = get_last_insn ();
  SIBLING_CALL_P (insn) = 1;

  /* Code generation for calls relies on splitting.  */
  reload_completed = 1;
  epilogue_completed = 1;
  try_split (PATTERN (insn), insn, 0);

  emit_barrier ();

  /* Run just enough of rest_of_compilation to get the insns emitted.
     There's not really enough bulk here to make other passes such as
     instruction scheduling worth while.  Note that use_thunk calls
     assemble_start_function and assemble_end_function.  */

  insn_locators_initialize ();
  emit_all_insn_group_barriers (NULL);
  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();

  /* Undo the "past reload" pretense set up at the top.  */
  reload_completed = 0;
  epilogue_completed = 0;
  no_new_pseudos = 0;
}
|
9614 |
|
|
|
9615 |
|
|
/* Worker function for TARGET_STRUCT_VALUE_RTX. */
|
9616 |
|
|
|
9617 |
|
|
static rtx
|
9618 |
|
|
ia64_struct_value_rtx (tree fntype,
|
9619 |
|
|
int incoming ATTRIBUTE_UNUSED)
|
9620 |
|
|
{
|
9621 |
|
|
if (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype))
|
9622 |
|
|
return NULL_RTX;
|
9623 |
|
|
return gen_rtx_REG (Pmode, GR_REG (8));
|
9624 |
|
|
}
|
9625 |
|
|
|
9626 |
|
|
static bool
|
9627 |
|
|
ia64_scalar_mode_supported_p (enum machine_mode mode)
|
9628 |
|
|
{
|
9629 |
|
|
switch (mode)
|
9630 |
|
|
{
|
9631 |
|
|
case QImode:
|
9632 |
|
|
case HImode:
|
9633 |
|
|
case SImode:
|
9634 |
|
|
case DImode:
|
9635 |
|
|
case TImode:
|
9636 |
|
|
return true;
|
9637 |
|
|
|
9638 |
|
|
case SFmode:
|
9639 |
|
|
case DFmode:
|
9640 |
|
|
case XFmode:
|
9641 |
|
|
case RFmode:
|
9642 |
|
|
return true;
|
9643 |
|
|
|
9644 |
|
|
case TFmode:
|
9645 |
|
|
return TARGET_HPUX;
|
9646 |
|
|
|
9647 |
|
|
default:
|
9648 |
|
|
return false;
|
9649 |
|
|
}
|
9650 |
|
|
}
|
9651 |
|
|
|
9652 |
|
|
static bool
|
9653 |
|
|
ia64_vector_mode_supported_p (enum machine_mode mode)
|
9654 |
|
|
{
|
9655 |
|
|
switch (mode)
|
9656 |
|
|
{
|
9657 |
|
|
case V8QImode:
|
9658 |
|
|
case V4HImode:
|
9659 |
|
|
case V2SImode:
|
9660 |
|
|
return true;
|
9661 |
|
|
|
9662 |
|
|
case V2SFmode:
|
9663 |
|
|
return true;
|
9664 |
|
|
|
9665 |
|
|
default:
|
9666 |
|
|
return false;
|
9667 |
|
|
}
|
9668 |
|
|
}
|
9669 |
|
|
|
9670 |
|
|
/* Implement the FUNCTION_PROFILER macro: emit, into FILE, the assembly
   that calls _mcount at the start of the current function.  LABELNO
   numbers the per-function profile counter label ("LP" label).  */

void
ia64_output_function_profiler (FILE *file, int labelno)
{
  bool indirect_call;

  /* If the function needs a static chain and the static chain
     register is r15, we use an indirect call so as to bypass
     the PLT stub in case the executable is dynamically linked,
     because the stub clobbers r15 as per 5.3.6 of the psABI.
     We don't need to do that in non canonical PIC mode.  */

  if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
    {
      gcc_assert (STATIC_CHAIN_REGNUM == 15);
      indirect_call = true;
    }
  else
    indirect_call = false;

  /* Emit the prologue unwind directives (GNU as uses a shorter form),
     save ar.pfs in r40, and allocate a frame with 4 output registers
     for the _mcount arguments.  */
  if (TARGET_GNU_AS)
    fputs ("\t.prologue 4, r40\n", file);
  else
    fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
  fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);

  /* out3 = address of the profile counter label, or r0 (zero) when
     counters are disabled.  */
  if (NO_PROFILE_COUNTERS)
    fputs ("\tmov out3 = r0\n", file);
  else
    {
      char buf[20];
      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);

      /* With auto-pic the label address is gp-relative and can be
	 materialized with movl; otherwise it is loaded from the
	 linkage table via r1.  */
      if (TARGET_AUTO_PIC)
	fputs ("\tmovl out3 = @gprel(", file);
      else
	fputs ("\taddl out3 = @ltoff(", file);
      assemble_name (file, buf);
      if (TARGET_AUTO_PIC)
	fputs (")\n", file);
      else
	fputs ("), r1\n", file);
    }

  /* For the indirect call, compute the linkage-table slot holding the
     address of _mcount's function descriptor.  */
  if (indirect_call)
    fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
  fputs ("\t;;\n", file);

  fputs ("\t.save rp, r42\n", file);
  /* out2 = b0, the caller's return pointer.  */
  fputs ("\tmov out2 = b0\n", file);
  if (indirect_call)
    fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
  fputs ("\t.body\n", file);
  /* out1 = r1, the current gp value.  */
  fputs ("\tmov out1 = r1\n", file);
  if (indirect_call)
    {
      /* Load entry point and gp from the function descriptor and call
	 through b6, thereby bypassing the PLT stub (see above).  */
      fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
      fputs ("\tmov b6 = r16\n", file);
      fputs ("\tld8 r1 = [r14]\n", file);
      fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
    }
  else
    fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
}
|
9735 |
|
|
|
9736 |
|
|
static GTY(()) rtx mcount_func_rtx;
|
9737 |
|
|
static rtx
|
9738 |
|
|
gen_mcount_func_rtx (void)
|
9739 |
|
|
{
|
9740 |
|
|
if (!mcount_func_rtx)
|
9741 |
|
|
mcount_func_rtx = init_one_libfunc ("_mcount");
|
9742 |
|
|
return mcount_func_rtx;
|
9743 |
|
|
}
|
9744 |
|
|
|
9745 |
|
|
void
|
9746 |
|
|
ia64_profile_hook (int labelno)
|
9747 |
|
|
{
|
9748 |
|
|
rtx label, ip;
|
9749 |
|
|
|
9750 |
|
|
if (NO_PROFILE_COUNTERS)
|
9751 |
|
|
label = const0_rtx;
|
9752 |
|
|
else
|
9753 |
|
|
{
|
9754 |
|
|
char buf[30];
|
9755 |
|
|
const char *label_name;
|
9756 |
|
|
ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
|
9757 |
|
|
label_name = (*targetm.strip_name_encoding) (ggc_strdup (buf));
|
9758 |
|
|
label = gen_rtx_SYMBOL_REF (Pmode, label_name);
|
9759 |
|
|
SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
|
9760 |
|
|
}
|
9761 |
|
|
ip = gen_reg_rtx (Pmode);
|
9762 |
|
|
emit_insn (gen_ip_value (ip));
|
9763 |
|
|
emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
|
9764 |
|
|
VOIDmode, 3,
|
9765 |
|
|
gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
|
9766 |
|
|
ip, Pmode,
|
9767 |
|
|
label, Pmode);
|
9768 |
|
|
}
|
9769 |
|
|
|
9770 |
|
|
/* Return the mangling of TYPE if it is an extended fundamental type. */
|
9771 |
|
|
|
9772 |
|
|
static const char *
|
9773 |
|
|
ia64_mangle_fundamental_type (tree type)
|
9774 |
|
|
{
|
9775 |
|
|
/* On HP-UX, "long double" is mangled as "e" so __float128 is
|
9776 |
|
|
mangled as "e". */
|
9777 |
|
|
if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
|
9778 |
|
|
return "g";
|
9779 |
|
|
/* On HP-UX, "e" is not available as a mangling of __float80 so use
|
9780 |
|
|
an extended mangling. Elsewhere, "e" is available since long
|
9781 |
|
|
double is 80 bits. */
|
9782 |
|
|
if (TYPE_MODE (type) == XFmode)
|
9783 |
|
|
return TARGET_HPUX ? "u9__float80" : "e";
|
9784 |
|
|
if (TYPE_MODE (type) == RFmode)
|
9785 |
|
|
return "u7__fpreg";
|
9786 |
|
|
return NULL;
|
9787 |
|
|
}
|
9788 |
|
|
|
9789 |
|
|
/* Return the diagnostic message string if conversion from FROMTYPE to
|
9790 |
|
|
TOTYPE is not allowed, NULL otherwise. */
|
9791 |
|
|
static const char *
|
9792 |
|
|
ia64_invalid_conversion (tree fromtype, tree totype)
|
9793 |
|
|
{
|
9794 |
|
|
/* Reject nontrivial conversion to or from __fpreg. */
|
9795 |
|
|
if (TYPE_MODE (fromtype) == RFmode
|
9796 |
|
|
&& TYPE_MODE (totype) != RFmode
|
9797 |
|
|
&& TYPE_MODE (totype) != VOIDmode)
|
9798 |
|
|
return N_("invalid conversion from %<__fpreg%>");
|
9799 |
|
|
if (TYPE_MODE (totype) == RFmode
|
9800 |
|
|
&& TYPE_MODE (fromtype) != RFmode)
|
9801 |
|
|
return N_("invalid conversion to %<__fpreg%>");
|
9802 |
|
|
return NULL;
|
9803 |
|
|
}
|
9804 |
|
|
|
9805 |
|
|
/* Return the diagnostic message string if the unary operation OP is
|
9806 |
|
|
not permitted on TYPE, NULL otherwise. */
|
9807 |
|
|
static const char *
|
9808 |
|
|
ia64_invalid_unary_op (int op, tree type)
|
9809 |
|
|
{
|
9810 |
|
|
/* Reject operations on __fpreg other than unary + or &. */
|
9811 |
|
|
if (TYPE_MODE (type) == RFmode
|
9812 |
|
|
&& op != CONVERT_EXPR
|
9813 |
|
|
&& op != ADDR_EXPR)
|
9814 |
|
|
return N_("invalid operation on %<__fpreg%>");
|
9815 |
|
|
return NULL;
|
9816 |
|
|
}
|
9817 |
|
|
|
9818 |
|
|
/* Return the diagnostic message string if the binary operation OP is
|
9819 |
|
|
not permitted on TYPE1 and TYPE2, NULL otherwise. */
|
9820 |
|
|
static const char *
|
9821 |
|
|
ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, tree type1, tree type2)
|
9822 |
|
|
{
|
9823 |
|
|
/* Reject operations on __fpreg. */
|
9824 |
|
|
if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
|
9825 |
|
|
return N_("invalid operation on %<__fpreg%>");
|
9826 |
|
|
return NULL;
|
9827 |
|
|
}
|
9828 |
|
|
|
9829 |
|
|
/* Implement overriding of the optimization options.  Both arguments
   (the -O level and the optimize-for-size flag) are unused here; the
   adjustment below is applied unconditionally.  */
void
ia64_optimization_options (int level ATTRIBUTE_UNUSED,
			   int size ATTRIBUTE_UNUSED)
{
  /* Let the scheduler form additional regions.  */
  set_param_value ("max-sched-extend-regions-iters", 2);
}
|
9837 |
|
|
|
9838 |
|
|
#include "gt-ia64.h"
|