gcc/config/ia64/ia64.c  (openrisc/trunk/gnu-dev/or1k-gcc, rev 709)
/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
   2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
                  David Mosberger <davidm@hpl.hp.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "except.h"
#include "function.h"
#include "ggc.h"
#include "basic-block.h"
#include "libfuncs.h"
#include "diagnostic-core.h"
#include "sched-int.h"
#include "timevar.h"
#include "target.h"
#include "target-def.h"
#include "tm_p.h"
#include "hashtab.h"
#include "langhooks.h"
#include "cfglayout.h"
#include "gimple.h"
#include "intl.h"
#include "df.h"
#include "debug.h"
#include "params.h"
#include "dbgcnt.h"
#include "tm-constrs.h"
#include "sel-sched.h"
#include "reload.h"
#include "dwarf2out.h"
#include "opts.h"
 
66
/* This is used for communication between ASM_OUTPUT_LABEL and
67
   ASM_OUTPUT_LABELREF.  */
68
int ia64_asm_output_label = 0;
69
 
70
/* Register names for ia64_expand_prologue.  */
71
static const char * const ia64_reg_numbers[96] =
72
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
73
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
74
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
75
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
76
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
77
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
78
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
79
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
80
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
81
  "r104","r105","r106","r107","r108","r109","r110","r111",
82
  "r112","r113","r114","r115","r116","r117","r118","r119",
83
  "r120","r121","r122","r123","r124","r125","r126","r127"};
84
 
85
/* ??? These strings could be shared with REGISTER_NAMES.  */
86
static const char * const ia64_input_reg_names[8] =
87
{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };
88
 
89
/* ??? These strings could be shared with REGISTER_NAMES.  */
90
static const char * const ia64_local_reg_names[80] =
91
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
92
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
93
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
94
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
95
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
96
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
97
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
98
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
99
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
100
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
101
 
102
/* ??? These strings could be shared with REGISTER_NAMES.  */
103
static const char * const ia64_output_reg_names[8] =
104
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
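/* Note (illustrative, not in the original source): "inN", "locN" and "outN"
   are the assembler's aliases for the stacked registers beginning at r32,
   i.e. the input, local and output portions of the current register frame
   set up by the alloc instruction.  The tables above let the backend refer
   to the same physical register by either spelling.  */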
105
 
106
/* Variables which are this size or smaller are put in the sdata/sbss
107
   sections.  */
108
 
109
unsigned int ia64_section_threshold;
110
 
111
/* The following variable is used by the DFA insn scheduler.  The value is
112
   TRUE if we do insn bundling instead of insn scheduling.  */
113
int bundling_p = 0;
114
 
115
enum ia64_frame_regs
116
{
117
   reg_fp,
118
   reg_save_b0,
119
   reg_save_pr,
120
   reg_save_ar_pfs,
121
   reg_save_ar_unat,
122
   reg_save_ar_lc,
123
   reg_save_gp,
124
   number_of_ia64_frame_regs
125
};
126
 
127
/* Structure to be filled in by ia64_compute_frame_size with register
128
   save masks and offsets for the current function.  */
129
 
130
struct ia64_frame_info
131
{
132
  HOST_WIDE_INT total_size;     /* size of the stack frame, not including
133
                                   the caller's scratch area.  */
134
  HOST_WIDE_INT spill_cfa_off;  /* top of the reg spill area from the cfa.  */
135
  HOST_WIDE_INT spill_size;     /* size of the gr/br/fr spill area.  */
136
  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
137
  HARD_REG_SET mask;            /* mask of saved registers.  */
138
  unsigned int gr_used_mask;    /* mask of registers in use as gr spill
139
                                   registers or long-term scratches.  */
140
  int n_spilled;                /* number of spilled registers.  */
141
  int r[number_of_ia64_frame_regs];  /* Frame related registers.  */
142
  int n_input_regs;             /* number of input registers used.  */
143
  int n_local_regs;             /* number of local registers used.  */
144
  int n_output_regs;            /* number of output registers used.  */
145
  int n_rotate_regs;            /* number of rotating registers used.  */
146
 
147
  char need_regstk;             /* true if a .regstk directive needed.  */
148
  char initialized;             /* true if the data is finalized.  */
149
};
150
 
151
/* Current frame information calculated by ia64_compute_frame_size.  */
152
static struct ia64_frame_info current_frame_info;
153
/* The actual registers that are emitted.  */
154
static int emitted_frame_related_regs[number_of_ia64_frame_regs];
155
 
156
static int ia64_first_cycle_multipass_dfa_lookahead (void);
157
static void ia64_dependencies_evaluation_hook (rtx, rtx);
158
static void ia64_init_dfa_pre_cycle_insn (void);
159
static rtx ia64_dfa_pre_cycle_insn (void);
160
static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
161
static bool ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx);
162
static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
163
static void ia64_h_i_d_extended (void);
164
static void * ia64_alloc_sched_context (void);
165
static void ia64_init_sched_context (void *, bool);
166
static void ia64_set_sched_context (void *);
167
static void ia64_clear_sched_context (void *);
168
static void ia64_free_sched_context (void *);
169
static int ia64_mode_to_int (enum machine_mode);
170
static void ia64_set_sched_flags (spec_info_t);
171
static ds_t ia64_get_insn_spec_ds (rtx);
172
static ds_t ia64_get_insn_checked_ds (rtx);
173
static bool ia64_skip_rtx_p (const_rtx);
174
static int ia64_speculate_insn (rtx, ds_t, rtx *);
175
static bool ia64_needs_block_p (int);
176
static rtx ia64_gen_spec_check (rtx, rtx, ds_t);
177
static int ia64_spec_check_p (rtx);
178
static int ia64_spec_check_src_p (rtx);
179
static rtx gen_tls_get_addr (void);
180
static rtx gen_thread_pointer (void);
181
static int find_gr_spill (enum ia64_frame_regs, int);
182
static int next_scratch_gr_reg (void);
183
static void mark_reg_gr_used_mask (rtx, void *);
184
static void ia64_compute_frame_size (HOST_WIDE_INT);
185
static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
186
static void finish_spill_pointers (void);
187
static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
188
static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
189
static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
190
static rtx gen_movdi_x (rtx, rtx, rtx);
191
static rtx gen_fr_spill_x (rtx, rtx, rtx);
192
static rtx gen_fr_restore_x (rtx, rtx, rtx);
193
 
194
static void ia64_option_override (void);
195
static bool ia64_can_eliminate (const int, const int);
196
static enum machine_mode hfa_element_mode (const_tree, bool);
197
static void ia64_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
198
                                         tree, int *, int);
199
static int ia64_arg_partial_bytes (cumulative_args_t, enum machine_mode,
200
                                   tree, bool);
201
static rtx ia64_function_arg_1 (cumulative_args_t, enum machine_mode,
202
                                const_tree, bool, bool);
203
static rtx ia64_function_arg (cumulative_args_t, enum machine_mode,
204
                              const_tree, bool);
205
static rtx ia64_function_incoming_arg (cumulative_args_t,
206
                                       enum machine_mode, const_tree, bool);
207
static void ia64_function_arg_advance (cumulative_args_t, enum machine_mode,
208
                                       const_tree, bool);
209
static unsigned int ia64_function_arg_boundary (enum machine_mode,
210
                                                const_tree);
211
static bool ia64_function_ok_for_sibcall (tree, tree);
212
static bool ia64_return_in_memory (const_tree, const_tree);
213
static rtx ia64_function_value (const_tree, const_tree, bool);
214
static rtx ia64_libcall_value (enum machine_mode, const_rtx);
215
static bool ia64_function_value_regno_p (const unsigned int);
216
static int ia64_register_move_cost (enum machine_mode, reg_class_t,
217
                                    reg_class_t);
218
static int ia64_memory_move_cost (enum machine_mode mode, reg_class_t,
219
                                  bool);
220
static bool ia64_rtx_costs (rtx, int, int, int, int *, bool);
221
static int ia64_unspec_may_trap_p (const_rtx, unsigned);
222
static void fix_range (const char *);
223
static struct machine_function * ia64_init_machine_status (void);
224
static void emit_insn_group_barriers (FILE *);
225
static void emit_all_insn_group_barriers (FILE *);
226
static void final_emit_insn_group_barriers (FILE *);
227
static void emit_predicate_relation_info (void);
228
static void ia64_reorg (void);
229
static bool ia64_in_small_data_p (const_tree);
230
static void process_epilogue (FILE *, rtx, bool, bool);
231
 
232
static bool ia64_assemble_integer (rtx, unsigned int, int);
233
static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
234
static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
235
static void ia64_output_function_end_prologue (FILE *);
236
 
237
static void ia64_print_operand (FILE *, rtx, int);
238
static void ia64_print_operand_address (FILE *, rtx);
239
static bool ia64_print_operand_punct_valid_p (unsigned char code);
240
 
241
static int ia64_issue_rate (void);
242
static int ia64_adjust_cost_2 (rtx, int, rtx, int, dw_t);
243
static void ia64_sched_init (FILE *, int, int);
244
static void ia64_sched_init_global (FILE *, int, int);
245
static void ia64_sched_finish_global (FILE *, int);
246
static void ia64_sched_finish (FILE *, int);
247
static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
248
static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
249
static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
250
static int ia64_variable_issue (FILE *, int, rtx, int);
251
 
252
static void ia64_asm_unwind_emit (FILE *, rtx);
253
static void ia64_asm_emit_except_personality (rtx);
254
static void ia64_asm_init_sections (void);
255
 
256
static enum unwind_info_type ia64_debug_unwind_info (void);
257
 
258
static struct bundle_state *get_free_bundle_state (void);
259
static void free_bundle_state (struct bundle_state *);
260
static void initiate_bundle_states (void);
261
static void finish_bundle_states (void);
262
static unsigned bundle_state_hash (const void *);
263
static int bundle_state_eq_p (const void *, const void *);
264
static int insert_bundle_state (struct bundle_state *);
265
static void initiate_bundle_state_table (void);
266
static void finish_bundle_state_table (void);
267
static int try_issue_nops (struct bundle_state *, int);
268
static int try_issue_insn (struct bundle_state *, rtx);
269
static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
270
static int get_max_pos (state_t);
271
static int get_template (state_t, int);
272
 
273
static rtx get_next_important_insn (rtx, rtx);
274
static bool important_for_bundling_p (rtx);
275
static void bundling (FILE *, int, rtx, rtx);
276
 
277
static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
278
                                  HOST_WIDE_INT, tree);
279
static void ia64_file_start (void);
280
static void ia64_globalize_decl_name (FILE *, tree);
281
 
282
static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
283
static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
284
static section *ia64_select_rtx_section (enum machine_mode, rtx,
285
                                         unsigned HOST_WIDE_INT);
286
static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
287
     ATTRIBUTE_UNUSED;
288
static unsigned int ia64_section_type_flags (tree, const char *, int);
289
static void ia64_init_libfuncs (void)
290
     ATTRIBUTE_UNUSED;
291
static void ia64_hpux_init_libfuncs (void)
292
     ATTRIBUTE_UNUSED;
293
static void ia64_sysv4_init_libfuncs (void)
294
     ATTRIBUTE_UNUSED;
295
static void ia64_vms_init_libfuncs (void)
296
     ATTRIBUTE_UNUSED;
297
static void ia64_soft_fp_init_libfuncs (void)
298
     ATTRIBUTE_UNUSED;
299
static bool ia64_vms_valid_pointer_mode (enum machine_mode mode)
300
     ATTRIBUTE_UNUSED;
301
static tree ia64_vms_common_object_attribute (tree *, tree, tree, int, bool *)
302
     ATTRIBUTE_UNUSED;
303
 
304
static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
305
static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *);
306
static void ia64_encode_section_info (tree, rtx, int);
307
static rtx ia64_struct_value_rtx (tree, int);
308
static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
309
static bool ia64_scalar_mode_supported_p (enum machine_mode mode);
310
static bool ia64_vector_mode_supported_p (enum machine_mode mode);
311
static bool ia64_legitimate_constant_p (enum machine_mode, rtx);
312
static bool ia64_legitimate_address_p (enum machine_mode, rtx, bool);
313
static bool ia64_cannot_force_const_mem (enum machine_mode, rtx);
314
static const char *ia64_mangle_type (const_tree);
315
static const char *ia64_invalid_conversion (const_tree, const_tree);
316
static const char *ia64_invalid_unary_op (int, const_tree);
317
static const char *ia64_invalid_binary_op (int, const_tree, const_tree);
318
static enum machine_mode ia64_c_mode_for_suffix (char);
319
static void ia64_trampoline_init (rtx, tree, rtx);
320
static void ia64_override_options_after_change (void);
321
 
322
static tree ia64_builtin_decl (unsigned, bool);
323
 
324
static reg_class_t ia64_preferred_reload_class (rtx, reg_class_t);
325
static enum machine_mode ia64_get_reg_raw_mode (int regno);
326
static section * ia64_hpux_function_section (tree, enum node_frequency,
327
                                             bool, bool);
328
 
329
static bool ia64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
330
                                              const unsigned char *sel);
331
 
332
#define MAX_VECT_LEN    8
333
 
334
struct expand_vec_perm_d
335
{
336
  rtx target, op0, op1;
337
  unsigned char perm[MAX_VECT_LEN];
338
  enum machine_mode vmode;
339
  unsigned char nelt;
340
  bool one_operand_p;
341
  bool testing_p;
342
};
343
 
344
static bool ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d);
345
 
346
 
347
/* Table of valid machine attributes.  */
348
static const struct attribute_spec ia64_attribute_table[] =
349
{
350
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
351
       affects_type_identity } */
352
  { "syscall_linkage", 0, 0, false, true,  true,  NULL, false },
353
  { "model",           1, 1, true, false, false, ia64_handle_model_attribute,
354
    false },
355
#if TARGET_ABI_OPEN_VMS
356
  { "common_object",   1, 1, true, false, false,
357
    ia64_vms_common_object_attribute, false },
358
#endif
359
  { "version_id",      1, 1, true, false, false,
360
    ia64_handle_version_id_attribute, false },
361
  { NULL,              0, 0, false, false, false, NULL, false }
362
};
363
 
364
/* Initialize the GCC target structure.  */
365
#undef TARGET_ATTRIBUTE_TABLE
366
#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
367
 
368
#undef TARGET_INIT_BUILTINS
369
#define TARGET_INIT_BUILTINS ia64_init_builtins
370
 
371
#undef TARGET_EXPAND_BUILTIN
372
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin
373
 
374
#undef TARGET_BUILTIN_DECL
375
#define TARGET_BUILTIN_DECL ia64_builtin_decl
376
 
377
#undef TARGET_ASM_BYTE_OP
378
#define TARGET_ASM_BYTE_OP "\tdata1\t"
379
#undef TARGET_ASM_ALIGNED_HI_OP
380
#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
381
#undef TARGET_ASM_ALIGNED_SI_OP
382
#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
383
#undef TARGET_ASM_ALIGNED_DI_OP
384
#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
385
#undef TARGET_ASM_UNALIGNED_HI_OP
386
#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
387
#undef TARGET_ASM_UNALIGNED_SI_OP
388
#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
389
#undef TARGET_ASM_UNALIGNED_DI_OP
390
#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
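/* Note (illustrative, not in the original source): the strings above name
   the IA-64 assembler's data1/data2/data4/data8 directives; the ".ua"
   completer marks the unaligned variants.  */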
391
#undef TARGET_ASM_INTEGER
392
#define TARGET_ASM_INTEGER ia64_assemble_integer
393
 
394
#undef TARGET_OPTION_OVERRIDE
395
#define TARGET_OPTION_OVERRIDE ia64_option_override
396
 
397
#undef TARGET_ASM_FUNCTION_PROLOGUE
398
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
399
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
400
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
401
#undef TARGET_ASM_FUNCTION_EPILOGUE
402
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
403
 
404
#undef TARGET_PRINT_OPERAND
405
#define TARGET_PRINT_OPERAND ia64_print_operand
406
#undef TARGET_PRINT_OPERAND_ADDRESS
407
#define TARGET_PRINT_OPERAND_ADDRESS ia64_print_operand_address
408
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
409
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ia64_print_operand_punct_valid_p
410
 
411
#undef TARGET_IN_SMALL_DATA_P
412
#define TARGET_IN_SMALL_DATA_P  ia64_in_small_data_p
413
 
414
#undef TARGET_SCHED_ADJUST_COST_2
415
#define TARGET_SCHED_ADJUST_COST_2 ia64_adjust_cost_2
416
#undef TARGET_SCHED_ISSUE_RATE
417
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
418
#undef TARGET_SCHED_VARIABLE_ISSUE
419
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
420
#undef TARGET_SCHED_INIT
421
#define TARGET_SCHED_INIT ia64_sched_init
422
#undef TARGET_SCHED_FINISH
423
#define TARGET_SCHED_FINISH ia64_sched_finish
424
#undef TARGET_SCHED_INIT_GLOBAL
425
#define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
426
#undef TARGET_SCHED_FINISH_GLOBAL
427
#define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
428
#undef TARGET_SCHED_REORDER
429
#define TARGET_SCHED_REORDER ia64_sched_reorder
430
#undef TARGET_SCHED_REORDER2
431
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2
432
 
433
#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
434
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
435
 
436
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
437
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
438
 
439
#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
440
#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
441
#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
442
#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
443
 
444
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
445
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
446
  ia64_first_cycle_multipass_dfa_lookahead_guard
447
 
448
#undef TARGET_SCHED_DFA_NEW_CYCLE
449
#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
450
 
451
#undef TARGET_SCHED_H_I_D_EXTENDED
452
#define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended
453
 
454
#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
455
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context
456
 
457
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
458
#define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context
459
 
460
#undef TARGET_SCHED_SET_SCHED_CONTEXT
461
#define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context
462
 
463
#undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
464
#define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context
465
 
466
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
467
#define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context
468
 
469
#undef TARGET_SCHED_SET_SCHED_FLAGS
470
#define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags
471
 
472
#undef TARGET_SCHED_GET_INSN_SPEC_DS
473
#define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds
474
 
475
#undef TARGET_SCHED_GET_INSN_CHECKED_DS
476
#define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds
477
 
478
#undef TARGET_SCHED_SPECULATE_INSN
479
#define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn
480
 
481
#undef TARGET_SCHED_NEEDS_BLOCK_P
482
#define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p
483
 
484
#undef TARGET_SCHED_GEN_SPEC_CHECK
485
#define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check
486
 
487
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC
488
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC\
489
  ia64_first_cycle_multipass_dfa_lookahead_guard_spec
490
 
491
#undef TARGET_SCHED_SKIP_RTX_P
492
#define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p
493
 
494
#undef TARGET_FUNCTION_OK_FOR_SIBCALL
495
#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
496
#undef TARGET_ARG_PARTIAL_BYTES
497
#define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
498
#undef TARGET_FUNCTION_ARG
499
#define TARGET_FUNCTION_ARG ia64_function_arg
500
#undef TARGET_FUNCTION_INCOMING_ARG
501
#define TARGET_FUNCTION_INCOMING_ARG ia64_function_incoming_arg
502
#undef TARGET_FUNCTION_ARG_ADVANCE
503
#define TARGET_FUNCTION_ARG_ADVANCE ia64_function_arg_advance
504
#undef TARGET_FUNCTION_ARG_BOUNDARY
505
#define TARGET_FUNCTION_ARG_BOUNDARY ia64_function_arg_boundary
506
 
507
#undef TARGET_ASM_OUTPUT_MI_THUNK
508
#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
509
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
510
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
511
 
512
#undef TARGET_ASM_FILE_START
513
#define TARGET_ASM_FILE_START ia64_file_start
514
 
515
#undef TARGET_ASM_GLOBALIZE_DECL_NAME
516
#define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name
517
 
518
#undef TARGET_REGISTER_MOVE_COST
519
#define TARGET_REGISTER_MOVE_COST ia64_register_move_cost
520
#undef TARGET_MEMORY_MOVE_COST
521
#define TARGET_MEMORY_MOVE_COST ia64_memory_move_cost
522
#undef TARGET_RTX_COSTS
523
#define TARGET_RTX_COSTS ia64_rtx_costs
524
#undef TARGET_ADDRESS_COST
525
#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
526
 
527
#undef TARGET_UNSPEC_MAY_TRAP_P
528
#define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p
529
 
530
#undef TARGET_MACHINE_DEPENDENT_REORG
531
#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
532
 
533
#undef TARGET_ENCODE_SECTION_INFO
534
#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
535
 
536
#undef  TARGET_SECTION_TYPE_FLAGS
537
#define TARGET_SECTION_TYPE_FLAGS  ia64_section_type_flags
538
 
539
#ifdef HAVE_AS_TLS
540
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
541
#define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
542
#endif
543
 
544
/* ??? Investigate.  */
545
#if 0
546
#undef TARGET_PROMOTE_PROTOTYPES
547
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
548
#endif
549
 
550
#undef TARGET_FUNCTION_VALUE
551
#define TARGET_FUNCTION_VALUE ia64_function_value
552
#undef TARGET_LIBCALL_VALUE
553
#define TARGET_LIBCALL_VALUE ia64_libcall_value
554
#undef TARGET_FUNCTION_VALUE_REGNO_P
555
#define TARGET_FUNCTION_VALUE_REGNO_P ia64_function_value_regno_p
556
 
557
#undef TARGET_STRUCT_VALUE_RTX
558
#define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
559
#undef TARGET_RETURN_IN_MEMORY
560
#define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
561
#undef TARGET_SETUP_INCOMING_VARARGS
562
#define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
563
#undef TARGET_STRICT_ARGUMENT_NAMING
564
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
565
#undef TARGET_MUST_PASS_IN_STACK
566
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
567
#undef TARGET_GET_RAW_RESULT_MODE
568
#define TARGET_GET_RAW_RESULT_MODE ia64_get_reg_raw_mode
569
#undef TARGET_GET_RAW_ARG_MODE
570
#define TARGET_GET_RAW_ARG_MODE ia64_get_reg_raw_mode
571
 
572
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
573
#define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
574
 
575
#undef TARGET_ASM_UNWIND_EMIT
576
#define TARGET_ASM_UNWIND_EMIT ia64_asm_unwind_emit
577
#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
578
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY  ia64_asm_emit_except_personality
579
#undef TARGET_ASM_INIT_SECTIONS
580
#define TARGET_ASM_INIT_SECTIONS  ia64_asm_init_sections
581
 
582
#undef TARGET_DEBUG_UNWIND_INFO
583
#define TARGET_DEBUG_UNWIND_INFO  ia64_debug_unwind_info
584
 
585
#undef TARGET_SCALAR_MODE_SUPPORTED_P
586
#define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
587
#undef TARGET_VECTOR_MODE_SUPPORTED_P
588
#define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
589
 
590
/* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
591
   in an order different from the specified program order.  */
592
#undef TARGET_RELAXED_ORDERING
593
#define TARGET_RELAXED_ORDERING true
594
 
595
#undef TARGET_LEGITIMATE_CONSTANT_P
596
#define TARGET_LEGITIMATE_CONSTANT_P ia64_legitimate_constant_p
597
#undef TARGET_LEGITIMATE_ADDRESS_P
598
#define TARGET_LEGITIMATE_ADDRESS_P ia64_legitimate_address_p
599
 
600
#undef TARGET_CANNOT_FORCE_CONST_MEM
601
#define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
602
 
603
#undef TARGET_MANGLE_TYPE
604
#define TARGET_MANGLE_TYPE ia64_mangle_type
605
 
606
#undef TARGET_INVALID_CONVERSION
607
#define TARGET_INVALID_CONVERSION ia64_invalid_conversion
608
#undef TARGET_INVALID_UNARY_OP
609
#define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
610
#undef TARGET_INVALID_BINARY_OP
611
#define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op
612
 
613
#undef TARGET_C_MODE_FOR_SUFFIX
614
#define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix
615
 
616
#undef TARGET_CAN_ELIMINATE
617
#define TARGET_CAN_ELIMINATE ia64_can_eliminate
618
 
619
#undef TARGET_TRAMPOLINE_INIT
620
#define TARGET_TRAMPOLINE_INIT ia64_trampoline_init
621
 
622
#undef TARGET_INVALID_WITHIN_DOLOOP
623
#define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_null
624
 
625
#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
626
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change
627
 
628
#undef TARGET_PREFERRED_RELOAD_CLASS
629
#define TARGET_PREFERRED_RELOAD_CLASS ia64_preferred_reload_class
630
 
631
#undef TARGET_DELAY_SCHED2
632
#define TARGET_DELAY_SCHED2 true
633
 
634
/* Variable tracking should be run after all optimizations which
635
   change order of insns.  It also needs a valid CFG.  */
636
#undef TARGET_DELAY_VARTRACK
637
#define TARGET_DELAY_VARTRACK true
638
 
639
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
640
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK ia64_vectorize_vec_perm_const_ok
641
 
642
struct gcc_target targetm = TARGET_INITIALIZER;
643
 
644
typedef enum
645
  {
646
    ADDR_AREA_NORMAL,   /* normal address area */
647
    ADDR_AREA_SMALL     /* addressable by "addl" (-2MB < addr < 2MB) */
648
  }
649
ia64_addr_area;
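/* Usage sketch (illustrative, not in the original source): the small address
   area is requested per object with the "model" attribute handled below, e.g.

     static int counter __attribute__ ((model ("small")));

   so that the object's address can be formed with a single addl, per the
   -2MB..2MB range noted above.  */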
650
 
651
static GTY(()) tree small_ident1;
652
static GTY(()) tree small_ident2;
653
 
654
static void
655
init_idents (void)
656
{
657
  if (small_ident1 == 0)
658
    {
659
      small_ident1 = get_identifier ("small");
660
      small_ident2 = get_identifier ("__small__");
661
    }
662
}
663
 
664
/* Retrieve the address area that has been chosen for the given decl.  */
665
 
666
static ia64_addr_area
667
ia64_get_addr_area (tree decl)
668
{
669
  tree model_attr;
670
 
671
  model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
672
  if (model_attr)
673
    {
674
      tree id;
675
 
676
      init_idents ();
677
      id = TREE_VALUE (TREE_VALUE (model_attr));
678
      if (id == small_ident1 || id == small_ident2)
679
        return ADDR_AREA_SMALL;
680
    }
681
  return ADDR_AREA_NORMAL;
682
}
683
 
684
static tree
685
ia64_handle_model_attribute (tree *node, tree name, tree args,
686
                             int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
687
{
688
  ia64_addr_area addr_area = ADDR_AREA_NORMAL;
689
  ia64_addr_area area;
690
  tree arg, decl = *node;
691
 
692
  init_idents ();
693
  arg = TREE_VALUE (args);
694
  if (arg == small_ident1 || arg == small_ident2)
695
    {
696
      addr_area = ADDR_AREA_SMALL;
697
    }
698
  else
699
    {
700
      warning (OPT_Wattributes, "invalid argument of %qE attribute",
701
               name);
702
      *no_add_attrs = true;
703
    }
704
 
705
  switch (TREE_CODE (decl))
706
    {
707
    case VAR_DECL:
708
      if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
709
           == FUNCTION_DECL)
710
          && !TREE_STATIC (decl))
711
        {
712
          error_at (DECL_SOURCE_LOCATION (decl),
713
                    "an address area attribute cannot be specified for "
714
                    "local variables");
715
          *no_add_attrs = true;
716
        }
717
      area = ia64_get_addr_area (decl);
718
      if (area != ADDR_AREA_NORMAL && addr_area != area)
719
        {
720
          error ("address area of %q+D conflicts with previous "
721
                 "declaration", decl);
722
          *no_add_attrs = true;
723
        }
724
      break;
725
 
726
    case FUNCTION_DECL:
727
      error_at (DECL_SOURCE_LOCATION (decl),
728
                "address area attribute cannot be specified for "
729
                "functions");
730
      *no_add_attrs = true;
731
      break;
732
 
733
    default:
734
      warning (OPT_Wattributes, "%qE attribute ignored",
735
               name);
736
      *no_add_attrs = true;
737
      break;
738
    }
739
 
740
  return NULL_TREE;
741
}
742
 
743
/* The section must have global and overlaid attributes.  */
744
#define SECTION_VMS_OVERLAY SECTION_MACH_DEP
745
 
746
/* Part of the low level implementation of DEC Ada pragma Common_Object which
747
   enables the shared use of variables stored in overlaid linker areas
748
   corresponding to the use of Fortran COMMON.  */
749
 
750
static tree
751
ia64_vms_common_object_attribute (tree *node, tree name, tree args,
752
                                  int flags ATTRIBUTE_UNUSED,
753
                                  bool *no_add_attrs)
754
{
755
    tree decl = *node;
756
    tree id, val;
757
    if (! DECL_P (decl))
758
      abort ();
759
 
760
    DECL_COMMON (decl) = 1;
761
    id = TREE_VALUE (args);
762
    if (TREE_CODE (id) == IDENTIFIER_NODE)
763
      val = build_string (IDENTIFIER_LENGTH (id), IDENTIFIER_POINTER (id));
764
    else if (TREE_CODE (id) == STRING_CST)
765
      val = id;
766
    else
767
      {
768
        warning (OPT_Wattributes,
769
                 "%qE attribute requires a string constant argument", name);
770
        *no_add_attrs = true;
771
        return NULL_TREE;
772
      }
773
    DECL_SECTION_NAME (decl) = val;
774
    return NULL_TREE;
775
}
776
 
777
/* Part of the low level implementation of DEC Ada pragma Common_Object.  */
778
 
779
void
780
ia64_vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
781
                                     unsigned HOST_WIDE_INT size,
782
                                     unsigned int align)
783
{
784
  tree attr = DECL_ATTRIBUTES (decl);
785
 
786
  /* As the common_object attribute sets DECL_SECTION_NAME, check it before
     looking up the attribute.  */
788
  if (DECL_SECTION_NAME (decl) && attr)
789
    attr = lookup_attribute ("common_object", attr);
790
  else
791
    attr = NULL_TREE;
792
 
793
  if (!attr)
794
    {
795
      /*  Code from elfos.h.  */
796
      fprintf (file, "%s", COMMON_ASM_OP);
797
      assemble_name (file, name);
798
      fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
799
               size, align / BITS_PER_UNIT);
800
    }
801
  else
802
    {
803
      ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
804
      ASM_OUTPUT_LABEL (file, name);
805
      ASM_OUTPUT_SKIP (file, size ? size : 1);
806
    }
807
}
808
 
809
/* Definition of TARGET_ASM_NAMED_SECTION for VMS.  */
810
 
811
void
812
ia64_vms_elf_asm_named_section (const char *name, unsigned int flags,
813
                                tree decl)
814
{
815
  if (!(flags & SECTION_VMS_OVERLAY))
816
    {
817
      default_elf_asm_named_section (name, flags, decl);
818
      return;
819
    }
820
  if (flags != (SECTION_VMS_OVERLAY | SECTION_WRITE))
821
    abort ();
822
 
823
  if (flags & SECTION_DECLARED)
824
    {
825
      fprintf (asm_out_file, "\t.section\t%s\n", name);
826
      return;
827
    }
828
 
829
  fprintf (asm_out_file, "\t.section\t%s,\"awgO\"\n", name);
830
}
831
 
832
static void
833
ia64_encode_addr_area (tree decl, rtx symbol)
834
{
835
  int flags;
836
 
837
  flags = SYMBOL_REF_FLAGS (symbol);
838
  switch (ia64_get_addr_area (decl))
839
    {
840
    case ADDR_AREA_NORMAL: break;
841
    case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
842
    default: gcc_unreachable ();
843
    }
844
  SYMBOL_REF_FLAGS (symbol) = flags;
845
}
846
 
847
static void
848
ia64_encode_section_info (tree decl, rtx rtl, int first)
849
{
850
  default_encode_section_info (decl, rtl, first);
851
 
852
  /* Careful not to prod global register variables.  */
853
  if (TREE_CODE (decl) == VAR_DECL
854
      && GET_CODE (DECL_RTL (decl)) == MEM
855
      && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
856
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
857
    ia64_encode_addr_area (decl, XEXP (rtl, 0));
858
}
859
 
860
/* Return 1 if the operands of a move are ok.  */
861
 
862
int
863
ia64_move_ok (rtx dst, rtx src)
864
{
865
  /* If we're under init_recog_no_volatile, we'll not be able to use
866
     memory_operand.  So check the code directly and don't worry about
867
     the validity of the underlying address, which should have been
868
     checked elsewhere anyway.  */
869
  if (GET_CODE (dst) != MEM)
870
    return 1;
871
  if (GET_CODE (src) == MEM)
872
    return 0;
873
  if (register_operand (src, VOIDmode))
874
    return 1;
875
 
876
  /* Otherwise, this must be a constant, and it must be either 0, 0.0 or 1.0.  */
877
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
878
    return src == const0_rtx;
879
  else
880
    return satisfies_constraint_G (src);
881
}
882
 
883
/* Return 1 if the operands are ok for a floating point load pair.  */
884
 
885
int
886
ia64_load_pair_ok (rtx dst, rtx src)
887
{
888
  if (GET_CODE (dst) != REG || !FP_REGNO_P (REGNO (dst)))
889
    return 0;
890
  if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
891
    return 0;
892
  switch (GET_CODE (XEXP (src, 0)))
893
    {
894
    case REG:
895
    case POST_INC:
896
      break;
897
    case POST_DEC:
898
      return 0;
899
    case POST_MODIFY:
900
      {
901
        rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);
902
 
903
        if (GET_CODE (adjust) != CONST_INT
904
            || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
905
          return 0;
906
      }
907
      break;
908
    default:
909
      abort ();
910
    }
911
  return 1;
912
}
913
 
914
int
915
addp4_optimize_ok (rtx op1, rtx op2)
916
{
917
  return (basereg_operand (op1, GET_MODE(op1)) !=
918
          basereg_operand (op2, GET_MODE(op2)));
919
}
920
 
/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */
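/* Worked example (illustrative, not in the original source): for ROP == 0xff0
   and RSHIFT == 4, the shift below leaves op == 0xff, and
   exact_log2 (0xff + 1) == 8, i.e. an 8-bit field.  If the shifted mask is
   not a solid block of 1's at bit 0, op + 1 is not a power of two and
   exact_log2 returns a negative value, signalling failure.  */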

int
ia64_depz_field_mask (rtx rop, rtx rshift)
{
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
  return exact_log2 (op + 1);
}
936
 
937
/* Return the TLS model to use for ADDR.  */
938
 
939
static enum tls_model
940
tls_symbolic_operand_type (rtx addr)
941
{
942
  enum tls_model tls_kind = TLS_MODEL_NONE;
943
 
944
  if (GET_CODE (addr) == CONST)
945
    {
946
      if (GET_CODE (XEXP (addr, 0)) == PLUS
947
          && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
948
        tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
949
    }
950
  else if (GET_CODE (addr) == SYMBOL_REF)
951
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);
952
 
953
  return tls_kind;
954
}
955
 
956
/* Returns true if REG (assumed to be a `reg' RTX) is valid for use
957
   as a base register.  */
958
 
959
static inline bool
960
ia64_reg_ok_for_base_p (const_rtx reg, bool strict)
961
{
962
  if (strict
963
      && REGNO_OK_FOR_BASE_P (REGNO (reg)))
964
    return true;
965
  else if (!strict
966
           && (GENERAL_REGNO_P (REGNO (reg))
967
               || !HARD_REGISTER_P (reg)))
968
    return true;
969
  else
970
    return false;
971
}
972
 
973
static bool
974
ia64_legitimate_address_reg (const_rtx reg, bool strict)
975
{
976
  if ((REG_P (reg) && ia64_reg_ok_for_base_p (reg, strict))
977
      || (GET_CODE (reg) == SUBREG && REG_P (XEXP (reg, 0))
978
          && ia64_reg_ok_for_base_p (XEXP (reg, 0), strict)))
979
    return true;
980
 
981
  return false;
982
}
983
 
984
static bool
985
ia64_legitimate_address_disp (const_rtx reg, const_rtx disp, bool strict)
986
{
987
  if (GET_CODE (disp) == PLUS
988
      && rtx_equal_p (reg, XEXP (disp, 0))
989
      && (ia64_legitimate_address_reg (XEXP (disp, 1), strict)
990
          || (CONST_INT_P (XEXP (disp, 1))
991
              && IN_RANGE (INTVAL (XEXP (disp, 1)), -256, 255))))
992
    return true;
993
 
994
  return false;
995
}
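/* Note (illustrative, not in the original source): the -256..255 window
   checked above matches the signed 9-bit immediate accepted by the
   post-modify addressing forms.  */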
996
 
997
/* Implement TARGET_LEGITIMATE_ADDRESS_P.  */
998
 
999
static bool
1000
ia64_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
1001
                           rtx x, bool strict)
1002
{
1003
  if (ia64_legitimate_address_reg (x, strict))
1004
    return true;
1005
  else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == POST_DEC)
1006
           && ia64_legitimate_address_reg (XEXP (x, 0), strict)
1007
           && XEXP (x, 0) != arg_pointer_rtx)
1008
    return true;
1009
  else if (GET_CODE (x) == POST_MODIFY
1010
           && ia64_legitimate_address_reg (XEXP (x, 0), strict)
1011
           && XEXP (x, 0) != arg_pointer_rtx
1012
           && ia64_legitimate_address_disp (XEXP (x, 0), XEXP (x, 1), strict))
1013
    return true;
1014
  else
1015
    return false;
1016
}
1017
 
1018
/* Return true if X is a constant that is valid for some immediate
1019
   field in an instruction.  */
1020
 
1021
static bool
1022
ia64_legitimate_constant_p (enum machine_mode mode, rtx x)
1023
{
1024
  switch (GET_CODE (x))
1025
    {
1026
    case CONST_INT:
1027
    case LABEL_REF:
1028
      return true;
1029
 
1030
    case CONST_DOUBLE:
1031
      if (GET_MODE (x) == VOIDmode || mode == SFmode || mode == DFmode)
1032
        return true;
1033
      return satisfies_constraint_G (x);
1034
 
1035
    case CONST:
1036
    case SYMBOL_REF:
1037
      /* ??? Short term workaround for PR 28490.  We must make the code here
1038
         match the code in ia64_expand_move and move_operand, even though they
1039
         are both technically wrong.  */
1040
      if (tls_symbolic_operand_type (x) == 0)
1041
        {
1042
          HOST_WIDE_INT addend = 0;
1043
          rtx op = x;
1044
 
1045
          if (GET_CODE (op) == CONST
1046
              && GET_CODE (XEXP (op, 0)) == PLUS
1047
              && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
1048
            {
1049
              addend = INTVAL (XEXP (XEXP (op, 0), 1));
1050
              op = XEXP (XEXP (op, 0), 0);
1051
            }
1052
 
1053
          if (any_offset_symbol_operand (op, mode)
1054
              || function_operand (op, mode))
1055
            return true;
1056
          if (aligned_offset_symbol_operand (op, mode))
1057
            return (addend & 0x3fff) == 0;
1058
          return false;
1059
        }
1060
      return false;
1061
 
1062
    case CONST_VECTOR:
1063
      if (mode == V2SFmode)
1064
        return satisfies_constraint_Y (x);
1065
 
1066
      return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1067
              && GET_MODE_SIZE (mode) <= 8);
1068
 
1069
    default:
1070
      return false;
1071
    }
1072
}
1073
 
1074
/* Don't allow TLS addresses to get spilled to memory.  */
1075
 
1076
static bool
1077
ia64_cannot_force_const_mem (enum machine_mode mode, rtx x)
1078
{
1079
  if (mode == RFmode)
1080
    return true;
1081
  return tls_symbolic_operand_type (x) != 0;
1082
}
1083
 
1084
/* Expand a symbolic constant load.  */
1085
 
1086
bool
1087
ia64_expand_load_address (rtx dest, rtx src)
1088
{
1089
  gcc_assert (GET_CODE (dest) == REG);
1090
 
1091
  /* ILP32 mode still loads 64 bits of data from the GOT.  This avoids
1092
     having to pointer-extend the value afterward.  Other forms of address
1093
     computation below are also more natural to compute as 64-bit quantities.
1094
     If we've been given an SImode destination register, change it.  */
1095
  if (GET_MODE (dest) != Pmode)
1096
    dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest),
1097
                               byte_lowpart_offset (Pmode, GET_MODE (dest)));
1098
 
1099
  if (TARGET_NO_PIC)
1100
    return false;
1101
  if (small_addr_symbolic_operand (src, VOIDmode))
1102
    return false;
1103
 
1104
  if (TARGET_AUTO_PIC)
1105
    emit_insn (gen_load_gprel64 (dest, src));
1106
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
1107
    emit_insn (gen_load_fptr (dest, src));
1108
  else if (sdata_symbolic_operand (src, VOIDmode))
1109
    emit_insn (gen_load_gprel (dest, src));
1110
  else
1111
    {
1112
      HOST_WIDE_INT addend = 0;
1113
      rtx tmp;
1114
 
1115
      /* We did split constant offsets in ia64_expand_move, and we did try
1116
         to keep them split in move_operand, but we also allowed reload to
1117
         rematerialize arbitrary constants rather than spill the value to
1118
         the stack and reload it.  So we have to be prepared here to split
1119
         them apart again.  */
1120
      if (GET_CODE (src) == CONST)
1121
        {
1122
          HOST_WIDE_INT hi, lo;
1123
 
1124
          hi = INTVAL (XEXP (XEXP (src, 0), 1));
1125
          lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
1126
          hi = hi - lo;
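          /* Illustrative note (not in the original source): this splits the
             offset so that LO is the sign-extended low 14 bits, in the range
             [-0x2000, 0x1fff], and HI becomes a multiple of 0x4000.  For an
             original offset of 0x12345 this gives lo == -0x1cbb and
             hi == 0x14000.  */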
1127
 
1128
          if (lo != 0)
1129
            {
1130
              addend = lo;
1131
              src = plus_constant (XEXP (XEXP (src, 0), 0), hi);
1132
            }
1133
        }
1134
 
1135
      tmp = gen_rtx_HIGH (Pmode, src);
1136
      tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
1137
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
1138
 
1139
      tmp = gen_rtx_LO_SUM (Pmode, gen_const_mem (Pmode, dest), src);
1140
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
1141
 
1142
      if (addend)
1143
        {
1144
          tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
1145
          emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
1146
        }
1147
    }
1148
 
1149
  return true;
1150
}
1151
 
1152
static GTY(()) rtx gen_tls_tga;
1153
static rtx
1154
gen_tls_get_addr (void)
1155
{
1156
  if (!gen_tls_tga)
1157
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
1158
  return gen_tls_tga;
1159
}
1160
 
1161
static GTY(()) rtx thread_pointer_rtx;
1162
static rtx
1163
gen_thread_pointer (void)
1164
{
1165
  if (!thread_pointer_rtx)
1166
    thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
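    /* Illustrative note (not in the original source): hard register 13 is
       r13, which the IA-64 software conventions reserve as the thread
       pointer.  */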
1167
  return thread_pointer_rtx;
1168
}
1169
 
1170
static rtx
1171
ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
1172
                         rtx orig_op1, HOST_WIDE_INT addend)
1173
{
1174
  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
1175
  rtx orig_op0 = op0;
1176
  HOST_WIDE_INT addend_lo, addend_hi;
1177
 
1178
  switch (tls_kind)
1179
    {
1180
    case TLS_MODEL_GLOBAL_DYNAMIC:
1181
      start_sequence ();
1182
 
1183
      tga_op1 = gen_reg_rtx (Pmode);
1184
      emit_insn (gen_load_dtpmod (tga_op1, op1));
1185
 
1186
      tga_op2 = gen_reg_rtx (Pmode);
1187
      emit_insn (gen_load_dtprel (tga_op2, op1));
1188
 
1189
      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1190
                                         LCT_CONST, Pmode, 2, tga_op1,
1191
                                         Pmode, tga_op2, Pmode);
1192
 
1193
      insns = get_insns ();
1194
      end_sequence ();
1195
 
1196
      if (GET_MODE (op0) != Pmode)
1197
        op0 = tga_ret;
1198
      emit_libcall_block (insns, op0, tga_ret, op1);
1199
      break;
1200
 
1201
    case TLS_MODEL_LOCAL_DYNAMIC:
1202
      /* ??? This isn't the completely proper way to do local-dynamic.
         If the call to __tls_get_addr is used only by a single symbol,
         then we should (somehow) move the dtprel to the second arg
         to avoid the extra add.  */
1206
      start_sequence ();
1207
 
1208
      tga_op1 = gen_reg_rtx (Pmode);
1209
      emit_insn (gen_load_dtpmod (tga_op1, op1));
1210
 
1211
      tga_op2 = const0_rtx;
1212
 
1213
      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1214
                                         LCT_CONST, Pmode, 2, tga_op1,
1215
                                         Pmode, tga_op2, Pmode);
1216
 
1217
      insns = get_insns ();
1218
      end_sequence ();
1219
 
1220
      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1221
                                UNSPEC_LD_BASE);
1222
      tmp = gen_reg_rtx (Pmode);
1223
      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
1224
 
1225
      if (!register_operand (op0, Pmode))
1226
        op0 = gen_reg_rtx (Pmode);
1227
      if (TARGET_TLS64)
1228
        {
1229
          emit_insn (gen_load_dtprel (op0, op1));
1230
          emit_insn (gen_adddi3 (op0, tmp, op0));
1231
        }
1232
      else
1233
        emit_insn (gen_add_dtprel (op0, op1, tmp));
1234
      break;
1235
 
1236
    case TLS_MODEL_INITIAL_EXEC:
1237
      addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1238
      addend_hi = addend - addend_lo;
1239
 
1240
      op1 = plus_constant (op1, addend_hi);
1241
      addend = addend_lo;
1242
 
1243
      tmp = gen_reg_rtx (Pmode);
1244
      emit_insn (gen_load_tprel (tmp, op1));
1245
 
1246
      if (!register_operand (op0, Pmode))
1247
        op0 = gen_reg_rtx (Pmode);
1248
      emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
1249
      break;
1250
 
1251
    case TLS_MODEL_LOCAL_EXEC:
1252
      if (!register_operand (op0, Pmode))
1253
        op0 = gen_reg_rtx (Pmode);
1254
 
1255
      op1 = orig_op1;
1256
      addend = 0;
1257
      if (TARGET_TLS64)
1258
        {
1259
          emit_insn (gen_load_tprel (op0, op1));
1260
          emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
1261
        }
1262
      else
1263
        emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
1264
      break;
1265
 
1266
    default:
1267
      gcc_unreachable ();
1268
    }
1269
 
1270
  if (addend)
1271
    op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
1272
                               orig_op0, 1, OPTAB_DIRECT);
1273
  if (orig_op0 == op0)
1274
    return NULL_RTX;
1275
  if (GET_MODE (orig_op0) == Pmode)
1276
    return op0;
1277
  return gen_lowpart (GET_MODE (orig_op0), op0);
1278
}
1279
 
1280
rtx
1281
ia64_expand_move (rtx op0, rtx op1)
1282
{
1283
  enum machine_mode mode = GET_MODE (op0);
1284
 
1285
  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
1286
    op1 = force_reg (mode, op1);
1287
 
1288
  if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
1289
    {
1290
      HOST_WIDE_INT addend = 0;
1291
      enum tls_model tls_kind;
1292
      rtx sym = op1;
1293
 
1294
      if (GET_CODE (op1) == CONST
1295
          && GET_CODE (XEXP (op1, 0)) == PLUS
1296
          && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
1297
        {
1298
          addend = INTVAL (XEXP (XEXP (op1, 0), 1));
1299
          sym = XEXP (XEXP (op1, 0), 0);
1300
        }
1301
 
1302
      tls_kind = tls_symbolic_operand_type (sym);
1303
      if (tls_kind)
1304
        return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);
1305
 
1306
      if (any_offset_symbol_operand (sym, mode))
1307
        addend = 0;
1308
      else if (aligned_offset_symbol_operand (sym, mode))
1309
        {
1310
          HOST_WIDE_INT addend_lo, addend_hi;
1311
 
1312
          addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1313
          addend_hi = addend - addend_lo;
1314
 
1315
          if (addend_lo != 0)
1316
            {
1317
              op1 = plus_constant (sym, addend_hi);
1318
              addend = addend_lo;
1319
            }
1320
          else
1321
            addend = 0;
1322
        }
1323
      else
1324
        op1 = sym;
1325
 
1326
      if (reload_completed)
1327
        {
1328
          /* We really should have taken care of this offset earlier.  */
1329
          gcc_assert (addend == 0);
1330
          if (ia64_expand_load_address (op0, op1))
1331
            return NULL_RTX;
1332
        }
1333
 
1334
      if (addend)
1335
        {
1336
          rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode);
1337
 
1338
          emit_insn (gen_rtx_SET (VOIDmode, subtarget, op1));
1339
 
1340
          op1 = expand_simple_binop (mode, PLUS, subtarget,
1341
                                     GEN_INT (addend), op0, 1, OPTAB_DIRECT);
1342
          if (op0 == op1)
1343
            return NULL_RTX;
1344
        }
1345
    }
1346
 
1347
  return op1;
1348
}
1349
 
1350
/* Split a move from OP1 to OP0 conditional on COND.  */
1351
 
1352
void
1353
ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
1354
{
1355
  rtx insn, first = get_last_insn ();
1356
 
1357
  emit_move_insn (op0, op1);
1358
 
1359
  for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
1360
    if (INSN_P (insn))
1361
      PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
1362
                                          PATTERN (insn));
1363
}
1364
 
1365
/* Split a post-reload TImode or TFmode reference into two DImode
1366
   components.  This is made extra difficult by the fact that we do
1367
   not get any scratch registers to work with, because reload cannot
1368
   be prevented from giving us a scratch that overlaps the register
1369
   pair involved.  So instead, when addressing memory, we tweak the
1370
   pointer register up and back down with POST_INCs.  Or up and not
1371
   back down when we can get away with it.
1372
 
1373
   REVERSED is true when the loads must be done in reversed order
1374
   (high word first) for correctness.  DEAD is true when the pointer
1375
   dies with the second insn we generate and therefore the second
1376
   address must not carry a postmodify.
1377
 
1378
   May return an insn which is to be emitted after the moves.  */
1379
 
1380
static rtx
1381
ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
1382
{
1383
  rtx fixup = 0;
1384
 
1385
  switch (GET_CODE (in))
1386
    {
1387
    case REG:
1388
      out[reversed] = gen_rtx_REG (DImode, REGNO (in));
1389
      out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
1390
      break;
1391
 
1392
    case CONST_INT:
1393
    case CONST_DOUBLE:
1394
      /* Cannot occur reversed.  */
1395
      gcc_assert (!reversed);
1396
 
1397
      if (GET_MODE (in) != TFmode)
1398
        split_double (in, &out[0], &out[1]);
1399
      else
1400
        /* split_double does not understand how to split a TFmode
1401
           quantity into a pair of DImode constants.  */
1402
        {
1403
          REAL_VALUE_TYPE r;
1404
          unsigned HOST_WIDE_INT p[2];
1405
          long l[4];  /* TFmode is 128 bits */
1406
 
1407
          REAL_VALUE_FROM_CONST_DOUBLE (r, in);
1408
          real_to_target (l, &r, TFmode);
1409
 
1410
          if (FLOAT_WORDS_BIG_ENDIAN)
1411
            {
1412
              p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
1413
              p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
1414
            }
1415
          else
1416
            {
1417
              p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
1418
              p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
1419
            }
1420
          out[0] = GEN_INT (p[0]);
1421
          out[1] = GEN_INT (p[1]);
1422
        }
1423
      break;
1424
 
1425
    case MEM:
1426
      {
1427
        rtx base = XEXP (in, 0);
1428
        rtx offset;
1429
 
1430
        switch (GET_CODE (base))
1431
          {
1432
          case REG:
1433
            if (!reversed)
1434
              {
1435
                out[0] = adjust_automodify_address
1436
                  (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1437
                out[1] = adjust_automodify_address
1438
                  (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
1439
              }
1440
            else
1441
              {
1442
                /* Reversal requires a pre-increment, which can only
1443
                   be done as a separate insn.  */
1444
                emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
1445
                out[0] = adjust_automodify_address
1446
                  (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
1447
                out[1] = adjust_address (in, DImode, 0);
1448
              }
1449
            break;
1450
 
1451
          case POST_INC:
1452
            gcc_assert (!reversed && !dead);
1453
 
1454
            /* Just do the increment in two steps.  */
1455
            out[0] = adjust_automodify_address (in, DImode, 0, 0);
1456
            out[1] = adjust_automodify_address (in, DImode, 0, 8);
1457
            break;
1458
 
1459
          case POST_DEC:
1460
            gcc_assert (!reversed && !dead);
1461
 
1462
            /* Add 8, subtract 24.  */
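            /* Illustrative note (not in the original source): the original
               POST_DEC would step the pointer by -16 (the size of the mode
               being split); POST_INC (+8) followed by POST_MODIFY (-24)
               yields the same net -16.  */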
1463
            base = XEXP (base, 0);
1464
            out[0] = adjust_automodify_address
1465
              (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1466
            out[1] = adjust_automodify_address
1467
              (in, DImode,
1468
               gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),
1469
               8);
1470
            break;
1471
 
1472
          case POST_MODIFY:
1473
            gcc_assert (!reversed && !dead);
1474
 
1475
            /* Extract and adjust the modification.  This case is
1476
               trickier than the others, because we might have an
1477
               index register, or we might have a combined offset that
1478
               doesn't fit a signed 9-bit displacement field.  We can
1479
               assume the incoming expression is already legitimate.  */
1480
            offset = XEXP (base, 1);
1481
            base = XEXP (base, 0);
1482
 
1483
            out[0] = adjust_automodify_address
1484
              (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1485
 
1486
            if (GET_CODE (XEXP (offset, 1)) == REG)
1487
              {
1488
                /* Can't adjust the postmodify to match.  Emit the
1489
                   original, then a separate addition insn.  */
1490
                out[1] = adjust_automodify_address (in, DImode, 0, 8);
1491
                fixup = gen_adddi3 (base, base, GEN_INT (-8));
1492
              }
1493
            else
1494
              {
1495
                gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
1496
                if (INTVAL (XEXP (offset, 1)) < -256 + 8)
1497
                  {
1498
                    /* Again the postmodify cannot be made to match,
1499
                       but in this case it's more efficient to get rid
1500
                       of the postmodify entirely and fix up with an
1501
                       add insn.  */
1502
                    out[1] = adjust_automodify_address (in, DImode, base, 8);
1503
                    fixup = gen_adddi3
1504
                      (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
1505
                  }
1506
                else
1507
                  {
1508
                    /* Combined offset still fits in the displacement field.
1509
                       (We cannot overflow it at the high end.)  */
1510
                    out[1] = adjust_automodify_address
1511
                      (in, DImode, gen_rtx_POST_MODIFY
1512
                       (Pmode, base, gen_rtx_PLUS
1513
                        (Pmode, base,
1514
                         GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
1515
                       8);
1516
                  }
1517
              }
1518
            break;
1519
 
1520
          default:
1521
            gcc_unreachable ();
1522
          }
1523
        break;
1524
      }
1525
 
1526
    default:
1527
      gcc_unreachable ();
1528
    }
1529
 
1530
  return fixup;
1531
}
1532
 
1533
/* Split a TImode or TFmode move instruction after reload.
1534
   This is used by *movtf_internal and *movti_internal.  */
1535
void
1536
ia64_split_tmode_move (rtx operands[])
1537
{
1538
  rtx in[2], out[2], insn;
1539
  rtx fixup[2];
1540
  bool dead = false;
1541
  bool reversed = false;
1542
 
1543
  /* It is possible for reload to decide to overwrite a pointer with
1544
     the value it points to.  In that case we have to do the loads in
1545
     the appropriate order so that the pointer is not destroyed too
1546
     early.  Also we must not generate a postmodify for that second
1547
     load, or rws_access_regno will die.  */
1548
  if (GET_CODE (operands[1]) == MEM
1549
      && reg_overlap_mentioned_p (operands[0], operands[1]))
1550
    {
1551
      rtx base = XEXP (operands[1], 0);
1552
      while (GET_CODE (base) != REG)
1553
        base = XEXP (base, 0);
1554
 
1555
      if (REGNO (base) == REGNO (operands[0]))
1556
        reversed = true;
1557
      dead = true;
1558
    }
1559
  /* Another reason to do the moves in reversed order is if the first
1560
     element of the target register pair is also the second element of
1561
     the source register pair.  */
1562
  if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
1563
      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1564
    reversed = true;
1565
 
1566
  fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
1567
  fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
1568
 
1569
#define MAYBE_ADD_REG_INC_NOTE(INSN, EXP)                               \
1570
  if (GET_CODE (EXP) == MEM                                             \
1571
      && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY                        \
1572
          || GET_CODE (XEXP (EXP, 0)) == POST_INC                        \
1573
          || GET_CODE (XEXP (EXP, 0)) == POST_DEC))                      \
1574
    add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0))
1575
 
1576
  insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
1577
  MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
1578
  MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
1579
 
1580
  insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
1581
  MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
1582
  MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
1583
 
1584
  if (fixup[0])
1585
    emit_insn (fixup[0]);
1586
  if (fixup[1])
1587
    emit_insn (fixup[1]);
1588
 
1589
#undef MAYBE_ADD_REG_INC_NOTE
1590
}
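/* As a rough illustration of the split performed above (register numbers
   are arbitrary): a TImode load such as (set (reg:TI r32) (mem:TI (reg:DI r14)))
   becomes two DImode loads using post-modify addressing,

        ld8 r32 = [r14], 8      ;; POST_INC by 8
        ld8 r33 = [r14], -8     ;; POST_DEC restores r14 unless it is dead

   and any base address that cannot be expressed as a post-modify is
   patched up by a separate add returned through the FIXUP slots.  */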
1591
 
1592
/* ??? Fixing GR->FR XFmode moves during reload is hard.  You need to go
1593
   through memory plus an extra GR scratch register.  Except that you can
1594
   either get the first from SECONDARY_MEMORY_NEEDED or the second from
1595
   SECONDARY_RELOAD_CLASS, but not both.
1596
 
1597
   We got into problems in the first place by allowing a construct like
1598
   (subreg:XF (reg:TI)), which we got from a union containing a long double.
1599
   This solution attempts to prevent this situation from occurring.  When
1600
   we see something like the above, we spill the inner register to memory.  */
1601
 
1602
static rtx
1603
spill_xfmode_rfmode_operand (rtx in, int force, enum machine_mode mode)
1604
{
1605
  if (GET_CODE (in) == SUBREG
1606
      && GET_MODE (SUBREG_REG (in)) == TImode
1607
      && GET_CODE (SUBREG_REG (in)) == REG)
1608
    {
1609
      rtx memt = assign_stack_temp (TImode, 16, 0);
1610
      emit_move_insn (memt, SUBREG_REG (in));
1611
      return adjust_address (memt, mode, 0);
1612
    }
1613
  else if (force && GET_CODE (in) == REG)
1614
    {
1615
      rtx memx = assign_stack_temp (mode, 16, 0);
1616
      emit_move_insn (memx, in);
1617
      return memx;
1618
    }
1619
  else
1620
    return in;
1621
}
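/* For example (purely illustrative), given (subreg:XF (reg:TI 100) 0) the
   routine above stores the TImode pseudo to a fresh 16-byte stack slot and
   hands back that slot re-addressed in XFmode, so no XF/TI subreg of a
   register survives to reload.  */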
1622
 
1623
/* Expand the movxf or movrf pattern (MODE says which) with the given
1624
   OPERANDS, returning true if the pattern should then invoke
1625
   DONE.  */
1626
 
1627
bool
1628
ia64_expand_movxf_movrf (enum machine_mode mode, rtx operands[])
1629
{
1630
  rtx op0 = operands[0];
1631
 
1632
  if (GET_CODE (op0) == SUBREG)
1633
    op0 = SUBREG_REG (op0);
1634
 
1635
  /* We must support XFmode loads into general registers for stdarg/vararg,
1636
     unprototyped calls, and a rare case where a long double is passed as
1637
     an argument after a float HFA fills the FP registers.  We split them into
1638
     DImode loads for convenience.  We also need to support XFmode stores
1639
     for the last case.  This case does not happen for stdarg/vararg routines,
1640
     because we do a block store to memory of unnamed arguments.  */
1641
 
1642
  if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
1643
    {
1644
      rtx out[2];
1645
 
1646
      /* We're hoping to transform everything that deals with XFmode
1647
         quantities and GR registers early in the compiler.  */
1648
      gcc_assert (can_create_pseudo_p ());
1649
 
1650
      /* Struct to register can just use TImode instead.  */
1651
      if ((GET_CODE (operands[1]) == SUBREG
1652
           && GET_MODE (SUBREG_REG (operands[1])) == TImode)
1653
          || (GET_CODE (operands[1]) == REG
1654
              && GR_REGNO_P (REGNO (operands[1]))))
1655
        {
1656
          rtx op1 = operands[1];
1657
 
1658
          if (GET_CODE (op1) == SUBREG)
1659
            op1 = SUBREG_REG (op1);
1660
          else
1661
            op1 = gen_rtx_REG (TImode, REGNO (op1));
1662
 
1663
          emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
1664
          return true;
1665
        }
1666
 
1667
      if (GET_CODE (operands[1]) == CONST_DOUBLE)
1668
        {
1669
          /* Don't word-swap when reading in the constant.  */
1670
          emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
1671
                          operand_subword (operands[1], WORDS_BIG_ENDIAN,
1672
                                           0, mode));
1673
          emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
1674
                          operand_subword (operands[1], !WORDS_BIG_ENDIAN,
1675
                                           0, mode));
1676
          return true;
1677
        }
1678
 
1679
      /* If the quantity is in a register not known to be GR, spill it.  */
1680
      if (register_operand (operands[1], mode))
1681
        operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);
1682
 
1683
      gcc_assert (GET_CODE (operands[1]) == MEM);
1684
 
1685
      /* Don't word-swap when reading in the value.  */
1686
      out[0] = gen_rtx_REG (DImode, REGNO (op0));
1687
      out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);
1688
 
1689
      emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
1690
      emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
1691
      return true;
1692
    }
1693
 
1694
  if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
1695
    {
1696
      /* We're hoping to transform everything that deals with XFmode
1697
         quantities and GR registers early in the compiler.  */
1698
      gcc_assert (can_create_pseudo_p ());
1699
 
1700
      /* Op0 can't be a GR_REG here, as that case is handled above.
1701
         If op0 is a register, then we spill op1, so that we now have a
1702
         MEM operand.  This requires creating an XFmode subreg of a TImode reg
1703
         to force the spill.  */
1704
      if (register_operand (operands[0], mode))
1705
        {
1706
          rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
1707
          op1 = gen_rtx_SUBREG (mode, op1, 0);
1708
          operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
1709
        }
1710
 
1711
      else
1712
        {
1713
          rtx in[2];
1714
 
1715
          gcc_assert (GET_CODE (operands[0]) == MEM);
1716
 
1717
          /* Don't word-swap when writing out the value.  */
1718
          in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
1719
          in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
1720
 
1721
          emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
1722
          emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
1723
          return true;
1724
        }
1725
    }
1726
 
1727
  if (!reload_in_progress && !reload_completed)
1728
    {
1729
      operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);
1730
 
1731
      if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
1732
        {
1733
          rtx memt, memx, in = operands[1];
1734
          if (CONSTANT_P (in))
1735
            in = validize_mem (force_const_mem (mode, in));
1736
          if (GET_CODE (in) == MEM)
1737
            memt = adjust_address (in, TImode, 0);
1738
          else
1739
            {
1740
              memt = assign_stack_temp (TImode, 16, 0);
1741
              memx = adjust_address (memt, mode, 0);
1742
              emit_move_insn (memx, in);
1743
            }
1744
          emit_move_insn (op0, memt);
1745
          return true;
1746
        }
1747
 
1748
      if (!ia64_move_ok (operands[0], operands[1]))
1749
        operands[1] = force_reg (mode, operands[1]);
1750
    }
1751
 
1752
  return false;
1753
}
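/* Purely as an illustration of the GR path above: an XFmode load whose
   destination is a general register pair, say r32/r33, is emitted as two
   DImode loads from offsets 0 and 8 of the memory operand, deliberately
   without any word swapping.  */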
1754
 
1755
/* Emit comparison instruction if necessary, replacing *EXPR, *OP0, *OP1
1756
   with the expression that holds the compare result (in VOIDmode).  */
1757
 
1758
static GTY(()) rtx cmptf_libfunc;
1759
 
1760
void
1761
ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1)
1762
{
1763
  enum rtx_code code = GET_CODE (*expr);
1764
  rtx cmp;
1765
 
1766
  /* If we have a BImode input, then we already have a compare result, and
1767
     do not need to emit another comparison.  */
1768
  if (GET_MODE (*op0) == BImode)
1769
    {
1770
      gcc_assert ((code == NE || code == EQ) && *op1 == const0_rtx);
1771
      cmp = *op0;
1772
    }
1773
  /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
1774
     magic number as its third argument that indicates what to do.
1775
     The return value is an integer to be compared against zero.  */
1776
  else if (TARGET_HPUX && GET_MODE (*op0) == TFmode)
1777
    {
1778
      enum qfcmp_magic {
1779
        QCMP_INV = 1,   /* Raise FP_INVALID on SNaN as a side effect.  */
1780
        QCMP_UNORD = 2,
1781
        QCMP_EQ = 4,
1782
        QCMP_LT = 8,
1783
        QCMP_GT = 16
1784
      };
1785
      int magic;
1786
      enum rtx_code ncode;
1787
      rtx ret, insns;
1788
 
1789
      gcc_assert (cmptf_libfunc && GET_MODE (*op1) == TFmode);
1790
      switch (code)
1791
        {
1792
          /* 1 = equal, 0 = not equal.  Equality operators do
1793
             not raise FP_INVALID when given an SNaN operand.  */
1794
        case EQ:        magic = QCMP_EQ;                  ncode = NE; break;
1795
        case NE:        magic = QCMP_EQ;                  ncode = EQ; break;
1796
          /* isunordered() from C99.  */
1797
        case UNORDERED: magic = QCMP_UNORD;               ncode = NE; break;
1798
        case ORDERED:   magic = QCMP_UNORD;               ncode = EQ; break;
1799
          /* Relational operators raise FP_INVALID when given
1800
             an SNaN operand.  */
1801
        case LT:        magic = QCMP_LT        |QCMP_INV; ncode = NE; break;
1802
        case LE:        magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1803
        case GT:        magic = QCMP_GT        |QCMP_INV; ncode = NE; break;
1804
        case GE:        magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1805
          /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
1806
             Expanders for buneq etc. would have to be added to ia64.md
1807
             for this to be useful.  */
1808
        default: gcc_unreachable ();
1809
        }
1810
 
1811
      start_sequence ();
1812
 
1813
      ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
1814
                                     *op0, TFmode, *op1, TFmode,
1815
                                     GEN_INT (magic), DImode);
1816
      cmp = gen_reg_rtx (BImode);
1817
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
1818
                              gen_rtx_fmt_ee (ncode, BImode,
1819
                                              ret, const0_rtx)));
1820
 
1821
      insns = get_insns ();
1822
      end_sequence ();
1823
 
1824
      emit_libcall_block (insns, cmp, cmp,
1825
                          gen_rtx_fmt_ee (code, BImode, *op0, *op1));
1826
      code = NE;
1827
    }
1828
  else
1829
    {
1830
      cmp = gen_reg_rtx (BImode);
1831
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
1832
                              gen_rtx_fmt_ee (code, BImode, *op0, *op1)));
1833
      code = NE;
1834
    }
1835
 
1836
  *expr = gen_rtx_fmt_ee (code, VOIDmode, cmp, const0_rtx);
1837
  *op0 = cmp;
1838
  *op1 = const0_rtx;
1839
}
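/* Sketch of the HPUX TFmode path above: a comparison a <= b on TFmode
   operands becomes, roughly,

        ret = _U_Qfcmp (a, b, QCMP_LT | QCMP_EQ | QCMP_INV);
        cmp = (ret != 0);

   after which the caller tests CMP against zero with NE, just as in the
   ordinary non-libcall case.  */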
1840
 
1841
/* Generate an integral vector comparison.  Return true if the condition has
1842
   been reversed, and so the sense of the comparison should be inverted.  */
1843
 
1844
static bool
1845
ia64_expand_vecint_compare (enum rtx_code code, enum machine_mode mode,
1846
                            rtx dest, rtx op0, rtx op1)
1847
{
1848
  bool negate = false;
1849
  rtx x;
1850
 
1851
  /* Canonicalize the comparison to EQ, GT, GTU.  */
1852
  switch (code)
1853
    {
1854
    case EQ:
1855
    case GT:
1856
    case GTU:
1857
      break;
1858
 
1859
    case NE:
1860
    case LE:
1861
    case LEU:
1862
      code = reverse_condition (code);
1863
      negate = true;
1864
      break;
1865
 
1866
    case GE:
1867
    case GEU:
1868
      code = reverse_condition (code);
1869
      negate = true;
1870
      /* FALLTHRU */
1871
 
1872
    case LT:
1873
    case LTU:
1874
      code = swap_condition (code);
1875
      x = op0, op0 = op1, op1 = x;
1876
      break;
1877
 
1878
    default:
1879
      gcc_unreachable ();
1880
    }
1881
 
1882
  /* Unsigned parallel compare is not supported by the hardware.  Play some
1883
     tricks to turn this into a signed comparison against 0.  */
1884
  if (code == GTU)
1885
    {
1886
      switch (mode)
1887
        {
1888
        case V2SImode:
1889
          {
1890
            rtx t1, t2, mask;
1891
 
1892
            /* Subtract (-(INT MAX) - 1) from both operands to make
1893
               them signed.  */
1894
            mask = GEN_INT (0x80000000);
1895
            mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask));
1896
            mask = force_reg (mode, mask);
1897
            t1 = gen_reg_rtx (mode);
1898
            emit_insn (gen_subv2si3 (t1, op0, mask));
1899
            t2 = gen_reg_rtx (mode);
1900
            emit_insn (gen_subv2si3 (t2, op1, mask));
1901
            op0 = t1;
1902
            op1 = t2;
1903
            code = GT;
1904
          }
1905
          break;
1906
 
1907
        case V8QImode:
1908
        case V4HImode:
1909
          /* Perform a parallel unsigned saturating subtraction.  */
1910
          x = gen_reg_rtx (mode);
1911
          emit_insn (gen_rtx_SET (VOIDmode, x,
1912
                                  gen_rtx_US_MINUS (mode, op0, op1)));
1913
 
1914
          code = EQ;
1915
          op0 = x;
1916
          op1 = CONST0_RTX (mode);
1917
          negate = !negate;
1918
          break;
1919
 
1920
        default:
1921
          gcc_unreachable ();
1922
        }
1923
    }
1924
 
1925
  x = gen_rtx_fmt_ee (code, mode, op0, op1);
1926
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
1927
 
1928
  return negate;
1929
}
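/* The unsigned tricks above, spelled out: for V2SImode, x GTU y is done by
   flipping the sign bit of both operands (subtracting 0x80000000) and then
   comparing with signed GT; for V8QImode and V4HImode, x GTU y is
   equivalent to (x -us y) != 0, emitted as an EQ against zero with the
   NEGATE result inverted.  */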
1930
 
1931
/* Emit an integral vector conditional move.  */
1932
 
1933
void
1934
ia64_expand_vecint_cmov (rtx operands[])
1935
{
1936
  enum machine_mode mode = GET_MODE (operands[0]);
1937
  enum rtx_code code = GET_CODE (operands[3]);
1938
  bool negate;
1939
  rtx cmp, x, ot, of;
1940
 
1941
  cmp = gen_reg_rtx (mode);
1942
  negate = ia64_expand_vecint_compare (code, mode, cmp,
1943
                                       operands[4], operands[5]);
1944
 
1945
  ot = operands[1+negate];
1946
  of = operands[2-negate];
1947
 
1948
  if (ot == CONST0_RTX (mode))
1949
    {
1950
      if (of == CONST0_RTX (mode))
1951
        {
1952
          emit_move_insn (operands[0], ot);
1953
          return;
1954
        }
1955
 
1956
      x = gen_rtx_NOT (mode, cmp);
1957
      x = gen_rtx_AND (mode, x, of);
1958
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1959
    }
1960
  else if (of == CONST0_RTX (mode))
1961
    {
1962
      x = gen_rtx_AND (mode, cmp, ot);
1963
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1964
    }
1965
  else
1966
    {
1967
      rtx t, f;
1968
 
1969
      t = gen_reg_rtx (mode);
1970
      x = gen_rtx_AND (mode, cmp, operands[1+negate]);
1971
      emit_insn (gen_rtx_SET (VOIDmode, t, x));
1972
 
1973
      f = gen_reg_rtx (mode);
1974
      x = gen_rtx_NOT (mode, cmp);
1975
      x = gen_rtx_AND (mode, x, operands[2-negate]);
1976
      emit_insn (gen_rtx_SET (VOIDmode, f, x));
1977
 
1978
      x = gen_rtx_IOR (mode, t, f);
1979
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1980
    }
1981
}
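/* In the general case the conditional move above computes
   operands[0] = (cmp & op_true) | (~cmp & op_false), relying on the
   compare result being all-ones or all-zeros in each element; the two
   special cases simply drop the unnecessary half when one arm is the
   zero vector.  */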
1982
 
1983
/* Emit an integral vector min or max operation.  Return true if all done.  */
1984
 
1985
bool
1986
ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
1987
                           rtx operands[])
1988
{
1989
  rtx xops[6];
1990
 
1991
  /* These four combinations are supported directly.  */
1992
  if (mode == V8QImode && (code == UMIN || code == UMAX))
1993
    return false;
1994
  if (mode == V4HImode && (code == SMIN || code == SMAX))
1995
    return false;
1996
 
1997
  /* This combination can be implemented with only saturating subtraction.  */
1998
  if (mode == V4HImode && code == UMAX)
1999
    {
2000
      rtx x, tmp = gen_reg_rtx (mode);
2001
 
2002
      x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
2003
      emit_insn (gen_rtx_SET (VOIDmode, tmp, x));
2004
 
2005
      emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
2006
      return true;
2007
    }
2008
 
2009
  /* Everything else implemented via vector comparisons.  */
2010
  xops[0] = operands[0];
2011
  xops[4] = xops[1] = operands[1];
2012
  xops[5] = xops[2] = operands[2];
2013
 
2014
  switch (code)
2015
    {
2016
    case UMIN:
2017
      code = LTU;
2018
      break;
2019
    case UMAX:
2020
      code = GTU;
2021
      break;
2022
    case SMIN:
2023
      code = LT;
2024
      break;
2025
    case SMAX:
2026
      code = GT;
2027
      break;
2028
    default:
2029
      gcc_unreachable ();
2030
    }
2031
  xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
2032
 
2033
  ia64_expand_vecint_cmov (xops);
2034
  return true;
2035
}
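/* The V4HImode UMAX identity used above is umax (a, b) = (a -us b) + b,
   where -us is unsigned saturating subtraction: it yields 0 when b >= a
   and a - b otherwise.  */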
2036
 
2037
/* The vectors LO and HI each contain N halves of a double-wide vector.
2038
   Reassemble either the first N/2 or the second N/2 elements.  */
2039
 
2040
void
2041
ia64_unpack_assemble (rtx out, rtx lo, rtx hi, bool highp)
2042
{
2043
  enum machine_mode vmode = GET_MODE (lo);
2044
  unsigned int i, high, nelt = GET_MODE_NUNITS (vmode);
2045
  struct expand_vec_perm_d d;
2046
  bool ok;
2047
 
2048
  d.target = gen_lowpart (vmode, out);
2049
  d.op0 = (TARGET_BIG_ENDIAN ? hi : lo);
2050
  d.op1 = (TARGET_BIG_ENDIAN ? lo : hi);
2051
  d.vmode = vmode;
2052
  d.nelt = nelt;
2053
  d.one_operand_p = false;
2054
  d.testing_p = false;
2055
 
2056
  high = (highp ? nelt / 2 : 0);
2057
  for (i = 0; i < nelt / 2; ++i)
2058
    {
2059
      d.perm[i * 2] = i + high;
2060
      d.perm[i * 2 + 1] = i + high + nelt;
2061
    }
2062
 
2063
  ok = ia64_expand_vec_perm_const_1 (&d);
2064
  gcc_assert (ok);
2065
}
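/* The permutation built above interleaves the two inputs.  As an example,
   with nelt == 8 and HIGHP false the selector is
   { 0, 8, 1, 9, 2, 10, 3, 11 }, i.e. the low halves of the two operands
   element-interleaved; with HIGHP true every index is offset by nelt/2.  */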
2066
 
2067
/* Return a vector of the sign-extension of VEC.  */
2068
 
2069
static rtx
2070
ia64_unpack_sign (rtx vec, bool unsignedp)
2071
{
2072
  enum machine_mode mode = GET_MODE (vec);
2073
  rtx zero = CONST0_RTX (mode);
2074
 
2075
  if (unsignedp)
2076
    return zero;
2077
  else
2078
    {
2079
      rtx sign = gen_reg_rtx (mode);
2080
      bool neg;
2081
 
2082
      neg = ia64_expand_vecint_compare (LT, mode, sign, vec, zero);
2083
      gcc_assert (!neg);
2084
 
2085
      return sign;
2086
    }
2087
}
2088
 
2089
/* Emit an integral vector unpack operation.  */
2090
 
2091
void
2092
ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
2093
{
2094
  rtx sign = ia64_unpack_sign (operands[1], unsignedp);
2095
  ia64_unpack_assemble (operands[0], operands[1], sign, highp);
2096
}
2097
 
2098
/* Emit an integral vector widening sum operation.  */
2099
 
2100
void
2101
ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
2102
{
2103
  enum machine_mode wmode;
2104
  rtx l, h, t, sign;
2105
 
2106
  sign = ia64_unpack_sign (operands[1], unsignedp);
2107
 
2108
  wmode = GET_MODE (operands[0]);
2109
  l = gen_reg_rtx (wmode);
2110
  h = gen_reg_rtx (wmode);
2111
 
2112
  ia64_unpack_assemble (l, operands[1], sign, false);
2113
  ia64_unpack_assemble (h, operands[1], sign, true);
2114
 
2115
  t = expand_binop (wmode, add_optab, l, operands[2], NULL, 0, OPTAB_DIRECT);
2116
  t = expand_binop (wmode, add_optab, h, t, operands[0], 0, OPTAB_DIRECT);
2117
  if (t != operands[0])
2118
    emit_move_insn (operands[0], t);
2119
}
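/* Net effect of the widening sum above, in wide-mode arithmetic:
   operands[0] = unpack_lo (operands[1]) + unpack_hi (operands[1])
                 + operands[2],
   where the unpacks are the sign- or zero-extending assemblies built from
   the SIGN vector.  */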
2120
 
2121
/* Emit a signed or unsigned V8QI dot product operation.  */
2122
 
2123
void
2124
ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp)
2125
{
2126
  rtx op1, op2, sn1, sn2, l1, l2, h1, h2;
2127
  rtx p1, p2, p3, p4, s1, s2, s3;
2128
 
2129
  op1 = operands[1];
2130
  op2 = operands[2];
2131
  sn1 = ia64_unpack_sign (op1, unsignedp);
2132
  sn2 = ia64_unpack_sign (op2, unsignedp);
2133
 
2134
  l1 = gen_reg_rtx (V4HImode);
2135
  l2 = gen_reg_rtx (V4HImode);
2136
  h1 = gen_reg_rtx (V4HImode);
2137
  h2 = gen_reg_rtx (V4HImode);
2138
  ia64_unpack_assemble (l1, op1, sn1, false);
2139
  ia64_unpack_assemble (l2, op2, sn2, false);
2140
  ia64_unpack_assemble (h1, op1, sn1, true);
2141
  ia64_unpack_assemble (h2, op2, sn2, true);
2142
 
2143
  p1 = gen_reg_rtx (V2SImode);
2144
  p2 = gen_reg_rtx (V2SImode);
2145
  p3 = gen_reg_rtx (V2SImode);
2146
  p4 = gen_reg_rtx (V2SImode);
2147
  emit_insn (gen_pmpy2_even (p1, l1, l2));
2148
  emit_insn (gen_pmpy2_even (p2, h1, h2));
2149
  emit_insn (gen_pmpy2_odd (p3, l1, l2));
2150
  emit_insn (gen_pmpy2_odd (p4, h1, h2));
2151
 
2152
  s1 = gen_reg_rtx (V2SImode);
2153
  s2 = gen_reg_rtx (V2SImode);
2154
  s3 = gen_reg_rtx (V2SImode);
2155
  emit_insn (gen_addv2si3 (s1, p1, p2));
2156
  emit_insn (gen_addv2si3 (s2, p3, p4));
2157
  emit_insn (gen_addv2si3 (s3, s1, operands[3]));
2158
  emit_insn (gen_addv2si3 (operands[0], s2, s3));
2159
}
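/* Shape of the dot product above: both V8QI inputs are widened into V4HI
   low and high halves, the pmpy2_even/pmpy2_odd patterns form four V2SI
   partial products, and three V2SI additions fold those together with the
   accumulator in operands[3].  */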
2160
 
2161
/* Emit the appropriate sequence for a call.  */
2162
 
2163
void
2164
ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
2165
                  int sibcall_p)
2166
{
2167
  rtx insn, b0;
2168
 
2169
  addr = XEXP (addr, 0);
2170
  addr = convert_memory_address (DImode, addr);
2171
  b0 = gen_rtx_REG (DImode, R_BR (0));
2172
 
2173
  /* ??? Should do this for functions known to bind local too.  */
2174
  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
2175
    {
2176
      if (sibcall_p)
2177
        insn = gen_sibcall_nogp (addr);
2178
      else if (! retval)
2179
        insn = gen_call_nogp (addr, b0);
2180
      else
2181
        insn = gen_call_value_nogp (retval, addr, b0);
2182
      insn = emit_call_insn (insn);
2183
    }
2184
  else
2185
    {
2186
      if (sibcall_p)
2187
        insn = gen_sibcall_gp (addr);
2188
      else if (! retval)
2189
        insn = gen_call_gp (addr, b0);
2190
      else
2191
        insn = gen_call_value_gp (retval, addr, b0);
2192
      insn = emit_call_insn (insn);
2193
 
2194
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
2195
    }
2196
 
2197
  if (sibcall_p)
2198
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
2199
 
2200
  if (TARGET_ABI_OPEN_VMS)
2201
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
2202
             gen_rtx_REG (DImode, GR_REG (25)));
2203
}
2204
 
2205
static void
2206
reg_emitted (enum ia64_frame_regs r)
2207
{
2208
  if (emitted_frame_related_regs[r] == 0)
2209
    emitted_frame_related_regs[r] = current_frame_info.r[r];
2210
  else
2211
    gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]);
2212
}
2213
 
2214
static int
2215
get_reg (enum ia64_frame_regs r)
2216
{
2217
  reg_emitted (r);
2218
  return current_frame_info.r[r];
2219
}
2220
 
2221
static bool
2222
is_emitted (int regno)
2223
{
2224
  unsigned int r;
2225
 
2226
  for (r = reg_fp; r < number_of_ia64_frame_regs; r++)
2227
    if (emitted_frame_related_regs[r] == regno)
2228
      return true;
2229
  return false;
2230
}
2231
 
2232
void
2233
ia64_reload_gp (void)
2234
{
2235
  rtx tmp;
2236
 
2237
  if (current_frame_info.r[reg_save_gp])
2238
    {
2239
      tmp = gen_rtx_REG (DImode, get_reg (reg_save_gp));
2240
    }
2241
  else
2242
    {
2243
      HOST_WIDE_INT offset;
2244
      rtx offset_r;
2245
 
2246
      offset = (current_frame_info.spill_cfa_off
2247
                + current_frame_info.spill_size);
2248
      if (frame_pointer_needed)
2249
        {
2250
          tmp = hard_frame_pointer_rtx;
2251
          offset = -offset;
2252
        }
2253
      else
2254
        {
2255
          tmp = stack_pointer_rtx;
2256
          offset = current_frame_info.total_size - offset;
2257
        }
2258
 
2259
      offset_r = GEN_INT (offset);
2260
      if (satisfies_constraint_I (offset_r))
2261
        emit_insn (gen_adddi3 (pic_offset_table_rtx, tmp, offset_r));
2262
      else
2263
        {
2264
          emit_move_insn (pic_offset_table_rtx, offset_r);
2265
          emit_insn (gen_adddi3 (pic_offset_table_rtx,
2266
                                 pic_offset_table_rtx, tmp));
2267
        }
2268
 
2269
      tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
2270
    }
2271
 
2272
  emit_move_insn (pic_offset_table_rtx, tmp);
2273
}
2274
 
2275
void
2276
ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
2277
                 rtx scratch_b, int noreturn_p, int sibcall_p)
2278
{
2279
  rtx insn;
2280
  bool is_desc = false;
2281
 
2282
  /* If we find we're calling through a register, then we're actually
2283
     calling through a descriptor, so load up the values.  */
2284
  if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
2285
    {
2286
      rtx tmp;
2287
      bool addr_dead_p;
2288
 
2289
      /* ??? We are currently constrained to *not* use peep2, because
2290
         we can legitimately change the global lifetime of the GP
2291
         (in the form of killing where previously live).  This is
2292
         because a call through a descriptor doesn't use the previous
2293
         value of the GP, while a direct call does, and we do not
2294
         commit to either form until the split here.
2295
 
2296
         That said, this means that we lack precise life info for
2297
         whether ADDR is dead after this call.  This is not terribly
2298
         important, since we can fix things up essentially for free
2299
         with the POST_DEC below, but it's nice to not use it when we
2300
         can immediately tell it's not necessary.  */
2301
      addr_dead_p = ((noreturn_p || sibcall_p
2302
                      || TEST_HARD_REG_BIT (regs_invalidated_by_call,
2303
                                            REGNO (addr)))
2304
                     && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
2305
 
2306
      /* Load the code address into scratch_b.  */
2307
      tmp = gen_rtx_POST_INC (Pmode, addr);
2308
      tmp = gen_rtx_MEM (Pmode, tmp);
2309
      emit_move_insn (scratch_r, tmp);
2310
      emit_move_insn (scratch_b, scratch_r);
2311
 
2312
      /* Load the GP address.  If ADDR is not dead here, then we must
2313
         revert the change made above via the POST_INCREMENT.  */
2314
      if (!addr_dead_p)
2315
        tmp = gen_rtx_POST_DEC (Pmode, addr);
2316
      else
2317
        tmp = addr;
2318
      tmp = gen_rtx_MEM (Pmode, tmp);
2319
      emit_move_insn (pic_offset_table_rtx, tmp);
2320
 
2321
      is_desc = true;
2322
      addr = scratch_b;
2323
    }
2324
 
2325
  if (sibcall_p)
2326
    insn = gen_sibcall_nogp (addr);
2327
  else if (retval)
2328
    insn = gen_call_value_nogp (retval, addr, retaddr);
2329
  else
2330
    insn = gen_call_nogp (addr, retaddr);
2331
  emit_call_insn (insn);
2332
 
2333
  if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
2334
    ia64_reload_gp ();
2335
}
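/* Rough shape of the descriptor call split above (register choices are
   only illustrative):

        ld8 r_tmp = [addr], 8   ;; code address from the descriptor
        mov b6 = r_tmp
        ld8 gp = [addr]         ;; new gp; a post-decrement restores addr
                                ;; when addr is still live
        br.call b0 = b6

   with gp reloaded afterwards unless the call does not return or is a
   sibcall.  A call not made through a general register skips the
   descriptor loads entirely.  */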
2336
 
2337
/* Expand an atomic operation.  We want to perform MEM <CODE>= VAL atomically.
2338
 
2339
   This differs from the generic code in that we know about the zero-extending
2340
   properties of cmpxchg, and the zero-extending requirements of ar.ccv.  We
2341
   also know that ld.acq+cmpxchg.rel equals a full barrier.
2342
 
2343
   The loop we want to generate looks like
2344
 
2345
        cmp_reg = mem;
2346
      label:
2347
        old_reg = cmp_reg;
2348
        new_reg = cmp_reg op val;
2349
        cmp_reg = compare-and-swap(mem, old_reg, new_reg)
2350
        if (cmp_reg != old_reg)
2351
          goto label;
2352
 
2353
   Note that we only do the plain load from memory once.  Subsequent
2354
   iterations use the value loaded by the compare-and-swap pattern.  */
2355
 
2356
void
2357
ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
2358
                       rtx old_dst, rtx new_dst, enum memmodel model)
2359
{
2360
  enum machine_mode mode = GET_MODE (mem);
2361
  rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
2362
  enum insn_code icode;
2363
 
2364
  /* Special case for using fetchadd.  */
2365
  if ((mode == SImode || mode == DImode)
2366
      && (code == PLUS || code == MINUS)
2367
      && fetchadd_operand (val, mode))
2368
    {
2369
      if (code == MINUS)
2370
        val = GEN_INT (-INTVAL (val));
2371
 
2372
      if (!old_dst)
2373
        old_dst = gen_reg_rtx (mode);
2374
 
2375
      switch (model)
2376
        {
2377
        case MEMMODEL_ACQ_REL:
2378
        case MEMMODEL_SEQ_CST:
2379
          emit_insn (gen_memory_barrier ());
2380
          /* FALLTHRU */
2381
        case MEMMODEL_RELAXED:
2382
        case MEMMODEL_ACQUIRE:
2383
        case MEMMODEL_CONSUME:
2384
          if (mode == SImode)
2385
            icode = CODE_FOR_fetchadd_acq_si;
2386
          else
2387
            icode = CODE_FOR_fetchadd_acq_di;
2388
          break;
2389
        case MEMMODEL_RELEASE:
2390
          if (mode == SImode)
2391
            icode = CODE_FOR_fetchadd_rel_si;
2392
          else
2393
            icode = CODE_FOR_fetchadd_rel_di;
2394
          break;
2395
 
2396
        default:
2397
          gcc_unreachable ();
2398
        }
2399
 
2400
      emit_insn (GEN_FCN (icode) (old_dst, mem, val));
2401
 
2402
      if (new_dst)
2403
        {
2404
          new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
2405
                                         true, OPTAB_WIDEN);
2406
          if (new_reg != new_dst)
2407
            emit_move_insn (new_dst, new_reg);
2408
        }
2409
      return;
2410
    }
2411
 
2412
  /* Because of the volatile mem read, we get an ld.acq, which is the
2413
     front half of the full barrier.  The end half is the cmpxchg.rel.
2414
     For relaxed and release memory models, we don't need this.  But we
2415
     also don't bother trying to prevent it either.  */
2416
  gcc_assert (model == MEMMODEL_RELAXED
2417
              || model == MEMMODEL_RELEASE
2418
              || MEM_VOLATILE_P (mem));
2419
 
2420
  old_reg = gen_reg_rtx (DImode);
2421
  cmp_reg = gen_reg_rtx (DImode);
2422
  label = gen_label_rtx ();
2423
 
2424
  if (mode != DImode)
2425
    {
2426
      val = simplify_gen_subreg (DImode, val, mode, 0);
2427
      emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
2428
    }
2429
  else
2430
    emit_move_insn (cmp_reg, mem);
2431
 
2432
  emit_label (label);
2433
 
2434
  ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
2435
  emit_move_insn (old_reg, cmp_reg);
2436
  emit_move_insn (ar_ccv, cmp_reg);
2437
 
2438
  if (old_dst)
2439
    emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
2440
 
2441
  new_reg = cmp_reg;
2442
  if (code == NOT)
2443
    {
2444
      new_reg = expand_simple_binop (DImode, AND, new_reg, val, NULL_RTX,
2445
                                     true, OPTAB_DIRECT);
2446
      new_reg = expand_simple_unop (DImode, code, new_reg, NULL_RTX, true);
2447
    }
2448
  else
2449
    new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
2450
                                   true, OPTAB_DIRECT);
2451
 
2452
  if (mode != DImode)
2453
    new_reg = gen_lowpart (mode, new_reg);
2454
  if (new_dst)
2455
    emit_move_insn (new_dst, new_reg);
2456
 
2457
  switch (model)
2458
    {
2459
    case MEMMODEL_RELAXED:
2460
    case MEMMODEL_ACQUIRE:
2461
    case MEMMODEL_CONSUME:
2462
      switch (mode)
2463
        {
2464
        case QImode: icode = CODE_FOR_cmpxchg_acq_qi;  break;
2465
        case HImode: icode = CODE_FOR_cmpxchg_acq_hi;  break;
2466
        case SImode: icode = CODE_FOR_cmpxchg_acq_si;  break;
2467
        case DImode: icode = CODE_FOR_cmpxchg_acq_di;  break;
2468
        default:
2469
          gcc_unreachable ();
2470
        }
2471
      break;
2472
 
2473
    case MEMMODEL_RELEASE:
2474
    case MEMMODEL_ACQ_REL:
2475
    case MEMMODEL_SEQ_CST:
2476
      switch (mode)
2477
        {
2478
        case QImode: icode = CODE_FOR_cmpxchg_rel_qi;  break;
2479
        case HImode: icode = CODE_FOR_cmpxchg_rel_hi;  break;
2480
        case SImode: icode = CODE_FOR_cmpxchg_rel_si;  break;
2481
        case DImode: icode = CODE_FOR_cmpxchg_rel_di;  break;
2482
        default:
2483
          gcc_unreachable ();
2484
        }
2485
      break;
2486
 
2487
    default:
2488
      gcc_unreachable ();
2489
    }
2490
 
2491
  emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
2492
 
2493
  emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
2494
}
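/* Note on the fetchadd fast path above: it is taken only when VAL is one
   of the small constants accepted by fetchadd_operand (the increments that
   the fetchadd instruction encodes directly); everything else goes through
   the ld.acq / cmpxchg.rel loop described before the function.  */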
2495
 
2496
/* Begin the assembly file.  */
2497
 
2498
static void
2499
ia64_file_start (void)
2500
{
2501
  default_file_start ();
2502
  emit_safe_across_calls ();
2503
}
2504
 
2505
void
2506
emit_safe_across_calls (void)
2507
{
2508
  unsigned int rs, re;
2509
  int out_state;
2510
 
2511
  rs = 1;
2512
  out_state = 0;
2513
  while (1)
2514
    {
2515
      while (rs < 64 && call_used_regs[PR_REG (rs)])
2516
        rs++;
2517
      if (rs >= 64)
2518
        break;
2519
      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
2520
        continue;
2521
      if (out_state == 0)
2522
        {
2523
          fputs ("\t.pred.safe_across_calls ", asm_out_file);
2524
          out_state = 1;
2525
        }
2526
      else
2527
        fputc (',', asm_out_file);
2528
      if (re == rs + 1)
2529
        fprintf (asm_out_file, "p%u", rs);
2530
      else
2531
        fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
2532
      rs = re + 1;
2533
    }
2534
  if (out_state)
2535
    fputc ('\n', asm_out_file);
2536
}
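/* With the usual IA-64 call-used predicate partition this emits a single
   directive of the form

        .pred.safe_across_calls p1-p5,p16-p63

   naming each maximal run of call-saved predicate registers; nothing is
   emitted when every predicate is call-used.  */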
2537
 
2538
/* Globalize a declaration.  */
2539
 
2540
static void
2541
ia64_globalize_decl_name (FILE * stream, tree decl)
2542
{
2543
  const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
2544
  tree version_attr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl));
2545
  if (version_attr)
2546
    {
2547
      tree v = TREE_VALUE (TREE_VALUE (version_attr));
2548
      const char *p = TREE_STRING_POINTER (v);
2549
      fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", name, name, p);
2550
    }
2551
  targetm.asm_out.globalize_label (stream, name);
2552
  if (TREE_CODE (decl) == FUNCTION_DECL)
2553
    ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
2554
}
2555
 
2556
/* Helper function for ia64_compute_frame_size: find an appropriate general
2557
   register to spill the special register R to.  current_frame_info.gr_used_mask
2558
   holds the bits in GR0 to GR31 that have already been allocated by this
2559
   routine.  TRY_LOCALS is true if we should attempt to locate a local regnum.  */
2560
 
2561
static int
2562
find_gr_spill (enum ia64_frame_regs r, int try_locals)
2563
{
2564
  int regno;
2565
 
2566
  if (emitted_frame_related_regs[r] != 0)
2567
    {
2568
      regno = emitted_frame_related_regs[r];
2569
      if (regno >= LOC_REG (0) && regno < LOC_REG (80 - frame_pointer_needed)
2570
          && current_frame_info.n_local_regs < regno - LOC_REG (0) + 1)
2571
        current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2572
      else if (current_function_is_leaf
2573
               && regno >= GR_REG (1) && regno <= GR_REG (31))
2574
        current_frame_info.gr_used_mask |= 1 << regno;
2575
 
2576
      return regno;
2577
    }
2578
 
2579
  /* If this is a leaf function, first try an otherwise unused
2580
     call-clobbered register.  */
2581
  if (current_function_is_leaf)
2582
    {
2583
      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2584
        if (! df_regs_ever_live_p (regno)
2585
            && call_used_regs[regno]
2586
            && ! fixed_regs[regno]
2587
            && ! global_regs[regno]
2588
            && ((current_frame_info.gr_used_mask >> regno) & 1) == 0
2589
            && ! is_emitted (regno))
2590
          {
2591
            current_frame_info.gr_used_mask |= 1 << regno;
2592
            return regno;
2593
          }
2594
    }
2595
 
2596
  if (try_locals)
2597
    {
2598
      regno = current_frame_info.n_local_regs;
2599
      /* If there is a frame pointer, then we can't use loc79, because
2600
         that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
2601
         reg_name switching code in ia64_expand_prologue.  */
2602
      while (regno < (80 - frame_pointer_needed))
2603
        if (! is_emitted (LOC_REG (regno++)))
2604
          {
2605
            current_frame_info.n_local_regs = regno;
2606
            return LOC_REG (regno - 1);
2607
          }
2608
    }
2609
 
2610
  /* Failed to find a general register to spill to.  Must use stack.  */
2611
  return 0;
2612
}
2613
 
2614
/* In order to make for nice schedules, we try to allocate every temporary
2615
   to a different register.  We must of course stay away from call-saved,
2616
   fixed, and global registers.  We must also stay away from registers
2617
   allocated in current_frame_info.gr_used_mask, since those include regs
2618
   used all through the prologue.
2619
 
2620
   Any register allocated here must be used immediately.  The idea is to
2621
   aid scheduling, not to solve data flow problems.  */
2622
 
2623
static int last_scratch_gr_reg;
2624
 
2625
static int
2626
next_scratch_gr_reg (void)
2627
{
2628
  int i, regno;
2629
 
2630
  for (i = 0; i < 32; ++i)
2631
    {
2632
      regno = (last_scratch_gr_reg + i + 1) & 31;
2633
      if (call_used_regs[regno]
2634
          && ! fixed_regs[regno]
2635
          && ! global_regs[regno]
2636
          && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2637
        {
2638
          last_scratch_gr_reg = regno;
2639
          return regno;
2640
        }
2641
    }
2642
 
2643
  /* There must be _something_ available.  */
2644
  gcc_unreachable ();
2645
}
2646
 
2647
/* Helper function for ia64_compute_frame_size, called through
2648
   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */
2649
 
2650
static void
2651
mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
2652
{
2653
  unsigned int regno = REGNO (reg);
2654
  if (regno < 32)
2655
    {
2656
      unsigned int i, n = hard_regno_nregs[regno][GET_MODE (reg)];
2657
      for (i = 0; i < n; ++i)
2658
        current_frame_info.gr_used_mask |= 1 << (regno + i);
2659
    }
2660
}
2661
 
2662
 
2663
/* Compute the size and layout of the stack frame for the current function,
2664
   recording the results in current_frame_info.  SIZE is the number of bytes
2665
   of space needed for local variables.  */
2666
 
2667
static void
2668
ia64_compute_frame_size (HOST_WIDE_INT size)
2669
{
2670
  HOST_WIDE_INT total_size;
2671
  HOST_WIDE_INT spill_size = 0;
2672
  HOST_WIDE_INT extra_spill_size = 0;
2673
  HOST_WIDE_INT pretend_args_size;
2674
  HARD_REG_SET mask;
2675
  int n_spilled = 0;
2676
  int spilled_gr_p = 0;
2677
  int spilled_fr_p = 0;
2678
  unsigned int regno;
2679
  int min_regno;
2680
  int max_regno;
2681
  int i;
2682
 
2683
  if (current_frame_info.initialized)
2684
    return;
2685
 
2686
  memset (&current_frame_info, 0, sizeof current_frame_info);
2687
  CLEAR_HARD_REG_SET (mask);
2688
 
2689
  /* Don't allocate scratches to the return register.  */
2690
  diddle_return_value (mark_reg_gr_used_mask, NULL);
2691
 
2692
  /* Don't allocate scratches to the EH scratch registers.  */
2693
  if (cfun->machine->ia64_eh_epilogue_sp)
2694
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2695
  if (cfun->machine->ia64_eh_epilogue_bsp)
2696
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
2697
 
2698
  /* Find the size of the register stack frame.  We have only 80 local
2699
     registers, because we reserve 8 for the inputs and 8 for the
2700
     outputs.  */
2701
 
2702
  /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2703
     since we'll be adjusting that down later.  */
2704
  regno = LOC_REG (78) + ! frame_pointer_needed;
2705
  for (; regno >= LOC_REG (0); regno--)
2706
    if (df_regs_ever_live_p (regno) && !is_emitted (regno))
2707
      break;
2708
  current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2709
 
2710
  /* For functions marked with the syscall_linkage attribute, we must mark
2711
     all eight input registers as in use, so that locals aren't visible to
2712
     the caller.  */
2713
 
2714
  if (cfun->machine->n_varargs > 0
2715
      || lookup_attribute ("syscall_linkage",
2716
                           TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
2717
    current_frame_info.n_input_regs = 8;
2718
  else
2719
    {
2720
      for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
2721
        if (df_regs_ever_live_p (regno))
2722
          break;
2723
      current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2724
    }
2725
 
2726
  for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
2727
    if (df_regs_ever_live_p (regno))
2728
      break;
2729
  i = regno - OUT_REG (0) + 1;
2730
 
2731
#ifndef PROFILE_HOOK
2732
  /* When -p profiling, we need one output register for the mcount argument.
2733
     Likewise for -a profiling for the bb_init_func argument.  For -ax
2734
     profiling, we need two output registers for the two bb_init_trace_func
2735
     arguments.  */
2736
  if (crtl->profile)
2737
    i = MAX (i, 1);
2738
#endif
2739
  current_frame_info.n_output_regs = i;
2740
 
2741
  /* ??? No rotating register support yet.  */
2742
  current_frame_info.n_rotate_regs = 0;
2743
 
2744
  /* Discover which registers need spilling, and how much room that
2745
     will take.  Begin with floating point and general registers,
2746
     which will always wind up on the stack.  */
2747
 
2748
  for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
2749
    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2750
      {
2751
        SET_HARD_REG_BIT (mask, regno);
2752
        spill_size += 16;
2753
        n_spilled += 1;
2754
        spilled_fr_p = 1;
2755
      }
2756
 
2757
  for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2758
    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2759
      {
2760
        SET_HARD_REG_BIT (mask, regno);
2761
        spill_size += 8;
2762
        n_spilled += 1;
2763
        spilled_gr_p = 1;
2764
      }
2765
 
2766
  for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
2767
    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2768
      {
2769
        SET_HARD_REG_BIT (mask, regno);
2770
        spill_size += 8;
2771
        n_spilled += 1;
2772
      }
2773
 
2774
  /* Now come all special registers that might get saved in other
2775
     general registers.  */
2776
 
2777
  if (frame_pointer_needed)
2778
    {
2779
      current_frame_info.r[reg_fp] = find_gr_spill (reg_fp, 1);
2780
      /* If we did not get a register, then we take LOC79.  This is guaranteed
2781
         to be free, even if regs_ever_live is already set, because this is
2782
         HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
2783
         as we don't count loc79 above.  */
2784
      if (current_frame_info.r[reg_fp] == 0)
2785
        {
2786
          current_frame_info.r[reg_fp] = LOC_REG (79);
2787
          current_frame_info.n_local_regs = LOC_REG (79) - LOC_REG (0) + 1;
2788
        }
2789
    }
2790
 
2791
  if (! current_function_is_leaf)
2792
    {
2793
      /* Emit a save of BR0 if we call other functions.  Do this even
2794
         if this function doesn't return, as EH depends on this to be
2795
         able to unwind the stack.  */
2796
      SET_HARD_REG_BIT (mask, BR_REG (0));
2797
 
2798
      current_frame_info.r[reg_save_b0] = find_gr_spill (reg_save_b0, 1);
2799
      if (current_frame_info.r[reg_save_b0] == 0)
2800
        {
2801
          extra_spill_size += 8;
2802
          n_spilled += 1;
2803
        }
2804
 
2805
      /* Similarly for ar.pfs.  */
2806
      SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2807
      current_frame_info.r[reg_save_ar_pfs] = find_gr_spill (reg_save_ar_pfs, 1);
2808
      if (current_frame_info.r[reg_save_ar_pfs] == 0)
2809
        {
2810
          extra_spill_size += 8;
2811
          n_spilled += 1;
2812
        }
2813
 
2814
      /* Similarly for gp.  Note that if we're calling setjmp, the stacked
2815
         registers are clobbered, so we fall back to the stack.  */
2816
      current_frame_info.r[reg_save_gp]
2817
        = (cfun->calls_setjmp ? 0 : find_gr_spill (reg_save_gp, 1));
2818
      if (current_frame_info.r[reg_save_gp] == 0)
2819
        {
2820
          SET_HARD_REG_BIT (mask, GR_REG (1));
2821
          spill_size += 8;
2822
          n_spilled += 1;
2823
        }
2824
    }
2825
  else
2826
    {
2827
      if (df_regs_ever_live_p (BR_REG (0)) && ! call_used_regs[BR_REG (0)])
2828
        {
2829
          SET_HARD_REG_BIT (mask, BR_REG (0));
2830
          extra_spill_size += 8;
2831
          n_spilled += 1;
2832
        }
2833
 
2834
      if (df_regs_ever_live_p (AR_PFS_REGNUM))
2835
        {
2836
          SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2837
          current_frame_info.r[reg_save_ar_pfs]
2838
            = find_gr_spill (reg_save_ar_pfs, 1);
2839
          if (current_frame_info.r[reg_save_ar_pfs] == 0)
2840
            {
2841
              extra_spill_size += 8;
2842
              n_spilled += 1;
2843
            }
2844
        }
2845
    }
2846
 
2847
  /* Unwind descriptor hackery: things are most efficient if we allocate
2848
     consecutive GR save registers for RP, PFS, FP in that order. However,
2849
     it is absolutely critical that FP get the only hard register that's
2850
     guaranteed to be free, so we allocated it first.  If all three did
2851
     happen to be allocated hard regs, and are consecutive, rearrange them
2852
     into the preferred order now.
2853
 
2854
     If we have already emitted code for any of those registers,
2855
     then it's already too late to change.  */
2856
  min_regno = MIN (current_frame_info.r[reg_fp],
2857
                   MIN (current_frame_info.r[reg_save_b0],
2858
                        current_frame_info.r[reg_save_ar_pfs]));
2859
  max_regno = MAX (current_frame_info.r[reg_fp],
2860
                   MAX (current_frame_info.r[reg_save_b0],
2861
                        current_frame_info.r[reg_save_ar_pfs]));
2862
  if (min_regno > 0
2863
      && min_regno + 2 == max_regno
2864
      && (current_frame_info.r[reg_fp] == min_regno + 1
2865
          || current_frame_info.r[reg_save_b0] == min_regno + 1
2866
          || current_frame_info.r[reg_save_ar_pfs] == min_regno + 1)
2867
      && (emitted_frame_related_regs[reg_save_b0] == 0
2868
          || emitted_frame_related_regs[reg_save_b0] == min_regno)
2869
      && (emitted_frame_related_regs[reg_save_ar_pfs] == 0
2870
          || emitted_frame_related_regs[reg_save_ar_pfs] == min_regno + 1)
2871
      && (emitted_frame_related_regs[reg_fp] == 0
2872
          || emitted_frame_related_regs[reg_fp] == min_regno + 2))
2873
    {
2874
      current_frame_info.r[reg_save_b0] = min_regno;
2875
      current_frame_info.r[reg_save_ar_pfs] = min_regno + 1;
2876
      current_frame_info.r[reg_fp] = min_regno + 2;
2877
    }
2878
 
2879
  /* See if we need to store the predicate register block.  */
2880
  for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2881
    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2882
      break;
2883
  if (regno <= PR_REG (63))
2884
    {
2885
      SET_HARD_REG_BIT (mask, PR_REG (0));
2886
      current_frame_info.r[reg_save_pr] = find_gr_spill (reg_save_pr, 1);
2887
      if (current_frame_info.r[reg_save_pr] == 0)
2888
        {
2889
          extra_spill_size += 8;
2890
          n_spilled += 1;
2891
        }
2892
 
2893
      /* ??? Mark them all as used so that register renaming and such
2894
         are free to use them.  */
2895
      for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2896
        df_set_regs_ever_live (regno, true);
2897
    }
2898
 
2899
  /* If we're forced to use st8.spill, we're forced to save and restore
2900
     ar.unat as well.  The check for existing liveness allows inline asm
2901
     to touch ar.unat.  */
2902
  if (spilled_gr_p || cfun->machine->n_varargs
2903
      || df_regs_ever_live_p (AR_UNAT_REGNUM))
2904
    {
2905
      df_set_regs_ever_live (AR_UNAT_REGNUM, true);
2906
      SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2907
      current_frame_info.r[reg_save_ar_unat]
2908
        = find_gr_spill (reg_save_ar_unat, spill_size == 0);
2909
      if (current_frame_info.r[reg_save_ar_unat] == 0)
2910
        {
2911
          extra_spill_size += 8;
2912
          n_spilled += 1;
2913
        }
2914
    }
2915
 
2916
  if (df_regs_ever_live_p (AR_LC_REGNUM))
2917
    {
2918
      SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2919
      current_frame_info.r[reg_save_ar_lc]
2920
        = find_gr_spill (reg_save_ar_lc, spill_size == 0);
2921
      if (current_frame_info.r[reg_save_ar_lc] == 0)
2922
        {
2923
          extra_spill_size += 8;
2924
          n_spilled += 1;
2925
        }
2926
    }
2927
 
2928
  /* If we have an odd number of words of pretend arguments written to
2929
     the stack, then the FR save area will be unaligned.  We round the
2930
     size of this area up to keep things 16 byte aligned.  */
2931
  if (spilled_fr_p)
2932
    pretend_args_size = IA64_STACK_ALIGN (crtl->args.pretend_args_size);
2933
  else
2934
    pretend_args_size = crtl->args.pretend_args_size;
2935
 
2936
  total_size = (spill_size + extra_spill_size + size + pretend_args_size
2937
                + crtl->outgoing_args_size);
2938
  total_size = IA64_STACK_ALIGN (total_size);
2939
 
2940
  /* We always use the 16-byte scratch area provided by the caller, but
2941
     if we are a leaf function, there's no one to which we need to provide
2942
     a scratch area.  */
2943
  if (current_function_is_leaf)
2944
    total_size = MAX (0, total_size - 16);
2945
 
2946
  current_frame_info.total_size = total_size;
2947
  current_frame_info.spill_cfa_off = pretend_args_size - 16;
2948
  current_frame_info.spill_size = spill_size;
2949
  current_frame_info.extra_spill_size = extra_spill_size;
2950
  COPY_HARD_REG_SET (current_frame_info.mask, mask);
2951
  current_frame_info.n_spilled = n_spilled;
2952
  current_frame_info.initialized = reload_completed;
2953
}
2954
 
2955
/* Worker function for TARGET_CAN_ELIMINATE.  */
2956
 
2957
bool
2958
ia64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
2959
{
2960
  return (to == BR_REG (0) ? current_function_is_leaf : true);
2961
}
2962
 
2963
/* Compute the initial difference between the specified pair of registers.  */
2964
 
2965
HOST_WIDE_INT
2966
ia64_initial_elimination_offset (int from, int to)
2967
{
2968
  HOST_WIDE_INT offset;
2969
 
2970
  ia64_compute_frame_size (get_frame_size ());
2971
  switch (from)
2972
    {
2973
    case FRAME_POINTER_REGNUM:
2974
      switch (to)
2975
        {
2976
        case HARD_FRAME_POINTER_REGNUM:
2977
          if (current_function_is_leaf)
2978
            offset = -current_frame_info.total_size;
2979
          else
2980
            offset = -(current_frame_info.total_size
2981
                       - crtl->outgoing_args_size - 16);
2982
          break;
2983
 
2984
        case STACK_POINTER_REGNUM:
2985
          if (current_function_is_leaf)
2986
            offset = 0;
2987
          else
2988
            offset = 16 + crtl->outgoing_args_size;
2989
          break;
2990
 
2991
        default:
2992
          gcc_unreachable ();
2993
        }
2994
      break;
2995
 
2996
    case ARG_POINTER_REGNUM:
2997
      /* Arguments start above the 16 byte save area, unless stdarg,
2998
         in which case we store through the 16 byte save area.  */
2999
      switch (to)
3000
        {
3001
        case HARD_FRAME_POINTER_REGNUM:
3002
          offset = 16 - crtl->args.pretend_args_size;
3003
          break;
3004
 
3005
        case STACK_POINTER_REGNUM:
3006
          offset = (current_frame_info.total_size
3007
                    + 16 - crtl->args.pretend_args_size);
3008
          break;
3009
 
3010
        default:
3011
          gcc_unreachable ();
3012
        }
3013
      break;
3014
 
3015
    default:
3016
      gcc_unreachable ();
3017
    }
3018
 
3019
  return offset;
3020
}
3021
 
3022
/* If there are more than a trivial number of register spills, we use
3023
   two interleaved iterators so that we can get two memory references
3024
   per insn group.
3025
 
3026
   In order to simplify things in the prologue and epilogue expanders,
3027
   we use helper functions to fix up the memory references after the
3028
   fact with the appropriate offsets to a POST_MODIFY memory mode.
3029
   The following data structure tracks the state of the two iterators
3030
   while insns are being emitted.  */
3031
 
3032
struct spill_fill_data
3033
{
3034
  rtx init_after;               /* point at which to emit initializations */
3035
  rtx init_reg[2];              /* initial base register */
3036
  rtx iter_reg[2];              /* the iterator registers */
3037
  rtx *prev_addr[2];            /* address of last memory use */
3038
  rtx prev_insn[2];             /* the insn corresponding to prev_addr */
3039
  HOST_WIDE_INT prev_off[2];    /* last offset */
3040
  int n_iter;                   /* number of iterators in use */
3041
  int next_iter;                /* next iterator to use */
3042
  unsigned int save_gr_used_mask;
3043
};
3044
 
3045
static struct spill_fill_data spill_fill_data;
3046
 
3047
static void
3048
setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
3049
{
3050
  int i;
3051
 
3052
  spill_fill_data.init_after = get_last_insn ();
3053
  spill_fill_data.init_reg[0] = init_reg;
3054
  spill_fill_data.init_reg[1] = init_reg;
3055
  spill_fill_data.prev_addr[0] = NULL;
3056
  spill_fill_data.prev_addr[1] = NULL;
3057
  spill_fill_data.prev_insn[0] = NULL;
3058
  spill_fill_data.prev_insn[1] = NULL;
3059
  spill_fill_data.prev_off[0] = cfa_off;
3060
  spill_fill_data.prev_off[1] = cfa_off;
3061
  spill_fill_data.next_iter = 0;
3062
  spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
3063
 
3064
  spill_fill_data.n_iter = 1 + (n_spills > 2);
3065
  for (i = 0; i < spill_fill_data.n_iter; ++i)
3066
    {
3067
      int regno = next_scratch_gr_reg ();
3068
      spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
3069
      current_frame_info.gr_used_mask |= 1 << regno;
3070
    }
3071
}
3072
 
3073
static void
3074
finish_spill_pointers (void)
3075
{
3076
  current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
3077
}
3078
 
3079
static rtx
3080
spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
3081
{
3082
  int iter = spill_fill_data.next_iter;
3083
  HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
3084
  rtx disp_rtx = GEN_INT (disp);
3085
  rtx mem;
3086
 
3087
  if (spill_fill_data.prev_addr[iter])
3088
    {
3089
      if (satisfies_constraint_N (disp_rtx))
3090
        {
3091
          *spill_fill_data.prev_addr[iter]
3092
            = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
3093
                                   gen_rtx_PLUS (DImode,
3094
                                                 spill_fill_data.iter_reg[iter],
3095
                                                 disp_rtx));
3096
          add_reg_note (spill_fill_data.prev_insn[iter],
3097
                        REG_INC, spill_fill_data.iter_reg[iter]);
3098
        }
3099
      else
3100
        {
3101
          /* ??? Could use register post_modify for loads.  */
3102
          if (!satisfies_constraint_I (disp_rtx))
3103
            {
3104
              rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
3105
              emit_move_insn (tmp, disp_rtx);
3106
              disp_rtx = tmp;
3107
            }
3108
          emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
3109
                                 spill_fill_data.iter_reg[iter], disp_rtx));
3110
        }
3111
    }
3112
  /* Micro-optimization: if we've created a frame pointer, it's at
3113
     CFA 0, which may allow the real iterator to be initialized lower,
3114
     slightly increasing parallelism.  Also, if there are few saves
3115
     it may eliminate the iterator entirely.  */
3116
  else if (disp == 0
3117
           && spill_fill_data.init_reg[iter] == stack_pointer_rtx
3118
           && frame_pointer_needed)
3119
    {
3120
      mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
3121
      set_mem_alias_set (mem, get_varargs_alias_set ());
3122
      return mem;
3123
    }
3124
  else
3125
    {
3126
      rtx seq, insn;
3127
 
3128
      if (disp == 0)
3129
        seq = gen_movdi (spill_fill_data.iter_reg[iter],
3130
                         spill_fill_data.init_reg[iter]);
3131
      else
3132
        {
3133
          start_sequence ();
3134
 
3135
          if (!satisfies_constraint_I (disp_rtx))
3136
            {
3137
              rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
3138
              emit_move_insn (tmp, disp_rtx);
3139
              disp_rtx = tmp;
3140
            }
3141
 
3142
          emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
3143
                                 spill_fill_data.init_reg[iter],
3144
                                 disp_rtx));
3145
 
3146
          seq = get_insns ();
3147
          end_sequence ();
3148
        }
3149
 
3150
      /* Be careful about being the first insn in a sequence.  */
3151
      if (spill_fill_data.init_after)
3152
        insn = emit_insn_after (seq, spill_fill_data.init_after);
3153
      else
3154
        {
3155
          rtx first = get_insns ();
3156
          if (first)
3157
            insn = emit_insn_before (seq, first);
3158
          else
3159
            insn = emit_insn (seq);
3160
        }
3161
      spill_fill_data.init_after = insn;
3162
    }
3163
 
3164
  mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
3165
 
3166
  /* ??? Not all of the spills are for varargs, but some of them are.
3167
     The rest of the spills belong in an alias set of their own.  But
3168
     it doesn't actually hurt to include them here.  */
3169
  set_mem_alias_set (mem, get_varargs_alias_set ());
3170
 
3171
  spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
3172
  spill_fill_data.prev_off[iter] = cfa_off;
3173
 
3174
  if (++iter >= spill_fill_data.n_iter)
3175
    iter = 0;
3176
  spill_fill_data.next_iter = iter;
3177
 
3178
  return mem;
3179
}
3180
 
3181
static void
3182
do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
3183
          rtx frame_reg)
3184
{
3185
  int iter = spill_fill_data.next_iter;
3186
  rtx mem, insn;
3187
 
3188
  mem = spill_restore_mem (reg, cfa_off);
3189
  insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
3190
  spill_fill_data.prev_insn[iter] = insn;
3191
 
3192
  if (frame_reg)
3193
    {
3194
      rtx base;
3195
      HOST_WIDE_INT off;
3196
 
3197
      RTX_FRAME_RELATED_P (insn) = 1;
3198
 
3199
      /* Don't even pretend that the unwind code can intuit its way
3200
         through a pair of interleaved post_modify iterators.  Just
3201
         provide the correct answer.  */
3202
 
3203
      if (frame_pointer_needed)
3204
        {
3205
          base = hard_frame_pointer_rtx;
3206
          off = - cfa_off;
3207
        }
3208
      else
3209
        {
3210
          base = stack_pointer_rtx;
3211
          off = current_frame_info.total_size - cfa_off;
3212
        }
3213
 
3214
      add_reg_note (insn, REG_CFA_OFFSET,
3215
                    gen_rtx_SET (VOIDmode,
3216
                                 gen_rtx_MEM (GET_MODE (reg),
3217
                                              plus_constant (base, off)),
3218
                                 frame_reg));
3219
    }
3220
}
3221
 
3222
static void
3223
do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
3224
{
3225
  int iter = spill_fill_data.next_iter;
3226
  rtx insn;
3227
 
3228
  insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
3229
                                GEN_INT (cfa_off)));
3230
  spill_fill_data.prev_insn[iter] = insn;
3231
}
3232
 
3233
/* Wrapper functions that discard the CONST_INT spill offset.  These
3234
   exist so that we can give gr_spill/gr_fill the offset they need and
3235
   use a consistent function interface.  */
3236
 
3237
static rtx
3238
gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3239
{
3240
  return gen_movdi (dest, src);
3241
}
3242
 
3243
static rtx
3244
gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3245
{
3246
  return gen_fr_spill (dest, src);
3247
}
3248
 
3249
static rtx
3250
gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3251
{
3252
  return gen_fr_restore (dest, src);
3253
}
3254
 
3255
/* Called after register allocation to add any instructions needed for the
3256
   prologue.  Using a prologue insn is favored compared to putting all of the
3257
   instructions in output_function_prologue(), since it allows the scheduler
3258
   to intermix instructions with the saves of the caller saved registers.  In
3259
   some cases, it might be necessary to emit a barrier instruction as the last
3260
   insn to prevent such scheduling.
3261
 
3262
   Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
3263
   so that the debug info generation code can handle them properly.
3264
 
3265
   The register save area is laid out like so:
3266
   cfa+16
3267
        [ varargs spill area ]
3268
        [ fr register spill area ]
3269
        [ br register spill area ]
3270
        [ ar register spill area ]
3271
        [ pr register spill area ]
3272
        [ gr register spill area ] */
3273
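/* Editor's note: an illustrative sketch, not part of the original
   sources.  The prologue walks this save area top-down: it starts at
   spill_cfa_off + spill_size + extra_spill_size and subtracts 8 per
   64-bit slot and 16 per FR slot; the helper below restates that
   bookkeeping with hypothetical counts.  When the counts match
   current_frame_info, the result is spill_cfa_off again, which is what
   the gcc_assert at the end of ia64_expand_prologue checks.  */

static HOST_WIDE_INT
example_walk_save_area (HOST_WIDE_INT bottom_cfa_off,
                        int n_64bit_saves, int n_fr_saves)
{
  HOST_WIDE_INT cfa_off = bottom_cfa_off;
  int i;

  for (i = 0; i < n_64bit_saves; i++)
    cfa_off -= 8;       /* pr/ar/br/gr slots are 8 bytes each.  */
  for (i = 0; i < n_fr_saves; i++)
    cfa_off -= 16;      /* fr slots are 16 bytes and 16-byte aligned.  */

  return cfa_off;
}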
 
3274
/* ??? We get inefficient code when the frame size is larger than can fit in an
3275
   adds instruction.  */
3276
 
3277
void
3278
ia64_expand_prologue (void)
3279
{
3280
  rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
3281
  int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
3282
  rtx reg, alt_reg;
3283
 
3284
  ia64_compute_frame_size (get_frame_size ());
3285
  last_scratch_gr_reg = 15;
3286
 
3287
  if (flag_stack_usage_info)
3288
    current_function_static_stack_size = current_frame_info.total_size;
3289
 
3290
  if (dump_file)
3291
    {
3292
      fprintf (dump_file, "ia64 frame related registers "
3293
               "recorded in current_frame_info.r[]:\n");
3294
#define PRINTREG(a) if (current_frame_info.r[a]) \
3295
        fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a])
3296
      PRINTREG(reg_fp);
3297
      PRINTREG(reg_save_b0);
3298
      PRINTREG(reg_save_pr);
3299
      PRINTREG(reg_save_ar_pfs);
3300
      PRINTREG(reg_save_ar_unat);
3301
      PRINTREG(reg_save_ar_lc);
3302
      PRINTREG(reg_save_gp);
3303
#undef PRINTREG
3304
    }
3305
 
3306
  /* If there is no epilogue, then we don't need some prologue insns.
3307
     We need to avoid emitting the dead prologue insns, because flow
3308
     will complain about them.  */
3309
  if (optimize)
3310
    {
3311
      edge e;
3312
      edge_iterator ei;
3313
 
3314
      FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
3315
        if ((e->flags & EDGE_FAKE) == 0
3316
            && (e->flags & EDGE_FALLTHRU) != 0)
3317
          break;
3318
      epilogue_p = (e != NULL);
3319
    }
3320
  else
3321
    epilogue_p = 1;
3322
 
3323
  /* Set the local, input, and output register names.  We need to do this
3324
     for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
3325
     half.  If we use in/loc/out register names, then we get assembler errors
3326
     in crtn.S because there is no alloc insn or regstk directive in there.  */
3327
  if (! TARGET_REG_NAMES)
3328
    {
3329
      int inputs = current_frame_info.n_input_regs;
3330
      int locals = current_frame_info.n_local_regs;
3331
      int outputs = current_frame_info.n_output_regs;
3332
 
3333
      for (i = 0; i < inputs; i++)
3334
        reg_names[IN_REG (i)] = ia64_reg_numbers[i];
3335
      for (i = 0; i < locals; i++)
3336
        reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
3337
      for (i = 0; i < outputs; i++)
3338
        reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
3339
    }
3340
 
3341
  /* Set the frame pointer register name.  The regnum is logically loc79,
3342
     but of course we'll not have allocated that many locals.  Rather than
3343
     worrying about renumbering the existing rtxs, we adjust the name.  */
3344
  /* ??? This code means that we can never use one local register when
3345
     there is a frame pointer.  loc79 gets wasted in this case, as it is
3346
     renamed to a register that will never be used.  See also the try_locals
3347
     code in find_gr_spill.  */
3348
  if (current_frame_info.r[reg_fp])
3349
    {
3350
      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3351
      reg_names[HARD_FRAME_POINTER_REGNUM]
3352
        = reg_names[current_frame_info.r[reg_fp]];
3353
      reg_names[current_frame_info.r[reg_fp]] = tmp;
3354
    }
3355
 
3356
  /* We don't need an alloc instruction if we've used no outputs or locals.  */
3357
  if (current_frame_info.n_local_regs == 0
3358
      && current_frame_info.n_output_regs == 0
3359
      && current_frame_info.n_input_regs <= crtl->args.info.int_regs
3360
      && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3361
    {
3362
      /* If there is no alloc, but there are input registers used, then we
3363
         need a .regstk directive.  */
3364
      current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
3365
      ar_pfs_save_reg = NULL_RTX;
3366
    }
3367
  else
3368
    {
3369
      current_frame_info.need_regstk = 0;
3370
 
3371
      if (current_frame_info.r[reg_save_ar_pfs])
3372
        {
3373
          regno = current_frame_info.r[reg_save_ar_pfs];
3374
          reg_emitted (reg_save_ar_pfs);
3375
        }
3376
      else
3377
        regno = next_scratch_gr_reg ();
3378
      ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
3379
 
3380
      insn = emit_insn (gen_alloc (ar_pfs_save_reg,
3381
                                   GEN_INT (current_frame_info.n_input_regs),
3382
                                   GEN_INT (current_frame_info.n_local_regs),
3383
                                   GEN_INT (current_frame_info.n_output_regs),
3384
                                   GEN_INT (current_frame_info.n_rotate_regs)));
3385
      if (current_frame_info.r[reg_save_ar_pfs])
3386
        {
3387
          RTX_FRAME_RELATED_P (insn) = 1;
3388
          add_reg_note (insn, REG_CFA_REGISTER,
3389
                        gen_rtx_SET (VOIDmode,
3390
                                     ar_pfs_save_reg,
3391
                                     gen_rtx_REG (DImode, AR_PFS_REGNUM)));
3392
        }
3393
    }
3394
 
3395
  /* Set up frame pointer, stack pointer, and spill iterators.  */
3396
 
3397
  n_varargs = cfun->machine->n_varargs;
3398
  setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
3399
                        stack_pointer_rtx, 0);
3400
 
3401
  if (frame_pointer_needed)
3402
    {
3403
      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3404
      RTX_FRAME_RELATED_P (insn) = 1;
3405
 
3406
      /* Force the unwind info to recognize this as defining a new CFA,
3407
         rather than some temp register setup.  */
3408
      add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX);
3409
    }
3410
 
3411
  if (current_frame_info.total_size != 0)
3412
    {
3413
      rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
3414
      rtx offset;
3415
 
3416
      if (satisfies_constraint_I (frame_size_rtx))
3417
        offset = frame_size_rtx;
3418
      else
3419
        {
3420
          regno = next_scratch_gr_reg ();
3421
          offset = gen_rtx_REG (DImode, regno);
3422
          emit_move_insn (offset, frame_size_rtx);
3423
        }
3424
 
3425
      insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
3426
                                    stack_pointer_rtx, offset));
3427
 
3428
      if (! frame_pointer_needed)
3429
        {
3430
          RTX_FRAME_RELATED_P (insn) = 1;
3431
          add_reg_note (insn, REG_CFA_ADJUST_CFA,
3432
                        gen_rtx_SET (VOIDmode,
3433
                                     stack_pointer_rtx,
3434
                                     gen_rtx_PLUS (DImode,
3435
                                                   stack_pointer_rtx,
3436
                                                   frame_size_rtx)));
3437
        }
3438
 
3439
      /* ??? At this point we must generate a magic insn that appears to
3440
         modify the stack pointer, the frame pointer, and all spill
3441
         iterators.  This would allow the most scheduling freedom.  For
3442
         now, just hard stop.  */
3443
      emit_insn (gen_blockage ());
3444
    }
3445
 
3446
  /* Must copy out ar.unat before doing any integer spills.  */
3447
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3448
    {
3449
      if (current_frame_info.r[reg_save_ar_unat])
3450
        {
3451
          ar_unat_save_reg
3452
            = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3453
          reg_emitted (reg_save_ar_unat);
3454
        }
3455
      else
3456
        {
3457
          alt_regno = next_scratch_gr_reg ();
3458
          ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3459
          current_frame_info.gr_used_mask |= 1 << alt_regno;
3460
        }
3461
 
3462
      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3463
      insn = emit_move_insn (ar_unat_save_reg, reg);
3464
      if (current_frame_info.r[reg_save_ar_unat])
3465
        {
3466
          RTX_FRAME_RELATED_P (insn) = 1;
3467
          add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3468
        }
3469
 
3470
      /* Even if we're not going to generate an epilogue, we still
3471
         need to save the register so that EH works.  */
3472
      if (! epilogue_p && current_frame_info.r[reg_save_ar_unat])
3473
        emit_insn (gen_prologue_use (ar_unat_save_reg));
3474
    }
3475
  else
3476
    ar_unat_save_reg = NULL_RTX;
3477
 
3478
  /* Spill all varargs registers.  Do this before spilling any GR registers,
3479
     since we want the UNAT bits for the GR registers to override the UNAT
3480
     bits from varargs, which we don't care about.  */
3481
 
3482
  cfa_off = -16;
3483
  for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
3484
    {
3485
      reg = gen_rtx_REG (DImode, regno);
3486
      do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
3487
    }
3488
 
3489
  /* Locate the bottom of the register save area.  */
3490
  cfa_off = (current_frame_info.spill_cfa_off
3491
             + current_frame_info.spill_size
3492
             + current_frame_info.extra_spill_size);
3493
 
3494
  /* Save the predicate register block either in a register or in memory.  */
3495
  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3496
    {
3497
      reg = gen_rtx_REG (DImode, PR_REG (0));
3498
      if (current_frame_info.r[reg_save_pr] != 0)
3499
        {
3500
          alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3501
          reg_emitted (reg_save_pr);
3502
          insn = emit_move_insn (alt_reg, reg);
3503
 
3504
          /* ??? Denote pr spill/fill by a DImode move that modifies all
3505
             64 hard registers.  */
3506
          RTX_FRAME_RELATED_P (insn) = 1;
3507
          add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3508
 
3509
          /* Even if we're not going to generate an epilogue, we still
3510
             need to save the register so that EH works.  */
3511
          if (! epilogue_p)
3512
            emit_insn (gen_prologue_use (alt_reg));
3513
        }
3514
      else
3515
        {
3516
          alt_regno = next_scratch_gr_reg ();
3517
          alt_reg = gen_rtx_REG (DImode, alt_regno);
3518
          insn = emit_move_insn (alt_reg, reg);
3519
          do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3520
          cfa_off -= 8;
3521
        }
3522
    }
3523
 
3524
  /* Handle AR regs in numerical order.  All of them get special handling.  */
3525
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
3526
      && current_frame_info.r[reg_save_ar_unat] == 0)
3527
    {
3528
      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3529
      do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
3530
      cfa_off -= 8;
3531
    }
3532
 
3533
  /* The alloc insn already copied ar.pfs into a general register.  The
3534
     only thing we have to do now is copy that register to a stack slot
3535
     if we'd not allocated a local register for the job.  */
3536
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
3537
      && current_frame_info.r[reg_save_ar_pfs] == 0)
3538
    {
3539
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3540
      do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
3541
      cfa_off -= 8;
3542
    }
3543
 
3544
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3545
    {
3546
      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3547
      if (current_frame_info.r[reg_save_ar_lc] != 0)
3548
        {
3549
          alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3550
          reg_emitted (reg_save_ar_lc);
3551
          insn = emit_move_insn (alt_reg, reg);
3552
          RTX_FRAME_RELATED_P (insn) = 1;
3553
          add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3554
 
3555
          /* Even if we're not going to generate an epilogue, we still
3556
             need to save the register so that EH works.  */
3557
          if (! epilogue_p)
3558
            emit_insn (gen_prologue_use (alt_reg));
3559
        }
3560
      else
3561
        {
3562
          alt_regno = next_scratch_gr_reg ();
3563
          alt_reg = gen_rtx_REG (DImode, alt_regno);
3564
          emit_move_insn (alt_reg, reg);
3565
          do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3566
          cfa_off -= 8;
3567
        }
3568
    }
3569
 
3570
  /* Save the return pointer.  */
3571
  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3572
    {
3573
      reg = gen_rtx_REG (DImode, BR_REG (0));
3574
      if (current_frame_info.r[reg_save_b0] != 0)
3575
        {
3576
          alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3577
          reg_emitted (reg_save_b0);
3578
          insn = emit_move_insn (alt_reg, reg);
3579
          RTX_FRAME_RELATED_P (insn) = 1;
3580
          add_reg_note (insn, REG_CFA_REGISTER,
3581
                        gen_rtx_SET (VOIDmode, alt_reg, pc_rtx));
3582
 
3583
          /* Even if we're not going to generate an epilogue, we still
3584
             need to save the register so that EH works.  */
3585
          if (! epilogue_p)
3586
            emit_insn (gen_prologue_use (alt_reg));
3587
        }
3588
      else
3589
        {
3590
          alt_regno = next_scratch_gr_reg ();
3591
          alt_reg = gen_rtx_REG (DImode, alt_regno);
3592
          emit_move_insn (alt_reg, reg);
3593
          do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3594
          cfa_off -= 8;
3595
        }
3596
    }
3597
 
3598
  if (current_frame_info.r[reg_save_gp])
3599
    {
3600
      reg_emitted (reg_save_gp);
3601
      insn = emit_move_insn (gen_rtx_REG (DImode,
3602
                                          current_frame_info.r[reg_save_gp]),
3603
                             pic_offset_table_rtx);
3604
    }
3605
 
3606
  /* We should now be at the base of the gr/br/fr spill area.  */
3607
  gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3608
                          + current_frame_info.spill_size));
3609
 
3610
  /* Spill all general registers.  */
3611
  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3612
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3613
      {
3614
        reg = gen_rtx_REG (DImode, regno);
3615
        do_spill (gen_gr_spill, reg, cfa_off, reg);
3616
        cfa_off -= 8;
3617
      }
3618
 
3619
  /* Spill the rest of the BR registers.  */
3620
  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3621
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3622
      {
3623
        alt_regno = next_scratch_gr_reg ();
3624
        alt_reg = gen_rtx_REG (DImode, alt_regno);
3625
        reg = gen_rtx_REG (DImode, regno);
3626
        emit_move_insn (alt_reg, reg);
3627
        do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3628
        cfa_off -= 8;
3629
      }
3630
 
3631
  /* Align the frame and spill all FR registers.  */
3632
  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3633
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3634
      {
3635
        gcc_assert (!(cfa_off & 15));
3636
        reg = gen_rtx_REG (XFmode, regno);
3637
        do_spill (gen_fr_spill_x, reg, cfa_off, reg);
3638
        cfa_off -= 16;
3639
      }
3640
 
3641
  gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3642
 
3643
  finish_spill_pointers ();
3644
}
3645
 
3646
/* Output the textual info surrounding the prologue.  */
3647
 
3648
void
3649
ia64_start_function (FILE *file, const char *fnname,
3650
                     tree decl ATTRIBUTE_UNUSED)
3651
{
3652
#if VMS_DEBUGGING_INFO
3653
  if (vms_debug_main
3654
      && debug_info_level > DINFO_LEVEL_NONE
3655
      && strncmp (vms_debug_main, fnname, strlen (vms_debug_main)) == 0)
3656
    {
3657
      targetm.asm_out.globalize_label (asm_out_file, VMS_DEBUG_MAIN_POINTER);
3658
      ASM_OUTPUT_DEF (asm_out_file, VMS_DEBUG_MAIN_POINTER, fnname);
3659
      dwarf2out_vms_debug_main_pointer ();
3660
      vms_debug_main = 0;
3661
    }
3662
#endif
3663
 
3664
  fputs ("\t.proc ", file);
3665
  assemble_name (file, fnname);
3666
  fputc ('\n', file);
3667
  ASM_OUTPUT_LABEL (file, fnname);
3668
}
3669
 
3670
/* Called after register allocation to add any instructions needed for the
3671
   epilogue.  Using an epilogue insn is favored compared to putting all of the
3672
   instructions in output_function_epilogue(), since it allows the scheduler
3673
   to intermix instructions with the saves of the caller saved registers.  In
3674
   some cases, it might be necessary to emit a barrier instruction as the last
3675
   insn to prevent such scheduling.  */
3676
 
3677
void
3678
ia64_expand_epilogue (int sibcall_p)
3679
{
3680
  rtx insn, reg, alt_reg, ar_unat_save_reg;
3681
  int regno, alt_regno, cfa_off;
3682
 
3683
  ia64_compute_frame_size (get_frame_size ());
3684
 
3685
  /* If there is a frame pointer, then we use it instead of the stack
3686
     pointer, so that the stack pointer does not need to be valid when
3687
     the epilogue starts.  See EXIT_IGNORE_STACK.  */
3688
  if (frame_pointer_needed)
3689
    setup_spill_pointers (current_frame_info.n_spilled,
3690
                          hard_frame_pointer_rtx, 0);
3691
  else
3692
    setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
3693
                          current_frame_info.total_size);
3694
 
3695
  if (current_frame_info.total_size != 0)
3696
    {
3697
      /* ??? At this point we must generate a magic insn that appears to
3698
         modify the spill iterators and the frame pointer.  This would
3699
         allow the most scheduling freedom.  For now, just hard stop.  */
3700
      emit_insn (gen_blockage ());
3701
    }
3702
 
3703
  /* Locate the bottom of the register save area.  */
3704
  cfa_off = (current_frame_info.spill_cfa_off
3705
             + current_frame_info.spill_size
3706
             + current_frame_info.extra_spill_size);
3707
 
3708
  /* Restore the predicate registers.  */
3709
  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3710
    {
3711
      if (current_frame_info.r[reg_save_pr] != 0)
3712
        {
3713
          alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3714
          reg_emitted (reg_save_pr);
3715
        }
3716
      else
3717
        {
3718
          alt_regno = next_scratch_gr_reg ();
3719
          alt_reg = gen_rtx_REG (DImode, alt_regno);
3720
          do_restore (gen_movdi_x, alt_reg, cfa_off);
3721
          cfa_off -= 8;
3722
        }
3723
      reg = gen_rtx_REG (DImode, PR_REG (0));
3724
      emit_move_insn (reg, alt_reg);
3725
    }
3726
 
3727
  /* Restore the application registers.  */
3728
 
3729
  /* Load the saved unat from the stack, but do not restore it until
3730
     after the GRs have been restored.  */
3731
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3732
    {
3733
      if (current_frame_info.r[reg_save_ar_unat] != 0)
3734
        {
3735
          ar_unat_save_reg
3736
            = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3737
          reg_emitted (reg_save_ar_unat);
3738
        }
3739
      else
3740
        {
3741
          alt_regno = next_scratch_gr_reg ();
3742
          ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3743
          current_frame_info.gr_used_mask |= 1 << alt_regno;
3744
          do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
3745
          cfa_off -= 8;
3746
        }
3747
    }
3748
  else
3749
    ar_unat_save_reg = NULL_RTX;
3750
 
3751
  if (current_frame_info.r[reg_save_ar_pfs] != 0)
3752
    {
3753
      reg_emitted (reg_save_ar_pfs);
3754
      alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_pfs]);
3755
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3756
      emit_move_insn (reg, alt_reg);
3757
    }
3758
  else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3759
    {
3760
      alt_regno = next_scratch_gr_reg ();
3761
      alt_reg = gen_rtx_REG (DImode, alt_regno);
3762
      do_restore (gen_movdi_x, alt_reg, cfa_off);
3763
      cfa_off -= 8;
3764
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3765
      emit_move_insn (reg, alt_reg);
3766
    }
3767
 
3768
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3769
    {
3770
      if (current_frame_info.r[reg_save_ar_lc] != 0)
3771
        {
3772
          alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3773
          reg_emitted (reg_save_ar_lc);
3774
        }
3775
      else
3776
        {
3777
          alt_regno = next_scratch_gr_reg ();
3778
          alt_reg = gen_rtx_REG (DImode, alt_regno);
3779
          do_restore (gen_movdi_x, alt_reg, cfa_off);
3780
          cfa_off -= 8;
3781
        }
3782
      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3783
      emit_move_insn (reg, alt_reg);
3784
    }
3785
 
3786
  /* Restore the return pointer.  */
3787
  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3788
    {
3789
      if (current_frame_info.r[reg_save_b0] != 0)
3790
        {
3791
         alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3792
         reg_emitted (reg_save_b0);
3793
        }
3794
      else
3795
        {
3796
          alt_regno = next_scratch_gr_reg ();
3797
          alt_reg = gen_rtx_REG (DImode, alt_regno);
3798
          do_restore (gen_movdi_x, alt_reg, cfa_off);
3799
          cfa_off -= 8;
3800
        }
3801
      reg = gen_rtx_REG (DImode, BR_REG (0));
3802
      emit_move_insn (reg, alt_reg);
3803
    }
3804
 
3805
  /* We should now be at the base of the gr/br/fr spill area.  */
3806
  gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3807
                          + current_frame_info.spill_size));
3808
 
3809
  /* The GP may be stored on the stack in the prologue, but it's
3810
     never restored in the epilogue.  Skip the stack slot.  */
3811
  if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
3812
    cfa_off -= 8;
3813
 
3814
  /* Restore all general registers.  */
3815
  for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
3816
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3817
      {
3818
        reg = gen_rtx_REG (DImode, regno);
3819
        do_restore (gen_gr_restore, reg, cfa_off);
3820
        cfa_off -= 8;
3821
      }
3822
 
3823
  /* Restore the branch registers.  */
3824
  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3825
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3826
      {
3827
        alt_regno = next_scratch_gr_reg ();
3828
        alt_reg = gen_rtx_REG (DImode, alt_regno);
3829
        do_restore (gen_movdi_x, alt_reg, cfa_off);
3830
        cfa_off -= 8;
3831
        reg = gen_rtx_REG (DImode, regno);
3832
        emit_move_insn (reg, alt_reg);
3833
      }
3834
 
3835
  /* Restore floating point registers.  */
3836
  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3837
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3838
      {
3839
        gcc_assert (!(cfa_off & 15));
3840
        reg = gen_rtx_REG (XFmode, regno);
3841
        do_restore (gen_fr_restore_x, reg, cfa_off);
3842
        cfa_off -= 16;
3843
      }
3844
 
3845
  /* Restore ar.unat for real.  */
3846
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3847
    {
3848
      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3849
      emit_move_insn (reg, ar_unat_save_reg);
3850
    }
3851
 
3852
  gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3853
 
3854
  finish_spill_pointers ();
3855
 
3856
  if (current_frame_info.total_size
3857
      || cfun->machine->ia64_eh_epilogue_sp
3858
      || frame_pointer_needed)
3859
    {
3860
      /* ??? At this point we must generate a magic insn that appears to
3861
         modify the spill iterators, the stack pointer, and the frame
3862
         pointer.  This would allow the most scheduling freedom.  For now,
3863
         just hard stop.  */
3864
      emit_insn (gen_blockage ());
3865
    }
3866
 
3867
  if (cfun->machine->ia64_eh_epilogue_sp)
3868
    emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
3869
  else if (frame_pointer_needed)
3870
    {
3871
      insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
3872
      RTX_FRAME_RELATED_P (insn) = 1;
3873
      add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
3874
    }
3875
  else if (current_frame_info.total_size)
3876
    {
3877
      rtx offset, frame_size_rtx;
3878
 
3879
      frame_size_rtx = GEN_INT (current_frame_info.total_size);
3880
      if (satisfies_constraint_I (frame_size_rtx))
3881
        offset = frame_size_rtx;
3882
      else
3883
        {
3884
          regno = next_scratch_gr_reg ();
3885
          offset = gen_rtx_REG (DImode, regno);
3886
          emit_move_insn (offset, frame_size_rtx);
3887
        }
3888
 
3889
      insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
3890
                                    offset));
3891
 
3892
      RTX_FRAME_RELATED_P (insn) = 1;
3893
      add_reg_note (insn, REG_CFA_ADJUST_CFA,
3894
                    gen_rtx_SET (VOIDmode,
3895
                                 stack_pointer_rtx,
3896
                                 gen_rtx_PLUS (DImode,
3897
                                               stack_pointer_rtx,
3898
                                               frame_size_rtx)));
3899
    }
3900
 
3901
  if (cfun->machine->ia64_eh_epilogue_bsp)
3902
    emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
3903
 
3904
  if (! sibcall_p)
3905
    emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
3906
  else
3907
    {
3908
      int fp = GR_REG (2);
3909
      /* We need a throwaway register here; r0 and r1 are reserved,
3910
         so r2 is the first available call clobbered register.  If
3911
         there was a frame_pointer register, we may have swapped the
3912
         names of r2 and HARD_FRAME_POINTER_REGNUM, so we have to make
3913
         sure we're using the string "r2" when emitting the register
3914
         name for the assembler.  */
3915
      if (current_frame_info.r[reg_fp]
3916
          && current_frame_info.r[reg_fp] == GR_REG (2))
3917
        fp = HARD_FRAME_POINTER_REGNUM;
3918
 
3919
      /* We must emit an alloc to force the input registers to become output
3920
         registers.  Otherwise, if the callee tries to pass its parameters
3921
         through to another call without an intervening alloc, then these
3922
         values get lost.  */
3923
      /* ??? We don't need to preserve all input registers.  We only need to
3924
         preserve those input registers used as arguments to the sibling call.
3925
         It is unclear how to compute that number here.  */
3926
      if (current_frame_info.n_input_regs != 0)
3927
        {
3928
          rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
3929
 
3930
          insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
3931
                                const0_rtx, const0_rtx,
3932
                                n_inputs, const0_rtx));
3933
          RTX_FRAME_RELATED_P (insn) = 1;
3934
 
3935
          /* ??? We need to mark the alloc as frame-related so that it gets
3936
             passed into ia64_asm_unwind_emit for ia64-specific unwinding.
3937
             But there's nothing dwarf2 related to be done wrt the register
3938
             windows.  If we do nothing, dwarf2out will abort on the UNSPEC;
3939
             the empty parallel means dwarf2out will not see anything.  */
3940
          add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3941
                        gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (0)));
3942
        }
3943
    }
3944
}
3945
 
3946
/* Return 1 if br.ret can do all the work required to return from a
3947
   function.  */
3948
 
3949
int
3950
ia64_direct_return (void)
3951
{
3952
  if (reload_completed && ! frame_pointer_needed)
3953
    {
3954
      ia64_compute_frame_size (get_frame_size ());
3955
 
3956
      return (current_frame_info.total_size == 0
3957
              && current_frame_info.n_spilled == 0
3958
              && current_frame_info.r[reg_save_b0] == 0
3959
              && current_frame_info.r[reg_save_pr] == 0
3960
              && current_frame_info.r[reg_save_ar_pfs] == 0
3961
              && current_frame_info.r[reg_save_ar_unat] == 0
3962
              && current_frame_info.r[reg_save_ar_lc] == 0);
3963
    }
3964
  return 0;
3965
}
3966
 
3967
/* Return the magic cookie that we use to hold the return address
3968
   during early compilation.  */
3969
 
3970
rtx
3971
ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
3972
{
3973
  if (count != 0)
3974
    return NULL;
3975
  return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
3976
}
3977
 
3978
/* Split this value after reload, now that we know where the return
3979
   address is saved.  */
3980
 
3981
void
3982
ia64_split_return_addr_rtx (rtx dest)
3983
{
3984
  rtx src;
3985
 
3986
  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3987
    {
3988
      if (current_frame_info.r[reg_save_b0] != 0)
3989
        {
3990
          src = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3991
          reg_emitted (reg_save_b0);
3992
        }
3993
      else
3994
        {
3995
          HOST_WIDE_INT off;
3996
          unsigned int regno;
3997
          rtx off_r;
3998
 
3999
          /* Compute offset from CFA for BR0.  */
4000
          /* ??? Must be kept in sync with ia64_expand_prologue.  */
4001
          off = (current_frame_info.spill_cfa_off
4002
                 + current_frame_info.spill_size);
4003
          for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
4004
            if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4005
              off -= 8;
4006
 
4007
          /* Convert CFA offset to a register based offset.  */
4008
          if (frame_pointer_needed)
4009
            src = hard_frame_pointer_rtx;
4010
          else
4011
            {
4012
              src = stack_pointer_rtx;
4013
              off += current_frame_info.total_size;
4014
            }
4015
 
4016
          /* Load address into scratch register.  */
4017
          off_r = GEN_INT (off);
4018
          if (satisfies_constraint_I (off_r))
4019
            emit_insn (gen_adddi3 (dest, src, off_r));
4020
          else
4021
            {
4022
              emit_move_insn (dest, off_r);
4023
              emit_insn (gen_adddi3 (dest, src, dest));
4024
            }
4025
 
4026
          src = gen_rtx_MEM (Pmode, dest);
4027
        }
4028
    }
4029
  else
4030
    src = gen_rtx_REG (DImode, BR_REG (0));
4031
 
4032
  emit_move_insn (dest, src);
4033
}
4034
 
4035
int
4036
ia64_hard_regno_rename_ok (int from, int to)
4037
{
4038
  /* Don't clobber any of the registers we reserved for the prologue.  */
4039
  unsigned int r;
4040
 
4041
  for (r = reg_fp; r <= reg_save_ar_lc; r++)
4042
    if (to == current_frame_info.r[r]
4043
        || from == current_frame_info.r[r]
4044
        || to == emitted_frame_related_regs[r]
4045
        || from == emitted_frame_related_regs[r])
4046
      return 0;
4047
 
4048
  /* Don't use output registers outside the register frame.  */
4049
  if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
4050
    return 0;
4051
 
4052
  /* Retain even/oddness on predicate register pairs.  */
4053
  if (PR_REGNO_P (from) && PR_REGNO_P (to))
4054
    return (from & 1) == (to & 1);
4055
 
4056
  return 1;
4057
}
4058
 
4059
/* Target hook for assembling integer objects.  Handle word-sized
4060
   aligned objects and detect the cases when @fptr is needed.  */
4061
 
4062
static bool
4063
ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
4064
{
4065
  if (size == POINTER_SIZE / BITS_PER_UNIT
4066
      && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
4067
      && GET_CODE (x) == SYMBOL_REF
4068
      && SYMBOL_REF_FUNCTION_P (x))
4069
    {
4070
      static const char * const directive[2][2] = {
4071
          /* 64-bit pointer */  /* 32-bit pointer */
4072
        { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("},  /* unaligned */
4073
        { "\tdata8\t@fptr(",    "\tdata4\t@fptr("}      /* aligned */
4074
      };
4075
      fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
4076
      output_addr_const (asm_out_file, x);
4077
      fputs (")\n", asm_out_file);
4078
      return true;
4079
    }
4080
  return default_assemble_integer (x, size, aligned_p);
4081
}
4082
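/* Editor's note: an illustrative example, not part of the original
   sources.  For an aligned 64-bit pointer to a function foo, the code
   above emits

        data8   @fptr(foo)

   and for an unaligned 32-bit pointer

        data4.ua        @fptr(foo)

   so the word holds the address of foo's function descriptor rather
   than its raw code address.  */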
 
4083
/* Emit the function prologue.  */
4084
 
4085
static void
4086
ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4087
{
4088
  int mask, grsave, grsave_prev;
4089
 
4090
  if (current_frame_info.need_regstk)
4091
    fprintf (file, "\t.regstk %d, %d, %d, %d\n",
4092
             current_frame_info.n_input_regs,
4093
             current_frame_info.n_local_regs,
4094
             current_frame_info.n_output_regs,
4095
             current_frame_info.n_rotate_regs);
4096
 
4097
  if (ia64_except_unwind_info (&global_options) != UI_TARGET)
4098
    return;
4099
 
4100
  /* Emit the .prologue directive.  */
4101
 
4102
  mask = 0;
4103
  grsave = grsave_prev = 0;
4104
  if (current_frame_info.r[reg_save_b0] != 0)
4105
    {
4106
      mask |= 8;
4107
      grsave = grsave_prev = current_frame_info.r[reg_save_b0];
4108
    }
4109
  if (current_frame_info.r[reg_save_ar_pfs] != 0
4110
      && (grsave_prev == 0
4111
          || current_frame_info.r[reg_save_ar_pfs] == grsave_prev + 1))
4112
    {
4113
      mask |= 4;
4114
      if (grsave_prev == 0)
4115
        grsave = current_frame_info.r[reg_save_ar_pfs];
4116
      grsave_prev = current_frame_info.r[reg_save_ar_pfs];
4117
    }
4118
  if (current_frame_info.r[reg_fp] != 0
4119
      && (grsave_prev == 0
4120
          || current_frame_info.r[reg_fp] == grsave_prev + 1))
4121
    {
4122
      mask |= 2;
4123
      if (grsave_prev == 0)
4124
        grsave = HARD_FRAME_POINTER_REGNUM;
4125
      grsave_prev = current_frame_info.r[reg_fp];
4126
    }
4127
  if (current_frame_info.r[reg_save_pr] != 0
4128
      && (grsave_prev == 0
4129
          || current_frame_info.r[reg_save_pr] == grsave_prev + 1))
4130
    {
4131
      mask |= 1;
4132
      if (grsave_prev == 0)
4133
        grsave = current_frame_info.r[reg_save_pr];
4134
    }
4135
 
4136
  if (mask && TARGET_GNU_AS)
4137
    fprintf (file, "\t.prologue %d, %d\n", mask,
4138
             ia64_dbx_register_number (grsave));
4139
  else
4140
    fputs ("\t.prologue\n", file);
4141
 
4142
  /* Emit a .spill directive, if necessary, to relocate the base of
4143
     the register spill area.  */
4144
  if (current_frame_info.spill_cfa_off != -16)
4145
    fprintf (file, "\t.spill %ld\n",
4146
             (long) (current_frame_info.spill_cfa_off
4147
                     + current_frame_info.spill_size));
4148
}
4149
 
4150
/* Emit the .body directive at the scheduled end of the prologue.  */
4151
 
4152
static void
4153
ia64_output_function_end_prologue (FILE *file)
4154
{
4155
  if (ia64_except_unwind_info (&global_options) != UI_TARGET)
4156
    return;
4157
 
4158
  fputs ("\t.body\n", file);
4159
}
4160
 
4161
/* Emit the function epilogue.  */
4162
 
4163
static void
4164
ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
4165
                               HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4166
{
4167
  int i;
4168
 
4169
  if (current_frame_info.r[reg_fp])
4170
    {
4171
      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
4172
      reg_names[HARD_FRAME_POINTER_REGNUM]
4173
        = reg_names[current_frame_info.r[reg_fp]];
4174
      reg_names[current_frame_info.r[reg_fp]] = tmp;
4175
      reg_emitted (reg_fp);
4176
    }
4177
  if (! TARGET_REG_NAMES)
4178
    {
4179
      for (i = 0; i < current_frame_info.n_input_regs; i++)
4180
        reg_names[IN_REG (i)] = ia64_input_reg_names[i];
4181
      for (i = 0; i < current_frame_info.n_local_regs; i++)
4182
        reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
4183
      for (i = 0; i < current_frame_info.n_output_regs; i++)
4184
        reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
4185
    }
4186
 
4187
  current_frame_info.initialized = 0;
4188
}
4189
 
4190
int
4191
ia64_dbx_register_number (int regno)
4192
{
4193
  /* In ia64_expand_prologue we quite literally renamed the frame pointer
4194
     from its home at loc79 to something inside the register frame.  We
4195
     must perform the same renumbering here for the debug info.  */
4196
  if (current_frame_info.r[reg_fp])
4197
    {
4198
      if (regno == HARD_FRAME_POINTER_REGNUM)
4199
        regno = current_frame_info.r[reg_fp];
4200
      else if (regno == current_frame_info.r[reg_fp])
4201
        regno = HARD_FRAME_POINTER_REGNUM;
4202
    }
4203
 
4204
  if (IN_REGNO_P (regno))
4205
    return 32 + regno - IN_REG (0);
4206
  else if (LOC_REGNO_P (regno))
4207
    return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
4208
  else if (OUT_REGNO_P (regno))
4209
    return (32 + current_frame_info.n_input_regs
4210
            + current_frame_info.n_local_regs + regno - OUT_REG (0));
4211
  else
4212
    return regno;
4213
}
4214
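/* Editor's note: an illustrative sketch, not part of the original
   sources.  The renumbering above packs the variable-sized in/loc/out
   groups consecutively starting at 32; the helper below shows the same
   arithmetic for a hypothetical frame.  With 2 inputs and 3 locals,
   in1 maps to 33, loc2 to 36 and out0 to 37.  */

static int
example_dbx_number (int group /* 0 = in, 1 = loc, 2 = out */, int index,
                    int n_inputs, int n_locals)
{
  int base = 32;

  if (group >= 1)
    base += n_inputs;
  if (group >= 2)
    base += n_locals;
  return base + index;
}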
 
4215
/* Implement TARGET_TRAMPOLINE_INIT.
4216
 
4217
   The trampoline should set the static chain pointer to value placed
4218
   into the trampoline and should branch to the specified routine.
4219
   To make the normal indirect-subroutine calling convention work,
4220
   the trampoline must look like a function descriptor; the first
4221
   word being the target address and the second being the target's
4222
   global pointer.
4223
 
4224
   We abuse the concept of a global pointer by arranging for it
4225
   to point to the data we need to load.  The complete trampoline
4226
   has the following form:
4227
 
4228
                +-------------------+ \
4229
        TRAMP:  | __ia64_trampoline | |
4230
                +-------------------+  > fake function descriptor
4231
                | TRAMP+16          | |
4232
                +-------------------+ /
4233
                | target descriptor |
4234
                +-------------------+
4235
                | static link       |
4236
                +-------------------+
4237
*/
4238
 
4239
static void
4240
ia64_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain)
4241
{
4242
  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
4243
  rtx addr, addr_reg, tramp, eight = GEN_INT (8);
4244
 
4245
  /* The Intel assembler requires that the global __ia64_trampoline symbol
4246
     be declared explicitly.  */
4247
  if (!TARGET_GNU_AS)
4248
    {
4249
      static bool declared_ia64_trampoline = false;
4250
 
4251
      if (!declared_ia64_trampoline)
4252
        {
4253
          declared_ia64_trampoline = true;
4254
          (*targetm.asm_out.globalize_label) (asm_out_file,
4255
                                              "__ia64_trampoline");
4256
        }
4257
    }
4258
 
4259
  /* Make sure addresses are Pmode even if we are in ILP32 mode. */
4260
  addr = convert_memory_address (Pmode, XEXP (m_tramp, 0));
4261
  fnaddr = convert_memory_address (Pmode, fnaddr);
4262
  static_chain = convert_memory_address (Pmode, static_chain);
4263
 
4264
  /* Load up our iterator.  */
4265
  addr_reg = copy_to_reg (addr);
4266
  m_tramp = adjust_automodify_address (m_tramp, Pmode, addr_reg, 0);
4267
 
4268
  /* The first two words are the fake descriptor:
4269
     __ia64_trampoline, ADDR+16.  */
4270
  tramp = gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline");
4271
  if (TARGET_ABI_OPEN_VMS)
4272
    {
4273
      /* HP decided to break the ELF ABI on VMS (to deal with an ambiguity
4274
         in the Macro-32 compiler) and changed the semantics of the LTOFF22
4275
         relocation against function symbols to make it identical to the
4276
         LTOFF_FPTR22 relocation.  Emit the latter directly to stay within
4277
         strict ELF and dereference to get the bare code address.  */
4278
      rtx reg = gen_reg_rtx (Pmode);
4279
      SYMBOL_REF_FLAGS (tramp) |= SYMBOL_FLAG_FUNCTION;
4280
      emit_move_insn (reg, tramp);
4281
      emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
4282
      tramp = reg;
4283
   }
4284
  emit_move_insn (m_tramp, tramp);
4285
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4286
  m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4287
 
4288
  emit_move_insn (m_tramp, force_reg (Pmode, plus_constant (addr, 16)));
4289
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4290
  m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4291
 
4292
  /* The third word is the target descriptor.  */
4293
  emit_move_insn (m_tramp, force_reg (Pmode, fnaddr));
4294
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4295
  m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4296
 
4297
  /* The fourth word is the static chain.  */
4298
  emit_move_insn (m_tramp, static_chain);
4299
}
4300
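/* Editor's note: an illustrative sketch, not part of the original
   sources.  The 32-byte trampoline initialized above can be pictured
   as the struct below; the field names are hypothetical and each field
   is one 8-byte word.  */

struct example_ia64_trampoline
{
  unsigned long long helper;        /* address of __ia64_trampoline      */
  unsigned long long helper_gp;     /* TRAMP+16, points at target_descr  */
  unsigned long long target_descr;  /* descriptor of the real function   */
  unsigned long long static_link;   /* static chain value                */
};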
 
4301
/* Do any needed setup for a variadic function.  CUM has not been updated
4302
   for the last named argument which has type TYPE and mode MODE.
4303
 
4304
   We generate the actual spill instructions during prologue generation.  */
4305
 
4306
static void
4307
ia64_setup_incoming_varargs (cumulative_args_t cum, enum machine_mode mode,
4308
                             tree type, int * pretend_size,
4309
                             int second_time ATTRIBUTE_UNUSED)
4310
{
4311
  CUMULATIVE_ARGS next_cum = *get_cumulative_args (cum);
4312
 
4313
  /* Skip the current argument.  */
4314
  ia64_function_arg_advance (pack_cumulative_args (&next_cum), mode, type, 1);
4315
 
4316
  if (next_cum.words < MAX_ARGUMENT_SLOTS)
4317
    {
4318
      int n = MAX_ARGUMENT_SLOTS - next_cum.words;
4319
      *pretend_size = n * UNITS_PER_WORD;
4320
      cfun->machine->n_varargs = n;
4321
    }
4322
}
4323
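/* Editor's note: an illustrative sketch, not part of the original
   sources.  Assuming MAX_ARGUMENT_SLOTS and UNITS_PER_WORD are both 8
   on this target, the pretend size is just the unused tail of the
   argument slots: for int f (int a, int b, ...) the last named
   argument leaves next_cum.words == 2, so 6 slots (48 bytes) are set
   aside and six registers are spilled by the prologue.  */

static int
example_varargs_pretend_bytes (int named_words)
{
  const int max_argument_slots = 8;   /* MAX_ARGUMENT_SLOTS (assumed).  */
  const int units_per_word = 8;       /* UNITS_PER_WORD (assumed).  */

  return named_words < max_argument_slots
         ? (max_argument_slots - named_words) * units_per_word
         : 0;
}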
 
4324
/* Check whether TYPE is a homogeneous floating point aggregate.  If
4325
   it is, return the mode of the floating point type that appears
4326
   in all leaves.  If it is not, return VOIDmode.
4327
 
4328
   An aggregate is a homogeneous floating point aggregate if all
4329
   fields/elements in it have the same floating point type (e.g.,
4330
   SFmode).  128-bit quad-precision floats are excluded.
4331
 
4332
   Variable sized aggregates should never arrive here, since we should
4333
   have already decided to pass them by reference.  Top-level zero-sized
4334
   aggregates are excluded because our parallels crash the middle-end.  */
4335
 
4336
static enum machine_mode
4337
hfa_element_mode (const_tree type, bool nested)
4338
{
4339
  enum machine_mode element_mode = VOIDmode;
4340
  enum machine_mode mode;
4341
  enum tree_code code = TREE_CODE (type);
4342
  int know_element_mode = 0;
4343
  tree t;
4344
 
4345
  if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
4346
    return VOIDmode;
4347
 
4348
  switch (code)
4349
    {
4350
    case VOID_TYPE:     case INTEGER_TYPE:      case ENUMERAL_TYPE:
4351
    case BOOLEAN_TYPE:  case POINTER_TYPE:
4352
    case OFFSET_TYPE:   case REFERENCE_TYPE:    case METHOD_TYPE:
4353
    case LANG_TYPE:             case FUNCTION_TYPE:
4354
      return VOIDmode;
4355
 
4356
      /* Fortran complex types are supposed to be HFAs, so we need to handle
4357
         gcc's COMPLEX_TYPEs as HFAs.  We need to exclude the integral complex
4358
         types though.  */
4359
    case COMPLEX_TYPE:
4360
      if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
4361
          && TYPE_MODE (type) != TCmode)
4362
        return GET_MODE_INNER (TYPE_MODE (type));
4363
      else
4364
        return VOIDmode;
4365
 
4366
    case REAL_TYPE:
4367
      /* We want to return VOIDmode for raw REAL_TYPEs, but return the actual
4368
         mode if this is contained within an aggregate.  */
4369
      if (nested && TYPE_MODE (type) != TFmode)
4370
        return TYPE_MODE (type);
4371
      else
4372
        return VOIDmode;
4373
 
4374
    case ARRAY_TYPE:
4375
      return hfa_element_mode (TREE_TYPE (type), 1);
4376
 
4377
    case RECORD_TYPE:
4378
    case UNION_TYPE:
4379
    case QUAL_UNION_TYPE:
4380
      for (t = TYPE_FIELDS (type); t; t = DECL_CHAIN (t))
4381
        {
4382
          if (TREE_CODE (t) != FIELD_DECL)
4383
            continue;
4384
 
4385
          mode = hfa_element_mode (TREE_TYPE (t), 1);
4386
          if (know_element_mode)
4387
            {
4388
              if (mode != element_mode)
4389
                return VOIDmode;
4390
            }
4391
          else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
4392
            return VOIDmode;
4393
          else
4394
            {
4395
              know_element_mode = 1;
4396
              element_mode = mode;
4397
            }
4398
        }
4399
      return element_mode;
4400
 
4401
    default:
4402
      /* If we reach here, we probably have some front-end specific type
4403
         that the backend doesn't know about.  This can happen via the
4404
         aggregate_value_p call in init_function_start.  All we can do is
4405
         ignore unknown tree types.  */
4406
      return VOIDmode;
4407
    }
4408
 
4409
  return VOIDmode;
4410
}
4411
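/* Editor's note: an illustrative example, not part of the original
   sources.  In plain C terms, the first hypothetical type below is a
   homogeneous SFmode aggregate (hfa_element_mode returns SFmode for
   the corresponding tree), while the second mixes SFmode and DFmode
   leaves and therefore yields VOIDmode.  */

struct example_hfa     { float x, y, z; };      /* all leaves SFmode   */
struct example_not_hfa { float x; double y; };  /* mixed -> not an HFA */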
 
4412
/* Return the number of words required to hold a quantity of TYPE and MODE
4413
   when passed as an argument.  */
4414
static int
4415
ia64_function_arg_words (const_tree type, enum machine_mode mode)
4416
{
4417
  int words;
4418
 
4419
  if (mode == BLKmode)
4420
    words = int_size_in_bytes (type);
4421
  else
4422
    words = GET_MODE_SIZE (mode);
4423
 
4424
  return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;  /* round up */
4425
}
4426
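/* Editor's note: an illustrative sketch, not part of the original
   sources.  The expression above is the usual round-up division;
   e.g. a 12-byte aggregate occupies (12 + 8 - 1) / 8 == 2 argument
   words when UNITS_PER_WORD is 8, as on this target.  */

static int
example_round_up_to_words (int byte_size)
{
  const int units_per_word = 8;   /* UNITS_PER_WORD (assumed).  */

  return (byte_size + units_per_word - 1) / units_per_word;
}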
 
4427
/* Return the number of registers that should be skipped so the current
4428
   argument (described by TYPE and WORDS) will be properly aligned.
4429
 
4430
   Integer and float arguments larger than 8 bytes start at the next
4431
   even boundary.  Aggregates larger than 8 bytes start at the next
4432
   even boundary if the aggregate has 16 byte alignment.  Note that
4433
   in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
4434
   but are still to be aligned in registers.
4435
 
4436
   ??? The ABI does not specify how to handle aggregates with
4437
   alignment from 9 to 15 bytes, or greater than 16.  We handle them
4438
   all as if they had 16 byte alignment.  Such aggregates can occur
4439
   only if gcc extensions are used.  */
4440
static int
4441
ia64_function_arg_offset (const CUMULATIVE_ARGS *cum,
4442
                          const_tree type, int words)
4443
{
4444
  /* No registers are skipped on VMS.  */
4445
  if (TARGET_ABI_OPEN_VMS || (cum->words & 1) == 0)
4446
    return 0;
4447
 
4448
  if (type
4449
      && TREE_CODE (type) != INTEGER_TYPE
4450
      && TREE_CODE (type) != REAL_TYPE)
4451
    return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
4452
  else
4453
    return words > 1;
4454
}
4455
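/* Editor's note: an illustrative sketch, not part of the original
   sources.  It restates the non-VMS rule above with plain integers:
   nothing is skipped when the next slot is already even; otherwise an
   aggregate (more precisely, any type that is neither INTEGER_TYPE nor
   REAL_TYPE) skips a slot iff it wants more than 64-bit alignment, and
   a scalar skips iff it is wider than one word.  */

static int
example_arg_padding_slots (int words_so_far, int arg_words,
                           int is_aggregate, int align_in_bits)
{
  if ((words_so_far & 1) == 0)
    return 0;
  return is_aggregate ? (align_in_bits > 64) : (arg_words > 1);
}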
 
4456
/* Return rtx for register where argument is passed, or zero if it is passed
4457
   on the stack.  */
4458
/* ??? 128-bit quad-precision floats are always passed in general
4459
   registers.  */
4460
 
4461
static rtx
4462
ia64_function_arg_1 (cumulative_args_t cum_v, enum machine_mode mode,
4463
                     const_tree type, bool named, bool incoming)
4464
{
4465
  const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4466
 
4467
  int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
4468
  int words = ia64_function_arg_words (type, mode);
4469
  int offset = ia64_function_arg_offset (cum, type, words);
4470
  enum machine_mode hfa_mode = VOIDmode;
4471
 
4472
  /* For OPEN VMS, emit the instruction setting up the argument register here,
4473
     when we know this will be emitted together with the other argument setup
4474
     insns.  This is not the conceptually best place to do this, but this is
4475
     the easiest as we have convenient access to cumulative args info.  */
4476
 
4477
  if (TARGET_ABI_OPEN_VMS && mode == VOIDmode && type == void_type_node
4478
      && named == 1)
4479
    {
4480
      unsigned HOST_WIDE_INT regval = cum->words;
4481
      int i;
4482
 
4483
      for (i = 0; i < 8; i++)
4484
        regval |= ((int) cum->atypes[i]) << (i * 3 + 8);
4485
 
4486
      emit_move_insn (gen_rtx_REG (DImode, GR_REG (25)),
4487
                      GEN_INT (regval));
4488
    }
4489
 
4490
  /* If all argument slots are used, then it must go on the stack.  */
4491
  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4492
    return 0;
4493
 
4494
  /* Check for and handle homogeneous FP aggregates.  */
4495
  if (type)
4496
    hfa_mode = hfa_element_mode (type, 0);
4497
 
4498
  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
4499
     and unprototyped hfas are passed specially.  */
4500
  if (hfa_mode != VOIDmode && (! cum->prototype || named))
4501
    {
4502
      rtx loc[16];
4503
      int i = 0;
4504
      int fp_regs = cum->fp_regs;
4505
      int int_regs = cum->words + offset;
4506
      int hfa_size = GET_MODE_SIZE (hfa_mode);
4507
      int byte_size;
4508
      int args_byte_size;
4509
 
4510
      /* If prototyped, pass it in FR regs then GR regs.
4511
         If not prototyped, pass it in both FR and GR regs.
4512
 
4513
         If this is an SFmode aggregate, then it is possible to run out of
4514
         FR regs while GR regs are still left.  In that case, we pass the
4515
         remaining part in the GR regs.  */
4516
 
4517
      /* Fill the FP regs.  We do this always.  We stop if we reach the end
4518
         of the argument, the last FP register, or the last argument slot.  */
4519
 
4520
      byte_size = ((mode == BLKmode)
4521
                   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4522
      args_byte_size = int_regs * UNITS_PER_WORD;
4523
      offset = 0;
4524
      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4525
              && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
4526
        {
4527
          loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4528
                                      gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
4529
                                                              + fp_regs)),
4530
                                      GEN_INT (offset));
4531
          offset += hfa_size;
4532
          args_byte_size += hfa_size;
4533
          fp_regs++;
4534
        }
4535
 
4536
      /* If no prototype, then the whole thing must go in GR regs.  */
4537
      if (! cum->prototype)
4538
        offset = 0;
4539
      /* If this is an SFmode aggregate, then we might have some left over
4540
         that needs to go in GR regs.  */
4541
      else if (byte_size != offset)
4542
        int_regs += offset / UNITS_PER_WORD;
4543
 
4544
      /* Fill in the GR regs.  We must use DImode here, not the hfa mode.  */
4545
 
4546
      for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
4547
        {
4548
          enum machine_mode gr_mode = DImode;
4549
          unsigned int gr_size;
4550
 
4551
          /* If we have an odd 4 byte hunk because we ran out of FR regs,
4552
             then this goes in a GR reg left adjusted/little endian, right
4553
             adjusted/big endian.  */
4554
          /* ??? Currently this is handled wrong, because 4-byte hunks are
4555
             always right adjusted/little endian.  */
4556
          if (offset & 0x4)
4557
            gr_mode = SImode;
4558
          /* If we have an even 4 byte hunk because the aggregate is a
4559
             multiple of 4 bytes in size, then this goes in a GR reg right
4560
             adjusted/little endian.  */
4561
          else if (byte_size - offset == 4)
4562
            gr_mode = SImode;
4563
 
4564
          loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4565
                                      gen_rtx_REG (gr_mode, (basereg
4566
                                                             + int_regs)),
4567
                                      GEN_INT (offset));
4568
 
4569
          gr_size = GET_MODE_SIZE (gr_mode);
4570
          offset += gr_size;
4571
          if (gr_size == UNITS_PER_WORD
4572
              || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
4573
            int_regs++;
4574
          else if (gr_size > UNITS_PER_WORD)
4575
            int_regs += gr_size / UNITS_PER_WORD;
4576
        }
4577
      return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4578
    }
4579
 
4580
  /* On OpenVMS, a variable argument goes in either Rn or Fn.  */
4581
  else if (TARGET_ABI_OPEN_VMS && named == 0)
4582
    {
4583
      if (FLOAT_MODE_P (mode))
4584
        return gen_rtx_REG (mode, FR_ARG_FIRST + cum->words);
4585
      else
4586
        return gen_rtx_REG (mode, basereg + cum->words);
4587
    }
4588
 
4589
  /* Integral values and aggregates go in general registers.  If we have run out of
4590
     FR registers, then FP values must also go in general registers.  This can
4591
     happen when we have a SFmode HFA.  */
4592
  else if (mode == TFmode || mode == TCmode
4593
           || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4594
    {
4595
      int byte_size = ((mode == BLKmode)
4596
                       ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4597
      if (BYTES_BIG_ENDIAN
4598
          && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4599
          && byte_size < UNITS_PER_WORD
4600
          && byte_size > 0)
4601
        {
4602
          rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4603
                                          gen_rtx_REG (DImode,
4604
                                                       (basereg + cum->words
4605
                                                        + offset)),
4606
                                          const0_rtx);
4607
          return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4608
        }
4609
      else
4610
        return gen_rtx_REG (mode, basereg + cum->words + offset);
4611
 
4612
    }
4613
 
4614
  /* If there is a prototype, then FP values go in a FR register when
4615
     named, and in a GR register when unnamed.  */
4616
  else if (cum->prototype)
4617
    {
4618
      if (named)
4619
        return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
4620
      /* In big-endian mode, an anonymous SFmode value must be represented
4621
         as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
4622
         the value into the high half of the general register.  */
4623
      else if (BYTES_BIG_ENDIAN && mode == SFmode)
4624
        return gen_rtx_PARALLEL (mode,
4625
                 gen_rtvec (1,
4626
                   gen_rtx_EXPR_LIST (VOIDmode,
4627
                     gen_rtx_REG (DImode, basereg + cum->words + offset),
4628
                                      const0_rtx)));
4629
      else
4630
        return gen_rtx_REG (mode, basereg + cum->words + offset);
4631
    }
4632
  /* If there is no prototype, then FP values go in both FR and GR
4633
     registers.  */
4634
  else
4635
    {
4636
      /* See comment above.  */
4637
      enum machine_mode inner_mode =
4638
        (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
4639
 
4640
      rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
4641
                                      gen_rtx_REG (mode, (FR_ARG_FIRST
4642
                                                          + cum->fp_regs)),
4643
                                      const0_rtx);
4644
      rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4645
                                      gen_rtx_REG (inner_mode,
4646
                                                   (basereg + cum->words
4647
                                                    + offset)),
4648
                                      const0_rtx);
4649
 
4650
      return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
4651
    }
4652
}
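
/* For an HFA the loop above fills FP argument registers one element at a
   time and leaves whatever is left for the general registers.  A rough,
   stand-alone model of that split, assuming MAX_ARGUMENT_SLOTS == 8 and
   UNITS_PER_WORD == 8; hfa_split is a hypothetical helper.  */
#if 0
#include <stdio.h>

#define UNITS_PER_WORD      8
#define MAX_ARGUMENT_SLOTS  8

/* ELEM_SIZE bytes per element, TOTAL bytes in the aggregate, FP_USED FP
   argument registers and INT_USED slots already consumed.  */
static void
hfa_split (int elem_size, int total, int fp_used, int int_used)
{
  int offset = 0;
  int fp_regs = fp_used;
  int args_byte_size = int_used * UNITS_PER_WORD;

  while (offset < total
         && fp_regs < MAX_ARGUMENT_SLOTS
         && args_byte_size < MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)
    {
      offset += elem_size;
      args_byte_size += elem_size;
      fp_regs++;
    }

  printf ("%d bytes in FR regs, %d bytes left for GR regs\n",
          offset, total - offset);
}

int
main (void)
{
  /* A 10-float HFA arriving after 6 FP argument registers are taken:
     only 2 elements (8 bytes) still fit in FR regs.  */
  hfa_split (4, 40, 6, 0);
  return 0;
}
#endif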
4653
 
4654
/* Implement TARGET_FUNCTION_ARG target hook.  */
4655
 
4656
static rtx
4657
ia64_function_arg (cumulative_args_t cum, enum machine_mode mode,
4658
                   const_tree type, bool named)
4659
{
4660
  return ia64_function_arg_1 (cum, mode, type, named, false);
4661
}
4662
 
4663
/* Implement TARGET_FUNCTION_INCOMING_ARG target hook.  */
4664
 
4665
static rtx
4666
ia64_function_incoming_arg (cumulative_args_t cum,
4667
                            enum machine_mode mode,
4668
                            const_tree type, bool named)
4669
{
4670
  return ia64_function_arg_1 (cum, mode, type, named, true);
4671
}
4672
 
4673
/* Return number of bytes, at the beginning of the argument, that must be
4674
   put in registers.  0 if the argument is entirely in registers or entirely
4675
   in memory.  */
4676
 
4677
static int
4678
ia64_arg_partial_bytes (cumulative_args_t cum_v, enum machine_mode mode,
4679
                        tree type, bool named ATTRIBUTE_UNUSED)
4680
{
4681
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4682
 
4683
  int words = ia64_function_arg_words (type, mode);
4684
  int offset = ia64_function_arg_offset (cum, type, words);
4685
 
4686
  /* If all argument slots are used, then it must go on the stack.  */
4687
  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4688
    return 0;
4689
 
4690
  /* It doesn't matter whether the argument goes in FR or GR regs.  If
4691
     it fits within the 8 argument slots, then it goes entirely in
4692
     registers.  If it extends past the last argument slot, then the rest
4693
     goes on the stack.  */
4694
 
4695
  if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
4696
    return 0;
4697
 
4698
  return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
4699
}
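
/* For example, with 8 slots of 8 bytes each, a 5-slot argument arriving
   when 6 slots are already in use gets 2 slots (16 bytes) in registers and
   the rest on the stack.  A small sketch of the computation; partial_bytes
   is a hypothetical mirror of the logic above.  */
#if 0
#include <stdio.h>

#define UNITS_PER_WORD      8
#define MAX_ARGUMENT_SLOTS  8

/* WORDS is the argument size in slots, USED is cum->words + offset.  */
static int
partial_bytes (int words, int used)
{
  if (used >= MAX_ARGUMENT_SLOTS)           /* entirely in memory */
    return 0;
  if (words + used <= MAX_ARGUMENT_SLOTS)   /* entirely in registers */
    return 0;
  return (MAX_ARGUMENT_SLOTS - used) * UNITS_PER_WORD;
}

int
main (void)
{
  printf ("%d\n", partial_bytes (5, 6));    /* 16: 2 slots in registers */
  printf ("%d\n", partial_bytes (2, 3));    /* 0: fits entirely         */
  printf ("%d\n", partial_bytes (4, 8));    /* 0: all on the stack      */
  return 0;
}
#endif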
4700
 
4701
/* Return ivms_arg_type based on machine_mode.  */
4702
 
4703
static enum ivms_arg_type
4704
ia64_arg_type (enum machine_mode mode)
4705
{
4706
  switch (mode)
4707
    {
4708
    case SFmode:
4709
      return FS;
4710
    case DFmode:
4711
      return FT;
4712
    default:
4713
      return I64;
4714
    }
4715
}
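
/* On VMS the argument-information value built in ia64_function_arg_1 packs
   the slot count into the low byte and one 3-bit type code per slot
   starting at bit 8.  A sketch of that packing; the A_* codes below are
   illustrative placeholders, not the real ivms_arg_type values.  */
#if 0
#include <stdio.h>

enum vms_atype { A_I64 = 0, A_FS = 1, A_FT = 2 };  /* placeholder codes */

static unsigned long long
pack_ai (int slots_used, const enum vms_atype atypes[8])
{
  unsigned long long regval = (unsigned int) slots_used;
  int i;

  for (i = 0; i < 8; i++)
    regval |= ((unsigned long long) atypes[i]) << (i * 3 + 8);
  return regval;
}

int
main (void)
{
  enum vms_atype at[8] = { A_FS, A_I64, A_FT, A_I64,
                           A_I64, A_I64, A_I64, A_I64 };

  printf ("0x%llx\n", pack_ai (4, at));     /* 0x8104 */
  return 0;
}
#endif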
4716
 
4717
/* Update CUM to point after this argument.  This is patterned after
4718
   ia64_function_arg.  */
4719
 
4720
static void
4721
ia64_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
4722
                           const_tree type, bool named)
4723
{
4724
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4725
  int words = ia64_function_arg_words (type, mode);
4726
  int offset = ia64_function_arg_offset (cum, type, words);
4727
  enum machine_mode hfa_mode = VOIDmode;
4728
 
4729
  /* If all arg slots are already full, then there is nothing to do.  */
4730
  if (cum->words >= MAX_ARGUMENT_SLOTS)
4731
    {
4732
      cum->words += words + offset;
4733
      return;
4734
    }
4735
 
4736
  cum->atypes[cum->words] = ia64_arg_type (mode);
4737
  cum->words += words + offset;
4738
 
4739
  /* Check for and handle homogeneous FP aggregates.  */
4740
  if (type)
4741
    hfa_mode = hfa_element_mode (type, 0);
4742
 
4743
  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
4744
     and unprototyped hfas are passed specially.  */
4745
  if (hfa_mode != VOIDmode && (! cum->prototype || named))
4746
    {
4747
      int fp_regs = cum->fp_regs;
4748
      /* This is the original value of cum->words + offset.  */
4749
      int int_regs = cum->words - words;
4750
      int hfa_size = GET_MODE_SIZE (hfa_mode);
4751
      int byte_size;
4752
      int args_byte_size;
4753
 
4754
      /* If prototyped, pass it in FR regs then GR regs.
4755
         If not prototyped, pass it in both FR and GR regs.
4756
 
4757
         If this is an SFmode aggregate, then it is possible to run out of
4758
         FR regs while GR regs are still left.  In that case, we pass the
4759
         remaining part in the GR regs.  */
4760
 
4761
      /* Fill the FP regs.  We do this always.  We stop if we reach the end
4762
         of the argument, the last FP register, or the last argument slot.  */
4763
 
4764
      byte_size = ((mode == BLKmode)
4765
                   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4766
      args_byte_size = int_regs * UNITS_PER_WORD;
4767
      offset = 0;
4768
      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4769
              && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
4770
        {
4771
          offset += hfa_size;
4772
          args_byte_size += hfa_size;
4773
          fp_regs++;
4774
        }
4775
 
4776
      cum->fp_regs = fp_regs;
4777
    }
4778
 
4779
  /* On OpenVMS, a variable argument goes in either Rn or Fn.  */
4780
  else if (TARGET_ABI_OPEN_VMS && named == 0)
4781
    {
4782
      cum->int_regs = cum->words;
4783
      cum->fp_regs = cum->words;
4784
    }
4785
 
4786
  /* Integral values and aggregates go in general registers.  So do TFmode FP values.
4787
     If we have run out of FR registers, then other FP values must also go in
4788
     general registers.  This can happen when we have a SFmode HFA.  */
4789
  else if (mode == TFmode || mode == TCmode
4790
           || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4791
    cum->int_regs = cum->words;
4792
 
4793
  /* If there is a prototype, then FP values go in a FR register when
4794
     named, and in a GR register when unnamed.  */
4795
  else if (cum->prototype)
4796
    {
4797
      if (! named)
4798
        cum->int_regs = cum->words;
4799
      else
4800
        /* ??? Complex types should not reach here.  */
4801
        cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4802
    }
4803
  /* If there is no prototype, then FP values go in both FR and GR
4804
     registers.  */
4805
  else
4806
    {
4807
      /* ??? Complex types should not reach here.  */
4808
      cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4809
      cum->int_regs = cum->words;
4810
    }
4811
}
4812
 
4813
/* Arguments with alignment larger than 8 bytes start at the next even
4814
   boundary.  On ILP32 HPUX, TFmode arguments start on the next even boundary
4815
   even though their normal alignment is 8 bytes.  See ia64_function_arg.  */
4816
 
4817
static unsigned int
4818
ia64_function_arg_boundary (enum machine_mode mode, const_tree type)
4819
{
4820
  if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
4821
    return PARM_BOUNDARY * 2;
4822
 
4823
  if (type)
4824
    {
4825
      if (TYPE_ALIGN (type) > PARM_BOUNDARY)
4826
        return PARM_BOUNDARY * 2;
4827
      else
4828
        return PARM_BOUNDARY;
4829
    }
4830
 
4831
  if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
4832
    return PARM_BOUNDARY * 2;
4833
  else
4834
    return PARM_BOUNDARY;
4835
}
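
/* In effect PARM_BOUNDARY (64 bits on IA-64) is doubled for anything
   aligned beyond 8 bytes, plus the TFmode/ILP32 HPUX special case.  A
   small sketch of the type-based rule only, assuming PARM_BOUNDARY == 64;
   arg_boundary is a hypothetical helper.  */
#if 0
#include <stdio.h>

#define PARM_BOUNDARY 64        /* bits */

static unsigned int
arg_boundary (unsigned int type_align_in_bits)
{
  return (type_align_in_bits > PARM_BOUNDARY
          ? PARM_BOUNDARY * 2 : PARM_BOUNDARY);
}

int
main (void)
{
  printf ("%u\n", arg_boundary (32));       /* 64  */
  printf ("%u\n", arg_boundary (64));       /* 64  */
  printf ("%u\n", arg_boundary (128));      /* 128 */
  return 0;
}
#endif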
4836
 
4837
/* True if it is OK to do sibling call optimization for the specified
4838
   call expression EXP.  DECL will be the called function, or NULL if
4839
   this is an indirect call.  */
4840
static bool
4841
ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
4842
{
4843
  /* We can't perform a sibcall if the current function has the syscall_linkage
4844
     attribute.  */
4845
  if (lookup_attribute ("syscall_linkage",
4846
                        TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
4847
    return false;
4848
 
4849
  /* We must always return with our current GP.  This means we can
4850
     only sibcall to functions defined in the current module unless
4851
     TARGET_CONST_GP is set to true.  */
4852
  return (decl && (*targetm.binds_local_p) (decl)) || TARGET_CONST_GP;
4853
}
4854
 
4855
 
4856
/* Implement va_arg.  */
4857
 
4858
static tree
4859
ia64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
4860
                      gimple_seq *post_p)
4861
{
4862
  /* Variable sized types are passed by reference.  */
4863
  if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
4864
    {
4865
      tree ptrtype = build_pointer_type (type);
4866
      tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
4867
      return build_va_arg_indirect_ref (addr);
4868
    }
4869
 
4870
  /* Aggregate arguments with alignment larger than 8 bytes start at
4871
     the next even boundary.  Integer and floating point arguments
4872
     do so if they are larger than 8 bytes, whether or not they are
4873
     also aligned larger than 8 bytes.  */
4874
  if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
4875
      ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
4876
    {
4877
      tree t = fold_build_pointer_plus_hwi (valist, 2 * UNITS_PER_WORD - 1);
4878
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4879
                  build_int_cst (TREE_TYPE (t), -2 * UNITS_PER_WORD));
4880
      gimplify_assign (unshare_expr (valist), t, pre_p);
4881
    }
4882
 
4883
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4884
}
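
/* The gimplified alignment above amounts to rounding the va_list pointer
   up to the next 16-byte boundary (2 * UNITS_PER_WORD) before fetching an
   oversized or over-aligned argument.  A pointer-arithmetic sketch,
   assuming UNITS_PER_WORD == 8; align_va_ptr is a hypothetical helper.  */
#if 0
#include <stdio.h>
#include <stdint.h>

#define UNITS_PER_WORD 8

/* Same arithmetic as the gimplified form: ap = (ap + 15) & -16.  */
static uintptr_t
align_va_ptr (uintptr_t ap)
{
  return (ap + 2 * UNITS_PER_WORD - 1) & -(uintptr_t) (2 * UNITS_PER_WORD);
}

int
main (void)
{
  printf ("0x%lx\n", (unsigned long) align_va_ptr (0x1008));  /* 0x1010 */
  printf ("0x%lx\n", (unsigned long) align_va_ptr (0x1010));  /* 0x1010 */
  return 0;
}
#endif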
4885
 
4886
/* Return 1 if the function return value is returned in memory.  Return 0 if it is
4887
   in a register.  */
4888
 
4889
static bool
4890
ia64_return_in_memory (const_tree valtype, const_tree fntype ATTRIBUTE_UNUSED)
4891
{
4892
  enum machine_mode mode;
4893
  enum machine_mode hfa_mode;
4894
  HOST_WIDE_INT byte_size;
4895
 
4896
  mode = TYPE_MODE (valtype);
4897
  byte_size = GET_MODE_SIZE (mode);
4898
  if (mode == BLKmode)
4899
    {
4900
      byte_size = int_size_in_bytes (valtype);
4901
      if (byte_size < 0)
4902
        return true;
4903
    }
4904
 
4905
  /* Hfa's with up to 8 elements are returned in the FP argument registers.  */
4906
 
4907
  hfa_mode = hfa_element_mode (valtype, 0);
4908
  if (hfa_mode != VOIDmode)
4909
    {
4910
      int hfa_size = GET_MODE_SIZE (hfa_mode);
4911
 
4912
      if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
4913
        return true;
4914
      else
4915
        return false;
4916
    }
4917
  else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
4918
    return true;
4919
  else
4920
    return false;
4921
}
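
/* So an HFA is returned in FP registers only while it fits in the 8 FP
   argument registers (8 doubles yes, 9 doubles no), and anything else goes
   to memory once it exceeds the integer return slots.  A small sketch;
   MAX_INT_RETURN_SLOTS == 4 (r8-r11) is an assumption here, and
   returns_in_memory is a hypothetical helper.  */
#if 0
#include <stdio.h>
#include <stdbool.h>

#define UNITS_PER_WORD        8
#define MAX_ARGUMENT_SLOTS    8
#define MAX_INT_RETURN_SLOTS  4         /* assumed: r8-r11 */

/* HFA_SIZE is the element size in bytes, or 0 if the type is not an HFA.  */
static bool
returns_in_memory (int byte_size, int hfa_size)
{
  if (hfa_size)
    return byte_size / hfa_size > MAX_ARGUMENT_SLOTS;
  return byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS;
}

int
main (void)
{
  printf ("%d\n", returns_in_memory (64, 8));  /* 8 doubles: 0 (FP regs) */
  printf ("%d\n", returns_in_memory (72, 8));  /* 9 doubles: 1 (memory)  */
  printf ("%d\n", returns_in_memory (32, 0));  /* 32-byte struct: 0      */
  printf ("%d\n", returns_in_memory (40, 0));  /* 40-byte struct: 1      */
  return 0;
}
#endif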
4922
 
4923
/* Return rtx for register that holds the function return value.  */
4924
 
4925
static rtx
4926
ia64_function_value (const_tree valtype,
4927
                     const_tree fn_decl_or_type,
4928
                     bool outgoing ATTRIBUTE_UNUSED)
4929
{
4930
  enum machine_mode mode;
4931
  enum machine_mode hfa_mode;
4932
  int unsignedp;
4933
  const_tree func = fn_decl_or_type;
4934
 
4935
  if (fn_decl_or_type
4936
      && !DECL_P (fn_decl_or_type))
4937
    func = NULL;
4938
 
4939
  mode = TYPE_MODE (valtype);
4940
  hfa_mode = hfa_element_mode (valtype, 0);
4941
 
4942
  if (hfa_mode != VOIDmode)
4943
    {
4944
      rtx loc[8];
4945
      int i;
4946
      int hfa_size;
4947
      int byte_size;
4948
      int offset;
4949
 
4950
      hfa_size = GET_MODE_SIZE (hfa_mode);
4951
      byte_size = ((mode == BLKmode)
4952
                   ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
4953
      offset = 0;
4954
      for (i = 0; offset < byte_size; i++)
4955
        {
4956
          loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4957
                                      gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
4958
                                      GEN_INT (offset));
4959
          offset += hfa_size;
4960
        }
4961
      return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4962
    }
4963
  else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
4964
    return gen_rtx_REG (mode, FR_ARG_FIRST);
4965
  else
4966
    {
4967
      bool need_parallel = false;
4968
 
4969
      /* In big-endian mode, we need to manage the layout of aggregates
4970
         in the registers so that we get the bits properly aligned in
4971
         the highpart of the registers.  */
4972
      if (BYTES_BIG_ENDIAN
4973
          && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
4974
        need_parallel = true;
4975
 
4976
      /* Something like struct S { long double x; char a[0] } is not an
4977
         HFA structure, and therefore doesn't go in fp registers.  But
4978
         the middle-end will give it XFmode anyway, and XFmode values
4979
         don't normally fit in integer registers.  So we need to smuggle
4980
         the value inside a parallel.  */
4981
      else if (mode == XFmode || mode == XCmode || mode == RFmode)
4982
        need_parallel = true;
4983
 
4984
      if (need_parallel)
4985
        {
4986
          rtx loc[8];
4987
          int offset;
4988
          int bytesize;
4989
          int i;
4990
 
4991
          offset = 0;
4992
          bytesize = int_size_in_bytes (valtype);
4993
          /* An empty PARALLEL is invalid here, but the return value
4994
             doesn't matter for empty structs.  */
4995
          if (bytesize == 0)
4996
            return gen_rtx_REG (mode, GR_RET_FIRST);
4997
          for (i = 0; offset < bytesize; i++)
4998
            {
4999
              loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
5000
                                          gen_rtx_REG (DImode,
5001
                                                       GR_RET_FIRST + i),
5002
                                          GEN_INT (offset));
5003
              offset += UNITS_PER_WORD;
5004
            }
5005
          return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
5006
        }
5007
 
5008
      mode = promote_function_mode (valtype, mode, &unsignedp,
5009
                                    func ? TREE_TYPE (func) : NULL_TREE,
5010
                                    true);
5011
 
5012
      return gen_rtx_REG (mode, GR_RET_FIRST);
5013
    }
5014
}
5015
 
5016
/* Worker function for TARGET_LIBCALL_VALUE.  */
5017
 
5018
static rtx
5019
ia64_libcall_value (enum machine_mode mode,
5020
                    const_rtx fun ATTRIBUTE_UNUSED)
5021
{
5022
  return gen_rtx_REG (mode,
5023
                      (((GET_MODE_CLASS (mode) == MODE_FLOAT
5024
                         || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5025
                        && (mode) != TFmode)
5026
                       ? FR_RET_FIRST : GR_RET_FIRST));
5027
}
5028
 
5029
/* Worker function for FUNCTION_VALUE_REGNO_P.  */
5030
 
5031
static bool
5032
ia64_function_value_regno_p (const unsigned int regno)
5033
{
5034
  return ((regno >= GR_RET_FIRST && regno <= GR_RET_LAST)
5035
          || (regno >= FR_RET_FIRST && regno <= FR_RET_LAST));
5036
}
5037
 
5038
/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
5039
   We need to emit DTP-relative relocations.  */
5040
 
5041
static void
5042
ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
5043
{
5044
  gcc_assert (size == 4 || size == 8);
5045
  if (size == 4)
5046
    fputs ("\tdata4.ua\t@dtprel(", file);
5047
  else
5048
    fputs ("\tdata8.ua\t@dtprel(", file);
5049
  output_addr_const (file, x);
5050
  fputs (")", file);
5051
}
5052
 
5053
/* Print a memory address as an operand to reference that memory location.  */
5054
 
5055
/* ??? Do we need this?  It gets used only for 'a' operands.  We could perhaps
5056
   also call this from ia64_print_operand for memory addresses.  */
5057
 
5058
static void
5059
ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
5060
                            rtx address ATTRIBUTE_UNUSED)
5061
{
5062
}
5063
 
5064
/* Print an operand to an assembler instruction.
5065
   C    Swap and print a comparison operator.
5066
   D    Print an FP comparison operator.
5067
   E    Print 32 - constant, for SImode shifts as extract.
5068
   e    Print 64 - constant, for DImode rotates.
5069
   F    A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
5070
        a floating point register emitted normally.
5071
   G    A floating point constant.
5072
   I    Invert a predicate register by adding 1.
5073
   J    Select the proper predicate register for a condition.
5074
   j    Select the inverse predicate register for a condition.
5075
   O    Append .acq for volatile load.
5076
   P    Postincrement of a MEM.
5077
   Q    Append .rel for volatile store.
5078
   R    Print .s .d or nothing for a single, double or no truncation.
5079
   S    Shift amount for shladd instruction.
5080
   T    Print an 8-bit sign extended number (K) as a 32-bit unsigned number
5081
        for Intel assembler.
5082
   U    Print an 8-bit sign extended number (K) as a 64-bit unsigned number
5083
        for Intel assembler.
5084
   X    A pair of floating point registers.
5085
   r    Print register name, or constant 0 as r0.  HP compatibility for
5086
        Linux kernel.
5087
   v    Print vector constant value as an 8-byte integer value.  */
5088
 
5089
static void
5090
ia64_print_operand (FILE * file, rtx x, int code)
5091
{
5092
  const char *str;
5093
 
5094
  switch (code)
5095
    {
5096
    case 0:
5097
      /* Handled below.  */
5098
      break;
5099
 
5100
    case 'C':
5101
      {
5102
        enum rtx_code c = swap_condition (GET_CODE (x));
5103
        fputs (GET_RTX_NAME (c), file);
5104
        return;
5105
      }
5106
 
5107
    case 'D':
5108
      switch (GET_CODE (x))
5109
        {
5110
        case NE:
5111
          str = "neq";
5112
          break;
5113
        case UNORDERED:
5114
          str = "unord";
5115
          break;
5116
        case ORDERED:
5117
          str = "ord";
5118
          break;
5119
        case UNLT:
5120
          str = "nge";
5121
          break;
5122
        case UNLE:
5123
          str = "ngt";
5124
          break;
5125
        case UNGT:
5126
          str = "nle";
5127
          break;
5128
        case UNGE:
5129
          str = "nlt";
5130
          break;
5131
        default:
5132
          str = GET_RTX_NAME (GET_CODE (x));
5133
          break;
5134
        }
5135
      fputs (str, file);
5136
      return;
5137
 
5138
    case 'E':
5139
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
5140
      return;
5141
 
5142
    case 'e':
5143
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
5144
      return;
5145
 
5146
    case 'F':
5147
      if (x == CONST0_RTX (GET_MODE (x)))
5148
        str = reg_names [FR_REG (0)];
5149
      else if (x == CONST1_RTX (GET_MODE (x)))
5150
        str = reg_names [FR_REG (1)];
5151
      else
5152
        {
5153
          gcc_assert (GET_CODE (x) == REG);
5154
          str = reg_names [REGNO (x)];
5155
        }
5156
      fputs (str, file);
5157
      return;
5158
 
5159
    case 'G':
5160
      {
5161
        long val[4];
5162
        REAL_VALUE_TYPE rv;
5163
        REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
5164
        real_to_target (val, &rv, GET_MODE (x));
5165
        if (GET_MODE (x) == SFmode)
5166
          fprintf (file, "0x%08lx", val[0] & 0xffffffff);
5167
        else if (GET_MODE (x) == DFmode)
5168
          fprintf (file, "0x%08lx%08lx", (WORDS_BIG_ENDIAN ? val[0] : val[1])
5169
                                          & 0xffffffff,
5170
                                         (WORDS_BIG_ENDIAN ? val[1] : val[0])
5171
                                          & 0xffffffff);
5172
        else
5173
          output_operand_lossage ("invalid %%G mode");
5174
      }
5175
      return;
5176
 
5177
    case 'I':
5178
      fputs (reg_names [REGNO (x) + 1], file);
5179
      return;
5180
 
5181
    case 'J':
5182
    case 'j':
5183
      {
5184
        unsigned int regno = REGNO (XEXP (x, 0));
5185
        if (GET_CODE (x) == EQ)
5186
          regno += 1;
5187
        if (code == 'j')
5188
          regno ^= 1;
5189
        fputs (reg_names [regno], file);
5190
      }
5191
      return;
5192
 
5193
    case 'O':
5194
      if (MEM_VOLATILE_P (x))
5195
        fputs(".acq", file);
5196
      return;
5197
 
5198
    case 'P':
5199
      {
5200
        HOST_WIDE_INT value;
5201
 
5202
        switch (GET_CODE (XEXP (x, 0)))
5203
          {
5204
          default:
5205
            return;
5206
 
5207
          case POST_MODIFY:
5208
            x = XEXP (XEXP (XEXP (x, 0), 1), 1);
5209
            if (GET_CODE (x) == CONST_INT)
5210
              value = INTVAL (x);
5211
            else
5212
              {
5213
                gcc_assert (GET_CODE (x) == REG);
5214
                fprintf (file, ", %s", reg_names[REGNO (x)]);
5215
                return;
5216
              }
5217
            break;
5218
 
5219
          case POST_INC:
5220
            value = GET_MODE_SIZE (GET_MODE (x));
5221
            break;
5222
 
5223
          case POST_DEC:
5224
            value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
5225
            break;
5226
          }
5227
 
5228
        fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
5229
        return;
5230
      }
5231
 
5232
    case 'Q':
5233
      if (MEM_VOLATILE_P (x))
5234
        fputs(".rel", file);
5235
      return;
5236
 
5237
    case 'R':
5238
      if (x == CONST0_RTX (GET_MODE (x)))
5239
        fputs(".s", file);
5240
      else if (x == CONST1_RTX (GET_MODE (x)))
5241
        fputs(".d", file);
5242
      else if (x == CONST2_RTX (GET_MODE (x)))
5243
        ;
5244
      else
5245
        output_operand_lossage ("invalid %%R value");
5246
      return;
5247
 
5248
    case 'S':
5249
      fprintf (file, "%d", exact_log2 (INTVAL (x)));
5250
      return;
5251
 
5252
    case 'T':
5253
      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5254
        {
5255
          fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
5256
          return;
5257
        }
5258
      break;
5259
 
5260
    case 'U':
5261
      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5262
        {
5263
          const char *prefix = "0x";
5264
          if (INTVAL (x) & 0x80000000)
5265
            {
5266
              fprintf (file, "0xffffffff");
5267
              prefix = "";
5268
            }
5269
          fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
5270
          return;
5271
        }
5272
      break;
5273
 
5274
    case 'X':
5275
      {
5276
        unsigned int regno = REGNO (x);
5277
        fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]);
5278
      }
5279
      return;
5280
 
5281
    case 'r':
5282
      /* If this operand is the constant zero, write it as register zero.
5283
         Any register, zero, or CONST_INT value is OK here.  */
5284
      if (GET_CODE (x) == REG)
5285
        fputs (reg_names[REGNO (x)], file);
5286
      else if (x == CONST0_RTX (GET_MODE (x)))
5287
        fputs ("r0", file);
5288
      else if (GET_CODE (x) == CONST_INT)
5289
        output_addr_const (file, x);
5290
      else
5291
        output_operand_lossage ("invalid %%r value");
5292
      return;
5293
 
5294
    case 'v':
5295
      gcc_assert (GET_CODE (x) == CONST_VECTOR);
5296
      x = simplify_subreg (DImode, x, GET_MODE (x), 0);
5297
      break;
5298
 
5299
    case '+':
5300
      {
5301
        const char *which;
5302
 
5303
        /* For conditional branches, returns or calls, substitute
5304
           sptk, dptk, dpnt, or spnt for %s.  */
5305
        x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
5306
        if (x)
5307
          {
5308
            int pred_val = INTVAL (XEXP (x, 0));
5309
 
5310
            /* Guess top and bottom 2% statically predicted.  */
5311
            if (pred_val < REG_BR_PROB_BASE / 50
5312
                && br_prob_note_reliable_p (x))
5313
              which = ".spnt";
5314
            else if (pred_val < REG_BR_PROB_BASE / 2)
5315
              which = ".dpnt";
5316
            else if (pred_val < REG_BR_PROB_BASE / 100 * 98
5317
                     || !br_prob_note_reliable_p (x))
5318
              which = ".dptk";
5319
            else
5320
              which = ".sptk";
5321
          }
5322
        else if (GET_CODE (current_output_insn) == CALL_INSN)
5323
          which = ".sptk";
5324
        else
5325
          which = ".dptk";
5326
 
5327
        fputs (which, file);
5328
        return;
5329
      }
5330
 
5331
    case ',':
5332
      x = current_insn_predicate;
5333
      if (x)
5334
        {
5335
          unsigned int regno = REGNO (XEXP (x, 0));
5336
          if (GET_CODE (x) == EQ)
5337
            regno += 1;
5338
          fprintf (file, "(%s) ", reg_names [regno]);
5339
        }
5340
      return;
5341
 
5342
    default:
5343
      output_operand_lossage ("ia64_print_operand: unknown code");
5344
      return;
5345
    }
5346
 
5347
  switch (GET_CODE (x))
5348
    {
5349
      /* This happens for the spill/restore instructions.  */
5350
    case POST_INC:
5351
    case POST_DEC:
5352
    case POST_MODIFY:
5353
      x = XEXP (x, 0);
5354
      /* ... fall through ...  */
5355
 
5356
    case REG:
5357
      fputs (reg_names [REGNO (x)], file);
5358
      break;
5359
 
5360
    case MEM:
5361
      {
5362
        rtx addr = XEXP (x, 0);
5363
        if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
5364
          addr = XEXP (addr, 0);
5365
        fprintf (file, "[%s]", reg_names [REGNO (addr)]);
5366
        break;
5367
      }
5368
 
5369
    default:
5370
      output_addr_const (file, x);
5371
      break;
5372
    }
5373
 
5374
  return;
5375
}
5376
 
5377
/* Worker function for TARGET_PRINT_OPERAND_PUNCT_VALID_P.  */
5378
 
5379
static bool
5380
ia64_print_operand_punct_valid_p (unsigned char code)
5381
{
5382
  return (code == '+' || code == ',');
5383
}
5384
 
5385
/* Compute a (partial) cost for rtx X.  Return true if the complete
5386
   cost has been computed, and false if subexpressions should be
5387
   scanned.  In either case, *TOTAL contains the cost result.  */
5388
/* ??? This is incomplete.  */
5389
 
5390
static bool
5391
ia64_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
5392
                int *total, bool speed ATTRIBUTE_UNUSED)
5393
{
5394
  switch (code)
5395
    {
5396
    case CONST_INT:
5397
      switch (outer_code)
5398
        {
5399
        case SET:
5400
          *total = satisfies_constraint_J (x) ? 0 : COSTS_N_INSNS (1);
5401
          return true;
5402
        case PLUS:
5403
          if (satisfies_constraint_I (x))
5404
            *total = 0;
5405
          else if (satisfies_constraint_J (x))
5406
            *total = 1;
5407
          else
5408
            *total = COSTS_N_INSNS (1);
5409
          return true;
5410
        default:
5411
          if (satisfies_constraint_K (x) || satisfies_constraint_L (x))
5412
            *total = 0;
5413
          else
5414
            *total = COSTS_N_INSNS (1);
5415
          return true;
5416
        }
5417
 
5418
    case CONST_DOUBLE:
5419
      *total = COSTS_N_INSNS (1);
5420
      return true;
5421
 
5422
    case CONST:
5423
    case SYMBOL_REF:
5424
    case LABEL_REF:
5425
      *total = COSTS_N_INSNS (3);
5426
      return true;
5427
 
5428
    case FMA:
5429
      *total = COSTS_N_INSNS (4);
5430
      return true;
5431
 
5432
    case MULT:
5433
      /* For multiplies wider than HImode, we have to go to the FPU,
5434
         which normally involves copies.  Plus there's the latency
5435
         of the multiply itself, and the latency of the instructions to
5436
         transfer integer regs to FP regs.  */
5437
      if (FLOAT_MODE_P (GET_MODE (x)))
5438
        *total = COSTS_N_INSNS (4);
5439
      else if (GET_MODE_SIZE (GET_MODE (x)) > 2)
5440
        *total = COSTS_N_INSNS (10);
5441
      else
5442
        *total = COSTS_N_INSNS (2);
5443
      return true;
5444
 
5445
    case PLUS:
5446
    case MINUS:
5447
      if (FLOAT_MODE_P (GET_MODE (x)))
5448
        {
5449
          *total = COSTS_N_INSNS (4);
5450
          return true;
5451
        }
5452
      /* FALLTHRU */
5453
 
5454
    case ASHIFT:
5455
    case ASHIFTRT:
5456
    case LSHIFTRT:
5457
      *total = COSTS_N_INSNS (1);
5458
      return true;
5459
 
5460
    case DIV:
5461
    case UDIV:
5462
    case MOD:
5463
    case UMOD:
5464
      /* We make divide expensive, so that divide-by-constant will be
5465
         optimized to a multiply.  */
5466
      *total = COSTS_N_INSNS (60);
5467
      return true;
5468
 
5469
    default:
5470
      return false;
5471
    }
5472
}
5473
 
5474
/* Calculate the cost of moving data from a register in class FROM to
5475
   one in class TO, using MODE.  */
5476
 
5477
static int
5478
ia64_register_move_cost (enum machine_mode mode, reg_class_t from,
5479
                         reg_class_t to)
5480
{
5481
  /* ADDL_REGS is the same as GR_REGS for movement purposes.  */
5482
  if (to == ADDL_REGS)
5483
    to = GR_REGS;
5484
  if (from == ADDL_REGS)
5485
    from = GR_REGS;
5486
 
5487
  /* All costs are symmetric, so reduce cases by putting the
5488
     lower number class as the destination.  */
5489
  if (from < to)
5490
    {
5491
      reg_class_t tmp = to;
5492
      to = from, from = tmp;
5493
    }
5494
 
5495
  /* Moving from FR<->GR in XFmode must be more expensive than 2,
5496
     so that we get secondary memory reloads.  Between FR_REGS,
5497
     we have to make this at least as expensive as memory_move_cost
5498
     to avoid spectacularly poor register class preferencing.  */
5499
  if (mode == XFmode || mode == RFmode)
5500
    {
5501
      if (to != GR_REGS || from != GR_REGS)
5502
        return memory_move_cost (mode, to, false);
5503
      else
5504
        return 3;
5505
    }
5506
 
5507
  switch (to)
5508
    {
5509
    case PR_REGS:
5510
      /* Moving between PR registers takes two insns.  */
5511
      if (from == PR_REGS)
5512
        return 3;
5513
      /* Moving between PR and anything but GR is impossible.  */
5514
      if (from != GR_REGS)
5515
        return memory_move_cost (mode, to, false);
5516
      break;
5517
 
5518
    case BR_REGS:
5519
      /* Moving between BR and anything but GR is impossible.  */
5520
      if (from != GR_REGS && from != GR_AND_BR_REGS)
5521
        return memory_move_cost (mode, to, false);
5522
      break;
5523
 
5524
    case AR_I_REGS:
5525
    case AR_M_REGS:
5526
      /* Moving between AR and anything but GR is impossible.  */
5527
      if (from != GR_REGS)
5528
        return memory_move_cost (mode, to, false);
5529
      break;
5530
 
5531
    case GR_REGS:
5532
    case FR_REGS:
5533
    case FP_REGS:
5534
    case GR_AND_FR_REGS:
5535
    case GR_AND_BR_REGS:
5536
    case ALL_REGS:
5537
      break;
5538
 
5539
    default:
5540
      gcc_unreachable ();
5541
    }
5542
 
5543
  return 2;
5544
}
5545
 
5546
/* Calculate the cost of moving data of MODE from a register to or from
5547
   memory.  */
5548
 
5549
static int
5550
ia64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
5551
                       reg_class_t rclass,
5552
                       bool in ATTRIBUTE_UNUSED)
5553
{
5554
  if (rclass == GENERAL_REGS
5555
      || rclass == FR_REGS
5556
      || rclass == FP_REGS
5557
      || rclass == GR_AND_FR_REGS)
5558
    return 4;
5559
  else
5560
    return 10;
5561
}
5562
 
5563
/* Implement TARGET_PREFERRED_RELOAD_CLASS.  Place additional restrictions
5564
   on RCLASS to use when copying X into that class.  */
5565
 
5566
static reg_class_t
5567
ia64_preferred_reload_class (rtx x, reg_class_t rclass)
5568
{
5569
  switch (rclass)
5570
    {
5571
    case FR_REGS:
5572
    case FP_REGS:
5573
      /* Don't allow volatile mem reloads into floating point registers.
5574
         This is defined to force reload to choose the r/m case instead
5575
         of the f/f case when reloading (set (reg fX) (mem/v)).  */
5576
      if (MEM_P (x) && MEM_VOLATILE_P (x))
5577
        return NO_REGS;
5578
 
5579
      /* Force all unrecognized constants into the constant pool.  */
5580
      if (CONSTANT_P (x))
5581
        return NO_REGS;
5582
      break;
5583
 
5584
    case AR_M_REGS:
5585
    case AR_I_REGS:
5586
      if (!OBJECT_P (x))
5587
        return NO_REGS;
5588
      break;
5589
 
5590
    default:
5591
      break;
5592
    }
5593
 
5594
  return rclass;
5595
}
5596
 
5597
/* This function returns the register class required for a secondary
5598
   register when copying between one of the registers in RCLASS, and X,
5599
   using MODE.  A return value of NO_REGS means that no secondary register
5600
   is required.  */
5601
 
5602
enum reg_class
5603
ia64_secondary_reload_class (enum reg_class rclass,
5604
                             enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
5605
{
5606
  int regno = -1;
5607
 
5608
  if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
5609
    regno = true_regnum (x);
5610
 
5611
  switch (rclass)
5612
    {
5613
    case BR_REGS:
5614
    case AR_M_REGS:
5615
    case AR_I_REGS:
5616
      /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
5617
         interaction.  We end up with two pseudos with overlapping lifetimes
5618
         both of which are equiv to the same constant, and both of which need
5619
         to be in BR_REGS.  This seems to be a cse bug.  cse_basic_block_end
5620
         changes depending on the path length, which means the qty_first_reg
5621
         check in make_regs_eqv can give different answers at different times.
5622
         At some point I'll probably need a reload_indi pattern to handle
5623
         this.
5624
 
5625
         We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
5626
         wound up with a FP register from GR_AND_FR_REGS.  Extend that to all
5627
         non-general registers for good measure.  */
5628
      if (regno >= 0 && ! GENERAL_REGNO_P (regno))
5629
        return GR_REGS;
5630
 
5631
      /* This is needed if a pseudo used as a call_operand gets spilled to a
5632
         stack slot.  */
5633
      if (GET_CODE (x) == MEM)
5634
        return GR_REGS;
5635
      break;
5636
 
5637
    case FR_REGS:
5638
    case FP_REGS:
5639
      /* Need to go through general registers to get to other class regs.  */
5640
      if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
5641
        return GR_REGS;
5642
 
5643
      /* This can happen when a paradoxical subreg is an operand to the
5644
         muldi3 pattern.  */
5645
      /* ??? This shouldn't be necessary after instruction scheduling is
5646
         enabled, because paradoxical subregs are not accepted by
5647
         register_operand when INSN_SCHEDULING is defined.  Or alternatively,
5648
         stop the paradoxical subreg stupidity in the *_operand functions
5649
         in recog.c.  */
5650
      if (GET_CODE (x) == MEM
5651
          && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
5652
              || GET_MODE (x) == QImode))
5653
        return GR_REGS;
5654
 
5655
      /* This can happen because of the ior/and/etc patterns that accept FP
5656
         registers as operands.  If the third operand is a constant, then it
5657
         needs to be reloaded into a FP register.  */
5658
      if (GET_CODE (x) == CONST_INT)
5659
        return GR_REGS;
5660
 
5661
      /* This can happen because of register elimination in a muldi3 insn.
5662
         E.g. `26107 * (unsigned long)&u'.  */
5663
      if (GET_CODE (x) == PLUS)
5664
        return GR_REGS;
5665
      break;
5666
 
5667
    case PR_REGS:
5668
      /* ??? This happens if we cse/gcse a BImode value across a call,
5669
         and the function has a nonlocal goto.  This is because global
5670
         does not allocate call crossing pseudos to hard registers when
5671
         crtl->has_nonlocal_goto is true.  This is relatively
5672
         common for C++ programs that use exceptions.  To reproduce,
5673
         return NO_REGS and compile libstdc++.  */
5674
      if (GET_CODE (x) == MEM)
5675
        return GR_REGS;
5676
 
5677
      /* This can happen when we take a BImode subreg of a DImode value,
5678
         and that DImode value winds up in some non-GR register.  */
5679
      if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
5680
        return GR_REGS;
5681
      break;
5682
 
5683
    default:
5684
      break;
5685
    }
5686
 
5687
  return NO_REGS;
5688
}
5689
 
5690
 
5691
/* Implement targetm.unspec_may_trap_p hook.  */
5692
static int
5693
ia64_unspec_may_trap_p (const_rtx x, unsigned flags)
5694
{
5695
  if (GET_CODE (x) == UNSPEC)
5696
    {
5697
      switch (XINT (x, 1))
5698
        {
5699
        case UNSPEC_LDA:
5700
        case UNSPEC_LDS:
5701
        case UNSPEC_LDSA:
5702
        case UNSPEC_LDCCLR:
5703
        case UNSPEC_CHKACLR:
5704
        case UNSPEC_CHKS:
5705
          /* These unspecs are just wrappers.  */
5706
          return may_trap_p_1 (XVECEXP (x, 0, 0), flags);
5707
        }
5708
    }
5709
 
5710
  return default_unspec_may_trap_p (x, flags);
5711
}
5712
 
5713
 
5714
/* Parse the -mfixed-range= option string.  */
5715
 
5716
static void
5717
fix_range (const char *const_str)
5718
{
5719
  int i, first, last;
5720
  char *str, *dash, *comma;
5721
 
5722
  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
5723
     REG2 are either register names or register numbers.  The effect
5724
     of this option is to mark the registers in the range from REG1 to
5725
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
5726
     used, e.g., to ensure that kernel mode code doesn't use f32-f127.  */
5727
 
5728
  i = strlen (const_str);
5729
  str = (char *) alloca (i + 1);
5730
  memcpy (str, const_str, i + 1);
5731
 
5732
  while (1)
5733
    {
5734
      dash = strchr (str, '-');
5735
      if (!dash)
5736
        {
5737
          warning (0, "value of -mfixed-range must have form REG1-REG2");
5738
          return;
5739
        }
5740
      *dash = '\0';
5741
 
5742
      comma = strchr (dash + 1, ',');
5743
      if (comma)
5744
        *comma = '\0';
5745
 
5746
      first = decode_reg_name (str);
5747
      if (first < 0)
5748
        {
5749
          warning (0, "unknown register name: %s", str);
5750
          return;
5751
        }
5752
 
5753
      last = decode_reg_name (dash + 1);
5754
      if (last < 0)
5755
        {
5756
          warning (0, "unknown register name: %s", dash + 1);
5757
          return;
5758
        }
5759
 
5760
      *dash = '-';
5761
 
5762
      if (first > last)
5763
        {
5764
          warning (0, "%s-%s is an empty range", str, dash + 1);
5765
          return;
5766
        }
5767
 
5768
      for (i = first; i <= last; ++i)
5769
        fixed_regs[i] = call_used_regs[i] = 1;
5770
 
5771
      if (!comma)
5772
        break;
5773
 
5774
      *comma = ',';
5775
      str = comma + 1;
5776
    }
5777
}
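
/* A stripped-down, stand-alone model of the option syntax accepted above.
   Register-name decoding and the fixed_regs bookkeeping are replaced by a
   plain printf; parse_ranges is a hypothetical helper that only shows the
   dash/comma walk.  */
#if 0
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static void
parse_ranges (const char *const_str)
{
  char *str = (char *) malloc (strlen (const_str) + 1);
  char *p = str;

  if (!str)
    return;
  strcpy (str, const_str);
  while (p)
    {
      char *comma = strchr (p, ',');
      char *dash;

      if (comma)
        *comma = '\0';
      dash = strchr (p, '-');
      if (!dash)
        {
          fprintf (stderr, "value must have form REG1-REG2\n");
          break;
        }
      *dash = '\0';
      printf ("fix %s through %s\n", p, dash + 1);
      p = comma ? comma + 1 : NULL;
    }
  free (str);
}

int
main (void)
{
  parse_ranges ("f32-f127,r24-r31");
  return 0;
}
#endif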
5778
 
5779
/* Implement TARGET_OPTION_OVERRIDE.  */
5780
 
5781
static void
5782
ia64_option_override (void)
5783
{
5784
  unsigned int i;
5785
  cl_deferred_option *opt;
5786
  VEC(cl_deferred_option,heap) *vec
5787
    = (VEC(cl_deferred_option,heap) *) ia64_deferred_options;
5788
 
5789
  FOR_EACH_VEC_ELT (cl_deferred_option, vec, i, opt)
5790
    {
5791
      switch (opt->opt_index)
5792
        {
5793
        case OPT_mfixed_range_:
5794
          fix_range (opt->arg);
5795
          break;
5796
 
5797
        default:
5798
          gcc_unreachable ();
5799
        }
5800
    }
5801
 
5802
  if (TARGET_AUTO_PIC)
5803
    target_flags |= MASK_CONST_GP;
5804
 
5805
  /* Numerous experiments show that IRA-based loop pressure
5806
     calculation works better for RTL loop invariant motion on targets
5807
     with enough (>= 32) registers.  It is an expensive optimization.
5808
     So it is on only for peak performance.  */
5809
  if (optimize >= 3)
5810
    flag_ira_loop_pressure = 1;
5811
 
5812
 
5813
  ia64_section_threshold = (global_options_set.x_g_switch_value
5814
                            ? g_switch_value
5815
                            : IA64_DEFAULT_GVALUE);
5816
 
5817
  init_machine_status = ia64_init_machine_status;
5818
 
5819
  if (align_functions <= 0)
5820
    align_functions = 64;
5821
  if (align_loops <= 0)
5822
    align_loops = 32;
5823
  if (TARGET_ABI_OPEN_VMS)
5824
    flag_no_common = 1;
5825
 
5826
  ia64_override_options_after_change();
5827
}
5828
 
5829
/* Implement targetm.override_options_after_change.  */
5830
 
5831
static void
5832
ia64_override_options_after_change (void)
5833
{
5834
  if (optimize >= 3
5835
      && !global_options_set.x_flag_selective_scheduling
5836
      && !global_options_set.x_flag_selective_scheduling2)
5837
    {
5838
      flag_selective_scheduling2 = 1;
5839
      flag_sel_sched_pipelining = 1;
5840
    }
5841
  if (mflag_sched_control_spec == 2)
5842
    {
5843
      /* Control speculation is on by default for the selective scheduler,
5844
         but not for the Haifa scheduler.  */
5845
      mflag_sched_control_spec = flag_selective_scheduling2 ? 1 : 0;
5846
    }
5847
  if (flag_sel_sched_pipelining && flag_auto_inc_dec)
5848
    {
5849
      /* FIXME: remove this when we'd implement breaking autoinsns as
5850
         a transformation.  */
5851
      flag_auto_inc_dec = 0;
5852
    }
5853
}
5854
 
5855
/* Initialize the record of emitted frame related registers.  */
5856
 
5857
void ia64_init_expanders (void)
5858
{
5859
  memset (&emitted_frame_related_regs, 0, sizeof (emitted_frame_related_regs));
5860
}
5861
 
5862
static struct machine_function *
5863
ia64_init_machine_status (void)
5864
{
5865
  return ggc_alloc_cleared_machine_function ();
5866
}
5867
 
5868
static enum attr_itanium_class ia64_safe_itanium_class (rtx);
5869
static enum attr_type ia64_safe_type (rtx);
5870
 
5871
static enum attr_itanium_class
5872
ia64_safe_itanium_class (rtx insn)
5873
{
5874
  if (recog_memoized (insn) >= 0)
5875
    return get_attr_itanium_class (insn);
5876
  else if (DEBUG_INSN_P (insn))
5877
    return ITANIUM_CLASS_IGNORE;
5878
  else
5879
    return ITANIUM_CLASS_UNKNOWN;
5880
}
5881
 
5882
static enum attr_type
5883
ia64_safe_type (rtx insn)
5884
{
5885
  if (recog_memoized (insn) >= 0)
5886
    return get_attr_type (insn);
5887
  else
5888
    return TYPE_UNKNOWN;
5889
}
5890
 
5891
/* The following collection of routines emits instruction group stop bits as
5892
   necessary to avoid dependencies.  */
5893
 
5894
/* Need to track some additional registers as far as serialization is
5895
   concerned so we can properly handle br.call and br.ret.  We could
5896
   make these registers visible to gcc, but since these registers are
5897
   never explicitly used in gcc generated code, it seems wasteful to
5898
   do so (plus it would make the call and return patterns needlessly
5899
   complex).  */
5900
#define REG_RP          (BR_REG (0))
5901
#define REG_AR_CFM      (FIRST_PSEUDO_REGISTER + 1)
5902
/* This is used for volatile asms which may require a stop bit immediately
5903
   before and after them.  */
5904
#define REG_VOLATILE    (FIRST_PSEUDO_REGISTER + 2)
5905
#define AR_UNAT_BIT_0   (FIRST_PSEUDO_REGISTER + 3)
5906
#define NUM_REGS        (AR_UNAT_BIT_0 + 64)
5907
 
5908
/* For each register, we keep track of how it has been written in the
5909
   current instruction group.
5910
 
5911
   If a register is written unconditionally (no qualifying predicate),
5912
   WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
5913
 
5914
   If a register is written if its qualifying predicate P is true, we
5915
   set WRITE_COUNT to 1 and FIRST_PRED to P.  Later on, the same register
5916
   may be written again by the complement of P (P^1) and when this happens,
5917
   WRITE_COUNT gets set to 2.
5918
 
5919
   The result of this is that whenever an insn attempts to write a register
5920
   whose WRITE_COUNT is two, we need to issue an insn group barrier first.
5921
 
5922
   If a predicate register is written by a floating-point insn, we set
5923
   WRITTEN_BY_FP to true.
5924
 
5925
   If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
5926
   to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true.  */
5927
 
5928
#if GCC_VERSION >= 4000
5929
#define RWS_FIELD_TYPE __extension__ unsigned short
5930
#else
5931
#define RWS_FIELD_TYPE unsigned int
5932
#endif
5933
struct reg_write_state
5934
{
5935
  RWS_FIELD_TYPE write_count : 2;
5936
  RWS_FIELD_TYPE first_pred : 10;
5937
  RWS_FIELD_TYPE written_by_fp : 1;
5938
  RWS_FIELD_TYPE written_by_and : 1;
5939
  RWS_FIELD_TYPE written_by_or : 1;
5940
};
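
/* The WRITE_COUNT rules described above behave like a small state machine:
   an unpredicated write goes straight to 2, a predicated write goes to 1,
   and a second write under the complementary predicate ends at 2 as well.
   A sketch of just that counter, ignoring the and/or and FP flags;
   record_write is a hypothetical helper.  */
#if 0
#include <stdio.h>

struct wstate { int write_count; int first_pred; };

static void
record_write (struct wstate *ws, int pred)
{
  if (pred == 0)                        /* unconditional write */
    ws->write_count = 2;
  else if (ws->write_count == 0)        /* first predicated write */
    {
      ws->write_count = 1;
      ws->first_pred = pred;
    }
  else                                  /* e.g. complementary predicate */
    ws->write_count = 2;
}

int
main (void)
{
  struct wstate ws = { 0, 0 };

  record_write (&ws, 6);                /* (p6) r14 = ...  */
  printf ("%d\n", ws.write_count);      /* 1 */
  record_write (&ws, 7);                /* (p7) r14 = ...  */
  printf ("%d\n", ws.write_count);      /* 2 */
  return 0;
}
#endif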
5941
 
5942
/* Cumulative info for the current instruction group.  */
5943
struct reg_write_state rws_sum[NUM_REGS];
5944
#ifdef ENABLE_CHECKING
5945
/* Bitmap whether a register has been written in the current insn.  */
5946
HARD_REG_ELT_TYPE rws_insn[(NUM_REGS + HOST_BITS_PER_WIDEST_FAST_INT - 1)
5947
                           / HOST_BITS_PER_WIDEST_FAST_INT];
5948
 
5949
static inline void
5950
rws_insn_set (int regno)
5951
{
5952
  gcc_assert (!TEST_HARD_REG_BIT (rws_insn, regno));
5953
  SET_HARD_REG_BIT (rws_insn, regno);
5954
}
5955
 
5956
static inline int
5957
rws_insn_test (int regno)
5958
{
5959
  return TEST_HARD_REG_BIT (rws_insn, regno);
5960
}
5961
#else
5962
/* When not checking, track just REG_AR_CFM and REG_VOLATILE.  */
5963
unsigned char rws_insn[2];
5964
 
5965
static inline void
5966
rws_insn_set (int regno)
5967
{
5968
  if (regno == REG_AR_CFM)
5969
    rws_insn[0] = 1;
5970
  else if (regno == REG_VOLATILE)
5971
    rws_insn[1] = 1;
5972
}
5973
 
5974
static inline int
5975
rws_insn_test (int regno)
5976
{
5977
  if (regno == REG_AR_CFM)
5978
    return rws_insn[0];
5979
  if (regno == REG_VOLATILE)
5980
    return rws_insn[1];
5981
  return 0;
5982
}
5983
#endif
5984
 
5985
/* Indicates whether this is the first instruction after a stop bit,
5986
   in which case we don't need another stop bit.  Without this,
5987
   ia64_variable_issue will die when scheduling an alloc.  */
5988
static int first_instruction;
5989
 
5990
/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
5991
   RTL for one instruction.  */
5992
struct reg_flags
5993
{
5994
  unsigned int is_write : 1;    /* Is register being written?  */
5995
  unsigned int is_fp : 1;       /* Is register used as part of an fp op?  */
5996
  unsigned int is_branch : 1;   /* Is register used as part of a branch?  */
5997
  unsigned int is_and : 1;      /* Is register used as part of and.orcm?  */
5998
  unsigned int is_or : 1;       /* Is register used as part of or.andcm?  */
5999
  unsigned int is_sibcall : 1;  /* Is this a sibling or normal call?  */
6000
};
6001
 
6002
static void rws_update (int, struct reg_flags, int);
6003
static int rws_access_regno (int, struct reg_flags, int);
6004
static int rws_access_reg (rtx, struct reg_flags, int);
6005
static void update_set_flags (rtx, struct reg_flags *);
6006
static int set_src_needs_barrier (rtx, struct reg_flags, int);
6007
static int rtx_needs_barrier (rtx, struct reg_flags, int);
6008
static void init_insn_group_barriers (void);
6009
static int group_barrier_needed (rtx);
6010
static int safe_group_barrier_needed (rtx);
6011
static int in_safe_group_barrier;
6012
 
6013
/* Update rws_sum for REGNO, which is being written by the current instruction,
6014
   with predicate PRED, and associated register flags in FLAGS.  */
6015
 
6016
static void
6017
rws_update (int regno, struct reg_flags flags, int pred)
6018
{
6019
  if (pred)
6020
    rws_sum[regno].write_count++;
6021
  else
6022
    rws_sum[regno].write_count = 2;
6023
  rws_sum[regno].written_by_fp |= flags.is_fp;
6024
  /* ??? Not tracking and/or across differing predicates.  */
6025
  rws_sum[regno].written_by_and = flags.is_and;
6026
  rws_sum[regno].written_by_or = flags.is_or;
6027
  rws_sum[regno].first_pred = pred;
6028
}
6029
 
6030
/* Handle an access to register REGNO of type FLAGS using predicate register
6031
   PRED.  Update rws_sum array.  Return 1 if this access creates
6032
   a dependency with an earlier instruction in the same group.  */
6033
 
6034
static int
6035
rws_access_regno (int regno, struct reg_flags flags, int pred)
6036
{
6037
  int need_barrier = 0;
6038
 
6039
  gcc_assert (regno < NUM_REGS);
6040
 
6041
  if (! PR_REGNO_P (regno))
6042
    flags.is_and = flags.is_or = 0;
6043
 
6044
  if (flags.is_write)
6045
    {
6046
      int write_count;
6047
 
6048
      rws_insn_set (regno);
6049
      write_count = rws_sum[regno].write_count;
6050
 
6051
      switch (write_count)
6052
        {
6053
        case 0:
6054
          /* The register has not been written yet.  */
6055
          if (!in_safe_group_barrier)
6056
            rws_update (regno, flags, pred);
6057
          break;
6058
 
6059
        case 1:
6060
          /* The register has been written via a predicate.  Treat
6061
             it like an unconditional write and do not try to check
6062
             for complementary pred reg in earlier write.  */
6063
          if (flags.is_and && rws_sum[regno].written_by_and)
6064
            ;
6065
          else if (flags.is_or && rws_sum[regno].written_by_or)
6066
            ;
6067
          else
6068
            need_barrier = 1;
6069
          if (!in_safe_group_barrier)
6070
            rws_update (regno, flags, pred);
6071
          break;
6072
 
6073
        case 2:
6074
          /* The register has been unconditionally written already.  We
6075
             need a barrier.  */
6076
          if (flags.is_and && rws_sum[regno].written_by_and)
6077
            ;
6078
          else if (flags.is_or && rws_sum[regno].written_by_or)
6079
            ;
6080
          else
6081
            need_barrier = 1;
6082
          if (!in_safe_group_barrier)
6083
            {
6084
              rws_sum[regno].written_by_and = flags.is_and;
6085
              rws_sum[regno].written_by_or = flags.is_or;
6086
            }
6087
          break;
6088
 
6089
        default:
6090
          gcc_unreachable ();
6091
        }
6092
    }
6093
  else
6094
    {
6095
      if (flags.is_branch)
6096
        {
6097
          /* Branches have several RAW exceptions that allow us to avoid
6098
             barriers.  */
6099
 
6100
          if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
6101
            /* RAW dependencies on branch regs are permissible as long
6102
               as the writer is a non-branch instruction.  Since we
6103
               never generate code that uses a branch register written
6104
               by a branch instruction, handling this case is
6105
               easy.  */
6106
            return 0;
6107
 
6108
          if (REGNO_REG_CLASS (regno) == PR_REGS
6109
              && ! rws_sum[regno].written_by_fp)
6110
            /* The predicates of a branch are available within the
6111
               same insn group as long as the predicate was written by
6112
               something other than a floating-point instruction.  */
6113
            return 0;
6114
        }
6115
 
6116
      if (flags.is_and && rws_sum[regno].written_by_and)
6117
        return 0;
6118
      if (flags.is_or && rws_sum[regno].written_by_or)
6119
        return 0;
6120
 
6121
      switch (rws_sum[regno].write_count)
6122
        {
6123
        case 0:
6124
          /* The register has not been written yet.  */
6125
          break;
6126
 
6127
        case 1:
6128
          /* The register has been written via a predicate; assume we
6129
             need a barrier (don't check for complementary regs).  */
6130
          need_barrier = 1;
6131
          break;
6132
 
6133
        case 2:
6134
          /* The register has been unconditionally written already.  We
6135
             need a barrier.  */
6136
          need_barrier = 1;
6137
          break;
6138
 
6139
        default:
6140
          gcc_unreachable ();
6141
        }
6142
    }
6143
 
6144
  return need_barrier;
6145
}
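
/* As a concrete sketch of the rule enforced above (assuming the usual
   ia64 assembly syntax), two unconditional writes to the same register
   within one instruction group, e.g.

       mov r14 = r15
       mov r14 = r16

   hit the write_count == 2 case and need a stop bit between them:

       mov r14 = r15 ;;
       mov r14 = r16

   rws_access_regno reports this by returning 1 for the second write.  */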
6146
 
6147
static int
6148
rws_access_reg (rtx reg, struct reg_flags flags, int pred)
6149
{
6150
  int regno = REGNO (reg);
6151
  int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
6152
 
6153
  if (n == 1)
6154
    return rws_access_regno (regno, flags, pred);
6155
  else
6156
    {
6157
      int need_barrier = 0;
6158
      while (--n >= 0)
6159
        need_barrier |= rws_access_regno (regno + n, flags, pred);
6160
      return need_barrier;
6161
    }
6162
}
6163
 
6164
/* Examine X, which is a SET rtx, and update the flags stored in *PFLAGS
6165
   for the current instruction.  */
6166
 
6167
static void
6168
update_set_flags (rtx x, struct reg_flags *pflags)
6169
{
6170
  rtx src = SET_SRC (x);
6171
 
6172
  switch (GET_CODE (src))
6173
    {
6174
    case CALL:
6175
      return;
6176
 
6177
    case IF_THEN_ELSE:
6178
      /* There are four cases here:
6179
         (1) The destination is (pc), in which case this is a branch,
6180
         nothing here applies.
6181
         (2) The destination is ar.lc, in which case this is a
6182
         doloop_end_internal.
6183
         (3) The destination is an fp register, in which case this is
6184
         an fselect instruction.
6185
         (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
6186
         this is a check load.
6187
         In all cases, nothing we do in this function applies.  */
6188
      return;
6189
 
6190
    default:
6191
      if (COMPARISON_P (src)
6192
          && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
6193
        /* Set pflags->is_fp to 1 so that we know we're dealing
6194
           with a floating point comparison when processing the
6195
           destination of the SET.  */
6196
        pflags->is_fp = 1;
6197
 
6198
      /* Discover if this is a parallel comparison.  We only handle
6199
         and.orcm and or.andcm at present, since we must retain a
6200
         strict inverse on the predicate pair.  */
6201
      else if (GET_CODE (src) == AND)
6202
        pflags->is_and = 1;
6203
      else if (GET_CODE (src) == IOR)
6204
        pflags->is_or = 1;
6205
 
6206
      break;
6207
    }
6208
}
6209
 
6210
/* Subroutine of rtx_needs_barrier; this function determines whether the
6211
   source of a given SET rtx found in X needs a barrier.  FLAGS and PRED
6212
   are as in rtx_needs_barrier and give the flags and predicate in
6214
   effect for this insn.  */
6214
 
6215
static int
6216
set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
6217
{
6218
  int need_barrier = 0;
6219
  rtx dst;
6220
  rtx src = SET_SRC (x);
6221
 
6222
  if (GET_CODE (src) == CALL)
6223
    /* We don't need to worry about the result registers that
6224
       get written by a subroutine call.  */
6225
    return rtx_needs_barrier (src, flags, pred);
6226
  else if (SET_DEST (x) == pc_rtx)
6227
    {
6228
      /* X is a conditional branch.  */
6229
      /* ??? This seems redundant, as the caller sets this bit for
6230
         all JUMP_INSNs.  */
6231
      if (!ia64_spec_check_src_p (src))
6232
        flags.is_branch = 1;
6233
      return rtx_needs_barrier (src, flags, pred);
6234
    }
6235
 
6236
  if (ia64_spec_check_src_p (src))
6237
    /* Avoid checking one register twice (in condition
6238
       and in 'then' section) for ldc pattern.  */
6239
    {
6240
      gcc_assert (REG_P (XEXP (src, 2)));
6241
      need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);
6242
 
6243
      /* We process MEM below.  */
6244
      src = XEXP (src, 1);
6245
    }
6246
 
6247
  need_barrier |= rtx_needs_barrier (src, flags, pred);
6248
 
6249
  dst = SET_DEST (x);
6250
  if (GET_CODE (dst) == ZERO_EXTRACT)
6251
    {
6252
      need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
6253
      need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
6254
    }
6255
  return need_barrier;
6256
}
6257
 
6258
/* Handle an access to rtx X of type FLAGS using predicate register
6259
   PRED.  Return 1 if this access creates a dependency with an earlier
6260
   instruction in the same group.  */
6261
 
6262
static int
6263
rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
6264
{
6265
  int i, j;
6266
  int is_complemented = 0;
6267
  int need_barrier = 0;
6268
  const char *format_ptr;
6269
  struct reg_flags new_flags;
6270
  rtx cond;
6271
 
6272
  if (! x)
6273
    return 0;
6274
 
6275
  new_flags = flags;
6276
 
6277
  switch (GET_CODE (x))
6278
    {
6279
    case SET:
6280
      update_set_flags (x, &new_flags);
6281
      need_barrier = set_src_needs_barrier (x, new_flags, pred);
6282
      if (GET_CODE (SET_SRC (x)) != CALL)
6283
        {
6284
          new_flags.is_write = 1;
6285
          need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
6286
        }
6287
      break;
6288
 
6289
    case CALL:
6290
      new_flags.is_write = 0;
6291
      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6292
 
6293
      /* Avoid multiple register writes, in case this is a pattern with
6294
         multiple CALL rtx.  This avoids a failure in rws_access_reg.  */
6295
      if (! flags.is_sibcall && ! rws_insn_test (REG_AR_CFM))
6296
        {
6297
          new_flags.is_write = 1;
6298
          need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
6299
          need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
6300
          need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
6301
        }
6302
      break;
6303
 
6304
    case COND_EXEC:
6305
      /* X is a predicated instruction.  */
6306
 
6307
      cond = COND_EXEC_TEST (x);
6308
      gcc_assert (!pred);
6309
      need_barrier = rtx_needs_barrier (cond, flags, 0);
6310
 
6311
      if (GET_CODE (cond) == EQ)
6312
        is_complemented = 1;
6313
      cond = XEXP (cond, 0);
6314
      gcc_assert (GET_CODE (cond) == REG
6315
                  && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
6316
      pred = REGNO (cond);
6317
      if (is_complemented)
6318
        ++pred;
6319
 
6320
      need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
6321
      return need_barrier;
6322
 
6323
    case CLOBBER:
6324
    case USE:
6325
      /* Clobber & use are for earlier compiler-phases only.  */
6326
      break;
6327
 
6328
    case ASM_OPERANDS:
6329
    case ASM_INPUT:
6330
      /* We always emit stop bits for traditional asms.  We emit stop bits
6331
         for volatile extended asms if TARGET_VOL_ASM_STOP is true.  */
6332
      if (GET_CODE (x) != ASM_OPERANDS
6333
          || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
6334
        {
6335
          /* Avoid writing the register multiple times if we have multiple
6336
             asm outputs.  This avoids a failure in rws_access_reg.  */
6337
          if (! rws_insn_test (REG_VOLATILE))
6338
            {
6339
              new_flags.is_write = 1;
6340
              rws_access_regno (REG_VOLATILE, new_flags, pred);
6341
            }
6342
          return 1;
6343
        }
6344
 
6345
      /* For all ASM_OPERANDS, we must traverse the vector of input operands.
6346
         We cannot just fall through here since then we would be confused
6347
         by the ASM_INPUT rtxs inside ASM_OPERANDS, which do not indicate
6348
         traditional asms, unlike their normal usage.  */
6349
 
6350
      for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
6351
        if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
6352
          need_barrier = 1;
6353
      break;
6354
 
6355
    case PARALLEL:
6356
      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6357
        {
6358
          rtx pat = XVECEXP (x, 0, i);
6359
          switch (GET_CODE (pat))
6360
            {
6361
            case SET:
6362
              update_set_flags (pat, &new_flags);
6363
              need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
6364
              break;
6365
 
6366
            case USE:
6367
            case CALL:
6368
            case ASM_OPERANDS:
6369
              need_barrier |= rtx_needs_barrier (pat, flags, pred);
6370
              break;
6371
 
6372
            case CLOBBER:
6373
              if (REG_P (XEXP (pat, 0))
6374
                  && extract_asm_operands (x) != NULL_RTX
6375
                  && REGNO (XEXP (pat, 0)) != AR_UNAT_REGNUM)
6376
                {
6377
                  new_flags.is_write = 1;
6378
                  need_barrier |= rtx_needs_barrier (XEXP (pat, 0),
6379
                                                     new_flags, pred);
6380
                  new_flags = flags;
6381
                }
6382
              break;
6383
 
6384
            case RETURN:
6385
              break;
6386
 
6387
            default:
6388
              gcc_unreachable ();
6389
            }
6390
        }
6391
      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6392
        {
6393
          rtx pat = XVECEXP (x, 0, i);
6394
          if (GET_CODE (pat) == SET)
6395
            {
6396
              if (GET_CODE (SET_SRC (pat)) != CALL)
6397
                {
6398
                  new_flags.is_write = 1;
6399
                  need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
6400
                                                     pred);
6401
                }
6402
            }
6403
          else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
6404
            need_barrier |= rtx_needs_barrier (pat, flags, pred);
6405
        }
6406
      break;
6407
 
6408
    case SUBREG:
6409
      need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
6410
      break;
6411
    case REG:
6412
      if (REGNO (x) == AR_UNAT_REGNUM)
6413
        {
6414
          for (i = 0; i < 64; ++i)
6415
            need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
6416
        }
6417
      else
6418
        need_barrier = rws_access_reg (x, flags, pred);
6419
      break;
6420
 
6421
    case MEM:
6422
      /* Find the regs used in memory address computation.  */
6423
      new_flags.is_write = 0;
6424
      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6425
      break;
6426
 
6427
    case CONST_INT:   case CONST_DOUBLE:  case CONST_VECTOR:
6428
    case SYMBOL_REF:  case LABEL_REF:     case CONST:
6429
      break;
6430
 
6431
      /* Operators with side-effects.  */
6432
    case POST_INC:    case POST_DEC:
6433
      gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
6434
 
6435
      new_flags.is_write = 0;
6436
      need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
6437
      new_flags.is_write = 1;
6438
      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
6439
      break;
6440
 
6441
    case POST_MODIFY:
6442
      gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
6443
 
6444
      new_flags.is_write = 0;
6445
      need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
6446
      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6447
      new_flags.is_write = 1;
6448
      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
6449
      break;
6450
 
6451
      /* Handle common unary and binary ops for efficiency.  */
6452
    case COMPARE:  case PLUS:    case MINUS:   case MULT:      case DIV:
6453
    case MOD:      case UDIV:    case UMOD:    case AND:       case IOR:
6454
    case XOR:      case ASHIFT:  case ROTATE:  case ASHIFTRT:  case LSHIFTRT:
6455
    case ROTATERT: case SMIN:    case SMAX:    case UMIN:      case UMAX:
6456
    case NE:       case EQ:      case GE:      case GT:        case LE:
6457
    case LT:       case GEU:     case GTU:     case LEU:       case LTU:
6458
      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6459
      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6460
      break;
6461
 
6462
    case NEG:      case NOT:            case SIGN_EXTEND:     case ZERO_EXTEND:
6463
    case TRUNCATE: case FLOAT_EXTEND:   case FLOAT_TRUNCATE:  case FLOAT:
6464
    case FIX:      case UNSIGNED_FLOAT: case UNSIGNED_FIX:    case ABS:
6465
    case SQRT:     case FFS:            case POPCOUNT:
6466
      need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6467
      break;
6468
 
6469
    case VEC_SELECT:
6470
      /* VEC_SELECT's second argument is a PARALLEL with integers that
6471
         describe the elements selected.  On ia64, those integers are
6472
         always constants.  Avoid walking the PARALLEL so that we don't
6473
         get confused with "normal" parallels and then die.  */
6474
      need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6475
      break;
6476
 
6477
    case UNSPEC:
6478
      switch (XINT (x, 1))
6479
        {
6480
        case UNSPEC_LTOFF_DTPMOD:
6481
        case UNSPEC_LTOFF_DTPREL:
6482
        case UNSPEC_DTPREL:
6483
        case UNSPEC_LTOFF_TPREL:
6484
        case UNSPEC_TPREL:
6485
        case UNSPEC_PRED_REL_MUTEX:
6486
        case UNSPEC_PIC_CALL:
6487
        case UNSPEC_MF:
6488
        case UNSPEC_FETCHADD_ACQ:
6489
        case UNSPEC_FETCHADD_REL:
6490
        case UNSPEC_BSP_VALUE:
6491
        case UNSPEC_FLUSHRS:
6492
        case UNSPEC_BUNDLE_SELECTOR:
6493
          break;
6494
 
6495
        case UNSPEC_GR_SPILL:
6496
        case UNSPEC_GR_RESTORE:
6497
          {
6498
            HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
6499
            HOST_WIDE_INT bit = (offset >> 3) & 63;
6500
 
6501
            need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6502
            new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
6503
            need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
6504
                                              new_flags, pred);
6505
            break;
6506
          }
6507
 
6508
        case UNSPEC_FR_SPILL:
6509
        case UNSPEC_FR_RESTORE:
6510
        case UNSPEC_GETF_EXP:
6511
        case UNSPEC_SETF_EXP:
6512
        case UNSPEC_ADDP4:
6513
        case UNSPEC_FR_SQRT_RECIP_APPROX:
6514
        case UNSPEC_FR_SQRT_RECIP_APPROX_RES:
6515
        case UNSPEC_LDA:
6516
        case UNSPEC_LDS:
6517
        case UNSPEC_LDS_A:
6518
        case UNSPEC_LDSA:
6519
        case UNSPEC_CHKACLR:
6520
        case UNSPEC_CHKS:
6521
          need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6522
          break;
6523
 
6524
        case UNSPEC_FR_RECIP_APPROX:
6525
        case UNSPEC_SHRP:
6526
        case UNSPEC_COPYSIGN:
6527
        case UNSPEC_FR_RECIP_APPROX_RES:
6528
          need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6529
          need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6530
          break;
6531
 
6532
        case UNSPEC_CMPXCHG_ACQ:
6533
        case UNSPEC_CMPXCHG_REL:
6534
          need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6535
          need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
6536
          break;
6537
 
6538
        default:
6539
          gcc_unreachable ();
6540
        }
6541
      break;
6542
 
6543
    case UNSPEC_VOLATILE:
6544
      switch (XINT (x, 1))
6545
        {
6546
        case UNSPECV_ALLOC:
6547
          /* Alloc must always be the first instruction of a group.
6548
             We force this by always returning true.  */
6549
          /* ??? We might get better scheduling if we explicitly check for
6550
             input/local/output register dependencies, and modify the
6551
             scheduler so that alloc is always reordered to the start of
6552
             the current group.  We could then eliminate all of the
6553
             first_instruction code.  */
6554
          rws_access_regno (AR_PFS_REGNUM, flags, pred);
6555
 
6556
          new_flags.is_write = 1;
6557
          rws_access_regno (REG_AR_CFM, new_flags, pred);
6558
          return 1;
6559
 
6560
        case UNSPECV_SET_BSP:
6561
          need_barrier = 1;
6562
          break;
6563
 
6564
        case UNSPECV_BLOCKAGE:
6565
        case UNSPECV_INSN_GROUP_BARRIER:
6566
        case UNSPECV_BREAK:
6567
        case UNSPECV_PSAC_ALL:
6568
        case UNSPECV_PSAC_NORMAL:
6569
          return 0;
6570
 
6571
        default:
6572
          gcc_unreachable ();
6573
        }
6574
      break;
6575
 
6576
    case RETURN:
6577
      new_flags.is_write = 0;
6578
      need_barrier  = rws_access_regno (REG_RP, flags, pred);
6579
      need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
6580
 
6581
      new_flags.is_write = 1;
6582
      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6583
      need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
6584
      break;
6585
 
6586
    default:
6587
      format_ptr = GET_RTX_FORMAT (GET_CODE (x));
6588
      for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6589
        switch (format_ptr[i])
6590
          {
6591
          case '0':     /* unused field */
6592
          case 'i':     /* integer */
6593
          case 'n':     /* note */
6594
          case 'w':     /* wide integer */
6595
          case 's':     /* pointer to string */
6596
          case 'S':     /* optional pointer to string */
6597
            break;
6598
 
6599
          case 'e':
6600
            if (rtx_needs_barrier (XEXP (x, i), flags, pred))
6601
              need_barrier = 1;
6602
            break;
6603
 
6604
          case 'E':
6605
            for (j = XVECLEN (x, i) - 1; j >= 0; --j)
6606
              if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
6607
                need_barrier = 1;
6608
            break;
6609
 
6610
          default:
6611
            gcc_unreachable ();
6612
          }
6613
      break;
6614
    }
6615
  return need_barrier;
6616
}
6617
 
6618
/* Clear out the state for group_barrier_needed at the start of a
6619
   sequence of insns.  */
6620
 
6621
static void
6622
init_insn_group_barriers (void)
6623
{
6624
  memset (rws_sum, 0, sizeof (rws_sum));
6625
  first_instruction = 1;
6626
}
6627
 
6628
/* Given the current state, determine whether a group barrier (a stop bit) is
6629
   necessary before INSN.  Return nonzero if so.  This modifies the state to
6630
   include the effects of INSN as a side-effect.  */
6631
 
6632
static int
6633
group_barrier_needed (rtx insn)
6634
{
6635
  rtx pat;
6636
  int need_barrier = 0;
6637
  struct reg_flags flags;
6638
 
6639
  memset (&flags, 0, sizeof (flags));
6640
  switch (GET_CODE (insn))
6641
    {
6642
    case NOTE:
6643
    case DEBUG_INSN:
6644
      break;
6645
 
6646
    case BARRIER:
6647
      /* A barrier doesn't imply an instruction group boundary.  */
6648
      break;
6649
 
6650
    case CODE_LABEL:
6651
      memset (rws_insn, 0, sizeof (rws_insn));
6652
      return 1;
6653
 
6654
    case CALL_INSN:
6655
      flags.is_branch = 1;
6656
      flags.is_sibcall = SIBLING_CALL_P (insn);
6657
      memset (rws_insn, 0, sizeof (rws_insn));
6658
 
6659
      /* Don't bundle a call following another call.  */
6660
      if ((pat = prev_active_insn (insn))
6661
          && GET_CODE (pat) == CALL_INSN)
6662
        {
6663
          need_barrier = 1;
6664
          break;
6665
        }
6666
 
6667
      need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
6668
      break;
6669
 
6670
    case JUMP_INSN:
6671
      if (!ia64_spec_check_p (insn))
6672
        flags.is_branch = 1;
6673
 
6674
      /* Don't bundle a jump following a call.  */
6675
      if ((pat = prev_active_insn (insn))
6676
          && GET_CODE (pat) == CALL_INSN)
6677
        {
6678
          need_barrier = 1;
6679
          break;
6680
        }
6681
      /* FALLTHRU */
6682
 
6683
    case INSN:
6684
      if (GET_CODE (PATTERN (insn)) == USE
6685
          || GET_CODE (PATTERN (insn)) == CLOBBER)
6686
        /* Don't care about USE and CLOBBER "insns"---those are used to
6687
           indicate to the optimizer that it shouldn't get rid of
6688
           certain operations.  */
6689
        break;
6690
 
6691
      pat = PATTERN (insn);
6692
 
6693
      /* Ug.  Hack hacks hacked elsewhere.  */
6694
      switch (recog_memoized (insn))
6695
        {
6696
          /* We play dependency tricks with the epilogue in order
6697
             to get proper schedules.  Undo this for dv analysis.  */
6698
        case CODE_FOR_epilogue_deallocate_stack:
6699
        case CODE_FOR_prologue_allocate_stack:
6700
          pat = XVECEXP (pat, 0, 0);
6701
          break;
6702
 
6703
          /* The pattern we use for br.cloop confuses the code above.
6704
             The second element of the vector is representative.  */
6705
        case CODE_FOR_doloop_end_internal:
6706
          pat = XVECEXP (pat, 0, 1);
6707
          break;
6708
 
6709
          /* Doesn't generate code.  */
6710
        case CODE_FOR_pred_rel_mutex:
6711
        case CODE_FOR_prologue_use:
6712
          return 0;
6713
 
6714
        default:
6715
          break;
6716
        }
6717
 
6718
      memset (rws_insn, 0, sizeof (rws_insn));
6719
      need_barrier = rtx_needs_barrier (pat, flags, 0);
6720
 
6721
      /* Check to see if the previous instruction was a volatile
6722
         asm.  */
6723
      if (! need_barrier)
6724
        need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
6725
 
6726
      break;
6727
 
6728
    default:
6729
      gcc_unreachable ();
6730
    }
6731
 
6732
  if (first_instruction && INSN_P (insn)
6733
      && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
6734
      && GET_CODE (PATTERN (insn)) != USE
6735
      && GET_CODE (PATTERN (insn)) != CLOBBER)
6736
    {
6737
      need_barrier = 0;
6738
      first_instruction = 0;
6739
    }
6740
 
6741
  return need_barrier;
6742
}
6743
 
6744
/* Like group_barrier_needed, but do not clobber the current state.  */
6745
 
6746
static int
6747
safe_group_barrier_needed (rtx insn)
6748
{
6749
  int saved_first_instruction;
6750
  int t;
6751
 
6752
  saved_first_instruction = first_instruction;
6753
  in_safe_group_barrier = 1;
6754
 
6755
  t = group_barrier_needed (insn);
6756
 
6757
  first_instruction = saved_first_instruction;
6758
  in_safe_group_barrier = 0;
6759
 
6760
  return t;
6761
}
6762
 
6763
/* Scan the current function and insert stop bits as necessary to
6764
   eliminate dependencies.  This function assumes that a final
6765
   instruction scheduling pass has been run which has already
6766
   inserted most of the necessary stop bits.  This function only
6767
   inserts new ones at basic block boundaries, since these are
6768
   invisible to the scheduler.  */
6769
 
6770
static void
6771
emit_insn_group_barriers (FILE *dump)
6772
{
6773
  rtx insn;
6774
  rtx last_label = 0;
6775
  int insns_since_last_label = 0;
6776
 
6777
  init_insn_group_barriers ();
6778
 
6779
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6780
    {
6781
      if (GET_CODE (insn) == CODE_LABEL)
6782
        {
6783
          if (insns_since_last_label)
6784
            last_label = insn;
6785
          insns_since_last_label = 0;
6786
        }
6787
      else if (GET_CODE (insn) == NOTE
6788
               && NOTE_KIND (insn) == NOTE_INSN_BASIC_BLOCK)
6789
        {
6790
          if (insns_since_last_label)
6791
            last_label = insn;
6792
          insns_since_last_label = 0;
6793
        }
6794
      else if (GET_CODE (insn) == INSN
6795
               && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
6796
               && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
6797
        {
6798
          init_insn_group_barriers ();
6799
          last_label = 0;
6800
        }
6801
      else if (NONDEBUG_INSN_P (insn))
6802
        {
6803
          insns_since_last_label = 1;
6804
 
6805
          if (group_barrier_needed (insn))
6806
            {
6807
              if (last_label)
6808
                {
6809
                  if (dump)
6810
                    fprintf (dump, "Emitting stop before label %d\n",
6811
                             INSN_UID (last_label));
6812
                  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
6813
                  insn = last_label;
6814
 
6815
                  init_insn_group_barriers ();
6816
                  last_label = 0;
6817
                }
6818
            }
6819
        }
6820
    }
6821
}
6822
 
6823
/* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
6824
   This function has to emit all necessary group barriers.  */
6825
 
6826
static void
6827
emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
6828
{
6829
  rtx insn;
6830
 
6831
  init_insn_group_barriers ();
6832
 
6833
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6834
    {
6835
      if (GET_CODE (insn) == BARRIER)
6836
        {
6837
          rtx last = prev_active_insn (insn);
6838
 
6839
          if (! last)
6840
            continue;
6841
          if (GET_CODE (last) == JUMP_INSN
6842
              && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
6843
            last = prev_active_insn (last);
6844
          if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
6845
            emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
6846
 
6847
          init_insn_group_barriers ();
6848
        }
6849
      else if (NONDEBUG_INSN_P (insn))
6850
        {
6851
          if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
6852
            init_insn_group_barriers ();
6853
          else if (group_barrier_needed (insn))
6854
            {
6855
              emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
6856
              init_insn_group_barriers ();
6857
              group_barrier_needed (insn);
6858
            }
6859
        }
6860
    }
6861
}
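
/* Both passes above materialize a stop bit by emitting
   gen_insn_group_barrier (GEN_INT (3)), which appears in the assembly
   output as the ";;" group separator.  */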
6862
 
6863
 
6864
 
6865
/* Instruction scheduling support.  */
6866
 
6867
#define NR_BUNDLES 10
6868
 
6869
/* A list of names of all available bundles.  */
6870
 
6871
static const char *bundle_name [NR_BUNDLES] =
6872
{
6873
  ".mii",
6874
  ".mmi",
6875
  ".mfi",
6876
  ".mmf",
6877
#if NR_BUNDLES == 10
6878
  ".bbb",
6879
  ".mbb",
6880
#endif
6881
  ".mib",
6882
  ".mmb",
6883
  ".mfb",
6884
  ".mlx"
6885
};
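
/* For illustration only (not generated from any particular insn stream),
   a ".mii" bundle provides one M slot followed by two I slots, e.g.

       { .mii
         ld8 r14 = [r15]
         add r16 = r17, r18
         shr r19 = r20, r21 ;;
       }

   get_bundle_name below maps a bundle number to one of the pseudo-ops
   in the table above.  */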
6886
 
6887
/* Nonzero if we should insert stop bits into the schedule.  */
6888
 
6889
int ia64_final_schedule = 0;
6890
 
6891
/* Codes of the corresponding queried units: */
6892
 
6893
static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
6894
static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
6895
 
6896
static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
6897
static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
6898
 
6899
static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
6900
 
6901
/* The following variable value is an insn group barrier.  */
6902
 
6903
static rtx dfa_stop_insn;
6904
 
6905
/* The following variable value is the last issued insn.  */
6906
 
6907
static rtx last_scheduled_insn;
6908
 
6909
/* The following variable value is a pointer to a DFA state used as
6910
   a temporary variable.  */
6911
 
6912
static state_t temp_dfa_state = NULL;
6913
 
6914
/* The following variable value is DFA state after issuing the last
6915
   insn.  */
6916
 
6917
static state_t prev_cycle_state = NULL;
6918
 
6919
/* The following array element values are TRUE if the corresponding
6920
   insn requires stop bits to be added before it.  */
6921
 
6922
static char *stops_p = NULL;
6923
 
6924
/* The following variable is used to set up the array mentioned above.  */
6925
 
6926
static int stop_before_p = 0;
6927
 
6928
/* The following variable value is the length of the arrays `clocks' and
6929
   `add_cycles'. */
6930
 
6931
static int clocks_length;
6932
 
6933
/* The following variable value is number of data speculations in progress.  */
6934
static int pending_data_specs = 0;
6935
 
6936
/* Number of memory references on current and three future processor cycles.  */
6937
static char mem_ops_in_group[4];
6938
 
6939
/* Number of current processor cycle (from scheduler's point of view).  */
6940
static int current_cycle;
6941
 
6942
static rtx ia64_single_set (rtx);
6943
static void ia64_emit_insn_before (rtx, rtx);
6944
 
6945
/* Map a bundle number to its pseudo-op.  */
6946
 
6947
const char *
6948
get_bundle_name (int b)
6949
{
6950
  return bundle_name[b];
6951
}
6952
 
6953
 
6954
/* Return the maximum number of instructions a cpu can issue.  */
6955
 
6956
static int
6957
ia64_issue_rate (void)
6958
{
6959
  return 6;
6960
}
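
/* The issue rate of 6 above corresponds to the two three-instruction
   bundles that an Itanium core can dispatch per clock cycle.  */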
6961
 
6962
/* Helper function - like single_set, but look inside COND_EXEC.  */
6963
 
6964
static rtx
6965
ia64_single_set (rtx insn)
6966
{
6967
  rtx x = PATTERN (insn), ret;
6968
  if (GET_CODE (x) == COND_EXEC)
6969
    x = COND_EXEC_CODE (x);
6970
  if (GET_CODE (x) == SET)
6971
    return x;
6972
 
6973
  /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
6974
     Although they are not classical single sets, the second set is there just
6975
     to protect it from moving past FP-relative stack accesses.  */
6976
  switch (recog_memoized (insn))
6977
    {
6978
    case CODE_FOR_prologue_allocate_stack:
6979
    case CODE_FOR_epilogue_deallocate_stack:
6980
      ret = XVECEXP (x, 0, 0);
6981
      break;
6982
 
6983
    default:
6984
      ret = single_set_2 (insn, x);
6985
      break;
6986
    }
6987
 
6988
  return ret;
6989
}
6990
 
6991
/* Adjust the cost of a scheduling dependency.
6992
   Return the new cost of a dependency of type DEP_TYPE or INSN on DEP_INSN.
6993
   COST is the current cost, DW is dependency weakness.  */
6994
static int
6995
ia64_adjust_cost_2 (rtx insn, int dep_type1, rtx dep_insn, int cost, dw_t dw)
6996
{
6997
  enum reg_note dep_type = (enum reg_note) dep_type1;
6998
  enum attr_itanium_class dep_class;
6999
  enum attr_itanium_class insn_class;
7000
 
7001
  insn_class = ia64_safe_itanium_class (insn);
7002
  dep_class = ia64_safe_itanium_class (dep_insn);
7003
 
7004
  /* Treat true memory dependencies separately.  Ignore apparent true
7005
     dependence between store and call (call has a MEM inside a SYMBOL_REF).  */
7006
  if (dep_type == REG_DEP_TRUE
7007
      && (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF)
7008
      && (insn_class == ITANIUM_CLASS_BR || insn_class == ITANIUM_CLASS_SCALL))
7009
    return 0;
7010
 
7011
  if (dw == MIN_DEP_WEAK)
7012
    /* Store and load are likely to alias, use higher cost to avoid stall.  */
7013
    return PARAM_VALUE (PARAM_SCHED_MEM_TRUE_DEP_COST);
7014
  else if (dw > MIN_DEP_WEAK)
7015
    {
7016
      /* Store and load are less likely to alias.  */
7017
      if (mflag_sched_fp_mem_deps_zero_cost && dep_class == ITANIUM_CLASS_STF)
7018
        /* Assume there will be no cache conflict for floating-point data.
7019
           For integer data, L1 conflict penalty is huge (17 cycles), so we
7020
           never assume it will not cause a conflict.  */
7021
        return 0;
7022
      else
7023
        return cost;
7024
    }
7025
 
7026
  if (dep_type != REG_DEP_OUTPUT)
7027
    return cost;
7028
 
7029
  if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
7030
      || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
7031
    return 0;
7032
 
7033
  return cost;
7034
}
7035
 
7036
/* Like emit_insn_before, but skip cycle_display notes.
7037
   ??? When cycle display notes are implemented, update this.  */
7038
 
7039
static void
7040
ia64_emit_insn_before (rtx insn, rtx before)
7041
{
7042
  emit_insn_before (insn, before);
7043
}
7044
 
7045
/* The following function marks insns that produce addresses for load
7046
   and store insns.  Such insns will be placed into M slots because this
7047
   decreases latency time for Itanium1 (see function
7048
   `ia64_produce_address_p' and the DFA descriptions).  */
7049
 
7050
static void
7051
ia64_dependencies_evaluation_hook (rtx head, rtx tail)
7052
{
7053
  rtx insn, next, next_tail;
7054
 
7055
  /* Before reload, which_alternative is not set, which means that
7056
     ia64_safe_itanium_class will produce wrong results for (at least)
7057
     move instructions.  */
7058
  if (!reload_completed)
7059
    return;
7060
 
7061
  next_tail = NEXT_INSN (tail);
7062
  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
7063
    if (INSN_P (insn))
7064
      insn->call = 0;
7065
  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
7066
    if (INSN_P (insn)
7067
        && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
7068
      {
7069
        sd_iterator_def sd_it;
7070
        dep_t dep;
7071
        bool has_mem_op_consumer_p = false;
7072
 
7073
        FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
7074
          {
7075
            enum attr_itanium_class c;
7076
 
7077
            if (DEP_TYPE (dep) != REG_DEP_TRUE)
7078
              continue;
7079
 
7080
            next = DEP_CON (dep);
7081
            c = ia64_safe_itanium_class (next);
7082
            if ((c == ITANIUM_CLASS_ST
7083
                 || c == ITANIUM_CLASS_STF)
7084
                && ia64_st_address_bypass_p (insn, next))
7085
              {
7086
                has_mem_op_consumer_p = true;
7087
                break;
7088
              }
7089
            else if ((c == ITANIUM_CLASS_LD
7090
                      || c == ITANIUM_CLASS_FLD
7091
                      || c == ITANIUM_CLASS_FLDP)
7092
                     && ia64_ld_address_bypass_p (insn, next))
7093
              {
7094
                has_mem_op_consumer_p = true;
7095
                break;
7096
              }
7097
          }
7098
 
7099
        insn->call = has_mem_op_consumer_p;
7100
      }
7101
}
7102
 
7103
/* We're beginning a new block.  Initialize data structures as necessary.  */
7104
 
7105
static void
7106
ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
7107
                 int sched_verbose ATTRIBUTE_UNUSED,
7108
                 int max_ready ATTRIBUTE_UNUSED)
7109
{
7110
#ifdef ENABLE_CHECKING
7111
  rtx insn;
7112
 
7113
  if (!sel_sched_p () && reload_completed)
7114
    for (insn = NEXT_INSN (current_sched_info->prev_head);
7115
         insn != current_sched_info->next_tail;
7116
         insn = NEXT_INSN (insn))
7117
      gcc_assert (!SCHED_GROUP_P (insn));
7118
#endif
7119
  last_scheduled_insn = NULL_RTX;
7120
  init_insn_group_barriers ();
7121
 
7122
  current_cycle = 0;
7123
  memset (mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7124
}
7125
 
7126
/* We're beginning a scheduling pass.  Check assertion.  */
7127
 
7128
static void
7129
ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
7130
                        int sched_verbose ATTRIBUTE_UNUSED,
7131
                        int max_ready ATTRIBUTE_UNUSED)
7132
{
7133
  gcc_assert (pending_data_specs == 0);
7134
}
7135
 
7136
/* Scheduling pass is now finished.  Free/reset static variable.  */
7137
static void
7138
ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
7139
                          int sched_verbose ATTRIBUTE_UNUSED)
7140
{
7141
  gcc_assert (pending_data_specs == 0);
7142
}
7143
 
7144
/* Return TRUE if INSN is a load (either normal or speculative, but not a
7145
   speculation check), FALSE otherwise.  */
7146
static bool
7147
is_load_p (rtx insn)
7148
{
7149
  enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
7150
 
7151
  return
7152
   ((insn_class == ITANIUM_CLASS_LD || insn_class == ITANIUM_CLASS_FLD)
7153
    && get_attr_check_load (insn) == CHECK_LOAD_NO);
7154
}
7155
 
7156
/* If INSN is a memory reference, memoize it in MEM_OPS_IN_GROUP global array
7157
   (taking into account the 3-cycle cache reference postponing for stores: Intel
7158
   Itanium 2 Reference Manual for Software Development and Optimization,
7159
   6.7.3.1).  */
7160
static void
7161
record_memory_reference (rtx insn)
7162
{
7163
  enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
7164
 
7165
  switch (insn_class) {
7166
    case ITANIUM_CLASS_FLD:
7167
    case ITANIUM_CLASS_LD:
7168
      mem_ops_in_group[current_cycle % 4]++;
7169
      break;
7170
    case ITANIUM_CLASS_STF:
7171
    case ITANIUM_CLASS_ST:
7172
      mem_ops_in_group[(current_cycle + 3) % 4]++;
7173
      break;
7174
    default:;
7175
  }
7176
}
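
/* For example, with current_cycle == 5 the code above counts a load in
   mem_ops_in_group[5 % 4], i.e. slot 1, and a store in
   mem_ops_in_group[(5 + 3) % 4], i.e. slot 0, reflecting the 3-cycle
   postponement of store references noted in the comment above.  */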
7177
 
7178
/* We are about to begin issuing insns for this clock cycle.
7179
   Override the default sort algorithm to better slot instructions.  */
7180
 
7181
static int
7182
ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
7183
                        int *pn_ready, int clock_var,
7184
                        int reorder_type)
7185
{
7186
  int n_asms;
7187
  int n_ready = *pn_ready;
7188
  rtx *e_ready = ready + n_ready;
7189
  rtx *insnp;
7190
 
7191
  if (sched_verbose)
7192
    fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
7193
 
7194
  if (reorder_type == 0)
7195
    {
7196
      /* First, move all USEs, CLOBBERs and other crud out of the way.  */
7197
      n_asms = 0;
7198
      for (insnp = ready; insnp < e_ready; insnp++)
7199
        if (insnp < e_ready)
7200
          {
7201
            rtx insn = *insnp;
7202
            enum attr_type t = ia64_safe_type (insn);
7203
            if (t == TYPE_UNKNOWN)
7204
              {
7205
                if (GET_CODE (PATTERN (insn)) == ASM_INPUT
7206
                    || asm_noperands (PATTERN (insn)) >= 0)
7207
                  {
7208
                    rtx lowest = ready[n_asms];
7209
                    ready[n_asms] = insn;
7210
                    *insnp = lowest;
7211
                    n_asms++;
7212
                  }
7213
                else
7214
                  {
7215
                    rtx highest = ready[n_ready - 1];
7216
                    ready[n_ready - 1] = insn;
7217
                    *insnp = highest;
7218
                    return 1;
7219
                  }
7220
              }
7221
          }
7222
 
7223
      if (n_asms < n_ready)
7224
        {
7225
          /* Some normal insns to process.  Skip the asms.  */
7226
          ready += n_asms;
7227
          n_ready -= n_asms;
7228
        }
7229
      else if (n_ready > 0)
7230
        return 1;
7231
    }
7232
 
7233
  if (ia64_final_schedule)
7234
    {
7235
      int deleted = 0;
7236
      int nr_need_stop = 0;
7237
 
7238
      for (insnp = ready; insnp < e_ready; insnp++)
7239
        if (safe_group_barrier_needed (*insnp))
7240
          nr_need_stop++;
7241
 
7242
      if (reorder_type == 1 && n_ready == nr_need_stop)
7243
        return 0;
7244
      if (reorder_type == 0)
7245
        return 1;
7246
      insnp = e_ready;
7247
      /* Move down everything that needs a stop bit, preserving
7248
         relative order.  */
7249
      while (insnp-- > ready + deleted)
7250
        while (insnp >= ready + deleted)
7251
          {
7252
            rtx insn = *insnp;
7253
            if (! safe_group_barrier_needed (insn))
7254
              break;
7255
            memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
7256
            *ready = insn;
7257
            deleted++;
7258
          }
7259
      n_ready -= deleted;
7260
      ready += deleted;
7261
    }
7262
 
7263
  current_cycle = clock_var;
7264
  if (reload_completed && mem_ops_in_group[clock_var % 4] >= ia64_max_memory_insns)
7265
    {
7266
      int moved = 0;
7267
 
7268
      insnp = e_ready;
7269
      /* Move down loads/stores, preserving relative order.  */
7270
      while (insnp-- > ready + moved)
7271
        while (insnp >= ready + moved)
7272
          {
7273
            rtx insn = *insnp;
7274
            if (! is_load_p (insn))
7275
              break;
7276
            memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
7277
            *ready = insn;
7278
            moved++;
7279
          }
7280
      n_ready -= moved;
7281
      ready += moved;
7282
    }
7283
 
7284
  return 1;
7285
}
7286
 
7287
/* We are about to begin issuing insns for this clock cycle.  Override
7288
   the default sort algorithm to better slot instructions.  */
7289
 
7290
static int
7291
ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
7292
                    int clock_var)
7293
{
7294
  return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
7295
                                 pn_ready, clock_var, 0);
7296
}
7297
 
7298
/* Like ia64_sched_reorder, but called after issuing each insn.
7299
   Override the default sort algorithm to better slot instructions.  */
7300
 
7301
static int
7302
ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
7303
                     int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
7304
                     int *pn_ready, int clock_var)
7305
{
7306
  return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
7307
                                 clock_var, 1);
7308
}
7309
 
7310
/* We are about to issue INSN.  Return the number of insns left on the
7311
   ready queue that can be issued this cycle.  */
7312
 
7313
static int
7314
ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
7315
                     int sched_verbose ATTRIBUTE_UNUSED,
7316
                     rtx insn ATTRIBUTE_UNUSED,
7317
                     int can_issue_more ATTRIBUTE_UNUSED)
7318
{
7319
  if (sched_deps_info->generate_spec_deps && !sel_sched_p ())
7320
    /* Modulo scheduling does not extend h_i_d when emitting
7321
       new instructions.  Don't use h_i_d if we don't have to.  */
7322
    {
7323
      if (DONE_SPEC (insn) & BEGIN_DATA)
7324
        pending_data_specs++;
7325
      if (CHECK_SPEC (insn) & BEGIN_DATA)
7326
        pending_data_specs--;
7327
    }
7328
 
7329
  if (DEBUG_INSN_P (insn))
7330
    return 1;
7331
 
7332
  last_scheduled_insn = insn;
7333
  memcpy (prev_cycle_state, curr_state, dfa_state_size);
7334
  if (reload_completed)
7335
    {
7336
      int needed = group_barrier_needed (insn);
7337
 
7338
      gcc_assert (!needed);
7339
      if (GET_CODE (insn) == CALL_INSN)
7340
        init_insn_group_barriers ();
7341
      stops_p [INSN_UID (insn)] = stop_before_p;
7342
      stop_before_p = 0;
7343
 
7344
      record_memory_reference (insn);
7345
    }
7346
  return 1;
7347
}
7348
 
7349
/* We are choosing insn from the ready queue.  Return nonzero if INSN
7350
   can be chosen.  */
7351
 
7352
static int
7353
ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
7354
{
7355
  gcc_assert (insn && INSN_P (insn));
7356
  return ((!reload_completed
7357
           || !safe_group_barrier_needed (insn))
7358
          && ia64_first_cycle_multipass_dfa_lookahead_guard_spec (insn)
7359
          && (!mflag_sched_mem_insns_hard_limit
7360
              || !is_load_p (insn)
7361
              || mem_ops_in_group[current_cycle % 4] < ia64_max_memory_insns));
7362
}
7363
 
7364
/* We are choosing insn from the ready queue.  Return nonzero if INSN
7365
   can be chosen.  */
7366
 
7367
static bool
7368
ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx insn)
7369
{
7370
  gcc_assert (insn  && INSN_P (insn));
7371
  /* Size of ALAT is 32.  Since we perform conservative data speculation,
7372
     we keep ALAT half-empty.  */
7373
  return (pending_data_specs < 16
7374
          || !(TODO_SPEC (insn) & BEGIN_DATA));
7375
}
7376
 
7377
/* The following variable value is pseudo-insn used by the DFA insn
7378
   scheduler to change the DFA state when the simulated clock is
7379
   increased.  */
7380
 
7381
static rtx dfa_pre_cycle_insn;
7382
 
7383
/* Returns 1 when a meaningful insn was scheduled between the last group
7384
   barrier and LAST.  */
7385
static int
7386
scheduled_good_insn (rtx last)
7387
{
7388
  if (last && recog_memoized (last) >= 0)
7389
    return 1;
7390
 
7391
  for ( ;
7392
       last != NULL && !NOTE_INSN_BASIC_BLOCK_P (last)
7393
       && !stops_p[INSN_UID (last)];
7394
       last = PREV_INSN (last))
7395
    /* We could hit a NOTE_INSN_DELETED here which is actually outside
7396
       the ebb we're scheduling.  */
7397
    if (INSN_P (last) && recog_memoized (last) >= 0)
7398
      return 1;
7399
 
7400
  return 0;
7401
}
7402
 
7403
/* We are about to begin issuing INSN.  Return nonzero if we cannot
7404
   issue it on given cycle CLOCK and return zero if we should not sort
7405
   the ready queue on the next clock start.  */
7406
 
7407
static int
7408
ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
7409
                    int clock, int *sort_p)
7410
{
7411
  gcc_assert (insn && INSN_P (insn));
7412
 
7413
  if (DEBUG_INSN_P (insn))
7414
    return 0;
7415
 
7416
  /* When a group barrier is needed for insn, last_scheduled_insn
7417
     should be set.  */
7418
  gcc_assert (!(reload_completed && safe_group_barrier_needed (insn))
7419
              || last_scheduled_insn);
7420
 
7421
  if ((reload_completed
7422
       && (safe_group_barrier_needed (insn)
7423
           || (mflag_sched_stop_bits_after_every_cycle
7424
               && last_clock != clock
7425
               && last_scheduled_insn
7426
               && scheduled_good_insn (last_scheduled_insn))))
7427
      || (last_scheduled_insn
7428
          && (GET_CODE (last_scheduled_insn) == CALL_INSN
7429
              || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
7430
              || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
7431
    {
7432
      init_insn_group_barriers ();
7433
 
7434
      if (verbose && dump)
7435
        fprintf (dump, "//    Stop should be before %d%s\n", INSN_UID (insn),
7436
                 last_clock == clock ? " + cycle advance" : "");
7437
 
7438
      stop_before_p = 1;
7439
      current_cycle = clock;
7440
      mem_ops_in_group[current_cycle % 4] = 0;
7441
 
7442
      if (last_clock == clock)
7443
        {
7444
          state_transition (curr_state, dfa_stop_insn);
7445
          if (TARGET_EARLY_STOP_BITS)
7446
            *sort_p = (last_scheduled_insn == NULL_RTX
7447
                       || GET_CODE (last_scheduled_insn) != CALL_INSN);
7448
          else
7449
            *sort_p = 0;
7450
          return 1;
7451
        }
7452
 
7453
      if (last_scheduled_insn)
7454
        {
7455
          if (GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
7456
              || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)
7457
            state_reset (curr_state);
7458
          else
7459
            {
7460
              memcpy (curr_state, prev_cycle_state, dfa_state_size);
7461
              state_transition (curr_state, dfa_stop_insn);
7462
              state_transition (curr_state, dfa_pre_cycle_insn);
7463
              state_transition (curr_state, NULL);
7464
            }
7465
        }
7466
    }
7467
  return 0;
7468
}
7469
 
7470
/* Implement targetm.sched.h_i_d_extended hook.
7471
   Extend internal data structures.  */
7472
static void
7473
ia64_h_i_d_extended (void)
7474
{
7475
  if (stops_p != NULL)
7476
    {
7477
      int new_clocks_length = get_max_uid () * 3 / 2;
7478
      stops_p = (char *) xrecalloc (stops_p, new_clocks_length, clocks_length, 1);
7479
      clocks_length = new_clocks_length;
7480
    }
7481
}
7482
 
7483
 
7484
/* This structure describes the data used by the backend to guide scheduling.
7485
   When the current scheduling point is switched, this data should be saved
7486
   and restored later, if the scheduler returns to this point.  */
7487
struct _ia64_sched_context
7488
{
7489
  state_t prev_cycle_state;
7490
  rtx last_scheduled_insn;
7491
  struct reg_write_state rws_sum[NUM_REGS];
7492
  struct reg_write_state rws_insn[NUM_REGS];
7493
  int first_instruction;
7494
  int pending_data_specs;
7495
  int current_cycle;
7496
  char mem_ops_in_group[4];
7497
};
7498
typedef struct _ia64_sched_context *ia64_sched_context_t;
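
/* A context mirrors the scheduler-related globals above (prev_cycle_state,
   last_scheduled_insn, rws_sum, rws_insn, first_instruction,
   pending_data_specs, current_cycle and mem_ops_in_group) so that one
   scheduling point can be suspended and another resumed; see
   ia64_init_sched_context and ia64_set_sched_context below.  */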
7499
 
7500
/* Allocates a scheduling context.  */
7501
static void *
7502
ia64_alloc_sched_context (void)
7503
{
7504
  return xmalloc (sizeof (struct _ia64_sched_context));
7505
}
7506
 
7507
/* Initializes the _SC context with clean data, if CLEAN_P, and from
7508
   the global context otherwise.  */
7509
static void
7510
ia64_init_sched_context (void *_sc, bool clean_p)
7511
{
7512
  ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7513
 
7514
  sc->prev_cycle_state = xmalloc (dfa_state_size);
7515
  if (clean_p)
7516
    {
7517
      state_reset (sc->prev_cycle_state);
7518
      sc->last_scheduled_insn = NULL_RTX;
7519
      memset (sc->rws_sum, 0, sizeof (rws_sum));
7520
      memset (sc->rws_insn, 0, sizeof (rws_insn));
7521
      sc->first_instruction = 1;
7522
      sc->pending_data_specs = 0;
7523
      sc->current_cycle = 0;
7524
      memset (sc->mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7525
    }
7526
  else
7527
    {
7528
      memcpy (sc->prev_cycle_state, prev_cycle_state, dfa_state_size);
7529
      sc->last_scheduled_insn = last_scheduled_insn;
7530
      memcpy (sc->rws_sum, rws_sum, sizeof (rws_sum));
7531
      memcpy (sc->rws_insn, rws_insn, sizeof (rws_insn));
7532
      sc->first_instruction = first_instruction;
7533
      sc->pending_data_specs = pending_data_specs;
7534
      sc->current_cycle = current_cycle;
7535
      memcpy (sc->mem_ops_in_group, mem_ops_in_group, sizeof (mem_ops_in_group));
7536
    }
7537
}
7538
 
7539
/* Sets the global scheduling context to the one pointed to by _SC.  */
7540
static void
7541
ia64_set_sched_context (void *_sc)
7542
{
7543
  ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7544
 
7545
  gcc_assert (sc != NULL);
7546
 
7547
  memcpy (prev_cycle_state, sc->prev_cycle_state, dfa_state_size);
7548
  last_scheduled_insn = sc->last_scheduled_insn;
7549
  memcpy (rws_sum, sc->rws_sum, sizeof (rws_sum));
7550
  memcpy (rws_insn, sc->rws_insn, sizeof (rws_insn));
7551
  first_instruction = sc->first_instruction;
7552
  pending_data_specs = sc->pending_data_specs;
7553
  current_cycle = sc->current_cycle;
7554
  memcpy (mem_ops_in_group, sc->mem_ops_in_group, sizeof (mem_ops_in_group));
7555
}
7556
 
7557
/* Clears the data in the _SC scheduling context.  */
7558
static void
7559
ia64_clear_sched_context (void *_sc)
7560
{
7561
  ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7562
 
7563
  free (sc->prev_cycle_state);
7564
  sc->prev_cycle_state = NULL;
7565
}
7566
 
7567
/* Frees the _SC scheduling context.  */
7568
static void
7569
ia64_free_sched_context (void *_sc)
7570
{
7571
  gcc_assert (_sc != NULL);
7572
 
7573
  free (_sc);
7574
}
7575
 
7576
typedef rtx (* gen_func_t) (rtx, rtx);
7577
 
7578
/* Return a function that will generate a load of mode MODE_NO
7579
   with speculation types TS.  */
7580
static gen_func_t
7581
get_spec_load_gen_function (ds_t ts, int mode_no)
7582
{
7583
  static gen_func_t gen_ld_[] = {
7584
    gen_movbi,
7585
    gen_movqi_internal,
7586
    gen_movhi_internal,
7587
    gen_movsi_internal,
7588
    gen_movdi_internal,
7589
    gen_movsf_internal,
7590
    gen_movdf_internal,
7591
    gen_movxf_internal,
7592
    gen_movti_internal,
7593
    gen_zero_extendqidi2,
7594
    gen_zero_extendhidi2,
7595
    gen_zero_extendsidi2,
7596
  };
7597
 
7598
  static gen_func_t gen_ld_a[] = {
7599
    gen_movbi_advanced,
7600
    gen_movqi_advanced,
7601
    gen_movhi_advanced,
7602
    gen_movsi_advanced,
7603
    gen_movdi_advanced,
7604
    gen_movsf_advanced,
7605
    gen_movdf_advanced,
7606
    gen_movxf_advanced,
7607
    gen_movti_advanced,
7608
    gen_zero_extendqidi2_advanced,
7609
    gen_zero_extendhidi2_advanced,
7610
    gen_zero_extendsidi2_advanced,
7611
  };
7612
  static gen_func_t gen_ld_s[] = {
7613
    gen_movbi_speculative,
7614
    gen_movqi_speculative,
7615
    gen_movhi_speculative,
7616
    gen_movsi_speculative,
7617
    gen_movdi_speculative,
7618
    gen_movsf_speculative,
7619
    gen_movdf_speculative,
7620
    gen_movxf_speculative,
7621
    gen_movti_speculative,
7622
    gen_zero_extendqidi2_speculative,
7623
    gen_zero_extendhidi2_speculative,
7624
    gen_zero_extendsidi2_speculative,
7625
  };
7626
  static gen_func_t gen_ld_sa[] = {
7627
    gen_movbi_speculative_advanced,
7628
    gen_movqi_speculative_advanced,
7629
    gen_movhi_speculative_advanced,
7630
    gen_movsi_speculative_advanced,
7631
    gen_movdi_speculative_advanced,
7632
    gen_movsf_speculative_advanced,
7633
    gen_movdf_speculative_advanced,
7634
    gen_movxf_speculative_advanced,
7635
    gen_movti_speculative_advanced,
7636
    gen_zero_extendqidi2_speculative_advanced,
7637
    gen_zero_extendhidi2_speculative_advanced,
7638
    gen_zero_extendsidi2_speculative_advanced,
7639
  };
7640
  static gen_func_t gen_ld_s_a[] = {
7641
    gen_movbi_speculative_a,
7642
    gen_movqi_speculative_a,
7643
    gen_movhi_speculative_a,
7644
    gen_movsi_speculative_a,
7645
    gen_movdi_speculative_a,
7646
    gen_movsf_speculative_a,
7647
    gen_movdf_speculative_a,
7648
    gen_movxf_speculative_a,
7649
    gen_movti_speculative_a,
7650
    gen_zero_extendqidi2_speculative_a,
7651
    gen_zero_extendhidi2_speculative_a,
7652
    gen_zero_extendsidi2_speculative_a,
7653
  };
7654
 
7655
  gen_func_t *gen_ld;
7656
 
7657
  if (ts & BEGIN_DATA)
7658
    {
7659
      if (ts & BEGIN_CONTROL)
7660
        gen_ld = gen_ld_sa;
7661
      else
7662
        gen_ld = gen_ld_a;
7663
    }
7664
  else if (ts & BEGIN_CONTROL)
7665
    {
7666
      if ((spec_info->flags & SEL_SCHED_SPEC_DONT_CHECK_CONTROL)
7667
          || ia64_needs_block_p (ts))
7668
        gen_ld = gen_ld_s;
7669
      else
7670
        gen_ld = gen_ld_s_a;
7671
    }
7672
  else if (ts == 0)
7673
    gen_ld = gen_ld_;
7674
  else
7675
    gcc_unreachable ();
7676
 
7677
  return gen_ld[mode_no];
7678
}
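
/* In ia64 assembly terms, the selection above roughly corresponds to the
   speculative load forms: BEGIN_DATA alone yields an advanced load such
   as ld8.a, BEGIN_CONTROL alone a control-speculative load such as
   ld8.s, both together ld8.sa, and a TS of zero a plain load.  */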
7679
 
7680
/* Constants that help map 'enum machine_mode' to int.  */
7681
enum SPEC_MODES
7682
  {
7683
    SPEC_MODE_INVALID = -1,
7684
    SPEC_MODE_FIRST = 0,
7685
    SPEC_MODE_FOR_EXTEND_FIRST = 1,
7686
    SPEC_MODE_FOR_EXTEND_LAST = 3,
7687
    SPEC_MODE_LAST = 8
7688
  };
7689
 
7690
enum
7691
  {
7692
    /* Offset to reach ZERO_EXTEND patterns.  */
7693
    SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1
7694
  };
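
/* A worked example of the offset above, assuming callers add it to a mode
   index in the SPEC_MODE_FOR_EXTEND range: with SPEC_MODE_LAST == 8 and
   SPEC_MODE_FOR_EXTEND_FIRST == 1, SPEC_GEN_EXTEND_OFFSET is 8, so the
   QImode/HImode/SImode indices 1..3 map to the zero-extending generators
   at positions 9..11 of the gen_ld_* tables above.  */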
7695
 
7696
/* Return index of the MODE.  */
7697
static int
7698
ia64_mode_to_int (enum machine_mode mode)
7699
{
7700
  switch (mode)
7701
    {
7702
    case BImode: return 0; /* SPEC_MODE_FIRST  */
7703
    case QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST  */
7704
    case HImode: return 2;
7705
    case SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST  */
7706
    case DImode: return 4;
7707
    case SFmode: return 5;
7708
    case DFmode: return 6;
7709
    case XFmode: return 7;
7710
    case TImode:
7711
      /* ??? This mode needs testing.  Bypasses for ldfp8 instruction are not
7712
         mentioned in itanium[12].md.  Predicate fp_register_operand also
7713
         needs to be defined.  Bottom line: better disable for now.  */
7714
      return SPEC_MODE_INVALID;
7715
    default:     return SPEC_MODE_INVALID;
7716
    }
7717
}
7718
 
7719
/* Provide information about speculation capabilities.  */
7720
static void
7721
ia64_set_sched_flags (spec_info_t spec_info)
7722
{
7723
  unsigned int *flags = &(current_sched_info->flags);
7724
 
7725
  if (*flags & SCHED_RGN
7726
      || *flags & SCHED_EBB
7727
      || *flags & SEL_SCHED)
7728
    {
7729
      int mask = 0;
7730
 
7731
      if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0)
7732
          || (mflag_sched_ar_data_spec && reload_completed))
7733
        {
7734
          mask |= BEGIN_DATA;
7735
 
7736
          if (!sel_sched_p ()
7737
              && ((mflag_sched_br_in_data_spec && !reload_completed)
7738
                  || (mflag_sched_ar_in_data_spec && reload_completed)))
7739
            mask |= BE_IN_DATA;
7740
        }
7741
 
7742
      if (mflag_sched_control_spec
7743
          && (!sel_sched_p ()
7744
              || reload_completed))
7745
        {
7746
          mask |= BEGIN_CONTROL;
7747
 
7748
          if (!sel_sched_p () && mflag_sched_in_control_spec)
7749
            mask |= BE_IN_CONTROL;
7750
        }
7751
 
7752
      spec_info->mask = mask;
7753
 
7754
      if (mask)
7755
        {
7756
          *flags |= USE_DEPS_LIST | DO_SPECULATION;
7757
 
7758
          if (mask & BE_IN_SPEC)
7759
            *flags |= NEW_BBS;
7760
 
7761
          spec_info->flags = 0;
7762
 
7763
          if ((mask & DATA_SPEC) && mflag_sched_prefer_non_data_spec_insns)
7764
            spec_info->flags |= PREFER_NON_DATA_SPEC;
7765
 
7766
          if (mask & CONTROL_SPEC)
7767
            {
7768
              if (mflag_sched_prefer_non_control_spec_insns)
7769
                spec_info->flags |= PREFER_NON_CONTROL_SPEC;
7770
 
7771
              if (sel_sched_p () && mflag_sel_sched_dont_check_control_spec)
7772
                spec_info->flags |= SEL_SCHED_SPEC_DONT_CHECK_CONTROL;
7773
            }
7774
 
7775
          if (sched_verbose >= 1)
7776
            spec_info->dump = sched_dump;
7777
          else
7778
            spec_info->dump = 0;
7779
 
7780
          if (mflag_sched_count_spec_in_critical_path)
7781
            spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH;
7782
        }
7783
    }
7784
  else
7785
    spec_info->mask = 0;
7786
}
7787
 
7788
/* If INSN is an appropriate load, return its mode number (see
   ia64_mode_to_int; zero-extending loads have SPEC_GEN_EXTEND_OFFSET
   added).  Return -1 otherwise.  */
7790
static int
7791
get_mode_no_for_insn (rtx insn)
7792
{
7793
  rtx reg, mem, mode_rtx;
7794
  int mode_no;
7795
  bool extend_p;
7796
 
7797
  extract_insn_cached (insn);
7798
 
7799
  /* We use WHICH_ALTERNATIVE only after reload.  This will
7800
     guarantee that reload won't touch a speculative insn.  */
7801
 
7802
  if (recog_data.n_operands != 2)
7803
    return -1;
7804
 
7805
  reg = recog_data.operand[0];
7806
  mem = recog_data.operand[1];
7807
 
7808
  /* We should use MEM's mode since REG's mode in the presence of
     ZERO_EXTEND will always be DImode.  */
7810
  if (get_attr_speculable1 (insn) == SPECULABLE1_YES)
7811
    /* Process non-speculative ld.  */
7812
    {
7813
      if (!reload_completed)
7814
        {
7815
          /* Do not speculate into regs like ar.lc.  */
7816
          if (!REG_P (reg) || AR_REGNO_P (REGNO (reg)))
7817
            return -1;
7818
 
7819
          if (!MEM_P (mem))
7820
            return -1;
7821
 
7822
          {
7823
            rtx mem_reg = XEXP (mem, 0);
7824
 
7825
            if (!REG_P (mem_reg))
7826
              return -1;
7827
          }
7828
 
7829
          mode_rtx = mem;
7830
        }
7831
      else if (get_attr_speculable2 (insn) == SPECULABLE2_YES)
7832
        {
7833
          gcc_assert (REG_P (reg) && MEM_P (mem));
7834
          mode_rtx = mem;
7835
        }
7836
      else
7837
        return -1;
7838
    }
7839
  else if (get_attr_data_speculative (insn) == DATA_SPECULATIVE_YES
7840
           || get_attr_control_speculative (insn) == CONTROL_SPECULATIVE_YES
7841
           || get_attr_check_load (insn) == CHECK_LOAD_YES)
7842
    /* Process speculative ld or ld.c.  */
7843
    {
7844
      gcc_assert (REG_P (reg) && MEM_P (mem));
7845
      mode_rtx = mem;
7846
    }
7847
  else
7848
    {
7849
      enum attr_itanium_class attr_class = get_attr_itanium_class (insn);
7850
 
7851
      if (attr_class == ITANIUM_CLASS_CHK_A
7852
          || attr_class == ITANIUM_CLASS_CHK_S_I
7853
          || attr_class == ITANIUM_CLASS_CHK_S_F)
7854
        /* Process chk.  */
7855
        mode_rtx = reg;
7856
      else
7857
        return -1;
7858
    }
7859
 
7860
  mode_no = ia64_mode_to_int (GET_MODE (mode_rtx));
7861
 
7862
  if (mode_no == SPEC_MODE_INVALID)
7863
    return -1;
7864
 
7865
  extend_p = (GET_MODE (reg) != GET_MODE (mode_rtx));
7866
 
7867
  if (extend_p)
7868
    {
7869
      if (!(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
7870
            && mode_no <= SPEC_MODE_FOR_EXTEND_LAST))
7871
        return -1;
7872
 
7873
      mode_no += SPEC_GEN_EXTEND_OFFSET;
7874
    }
7875
 
7876
  return mode_no;
7877
}
7878
 
7879
/* If X is an unspec part of a speculative load, return its code.
7880
   Return -1 otherwise.  */
7881
static int
7882
get_spec_unspec_code (const_rtx x)
7883
{
7884
  if (GET_CODE (x) != UNSPEC)
7885
    return -1;
7886
 
7887
  {
7888
    int code;
7889
 
7890
    code = XINT (x, 1);
7891
 
7892
    switch (code)
7893
      {
7894
      case UNSPEC_LDA:
7895
      case UNSPEC_LDS:
7896
      case UNSPEC_LDS_A:
7897
      case UNSPEC_LDSA:
7898
        return code;
7899
 
7900
      default:
7901
        return -1;
7902
      }
7903
  }
7904
}
7905
 
7906
/* Implement skip_rtx_p hook.  */
7907
static bool
7908
ia64_skip_rtx_p (const_rtx x)
7909
{
7910
  return get_spec_unspec_code (x) != -1;
7911
}
7912
 
7913
/* If INSN is a speculative load, return its UNSPEC code.
7914
   Return -1 otherwise.  */
7915
static int
7916
get_insn_spec_code (const_rtx insn)
7917
{
7918
  rtx pat, reg, mem;
7919
 
7920
  pat = PATTERN (insn);
7921
 
7922
  if (GET_CODE (pat) == COND_EXEC)
7923
    pat = COND_EXEC_CODE (pat);
7924
 
7925
  if (GET_CODE (pat) != SET)
7926
    return -1;
7927
 
7928
  reg = SET_DEST (pat);
7929
  if (!REG_P (reg))
7930
    return -1;
7931
 
7932
  mem = SET_SRC (pat);
7933
  if (GET_CODE (mem) == ZERO_EXTEND)
7934
    mem = XEXP (mem, 0);
7935
 
7936
  return get_spec_unspec_code (mem);
7937
}
7938
 
7939
/* If INSN is a speculative load, return a ds with the speculation types.
7940
   Otherwise [if INSN is a normal instruction] return 0.  */
7941
static ds_t
7942
ia64_get_insn_spec_ds (rtx insn)
7943
{
7944
  int code = get_insn_spec_code (insn);
7945
 
7946
  switch (code)
7947
    {
7948
    case UNSPEC_LDA:
7949
      return BEGIN_DATA;
7950
 
7951
    case UNSPEC_LDS:
7952
    case UNSPEC_LDS_A:
7953
      return BEGIN_CONTROL;
7954
 
7955
    case UNSPEC_LDSA:
7956
      return BEGIN_DATA | BEGIN_CONTROL;
7957
 
7958
    default:
7959
      return 0;
7960
    }
7961
}
7962
 
7963
/* If INSN is a speculative load return a ds with the speculation types that
7964
   will be checked.
7965
   Otherwise [if INSN is a normal instruction] return 0.  */
7966
static ds_t
7967
ia64_get_insn_checked_ds (rtx insn)
7968
{
7969
  int code = get_insn_spec_code (insn);
7970
 
7971
  switch (code)
7972
    {
7973
    case UNSPEC_LDA:
7974
      return BEGIN_DATA | BEGIN_CONTROL;
7975
 
7976
    case UNSPEC_LDS:
7977
      return BEGIN_CONTROL;
7978
 
7979
    case UNSPEC_LDS_A:
7980
    case UNSPEC_LDSA:
7981
      return BEGIN_DATA | BEGIN_CONTROL;
7982
 
7983
    default:
7984
      return 0;
7985
    }
7986
}
7987
 
7988
/* Return a speculative load pattern for INSN with speculation type TS
   and mode number MODE_NO, built from INSN's destination and memory
   operands (taken from recog_data).  */
7992
static rtx
7993
ia64_gen_spec_load (rtx insn, ds_t ts, int mode_no)
7994
{
7995
  rtx pat, new_pat;
7996
  gen_func_t gen_load;
7997
 
7998
  gen_load = get_spec_load_gen_function (ts, mode_no);
7999
 
8000
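  /* recog_data still describes INSN at this point: get_mode_no_for_insn,
     called just before this function in ia64_speculate_insn, ran
     extract_insn_cached on it, so operands 0 and 1 are INSN's
     destination register and source memory.  */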
  new_pat = gen_load (copy_rtx (recog_data.operand[0]),
8001
                      copy_rtx (recog_data.operand[1]));
8002
 
8003
  pat = PATTERN (insn);
8004
  if (GET_CODE (pat) == COND_EXEC)
8005
    new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
8006
                                 new_pat);
8007
 
8008
  return new_pat;
8009
}
8010
 
8011
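/* Return true if INSN may be placed inside a speculatively executed
   region (BE_IN speculation) of type DS.  The ia64 port answers false
   unconditionally, so ia64_speculate_insn rejects all BE_IN_SPEC
   requests.  */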
static bool
8012
insn_can_be_in_speculative_p (rtx insn ATTRIBUTE_UNUSED,
8013
                              ds_t ds ATTRIBUTE_UNUSED)
8014
{
8015
  return false;
8016
}
8017
 
8018
/* Implement targetm.sched.speculate_insn hook.
8019
   Check if the INSN can be TS speculative.
8020
   If 'no' - return -1.
8021
   If 'yes' - generate speculative pattern in the NEW_PAT and return 1.
8022
   If current pattern of the INSN already provides TS speculation,
8023
   return 0.  */
8024
static int
8025
ia64_speculate_insn (rtx insn, ds_t ts, rtx *new_pat)
8026
{
8027
  int mode_no;
8028
  int res;
8029
 
8030
  gcc_assert (!(ts & ~SPECULATIVE));
8031
 
8032
  if (ia64_spec_check_p (insn))
8033
    return -1;
8034
 
8035
  if ((ts & BE_IN_SPEC)
8036
      && !insn_can_be_in_speculative_p (insn, ts))
8037
    return -1;
8038
 
8039
  mode_no = get_mode_no_for_insn (insn);
8040
 
8041
  if (mode_no != SPEC_MODE_INVALID)
8042
    {
8043
      if (ia64_get_insn_spec_ds (insn) == ds_get_speculation_types (ts))
8044
        res = 0;
8045
      else
8046
        {
8047
          res = 1;
8048
          *new_pat = ia64_gen_spec_load (insn, ts, mode_no);
8049
        }
8050
    }
8051
  else
8052
    res = -1;
8053
 
8054
  return res;
8055
}
8056
 
8057
/* Return a function that will generate a check for speculation TS with mode
8058
   MODE_NO.
8059
   If a simple check is needed, pass true for SIMPLE_CHECK_P.
   If a clearing check is needed, pass true for CLEARING_CHECK_P.  */
8061
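/* Illustrative sketch only (hypothetical registers and label; the
   actual code is produced through the RTL generators below):

       ld8.a     r1 = [r2]          // advanced (data-speculative) load
       ...
       st8       [r3] = r4          // possibly aliasing store
       chk.a.clr r1, .L_recover     // recovery check for r1
 */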
static gen_func_t
8062
get_spec_check_gen_function (ds_t ts, int mode_no,
8063
                             bool simple_check_p, bool clearing_check_p)
8064
{
8065
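  /* The tables below are indexed by the mode number returned by
     get_mode_no_for_insn.  The last three entries are the ZERO_EXTEND
     variants (SPEC_GEN_EXTEND_OFFSET past their base modes); their
     chk.a/chk.s checks reuse the DImode generators because the extended
     destination register is DImode.  */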
  static gen_func_t gen_ld_c_clr[] = {
8066
    gen_movbi_clr,
8067
    gen_movqi_clr,
8068
    gen_movhi_clr,
8069
    gen_movsi_clr,
8070
    gen_movdi_clr,
8071
    gen_movsf_clr,
8072
    gen_movdf_clr,
8073
    gen_movxf_clr,
8074
    gen_movti_clr,
8075
    gen_zero_extendqidi2_clr,
8076
    gen_zero_extendhidi2_clr,
8077
    gen_zero_extendsidi2_clr,
8078
  };
8079
  static gen_func_t gen_ld_c_nc[] = {
8080
    gen_movbi_nc,
8081
    gen_movqi_nc,
8082
    gen_movhi_nc,
8083
    gen_movsi_nc,
8084
    gen_movdi_nc,
8085
    gen_movsf_nc,
8086
    gen_movdf_nc,
8087
    gen_movxf_nc,
8088
    gen_movti_nc,
8089
    gen_zero_extendqidi2_nc,
8090
    gen_zero_extendhidi2_nc,
8091
    gen_zero_extendsidi2_nc,
8092
  };
8093
  static gen_func_t gen_chk_a_clr[] = {
8094
    gen_advanced_load_check_clr_bi,
8095
    gen_advanced_load_check_clr_qi,
8096
    gen_advanced_load_check_clr_hi,
8097
    gen_advanced_load_check_clr_si,
8098
    gen_advanced_load_check_clr_di,
8099
    gen_advanced_load_check_clr_sf,
8100
    gen_advanced_load_check_clr_df,
8101
    gen_advanced_load_check_clr_xf,
8102
    gen_advanced_load_check_clr_ti,
8103
    gen_advanced_load_check_clr_di,
8104
    gen_advanced_load_check_clr_di,
8105
    gen_advanced_load_check_clr_di,
8106
  };
8107
  static gen_func_t gen_chk_a_nc[] = {
8108
    gen_advanced_load_check_nc_bi,
8109
    gen_advanced_load_check_nc_qi,
8110
    gen_advanced_load_check_nc_hi,
8111
    gen_advanced_load_check_nc_si,
8112
    gen_advanced_load_check_nc_di,
8113
    gen_advanced_load_check_nc_sf,
8114
    gen_advanced_load_check_nc_df,
8115
    gen_advanced_load_check_nc_xf,
8116
    gen_advanced_load_check_nc_ti,
8117
    gen_advanced_load_check_nc_di,
8118
    gen_advanced_load_check_nc_di,
8119
    gen_advanced_load_check_nc_di,
8120
  };
8121
  static gen_func_t gen_chk_s[] = {
8122
    gen_speculation_check_bi,
8123
    gen_speculation_check_qi,
8124
    gen_speculation_check_hi,
8125
    gen_speculation_check_si,
8126
    gen_speculation_check_di,
8127
    gen_speculation_check_sf,
8128
    gen_speculation_check_df,
8129
    gen_speculation_check_xf,
8130
    gen_speculation_check_ti,
8131
    gen_speculation_check_di,
8132
    gen_speculation_check_di,
8133
    gen_speculation_check_di,
8134
  };
8135
 
8136
  gen_func_t *gen_check;
8137
 
8138
  if (ts & BEGIN_DATA)
8139
    {
8140
      /* We don't need recovery because even if this is an ld.sa, the
         ALAT entry will be allocated only if the NaT bit is set to zero.
         So it is enough to use ld.c here.  */
8143
 
8144
      if (simple_check_p)
8145
        {
8146
          gcc_assert (mflag_sched_spec_ldc);
8147
 
8148
          if (clearing_check_p)
8149
            gen_check = gen_ld_c_clr;
8150
          else
8151
            gen_check = gen_ld_c_nc;
8152
        }
8153
      else
8154
        {
8155
          if (clearing_check_p)
8156
            gen_check = gen_chk_a_clr;
8157
          else
8158
            gen_check = gen_chk_a_nc;
8159
        }
8160
    }
8161
  else if (ts & BEGIN_CONTROL)
8162
    {
8163
      if (simple_check_p)
8164
        /* We might want to use ld.sa -> ld.c instead of
8165
           ld.s -> chk.s.  */
8166
        {
8167
          gcc_assert (!ia64_needs_block_p (ts));
8168
 
8169
          if (clearing_check_p)
8170
            gen_check = gen_ld_c_clr;
8171
          else
8172
            gen_check = gen_ld_c_nc;
8173
        }
8174
      else
8175
        {
8176
          gen_check = gen_chk_s;
8177
        }
8178
    }
8179
  else
8180
    gcc_unreachable ();
8181
 
8182
  gcc_assert (mode_no >= 0);
8183
  return gen_check[mode_no];
8184
}
8185
 
8186
/* Return nonzero if a speculative load with speculation type TS needs a
   branchy recovery check (one that branches to recovery code, e.g.
   chk.a/chk.s) rather than a simple in-place check (ld.c).  */
8187
static bool
8188
ia64_needs_block_p (ds_t ts)
8189
{
8190
  if (ts & BEGIN_DATA)
8191
    return !mflag_sched_spec_ldc;
8192
 
8193
  gcc_assert ((ts & BEGIN_CONTROL) != 0);
8194
 
8195
  return !(mflag_sched_spec_control_ldc && mflag_sched_spec_ldc);
8196
}
8197
 
8198
/* Generate a recovery check pattern for INSN with speculation type DS.
   If LABEL is nonzero, generate a branchy recovery check that branches
   to LABEL; otherwise generate a simple check.  */
8201
static rtx
8202
ia64_gen_spec_check (rtx insn, rtx label, ds_t ds)
8203
{
8204
  rtx op1, pat, check_pat;
8205
  gen_func_t gen_check;
8206
  int mode_no;
8207
 
8208
  mode_no = get_mode_no_for_insn (insn);
8209
  gcc_assert (mode_no >= 0);
8210
 
8211
  if (label)
8212
    op1 = label;
8213
  else
8214
    {
8215
      gcc_assert (!ia64_needs_block_p (ds));
8216
      op1 = copy_rtx (recog_data.operand[1]);
8217
    }
8218
 
8219
  gen_check = get_spec_check_gen_function (ds, mode_no, label == NULL_RTX,
8220
                                           true);
8221
 
8222
  check_pat = gen_check (copy_rtx (recog_data.operand[0]), op1);
8223
 
8224
  pat = PATTERN (insn);
8225
  if (GET_CODE (pat) == COND_EXEC)
8226
    check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
8227
                                   check_pat);
8228
 
8229
  return check_pat;
8230
}
8231
 
8232
/* Return nonzero if X is a speculation recovery check.  */
8233
static int
8234
ia64_spec_check_p (rtx x)
8235
{
8236
  x = PATTERN (x);
8237
  if (GET_CODE (x) == COND_EXEC)
8238
    x = COND_EXEC_CODE (x);
8239
  if (GET_CODE (x) == SET)
8240
    return ia64_spec_check_src_p (SET_SRC (x));
8241
  return 0;
8242
}
8243
 
8244
/* Return nonzero if SRC belongs to a recovery check.  */
8245
static int
8246
ia64_spec_check_src_p (rtx src)
8247
{
8248
  if (GET_CODE (src) == IF_THEN_ELSE)
8249
    {
8250
      rtx t;
8251
 
8252
      t = XEXP (src, 0);
8253
      if (GET_CODE (t) == NE)
8254
        {
8255
          t = XEXP (t, 0);
8256
 
8257
          if (GET_CODE (t) == UNSPEC)
8258
            {
8259
              int code;
8260
 
8261
              code = XINT (t, 1);
8262
 
8263
              if (code == UNSPEC_LDCCLR
8264
                  || code == UNSPEC_LDCNC
8265
                  || code == UNSPEC_CHKACLR
8266
                  || code == UNSPEC_CHKANC
8267
                  || code == UNSPEC_CHKS)
8268
                {
8269
                  gcc_assert (code != 0);
8270
                  return code;
8271
                }
8272
            }
8273
        }
8274
    }
8275
  return 0;
8276
}
8277
 
8278
 
8279
/* The following page contains abstract data `bundle states' which are
8280
   used for bundling insns (inserting nops and template generation).  */
8281
 
8282
/* The following describes state of insn bundling.  */
8283
 
8284
struct bundle_state
8285
{
8286
  /* Unique bundle state number to identify them in the debugging
8287
     output  */
8288
  int unique_num;
8289
  rtx insn;     /* corresponding insn, NULL for the 1st and the last state  */
8290
  /* number of nops before and after the insn  */
  short before_nops_num, after_nops_num;
  int insn_num; /* insn number (0 - for initial state, 1 - for the 1st
                   insn)  */
8294
  int cost;     /* cost of the state in cycles */
8295
  int accumulated_insns_num; /* number of all previous insns including
8296
                                nops.  L is considered as 2 insns */
8297
  int branch_deviation; /* deviation of previous branches from 3rd slots  */
8298
  int middle_bundle_stops; /* number of stop bits in the middle of bundles */
8299
  struct bundle_state *next;  /* next state with the same insn_num  */
8300
  struct bundle_state *originator; /* originator (previous insn state)  */
8301
  /* All bundle states are in the following chain.  */
8302
  struct bundle_state *allocated_states_chain;
8303
  /* The DFA State after issuing the insn and the nops.  */
8304
  state_t dfa_state;
8305
};
8306
 
8307
/* The following maps an insn number to the corresponding bundle states.  */
8308
 
8309
static struct bundle_state **index_to_bundle_states;
8310
 
8311
/* The unique number of next bundle state.  */
8312
 
8313
static int bundle_states_num;
8314
 
8315
/* All allocated bundle states are in the following chain.  */
8316
 
8317
static struct bundle_state *allocated_bundle_states_chain;
8318
 
8319
/* All allocated but not used bundle states are in the following
8320
   chain.  */
8321
 
8322
static struct bundle_state *free_bundle_state_chain;
8323
 
8324
 
8325
/* The following function returns a free bundle state.  */
8326
 
8327
static struct bundle_state *
8328
get_free_bundle_state (void)
8329
{
8330
  struct bundle_state *result;
8331
 
8332
  if (free_bundle_state_chain != NULL)
8333
    {
8334
      result = free_bundle_state_chain;
8335
      free_bundle_state_chain = result->next;
8336
    }
8337
  else
8338
    {
8339
      result = XNEW (struct bundle_state);
8340
      result->dfa_state = xmalloc (dfa_state_size);
8341
      result->allocated_states_chain = allocated_bundle_states_chain;
8342
      allocated_bundle_states_chain = result;
8343
    }
8344
  result->unique_num = bundle_states_num++;
8345
  return result;
8346
 
8347
}
8348
 
8349
/* The following function frees the given bundle state.  */
8350
 
8351
static void
8352
free_bundle_state (struct bundle_state *state)
8353
{
8354
  state->next = free_bundle_state_chain;
8355
  free_bundle_state_chain = state;
8356
}
8357
 
8358
/* Start work with abstract data `bundle states'.  */
8359
 
8360
static void
8361
initiate_bundle_states (void)
8362
{
8363
  bundle_states_num = 0;
8364
  free_bundle_state_chain = NULL;
8365
  allocated_bundle_states_chain = NULL;
8366
}
8367
 
8368
/* Finish work with abstract data `bundle states'.  */
8369
 
8370
static void
8371
finish_bundle_states (void)
8372
{
8373
  struct bundle_state *curr_state, *next_state;
8374
 
8375
  for (curr_state = allocated_bundle_states_chain;
8376
       curr_state != NULL;
8377
       curr_state = next_state)
8378
    {
8379
      next_state = curr_state->allocated_states_chain;
8380
      free (curr_state->dfa_state);
8381
      free (curr_state);
8382
    }
8383
}
8384
 
8385
/* Hash table of the bundle states.  The key is dfa_state and insn_num
8386
   of the bundle states.  */
8387
 
8388
static htab_t bundle_state_table;
8389
 
8390
/* The function returns a hash of BUNDLE_STATE.  */
8391
 
8392
static unsigned
8393
bundle_state_hash (const void *bundle_state)
8394
{
8395
  const struct bundle_state *const state
8396
    = (const struct bundle_state *) bundle_state;
8397
  unsigned result, i;
8398
 
8399
  for (result = i = 0; i < dfa_state_size; i++)
8400
    result += (((unsigned char *) state->dfa_state) [i]
8401
               << ((i % CHAR_BIT) * 3 + CHAR_BIT));
8402
  return result + state->insn_num;
8403
}
8404
 
8405
/* The function returns nonzero if the bundle state keys are equal.  */
8406
 
8407
static int
8408
bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
8409
{
8410
  const struct bundle_state *const state1
8411
    = (const struct bundle_state *) bundle_state_1;
8412
  const struct bundle_state *const state2
8413
    = (const struct bundle_state *) bundle_state_2;
8414
 
8415
  return (state1->insn_num == state2->insn_num
8416
          && memcmp (state1->dfa_state, state2->dfa_state,
8417
                     dfa_state_size) == 0);
8418
}
8419
 
8420
/* The function inserts the BUNDLE_STATE into the hash table.  The
8421
   function returns nonzero if the bundle state has been inserted into
   the table.  The table contains the best bundle state with a given
   key.  */
8423
 
8424
static int
8425
insert_bundle_state (struct bundle_state *bundle_state)
8426
{
8427
  void **entry_ptr;
8428
 
8429
  entry_ptr = htab_find_slot (bundle_state_table, bundle_state, INSERT);
8430
  if (*entry_ptr == NULL)
8431
    {
8432
      bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
8433
      index_to_bundle_states [bundle_state->insn_num] = bundle_state;
8434
      *entry_ptr = (void *) bundle_state;
8435
      return TRUE;
8436
    }
8437
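  /* Keep the better of the two states: smaller cost first, then fewer
     accumulated insns (including nops), then smaller branch deviation,
     then fewer mid-bundle stop bits -- the same ordering used to pick
     the best final state in bundling ().  */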
  else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
8438
           || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
8439
               && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
8440
                   > bundle_state->accumulated_insns_num
8441
                   || (((struct bundle_state *)
8442
                        *entry_ptr)->accumulated_insns_num
8443
                       == bundle_state->accumulated_insns_num
8444
                       && (((struct bundle_state *)
8445
                            *entry_ptr)->branch_deviation
8446
                           > bundle_state->branch_deviation
8447
                           || (((struct bundle_state *)
8448
                                *entry_ptr)->branch_deviation
8449
                               == bundle_state->branch_deviation
8450
                               && ((struct bundle_state *)
8451
                                   *entry_ptr)->middle_bundle_stops
8452
                               > bundle_state->middle_bundle_stops))))))
8453
 
8454
    {
8455
      struct bundle_state temp;
8456
 
8457
      temp = *(struct bundle_state *) *entry_ptr;
8458
      *(struct bundle_state *) *entry_ptr = *bundle_state;
8459
      ((struct bundle_state *) *entry_ptr)->next = temp.next;
8460
      *bundle_state = temp;
8461
    }
8462
  return FALSE;
8463
}
8464
 
8465
/* Start work with the hash table.  */
8466
 
8467
static void
8468
initiate_bundle_state_table (void)
8469
{
8470
  bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
8471
                                    (htab_del) 0);
8472
}
8473
 
8474
/* Finish work with the hash table.  */
8475
 
8476
static void
8477
finish_bundle_state_table (void)
8478
{
8479
  htab_delete (bundle_state_table);
8480
}
8481
 
8482
 
8483
 
8484
/* The following variable is an insn `nop' used to check bundle states
   with different numbers of inserted nops.  */
8486
 
8487
static rtx ia64_nop;
8488
 
8489
/* The following function tries to issue NOPS_NUM nops for the current
8490
   state without advancing the processor cycle.  If it fails, the
   function returns FALSE and frees the current state.  */
8492
 
8493
static int
8494
try_issue_nops (struct bundle_state *curr_state, int nops_num)
8495
{
8496
  int i;
8497
 
8498
  for (i = 0; i < nops_num; i++)
8499
    if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
8500
      {
8501
        free_bundle_state (curr_state);
8502
        return FALSE;
8503
      }
8504
  return TRUE;
8505
}
8506
 
8507
/* The following function tries to issue INSN for the current
8508
   state without advancing the processor cycle.  If it fails, the
   function returns FALSE and frees the current state.  */
8510
 
8511
static int
8512
try_issue_insn (struct bundle_state *curr_state, rtx insn)
8513
{
8514
  if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
8515
    {
8516
      free_bundle_state (curr_state);
8517
      return FALSE;
8518
    }
8519
  return TRUE;
8520
}
8521
 
8522
/* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
   starting with ORIGINATOR without advancing the processor cycle.  If
   TRY_BUNDLE_END_P is TRUE, the function also (or only, if
   ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill up the whole
   bundle.  If successful, the function creates a new bundle state and
   inserts it into the hash table and into `index_to_bundle_states'.  */
8528
 
8529
static void
8530
issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
8531
                     rtx insn, int try_bundle_end_p, int only_bundle_end_p)
8532
{
8533
  struct bundle_state *curr_state;
8534
 
8535
  curr_state = get_free_bundle_state ();
8536
  memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
8537
  curr_state->insn = insn;
8538
  curr_state->insn_num = originator->insn_num + 1;
8539
  curr_state->cost = originator->cost;
8540
  curr_state->originator = originator;
8541
  curr_state->before_nops_num = before_nops_num;
8542
  curr_state->after_nops_num = 0;
8543
  curr_state->accumulated_insns_num
8544
    = originator->accumulated_insns_num + before_nops_num;
8545
  curr_state->branch_deviation = originator->branch_deviation;
8546
  curr_state->middle_bundle_stops = originator->middle_bundle_stops;
8547
  gcc_assert (insn);
8548
  if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
8549
    {
8550
      gcc_assert (GET_MODE (insn) != TImode);
8551
      if (!try_issue_nops (curr_state, before_nops_num))
8552
        return;
8553
      if (!try_issue_insn (curr_state, insn))
8554
        return;
8555
      memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
8556
      if (curr_state->accumulated_insns_num % 3 != 0)
8557
        curr_state->middle_bundle_stops++;
8558
      if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
8559
          && curr_state->accumulated_insns_num % 3 != 0)
8560
        {
8561
          free_bundle_state (curr_state);
8562
          return;
8563
        }
8564
    }
8565
  else if (GET_MODE (insn) != TImode)
8566
    {
8567
      if (!try_issue_nops (curr_state, before_nops_num))
8568
        return;
8569
      if (!try_issue_insn (curr_state, insn))
8570
        return;
8571
      curr_state->accumulated_insns_num++;
8572
      gcc_assert (GET_CODE (PATTERN (insn)) != ASM_INPUT
8573
                  && asm_noperands (PATTERN (insn)) < 0);
8574
 
8575
      if (ia64_safe_type (insn) == TYPE_L)
8576
        curr_state->accumulated_insns_num++;
8577
    }
8578
  else
8579
    {
8580
      /* If this is an insn that must be first in a group, then don't allow
8581
         nops to be emitted before it.  Currently, alloc is the only such
8582
         supported instruction.  */
8583
      /* ??? The bundling automatons should handle this for us, but they do
8584
         not yet have support for the first_insn attribute.  */
8585
      if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
8586
        {
8587
          free_bundle_state (curr_state);
8588
          return;
8589
        }
8590
 
8591
      state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
8592
      state_transition (curr_state->dfa_state, NULL);
8593
      curr_state->cost++;
8594
      if (!try_issue_nops (curr_state, before_nops_num))
8595
        return;
8596
      if (!try_issue_insn (curr_state, insn))
8597
        return;
8598
      curr_state->accumulated_insns_num++;
8599
      if (GET_CODE (PATTERN (insn)) == ASM_INPUT
8600
          || asm_noperands (PATTERN (insn)) >= 0)
8601
        {
8602
          /* Finish bundle containing asm insn.  */
8603
          curr_state->after_nops_num
8604
            = 3 - curr_state->accumulated_insns_num % 3;
8605
          curr_state->accumulated_insns_num
8606
            += 3 - curr_state->accumulated_insns_num % 3;
8607
        }
8608
      else if (ia64_safe_type (insn) == TYPE_L)
8609
        curr_state->accumulated_insns_num++;
8610
    }
8611
  if (ia64_safe_type (insn) == TYPE_B)
8612
    curr_state->branch_deviation
8613
      += 2 - (curr_state->accumulated_insns_num - 1) % 3;
8614
  if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
8615
    {
8616
      if (!only_bundle_end_p && insert_bundle_state (curr_state))
8617
        {
8618
          state_t dfa_state;
8619
          struct bundle_state *curr_state1;
8620
          struct bundle_state *allocated_states_chain;
8621
 
8622
          curr_state1 = get_free_bundle_state ();
8623
          dfa_state = curr_state1->dfa_state;
8624
          allocated_states_chain = curr_state1->allocated_states_chain;
8625
          *curr_state1 = *curr_state;
8626
          curr_state1->dfa_state = dfa_state;
8627
          curr_state1->allocated_states_chain = allocated_states_chain;
8628
          memcpy (curr_state1->dfa_state, curr_state->dfa_state,
8629
                  dfa_state_size);
8630
          curr_state = curr_state1;
8631
        }
8632
      if (!try_issue_nops (curr_state,
8633
                           3 - curr_state->accumulated_insns_num % 3))
8634
        return;
8635
      curr_state->after_nops_num
8636
        = 3 - curr_state->accumulated_insns_num % 3;
8637
      curr_state->accumulated_insns_num
8638
        += 3 - curr_state->accumulated_insns_num % 3;
8639
    }
8640
  if (!insert_bundle_state (curr_state))
8641
    free_bundle_state (curr_state);
8642
  return;
8643
}
8644
 
8645
/* The following function returns the position in the two-bundle window
   for the given STATE.  */
8647
 
8648
static int
8649
get_max_pos (state_t state)
8650
{
8651
  if (cpu_unit_reservation_p (state, pos_6))
8652
    return 6;
8653
  else if (cpu_unit_reservation_p (state, pos_5))
8654
    return 5;
8655
  else if (cpu_unit_reservation_p (state, pos_4))
8656
    return 4;
8657
  else if (cpu_unit_reservation_p (state, pos_3))
8658
    return 3;
8659
  else if (cpu_unit_reservation_p (state, pos_2))
8660
    return 2;
8661
  else if (cpu_unit_reservation_p (state, pos_1))
8662
    return 1;
8663
  else
8664
    return 0;
8665
}
8666
 
8667
/* The function returns the code of a possible template for the given
   position and state.  The function should be called only with the two
   position values 3 and 6.  We avoid generating F NOPs by putting
   templates containing F insns at the end of the template search,
   because of an undocumented anomaly in McKinley-derived cores which can
   cause stalls if an F-unit insn (including a NOP) is issued within a
   six-cycle window after reading certain application registers (such
   as ar.bsp).  Furthermore, power considerations also argue against
   the use of F-unit instructions unless they're really needed.  */
8676
 
8677
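/* For reference (an assumption based on the bundle selector handling in
   this file, e.g. the .mbb/.bbb special case in
   ia64_add_bundle_selector_before): the codes returned below are bundle
   template numbers 0 .mii, 1 .mmi, 2 .mfi, 3 .mmf, 4 .bbb, 5 .mbb,
   6 .mib, 7 .mmb, 8 .mfb, 9 .mlx.  */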
static int
8678
get_template (state_t state, int pos)
8679
{
8680
  switch (pos)
8681
    {
8682
    case 3:
8683
      if (cpu_unit_reservation_p (state, _0mmi_))
8684
        return 1;
8685
      else if (cpu_unit_reservation_p (state, _0mii_))
8686
        return 0;
8687
      else if (cpu_unit_reservation_p (state, _0mmb_))
8688
        return 7;
8689
      else if (cpu_unit_reservation_p (state, _0mib_))
8690
        return 6;
8691
      else if (cpu_unit_reservation_p (state, _0mbb_))
8692
        return 5;
8693
      else if (cpu_unit_reservation_p (state, _0bbb_))
8694
        return 4;
8695
      else if (cpu_unit_reservation_p (state, _0mmf_))
8696
        return 3;
8697
      else if (cpu_unit_reservation_p (state, _0mfi_))
8698
        return 2;
8699
      else if (cpu_unit_reservation_p (state, _0mfb_))
8700
        return 8;
8701
      else if (cpu_unit_reservation_p (state, _0mlx_))
8702
        return 9;
8703
      else
8704
        gcc_unreachable ();
8705
    case 6:
8706
      if (cpu_unit_reservation_p (state, _1mmi_))
8707
        return 1;
8708
      else if (cpu_unit_reservation_p (state, _1mii_))
8709
        return 0;
8710
      else if (cpu_unit_reservation_p (state, _1mmb_))
8711
        return 7;
8712
      else if (cpu_unit_reservation_p (state, _1mib_))
8713
        return 6;
8714
      else if (cpu_unit_reservation_p (state, _1mbb_))
8715
        return 5;
8716
      else if (cpu_unit_reservation_p (state, _1bbb_))
8717
        return 4;
8718
      else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
8719
        return 3;
8720
      else if (cpu_unit_reservation_p (state, _1mfi_))
8721
        return 2;
8722
      else if (cpu_unit_reservation_p (state, _1mfb_))
8723
        return 8;
8724
      else if (cpu_unit_reservation_p (state, _1mlx_))
8725
        return 9;
8726
      else
8727
        gcc_unreachable ();
8728
    default:
8729
      gcc_unreachable ();
8730
    }
8731
}
8732
 
8733
/* True when INSN is important for bundling.  */
8734
static bool
8735
important_for_bundling_p (rtx insn)
8736
{
8737
  return (INSN_P (insn)
8738
          && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
8739
          && GET_CODE (PATTERN (insn)) != USE
8740
          && GET_CODE (PATTERN (insn)) != CLOBBER);
8741
}
8742
 
8743
/* The following function returns the first insn important for insn
   bundling starting at INSN and before TAIL.  */
8745
 
8746
static rtx
8747
get_next_important_insn (rtx insn, rtx tail)
8748
{
8749
  for (; insn && insn != tail; insn = NEXT_INSN (insn))
8750
    if (important_for_bundling_p (insn))
8751
      return insn;
8752
  return NULL_RTX;
8753
}
8754
 
8755
/* Add a bundle selector TEMPLATE0 before INSN.  */
8756
 
8757
static void
8758
ia64_add_bundle_selector_before (int template0, rtx insn)
8759
{
8760
  rtx b = gen_bundle_selector (GEN_INT (template0));
8761
 
8762
  ia64_emit_insn_before (b, insn);
8763
#if NR_BUNDLES == 10
8764
  if ((template0 == 4 || template0 == 5)
8765
      && ia64_except_unwind_info (&global_options) == UI_TARGET)
8766
    {
8767
      int i;
8768
      rtx note = NULL_RTX;
8769
 
8770
      /* In .mbb and .bbb bundles, check if CALL_INSN isn't in the
8771
         first or second slot.  If it is and has a REG_EH_REGION note,
         copy the note to the following nops, as br.call sets rp to the
         address of the following bundle and therefore an EH region end
         must be on a bundle
8774
         boundary.  */
8775
      insn = PREV_INSN (insn);
8776
      for (i = 0; i < 3; i++)
8777
        {
8778
          do
8779
            insn = next_active_insn (insn);
8780
          while (GET_CODE (insn) == INSN
8781
                 && get_attr_empty (insn) == EMPTY_YES);
8782
          if (GET_CODE (insn) == CALL_INSN)
8783
            note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
8784
          else if (note)
8785
            {
8786
              int code;
8787
 
8788
              gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
8789
                          || code == CODE_FOR_nop_b);
8790
              if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
8791
                note = NULL_RTX;
8792
              else
8793
                add_reg_note (insn, REG_EH_REGION, XEXP (note, 0));
8794
            }
8795
        }
8796
    }
8797
#endif
8798
}
8799
 
8800
/* The following function does insn bundling.  Bundling means
   inserting templates and nop insns to fit insn groups into permitted
   templates.  Instruction scheduling uses an NDFA (non-deterministic
   finite automaton) encoding information about the templates and the
   inserted nops.  The nondeterminism of the automaton permits following
   all possible insn sequences very quickly.

   Unfortunately it is not possible to get information about the
   inserted nop insns and the templates used from the automaton states.
   The automaton only says that we can issue an insn, possibly inserting
   some nops before it and using some template.  Therefore insn
   bundling in this function is implemented by using a DFA
   (deterministic finite automaton).  We follow all possible insn
   sequences by inserting 0-2 nops (that is what the NDFA describes for
   insn scheduling) before/after each insn being bundled.  We know the
   start of a simulated processor cycle from insn scheduling (an insn
   starting a new cycle has TImode).

   A simple implementation of insn bundling would create an enormous
   number of possible insn sequences satisfying the information about
   new cycle ticks taken from the insn scheduling.  To make the
   algorithm practical we use dynamic programming.  Each decision
   (about inserting nops and implicitly about previous decisions) is
   described by structure bundle_state (see above).  If we generate the
   same bundle state (the key is the automaton state after issuing the
   insns and nops for it), we reuse the already generated one.  As a
   consequence we reject some decisions which cannot improve the
   solution and reduce memory for the algorithm.

   When we reach the end of an EBB (extended basic block), we choose
   the best sequence and then, moving back through the EBB, insert
   templates for the best alternative.  The templates are taken by
   querying the automaton state for each insn in the chosen bundle
   states.

   So the algorithm makes two (forward and backward) passes through the
   EBB.  */
8836
 
8837
static void
8838
bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
8839
{
8840
  struct bundle_state *curr_state, *next_state, *best_state;
8841
  rtx insn, next_insn;
8842
  int insn_num;
8843
  int i, bundle_end_p, only_bundle_end_p, asm_p;
8844
  int pos = 0, max_pos, template0, template1;
8845
  rtx b;
8846
  rtx nop;
8847
  enum attr_type type;
8848
 
8849
  insn_num = 0;
8850
  /* Count insns in the EBB.  */
8851
  for (insn = NEXT_INSN (prev_head_insn);
8852
       insn && insn != tail;
8853
       insn = NEXT_INSN (insn))
8854
    if (INSN_P (insn))
8855
      insn_num++;
8856
  if (insn_num == 0)
8857
    return;
8858
  bundling_p = 1;
8859
  dfa_clean_insn_cache ();
8860
  initiate_bundle_state_table ();
8861
  index_to_bundle_states = XNEWVEC (struct bundle_state *, insn_num + 2);
8862
  /* First (forward) pass -- generation of bundle states.  */
8863
  curr_state = get_free_bundle_state ();
8864
  curr_state->insn = NULL;
8865
  curr_state->before_nops_num = 0;
8866
  curr_state->after_nops_num = 0;
8867
  curr_state->insn_num = 0;
8868
  curr_state->cost = 0;
8869
  curr_state->accumulated_insns_num = 0;
8870
  curr_state->branch_deviation = 0;
8871
  curr_state->middle_bundle_stops = 0;
8872
  curr_state->next = NULL;
8873
  curr_state->originator = NULL;
8874
  state_reset (curr_state->dfa_state);
8875
  index_to_bundle_states [0] = curr_state;
8876
  insn_num = 0;
8877
  /* Shift the cycle mark if it is put on an insn which could be ignored.  */
8878
  for (insn = NEXT_INSN (prev_head_insn);
8879
       insn != tail;
8880
       insn = NEXT_INSN (insn))
8881
    if (INSN_P (insn)
8882
        && (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
8883
            || GET_CODE (PATTERN (insn)) == USE
8884
            || GET_CODE (PATTERN (insn)) == CLOBBER)
8885
        && GET_MODE (insn) == TImode)
8886
      {
8887
        PUT_MODE (insn, VOIDmode);
8888
        for (next_insn = NEXT_INSN (insn);
8889
             next_insn != tail;
8890
             next_insn = NEXT_INSN (next_insn))
8891
          if (INSN_P (next_insn)
8892
              && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
8893
              && GET_CODE (PATTERN (next_insn)) != USE
8894
              && GET_CODE (PATTERN (next_insn)) != CLOBBER
8895
              && INSN_CODE (next_insn) != CODE_FOR_insn_group_barrier)
8896
            {
8897
              PUT_MODE (next_insn, TImode);
8898
              break;
8899
            }
8900
      }
8901
  /* Forward pass: generation of bundle states.  */
8902
  for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
8903
       insn != NULL_RTX;
8904
       insn = next_insn)
8905
    {
8906
      gcc_assert (INSN_P (insn)
8907
                  && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
8908
                  && GET_CODE (PATTERN (insn)) != USE
8909
                  && GET_CODE (PATTERN (insn)) != CLOBBER);
8910
      type = ia64_safe_type (insn);
8911
      next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
8912
      insn_num++;
8913
      index_to_bundle_states [insn_num] = NULL;
8914
      for (curr_state = index_to_bundle_states [insn_num - 1];
8915
           curr_state != NULL;
8916
           curr_state = next_state)
8917
        {
8918
          pos = curr_state->accumulated_insns_num % 3;
8919
          next_state = curr_state->next;
8920
          /* We must fill up the current bundle in order to start a
8921
             subsequent asm insn in a new bundle.  An asm insn is always
             placed in a separate bundle.  */
8923
          only_bundle_end_p
8924
            = (next_insn != NULL_RTX
8925
               && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
8926
               && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
8927
          /* We may fill up the current bundle if it is the cycle end
8928
             without a group barrier.  */
8929
          bundle_end_p
8930
            = (only_bundle_end_p || next_insn == NULL_RTX
8931
               || (GET_MODE (next_insn) == TImode
8932
                   && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
8933
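          /* Insns of type F, B, L or S are also tried with two leading
             nops (presumably because they may need to be pushed to a
             later slot in the bundle); every insn is additionally tried
             with one and with zero leading nops.  */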
          if (type == TYPE_F || type == TYPE_B || type == TYPE_L
8934
              || type == TYPE_S)
8935
            issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
8936
                                 only_bundle_end_p);
8937
          issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
8938
                               only_bundle_end_p);
8939
          issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
8940
                               only_bundle_end_p);
8941
        }
8942
      gcc_assert (index_to_bundle_states [insn_num]);
8943
      for (curr_state = index_to_bundle_states [insn_num];
8944
           curr_state != NULL;
8945
           curr_state = curr_state->next)
8946
        if (verbose >= 2 && dump)
8947
          {
8948
            /* This structure is taken from generated code of the
8949
               pipeline hazard recognizer (see file insn-attrtab.c).
8950
               Please don't forget to change the structure if a new
8951
               automaton is added to .md file.  */
8952
            struct DFA_chip
8953
            {
8954
              unsigned short one_automaton_state;
8955
              unsigned short oneb_automaton_state;
8956
              unsigned short two_automaton_state;
8957
              unsigned short twob_automaton_state;
8958
            };
8959
 
8960
            fprintf
8961
              (dump,
8962
               "//    Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d state %d) for %d\n",
8963
               curr_state->unique_num,
8964
               (curr_state->originator == NULL
8965
                ? -1 : curr_state->originator->unique_num),
8966
               curr_state->cost,
8967
               curr_state->before_nops_num, curr_state->after_nops_num,
8968
               curr_state->accumulated_insns_num, curr_state->branch_deviation,
8969
               curr_state->middle_bundle_stops,
8970
               ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
8971
               INSN_UID (insn));
8972
          }
8973
    }
8974
 
8975
  /* We should find a solution because the 2nd insn scheduling has
8976
     found one.  */
8977
  gcc_assert (index_to_bundle_states [insn_num]);
8978
  /* Find a state corresponding to the best insn sequence.  */
8979
  best_state = NULL;
8980
  for (curr_state = index_to_bundle_states [insn_num];
8981
       curr_state != NULL;
8982
       curr_state = curr_state->next)
8983
    /* We are only looking at states whose last bundle is fully filled
       up.  First we prefer insn sequences with minimal cost, then those
       with a minimal number of inserted nops, and finally those with
       branch insns placed in the 3rd slots.  */
8987
    if (curr_state->accumulated_insns_num % 3 == 0
8988
        && (best_state == NULL || best_state->cost > curr_state->cost
8989
            || (best_state->cost == curr_state->cost
8990
                && (curr_state->accumulated_insns_num
8991
                    < best_state->accumulated_insns_num
8992
                    || (curr_state->accumulated_insns_num
8993
                        == best_state->accumulated_insns_num
8994
                        && (curr_state->branch_deviation
8995
                            < best_state->branch_deviation
8996
                            || (curr_state->branch_deviation
8997
                                == best_state->branch_deviation
8998
                                && curr_state->middle_bundle_stops
8999
                                < best_state->middle_bundle_stops)))))))
9000
      best_state = curr_state;
9001
  /* Second (backward) pass: adding nops and templates.  */
9002
  gcc_assert (best_state);
9003
  insn_num = best_state->before_nops_num;
9004
  template0 = template1 = -1;
9005
  for (curr_state = best_state;
9006
       curr_state->originator != NULL;
9007
       curr_state = curr_state->originator)
9008
    {
9009
      insn = curr_state->insn;
9010
      asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
9011
               || asm_noperands (PATTERN (insn)) >= 0);
9012
      insn_num++;
9013
      if (verbose >= 2 && dump)
9014
        {
9015
          struct DFA_chip
9016
          {
9017
            unsigned short one_automaton_state;
9018
            unsigned short oneb_automaton_state;
9019
            unsigned short two_automaton_state;
9020
            unsigned short twob_automaton_state;
9021
          };
9022
 
9023
          fprintf
9024
            (dump,
9025
             "//    Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d, state %d) for %d\n",
9026
             curr_state->unique_num,
9027
             (curr_state->originator == NULL
9028
              ? -1 : curr_state->originator->unique_num),
9029
             curr_state->cost,
9030
             curr_state->before_nops_num, curr_state->after_nops_num,
9031
             curr_state->accumulated_insns_num, curr_state->branch_deviation,
9032
             curr_state->middle_bundle_stops,
9033
             ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
9034
             INSN_UID (insn));
9035
        }
9036
      /* Find the position in the current bundle window.  The window can
         contain at most two bundles.  A two-bundle window means that
         the processor will make two bundle rotations.  */
9039
      max_pos = get_max_pos (curr_state->dfa_state);
9040
      if (max_pos == 6
9041
          /* The following (negative template number) means that the
9042
             processor did one bundle rotation.  */
9043
          || (max_pos == 3 && template0 < 0))
9044
        {
9045
          /* We are at the end of the window -- find template(s) for
9046
             its bundle(s).  */
9047
          pos = max_pos;
9048
          if (max_pos == 3)
9049
            template0 = get_template (curr_state->dfa_state, 3);
9050
          else
9051
            {
9052
              template1 = get_template (curr_state->dfa_state, 3);
9053
              template0 = get_template (curr_state->dfa_state, 6);
9054
            }
9055
        }
9056
      if (max_pos > 3 && template1 < 0)
9057
        /* It may happen when we have a stop inside a bundle.  */
9058
        {
9059
          gcc_assert (pos <= 3);
9060
          template1 = get_template (curr_state->dfa_state, 3);
9061
          pos += 3;
9062
        }
9063
      if (!asm_p)
9064
        /* Emit nops after the current insn.  */
9065
        for (i = 0; i < curr_state->after_nops_num; i++)
9066
          {
9067
            nop = gen_nop ();
9068
            emit_insn_after (nop, insn);
9069
            pos--;
9070
            gcc_assert (pos >= 0);
9071
            if (pos % 3 == 0)
9072
              {
9073
                /* We are at the start of a bundle: emit the template
9074
                   (it should be defined).  */
9075
                gcc_assert (template0 >= 0);
9076
                ia64_add_bundle_selector_before (template0, nop);
9077
                /* If we have two bundle window, we make one bundle
9078
                   rotation.  Otherwise template0 will be undefined
9079
                   (negative value).  */
9080
                template0 = template1;
9081
                template1 = -1;
9082
              }
9083
          }
9084
      /* Move the position backward in the window.  A group barrier has
         no slot.  An asm insn takes a whole bundle.  */
9086
      if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
9087
          && GET_CODE (PATTERN (insn)) != ASM_INPUT
9088
          && asm_noperands (PATTERN (insn)) < 0)
9089
        pos--;
9090
      /* Long insn takes 2 slots.  */
9091
      if (ia64_safe_type (insn) == TYPE_L)
9092
        pos--;
9093
      gcc_assert (pos >= 0);
9094
      if (pos % 3 == 0
9095
          && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
9096
          && GET_CODE (PATTERN (insn)) != ASM_INPUT
9097
          && asm_noperands (PATTERN (insn)) < 0)
9098
        {
9099
          /* The current insn is at the bundle start: emit the
9100
             template.  */
9101
          gcc_assert (template0 >= 0);
9102
          ia64_add_bundle_selector_before (template0, insn);
9103
          b = PREV_INSN (insn);
9104
          insn = b;
9105
          /* See comment above in analogous place for emitting nops
9106
             after the insn.  */
9107
          template0 = template1;
9108
          template1 = -1;
9109
        }
9110
      /* Emit nops before the current insn.  */
9111
      for (i = 0; i < curr_state->before_nops_num; i++)
9112
        {
9113
          nop = gen_nop ();
9114
          ia64_emit_insn_before (nop, insn);
9115
          nop = PREV_INSN (insn);
9116
          insn = nop;
9117
          pos--;
9118
          gcc_assert (pos >= 0);
9119
          if (pos % 3 == 0)
9120
            {
9121
              /* See comment above in analogous place for emitting nops
9122
                 after the insn.  */
9123
              gcc_assert (template0 >= 0);
9124
              ia64_add_bundle_selector_before (template0, insn);
9125
              b = PREV_INSN (insn);
9126
              insn = b;
9127
              template0 = template1;
9128
              template1 = -1;
9129
            }
9130
        }
9131
    }
9132
 
9133
#ifdef ENABLE_CHECKING
9134
  {
9135
    /* Assert right calculation of middle_bundle_stops.  */
9136
    int num = best_state->middle_bundle_stops;
9137
    bool start_bundle = true, end_bundle = false;
9138
 
9139
    for (insn = NEXT_INSN (prev_head_insn);
9140
         insn && insn != tail;
9141
         insn = NEXT_INSN (insn))
9142
      {
9143
        if (!INSN_P (insn))
9144
          continue;
9145
        if (recog_memoized (insn) == CODE_FOR_bundle_selector)
9146
          start_bundle = true;
9147
        else
9148
          {
9149
            rtx next_insn;
9150
 
9151
            for (next_insn = NEXT_INSN (insn);
9152
                 next_insn && next_insn != tail;
9153
                 next_insn = NEXT_INSN (next_insn))
9154
              if (INSN_P (next_insn)
9155
                  && (ia64_safe_itanium_class (next_insn)
9156
                      != ITANIUM_CLASS_IGNORE
9157
                      || recog_memoized (next_insn)
9158
                      == CODE_FOR_bundle_selector)
9159
                  && GET_CODE (PATTERN (next_insn)) != USE
9160
                  && GET_CODE (PATTERN (next_insn)) != CLOBBER)
9161
                break;
9162
 
9163
            end_bundle = next_insn == NULL_RTX
9164
             || next_insn == tail
9165
             || (INSN_P (next_insn)
9166
                 && recog_memoized (next_insn)
9167
                 == CODE_FOR_bundle_selector);
9168
            if (recog_memoized (insn) == CODE_FOR_insn_group_barrier
9169
                && !start_bundle && !end_bundle
9170
                && next_insn
9171
                && GET_CODE (PATTERN (next_insn)) != ASM_INPUT
9172
                && asm_noperands (PATTERN (next_insn)) < 0)
9173
              num--;
9174
 
9175
            start_bundle = false;
9176
          }
9177
      }
9178
 
9179
    gcc_assert (num == 0);
9180
  }
9181
#endif
9182
 
9183
  free (index_to_bundle_states);
9184
  finish_bundle_state_table ();
9185
  bundling_p = 0;
9186
  dfa_clean_insn_cache ();
9187
}
9188
 
9189
/* The following function is called at the end of scheduling BB or
9190
   EBB.  After reload, it inserts stop bits and does insn bundling.  */
9191
 
9192
static void
9193
ia64_sched_finish (FILE *dump, int sched_verbose)
9194
{
9195
  if (sched_verbose)
9196
    fprintf (dump, "// Finishing schedule.\n");
9197
  if (!reload_completed)
9198
    return;
9199
  if (reload_completed)
9200
    {
9201
      final_emit_insn_group_barriers (dump);
9202
      bundling (dump, sched_verbose, current_sched_info->prev_head,
9203
                current_sched_info->next_tail);
9204
      if (sched_verbose && dump)
9205
        fprintf (dump, "//    finishing %d-%d\n",
9206
                 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
9207
                 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
9208
 
9209
      return;
9210
    }
9211
}
9212
 
9213
/* The following function inserts stop bits in scheduled BB or EBB.  */
9214
 
9215
static void
9216
final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
9217
{
9218
  rtx insn;
9219
  int need_barrier_p = 0;
9220
  int seen_good_insn = 0;
9221
 
9222
  init_insn_group_barriers ();
9223
 
9224
  for (insn = NEXT_INSN (current_sched_info->prev_head);
9225
       insn != current_sched_info->next_tail;
9226
       insn = NEXT_INSN (insn))
9227
    {
9228
      if (GET_CODE (insn) == BARRIER)
9229
        {
9230
          rtx last = prev_active_insn (insn);
9231
 
9232
          if (! last)
9233
            continue;
9234
          if (GET_CODE (last) == JUMP_INSN
9235
              && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
9236
            last = prev_active_insn (last);
9237
          if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
9238
            emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
9239
 
9240
          init_insn_group_barriers ();
9241
          seen_good_insn = 0;
9242
          need_barrier_p = 0;
9243
        }
9244
      else if (NONDEBUG_INSN_P (insn))
9245
        {
9246
          if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
9247
            {
9248
              init_insn_group_barriers ();
9249
              seen_good_insn = 0;
9250
              need_barrier_p = 0;
9251
            }
9252
          else if (need_barrier_p || group_barrier_needed (insn)
9253
                   || (mflag_sched_stop_bits_after_every_cycle
9254
                       && GET_MODE (insn) == TImode
9255
                       && seen_good_insn))
9256
            {
9257
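              /* With TARGET_EARLY_STOP_BITS, try to emit the stop bit at
                 the most recent cycle boundary recorded by the second
                 scheduling pass (stops_p) rather than directly before
                 INSN, then replay the group state from that point.  */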
              if (TARGET_EARLY_STOP_BITS)
9258
                {
9259
                  rtx last;
9260
 
9261
                  for (last = insn;
9262
                       last != current_sched_info->prev_head;
9263
                       last = PREV_INSN (last))
9264
                    if (INSN_P (last) && GET_MODE (last) == TImode
9265
                        && stops_p [INSN_UID (last)])
9266
                      break;
9267
                  if (last == current_sched_info->prev_head)
9268
                    last = insn;
9269
                  last = prev_active_insn (last);
9270
                  if (last
9271
                      && recog_memoized (last) != CODE_FOR_insn_group_barrier)
9272
                    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
9273
                                     last);
9274
                  init_insn_group_barriers ();
9275
                  for (last = NEXT_INSN (last);
9276
                       last != insn;
9277
                       last = NEXT_INSN (last))
9278
                    if (INSN_P (last))
9279
                      {
9280
                        group_barrier_needed (last);
9281
                        if (recog_memoized (last) >= 0
9282
                            && important_for_bundling_p (last))
9283
                          seen_good_insn = 1;
9284
                      }
9285
                }
9286
              else
9287
                {
9288
                  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
9289
                                    insn);
9290
                  init_insn_group_barriers ();
9291
                  seen_good_insn = 0;
9292
                }
9293
              group_barrier_needed (insn);
9294
              if (recog_memoized (insn) >= 0
9295
                  && important_for_bundling_p (insn))
9296
                seen_good_insn = 1;
9297
            }
9298
          else if (recog_memoized (insn) >= 0
9299
                   && important_for_bundling_p (insn))
9300
            seen_good_insn = 1;
9301
          need_barrier_p = (GET_CODE (insn) == CALL_INSN
9302
                            || GET_CODE (PATTERN (insn)) == ASM_INPUT
9303
                            || asm_noperands (PATTERN (insn)) >= 0);
9304
        }
9305
    }
9306
}
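/* Illustrative sketch, not emitted verbatim by this file: the barrier
   created by gen_insn_group_barrier (GEN_INT (3)) is printed as the
   architectural stop bit ";;".  A dependent pair such as

       add r14 = r15, r16
       ;;
       ld8 r17 = [r14]

   needs that stop, because one instruction group may not write r14 and
   then read it.  */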
9307
 
9308
 
9309
 
9310
/* The following function returns the lookahead depth used when the DFA
9311
   insn scheduler chooses among ready insns.  */
9312
 
9313
static int
9314
ia64_first_cycle_multipass_dfa_lookahead (void)
9315
{
9316
  return (reload_completed ? 6 : 4);
9317
}
9318
 
9319
/* The following function initializes the variable `dfa_pre_cycle_insn'.  */
9320
 
9321
static void
9322
ia64_init_dfa_pre_cycle_insn (void)
9323
{
9324
  if (temp_dfa_state == NULL)
9325
    {
9326
      dfa_state_size = state_size ();
9327
      temp_dfa_state = xmalloc (dfa_state_size);
9328
      prev_cycle_state = xmalloc (dfa_state_size);
9329
    }
9330
  dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
9331
  PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
9332
  recog_memoized (dfa_pre_cycle_insn);
9333
  dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
9334
  PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
9335
  recog_memoized (dfa_stop_insn);
9336
}
9337
 
9338
/* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
9339
   used by the DFA insn scheduler.  */
9340
 
9341
static rtx
9342
ia64_dfa_pre_cycle_insn (void)
9343
{
9344
  return dfa_pre_cycle_insn;
9345
}
9346
 
9347
/* The following function returns TRUE if PRODUCER (of type ilog or
9348
   ld) produces the address for CONSUMER (of type st or stf).  */
9349
 
9350
int
9351
ia64_st_address_bypass_p (rtx producer, rtx consumer)
9352
{
9353
  rtx dest, reg, mem;
9354
 
9355
  gcc_assert (producer && consumer);
9356
  dest = ia64_single_set (producer);
9357
  gcc_assert (dest);
9358
  reg = SET_DEST (dest);
9359
  gcc_assert (reg);
9360
  if (GET_CODE (reg) == SUBREG)
9361
    reg = SUBREG_REG (reg);
9362
  gcc_assert (GET_CODE (reg) == REG);
9363
 
9364
  dest = ia64_single_set (consumer);
9365
  gcc_assert (dest);
9366
  mem = SET_DEST (dest);
9367
  gcc_assert (mem && GET_CODE (mem) == MEM);
9368
  return reg_mentioned_p (reg, mem);
9369
}
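/* Illustrative only: the bypass above matches producer/consumer pairs
   such as

       add r14 = r15, r16        (producer, type ilog)
       st8 [r14] = r17           (consumer, address comes from r14)

   where the producer's destination register appears in the address of
   the dependent store.  */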
9370
 
9371
/* The following function returns TRUE if PRODUCER (of type ilog or
9372
   ld) produces the address for CONSUMER (of type ld or fld).  */
9373
 
9374
int
9375
ia64_ld_address_bypass_p (rtx producer, rtx consumer)
9376
{
9377
  rtx dest, src, reg, mem;
9378
 
9379
  gcc_assert (producer && consumer);
9380
  dest = ia64_single_set (producer);
9381
  gcc_assert (dest);
9382
  reg = SET_DEST (dest);
9383
  gcc_assert (reg);
9384
  if (GET_CODE (reg) == SUBREG)
9385
    reg = SUBREG_REG (reg);
9386
  gcc_assert (GET_CODE (reg) == REG);
9387
 
9388
  src = ia64_single_set (consumer);
9389
  gcc_assert (src);
9390
  mem = SET_SRC (src);
9391
  gcc_assert (mem);
9392
 
9393
  if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
9394
    mem = XVECEXP (mem, 0, 0);
9395
  else if (GET_CODE (mem) == IF_THEN_ELSE)
9396
    /* ??? Is this bypass necessary for ld.c?  */
9397
    {
9398
      gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR);
9399
      mem = XEXP (mem, 1);
9400
    }
9401
 
9402
  while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
9403
    mem = XEXP (mem, 0);
9404
 
9405
  if (GET_CODE (mem) == UNSPEC)
9406
    {
9407
      int c = XINT (mem, 1);
9408
 
9409
      gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDS_A
9410
                  || c == UNSPEC_LDSA);
9411
      mem = XVECEXP (mem, 0, 0);
9412
    }
9413
 
9414
  /* Note that LO_SUM is used for GOT loads.  */
9415
  gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);
9416
 
9417
  return reg_mentioned_p (reg, mem);
9418
}
9419
 
9420
/* The following function returns TRUE if INSN produces an address for a
9421
   load/store insn.  We will place such insns into an M slot because that
9422
   decreases their latency.  */
9423
 
9424
int
9425
ia64_produce_address_p (rtx insn)
9426
{
9427
  return insn->call;
9428
}
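/* Note: INSN->call above does not mean "INSN is a call"; the ia64
   scheduler reuses that rtx flag as a scratch bit, set during
   dependence evaluation (ia64_dependencies_evaluation_hook) for insns
   whose result feeds the address of a dependent memory access.  */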
9429
 
9430
 
9431
/* Emit pseudo-ops for the assembler to describe predicate relations.
9432
   At present this assumes that we only consider predicate pairs to
9433
   be mutex, and that the assembler can deduce proper values from
9434
   straight-line code.  */
9435
 
9436
static void
9437
emit_predicate_relation_info (void)
9438
{
9439
  basic_block bb;
9440
 
9441
  FOR_EACH_BB_REVERSE (bb)
9442
    {
9443
      int r;
9444
      rtx head = BB_HEAD (bb);
9445
 
9446
      /* We only need such notes at code labels.  */
9447
      if (GET_CODE (head) != CODE_LABEL)
9448
        continue;
9449
      if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head)))
9450
        head = NEXT_INSN (head);
9451
 
9452
      /* Skip p0, which may be thought to be live due to (reg:DI p0)
9453
         grabbing the entire block of predicate registers.  */
9454
      for (r = PR_REG (2); r < PR_REG (64); r += 2)
9455
        if (REGNO_REG_SET_P (df_get_live_in (bb), r))
9456
          {
9457
            rtx p = gen_rtx_REG (BImode, r);
9458
            rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
9459
            if (head == BB_END (bb))
9460
              BB_END (bb) = n;
9461
            head = n;
9462
          }
9463
    }
9464
 
9465
  /* Look for conditional calls that do not return, and protect predicate
9466
     relations around them.  Otherwise the assembler will assume the call
9467
     returns, and complain about uses of call-clobbered predicates after
9468
     the call.  */
9469
  FOR_EACH_BB_REVERSE (bb)
9470
    {
9471
      rtx insn = BB_HEAD (bb);
9472
 
9473
      while (1)
9474
        {
9475
          if (GET_CODE (insn) == CALL_INSN
9476
              && GET_CODE (PATTERN (insn)) == COND_EXEC
9477
              && find_reg_note (insn, REG_NORETURN, NULL_RTX))
9478
            {
9479
              rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
9480
              rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
9481
              if (BB_HEAD (bb) == insn)
9482
                BB_HEAD (bb) = b;
9483
              if (BB_END (bb) == insn)
9484
                BB_END (bb) = a;
9485
            }
9486
 
9487
          if (insn == BB_END (bb))
9488
            break;
9489
          insn = NEXT_INSN (insn);
9490
        }
9491
    }
9492
}
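/* Illustrative only: a compare such as

       cmp.eq p6, p7 = r32, r33

   leaves p6 and p7 with complementary values, so the assembler can
   treat them as a mutex pair in straight-line code; the pred_rel_mutex
   insns emitted above restate that relation at labels and around
   noreturn calls, where it cannot be deduced.  */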
9493
 
9494
/* Perform machine dependent operations on the rtl chain INSNS.  */
9495
 
9496
static void
9497
ia64_reorg (void)
9498
{
9499
  /* We are freeing block_for_insn in the toplev to keep compatibility
9500
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
9501
  compute_bb_for_insn ();
9502
 
9503
  /* If optimizing, we'll have split before scheduling.  */
9504
  if (optimize == 0)
9505
    split_all_insns ();
9506
 
9507
  if (optimize && flag_schedule_insns_after_reload
9508
      && dbg_cnt (ia64_sched2))
9509
    {
9510
      basic_block bb;
9511
      timevar_push (TV_SCHED2);
9512
      ia64_final_schedule = 1;
9513
 
9514
      /* We can't let modulo-sched prevent us from scheduling any bbs,
9515
         since we need the final schedule to produce bundle information.  */
9516
      FOR_EACH_BB (bb)
9517
        bb->flags &= ~BB_DISABLE_SCHEDULE;
9518
 
9519
      initiate_bundle_states ();
9520
      ia64_nop = make_insn_raw (gen_nop ());
9521
      PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
9522
      recog_memoized (ia64_nop);
9523
      clocks_length = get_max_uid () + 1;
9524
      stops_p = XCNEWVEC (char, clocks_length);
9525
 
9526
      if (ia64_tune == PROCESSOR_ITANIUM2)
9527
        {
9528
          pos_1 = get_cpu_unit_code ("2_1");
9529
          pos_2 = get_cpu_unit_code ("2_2");
9530
          pos_3 = get_cpu_unit_code ("2_3");
9531
          pos_4 = get_cpu_unit_code ("2_4");
9532
          pos_5 = get_cpu_unit_code ("2_5");
9533
          pos_6 = get_cpu_unit_code ("2_6");
9534
          _0mii_ = get_cpu_unit_code ("2b_0mii.");
9535
          _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
9536
          _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
9537
          _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
9538
          _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
9539
          _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
9540
          _0mib_ = get_cpu_unit_code ("2b_0mib.");
9541
          _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
9542
          _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
9543
          _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
9544
          _1mii_ = get_cpu_unit_code ("2b_1mii.");
9545
          _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
9546
          _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
9547
          _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
9548
          _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
9549
          _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
9550
          _1mib_ = get_cpu_unit_code ("2b_1mib.");
9551
          _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
9552
          _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
9553
          _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
9554
        }
9555
      else
9556
        {
9557
          pos_1 = get_cpu_unit_code ("1_1");
9558
          pos_2 = get_cpu_unit_code ("1_2");
9559
          pos_3 = get_cpu_unit_code ("1_3");
9560
          pos_4 = get_cpu_unit_code ("1_4");
9561
          pos_5 = get_cpu_unit_code ("1_5");
9562
          pos_6 = get_cpu_unit_code ("1_6");
9563
          _0mii_ = get_cpu_unit_code ("1b_0mii.");
9564
          _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
9565
          _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
9566
          _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
9567
          _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
9568
          _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
9569
          _0mib_ = get_cpu_unit_code ("1b_0mib.");
9570
          _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
9571
          _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
9572
          _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
9573
          _1mii_ = get_cpu_unit_code ("1b_1mii.");
9574
          _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
9575
          _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
9576
          _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
9577
          _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
9578
          _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
9579
          _1mib_ = get_cpu_unit_code ("1b_1mib.");
9580
          _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
9581
          _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
9582
          _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
9583
        }
9584
 
9585
      if (flag_selective_scheduling2
9586
          && !maybe_skip_selective_scheduling ())
9587
        run_selective_scheduling ();
9588
      else
9589
        schedule_ebbs ();
9590
 
9591
      /* Redo alignment computation, as it might have gone wrong.  */
9592
      compute_alignments ();
9593
 
9594
      /* We cannot reuse this one because it has been corrupted by the
9595
         evil glat.  */
9596
      finish_bundle_states ();
9597
      free (stops_p);
9598
      stops_p = NULL;
9599
      emit_insn_group_barriers (dump_file);
9600
 
9601
      ia64_final_schedule = 0;
9602
      timevar_pop (TV_SCHED2);
9603
    }
9604
  else
9605
    emit_all_insn_group_barriers (dump_file);
9606
 
9607
  df_analyze ();
9608
 
9609
  /* A call must not be the last instruction in a function, so that the
9610
     return address is still within the function and unwinding works
9611
     properly.  Note that IA-64 differs from dwarf2 on this point.  */
9612
  if (ia64_except_unwind_info (&global_options) == UI_TARGET)
9613
    {
9614
      rtx insn;
9615
      int saw_stop = 0;
9616
 
9617
      insn = get_last_insn ();
9618
      if (! INSN_P (insn))
9619
        insn = prev_active_insn (insn);
9620
      if (insn)
9621
        {
9622
          /* Skip over insns that expand to nothing.  */
9623
          while (GET_CODE (insn) == INSN
9624
                 && get_attr_empty (insn) == EMPTY_YES)
9625
            {
9626
              if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
9627
                  && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
9628
                saw_stop = 1;
9629
              insn = prev_active_insn (insn);
9630
            }
9631
          if (GET_CODE (insn) == CALL_INSN)
9632
            {
9633
              if (! saw_stop)
9634
                emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9635
              emit_insn (gen_break_f ());
9636
              emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9637
            }
9638
        }
9639
    }
9640
 
9641
  emit_predicate_relation_info ();
9642
 
9643
  if (flag_var_tracking)
9644
    {
9645
      timevar_push (TV_VAR_TRACKING);
9646
      variable_tracking_main ();
9647
      timevar_pop (TV_VAR_TRACKING);
9648
    }
9649
  df_finish_pass (false);
9650
}
9651
 
9652
/* Return true if REGNO is used by the epilogue.  */
9653
 
9654
int
9655
ia64_epilogue_uses (int regno)
9656
{
9657
  switch (regno)
9658
    {
9659
    case R_GR (1):
9660
      /* With a call to a function in another module, we will write a new
9661
         value to "gp".  After returning from such a call, we need to make
9662
         sure the function restores the original gp-value, even if the
9663
         function itself does not use the gp anymore.  */
9664
      return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
9665
 
9666
    case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
9667
    case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
9668
      /* For functions defined with the syscall_linkage attribute, all
9669
         input registers are marked as live at all function exits.  This
9670
         prevents the register allocator from using the input registers,
9671
         which in turn makes it possible to restart a system call after
9672
         an interrupt without having to save/restore the input registers.
9673
         This also prevents kernel data from leaking to application code.  */
9674
      return lookup_attribute ("syscall_linkage",
9675
           TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
9676
 
9677
    case R_BR (0):
9678
      /* Conditional return patterns can't represent the use of `b0' as
9679
         the return address, so we force the value live this way.  */
9680
      return 1;
9681
 
9682
    case AR_PFS_REGNUM:
9683
      /* Likewise for ar.pfs, which is used by br.ret.  */
9684
      return 1;
9685
 
9686
    default:
9687
      return 0;
9688
    }
9689
}
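/* Illustrative only: the syscall_linkage case above is triggered by a
   declaration such as

       long sys_foo (long arg) __attribute__ ((syscall_linkage));

   (hypothetical name); compiling such a function keeps in0-in7 live at
   every exit.  */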
9690
 
9691
/* Return true if REGNO is used by the frame unwinder.  */
9692
 
9693
int
9694
ia64_eh_uses (int regno)
9695
{
9696
  unsigned int r;
9697
 
9698
  if (! reload_completed)
9699
    return 0;
9700
 
9701
  if (regno == 0)
9702
    return 0;
9703
 
9704
  for (r = reg_save_b0; r <= reg_save_ar_lc; r++)
9705
    if (regno == current_frame_info.r[r]
9706
       || regno == emitted_frame_related_regs[r])
9707
      return 1;
9708
 
9709
  return 0;
9710
}
9711
 
9712
/* Return true if this goes in small data/bss.  */
9713
 
9714
/* ??? We could also support our own long data here, generating movl/add/ld8
9715
   instead of addl,ld8/ld8.  This makes the code bigger, but should make the
9716
   code faster because there is one less load.  This also includes incomplete
9717
   types which can't go in sdata/sbss.  */
9718
 
9719
static bool
9720
ia64_in_small_data_p (const_tree exp)
9721
{
9722
  if (TARGET_NO_SDATA)
9723
    return false;
9724
 
9725
  /* We want to merge strings, so we never consider them small data.  */
9726
  if (TREE_CODE (exp) == STRING_CST)
9727
    return false;
9728
 
9729
  /* Functions are never small data.  */
9730
  if (TREE_CODE (exp) == FUNCTION_DECL)
9731
    return false;
9732
 
9733
  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
9734
    {
9735
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
9736
 
9737
      if (strcmp (section, ".sdata") == 0
9738
          || strncmp (section, ".sdata.", 7) == 0
9739
          || strncmp (section, ".gnu.linkonce.s.", 16) == 0
9740
          || strcmp (section, ".sbss") == 0
9741
          || strncmp (section, ".sbss.", 6) == 0
9742
          || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
9743
        return true;
9744
    }
9745
  else
9746
    {
9747
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
9748
 
9749
      /* If this is an incomplete type with size 0, then we can't put it
9750
         in sdata because it might be too big when completed.  */
9751
      if (size > 0 && size <= ia64_section_threshold)
9752
        return true;
9753
    }
9754
 
9755
  return false;
9756
}
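/* Illustrative only: assuming its size does not exceed
   ia64_section_threshold and small data is not disabled, a global such
   as

       int counter;

   is answered TRUE here and ends up in .sdata/.sbss; string constants
   and functions never do.  */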
9757
 
9758
/* Output assembly directives for prologue regions.  */
9759
 
9760
/* True if the current basic block is the last block of the function.  */
9761
 
9762
static bool last_block;
9763
 
9764
/* True if we need a copy_state command at the start of the next block.  */
9765
 
9766
static bool need_copy_state;
9767
 
9768
#ifndef MAX_ARTIFICIAL_LABEL_BYTES
9769
# define MAX_ARTIFICIAL_LABEL_BYTES 30
9770
#endif
9771
 
9772
/* The function emits unwind directives for the start of an epilogue.  */
9773
 
9774
static void
9775
process_epilogue (FILE *asm_out_file, rtx insn ATTRIBUTE_UNUSED,
9776
                  bool unwind, bool frame ATTRIBUTE_UNUSED)
9777
{
9778
  /* If this isn't the last block of the function, then we need to label the
9779
     current state, and copy it back in at the start of the next block.  */
9780
 
9781
  if (!last_block)
9782
    {
9783
      if (unwind)
9784
        fprintf (asm_out_file, "\t.label_state %d\n",
9785
                 ++cfun->machine->state_num);
9786
      need_copy_state = true;
9787
    }
9788
 
9789
  if (unwind)
9790
    fprintf (asm_out_file, "\t.restore sp\n");
9791
}
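/* Illustrative only: for an epilogue that is not in the last block this
   emits

       .label_state 1
       .restore sp

   and ia64_asm_unwind_emit later pairs it with ".body" and
   ".copy_state 1" at the start of the following block.  */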
9792
 
9793
/* This function processes a SET pattern for REG_CFA_ADJUST_CFA.  */
9794
 
9795
static void
9796
process_cfa_adjust_cfa (FILE *asm_out_file, rtx pat, rtx insn,
9797
                        bool unwind, bool frame)
9798
{
9799
  rtx dest = SET_DEST (pat);
9800
  rtx src = SET_SRC (pat);
9801
 
9802
  if (dest == stack_pointer_rtx)
9803
    {
9804
      if (GET_CODE (src) == PLUS)
9805
        {
9806
          rtx op0 = XEXP (src, 0);
9807
          rtx op1 = XEXP (src, 1);
9808
 
9809
          gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);
9810
 
9811
          if (INTVAL (op1) < 0)
9812
            {
9813
              gcc_assert (!frame_pointer_needed);
9814
              if (unwind)
9815
                fprintf (asm_out_file,
9816
                         "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
9817
                         -INTVAL (op1));
9818
            }
9819
          else
9820
            process_epilogue (asm_out_file, insn, unwind, frame);
9821
        }
9822
      else
9823
        {
9824
          gcc_assert (src == hard_frame_pointer_rtx);
9825
          process_epilogue (asm_out_file, insn, unwind, frame);
9826
        }
9827
    }
9828
  else if (dest == hard_frame_pointer_rtx)
9829
    {
9830
      gcc_assert (src == stack_pointer_rtx);
9831
      gcc_assert (frame_pointer_needed);
9832
 
9833
      if (unwind)
9834
        fprintf (asm_out_file, "\t.vframe r%d\n",
9835
                 ia64_dbx_register_number (REGNO (dest)));
9836
    }
9837
  else
9838
    gcc_unreachable ();
9839
}
9840
 
9841
/* This function processes a SET pattern for REG_CFA_REGISTER.  */
9842
 
9843
static void
9844
process_cfa_register (FILE *asm_out_file, rtx pat, bool unwind)
9845
{
9846
  rtx dest = SET_DEST (pat);
9847
  rtx src = SET_SRC (pat);
9848
  int dest_regno = REGNO (dest);
9849
  int src_regno;
9850
 
9851
  if (src == pc_rtx)
9852
    {
9853
      /* Saving return address pointer.  */
9854
      if (unwind)
9855
        fprintf (asm_out_file, "\t.save rp, r%d\n",
9856
                 ia64_dbx_register_number (dest_regno));
9857
      return;
9858
    }
9859
 
9860
  src_regno = REGNO (src);
9861
 
9862
  switch (src_regno)
9863
    {
9864
    case PR_REG (0):
9865
      gcc_assert (dest_regno == current_frame_info.r[reg_save_pr]);
9866
      if (unwind)
9867
        fprintf (asm_out_file, "\t.save pr, r%d\n",
9868
                 ia64_dbx_register_number (dest_regno));
9869
      break;
9870
 
9871
    case AR_UNAT_REGNUM:
9872
      gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_unat]);
9873
      if (unwind)
9874
        fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
9875
                 ia64_dbx_register_number (dest_regno));
9876
      break;
9877
 
9878
    case AR_LC_REGNUM:
9879
      gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_lc]);
9880
      if (unwind)
9881
        fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
9882
                 ia64_dbx_register_number (dest_regno));
9883
      break;
9884
 
9885
    default:
9886
      /* Everything else should indicate being stored to memory.  */
9887
      gcc_unreachable ();
9888
    }
9889
}
9890
 
9891
/* This function processes a SET pattern for REG_CFA_OFFSET.  */
9892
 
9893
static void
9894
process_cfa_offset (FILE *asm_out_file, rtx pat, bool unwind)
9895
{
9896
  rtx dest = SET_DEST (pat);
9897
  rtx src = SET_SRC (pat);
9898
  int src_regno = REGNO (src);
9899
  const char *saveop;
9900
  HOST_WIDE_INT off;
9901
  rtx base;
9902
 
9903
  gcc_assert (MEM_P (dest));
9904
  if (GET_CODE (XEXP (dest, 0)) == REG)
9905
    {
9906
      base = XEXP (dest, 0);
9907
      off = 0;
9908
    }
9909
  else
9910
    {
9911
      gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
9912
                  && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
9913
      base = XEXP (XEXP (dest, 0), 0);
9914
      off = INTVAL (XEXP (XEXP (dest, 0), 1));
9915
    }
9916
 
9917
  if (base == hard_frame_pointer_rtx)
9918
    {
9919
      saveop = ".savepsp";
9920
      off = - off;
9921
    }
9922
  else
9923
    {
9924
      gcc_assert (base == stack_pointer_rtx);
9925
      saveop = ".savesp";
9926
    }
9927
 
9928
  src_regno = REGNO (src);
9929
  switch (src_regno)
9930
    {
9931
    case BR_REG (0):
9932
      gcc_assert (!current_frame_info.r[reg_save_b0]);
9933
      if (unwind)
9934
        fprintf (asm_out_file, "\t%s rp, " HOST_WIDE_INT_PRINT_DEC "\n",
9935
                 saveop, off);
9936
      break;
9937
 
9938
    case PR_REG (0):
9939
      gcc_assert (!current_frame_info.r[reg_save_pr]);
9940
      if (unwind)
9941
        fprintf (asm_out_file, "\t%s pr, " HOST_WIDE_INT_PRINT_DEC "\n",
9942
                 saveop, off);
9943
      break;
9944
 
9945
    case AR_LC_REGNUM:
9946
      gcc_assert (!current_frame_info.r[reg_save_ar_lc]);
9947
      if (unwind)
9948
        fprintf (asm_out_file, "\t%s ar.lc, " HOST_WIDE_INT_PRINT_DEC "\n",
9949
                 saveop, off);
9950
      break;
9951
 
9952
    case AR_PFS_REGNUM:
9953
      gcc_assert (!current_frame_info.r[reg_save_ar_pfs]);
9954
      if (unwind)
9955
        fprintf (asm_out_file, "\t%s ar.pfs, " HOST_WIDE_INT_PRINT_DEC "\n",
9956
                 saveop, off);
9957
      break;
9958
 
9959
    case AR_UNAT_REGNUM:
9960
      gcc_assert (!current_frame_info.r[reg_save_ar_unat]);
9961
      if (unwind)
9962
        fprintf (asm_out_file, "\t%s ar.unat, " HOST_WIDE_INT_PRINT_DEC "\n",
9963
                 saveop, off);
9964
      break;
9965
 
9966
    case GR_REG (4):
9967
    case GR_REG (5):
9968
    case GR_REG (6):
9969
    case GR_REG (7):
9970
      if (unwind)
9971
        fprintf (asm_out_file, "\t.save.g 0x%x\n",
9972
                 1 << (src_regno - GR_REG (4)));
9973
      break;
9974
 
9975
    case BR_REG (1):
9976
    case BR_REG (2):
9977
    case BR_REG (3):
9978
    case BR_REG (4):
9979
    case BR_REG (5):
9980
      if (unwind)
9981
        fprintf (asm_out_file, "\t.save.b 0x%x\n",
9982
                 1 << (src_regno - BR_REG (1)));
9983
      break;
9984
 
9985
    case FR_REG (2):
9986
    case FR_REG (3):
9987
    case FR_REG (4):
9988
    case FR_REG (5):
9989
      if (unwind)
9990
        fprintf (asm_out_file, "\t.save.f 0x%x\n",
9991
                 1 << (src_regno - FR_REG (2)));
9992
      break;
9993
 
9994
    case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
9995
    case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
9996
    case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
9997
    case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
9998
      if (unwind)
9999
        fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
10000
                 1 << (src_regno - FR_REG (12)));
10001
      break;
10002
 
10003
    default:
10004
      /* ??? For some reason we mark other general registers, even those
10005
         we can't represent in the unwind info.  Ignore them.  */
10006
      break;
10007
    }
10008
}
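/* Illustrative only: a save of b0 sixteen bytes above the stack pointer
   comes out as

       .savesp rp, 16

   while frame-pointer-relative saves use ".savepsp" with the negated
   offset, matching the base/offset split above.  */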
10009
 
10010
/* This function looks at a single insn and emits any directives
10011
   required to unwind this insn.  */
10012
 
10013
static void
10014
ia64_asm_unwind_emit (FILE *asm_out_file, rtx insn)
10015
{
10016
  bool unwind = ia64_except_unwind_info (&global_options) == UI_TARGET;
10017
  bool frame = dwarf2out_do_frame ();
10018
  rtx note, pat;
10019
  bool handled_one;
10020
 
10021
  if (!unwind && !frame)
10022
    return;
10023
 
10024
  if (NOTE_INSN_BASIC_BLOCK_P (insn))
10025
    {
10026
      last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
10027
 
10028
      /* Restore unwind state from immediately before the epilogue.  */
10029
      if (need_copy_state)
10030
        {
10031
          if (unwind)
10032
            {
10033
              fprintf (asm_out_file, "\t.body\n");
10034
              fprintf (asm_out_file, "\t.copy_state %d\n",
10035
                       cfun->machine->state_num);
10036
            }
10037
          need_copy_state = false;
10038
        }
10039
    }
10040
 
10041
  if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
10042
    return;
10043
 
10044
  /* Look for the ALLOC insn.  */
10045
  if (INSN_CODE (insn) == CODE_FOR_alloc)
10046
    {
10047
      rtx dest = SET_DEST (XVECEXP (PATTERN (insn), 0, 0));
10048
      int dest_regno = REGNO (dest);
10049
 
10050
      /* If this is the final destination for ar.pfs, then this must
10051
         be the alloc in the prologue.  */
10052
      if (dest_regno == current_frame_info.r[reg_save_ar_pfs])
10053
        {
10054
          if (unwind)
10055
            fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
10056
                     ia64_dbx_register_number (dest_regno));
10057
        }
10058
      else
10059
        {
10060
          /* This must be an alloc before a sibcall.  We must drop the
10061
             old frame info.  The easiest way to drop the old frame
10062
             info is to ensure we had a ".restore sp" directive
10063
             followed by a new prologue.  If the procedure doesn't
10064
             have a memory-stack frame, we'll issue a dummy ".restore
10065
             sp" now.  */
10066
          if (current_frame_info.total_size == 0 && !frame_pointer_needed)
10067
            /* If we haven't done process_epilogue () yet, do it now.  */
10068
            process_epilogue (asm_out_file, insn, unwind, frame);
10069
          if (unwind)
10070
            fprintf (asm_out_file, "\t.prologue\n");
10071
        }
10072
      return;
10073
    }
10074
 
10075
  handled_one = false;
10076
  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
10077
    switch (REG_NOTE_KIND (note))
10078
      {
10079
      case REG_CFA_ADJUST_CFA:
10080
        pat = XEXP (note, 0);
10081
        if (pat == NULL)
10082
          pat = PATTERN (insn);
10083
        process_cfa_adjust_cfa (asm_out_file, pat, insn, unwind, frame);
10084
        handled_one = true;
10085
        break;
10086
 
10087
      case REG_CFA_OFFSET:
10088
        pat = XEXP (note, 0);
10089
        if (pat == NULL)
10090
          pat = PATTERN (insn);
10091
        process_cfa_offset (asm_out_file, pat, unwind);
10092
        handled_one = true;
10093
        break;
10094
 
10095
      case REG_CFA_REGISTER:
10096
        pat = XEXP (note, 0);
10097
        if (pat == NULL)
10098
          pat = PATTERN (insn);
10099
        process_cfa_register (asm_out_file, pat, unwind);
10100
        handled_one = true;
10101
        break;
10102
 
10103
      case REG_FRAME_RELATED_EXPR:
10104
      case REG_CFA_DEF_CFA:
10105
      case REG_CFA_EXPRESSION:
10106
      case REG_CFA_RESTORE:
10107
      case REG_CFA_SET_VDRAP:
10108
        /* Not used in the ia64 port.  */
10109
        gcc_unreachable ();
10110
 
10111
      default:
10112
        /* Not a frame-related note.  */
10113
        break;
10114
      }
10115
 
10116
  /* All REG_FRAME_RELATED_P insns, besides ALLOC, are marked with the
10117
     explicit action to take.  No guessing required.  */
10118
  gcc_assert (handled_one);
10119
}
10120
 
10121
/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */
10122
 
10123
static void
10124
ia64_asm_emit_except_personality (rtx personality)
10125
{
10126
  fputs ("\t.personality\t", asm_out_file);
10127
  output_addr_const (asm_out_file, personality);
10128
  fputc ('\n', asm_out_file);
10129
}
10130
 
10131
/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */
10132
 
10133
static void
10134
ia64_asm_init_sections (void)
10135
{
10136
  exception_section = get_unnamed_section (0, output_section_asm_op,
10137
                                           "\t.handlerdata");
10138
}
10139
 
10140
/* Implement TARGET_DEBUG_UNWIND_INFO.  */
10141
 
10142
static enum unwind_info_type
10143
ia64_debug_unwind_info (void)
10144
{
10145
  return UI_TARGET;
10146
}
10147
 
10148
enum ia64_builtins
10149
{
10150
  IA64_BUILTIN_BSP,
10151
  IA64_BUILTIN_COPYSIGNQ,
10152
  IA64_BUILTIN_FABSQ,
10153
  IA64_BUILTIN_FLUSHRS,
10154
  IA64_BUILTIN_INFQ,
10155
  IA64_BUILTIN_HUGE_VALQ,
10156
  IA64_BUILTIN_max
10157
};
10158
 
10159
static GTY(()) tree ia64_builtins[(int) IA64_BUILTIN_max];
10160
 
10161
void
10162
ia64_init_builtins (void)
10163
{
10164
  tree fpreg_type;
10165
  tree float80_type;
10166
  tree decl;
10167
 
10168
  /* The __fpreg type.  */
10169
  fpreg_type = make_node (REAL_TYPE);
10170
  TYPE_PRECISION (fpreg_type) = 82;
10171
  layout_type (fpreg_type);
10172
  (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
10173
 
10174
  /* The __float80 type.  */
10175
  float80_type = make_node (REAL_TYPE);
10176
  TYPE_PRECISION (float80_type) = 80;
10177
  layout_type (float80_type);
10178
  (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
10179
 
10180
  /* The __float128 type.  */
10181
  if (!TARGET_HPUX)
10182
    {
10183
      tree ftype;
10184
      tree float128_type = make_node (REAL_TYPE);
10185
 
10186
      TYPE_PRECISION (float128_type) = 128;
10187
      layout_type (float128_type);
10188
      (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
10189
 
10190
      /* TFmode support builtins.  */
10191
      ftype = build_function_type_list (float128_type, NULL_TREE);
10192
      decl = add_builtin_function ("__builtin_infq", ftype,
10193
                                   IA64_BUILTIN_INFQ, BUILT_IN_MD,
10194
                                   NULL, NULL_TREE);
10195
      ia64_builtins[IA64_BUILTIN_INFQ] = decl;
10196
 
10197
      decl = add_builtin_function ("__builtin_huge_valq", ftype,
10198
                                   IA64_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
10199
                                   NULL, NULL_TREE);
10200
      ia64_builtins[IA64_BUILTIN_HUGE_VALQ] = decl;
10201
 
10202
      ftype = build_function_type_list (float128_type,
10203
                                        float128_type,
10204
                                        NULL_TREE);
10205
      decl = add_builtin_function ("__builtin_fabsq", ftype,
10206
                                   IA64_BUILTIN_FABSQ, BUILT_IN_MD,
10207
                                   "__fabstf2", NULL_TREE);
10208
      TREE_READONLY (decl) = 1;
10209
      ia64_builtins[IA64_BUILTIN_FABSQ] = decl;
10210
 
10211
      ftype = build_function_type_list (float128_type,
10212
                                        float128_type,
10213
                                        float128_type,
10214
                                        NULL_TREE);
10215
      decl = add_builtin_function ("__builtin_copysignq", ftype,
10216
                                   IA64_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
10217
                                   "__copysigntf3", NULL_TREE);
10218
      TREE_READONLY (decl) = 1;
10219
      ia64_builtins[IA64_BUILTIN_COPYSIGNQ] = decl;
10220
    }
10221
  else
10222
    /* Under HPUX, this is a synonym for "long double".  */
10223
    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
10224
                                               "__float128");
10225
 
10226
  /* Fwrite on VMS is non-standard.  */
10227
#if TARGET_ABI_OPEN_VMS
10228
  vms_patch_builtins ();
10229
#endif
10230
 
10231
#define def_builtin(name, type, code)                                   \
10232
  add_builtin_function ((name), (type), (code), BUILT_IN_MD,    \
10233
                       NULL, NULL_TREE)
10234
 
10235
  decl = def_builtin ("__builtin_ia64_bsp",
10236
                      build_function_type_list (ptr_type_node, NULL_TREE),
10237
                      IA64_BUILTIN_BSP);
10238
  ia64_builtins[IA64_BUILTIN_BSP] = decl;
10239
 
10240
  decl = def_builtin ("__builtin_ia64_flushrs",
10241
                      build_function_type_list (void_type_node, NULL_TREE),
10242
                      IA64_BUILTIN_FLUSHRS);
10243
  ia64_builtins[IA64_BUILTIN_FLUSHRS] = decl;
10244
 
10245
#undef def_builtin
10246
 
10247
  if (TARGET_HPUX)
10248
    {
10249
      if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
10250
        set_user_assembler_name (decl, "_Isfinite");
10251
      if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
10252
        set_user_assembler_name (decl, "_Isfinitef");
10253
      if ((decl = builtin_decl_explicit (BUILT_IN_FINITEL)) != NULL_TREE)
10254
        set_user_assembler_name (decl, "_Isfinitef128");
10255
    }
10256
}
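/* Illustrative only: user code can call the builtins registered above,
   for example

       void *bsp = __builtin_ia64_bsp ();
       __builtin_ia64_flushrs ();

   to read the RSE backing store pointer and flush the register stack;
   on non-HP-UX targets the TFmode helpers __builtin_infq,
   __builtin_huge_valq, __builtin_fabsq and __builtin_copysignq are
   available as well.  */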
10257
 
10258
rtx
10259
ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10260
                     enum machine_mode mode ATTRIBUTE_UNUSED,
10261
                     int ignore ATTRIBUTE_UNUSED)
10262
{
10263
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10264
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
10265
 
10266
  switch (fcode)
10267
    {
10268
    case IA64_BUILTIN_BSP:
10269
      if (! target || ! register_operand (target, DImode))
10270
        target = gen_reg_rtx (DImode);
10271
      emit_insn (gen_bsp_value (target));
10272
#ifdef POINTERS_EXTEND_UNSIGNED
10273
      target = convert_memory_address (ptr_mode, target);
10274
#endif
10275
      return target;
10276
 
10277
    case IA64_BUILTIN_FLUSHRS:
10278
      emit_insn (gen_flushrs ());
10279
      return const0_rtx;
10280
 
10281
    case IA64_BUILTIN_INFQ:
10282
    case IA64_BUILTIN_HUGE_VALQ:
10283
      {
10284
        enum machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
10285
        REAL_VALUE_TYPE inf;
10286
        rtx tmp;
10287
 
10288
        real_inf (&inf);
10289
        tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, target_mode);
10290
 
10291
        tmp = validize_mem (force_const_mem (target_mode, tmp));
10292
 
10293
        if (target == 0)
10294
          target = gen_reg_rtx (target_mode);
10295
 
10296
        emit_move_insn (target, tmp);
10297
        return target;
10298
      }
10299
 
10300
    case IA64_BUILTIN_FABSQ:
10301
    case IA64_BUILTIN_COPYSIGNQ:
10302
      return expand_call (exp, target, ignore);
10303
 
10304
    default:
10305
      gcc_unreachable ();
10306
    }
10307
 
10308
  return NULL_RTX;
10309
}
10310
 
10311
/* Return the ia64 builtin for CODE.  */
10312
 
10313
static tree
10314
ia64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10315
{
10316
  if (code >= IA64_BUILTIN_max)
10317
    return error_mark_node;
10318
 
10319
  return ia64_builtins[code];
10320
}
10321
 
10322
/* On HP-UX IA64, aggregate parameters are passed in the
10323
   most significant bits of the stack slot.  */
10324
 
10325
enum direction
10326
ia64_hpux_function_arg_padding (enum machine_mode mode, const_tree type)
10327
{
10328
   /* Exception to normal case for structures/unions/etc.  */
10329
 
10330
   if (type && AGGREGATE_TYPE_P (type)
10331
       && int_size_in_bytes (type) < UNITS_PER_WORD)
10332
     return upward;
10333
 
10334
   /* Fall back to the default.  */
10335
   return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
10336
}
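/* Illustrative only: under the rule above a small aggregate such as

       struct s { short v; };

   passed on the HP-UX stack is padded upward, i.e. it occupies the
   most significant end of its slot, while scalars keep the default
   padding.  */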
10337
 
10338
/* Emit text to declare externally defined variables and functions, because
10339
   the Intel assembler does not support undefined externals.  */
10340
 
10341
void
10342
ia64_asm_output_external (FILE *file, tree decl, const char *name)
10343
{
10344
  /* We output the name if and only if TREE_SYMBOL_REFERENCED is
10345
     set in order to avoid putting out names that are never really
10346
     used. */
10347
  if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
10348
    {
10349
      /* maybe_assemble_visibility will return 1 if the assembler
10350
         visibility directive is output.  */
10351
      int need_visibility = ((*targetm.binds_local_p) (decl)
10352
                             && maybe_assemble_visibility (decl));
10353
 
10354
      /* GNU as does not need anything here, but the HP linker does
10355
         need something for external functions.  */
10356
      if ((TARGET_HPUX_LD || !TARGET_GNU_AS)
10357
          && TREE_CODE (decl) == FUNCTION_DECL)
10358
          (*targetm.asm_out.globalize_decl_name) (file, decl);
10359
      else if (need_visibility && !TARGET_GNU_AS)
10360
        (*targetm.asm_out.globalize_label) (file, name);
10361
    }
10362
}
10363
 
10364
/* Set SImode div/mod functions; init_integral_libfuncs only initializes
10365
   modes of word_mode and larger.  Rename the TFmode libfuncs using the
10366
   HPUX conventions. __divtf3 is used for XFmode. We need to keep it for
10367
   backward compatibility. */
10368
 
10369
static void
10370
ia64_init_libfuncs (void)
10371
{
10372
  set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
10373
  set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
10374
  set_optab_libfunc (smod_optab, SImode, "__modsi3");
10375
  set_optab_libfunc (umod_optab, SImode, "__umodsi3");
10376
 
10377
  set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
10378
  set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
10379
  set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
10380
  set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
10381
  set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
10382
 
10383
  set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
10384
  set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
10385
  set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
10386
  set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
10387
  set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
10388
  set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
10389
 
10390
  set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
10391
  set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
10392
  set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad");
10393
  set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
10394
  set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
10395
 
10396
  set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
10397
  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
10398
  set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad");
10399
  /* HP-UX 11.23 libc does not have a function for unsigned
10400
     SImode-to-TFmode conversion.  */
10401
  set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
10402
}
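/* Illustrative only: with the table above a TFmode addition (for
   example on "long double" under the HP-UX ABI) is lowered to a call
   to _U_Qfadd instead of the default __addtf3, and a TFmode-to-SImode
   conversion calls _U_Qfcnvfxt_quad_to_sgl.  */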
10403
 
10404
/* Rename all the TFmode libfuncs using the HPUX conventions.  */
10405
 
10406
static void
10407
ia64_hpux_init_libfuncs (void)
10408
{
10409
  ia64_init_libfuncs ();
10410
 
10411
  /* The HP SI millicode division and mod functions expect DI arguments.
10412
     By turning them off completely we avoid using both libgcc and the
10413
     non-standard millicode routines and use the HP DI millicode routines
10414
     instead.  */
10415
 
10416
  set_optab_libfunc (sdiv_optab, SImode, 0);
10417
  set_optab_libfunc (udiv_optab, SImode, 0);
10418
  set_optab_libfunc (smod_optab, SImode, 0);
10419
  set_optab_libfunc (umod_optab, SImode, 0);
10420
 
10421
  set_optab_libfunc (sdiv_optab, DImode, "__milli_divI");
10422
  set_optab_libfunc (udiv_optab, DImode, "__milli_divU");
10423
  set_optab_libfunc (smod_optab, DImode, "__milli_remI");
10424
  set_optab_libfunc (umod_optab, DImode, "__milli_remU");
10425
 
10426
  /* HP-UX libc has TF min/max/abs routines in it.  */
10427
  set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
10428
  set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
10429
  set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
10430
 
10431
  /* ia64_expand_compare uses this.  */
10432
  cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
10433
 
10434
  /* These should never be used.  */
10435
  set_optab_libfunc (eq_optab, TFmode, 0);
10436
  set_optab_libfunc (ne_optab, TFmode, 0);
10437
  set_optab_libfunc (gt_optab, TFmode, 0);
10438
  set_optab_libfunc (ge_optab, TFmode, 0);
10439
  set_optab_libfunc (lt_optab, TFmode, 0);
10440
  set_optab_libfunc (le_optab, TFmode, 0);
10441
}
10442
 
10443
/* Rename the division and modulus functions in VMS.  */
10444
 
10445
static void
10446
ia64_vms_init_libfuncs (void)
10447
{
10448
  set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
10449
  set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
10450
  set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
10451
  set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
10452
  set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
10453
  set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
10454
  set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
10455
  set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
10456
  abort_libfunc = init_one_libfunc ("decc$abort");
10457
  memcmp_libfunc = init_one_libfunc ("decc$memcmp");
10458
#ifdef MEM_LIBFUNCS_INIT
10459
  MEM_LIBFUNCS_INIT;
10460
#endif
10461
}
10462
 
10463
/* Rename the TFmode libfuncs available from soft-fp in glibc using
10464
   the HPUX conventions.  */
10465
 
10466
static void
10467
ia64_sysv4_init_libfuncs (void)
10468
{
10469
  ia64_init_libfuncs ();
10470
 
10471
  /* These functions are not part of the HPUX TFmode interface.  We
10472
     use them instead of _U_Qfcmp, which doesn't work the way we
10473
     expect.  */
10474
  set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
10475
  set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
10476
  set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
10477
  set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
10478
  set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
10479
  set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
10480
 
10481
  /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
10482
     glibc doesn't have them.  */
10483
}
10484
 
10485
/* Use soft-fp.  */
10486
 
10487
static void
10488
ia64_soft_fp_init_libfuncs (void)
10489
{
10490
}
10491
 
10492
static bool
10493
ia64_vms_valid_pointer_mode (enum machine_mode mode)
10494
{
10495
  return (mode == SImode || mode == DImode);
10496
}
10497
 
10498
/* For HPUX, it is illegal to have relocations in shared segments.  */
10499
 
10500
static int
10501
ia64_hpux_reloc_rw_mask (void)
10502
{
10503
  return 3;
10504
}
10505
 
10506
/* For others, relax this so that relocations to local data go in
10507
   read-only segments, but we still cannot allow global relocations
10508
   in read-only segments.  */
10509
 
10510
static int
10511
ia64_reloc_rw_mask (void)
10512
{
10513
  return flag_pic ? 3 : 2;
10514
}
10515
 
10516
/* Return the section to use for X.  The only special thing we do here
10517
   is to honor small data.  */
10518
 
10519
static section *
10520
ia64_select_rtx_section (enum machine_mode mode, rtx x,
10521
                         unsigned HOST_WIDE_INT align)
10522
{
10523
  if (GET_MODE_SIZE (mode) > 0
10524
      && GET_MODE_SIZE (mode) <= ia64_section_threshold
10525
      && !TARGET_NO_SDATA)
10526
    return sdata_section;
10527
  else
10528
    return default_elf_select_rtx_section (mode, x, align);
10529
}
10530
 
10531
static unsigned int
10532
ia64_section_type_flags (tree decl, const char *name, int reloc)
10533
{
10534
  unsigned int flags = 0;
10535
 
10536
  if (strcmp (name, ".sdata") == 0
10537
      || strncmp (name, ".sdata.", 7) == 0
10538
      || strncmp (name, ".gnu.linkonce.s.", 16) == 0
10539
      || strncmp (name, ".sdata2.", 8) == 0
10540
      || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
10541
      || strcmp (name, ".sbss") == 0
10542
      || strncmp (name, ".sbss.", 6) == 0
10543
      || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
10544
    flags = SECTION_SMALL;
10545
 
10546
#if TARGET_ABI_OPEN_VMS
10547
  if (decl && DECL_ATTRIBUTES (decl)
10548
      && lookup_attribute ("common_object", DECL_ATTRIBUTES (decl)))
10549
    flags |= SECTION_VMS_OVERLAY;
10550
#endif
10551
 
10552
  flags |= default_section_type_flags (decl, name, reloc);
10553
  return flags;
10554
}
10555
 
10556
/* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
10557
   structure type whose address should be passed
10558
   in out0, rather than in r8.  */
10559
 
10560
static bool
10561
ia64_struct_retval_addr_is_first_parm_p (tree fntype)
10562
{
10563
  tree ret_type = TREE_TYPE (fntype);
10564
 
10565
  /* The Itanium C++ ABI requires that out0, rather than r8, be used
10566
     as the structure return address parameter, if the return value
10567
     type has a non-trivial copy constructor or destructor.  It is not
10568
     clear if this same convention should be used for other
10569
     programming languages.  Until G++ 3.4, we incorrectly used r8 for
10570
     these return values.  */
10571
  return (abi_version_at_least (2)
10572
          && ret_type
10573
          && TYPE_MODE (ret_type) == BLKmode
10574
          && TREE_ADDRESSABLE (ret_type)
10575
          && strcmp (lang_hooks.name, "GNU C++") == 0);
10576
}
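/* Illustrative only: for C++ code such as

       struct S { ~S (); };
       S f (void);

   the return value has a non-trivial destructor, so the hidden return
   slot address is passed in out0; a plain C struct of the same shape
   keeps using r8 (see ia64_struct_value_rtx below).  */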
10577
 
10578
/* Output the assembler code for a thunk function.  THUNK_DECL is the
10579
   declaration for the thunk function itself, FUNCTION is the decl for
10580
   the target function.  DELTA is an immediate constant offset to be
10581
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
10582
   *(*this + vcall_offset) should be added to THIS.  */
10583
 
10584
static void
10585
ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
10586
                      HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10587
                      tree function)
10588
{
10589
  rtx this_rtx, insn, funexp;
10590
  unsigned int this_parmno;
10591
  unsigned int this_regno;
10592
  rtx delta_rtx;
10593
 
10594
  reload_completed = 1;
10595
  epilogue_completed = 1;
10596
 
10597
  /* Set things up as ia64_expand_prologue might.  */
10598
  last_scratch_gr_reg = 15;
10599
 
10600
  memset (&current_frame_info, 0, sizeof (current_frame_info));
10601
  current_frame_info.spill_cfa_off = -16;
10602
  current_frame_info.n_input_regs = 1;
10603
  current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
10604
 
10605
  /* Mark the end of the (empty) prologue.  */
10606
  emit_note (NOTE_INSN_PROLOGUE_END);
10607
 
10608
  /* Figure out whether "this" will be the first parameter (the
10609
     typical case) or the second parameter (as happens when the
10610
     virtual function returns certain class objects).  */
10611
  this_parmno
10612
    = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
10613
       ? 1 : 0);
10614
  this_regno = IN_REG (this_parmno);
10615
  if (!TARGET_REG_NAMES)
10616
    reg_names[this_regno] = ia64_reg_numbers[this_parmno];
10617
 
10618
  this_rtx = gen_rtx_REG (Pmode, this_regno);
10619
 
10620
  /* Apply the constant offset, if required.  */
10621
  delta_rtx = GEN_INT (delta);
10622
  if (TARGET_ILP32)
10623
    {
10624
      rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
10625
      REG_POINTER (tmp) = 1;
10626
      if (delta && satisfies_constraint_I (delta_rtx))
10627
        {
10628
          emit_insn (gen_ptr_extend_plus_imm (this_rtx, tmp, delta_rtx));
10629
          delta = 0;
10630
        }
10631
      else
10632
        emit_insn (gen_ptr_extend (this_rtx, tmp));
10633
    }
10634
  if (delta)
10635
    {
10636
      if (!satisfies_constraint_I (delta_rtx))
10637
        {
10638
          rtx tmp = gen_rtx_REG (Pmode, 2);
10639
          emit_move_insn (tmp, delta_rtx);
10640
          delta_rtx = tmp;
10641
        }
10642
      emit_insn (gen_adddi3 (this_rtx, this_rtx, delta_rtx));
10643
    }
10644
 
10645
  /* Apply the offset from the vtable, if required.  */
10646
  if (vcall_offset)
10647
    {
10648
      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
10649
      rtx tmp = gen_rtx_REG (Pmode, 2);
10650
 
10651
      if (TARGET_ILP32)
10652
        {
10653
          rtx t = gen_rtx_REG (ptr_mode, 2);
10654
          REG_POINTER (t) = 1;
10655
          emit_move_insn (t, gen_rtx_MEM (ptr_mode, this_rtx));
10656
          if (satisfies_constraint_I (vcall_offset_rtx))
10657
            {
10658
              emit_insn (gen_ptr_extend_plus_imm (tmp, t, vcall_offset_rtx));
10659
              vcall_offset = 0;
10660
            }
10661
          else
10662
            emit_insn (gen_ptr_extend (tmp, t));
10663
        }
10664
      else
10665
        emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
10666
 
10667
      if (vcall_offset)
10668
        {
10669
          if (!satisfies_constraint_J (vcall_offset_rtx))
10670
            {
10671
              rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
10672
              emit_move_insn (tmp2, vcall_offset_rtx);
10673
              vcall_offset_rtx = tmp2;
10674
            }
10675
          emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
10676
        }
10677
 
10678
      if (TARGET_ILP32)
10679
        emit_insn (gen_zero_extendsidi2 (tmp, gen_rtx_MEM (ptr_mode, tmp)));
10680
      else
10681
        emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
10682
 
10683
      emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
10684
    }
10685
 
10686
  /* Generate a tail call to the target function.  */
10687
  if (! TREE_USED (function))
10688
    {
10689
      assemble_external (function);
10690
      TREE_USED (function) = 1;
10691
    }
10692
  funexp = XEXP (DECL_RTL (function), 0);
10693
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
10694
  ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
10695
  insn = get_last_insn ();
10696
  SIBLING_CALL_P (insn) = 1;
10697
 
10698
  /* Code generation for calls relies on splitting.  */
10699
  reload_completed = 1;
10700
  epilogue_completed = 1;
10701
  try_split (PATTERN (insn), insn, 0);
10702
 
10703
  emit_barrier ();
10704
 
10705
  /* Run just enough of rest_of_compilation to get the insns emitted.
10706
     There's not really enough bulk here to make other passes such as
10707
     instruction scheduling worthwhile.  Note that use_thunk calls
10708
     assemble_start_function and assemble_end_function.  */
10709
 
10710
  insn_locators_alloc ();
10711
  emit_all_insn_group_barriers (NULL);
10712
  insn = get_insns ();
10713
  shorten_branches (insn);
10714
  final_start_function (insn, file, 1);
10715
  final (insn, file, 1);
10716
  final_end_function ();
10717
 
10718
  reload_completed = 0;
10719
  epilogue_completed = 0;
10720
}
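/* Illustrative only: ignoring the ILP32 pointer extensions, the
   adjustment generated above is equivalent to

       this += delta;
       if (vcall_offset)
         this += *(ptrdiff_t *) (*(char **) this + vcall_offset);

   followed by a sibling call to FUNCTION.  */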
10721
 
10722
/* Worker function for TARGET_STRUCT_VALUE_RTX.  */
10723
 
10724
static rtx
10725
ia64_struct_value_rtx (tree fntype,
10726
                       int incoming ATTRIBUTE_UNUSED)
10727
{
10728
  if (TARGET_ABI_OPEN_VMS ||
10729
      (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype)))
10730
    return NULL_RTX;
10731
  return gen_rtx_REG (Pmode, GR_REG (8));
10732
}
10733
 
10734
static bool
10735
ia64_scalar_mode_supported_p (enum machine_mode mode)
10736
{
10737
  switch (mode)
10738
    {
10739
    case QImode:
10740
    case HImode:
10741
    case SImode:
10742
    case DImode:
10743
    case TImode:
10744
      return true;
10745
 
10746
    case SFmode:
10747
    case DFmode:
10748
    case XFmode:
10749
    case RFmode:
10750
      return true;
10751
 
10752
    case TFmode:
10753
      return true;
10754
 
10755
    default:
10756
      return false;
10757
    }
10758
}
10759
 
10760
static bool
10761
ia64_vector_mode_supported_p (enum machine_mode mode)
10762
{
10763
  switch (mode)
10764
    {
10765
    case V8QImode:
10766
    case V4HImode:
10767
    case V2SImode:
10768
      return true;
10769
 
10770
    case V2SFmode:
10771
      return true;
10772
 
10773
    default:
10774
      return false;
10775
    }
10776
}
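/* Illustrative only: the vector modes accepted above correspond to
   8-byte GNU vector types such as

       typedef short v4hi __attribute__ ((vector_size (8)));
       typedef float v2sf __attribute__ ((vector_size (8)));

   which map to V4HImode and V2SFmode respectively.  */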
10777
 
10778
/* Implement the FUNCTION_PROFILER macro.  */
10779
 
10780
void
10781
ia64_output_function_profiler (FILE *file, int labelno)
10782
{
10783
  bool indirect_call;
10784
 
10785
  /* If the function needs a static chain and the static chain
10786
     register is r15, we use an indirect call so as to bypass
10787
     the PLT stub in case the executable is dynamically linked,
10788
     because the stub clobbers r15 as per 5.3.6 of the psABI.
10789
     We don't need to do that in non-canonical PIC mode.  */
10790
 
10791
  if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
10792
    {
10793
      gcc_assert (STATIC_CHAIN_REGNUM == 15);
10794
      indirect_call = true;
10795
    }
10796
  else
10797
    indirect_call = false;
10798
 
10799
  if (TARGET_GNU_AS)
10800
    fputs ("\t.prologue 4, r40\n", file);
10801
  else
10802
    fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
10803
  fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);
10804
 
10805
  if (NO_PROFILE_COUNTERS)
10806
    fputs ("\tmov out3 = r0\n", file);
10807
  else
10808
    {
10809
      char buf[20];
10810
      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10811
 
10812
      if (TARGET_AUTO_PIC)
10813
        fputs ("\tmovl out3 = @gprel(", file);
10814
      else
10815
        fputs ("\taddl out3 = @ltoff(", file);
10816
      assemble_name (file, buf);
10817
      if (TARGET_AUTO_PIC)
10818
        fputs (")\n", file);
10819
      else
10820
        fputs ("), r1\n", file);
10821
    }
10822
 
10823
  if (indirect_call)
10824
    fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
10825
  fputs ("\t;;\n", file);
10826
 
10827
  fputs ("\t.save rp, r42\n", file);
10828
  fputs ("\tmov out2 = b0\n", file);
10829
  if (indirect_call)
10830
    fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
10831
  fputs ("\t.body\n", file);
10832
  fputs ("\tmov out1 = r1\n", file);
10833
  if (indirect_call)
10834
    {
10835
      fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
10836
      fputs ("\tmov b6 = r16\n", file);
10837
      fputs ("\tld8 r1 = [r14]\n", file);
10838
      fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
10839
    }
10840
  else
10841
    fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
10842
}
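/* Editorial note (not part of the original source): for the common case
   of the GNU assembler, NO_PROFILE_COUNTERS and a direct call, the fputs
   calls above emit a sequence of roughly this shape:

	.prologue 4, r40
	alloc out0 = ar.pfs, 8, 0, 4, 0
	mov out3 = r0
	;;
	.save rp, r42
	mov out2 = b0
	.body
	mov out1 = r1
	br.call.sptk.many b0 = _mcount
	;;
*/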

static GTY(()) rtx mcount_func_rtx;
static rtx
gen_mcount_func_rtx (void)
{
  if (!mcount_func_rtx)
    mcount_func_rtx = init_one_libfunc ("_mcount");
  return mcount_func_rtx;
}

void
ia64_profile_hook (int labelno)
{
  rtx label, ip;

  if (NO_PROFILE_COUNTERS)
    label = const0_rtx;
  else
    {
      char buf[30];
      const char *label_name;
      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
      label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
      label = gen_rtx_SYMBOL_REF (Pmode, label_name);
      SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
    }
  ip = gen_reg_rtx (Pmode);
  emit_insn (gen_ip_value (ip));
  emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
                     VOIDmode, 3,
                     gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
                     ip, Pmode,
                     label, Pmode);
}

/* Return the mangling of TYPE if it is an extended fundamental type.  */

static const char *
ia64_mangle_type (const_tree type)
{
  type = TYPE_MAIN_VARIANT (type);

  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
    return NULL;

  /* On HP-UX, "long double" is mangled as "e" so __float128 is
     mangled as "e".  */
  if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
    return "g";
  /* On HP-UX, "e" is not available as a mangling of __float80 so use
     an extended mangling.  Elsewhere, "e" is available since long
     double is 80 bits.  */
  if (TYPE_MODE (type) == XFmode)
    return TARGET_HPUX ? "u9__float80" : "e";
  if (TYPE_MODE (type) == RFmode)
    return "u7__fpreg";
  return NULL;
}
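/* Editorial summary (not part of the original source) of the manglings
   chosen above:
     TFmode (__float128, non-HP-UX)  -> "g"
     XFmode (__float80)              -> "u9__float80" on HP-UX, "e" elsewhere
     RFmode (__fpreg)                -> "u7__fpreg"
   Everything else falls back to the default mangling.  */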

/* Return the diagnostic message string if conversion from FROMTYPE to
   TOTYPE is not allowed, NULL otherwise.  */
static const char *
ia64_invalid_conversion (const_tree fromtype, const_tree totype)
{
  /* Reject nontrivial conversion to or from __fpreg.  */
  if (TYPE_MODE (fromtype) == RFmode
      && TYPE_MODE (totype) != RFmode
      && TYPE_MODE (totype) != VOIDmode)
    return N_("invalid conversion from %<__fpreg%>");
  if (TYPE_MODE (totype) == RFmode
      && TYPE_MODE (fromtype) != RFmode)
    return N_("invalid conversion to %<__fpreg%>");
  return NULL;
}

/* Return the diagnostic message string if the unary operation OP is
   not permitted on TYPE, NULL otherwise.  */
static const char *
ia64_invalid_unary_op (int op, const_tree type)
{
  /* Reject operations on __fpreg other than unary + or &.  */
  if (TYPE_MODE (type) == RFmode
      && op != CONVERT_EXPR
      && op != ADDR_EXPR)
    return N_("invalid operation on %<__fpreg%>");
  return NULL;
}

/* Return the diagnostic message string if the binary operation OP is
   not permitted on TYPE1 and TYPE2, NULL otherwise.  */
static const char *
ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
{
  /* Reject operations on __fpreg.  */
  if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
    return N_("invalid operation on %<__fpreg%>");
  return NULL;
}

/* HP-UX version_id attribute.
   For object foo, if the version_id is set to 1234 put out an alias
   of '.alias foo "foo{1234}"  We can't use "foo{1234}" in anything
   other than an alias statement because it is an illegal symbol name.  */

static tree
ia64_handle_version_id_attribute (tree *node ATTRIBUTE_UNUSED,
                                 tree name ATTRIBUTE_UNUSED,
                                 tree args,
                                 int flags ATTRIBUTE_UNUSED,
                                 bool *no_add_attrs)
{
  tree arg = TREE_VALUE (args);

  if (TREE_CODE (arg) != STRING_CST)
    {
      error("version attribute is not a string");
      *no_add_attrs = true;
      return NULL_TREE;
    }
  return NULL_TREE;
}
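/* Illustrative usage (an editorial addition, not in the original source):
   a declaration along the lines of
     extern int foo (void) __attribute__ ((version_id ("1234")));
   is the intended client of this handler; anything but a string literal
   argument is diagnosed and the attribute is dropped.  */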

/* Target hook for c_mode_for_suffix.  */

static enum machine_mode
ia64_c_mode_for_suffix (char suffix)
{
  if (suffix == 'q')
    return TFmode;
  if (suffix == 'w')
    return XFmode;

  return VOIDmode;
}
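/* Editorial note (not part of the original source): this maps the literal
   suffixes 'q' and 'w' (as in 1.0q and 1.0w) onto TFmode and XFmode
   respectively, matching the __float128 and __float80 types mentioned in
   the mangling code above.  */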

static GTY(()) rtx ia64_dconst_0_5_rtx;

rtx
ia64_dconst_0_5 (void)
{
  if (! ia64_dconst_0_5_rtx)
    {
      REAL_VALUE_TYPE rv;
      real_from_string (&rv, "0.5");
      ia64_dconst_0_5_rtx = const_double_from_real_value (rv, DFmode);
    }
  return ia64_dconst_0_5_rtx;
}

static GTY(()) rtx ia64_dconst_0_375_rtx;

rtx
ia64_dconst_0_375 (void)
{
  if (! ia64_dconst_0_375_rtx)
    {
      REAL_VALUE_TYPE rv;
      real_from_string (&rv, "0.375");
      ia64_dconst_0_375_rtx = const_double_from_real_value (rv, DFmode);
    }
  return ia64_dconst_0_375_rtx;
}

static enum machine_mode
ia64_get_reg_raw_mode (int regno)
{
  if (FR_REGNO_P (regno))
    return XFmode;
  return default_get_reg_raw_mode(regno);
}

/* Always default to .text section until HP-UX linker is fixed.  */

ATTRIBUTE_UNUSED static section *
ia64_hpux_function_section (tree decl ATTRIBUTE_UNUSED,
                            enum node_frequency freq ATTRIBUTE_UNUSED,
                            bool startup ATTRIBUTE_UNUSED,
                            bool exit ATTRIBUTE_UNUSED)
{
  return NULL;
}

/* Construct (set target (vec_select op0 (parallel perm))) and
   return true if that's a valid instruction in the active ISA.  */

static bool
expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
{
  rtx rperm[MAX_VECT_LEN], x;
  unsigned i;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (perm[i]);

  x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
  x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
  x = gen_rtx_SET (VOIDmode, target, x);

  x = emit_insn (x);
  if (recog_memoized (x) < 0)
    {
      remove_insn (x);
      return false;
    }
  return true;
}
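/* Editorial sketch (not part of the original source): for a V4HImode
   TARGET and perm = {1, 0, 3, 2} the insn built above has the form

     (set (reg:V4HI target)
          (vec_select:V4HI (reg:V4HI op0)
                           (parallel [(const_int 1) (const_int 0)
                                      (const_int 3) (const_int 2)])))

   and it is kept only if recog_memoized recognizes it; otherwise it is
   removed again and false is returned.  */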

/* Similar, but generate a vec_concat from op0 and op1 as well.  */

static bool
expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
                        const unsigned char *perm, unsigned nelt)
{
  enum machine_mode v2mode;
  rtx x;

  v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
  x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
  return expand_vselect (target, x, perm, nelt);
}

/* Try to expand a no-op permutation.  */

static bool
expand_vec_perm_identity (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;

  for (i = 0; i < nelt; ++i)
    if (d->perm[i] != i)
      return false;

  if (!d->testing_p)
    emit_move_insn (d->target, d->op0);

  return true;
}

/* Try to expand D via a shrp instruction.  */

static bool
expand_vec_perm_shrp (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt, shift, mask;
  rtx tmp, hi, lo;

  /* ??? Don't force V2SFmode into the integer registers.  */
  if (d->vmode == V2SFmode)
    return false;

  mask = (d->one_operand_p ? nelt - 1 : 2 * nelt - 1);

  shift = d->perm[0];
  if (BYTES_BIG_ENDIAN && shift > nelt)
    return false;

  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != ((shift + i) & mask))
      return false;

  if (d->testing_p)
    return true;

  hi = shift < nelt ? d->op1 : d->op0;
  lo = shift < nelt ? d->op0 : d->op1;

  shift %= nelt;

  shift *= GET_MODE_UNIT_SIZE (d->vmode) * BITS_PER_UNIT;

  /* We've eliminated the shift 0 case via expand_vec_perm_identity.  */
  gcc_assert (IN_RANGE (shift, 1, 63));

  /* Recall that big-endian elements are numbered starting at the top of
     the register.  Ideally we'd have a shift-left-pair.  But since we
     don't, convert to a shift the other direction.  */
  if (BYTES_BIG_ENDIAN)
    shift = 64 - shift;

  tmp = gen_reg_rtx (DImode);
  hi = gen_lowpart (DImode, hi);
  lo = gen_lowpart (DImode, lo);
  emit_insn (gen_shrp (tmp, hi, lo, GEN_INT (shift)));

  emit_move_insn (d->target, gen_lowpart (d->vmode, tmp));
  return true;
}
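/* Worked example (an editorial addition, not in the original source),
   little-endian case: for V8QImode with two operands and
   perm = {1,2,3,4,5,6,7,8}, SHIFT starts out as 1, every element
   satisfies perm[i] == (shift + i) & 15, and the whole permutation
   collapses into one shrp of the two 64-bit halves by 8 bits.  */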

/* Try to instantiate D in a single instruction.  */

static bool
expand_vec_perm_1 (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  unsigned char perm2[MAX_VECT_LEN];

  /* Try single-operand selections.  */
  if (d->one_operand_p)
    {
      if (expand_vec_perm_identity (d))
        return true;
      if (expand_vselect (d->target, d->op0, d->perm, nelt))
        return true;
    }

  /* Try two operand selections.  */
  if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
    return true;

  /* Recognize interleave style patterns with reversed operands.  */
  if (!d->one_operand_p)
    {
      for (i = 0; i < nelt; ++i)
        {
          unsigned e = d->perm[i];
          if (e >= nelt)
            e -= nelt;
          else
            e += nelt;
          perm2[i] = e;
        }

      if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
        return true;
    }

  if (expand_vec_perm_shrp (d))
    return true;

  /* ??? Look for deposit-like permutations where most of the result
     comes from one vector unchanged and the rest comes from a
     sequential hunk of the other vector.  */

  return false;
}

/* Pattern match broadcast permutations.  */

static bool
expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
{
  unsigned i, elt, nelt = d->nelt;
  unsigned char perm2[2];
  rtx temp;
  bool ok;

  if (!d->one_operand_p)
    return false;

  elt = d->perm[0];
  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != elt)
      return false;

  switch (d->vmode)
    {
    case V2SImode:
    case V2SFmode:
      /* Implementable by interleave.  */
      perm2[0] = elt;
      perm2[1] = elt + 2;
      ok = expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, 2);
      gcc_assert (ok);
      break;

    case V8QImode:
      /* Implementable by extract + broadcast.  */
      if (BYTES_BIG_ENDIAN)
        elt = 7 - elt;
      elt *= BITS_PER_UNIT;
      temp = gen_reg_rtx (DImode);
      emit_insn (gen_extzv (temp, gen_lowpart (DImode, d->op0),
                            GEN_INT (8), GEN_INT (elt)));
      emit_insn (gen_mux1_brcst_qi (d->target, gen_lowpart (QImode, temp)));
      break;

    case V4HImode:
      /* Should have been matched directly by vec_select.  */
    default:
      gcc_unreachable ();
    }

  return true;
}

/* A subroutine of ia64_expand_vec_perm_const_1.  Try to simplify a
   two vector permutation into a single vector permutation by using
   an interleave operation to merge the vectors.  */

static bool
expand_vec_perm_interleave_2 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned char remap[2 * MAX_VECT_LEN];
  unsigned contents, i, nelt, nelt2;
  unsigned h0, h1, h2, h3;
  rtx seq;
  bool ok;

  if (d->one_operand_p)
    return false;

  nelt = d->nelt;
  nelt2 = nelt / 2;

  /* Examine from whence the elements come.  */
  contents = 0;
  for (i = 0; i < nelt; ++i)
    contents |= 1u << d->perm[i];

  memset (remap, 0xff, sizeof (remap));
  dremap = *d;

  h0 = (1u << nelt2) - 1;
  h1 = h0 << nelt2;
  h2 = h0 << nelt;
  h3 = h0 << (nelt + nelt2);

  if ((contents & (h0 | h2)) == contents)       /* punpck even halves */
    {
      for (i = 0; i < nelt; ++i)
        {
          unsigned which = i / 2 + (i & 1 ? nelt : 0);
          remap[which] = i;
          dremap.perm[i] = which;
        }
    }
  else if ((contents & (h1 | h3)) == contents)  /* punpck odd halves */
    {
      for (i = 0; i < nelt; ++i)
        {
          unsigned which = i / 2 + nelt2 + (i & 1 ? nelt : 0);
          remap[which] = i;
          dremap.perm[i] = which;
        }
    }
  else if ((contents & 0x5555) == contents)     /* mix even elements */
    {
      for (i = 0; i < nelt; ++i)
        {
          unsigned which = (i & ~1) + (i & 1 ? nelt : 0);
          remap[which] = i;
          dremap.perm[i] = which;
        }
    }
  else if ((contents & 0xaaaa) == contents)     /* mix odd elements */
    {
      for (i = 0; i < nelt; ++i)
        {
          unsigned which = (i | 1) + (i & 1 ? nelt : 0);
          remap[which] = i;
          dremap.perm[i] = which;
        }
    }
  else if (floor_log2 (contents) - ctz_hwi (contents) < (int)nelt) /* shrp */
    {
      unsigned shift = ctz_hwi (contents);
      for (i = 0; i < nelt; ++i)
        {
          unsigned which = (i + shift) & (2 * nelt - 1);
          remap[which] = i;
          dremap.perm[i] = which;
        }
    }
  else
    return false;

  /* Use the remapping array set up above to move the elements from their
     swizzled locations into their final destinations.  */
  dfinal = *d;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = remap[d->perm[i]];
      gcc_assert (e < nelt);
      dfinal.perm[i] = e;
    }
  dfinal.op0 = gen_reg_rtx (dfinal.vmode);
  dfinal.op1 = dfinal.op0;
  dfinal.one_operand_p = true;
  dremap.target = dfinal.op0;

  /* Test if the final remap can be done with a single insn.  For V4HImode
     this *will* succeed.  For V8QImode or V2SImode it may not.  */
  start_sequence ();
  ok = expand_vec_perm_1 (&dfinal);
  seq = get_insns ();
  end_sequence ();
  if (!ok)
    return false;
  if (d->testing_p)
    return true;

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  emit_insn (seq);
  return true;
}
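/* Worked example (an editorial addition, not in the original source):
   for V4HImode and perm = {0, 5, 1, 4}, only the even halves of the two
   inputs are referenced, so the "punpck even halves" arm applies: DREMAP
   interleaves op0/op1 into {0, 4, 1, 5} and DFINAL then reorders that
   temporary with the one-operand permutation {0, 3, 2, 1}.  */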

/* A subroutine of ia64_expand_vec_perm_const_1.  Emit a full V4HImode
   constant permutation via two mux2 and a merge.  */

static bool
expand_vec_perm_v4hi_5 (struct expand_vec_perm_d *d)
{
  unsigned char perm2[4];
  rtx rmask[4];
  unsigned i;
  rtx t0, t1, mask, x;
  bool ok;

  if (d->vmode != V4HImode || d->one_operand_p)
    return false;
  if (d->testing_p)
    return true;

  for (i = 0; i < 4; ++i)
    {
      perm2[i] = d->perm[i] & 3;
      rmask[i] = (d->perm[i] & 4 ? const0_rtx : constm1_rtx);
    }
  mask = gen_rtx_CONST_VECTOR (V4HImode, gen_rtvec_v (4, rmask));
  mask = force_reg (V4HImode, mask);

  t0 = gen_reg_rtx (V4HImode);
  t1 = gen_reg_rtx (V4HImode);

  ok = expand_vselect (t0, d->op0, perm2, 4);
  gcc_assert (ok);
  ok = expand_vselect (t1, d->op1, perm2, 4);
  gcc_assert (ok);

  x = gen_rtx_AND (V4HImode, mask, t0);
  emit_insn (gen_rtx_SET (VOIDmode, t0, x));

  x = gen_rtx_NOT (V4HImode, mask);
  x = gen_rtx_AND (V4HImode, x, t1);
  emit_insn (gen_rtx_SET (VOIDmode, t1, x));

  x = gen_rtx_IOR (V4HImode, t0, t1);
  emit_insn (gen_rtx_SET (VOIDmode, d->target, x));

  return true;
}
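/* Editorial note (not part of the original source): the net effect above
   is target = (t0 & mask) | (t1 & ~mask), where t0 and t1 are op0 and op1
   each shuffled by the same mux2 pattern (perm & 3) and MASK selects, per
   element, whether the result comes from op0 (perm < 4) or op1.  */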

/* The guts of ia64_expand_vec_perm_const, also used by the ok hook.
   With all of the interface bits taken care of, perform the expansion
   in D and return true on success.  */

static bool
ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  if (expand_vec_perm_1 (d))
    return true;
  if (expand_vec_perm_broadcast (d))
    return true;
  if (expand_vec_perm_interleave_2 (d))
    return true;
  if (expand_vec_perm_v4hi_5 (d))
    return true;
  return false;
}

bool
ia64_expand_vec_perm_const (rtx operands[4])
{
  struct expand_vec_perm_d d;
  unsigned char perm[MAX_VECT_LEN];
  int i, nelt, which;
  rtx sel;

  d.target = operands[0];
  d.op0 = operands[1];
  d.op1 = operands[2];
  sel = operands[3];

  d.vmode = GET_MODE (d.target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  gcc_assert (GET_CODE (sel) == CONST_VECTOR);
  gcc_assert (XVECLEN (sel, 0) == nelt);
  gcc_checking_assert (sizeof (d.perm) == sizeof (perm));

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);

      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
      perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable();

    case 3:
      if (!rtx_equal_p (d.op0, d.op1))
        {
          d.one_operand_p = false;
          break;
        }

      /* The elements of PERM do not suggest that only the first operand
         is used, but both operands are identical.  Allow easier matching
         of the permutation by folding the permutation into the single
         input vector.  */
      for (i = 0; i < nelt; ++i)
        if (d.perm[i] >= nelt)
          d.perm[i] -= nelt;
      /* FALLTHRU */

    case 1:
      d.op1 = d.op0;
      d.one_operand_p = true;
      break;

    case 2:
      for (i = 0; i < nelt; ++i)
        d.perm[i] -= nelt;
      d.op0 = d.op1;
      d.one_operand_p = true;
      break;
    }

  if (ia64_expand_vec_perm_const_1 (&d))
    return true;

  /* If the mask says both arguments are needed, but they are the same,
     the above tried to expand with one_operand_p true.  If that didn't
     work, retry with one_operand_p false, as that's what we used in _ok.  */
  if (which == 3 && d.one_operand_p)
    {
      memcpy (d.perm, perm, sizeof (perm));
      d.one_operand_p = false;
      return ia64_expand_vec_perm_const_1 (&d);
    }

  return false;
}

/* Implement targetm.vectorize.vec_perm_const_ok.  */

static bool
ia64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
                                  const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;

  /* Extract the values from the vector CST into the permutation
     array in D.  */
  memcpy (d.perm, sel, nelt);
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* For all elements from second vector, fold the elements to first.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to the vector type.  */
  d.one_operand_p = (which != 3);

  /* Otherwise we have to go through the motions and see if we can
     figure out how to generate the requested permutation.  */
  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_operand_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = ia64_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}

void
ia64_expand_vec_setv2sf (rtx operands[3])
{
  struct expand_vec_perm_d d;
  unsigned int which;
  bool ok;

  d.target = operands[0];
  d.op0 = operands[0];
  d.op1 = gen_reg_rtx (V2SFmode);
  d.vmode = V2SFmode;
  d.nelt = 2;
  d.one_operand_p = false;
  d.testing_p = false;

  which = INTVAL (operands[2]);
  gcc_assert (which <= 1);
  d.perm[0] = 1 - which;
  d.perm[1] = which + 2;

  emit_insn (gen_fpack (d.op1, operands[1], CONST0_RTX (SFmode)));

  ok = ia64_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);
}

void
ia64_expand_vec_perm_even_odd (rtx target, rtx op0, rtx op1, int odd)
{
  struct expand_vec_perm_d d;
  enum machine_mode vmode = GET_MODE (target);
  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
  bool ok;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;
  d.vmode = vmode;
  d.nelt = nelt;
  d.one_operand_p = false;
  d.testing_p = false;

  for (i = 0; i < nelt; ++i)
    d.perm[i] = i * 2 + odd;

  ok = ia64_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);
}
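/* Editorial note (not part of the original source): with d.perm[i] set to
   i * 2 + odd, this extracts the even- or odd-indexed elements of the
   op0/op1 concatenation; e.g. for V4HImode, odd == 0 gives {0, 2, 4, 6}
   and odd == 1 gives {1, 3, 5, 7}.  */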

#include "gt-ia64.h"
