OpenCores Subversion repository openrisc_2011-10-31 (blame for rev 282, jeremybenn)
File: openrisc/trunk/gnu-src/gcc-4.5.1/gcc/config/ia64/ia64.c
URL: https://opencores.org/ocsvn/openrisc_2011-10-31/openrisc_2011-10-31/trunk

/* Definitions of target machine for GNU compiler.
2
   Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
3
   2009, 2010
4
   Free Software Foundation, Inc.
5
   Contributed by James E. Wilson <wilson@cygnus.com> and
6
                  David Mosberger <davidm@hpl.hp.com>.
7
 
8
This file is part of GCC.
9
 
10
GCC is free software; you can redistribute it and/or modify
11
it under the terms of the GNU General Public License as published by
12
the Free Software Foundation; either version 3, or (at your option)
13
any later version.
14
 
15
GCC is distributed in the hope that it will be useful,
16
but WITHOUT ANY WARRANTY; without even the implied warranty of
17
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
GNU General Public License for more details.
19
 
20
You should have received a copy of the GNU General Public License
21
along with GCC; see the file COPYING3.  If not see
22
<http://www.gnu.org/licenses/>.  */
23
 
24
#include "config.h"
25
#include "system.h"
26
#include "coretypes.h"
27
#include "tm.h"
28
#include "rtl.h"
29
#include "tree.h"
30
#include "regs.h"
31
#include "hard-reg-set.h"
32
#include "real.h"
33
#include "insn-config.h"
34
#include "conditions.h"
35
#include "output.h"
36
#include "insn-attr.h"
37
#include "flags.h"
38
#include "recog.h"
39
#include "expr.h"
40
#include "optabs.h"
41
#include "except.h"
42
#include "function.h"
43
#include "ggc.h"
44
#include "basic-block.h"
45
#include "libfuncs.h"
46
#include "toplev.h"
47
#include "sched-int.h"
48
#include "timevar.h"
49
#include "target.h"
50
#include "target-def.h"
51
#include "tm_p.h"
52
#include "hashtab.h"
53
#include "langhooks.h"
54
#include "cfglayout.h"
55
#include "gimple.h"
56
#include "intl.h"
57
#include "df.h"
58
#include "debug.h"
59
#include "params.h"
60
#include "dbgcnt.h"
61
#include "tm-constrs.h"
62
#include "sel-sched.h"
63
 
64
/* This is used for communication between ASM_OUTPUT_LABEL and
65
   ASM_OUTPUT_LABELREF.  */
66
int ia64_asm_output_label = 0;
67
 
68
/* Register names for ia64_expand_prologue.  */
69
static const char * const ia64_reg_numbers[96] =
70
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
71
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
72
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
73
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
74
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
75
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
76
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
77
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
78
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
79
  "r104","r105","r106","r107","r108","r109","r110","r111",
80
  "r112","r113","r114","r115","r116","r117","r118","r119",
81
  "r120","r121","r122","r123","r124","r125","r126","r127"};
82
 
83
/* ??? These strings could be shared with REGISTER_NAMES.  */
84
static const char * const ia64_input_reg_names[8] =
85
{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };
86
 
87
/* ??? These strings could be shared with REGISTER_NAMES.  */
88
static const char * const ia64_local_reg_names[80] =
89
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
90
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
91
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
92
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
93
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
94
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
95
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
96
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
97
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
98
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
99
 
100
/* ??? These strings could be shared with REGISTER_NAMES.  */
101
static const char * const ia64_output_reg_names[8] =
102
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
103
 
104
/* Which cpu are we scheduling for.  */
105
enum processor_type ia64_tune = PROCESSOR_ITANIUM2;
106
 
107
/* Determines whether we run our final scheduling pass or not.  We always
108
   avoid the normal second scheduling pass.  */
109
static int ia64_flag_schedule_insns2;
110
 
111
/* Determines whether we run variable tracking in machine dependent
112
   reorganization.  */
113
static int ia64_flag_var_tracking;
114
 
115
/* Variables which are this size or smaller are put in the sdata/sbss
116
   sections.  */
117
 
118
unsigned int ia64_section_threshold;
119
 
120
/* The following variable is used by the DFA insn scheduler.  The value is
121
   TRUE if we do insn bundling instead of insn scheduling.  */
122
int bundling_p = 0;
123
 
124
enum ia64_frame_regs
125
{
126
   reg_fp,
127
   reg_save_b0,
128
   reg_save_pr,
129
   reg_save_ar_pfs,
130
   reg_save_ar_unat,
131
   reg_save_ar_lc,
132
   reg_save_gp,
133
   number_of_ia64_frame_regs
134
};
135
 
136
/* Structure to be filled in by ia64_compute_frame_size with register
137
   save masks and offsets for the current function.  */
138
 
139
struct ia64_frame_info
140
{
141
  HOST_WIDE_INT total_size;     /* size of the stack frame, not including
142
                                   the caller's scratch area.  */
143
  HOST_WIDE_INT spill_cfa_off;  /* top of the reg spill area from the cfa.  */
144
  HOST_WIDE_INT spill_size;     /* size of the gr/br/fr spill area.  */
145
  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
146
  HARD_REG_SET mask;            /* mask of saved registers.  */
147
  unsigned int gr_used_mask;    /* mask of registers in use as gr spill
148
                                   registers or long-term scratches.  */
149
  int n_spilled;                /* number of spilled registers.  */
150
  int r[number_of_ia64_frame_regs];  /* Frame related registers.  */
151
  int n_input_regs;             /* number of input registers used.  */
152
  int n_local_regs;             /* number of local registers used.  */
153
  int n_output_regs;            /* number of output registers used.  */
154
  int n_rotate_regs;            /* number of rotating registers used.  */
155
 
156
  char need_regstk;             /* true if a .regstk directive needed.  */
157
  char initialized;             /* true if the data is finalized.  */
158
};
159
 
160
/* Current frame information calculated by ia64_compute_frame_size.  */
161
static struct ia64_frame_info current_frame_info;
162
/* The actual registers that are emitted.  */
163
static int emitted_frame_related_regs[number_of_ia64_frame_regs];
164
 
165
static int ia64_first_cycle_multipass_dfa_lookahead (void);
166
static void ia64_dependencies_evaluation_hook (rtx, rtx);
167
static void ia64_init_dfa_pre_cycle_insn (void);
168
static rtx ia64_dfa_pre_cycle_insn (void);
169
static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
170
static bool ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx);
171
static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
172
static void ia64_h_i_d_extended (void);
173
static void * ia64_alloc_sched_context (void);
174
static void ia64_init_sched_context (void *, bool);
175
static void ia64_set_sched_context (void *);
176
static void ia64_clear_sched_context (void *);
177
static void ia64_free_sched_context (void *);
178
static int ia64_mode_to_int (enum machine_mode);
179
static void ia64_set_sched_flags (spec_info_t);
180
static ds_t ia64_get_insn_spec_ds (rtx);
181
static ds_t ia64_get_insn_checked_ds (rtx);
182
static bool ia64_skip_rtx_p (const_rtx);
183
static int ia64_speculate_insn (rtx, ds_t, rtx *);
184
static bool ia64_needs_block_p (int);
185
static rtx ia64_gen_spec_check (rtx, rtx, ds_t);
186
static int ia64_spec_check_p (rtx);
187
static int ia64_spec_check_src_p (rtx);
188
static rtx gen_tls_get_addr (void);
189
static rtx gen_thread_pointer (void);
190
static int find_gr_spill (enum ia64_frame_regs, int);
191
static int next_scratch_gr_reg (void);
192
static void mark_reg_gr_used_mask (rtx, void *);
193
static void ia64_compute_frame_size (HOST_WIDE_INT);
194
static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
195
static void finish_spill_pointers (void);
196
static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
197
static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
198
static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
199
static rtx gen_movdi_x (rtx, rtx, rtx);
200
static rtx gen_fr_spill_x (rtx, rtx, rtx);
201
static rtx gen_fr_restore_x (rtx, rtx, rtx);
202
 
203
static bool ia64_can_eliminate (const int, const int);
204
static enum machine_mode hfa_element_mode (const_tree, bool);
205
static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
206
                                         tree, int *, int);
207
static int ia64_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
208
                                   tree, bool);
209
static bool ia64_function_ok_for_sibcall (tree, tree);
210
static bool ia64_return_in_memory (const_tree, const_tree);
211
static bool ia64_rtx_costs (rtx, int, int, int *, bool);
212
static int ia64_unspec_may_trap_p (const_rtx, unsigned);
213
static void fix_range (const char *);
214
static bool ia64_handle_option (size_t, const char *, int);
215
static struct machine_function * ia64_init_machine_status (void);
216
static void emit_insn_group_barriers (FILE *);
217
static void emit_all_insn_group_barriers (FILE *);
218
static void final_emit_insn_group_barriers (FILE *);
219
static void emit_predicate_relation_info (void);
220
static void ia64_reorg (void);
221
static bool ia64_in_small_data_p (const_tree);
222
static void process_epilogue (FILE *, rtx, bool, bool);
223
static int process_set (FILE *, rtx, rtx, bool, bool);
224
 
225
static bool ia64_assemble_integer (rtx, unsigned int, int);
226
static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
227
static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
228
static void ia64_output_function_end_prologue (FILE *);
229
 
230
static int ia64_issue_rate (void);
231
static int ia64_adjust_cost_2 (rtx, int, rtx, int, dw_t);
232
static void ia64_sched_init (FILE *, int, int);
233
static void ia64_sched_init_global (FILE *, int, int);
234
static void ia64_sched_finish_global (FILE *, int);
235
static void ia64_sched_finish (FILE *, int);
236
static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
237
static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
238
static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
239
static int ia64_variable_issue (FILE *, int, rtx, int);
240
 
241
static struct bundle_state *get_free_bundle_state (void);
242
static void free_bundle_state (struct bundle_state *);
243
static void initiate_bundle_states (void);
244
static void finish_bundle_states (void);
245
static unsigned bundle_state_hash (const void *);
246
static int bundle_state_eq_p (const void *, const void *);
247
static int insert_bundle_state (struct bundle_state *);
248
static void initiate_bundle_state_table (void);
249
static void finish_bundle_state_table (void);
250
static int try_issue_nops (struct bundle_state *, int);
251
static int try_issue_insn (struct bundle_state *, rtx);
252
static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
253
static int get_max_pos (state_t);
254
static int get_template (state_t, int);
255
 
256
static rtx get_next_important_insn (rtx, rtx);
257
static bool important_for_bundling_p (rtx);
258
static void bundling (FILE *, int, rtx, rtx);
259
 
260
static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
261
                                  HOST_WIDE_INT, tree);
262
static void ia64_file_start (void);
263
static void ia64_globalize_decl_name (FILE *, tree);
264
 
265
static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
266
static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
267
static section *ia64_select_rtx_section (enum machine_mode, rtx,
268
                                         unsigned HOST_WIDE_INT);
269
static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
270
     ATTRIBUTE_UNUSED;
271
static unsigned int ia64_section_type_flags (tree, const char *, int);
272
static void ia64_init_libfuncs (void)
273
     ATTRIBUTE_UNUSED;
274
static void ia64_hpux_init_libfuncs (void)
275
     ATTRIBUTE_UNUSED;
276
static void ia64_sysv4_init_libfuncs (void)
277
     ATTRIBUTE_UNUSED;
278
static void ia64_vms_init_libfuncs (void)
279
     ATTRIBUTE_UNUSED;
280
static void ia64_soft_fp_init_libfuncs (void)
281
     ATTRIBUTE_UNUSED;
282
static bool ia64_vms_valid_pointer_mode (enum machine_mode mode)
283
     ATTRIBUTE_UNUSED;
284
static tree ia64_vms_common_object_attribute (tree *, tree, tree, int, bool *)
285
     ATTRIBUTE_UNUSED;
286
 
287
static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
288
static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *);
289
static void ia64_encode_section_info (tree, rtx, int);
290
static rtx ia64_struct_value_rtx (tree, int);
291
static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
292
static bool ia64_scalar_mode_supported_p (enum machine_mode mode);
293
static bool ia64_vector_mode_supported_p (enum machine_mode mode);
294
static bool ia64_cannot_force_const_mem (rtx);
295
static const char *ia64_mangle_type (const_tree);
296
static const char *ia64_invalid_conversion (const_tree, const_tree);
297
static const char *ia64_invalid_unary_op (int, const_tree);
298
static const char *ia64_invalid_binary_op (int, const_tree, const_tree);
299
static enum machine_mode ia64_c_mode_for_suffix (char);
300
static enum machine_mode ia64_promote_function_mode (const_tree,
301
                                                     enum machine_mode,
302
                                                     int *,
303
                                                     const_tree,
304
                                                     int);
305
static void ia64_trampoline_init (rtx, tree, rtx);
306
static void ia64_override_options_after_change (void);
307
 
308
/* Table of valid machine attributes.  */
309
static const struct attribute_spec ia64_attribute_table[] =
310
{
311
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
312
  { "syscall_linkage", 0, 0, false, true,  true,  NULL },
313
  { "model",           1, 1, true, false, false, ia64_handle_model_attribute },
314
#if TARGET_ABI_OPEN_VMS
315
  { "common_object",   1, 1, true, false, false, ia64_vms_common_object_attribute},
316
#endif
317
  { "version_id",      1, 1, true, false, false,
318
    ia64_handle_version_id_attribute },
319
  { NULL,              0, 0, false, false, false, NULL }
320
};
321
 
322
/* Initialize the GCC target structure.  */
323
#undef TARGET_ATTRIBUTE_TABLE
324
#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
325
 
326
#undef TARGET_INIT_BUILTINS
327
#define TARGET_INIT_BUILTINS ia64_init_builtins
328
 
329
#undef TARGET_EXPAND_BUILTIN
330
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin
331
 
332
#undef TARGET_ASM_BYTE_OP
333
#define TARGET_ASM_BYTE_OP "\tdata1\t"
334
#undef TARGET_ASM_ALIGNED_HI_OP
335
#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
336
#undef TARGET_ASM_ALIGNED_SI_OP
337
#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
338
#undef TARGET_ASM_ALIGNED_DI_OP
339
#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
340
#undef TARGET_ASM_UNALIGNED_HI_OP
341
#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
342
#undef TARGET_ASM_UNALIGNED_SI_OP
343
#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
344
#undef TARGET_ASM_UNALIGNED_DI_OP
345
#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
346
#undef TARGET_ASM_INTEGER
347
#define TARGET_ASM_INTEGER ia64_assemble_integer
348
 
349
#undef TARGET_ASM_FUNCTION_PROLOGUE
350
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
351
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
352
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
353
#undef TARGET_ASM_FUNCTION_EPILOGUE
354
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
355
 
356
#undef TARGET_IN_SMALL_DATA_P
357
#define TARGET_IN_SMALL_DATA_P  ia64_in_small_data_p
358
 
359
#undef TARGET_SCHED_ADJUST_COST_2
360
#define TARGET_SCHED_ADJUST_COST_2 ia64_adjust_cost_2
361
#undef TARGET_SCHED_ISSUE_RATE
362
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
363
#undef TARGET_SCHED_VARIABLE_ISSUE
364
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
365
#undef TARGET_SCHED_INIT
366
#define TARGET_SCHED_INIT ia64_sched_init
367
#undef TARGET_SCHED_FINISH
368
#define TARGET_SCHED_FINISH ia64_sched_finish
369
#undef TARGET_SCHED_INIT_GLOBAL
370
#define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
371
#undef TARGET_SCHED_FINISH_GLOBAL
372
#define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
373
#undef TARGET_SCHED_REORDER
374
#define TARGET_SCHED_REORDER ia64_sched_reorder
375
#undef TARGET_SCHED_REORDER2
376
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2
377
 
378
#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
379
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
380
 
381
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
382
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
383
 
384
#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
385
#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
386
#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
387
#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
388
 
389
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
390
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
391
  ia64_first_cycle_multipass_dfa_lookahead_guard
392
 
393
#undef TARGET_SCHED_DFA_NEW_CYCLE
394
#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
395
 
396
#undef TARGET_SCHED_H_I_D_EXTENDED
397
#define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended
398
 
399
#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
400
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context
401
 
402
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
403
#define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context
404
 
405
#undef TARGET_SCHED_SET_SCHED_CONTEXT
406
#define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context
407
 
408
#undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
409
#define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context
410
 
411
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
412
#define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context
413
 
414
#undef TARGET_SCHED_SET_SCHED_FLAGS
415
#define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags
416
 
417
#undef TARGET_SCHED_GET_INSN_SPEC_DS
418
#define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds
419
 
420
#undef TARGET_SCHED_GET_INSN_CHECKED_DS
421
#define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds
422
 
423
#undef TARGET_SCHED_SPECULATE_INSN
424
#define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn
425
 
426
#undef TARGET_SCHED_NEEDS_BLOCK_P
427
#define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p
428
 
429
#undef TARGET_SCHED_GEN_SPEC_CHECK
430
#define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check
431
 
432
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC
433
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC\
434
  ia64_first_cycle_multipass_dfa_lookahead_guard_spec
435
 
436
#undef TARGET_SCHED_SKIP_RTX_P
437
#define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p
438
 
439
#undef TARGET_FUNCTION_OK_FOR_SIBCALL
440
#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
441
#undef TARGET_ARG_PARTIAL_BYTES
442
#define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
443
 
444
#undef TARGET_ASM_OUTPUT_MI_THUNK
445
#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
446
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
447
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
448
 
449
#undef TARGET_ASM_FILE_START
450
#define TARGET_ASM_FILE_START ia64_file_start
451
 
452
#undef TARGET_ASM_GLOBALIZE_DECL_NAME
453
#define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name
454
 
455
#undef TARGET_RTX_COSTS
456
#define TARGET_RTX_COSTS ia64_rtx_costs
457
#undef TARGET_ADDRESS_COST
458
#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
459
 
460
#undef TARGET_UNSPEC_MAY_TRAP_P
461
#define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p
462
 
463
#undef TARGET_MACHINE_DEPENDENT_REORG
464
#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
465
 
466
#undef TARGET_ENCODE_SECTION_INFO
467
#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
468
 
469
#undef  TARGET_SECTION_TYPE_FLAGS
470
#define TARGET_SECTION_TYPE_FLAGS  ia64_section_type_flags
471
 
472
#ifdef HAVE_AS_TLS
473
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
474
#define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
475
#endif
476
 
477
#undef TARGET_PROMOTE_FUNCTION_MODE
478
#define TARGET_PROMOTE_FUNCTION_MODE ia64_promote_function_mode
479
 
480
/* ??? Investigate.  */
481
#if 0
482
#undef TARGET_PROMOTE_PROTOTYPES
483
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
484
#endif
485
 
486
#undef TARGET_STRUCT_VALUE_RTX
487
#define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
488
#undef TARGET_RETURN_IN_MEMORY
489
#define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
490
#undef TARGET_SETUP_INCOMING_VARARGS
491
#define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
492
#undef TARGET_STRICT_ARGUMENT_NAMING
493
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
494
#undef TARGET_MUST_PASS_IN_STACK
495
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
496
 
497
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
498
#define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
499
 
500
#undef TARGET_UNWIND_EMIT
501
#define TARGET_UNWIND_EMIT process_for_unwind_directive
502
 
503
#undef TARGET_SCALAR_MODE_SUPPORTED_P
504
#define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
505
#undef TARGET_VECTOR_MODE_SUPPORTED_P
506
#define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
507
 
508
/* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
509
   in an order different from the specified program order.  */
510
#undef TARGET_RELAXED_ORDERING
511
#define TARGET_RELAXED_ORDERING true
512
 
513
#undef TARGET_DEFAULT_TARGET_FLAGS
514
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
515
#undef TARGET_HANDLE_OPTION
516
#define TARGET_HANDLE_OPTION ia64_handle_option
517
 
518
#undef TARGET_CANNOT_FORCE_CONST_MEM
519
#define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
520
 
521
#undef TARGET_MANGLE_TYPE
522
#define TARGET_MANGLE_TYPE ia64_mangle_type
523
 
524
#undef TARGET_INVALID_CONVERSION
525
#define TARGET_INVALID_CONVERSION ia64_invalid_conversion
526
#undef TARGET_INVALID_UNARY_OP
527
#define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
528
#undef TARGET_INVALID_BINARY_OP
529
#define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op
530
 
531
#undef TARGET_C_MODE_FOR_SUFFIX
532
#define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix
533
 
534
#undef TARGET_CAN_ELIMINATE
535
#define TARGET_CAN_ELIMINATE ia64_can_eliminate
536
 
537
#undef TARGET_TRAMPOLINE_INIT
538
#define TARGET_TRAMPOLINE_INIT ia64_trampoline_init
539
 
540
#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
541
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change
542
 
543
struct gcc_target targetm = TARGET_INITIALIZER;
544
 
545
typedef enum
546
  {
547
    ADDR_AREA_NORMAL,   /* normal address area */
548
    ADDR_AREA_SMALL     /* addressable by "addl" (-2MB < addr < 2MB) */
549
  }
550
ia64_addr_area;
551
 
552
static GTY(()) tree small_ident1;
553
static GTY(()) tree small_ident2;
554
 
555
static void
556
init_idents (void)
557
{
558
  if (small_ident1 == 0)
559
    {
560
      small_ident1 = get_identifier ("small");
561
      small_ident2 = get_identifier ("__small__");
562
    }
563
}
564
 
565
/* Retrieve the address area that has been chosen for the given decl.  */
566
 
567
static ia64_addr_area
568
ia64_get_addr_area (tree decl)
569
{
570
  tree model_attr;
571
 
572
  model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
573
  if (model_attr)
574
    {
575
      tree id;
576
 
577
      init_idents ();
578
      id = TREE_VALUE (TREE_VALUE (model_attr));
579
      if (id == small_ident1 || id == small_ident2)
580
        return ADDR_AREA_SMALL;
581
    }
582
  return ADDR_AREA_NORMAL;
583
}
584
 
585
static tree
586
ia64_handle_model_attribute (tree *node, tree name, tree args,
587
                             int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
588
{
589
  ia64_addr_area addr_area = ADDR_AREA_NORMAL;
590
  ia64_addr_area area;
591
  tree arg, decl = *node;
592
 
593
  init_idents ();
594
  arg = TREE_VALUE (args);
595
  if (arg == small_ident1 || arg == small_ident2)
596
    {
597
      addr_area = ADDR_AREA_SMALL;
598
    }
599
  else
600
    {
601
      warning (OPT_Wattributes, "invalid argument of %qE attribute",
602
               name);
603
      *no_add_attrs = true;
604
    }
605
 
606
  switch (TREE_CODE (decl))
607
    {
608
    case VAR_DECL:
609
      if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
610
           == FUNCTION_DECL)
611
          && !TREE_STATIC (decl))
612
        {
613
          error_at (DECL_SOURCE_LOCATION (decl),
614
                    "an address area attribute cannot be specified for "
615
                    "local variables");
616
          *no_add_attrs = true;
617
        }
618
      area = ia64_get_addr_area (decl);
619
      if (area != ADDR_AREA_NORMAL && addr_area != area)
620
        {
621
          error ("address area of %q+D conflicts with previous "
622
                 "declaration", decl);
623
          *no_add_attrs = true;
624
        }
625
      break;
626
 
627
    case FUNCTION_DECL:
628
      error_at (DECL_SOURCE_LOCATION (decl),
629
                "address area attribute cannot be specified for "
630
                "functions");
631
      *no_add_attrs = true;
632
      break;
633
 
634
    default:
635
      warning (OPT_Wattributes, "%qE attribute ignored",
636
               name);
637
      *no_add_attrs = true;
638
      break;
639
    }
640
 
641
  return NULL_TREE;
642
}
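/* Illustrative example (not taken from this file): the "model" attribute
   handled above is written on a declaration as, e.g.,
       static int counter __attribute__ ((model ("small")));
   which requests that COUNTER be placed in the small, "addl"-reachable
   address area (ADDR_AREA_SMALL above).  */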
643
 
644
/* The section must have global and overlaid attributes.  */
645
#define SECTION_VMS_OVERLAY SECTION_MACH_DEP
646
 
647
/* Part of the low level implementation of DEC Ada pragma Common_Object which
648
   enables the shared use of variables stored in overlaid linker areas
649
   corresponding to the use of Fortran COMMON.  */
650
 
651
static tree
652
ia64_vms_common_object_attribute (tree *node, tree name, tree args,
653
                                  int flags ATTRIBUTE_UNUSED,
654
                                  bool *no_add_attrs)
655
{
656
    tree decl = *node;
657
    tree id, val;
658
    if (! DECL_P (decl))
659
      abort ();
660
 
661
    DECL_COMMON (decl) = 1;
662
    id = TREE_VALUE (args);
663
    if (TREE_CODE (id) == IDENTIFIER_NODE)
664
      val = build_string (IDENTIFIER_LENGTH (id), IDENTIFIER_POINTER (id));
665
    else if (TREE_CODE (id) == STRING_CST)
666
      val = id;
667
    else
668
      {
669
        warning (OPT_Wattributes,
670
                 "%qE attribute requires a string constant argument", name);
671
        *no_add_attrs = true;
672
        return NULL_TREE;
673
      }
674
    DECL_SECTION_NAME (decl) = val;
675
    return NULL_TREE;
676
}
677
 
678
/* Part of the low level implementation of DEC Ada pragma Common_Object.  */
679
 
680
void
681
ia64_vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
682
                                     unsigned HOST_WIDE_INT size,
683
                                     unsigned int align)
684
{
685
  tree attr = DECL_ATTRIBUTES (decl);
686
 
687
  /* As common_object attribute set DECL_SECTION_NAME check it before
688
     looking up the attribute.  */
689
  if (DECL_SECTION_NAME (decl) && attr)
690
    attr = lookup_attribute ("common_object", attr);
691
  else
692
    attr = NULL_TREE;
693
 
694
  if (!attr)
695
    {
696
      /*  Code from elfos.h.  */
697
      fprintf (file, "%s", COMMON_ASM_OP);
698
      assemble_name (file, name);
699
      fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
700
               size, align / BITS_PER_UNIT);
701
    }
702
  else
703
    {
704
      ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
705
      ASM_OUTPUT_LABEL (file, name);
706
      ASM_OUTPUT_SKIP (file, size ? size : 1);
707
    }
708
}
709
 
710
/* Definition of TARGET_ASM_NAMED_SECTION for VMS.  */
711
 
712
void
713
ia64_vms_elf_asm_named_section (const char *name, unsigned int flags,
714
                                tree decl)
715
{
716
  if (!(flags & SECTION_VMS_OVERLAY))
717
    {
718
      default_elf_asm_named_section (name, flags, decl);
719
      return;
720
    }
721
  if (flags != (SECTION_VMS_OVERLAY | SECTION_WRITE))
722
    abort ();
723
 
724
  if (flags & SECTION_DECLARED)
725
    {
726
      fprintf (asm_out_file, "\t.section\t%s\n", name);
727
      return;
728
    }
729
 
730
  fprintf (asm_out_file, "\t.section\t%s,\"awgO\"\n", name);
731
}
732
 
733
static void
734
ia64_encode_addr_area (tree decl, rtx symbol)
735
{
736
  int flags;
737
 
738
  flags = SYMBOL_REF_FLAGS (symbol);
739
  switch (ia64_get_addr_area (decl))
740
    {
741
    case ADDR_AREA_NORMAL: break;
742
    case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
743
    default: gcc_unreachable ();
744
    }
745
  SYMBOL_REF_FLAGS (symbol) = flags;
746
}
747
 
748
static void
749
ia64_encode_section_info (tree decl, rtx rtl, int first)
750
{
751
  default_encode_section_info (decl, rtl, first);
752
 
753
  /* Careful not to prod global register variables.  */
754
  if (TREE_CODE (decl) == VAR_DECL
755
      && GET_CODE (DECL_RTL (decl)) == MEM
756
      && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
757
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
758
    ia64_encode_addr_area (decl, XEXP (rtl, 0));
759
}
760
 
761
/* Return 1 if the operands of a move are ok.  */
762
 
763
int
764
ia64_move_ok (rtx dst, rtx src)
765
{
766
  /* If we're under init_recog_no_volatile, we'll not be able to use
767
     memory_operand.  So check the code directly and don't worry about
768
     the validity of the underlying address, which should have been
769
     checked elsewhere anyway.  */
770
  if (GET_CODE (dst) != MEM)
771
    return 1;
772
  if (GET_CODE (src) == MEM)
773
    return 0;
774
  if (register_operand (src, VOIDmode))
775
    return 1;
776
 
777
  /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0.  */
778
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
779
    return src == const0_rtx;
780
  else
781
    return satisfies_constraint_G (src);
782
}
783
 
784
/* Return 1 if the operands are ok for a floating point load pair.  */
785
 
786
int
787
ia64_load_pair_ok (rtx dst, rtx src)
788
{
789
  if (GET_CODE (dst) != REG || !FP_REGNO_P (REGNO (dst)))
790
    return 0;
791
  if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
792
    return 0;
793
  switch (GET_CODE (XEXP (src, 0)))
794
    {
795
    case REG:
796
    case POST_INC:
797
      break;
798
    case POST_DEC:
799
      return 0;
800
    case POST_MODIFY:
801
      {
802
        rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);
803
 
804
        if (GET_CODE (adjust) != CONST_INT
805
            || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
806
          return 0;
807
      }
808
      break;
809
    default:
810
      abort ();
811
    }
812
  return 1;
813
}
814
 
815
int
816
addp4_optimize_ok (rtx op1, rtx op2)
817
{
818
  return (basereg_operand (op1, GET_MODE(op1)) !=
819
          basereg_operand (op2, GET_MODE(op2)));
820
}
821
 
822
/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
823
   Return the length of the field, or <= 0 on failure.  */
824
 
825
int
826
ia64_depz_field_mask (rtx rop, rtx rshift)
827
{
828
  unsigned HOST_WIDE_INT op = INTVAL (rop);
829
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);
830
 
831
  /* Get rid of the zero bits we're shifting in.  */
832
  op >>= shift;
833
 
834
  /* We must now have a solid block of 1's at bit 0.  */
835
  return exact_log2 (op + 1);
836
}
837
 
838
/* Return the TLS model to use for ADDR.  */
839
 
840
static enum tls_model
841
tls_symbolic_operand_type (rtx addr)
842
{
843
  enum tls_model tls_kind = TLS_MODEL_NONE;
844
 
845
  if (GET_CODE (addr) == CONST)
846
    {
847
      if (GET_CODE (XEXP (addr, 0)) == PLUS
848
          && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
849
        tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
850
    }
851
  else if (GET_CODE (addr) == SYMBOL_REF)
852
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);
853
 
854
  return tls_kind;
855
}
856
 
857
/* Return true if X is a constant that is valid for some immediate
858
   field in an instruction.  */
859
 
860
bool
861
ia64_legitimate_constant_p (rtx x)
862
{
863
  switch (GET_CODE (x))
864
    {
865
    case CONST_INT:
866
    case LABEL_REF:
867
      return true;
868
 
869
    case CONST_DOUBLE:
870
      if (GET_MODE (x) == VOIDmode || GET_MODE (x) == SFmode
871
          || GET_MODE (x) == DFmode)
872
        return true;
873
      return satisfies_constraint_G (x);
874
 
875
    case CONST:
876
    case SYMBOL_REF:
877
      /* ??? Short term workaround for PR 28490.  We must make the code here
878
         match the code in ia64_expand_move and move_operand, even though they
879
         are both technically wrong.  */
880
      if (tls_symbolic_operand_type (x) == 0)
881
        {
882
          HOST_WIDE_INT addend = 0;
883
          rtx op = x;
884
 
885
          if (GET_CODE (op) == CONST
886
              && GET_CODE (XEXP (op, 0)) == PLUS
887
              && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
888
            {
889
              addend = INTVAL (XEXP (XEXP (op, 0), 1));
890
              op = XEXP (XEXP (op, 0), 0);
891
            }
892
 
893
          if (any_offset_symbol_operand (op, GET_MODE (op))
894
              || function_operand (op, GET_MODE (op)))
895
            return true;
896
          if (aligned_offset_symbol_operand (op, GET_MODE (op)))
897
            return (addend & 0x3fff) == 0;
898
          return false;
899
        }
900
      return false;
901
 
902
    case CONST_VECTOR:
903
      {
904
        enum machine_mode mode = GET_MODE (x);
905
 
906
        if (mode == V2SFmode)
907
          return satisfies_constraint_Y (x);
908
 
909
        return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
910
                && GET_MODE_SIZE (mode) <= 8);
911
      }
912
 
913
    default:
914
      return false;
915
    }
916
}
917
 
918
/* Don't allow TLS addresses to get spilled to memory.  */
919
 
920
static bool
921
ia64_cannot_force_const_mem (rtx x)
922
{
923
  if (GET_MODE (x) == RFmode)
924
    return true;
925
  return tls_symbolic_operand_type (x) != 0;
926
}
927
 
928
/* Expand a symbolic constant load.  */
929
 
930
bool
931
ia64_expand_load_address (rtx dest, rtx src)
932
{
933
  gcc_assert (GET_CODE (dest) == REG);
934
 
935
  /* ILP32 mode still loads 64-bits of data from the GOT.  This avoids
936
     having to pointer-extend the value afterward.  Other forms of address
937
     computation below are also more natural to compute as 64-bit quantities.
938
     If we've been given an SImode destination register, change it.  */
939
  if (GET_MODE (dest) != Pmode)
940
    dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest),
941
                               byte_lowpart_offset (Pmode, GET_MODE (dest)));
942
 
943
  if (TARGET_NO_PIC)
944
    return false;
945
  if (small_addr_symbolic_operand (src, VOIDmode))
946
    return false;
947
 
948
  if (TARGET_AUTO_PIC)
949
    emit_insn (gen_load_gprel64 (dest, src));
950
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
951
    emit_insn (gen_load_fptr (dest, src));
952
  else if (sdata_symbolic_operand (src, VOIDmode))
953
    emit_insn (gen_load_gprel (dest, src));
954
  else
955
    {
956
      HOST_WIDE_INT addend = 0;
957
      rtx tmp;
958
 
959
      /* We did split constant offsets in ia64_expand_move, and we did try
960
         to keep them split in move_operand, but we also allowed reload to
961
         rematerialize arbitrary constants rather than spill the value to
962
         the stack and reload it.  So we have to be prepared here to split
963
         them apart again.  */
964
      if (GET_CODE (src) == CONST)
965
        {
966
          HOST_WIDE_INT hi, lo;
967
 
968
          hi = INTVAL (XEXP (XEXP (src, 0), 1));
969
          lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
970
          hi = hi - lo;
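          /* The two lines above sign-extend the low 14 bits of the offset
             into LO (range [-0x2000, 0x1fff]) and leave the 16KB-aligned
             remainder in HI.  For example, an offset of 0x2100 splits into
             lo = -0x1f00 and hi = 0x4000.  */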
971
 
972
          if (lo != 0)
973
            {
974
              addend = lo;
975
              src = plus_constant (XEXP (XEXP (src, 0), 0), hi);
976
            }
977
        }
978
 
979
      tmp = gen_rtx_HIGH (Pmode, src);
980
      tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
981
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
982
 
983
      tmp = gen_rtx_LO_SUM (Pmode, dest, src);
984
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
985
 
986
      if (addend)
987
        {
988
          tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
989
          emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
990
        }
991
    }
992
 
993
  return true;
994
}
995
 
996
static GTY(()) rtx gen_tls_tga;
997
static rtx
998
gen_tls_get_addr (void)
999
{
1000
  if (!gen_tls_tga)
1001
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
1002
  return gen_tls_tga;
1003
}
1004
 
1005
static GTY(()) rtx thread_pointer_rtx;
1006
static rtx
1007
gen_thread_pointer (void)
1008
{
1009
  if (!thread_pointer_rtx)
1010
    thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
1011
  return thread_pointer_rtx;
1012
}
1013
 
1014
static rtx
1015
ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
1016
                         rtx orig_op1, HOST_WIDE_INT addend)
1017
{
1018
  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
1019
  rtx orig_op0 = op0;
1020
  HOST_WIDE_INT addend_lo, addend_hi;
1021
 
1022
  switch (tls_kind)
1023
    {
1024
    case TLS_MODEL_GLOBAL_DYNAMIC:
1025
      start_sequence ();
1026
 
1027
      tga_op1 = gen_reg_rtx (Pmode);
1028
      emit_insn (gen_load_dtpmod (tga_op1, op1));
1029
 
1030
      tga_op2 = gen_reg_rtx (Pmode);
1031
      emit_insn (gen_load_dtprel (tga_op2, op1));
1032
 
1033
      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1034
                                         LCT_CONST, Pmode, 2, tga_op1,
1035
                                         Pmode, tga_op2, Pmode);
1036
 
1037
      insns = get_insns ();
1038
      end_sequence ();
1039
 
1040
      if (GET_MODE (op0) != Pmode)
1041
        op0 = tga_ret;
1042
      emit_libcall_block (insns, op0, tga_ret, op1);
1043
      break;
1044
 
1045
    case TLS_MODEL_LOCAL_DYNAMIC:
1046
      /* ??? This isn't the completely proper way to do local-dynamic
1047
         If the call to __tls_get_addr is used only by a single symbol,
1048
         then we should (somehow) move the dtprel to the second arg
1049
         to avoid the extra add.  */
1050
      start_sequence ();
1051
 
1052
      tga_op1 = gen_reg_rtx (Pmode);
1053
      emit_insn (gen_load_dtpmod (tga_op1, op1));
1054
 
1055
      tga_op2 = const0_rtx;
1056
 
1057
      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1058
                                         LCT_CONST, Pmode, 2, tga_op1,
1059
                                         Pmode, tga_op2, Pmode);
1060
 
1061
      insns = get_insns ();
1062
      end_sequence ();
1063
 
1064
      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1065
                                UNSPEC_LD_BASE);
1066
      tmp = gen_reg_rtx (Pmode);
1067
      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
1068
 
1069
      if (!register_operand (op0, Pmode))
1070
        op0 = gen_reg_rtx (Pmode);
1071
      if (TARGET_TLS64)
1072
        {
1073
          emit_insn (gen_load_dtprel (op0, op1));
1074
          emit_insn (gen_adddi3 (op0, tmp, op0));
1075
        }
1076
      else
1077
        emit_insn (gen_add_dtprel (op0, op1, tmp));
1078
      break;
1079
 
1080
    case TLS_MODEL_INITIAL_EXEC:
1081
      addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1082
      addend_hi = addend - addend_lo;
1083
 
1084
      op1 = plus_constant (op1, addend_hi);
1085
      addend = addend_lo;
1086
 
1087
      tmp = gen_reg_rtx (Pmode);
1088
      emit_insn (gen_load_tprel (tmp, op1));
1089
 
1090
      if (!register_operand (op0, Pmode))
1091
        op0 = gen_reg_rtx (Pmode);
1092
      emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
1093
      break;
1094
 
1095
    case TLS_MODEL_LOCAL_EXEC:
1096
      if (!register_operand (op0, Pmode))
1097
        op0 = gen_reg_rtx (Pmode);
1098
 
1099
      op1 = orig_op1;
1100
      addend = 0;
1101
      if (TARGET_TLS64)
1102
        {
1103
          emit_insn (gen_load_tprel (op0, op1));
1104
          emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
1105
        }
1106
      else
1107
        emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
1108
      break;
1109
 
1110
    default:
1111
      gcc_unreachable ();
1112
    }
1113
 
1114
  if (addend)
1115
    op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
1116
                               orig_op0, 1, OPTAB_DIRECT);
1117
  if (orig_op0 == op0)
1118
    return NULL_RTX;
1119
  if (GET_MODE (orig_op0) == Pmode)
1120
    return op0;
1121
  return gen_lowpart (GET_MODE (orig_op0), op0);
1122
}
1123
 
1124
rtx
1125
ia64_expand_move (rtx op0, rtx op1)
1126
{
1127
  enum machine_mode mode = GET_MODE (op0);
1128
 
1129
  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
1130
    op1 = force_reg (mode, op1);
1131
 
1132
  if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
1133
    {
1134
      HOST_WIDE_INT addend = 0;
1135
      enum tls_model tls_kind;
1136
      rtx sym = op1;
1137
 
1138
      if (GET_CODE (op1) == CONST
1139
          && GET_CODE (XEXP (op1, 0)) == PLUS
1140
          && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
1141
        {
1142
          addend = INTVAL (XEXP (XEXP (op1, 0), 1));
1143
          sym = XEXP (XEXP (op1, 0), 0);
1144
        }
1145
 
1146
      tls_kind = tls_symbolic_operand_type (sym);
1147
      if (tls_kind)
1148
        return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);
1149
 
1150
      if (any_offset_symbol_operand (sym, mode))
1151
        addend = 0;
1152
      else if (aligned_offset_symbol_operand (sym, mode))
1153
        {
1154
          HOST_WIDE_INT addend_lo, addend_hi;
1155
 
1156
          addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1157
          addend_hi = addend - addend_lo;
1158
 
1159
          if (addend_lo != 0)
1160
            {
1161
              op1 = plus_constant (sym, addend_hi);
1162
              addend = addend_lo;
1163
            }
1164
          else
1165
            addend = 0;
1166
        }
1167
      else
1168
        op1 = sym;
1169
 
1170
      if (reload_completed)
1171
        {
1172
          /* We really should have taken care of this offset earlier.  */
1173
          gcc_assert (addend == 0);
1174
          if (ia64_expand_load_address (op0, op1))
1175
            return NULL_RTX;
1176
        }
1177
 
1178
      if (addend)
1179
        {
1180
          rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode);
1181
 
1182
          emit_insn (gen_rtx_SET (VOIDmode, subtarget, op1));
1183
 
1184
          op1 = expand_simple_binop (mode, PLUS, subtarget,
1185
                                     GEN_INT (addend), op0, 1, OPTAB_DIRECT);
1186
          if (op0 == op1)
1187
            return NULL_RTX;
1188
        }
1189
    }
1190
 
1191
  return op1;
1192
}
1193
 
1194
/* Split a move from OP1 to OP0 conditional on COND.  */
1195
 
1196
void
1197
ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
1198
{
1199
  rtx insn, first = get_last_insn ();
1200
 
1201
  emit_move_insn (op0, op1);
1202
 
1203
  for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
1204
    if (INSN_P (insn))
1205
      PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
1206
                                          PATTERN (insn));
1207
}
1208
 
1209
/* Split a post-reload TImode or TFmode reference into two DImode
1210
   components.  This is made extra difficult by the fact that we do
1211
   not get any scratch registers to work with, because reload cannot
1212
   be prevented from giving us a scratch that overlaps the register
1213
   pair involved.  So instead, when addressing memory, we tweak the
1214
   pointer register up and back down with POST_INCs.  Or up and not
1215
   back down when we can get away with it.
1216
 
1217
   REVERSED is true when the loads must be done in reversed order
1218
   (high word first) for correctness.  DEAD is true when the pointer
1219
   dies with the second insn we generate and therefore the second
1220
   address must not carry a postmodify.
1221
 
1222
   May return an insn which is to be emitted after the moves.  */
1223
 
1224
static rtx
1225
ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
1226
{
1227
  rtx fixup = 0;
1228
 
1229
  switch (GET_CODE (in))
1230
    {
1231
    case REG:
1232
      out[reversed] = gen_rtx_REG (DImode, REGNO (in));
1233
      out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
1234
      break;
1235
 
1236
    case CONST_INT:
1237
    case CONST_DOUBLE:
1238
      /* Cannot occur reversed.  */
1239
      gcc_assert (!reversed);
1240
 
1241
      if (GET_MODE (in) != TFmode)
1242
        split_double (in, &out[0], &out[1]);
1243
      else
1244
        /* split_double does not understand how to split a TFmode
1245
           quantity into a pair of DImode constants.  */
1246
        {
1247
          REAL_VALUE_TYPE r;
1248
          unsigned HOST_WIDE_INT p[2];
1249
          long l[4];  /* TFmode is 128 bits */
1250
 
1251
          REAL_VALUE_FROM_CONST_DOUBLE (r, in);
1252
          real_to_target (l, &r, TFmode);
1253
 
1254
          if (FLOAT_WORDS_BIG_ENDIAN)
1255
            {
1256
              p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
1257
              p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
1258
            }
1259
          else
1260
            {
1261
              p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
1262
              p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
1263
            }
1264
          out[0] = GEN_INT (p[0]);
1265
          out[1] = GEN_INT (p[1]);
1266
        }
1267
      break;
1268
 
1269
    case MEM:
1270
      {
1271
        rtx base = XEXP (in, 0);
1272
        rtx offset;
1273
 
1274
        switch (GET_CODE (base))
1275
          {
1276
          case REG:
1277
            if (!reversed)
1278
              {
1279
                out[0] = adjust_automodify_address
1280
                  (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1281
                out[1] = adjust_automodify_address
1282
                  (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
1283
              }
1284
            else
1285
              {
1286
                /* Reversal requires a pre-increment, which can only
1287
                   be done as a separate insn.  */
1288
                emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
1289
                out[0] = adjust_automodify_address
1290
                  (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
1291
                out[1] = adjust_address (in, DImode, 0);
1292
              }
1293
            break;
1294
 
1295
          case POST_INC:
1296
            gcc_assert (!reversed && !dead);
1297
 
1298
            /* Just do the increment in two steps.  */
1299
            out[0] = adjust_automodify_address (in, DImode, 0, 0);
1300
            out[1] = adjust_automodify_address (in, DImode, 0, 8);
1301
            break;
1302
 
1303
          case POST_DEC:
1304
            gcc_assert (!reversed && !dead);
1305
 
1306
            /* Add 8, subtract 24.  */
1307
            base = XEXP (base, 0);
1308
            out[0] = adjust_automodify_address
1309
              (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1310
            out[1] = adjust_automodify_address
1311
              (in, DImode,
1312
               gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),
1313
               8);
1314
            break;
1315
 
1316
          case POST_MODIFY:
1317
            gcc_assert (!reversed && !dead);
1318
 
1319
            /* Extract and adjust the modification.  This case is
1320
               trickier than the others, because we might have an
1321
               index register, or we might have a combined offset that
1322
               doesn't fit a signed 9-bit displacement field.  We can
1323
               assume the incoming expression is already legitimate.  */
1324
            offset = XEXP (base, 1);
1325
            base = XEXP (base, 0);
1326
 
1327
            out[0] = adjust_automodify_address
1328
              (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1329
 
1330
            if (GET_CODE (XEXP (offset, 1)) == REG)
1331
              {
1332
                /* Can't adjust the postmodify to match.  Emit the
1333
                   original, then a separate addition insn.  */
1334
                out[1] = adjust_automodify_address (in, DImode, 0, 8);
1335
                fixup = gen_adddi3 (base, base, GEN_INT (-8));
1336
              }
1337
            else
1338
              {
1339
                gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
1340
                if (INTVAL (XEXP (offset, 1)) < -256 + 8)
1341
                  {
1342
                    /* Again the postmodify cannot be made to match,
1343
                       but in this case it's more efficient to get rid
1344
                       of the postmodify entirely and fix up with an
1345
                       add insn.  */
1346
                    out[1] = adjust_automodify_address (in, DImode, base, 8);
1347
                    fixup = gen_adddi3
1348
                      (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
1349
                  }
1350
                else
1351
                  {
1352
                    /* Combined offset still fits in the displacement field.
1353
                       (We cannot overflow it at the high end.)  */
1354
                    out[1] = adjust_automodify_address
1355
                      (in, DImode, gen_rtx_POST_MODIFY
1356
                       (Pmode, base, gen_rtx_PLUS
1357
                        (Pmode, base,
1358
                         GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
1359
                       8);
1360
                  }
1361
              }
1362
            break;
1363
 
1364
          default:
1365
            gcc_unreachable ();
1366
          }
1367
        break;
1368
      }
1369
 
1370
    default:
1371
      gcc_unreachable ();
1372
    }
1373
 
1374
  return fixup;
1375
}
1376
 
1377
/* Split a TImode or TFmode move instruction after reload.
1378
   This is used by *movtf_internal and *movti_internal.  */
1379
void
1380
ia64_split_tmode_move (rtx operands[])
1381
{
1382
  rtx in[2], out[2], insn;
1383
  rtx fixup[2];
1384
  bool dead = false;
1385
  bool reversed = false;
1386
 
1387
  /* It is possible for reload to decide to overwrite a pointer with
1388
     the value it points to.  In that case we have to do the loads in
1389
     the appropriate order so that the pointer is not destroyed too
1390
     early.  Also we must not generate a postmodify for that second
1391
     load, or rws_access_regno will die.  */
1392
  if (GET_CODE (operands[1]) == MEM
1393
      && reg_overlap_mentioned_p (operands[0], operands[1]))
1394
    {
1395
      rtx base = XEXP (operands[1], 0);
1396
      while (GET_CODE (base) != REG)
1397
        base = XEXP (base, 0);
1398
 
1399
      if (REGNO (base) == REGNO (operands[0]))
1400
        reversed = true;
1401
      dead = true;
1402
    }
1403
  /* Another reason to do the moves in reversed order is if the first
1404
     element of the target register pair is also the second element of
1405
     the source register pair.  */
1406
  if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
1407
      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1408
    reversed = true;
1409
 
1410
  fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
1411
  fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
1412
 
1413
#define MAYBE_ADD_REG_INC_NOTE(INSN, EXP)                               \
1414
  if (GET_CODE (EXP) == MEM                                             \
1415
      && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY                        \
1416
          || GET_CODE (XEXP (EXP, 0)) == POST_INC                        \
1417
          || GET_CODE (XEXP (EXP, 0)) == POST_DEC))                      \
1418
    add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0))
1419
 
1420
  insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
1421
  MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
1422
  MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
1423
 
1424
  insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
1425
  MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
1426
  MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
1427
 
1428
  if (fixup[0])
1429
    emit_insn (fixup[0]);
1430
  if (fixup[1])
1431
    emit_insn (fixup[1]);
1432
 
1433
#undef MAYBE_ADD_REG_INC_NOTE
1434
}
1435
 
1436
/* ??? Fixing GR->FR XFmode moves during reload is hard.  You need to go
1437
   through memory plus an extra GR scratch register.  Except that you can
1438
   either get the first from SECONDARY_MEMORY_NEEDED or the second from
1439
   SECONDARY_RELOAD_CLASS, but not both.
1440
 
1441
   We got into problems in the first place by allowing a construct like
1442
   (subreg:XF (reg:TI)), which we got from a union containing a long double.
1443
   This solution attempts to prevent this situation from occurring.  When
1444
   we see something like the above, we spill the inner register to memory.  */
1445
 
1446
static rtx
1447
spill_xfmode_rfmode_operand (rtx in, int force, enum machine_mode mode)
1448
{
1449
  if (GET_CODE (in) == SUBREG
1450
      && GET_MODE (SUBREG_REG (in)) == TImode
1451
      && GET_CODE (SUBREG_REG (in)) == REG)
1452
    {
1453
      rtx memt = assign_stack_temp (TImode, 16, 0);
1454
      emit_move_insn (memt, SUBREG_REG (in));
1455
      return adjust_address (memt, mode, 0);
1456
    }
1457
  else if (force && GET_CODE (in) == REG)
1458
    {
1459
      rtx memx = assign_stack_temp (mode, 16, 0);
1460
      emit_move_insn (memx, in);
1461
      return memx;
1462
    }
1463
  else
1464
    return in;
1465
}
1466
 
1467
/* Expand the movxf or movrf pattern (MODE says which) with the given
1468
   OPERANDS, returning true if the pattern should then invoke
1469
   DONE.  */
1470
 
1471
bool
1472
ia64_expand_movxf_movrf (enum machine_mode mode, rtx operands[])
1473
{
1474
  rtx op0 = operands[0];
1475
 
1476
  if (GET_CODE (op0) == SUBREG)
1477
    op0 = SUBREG_REG (op0);
1478
 
1479
  /* We must support XFmode loads into general registers for stdarg/vararg,
1480
     unprototyped calls, and a rare case where a long double is passed as
1481
     an argument after a float HFA fills the FP registers.  We split them into
1482
     DImode loads for convenience.  We also need to support XFmode stores
1483
     for the last case.  This case does not happen for stdarg/vararg routines,
1484
     because we do a block store to memory of unnamed arguments.  */
1485
 
1486
  if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
1487
    {
1488
      rtx out[2];
1489
 
1490
      /* We're hoping to transform everything that deals with XFmode
1491
         quantities and GR registers early in the compiler.  */
1492
      gcc_assert (can_create_pseudo_p ());
1493
 
1494
      /* Struct to register can just use TImode instead.  */
1495
      if ((GET_CODE (operands[1]) == SUBREG
1496
           && GET_MODE (SUBREG_REG (operands[1])) == TImode)
1497
          || (GET_CODE (operands[1]) == REG
1498
              && GR_REGNO_P (REGNO (operands[1]))))
1499
        {
1500
          rtx op1 = operands[1];
1501
 
1502
          if (GET_CODE (op1) == SUBREG)
1503
            op1 = SUBREG_REG (op1);
1504
          else
1505
            op1 = gen_rtx_REG (TImode, REGNO (op1));
1506
 
1507
          emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
1508
          return true;
1509
        }
1510
 
1511
      if (GET_CODE (operands[1]) == CONST_DOUBLE)
1512
        {
1513
          /* Don't word-swap when reading in the constant.  */
1514
          emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
1515
                          operand_subword (operands[1], WORDS_BIG_ENDIAN,
1516
                                           0, mode));
1517
          emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
1518
                          operand_subword (operands[1], !WORDS_BIG_ENDIAN,
1519
                                           0, mode));
1520
          return true;
1521
        }
1522
 
1523
      /* If the quantity is in a register not known to be GR, spill it.  */
1524
      if (register_operand (operands[1], mode))
1525
        operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);
1526
 
1527
      gcc_assert (GET_CODE (operands[1]) == MEM);
1528
 
1529
      /* Don't word-swap when reading in the value.  */
1530
      out[0] = gen_rtx_REG (DImode, REGNO (op0));
1531
      out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);
1532
 
1533
      emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
1534
      emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
1535
      return true;
1536
    }
1537
 
1538
  if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
1539
    {
1540
      /* We're hoping to transform everything that deals with XFmode
1541
         quantities and GR registers early in the compiler.  */
1542
      gcc_assert (can_create_pseudo_p ());
1543
 
1544
      /* Op0 can't be a GR_REG here, as that case is handled above.
1545
         If op0 is a register, then we spill op1, so that we now have a
1546
         MEM operand.  This requires creating an XFmode subreg of a TImode reg
1547
         to force the spill.  */
1548
      if (register_operand (operands[0], mode))
1549
        {
1550
          rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
1551
          op1 = gen_rtx_SUBREG (mode, op1, 0);
1552
          operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
1553
        }
1554
 
1555
      else
1556
        {
1557
          rtx in[2];
1558
 
1559
          gcc_assert (GET_CODE (operands[0]) == MEM);
1560
 
1561
          /* Don't word-swap when writing out the value.  */
1562
          in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
1563
          in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
1564
 
1565
          emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
1566
          emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
1567
          return true;
1568
        }
1569
    }
1570
 
1571
  if (!reload_in_progress && !reload_completed)
1572
    {
1573
      operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);
1574
 
1575
      if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
1576
        {
1577
          rtx memt, memx, in = operands[1];
1578
          if (CONSTANT_P (in))
1579
            in = validize_mem (force_const_mem (mode, in));
1580
          if (GET_CODE (in) == MEM)
1581
            memt = adjust_address (in, TImode, 0);
1582
          else
1583
            {
1584
              memt = assign_stack_temp (TImode, 16, 0);
1585
              memx = adjust_address (memt, mode, 0);
1586
              emit_move_insn (memx, in);
1587
            }
1588
          emit_move_insn (op0, memt);
1589
          return true;
1590
        }
1591
 
1592
      if (!ia64_move_ok (operands[0], operands[1]))
1593
        operands[1] = force_reg (mode, operands[1]);
1594
    }
1595
 
1596
  return false;
1597
}
1598
 
1599
/* Emit a comparison instruction if necessary, replacing *EXPR, *OP0, *OP1
1600
   with the expression that holds the compare result (in VOIDmode).  */
1601
 
1602
static GTY(()) rtx cmptf_libfunc;
1603
 
1604
void
1605
ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1)
1606
{
1607
  enum rtx_code code = GET_CODE (*expr);
1608
  rtx cmp;
1609
 
1610
  /* If we have a BImode input, then we already have a compare result, and
1611
     do not need to emit another comparison.  */
1612
  if (GET_MODE (*op0) == BImode)
1613
    {
1614
      gcc_assert ((code == NE || code == EQ) && *op1 == const0_rtx);
1615
      cmp = *op0;
1616
    }
1617
  /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
1618
     magic number as its third argument, that indicates what to do.
1619
     The return value is an integer to be compared against zero.  */
1620
  else if (TARGET_HPUX && GET_MODE (*op0) == TFmode)
1621
    {
1622
      enum qfcmp_magic {
1623
        QCMP_INV = 1,   /* Raise FP_INVALID on SNaN as a side effect.  */
1624
        QCMP_UNORD = 2,
1625
        QCMP_EQ = 4,
1626
        QCMP_LT = 8,
1627
        QCMP_GT = 16
1628
      };
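      /* Illustrative example: a LE comparison of *OP0 and *OP1 is lowered,
         in effect, to _U_Qfcmp (*op0, *op1, QCMP_LT | QCMP_EQ | QCMP_INV),
         with the DImode result then tested against zero using NCODE.  */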
1629
      int magic;
1630
      enum rtx_code ncode;
1631
      rtx ret, insns;
1632
 
1633
      gcc_assert (cmptf_libfunc && GET_MODE (*op1) == TFmode);
1634
      switch (code)
1635
        {
1636
          /* 1 = equal, 0 = not equal.  Equality operators do
1637
             not raise FP_INVALID when given an SNaN operand.  */
1638
        case EQ:        magic = QCMP_EQ;                  ncode = NE; break;
1639
        case NE:        magic = QCMP_EQ;                  ncode = EQ; break;
1640
          /* isunordered() from C99.  */
1641
        case UNORDERED: magic = QCMP_UNORD;               ncode = NE; break;
1642
        case ORDERED:   magic = QCMP_UNORD;               ncode = EQ; break;
1643
          /* Relational operators raise FP_INVALID when given
1644
             an SNaN operand.  */
1645
        case LT:        magic = QCMP_LT        |QCMP_INV; ncode = NE; break;
1646
        case LE:        magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1647
        case GT:        magic = QCMP_GT        |QCMP_INV; ncode = NE; break;
1648
        case GE:        magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1649
          /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
1650
             Expanders for buneq etc. would have to be added to ia64.md
1651
             for this to be useful.  */
1652
        default: gcc_unreachable ();
1653
        }
1654
 
1655
      start_sequence ();
1656
 
1657
      ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
1658
                                     *op0, TFmode, *op1, TFmode,
1659
                                     GEN_INT (magic), DImode);
1660
      cmp = gen_reg_rtx (BImode);
1661
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
1662
                              gen_rtx_fmt_ee (ncode, BImode,
1663
                                              ret, const0_rtx)));
1664
 
1665
      insns = get_insns ();
1666
      end_sequence ();
1667
 
1668
      emit_libcall_block (insns, cmp, cmp,
1669
                          gen_rtx_fmt_ee (code, BImode, *op0, *op1));
1670
      code = NE;
1671
    }
1672
  else
1673
    {
1674
      cmp = gen_reg_rtx (BImode);
1675
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
1676
                              gen_rtx_fmt_ee (code, BImode, *op0, *op1)));
1677
      code = NE;
1678
    }
1679
 
1680
  *expr = gen_rtx_fmt_ee (code, VOIDmode, cmp, const0_rtx);
1681
  *op0 = cmp;
1682
  *op1 = const0_rtx;
1683
}
1684
 
1685
/* Generate an integral vector comparison.  Return true if the condition has
1686
   been reversed, and so the sense of the comparison should be inverted.  */
1687
 
1688
static bool
1689
ia64_expand_vecint_compare (enum rtx_code code, enum machine_mode mode,
1690
                            rtx dest, rtx op0, rtx op1)
1691
{
1692
  bool negate = false;
1693
  rtx x;
1694
 
1695
  /* Canonicalize the comparison to EQ, GT, GTU.  */
1696
  switch (code)
1697
    {
1698
    case EQ:
1699
    case GT:
1700
    case GTU:
1701
      break;
1702
 
1703
    case NE:
1704
    case LE:
1705
    case LEU:
1706
      code = reverse_condition (code);
1707
      negate = true;
1708
      break;
1709
 
1710
    case GE:
1711
    case GEU:
1712
      code = reverse_condition (code);
1713
      negate = true;
1714
      /* FALLTHRU */
1715
 
1716
    case LT:
1717
    case LTU:
1718
      code = swap_condition (code);
1719
      x = op0, op0 = op1, op1 = x;
1720
      break;
1721
 
1722
    default:
1723
      gcc_unreachable ();
1724
    }
1725
 
1726
  /* Unsigned parallel compare is not supported by the hardware.  Play some
1727
     tricks to turn this into a signed comparison against 0.  */
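  /* For example, with 32-bit lanes x >u y exactly when
     (x - 0x80000000) >s (y - 0x80000000), because biasing both operands by
     INT_MIN maps the unsigned ordering onto the signed ordering.  For 8-
     and 16-bit lanes, x >u y exactly when the unsigned saturating
     difference x -us y is nonzero, which is what the US_MINUS path tests.  */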
1728
  if (code == GTU)
1729
    {
1730
      switch (mode)
1731
        {
1732
        case V2SImode:
1733
          {
1734
            rtx t1, t2, mask;
1735
 
1736
            /* Subtract (-(INT MAX) - 1) from both operands to make
1737
               them signed.  */
1738
            mask = GEN_INT (0x80000000);
1739
            mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask));
1740
            mask = force_reg (mode, mask);
1741
            t1 = gen_reg_rtx (mode);
1742
            emit_insn (gen_subv2si3 (t1, op0, mask));
1743
            t2 = gen_reg_rtx (mode);
1744
            emit_insn (gen_subv2si3 (t2, op1, mask));
1745
            op0 = t1;
1746
            op1 = t2;
1747
            code = GT;
1748
          }
1749
          break;
1750
 
1751
        case V8QImode:
1752
        case V4HImode:
1753
          /* Perform a parallel unsigned saturating subtraction.  */
1754
          x = gen_reg_rtx (mode);
1755
          emit_insn (gen_rtx_SET (VOIDmode, x,
1756
                                  gen_rtx_US_MINUS (mode, op0, op1)));
1757
 
1758
          code = EQ;
1759
          op0 = x;
1760
          op1 = CONST0_RTX (mode);
1761
          negate = !negate;
1762
          break;
1763
 
1764
        default:
1765
          gcc_unreachable ();
1766
        }
1767
    }
1768
 
1769
  x = gen_rtx_fmt_ee (code, mode, op0, op1);
1770
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
1771
 
1772
  return negate;
1773
}
1774
 
1775
/* Emit an integral vector conditional move.  */
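/* The comparison mask CMP is all-ones in each lane where the condition
   holds, so the select is formed as (CMP & if-true) | (~CMP & if-false),
   with the constant-zero operand cases below needing only a single AND or
   AND-of-complement.  */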
1776
 
1777
void
1778
ia64_expand_vecint_cmov (rtx operands[])
1779
{
1780
  enum machine_mode mode = GET_MODE (operands[0]);
1781
  enum rtx_code code = GET_CODE (operands[3]);
1782
  bool negate;
1783
  rtx cmp, x, ot, of;
1784
 
1785
  cmp = gen_reg_rtx (mode);
1786
  negate = ia64_expand_vecint_compare (code, mode, cmp,
1787
                                       operands[4], operands[5]);
1788
 
1789
  ot = operands[1+negate];
1790
  of = operands[2-negate];
1791
 
1792
  if (ot == CONST0_RTX (mode))
1793
    {
1794
      if (of == CONST0_RTX (mode))
1795
        {
1796
          emit_move_insn (operands[0], ot);
1797
          return;
1798
        }
1799
 
1800
      x = gen_rtx_NOT (mode, cmp);
1801
      x = gen_rtx_AND (mode, x, of);
1802
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1803
    }
1804
  else if (of == CONST0_RTX (mode))
1805
    {
1806
      x = gen_rtx_AND (mode, cmp, ot);
1807
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1808
    }
1809
  else
1810
    {
1811
      rtx t, f;
1812
 
1813
      t = gen_reg_rtx (mode);
1814
      x = gen_rtx_AND (mode, cmp, operands[1+negate]);
1815
      emit_insn (gen_rtx_SET (VOIDmode, t, x));
1816
 
1817
      f = gen_reg_rtx (mode);
1818
      x = gen_rtx_NOT (mode, cmp);
1819
      x = gen_rtx_AND (mode, x, operands[2-negate]);
1820
      emit_insn (gen_rtx_SET (VOIDmode, f, x));
1821
 
1822
      x = gen_rtx_IOR (mode, t, f);
1823
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1824
    }
1825
}
1826
 
1827
/* Emit an integral vector min or max operation.  Return true if all done.  */
1828
 
1829
bool
1830
ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
1831
                           rtx operands[])
1832
{
1833
  rtx xops[6];
1834
 
1835
  /* These four combinations are supported directly.  */
1836
  if (mode == V8QImode && (code == UMIN || code == UMAX))
1837
    return false;
1838
  if (mode == V4HImode && (code == SMIN || code == SMAX))
1839
    return false;
1840
 
1841
  /* This combination can be implemented with only saturating subtraction.  */
1842
  if (mode == V4HImode && code == UMAX)
1843
    {
1844
      rtx x, tmp = gen_reg_rtx (mode);
1845
 
1846
      x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
1847
      emit_insn (gen_rtx_SET (VOIDmode, tmp, x));
1848
 
1849
      emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
1850
      return true;
1851
    }
1852
 
1853
  /* Everything else implemented via vector comparisons.  */
1854
  xops[0] = operands[0];
1855
  xops[4] = xops[1] = operands[1];
1856
  xops[5] = xops[2] = operands[2];
1857
 
1858
  switch (code)
1859
    {
1860
    case UMIN:
1861
      code = LTU;
1862
      break;
1863
    case UMAX:
1864
      code = GTU;
1865
      break;
1866
    case SMIN:
1867
      code = LT;
1868
      break;
1869
    case SMAX:
1870
      code = GT;
1871
      break;
1872
    default:
1873
      gcc_unreachable ();
1874
    }
1875
  xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
1876
 
1877
  ia64_expand_vecint_cmov (xops);
1878
  return true;
1879
}
1880
 
1881
/* Emit an integral vector widening sum operation.  */
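/* Roughly, OPERANDS[0] = OPERANDS[2] + the elements of OPERANDS[1] widened
   to the mode of OPERANDS[0], two narrow elements accumulating into each
   wide lane.  The exact element-to-lane mapping is immaterial here, since
   the result only feeds a reduction.  */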
1882
 
1883
void
1884
ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
1885
{
1886
  rtx l, h, x, s;
1887
  enum machine_mode wmode, mode;
1888
  rtx (*unpack_l) (rtx, rtx, rtx);
1889
  rtx (*unpack_h) (rtx, rtx, rtx);
1890
  rtx (*plus) (rtx, rtx, rtx);
1891
 
1892
  wmode = GET_MODE (operands[0]);
1893
  mode = GET_MODE (operands[1]);
1894
 
1895
  switch (mode)
1896
    {
1897
    case V8QImode:
1898
      unpack_l = gen_unpack1_l;
1899
      unpack_h = gen_unpack1_h;
1900
      plus = gen_addv4hi3;
1901
      break;
1902
    case V4HImode:
1903
      unpack_l = gen_unpack2_l;
1904
      unpack_h = gen_unpack2_h;
1905
      plus = gen_addv2si3;
1906
      break;
1907
    default:
1908
      gcc_unreachable ();
1909
    }
1910
 
1911
  /* Fill in x with the sign extension of each element in op1.  */
1912
  if (unsignedp)
1913
    x = CONST0_RTX (mode);
1914
  else
1915
    {
1916
      bool neg;
1917
 
1918
      x = gen_reg_rtx (mode);
1919
 
1920
      neg = ia64_expand_vecint_compare (LT, mode, x, operands[1],
1921
                                        CONST0_RTX (mode));
1922
      gcc_assert (!neg);
1923
    }
1924
 
1925
  l = gen_reg_rtx (wmode);
1926
  h = gen_reg_rtx (wmode);
1927
  s = gen_reg_rtx (wmode);
1928
 
1929
  emit_insn (unpack_l (gen_lowpart (mode, l), operands[1], x));
1930
  emit_insn (unpack_h (gen_lowpart (mode, h), operands[1], x));
1931
  emit_insn (plus (s, l, operands[2]));
1932
  emit_insn (plus (operands[0], h, s));
1933
}
1934
 
1935
/* Emit a signed or unsigned V8QI dot product operation.  */
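/* Roughly, OPERANDS[0] (V2SImode) = OPERANDS[3] + the widened products
   OPERANDS[1][i] * OPERANDS[2][i] distributed over the two result lanes;
   as with the widening sum, how the partial sums are split between lanes
   only matters up to the final reduction.  */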
1936
 
1937
void
1938
ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp)
1939
{
1940
  rtx l1, l2, h1, h2, x1, x2, p1, p2, p3, p4, s1, s2, s3;
1941
 
1942
  /* Fill in x1 and x2 with the sign extension of each element.  */
1943
  if (unsignedp)
1944
    x1 = x2 = CONST0_RTX (V8QImode);
1945
  else
1946
    {
1947
      bool neg;
1948
 
1949
      x1 = gen_reg_rtx (V8QImode);
1950
      x2 = gen_reg_rtx (V8QImode);
1951
 
1952
      neg = ia64_expand_vecint_compare (LT, V8QImode, x1, operands[1],
1953
                                        CONST0_RTX (V8QImode));
1954
      gcc_assert (!neg);
1955
      neg = ia64_expand_vecint_compare (LT, V8QImode, x2, operands[2],
1956
                                        CONST0_RTX (V8QImode));
1957
      gcc_assert (!neg);
1958
    }
1959
 
1960
  l1 = gen_reg_rtx (V4HImode);
1961
  l2 = gen_reg_rtx (V4HImode);
1962
  h1 = gen_reg_rtx (V4HImode);
1963
  h2 = gen_reg_rtx (V4HImode);
1964
 
1965
  emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l1), operands[1], x1));
1966
  emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l2), operands[2], x2));
1967
  emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h1), operands[1], x1));
1968
  emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h2), operands[2], x2));
1969
 
1970
  p1 = gen_reg_rtx (V2SImode);
1971
  p2 = gen_reg_rtx (V2SImode);
1972
  p3 = gen_reg_rtx (V2SImode);
1973
  p4 = gen_reg_rtx (V2SImode);
1974
  emit_insn (gen_pmpy2_r (p1, l1, l2));
1975
  emit_insn (gen_pmpy2_l (p2, l1, l2));
1976
  emit_insn (gen_pmpy2_r (p3, h1, h2));
1977
  emit_insn (gen_pmpy2_l (p4, h1, h2));
1978
 
1979
  s1 = gen_reg_rtx (V2SImode);
1980
  s2 = gen_reg_rtx (V2SImode);
1981
  s3 = gen_reg_rtx (V2SImode);
1982
  emit_insn (gen_addv2si3 (s1, p1, p2));
1983
  emit_insn (gen_addv2si3 (s2, p3, p4));
1984
  emit_insn (gen_addv2si3 (s3, s1, operands[3]));
1985
  emit_insn (gen_addv2si3 (operands[0], s2, s3));
1986
}
1987
 
1988
/* Emit the appropriate sequence for a call.  */
1989
 
1990
void
1991
ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
1992
                  int sibcall_p)
1993
{
1994
  rtx insn, b0;
1995
 
1996
  addr = XEXP (addr, 0);
1997
  addr = convert_memory_address (DImode, addr);
1998
  b0 = gen_rtx_REG (DImode, R_BR (0));
1999
 
2000
  /* ??? Should do this for functions known to bind local too.  */
2001
  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
2002
    {
2003
      if (sibcall_p)
2004
        insn = gen_sibcall_nogp (addr);
2005
      else if (! retval)
2006
        insn = gen_call_nogp (addr, b0);
2007
      else
2008
        insn = gen_call_value_nogp (retval, addr, b0);
2009
      insn = emit_call_insn (insn);
2010
    }
2011
  else
2012
    {
2013
      if (sibcall_p)
2014
        insn = gen_sibcall_gp (addr);
2015
      else if (! retval)
2016
        insn = gen_call_gp (addr, b0);
2017
      else
2018
        insn = gen_call_value_gp (retval, addr, b0);
2019
      insn = emit_call_insn (insn);
2020
 
2021
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
2022
    }
2023
 
2024
  if (sibcall_p)
2025
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
2026
 
2027
  if (TARGET_ABI_OPEN_VMS)
2028
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
2029
             gen_rtx_REG (DImode, GR_REG (25)));
2030
}
2031
 
2032
static void
2033
reg_emitted (enum ia64_frame_regs r)
2034
{
2035
  if (emitted_frame_related_regs[r] == 0)
2036
    emitted_frame_related_regs[r] = current_frame_info.r[r];
2037
  else
2038
    gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]);
2039
}
2040
 
2041
static int
2042
get_reg (enum ia64_frame_regs r)
2043
{
2044
  reg_emitted (r);
2045
  return current_frame_info.r[r];
2046
}
2047
 
2048
static bool
2049
is_emitted (int regno)
2050
{
2051
  unsigned int r;
2052
 
2053
  for (r = reg_fp; r < number_of_ia64_frame_regs; r++)
2054
    if (emitted_frame_related_regs[r] == regno)
2055
      return true;
2056
  return false;
2057
}
2058
 
2059
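/* Recover the current function's gp (r1) after a call that may have
   clobbered it: either copy it back from the general register chosen to
   hold it, or reload it from its save slot, addressed relative to the
   frame or stack pointer.  */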
void
2060
ia64_reload_gp (void)
2061
{
2062
  rtx tmp;
2063
 
2064
  if (current_frame_info.r[reg_save_gp])
2065
    {
2066
      tmp = gen_rtx_REG (DImode, get_reg (reg_save_gp));
2067
    }
2068
  else
2069
    {
2070
      HOST_WIDE_INT offset;
2071
      rtx offset_r;
2072
 
2073
      offset = (current_frame_info.spill_cfa_off
2074
                + current_frame_info.spill_size);
2075
      if (frame_pointer_needed)
2076
        {
2077
          tmp = hard_frame_pointer_rtx;
2078
          offset = -offset;
2079
        }
2080
      else
2081
        {
2082
          tmp = stack_pointer_rtx;
2083
          offset = current_frame_info.total_size - offset;
2084
        }
2085
 
2086
      offset_r = GEN_INT (offset);
2087
      if (satisfies_constraint_I (offset_r))
2088
        emit_insn (gen_adddi3 (pic_offset_table_rtx, tmp, offset_r));
2089
      else
2090
        {
2091
          emit_move_insn (pic_offset_table_rtx, offset_r);
2092
          emit_insn (gen_adddi3 (pic_offset_table_rtx,
2093
                                 pic_offset_table_rtx, tmp));
2094
        }
2095
 
2096
      tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
2097
    }
2098
 
2099
  emit_move_insn (pic_offset_table_rtx, tmp);
2100
}
2101
 
2102
void
2103
ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
2104
                 rtx scratch_b, int noreturn_p, int sibcall_p)
2105
{
2106
  rtx insn;
2107
  bool is_desc = false;
2108
 
2109
  /* If we find we're calling through a register, then we're actually
2110
     calling through a descriptor, so load up the values.  */
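  /* An ia64 function descriptor is a two-word object: the entry address
     followed by the callee's gp value.  The two loads below fetch both
     words, post-incrementing ADDR between them.  */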
2111
  if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
2112
    {
2113
      rtx tmp;
2114
      bool addr_dead_p;
2115
 
2116
      /* ??? We are currently constrained to *not* use peep2, because
2117
         we can legitimately change the global lifetime of the GP
2118
         (in the form of killing where previously live).  This is
2119
         because a call through a descriptor doesn't use the previous
2120
         value of the GP, while a direct call does, and we do not
2121
         commit to either form until the split here.
2122
 
2123
         That said, this means that we lack precise life info for
2124
         whether ADDR is dead after this call.  This is not terribly
2125
         important, since we can fix things up essentially for free
2126
         with the POST_DEC below, but it's nice to not use it when we
2127
         can immediately tell it's not necessary.  */
2128
      addr_dead_p = ((noreturn_p || sibcall_p
2129
                      || TEST_HARD_REG_BIT (regs_invalidated_by_call,
2130
                                            REGNO (addr)))
2131
                     && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
2132
 
2133
      /* Load the code address into scratch_b.  */
2134
      tmp = gen_rtx_POST_INC (Pmode, addr);
2135
      tmp = gen_rtx_MEM (Pmode, tmp);
2136
      emit_move_insn (scratch_r, tmp);
2137
      emit_move_insn (scratch_b, scratch_r);
2138
 
2139
      /* Load the GP address.  If ADDR is not dead here, then we must
2140
         revert the change made above via the POST_INCREMENT.  */
2141
      if (!addr_dead_p)
2142
        tmp = gen_rtx_POST_DEC (Pmode, addr);
2143
      else
2144
        tmp = addr;
2145
      tmp = gen_rtx_MEM (Pmode, tmp);
2146
      emit_move_insn (pic_offset_table_rtx, tmp);
2147
 
2148
      is_desc = true;
2149
      addr = scratch_b;
2150
    }
2151
 
2152
  if (sibcall_p)
2153
    insn = gen_sibcall_nogp (addr);
2154
  else if (retval)
2155
    insn = gen_call_value_nogp (retval, addr, retaddr);
2156
  else
2157
    insn = gen_call_nogp (addr, retaddr);
2158
  emit_call_insn (insn);
2159
 
2160
  if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
2161
    ia64_reload_gp ();
2162
}
2163
 
2164
/* Expand an atomic operation.  We want to perform MEM <CODE>= VAL atomically.
2165
 
2166
   This differs from the generic code in that we know about the zero-extending
2167
   properties of cmpxchg, and the zero-extending requirements of ar.ccv.  We
2168
   also know that ld.acq+cmpxchg.rel equals a full barrier.
2169
 
2170
   The loop we want to generate looks like
2171
 
2172
        cmp_reg = mem;
2173
      label:
2174
        old_reg = cmp_reg;
2175
        new_reg = cmp_reg op val;
2176
        cmp_reg = compare-and-swap(mem, old_reg, new_reg)
2177
        if (cmp_reg != old_reg)
2178
          goto label;
2179
 
2180
   Note that we only do the plain load from memory once.  Subsequent
2181
   iterations use the value loaded by the compare-and-swap pattern.  */
2182
 
2183
void
2184
ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
2185
                       rtx old_dst, rtx new_dst)
2186
{
2187
  enum machine_mode mode = GET_MODE (mem);
2188
  rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
2189
  enum insn_code icode;
2190
 
2191
  /* Special case for using fetchadd.  */
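  /* For instance, an atomic add of 1 to an SImode or DImode location can be
     emitted as a single fetchadd4.acq or fetchadd8.acq; this path applies
     only when VAL is one of the small immediates that fetchadd accepts
     (+/-1, +/-4, +/-8, +/-16), which is what fetchadd_operand checks.  */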
2192
  if ((mode == SImode || mode == DImode)
2193
      && (code == PLUS || code == MINUS)
2194
      && fetchadd_operand (val, mode))
2195
    {
2196
      if (code == MINUS)
2197
        val = GEN_INT (-INTVAL (val));
2198
 
2199
      if (!old_dst)
2200
        old_dst = gen_reg_rtx (mode);
2201
 
2202
      emit_insn (gen_memory_barrier ());
2203
 
2204
      if (mode == SImode)
2205
        icode = CODE_FOR_fetchadd_acq_si;
2206
      else
2207
        icode = CODE_FOR_fetchadd_acq_di;
2208
      emit_insn (GEN_FCN (icode) (old_dst, mem, val));
2209
 
2210
      if (new_dst)
2211
        {
2212
          new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
2213
                                         true, OPTAB_WIDEN);
2214
          if (new_reg != new_dst)
2215
            emit_move_insn (new_dst, new_reg);
2216
        }
2217
      return;
2218
    }
2219
 
2220
  /* Because of the volatile mem read, we get an ld.acq, which is the
2221
     front half of the full barrier.  The end half is the cmpxchg.rel.  */
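  /* The cmpxchg contract relied on below: cmpxchg.rel compares MEM with
     ar.ccv and stores NEW_REG only if they match, always returning the old
     memory value, which then seeds the next iteration of the retry loop.  */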
2222
  gcc_assert (MEM_VOLATILE_P (mem));
2223
 
2224
  old_reg = gen_reg_rtx (DImode);
2225
  cmp_reg = gen_reg_rtx (DImode);
2226
  label = gen_label_rtx ();
2227
 
2228
  if (mode != DImode)
2229
    {
2230
      val = simplify_gen_subreg (DImode, val, mode, 0);
2231
      emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
2232
    }
2233
  else
2234
    emit_move_insn (cmp_reg, mem);
2235
 
2236
  emit_label (label);
2237
 
2238
  ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
2239
  emit_move_insn (old_reg, cmp_reg);
2240
  emit_move_insn (ar_ccv, cmp_reg);
2241
 
2242
  if (old_dst)
2243
    emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
2244
 
2245
  new_reg = cmp_reg;
2246
  if (code == NOT)
2247
    {
2248
      new_reg = expand_simple_binop (DImode, AND, new_reg, val, NULL_RTX,
2249
                                     true, OPTAB_DIRECT);
2250
      new_reg = expand_simple_unop (DImode, code, new_reg, NULL_RTX, true);
2251
    }
2252
  else
2253
    new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
2254
                                   true, OPTAB_DIRECT);
2255
 
2256
  if (mode != DImode)
2257
    new_reg = gen_lowpart (mode, new_reg);
2258
  if (new_dst)
2259
    emit_move_insn (new_dst, new_reg);
2260
 
2261
  switch (mode)
2262
    {
2263
    case QImode:  icode = CODE_FOR_cmpxchg_rel_qi;  break;
2264
    case HImode:  icode = CODE_FOR_cmpxchg_rel_hi;  break;
2265
    case SImode:  icode = CODE_FOR_cmpxchg_rel_si;  break;
2266
    case DImode:  icode = CODE_FOR_cmpxchg_rel_di;  break;
2267
    default:
2268
      gcc_unreachable ();
2269
    }
2270
 
2271
  emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
2272
 
2273
  emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
2274
}
2275
 
2276
/* Begin the assembly file.  */
2277
 
2278
static void
2279
ia64_file_start (void)
2280
{
2281
  /* Variable tracking should be run after all optimizations which change order
2282
     of insns.  It also needs a valid CFG.  This can't be done in
2283
     ia64_override_options, because flag_var_tracking is finalized after
2284
     that.  */
2285
  ia64_flag_var_tracking = flag_var_tracking;
2286
  flag_var_tracking = 0;
2287
 
2288
  default_file_start ();
2289
  emit_safe_across_calls ();
2290
}
2291
 
2292
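/* Emit the .pred.safe_across_calls directive, listing the ranges of
   predicate registers that are preserved across calls; with the usual ia64
   conventions this typically comes out as
   ".pred.safe_across_calls p1-p5,p16-p63".  */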
void
2293
emit_safe_across_calls (void)
2294
{
2295
  unsigned int rs, re;
2296
  int out_state;
2297
 
2298
  rs = 1;
2299
  out_state = 0;
2300
  while (1)
2301
    {
2302
      while (rs < 64 && call_used_regs[PR_REG (rs)])
2303
        rs++;
2304
      if (rs >= 64)
2305
        break;
2306
      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
2307
        continue;
2308
      if (out_state == 0)
2309
        {
2310
          fputs ("\t.pred.safe_across_calls ", asm_out_file);
2311
          out_state = 1;
2312
        }
2313
      else
2314
        fputc (',', asm_out_file);
2315
      if (re == rs + 1)
2316
        fprintf (asm_out_file, "p%u", rs);
2317
      else
2318
        fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
2319
      rs = re + 1;
2320
    }
2321
  if (out_state)
2322
    fputc ('\n', asm_out_file);
2323
}
2324
 
2325
/* Globalize a declaration.  */
2326
 
2327
static void
2328
ia64_globalize_decl_name (FILE * stream, tree decl)
2329
{
2330
  const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
2331
  tree version_attr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl));
2332
  if (version_attr)
2333
    {
2334
      tree v = TREE_VALUE (TREE_VALUE (version_attr));
2335
      const char *p = TREE_STRING_POINTER (v);
2336
      fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", name, name, p);
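      /* For example, __attribute__((version_id ("20040821"))) on a function
         foo produces:  .alias foo#, "foo{20040821}"  */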
2337
    }
2338
  targetm.asm_out.globalize_label (stream, name);
2339
  if (TREE_CODE (decl) == FUNCTION_DECL)
2340
    ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
2341
}
2342
 
2343
/* Helper function for ia64_compute_frame_size: find an appropriate general
2344
   register to spill special register R to.  Registers already chosen by
2345
   this routine are tracked in current_frame_info and emitted_frame_related_regs.
2346
   TRY_LOCALS is true if we should attempt to locate a local regnum.  */
2347
 
2348
static int
2349
find_gr_spill (enum ia64_frame_regs r, int try_locals)
2350
{
2351
  int regno;
2352
 
2353
  if (emitted_frame_related_regs[r] != 0)
2354
    {
2355
      regno = emitted_frame_related_regs[r];
2356
      if (regno >= LOC_REG (0) && regno < LOC_REG (80 - frame_pointer_needed)
2357
          && current_frame_info.n_local_regs < regno - LOC_REG (0) + 1)
2358
        current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2359
      else if (current_function_is_leaf
2360
               && regno >= GR_REG (1) && regno <= GR_REG (31))
2361
        current_frame_info.gr_used_mask |= 1 << regno;
2362
 
2363
      return regno;
2364
    }
2365
 
2366
  /* If this is a leaf function, first try an otherwise unused
2367
     call-clobbered register.  */
2368
  if (current_function_is_leaf)
2369
    {
2370
      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2371
        if (! df_regs_ever_live_p (regno)
2372
            && call_used_regs[regno]
2373
            && ! fixed_regs[regno]
2374
            && ! global_regs[regno]
2375
            && ((current_frame_info.gr_used_mask >> regno) & 1) == 0
2376
            && ! is_emitted (regno))
2377
          {
2378
            current_frame_info.gr_used_mask |= 1 << regno;
2379
            return regno;
2380
          }
2381
    }
2382
 
2383
  if (try_locals)
2384
    {
2385
      regno = current_frame_info.n_local_regs;
2386
      /* If there is a frame pointer, then we can't use loc79, because
2387
         that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
2388
         reg_name switching code in ia64_expand_prologue.  */
2389
      while (regno < (80 - frame_pointer_needed))
2390
        if (! is_emitted (LOC_REG (regno++)))
2391
          {
2392
            current_frame_info.n_local_regs = regno;
2393
            return LOC_REG (regno - 1);
2394
          }
2395
    }
2396
 
2397
  /* Failed to find a general register to spill to.  Must use stack.  */
2398
  return 0;
2399
}
2400
 
2401
/* In order to make for nice schedules, we try to allocate every temporary
2402
   to a different register.  We must of course stay away from call-saved,
2403
   fixed, and global registers.  We must also stay away from registers
2404
   allocated in current_frame_info.gr_used_mask, since those include regs
2405
   used all through the prologue.
2406
 
2407
   Any register allocated here must be used immediately.  The idea is to
2408
   aid scheduling, not to solve data flow problems.  */
2409
 
2410
static int last_scratch_gr_reg;
2411
 
2412
static int
2413
next_scratch_gr_reg (void)
2414
{
2415
  int i, regno;
2416
 
2417
  for (i = 0; i < 32; ++i)
2418
    {
2419
      regno = (last_scratch_gr_reg + i + 1) & 31;
2420
      if (call_used_regs[regno]
2421
          && ! fixed_regs[regno]
2422
          && ! global_regs[regno]
2423
          && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2424
        {
2425
          last_scratch_gr_reg = regno;
2426
          return regno;
2427
        }
2428
    }
2429
 
2430
  /* There must be _something_ available.  */
2431
  gcc_unreachable ();
2432
}
2433
 
2434
/* Helper function for ia64_compute_frame_size, called through
2435
   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */
2436
 
2437
static void
2438
mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
2439
{
2440
  unsigned int regno = REGNO (reg);
2441
  if (regno < 32)
2442
    {
2443
      unsigned int i, n = hard_regno_nregs[regno][GET_MODE (reg)];
2444
      for (i = 0; i < n; ++i)
2445
        current_frame_info.gr_used_mask |= 1 << (regno + i);
2446
    }
2447
}
2448
 
2449
 
2450
/* Compute the frame layout for the current function and record it in
2451
   current_frame_info.  SIZE is the number of bytes of space needed for
2452
   local variables.  */
2453
 
2454
static void
2455
ia64_compute_frame_size (HOST_WIDE_INT size)
2456
{
2457
  HOST_WIDE_INT total_size;
2458
  HOST_WIDE_INT spill_size = 0;
2459
  HOST_WIDE_INT extra_spill_size = 0;
2460
  HOST_WIDE_INT pretend_args_size;
2461
  HARD_REG_SET mask;
2462
  int n_spilled = 0;
2463
  int spilled_gr_p = 0;
2464
  int spilled_fr_p = 0;
2465
  unsigned int regno;
2466
  int min_regno;
2467
  int max_regno;
2468
  int i;
2469
 
2470
  if (current_frame_info.initialized)
2471
    return;
2472
 
2473
  memset (&current_frame_info, 0, sizeof current_frame_info);
2474
  CLEAR_HARD_REG_SET (mask);
2475
 
2476
  /* Don't allocate scratches to the return register.  */
2477
  diddle_return_value (mark_reg_gr_used_mask, NULL);
2478
 
2479
  /* Don't allocate scratches to the EH scratch registers.  */
2480
  if (cfun->machine->ia64_eh_epilogue_sp)
2481
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2482
  if (cfun->machine->ia64_eh_epilogue_bsp)
2483
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
2484
 
2485
  /* Find the size of the register stack frame.  We have only 80 local
2486
     registers, because we reserve 8 for the inputs and 8 for the
2487
     outputs.  */
2488
 
2489
  /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2490
     since we'll be adjusting that down later.  */
2491
  regno = LOC_REG (78) + ! frame_pointer_needed;
2492
  for (; regno >= LOC_REG (0); regno--)
2493
    if (df_regs_ever_live_p (regno) && !is_emitted (regno))
2494
      break;
2495
  current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2496
 
2497
  /* For functions marked with the syscall_linkage attribute, we must mark
2498
     all eight input registers as in use, so that locals aren't visible to
2499
     the caller.  */
2500
 
2501
  if (cfun->machine->n_varargs > 0
2502
      || lookup_attribute ("syscall_linkage",
2503
                           TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
2504
    current_frame_info.n_input_regs = 8;
2505
  else
2506
    {
2507
      for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
2508
        if (df_regs_ever_live_p (regno))
2509
          break;
2510
      current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2511
    }
2512
 
2513
  for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
2514
    if (df_regs_ever_live_p (regno))
2515
      break;
2516
  i = regno - OUT_REG (0) + 1;
2517
 
2518
#ifndef PROFILE_HOOK
2519
  /* When -p profiling, we need one output register for the mcount argument.
2520
     Likewise for -a profiling for the bb_init_func argument.  For -ax
2521
     profiling, we need two output registers for the two bb_init_trace_func
2522
     arguments.  */
2523
  if (crtl->profile)
2524
    i = MAX (i, 1);
2525
#endif
2526
  current_frame_info.n_output_regs = i;
2527
 
2528
  /* ??? No rotating register support yet.  */
2529
  current_frame_info.n_rotate_regs = 0;
2530
 
2531
  /* Discover which registers need spilling, and how much room that
2532
     will take.  Begin with floating point and general registers,
2533
     which will always wind up on the stack.  */
2534
 
2535
  for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
2536
    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2537
      {
2538
        SET_HARD_REG_BIT (mask, regno);
2539
        spill_size += 16;
2540
        n_spilled += 1;
2541
        spilled_fr_p = 1;
2542
      }
2543
 
2544
  for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2545
    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2546
      {
2547
        SET_HARD_REG_BIT (mask, regno);
2548
        spill_size += 8;
2549
        n_spilled += 1;
2550
        spilled_gr_p = 1;
2551
      }
2552
 
2553
  for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
2554
    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2555
      {
2556
        SET_HARD_REG_BIT (mask, regno);
2557
        spill_size += 8;
2558
        n_spilled += 1;
2559
      }
2560
 
2561
  /* Now come all special registers that might get saved in other
2562
     general registers.  */
2563
 
2564
  if (frame_pointer_needed)
2565
    {
2566
      current_frame_info.r[reg_fp] = find_gr_spill (reg_fp, 1);
2567
      /* If we did not get a register, then we take LOC79.  This is guaranteed
2568
         to be free, even if regs_ever_live is already set, because this is
2569
         HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
2570
         as we don't count loc79 above.  */
2571
      if (current_frame_info.r[reg_fp] == 0)
2572
        {
2573
          current_frame_info.r[reg_fp] = LOC_REG (79);
2574
          current_frame_info.n_local_regs = LOC_REG (79) - LOC_REG (0) + 1;
2575
        }
2576
    }
2577
 
2578
  if (! current_function_is_leaf)
2579
    {
2580
      /* Emit a save of BR0 if we call other functions.  Do this even
2581
         if this function doesn't return, as EH depends on this to be
2582
         able to unwind the stack.  */
2583
      SET_HARD_REG_BIT (mask, BR_REG (0));
2584
 
2585
      current_frame_info.r[reg_save_b0] = find_gr_spill (reg_save_b0, 1);
2586
      if (current_frame_info.r[reg_save_b0] == 0)
2587
        {
2588
          extra_spill_size += 8;
2589
          n_spilled += 1;
2590
        }
2591
 
2592
      /* Similarly for ar.pfs.  */
2593
      SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2594
      current_frame_info.r[reg_save_ar_pfs] = find_gr_spill (reg_save_ar_pfs, 1);
2595
      if (current_frame_info.r[reg_save_ar_pfs] == 0)
2596
        {
2597
          extra_spill_size += 8;
2598
          n_spilled += 1;
2599
        }
2600
 
2601
      /* Similarly for gp.  Note that if we're calling setjmp, the stacked
2602
         registers are clobbered, so we fall back to the stack.  */
2603
      current_frame_info.r[reg_save_gp]
2604
        = (cfun->calls_setjmp ? 0 : find_gr_spill (reg_save_gp, 1));
2605
      if (current_frame_info.r[reg_save_gp] == 0)
2606
        {
2607
          SET_HARD_REG_BIT (mask, GR_REG (1));
2608
          spill_size += 8;
2609
          n_spilled += 1;
2610
        }
2611
    }
2612
  else
2613
    {
2614
      if (df_regs_ever_live_p (BR_REG (0)) && ! call_used_regs[BR_REG (0)])
2615
        {
2616
          SET_HARD_REG_BIT (mask, BR_REG (0));
2617
          extra_spill_size += 8;
2618
          n_spilled += 1;
2619
        }
2620
 
2621
      if (df_regs_ever_live_p (AR_PFS_REGNUM))
2622
        {
2623
          SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2624
          current_frame_info.r[reg_save_ar_pfs]
2625
            = find_gr_spill (reg_save_ar_pfs, 1);
2626
          if (current_frame_info.r[reg_save_ar_pfs] == 0)
2627
            {
2628
              extra_spill_size += 8;
2629
              n_spilled += 1;
2630
            }
2631
        }
2632
    }
2633
 
2634
  /* Unwind descriptor hackery: things are most efficient if we allocate
2635
     consecutive GR save registers for RP, PFS, FP in that order. However,
2636
     it is absolutely critical that FP get the only hard register that's
2637
     guaranteed to be free, so we allocated it first.  If all three did
2638
     happen to be allocated hard regs, and are consecutive, rearrange them
2639
     into the preferred order now.
2640
 
2641
     If we have already emitted code for any of those registers,
2642
     then it's already too late to change.  */
2643
  min_regno = MIN (current_frame_info.r[reg_fp],
2644
                   MIN (current_frame_info.r[reg_save_b0],
2645
                        current_frame_info.r[reg_save_ar_pfs]));
2646
  max_regno = MAX (current_frame_info.r[reg_fp],
2647
                   MAX (current_frame_info.r[reg_save_b0],
2648
                        current_frame_info.r[reg_save_ar_pfs]));
2649
  if (min_regno > 0
2650
      && min_regno + 2 == max_regno
2651
      && (current_frame_info.r[reg_fp] == min_regno + 1
2652
          || current_frame_info.r[reg_save_b0] == min_regno + 1
2653
          || current_frame_info.r[reg_save_ar_pfs] == min_regno + 1)
2654
      && (emitted_frame_related_regs[reg_save_b0] == 0
2655
          || emitted_frame_related_regs[reg_save_b0] == min_regno)
2656
      && (emitted_frame_related_regs[reg_save_ar_pfs] == 0
2657
          || emitted_frame_related_regs[reg_save_ar_pfs] == min_regno + 1)
2658
      && (emitted_frame_related_regs[reg_fp] == 0
2659
          || emitted_frame_related_regs[reg_fp] == min_regno + 2))
2660
    {
2661
      current_frame_info.r[reg_save_b0] = min_regno;
2662
      current_frame_info.r[reg_save_ar_pfs] = min_regno + 1;
2663
      current_frame_info.r[reg_fp] = min_regno + 2;
2664
    }
2665
 
2666
  /* See if we need to store the predicate register block.  */
2667
  for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2668
    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2669
      break;
2670
  if (regno <= PR_REG (63))
2671
    {
2672
      SET_HARD_REG_BIT (mask, PR_REG (0));
2673
      current_frame_info.r[reg_save_pr] = find_gr_spill (reg_save_pr, 1);
2674
      if (current_frame_info.r[reg_save_pr] == 0)
2675
        {
2676
          extra_spill_size += 8;
2677
          n_spilled += 1;
2678
        }
2679
 
2680
      /* ??? Mark them all as used so that register renaming and such
2681
         are free to use them.  */
2682
      for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2683
        df_set_regs_ever_live (regno, true);
2684
    }
2685
 
2686
  /* If we're forced to use st8.spill, we're forced to save and restore
2687
     ar.unat as well.  The check for existing liveness allows inline asm
2688
     to touch ar.unat.  */
2689
  if (spilled_gr_p || cfun->machine->n_varargs
2690
      || df_regs_ever_live_p (AR_UNAT_REGNUM))
2691
    {
2692
      df_set_regs_ever_live (AR_UNAT_REGNUM, true);
2693
      SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2694
      current_frame_info.r[reg_save_ar_unat]
2695
        = find_gr_spill (reg_save_ar_unat, spill_size == 0);
2696
      if (current_frame_info.r[reg_save_ar_unat] == 0)
2697
        {
2698
          extra_spill_size += 8;
2699
          n_spilled += 1;
2700
        }
2701
    }
2702
 
2703
  if (df_regs_ever_live_p (AR_LC_REGNUM))
2704
    {
2705
      SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2706
      current_frame_info.r[reg_save_ar_lc]
2707
        = find_gr_spill (reg_save_ar_lc, spill_size == 0);
2708
      if (current_frame_info.r[reg_save_ar_lc] == 0)
2709
        {
2710
          extra_spill_size += 8;
2711
          n_spilled += 1;
2712
        }
2713
    }
2714
 
2715
  /* If we have an odd number of words of pretend arguments written to
2716
     the stack, then the FR save area will be unaligned.  We round the
2717
     size of this area up to keep things 16 byte aligned.  */
2718
  if (spilled_fr_p)
2719
    pretend_args_size = IA64_STACK_ALIGN (crtl->args.pretend_args_size);
2720
  else
2721
    pretend_args_size = crtl->args.pretend_args_size;
2722
 
2723
  total_size = (spill_size + extra_spill_size + size + pretend_args_size
2724
                + crtl->outgoing_args_size);
2725
  total_size = IA64_STACK_ALIGN (total_size);
2726
 
2727
  /* We always use the 16-byte scratch area provided by the caller, but
2728
     if we are a leaf function, there's no one to which we need to provide
2729
     a scratch area.  */
2730
  if (current_function_is_leaf)
2731
    total_size = MAX (0, total_size - 16);
2732
 
2733
  current_frame_info.total_size = total_size;
2734
  current_frame_info.spill_cfa_off = pretend_args_size - 16;
2735
  current_frame_info.spill_size = spill_size;
2736
  current_frame_info.extra_spill_size = extra_spill_size;
2737
  COPY_HARD_REG_SET (current_frame_info.mask, mask);
2738
  current_frame_info.n_spilled = n_spilled;
2739
  current_frame_info.initialized = reload_completed;
2740
}
2741
 
2742
/* Worker function for TARGET_CAN_ELIMINATE.  */
2743
 
2744
bool
2745
ia64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
2746
{
2747
  return (to == BR_REG (0) ? current_function_is_leaf : true);
2748
}
2749
 
2750
/* Compute the initial difference between the specified pair of registers.  */
2751
 
2752
HOST_WIDE_INT
2753
ia64_initial_elimination_offset (int from, int to)
2754
{
2755
  HOST_WIDE_INT offset;
2756
 
2757
  ia64_compute_frame_size (get_frame_size ());
2758
  switch (from)
2759
    {
2760
    case FRAME_POINTER_REGNUM:
2761
      switch (to)
2762
        {
2763
        case HARD_FRAME_POINTER_REGNUM:
2764
          if (current_function_is_leaf)
2765
            offset = -current_frame_info.total_size;
2766
          else
2767
            offset = -(current_frame_info.total_size
2768
                       - crtl->outgoing_args_size - 16);
2769
          break;
2770
 
2771
        case STACK_POINTER_REGNUM:
2772
          if (current_function_is_leaf)
2773
            offset = 0;
2774
          else
2775
            offset = 16 + crtl->outgoing_args_size;
2776
          break;
2777
 
2778
        default:
2779
          gcc_unreachable ();
2780
        }
2781
      break;
2782
 
2783
    case ARG_POINTER_REGNUM:
2784
      /* Arguments start above the 16 byte save area, unless stdarg
2785
         in which case we store through the 16 byte save area.  */
2786
      switch (to)
2787
        {
2788
        case HARD_FRAME_POINTER_REGNUM:
2789
          offset = 16 - crtl->args.pretend_args_size;
2790
          break;
2791
 
2792
        case STACK_POINTER_REGNUM:
2793
          offset = (current_frame_info.total_size
2794
                    + 16 - crtl->args.pretend_args_size);
2795
          break;
2796
 
2797
        default:
2798
          gcc_unreachable ();
2799
        }
2800
      break;
2801
 
2802
    default:
2803
      gcc_unreachable ();
2804
    }
2805
 
2806
  return offset;
2807
}
2808
 
2809
/* If there are more than a trivial number of register spills, we use
2810
   two interleaved iterators so that we can get two memory references
2811
   per insn group.
2812
 
2813
   In order to simplify things in the prologue and epilogue expanders,
2814
   we use helper functions to fix up the memory references after the
2815
   fact with the appropriate offsets to a POST_MODIFY memory mode.
2816
   The following data structure tracks the state of the two iterators
2817
   while insns are being emitted.  */
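/* Sketch of the scheme: when there are more than two spills, two scratch
   GRs serve as alternating base registers.  When an iterator's next use is
   seen, its previous memory reference is rewritten into a POST_MODIFY that
   steps the base register by the intervening offset (see
   spill_restore_mem below).  */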
2818
 
2819
struct spill_fill_data
2820
{
2821
  rtx init_after;               /* point at which to emit initializations */
2822
  rtx init_reg[2];              /* initial base register */
2823
  rtx iter_reg[2];              /* the iterator registers */
2824
  rtx *prev_addr[2];            /* address of last memory use */
2825
  rtx prev_insn[2];             /* the insn corresponding to prev_addr */
2826
  HOST_WIDE_INT prev_off[2];    /* last offset */
2827
  int n_iter;                   /* number of iterators in use */
2828
  int next_iter;                /* next iterator to use */
2829
  unsigned int save_gr_used_mask;
2830
};
2831
 
2832
static struct spill_fill_data spill_fill_data;
2833
 
2834
static void
2835
setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
2836
{
2837
  int i;
2838
 
2839
  spill_fill_data.init_after = get_last_insn ();
2840
  spill_fill_data.init_reg[0] = init_reg;
2841
  spill_fill_data.init_reg[1] = init_reg;
2842
  spill_fill_data.prev_addr[0] = NULL;
2843
  spill_fill_data.prev_addr[1] = NULL;
2844
  spill_fill_data.prev_insn[0] = NULL;
2845
  spill_fill_data.prev_insn[1] = NULL;
2846
  spill_fill_data.prev_off[0] = cfa_off;
2847
  spill_fill_data.prev_off[1] = cfa_off;
2848
  spill_fill_data.next_iter = 0;
2849
  spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
2850
 
2851
  spill_fill_data.n_iter = 1 + (n_spills > 2);
2852
  for (i = 0; i < spill_fill_data.n_iter; ++i)
2853
    {
2854
      int regno = next_scratch_gr_reg ();
2855
      spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
2856
      current_frame_info.gr_used_mask |= 1 << regno;
2857
    }
2858
}
2859
 
2860
static void
2861
finish_spill_pointers (void)
2862
{
2863
  current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
2864
}
2865
 
2866
static rtx
2867
spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
2868
{
2869
  int iter = spill_fill_data.next_iter;
2870
  HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
2871
  rtx disp_rtx = GEN_INT (disp);
2872
  rtx mem;
2873
 
2874
  if (spill_fill_data.prev_addr[iter])
2875
    {
2876
      if (satisfies_constraint_N (disp_rtx))
2877
        {
2878
          *spill_fill_data.prev_addr[iter]
2879
            = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
2880
                                   gen_rtx_PLUS (DImode,
2881
                                                 spill_fill_data.iter_reg[iter],
2882
                                                 disp_rtx));
2883
          add_reg_note (spill_fill_data.prev_insn[iter],
2884
                        REG_INC, spill_fill_data.iter_reg[iter]);
2885
        }
2886
      else
2887
        {
2888
          /* ??? Could use register post_modify for loads.  */
2889
          if (!satisfies_constraint_I (disp_rtx))
2890
            {
2891
              rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2892
              emit_move_insn (tmp, disp_rtx);
2893
              disp_rtx = tmp;
2894
            }
2895
          emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2896
                                 spill_fill_data.iter_reg[iter], disp_rtx));
2897
        }
2898
    }
2899
  /* Micro-optimization: if we've created a frame pointer, it's at
2900
     CFA 0, which may allow the real iterator to be initialized lower,
2901
     slightly increasing parallelism.  Also, if there are few saves
2902
     it may eliminate the iterator entirely.  */
2903
  else if (disp == 0
2904
           && spill_fill_data.init_reg[iter] == stack_pointer_rtx
2905
           && frame_pointer_needed)
2906
    {
2907
      mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
2908
      set_mem_alias_set (mem, get_varargs_alias_set ());
2909
      return mem;
2910
    }
2911
  else
2912
    {
2913
      rtx seq, insn;
2914
 
2915
      if (disp == 0)
2916
        seq = gen_movdi (spill_fill_data.iter_reg[iter],
2917
                         spill_fill_data.init_reg[iter]);
2918
      else
2919
        {
2920
          start_sequence ();
2921
 
2922
          if (!satisfies_constraint_I (disp_rtx))
2923
            {
2924
              rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2925
              emit_move_insn (tmp, disp_rtx);
2926
              disp_rtx = tmp;
2927
            }
2928
 
2929
          emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2930
                                 spill_fill_data.init_reg[iter],
2931
                                 disp_rtx));
2932
 
2933
          seq = get_insns ();
2934
          end_sequence ();
2935
        }
2936
 
2937
      /* Be careful about being the first insn in a sequence.  */
2938
      if (spill_fill_data.init_after)
2939
        insn = emit_insn_after (seq, spill_fill_data.init_after);
2940
      else
2941
        {
2942
          rtx first = get_insns ();
2943
          if (first)
2944
            insn = emit_insn_before (seq, first);
2945
          else
2946
            insn = emit_insn (seq);
2947
        }
2948
      spill_fill_data.init_after = insn;
2949
    }
2950
 
2951
  mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
2952
 
2953
  /* ??? Not all of the spills are for varargs, but some of them are.
2954
     The rest of the spills belong in an alias set of their own.  But
2955
     it doesn't actually hurt to include them here.  */
2956
  set_mem_alias_set (mem, get_varargs_alias_set ());
2957
 
2958
  spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
2959
  spill_fill_data.prev_off[iter] = cfa_off;
2960
 
2961
  if (++iter >= spill_fill_data.n_iter)
2962
    iter = 0;
2963
  spill_fill_data.next_iter = iter;
2964
 
2965
  return mem;
2966
}
2967
 
2968
static void
2969
do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
2970
          rtx frame_reg)
2971
{
2972
  int iter = spill_fill_data.next_iter;
2973
  rtx mem, insn;
2974
 
2975
  mem = spill_restore_mem (reg, cfa_off);
2976
  insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
2977
  spill_fill_data.prev_insn[iter] = insn;
2978
 
2979
  if (frame_reg)
2980
    {
2981
      rtx base;
2982
      HOST_WIDE_INT off;
2983
 
2984
      RTX_FRAME_RELATED_P (insn) = 1;
2985
 
2986
      /* Don't even pretend that the unwind code can intuit its way
2987
         through a pair of interleaved post_modify iterators.  Just
2988
         provide the correct answer.  */
2989
 
2990
      if (frame_pointer_needed)
2991
        {
2992
          base = hard_frame_pointer_rtx;
2993
          off = - cfa_off;
2994
        }
2995
      else
2996
        {
2997
          base = stack_pointer_rtx;
2998
          off = current_frame_info.total_size - cfa_off;
2999
        }
3000
 
3001
      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3002
                    gen_rtx_SET (VOIDmode,
3003
                                 gen_rtx_MEM (GET_MODE (reg),
3004
                                              plus_constant (base, off)),
3005
                                 frame_reg));
3006
    }
3007
}
3008
 
3009
static void
3010
do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
3011
{
3012
  int iter = spill_fill_data.next_iter;
3013
  rtx insn;
3014
 
3015
  insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
3016
                                GEN_INT (cfa_off)));
3017
  spill_fill_data.prev_insn[iter] = insn;
3018
}
3019
 
3020
/* Wrapper functions that discard the CONST_INT spill offset.  These
3021
   exist so that we can give gr_spill/gr_fill the offset they need and
3022
   use a consistent function interface.  */
3023
 
3024
static rtx
3025
gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3026
{
3027
  return gen_movdi (dest, src);
3028
}
3029
 
3030
static rtx
3031
gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3032
{
3033
  return gen_fr_spill (dest, src);
3034
}
3035
 
3036
static rtx
3037
gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3038
{
3039
  return gen_fr_restore (dest, src);
3040
}
3041
 
3042
/* Called after register allocation to add any instructions needed for the
3043
   prologue.  Using a prologue insn is favored compared to putting all of the
3044
   instructions in output_function_prologue(), since it allows the scheduler
3045
   to intermix instructions with the saves of the caller saved registers.  In
3046
   some cases, it might be necessary to emit a barrier instruction as the last
3047
   insn to prevent such scheduling.
3048
 
3049
   Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
3050
   so that the debug info generation code can handle them properly.
3051
 
3052
   The register save area is laid out like so:
3053
   cfa+16
3054
        [ varargs spill area ]
3055
        [ fr register spill area ]
3056
        [ br register spill area ]
3057
        [ ar register spill area ]
3058
        [ pr register spill area ]
3059
        [ gr register spill area ] */
3060
 
3061
/* ??? We get inefficient code when the frame size is larger than can fit in an
3062
   adds instruction.  */
3063
 
3064
void
3065
ia64_expand_prologue (void)
3066
{
3067
  rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
3068
  int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
3069
  rtx reg, alt_reg;
3070
 
3071
  ia64_compute_frame_size (get_frame_size ());
3072
  last_scratch_gr_reg = 15;
3073
 
3074
  if (dump_file)
3075
    {
3076
      fprintf (dump_file, "ia64 frame related registers "
3077
               "recorded in current_frame_info.r[]:\n");
3078
#define PRINTREG(a) if (current_frame_info.r[a]) \
3079
        fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a])
3080
      PRINTREG(reg_fp);
3081
      PRINTREG(reg_save_b0);
3082
      PRINTREG(reg_save_pr);
3083
      PRINTREG(reg_save_ar_pfs);
3084
      PRINTREG(reg_save_ar_unat);
3085
      PRINTREG(reg_save_ar_lc);
3086
      PRINTREG(reg_save_gp);
3087
#undef PRINTREG
3088
    }
3089
 
3090
  /* If there is no epilogue, then we don't need some prologue insns.
3091
     We need to avoid emitting the dead prologue insns, because flow
3092
     will complain about them.  */
3093
  if (optimize)
3094
    {
3095
      edge e;
3096
      edge_iterator ei;
3097
 
3098
      FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
3099
        if ((e->flags & EDGE_FAKE) == 0
3100
            && (e->flags & EDGE_FALLTHRU) != 0)
3101
          break;
3102
      epilogue_p = (e != NULL);
3103
    }
3104
  else
3105
    epilogue_p = 1;
3106
 
3107
  /* Set the local, input, and output register names.  We need to do this
3108
     for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
3109
     half.  If we use in/loc/out register names, then we get assembler errors
3110
     in crtn.S because there is no alloc insn or regstk directive in there.  */
3111
  if (! TARGET_REG_NAMES)
3112
    {
3113
      int inputs = current_frame_info.n_input_regs;
3114
      int locals = current_frame_info.n_local_regs;
3115
      int outputs = current_frame_info.n_output_regs;
3116
 
3117
      for (i = 0; i < inputs; i++)
3118
        reg_names[IN_REG (i)] = ia64_reg_numbers[i];
3119
      for (i = 0; i < locals; i++)
3120
        reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
3121
      for (i = 0; i < outputs; i++)
3122
        reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
3123
    }
3124
 
3125
  /* Set the frame pointer register name.  The regnum is logically loc79,
3126
     but of course we'll not have allocated that many locals.  Rather than
3127
     worrying about renumbering the existing rtxs, we adjust the name.  */
3128
  /* ??? This code means that we can never use one local register when
3129
     there is a frame pointer.  loc79 gets wasted in this case, as it is
3130
     renamed to a register that will never be used.  See also the try_locals
3131
     code in find_gr_spill.  */
3132
  if (current_frame_info.r[reg_fp])
3133
    {
3134
      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3135
      reg_names[HARD_FRAME_POINTER_REGNUM]
3136
        = reg_names[current_frame_info.r[reg_fp]];
3137
      reg_names[current_frame_info.r[reg_fp]] = tmp;
3138
    }
3139
 
3140
  /* We don't need an alloc instruction if we've used no outputs or locals.  */
3141
  if (current_frame_info.n_local_regs == 0
3142
      && current_frame_info.n_output_regs == 0
3143
      && current_frame_info.n_input_regs <= crtl->args.info.int_regs
3144
      && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3145
    {
3146
      /* If there is no alloc, but there are input registers used, then we
3147
         need a .regstk directive.  */
3148
      current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
3149
      ar_pfs_save_reg = NULL_RTX;
3150
    }
3151
  else
3152
    {
3153
      current_frame_info.need_regstk = 0;
3154
 
3155
      if (current_frame_info.r[reg_save_ar_pfs])
3156
        {
3157
          regno = current_frame_info.r[reg_save_ar_pfs];
3158
          reg_emitted (reg_save_ar_pfs);
3159
        }
3160
      else
3161
        regno = next_scratch_gr_reg ();
3162
      ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
3163
 
3164
      insn = emit_insn (gen_alloc (ar_pfs_save_reg,
3165
                                   GEN_INT (current_frame_info.n_input_regs),
3166
                                   GEN_INT (current_frame_info.n_local_regs),
3167
                                   GEN_INT (current_frame_info.n_output_regs),
3168
                                   GEN_INT (current_frame_info.n_rotate_regs)));
3169
      RTX_FRAME_RELATED_P (insn) = (current_frame_info.r[reg_save_ar_pfs] != 0);
3170
    }
3171
 
3172
  /* Set up frame pointer, stack pointer, and spill iterators.  */
3173
 
3174
  n_varargs = cfun->machine->n_varargs;
3175
  setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
3176
                        stack_pointer_rtx, 0);
3177
 
3178
  if (frame_pointer_needed)
3179
    {
3180
      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3181
      RTX_FRAME_RELATED_P (insn) = 1;
3182
    }
3183
 
3184
  if (current_frame_info.total_size != 0)
3185
    {
3186
      rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
3187
      rtx offset;
3188
 
3189
      if (satisfies_constraint_I (frame_size_rtx))
3190
        offset = frame_size_rtx;
3191
      else
3192
        {
3193
          regno = next_scratch_gr_reg ();
3194
          offset = gen_rtx_REG (DImode, regno);
3195
          emit_move_insn (offset, frame_size_rtx);
3196
        }
3197
 
3198
      insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
3199
                                    stack_pointer_rtx, offset));
3200
 
3201
      if (! frame_pointer_needed)
3202
        {
3203
          RTX_FRAME_RELATED_P (insn) = 1;
3204
          if (GET_CODE (offset) != CONST_INT)
3205
            add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3206
                          gen_rtx_SET (VOIDmode,
3207
                                       stack_pointer_rtx,
3208
                                       gen_rtx_PLUS (DImode,
3209
                                                     stack_pointer_rtx,
3210
                                                     frame_size_rtx)));
3211
        }
3212
 
3213
      /* ??? At this point we must generate a magic insn that appears to
3214
         modify the stack pointer, the frame pointer, and all spill
3215
         iterators.  This would allow the most scheduling freedom.  For
3216
         now, just hard stop.  */
3217
      emit_insn (gen_blockage ());
3218
    }
3219
 
3220
  /* Must copy out ar.unat before doing any integer spills.  */
3221
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3222
    {
3223
      if (current_frame_info.r[reg_save_ar_unat])
3224
        {
3225
          ar_unat_save_reg
3226
            = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3227
          reg_emitted (reg_save_ar_unat);
3228
        }
3229
      else
3230
        {
3231
          alt_regno = next_scratch_gr_reg ();
3232
          ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3233
          current_frame_info.gr_used_mask |= 1 << alt_regno;
3234
        }
3235
 
3236
      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3237
      insn = emit_move_insn (ar_unat_save_reg, reg);
3238
      RTX_FRAME_RELATED_P (insn) = (current_frame_info.r[reg_save_ar_unat] != 0);
3239
 
3240
      /* Even if we're not going to generate an epilogue, we still
3241
         need to save the register so that EH works.  */
3242
      if (! epilogue_p && current_frame_info.r[reg_save_ar_unat])
3243
        emit_insn (gen_prologue_use (ar_unat_save_reg));
3244
    }
3245
  else
3246
    ar_unat_save_reg = NULL_RTX;
3247
 
3248
  /* Spill all varargs registers.  Do this before spilling any GR registers,
3249
     since we want the UNAT bits for the GR registers to override the UNAT
3250
     bits from varargs, which we don't care about.  */
3251
 
3252
  cfa_off = -16;
3253
  for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
3254
    {
3255
      reg = gen_rtx_REG (DImode, regno);
3256
      do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
3257
    }
3258
 
3259
  /* Locate the bottom of the register save area.  */
3260
  cfa_off = (current_frame_info.spill_cfa_off
3261
             + current_frame_info.spill_size
3262
             + current_frame_info.extra_spill_size);
3263
 
3264
  /* Save the predicate register block either in a register or in memory.  */
3265
  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3266
    {
3267
      reg = gen_rtx_REG (DImode, PR_REG (0));
3268
      if (current_frame_info.r[reg_save_pr] != 0)
3269
        {
3270
          alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3271
          reg_emitted (reg_save_pr);
3272
          insn = emit_move_insn (alt_reg, reg);
3273
 
3274
          /* ??? Denote pr spill/fill by a DImode move that modifies all
3275
             64 hard registers.  */
3276
          RTX_FRAME_RELATED_P (insn) = 1;
3277
          add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3278
                        gen_rtx_SET (VOIDmode, alt_reg, reg));
3279
 
3280
          /* Even if we're not going to generate an epilogue, we still
3281
             need to save the register so that EH works.  */
3282
          if (! epilogue_p)
3283
            emit_insn (gen_prologue_use (alt_reg));
3284
        }
3285
      else
3286
        {
3287
          alt_regno = next_scratch_gr_reg ();
3288
          alt_reg = gen_rtx_REG (DImode, alt_regno);
3289
          insn = emit_move_insn (alt_reg, reg);
3290
          do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3291
          cfa_off -= 8;
3292
        }
3293
    }
3294
 
3295
  /* Handle AR regs in numerical order.  All of them get special handling.  */
3296
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
3297
      && current_frame_info.r[reg_save_ar_unat] == 0)
3298
    {
3299
      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3300
      do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
3301
      cfa_off -= 8;
3302
    }
3303
 
3304
  /* The alloc insn already copied ar.pfs into a general register.  The
3305
     only thing we have to do now is copy that register to a stack slot
3306
     if we'd not allocated a local register for the job.  */
3307
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
3308
      && current_frame_info.r[reg_save_ar_pfs] == 0)
3309
    {
3310
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3311
      do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
3312
      cfa_off -= 8;
3313
    }
3314
 
3315
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3316
    {
3317
      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3318
      if (current_frame_info.r[reg_save_ar_lc] != 0)
3319
        {
3320
          alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3321
          reg_emitted (reg_save_ar_lc);
3322
          insn = emit_move_insn (alt_reg, reg);
3323
          RTX_FRAME_RELATED_P (insn) = 1;
3324
 
3325
          /* Even if we're not going to generate an epilogue, we still
3326
             need to save the register so that EH works.  */
3327
          if (! epilogue_p)
3328
            emit_insn (gen_prologue_use (alt_reg));
3329
        }
3330
      else
3331
        {
3332
          alt_regno = next_scratch_gr_reg ();
3333
          alt_reg = gen_rtx_REG (DImode, alt_regno);
3334
          emit_move_insn (alt_reg, reg);
3335
          do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3336
          cfa_off -= 8;
3337
        }
3338
    }
3339
 
3340
  /* Save the return pointer.  */
3341
  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3342
    {
3343
      reg = gen_rtx_REG (DImode, BR_REG (0));
3344
      if (current_frame_info.r[reg_save_b0] != 0)
3345
        {
3346
          alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3347
          reg_emitted (reg_save_b0);
3348
          insn = emit_move_insn (alt_reg, reg);
3349
          RTX_FRAME_RELATED_P (insn) = 1;
3350
 
3351
          /* Even if we're not going to generate an epilogue, we still
3352
             need to save the register so that EH works.  */
3353
          if (! epilogue_p)
3354
            emit_insn (gen_prologue_use (alt_reg));
3355
        }
3356
      else
3357
        {
3358
          alt_regno = next_scratch_gr_reg ();
3359
          alt_reg = gen_rtx_REG (DImode, alt_regno);
3360
          emit_move_insn (alt_reg, reg);
3361
          do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3362
          cfa_off -= 8;
3363
        }
3364
    }
3365
 
3366
  if (current_frame_info.r[reg_save_gp])
3367
    {
3368
      reg_emitted (reg_save_gp);
3369
      insn = emit_move_insn (gen_rtx_REG (DImode,
3370
                                          current_frame_info.r[reg_save_gp]),
3371
                             pic_offset_table_rtx);
3372
    }
3373
 
3374
  /* We should now be at the base of the gr/br/fr spill area.  */
3375
  gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3376
                          + current_frame_info.spill_size));
3377
 
3378
  /* Spill all general registers.  */
3379
  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3380
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3381
      {
3382
        reg = gen_rtx_REG (DImode, regno);
3383
        do_spill (gen_gr_spill, reg, cfa_off, reg);
3384
        cfa_off -= 8;
3385
      }
3386
 
3387
  /* Spill the rest of the BR registers.  */
3388
  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3389
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3390
      {
3391
        alt_regno = next_scratch_gr_reg ();
3392
        alt_reg = gen_rtx_REG (DImode, alt_regno);
3393
        reg = gen_rtx_REG (DImode, regno);
3394
        emit_move_insn (alt_reg, reg);
3395
        do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3396
        cfa_off -= 8;
3397
      }
3398
 
3399
  /* Align the frame and spill all FR registers.  */
3400
  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3401
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3402
      {
3403
        gcc_assert (!(cfa_off & 15));
3404
        reg = gen_rtx_REG (XFmode, regno);
3405
        do_spill (gen_fr_spill_x, reg, cfa_off, reg);
3406
        cfa_off -= 16;
3407
      }
3408
 
3409
  gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3410
 
3411
  finish_spill_pointers ();
3412
}
3413
 
3414
/* Called after register allocation to add any instructions needed for the
3415
   epilogue.  Using an epilogue insn is favored compared to putting all of the
3416
   instructions in output_function_epilogue(), since it allows the scheduler
3417
   to intermix instructions with the restores of the saved registers.  In
3418
   some cases, it might be necessary to emit a barrier instruction as the last
3419
   insn to prevent such scheduling.  */
3420
 
3421
void
3422
ia64_expand_epilogue (int sibcall_p)
3423
{
3424
  rtx insn, reg, alt_reg, ar_unat_save_reg;
3425
  int regno, alt_regno, cfa_off;
3426
 
3427
  ia64_compute_frame_size (get_frame_size ());
3428
 
3429
  /* If there is a frame pointer, then we use it instead of the stack
3430
     pointer, so that the stack pointer does not need to be valid when
3431
     the epilogue starts.  See EXIT_IGNORE_STACK.  */
3432
  if (frame_pointer_needed)
3433
    setup_spill_pointers (current_frame_info.n_spilled,
3434
                          hard_frame_pointer_rtx, 0);
3435
  else
3436
    setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
3437
                          current_frame_info.total_size);
3438
 
3439
  if (current_frame_info.total_size != 0)
3440
    {
3441
      /* ??? At this point we must generate a magic insn that appears to
3442
         modify the spill iterators and the frame pointer.  This would
3443
         allow the most scheduling freedom.  For now, just hard stop.  */
3444
      emit_insn (gen_blockage ());
3445
    }
3446
 
3447
  /* Locate the bottom of the register save area.  */
3448
  cfa_off = (current_frame_info.spill_cfa_off
3449
             + current_frame_info.spill_size
3450
             + current_frame_info.extra_spill_size);
3451
 
3452
  /* Restore the predicate registers.  */
3453
  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3454
    {
3455
      if (current_frame_info.r[reg_save_pr] != 0)
3456
        {
3457
          alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3458
          reg_emitted (reg_save_pr);
3459
        }
3460
      else
3461
        {
3462
          alt_regno = next_scratch_gr_reg ();
3463
          alt_reg = gen_rtx_REG (DImode, alt_regno);
3464
          do_restore (gen_movdi_x, alt_reg, cfa_off);
3465
          cfa_off -= 8;
3466
        }
3467
      reg = gen_rtx_REG (DImode, PR_REG (0));
3468
      emit_move_insn (reg, alt_reg);
3469
    }
3470
 
3471
  /* Restore the application registers.  */
3472
 
3473
  /* Load the saved unat from the stack, but do not restore it until
3474
     after the GRs have been restored.  */
3475
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3476
    {
3477
      if (current_frame_info.r[reg_save_ar_unat] != 0)
3478
        {
3479
          ar_unat_save_reg
3480
            = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3481
          reg_emitted (reg_save_ar_unat);
3482
        }
3483
      else
3484
        {
3485
          alt_regno = next_scratch_gr_reg ();
3486
          ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3487
          current_frame_info.gr_used_mask |= 1 << alt_regno;
3488
          do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
3489
          cfa_off -= 8;
3490
        }
3491
    }
3492
  else
3493
    ar_unat_save_reg = NULL_RTX;
3494
 
3495
  if (current_frame_info.r[reg_save_ar_pfs] != 0)
3496
    {
3497
      reg_emitted (reg_save_ar_pfs);
3498
      alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_pfs]);
3499
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3500
      emit_move_insn (reg, alt_reg);
3501
    }
3502
  else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3503
    {
3504
      alt_regno = next_scratch_gr_reg ();
3505
      alt_reg = gen_rtx_REG (DImode, alt_regno);
3506
      do_restore (gen_movdi_x, alt_reg, cfa_off);
3507
      cfa_off -= 8;
3508
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3509
      emit_move_insn (reg, alt_reg);
3510
    }
3511
 
3512
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3513
    {
3514
      if (current_frame_info.r[reg_save_ar_lc] != 0)
3515
        {
3516
          alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3517
          reg_emitted (reg_save_ar_lc);
3518
        }
3519
      else
3520
        {
3521
          alt_regno = next_scratch_gr_reg ();
3522
          alt_reg = gen_rtx_REG (DImode, alt_regno);
3523
          do_restore (gen_movdi_x, alt_reg, cfa_off);
3524
          cfa_off -= 8;
3525
        }
3526
      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3527
      emit_move_insn (reg, alt_reg);
3528
    }
3529
 
3530
  /* Restore the return pointer.  */
3531
  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3532
    {
3533
      if (current_frame_info.r[reg_save_b0] != 0)
3534
        {
3535
          alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3536
          reg_emitted (reg_save_b0);
3537
        }
3538
      else
3539
        {
3540
          alt_regno = next_scratch_gr_reg ();
3541
          alt_reg = gen_rtx_REG (DImode, alt_regno);
3542
          do_restore (gen_movdi_x, alt_reg, cfa_off);
3543
          cfa_off -= 8;
3544
        }
3545
      reg = gen_rtx_REG (DImode, BR_REG (0));
3546
      emit_move_insn (reg, alt_reg);
3547
    }
3548
 
3549
  /* We should now be at the base of the gr/br/fr spill area.  */
3550
  gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3551
                          + current_frame_info.spill_size));
3552
 
3553
  /* The GP may be stored on the stack in the prologue, but it's
3554
     never restored in the epilogue.  Skip the stack slot.  */
3555
  if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
3556
    cfa_off -= 8;
3557
 
3558
  /* Restore all general registers.  */
3559
  for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
3560
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3561
      {
3562
        reg = gen_rtx_REG (DImode, regno);
3563
        do_restore (gen_gr_restore, reg, cfa_off);
3564
        cfa_off -= 8;
3565
      }
3566
 
3567
  /* Restore the branch registers.  */
3568
  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3569
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3570
      {
3571
        alt_regno = next_scratch_gr_reg ();
3572
        alt_reg = gen_rtx_REG (DImode, alt_regno);
3573
        do_restore (gen_movdi_x, alt_reg, cfa_off);
3574
        cfa_off -= 8;
3575
        reg = gen_rtx_REG (DImode, regno);
3576
        emit_move_insn (reg, alt_reg);
3577
      }
3578
 
3579
  /* Restore floating point registers.  */
3580
  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3581
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3582
      {
3583
        gcc_assert (!(cfa_off & 15));
3584
        reg = gen_rtx_REG (XFmode, regno);
3585
        do_restore (gen_fr_restore_x, reg, cfa_off);
3586
        cfa_off -= 16;
3587
      }
3588
 
3589
  /* Restore ar.unat for real.  */
3590
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3591
    {
3592
      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3593
      emit_move_insn (reg, ar_unat_save_reg);
3594
    }
3595
 
3596
  gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3597
 
3598
  finish_spill_pointers ();
3599
 
3600
  if (current_frame_info.total_size
3601
      || cfun->machine->ia64_eh_epilogue_sp
3602
      || frame_pointer_needed)
3603
    {
3604
      /* ??? At this point we must generate a magic insn that appears to
3605
         modify the spill iterators, the stack pointer, and the frame
3606
         pointer.  This would allow the most scheduling freedom.  For now,
3607
         just hard stop.  */
3608
      emit_insn (gen_blockage ());
3609
    }
3610
 
3611
  if (cfun->machine->ia64_eh_epilogue_sp)
3612
    emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
3613
  else if (frame_pointer_needed)
3614
    {
3615
      insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
3616
      RTX_FRAME_RELATED_P (insn) = 1;
3617
    }
3618
  else if (current_frame_info.total_size)
3619
    {
3620
      rtx offset, frame_size_rtx;
3621
 
3622
      frame_size_rtx = GEN_INT (current_frame_info.total_size);
3623
      if (satisfies_constraint_I (frame_size_rtx))
3624
        offset = frame_size_rtx;
3625
      else
3626
        {
3627
          regno = next_scratch_gr_reg ();
3628
          offset = gen_rtx_REG (DImode, regno);
3629
          emit_move_insn (offset, frame_size_rtx);
3630
        }
3631
 
3632
      insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
3633
                                    offset));
3634
 
3635
      RTX_FRAME_RELATED_P (insn) = 1;
3636
      if (GET_CODE (offset) != CONST_INT)
3637
        add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3638
                      gen_rtx_SET (VOIDmode,
3639
                                   stack_pointer_rtx,
3640
                                   gen_rtx_PLUS (DImode,
3641
                                                 stack_pointer_rtx,
3642
                                                 frame_size_rtx)));
3643
    }
3644
 
3645
  if (cfun->machine->ia64_eh_epilogue_bsp)
3646
    emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
3647
 
3648
  if (! sibcall_p)
3649
    emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
3650
  else
3651
    {
3652
      int fp = GR_REG (2);
3653
      /* We need a throwaway register here; r0 and r1 are reserved, so r2 is the
3654
         first available call clobbered register.  If there was a frame_pointer
3655
         register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
3656
         so we have to make sure we're using the string "r2" when emitting
3657
         the register name for the assembler.  */
3658
      if (current_frame_info.r[reg_fp]
3659
          && current_frame_info.r[reg_fp] == GR_REG (2))
3660
        fp = HARD_FRAME_POINTER_REGNUM;
3661
 
3662
      /* We must emit an alloc to force the input registers to become output
3663
         registers.  Otherwise, if the callee tries to pass its parameters
3664
         through to another call without an intervening alloc, then these
3665
         values get lost.  */
3666
      /* ??? We don't need to preserve all input registers.  We only need to
3667
         preserve those input registers used as arguments to the sibling call.
3668
         It is unclear how to compute that number here.  */
3669
      if (current_frame_info.n_input_regs != 0)
3670
        {
3671
          rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
3672
          insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
3673
                                const0_rtx, const0_rtx,
3674
                                n_inputs, const0_rtx));
3675
          RTX_FRAME_RELATED_P (insn) = 1;
3676
        }
3677
    }
3678
}
3679
 
3680
/* Return 1 if br.ret can do all the work required to return from a
3681
   function.  */
3682
 
3683
int
3684
ia64_direct_return (void)
3685
{
3686
  if (reload_completed && ! frame_pointer_needed)
3687
    {
3688
      ia64_compute_frame_size (get_frame_size ());
3689
 
3690
      return (current_frame_info.total_size == 0
3691
              && current_frame_info.n_spilled == 0
3692
              && current_frame_info.r[reg_save_b0] == 0
3693
              && current_frame_info.r[reg_save_pr] == 0
3694
              && current_frame_info.r[reg_save_ar_pfs] == 0
3695
              && current_frame_info.r[reg_save_ar_unat] == 0
3696
              && current_frame_info.r[reg_save_ar_lc] == 0);
3697
    }
3698
  return 0;
3699
}
3700
 
3701
/* Return the magic cookie that we use to hold the return address
3702
   during early compilation.  */
3703
 
3704
rtx
3705
ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
3706
{
3707
  if (count != 0)
3708
    return NULL;
3709
  return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
3710
}
3711
 
3712
/* Split this value after reload, now that we know where the return
3713
   address is saved.  */
3714
 
3715
void
3716
ia64_split_return_addr_rtx (rtx dest)
3717
{
3718
  rtx src;
3719
 
3720
  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3721
    {
3722
      if (current_frame_info.r[reg_save_b0] != 0)
3723
        {
3724
          src = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3725
          reg_emitted (reg_save_b0);
3726
        }
3727
      else
3728
        {
3729
          HOST_WIDE_INT off;
3730
          unsigned int regno;
3731
          rtx off_r;
3732
 
3733
          /* Compute offset from CFA for BR0.  */
3734
          /* ??? Must be kept in sync with ia64_expand_prologue.  */
3735
          off = (current_frame_info.spill_cfa_off
3736
                 + current_frame_info.spill_size);
3737
          for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3738
            if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3739
              off -= 8;
3740
 
3741
          /* Convert CFA offset to a register based offset.  */
3742
          if (frame_pointer_needed)
3743
            src = hard_frame_pointer_rtx;
3744
          else
3745
            {
3746
              src = stack_pointer_rtx;
3747
              off += current_frame_info.total_size;
3748
            }
3749
 
3750
          /* Load address into scratch register.  */
3751
          off_r = GEN_INT (off);
3752
          if (satisfies_constraint_I (off_r))
3753
            emit_insn (gen_adddi3 (dest, src, off_r));
3754
          else
3755
            {
3756
              emit_move_insn (dest, off_r);
3757
              emit_insn (gen_adddi3 (dest, src, dest));
3758
            }
3759
 
3760
          src = gen_rtx_MEM (Pmode, dest);
3761
        }
3762
    }
3763
  else
3764
    src = gen_rtx_REG (DImode, BR_REG (0));
3765
 
3766
  emit_move_insn (dest, src);
3767
}
3768
 
3769
int
3770
ia64_hard_regno_rename_ok (int from, int to)
3771
{
3772
  /* Don't clobber any of the registers we reserved for the prologue.  */
3773
  unsigned int r;
3774
 
3775
  for (r = reg_fp; r <= reg_save_ar_lc; r++)
3776
    if (to == current_frame_info.r[r]
3777
        || from == current_frame_info.r[r]
3778
        || to == emitted_frame_related_regs[r]
3779
        || from == emitted_frame_related_regs[r])
3780
      return 0;
3781
 
3782
  /* Don't use output registers outside the register frame.  */
3783
  if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
3784
    return 0;
3785
 
3786
  /* Retain even/oddness on predicate register pairs.  */
3787
  if (PR_REGNO_P (from) && PR_REGNO_P (to))
3788
    return (from & 1) == (to & 1);
3789
 
3790
  return 1;
3791
}
3792
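/* Example of the predicate-pair rule above (hypothetical rename): renaming
   p6 to p8 preserves the even/odd pairing and is allowed, while renaming
   p6 to p7 flips the parity and is rejected.  */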
 
3793
/* Target hook for assembling integer objects.  Handle word-sized
3794
   aligned objects and detect the cases when @fptr is needed.  */
3795
 
3796
static bool
3797
ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
3798
{
3799
  if (size == POINTER_SIZE / BITS_PER_UNIT
3800
      && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
3801
      && GET_CODE (x) == SYMBOL_REF
3802
      && SYMBOL_REF_FUNCTION_P (x))
3803
    {
3804
      static const char * const directive[2][2] = {
3805
          /* 64-bit pointer */  /* 32-bit pointer */
3806
        { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("},  /* unaligned */
3807
        { "\tdata8\t@fptr(",    "\tdata4\t@fptr("}      /* aligned */
3808
      };
3809
      fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
3810
      output_addr_const (asm_out_file, x);
3811
      fputs (")\n", asm_out_file);
3812
      return true;
3813
    }
3814
  return default_assemble_integer (x, size, aligned_p);
3815
}
3816
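/* Example of the output produced above (hypothetical symbol): assembling an
   aligned 64-bit pointer to a function `foo' when PIC is in use emits
       data8   @fptr(foo)
   i.e. the address of foo's function descriptor rather than a raw code
   address; all other cases fall through to default_assemble_integer.  */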
 
3817
/* Emit the function prologue.  */
3818
 
3819
static void
3820
ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3821
{
3822
  int mask, grsave, grsave_prev;
3823
 
3824
  if (current_frame_info.need_regstk)
3825
    fprintf (file, "\t.regstk %d, %d, %d, %d\n",
3826
             current_frame_info.n_input_regs,
3827
             current_frame_info.n_local_regs,
3828
             current_frame_info.n_output_regs,
3829
             current_frame_info.n_rotate_regs);
3830
 
3831
  if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3832
    return;
3833
 
3834
  /* Emit the .prologue directive.  */
3835
 
3836
  mask = 0;
3837
  grsave = grsave_prev = 0;
3838
  if (current_frame_info.r[reg_save_b0] != 0)
3839
    {
3840
      mask |= 8;
3841
      grsave = grsave_prev = current_frame_info.r[reg_save_b0];
3842
    }
3843
  if (current_frame_info.r[reg_save_ar_pfs] != 0
3844
      && (grsave_prev == 0
3845
          || current_frame_info.r[reg_save_ar_pfs] == grsave_prev + 1))
3846
    {
3847
      mask |= 4;
3848
      if (grsave_prev == 0)
3849
        grsave = current_frame_info.r[reg_save_ar_pfs];
3850
      grsave_prev = current_frame_info.r[reg_save_ar_pfs];
3851
    }
3852
  if (current_frame_info.r[reg_fp] != 0
3853
      && (grsave_prev == 0
3854
          || current_frame_info.r[reg_fp] == grsave_prev + 1))
3855
    {
3856
      mask |= 2;
3857
      if (grsave_prev == 0)
3858
        grsave = HARD_FRAME_POINTER_REGNUM;
3859
      grsave_prev = current_frame_info.r[reg_fp];
3860
    }
3861
  if (current_frame_info.r[reg_save_pr] != 0
3862
      && (grsave_prev == 0
3863
          || current_frame_info.r[reg_save_pr] == grsave_prev + 1))
3864
    {
3865
      mask |= 1;
3866
      if (grsave_prev == 0)
3867
        grsave = current_frame_info.r[reg_save_pr];
3868
    }
3869
 
3870
  if (mask && TARGET_GNU_AS)
3871
    fprintf (file, "\t.prologue %d, %d\n", mask,
3872
             ia64_dbx_register_number (grsave));
3873
  else
3874
    fputs ("\t.prologue\n", file);
3875
 
3876
  /* Emit a .spill directive, if necessary, to relocate the base of
3877
     the register spill area.  */
3878
  if (current_frame_info.spill_cfa_off != -16)
3879
    fprintf (file, "\t.spill %ld\n",
3880
             (long) (current_frame_info.spill_cfa_off
3881
                     + current_frame_info.spill_size));
3882
}
3883
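/* Worked example with hypothetical register assignments: if b0 is saved in
   a stacked GR and ar.pfs in the GR immediately following it, the code
   above computes mask = 8|4 = 12 and grsave = the b0 save register, so with
   the GNU assembler it emits ".prologue 12, <n>" where <n> is the
   ia64_dbx_register_number of that first save register.  */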
 
3884
/* Emit the .body directive at the scheduled end of the prologue.  */
3885
 
3886
static void
3887
ia64_output_function_end_prologue (FILE *file)
3888
{
3889
  if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3890
    return;
3891
 
3892
  fputs ("\t.body\n", file);
3893
}
3894
 
3895
/* Emit the function epilogue.  */
3896
 
3897
static void
3898
ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
3899
                               HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3900
{
3901
  int i;
3902
 
3903
  if (current_frame_info.r[reg_fp])
3904
    {
3905
      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3906
      reg_names[HARD_FRAME_POINTER_REGNUM]
3907
        = reg_names[current_frame_info.r[reg_fp]];
3908
      reg_names[current_frame_info.r[reg_fp]] = tmp;
3909
      reg_emitted (reg_fp);
3910
    }
3911
  if (! TARGET_REG_NAMES)
3912
    {
3913
      for (i = 0; i < current_frame_info.n_input_regs; i++)
3914
        reg_names[IN_REG (i)] = ia64_input_reg_names[i];
3915
      for (i = 0; i < current_frame_info.n_local_regs; i++)
3916
        reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
3917
      for (i = 0; i < current_frame_info.n_output_regs; i++)
3918
        reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
3919
    }
3920
 
3921
  current_frame_info.initialized = 0;
3922
}
3923
 
3924
int
3925
ia64_dbx_register_number (int regno)
3926
{
3927
  /* In ia64_expand_prologue we quite literally renamed the frame pointer
3928
     from its home at loc79 to something inside the register frame.  We
3929
     must perform the same renumbering here for the debug info.  */
3930
  if (current_frame_info.r[reg_fp])
3931
    {
3932
      if (regno == HARD_FRAME_POINTER_REGNUM)
3933
        regno = current_frame_info.r[reg_fp];
3934
      else if (regno == current_frame_info.r[reg_fp])
3935
        regno = HARD_FRAME_POINTER_REGNUM;
3936
    }
3937
 
3938
  if (IN_REGNO_P (regno))
3939
    return 32 + regno - IN_REG (0);
3940
  else if (LOC_REGNO_P (regno))
3941
    return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
3942
  else if (OUT_REGNO_P (regno))
3943
    return (32 + current_frame_info.n_input_regs
3944
            + current_frame_info.n_local_regs + regno - OUT_REG (0));
3945
  else
3946
    return regno;
3947
}
3948
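/* Worked example for a hypothetical frame with 2 input and 3 local
   registers: the mapping above yields IN_REG (1) -> 33, LOC_REG (0) -> 34
   and OUT_REG (0) -> 37, i.e. 32 + inputs + locals + index for the output
   registers.  */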
 
3949
/* Implement TARGET_TRAMPOLINE_INIT.
3950
 
3951
   The trampoline should set the static chain pointer to value placed
3952
   into the trampoline and should branch to the specified routine.
3953
   To make the normal indirect-subroutine calling convention work,
3954
   the trampoline must look like a function descriptor; the first
3955
   word being the target address and the second being the target's
3956
   global pointer.
3957
 
3958
   We abuse the concept of a global pointer by arranging for it
3959
   to point to the data we need to load.  The complete trampoline
3960
   has the following form:
3961
 
3962
                +-------------------+ \
3963
        TRAMP:  | __ia64_trampoline | |
3964
                +-------------------+  > fake function descriptor
3965
                | TRAMP+16          | |
3966
                +-------------------+ /
3967
                | target descriptor |
3968
                +-------------------+
3969
                | static link       |
3970
                +-------------------+
3971
*/
3972
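/* Byte-level view of the 32-byte trampoline built below (offsets from
   TRAMP, values illustrative):
       TRAMP+ 0: address of __ia64_trampoline  \  fake function
       TRAMP+ 8: TRAMP+16                      /  descriptor
       TRAMP+16: descriptor of the target function (fnaddr)
       TRAMP+24: static chain value
   ia64_trampoline_init stores one 8-byte word per step and advances the
   address register by `eight' between the stores.  */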
 
3973
static void
3974
ia64_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain)
3975
{
3976
  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
3977
  rtx addr, addr_reg, tramp, eight = GEN_INT (8);
3978
 
3979
  /* The Intel assembler requires that the global __ia64_trampoline symbol
3980
     be declared explicitly */
3981
  if (!TARGET_GNU_AS)
3982
    {
3983
      static bool declared_ia64_trampoline = false;
3984
 
3985
      if (!declared_ia64_trampoline)
3986
        {
3987
          declared_ia64_trampoline = true;
3988
          (*targetm.asm_out.globalize_label) (asm_out_file,
3989
                                              "__ia64_trampoline");
3990
        }
3991
    }
3992
 
3993
  /* Make sure addresses are Pmode even if we are in ILP32 mode. */
3994
  addr = convert_memory_address (Pmode, XEXP (m_tramp, 0));
3995
  fnaddr = convert_memory_address (Pmode, fnaddr);
3996
  static_chain = convert_memory_address (Pmode, static_chain);
3997
 
3998
  /* Load up our iterator.  */
3999
  addr_reg = copy_to_reg (addr);
4000
  m_tramp = adjust_automodify_address (m_tramp, Pmode, addr_reg, 0);
4001
 
4002
  /* The first two words are the fake descriptor:
4003
     __ia64_trampoline, ADDR+16.  */
4004
  tramp = gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline");
4005
  if (TARGET_ABI_OPEN_VMS)
4006
    {
4007
      /* HP decided to break the ELF ABI on VMS (to deal with an ambiguity
4008
         in the Macro-32 compiler) and changed the semantics of the LTOFF22
4009
         relocation against function symbols to make it identical to the
4010
         LTOFF_FPTR22 relocation.  Emit the latter directly to stay within
4011
         strict ELF and dereference to get the bare code address.  */
4012
      rtx reg = gen_reg_rtx (Pmode);
4013
      SYMBOL_REF_FLAGS (tramp) |= SYMBOL_FLAG_FUNCTION;
4014
      emit_move_insn (reg, tramp);
4015
      emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
4016
      tramp = reg;
4017
   }
4018
  emit_move_insn (m_tramp, tramp);
4019
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4020
  m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4021
 
4022
  emit_move_insn (m_tramp, force_reg (Pmode, plus_constant (addr, 16)));
4023
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4024
  m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4025
 
4026
  /* The third word is the target descriptor.  */
4027
  emit_move_insn (m_tramp, force_reg (Pmode, fnaddr));
4028
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4029
  m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4030
 
4031
  /* The fourth word is the static chain.  */
4032
  emit_move_insn (m_tramp, static_chain);
4033
}
4034
 
4035
/* Do any needed setup for a variadic function.  CUM has not been updated
4036
   for the last named argument which has type TYPE and mode MODE.
4037
 
4038
   We generate the actual spill instructions during prologue generation.  */
4039
 
4040
static void
4041
ia64_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4042
                             tree type, int * pretend_size,
4043
                             int second_time ATTRIBUTE_UNUSED)
4044
{
4045
  CUMULATIVE_ARGS next_cum = *cum;
4046
 
4047
  /* Skip the current argument.  */
4048
  ia64_function_arg_advance (&next_cum, mode, type, 1);
4049
 
4050
  if (next_cum.words < MAX_ARGUMENT_SLOTS)
4051
    {
4052
      int n = MAX_ARGUMENT_SLOTS - next_cum.words;
4053
      *pretend_size = n * UNITS_PER_WORD;
4054
      cfun->machine->n_varargs = n;
4055
    }
4056
}
4057
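/* Worked example for a hypothetical variadic function whose named
   arguments occupy 3 of the MAX_ARGUMENT_SLOTS (8) slots: next_cum.words
   is 3, so n = 5, *pretend_size = 5 * UNITS_PER_WORD = 40 bytes, and the
   prologue later spills the remaining 5 incoming GR argument registers
   into the varargs save area.  */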
 
4058
/* Check whether TYPE is a homogeneous floating point aggregate.  If
4059
   it is, return the mode of the floating point type that appears
4060
   in all leaves.  If it is not, return VOIDmode.
4061
 
4062
   An aggregate is a homogeneous floating point aggregate if all
4063
   fields/elements in it have the same floating point type (e.g.,
4064
   SFmode).  128-bit quad-precision floats are excluded.
4065
 
4066
   Variable sized aggregates should never arrive here, since we should
4067
   have already decided to pass them by reference.  Top-level zero-sized
4068
   aggregates are excluded because our parallels crash the middle-end.  */
4069
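/* Illustrative cases (hypothetical types): a struct of three floats or an
   array of doubles is an HFA, and hfa_element_mode returns SFmode or
   DFmode respectively; a struct mixing float and double members, an
   integer-only struct, or an aggregate containing a 128-bit quad
   (TFmode) member is not, and VOIDmode is returned.  */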
 
4070
static enum machine_mode
4071
hfa_element_mode (const_tree type, bool nested)
4072
{
4073
  enum machine_mode element_mode = VOIDmode;
4074
  enum machine_mode mode;
4075
  enum tree_code code = TREE_CODE (type);
4076
  int know_element_mode = 0;
4077
  tree t;
4078
 
4079
  if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
4080
    return VOIDmode;
4081
 
4082
  switch (code)
4083
    {
4084
    case VOID_TYPE:     case INTEGER_TYPE:      case ENUMERAL_TYPE:
4085
    case BOOLEAN_TYPE:  case POINTER_TYPE:
4086
    case OFFSET_TYPE:   case REFERENCE_TYPE:    case METHOD_TYPE:
4087
    case LANG_TYPE:             case FUNCTION_TYPE:
4088
      return VOIDmode;
4089
 
4090
      /* Fortran complex types are supposed to be HFAs, so we need to handle
4091
         gcc's COMPLEX_TYPEs as HFAs.  We need to exclude the integral complex
4092
         types though.  */
4093
    case COMPLEX_TYPE:
4094
      if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
4095
          && TYPE_MODE (type) != TCmode)
4096
        return GET_MODE_INNER (TYPE_MODE (type));
4097
      else
4098
        return VOIDmode;
4099
 
4100
    case REAL_TYPE:
4101
      /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
4102
         mode if this is contained within an aggregate.  */
4103
      if (nested && TYPE_MODE (type) != TFmode)
4104
        return TYPE_MODE (type);
4105
      else
4106
        return VOIDmode;
4107
 
4108
    case ARRAY_TYPE:
4109
      return hfa_element_mode (TREE_TYPE (type), 1);
4110
 
4111
    case RECORD_TYPE:
4112
    case UNION_TYPE:
4113
    case QUAL_UNION_TYPE:
4114
      for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
4115
        {
4116
          if (TREE_CODE (t) != FIELD_DECL)
4117
            continue;
4118
 
4119
          mode = hfa_element_mode (TREE_TYPE (t), 1);
4120
          if (know_element_mode)
4121
            {
4122
              if (mode != element_mode)
4123
                return VOIDmode;
4124
            }
4125
          else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
4126
            return VOIDmode;
4127
          else
4128
            {
4129
              know_element_mode = 1;
4130
              element_mode = mode;
4131
            }
4132
        }
4133
      return element_mode;
4134
 
4135
    default:
4136
      /* If we reach here, we probably have some front-end specific type
4137
         that the backend doesn't know about.  This can happen via the
4138
         aggregate_value_p call in init_function_start.  All we can do is
4139
         ignore unknown tree types.  */
4140
      return VOIDmode;
4141
    }
4142
 
4143
  return VOIDmode;
4144
}
4145
 
4146
/* Return the number of words required to hold a quantity of TYPE and MODE
4147
   when passed as an argument.  */
4148
static int
4149
ia64_function_arg_words (tree type, enum machine_mode mode)
4150
{
4151
  int words;
4152
 
4153
  if (mode == BLKmode)
4154
    words = int_size_in_bytes (type);
4155
  else
4156
    words = GET_MODE_SIZE (mode);
4157
 
4158
  return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;  /* round up */
4159
}
4160
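/* Worked arithmetic for hypothetical arguments: a 12-byte BLKmode struct
   needs (12 + 7) / 8 = 2 argument words, a DFmode double (8 + 7) / 8 = 1,
   and a TFmode quad (16 + 7) / 8 = 2.  */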
 
4161
/* Return the number of registers that should be skipped so the current
4162
   argument (described by TYPE and WORDS) will be properly aligned.
4163
 
4164
   Integer and float arguments larger than 8 bytes start at the next
4165
   even boundary.  Aggregates larger than 8 bytes start at the next
4166
   even boundary if the aggregate has 16 byte alignment.  Note that
4167
   in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
4168
   but are still to be aligned in registers.
4169
 
4170
   ??? The ABI does not specify how to handle aggregates with
4171
   alignment from 9 to 15 bytes, or greater than 16.  We handle them
4172
   all as if they had 16 byte alignment.  Such aggregates can occur
4173
   only if gcc extensions are used.  */
4174
static int
4175
ia64_function_arg_offset (CUMULATIVE_ARGS *cum, tree type, int words)
4176
{
4177
  /* No registers are skipped on VMS.  */
4178
  if (TARGET_ABI_OPEN_VMS || (cum->words & 1) == 0)
4179
    return 0;
4180
 
4181
  if (type
4182
      && TREE_CODE (type) != INTEGER_TYPE
4183
      && TREE_CODE (type) != REAL_TYPE)
4184
    return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
4185
  else
4186
    return words > 1;
4187
}
4188
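/* Worked example (hypothetical call): if three 8-byte arguments have
   already been passed (cum->words == 3, odd) and the next argument is a
   16-byte aligned aggregate or a two-word scalar, the function above
   returns 1, so one slot is skipped and the argument starts in the even
   slot 4; on OpenVMS, or when cum->words is already even, it returns 0.  */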
 
4189
/* Return rtx for register where argument is passed, or zero if it is passed
4190
   on the stack.  */
4191
/* ??? 128-bit quad-precision floats are always passed in general
4192
   registers.  */
4193
 
4194
rtx
4195
ia64_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
4196
                   int named, int incoming)
4197
{
4198
  int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
4199
  int words = ia64_function_arg_words (type, mode);
4200
  int offset = ia64_function_arg_offset (cum, type, words);
4201
  enum machine_mode hfa_mode = VOIDmode;
4202
 
4203
  /* For OpenVMS, emit the instruction setting up the argument register here,
4204
     when we know it will be emitted together with the other argument setup
4205
     insns.  This is not the conceptually best place to do this, but this is
4206
     the easiest as we have convenient access to cumulative args info.  */
4207
 
4208
  if (TARGET_ABI_OPEN_VMS && mode == VOIDmode && type == void_type_node
4209
      && named == 1)
4210
    {
4211
      unsigned HOST_WIDE_INT regval = cum->words;
4212
      int i;
4213
 
4214
      for (i = 0; i < 8; i++)
4215
        regval |= ((int) cum->atypes[i]) << (i * 3 + 8);
4216
 
4217
      emit_move_insn (gen_rtx_REG (DImode, GR_REG (25)),
4218
                      GEN_INT (regval));
4219
    }
4220
 
4221
  /* If all argument slots are used, then it must go on the stack.  */
4222
  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4223
    return 0;
4224
 
4225
  /* Check for and handle homogeneous FP aggregates.  */
4226
  if (type)
4227
    hfa_mode = hfa_element_mode (type, 0);
4228
 
4229
  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
4230
     and unprototyped hfas are passed specially.  */
4231
  if (hfa_mode != VOIDmode && (! cum->prototype || named))
4232
    {
4233
      rtx loc[16];
4234
      int i = 0;
4235
      int fp_regs = cum->fp_regs;
4236
      int int_regs = cum->words + offset;
4237
      int hfa_size = GET_MODE_SIZE (hfa_mode);
4238
      int byte_size;
4239
      int args_byte_size;
4240
 
4241
      /* If prototyped, pass it in FR regs then GR regs.
4242
         If not prototyped, pass it in both FR and GR regs.
4243
 
4244
         If this is an SFmode aggregate, then it is possible to run out of
4245
         FR regs while GR regs are still left.  In that case, we pass the
4246
         remaining part in the GR regs.  */
4247
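      /* Illustrative case (hypothetical argument): a named, prototyped HFA
         of four floats with cum->fp_regs == 0 becomes a PARALLEL of four
         SFmode pieces in the first FP argument registers (f8..f11) at byte
         offsets 0, 4, 8 and 12; if only two FP argument slots remained,
         the last 8 bytes would instead land in a general argument
         register, as the GR loop below arranges.  */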
 
4248
      /* Fill the FP regs.  We do this always.  We stop if we reach the end
4249
         of the argument, the last FP register, or the last argument slot.  */
4250
 
4251
      byte_size = ((mode == BLKmode)
4252
                   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4253
      args_byte_size = int_regs * UNITS_PER_WORD;
4254
      offset = 0;
4255
      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4256
              && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
4257
        {
4258
          loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4259
                                      gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
4260
                                                              + fp_regs)),
4261
                                      GEN_INT (offset));
4262
          offset += hfa_size;
4263
          args_byte_size += hfa_size;
4264
          fp_regs++;
4265
        }
4266
 
4267
      /* If no prototype, then the whole thing must go in GR regs.  */
4268
      if (! cum->prototype)
4269
        offset = 0;
4270
      /* If this is an SFmode aggregate, then we might have some left over
4271
         that needs to go in GR regs.  */
4272
      else if (byte_size != offset)
4273
        int_regs += offset / UNITS_PER_WORD;
4274
 
4275
      /* Fill in the GR regs.  We must use DImode here, not the hfa mode.  */
4276
 
4277
      for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
4278
        {
4279
          enum machine_mode gr_mode = DImode;
4280
          unsigned int gr_size;
4281
 
4282
          /* If we have an odd 4 byte hunk because we ran out of FR regs,
4283
             then this goes in a GR reg left adjusted/little endian, right
4284
             adjusted/big endian.  */
4285
          /* ??? Currently this is handled wrong, because 4-byte hunks are
4286
             always right adjusted/little endian.  */
4287
          if (offset & 0x4)
4288
            gr_mode = SImode;
4289
          /* If we have an even 4 byte hunk because the aggregate is a
4290
             multiple of 4 bytes in size, then this goes in a GR reg right
4291
             adjusted/little endian.  */
4292
          else if (byte_size - offset == 4)
4293
            gr_mode = SImode;
4294
 
4295
          loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4296
                                      gen_rtx_REG (gr_mode, (basereg
4297
                                                             + int_regs)),
4298
                                      GEN_INT (offset));
4299
 
4300
          gr_size = GET_MODE_SIZE (gr_mode);
4301
          offset += gr_size;
4302
          if (gr_size == UNITS_PER_WORD
4303
              || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
4304
            int_regs++;
4305
          else if (gr_size > UNITS_PER_WORD)
4306
            int_regs += gr_size / UNITS_PER_WORD;
4307
        }
4308
      return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4309
    }
4310
 
4311
  /* On OpenVMS variable argument is either in Rn or Fn.  */
4312
  else if (TARGET_ABI_OPEN_VMS && named == 0)
4313
    {
4314
      if (FLOAT_MODE_P (mode))
4315
        return gen_rtx_REG (mode, FR_ARG_FIRST + cum->words);
4316
      else
4317
        return gen_rtx_REG (mode, basereg + cum->words);
4318
    }
4319
 
4320
  /* Integral types and aggregates go in general registers.  If we have run
4321
     out of FR registers, then FP values must also go in general registers.
4322
     This can happen when we have an SFmode HFA.  */
4323
  else if (mode == TFmode || mode == TCmode
4324
           || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4325
    {
4326
      int byte_size = ((mode == BLKmode)
4327
                       ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4328
      if (BYTES_BIG_ENDIAN
4329
        && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4330
        && byte_size < UNITS_PER_WORD
4331
        && byte_size > 0)
4332
        {
4333
          rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4334
                                          gen_rtx_REG (DImode,
4335
                                                       (basereg + cum->words
4336
                                                        + offset)),
4337
                                          const0_rtx);
4338
          return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4339
        }
4340
      else
4341
        return gen_rtx_REG (mode, basereg + cum->words + offset);
4342
 
4343
    }
4344
 
4345
  /* If there is a prototype, then FP values go in a FR register when
4346
     named, and in a GR register when unnamed.  */
4347
  else if (cum->prototype)
4348
    {
4349
      if (named)
4350
        return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
4351
      /* In big-endian mode, an anonymous SFmode value must be represented
4352
         as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
4353
         the value into the high half of the general register.  */
4354
      else if (BYTES_BIG_ENDIAN && mode == SFmode)
4355
        return gen_rtx_PARALLEL (mode,
4356
                 gen_rtvec (1,
4357
                   gen_rtx_EXPR_LIST (VOIDmode,
4358
                     gen_rtx_REG (DImode, basereg + cum->words + offset),
4359
                                      const0_rtx)));
4360
      else
4361
        return gen_rtx_REG (mode, basereg + cum->words + offset);
4362
    }
4363
  /* If there is no prototype, then FP values go in both FR and GR
4364
     registers.  */
4365
  else
4366
    {
4367
      /* See comment above.  */
4368
      enum machine_mode inner_mode =
4369
        (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
4370
 
4371
      rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
4372
                                      gen_rtx_REG (mode, (FR_ARG_FIRST
4373
                                                          + cum->fp_regs)),
4374
                                      const0_rtx);
4375
      rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4376
                                      gen_rtx_REG (inner_mode,
4377
                                                   (basereg + cum->words
4378
                                                    + offset)),
4379
                                      const0_rtx);
4380
 
4381
      return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
4382
    }
4383
}
4384
 
4385
/* Return number of bytes, at the beginning of the argument, that must be
4386
   put in registers.  0 if the argument is entirely in registers or entirely
4387
   in memory.  */
4388
 
4389
static int
4390
ia64_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4391
                        tree type, bool named ATTRIBUTE_UNUSED)
4392
{
4393
  int words = ia64_function_arg_words (type, mode);
4394
  int offset = ia64_function_arg_offset (cum, type, words);
4395
 
4396
  /* If all argument slots are used, then it must go on the stack.  */
4397
  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4398
    return 0;
4399
 
4400
  /* It doesn't matter whether the argument goes in FR or GR regs.  If
4401
     it fits within the 8 argument slots, then it goes entirely in
4402
     registers.  If it extends past the last argument slot, then the rest
4403
     goes on the stack.  */
4404
 
4405
  if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
4406
    return 0;
4407
 
4408
  return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
4409
}
4410
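/* Worked example (hypothetical call state): with cum->words == 6, no
   alignment padding and a 3-word argument, 6 + 3 exceeds
   MAX_ARGUMENT_SLOTS, so (8 - 6) * UNITS_PER_WORD = 16 bytes of the
   argument go in registers and the remaining 8 bytes go on the stack.  */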
 
4411
/* Return ivms_arg_type based on machine_mode.  */
4412
 
4413
static enum ivms_arg_type
4414
ia64_arg_type (enum machine_mode mode)
4415
{
4416
  switch (mode)
4417
    {
4418
    case SFmode:
4419
      return FS;
4420
    case DFmode:
4421
      return FT;
4422
    default:
4423
      return I64;
4424
    }
4425
}
4426
 
4427
/* Update CUM to point after this argument.  This is patterned after
4428
   ia64_function_arg.  */
4429
 
4430
void
4431
ia64_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4432
                           tree type, int named)
4433
{
4434
  int words = ia64_function_arg_words (type, mode);
4435
  int offset = ia64_function_arg_offset (cum, type, words);
4436
  enum machine_mode hfa_mode = VOIDmode;
4437
 
4438
  /* If all arg slots are already full, then there is nothing to do.  */
4439
  if (cum->words >= MAX_ARGUMENT_SLOTS)
4440
    {
4441
      cum->words += words + offset;
4442
      return;
4443
    }
4444
 
4445
  cum->atypes[cum->words] = ia64_arg_type (mode);
4446
  cum->words += words + offset;
4447
 
4448
  /* Check for and handle homogeneous FP aggregates.  */
4449
  if (type)
4450
    hfa_mode = hfa_element_mode (type, 0);
4451
 
4452
  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
4453
     and unprototyped hfas are passed specially.  */
4454
  if (hfa_mode != VOIDmode && (! cum->prototype || named))
4455
    {
4456
      int fp_regs = cum->fp_regs;
4457
      /* This is the original value of cum->words + offset.  */
4458
      int int_regs = cum->words - words;
4459
      int hfa_size = GET_MODE_SIZE (hfa_mode);
4460
      int byte_size;
4461
      int args_byte_size;
4462
 
4463
      /* If prototyped, pass it in FR regs then GR regs.
4464
         If not prototyped, pass it in both FR and GR regs.
4465
 
4466
         If this is an SFmode aggregate, then it is possible to run out of
4467
         FR regs while GR regs are still left.  In that case, we pass the
4468
         remaining part in the GR regs.  */
4469
 
4470
      /* Fill the FP regs.  We do this always.  We stop if we reach the end
4471
         of the argument, the last FP register, or the last argument slot.  */
4472
 
4473
      byte_size = ((mode == BLKmode)
4474
                   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4475
      args_byte_size = int_regs * UNITS_PER_WORD;
4476
      offset = 0;
4477
      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4478
              && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
4479
        {
4480
          offset += hfa_size;
4481
          args_byte_size += hfa_size;
4482
          fp_regs++;
4483
        }
4484
 
4485
      cum->fp_regs = fp_regs;
4486
    }
4487
 
4488
  /* On OpenVMS a variable argument is either in Rn or Fn.  */
4489
  else if (TARGET_ABI_OPEN_VMS && named == 0)
4490
    {
4491
      cum->int_regs = cum->words;
4492
      cum->fp_regs = cum->words;
4493
    }
4494
 
4495
  /* Integral values and aggregates go in general registers.  So do TFmode FP values.
4496
     If we have run out of FR registers, then other FP values must also go in
4497
     general registers.  This can happen when we have an SFmode HFA.  */
4498
  else if (mode == TFmode || mode == TCmode
4499
           || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4500
    cum->int_regs = cum->words;
4501
 
4502
  /* If there is a prototype, then FP values go in a FR register when
4503
     named, and in a GR register when unnamed.  */
4504
  else if (cum->prototype)
4505
    {
4506
      if (! named)
4507
        cum->int_regs = cum->words;
4508
      else
4509
        /* ??? Complex types should not reach here.  */
4510
        cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4511
    }
4512
  /* If there is no prototype, then FP values go in both FR and GR
4513
     registers.  */
4514
  else
4515
    {
4516
      /* ??? Complex types should not reach here.  */
4517
      cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4518
      cum->int_regs = cum->words;
4519
    }
4520
}
4521
 
4522
/* Arguments with alignment larger than 8 bytes start at the next even
4523
   boundary.  On ILP32 HPUX, TFmode arguments start on the next even boundary
4524
   even though their normal alignment is 8 bytes.  See ia64_function_arg.  */
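/* With the ia64 PARM_BOUNDARY of 64 bits, the doubled value of 128 bits
   makes such an argument begin in an even-numbered argument slot.  */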
4525
 
4526
int
4527
ia64_function_arg_boundary (enum machine_mode mode, tree type)
4528
{
4529
 
4530
  if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
4531
    return PARM_BOUNDARY * 2;
4532
 
4533
  if (type)
4534
    {
4535
      if (TYPE_ALIGN (type) > PARM_BOUNDARY)
4536
        return PARM_BOUNDARY * 2;
4537
      else
4538
        return PARM_BOUNDARY;
4539
    }
4540
 
4541
  if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
4542
    return PARM_BOUNDARY * 2;
4543
  else
4544
    return PARM_BOUNDARY;
4545
}
4546
 
4547
/* True if it is OK to do sibling call optimization for the specified
4548
   call expression EXP.  DECL will be the called function, or NULL if
4549
   this is an indirect call.  */
4550
static bool
4551
ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
4552
{
4553
  /* We can't perform a sibcall if the current function has the syscall_linkage
4554
     attribute.  */
4555
  if (lookup_attribute ("syscall_linkage",
4556
                        TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
4557
    return false;
4558
 
4559
  /* We must always return with our current GP.  This means we can
4560
     only sibcall to functions defined in the current module unless
4561
     TARGET_CONST_GP is set to true.  */
4562
  return (decl && (*targetm.binds_local_p) (decl)) || TARGET_CONST_GP;
4563
}
4564
 
4565
 
4566
/* Implement va_arg.  */
4567
 
4568
static tree
4569
ia64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
4570
                      gimple_seq *post_p)
4571
{
4572
  /* Variable sized types are passed by reference.  */
4573
  if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
4574
    {
4575
      tree ptrtype = build_pointer_type (type);
4576
      tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
4577
      return build_va_arg_indirect_ref (addr);
4578
    }
4579
 
4580
  /* Aggregate arguments with alignment larger than 8 bytes start at
4581
     the next even boundary.  Integer and floating point arguments
4582
     do so if they are larger than 8 bytes, whether or not they are
4583
     also aligned larger than 8 bytes.  */
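  /* The POINTER_PLUS/BIT_AND pair below rounds VALIST up to the next
     2 * UNITS_PER_WORD boundary; with 8-byte words this is
     (valist + 15) & -16.  */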
4584
  if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
4585
      ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
4586
    {
4587
      tree t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (valist), valist,
4588
                       size_int (2 * UNITS_PER_WORD - 1));
4589
      t = fold_convert (sizetype, t);
4590
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4591
                  size_int (-2 * UNITS_PER_WORD));
4592
      t = fold_convert (TREE_TYPE (valist), t);
4593
      gimplify_assign (unshare_expr (valist), t, pre_p);
4594
    }
4595
 
4596
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4597
}
4598
 
4599
/* Return 1 if the function return value is returned in memory.  Return 0 if it is
4600
   in a register.  */
4601
 
4602
static bool
4603
ia64_return_in_memory (const_tree valtype, const_tree fntype ATTRIBUTE_UNUSED)
4604
{
4605
  enum machine_mode mode;
4606
  enum machine_mode hfa_mode;
4607
  HOST_WIDE_INT byte_size;
4608
 
4609
  mode = TYPE_MODE (valtype);
4610
  byte_size = GET_MODE_SIZE (mode);
4611
  if (mode == BLKmode)
4612
    {
4613
      byte_size = int_size_in_bytes (valtype);
4614
      if (byte_size < 0)
4615
        return true;
4616
    }
4617
 
4618
  /* Hfa's with up to 8 elements are returned in the FP argument registers.  */
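  /* For example, an aggregate of four DFmode elements (byte_size 32,
     hfa_size 8) has 32 / 8 == 4 <= MAX_ARGUMENT_SLOTS elements and is
     returned in FP registers; nine or more elements would not fit and the
     value would be returned in memory instead.  */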
4619
 
4620
  hfa_mode = hfa_element_mode (valtype, 0);
4621
  if (hfa_mode != VOIDmode)
4622
    {
4623
      int hfa_size = GET_MODE_SIZE (hfa_mode);
4624
 
4625
      if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
4626
        return true;
4627
      else
4628
        return false;
4629
    }
4630
  else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
4631
    return true;
4632
  else
4633
    return false;
4634
}
4635
 
4636
/* Return rtx for register that holds the function return value.  */
4637
 
4638
rtx
4639
ia64_function_value (const_tree valtype, const_tree func)
4640
{
4641
  enum machine_mode mode;
4642
  enum machine_mode hfa_mode;
4643
  int unsignedp;
4644
 
4645
  mode = TYPE_MODE (valtype);
4646
  hfa_mode = hfa_element_mode (valtype, 0);
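  /* An HFA result is spread across consecutive FP registers starting at
     FR_ARG_FIRST, one element per register; the PARALLEL built below records
     the byte offset of each piece.  */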
4647
 
4648
  if (hfa_mode != VOIDmode)
4649
    {
4650
      rtx loc[8];
4651
      int i;
4652
      int hfa_size;
4653
      int byte_size;
4654
      int offset;
4655
 
4656
      hfa_size = GET_MODE_SIZE (hfa_mode);
4657
      byte_size = ((mode == BLKmode)
4658
                   ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
4659
      offset = 0;
4660
      for (i = 0; offset < byte_size; i++)
4661
        {
4662
          loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4663
                                      gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
4664
                                      GEN_INT (offset));
4665
          offset += hfa_size;
4666
        }
4667
      return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4668
    }
4669
  else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
4670
    return gen_rtx_REG (mode, FR_ARG_FIRST);
4671
  else
4672
    {
4673
      bool need_parallel = false;
4674
 
4675
      /* In big-endian mode, we need to manage the layout of aggregates
4676
         in the registers so that we get the bits properly aligned in
4677
         the highpart of the registers.  */
4678
      if (BYTES_BIG_ENDIAN
4679
          && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
4680
        need_parallel = true;
4681
 
4682
      /* Something like struct S { long double x; char a[0] } is not an
4683
         HFA structure, and therefore doesn't go in fp registers.  But
4684
         the middle-end will give it XFmode anyway, and XFmode values
4685
         don't normally fit in integer registers.  So we need to smuggle
4686
         the value inside a parallel.  */
4687
      else if (mode == XFmode || mode == XCmode || mode == RFmode)
4688
        need_parallel = true;
4689
 
4690
      if (need_parallel)
4691
        {
4692
          rtx loc[8];
4693
          int offset;
4694
          int bytesize;
4695
          int i;
4696
 
4697
          offset = 0;
4698
          bytesize = int_size_in_bytes (valtype);
4699
          /* An empty PARALLEL is invalid here, but the return value
4700
             doesn't matter for empty structs.  */
4701
          if (bytesize == 0)
4702
            return gen_rtx_REG (mode, GR_RET_FIRST);
4703
          for (i = 0; offset < bytesize; i++)
4704
            {
4705
              loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4706
                                          gen_rtx_REG (DImode,
4707
                                                       GR_RET_FIRST + i),
4708
                                          GEN_INT (offset));
4709
              offset += UNITS_PER_WORD;
4710
            }
4711
          return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4712
        }
4713
 
4714
      mode = ia64_promote_function_mode (valtype, mode, &unsignedp,
4715
                                         func ? TREE_TYPE (func) : NULL_TREE,
4716
                                         true);
4717
 
4718
      return gen_rtx_REG (mode, GR_RET_FIRST);
4719
    }
4720
}
4721
 
4722
/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
4723
   We need to emit DTP-relative relocations.  */
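/* For SIZE == 8 this emits "\tdata8.ua\t@dtprel(" followed by the
   address X and a closing parenthesis.  */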
4724
 
4725
static void
4726
ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
4727
{
4728
  gcc_assert (size == 4 || size == 8);
4729
  if (size == 4)
4730
    fputs ("\tdata4.ua\t@dtprel(", file);
4731
  else
4732
    fputs ("\tdata8.ua\t@dtprel(", file);
4733
  output_addr_const (file, x);
4734
  fputs (")", file);
4735
}
4736
 
4737
/* Print a memory address as an operand to reference that memory location.  */
4738
 
4739
/* ??? Do we need this?  It gets used only for 'a' operands.  We could perhaps
4740
   also call this from ia64_print_operand for memory addresses.  */
4741
 
4742
void
4743
ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
4744
                            rtx address ATTRIBUTE_UNUSED)
4745
{
4746
}
4747
 
4748
/* Print an operand to an assembler instruction.
4749
   C    Swap and print a comparison operator.
4750
   D    Print an FP comparison operator.
4751
   E    Print 32 - constant, for SImode shifts as extract.
4752
   e    Print 64 - constant, for DImode rotates.
4753
   F    A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
4754
        a floating point register emitted normally.
4755
   G    A floating point constant.
4756
   I    Invert a predicate register by adding 1.
4757
   J    Select the proper predicate register for a condition.
4758
   j    Select the inverse predicate register for a condition.
4759
   O    Append .acq for volatile load.
4760
   P    Postincrement of a MEM.
4761
   Q    Append .rel for volatile store.
4762
   R    Print .s .d or nothing for a single, double or no truncation.
4763
   S    Shift amount for shladd instruction.
4764
   T    Print an 8-bit sign extended number (K) as a 32-bit unsigned number
4765
        for Intel assembler.
4766
   U    Print an 8-bit sign extended number (K) as a 64-bit unsigned number
4767
        for Intel assembler.
4768
   X    A pair of floating point registers.
4769
   r    Print register name, or constant 0 as r0.  HP compatibility for
4770
        Linux kernel.
4771
   v    Print vector constant value as an 8-byte integer value.  */
4772
 
4773
void
4774
ia64_print_operand (FILE * file, rtx x, int code)
4775
{
4776
  const char *str;
4777
 
4778
  switch (code)
4779
    {
4780
    case 0:
4781
      /* Handled below.  */
4782
      break;
4783
 
4784
    case 'C':
4785
      {
4786
        enum rtx_code c = swap_condition (GET_CODE (x));
4787
        fputs (GET_RTX_NAME (c), file);
4788
        return;
4789
      }
4790
 
4791
    case 'D':
4792
      switch (GET_CODE (x))
4793
        {
4794
        case NE:
4795
          str = "neq";
4796
          break;
4797
        case UNORDERED:
4798
          str = "unord";
4799
          break;
4800
        case ORDERED:
4801
          str = "ord";
4802
          break;
4803
        case UNLT:
4804
          str = "nge";
4805
          break;
4806
        case UNLE:
4807
          str = "ngt";
4808
          break;
4809
        case UNGT:
4810
          str = "nle";
4811
          break;
4812
        case UNGE:
4813
          str = "nlt";
4814
          break;
4815
        default:
4816
          str = GET_RTX_NAME (GET_CODE (x));
4817
          break;
4818
        }
4819
      fputs (str, file);
4820
      return;
4821
 
4822
    case 'E':
4823
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
4824
      return;
4825
 
4826
    case 'e':
4827
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
4828
      return;
4829
 
4830
    case 'F':
4831
      if (x == CONST0_RTX (GET_MODE (x)))
4832
        str = reg_names [FR_REG (0)];
4833
      else if (x == CONST1_RTX (GET_MODE (x)))
4834
        str = reg_names [FR_REG (1)];
4835
      else
4836
        {
4837
          gcc_assert (GET_CODE (x) == REG);
4838
          str = reg_names [REGNO (x)];
4839
        }
4840
      fputs (str, file);
4841
      return;
4842
 
4843
    case 'G':
4844
      {
4845
        long val[4];
4846
        REAL_VALUE_TYPE rv;
4847
        REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
4848
        real_to_target (val, &rv, GET_MODE (x));
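        /* For example, the DFmode constant 1.0 is printed as
           0x3ff0000000000000.  */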
4849
        if (GET_MODE (x) == SFmode)
4850
          fprintf (file, "0x%08lx", val[0] & 0xffffffff);
4851
        else if (GET_MODE (x) == DFmode)
4852
          fprintf (file, "0x%08lx%08lx", (WORDS_BIG_ENDIAN ? val[0] : val[1])
4853
                                          & 0xffffffff,
4854
                                         (WORDS_BIG_ENDIAN ? val[1] : val[0])
4855
                                          & 0xffffffff);
4856
        else
4857
          output_operand_lossage ("invalid %%G mode");
4858
      }
4859
      return;
4860
 
4861
    case 'I':
4862
      fputs (reg_names [REGNO (x) + 1], file);
4863
      return;
4864
 
4865
    case 'J':
4866
    case 'j':
4867
      {
4868
        unsigned int regno = REGNO (XEXP (x, 0));
4869
        if (GET_CODE (x) == EQ)
4870
          regno += 1;
4871
        if (code == 'j')
4872
          regno ^= 1;
4873
        fputs (reg_names [regno], file);
4874
      }
4875
      return;
4876
 
4877
    case 'O':
4878
      if (MEM_VOLATILE_P (x))
4879
        fputs(".acq", file);
4880
      return;
4881
 
4882
    case 'P':
4883
      {
4884
        HOST_WIDE_INT value;
4885
 
4886
        switch (GET_CODE (XEXP (x, 0)))
4887
          {
4888
          default:
4889
            return;
4890
 
4891
          case POST_MODIFY:
4892
            x = XEXP (XEXP (XEXP (x, 0), 1), 1);
4893
            if (GET_CODE (x) == CONST_INT)
4894
              value = INTVAL (x);
4895
            else
4896
              {
4897
                gcc_assert (GET_CODE (x) == REG);
4898
                fprintf (file, ", %s", reg_names[REGNO (x)]);
4899
                return;
4900
              }
4901
            break;
4902
 
4903
          case POST_INC:
4904
            value = GET_MODE_SIZE (GET_MODE (x));
4905
            break;
4906
 
4907
          case POST_DEC:
4908
            value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
4909
            break;
4910
          }
4911
 
4912
        fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
4913
        return;
4914
      }
4915
 
4916
    case 'Q':
4917
      if (MEM_VOLATILE_P (x))
4918
        fputs(".rel", file);
4919
      return;
4920
 
4921
    case 'R':
4922
      if (x == CONST0_RTX (GET_MODE (x)))
4923
        fputs(".s", file);
4924
      else if (x == CONST1_RTX (GET_MODE (x)))
4925
        fputs(".d", file);
4926
      else if (x == CONST2_RTX (GET_MODE (x)))
4927
        ;
4928
      else
4929
        output_operand_lossage ("invalid %%R value");
4930
      return;
4931
 
4932
    case 'S':
4933
      fprintf (file, "%d", exact_log2 (INTVAL (x)));
4934
      return;
4935
 
4936
    case 'T':
4937
      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4938
        {
4939
          fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
4940
          return;
4941
        }
4942
      break;
4943
 
4944
    case 'U':
4945
      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4946
        {
4947
          const char *prefix = "0x";
4948
          if (INTVAL (x) & 0x80000000)
4949
            {
4950
              fprintf (file, "0xffffffff");
4951
              prefix = "";
4952
            }
4953
          fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
4954
          return;
4955
        }
4956
      break;
4957
 
4958
    case 'X':
4959
      {
4960
        unsigned int regno = REGNO (x);
4961
        fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]);
4962
      }
4963
      return;
4964
 
4965
    case 'r':
4966
      /* If this operand is the constant zero, write it as register zero.
4967
         Any register, zero, or CONST_INT value is OK here.  */
4968
      if (GET_CODE (x) == REG)
4969
        fputs (reg_names[REGNO (x)], file);
4970
      else if (x == CONST0_RTX (GET_MODE (x)))
4971
        fputs ("r0", file);
4972
      else if (GET_CODE (x) == CONST_INT)
4973
        output_addr_const (file, x);
4974
      else
4975
        output_operand_lossage ("invalid %%r value");
4976
      return;
4977
 
4978
    case 'v':
4979
      gcc_assert (GET_CODE (x) == CONST_VECTOR);
4980
      x = simplify_subreg (DImode, x, GET_MODE (x), 0);
4981
      break;
4982
 
4983
    case '+':
4984
      {
4985
        const char *which;
4986
 
4987
        /* For conditional branches, returns or calls, substitute
4988
           sptk, dptk, dpnt, or spnt for %s.  */
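        /* .sptk/.spnt are the static taken/not-taken branch hints;
           .dptk/.dpnt are their dynamic counterparts.  */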
4989
        x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
4990
        if (x)
4991
          {
4992
            int pred_val = INTVAL (XEXP (x, 0));
4993
 
4994
            /* Guess top and bottom 2% statically predicted.  */
4995
            if (pred_val < REG_BR_PROB_BASE / 50
4996
                && br_prob_note_reliable_p (x))
4997
              which = ".spnt";
4998
            else if (pred_val < REG_BR_PROB_BASE / 2)
4999
              which = ".dpnt";
5000
            else if (pred_val < REG_BR_PROB_BASE / 100 * 98
5001
                     || !br_prob_note_reliable_p (x))
5002
              which = ".dptk";
5003
            else
5004
              which = ".sptk";
5005
          }
5006
        else if (GET_CODE (current_output_insn) == CALL_INSN)
5007
          which = ".sptk";
5008
        else
5009
          which = ".dptk";
5010
 
5011
        fputs (which, file);
5012
        return;
5013
      }
5014
 
5015
    case ',':
5016
      x = current_insn_predicate;
5017
      if (x)
5018
        {
5019
          unsigned int regno = REGNO (XEXP (x, 0));
5020
          if (GET_CODE (x) == EQ)
5021
            regno += 1;
5022
          fprintf (file, "(%s) ", reg_names [regno]);
5023
        }
5024
      return;
5025
 
5026
    default:
5027
      output_operand_lossage ("ia64_print_operand: unknown code");
5028
      return;
5029
    }
5030
 
5031
  switch (GET_CODE (x))
5032
    {
5033
      /* This happens for the spill/restore instructions.  */
5034
    case POST_INC:
5035
    case POST_DEC:
5036
    case POST_MODIFY:
5037
      x = XEXP (x, 0);
5038
      /* ... fall through ...  */
5039
 
5040
    case REG:
5041
      fputs (reg_names [REGNO (x)], file);
5042
      break;
5043
 
5044
    case MEM:
5045
      {
5046
        rtx addr = XEXP (x, 0);
5047
        if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
5048
          addr = XEXP (addr, 0);
5049
        fprintf (file, "[%s]", reg_names [REGNO (addr)]);
5050
        break;
5051
      }
5052
 
5053
    default:
5054
      output_addr_const (file, x);
5055
      break;
5056
    }
5057
 
5058
  return;
5059
}
5060
 
5061
/* Compute a (partial) cost for rtx X.  Return true if the complete
5062
   cost has been computed, and false if subexpressions should be
5063
   scanned.  In either case, *TOTAL contains the cost result.  */
5064
/* ??? This is incomplete.  */
5065
 
5066
static bool
5067
ia64_rtx_costs (rtx x, int code, int outer_code, int *total,
5068
                bool speed ATTRIBUTE_UNUSED)
5069
{
5070
  switch (code)
5071
    {
5072
    case CONST_INT:
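      /* Constraint I is the 14-bit "add" immediate and J the 22-bit "addl"
         immediate; constants satisfying them cost nothing extra.  The K and
         L constraints cover the small immediates accepted by other
         instruction forms.  */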
5073
      switch (outer_code)
5074
        {
5075
        case SET:
5076
          *total = satisfies_constraint_J (x) ? 0 : COSTS_N_INSNS (1);
5077
          return true;
5078
        case PLUS:
5079
          if (satisfies_constraint_I (x))
5080
            *total = 0;
5081
          else if (satisfies_constraint_J (x))
5082
            *total = 1;
5083
          else
5084
            *total = COSTS_N_INSNS (1);
5085
          return true;
5086
        default:
5087
          if (satisfies_constraint_K (x) || satisfies_constraint_L (x))
5088
            *total = 0;
5089
          else
5090
            *total = COSTS_N_INSNS (1);
5091
          return true;
5092
        }
5093
 
5094
    case CONST_DOUBLE:
5095
      *total = COSTS_N_INSNS (1);
5096
      return true;
5097
 
5098
    case CONST:
5099
    case SYMBOL_REF:
5100
    case LABEL_REF:
5101
      *total = COSTS_N_INSNS (3);
5102
      return true;
5103
 
5104
    case MULT:
5105
      /* For multiplies wider than HImode, we have to go to the FPU,
5106
         which normally involves copies.  Plus there's the latency
5107
         of the multiply itself, and the latency of the instructions to
5108
         transfer integer regs to FP regs.  */
5109
      /* ??? Check for FP mode.  */
5110
      if (GET_MODE_SIZE (GET_MODE (x)) > 2)
5111
        *total = COSTS_N_INSNS (10);
5112
      else
5113
        *total = COSTS_N_INSNS (2);
5114
      return true;
5115
 
5116
    case PLUS:
5117
    case MINUS:
5118
    case ASHIFT:
5119
    case ASHIFTRT:
5120
    case LSHIFTRT:
5121
      *total = COSTS_N_INSNS (1);
5122
      return true;
5123
 
5124
    case DIV:
5125
    case UDIV:
5126
    case MOD:
5127
    case UMOD:
5128
      /* We make divide expensive, so that divide-by-constant will be
5129
         optimized to a multiply.  */
5130
      *total = COSTS_N_INSNS (60);
5131
      return true;
5132
 
5133
    default:
5134
      return false;
5135
    }
5136
}
5137
 
5138
/* Calculate the cost of moving data from a register in class FROM to
5139
   one in class TO, using MODE.  */
5140
 
5141
int
5142
ia64_register_move_cost (enum machine_mode mode, enum reg_class from,
5143
                         enum reg_class to)
5144
{
5145
  /* ADDL_REGS is the same as GR_REGS for movement purposes.  */
5146
  if (to == ADDL_REGS)
5147
    to = GR_REGS;
5148
  if (from == ADDL_REGS)
5149
    from = GR_REGS;
5150
 
5151
  /* All costs are symmetric, so reduce cases by putting the
5152
     lower-numbered class as the destination.  */
5153
  if (from < to)
5154
    {
5155
      enum reg_class tmp = to;
5156
      to = from, from = tmp;
5157
    }
5158
 
5159
  /* Moving from FR<->GR in XFmode must be more expensive than 2,
5160
     so that we get secondary memory reloads.  Between FR_REGS,
5161
     we have to make this at least as expensive as MEMORY_MOVE_COST
5162
     to avoid spectacularly poor register class preferencing.  */
5163
  if (mode == XFmode || mode == RFmode)
5164
    {
5165
      if (to != GR_REGS || from != GR_REGS)
5166
        return MEMORY_MOVE_COST (mode, to, 0);
5167
      else
5168
        return 3;
5169
    }
5170
 
5171
  switch (to)
5172
    {
5173
    case PR_REGS:
5174
      /* Moving between PR registers takes two insns.  */
5175
      if (from == PR_REGS)
5176
        return 3;
5177
      /* Moving between PR and anything but GR is impossible.  */
5178
      if (from != GR_REGS)
5179
        return MEMORY_MOVE_COST (mode, to, 0);
5180
      break;
5181
 
5182
    case BR_REGS:
5183
      /* Moving between BR and anything but GR is impossible.  */
5184
      if (from != GR_REGS && from != GR_AND_BR_REGS)
5185
        return MEMORY_MOVE_COST (mode, to, 0);
5186
      break;
5187
 
5188
    case AR_I_REGS:
5189
    case AR_M_REGS:
5190
      /* Moving between AR and anything but GR is impossible.  */
5191
      if (from != GR_REGS)
5192
        return MEMORY_MOVE_COST (mode, to, 0);
5193
      break;
5194
 
5195
    case GR_REGS:
5196
    case FR_REGS:
5197
    case FP_REGS:
5198
    case GR_AND_FR_REGS:
5199
    case GR_AND_BR_REGS:
5200
    case ALL_REGS:
5201
      break;
5202
 
5203
    default:
5204
      gcc_unreachable ();
5205
    }
5206
 
5207
  return 2;
5208
}
5209
 
5210
/* Implement PREFERRED_RELOAD_CLASS.  Place additional restrictions on RCLASS
5211
   to use when copying X into that class.  */
5212
 
5213
enum reg_class
5214
ia64_preferred_reload_class (rtx x, enum reg_class rclass)
5215
{
5216
  switch (rclass)
5217
    {
5218
    case FR_REGS:
5219
    case FP_REGS:
5220
      /* Don't allow volatile mem reloads into floating point registers.
5221
         This is defined to force reload to choose the r/m case instead
5222
         of the f/f case when reloading (set (reg fX) (mem/v)).  */
5223
      if (MEM_P (x) && MEM_VOLATILE_P (x))
5224
        return NO_REGS;
5225
 
5226
      /* Force all unrecognized constants into the constant pool.  */
5227
      if (CONSTANT_P (x))
5228
        return NO_REGS;
5229
      break;
5230
 
5231
    case AR_M_REGS:
5232
    case AR_I_REGS:
5233
      if (!OBJECT_P (x))
5234
        return NO_REGS;
5235
      break;
5236
 
5237
    default:
5238
      break;
5239
    }
5240
 
5241
  return rclass;
5242
}
5243
 
5244
/* This function returns the register class required for a secondary
5245
   register when copying between one of the registers in RCLASS, and X,
5246
   using MODE.  A return value of NO_REGS means that no secondary register
5247
   is required.  */
5248
 
5249
enum reg_class
5250
ia64_secondary_reload_class (enum reg_class rclass,
5251
                             enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
5252
{
5253
  int regno = -1;
5254
 
5255
  if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
5256
    regno = true_regnum (x);
5257
 
5258
  switch (rclass)
5259
    {
5260
    case BR_REGS:
5261
    case AR_M_REGS:
5262
    case AR_I_REGS:
5263
      /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
5264
         interaction.  We end up with two pseudos with overlapping lifetimes
5265
         both of which are equiv to the same constant, and both of which need
5266
         to be in BR_REGS.  This seems to be a cse bug.  cse_basic_block_end
5267
         changes depending on the path length, which means the qty_first_reg
5268
         check in make_regs_eqv can give different answers at different times.
5269
         At some point I'll probably need a reload_indi pattern to handle
5270
         this.
5271
 
5272
         We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
5273
         wound up with a FP register from GR_AND_FR_REGS.  Extend that to all
5274
         non-general registers for good measure.  */
5275
      if (regno >= 0 && ! GENERAL_REGNO_P (regno))
5276
        return GR_REGS;
5277
 
5278
      /* This is needed if a pseudo used as a call_operand gets spilled to a
5279
         stack slot.  */
5280
      if (GET_CODE (x) == MEM)
5281
        return GR_REGS;
5282
      break;
5283
 
5284
    case FR_REGS:
5285
    case FP_REGS:
5286
      /* Need to go through general registers to get to other class regs.  */
5287
      if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
5288
        return GR_REGS;
5289
 
5290
      /* This can happen when a paradoxical subreg is an operand to the
5291
         muldi3 pattern.  */
5292
      /* ??? This shouldn't be necessary after instruction scheduling is
5293
         enabled, because paradoxical subregs are not accepted by
5294
         register_operand when INSN_SCHEDULING is defined.  Or alternatively,
5295
         stop the paradoxical subreg stupidity in the *_operand functions
5296
         in recog.c.  */
5297
      if (GET_CODE (x) == MEM
5298
          && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
5299
              || GET_MODE (x) == QImode))
5300
        return GR_REGS;
5301
 
5302
      /* This can happen because of the ior/and/etc patterns that accept FP
5303
         registers as operands.  If the third operand is a constant, then it
5304
         needs to be reloaded into a FP register.  */
5305
      if (GET_CODE (x) == CONST_INT)
5306
        return GR_REGS;
5307
 
5308
      /* This can happen because of register elimination in a muldi3 insn.
5309
         E.g. `26107 * (unsigned long)&u'.  */
5310
      if (GET_CODE (x) == PLUS)
5311
        return GR_REGS;
5312
      break;
5313
 
5314
    case PR_REGS:
5315
      /* ??? This happens if we cse/gcse a BImode value across a call,
5316
         and the function has a nonlocal goto.  This is because global
5317
         does not allocate call crossing pseudos to hard registers when
5318
         crtl->has_nonlocal_goto is true.  This is relatively
5319
         common for C++ programs that use exceptions.  To reproduce,
5320
         return NO_REGS and compile libstdc++.  */
5321
      if (GET_CODE (x) == MEM)
5322
        return GR_REGS;
5323
 
5324
      /* This can happen when we take a BImode subreg of a DImode value,
5325
         and that DImode value winds up in some non-GR register.  */
5326
      if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
5327
        return GR_REGS;
5328
      break;
5329
 
5330
    default:
5331
      break;
5332
    }
5333
 
5334
  return NO_REGS;
5335
}
5336
 
5337
 
5338
/* Implement targetm.unspec_may_trap_p hook.  */
5339
static int
5340
ia64_unspec_may_trap_p (const_rtx x, unsigned flags)
5341
{
5342
  if (GET_CODE (x) == UNSPEC)
5343
    {
5344
      switch (XINT (x, 1))
5345
        {
5346
        case UNSPEC_LDA:
5347
        case UNSPEC_LDS:
5348
        case UNSPEC_LDSA:
5349
        case UNSPEC_LDCCLR:
5350
        case UNSPEC_CHKACLR:
5351
        case UNSPEC_CHKS:
5352
          /* These unspecs are just wrappers.  */
5353
          return may_trap_p_1 (XVECEXP (x, 0, 0), flags);
5354
        }
5355
    }
5356
 
5357
  return default_unspec_may_trap_p (x, flags);
5358
}
5359
 
5360
 
5361
/* Parse the -mfixed-range= option string.  */
5362
 
5363
static void
5364
fix_range (const char *const_str)
5365
{
5366
  int i, first, last;
5367
  char *str, *dash, *comma;
5368
 
5369
  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
5370
     REG2 are either register names or register numbers.  The effect
5371
     of this option is to mark the registers in the range from REG1 to
5372
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
5373
     used, e.g., to ensure that kernel mode code doesn't use f32-f127.  */
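  /* For example, "f32-f127,f2-f5" is parsed as two comma-separated ranges;
     every register in each range is marked both fixed and call-used.  */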
5374
 
5375
  i = strlen (const_str);
5376
  str = (char *) alloca (i + 1);
5377
  memcpy (str, const_str, i + 1);
5378
 
5379
  while (1)
5380
    {
5381
      dash = strchr (str, '-');
5382
      if (!dash)
5383
        {
5384
          warning (0, "value of -mfixed-range must have form REG1-REG2");
5385
          return;
5386
        }
5387
      *dash = '\0';
5388
 
5389
      comma = strchr (dash + 1, ',');
5390
      if (comma)
5391
        *comma = '\0';
5392
 
5393
      first = decode_reg_name (str);
5394
      if (first < 0)
5395
        {
5396
          warning (0, "unknown register name: %s", str);
5397
          return;
5398
        }
5399
 
5400
      last = decode_reg_name (dash + 1);
5401
      if (last < 0)
5402
        {
5403
          warning (0, "unknown register name: %s", dash + 1);
5404
          return;
5405
        }
5406
 
5407
      *dash = '-';
5408
 
5409
      if (first > last)
5410
        {
5411
          warning (0, "%s-%s is an empty range", str, dash + 1);
5412
          return;
5413
        }
5414
 
5415
      for (i = first; i <= last; ++i)
5416
        fixed_regs[i] = call_used_regs[i] = 1;
5417
 
5418
      if (!comma)
5419
        break;
5420
 
5421
      *comma = ',';
5422
      str = comma + 1;
5423
    }
5424
}
5425
 
5426
/* Implement TARGET_HANDLE_OPTION.  */
5427
 
5428
static bool
5429
ia64_handle_option (size_t code, const char *arg, int value)
5430
{
5431
  switch (code)
5432
    {
5433
    case OPT_mfixed_range_:
5434
      fix_range (arg);
5435
      return true;
5436
 
5437
    case OPT_mtls_size_:
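      /* The accepted values are the bit widths available for TLS offset
         immediates: 14, 22 or 64 bits.  */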
5438
      if (value != 14 && value != 22 && value != 64)
5439
        error ("bad value %<%s%> for -mtls-size= switch", arg);
5440
      return true;
5441
 
5442
    case OPT_mtune_:
5443
      {
5444
        static struct pta
5445
          {
5446
            const char *name;           /* processor name or nickname.  */
5447
            enum processor_type processor;
5448
          }
5449
        const processor_alias_table[] =
5450
          {
5451
            {"itanium2", PROCESSOR_ITANIUM2},
5452
            {"mckinley", PROCESSOR_ITANIUM2},
5453
          };
5454
        int const pta_size = ARRAY_SIZE (processor_alias_table);
5455
        int i;
5456
 
5457
        for (i = 0; i < pta_size; i++)
5458
          if (!strcmp (arg, processor_alias_table[i].name))
5459
            {
5460
              ia64_tune = processor_alias_table[i].processor;
5461
              break;
5462
            }
5463
        if (i == pta_size)
5464
          error ("bad value %<%s%> for -mtune= switch", arg);
5465
        return true;
5466
      }
5467
 
5468
    default:
5469
      return true;
5470
    }
5471
}
5472
 
5473
/* Implement OVERRIDE_OPTIONS.  */
5474
 
5475
void
5476
ia64_override_options (void)
5477
{
5478
  if (TARGET_AUTO_PIC)
5479
    target_flags |= MASK_CONST_GP;
5480
 
5481
  /* Numerous experiments show that IRA-based loop pressure
5482
     calculation works better for RTL loop invariant motion on targets
5483
     with enough (>= 32) registers.  It is an expensive optimization.
5484
     So it is enabled only when optimizing for peak performance.  */
5485
  if (optimize >= 3)
5486
    flag_ira_loop_pressure = 1;
5487
 
5488
 
5489
  ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
5490
 
5491
  init_machine_status = ia64_init_machine_status;
5492
 
5493
  if (align_functions <= 0)
5494
    align_functions = 64;
5495
  if (align_loops <= 0)
5496
    align_loops = 32;
5497
  if (TARGET_ABI_OPEN_VMS)
5498
    flag_no_common = 1;
5499
 
5500
  ia64_override_options_after_change();
5501
}
5502
 
5503
/* Implement targetm.override_options_after_change.  */
5504
 
5505
static void
5506
ia64_override_options_after_change (void)
5507
{
5508
  ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
5509
  flag_schedule_insns_after_reload = 0;
5510
 
5511
  if (optimize >= 3
5512
      && ! sel_sched_switch_set)
5513
    {
5514
      flag_selective_scheduling2 = 1;
5515
      flag_sel_sched_pipelining = 1;
5516
    }
5517
  if (mflag_sched_control_spec == 2)
5518
    {
5519
      /* Control speculation is on by default for the selective scheduler,
5520
         but not for the Haifa scheduler.  */
5521
      mflag_sched_control_spec = flag_selective_scheduling2 ? 1 : 0;
5522
    }
5523
  if (flag_sel_sched_pipelining && flag_auto_inc_dec)
5524
    {
5525
      /* FIXME: remove this when we'd implement breaking autoinsns as
5526
         a transformation.  */
5527
      flag_auto_inc_dec = 0;
5528
    }
5529
}
5530
 
5531
/* Initialize the record of emitted frame related registers.  */
5532
 
5533
void ia64_init_expanders (void)
5534
{
5535
  memset (&emitted_frame_related_regs, 0, sizeof (emitted_frame_related_regs));
5536
}
5537
 
5538
static struct machine_function *
5539
ia64_init_machine_status (void)
5540
{
5541
  return GGC_CNEW (struct machine_function);
5542
}
5543
 
5544
static enum attr_itanium_class ia64_safe_itanium_class (rtx);
5545
static enum attr_type ia64_safe_type (rtx);
5546
 
5547
static enum attr_itanium_class
5548
ia64_safe_itanium_class (rtx insn)
5549
{
5550
  if (recog_memoized (insn) >= 0)
5551
    return get_attr_itanium_class (insn);
5552
  else if (DEBUG_INSN_P (insn))
5553
    return ITANIUM_CLASS_IGNORE;
5554
  else
5555
    return ITANIUM_CLASS_UNKNOWN;
5556
}
5557
 
5558
static enum attr_type
5559
ia64_safe_type (rtx insn)
5560
{
5561
  if (recog_memoized (insn) >= 0)
5562
    return get_attr_type (insn);
5563
  else
5564
    return TYPE_UNKNOWN;
5565
}
5566
 
5567
/* The following collection of routines emit instruction group stop bits as
5568
   necessary to avoid dependencies.  */
5569
 
5570
/* Need to track some additional registers as far as serialization is
5571
   concerned so we can properly handle br.call and br.ret.  We could
5572
   make these registers visible to gcc, but since these registers are
5573
   never explicitly used in gcc generated code, it seems wasteful to
5574
   do so (plus it would make the call and return patterns needlessly
5575
   complex).  */
5576
#define REG_RP          (BR_REG (0))
5577
#define REG_AR_CFM      (FIRST_PSEUDO_REGISTER + 1)
5578
/* This is used for volatile asms which may require a stop bit immediately
5579
   before and after them.  */
5580
#define REG_VOLATILE    (FIRST_PSEUDO_REGISTER + 2)
5581
#define AR_UNAT_BIT_0   (FIRST_PSEUDO_REGISTER + 3)
5582
#define NUM_REGS        (AR_UNAT_BIT_0 + 64)
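/* The 64 extra entries let each bit of ar.unat be tracked separately; see
   the AR_UNAT_REGNUM and UNSPEC_GR_SPILL handling in rtx_needs_barrier.  */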
5583
 
5584
/* For each register, we keep track of how it has been written in the
5585
   current instruction group.
5586
 
5587
   If a register is written unconditionally (no qualifying predicate),
5588
   WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
5589
 
5590
   If a register is written if its qualifying predicate P is true, we
5591
   set WRITE_COUNT to 1 and FIRST_PRED to P.  Later on, the same register
5592
   may be written again by the complement of P (P^1) and when this happens,
5593
   WRITE_COUNT gets set to 2.
5594
 
5595
   The result of this is that whenever an insn attempts to write a register
5596
   whose WRITE_COUNT is two, we need to issue an insn group barrier first.
5597
 
5598
   If a predicate register is written by a floating-point insn, we set
5599
   WRITTEN_BY_FP to true.
5600
 
5601
   If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
5602
   to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true.  */
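/* For example, (p6) mov r14 = r15 followed by (p7) mov r14 = r16, where p6
   is even and p7 is its complement, raises r14's WRITE_COUNT to 2 without
   requiring a barrier; any further write to r14 in the same group then
   forces a stop bit.  */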
5603
 
5604
#if GCC_VERSION >= 4000
5605
#define RWS_FIELD_TYPE __extension__ unsigned short
5606
#else
5607
#define RWS_FIELD_TYPE unsigned int
5608
#endif
5609
struct reg_write_state
5610
{
5611
  RWS_FIELD_TYPE write_count : 2;
5612
  RWS_FIELD_TYPE first_pred : 10;
5613
  RWS_FIELD_TYPE written_by_fp : 1;
5614
  RWS_FIELD_TYPE written_by_and : 1;
5615
  RWS_FIELD_TYPE written_by_or : 1;
5616
};
5617
 
5618
/* Cumulative info for the current instruction group.  */
5619
struct reg_write_state rws_sum[NUM_REGS];
5620
#ifdef ENABLE_CHECKING
5621
/* Bitmap whether a register has been written in the current insn.  */
5622
HARD_REG_ELT_TYPE rws_insn[(NUM_REGS + HOST_BITS_PER_WIDEST_FAST_INT - 1)
5623
                           / HOST_BITS_PER_WIDEST_FAST_INT];
5624
 
5625
static inline void
5626
rws_insn_set (int regno)
5627
{
5628
  gcc_assert (!TEST_HARD_REG_BIT (rws_insn, regno));
5629
  SET_HARD_REG_BIT (rws_insn, regno);
5630
}
5631
 
5632
static inline int
5633
rws_insn_test (int regno)
5634
{
5635
  return TEST_HARD_REG_BIT (rws_insn, regno);
5636
}
5637
#else
5638
/* When not checking, track just REG_AR_CFM and REG_VOLATILE.  */
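/* These are the only registers queried through rws_insn_test in the barrier
   code below (the CALL and ASM_OPERANDS cases of rtx_needs_barrier).  */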
5639
unsigned char rws_insn[2];
5640
 
5641
static inline void
5642
rws_insn_set (int regno)
5643
{
5644
  if (regno == REG_AR_CFM)
5645
    rws_insn[0] = 1;
5646
  else if (regno == REG_VOLATILE)
5647
    rws_insn[1] = 1;
5648
}
5649
 
5650
static inline int
5651
rws_insn_test (int regno)
5652
{
5653
  if (regno == REG_AR_CFM)
5654
    return rws_insn[0];
5655
  if (regno == REG_VOLATILE)
5656
    return rws_insn[1];
5657
  return 0;
5658
}
5659
#endif
5660
 
5661
/* Indicates whether this is the first instruction after a stop bit,
5662
   in which case we don't need another stop bit.  Without this,
5663
   ia64_variable_issue will die when scheduling an alloc.  */
5664
static int first_instruction;
5665
 
5666
/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
5667
   RTL for one instruction.  */
5668
struct reg_flags
5669
{
5670
  unsigned int is_write : 1;    /* Is register being written?  */
5671
  unsigned int is_fp : 1;       /* Is register used as part of an fp op?  */
5672
  unsigned int is_branch : 1;   /* Is register used as part of a branch?  */
5673
  unsigned int is_and : 1;      /* Is register used as part of and.orcm?  */
5674
  unsigned int is_or : 1;       /* Is register used as part of or.andcm?  */
5675
  unsigned int is_sibcall : 1;  /* Is this a sibling or normal call?  */
5676
};
5677
 
5678
static void rws_update (int, struct reg_flags, int);
5679
static int rws_access_regno (int, struct reg_flags, int);
5680
static int rws_access_reg (rtx, struct reg_flags, int);
5681
static void update_set_flags (rtx, struct reg_flags *);
5682
static int set_src_needs_barrier (rtx, struct reg_flags, int);
5683
static int rtx_needs_barrier (rtx, struct reg_flags, int);
5684
static void init_insn_group_barriers (void);
5685
static int group_barrier_needed (rtx);
5686
static int safe_group_barrier_needed (rtx);
5687
static int in_safe_group_barrier;
5688
 
5689
/* Update *RWS for REGNO, which is being written by the current instruction,
5690
   with predicate PRED, and associated register flags in FLAGS.  */
5691
 
5692
static void
5693
rws_update (int regno, struct reg_flags flags, int pred)
5694
{
5695
  if (pred)
5696
    rws_sum[regno].write_count++;
5697
  else
5698
    rws_sum[regno].write_count = 2;
5699
  rws_sum[regno].written_by_fp |= flags.is_fp;
5700
  /* ??? Not tracking and/or across differing predicates.  */
5701
  rws_sum[regno].written_by_and = flags.is_and;
5702
  rws_sum[regno].written_by_or = flags.is_or;
5703
  rws_sum[regno].first_pred = pred;
5704
}
5705
 
5706
/* Handle an access to register REGNO of type FLAGS using predicate register
5707
   PRED.  Update rws_sum array.  Return 1 if this access creates
5708
   a dependency with an earlier instruction in the same group.  */
5709
 
5710
static int
5711
rws_access_regno (int regno, struct reg_flags flags, int pred)
5712
{
5713
  int need_barrier = 0;
5714
 
5715
  gcc_assert (regno < NUM_REGS);
5716
 
5717
  if (! PR_REGNO_P (regno))
5718
    flags.is_and = flags.is_or = 0;
5719
 
5720
  if (flags.is_write)
5721
    {
5722
      int write_count;
5723
 
5724
      rws_insn_set (regno);
5725
      write_count = rws_sum[regno].write_count;
5726
 
5727
      switch (write_count)
5728
        {
5729
        case 0:
5730
          /* The register has not been written yet.  */
5731
          if (!in_safe_group_barrier)
5732
            rws_update (regno, flags, pred);
5733
          break;
5734
 
5735
        case 1:
5736
          /* The register has been written via a predicate.  If this is
5737
             not a complementary predicate, then we need a barrier.  */
5738
          /* ??? This assumes that P and P+1 are always complementary
5739
             predicates for P even.  */
5740
          if (flags.is_and && rws_sum[regno].written_by_and)
5741
            ;
5742
          else if (flags.is_or && rws_sum[regno].written_by_or)
5743
            ;
5744
          else if ((rws_sum[regno].first_pred ^ 1) != pred)
5745
            need_barrier = 1;
5746
          if (!in_safe_group_barrier)
5747
            rws_update (regno, flags, pred);
5748
          break;
5749
 
5750
        case 2:
5751
          /* The register has been unconditionally written already.  We
5752
             need a barrier.  */
5753
          if (flags.is_and && rws_sum[regno].written_by_and)
5754
            ;
5755
          else if (flags.is_or && rws_sum[regno].written_by_or)
5756
            ;
5757
          else
5758
            need_barrier = 1;
5759
          if (!in_safe_group_barrier)
5760
            {
5761
              rws_sum[regno].written_by_and = flags.is_and;
5762
              rws_sum[regno].written_by_or = flags.is_or;
5763
            }
5764
          break;
5765
 
5766
        default:
5767
          gcc_unreachable ();
5768
        }
5769
    }
5770
  else
5771
    {
5772
      if (flags.is_branch)
5773
        {
5774
          /* Branches have several RAW exceptions that allow us to avoid
5775
             barriers.  */
5776
 
5777
          if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
5778
            /* RAW dependencies on branch regs are permissible as long
5779
               as the writer is a non-branch instruction.  Since we
5780
               never generate code that uses a branch register written
5781
               by a branch instruction, handling this case is
5782
               easy.  */
5783
            return 0;
5784
 
5785
          if (REGNO_REG_CLASS (regno) == PR_REGS
5786
              && ! rws_sum[regno].written_by_fp)
5787
            /* The predicates of a branch are available within the
5788
               same insn group as long as the predicate was written by
5789
               something other than a floating-point instruction.  */
5790
            return 0;
5791
        }
5792
 
5793
      if (flags.is_and && rws_sum[regno].written_by_and)
5794
        return 0;
5795
      if (flags.is_or && rws_sum[regno].written_by_or)
5796
        return 0;
5797
 
5798
      switch (rws_sum[regno].write_count)
5799
        {
5800
        case 0:
5801
          /* The register has not been written yet.  */
5802
          break;
5803
 
5804
        case 1:
5805
          /* The register has been written via a predicate.  If this is
5806
             not a complementary predicate, then we need a barrier.  */
5807
          /* ??? This assumes that P and P+1 are always complementary
5808
             predicates for P even.  */
5809
          if ((rws_sum[regno].first_pred ^ 1) != pred)
5810
            need_barrier = 1;
5811
          break;
5812
 
5813
        case 2:
5814
          /* The register has been unconditionally written already.  We
5815
             need a barrier.  */
5816
          need_barrier = 1;
5817
          break;
5818
 
5819
        default:
5820
          gcc_unreachable ();
5821
        }
5822
    }
5823
 
5824
  return need_barrier;
5825
}
5826
 
5827
static int
5828
rws_access_reg (rtx reg, struct reg_flags flags, int pred)
5829
{
5830
  int regno = REGNO (reg);
5831
  int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
5832
 
5833
  if (n == 1)
5834
    return rws_access_regno (regno, flags, pred);
5835
  else
5836
    {
5837
      int need_barrier = 0;
5838
      while (--n >= 0)
5839
        need_barrier |= rws_access_regno (regno + n, flags, pred);
5840
      return need_barrier;
5841
    }
5842
}
5843
 
5844
/* Examine X, which is a SET rtx, and update the register flags
5845
   stored in *PFLAGS accordingly.  */
5846
 
5847
static void
5848
update_set_flags (rtx x, struct reg_flags *pflags)
5849
{
5850
  rtx src = SET_SRC (x);
5851
 
5852
  switch (GET_CODE (src))
5853
    {
5854
    case CALL:
5855
      return;
5856
 
5857
    case IF_THEN_ELSE:
5858
      /* There are four cases here:
5859
         (1) The destination is (pc), in which case this is a branch,
5860
         nothing here applies.
5861
         (2) The destination is ar.lc, in which case this is a
5862
         doloop_end_internal,
5863
         (3) The destination is an fp register, in which case this is
5864
         an fselect instruction.
5865
         (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
5866
         this is a check load.
5867
         In all cases, nothing we do in this function applies.  */
5868
      return;
5869
 
5870
    default:
5871
      if (COMPARISON_P (src)
5872
          && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
5873
        /* Set pflags->is_fp to 1 so that we know we're dealing
5874
           with a floating point comparison when processing the
5875
           destination of the SET.  */
5876
        pflags->is_fp = 1;
5877
 
5878
      /* Discover if this is a parallel comparison.  We only handle
5879
         and.orcm and or.andcm at present, since we must retain a
5880
         strict inverse on the predicate pair.  */
5881
      else if (GET_CODE (src) == AND)
5882
        pflags->is_and = 1;
5883
      else if (GET_CODE (src) == IOR)
5884
        pflags->is_or = 1;
5885
 
5886
      break;
5887
    }
5888
}
5889
 
5890
/* Subroutine of rtx_needs_barrier; this function determines whether the
5891
   source of a given SET rtx found in X needs a barrier.  FLAGS and PRED
5892
   are as in rtx_needs_barrier; PRED is the number of the predicate
5893
   register governing this insn, or zero if it is unpredicated.  */
5894
 
5895
static int
5896
set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
5897
{
5898
  int need_barrier = 0;
5899
  rtx dst;
5900
  rtx src = SET_SRC (x);
5901
 
5902
  if (GET_CODE (src) == CALL)
5903
    /* We don't need to worry about the result registers that
5904
       get written by subroutine call.  */
5905
    return rtx_needs_barrier (src, flags, pred);
5906
  else if (SET_DEST (x) == pc_rtx)
5907
    {
5908
      /* X is a conditional branch.  */
5909
      /* ??? This seems redundant, as the caller sets this bit for
5910
         all JUMP_INSNs.  */
5911
      if (!ia64_spec_check_src_p (src))
5912
        flags.is_branch = 1;
5913
      return rtx_needs_barrier (src, flags, pred);
5914
    }
5915
 
5916
  if (ia64_spec_check_src_p (src))
5917
    /* Avoid checking one register twice (in condition
5918
       and in 'then' section) for ldc pattern.  */
5919
    {
5920
      gcc_assert (REG_P (XEXP (src, 2)));
5921
      need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);
5922
 
5923
      /* We process MEM below.  */
5924
      src = XEXP (src, 1);
5925
    }
5926
 
5927
  need_barrier |= rtx_needs_barrier (src, flags, pred);
5928
 
5929
  dst = SET_DEST (x);
5930
  if (GET_CODE (dst) == ZERO_EXTRACT)
5931
    {
5932
      need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
5933
      need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
5934
    }
5935
  return need_barrier;
5936
}
5937
 
5938
/* Handle an access to rtx X of type FLAGS using predicate register
5939
   PRED.  Return 1 if this access creates a dependency with an earlier
5940
   instruction in the same group.  */
5941
 
5942
static int
5943
rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
5944
{
5945
  int i, j;
5946
  int is_complemented = 0;
5947
  int need_barrier = 0;
5948
  const char *format_ptr;
5949
  struct reg_flags new_flags;
5950
  rtx cond;
5951
 
5952
  if (! x)
5953
    return 0;
5954
 
5955
  new_flags = flags;
5956
 
5957
  switch (GET_CODE (x))
5958
    {
5959
    case SET:
5960
      update_set_flags (x, &new_flags);
5961
      need_barrier = set_src_needs_barrier (x, new_flags, pred);
5962
      if (GET_CODE (SET_SRC (x)) != CALL)
5963
        {
5964
          new_flags.is_write = 1;
5965
          need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
5966
        }
5967
      break;
5968
 
5969
    case CALL:
5970
      new_flags.is_write = 0;
5971
      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5972
 
5973
      /* Avoid multiple register writes, in case this is a pattern with
5974
         multiple CALL rtx.  This avoids a failure in rws_access_reg.  */
5975
      if (! flags.is_sibcall && ! rws_insn_test (REG_AR_CFM))
5976
        {
5977
          new_flags.is_write = 1;
5978
          need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
5979
          need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
5980
          need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5981
        }
5982
      break;
5983
 
5984
    case COND_EXEC:
5985
      /* X is a predicated instruction.  */
5986
 
5987
      cond = COND_EXEC_TEST (x);
5988
      gcc_assert (!pred);
5989
      need_barrier = rtx_needs_barrier (cond, flags, 0);
5990
 
5991
      if (GET_CODE (cond) == EQ)
5992
        is_complemented = 1;
5993
      cond = XEXP (cond, 0);
5994
      gcc_assert (GET_CODE (cond) == REG
5995
                  && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
5996
      pred = REGNO (cond);
5997
      if (is_complemented)
5998
        ++pred;
5999
 
6000
      need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
6001
      return need_barrier;
6002
 
6003
    case CLOBBER:
6004
    case USE:
6005
      /* Clobber & use are for earlier compiler-phases only.  */
6006
      break;
6007
 
6008
    case ASM_OPERANDS:
6009
    case ASM_INPUT:
6010
      /* We always emit stop bits for traditional asms.  We emit stop bits
6011
         for volatile extended asms if TARGET_VOL_ASM_STOP is true.  */
6012
      if (GET_CODE (x) != ASM_OPERANDS
6013
          || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
6014
        {
6015
          /* Avoid writing the register multiple times if we have multiple
6016
             asm outputs.  This avoids a failure in rws_access_reg.  */
6017
          if (! rws_insn_test (REG_VOLATILE))
6018
            {
6019
              new_flags.is_write = 1;
6020
              rws_access_regno (REG_VOLATILE, new_flags, pred);
6021
            }
6022
          return 1;
6023
        }
6024
 
6025
      /* For all ASM_OPERANDS, we must traverse the vector of input operands.
6026
         We cannot just fall through here since then we would be confused
6027
         by the ASM_INPUT rtx inside ASM_OPERANDS, which does not indicate
6028
         a traditional asm, unlike its normal usage.  */
6029
 
6030
      for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
6031
        if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
6032
          need_barrier = 1;
6033
      break;
6034
 
6035
    case PARALLEL:
6036
      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6037
        {
6038
          rtx pat = XVECEXP (x, 0, i);
6039
          switch (GET_CODE (pat))
6040
            {
6041
            case SET:
6042
              update_set_flags (pat, &new_flags);
6043
              need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
6044
              break;
6045
 
6046
            case USE:
6047
            case CALL:
6048
            case ASM_OPERANDS:
6049
              need_barrier |= rtx_needs_barrier (pat, flags, pred);
6050
              break;
6051
 
6052
            case CLOBBER:
6053
            case RETURN:
6054
              break;
6055
 
6056
            default:
6057
              gcc_unreachable ();
6058
            }
6059
        }
6060
      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6061
        {
6062
          rtx pat = XVECEXP (x, 0, i);
6063
          if (GET_CODE (pat) == SET)
6064
            {
6065
              if (GET_CODE (SET_SRC (pat)) != CALL)
6066
                {
6067
                  new_flags.is_write = 1;
6068
                  need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
6069
                                                     pred);
6070
                }
6071
            }
6072
          else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
6073
            need_barrier |= rtx_needs_barrier (pat, flags, pred);
6074
        }
6075
      break;
6076
 
6077
    case SUBREG:
6078
      need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
6079
      break;
6080
    case REG:
6081
      if (REGNO (x) == AR_UNAT_REGNUM)
6082
        {
6083
          for (i = 0; i < 64; ++i)
6084
            need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
6085
        }
6086
      else
6087
        need_barrier = rws_access_reg (x, flags, pred);
6088
      break;
6089
 
6090
    case MEM:
6091
      /* Find the regs used in memory address computation.  */
6092
      new_flags.is_write = 0;
6093
      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6094
      break;
6095
 
6096
    case CONST_INT:   case CONST_DOUBLE:  case CONST_VECTOR:
6097
    case SYMBOL_REF:  case LABEL_REF:     case CONST:
6098
      break;
6099
 
6100
      /* Operators with side-effects.  */
6101
    case POST_INC:    case POST_DEC:
6102
      gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
6103
 
6104
      new_flags.is_write = 0;
6105
      need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
6106
      new_flags.is_write = 1;
6107
      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
6108
      break;
6109
 
6110
    case POST_MODIFY:
6111
      gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
6112
 
6113
      new_flags.is_write = 0;
6114
      need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
6115
      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6116
      new_flags.is_write = 1;
6117
      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
6118
      break;
6119
 
6120
      /* Handle common unary and binary ops for efficiency.  */
6121
    case COMPARE:  case PLUS:    case MINUS:   case MULT:      case DIV:
6122
    case MOD:      case UDIV:    case UMOD:    case AND:       case IOR:
6123
    case XOR:      case ASHIFT:  case ROTATE:  case ASHIFTRT:  case LSHIFTRT:
6124
    case ROTATERT: case SMIN:    case SMAX:    case UMIN:      case UMAX:
6125
    case NE:       case EQ:      case GE:      case GT:        case LE:
6126
    case LT:       case GEU:     case GTU:     case LEU:       case LTU:
6127
      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6128
      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6129
      break;
6130
 
6131
    case NEG:      case NOT:            case SIGN_EXTEND:     case ZERO_EXTEND:
6132
    case TRUNCATE: case FLOAT_EXTEND:   case FLOAT_TRUNCATE:  case FLOAT:
6133
    case FIX:      case UNSIGNED_FLOAT: case UNSIGNED_FIX:    case ABS:
6134
    case SQRT:     case FFS:            case POPCOUNT:
6135
      need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6136
      break;
6137
 
6138
    case VEC_SELECT:
6139
      /* VEC_SELECT's second argument is a PARALLEL with integers that
6140
         describe the elements selected.  On ia64, those integers are
6141
         always constants.  Avoid walking the PARALLEL so that we don't
6142
         get confused with "normal" parallels and then die.  */
6143
      need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6144
      break;
6145
 
6146
    case UNSPEC:
6147
      switch (XINT (x, 1))
6148
        {
6149
        case UNSPEC_LTOFF_DTPMOD:
6150
        case UNSPEC_LTOFF_DTPREL:
6151
        case UNSPEC_DTPREL:
6152
        case UNSPEC_LTOFF_TPREL:
6153
        case UNSPEC_TPREL:
6154
        case UNSPEC_PRED_REL_MUTEX:
6155
        case UNSPEC_PIC_CALL:
6156
        case UNSPEC_MF:
6157
        case UNSPEC_FETCHADD_ACQ:
6158
        case UNSPEC_BSP_VALUE:
6159
        case UNSPEC_FLUSHRS:
6160
        case UNSPEC_BUNDLE_SELECTOR:
6161
          break;
6162
 
6163
        case UNSPEC_GR_SPILL:
6164
        case UNSPEC_GR_RESTORE:
6165
          {
6166
            HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
6167
            HOST_WIDE_INT bit = (offset >> 3) & 63;
6168
 
6169
            need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6170
            new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
6171
            need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
6172
                                              new_flags, pred);
6173
            break;
6174
          }
6175
 
6176
        case UNSPEC_FR_SPILL:
6177
        case UNSPEC_FR_RESTORE:
6178
        case UNSPEC_GETF_EXP:
6179
        case UNSPEC_SETF_EXP:
6180
        case UNSPEC_ADDP4:
6181
        case UNSPEC_FR_SQRT_RECIP_APPROX:
6182
        case UNSPEC_FR_SQRT_RECIP_APPROX_RES:
6183
        case UNSPEC_LDA:
6184
        case UNSPEC_LDS:
6185
        case UNSPEC_LDS_A:
6186
        case UNSPEC_LDSA:
6187
        case UNSPEC_CHKACLR:
6188
        case UNSPEC_CHKS:
6189
          need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6190
          break;
6191
 
6192
        case UNSPEC_FR_RECIP_APPROX:
6193
        case UNSPEC_SHRP:
6194
        case UNSPEC_COPYSIGN:
6195
        case UNSPEC_FR_RECIP_APPROX_RES:
6196
          need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6197
          need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6198
          break;
6199
 
6200
        case UNSPEC_CMPXCHG_ACQ:
6201
          need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6202
          need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
6203
          break;
6204
 
6205
        default:
6206
          gcc_unreachable ();
6207
        }
6208
      break;
6209
 
6210
    case UNSPEC_VOLATILE:
6211
      switch (XINT (x, 1))
6212
        {
6213
        case UNSPECV_ALLOC:
6214
          /* Alloc must always be the first instruction of a group.
6215
             We force this by always returning true.  */
6216
          /* ??? We might get better scheduling if we explicitly check for
6217
             input/local/output register dependencies, and modify the
6218
             scheduler so that alloc is always reordered to the start of
6219
             the current group.  We could then eliminate all of the
6220
             first_instruction code.  */
6221
          rws_access_regno (AR_PFS_REGNUM, flags, pred);
6222
 
6223
          new_flags.is_write = 1;
6224
          rws_access_regno (REG_AR_CFM, new_flags, pred);
6225
          return 1;
6226
 
6227
        case UNSPECV_SET_BSP:
6228
          need_barrier = 1;
6229
          break;
6230
 
6231
        case UNSPECV_BLOCKAGE:
6232
        case UNSPECV_INSN_GROUP_BARRIER:
6233
        case UNSPECV_BREAK:
6234
        case UNSPECV_PSAC_ALL:
6235
        case UNSPECV_PSAC_NORMAL:
6236
          return 0;
6237
 
6238
        default:
6239
          gcc_unreachable ();
6240
        }
6241
      break;
6242
 
6243
    case RETURN:
6244
      new_flags.is_write = 0;
6245
      need_barrier  = rws_access_regno (REG_RP, flags, pred);
6246
      need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
6247
 
6248
      new_flags.is_write = 1;
6249
      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6250
      need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
6251
      break;
6252
 
6253
    default:
6254
      format_ptr = GET_RTX_FORMAT (GET_CODE (x));
6255
      for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6256
        switch (format_ptr[i])
6257
          {
6258
          case '0':     /* unused field */
6259
          case 'i':     /* integer */
6260
          case 'n':     /* note */
6261
          case 'w':     /* wide integer */
6262
          case 's':     /* pointer to string */
6263
          case 'S':     /* optional pointer to string */
6264
            break;
6265
 
6266
          case 'e':
6267
            if (rtx_needs_barrier (XEXP (x, i), flags, pred))
6268
              need_barrier = 1;
6269
            break;
6270
 
6271
          case 'E':
6272
            for (j = XVECLEN (x, i) - 1; j >= 0; --j)
6273
              if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
6274
                need_barrier = 1;
6275
            break;
6276
 
6277
          default:
6278
            gcc_unreachable ();
6279
          }
6280
      break;
6281
    }
6282
  return need_barrier;
6283
}
6284
 
6285
/* Clear out the state for group_barrier_needed at the start of a
6286
   sequence of insns.  */
6287
 
6288
static void
6289
init_insn_group_barriers (void)
6290
{
6291
  memset (rws_sum, 0, sizeof (rws_sum));
6292
  first_instruction = 1;
6293
}
6294
 
6295
/* Given the current state, determine whether a group barrier (a stop bit) is
6296
   necessary before INSN.  Return nonzero if so.  This modifies the state to
6297
   include the effects of INSN as a side-effect.  */
6298
 
6299
static int
6300
group_barrier_needed (rtx insn)
6301
{
6302
  rtx pat;
6303
  int need_barrier = 0;
6304
  struct reg_flags flags;
6305
 
6306
  memset (&flags, 0, sizeof (flags));
6307
  switch (GET_CODE (insn))
6308
    {
6309
    case NOTE:
6310
    case DEBUG_INSN:
6311
      break;
6312
 
6313
    case BARRIER:
6314
      /* A barrier doesn't imply an instruction group boundary.  */
6315
      break;
6316
 
6317
    case CODE_LABEL:
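      /* Control may reach a label from elsewhere, so be conservative:
         clear the per-insn state and request a stop bit.  */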
6318
      memset (rws_insn, 0, sizeof (rws_insn));
6319
      return 1;
6320
 
6321
    case CALL_INSN:
6322
      flags.is_branch = 1;
6323
      flags.is_sibcall = SIBLING_CALL_P (insn);
6324
      memset (rws_insn, 0, sizeof (rws_insn));
6325
 
6326
      /* Don't bundle a call following another call.  */
6327
      if ((pat = prev_active_insn (insn))
6328
          && GET_CODE (pat) == CALL_INSN)
6329
        {
6330
          need_barrier = 1;
6331
          break;
6332
        }
6333
 
6334
      need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
6335
      break;
6336
 
6337
    case JUMP_INSN:
6338
      if (!ia64_spec_check_p (insn))
6339
        flags.is_branch = 1;
6340
 
6341
      /* Don't bundle a jump following a call.  */
6342
      if ((pat = prev_active_insn (insn))
6343
          && GET_CODE (pat) == CALL_INSN)
6344
        {
6345
          need_barrier = 1;
6346
          break;
6347
        }
6348
      /* FALLTHRU */
6349
 
6350
    case INSN:
6351
      if (GET_CODE (PATTERN (insn)) == USE
6352
          || GET_CODE (PATTERN (insn)) == CLOBBER)
6353
        /* Don't care about USE and CLOBBER "insns"---those are used to
6354
           indicate to the optimizer that it shouldn't get rid of
6355
           certain operations.  */
6356
        break;
6357
 
6358
      pat = PATTERN (insn);
6359
 
6360
      /* Ug.  Hack hacks hacked elsewhere.  */
6361
      switch (recog_memoized (insn))
6362
        {
6363
          /* We play dependency tricks with the epilogue in order
6364
             to get proper schedules.  Undo this for dv analysis.  */
6365
        case CODE_FOR_epilogue_deallocate_stack:
6366
        case CODE_FOR_prologue_allocate_stack:
6367
          pat = XVECEXP (pat, 0, 0);
6368
          break;
6369
 
6370
          /* The pattern we use for br.cloop confuses the code above.
6371
             The second element of the vector is representative.  */
6372
        case CODE_FOR_doloop_end_internal:
6373
          pat = XVECEXP (pat, 0, 1);
6374
          break;
6375
 
6376
          /* Doesn't generate code.  */
6377
        case CODE_FOR_pred_rel_mutex:
6378
        case CODE_FOR_prologue_use:
6379
          return 0;
6380
 
6381
        default:
6382
          break;
6383
        }
6384
 
6385
      memset (rws_insn, 0, sizeof (rws_insn));
6386
      need_barrier = rtx_needs_barrier (pat, flags, 0);
6387
 
6388
      /* Check to see if the previous instruction was a volatile
6389
         asm.  */
6390
      if (! need_barrier)
6391
        need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
6392
 
6393
      break;
6394
 
6395
    default:
6396
      gcc_unreachable ();
6397
    }
6398
 
6399
  if (first_instruction && INSN_P (insn)
6400
      && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
6401
      && GET_CODE (PATTERN (insn)) != USE
6402
      && GET_CODE (PATTERN (insn)) != CLOBBER)
6403
    {
6404
      need_barrier = 0;
6405
      first_instruction = 0;
6406
    }
6407
 
6408
  return need_barrier;
6409
}
6410
 
6411
/* Like group_barrier_needed, but do not clobber the current state.  */
6412
 
6413
static int
6414
safe_group_barrier_needed (rtx insn)
6415
{
6416
  int saved_first_instruction;
6417
  int t;
6418
 
6419
  saved_first_instruction = first_instruction;
6420
  in_safe_group_barrier = 1;
6421
 
6422
  t = group_barrier_needed (insn);
6423
 
6424
  first_instruction = saved_first_instruction;
6425
  in_safe_group_barrier = 0;
6426
 
6427
  return t;
6428
}
6429
 
6430
/* Scan the current function and insert stop bits as necessary to
6431
   eliminate dependencies.  This function assumes that a final
6432
   instruction scheduling pass has been run which has already
6433
   inserted most of the necessary stop bits.  This function only
6434
   inserts new ones at basic block boundaries, since these are
6435
   invisible to the scheduler.  */
6436
 
6437
static void
6438
emit_insn_group_barriers (FILE *dump)
6439
{
6440
  rtx insn;
6441
  rtx last_label = 0;
6442
  int insns_since_last_label = 0;
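  /* Track the most recent label or basic-block note; if a stop bit turns
     out to be needed, it is emitted just before that boundary and the
     scan resumes from that point.  */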
6443
 
6444
  init_insn_group_barriers ();
6445
 
6446
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6447
    {
6448
      if (GET_CODE (insn) == CODE_LABEL)
6449
        {
6450
          if (insns_since_last_label)
6451
            last_label = insn;
6452
          insns_since_last_label = 0;
6453
        }
6454
      else if (GET_CODE (insn) == NOTE
6455
               && NOTE_KIND (insn) == NOTE_INSN_BASIC_BLOCK)
6456
        {
6457
          if (insns_since_last_label)
6458
            last_label = insn;
6459
          insns_since_last_label = 0;
6460
        }
6461
      else if (GET_CODE (insn) == INSN
6462
               && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
6463
               && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
6464
        {
6465
          init_insn_group_barriers ();
6466
          last_label = 0;
6467
        }
6468
      else if (NONDEBUG_INSN_P (insn))
6469
        {
6470
          insns_since_last_label = 1;
6471
 
6472
          if (group_barrier_needed (insn))
6473
            {
6474
              if (last_label)
6475
                {
6476
                  if (dump)
6477
                    fprintf (dump, "Emitting stop before label %d\n",
6478
                             INSN_UID (last_label));
6479
                  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
6480
                  insn = last_label;
6481
 
6482
                  init_insn_group_barriers ();
6483
                  last_label = 0;
6484
                }
6485
            }
6486
        }
6487
    }
6488
}
6489
 
6490
/* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
6491
   This function has to emit all necessary group barriers.  */
6492
 
6493
static void
6494
emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
6495
{
6496
  rtx insn;
6497
 
6498
  init_insn_group_barriers ();
6499
 
6500
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6501
    {
6502
      if (GET_CODE (insn) == BARRIER)
6503
        {
6504
          rtx last = prev_active_insn (insn);
6505
 
6506
          if (! last)
6507
            continue;
6508
          if (GET_CODE (last) == JUMP_INSN
6509
              && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
6510
            last = prev_active_insn (last);
6511
          if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
6512
            emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
6513
 
6514
          init_insn_group_barriers ();
6515
        }
6516
      else if (NONDEBUG_INSN_P (insn))
6517
        {
6518
          if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
6519
            init_insn_group_barriers ();
6520
          else if (group_barrier_needed (insn))
6521
            {
6522
              emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
6523
              init_insn_group_barriers ();
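              /* Re-record INSN in the freshly reset state; the return
                 value is deliberately ignored here.  */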
6524
              group_barrier_needed (insn);
6525
            }
6526
        }
6527
    }
6528
}
6529
 
6530
 
6531
 
6532
/* Instruction scheduling support.  */
6533
 
6534
#define NR_BUNDLES 10
6535
 
6536
/* A list of names of all available bundles.  */
6537
 
6538
static const char *bundle_name [NR_BUNDLES] =
6539
{
6540
  ".mii",
6541
  ".mmi",
6542
  ".mfi",
6543
  ".mmf",
6544
#if NR_BUNDLES == 10
6545
  ".bbb",
6546
  ".mbb",
6547
#endif
6548
  ".mib",
6549
  ".mmb",
6550
  ".mfb",
6551
  ".mlx"
6552
};
6553
 
6554
/* Nonzero if we should insert stop bits into the schedule.  */
6555
 
6556
int ia64_final_schedule = 0;
6557
 
6558
/* Codes of the corresponding queried units: */
6559
 
6560
static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
6561
static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
6562
 
6563
static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
6564
static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
6565
 
6566
static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
6567
 
6568
/* The following variable value is an insn group barrier.  */
6569
 
6570
static rtx dfa_stop_insn;
6571
 
6572
/* The following variable value is the last issued insn.  */
6573
 
6574
static rtx last_scheduled_insn;
6575
 
6576
/* The following variable value is a pointer to a DFA state used as
6577
   a temporary variable.  */
6578
 
6579
static state_t temp_dfa_state = NULL;
6580
 
6581
/* The following variable value is the DFA state after issuing the last
6582
   insn.  */
6583
 
6584
static state_t prev_cycle_state = NULL;
6585
 
6586
/* The following array element values are TRUE if the corresponding
6587
   insn requires to add stop bits before it.  */
6588
 
6589
static char *stops_p = NULL;
6590
 
6591
/* The following variable is used to set up the array mentioned above.  */
6592
 
6593
static int stop_before_p = 0;
6594
 
6595
/* The following variable value is the length of the arrays `clocks' and
6596
   `add_cycles'. */
6597
 
6598
static int clocks_length;
6599
 
6600
/* The following variable value is number of data speculations in progress.  */
6601
static int pending_data_specs = 0;
6602
 
6603
/* Number of memory references on the current and the three following processor cycles.  */
6604
static char mem_ops_in_group[4];
6605
 
6606
/* Number of the current processor cycle (from the scheduler's point of view).  */
6607
static int current_cycle;
6608
 
6609
static rtx ia64_single_set (rtx);
6610
static void ia64_emit_insn_before (rtx, rtx);
6611
 
6612
/* Map a bundle number to its pseudo-op.  */
6613
 
6614
const char *
6615
get_bundle_name (int b)
6616
{
6617
  return bundle_name[b];
6618
}
6619
 
6620
 
6621
/* Return the maximum number of instructions the CPU can issue per cycle.  */
6622
 
6623
static int
6624
ia64_issue_rate (void)
6625
{
6626
  return 6;
6627
}
6628
 
6629
/* Helper function - like single_set, but look inside COND_EXEC.  */
6630
 
6631
static rtx
6632
ia64_single_set (rtx insn)
6633
{
6634
  rtx x = PATTERN (insn), ret;
6635
  if (GET_CODE (x) == COND_EXEC)
6636
    x = COND_EXEC_CODE (x);
6637
  if (GET_CODE (x) == SET)
6638
    return x;
6639
 
6640
  /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
6641
     Although they are not classical single sets, the second set is there just
6642
     to protect it from moving past FP-relative stack accesses.  */
6643
  switch (recog_memoized (insn))
6644
    {
6645
    case CODE_FOR_prologue_allocate_stack:
6646
    case CODE_FOR_epilogue_deallocate_stack:
6647
      ret = XVECEXP (x, 0, 0);
6648
      break;
6649
 
6650
    default:
6651
      ret = single_set_2 (insn, x);
6652
      break;
6653
    }
6654
 
6655
  return ret;
6656
}
6657
 
6658
/* Adjust the cost of a scheduling dependency.
6659
   Return the new cost of a dependency of type DEP_TYPE of INSN on DEP_INSN.
6660
   COST is the current cost, DW is the dependency weakness.  */
6661
static int
6662
ia64_adjust_cost_2 (rtx insn, int dep_type1, rtx dep_insn, int cost, dw_t dw)
6663
{
6664
  enum reg_note dep_type = (enum reg_note) dep_type1;
6665
  enum attr_itanium_class dep_class;
6666
  enum attr_itanium_class insn_class;
6667
 
6668
  insn_class = ia64_safe_itanium_class (insn);
6669
  dep_class = ia64_safe_itanium_class (dep_insn);
6670
 
6671
  /* Treat true memory dependencies separately.  Ignore apparent true
6672
     dependence between store and call (call has a MEM inside a SYMBOL_REF).  */
6673
  if (dep_type == REG_DEP_TRUE
6674
      && (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF)
6675
      && (insn_class == ITANIUM_CLASS_BR || insn_class == ITANIUM_CLASS_SCALL))
6676
    return 0;
6677
 
6678
  if (dw == MIN_DEP_WEAK)
6679
    /* Store and load are likely to alias, use higher cost to avoid stall.  */
6680
    return PARAM_VALUE (PARAM_SCHED_MEM_TRUE_DEP_COST);
6681
  else if (dw > MIN_DEP_WEAK)
6682
    {
6683
      /* Store and load are less likely to alias.  */
6684
      if (mflag_sched_fp_mem_deps_zero_cost && dep_class == ITANIUM_CLASS_STF)
6685
        /* Assume there will be no cache conflict for floating-point data.
6686
           For integer data, L1 conflict penalty is huge (17 cycles), so we
6687
           never assume it will not cause a conflict.  */
6688
        return 0;
6689
      else
6690
        return cost;
6691
    }
6692
 
6693
  if (dep_type != REG_DEP_OUTPUT)
6694
    return cost;
6695
 
6696
  if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
6697
      || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
6698
    return 0;
6699
 
6700
  return cost;
6701
}
6702
 
6703
/* Like emit_insn_before, but skip cycle_display notes.
6704
   ??? When cycle display notes are implemented, update this.  */
6705
 
6706
static void
6707
ia64_emit_insn_before (rtx insn, rtx before)
6708
{
6709
  emit_insn_before (insn, before);
6710
}
6711
 
6712
/* The following function marks insns that produce addresses for load
6713
   and store insns.  Such insns will be placed into M slots because that
6714
   decreases latency time for Itanium1 (see function
6715
   `ia64_produce_address_p' and the DFA descriptions).  */
6716
 
6717
static void
6718
ia64_dependencies_evaluation_hook (rtx head, rtx tail)
6719
{
6720
  rtx insn, next, next_tail;
6721
 
6722
  /* Before reload, which_alternative is not set, which means that
6723
     ia64_safe_itanium_class will produce wrong results for (at least)
6724
     move instructions.  */
6725
  if (!reload_completed)
6726
    return;
6727
 
6728
  next_tail = NEXT_INSN (tail);
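  /* The CALL flag of each insn is reused below as the mark described in
     the comment above (an address producer feeding a memory operation);
     see ia64_produce_address_p.  */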
6729
  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6730
    if (INSN_P (insn))
6731
      insn->call = 0;
6732
  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6733
    if (INSN_P (insn)
6734
        && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
6735
      {
6736
        sd_iterator_def sd_it;
6737
        dep_t dep;
6738
        bool has_mem_op_consumer_p = false;
6739
 
6740
        FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
6741
          {
6742
            enum attr_itanium_class c;
6743
 
6744
            if (DEP_TYPE (dep) != REG_DEP_TRUE)
6745
              continue;
6746
 
6747
            next = DEP_CON (dep);
6748
            c = ia64_safe_itanium_class (next);
6749
            if ((c == ITANIUM_CLASS_ST
6750
                 || c == ITANIUM_CLASS_STF)
6751
                && ia64_st_address_bypass_p (insn, next))
6752
              {
6753
                has_mem_op_consumer_p = true;
6754
                break;
6755
              }
6756
            else if ((c == ITANIUM_CLASS_LD
6757
                      || c == ITANIUM_CLASS_FLD
6758
                      || c == ITANIUM_CLASS_FLDP)
6759
                     && ia64_ld_address_bypass_p (insn, next))
6760
              {
6761
                has_mem_op_consumer_p = true;
6762
                break;
6763
              }
6764
          }
6765
 
6766
        insn->call = has_mem_op_consumer_p;
6767
      }
6768
}
6769
 
6770
/* We're beginning a new block.  Initialize data structures as necessary.  */
6771
 
6772
static void
6773
ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
6774
                 int sched_verbose ATTRIBUTE_UNUSED,
6775
                 int max_ready ATTRIBUTE_UNUSED)
6776
{
6777
#ifdef ENABLE_CHECKING
6778
  rtx insn;
6779
 
6780
  if (!sel_sched_p () && reload_completed)
6781
    for (insn = NEXT_INSN (current_sched_info->prev_head);
6782
         insn != current_sched_info->next_tail;
6783
         insn = NEXT_INSN (insn))
6784
      gcc_assert (!SCHED_GROUP_P (insn));
6785
#endif
6786
  last_scheduled_insn = NULL_RTX;
6787
  init_insn_group_barriers ();
6788
 
6789
  current_cycle = 0;
6790
  memset (mem_ops_in_group, 0, sizeof (mem_ops_in_group));
6791
}
6792
 
6793
/* We're beginning a scheduling pass.  Check assertion.  */
6794
 
6795
static void
6796
ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
6797
                        int sched_verbose ATTRIBUTE_UNUSED,
6798
                        int max_ready ATTRIBUTE_UNUSED)
6799
{
6800
  gcc_assert (pending_data_specs == 0);
6801
}
6802
 
6803
/* Scheduling pass is now finished.  Free/reset static variable.  */
6804
static void
6805
ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
6806
                          int sched_verbose ATTRIBUTE_UNUSED)
6807
{
6808
  gcc_assert (pending_data_specs == 0);
6809
}
6810
 
6811
/* Return TRUE if INSN is a load (either normal or speculative, but not a
6812
   speculation check), FALSE otherwise.  */
6813
static bool
6814
is_load_p (rtx insn)
6815
{
6816
  enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
6817
 
6818
  return
6819
   ((insn_class == ITANIUM_CLASS_LD || insn_class == ITANIUM_CLASS_FLD)
6820
    && get_attr_check_load (insn) == CHECK_LOAD_NO);
6821
}
6822
 
6823
/* If INSN is a memory reference, memoize it in the MEM_OPS_IN_GROUP global array
6824
   (taking into account the 3-cycle cache reference postponement for stores: Intel
6825
   Itanium 2 Reference Manual for Software Development and Optimization,
6826
   6.7.3.1).  */
6827
static void
6828
record_memory_reference (rtx insn)
6829
{
6830
  enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
6831
 
6832
  switch (insn_class) {
6833
    case ITANIUM_CLASS_FLD:
6834
    case ITANIUM_CLASS_LD:
6835
      mem_ops_in_group[current_cycle % 4]++;
6836
      break;
6837
    case ITANIUM_CLASS_STF:
6838
    case ITANIUM_CLASS_ST:
6839
      mem_ops_in_group[(current_cycle + 3) % 4]++;
6840
      break;
6841
    default:;
6842
  }
6843
}
6844
 
6845
/* We are about to begin issuing insns for this clock cycle.
6846
   Override the default sort algorithm to better slot instructions.  */
6847
 
6848
static int
6849
ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
6850
                        int *pn_ready, int clock_var,
6851
                        int reorder_type)
6852
{
6853
  int n_asms;
6854
  int n_ready = *pn_ready;
6855
  rtx *e_ready = ready + n_ready;
6856
  rtx *insnp;
6857
 
6858
  if (sched_verbose)
6859
    fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
6860
 
6861
  if (reorder_type == 0)
6862
    {
6863
      /* First, move all USEs, CLOBBERs and other crud out of the way.  */
6864
      n_asms = 0;
6865
      for (insnp = ready; insnp < e_ready; insnp++)
6866
        if (insnp < e_ready)
6867
          {
6868
            rtx insn = *insnp;
6869
            enum attr_type t = ia64_safe_type (insn);
6870
            if (t == TYPE_UNKNOWN)
6871
              {
6872
                if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6873
                    || asm_noperands (PATTERN (insn)) >= 0)
6874
                  {
6875
                    rtx lowest = ready[n_asms];
6876
                    ready[n_asms] = insn;
6877
                    *insnp = lowest;
6878
                    n_asms++;
6879
                  }
6880
                else
6881
                  {
6882
                    rtx highest = ready[n_ready - 1];
6883
                    ready[n_ready - 1] = insn;
6884
                    *insnp = highest;
6885
                    return 1;
6886
                  }
6887
              }
6888
          }
6889
 
6890
      if (n_asms < n_ready)
6891
        {
6892
          /* Some normal insns to process.  Skip the asms.  */
6893
          ready += n_asms;
6894
          n_ready -= n_asms;
6895
        }
6896
      else if (n_ready > 0)
6897
        return 1;
6898
    }
6899
 
6900
  if (ia64_final_schedule)
6901
    {
6902
      int deleted = 0;
6903
      int nr_need_stop = 0;
6904
 
6905
      for (insnp = ready; insnp < e_ready; insnp++)
6906
        if (safe_group_barrier_needed (*insnp))
6907
          nr_need_stop++;
6908
 
6909
      if (reorder_type == 1 && n_ready == nr_need_stop)
6910
        return 0;
6911
      if (reorder_type == 0)
6912
        return 1;
6913
      insnp = e_ready;
6914
      /* Move down everything that needs a stop bit, preserving
6915
         relative order.  */
6916
      while (insnp-- > ready + deleted)
6917
        while (insnp >= ready + deleted)
6918
          {
6919
            rtx insn = *insnp;
6920
            if (! safe_group_barrier_needed (insn))
6921
              break;
6922
            memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6923
            *ready = insn;
6924
            deleted++;
6925
          }
6926
      n_ready -= deleted;
6927
      ready += deleted;
6928
    }
6929
 
6930
  current_cycle = clock_var;
6931
  if (reload_completed && mem_ops_in_group[clock_var % 4] >= ia64_max_memory_insns)
6932
    {
6933
      int moved = 0;
6934
 
6935
      insnp = e_ready;
6936
      /* Move down loads/stores, preserving relative order.  */
6937
      while (insnp-- > ready + moved)
6938
        while (insnp >= ready + moved)
6939
          {
6940
            rtx insn = *insnp;
6941
            if (! is_load_p (insn))
6942
              break;
6943
            memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6944
            *ready = insn;
6945
            moved++;
6946
          }
6947
      n_ready -= moved;
6948
      ready += moved;
6949
    }
6950
 
6951
  return 1;
6952
}
6953
 
6954
/* We are about to begin issuing insns for this clock cycle.  Override
6955
   the default sort algorithm to better slot instructions.  */
6956
 
6957
static int
6958
ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
6959
                    int clock_var)
6960
{
6961
  return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
6962
                                 pn_ready, clock_var, 0);
6963
}
6964
 
6965
/* Like ia64_sched_reorder, but called after issuing each insn.
6966
   Override the default sort algorithm to better slot instructions.  */
6967
 
6968
static int
6969
ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
6970
                     int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
6971
                     int *pn_ready, int clock_var)
6972
{
6973
  return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
6974
                                 clock_var, 1);
6975
}
6976
 
6977
/* We are about to issue INSN.  Return the number of insns left on the
6978
   ready queue that can be issued this cycle.  */
6979
 
6980
static int
6981
ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
6982
                     int sched_verbose ATTRIBUTE_UNUSED,
6983
                     rtx insn ATTRIBUTE_UNUSED,
6984
                     int can_issue_more ATTRIBUTE_UNUSED)
6985
{
6986
  if (sched_deps_info->generate_spec_deps && !sel_sched_p ())
6987
    /* Modulo scheduling does not extend h_i_d when emitting
6988
       new instructions.  Don't use h_i_d, if we don't have to.  */
6989
    {
6990
      if (DONE_SPEC (insn) & BEGIN_DATA)
6991
        pending_data_specs++;
6992
      if (CHECK_SPEC (insn) & BEGIN_DATA)
6993
        pending_data_specs--;
6994
    }
6995
 
6996
  if (DEBUG_INSN_P (insn))
6997
    return 1;
6998
 
6999
  last_scheduled_insn = insn;
7000
  memcpy (prev_cycle_state, curr_state, dfa_state_size);
7001
  if (reload_completed)
7002
    {
7003
      int needed = group_barrier_needed (insn);
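      /* After reload the lookahead guard rejects insns that would still
         need a stop bit, so none should be needed for an insn that is
         actually issued.  */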
7004
 
7005
      gcc_assert (!needed);
7006
      if (GET_CODE (insn) == CALL_INSN)
7007
        init_insn_group_barriers ();
7008
      stops_p [INSN_UID (insn)] = stop_before_p;
7009
      stop_before_p = 0;
7010
 
7011
      record_memory_reference (insn);
7012
    }
7013
  return 1;
7014
}
7015
 
7016
/* We are choosing an insn from the ready queue.  Return nonzero if INSN
7017
   can be chosen.  */
7018
 
7019
static int
7020
ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
7021
{
7022
  gcc_assert (insn && INSN_P (insn));
7023
  return ((!reload_completed
7024
           || !safe_group_barrier_needed (insn))
7025
          && ia64_first_cycle_multipass_dfa_lookahead_guard_spec (insn)
7026
          && (!mflag_sched_mem_insns_hard_limit
7027
              || !is_load_p (insn)
7028
              || mem_ops_in_group[current_cycle % 4] < ia64_max_memory_insns));
7029
}
7030
 
7031
/* We are choosing an insn from the ready queue.  Return nonzero if INSN
7032
   can be chosen.  */
7033
 
7034
static bool
7035
ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx insn)
7036
{
7037
  gcc_assert (insn  && INSN_P (insn));
7038
  /* The size of the ALAT is 32.  Since we perform conservative data speculation,
7039
     we keep the ALAT half-empty.  */
7040
  return (pending_data_specs < 16
7041
          || !(TODO_SPEC (insn) & BEGIN_DATA));
7042
}
7043
 
7044
/* The following variable value is a pseudo-insn used by the DFA insn
7045
   scheduler to change the DFA state when the simulated clock is
7046
   increased.  */
7047
 
7048
static rtx dfa_pre_cycle_insn;
7049
 
7050
/* Returns 1 when a meaningful insn was scheduled between the last group
7051
   barrier and LAST.  */
7052
static int
7053
scheduled_good_insn (rtx last)
7054
{
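  /* An insn counts as meaningful if it matches a machine insn pattern,
     i.e. recog_memoized returns a non-negative insn code.  */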
7055
  if (last && recog_memoized (last) >= 0)
7056
    return 1;
7057
 
7058
  for ( ;
7059
       last != NULL && !NOTE_INSN_BASIC_BLOCK_P (last)
7060
       && !stops_p[INSN_UID (last)];
7061
       last = PREV_INSN (last))
7062
    /* We could hit a NOTE_INSN_DELETED here which is actually outside
7063
       the ebb we're scheduling.  */
7064
    if (INSN_P (last) && recog_memoized (last) >= 0)
7065
      return 1;
7066
 
7067
  return 0;
7068
}
7069
 
7070
/* We are about to begin issuing INSN.  Return nonzero if we cannot
7071
   issue it on given cycle CLOCK and return zero if we should not sort
7072
   the ready queue on the next clock start.  */
7073
 
7074
static int
7075
ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
7076
                    int clock, int *sort_p)
7077
{
7078
  int setup_clocks_p = FALSE;
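  /* ??? setup_clocks_p is set below but never read in this function.  */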
7079
 
7080
  gcc_assert (insn && INSN_P (insn));
7081
 
7082
  if (DEBUG_INSN_P (insn))
7083
    return 0;
7084
 
7085
  /* When a group barrier is needed for insn, last_scheduled_insn
7086
     should be set.  */
7087
  gcc_assert (!(reload_completed && safe_group_barrier_needed (insn))
7088
              || last_scheduled_insn);
7089
 
7090
  if ((reload_completed
7091
       && (safe_group_barrier_needed (insn)
7092
           || (mflag_sched_stop_bits_after_every_cycle
7093
               && last_clock != clock
7094
               && last_scheduled_insn
7095
               && scheduled_good_insn (last_scheduled_insn))))
7096
      || (last_scheduled_insn
7097
          && (GET_CODE (last_scheduled_insn) == CALL_INSN
7098
              || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
7099
              || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
7100
    {
7101
      init_insn_group_barriers ();
7102
 
7103
      if (verbose && dump)
7104
        fprintf (dump, "//    Stop should be before %d%s\n", INSN_UID (insn),
7105
                 last_clock == clock ? " + cycle advance" : "");
7106
 
7107
      stop_before_p = 1;
7108
      current_cycle = clock;
7109
      mem_ops_in_group[current_cycle % 4] = 0;
7110
 
7111
      if (last_clock == clock)
7112
        {
7113
          state_transition (curr_state, dfa_stop_insn);
7114
          if (TARGET_EARLY_STOP_BITS)
7115
            *sort_p = (last_scheduled_insn == NULL_RTX
7116
                       || GET_CODE (last_scheduled_insn) != CALL_INSN);
7117
          else
7118
            *sort_p = 0;
7119
          return 1;
7120
        }
7121
      else if (reload_completed)
7122
        setup_clocks_p = TRUE;
7123
 
7124
      if (last_scheduled_insn)
7125
        {
7126
          if (GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
7127
              || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)
7128
            state_reset (curr_state);
7129
          else
7130
            {
7131
              memcpy (curr_state, prev_cycle_state, dfa_state_size);
7132
              state_transition (curr_state, dfa_stop_insn);
7133
              state_transition (curr_state, dfa_pre_cycle_insn);
7134
              state_transition (curr_state, NULL);
7135
            }
7136
        }
7137
    }
7138
  else if (reload_completed)
7139
    setup_clocks_p = TRUE;
7140
 
7141
  return 0;
7142
}
7143
 
7144
/* Implement targetm.sched.h_i_d_extended hook.
7145
   Extend internal data structures.  */
7146
static void
7147
ia64_h_i_d_extended (void)
7148
{
7149
  if (stops_p != NULL)
7150
    {
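      /* Grow stops_p along with the insn UID space, keeping 50% headroom
         and preserving the existing entries.  */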
7151
      int new_clocks_length = get_max_uid () * 3 / 2;
7152
      stops_p = (char *) xrecalloc (stops_p, new_clocks_length, clocks_length, 1);
7153
      clocks_length = new_clocks_length;
7154
    }
7155
}
7156
 
7157
 
7158
/* This structure describes the data used by the backend to guide scheduling.
7159
   When the current scheduling point is switched, this data should be saved
7160
   and restored later, if the scheduler returns to this point.  */
7161
struct _ia64_sched_context
7162
{
7163
  state_t prev_cycle_state;
7164
  rtx last_scheduled_insn;
7165
  struct reg_write_state rws_sum[NUM_REGS];
7166
  struct reg_write_state rws_insn[NUM_REGS];
7167
  int first_instruction;
7168
  int pending_data_specs;
7169
  int current_cycle;
7170
  char mem_ops_in_group[4];
7171
};
7172
typedef struct _ia64_sched_context *ia64_sched_context_t;
7173
 
7174
/* Allocates a scheduling context.  */
7175
static void *
7176
ia64_alloc_sched_context (void)
7177
{
7178
  return xmalloc (sizeof (struct _ia64_sched_context));
7179
}
7180
 
7181
/* Initializes the _SC context with clean data, if CLEAN_P, and from
7182
   the global context otherwise.  */
7183
static void
7184
ia64_init_sched_context (void *_sc, bool clean_p)
7185
{
7186
  ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7187
 
7188
  sc->prev_cycle_state = xmalloc (dfa_state_size);
7189
  if (clean_p)
7190
    {
7191
      state_reset (sc->prev_cycle_state);
7192
      sc->last_scheduled_insn = NULL_RTX;
7193
      memset (sc->rws_sum, 0, sizeof (rws_sum));
7194
      memset (sc->rws_insn, 0, sizeof (rws_insn));
7195
      sc->first_instruction = 1;
7196
      sc->pending_data_specs = 0;
7197
      sc->current_cycle = 0;
7198
      memset (sc->mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7199
    }
7200
  else
7201
    {
7202
      memcpy (sc->prev_cycle_state, prev_cycle_state, dfa_state_size);
7203
      sc->last_scheduled_insn = last_scheduled_insn;
7204
      memcpy (sc->rws_sum, rws_sum, sizeof (rws_sum));
7205
      memcpy (sc->rws_insn, rws_insn, sizeof (rws_insn));
7206
      sc->first_instruction = first_instruction;
7207
      sc->pending_data_specs = pending_data_specs;
7208
      sc->current_cycle = current_cycle;
7209
      memcpy (sc->mem_ops_in_group, mem_ops_in_group, sizeof (mem_ops_in_group));
7210
    }
7211
}
7212
 
7213
/* Sets the global scheduling context to the one pointed to by _SC.  */
7214
static void
7215
ia64_set_sched_context (void *_sc)
7216
{
7217
  ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7218
 
7219
  gcc_assert (sc != NULL);
7220
 
7221
  memcpy (prev_cycle_state, sc->prev_cycle_state, dfa_state_size);
7222
  last_scheduled_insn = sc->last_scheduled_insn;
7223
  memcpy (rws_sum, sc->rws_sum, sizeof (rws_sum));
7224
  memcpy (rws_insn, sc->rws_insn, sizeof (rws_insn));
7225
  first_instruction = sc->first_instruction;
7226
  pending_data_specs = sc->pending_data_specs;
7227
  current_cycle = sc->current_cycle;
7228
  memcpy (mem_ops_in_group, sc->mem_ops_in_group, sizeof (mem_ops_in_group));
7229
}
7230
 
7231
/* Clears the data in the _SC scheduling context.  */
7232
static void
7233
ia64_clear_sched_context (void *_sc)
7234
{
7235
  ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7236
 
7237
  free (sc->prev_cycle_state);
7238
  sc->prev_cycle_state = NULL;
7239
}
7240
 
7241
/* Frees the _SC scheduling context.  */
7242
static void
7243
ia64_free_sched_context (void *_sc)
7244
{
7245
  gcc_assert (_sc != NULL);
7246
 
7247
  free (_sc);
7248
}
7249
 
7250
typedef rtx (* gen_func_t) (rtx, rtx);
7251
 
7252
/* Return a function that will generate a load of mode MODE_NO
7253
   with speculation types TS.  */
7254
static gen_func_t
7255
get_spec_load_gen_function (ds_t ts, int mode_no)
7256
{
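  /* The tables below are indexed by the mode number computed by
     get_mode_no_for_insn: slots 0-8 hold the plain moves (BImode through
     TImode), slots 9-11 the zero-extending QImode/HImode/SImode loads
     reached via SPEC_GEN_EXTEND_OFFSET.  */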
7257
  static gen_func_t gen_ld_[] = {
7258
    gen_movbi,
7259
    gen_movqi_internal,
7260
    gen_movhi_internal,
7261
    gen_movsi_internal,
7262
    gen_movdi_internal,
7263
    gen_movsf_internal,
7264
    gen_movdf_internal,
7265
    gen_movxf_internal,
7266
    gen_movti_internal,
7267
    gen_zero_extendqidi2,
7268
    gen_zero_extendhidi2,
7269
    gen_zero_extendsidi2,
7270
  };
7271
 
7272
  static gen_func_t gen_ld_a[] = {
7273
    gen_movbi_advanced,
7274
    gen_movqi_advanced,
7275
    gen_movhi_advanced,
7276
    gen_movsi_advanced,
7277
    gen_movdi_advanced,
7278
    gen_movsf_advanced,
7279
    gen_movdf_advanced,
7280
    gen_movxf_advanced,
7281
    gen_movti_advanced,
7282
    gen_zero_extendqidi2_advanced,
7283
    gen_zero_extendhidi2_advanced,
7284
    gen_zero_extendsidi2_advanced,
7285
  };
7286
  static gen_func_t gen_ld_s[] = {
7287
    gen_movbi_speculative,
7288
    gen_movqi_speculative,
7289
    gen_movhi_speculative,
7290
    gen_movsi_speculative,
7291
    gen_movdi_speculative,
7292
    gen_movsf_speculative,
7293
    gen_movdf_speculative,
7294
    gen_movxf_speculative,
7295
    gen_movti_speculative,
7296
    gen_zero_extendqidi2_speculative,
7297
    gen_zero_extendhidi2_speculative,
7298
    gen_zero_extendsidi2_speculative,
7299
  };
7300
  static gen_func_t gen_ld_sa[] = {
7301
    gen_movbi_speculative_advanced,
7302
    gen_movqi_speculative_advanced,
7303
    gen_movhi_speculative_advanced,
7304
    gen_movsi_speculative_advanced,
7305
    gen_movdi_speculative_advanced,
7306
    gen_movsf_speculative_advanced,
7307
    gen_movdf_speculative_advanced,
7308
    gen_movxf_speculative_advanced,
7309
    gen_movti_speculative_advanced,
7310
    gen_zero_extendqidi2_speculative_advanced,
7311
    gen_zero_extendhidi2_speculative_advanced,
7312
    gen_zero_extendsidi2_speculative_advanced,
7313
  };
7314
  static gen_func_t gen_ld_s_a[] = {
7315
    gen_movbi_speculative_a,
7316
    gen_movqi_speculative_a,
7317
    gen_movhi_speculative_a,
7318
    gen_movsi_speculative_a,
7319
    gen_movdi_speculative_a,
7320
    gen_movsf_speculative_a,
7321
    gen_movdf_speculative_a,
7322
    gen_movxf_speculative_a,
7323
    gen_movti_speculative_a,
7324
    gen_zero_extendqidi2_speculative_a,
7325
    gen_zero_extendhidi2_speculative_a,
7326
    gen_zero_extendsidi2_speculative_a,
7327
  };
7328
 
7329
  gen_func_t *gen_ld;
7330
 
7331
  if (ts & BEGIN_DATA)
7332
    {
7333
      if (ts & BEGIN_CONTROL)
7334
        gen_ld = gen_ld_sa;
7335
      else
7336
        gen_ld = gen_ld_a;
7337
    }
7338
  else if (ts & BEGIN_CONTROL)
7339
    {
7340
      if ((spec_info->flags & SEL_SCHED_SPEC_DONT_CHECK_CONTROL)
7341
          || ia64_needs_block_p (ts))
7342
        gen_ld = gen_ld_s;
7343
      else
7344
        gen_ld = gen_ld_s_a;
7345
    }
7346
  else if (ts == 0)
7347
    gen_ld = gen_ld_;
7348
  else
7349
    gcc_unreachable ();
7350
 
7351
  return gen_ld[mode_no];
7352
}
7353
 
7354
/* Constants that help map 'enum machine_mode' to int.  */
7355
enum SPEC_MODES
7356
  {
7357
    SPEC_MODE_INVALID = -1,
7358
    SPEC_MODE_FIRST = 0,
7359
    SPEC_MODE_FOR_EXTEND_FIRST = 1,
7360
    SPEC_MODE_FOR_EXTEND_LAST = 3,
7361
    SPEC_MODE_LAST = 8
7362
  };
7363
 
7364
enum
7365
  {
7366
    /* Offset to reach ZERO_EXTEND patterns.  */
7367
    SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1
7368
  };
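/* For example, a zero-extending QImode load has mode number 1
   (SPEC_MODE_FOR_EXTEND_FIRST), so it uses generator-table slot
   1 + SPEC_GEN_EXTEND_OFFSET == 9.  */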
7369
 
7370
/* Return the index of MODE.  */
7371
static int
7372
ia64_mode_to_int (enum machine_mode mode)
7373
{
7374
  switch (mode)
7375
    {
7376
    case BImode: return 0; /* SPEC_MODE_FIRST  */
7377
    case QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST  */
7378
    case HImode: return 2;
7379
    case SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST  */
7380
    case DImode: return 4;
7381
    case SFmode: return 5;
7382
    case DFmode: return 6;
7383
    case XFmode: return 7;
7384
    case TImode:
7385
      /* ??? This mode needs testing.  Bypasses for ldfp8 instruction are not
7386
         mentioned in itanium[12].md.  Predicate fp_register_operand also
7387
         needs to be defined.  Bottom line: better disable for now.  */
7388
      return SPEC_MODE_INVALID;
7389
    default:     return SPEC_MODE_INVALID;
7390
    }
7391
}
7392
 
7393
/* Provide information about speculation capabilities.  */
7394
static void
7395
ia64_set_sched_flags (spec_info_t spec_info)
7396
{
7397
  unsigned int *flags = &(current_sched_info->flags);
7398
 
7399
  if (*flags & SCHED_RGN
7400
      || *flags & SCHED_EBB
7401
      || *flags & SEL_SCHED)
7402
    {
7403
      int mask = 0;
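      /* The mflag_sched_br_* options enable speculation before reload,
         the mflag_sched_ar_* options after reload.  */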
7404
 
7405
      if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0)
7406
          || (mflag_sched_ar_data_spec && reload_completed))
7407
        {
7408
          mask |= BEGIN_DATA;
7409
 
7410
          if (!sel_sched_p ()
7411
              && ((mflag_sched_br_in_data_spec && !reload_completed)
7412
                  || (mflag_sched_ar_in_data_spec && reload_completed)))
7413
            mask |= BE_IN_DATA;
7414
        }
7415
 
7416
      if (mflag_sched_control_spec
7417
          && (!sel_sched_p ()
7418
              || reload_completed))
7419
        {
7420
          mask |= BEGIN_CONTROL;
7421
 
7422
          if (!sel_sched_p () && mflag_sched_in_control_spec)
7423
            mask |= BE_IN_CONTROL;
7424
        }
7425
 
7426
      spec_info->mask = mask;
7427
 
7428
      if (mask)
7429
        {
7430
          *flags |= USE_DEPS_LIST | DO_SPECULATION;
7431
 
7432
          if (mask & BE_IN_SPEC)
7433
            *flags |= NEW_BBS;
7434
 
7435
          spec_info->flags = 0;
7436
 
7437
          if ((mask & DATA_SPEC) && mflag_sched_prefer_non_data_spec_insns)
7438
            spec_info->flags |= PREFER_NON_DATA_SPEC;
7439
 
7440
          if (mask & CONTROL_SPEC)
7441
            {
7442
              if (mflag_sched_prefer_non_control_spec_insns)
7443
                spec_info->flags |= PREFER_NON_CONTROL_SPEC;
7444
 
7445
              if (sel_sched_p () && mflag_sel_sched_dont_check_control_spec)
7446
                spec_info->flags |= SEL_SCHED_SPEC_DONT_CHECK_CONTROL;
7447
            }
7448
 
7449
          if (sched_verbose >= 1)
7450
            spec_info->dump = sched_dump;
7451
          else
7452
            spec_info->dump = 0;
7453
 
7454
          if (mflag_sched_count_spec_in_critical_path)
7455
            spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH;
7456
        }
7457
    }
7458
  else
7459
    spec_info->mask = 0;
7460
}
7461
 
7462
/* If INSN is an appropriate load, return the index of its mode.
7463
   Return -1 otherwise.  */
7464
static int
7465
get_mode_no_for_insn (rtx insn)
7466
{
7467
  rtx reg, mem, mode_rtx;
7468
  int mode_no;
7469
  bool extend_p;
7470
 
7471
  extract_insn_cached (insn);
7472
 
7473
  /* We use WHICH_ALTERNATIVE only after reload.  This will
7474
     guarantee that reload won't touch a speculative insn.  */
7475
 
7476
  if (recog_data.n_operands != 2)
7477
    return -1;
7478
 
7479
  reg = recog_data.operand[0];
7480
  mem = recog_data.operand[1];
7481
 
7482
  /* We should use MEM's mode, since REG's mode in the presence of
7483
     ZERO_EXTEND will always be DImode.  */
7484
  if (get_attr_speculable1 (insn) == SPECULABLE1_YES)
7485
    /* Process non-speculative ld.  */
7486
    {
7487
      if (!reload_completed)
7488
        {
7489
          /* Do not speculate into regs like ar.lc.  */
7490
          if (!REG_P (reg) || AR_REGNO_P (REGNO (reg)))
7491
            return -1;
7492
 
7493
          if (!MEM_P (mem))
7494
            return -1;
7495
 
7496
          {
7497
            rtx mem_reg = XEXP (mem, 0);
7498
 
7499
            if (!REG_P (mem_reg))
7500
              return -1;
7501
          }
7502
 
7503
          mode_rtx = mem;
7504
        }
7505
      else if (get_attr_speculable2 (insn) == SPECULABLE2_YES)
7506
        {
7507
          gcc_assert (REG_P (reg) && MEM_P (mem));
7508
          mode_rtx = mem;
7509
        }
7510
      else
7511
        return -1;
7512
    }
7513
  else if (get_attr_data_speculative (insn) == DATA_SPECULATIVE_YES
7514
           || get_attr_control_speculative (insn) == CONTROL_SPECULATIVE_YES
7515
           || get_attr_check_load (insn) == CHECK_LOAD_YES)
7516
    /* Process speculative ld or ld.c.  */
7517
    {
7518
      gcc_assert (REG_P (reg) && MEM_P (mem));
7519
      mode_rtx = mem;
7520
    }
7521
  else
7522
    {
7523
      enum attr_itanium_class attr_class = get_attr_itanium_class (insn);
7524
 
7525
      if (attr_class == ITANIUM_CLASS_CHK_A
7526
          || attr_class == ITANIUM_CLASS_CHK_S_I
7527
          || attr_class == ITANIUM_CLASS_CHK_S_F)
7528
        /* Process chk.  */
7529
        mode_rtx = reg;
7530
      else
7531
        return -1;
7532
    }
7533
 
7534
  mode_no = ia64_mode_to_int (GET_MODE (mode_rtx));
7535
 
7536
  if (mode_no == SPEC_MODE_INVALID)
7537
    return -1;
7538
 
7539
  extend_p = (GET_MODE (reg) != GET_MODE (mode_rtx));
7540
 
7541
  if (extend_p)
7542
    {
7543
      if (!(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
7544
            && mode_no <= SPEC_MODE_FOR_EXTEND_LAST))
7545
        return -1;
7546
 
7547
      mode_no += SPEC_GEN_EXTEND_OFFSET;
7548
    }
7549
 
7550
  return mode_no;
7551
}
7552
 
7553
/* If X is an unspec part of a speculative load, return its code.
7554
   Return -1 otherwise.  */
7555
static int
7556
get_spec_unspec_code (const_rtx x)
7557
{
7558
  if (GET_CODE (x) != UNSPEC)
7559
    return -1;
7560
 
7561
  {
7562
    int code;
7563
 
7564
    code = XINT (x, 1);
7565
 
7566
    switch (code)
7567
      {
7568
      case UNSPEC_LDA:
7569
      case UNSPEC_LDS:
7570
      case UNSPEC_LDS_A:
7571
      case UNSPEC_LDSA:
7572
        return code;
7573
 
7574
      default:
7575
        return -1;
7576
      }
7577
  }
7578
}
7579
 
7580
/* Implement skip_rtx_p hook.  */
7581
static bool
7582
ia64_skip_rtx_p (const_rtx x)
7583
{
7584
  return get_spec_unspec_code (x) != -1;
7585
}
7586
 
7587
/* If INSN is a speculative load, return its UNSPEC code.
7588
   Return -1 otherwise.  */
7589
static int
7590
get_insn_spec_code (const_rtx insn)
7591
{
7592
  rtx pat, reg, mem;
7593
 
7594
  pat = PATTERN (insn);
7595
 
7596
  if (GET_CODE (pat) == COND_EXEC)
7597
    pat = COND_EXEC_CODE (pat);
7598
 
7599
  if (GET_CODE (pat) != SET)
7600
    return -1;
7601
 
7602
  reg = SET_DEST (pat);
7603
  if (!REG_P (reg))
7604
    return -1;
7605
 
7606
  mem = SET_SRC (pat);
7607
  if (GET_CODE (mem) == ZERO_EXTEND)
7608
    mem = XEXP (mem, 0);
7609
 
7610
  return get_spec_unspec_code (mem);
7611
}
7612
 
7613
/* If INSN is a speculative load, return a ds with the speculation types.
7614
   Otherwise [if INSN is a normal instruction] return 0.  */
7615
static ds_t
7616
ia64_get_insn_spec_ds (rtx insn)
7617
{
7618
  int code = get_insn_spec_code (insn);
7619
 
7620
  switch (code)
7621
    {
7622
    case UNSPEC_LDA:
7623
      return BEGIN_DATA;
7624
 
7625
    case UNSPEC_LDS:
7626
    case UNSPEC_LDS_A:
7627
      return BEGIN_CONTROL;
7628
 
7629
    case UNSPEC_LDSA:
7630
      return BEGIN_DATA | BEGIN_CONTROL;
7631
 
7632
    default:
7633
      return 0;
7634
    }
7635
}
7636
 
7637
/* If INSN is a speculative load, return a ds with the speculation types that
7638
   will be checked.
7639
   Otherwise [if INSN is a normal instruction] return 0.  */
7640
static ds_t
7641
ia64_get_insn_checked_ds (rtx insn)
7642
{
7643
  int code = get_insn_spec_code (insn);
7644
 
7645
  switch (code)
7646
    {
7647
    case UNSPEC_LDA:
7648
      return BEGIN_DATA | BEGIN_CONTROL;
7649
 
7650
    case UNSPEC_LDS:
7651
      return BEGIN_CONTROL;
7652
 
7653
    case UNSPEC_LDS_A:
7654
    case UNSPEC_LDSA:
7655
      return BEGIN_DATA | BEGIN_CONTROL;
7656
 
7657
    default:
7658
      return 0;
7659
    }
7660
}
7661
 
7662
/* Generate and return a speculative load pattern for INSN, using
7663
   speculation type TS and machine mode index MODE_NO.  If INSN is
7664
   predicated (COND_EXEC), the new pattern is wrapped in the same
7665
   condition.  */
7666
static rtx
7667
ia64_gen_spec_load (rtx insn, ds_t ts, int mode_no)
7668
{
7669
  rtx pat, new_pat;
7670
  gen_func_t gen_load;
7671
 
7672
  gen_load = get_spec_load_gen_function (ts, mode_no);
7673
 
7674
  new_pat = gen_load (copy_rtx (recog_data.operand[0]),
7675
                      copy_rtx (recog_data.operand[1]));
7676
 
7677
  pat = PATTERN (insn);
7678
  if (GET_CODE (pat) == COND_EXEC)
7679
    new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
7680
                                 new_pat);
7681
 
7682
  return new_pat;
7683
}
7684
 
7685
static bool
7686
insn_can_be_in_speculative_p (rtx insn ATTRIBUTE_UNUSED,
7687
                              ds_t ds ATTRIBUTE_UNUSED)
7688
{
7689
  return false;
7690
}
7691
 
7692
/* Implement targetm.sched.speculate_insn hook.
7693
   Check if the INSN can be TS speculative.
7694
   If 'no' - return -1.
7695
   If 'yes' - generate speculative pattern in the NEW_PAT and return 1.
7696
   If current pattern of the INSN already provides TS speculation,
7697
   return 0.  */
7698
static int
7699
ia64_speculate_insn (rtx insn, ds_t ts, rtx *new_pat)
7700
{
7701
  int mode_no;
7702
  int res;
7703
 
7704
  gcc_assert (!(ts & ~SPECULATIVE));
7705
 
7706
  if (ia64_spec_check_p (insn))
7707
    return -1;
7708
 
7709
  if ((ts & BE_IN_SPEC)
7710
      && !insn_can_be_in_speculative_p (insn, ts))
7711
    return -1;
7712
 
7713
  mode_no = get_mode_no_for_insn (insn);
7714
 
7715
  if (mode_no != SPEC_MODE_INVALID)
7716
    {
7717
      if (ia64_get_insn_spec_ds (insn) == ds_get_speculation_types (ts))
7718
        res = 0;
7719
      else
7720
        {
7721
          res = 1;
7722
          *new_pat = ia64_gen_spec_load (insn, ts, mode_no);
7723
        }
7724
    }
7725
  else
7726
    res = -1;
7727
 
7728
  return res;
7729
}
7730
 
7731
/* Return a function that will generate a check for speculation TS with mode
7732
   MODE_NO.
7733
   If simple check is needed, pass true for SIMPLE_CHECK_P.
7734
   If clearing check is needed, pass true for CLEARING_CHECK_P.  */
7735
static gen_func_t
7736
get_spec_check_gen_function (ds_t ts, int mode_no,
7737
                             bool simple_check_p, bool clearing_check_p)
7738
{
7739
  static gen_func_t gen_ld_c_clr[] = {
7740
    gen_movbi_clr,
7741
    gen_movqi_clr,
7742
    gen_movhi_clr,
7743
    gen_movsi_clr,
7744
    gen_movdi_clr,
7745
    gen_movsf_clr,
7746
    gen_movdf_clr,
7747
    gen_movxf_clr,
7748
    gen_movti_clr,
7749
    gen_zero_extendqidi2_clr,
7750
    gen_zero_extendhidi2_clr,
7751
    gen_zero_extendsidi2_clr,
7752
  };
7753
  static gen_func_t gen_ld_c_nc[] = {
7754
    gen_movbi_nc,
7755
    gen_movqi_nc,
7756
    gen_movhi_nc,
7757
    gen_movsi_nc,
7758
    gen_movdi_nc,
7759
    gen_movsf_nc,
7760
    gen_movdf_nc,
7761
    gen_movxf_nc,
7762
    gen_movti_nc,
7763
    gen_zero_extendqidi2_nc,
7764
    gen_zero_extendhidi2_nc,
7765
    gen_zero_extendsidi2_nc,
7766
  };
7767
  static gen_func_t gen_chk_a_clr[] = {
7768
    gen_advanced_load_check_clr_bi,
7769
    gen_advanced_load_check_clr_qi,
7770
    gen_advanced_load_check_clr_hi,
7771
    gen_advanced_load_check_clr_si,
7772
    gen_advanced_load_check_clr_di,
7773
    gen_advanced_load_check_clr_sf,
7774
    gen_advanced_load_check_clr_df,
7775
    gen_advanced_load_check_clr_xf,
7776
    gen_advanced_load_check_clr_ti,
7777
    gen_advanced_load_check_clr_di,
7778
    gen_advanced_load_check_clr_di,
7779
    gen_advanced_load_check_clr_di,
7780
  };
7781
  static gen_func_t gen_chk_a_nc[] = {
7782
    gen_advanced_load_check_nc_bi,
7783
    gen_advanced_load_check_nc_qi,
7784
    gen_advanced_load_check_nc_hi,
7785
    gen_advanced_load_check_nc_si,
7786
    gen_advanced_load_check_nc_di,
7787
    gen_advanced_load_check_nc_sf,
7788
    gen_advanced_load_check_nc_df,
7789
    gen_advanced_load_check_nc_xf,
7790
    gen_advanced_load_check_nc_ti,
7791
    gen_advanced_load_check_nc_di,
7792
    gen_advanced_load_check_nc_di,
7793
    gen_advanced_load_check_nc_di,
7794
  };
7795
  static gen_func_t gen_chk_s[] = {
7796
    gen_speculation_check_bi,
7797
    gen_speculation_check_qi,
7798
    gen_speculation_check_hi,
7799
    gen_speculation_check_si,
7800
    gen_speculation_check_di,
7801
    gen_speculation_check_sf,
7802
    gen_speculation_check_df,
7803
    gen_speculation_check_xf,
7804
    gen_speculation_check_ti,
7805
    gen_speculation_check_di,
7806
    gen_speculation_check_di,
7807
    gen_speculation_check_di,
7808
  };
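  /* In the chk.a and chk.s tables above, the last three slots (the
     zero-extended QImode/HImode/SImode loads) reuse the DImode check,
     since the extended value always lives in a DImode register.  */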
7809
 
7810
  gen_func_t *gen_check;
7811
 
7812
  if (ts & BEGIN_DATA)
7813
    {
7814
      /* We don't need recovery because, even if this is an ld.sa, the
7815
         ALAT entry will be allocated only if the NAT bit is set to zero.
7816
         So it is enough to use ld.c here.  */
7817
 
7818
      if (simple_check_p)
7819
        {
7820
          gcc_assert (mflag_sched_spec_ldc);
7821
 
7822
          if (clearing_check_p)
7823
            gen_check = gen_ld_c_clr;
7824
          else
7825
            gen_check = gen_ld_c_nc;
7826
        }
7827
      else
7828
        {
7829
          if (clearing_check_p)
7830
            gen_check = gen_chk_a_clr;
7831
          else
7832
            gen_check = gen_chk_a_nc;
7833
        }
7834
    }
7835
  else if (ts & BEGIN_CONTROL)
7836
    {
7837
      if (simple_check_p)
7838
        /* We might want to use ld.sa -> ld.c instead of
7839
           ld.s -> chk.s.  */
7840
        {
7841
          gcc_assert (!ia64_needs_block_p (ts));
7842
 
7843
          if (clearing_check_p)
7844
            gen_check = gen_ld_c_clr;
7845
          else
7846
            gen_check = gen_ld_c_nc;
7847
        }
7848
      else
7849
        {
7850
          gen_check = gen_chk_s;
7851
        }
7852
    }
7853
  else
7854
    gcc_unreachable ();
7855
 
7856
  gcc_assert (mode_no >= 0);
7857
  return gen_check[mode_no];
7858
}
7859
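/* Editorial sketch (not part of the original source): how the generator
   returned above might be used.  Assumes MODE_NO indexes the arrays in the
   order they are listed (e.g. index 4 selects the DImode patterns); in the
   real code MODE_NO comes from get_mode_no_for_insn, and the simple ld.c
   form additionally requires mflag_sched_spec_ldc.  */
#if 0
static rtx
example_gen_clearing_ld_c (rtx dest, rtx src, int mode_no)
{
  /* Data speculation, simple check, clearing the ALAT entry: this
     selects an entry of gen_ld_c_clr[], i.e. an ld.c.clr pattern.  */
  gen_func_t gen = get_spec_check_gen_function (BEGIN_DATA, mode_no,
                                                /*simple_check_p=*/true,
                                                /*clearing_check_p=*/true);
  return gen (dest, src);
}
#endif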
 
7860
/* Return nonzero if a speculation of type TS needs a branchy recovery check.  */
7861
static bool
7862
ia64_needs_block_p (ds_t ts)
7863
{
7864
  if (ts & BEGIN_DATA)
7865
    return !mflag_sched_spec_ldc;
7866
 
7867
  gcc_assert ((ts & BEGIN_CONTROL) != 0);
7868
 
7869
  return !(mflag_sched_spec_control_ldc && mflag_sched_spec_ldc);
7870
}
7871
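/* Editorial note (not part of the original source): the decision above
   reduces to a small table over the speculation type and the
   mflag_sched_*_ldc flags tested in the code:

       BEGIN_DATA     -> branchy check iff !mflag_sched_spec_ldc
       BEGIN_CONTROL  -> branchy check iff !(mflag_sched_spec_control_ldc
                                             && mflag_sched_spec_ldc)

   i.e. a non-branchy (ld.c style) check is only possible when the
   corresponding ldc flags are enabled.  */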
 
7872
/* Generate a recovery check for INSN.  If LABEL is nonnull, generate a
7873
   branchy recovery check that uses LABEL as the recovery target.
7874
   Otherwise, generate a simple check.  */
7875
static rtx
7876
ia64_gen_spec_check (rtx insn, rtx label, ds_t ds)
7877
{
7878
  rtx op1, pat, check_pat;
7879
  gen_func_t gen_check;
7880
  int mode_no;
7881
 
7882
  mode_no = get_mode_no_for_insn (insn);
7883
  gcc_assert (mode_no >= 0);
7884
 
7885
  if (label)
7886
    op1 = label;
7887
  else
7888
    {
7889
      gcc_assert (!ia64_needs_block_p (ds));
7890
      op1 = copy_rtx (recog_data.operand[1]);
7891
    }
7892
 
7893
  gen_check = get_spec_check_gen_function (ds, mode_no, label == NULL_RTX,
7894
                                           true);
7895
 
7896
  check_pat = gen_check (copy_rtx (recog_data.operand[0]), op1);
7897
 
7898
  pat = PATTERN (insn);
7899
  if (GET_CODE (pat) == COND_EXEC)
7900
    check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
7901
                                   check_pat);
7902
 
7903
  return check_pat;
7904
}
7905
 
7906
/* Return nonzero if X is a speculation recovery check.  */
7907
static int
7908
ia64_spec_check_p (rtx x)
7909
{
7910
  x = PATTERN (x);
7911
  if (GET_CODE (x) == COND_EXEC)
7912
    x = COND_EXEC_CODE (x);
7913
  if (GET_CODE (x) == SET)
7914
    return ia64_spec_check_src_p (SET_SRC (x));
7915
  return 0;
7916
}
7917
 
7918
/* Return nonzero if SRC belongs to a recovery check.  */
7919
static int
7920
ia64_spec_check_src_p (rtx src)
7921
{
7922
  if (GET_CODE (src) == IF_THEN_ELSE)
7923
    {
7924
      rtx t;
7925
 
7926
      t = XEXP (src, 0);
7927
      if (GET_CODE (t) == NE)
7928
        {
7929
          t = XEXP (t, 0);
7930
 
7931
          if (GET_CODE (t) == UNSPEC)
7932
            {
7933
              int code;
7934
 
7935
              code = XINT (t, 1);
7936
 
7937
              if (code == UNSPEC_LDCCLR
7938
                  || code == UNSPEC_LDCNC
7939
                  || code == UNSPEC_CHKACLR
7940
                  || code == UNSPEC_CHKANC
7941
                  || code == UNSPEC_CHKS)
7942
                {
7943
                  gcc_assert (code != 0);
7944
                  return code;
7945
                }
7946
            }
7947
        }
7948
    }
7949
  return 0;
7950
}
7951
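/* Editorial note (not part of the original source): a sketch of the SET
   source shape recognized above, assuming the check patterns in the .md
   file compare the unspec against (const_int 0):

     (if_then_else (ne (unspec [...] UNSPEC_LDCCLR) (const_int 0))
                   <speculative value>
                   <recovery>)

   The same applies to UNSPEC_LDCNC, UNSPEC_CHKACLR, UNSPEC_CHKANC and
   UNSPEC_CHKS; the returned value is the UNSPEC code itself, so callers
   can use it both as a boolean and to identify the kind of check.  */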
 
7952
 
7953
/* The following page contains abstract data `bundle states' which are
7954
   used for bundling insns (inserting nops and template generation).  */
7955
 
7956
/* The following describes state of insn bundling.  */
7957
 
7958
struct bundle_state
7959
{
7960
  /* Unique bundle state number to identify them in the debugging
7961
     output  */
7962
  int unique_num;
7963
  rtx insn;     /* corresponding insn, NULL for the 1st and the last state  */
7964
  /* number of nops before and after the insn  */
7965
  short before_nops_num, after_nops_num;
7966
  int insn_num; /* insn number (0 for the initial state, 1 for the 1st
7967
                   insn)  */
7968
  int cost;     /* cost of the state in cycles */
7969
  int accumulated_insns_num; /* number of all previous insns including
7970
                                nops.  An L insn is counted as 2 insns */
7971
  int branch_deviation; /* deviation of previous branches from 3rd slots  */
7972
  int middle_bundle_stops; /* number of stop bits in the middle of bundles */
7973
  struct bundle_state *next;  /* next state with the same insn_num  */
7974
  struct bundle_state *originator; /* originator (previous insn state)  */
7975
  /* All bundle states are in the following chain.  */
7976
  struct bundle_state *allocated_states_chain;
7977
  /* The DFA State after issuing the insn and the nops.  */
7978
  state_t dfa_state;
7979
};
7980
 
7981
/* The following maps an insn number to the corresponding bundle state.  */
7982
 
7983
static struct bundle_state **index_to_bundle_states;
7984
 
7985
/* The unique number of next bundle state.  */
7986
 
7987
static int bundle_states_num;
7988
 
7989
/* All allocated bundle states are in the following chain.  */
7990
 
7991
static struct bundle_state *allocated_bundle_states_chain;
7992
 
7993
/* All allocated but not used bundle states are in the following
7994
   chain.  */
7995
 
7996
static struct bundle_state *free_bundle_state_chain;
7997
 
7998
 
7999
/* The following function returns a free bundle state.  */
8000
 
8001
static struct bundle_state *
8002
get_free_bundle_state (void)
8003
{
8004
  struct bundle_state *result;
8005
 
8006
  if (free_bundle_state_chain != NULL)
8007
    {
8008
      result = free_bundle_state_chain;
8009
      free_bundle_state_chain = result->next;
8010
    }
8011
  else
8012
    {
8013
      result = XNEW (struct bundle_state);
8014
      result->dfa_state = xmalloc (dfa_state_size);
8015
      result->allocated_states_chain = allocated_bundle_states_chain;
8016
      allocated_bundle_states_chain = result;
8017
    }
8018
  result->unique_num = bundle_states_num++;
8019
  return result;
8020
 
8021
}
8022
 
8023
/* The following function frees given bundle state.  */
8024
 
8025
static void
8026
free_bundle_state (struct bundle_state *state)
8027
{
8028
  state->next = free_bundle_state_chain;
8029
  free_bundle_state_chain = state;
8030
}
8031
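/* Editorial sketch (not part of the original source): the free-list
   discipline used by the two functions above.  A state's dfa_state buffer
   is allocated once and kept across reuse; free_bundle_state only threads
   the state back onto free_bundle_state_chain.  */
#if 0
static void
example_bundle_state_reuse (void)
{
  struct bundle_state *s = get_free_bundle_state ();
  /* ... initialize s->insn, s->insn_num, s->cost, copy a DFA state ... */
  free_bundle_state (s);
  /* The next call returns the same object (with a fresh unique_num).  */
  s = get_free_bundle_state ();
  free_bundle_state (s);
}
#endif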
 
8032
/* Start work with abstract data `bundle states'.  */
8033
 
8034
static void
8035
initiate_bundle_states (void)
8036
{
8037
  bundle_states_num = 0;
8038
  free_bundle_state_chain = NULL;
8039
  allocated_bundle_states_chain = NULL;
8040
}
8041
 
8042
/* Finish work with abstract data `bundle states'.  */
8043
 
8044
static void
8045
finish_bundle_states (void)
8046
{
8047
  struct bundle_state *curr_state, *next_state;
8048
 
8049
  for (curr_state = allocated_bundle_states_chain;
8050
       curr_state != NULL;
8051
       curr_state = next_state)
8052
    {
8053
      next_state = curr_state->allocated_states_chain;
8054
      free (curr_state->dfa_state);
8055
      free (curr_state);
8056
    }
8057
}
8058
 
8059
/* Hash table of the bundle states.  The key is dfa_state and insn_num
8060
   of the bundle states.  */
8061
 
8062
static htab_t bundle_state_table;
8063
 
8064
/* The function returns hash of BUNDLE_STATE.  */
8065
 
8066
static unsigned
8067
bundle_state_hash (const void *bundle_state)
8068
{
8069
  const struct bundle_state *const state
8070
    = (const struct bundle_state *) bundle_state;
8071
  unsigned result, i;
8072
 
8073
  for (result = i = 0; i < dfa_state_size; i++)
8074
    result += (((unsigned char *) state->dfa_state) [i]
8075
               << ((i % CHAR_BIT) * 3 + CHAR_BIT));
8076
  return result + state->insn_num;
8077
}
8078
 
8079
/* The function returns nonzero if the bundle state keys are equal.  */
8080
 
8081
static int
8082
bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
8083
{
8084
  const struct bundle_state *const state1
8085
    = (const struct bundle_state *) bundle_state_1;
8086
  const struct bundle_state *const state2
8087
    = (const struct bundle_state *) bundle_state_2;
8088
 
8089
  return (state1->insn_num == state2->insn_num
8090
          && memcmp (state1->dfa_state, state2->dfa_state,
8091
                     dfa_state_size) == 0);
8092
}
8093
 
8094
/* The function inserts the BUNDLE_STATE into the hash table.  The
8095
   function returns nonzero if the bundle has been inserted into the
8096
   table.  The table contains the best bundle state with given key.  */
8097
 
8098
static int
8099
insert_bundle_state (struct bundle_state *bundle_state)
8100
{
8101
  void **entry_ptr;
8102
 
8103
  entry_ptr = htab_find_slot (bundle_state_table, bundle_state, INSERT);
8104
  if (*entry_ptr == NULL)
8105
    {
8106
      bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
8107
      index_to_bundle_states [bundle_state->insn_num] = bundle_state;
8108
      *entry_ptr = (void *) bundle_state;
8109
      return TRUE;
8110
    }
8111
  else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
8112
           || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
8113
               && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
8114
                   > bundle_state->accumulated_insns_num
8115
                   || (((struct bundle_state *)
8116
                        *entry_ptr)->accumulated_insns_num
8117
                       == bundle_state->accumulated_insns_num
8118
                       && (((struct bundle_state *)
8119
                            *entry_ptr)->branch_deviation
8120
                           > bundle_state->branch_deviation
8121
                           || (((struct bundle_state *)
8122
                                *entry_ptr)->branch_deviation
8123
                               == bundle_state->branch_deviation
8124
                               && ((struct bundle_state *)
8125
                                   *entry_ptr)->middle_bundle_stops
8126
                               > bundle_state->middle_bundle_stops))))))
8127
 
8128
    {
8129
      struct bundle_state temp;
8130
 
8131
      temp = *(struct bundle_state *) *entry_ptr;
8132
      *(struct bundle_state *) *entry_ptr = *bundle_state;
8133
      ((struct bundle_state *) *entry_ptr)->next = temp.next;
8134
      *bundle_state = temp;
8135
    }
8136
  return FALSE;
8137
}
8138
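/* Editorial sketch (not part of the original source): the nested condition
   above is a lexicographic "is the new state better than the stored one"
   test.  Written as a helper it would read roughly as follows (the real
   code also swaps the contents in place so that the hash table entry keeps
   its position in the insn_num chain).  */
#if 0
static bool
example_bundle_state_better_p (const struct bundle_state *new_state,
                               const struct bundle_state *old_state)
{
  if (new_state->cost != old_state->cost)
    return new_state->cost < old_state->cost;
  if (new_state->accumulated_insns_num != old_state->accumulated_insns_num)
    return (new_state->accumulated_insns_num
            < old_state->accumulated_insns_num);
  if (new_state->branch_deviation != old_state->branch_deviation)
    return new_state->branch_deviation < old_state->branch_deviation;
  return new_state->middle_bundle_stops < old_state->middle_bundle_stops;
}
#endif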
 
8139
/* Start work with the hash table.  */
8140
 
8141
static void
8142
initiate_bundle_state_table (void)
8143
{
8144
  bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
8145
                                    (htab_del) 0);
8146
}
8147
 
8148
/* Finish work with the hash table.  */
8149
 
8150
static void
8151
finish_bundle_state_table (void)
8152
{
8153
  htab_delete (bundle_state_table);
8154
}
8155
 
8156
 
8157
 
8158
/* The following variable is an insn `nop' used to check bundle states
8159
   with different numbers of inserted nops.  */
8160
 
8161
static rtx ia64_nop;
8162
 
8163
/* The following function tries to issue NOPS_NUM nops for the current
8164
   state without advancing the processor cycle.  If this fails, the
8165
   function returns FALSE and frees the current state.  */
8166
 
8167
static int
8168
try_issue_nops (struct bundle_state *curr_state, int nops_num)
8169
{
8170
  int i;
8171
 
8172
  for (i = 0; i < nops_num; i++)
8173
    if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
8174
      {
8175
        free_bundle_state (curr_state);
8176
        return FALSE;
8177
      }
8178
  return TRUE;
8179
}
8180
 
8181
/* The following function tries to issue INSN for the current
8182
   state without advancing the processor cycle.  If this fails, the
8183
   function returns FALSE and frees the current state.  */
8184
 
8185
static int
8186
try_issue_insn (struct bundle_state *curr_state, rtx insn)
8187
{
8188
  if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
8189
    {
8190
      free_bundle_state (curr_state);
8191
      return FALSE;
8192
    }
8193
  return TRUE;
8194
}
8195
 
8196
/* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
8197
   starting with ORIGINATOR without advancing the processor cycle.  If
8198
   TRY_BUNDLE_END_P is TRUE, the function also (or only, if
8199
   ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole bundle.
8200
   If it is successful, the function creates a new bundle state and
8201
   inserts it into the hash table and into `index_to_bundle_states'.  */
8202
 
8203
static void
8204
issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
8205
                     rtx insn, int try_bundle_end_p, int only_bundle_end_p)
8206
{
8207
  struct bundle_state *curr_state;
8208
 
8209
  curr_state = get_free_bundle_state ();
8210
  memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
8211
  curr_state->insn = insn;
8212
  curr_state->insn_num = originator->insn_num + 1;
8213
  curr_state->cost = originator->cost;
8214
  curr_state->originator = originator;
8215
  curr_state->before_nops_num = before_nops_num;
8216
  curr_state->after_nops_num = 0;
8217
  curr_state->accumulated_insns_num
8218
    = originator->accumulated_insns_num + before_nops_num;
8219
  curr_state->branch_deviation = originator->branch_deviation;
8220
  curr_state->middle_bundle_stops = originator->middle_bundle_stops;
8221
  gcc_assert (insn);
8222
  if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
8223
    {
8224
      gcc_assert (GET_MODE (insn) != TImode);
8225
      if (!try_issue_nops (curr_state, before_nops_num))
8226
        return;
8227
      if (!try_issue_insn (curr_state, insn))
8228
        return;
8229
      memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
8230
      if (curr_state->accumulated_insns_num % 3 != 0)
8231
        curr_state->middle_bundle_stops++;
8232
      if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
8233
          && curr_state->accumulated_insns_num % 3 != 0)
8234
        {
8235
          free_bundle_state (curr_state);
8236
          return;
8237
        }
8238
    }
8239
  else if (GET_MODE (insn) != TImode)
8240
    {
8241
      if (!try_issue_nops (curr_state, before_nops_num))
8242
        return;
8243
      if (!try_issue_insn (curr_state, insn))
8244
        return;
8245
      curr_state->accumulated_insns_num++;
8246
      gcc_assert (GET_CODE (PATTERN (insn)) != ASM_INPUT
8247
                  && asm_noperands (PATTERN (insn)) < 0);
8248
 
8249
      if (ia64_safe_type (insn) == TYPE_L)
8250
        curr_state->accumulated_insns_num++;
8251
    }
8252
  else
8253
    {
8254
      /* If this is an insn that must be first in a group, then don't allow
8255
         nops to be emitted before it.  Currently, alloc is the only such
8256
         supported instruction.  */
8257
      /* ??? The bundling automatons should handle this for us, but they do
8258
         not yet have support for the first_insn attribute.  */
8259
      if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
8260
        {
8261
          free_bundle_state (curr_state);
8262
          return;
8263
        }
8264
 
8265
      state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
8266
      state_transition (curr_state->dfa_state, NULL);
8267
      curr_state->cost++;
8268
      if (!try_issue_nops (curr_state, before_nops_num))
8269
        return;
8270
      if (!try_issue_insn (curr_state, insn))
8271
        return;
8272
      curr_state->accumulated_insns_num++;
8273
      if (GET_CODE (PATTERN (insn)) == ASM_INPUT
8274
          || asm_noperands (PATTERN (insn)) >= 0)
8275
        {
8276
          /* Finish bundle containing asm insn.  */
8277
          curr_state->after_nops_num
8278
            = 3 - curr_state->accumulated_insns_num % 3;
8279
          curr_state->accumulated_insns_num
8280
            += 3 - curr_state->accumulated_insns_num % 3;
8281
        }
8282
      else if (ia64_safe_type (insn) == TYPE_L)
8283
        curr_state->accumulated_insns_num++;
8284
    }
8285
  if (ia64_safe_type (insn) == TYPE_B)
8286
    curr_state->branch_deviation
8287
      += 2 - (curr_state->accumulated_insns_num - 1) % 3;
8288
  if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
8289
    {
8290
      if (!only_bundle_end_p && insert_bundle_state (curr_state))
8291
        {
8292
          state_t dfa_state;
8293
          struct bundle_state *curr_state1;
8294
          struct bundle_state *allocated_states_chain;
8295
 
8296
          curr_state1 = get_free_bundle_state ();
8297
          dfa_state = curr_state1->dfa_state;
8298
          allocated_states_chain = curr_state1->allocated_states_chain;
8299
          *curr_state1 = *curr_state;
8300
          curr_state1->dfa_state = dfa_state;
8301
          curr_state1->allocated_states_chain = allocated_states_chain;
8302
          memcpy (curr_state1->dfa_state, curr_state->dfa_state,
8303
                  dfa_state_size);
8304
          curr_state = curr_state1;
8305
        }
8306
      if (!try_issue_nops (curr_state,
8307
                           3 - curr_state->accumulated_insns_num % 3))
8308
        return;
8309
      curr_state->after_nops_num
8310
        = 3 - curr_state->accumulated_insns_num % 3;
8311
      curr_state->accumulated_insns_num
8312
        += 3 - curr_state->accumulated_insns_num % 3;
8313
    }
8314
  if (!insert_bundle_state (curr_state))
8315
    free_bundle_state (curr_state);
8316
  return;
8317
}
8318
 
8319
/* The following function returns the position in the two-bundle window
8320
   for the given STATE.  */
8321
 
8322
static int
8323
get_max_pos (state_t state)
8324
{
8325
  if (cpu_unit_reservation_p (state, pos_6))
8326
    return 6;
8327
  else if (cpu_unit_reservation_p (state, pos_5))
8328
    return 5;
8329
  else if (cpu_unit_reservation_p (state, pos_4))
8330
    return 4;
8331
  else if (cpu_unit_reservation_p (state, pos_3))
8332
    return 3;
8333
  else if (cpu_unit_reservation_p (state, pos_2))
8334
    return 2;
8335
  else if (cpu_unit_reservation_p (state, pos_1))
8336
    return 1;
8337
  else
8338
    return 0;
8339
}
8340
 
8341
/* The function returns the code of a possible template for the given
8342
   position and state.  The function should be called only with a
8343
   position equal to 3 or 6.  We avoid generating F NOPs by putting
8344
   templates containing F insns at the end of the template search,
8345
   because of an undocumented anomaly in McKinley-derived cores which can
8346
   cause stalls if an F-unit insn (including a NOP) is issued within a
8347
   six-cycle window after reading certain application registers (such
8348
   as ar.bsp).  Furthermore, power considerations also argue against
8349
   the use of F-unit instructions unless they're really needed.  */
8350
 
8351
static int
8352
get_template (state_t state, int pos)
8353
{
8354
  switch (pos)
8355
    {
8356
    case 3:
8357
      if (cpu_unit_reservation_p (state, _0mmi_))
8358
        return 1;
8359
      else if (cpu_unit_reservation_p (state, _0mii_))
8360
        return 0;
8361
      else if (cpu_unit_reservation_p (state, _0mmb_))
8362
        return 7;
8363
      else if (cpu_unit_reservation_p (state, _0mib_))
8364
        return 6;
8365
      else if (cpu_unit_reservation_p (state, _0mbb_))
8366
        return 5;
8367
      else if (cpu_unit_reservation_p (state, _0bbb_))
8368
        return 4;
8369
      else if (cpu_unit_reservation_p (state, _0mmf_))
8370
        return 3;
8371
      else if (cpu_unit_reservation_p (state, _0mfi_))
8372
        return 2;
8373
      else if (cpu_unit_reservation_p (state, _0mfb_))
8374
        return 8;
8375
      else if (cpu_unit_reservation_p (state, _0mlx_))
8376
        return 9;
8377
      else
8378
        gcc_unreachable ();
8379
    case 6:
8380
      if (cpu_unit_reservation_p (state, _1mmi_))
8381
        return 1;
8382
      else if (cpu_unit_reservation_p (state, _1mii_))
8383
        return 0;
8384
      else if (cpu_unit_reservation_p (state, _1mmb_))
8385
        return 7;
8386
      else if (cpu_unit_reservation_p (state, _1mib_))
8387
        return 6;
8388
      else if (cpu_unit_reservation_p (state, _1mbb_))
8389
        return 5;
8390
      else if (cpu_unit_reservation_p (state, _1bbb_))
8391
        return 4;
8392
      else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
8393
        return 3;
8394
      else if (cpu_unit_reservation_p (state, _1mfi_))
8395
        return 2;
8396
      else if (cpu_unit_reservation_p (state, _1mfb_))
8397
        return 8;
8398
      else if (cpu_unit_reservation_p (state, _1mlx_))
8399
        return 9;
8400
      else
8401
        gcc_unreachable ();
8402
    default:
8403
      gcc_unreachable ();
8404
    }
8405
}
8406
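/* Editorial note (not part of the original source): the codes returned
   above name bundle templates in the following order, read off from the
   cpu unit names tested for each value and consistent with the
   ".mbb"/".bbb" check on values 4 and 5 in ia64_add_bundle_selector_before
   below.  */
#if 0
static const char *const example_bundle_template_name[10] =
  { ".mii", ".mmi", ".mfi", ".mmf", ".bbb",
    ".mbb", ".mib", ".mmb", ".mfb", ".mlx" };
#endif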
 
8407
/* True when INSN is important for bundling.  */
8408
static bool
8409
important_for_bundling_p (rtx insn)
8410
{
8411
  return (INSN_P (insn)
8412
          && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
8413
          && GET_CODE (PATTERN (insn)) != USE
8414
          && GET_CODE (PATTERN (insn)) != CLOBBER);
8415
}
8416
 
8417
/* The following function returns the first insn important for insn
8418
   bundling starting at INSN and before TAIL.  */
8419
 
8420
static rtx
8421
get_next_important_insn (rtx insn, rtx tail)
8422
{
8423
  for (; insn && insn != tail; insn = NEXT_INSN (insn))
8424
    if (important_for_bundling_p (insn))
8425
      return insn;
8426
  return NULL_RTX;
8427
}
8428
 
8429
/* Add a bundle selector TEMPLATE0 before INSN.  */
8430
 
8431
static void
8432
ia64_add_bundle_selector_before (int template0, rtx insn)
8433
{
8434
  rtx b = gen_bundle_selector (GEN_INT (template0));
8435
 
8436
  ia64_emit_insn_before (b, insn);
8437
#if NR_BUNDLES == 10
8438
  if ((template0 == 4 || template0 == 5)
8439
      && (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
8440
    {
8441
      int i;
8442
      rtx note = NULL_RTX;
8443
 
8444
      /* In .mbb and .bbb bundles, check whether a CALL_INSN is in the
8445
         first or second slot.  If it is and has a REG_EH_REGION note, copy
8446
         the note to the following nops, as br.call sets rp to the address
8447
         of the following bundle and therefore an EH region end must be on
8448
         a bundle boundary.  */
8449
      insn = PREV_INSN (insn);
8450
      for (i = 0; i < 3; i++)
8451
        {
8452
          do
8453
            insn = next_active_insn (insn);
8454
          while (GET_CODE (insn) == INSN
8455
                 && get_attr_empty (insn) == EMPTY_YES);
8456
          if (GET_CODE (insn) == CALL_INSN)
8457
            note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
8458
          else if (note)
8459
            {
8460
              int code;
8461
 
8462
              gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
8463
                          || code == CODE_FOR_nop_b);
8464
              if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
8465
                note = NULL_RTX;
8466
              else
8467
                add_reg_note (insn, REG_EH_REGION, XEXP (note, 0));
8468
            }
8469
        }
8470
    }
8471
#endif
8472
}
8473
 
8474
/* The following function does insn bundling.  Bundling means
8475
   inserting templates and nop insns to fit insn groups into permitted
8476
   templates.  Instruction scheduling uses an NDFA (non-deterministic
8477
   finite automaton) encoding information about the templates and the
8478
   inserted nops.  The nondeterminism of the automaton permits following
8479
   all possible insn sequences very quickly.
8480
 
8481
   Unfortunately it is not possible to recover which nop insns were
8482
   inserted and which templates were used from the automaton states.  The
8483
   automaton only says that we can issue an insn, possibly inserting
8484
   some nops before it and using some template.  Therefore insn
8485
   bundling in this function is implemented by using DFA
8486
   (deterministic finite automata).  We follow all possible insn
8487
   sequences by inserting 0-2 nops (that is what the NDFA describe for
8488
   insn scheduling) before/after each insn being bundled.  We know the
8489
   start of simulated processor cycle from insn scheduling (insn
8490
   starting a new cycle has TImode).
8491
 
8492
   A simple implementation of insn bundling would create an enormous
8493
   number of possible insn sequences satisfying the information about new
8494
   cycle ticks taken from the insn scheduling.  To make the algorithm
8495
   practical we use dynamic programming.  Each decision (about
8496
   inserting nops and implicitly about previous decisions) is described
8497
   by structure bundle_state (see above).  If we generate the same
8498
   bundle state (the key is the automaton state after issuing the insns
8499
   and nops for it), we reuse the already generated one.  As a consequence
8500
   we reject some decisions which cannot improve the solution and
8501
   reduce the memory used by the algorithm.
8502
 
8503
   When we reach the end of the EBB (extended basic block), we choose the
8504
   best sequence and then, moving back through the EBB, insert templates
8505
   for the best alternative.  The templates are obtained by querying the
8506
   automaton state for each insn in the chosen bundle states.
8507
 
8508
   So the algorithm makes two (forward and backward) passes through
8509
   EBB.  */
8510
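/* Editorial sketch (not part of the original source) of the two passes
   described above:

     forward pass:
       for each important insn I, in order
         for each bundle state S recorded for the previous insn
           try extending S by 0, 1 or 2 nops followed by I
             (2 nops are tried only for F/B/L/S type insns);
           issue_nops_and_insn / insert_bundle_state merge extensions that
             reach the same (DFA state, insn number) key, keeping the
             better one;

     backward pass:
       starting from the best final state, walk the originator chain and
       emit the recorded nops and bundle templates before/after each insn.

   See bundling () below for the actual implementation.  */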
 
8511
static void
8512
bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
8513
{
8514
  struct bundle_state *curr_state, *next_state, *best_state;
8515
  rtx insn, next_insn;
8516
  int insn_num;
8517
  int i, bundle_end_p, only_bundle_end_p, asm_p;
8518
  int pos = 0, max_pos, template0, template1;
8519
  rtx b;
8520
  rtx nop;
8521
  enum attr_type type;
8522
 
8523
  insn_num = 0;
8524
  /* Count insns in the EBB.  */
8525
  for (insn = NEXT_INSN (prev_head_insn);
8526
       insn && insn != tail;
8527
       insn = NEXT_INSN (insn))
8528
    if (INSN_P (insn))
8529
      insn_num++;
8530
  if (insn_num == 0)
8531
    return;
8532
  bundling_p = 1;
8533
  dfa_clean_insn_cache ();
8534
  initiate_bundle_state_table ();
8535
  index_to_bundle_states = XNEWVEC (struct bundle_state *, insn_num + 2);
8536
  /* First (forward) pass -- generation of bundle states.  */
8537
  curr_state = get_free_bundle_state ();
8538
  curr_state->insn = NULL;
8539
  curr_state->before_nops_num = 0;
8540
  curr_state->after_nops_num = 0;
8541
  curr_state->insn_num = 0;
8542
  curr_state->cost = 0;
8543
  curr_state->accumulated_insns_num = 0;
8544
  curr_state->branch_deviation = 0;
8545
  curr_state->middle_bundle_stops = 0;
8546
  curr_state->next = NULL;
8547
  curr_state->originator = NULL;
8548
  state_reset (curr_state->dfa_state);
8549
  index_to_bundle_states [0] = curr_state;
8550
  insn_num = 0;
8551
  /* Shift cycle mark if it is put on insn which could be ignored.  */
8552
  for (insn = NEXT_INSN (prev_head_insn);
8553
       insn != tail;
8554
       insn = NEXT_INSN (insn))
8555
    if (INSN_P (insn)
8556
        && (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
8557
            || GET_CODE (PATTERN (insn)) == USE
8558
            || GET_CODE (PATTERN (insn)) == CLOBBER)
8559
        && GET_MODE (insn) == TImode)
8560
      {
8561
        PUT_MODE (insn, VOIDmode);
8562
        for (next_insn = NEXT_INSN (insn);
8563
             next_insn != tail;
8564
             next_insn = NEXT_INSN (next_insn))
8565
          if (INSN_P (next_insn)
8566
              && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
8567
              && GET_CODE (PATTERN (next_insn)) != USE
8568
              && GET_CODE (PATTERN (next_insn)) != CLOBBER
8569
              && INSN_CODE (next_insn) != CODE_FOR_insn_group_barrier)
8570
            {
8571
              PUT_MODE (next_insn, TImode);
8572
              break;
8573
            }
8574
      }
8575
  /* Forward pass: generation of bundle states.  */
8576
  for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
8577
       insn != NULL_RTX;
8578
       insn = next_insn)
8579
    {
8580
      gcc_assert (INSN_P (insn)
8581
                  && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
8582
                  && GET_CODE (PATTERN (insn)) != USE
8583
                  && GET_CODE (PATTERN (insn)) != CLOBBER);
8584
      type = ia64_safe_type (insn);
8585
      next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
8586
      insn_num++;
8587
      index_to_bundle_states [insn_num] = NULL;
8588
      for (curr_state = index_to_bundle_states [insn_num - 1];
8589
           curr_state != NULL;
8590
           curr_state = next_state)
8591
        {
8592
          pos = curr_state->accumulated_insns_num % 3;
8593
          next_state = curr_state->next;
8594
          /* We must fill up the current bundle in order to start a
8595
             subsequent asm insn in a new bundle.  An asm insn is always
8596
             placed in a separate bundle.  */
8597
          only_bundle_end_p
8598
            = (next_insn != NULL_RTX
8599
               && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
8600
               && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
8601
          /* We may fill up the current bundle if it is the cycle end
8602
             without a group barrier.  */
8603
          bundle_end_p
8604
            = (only_bundle_end_p || next_insn == NULL_RTX
8605
               || (GET_MODE (next_insn) == TImode
8606
                   && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
8607
          if (type == TYPE_F || type == TYPE_B || type == TYPE_L
8608
              || type == TYPE_S)
8609
            issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
8610
                                 only_bundle_end_p);
8611
          issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
8612
                               only_bundle_end_p);
8613
          issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
8614
                               only_bundle_end_p);
8615
        }
8616
      gcc_assert (index_to_bundle_states [insn_num]);
8617
      for (curr_state = index_to_bundle_states [insn_num];
8618
           curr_state != NULL;
8619
           curr_state = curr_state->next)
8620
        if (verbose >= 2 && dump)
8621
          {
8622
            /* This structure is taken from generated code of the
8623
               pipeline hazard recognizer (see file insn-attrtab.c).
8624
               Please don't forget to change the structure if a new
8625
               automaton is added to .md file.  */
8626
            struct DFA_chip
8627
            {
8628
              unsigned short one_automaton_state;
8629
              unsigned short oneb_automaton_state;
8630
              unsigned short two_automaton_state;
8631
              unsigned short twob_automaton_state;
8632
            };
8633
 
8634
            fprintf
8635
              (dump,
8636
               "//    Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d state %d) for %d\n",
8637
               curr_state->unique_num,
8638
               (curr_state->originator == NULL
8639
                ? -1 : curr_state->originator->unique_num),
8640
               curr_state->cost,
8641
               curr_state->before_nops_num, curr_state->after_nops_num,
8642
               curr_state->accumulated_insns_num, curr_state->branch_deviation,
8643
               curr_state->middle_bundle_stops,
8644
               ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
8645
               INSN_UID (insn));
8646
          }
8647
    }
8648
 
8649
  /* We should find a solution because the 2nd insn scheduling has
8650
     found one.  */
8651
  gcc_assert (index_to_bundle_states [insn_num]);
8652
  /* Find a state corresponding to the best insn sequence.  */
8653
  best_state = NULL;
8654
  for (curr_state = index_to_bundle_states [insn_num];
8655
       curr_state != NULL;
8656
       curr_state = curr_state->next)
8657
    /* We are just looking at the states with a fully filled up last
8658
       bundle.  First we prefer insn sequences with minimal cost, then
8659
       those with minimal inserted nops, and finally those with branch
8660
       insns placed in the 3rd slots.  */
8661
    if (curr_state->accumulated_insns_num % 3 == 0
8662
        && (best_state == NULL || best_state->cost > curr_state->cost
8663
            || (best_state->cost == curr_state->cost
8664
                && (curr_state->accumulated_insns_num
8665
                    < best_state->accumulated_insns_num
8666
                    || (curr_state->accumulated_insns_num
8667
                        == best_state->accumulated_insns_num
8668
                        && (curr_state->branch_deviation
8669
                            < best_state->branch_deviation
8670
                            || (curr_state->branch_deviation
8671
                                == best_state->branch_deviation
8672
                                && curr_state->middle_bundle_stops
8673
                                < best_state->middle_bundle_stops)))))))
8674
      best_state = curr_state;
8675
  /* Second (backward) pass: adding nops and templates.  */
8676
  gcc_assert (best_state);
8677
  insn_num = best_state->before_nops_num;
8678
  template0 = template1 = -1;
8679
  for (curr_state = best_state;
8680
       curr_state->originator != NULL;
8681
       curr_state = curr_state->originator)
8682
    {
8683
      insn = curr_state->insn;
8684
      asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
8685
               || asm_noperands (PATTERN (insn)) >= 0);
8686
      insn_num++;
8687
      if (verbose >= 2 && dump)
8688
        {
8689
          struct DFA_chip
8690
          {
8691
            unsigned short one_automaton_state;
8692
            unsigned short oneb_automaton_state;
8693
            unsigned short two_automaton_state;
8694
            unsigned short twob_automaton_state;
8695
          };
8696
 
8697
          fprintf
8698
            (dump,
8699
             "//    Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d, state %d) for %d\n",
8700
             curr_state->unique_num,
8701
             (curr_state->originator == NULL
8702
              ? -1 : curr_state->originator->unique_num),
8703
             curr_state->cost,
8704
             curr_state->before_nops_num, curr_state->after_nops_num,
8705
             curr_state->accumulated_insns_num, curr_state->branch_deviation,
8706
             curr_state->middle_bundle_stops,
8707
             ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
8708
             INSN_UID (insn));
8709
        }
8710
      /* Find the position in the current bundle window.  The window can
8711
         contain at most two bundles.  A two-bundle window means that
8712
         the processor will make two bundle rotations.  */
8713
      max_pos = get_max_pos (curr_state->dfa_state);
8714
      if (max_pos == 6
8715
          /* The following (negative template number) means that the
8716
             processor did one bundle rotation.  */
8717
          || (max_pos == 3 && template0 < 0))
8718
        {
8719
          /* We are at the end of the window -- find template(s) for
8720
             its bundle(s).  */
8721
          pos = max_pos;
8722
          if (max_pos == 3)
8723
            template0 = get_template (curr_state->dfa_state, 3);
8724
          else
8725
            {
8726
              template1 = get_template (curr_state->dfa_state, 3);
8727
              template0 = get_template (curr_state->dfa_state, 6);
8728
            }
8729
        }
8730
      if (max_pos > 3 && template1 < 0)
8731
        /* It may happen when we have the stop inside a bundle.  */
8732
        {
8733
          gcc_assert (pos <= 3);
8734
          template1 = get_template (curr_state->dfa_state, 3);
8735
          pos += 3;
8736
        }
8737
      if (!asm_p)
8738
        /* Emit nops after the current insn.  */
8739
        for (i = 0; i < curr_state->after_nops_num; i++)
8740
          {
8741
            nop = gen_nop ();
8742
            emit_insn_after (nop, insn);
8743
            pos--;
8744
            gcc_assert (pos >= 0);
8745
            if (pos % 3 == 0)
8746
              {
8747
                /* We are at the start of a bundle: emit the template
8748
                   (it should be defined).  */
8749
                gcc_assert (template0 >= 0);
8750
                ia64_add_bundle_selector_before (template0, nop);
8751
                /* If we have a two-bundle window, we make one bundle
8752
                   rotation.  Otherwise template0 will be undefined
8753
                   (a negative value).  */
8754
                template0 = template1;
8755
                template1 = -1;
8756
              }
8757
          }
8758
      /* Move the position backward in the window.  A group barrier has
8759
         no slot.  An asm insn takes a whole bundle.  */
8760
      if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
8761
          && GET_CODE (PATTERN (insn)) != ASM_INPUT
8762
          && asm_noperands (PATTERN (insn)) < 0)
8763
        pos--;
8764
      /* Long insn takes 2 slots.  */
8765
      if (ia64_safe_type (insn) == TYPE_L)
8766
        pos--;
8767
      gcc_assert (pos >= 0);
8768
      if (pos % 3 == 0
8769
          && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
8770
          && GET_CODE (PATTERN (insn)) != ASM_INPUT
8771
          && asm_noperands (PATTERN (insn)) < 0)
8772
        {
8773
          /* The current insn is at the bundle start: emit the
8774
             template.  */
8775
          gcc_assert (template0 >= 0);
8776
          ia64_add_bundle_selector_before (template0, insn);
8777
          b = PREV_INSN (insn);
8778
          insn = b;
8779
          /* See comment above in analogous place for emitting nops
8780
             after the insn.  */
8781
          template0 = template1;
8782
          template1 = -1;
8783
        }
8784
      /* Emit nops before the current insn.  */
8785
      for (i = 0; i < curr_state->before_nops_num; i++)
8786
        {
8787
          nop = gen_nop ();
8788
          ia64_emit_insn_before (nop, insn);
8789
          nop = PREV_INSN (insn);
8790
          insn = nop;
8791
          pos--;
8792
          gcc_assert (pos >= 0);
8793
          if (pos % 3 == 0)
8794
            {
8795
              /* See comment above in analogous place for emitting nops
8796
                 after the insn.  */
8797
              gcc_assert (template0 >= 0);
8798
              ia64_add_bundle_selector_before (template0, insn);
8799
              b = PREV_INSN (insn);
8800
              insn = b;
8801
              template0 = template1;
8802
              template1 = -1;
8803
            }
8804
        }
8805
    }
8806
 
8807
#ifdef ENABLE_CHECKING
8808
  {
8809
    /* Verify that middle_bundle_stops was calculated correctly.  */
8810
    int num = best_state->middle_bundle_stops;
8811
    bool start_bundle = true, end_bundle = false;
8812
 
8813
    for (insn = NEXT_INSN (prev_head_insn);
8814
         insn && insn != tail;
8815
         insn = NEXT_INSN (insn))
8816
      {
8817
        if (!INSN_P (insn))
8818
          continue;
8819
        if (recog_memoized (insn) == CODE_FOR_bundle_selector)
8820
          start_bundle = true;
8821
        else
8822
          {
8823
            rtx next_insn;
8824
 
8825
            for (next_insn = NEXT_INSN (insn);
8826
                 next_insn && next_insn != tail;
8827
                 next_insn = NEXT_INSN (next_insn))
8828
              if (INSN_P (next_insn)
8829
                  && (ia64_safe_itanium_class (next_insn)
8830
                      != ITANIUM_CLASS_IGNORE
8831
                      || recog_memoized (next_insn)
8832
                      == CODE_FOR_bundle_selector)
8833
                  && GET_CODE (PATTERN (next_insn)) != USE
8834
                  && GET_CODE (PATTERN (next_insn)) != CLOBBER)
8835
                break;
8836
 
8837
            end_bundle = next_insn == NULL_RTX
8838
             || next_insn == tail
8839
             || (INSN_P (next_insn)
8840
                 && recog_memoized (next_insn)
8841
                 == CODE_FOR_bundle_selector);
8842
            if (recog_memoized (insn) == CODE_FOR_insn_group_barrier
8843
                && !start_bundle && !end_bundle
8844
                && next_insn
8845
                && GET_CODE (PATTERN (next_insn)) != ASM_INPUT
8846
                && asm_noperands (PATTERN (next_insn)) < 0)
8847
              num--;
8848
 
8849
            start_bundle = false;
8850
          }
8851
      }
8852
 
8853
    gcc_assert (num == 0);
8854
  }
8855
#endif
8856
 
8857
  free (index_to_bundle_states);
8858
  finish_bundle_state_table ();
8859
  bundling_p = 0;
8860
  dfa_clean_insn_cache ();
8861
}
8862
 
8863
/* The following function is called at the end of scheduling BB or
8864
   EBB.  After reload, it inserts stop bits and does insn bundling.  */
8865
 
8866
static void
8867
ia64_sched_finish (FILE *dump, int sched_verbose)
8868
{
8869
  if (sched_verbose)
8870
    fprintf (dump, "// Finishing schedule.\n");
8871
  if (!reload_completed)
8872
    return;
8873
  if (reload_completed)
8874
    {
8875
      final_emit_insn_group_barriers (dump);
8876
      bundling (dump, sched_verbose, current_sched_info->prev_head,
8877
                current_sched_info->next_tail);
8878
      if (sched_verbose && dump)
8879
        fprintf (dump, "//    finishing %d-%d\n",
8880
                 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
8881
                 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
8882
 
8883
      return;
8884
    }
8885
}
8886
 
8887
/* The following function inserts stop bits in scheduled BB or EBB.  */
8888
 
8889
static void
8890
final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
8891
{
8892
  rtx insn;
8893
  int need_barrier_p = 0;
8894
  int seen_good_insn = 0;
8895
  rtx prev_insn = NULL_RTX;
8896
 
8897
  init_insn_group_barriers ();
8898
 
8899
  for (insn = NEXT_INSN (current_sched_info->prev_head);
8900
       insn != current_sched_info->next_tail;
8901
       insn = NEXT_INSN (insn))
8902
    {
8903
      if (GET_CODE (insn) == BARRIER)
8904
        {
8905
          rtx last = prev_active_insn (insn);
8906
 
8907
          if (! last)
8908
            continue;
8909
          if (GET_CODE (last) == JUMP_INSN
8910
              && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
8911
            last = prev_active_insn (last);
8912
          if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
8913
            emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
8914
 
8915
          init_insn_group_barriers ();
8916
          seen_good_insn = 0;
8917
          need_barrier_p = 0;
8918
          prev_insn = NULL_RTX;
8919
        }
8920
      else if (NONDEBUG_INSN_P (insn))
8921
        {
8922
          if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
8923
            {
8924
              init_insn_group_barriers ();
8925
              seen_good_insn = 0;
8926
              need_barrier_p = 0;
8927
              prev_insn = NULL_RTX;
8928
            }
8929
          else if (need_barrier_p || group_barrier_needed (insn)
8930
                   || (mflag_sched_stop_bits_after_every_cycle
8931
                       && GET_MODE (insn) == TImode
8932
                       && seen_good_insn))
8933
            {
8934
              if (TARGET_EARLY_STOP_BITS)
8935
                {
8936
                  rtx last;
8937
 
8938
                  for (last = insn;
8939
                       last != current_sched_info->prev_head;
8940
                       last = PREV_INSN (last))
8941
                    if (INSN_P (last) && GET_MODE (last) == TImode
8942
                        && stops_p [INSN_UID (last)])
8943
                      break;
8944
                  if (last == current_sched_info->prev_head)
8945
                    last = insn;
8946
                  last = prev_active_insn (last);
8947
                  if (last
8948
                      && recog_memoized (last) != CODE_FOR_insn_group_barrier)
8949
                    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
8950
                                     last);
8951
                  init_insn_group_barriers ();
8952
                  for (last = NEXT_INSN (last);
8953
                       last != insn;
8954
                       last = NEXT_INSN (last))
8955
                    if (INSN_P (last))
8956
                      {
8957
                        group_barrier_needed (last);
8958
                        if (recog_memoized (last) >= 0
8959
                            && important_for_bundling_p (last))
8960
                          seen_good_insn = 1;
8961
                      }
8962
                }
8963
              else
8964
                {
8965
                  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
8966
                                    insn);
8967
                  init_insn_group_barriers ();
8968
                  seen_good_insn = 0;
8969
                }
8970
              group_barrier_needed (insn);
8971
              if (recog_memoized (insn) >= 0
8972
                  && important_for_bundling_p (insn))
8973
                seen_good_insn = 1;
8974
              prev_insn = NULL_RTX;
8975
            }
8976
          else if (recog_memoized (insn) >= 0
8977
                   && important_for_bundling_p (insn))
8978
            {
8979
              prev_insn = insn;
8980
              seen_good_insn = 1;
8981
            }
8982
          need_barrier_p = (GET_CODE (insn) == CALL_INSN
8983
                            || GET_CODE (PATTERN (insn)) == ASM_INPUT
8984
                            || asm_noperands (PATTERN (insn)) >= 0);
8985
        }
8986
    }
8987
}
8988
 
8989
 
8990
 
8991
/* The following function returns the depth of the first-cycle multipass
8992
   lookahead used by the DFA insn scheduler (nonzero enables it).  */
8993
 
8994
static int
8995
ia64_first_cycle_multipass_dfa_lookahead (void)
8996
{
8997
  return (reload_completed ? 6 : 4);
8998
}
8999
 
9000
/* The following function initializes the variable `dfa_pre_cycle_insn'.  */
9001
 
9002
static void
9003
ia64_init_dfa_pre_cycle_insn (void)
9004
{
9005
  if (temp_dfa_state == NULL)
9006
    {
9007
      dfa_state_size = state_size ();
9008
      temp_dfa_state = xmalloc (dfa_state_size);
9009
      prev_cycle_state = xmalloc (dfa_state_size);
9010
    }
9011
  dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
9012
  PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
9013
  recog_memoized (dfa_pre_cycle_insn);
9014
  dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
9015
  PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
9016
  recog_memoized (dfa_stop_insn);
9017
}
9018
 
9019
/* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
9020
   used by the DFA insn scheduler.  */
9021
 
9022
static rtx
9023
ia64_dfa_pre_cycle_insn (void)
9024
{
9025
  return dfa_pre_cycle_insn;
9026
}
9027
 
9028
/* The following function returns TRUE if PRODUCER (of type ilog or
9029
   ld) produces address for CONSUMER (of type st or stf). */
9030
 
9031
int
9032
ia64_st_address_bypass_p (rtx producer, rtx consumer)
9033
{
9034
  rtx dest, reg, mem;
9035
 
9036
  gcc_assert (producer && consumer);
9037
  dest = ia64_single_set (producer);
9038
  gcc_assert (dest);
9039
  reg = SET_DEST (dest);
9040
  gcc_assert (reg);
9041
  if (GET_CODE (reg) == SUBREG)
9042
    reg = SUBREG_REG (reg);
9043
  gcc_assert (GET_CODE (reg) == REG);
9044
 
9045
  dest = ia64_single_set (consumer);
9046
  gcc_assert (dest);
9047
  mem = SET_DEST (dest);
9048
  gcc_assert (mem && GET_CODE (mem) == MEM);
9049
  return reg_mentioned_p (reg, mem);
9050
}
9051
 
9052
/* The following function returns TRUE if PRODUCER (of type ilog or
9053
   ld) produces address for CONSUMER (of type ld or fld). */
9054
 
9055
int
9056
ia64_ld_address_bypass_p (rtx producer, rtx consumer)
9057
{
9058
  rtx dest, src, reg, mem;
9059
 
9060
  gcc_assert (producer && consumer);
9061
  dest = ia64_single_set (producer);
9062
  gcc_assert (dest);
9063
  reg = SET_DEST (dest);
9064
  gcc_assert (reg);
9065
  if (GET_CODE (reg) == SUBREG)
9066
    reg = SUBREG_REG (reg);
9067
  gcc_assert (GET_CODE (reg) == REG);
9068
 
9069
  src = ia64_single_set (consumer);
9070
  gcc_assert (src);
9071
  mem = SET_SRC (src);
9072
  gcc_assert (mem);
9073
 
9074
  if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
9075
    mem = XVECEXP (mem, 0, 0);
9076
  else if (GET_CODE (mem) == IF_THEN_ELSE)
9077
    /* ??? Is this bypass necessary for ld.c?  */
9078
    {
9079
      gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR);
9080
      mem = XEXP (mem, 1);
9081
    }
9082
 
9083
  while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
9084
    mem = XEXP (mem, 0);
9085
 
9086
  if (GET_CODE (mem) == UNSPEC)
9087
    {
9088
      int c = XINT (mem, 1);
9089
 
9090
      gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDS_A
9091
                  || c == UNSPEC_LDSA);
9092
      mem = XVECEXP (mem, 0, 0);
9093
    }
9094
 
9095
  /* Note that LO_SUM is used for GOT loads.  */
9096
  gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);
9097
 
9098
  return reg_mentioned_p (reg, mem);
9099
}
9100
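/* Editorial note (not part of the original source): a minimal example of
   the structural check above.  Register numbers are illustrative only;
   the scheduler only applies this bypass when the producer/consumer types
   match the ones named in the comment.

     producer:  (set (reg:DI r14) (ior:DI (reg:DI r32) (const_int 1)))
     consumer:  (set (reg:DI r15) (mem:DI (reg:DI r14)))

   Here the producer's destination register r14 is mentioned in the
   consumer's memory address, so ia64_ld_address_bypass_p returns TRUE.  */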
 
9101
/* The following function returns TRUE if INSN produces an address for a
9102
   load/store insn.  We will place such insns into an M slot because that
9103
   decreases their latency.  */
9104
 
9105
int
9106
ia64_produce_address_p (rtx insn)
9107
{
9108
  return insn->call;
9109
}
9110
 
9111
 
9112
/* Emit pseudo-ops for the assembler to describe predicate relations.
9113
   At present this assumes that we only consider predicate pairs to
9114
   be mutex, and that the assembler can deduce proper values from
9115
   straight-line code.  */
9116
 
9117
static void
9118
emit_predicate_relation_info (void)
9119
{
9120
  basic_block bb;
9121
 
9122
  FOR_EACH_BB_REVERSE (bb)
9123
    {
9124
      int r;
9125
      rtx head = BB_HEAD (bb);
9126
 
9127
      /* We only need such notes at code labels.  */
9128
      if (GET_CODE (head) != CODE_LABEL)
9129
        continue;
9130
      if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head)))
9131
        head = NEXT_INSN (head);
9132
 
9133
      /* Skip p0, which may be thought to be live due to (reg:DI p0)
9134
         grabbing the entire block of predicate registers.  */
9135
      for (r = PR_REG (2); r < PR_REG (64); r += 2)
9136
        if (REGNO_REG_SET_P (df_get_live_in (bb), r))
9137
          {
9138
            rtx p = gen_rtx_REG (BImode, r);
9139
            rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
9140
            if (head == BB_END (bb))
9141
              BB_END (bb) = n;
9142
            head = n;
9143
          }
9144
    }
9145
 
9146
  /* Look for conditional calls that do not return, and protect predicate
9147
     relations around them.  Otherwise the assembler will assume the call
9148
     returns, and complain about uses of call-clobbered predicates after
9149
     the call.  */
9150
  FOR_EACH_BB_REVERSE (bb)
9151
    {
9152
      rtx insn = BB_HEAD (bb);
9153
 
9154
      while (1)
9155
        {
9156
          if (GET_CODE (insn) == CALL_INSN
9157
              && GET_CODE (PATTERN (insn)) == COND_EXEC
9158
              && find_reg_note (insn, REG_NORETURN, NULL_RTX))
9159
            {
9160
              rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
9161
              rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
9162
              if (BB_HEAD (bb) == insn)
9163
                BB_HEAD (bb) = b;
9164
              if (BB_END (bb) == insn)
9165
                BB_END (bb) = a;
9166
            }
9167
 
9168
          if (insn == BB_END (bb))
9169
            break;
9170
          insn = NEXT_INSN (insn);
9171
        }
9172
    }
9173
}
9174
 
9175
/* Perform machine dependent operations on the rtl chain INSNS.  */
9176
 
9177
static void
9178
ia64_reorg (void)
9179
{
9180
  /* We are freeing block_for_insn in the toplev to keep compatibility
9181
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
9182
  compute_bb_for_insn ();
9183
 
9184
  /* If optimizing, we'll have split before scheduling.  */
9185
  if (optimize == 0)
9186
    split_all_insns ();
9187
 
9188
  if (optimize && ia64_flag_schedule_insns2
9189
      && dbg_cnt (ia64_sched2))
9190
    {
9191
      timevar_push (TV_SCHED2);
9192
      ia64_final_schedule = 1;
9193
 
9194
      initiate_bundle_states ();
9195
      ia64_nop = make_insn_raw (gen_nop ());
9196
      PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
9197
      recog_memoized (ia64_nop);
9198
      clocks_length = get_max_uid () + 1;
9199
      stops_p = XCNEWVEC (char, clocks_length);
9200
 
9201
      if (ia64_tune == PROCESSOR_ITANIUM2)
9202
        {
9203
          pos_1 = get_cpu_unit_code ("2_1");
9204
          pos_2 = get_cpu_unit_code ("2_2");
9205
          pos_3 = get_cpu_unit_code ("2_3");
9206
          pos_4 = get_cpu_unit_code ("2_4");
9207
          pos_5 = get_cpu_unit_code ("2_5");
9208
          pos_6 = get_cpu_unit_code ("2_6");
9209
          _0mii_ = get_cpu_unit_code ("2b_0mii.");
9210
          _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
9211
          _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
9212
          _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
9213
          _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
9214
          _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
9215
          _0mib_ = get_cpu_unit_code ("2b_0mib.");
9216
          _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
9217
          _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
9218
          _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
9219
          _1mii_ = get_cpu_unit_code ("2b_1mii.");
9220
          _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
9221
          _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
9222
          _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
9223
          _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
9224
          _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
9225
          _1mib_ = get_cpu_unit_code ("2b_1mib.");
9226
          _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
9227
          _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
9228
          _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
9229
        }
9230
      else
9231
        {
9232
          pos_1 = get_cpu_unit_code ("1_1");
9233
          pos_2 = get_cpu_unit_code ("1_2");
9234
          pos_3 = get_cpu_unit_code ("1_3");
9235
          pos_4 = get_cpu_unit_code ("1_4");
9236
          pos_5 = get_cpu_unit_code ("1_5");
9237
          pos_6 = get_cpu_unit_code ("1_6");
9238
          _0mii_ = get_cpu_unit_code ("1b_0mii.");
9239
          _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
9240
          _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
9241
          _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
9242
          _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
9243
          _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
9244
          _0mib_ = get_cpu_unit_code ("1b_0mib.");
9245
          _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
9246
          _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
9247
          _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
9248
          _1mii_ = get_cpu_unit_code ("1b_1mii.");
9249
          _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
9250
          _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
9251
          _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
9252
          _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
9253
          _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
9254
          _1mib_ = get_cpu_unit_code ("1b_1mib.");
9255
          _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
9256
          _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
9257
          _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
9258
        }
9259
 
9260
      if (flag_selective_scheduling2
9261
          && !maybe_skip_selective_scheduling ())
9262
        run_selective_scheduling ();
9263
      else
9264
        schedule_ebbs ();
9265
 
9266
      /* Redo the alignment computation, as it might have gone wrong.  */
9267
      compute_alignments ();
9268
 
9269
      /* We cannot reuse this one because it has been corrupted by the
9270
         evil glat.  */
9271
      finish_bundle_states ();
9272
      free (stops_p);
9273
      stops_p = NULL;
9274
      emit_insn_group_barriers (dump_file);
9275
 
9276
      ia64_final_schedule = 0;
9277
      timevar_pop (TV_SCHED2);
9278
    }
9279
  else
9280
    emit_all_insn_group_barriers (dump_file);
9281
 
9282
  df_analyze ();
9283
 
9284
  /* A call must not be the last instruction in a function, so that the
9285
     return address is still within the function, so that unwinding works
9286
     properly.  Note that IA-64 differs from dwarf2 on this point.  */
9287
  if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
9288
    {
9289
      rtx insn;
9290
      int saw_stop = 0;
9291
 
9292
      insn = get_last_insn ();
9293
      if (! INSN_P (insn))
9294
        insn = prev_active_insn (insn);
9295
      if (insn)
9296
        {
9297
          /* Skip over insns that expand to nothing.  */
9298
          while (GET_CODE (insn) == INSN
9299
                 && get_attr_empty (insn) == EMPTY_YES)
9300
            {
9301
              if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
9302
                  && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
9303
                saw_stop = 1;
9304
              insn = prev_active_insn (insn);
9305
            }
9306
          if (GET_CODE (insn) == CALL_INSN)
9307
            {
9308
              if (! saw_stop)
9309
                emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9310
              emit_insn (gen_break_f ());
9311
              emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9312
            }
9313
        }
9314
    }
9315
 
9316
  emit_predicate_relation_info ();
9317
 
9318
  if (ia64_flag_var_tracking)
9319
    {
9320
      timevar_push (TV_VAR_TRACKING);
9321
      variable_tracking_main ();
9322
      timevar_pop (TV_VAR_TRACKING);
9323
    }
9324
  df_finish_pass (false);
9325
}
9326
 
9327
/* Return true if REGNO is used by the epilogue.  */
9328
 
9329
int
9330
ia64_epilogue_uses (int regno)
9331
{
9332
  switch (regno)
9333
    {
9334
    case R_GR (1):
9335
      /* With a call to a function in another module, we will write a new
9336
         value to "gp".  After returning from such a call, we need to make
9337
         sure the function restores the original gp-value, even if the
9338
         function itself does not use the gp anymore.  */
9339
      return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
9340
 
9341
    case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
9342
    case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
9343
      /* For functions defined with the syscall_linkage attribute, all
9344
         input registers are marked as live at all function exits.  This
9345
         prevents the register allocator from using the input registers,
9346
         which in turn makes it possible to restart a system call after
9347
         an interrupt without having to save/restore the input registers.
9348
         This also prevents kernel data from leaking to application code.  */
9349
      return lookup_attribute ("syscall_linkage",
9350
           TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
9351
 
9352
    case R_BR (0):
9353
      /* Conditional return patterns can't represent the use of `b0' as
9354
         the return address, so we force the value live this way.  */
9355
      return 1;
9356
 
9357
    case AR_PFS_REGNUM:
9358
      /* Likewise for ar.pfs, which is used by br.ret.  */
9359
      return 1;
9360
 
9361
    default:
9362
      return 0;
9363
    }
9364
}
9365
 
9366
/* Return true if REGNO is used by the frame unwinder.  */
9367
 
9368
int
9369
ia64_eh_uses (int regno)
9370
{
9371
  unsigned int r;
9372
 
9373
  if (! reload_completed)
9374
    return 0;
9375
 
9376
  if (regno == 0)
9377
    return 0;
9378
 
9379
  for (r = reg_save_b0; r <= reg_save_ar_lc; r++)
9380
    if (regno == current_frame_info.r[r]
9381
       || regno == emitted_frame_related_regs[r])
9382
      return 1;
9383
 
9384
  return 0;
9385
}
9386
 
9387
/* Return true if this goes in small data/bss.  */
9388
 
9389
/* ??? We could also support own long data here.  Generating movl/add/ld8
9390
   instead of addl,ld8/ld8.  This makes the code bigger, but should make the
9391
   code faster because there is one less load.  This also includes incomplete
9392
   types which can't go in sdata/sbss.  */
9393
 
9394
static bool
9395
ia64_in_small_data_p (const_tree exp)
9396
{
9397
  if (TARGET_NO_SDATA)
9398
    return false;
9399
 
9400
  /* We want to merge strings, so we never consider them small data.  */
9401
  if (TREE_CODE (exp) == STRING_CST)
9402
    return false;
9403
 
9404
  /* Functions are never small data.  */
9405
  if (TREE_CODE (exp) == FUNCTION_DECL)
9406
    return false;
9407
 
9408
  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
9409
    {
9410
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
9411
 
9412
      if (strcmp (section, ".sdata") == 0
9413
          || strncmp (section, ".sdata.", 7) == 0
9414
          || strncmp (section, ".gnu.linkonce.s.", 16) == 0
9415
          || strcmp (section, ".sbss") == 0
9416
          || strncmp (section, ".sbss.", 6) == 0
9417
          || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
9418
        return true;
9419
    }
9420
  else
9421
    {
9422
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
9423
 
9424
      /* If this is an incomplete type with size 0, then we can't put it
9425
         in sdata because it might be too big when completed.  */
9426
      if (size > 0 && size <= ia64_section_threshold)
9427
        return true;
9428
    }
9429
 
9430
  return false;
9431
}
9432
 
9433
/* Output assembly directives for prologue regions.  */
9434
 
9435
/* The current basic block number.  */
9436
 
9437
static bool last_block;
9438
 
9439
/* True if we need a copy_state command at the start of the next block.  */
9440
 
9441
static bool need_copy_state;
9442
 
9443
#ifndef MAX_ARTIFICIAL_LABEL_BYTES
9444
# define MAX_ARTIFICIAL_LABEL_BYTES 30
9445
#endif
9446
 
9447
/* Emit a debugging label after a call-frame-related insn.  We'd
9448
   rather output the label right away, but we'd have to output it
9449
   after, not before, the instruction, and the instruction has not
9450
   been output yet.  So we emit the label after the insn, delete it to
9451
   avoid introducing basic blocks, and mark it as preserved, such that
9452
   it is still output, given that it is referenced in debug info.  */
9453
 
9454
static const char *
9455
ia64_emit_deleted_label_after_insn (rtx insn)
9456
{
9457
  char label[MAX_ARTIFICIAL_LABEL_BYTES];
9458
  rtx lb = gen_label_rtx ();
9459
  rtx label_insn = emit_label_after (lb, insn);
9460
 
9461
  LABEL_PRESERVE_P (lb) = 1;
9462
 
9463
  delete_insn (label_insn);
9464
 
9465
  ASM_GENERATE_INTERNAL_LABEL (label, "L", CODE_LABEL_NUMBER (label_insn));
9466
 
9467
  return xstrdup (label);
9468
}
9469
 
9470
/* Define the CFA after INSN with the steady-state definition.  */
9471
 
9472
static void
9473
ia64_dwarf2out_def_steady_cfa (rtx insn, bool frame)
9474
{
9475
  rtx fp = frame_pointer_needed
9476
    ? hard_frame_pointer_rtx
9477
    : stack_pointer_rtx;
9478
  const char *label = ia64_emit_deleted_label_after_insn (insn);
9479
 
9480
  if (!frame)
9481
    return;
9482
 
9483
  dwarf2out_def_cfa
9484
    (label, REGNO (fp),
9485
     ia64_initial_elimination_offset
9486
     (REGNO (arg_pointer_rtx), REGNO (fp))
9487
     + ARG_POINTER_CFA_OFFSET (current_function_decl));
9488
}
9489
 
9490
/* The generic dwarf2 frame debug info generator does not define a
9491
   separate region for the very end of the epilogue, so refrain from
9492
   doing so in the IA64-specific code as well.  */
9493
 
9494
#define IA64_CHANGE_CFA_IN_EPILOGUE 0
9495
 
9496
/* The function emits unwind directives for the start of an epilogue.  */
9497
 
9498
static void
9499
process_epilogue (FILE *asm_out_file, rtx insn, bool unwind, bool frame)
9500
{
9501
  /* If this isn't the last block of the function, then we need to label the
9502
     current state, and copy it back in at the start of the next block.  */
9503
 
9504
  if (!last_block)
9505
    {
9506
      if (unwind)
9507
        fprintf (asm_out_file, "\t.label_state %d\n",
9508
                 ++cfun->machine->state_num);
9509
      need_copy_state = true;
9510
    }
9511
 
9512
  if (unwind)
9513
    fprintf (asm_out_file, "\t.restore sp\n");
9514
  if (IA64_CHANGE_CFA_IN_EPILOGUE && frame)
9515
    dwarf2out_def_cfa (ia64_emit_deleted_label_after_insn (insn),
9516
                       STACK_POINTER_REGNUM, INCOMING_FRAME_SP_OFFSET);
9517
}
9518
 
9519
/* This function processes a SET pattern looking for specific patterns
9520
   which result in emitting an assembly directive required for unwinding.  */
9521
 
9522
static int
9523
process_set (FILE *asm_out_file, rtx pat, rtx insn, bool unwind, bool frame)
9524
{
9525
  rtx src = SET_SRC (pat);
9526
  rtx dest = SET_DEST (pat);
9527
  int src_regno, dest_regno;
9528
 
9529
  /* Look for the ALLOC insn.  */
9530
  if (GET_CODE (src) == UNSPEC_VOLATILE
9531
      && XINT (src, 1) == UNSPECV_ALLOC
9532
      && GET_CODE (dest) == REG)
9533
    {
9534
      dest_regno = REGNO (dest);
9535
 
9536
      /* If this is the final destination for ar.pfs, then this must
9537
         be the alloc in the prologue.  */
9538
      if (dest_regno == current_frame_info.r[reg_save_ar_pfs])
9539
        {
9540
          if (unwind)
9541
            fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
9542
                     ia64_dbx_register_number (dest_regno));
9543
        }
9544
      else
9545
        {
9546
          /* This must be an alloc before a sibcall.  We must drop the
9547
             old frame info.  The easiest way to drop the old frame
9548
             info is to ensure we had a ".restore sp" directive
9549
             followed by a new prologue.  If the procedure doesn't
9550
             have a memory-stack frame, we'll issue a dummy ".restore
9551
             sp" now.  */
9552
          if (current_frame_info.total_size == 0 && !frame_pointer_needed)
9553
            /* if haven't done process_epilogue() yet, do it now */
9554
            process_epilogue (asm_out_file, insn, unwind, frame);
9555
          if (unwind)
9556
            fprintf (asm_out_file, "\t.prologue\n");
9557
        }
9558
      return 1;
9559
    }
9560
 
9561
  /* Look for SP = ....  */
9562
  if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
9563
    {
9564
      if (GET_CODE (src) == PLUS)
9565
        {
9566
          rtx op0 = XEXP (src, 0);
9567
          rtx op1 = XEXP (src, 1);
9568
 
9569
          gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);
9570
 
9571
          if (INTVAL (op1) < 0)
9572
            {
9573
              gcc_assert (!frame_pointer_needed);
9574
              if (unwind)
9575
                fprintf (asm_out_file, "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
9576
                         -INTVAL (op1));
9577
              ia64_dwarf2out_def_steady_cfa (insn, frame);
9578
            }
9579
          else
9580
            process_epilogue (asm_out_file, insn, unwind, frame);
9581
        }
9582
      else
9583
        {
9584
          gcc_assert (GET_CODE (src) == REG
9585
                      && REGNO (src) == HARD_FRAME_POINTER_REGNUM);
9586
          process_epilogue (asm_out_file, insn, unwind, frame);
9587
        }
9588
 
9589
      return 1;
9590
    }
9591
 
9592
  /* Register move we need to look at.  */
9593
  if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
9594
    {
9595
      src_regno = REGNO (src);
9596
      dest_regno = REGNO (dest);
9597
 
9598
      switch (src_regno)
9599
        {
9600
        case BR_REG (0):
9601
          /* Saving return address pointer.  */
9602
          gcc_assert (dest_regno == current_frame_info.r[reg_save_b0]);
9603
          if (unwind)
9604
            fprintf (asm_out_file, "\t.save rp, r%d\n",
9605
                     ia64_dbx_register_number (dest_regno));
9606
          return 1;
9607
 
9608
        case PR_REG (0):
9609
          gcc_assert (dest_regno == current_frame_info.r[reg_save_pr]);
9610
          if (unwind)
9611
            fprintf (asm_out_file, "\t.save pr, r%d\n",
9612
                     ia64_dbx_register_number (dest_regno));
9613
          return 1;
9614
 
9615
        case AR_UNAT_REGNUM:
9616
          gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_unat]);
9617
          if (unwind)
9618
            fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
9619
                     ia64_dbx_register_number (dest_regno));
9620
          return 1;
9621
 
9622
        case AR_LC_REGNUM:
9623
          gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_lc]);
9624
          if (unwind)
9625
            fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
9626
                     ia64_dbx_register_number (dest_regno));
9627
          return 1;
9628
 
9629
        case STACK_POINTER_REGNUM:
9630
          gcc_assert (dest_regno == HARD_FRAME_POINTER_REGNUM
9631
                      && frame_pointer_needed);
9632
          if (unwind)
9633
            fprintf (asm_out_file, "\t.vframe r%d\n",
9634
                     ia64_dbx_register_number (dest_regno));
9635
          ia64_dwarf2out_def_steady_cfa (insn, frame);
9636
          return 1;
9637
 
9638
        default:
9639
          /* Everything else should indicate being stored to memory.  */
9640
          gcc_unreachable ();
9641
        }
9642
    }
9643
 
9644
  /* Memory store we need to look at.  */
9645
  if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
9646
    {
9647
      long off;
9648
      rtx base;
9649
      const char *saveop;
9650
 
9651
      if (GET_CODE (XEXP (dest, 0)) == REG)
9652
        {
9653
          base = XEXP (dest, 0);
9654
          off = 0;
9655
        }
9656
      else
9657
        {
9658
          gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
9659
                      && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
9660
          base = XEXP (XEXP (dest, 0), 0);
9661
          off = INTVAL (XEXP (XEXP (dest, 0), 1));
9662
        }
9663
 
9664
      if (base == hard_frame_pointer_rtx)
9665
        {
9666
          saveop = ".savepsp";
9667
          off = - off;
9668
        }
9669
      else
9670
        {
9671
          gcc_assert (base == stack_pointer_rtx);
9672
          saveop = ".savesp";
9673
        }
9674
 
9675
      src_regno = REGNO (src);
9676
      switch (src_regno)
9677
        {
9678
        case BR_REG (0):
9679
          gcc_assert (!current_frame_info.r[reg_save_b0]);
9680
          if (unwind)
9681
            fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
9682
          return 1;
9683
 
9684
        case PR_REG (0):
9685
          gcc_assert (!current_frame_info.r[reg_save_pr]);
9686
          if (unwind)
9687
            fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
9688
          return 1;
9689
 
9690
        case AR_LC_REGNUM:
9691
          gcc_assert (!current_frame_info.r[reg_save_ar_lc]);
9692
          if (unwind)
9693
            fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
9694
          return 1;
9695
 
9696
        case AR_PFS_REGNUM:
9697
          gcc_assert (!current_frame_info.r[reg_save_ar_pfs]);
9698
          if (unwind)
9699
            fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
9700
          return 1;
9701
 
9702
        case AR_UNAT_REGNUM:
9703
          gcc_assert (!current_frame_info.r[reg_save_ar_unat]);
9704
          if (unwind)
9705
            fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
9706
          return 1;
9707
 
9708
        case GR_REG (4):
9709
        case GR_REG (5):
9710
        case GR_REG (6):
9711
        case GR_REG (7):
9712
          if (unwind)
9713
            fprintf (asm_out_file, "\t.save.g 0x%x\n",
9714
                     1 << (src_regno - GR_REG (4)));
9715
          return 1;
9716
 
9717
        case BR_REG (1):
9718
        case BR_REG (2):
9719
        case BR_REG (3):
9720
        case BR_REG (4):
9721
        case BR_REG (5):
9722
          if (unwind)
9723
            fprintf (asm_out_file, "\t.save.b 0x%x\n",
9724
                     1 << (src_regno - BR_REG (1)));
9725
          return 1;
9726
 
9727
        case FR_REG (2):
9728
        case FR_REG (3):
9729
        case FR_REG (4):
9730
        case FR_REG (5):
9731
          if (unwind)
9732
            fprintf (asm_out_file, "\t.save.f 0x%x\n",
9733
                     1 << (src_regno - FR_REG (2)));
9734
          return 1;
9735
 
9736
        case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
9737
        case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
9738
        case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
9739
        case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
9740
          if (unwind)
9741
            fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
9742
                     1 << (src_regno - FR_REG (12)));
9743
          return 1;
9744
 
9745
        default:
9746
          return 0;
9747
        }
9748
    }
9749
 
9750
  return 0;
9751
}
9752
 
9753
 
9754
/* This function looks at a single insn and emits any directives
9755
   required to unwind this insn.  */
9756
void
9757
process_for_unwind_directive (FILE *asm_out_file, rtx insn)
9758
{
9759
  bool unwind = (flag_unwind_tables
9760
                 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS));
9761
  bool frame = dwarf2out_do_frame ();
9762
 
9763
  if (unwind || frame)
9764
    {
9765
      rtx pat;
9766
 
9767
      if (NOTE_INSN_BASIC_BLOCK_P (insn))
9768
        {
9769
          last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
9770
 
9771
          /* Restore unwind state from immediately before the epilogue.  */
9772
          if (need_copy_state)
9773
            {
9774
              if (unwind)
9775
                {
9776
                  fprintf (asm_out_file, "\t.body\n");
9777
                  fprintf (asm_out_file, "\t.copy_state %d\n",
9778
                           cfun->machine->state_num);
9779
                }
9780
              if (IA64_CHANGE_CFA_IN_EPILOGUE)
9781
                ia64_dwarf2out_def_steady_cfa (insn, frame);
9782
              need_copy_state = false;
9783
            }
9784
        }
9785
 
9786
      if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
9787
        return;
9788
 
9789
      pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
9790
      if (pat)
9791
        pat = XEXP (pat, 0);
9792
      else
9793
        pat = PATTERN (insn);
9794
 
9795
      switch (GET_CODE (pat))
9796
        {
9797
        case SET:
9798
          process_set (asm_out_file, pat, insn, unwind, frame);
9799
          break;
9800
 
9801
        case PARALLEL:
9802
          {
9803
            int par_index;
9804
            int limit = XVECLEN (pat, 0);
9805
            for (par_index = 0; par_index < limit; par_index++)
9806
              {
9807
                rtx x = XVECEXP (pat, 0, par_index);
9808
                if (GET_CODE (x) == SET)
9809
                  process_set (asm_out_file, x, insn, unwind, frame);
9810
              }
9811
            break;
9812
          }
9813
 
9814
        default:
9815
          gcc_unreachable ();
9816
        }
9817
    }
9818
}
9819
 
9820
 
9821
enum ia64_builtins
9822
{
9823
  IA64_BUILTIN_BSP,
9824
  IA64_BUILTIN_COPYSIGNQ,
9825
  IA64_BUILTIN_FABSQ,
9826
  IA64_BUILTIN_FLUSHRS,
9827
  IA64_BUILTIN_INFQ,
9828
  IA64_BUILTIN_HUGE_VALQ
9829
};
9830
 
9831
void
9832
ia64_init_builtins (void)
9833
{
9834
  tree fpreg_type;
9835
  tree float80_type;
9836
 
9837
  /* The __fpreg type.  */
9838
  fpreg_type = make_node (REAL_TYPE);
9839
  TYPE_PRECISION (fpreg_type) = 82;
9840
  layout_type (fpreg_type);
9841
  (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
9842
 
9843
  /* The __float80 type.  */
9844
  float80_type = make_node (REAL_TYPE);
9845
  TYPE_PRECISION (float80_type) = 80;
9846
  layout_type (float80_type);
9847
  (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
9848
 
9849
  /* The __float128 type.  */
9850
  if (!TARGET_HPUX)
9851
    {
9852
      tree ftype, decl;
9853
      tree float128_type = make_node (REAL_TYPE);
9854
 
9855
      TYPE_PRECISION (float128_type) = 128;
9856
      layout_type (float128_type);
9857
      (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
9858
 
9859
      /* TFmode support builtins.  */
9860
      ftype = build_function_type (float128_type, void_list_node);
9861
      add_builtin_function ("__builtin_infq", ftype,
9862
                            IA64_BUILTIN_INFQ, BUILT_IN_MD,
9863
                            NULL, NULL_TREE);
9864
 
9865
      add_builtin_function ("__builtin_huge_valq", ftype,
9866
                            IA64_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
9867
                            NULL, NULL_TREE);
9868
 
9869
      ftype = build_function_type_list (float128_type,
9870
                                        float128_type,
9871
                                        NULL_TREE);
9872
      decl = add_builtin_function ("__builtin_fabsq", ftype,
9873
                                   IA64_BUILTIN_FABSQ, BUILT_IN_MD,
9874
                                   "__fabstf2", NULL_TREE);
9875
      TREE_READONLY (decl) = 1;
9876
 
9877
      ftype = build_function_type_list (float128_type,
9878
                                        float128_type,
9879
                                        float128_type,
9880
                                        NULL_TREE);
9881
      decl = add_builtin_function ("__builtin_copysignq", ftype,
9882
                                   IA64_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
9883
                                   "__copysigntf3", NULL_TREE);
9884
      TREE_READONLY (decl) = 1;
9885
    }
9886
  else
9887
    /* Under HPUX, this is a synonym for "long double".  */
9888
    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
9889
                                               "__float128");
9890
 
9891
  /* Fwrite on VMS is non-standard.  */
9892
  if (TARGET_ABI_OPEN_VMS)
9893
    {
9894
      implicit_built_in_decls[(int) BUILT_IN_FWRITE] = NULL_TREE;
9895
      implicit_built_in_decls[(int) BUILT_IN_FWRITE_UNLOCKED] = NULL_TREE;
9896
    }
9897
 
9898
#define def_builtin(name, type, code)                                   \
9899
  add_builtin_function ((name), (type), (code), BUILT_IN_MD,    \
9900
                       NULL, NULL_TREE)
9901
 
9902
  def_builtin ("__builtin_ia64_bsp",
9903
               build_function_type (ptr_type_node, void_list_node),
9904
               IA64_BUILTIN_BSP);
9905
 
9906
  def_builtin ("__builtin_ia64_flushrs",
9907
               build_function_type (void_type_node, void_list_node),
9908
               IA64_BUILTIN_FLUSHRS);
9909
 
9910
#undef def_builtin
9911
 
9912
  if (TARGET_HPUX)
9913
    {
9914
      if (built_in_decls [BUILT_IN_FINITE])
9915
        set_user_assembler_name (built_in_decls [BUILT_IN_FINITE],
9916
          "_Isfinite");
9917
      if (built_in_decls [BUILT_IN_FINITEF])
9918
        set_user_assembler_name (built_in_decls [BUILT_IN_FINITEF],
9919
          "_Isfinitef");
9920
      if (built_in_decls [BUILT_IN_FINITEL])
9921
        set_user_assembler_name (built_in_decls [BUILT_IN_FINITEL],
9922
          "_Isfinitef128");
9923
    }
9924
}
9925
 
9926
rtx
9927
ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9928
                     enum machine_mode mode ATTRIBUTE_UNUSED,
9929
                     int ignore ATTRIBUTE_UNUSED)
9930
{
9931
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
9932
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9933
 
9934
  switch (fcode)
9935
    {
9936
    case IA64_BUILTIN_BSP:
9937
      if (! target || ! register_operand (target, DImode))
9938
        target = gen_reg_rtx (DImode);
9939
      emit_insn (gen_bsp_value (target));
9940
#ifdef POINTERS_EXTEND_UNSIGNED
9941
      target = convert_memory_address (ptr_mode, target);
9942
#endif
9943
      return target;
9944
 
9945
    case IA64_BUILTIN_FLUSHRS:
9946
      emit_insn (gen_flushrs ());
9947
      return const0_rtx;
9948
 
9949
    case IA64_BUILTIN_INFQ:
9950
    case IA64_BUILTIN_HUGE_VALQ:
9951
      {
9952
        REAL_VALUE_TYPE inf;
9953
        rtx tmp;
9954
 
9955
        real_inf (&inf);
9956
        tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
9957
 
9958
        tmp = validize_mem (force_const_mem (mode, tmp));
9959
 
9960
        if (target == 0)
9961
          target = gen_reg_rtx (mode);
9962
 
9963
        emit_move_insn (target, tmp);
9964
        return target;
9965
      }
9966
 
9967
    case IA64_BUILTIN_FABSQ:
9968
    case IA64_BUILTIN_COPYSIGNQ:
9969
      return expand_call (exp, target, ignore);
9970
 
9971
    default:
9972
      gcc_unreachable ();
9973
    }
9974
 
9975
  return NULL_RTX;
9976
}
9977
 
9978
/* For the HP-UX IA64 aggregate parameters are passed stored in the
9979
   most significant bits of the stack slot.  */
9980
 
9981
enum direction
9982
ia64_hpux_function_arg_padding (enum machine_mode mode, const_tree type)
9983
{
9984
   /* Exception to normal case for structures/unions/etc.  */
9985
 
9986
   if (type && AGGREGATE_TYPE_P (type)
9987
       && int_size_in_bytes (type) < UNITS_PER_WORD)
9988
     return upward;
9989
 
9990
   /* Fall back to the default.  */
9991
   return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
9992
}
9993
 
9994
/* Emit text to declare externally defined variables and functions, because
9995
   the Intel assembler does not support undefined externals.  */
9996
 
9997
void
9998
ia64_asm_output_external (FILE *file, tree decl, const char *name)
9999
{
10000
  /* We output the name if and only if TREE_SYMBOL_REFERENCED is
10001
     set in order to avoid putting out names that are never really
10002
     used. */
10003
  if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
10004
    {
10005
      /* maybe_assemble_visibility will return 1 if the assembler
10006
         visibility directive is output.  */
10007
      int need_visibility = ((*targetm.binds_local_p) (decl)
10008
                             && maybe_assemble_visibility (decl));
10009
 
10010
#ifdef DO_CRTL_NAMES
10011
      DO_CRTL_NAMES;
10012
#endif
10013
 
10014
      /* GNU as does not need anything here, but the HP linker does
10015
         need something for external functions.  */
10016
      if ((TARGET_HPUX_LD || !TARGET_GNU_AS)
10017
          && TREE_CODE (decl) == FUNCTION_DECL)
10018
          (*targetm.asm_out.globalize_decl_name) (file, decl);
10019
      else if (need_visibility && !TARGET_GNU_AS)
10020
        (*targetm.asm_out.globalize_label) (file, name);
10021
    }
10022
}
10023
 
10024
/* Set SImode div/mod functions, init_integral_libfuncs only initializes
10025
   modes of word_mode and larger.  Rename the TFmode libfuncs using the
10026
   HPUX conventions. __divtf3 is used for XFmode. We need to keep it for
10027
   backward compatibility. */
10028
 
10029
static void
10030
ia64_init_libfuncs (void)
10031
{
10032
  set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
10033
  set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
10034
  set_optab_libfunc (smod_optab, SImode, "__modsi3");
10035
  set_optab_libfunc (umod_optab, SImode, "__umodsi3");
10036
 
10037
  set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
10038
  set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
10039
  set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
10040
  set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
10041
  set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
10042
 
10043
  set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
10044
  set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
10045
  set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
10046
  set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
10047
  set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
10048
  set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
10049
 
10050
  set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
10051
  set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
10052
  set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad");
10053
  set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
10054
  set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
10055
 
10056
  set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
10057
  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
10058
  set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad");
10059
  /* HP-UX 11.23 libc does not have a function for unsigned
10060
     SImode-to-TFmode conversion.  */
10061
  set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
10062
}
10063
 
10064
/* Rename all the TFmode libfuncs using the HPUX conventions.  */
10065
 
10066
static void
10067
ia64_hpux_init_libfuncs (void)
10068
{
10069
  ia64_init_libfuncs ();
10070
 
10071
  /* The HP SI millicode division and mod functions expect DI arguments.
10072
     By turning them off completely we avoid using both libgcc and the
10073
     non-standard millicode routines and use the HP DI millicode routines
10074
     instead.  */
10075
 
10076
  set_optab_libfunc (sdiv_optab, SImode, 0);
10077
  set_optab_libfunc (udiv_optab, SImode, 0);
10078
  set_optab_libfunc (smod_optab, SImode, 0);
10079
  set_optab_libfunc (umod_optab, SImode, 0);
10080
 
10081
  set_optab_libfunc (sdiv_optab, DImode, "__milli_divI");
10082
  set_optab_libfunc (udiv_optab, DImode, "__milli_divU");
10083
  set_optab_libfunc (smod_optab, DImode, "__milli_remI");
10084
  set_optab_libfunc (umod_optab, DImode, "__milli_remU");
10085
 
10086
  /* HP-UX libc has TF min/max/abs routines in it.  */
10087
  set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
10088
  set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
10089
  set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
10090
 
10091
  /* ia64_expand_compare uses this.  */
10092
  cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
10093
 
10094
  /* These should never be used.  */
10095
  set_optab_libfunc (eq_optab, TFmode, 0);
10096
  set_optab_libfunc (ne_optab, TFmode, 0);
10097
  set_optab_libfunc (gt_optab, TFmode, 0);
10098
  set_optab_libfunc (ge_optab, TFmode, 0);
10099
  set_optab_libfunc (lt_optab, TFmode, 0);
10100
  set_optab_libfunc (le_optab, TFmode, 0);
10101
}
10102
 
10103
/* Rename the division and modulus functions in VMS.  */
10104
 
10105
static void
10106
ia64_vms_init_libfuncs (void)
10107
{
10108
  set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
10109
  set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
10110
  set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
10111
  set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
10112
  set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
10113
  set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
10114
  set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
10115
  set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
10116
  abort_libfunc = init_one_libfunc ("decc$abort");
10117
  memcmp_libfunc = init_one_libfunc ("decc$memcmp");
10118
#ifdef MEM_LIBFUNCS_INIT
10119
  MEM_LIBFUNCS_INIT;
10120
#endif
10121
}
10122
 
10123
/* Rename the TFmode libfuncs available from soft-fp in glibc using
10124
   the HPUX conventions.  */
10125
 
10126
static void
10127
ia64_sysv4_init_libfuncs (void)
10128
{
10129
  ia64_init_libfuncs ();
10130
 
10131
  /* These functions are not part of the HPUX TFmode interface.  We
10132
     use them instead of _U_Qfcmp, which doesn't work the way we
10133
     expect.  */
10134
  set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
10135
  set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
10136
  set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
10137
  set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
10138
  set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
10139
  set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
10140
 
10141
  /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
10142
     glibc doesn't have them.  */
10143
}
10144
 
10145
/* Use soft-fp.  */
10146
 
10147
static void
10148
ia64_soft_fp_init_libfuncs (void)
10149
{
10150
}
10151
 
10152
static bool
10153
ia64_vms_valid_pointer_mode (enum machine_mode mode)
10154
{
10155
  return (mode == SImode || mode == DImode);
10156
}
10157
 
10158
/* For HPUX, it is illegal to have relocations in shared segments.  */
10159
 
10160
static int
10161
ia64_hpux_reloc_rw_mask (void)
10162
{
10163
  return 3;
10164
}
10165
 
10166
/* For others, relax this so that relocations to local data goes in
10167
   read-only segments, but we still cannot allow global relocations
10168
   in read-only segments.  */
10169
 
10170
static int
10171
ia64_reloc_rw_mask (void)
10172
{
10173
  return flag_pic ? 3 : 2;
10174
}
10175
 
10176
/* Return the section to use for X.  The only special thing we do here
10177
   is to honor small data.  */
10178
 
10179
static section *
10180
ia64_select_rtx_section (enum machine_mode mode, rtx x,
10181
                         unsigned HOST_WIDE_INT align)
10182
{
10183
  if (GET_MODE_SIZE (mode) > 0
10184
      && GET_MODE_SIZE (mode) <= ia64_section_threshold
10185
      && !TARGET_NO_SDATA)
10186
    return sdata_section;
10187
  else
10188
    return default_elf_select_rtx_section (mode, x, align);
10189
}
10190
 
10191
static unsigned int
10192
ia64_section_type_flags (tree decl, const char *name, int reloc)
10193
{
10194
  unsigned int flags = 0;
10195
 
10196
  if (strcmp (name, ".sdata") == 0
10197
      || strncmp (name, ".sdata.", 7) == 0
10198
      || strncmp (name, ".gnu.linkonce.s.", 16) == 0
10199
      || strncmp (name, ".sdata2.", 8) == 0
10200
      || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
10201
      || strcmp (name, ".sbss") == 0
10202
      || strncmp (name, ".sbss.", 6) == 0
10203
      || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
10204
    flags = SECTION_SMALL;
10205
 
10206
#if TARGET_ABI_OPEN_VMS
10207
  if (decl && DECL_ATTRIBUTES (decl)
10208
      && lookup_attribute ("common_object", DECL_ATTRIBUTES (decl)))
10209
    flags |= SECTION_VMS_OVERLAY;
10210
#endif
10211
 
10212
  flags |= default_section_type_flags (decl, name, reloc);
10213
  return flags;
10214
}
10215
 
10216
/* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
10217
   structure type and that the address of that type should be passed
10218
   in out0, rather than in r8.  */
10219
 
10220
static bool
10221
ia64_struct_retval_addr_is_first_parm_p (tree fntype)
10222
{
10223
  tree ret_type = TREE_TYPE (fntype);
10224
 
10225
  /* The Itanium C++ ABI requires that out0, rather than r8, be used
10226
     as the structure return address parameter, if the return value
10227
     type has a non-trivial copy constructor or destructor.  It is not
10228
     clear if this same convention should be used for other
10229
     programming languages.  Until G++ 3.4, we incorrectly used r8 for
10230
     these return values.  */
10231
  return (abi_version_at_least (2)
10232
          && ret_type
10233
          && TYPE_MODE (ret_type) == BLKmode
10234
          && TREE_ADDRESSABLE (ret_type)
10235
          && strcmp (lang_hooks.name, "GNU C++") == 0);
10236
}
10237
 
10238
/* Output the assembler code for a thunk function.  THUNK_DECL is the
10239
   declaration for the thunk function itself, FUNCTION is the decl for
10240
   the target function.  DELTA is an immediate constant offset to be
10241
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
10242
   *(*this + vcall_offset) should be added to THIS.  */
10243
 
10244
static void
10245
ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
10246
                      HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10247
                      tree function)
10248
{
10249
  rtx this_rtx, insn, funexp;
10250
  unsigned int this_parmno;
10251
  unsigned int this_regno;
10252
  rtx delta_rtx;
10253
 
10254
  reload_completed = 1;
10255
  epilogue_completed = 1;
10256
 
10257
  /* Set things up as ia64_expand_prologue might.  */
10258
  last_scratch_gr_reg = 15;
10259
 
10260
  memset (&current_frame_info, 0, sizeof (current_frame_info));
10261
  current_frame_info.spill_cfa_off = -16;
10262
  current_frame_info.n_input_regs = 1;
10263
  current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
10264
 
10265
  /* Mark the end of the (empty) prologue.  */
10266
  emit_note (NOTE_INSN_PROLOGUE_END);
10267
 
10268
  /* Figure out whether "this" will be the first parameter (the
10269
     typical case) or the second parameter (as happens when the
10270
     virtual function returns certain class objects).  */
10271
  this_parmno
10272
    = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
10273
       ? 1 : 0);
10274
  this_regno = IN_REG (this_parmno);
10275
  if (!TARGET_REG_NAMES)
10276
    reg_names[this_regno] = ia64_reg_numbers[this_parmno];
10277
 
10278
  this_rtx = gen_rtx_REG (Pmode, this_regno);
10279
 
10280
  /* Apply the constant offset, if required.  */
10281
  delta_rtx = GEN_INT (delta);
10282
  if (TARGET_ILP32)
10283
    {
10284
      rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
10285
      REG_POINTER (tmp) = 1;
10286
      if (delta && satisfies_constraint_I (delta_rtx))
10287
        {
10288
          emit_insn (gen_ptr_extend_plus_imm (this_rtx, tmp, delta_rtx));
10289
          delta = 0;
10290
        }
10291
      else
10292
        emit_insn (gen_ptr_extend (this_rtx, tmp));
10293
    }
10294
  if (delta)
10295
    {
10296
      if (!satisfies_constraint_I (delta_rtx))
10297
        {
10298
          rtx tmp = gen_rtx_REG (Pmode, 2);
10299
          emit_move_insn (tmp, delta_rtx);
10300
          delta_rtx = tmp;
10301
        }
10302
      emit_insn (gen_adddi3 (this_rtx, this_rtx, delta_rtx));
10303
    }
10304
 
10305
  /* Apply the offset from the vtable, if required.  */
10306
  if (vcall_offset)
10307
    {
10308
      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
10309
      rtx tmp = gen_rtx_REG (Pmode, 2);
10310
 
10311
      if (TARGET_ILP32)
10312
        {
10313
          rtx t = gen_rtx_REG (ptr_mode, 2);
10314
          REG_POINTER (t) = 1;
10315
          emit_move_insn (t, gen_rtx_MEM (ptr_mode, this_rtx));
10316
          if (satisfies_constraint_I (vcall_offset_rtx))
10317
            {
10318
              emit_insn (gen_ptr_extend_plus_imm (tmp, t, vcall_offset_rtx));
10319
              vcall_offset = 0;
10320
            }
10321
          else
10322
            emit_insn (gen_ptr_extend (tmp, t));
10323
        }
10324
      else
10325
        emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
10326
 
10327
      if (vcall_offset)
10328
        {
10329
          if (!satisfies_constraint_J (vcall_offset_rtx))
10330
            {
10331
              rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
10332
              emit_move_insn (tmp2, vcall_offset_rtx);
10333
              vcall_offset_rtx = tmp2;
10334
            }
10335
          emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
10336
        }
10337
 
10338
      if (TARGET_ILP32)
10339
        emit_insn (gen_zero_extendsidi2 (tmp, gen_rtx_MEM (ptr_mode, tmp)));
10340
      else
10341
        emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
10342
 
10343
      emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
10344
    }
10345
 
10346
  /* Generate a tail call to the target function.  */
10347
  if (! TREE_USED (function))
10348
    {
10349
      assemble_external (function);
10350
      TREE_USED (function) = 1;
10351
    }
10352
  funexp = XEXP (DECL_RTL (function), 0);
10353
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
10354
  ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
10355
  insn = get_last_insn ();
10356
  SIBLING_CALL_P (insn) = 1;
10357
 
10358
  /* Code generation for calls relies on splitting.  */
10359
  reload_completed = 1;
10360
  epilogue_completed = 1;
10361
  try_split (PATTERN (insn), insn, 0);
10362
 
10363
  emit_barrier ();
10364
 
10365
  /* Run just enough of rest_of_compilation to get the insns emitted.
10366
     There's not really enough bulk here to make other passes such as
10367
     instruction scheduling worth while.  Note that use_thunk calls
10368
     assemble_start_function and assemble_end_function.  */
10369
 
10370
  insn_locators_alloc ();
10371
  emit_all_insn_group_barriers (NULL);
10372
  insn = get_insns ();
10373
  shorten_branches (insn);
10374
  final_start_function (insn, file, 1);
10375
  final (insn, file, 1);
10376
  final_end_function ();
10377
 
10378
  reload_completed = 0;
10379
  epilogue_completed = 0;
10380
}
10381
 
10382
/* Worker function for TARGET_STRUCT_VALUE_RTX.  */
10383
 
10384
static rtx
10385
ia64_struct_value_rtx (tree fntype,
10386
                       int incoming ATTRIBUTE_UNUSED)
10387
{
10388
  if (TARGET_ABI_OPEN_VMS ||
10389
      (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype)))
10390
    return NULL_RTX;
10391
  return gen_rtx_REG (Pmode, GR_REG (8));
10392
}
10393
 
10394
static bool
10395
ia64_scalar_mode_supported_p (enum machine_mode mode)
10396
{
10397
  switch (mode)
10398
    {
10399
    case QImode:
10400
    case HImode:
10401
    case SImode:
10402
    case DImode:
10403
    case TImode:
10404
      return true;
10405
 
10406
    case SFmode:
10407
    case DFmode:
10408
    case XFmode:
10409
    case RFmode:
10410
      return true;
10411
 
10412
    case TFmode:
10413
      return true;
10414
 
10415
    default:
10416
      return false;
10417
    }
10418
}
10419
 
10420
static bool
10421
ia64_vector_mode_supported_p (enum machine_mode mode)
10422
{
10423
  switch (mode)
10424
    {
10425
    case V8QImode:
10426
    case V4HImode:
10427
    case V2SImode:
10428
      return true;
10429
 
10430
    case V2SFmode:
10431
      return true;
10432
 
10433
    default:
10434
      return false;
10435
    }
10436
}
10437
 
10438
/* Implement the FUNCTION_PROFILER macro.  */
10439
 
10440
void
10441
ia64_output_function_profiler (FILE *file, int labelno)
10442
{
10443
  bool indirect_call;
10444
 
10445
  /* If the function needs a static chain and the static chain
10446
     register is r15, we use an indirect call so as to bypass
10447
     the PLT stub in case the executable is dynamically linked,
10448
     because the stub clobbers r15 as per 5.3.6 of the psABI.
10449
     We don't need to do that in non canonical PIC mode.  */
10450
 
10451
  if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
10452
    {
10453
      gcc_assert (STATIC_CHAIN_REGNUM == 15);
10454
      indirect_call = true;
10455
    }
10456
  else
10457
    indirect_call = false;
10458
 
10459
  if (TARGET_GNU_AS)
10460
    fputs ("\t.prologue 4, r40\n", file);
10461
  else
10462
    fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
10463
  fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);
10464
 
10465
  if (NO_PROFILE_COUNTERS)
10466
    fputs ("\tmov out3 = r0\n", file);
10467
  else
10468
    {
10469
      char buf[20];
10470
      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10471
 
10472
      if (TARGET_AUTO_PIC)
10473
        fputs ("\tmovl out3 = @gprel(", file);
10474
      else
10475
        fputs ("\taddl out3 = @ltoff(", file);
10476
      assemble_name (file, buf);
10477
      if (TARGET_AUTO_PIC)
10478
        fputs (")\n", file);
10479
      else
10480
        fputs ("), r1\n", file);
10481
    }
10482
 
10483
  if (indirect_call)
10484
    fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
10485
  fputs ("\t;;\n", file);
10486
 
10487
  fputs ("\t.save rp, r42\n", file);
10488
  fputs ("\tmov out2 = b0\n", file);
10489
  if (indirect_call)
10490
    fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
10491
  fputs ("\t.body\n", file);
10492
  fputs ("\tmov out1 = r1\n", file);
10493
  if (indirect_call)
10494
    {
10495
      fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
10496
      fputs ("\tmov b6 = r16\n", file);
10497
      fputs ("\tld8 r1 = [r14]\n", file);
10498
      fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
10499
    }
10500
  else
10501
    fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
10502
}
10503
 
10504
static GTY(()) rtx mcount_func_rtx;
10505
static rtx
10506
gen_mcount_func_rtx (void)
10507
{
10508
  if (!mcount_func_rtx)
10509
    mcount_func_rtx = init_one_libfunc ("_mcount");
10510
  return mcount_func_rtx;
10511
}
10512
 
10513
void
10514
ia64_profile_hook (int labelno)
10515
{
10516
  rtx label, ip;
10517
 
10518
  if (NO_PROFILE_COUNTERS)
10519
    label = const0_rtx;
10520
  else
10521
    {
10522
      char buf[30];
10523
      const char *label_name;
10524
      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10525
      label_name = (*targetm.strip_name_encoding) (ggc_strdup (buf));
10526
      label = gen_rtx_SYMBOL_REF (Pmode, label_name);
10527
      SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
10528
    }
10529
  ip = gen_reg_rtx (Pmode);
10530
  emit_insn (gen_ip_value (ip));
10531
  emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
10532
                     VOIDmode, 3,
10533
                     gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
10534
                     ip, Pmode,
10535
                     label, Pmode);
10536
}
10537
 
10538
/* Return the mangling of TYPE if it is an extended fundamental type.  */
10539
 
10540
static const char *
10541
ia64_mangle_type (const_tree type)
10542
{
10543
  type = TYPE_MAIN_VARIANT (type);
10544
 
10545
  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
10546
      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
10547
    return NULL;
10548
 
10549
  /* On HP-UX, "long double" is mangled as "e" so __float128 is
10550
     mangled as "e".  */
10551
  if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
10552
    return "g";
10553
  /* On HP-UX, "e" is not available as a mangling of __float80 so use
10554
     an extended mangling.  Elsewhere, "e" is available since long
10555
     double is 80 bits.  */
10556
  if (TYPE_MODE (type) == XFmode)
10557
    return TARGET_HPUX ? "u9__float80" : "e";
10558
  if (TYPE_MODE (type) == RFmode)
10559
    return "u7__fpreg";
10560
  return NULL;
10561
}
10562
 
10563
/* Return the diagnostic message string if conversion from FROMTYPE to
10564
   TOTYPE is not allowed, NULL otherwise.  */
10565
static const char *
10566
ia64_invalid_conversion (const_tree fromtype, const_tree totype)
10567
{
10568
  /* Reject nontrivial conversion to or from __fpreg.  */
10569
  if (TYPE_MODE (fromtype) == RFmode
10570
      && TYPE_MODE (totype) != RFmode
10571
      && TYPE_MODE (totype) != VOIDmode)
10572
    return N_("invalid conversion from %<__fpreg%>");
10573
  if (TYPE_MODE (totype) == RFmode
10574
      && TYPE_MODE (fromtype) != RFmode)
10575
    return N_("invalid conversion to %<__fpreg%>");
10576
  return NULL;
10577
}
10578
 
10579
/* Return the diagnostic message string if the unary operation OP is
10580
   not permitted on TYPE, NULL otherwise.  */
10581
static const char *
10582
ia64_invalid_unary_op (int op, const_tree type)
10583
{
10584
  /* Reject operations on __fpreg other than unary + or &.  */
10585
  if (TYPE_MODE (type) == RFmode
10586
      && op != CONVERT_EXPR
10587
      && op != ADDR_EXPR)
10588
    return N_("invalid operation on %<__fpreg%>");
10589
  return NULL;
10590
}
10591
 
10592
/* Return the diagnostic message string if the binary operation OP is
10593
   not permitted on TYPE1 and TYPE2, NULL otherwise.  */
10594
static const char *
10595
ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
10596
{
10597
  /* Reject operations on __fpreg.  */
10598
  if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
10599
    return N_("invalid operation on %<__fpreg%>");
10600
  return NULL;
10601
}
10602
 
10603
/* Implement overriding of the optimization options.  */
10604
void
10605
ia64_optimization_options (int level ATTRIBUTE_UNUSED,
10606
                           int size ATTRIBUTE_UNUSED)
10607
{
10608
  /* Let the scheduler form additional regions.  */
10609
  set_param_value ("max-sched-extend-regions-iters", 2);
10610
 
10611
  /* Set the default values for cache-related parameters.  */
10612
  set_param_value ("simultaneous-prefetches", 6);
10613
  set_param_value ("l1-cache-line-size", 32);
10614
 
10615
  set_param_value("sched-mem-true-dep-cost", 4);
10616
}
10617
 
10618
/* HP-UX version_id attribute.
10619
   For object foo, if the version_id is set to 1234 put out an alias
10620
   of '.alias foo "foo{1234}"  We can't use "foo{1234}" in anything
10621
   other than an alias statement because it is an illegal symbol name.  */
10622
 
10623
static tree
10624
ia64_handle_version_id_attribute (tree *node ATTRIBUTE_UNUSED,
10625
                                 tree name ATTRIBUTE_UNUSED,
10626
                                 tree args,
10627
                                 int flags ATTRIBUTE_UNUSED,
10628
                                 bool *no_add_attrs)
10629
{
10630
  tree arg = TREE_VALUE (args);
10631
 
10632
  if (TREE_CODE (arg) != STRING_CST)
10633
    {
10634
      error("version attribute is not a string");
10635
      *no_add_attrs = true;
10636
      return NULL_TREE;
10637
    }
10638
  return NULL_TREE;
10639
}
10640
 
10641
/* Target hook for c_mode_for_suffix.  */
10642
 
10643
static enum machine_mode
10644
ia64_c_mode_for_suffix (char suffix)
10645
{
10646
  if (suffix == 'q')
10647
    return TFmode;
10648
  if (suffix == 'w')
10649
    return XFmode;
10650
 
10651
  return VOIDmode;
10652
}
10653
 
10654
static enum machine_mode
10655
ia64_promote_function_mode (const_tree type,
10656
                            enum machine_mode mode,
10657
                            int *punsignedp,
10658
                            const_tree funtype,
10659
                            int for_return)
10660
{
10661
  /* Special processing required for OpenVMS ...  */
10662
 
10663
  if (!TARGET_ABI_OPEN_VMS)
10664
    return default_promote_function_mode(type, mode, punsignedp, funtype,
10665
                                         for_return);
10666
 
10667
  /* HP OpenVMS Calling Standard dated June, 2004, that describes
10668
     HP OpenVMS I64 Version 8.2EFT,
10669
     chapter 4 "OpenVMS I64 Conventions"
10670
     section 4.7 "Procedure Linkage"
10671
     subsection 4.7.5.2, "Normal Register Parameters"
10672
 
10673
     "Unsigned integral (except unsigned 32-bit), set, and VAX floating-point
10674
     values passed in registers are zero-filled; signed integral values as
10675
     well as unsigned 32-bit integral values are sign-extended to 64 bits.
10676
     For all other types passed in the general registers, unused bits are
10677
     undefined."  */
10678
 
10679
  if (!AGGREGATE_TYPE_P (type)
10680
      && GET_MODE_CLASS (mode) == MODE_INT
10681
      && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
10682
    {
10683
      if (mode == SImode)
10684
        *punsignedp = 0;
10685
      return DImode;
10686
    }
10687
  else
10688
    return promote_mode (type, mode, punsignedp);
10689
}
10690
 
10691
static GTY(()) rtx ia64_dconst_0_5_rtx;
10692
 
10693
rtx
10694
ia64_dconst_0_5 (void)
10695
{
10696
  if (! ia64_dconst_0_5_rtx)
10697
    {
10698
      REAL_VALUE_TYPE rv;
10699
      real_from_string (&rv, "0.5");
10700
      ia64_dconst_0_5_rtx = const_double_from_real_value (rv, DFmode);
10701
    }
10702
  return ia64_dconst_0_5_rtx;
10703
}
10704
 
10705
static GTY(()) rtx ia64_dconst_0_375_rtx;
10706
 
10707
rtx
10708
ia64_dconst_0_375 (void)
10709
{
10710
  if (! ia64_dconst_0_375_rtx)
10711
    {
10712
      REAL_VALUE_TYPE rv;
10713
      real_from_string (&rv, "0.375");
10714
      ia64_dconst_0_375_rtx = const_double_from_real_value (rv, DFmode);
10715
    }
10716
  return ia64_dconst_0_375_rtx;
10717
}
10718
 
10719
 
10720
#include "gt-ia64.h"

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.