OpenCores Subversion repository openrisc_me
URL: https://opencores.org/ocsvn/openrisc_me/openrisc_me/trunk
File: openrisc/trunk/gnu-src/gcc-4.2.2/gcc/config/ia64/ia64.c (blame listing for rev 455)
/* Definitions of target machine for GNU compiler.
2
   Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
3
   Free Software Foundation, Inc.
4
   Contributed by James E. Wilson <wilson@cygnus.com> and
5
                  David Mosberger <davidm@hpl.hp.com>.
6
 
7
This file is part of GCC.
8
 
9
GCC is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 3, or (at your option)
12
any later version.
13
 
14
GCC is distributed in the hope that it will be useful,
15
but WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17
GNU General Public License for more details.
18
 
19
You should have received a copy of the GNU General Public License
20
along with GCC; see the file COPYING3.  If not see
21
<http://www.gnu.org/licenses/>.  */
22
 
23
#include "config.h"
24
#include "system.h"
25
#include "coretypes.h"
26
#include "tm.h"
27
#include "rtl.h"
28
#include "tree.h"
29
#include "regs.h"
30
#include "hard-reg-set.h"
31
#include "real.h"
32
#include "insn-config.h"
33
#include "conditions.h"
34
#include "output.h"
35
#include "insn-attr.h"
36
#include "flags.h"
37
#include "recog.h"
38
#include "expr.h"
39
#include "optabs.h"
40
#include "except.h"
41
#include "function.h"
42
#include "ggc.h"
43
#include "basic-block.h"
44
#include "toplev.h"
45
#include "sched-int.h"
46
#include "timevar.h"
47
#include "target.h"
48
#include "target-def.h"
49
#include "tm_p.h"
50
#include "hashtab.h"
51
#include "langhooks.h"
52
#include "cfglayout.h"
53
#include "tree-gimple.h"
54
#include "intl.h"
55
#include "debug.h"
56
#include "params.h"
57
 
58
/* This is used for communication between ASM_OUTPUT_LABEL and
59
   ASM_OUTPUT_LABELREF.  */
60
int ia64_asm_output_label = 0;
61
 
62
/* Define the information needed to generate branch and scc insns.  This is
63
   stored from the compare operation.  */
64
struct rtx_def * ia64_compare_op0;
65
struct rtx_def * ia64_compare_op1;
66
 
67
/* Register names for ia64_expand_prologue.  */
68
static const char * const ia64_reg_numbers[96] =
69
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
70
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
71
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
72
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
73
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
74
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
75
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
76
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
77
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
78
  "r104","r105","r106","r107","r108","r109","r110","r111",
79
  "r112","r113","r114","r115","r116","r117","r118","r119",
80
  "r120","r121","r122","r123","r124","r125","r126","r127"};
81
 
82
/* ??? These strings could be shared with REGISTER_NAMES.  */
83
static const char * const ia64_input_reg_names[8] =
84
{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };
85
 
86
/* ??? These strings could be shared with REGISTER_NAMES.  */
87
static const char * const ia64_local_reg_names[80] =
88
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
89
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
90
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
91
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
92
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
93
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
94
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
95
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
96
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
97
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
98
 
99
/* ??? These strings could be shared with REGISTER_NAMES.  */
100
static const char * const ia64_output_reg_names[8] =
101
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
102
 
103
/* Which cpu are we scheduling for.  */
104
enum processor_type ia64_tune = PROCESSOR_ITANIUM2;
105
 
106
/* Determines whether we run our final scheduling pass or not.  We always
107
   avoid the normal second scheduling pass.  */
108
static int ia64_flag_schedule_insns2;
109
 
110
/* Determines whether we run variable tracking in machine dependent
111
   reorganization.  */
112
static int ia64_flag_var_tracking;
113
 
114
/* Variables which are this size or smaller are put in the sdata/sbss
115
   sections.  */
116
 
117
unsigned int ia64_section_threshold;
118
 
119
/* The following variable is used by the DFA insn scheduler.  The value is
120
   TRUE if we do insn bundling instead of insn scheduling.  */
121
int bundling_p = 0;
122
 
123
/* Structure to be filled in by ia64_compute_frame_size with register
124
   save masks and offsets for the current function.  */
125
 
126
struct ia64_frame_info
127
{
128
  HOST_WIDE_INT total_size;     /* size of the stack frame, not including
129
                                   the caller's scratch area.  */
130
  HOST_WIDE_INT spill_cfa_off;  /* top of the reg spill area from the cfa.  */
131
  HOST_WIDE_INT spill_size;     /* size of the gr/br/fr spill area.  */
132
  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
133
  HARD_REG_SET mask;            /* mask of saved registers.  */
134
  unsigned int gr_used_mask;    /* mask of registers in use as gr spill
135
                                   registers or long-term scratches.  */
136
  int n_spilled;                /* number of spilled registers.  */
137
  int reg_fp;                   /* register for fp.  */
138
  int reg_save_b0;              /* save register for b0.  */
139
  int reg_save_pr;              /* save register for prs.  */
140
  int reg_save_ar_pfs;          /* save register for ar.pfs.  */
141
  int reg_save_ar_unat;         /* save register for ar.unat.  */
142
  int reg_save_ar_lc;           /* save register for ar.lc.  */
143
  int reg_save_gp;              /* save register for gp.  */
144
  int n_input_regs;             /* number of input registers used.  */
145
  int n_local_regs;             /* number of local registers used.  */
146
  int n_output_regs;            /* number of output registers used.  */
147
  int n_rotate_regs;            /* number of rotating registers used.  */
148
 
149
  char need_regstk;             /* true if a .regstk directive needed.  */
150
  char initialized;             /* true if the data is finalized.  */
151
};
152
 
153
/* Current frame information calculated by ia64_compute_frame_size.  */
154
static struct ia64_frame_info current_frame_info;
155
 
156
static int ia64_first_cycle_multipass_dfa_lookahead (void);
157
static void ia64_dependencies_evaluation_hook (rtx, rtx);
158
static void ia64_init_dfa_pre_cycle_insn (void);
159
static rtx ia64_dfa_pre_cycle_insn (void);
160
static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
161
static bool ia64_first_cycle_multipass_dfa_lookahead_guard_spec (rtx);
162
static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
163
static void ia64_h_i_d_extended (void);
164
static int ia64_mode_to_int (enum machine_mode);
165
static void ia64_set_sched_flags (spec_info_t);
166
static int ia64_speculate_insn (rtx, ds_t, rtx *);
167
static rtx ia64_gen_spec_insn (rtx, ds_t, int, bool, bool);
168
static bool ia64_needs_block_p (rtx);
169
static rtx ia64_gen_check (rtx, rtx, bool);
170
static int ia64_spec_check_p (rtx);
171
static int ia64_spec_check_src_p (rtx);
172
static rtx gen_tls_get_addr (void);
173
static rtx gen_thread_pointer (void);
174
static int find_gr_spill (int);
175
static int next_scratch_gr_reg (void);
176
static void mark_reg_gr_used_mask (rtx, void *);
177
static void ia64_compute_frame_size (HOST_WIDE_INT);
178
static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
179
static void finish_spill_pointers (void);
180
static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
181
static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
182
static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
183
static rtx gen_movdi_x (rtx, rtx, rtx);
184
static rtx gen_fr_spill_x (rtx, rtx, rtx);
185
static rtx gen_fr_restore_x (rtx, rtx, rtx);
186
 
187
static enum machine_mode hfa_element_mode (tree, bool);
188
static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
189
                                         tree, int *, int);
190
static int ia64_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
191
                                   tree, bool);
192
static bool ia64_function_ok_for_sibcall (tree, tree);
193
static bool ia64_return_in_memory (tree, tree);
194
static bool ia64_rtx_costs (rtx, int, int, int *);
195
static void fix_range (const char *);
196
static bool ia64_handle_option (size_t, const char *, int);
197
static struct machine_function * ia64_init_machine_status (void);
198
static void emit_insn_group_barriers (FILE *);
199
static void emit_all_insn_group_barriers (FILE *);
200
static void final_emit_insn_group_barriers (FILE *);
201
static void emit_predicate_relation_info (void);
202
static void ia64_reorg (void);
203
static bool ia64_in_small_data_p (tree);
204
static void process_epilogue (FILE *, rtx, bool, bool);
205
static int process_set (FILE *, rtx, rtx, bool, bool);
206
 
207
static bool ia64_assemble_integer (rtx, unsigned int, int);
208
static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
209
static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
210
static void ia64_output_function_end_prologue (FILE *);
211
 
212
static int ia64_issue_rate (void);
213
static int ia64_adjust_cost_2 (rtx, int, rtx, int);
214
static void ia64_sched_init (FILE *, int, int);
215
static void ia64_sched_init_global (FILE *, int, int);
216
static void ia64_sched_finish_global (FILE *, int);
217
static void ia64_sched_finish (FILE *, int);
218
static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
219
static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
220
static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
221
static int ia64_variable_issue (FILE *, int, rtx, int);
222
 
223
static struct bundle_state *get_free_bundle_state (void);
224
static void free_bundle_state (struct bundle_state *);
225
static void initiate_bundle_states (void);
226
static void finish_bundle_states (void);
227
static unsigned bundle_state_hash (const void *);
228
static int bundle_state_eq_p (const void *, const void *);
229
static int insert_bundle_state (struct bundle_state *);
230
static void initiate_bundle_state_table (void);
231
static void finish_bundle_state_table (void);
232
static int try_issue_nops (struct bundle_state *, int);
233
static int try_issue_insn (struct bundle_state *, rtx);
234
static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
235
static int get_max_pos (state_t);
236
static int get_template (state_t, int);
237
 
238
static rtx get_next_important_insn (rtx, rtx);
239
static void bundling (FILE *, int, rtx, rtx);
240
 
241
static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
242
                                  HOST_WIDE_INT, tree);
243
static void ia64_file_start (void);
244
 
245
static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
246
static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
247
static section *ia64_select_rtx_section (enum machine_mode, rtx,
248
                                         unsigned HOST_WIDE_INT);
249
static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
250
     ATTRIBUTE_UNUSED;
251
static unsigned int ia64_section_type_flags (tree, const char *, int);
252
static void ia64_hpux_add_extern_decl (tree decl)
253
     ATTRIBUTE_UNUSED;
254
static void ia64_hpux_file_end (void)
255
     ATTRIBUTE_UNUSED;
256
static void ia64_init_libfuncs (void)
257
     ATTRIBUTE_UNUSED;
258
static void ia64_hpux_init_libfuncs (void)
259
     ATTRIBUTE_UNUSED;
260
static void ia64_sysv4_init_libfuncs (void)
261
     ATTRIBUTE_UNUSED;
262
static void ia64_vms_init_libfuncs (void)
263
     ATTRIBUTE_UNUSED;
264
 
265
static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
266
static void ia64_encode_section_info (tree, rtx, int);
267
static rtx ia64_struct_value_rtx (tree, int);
268
static tree ia64_gimplify_va_arg (tree, tree, tree *, tree *);
269
static bool ia64_scalar_mode_supported_p (enum machine_mode mode);
270
static bool ia64_vector_mode_supported_p (enum machine_mode mode);
271
static bool ia64_cannot_force_const_mem (rtx);
272
static const char *ia64_mangle_fundamental_type (tree);
273
static const char *ia64_invalid_conversion (tree, tree);
274
static const char *ia64_invalid_unary_op (int, tree);
275
static const char *ia64_invalid_binary_op (int, tree, tree);
276
 
277
/* Table of valid machine attributes.  */
278
static const struct attribute_spec ia64_attribute_table[] =
279
{
280
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
281
  { "syscall_linkage", 0, 0, false, true,  true,  NULL },
282
  { "model",           1, 1, true, false, false, ia64_handle_model_attribute },
283
  { NULL,              0, 0, false, false, false, NULL }
284
};
285
 
286
/* Initialize the GCC target structure.  */
287
#undef TARGET_ATTRIBUTE_TABLE
288
#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
289
 
290
#undef TARGET_INIT_BUILTINS
291
#define TARGET_INIT_BUILTINS ia64_init_builtins
292
 
293
#undef TARGET_EXPAND_BUILTIN
294
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin
295
 
296
#undef TARGET_ASM_BYTE_OP
297
#define TARGET_ASM_BYTE_OP "\tdata1\t"
298
#undef TARGET_ASM_ALIGNED_HI_OP
299
#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
300
#undef TARGET_ASM_ALIGNED_SI_OP
301
#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
302
#undef TARGET_ASM_ALIGNED_DI_OP
303
#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
304
#undef TARGET_ASM_UNALIGNED_HI_OP
305
#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
306
#undef TARGET_ASM_UNALIGNED_SI_OP
307
#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
308
#undef TARGET_ASM_UNALIGNED_DI_OP
309
#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
310
#undef TARGET_ASM_INTEGER
311
#define TARGET_ASM_INTEGER ia64_assemble_integer
312
 
313
#undef TARGET_ASM_FUNCTION_PROLOGUE
314
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
315
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
316
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
317
#undef TARGET_ASM_FUNCTION_EPILOGUE
318
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
319
 
320
#undef TARGET_IN_SMALL_DATA_P
321
#define TARGET_IN_SMALL_DATA_P  ia64_in_small_data_p
322
 
323
#undef TARGET_SCHED_ADJUST_COST_2
324
#define TARGET_SCHED_ADJUST_COST_2 ia64_adjust_cost_2
325
#undef TARGET_SCHED_ISSUE_RATE
326
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
327
#undef TARGET_SCHED_VARIABLE_ISSUE
328
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
329
#undef TARGET_SCHED_INIT
330
#define TARGET_SCHED_INIT ia64_sched_init
331
#undef TARGET_SCHED_FINISH
332
#define TARGET_SCHED_FINISH ia64_sched_finish
333
#undef TARGET_SCHED_INIT_GLOBAL
334
#define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
335
#undef TARGET_SCHED_FINISH_GLOBAL
336
#define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
337
#undef TARGET_SCHED_REORDER
338
#define TARGET_SCHED_REORDER ia64_sched_reorder
339
#undef TARGET_SCHED_REORDER2
340
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2
341
 
342
#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
343
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
344
 
345
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
346
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
347
 
348
#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
349
#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
350
#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
351
#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
352
 
353
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
354
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
355
  ia64_first_cycle_multipass_dfa_lookahead_guard
356
 
357
#undef TARGET_SCHED_DFA_NEW_CYCLE
358
#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
359
 
360
#undef TARGET_SCHED_H_I_D_EXTENDED
361
#define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended
362
 
363
#undef TARGET_SCHED_SET_SCHED_FLAGS
364
#define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags
365
 
366
#undef TARGET_SCHED_SPECULATE_INSN
367
#define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn
368
 
369
#undef TARGET_SCHED_NEEDS_BLOCK_P
370
#define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p
371
 
372
#undef TARGET_SCHED_GEN_CHECK
373
#define TARGET_SCHED_GEN_CHECK ia64_gen_check
374
 
375
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC
376
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC\
377
  ia64_first_cycle_multipass_dfa_lookahead_guard_spec
378
 
379
#undef TARGET_FUNCTION_OK_FOR_SIBCALL
380
#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
381
#undef TARGET_ARG_PARTIAL_BYTES
382
#define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
383
 
384
#undef TARGET_ASM_OUTPUT_MI_THUNK
385
#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
386
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
387
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
388
 
389
#undef TARGET_ASM_FILE_START
390
#define TARGET_ASM_FILE_START ia64_file_start
391
 
392
#undef TARGET_RTX_COSTS
393
#define TARGET_RTX_COSTS ia64_rtx_costs
394
#undef TARGET_ADDRESS_COST
395
#define TARGET_ADDRESS_COST hook_int_rtx_0
396
 
397
#undef TARGET_MACHINE_DEPENDENT_REORG
398
#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
399
 
400
#undef TARGET_ENCODE_SECTION_INFO
401
#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
402
 
403
#undef  TARGET_SECTION_TYPE_FLAGS
404
#define TARGET_SECTION_TYPE_FLAGS  ia64_section_type_flags
405
 
406
#ifdef HAVE_AS_TLS
407
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
408
#define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
409
#endif
410
 
411
/* ??? ABI doesn't allow us to define this.  */
412
#if 0
413
#undef TARGET_PROMOTE_FUNCTION_ARGS
414
#define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_tree_true
415
#endif
416
 
417
/* ??? ABI doesn't allow us to define this.  */
418
#if 0
419
#undef TARGET_PROMOTE_FUNCTION_RETURN
420
#define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
421
#endif
422
 
423
/* ??? Investigate.  */
424
#if 0
425
#undef TARGET_PROMOTE_PROTOTYPES
426
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
427
#endif
428
 
429
#undef TARGET_STRUCT_VALUE_RTX
430
#define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
431
#undef TARGET_RETURN_IN_MEMORY
432
#define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
433
#undef TARGET_SETUP_INCOMING_VARARGS
434
#define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
435
#undef TARGET_STRICT_ARGUMENT_NAMING
436
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
437
#undef TARGET_MUST_PASS_IN_STACK
438
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
439
 
440
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
441
#define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
442
 
443
#undef TARGET_UNWIND_EMIT
444
#define TARGET_UNWIND_EMIT process_for_unwind_directive
445
 
446
#undef TARGET_SCALAR_MODE_SUPPORTED_P
447
#define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
448
#undef TARGET_VECTOR_MODE_SUPPORTED_P
449
#define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
450
 
451
/* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
452
   in an order different from the specified program order.  */
453
#undef TARGET_RELAXED_ORDERING
454
#define TARGET_RELAXED_ORDERING true
455
 
456
#undef TARGET_DEFAULT_TARGET_FLAGS
457
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
458
#undef TARGET_HANDLE_OPTION
459
#define TARGET_HANDLE_OPTION ia64_handle_option
460
 
461
#undef TARGET_CANNOT_FORCE_CONST_MEM
462
#define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
463
 
464
#undef TARGET_MANGLE_FUNDAMENTAL_TYPE
465
#define TARGET_MANGLE_FUNDAMENTAL_TYPE ia64_mangle_fundamental_type
466
 
467
#undef TARGET_INVALID_CONVERSION
468
#define TARGET_INVALID_CONVERSION ia64_invalid_conversion
469
#undef TARGET_INVALID_UNARY_OP
470
#define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
471
#undef TARGET_INVALID_BINARY_OP
472
#define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op
473
 
474
struct gcc_target targetm = TARGET_INITIALIZER;
475
 
476
typedef enum
477
  {
478
    ADDR_AREA_NORMAL,   /* normal address area */
479
    ADDR_AREA_SMALL     /* addressable by "addl" (-2MB < addr < 2MB) */
480
  }
481
ia64_addr_area;
482
 
483
static GTY(()) tree small_ident1;
484
static GTY(()) tree small_ident2;
485
 
486
static void
487
init_idents (void)
488
{
489
  if (small_ident1 == 0)
490
    {
491
      small_ident1 = get_identifier ("small");
492
      small_ident2 = get_identifier ("__small__");
493
    }
494
}
495
 
496
/* Retrieve the address area that has been chosen for the given decl.  */
497
 
498
static ia64_addr_area
499
ia64_get_addr_area (tree decl)
500
{
501
  tree model_attr;
502
 
503
  model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
504
  if (model_attr)
505
    {
506
      tree id;
507
 
508
      init_idents ();
509
      id = TREE_VALUE (TREE_VALUE (model_attr));
510
      if (id == small_ident1 || id == small_ident2)
511
        return ADDR_AREA_SMALL;
512
    }
513
  return ADDR_AREA_NORMAL;
514
}
515
 
516
static tree
517
ia64_handle_model_attribute (tree *node, tree name, tree args,
518
                             int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
519
{
520
  ia64_addr_area addr_area = ADDR_AREA_NORMAL;
521
  ia64_addr_area area;
522
  tree arg, decl = *node;
523
 
524
  init_idents ();
525
  arg = TREE_VALUE (args);
526
  if (arg == small_ident1 || arg == small_ident2)
527
    {
528
      addr_area = ADDR_AREA_SMALL;
529
    }
530
  else
531
    {
532
      warning (OPT_Wattributes, "invalid argument of %qs attribute",
533
               IDENTIFIER_POINTER (name));
534
      *no_add_attrs = true;
535
    }
536
 
537
  switch (TREE_CODE (decl))
538
    {
539
    case VAR_DECL:
540
      if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
541
           == FUNCTION_DECL)
542
          && !TREE_STATIC (decl))
543
        {
544
          error ("%Jan address area attribute cannot be specified for "
545
                 "local variables", decl);
546
          *no_add_attrs = true;
547
        }
548
      area = ia64_get_addr_area (decl);
549
      if (area != ADDR_AREA_NORMAL && addr_area != area)
550
        {
551
          error ("address area of %q+D conflicts with previous "
552
                 "declaration", decl);
553
          *no_add_attrs = true;
554
        }
555
      break;
556
 
557
    case FUNCTION_DECL:
558
      error ("%Jaddress area attribute cannot be specified for functions",
559
             decl);
560
      *no_add_attrs = true;
561
      break;
562
 
563
    default:
564
      warning (OPT_Wattributes, "%qs attribute ignored",
565
               IDENTIFIER_POINTER (name));
566
      *no_add_attrs = true;
567
      break;
568
    }
569
 
570
  return NULL_TREE;
571
}
572
 
573
static void
574
ia64_encode_addr_area (tree decl, rtx symbol)
575
{
576
  int flags;
577
 
578
  flags = SYMBOL_REF_FLAGS (symbol);
579
  switch (ia64_get_addr_area (decl))
580
    {
581
    case ADDR_AREA_NORMAL: break;
582
    case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
583
    default: gcc_unreachable ();
584
    }
585
  SYMBOL_REF_FLAGS (symbol) = flags;
586
}
587
 
588
static void
589
ia64_encode_section_info (tree decl, rtx rtl, int first)
590
{
591
  default_encode_section_info (decl, rtl, first);
592
 
593
  /* Careful not to prod global register variables.  */
594
  if (TREE_CODE (decl) == VAR_DECL
595
      && GET_CODE (DECL_RTL (decl)) == MEM
596
      && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
597
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
598
    ia64_encode_addr_area (decl, XEXP (rtl, 0));
599
}
600
 
601
/* Implement CONST_OK_FOR_LETTER_P.  */
602
 
603
bool
604
ia64_const_ok_for_letter_p (HOST_WIDE_INT value, char c)
605
{
606
  switch (c)
607
    {
608
    case 'I':
609
      return CONST_OK_FOR_I (value);
610
    case 'J':
611
      return CONST_OK_FOR_J (value);
612
    case 'K':
613
      return CONST_OK_FOR_K (value);
614
    case 'L':
615
      return CONST_OK_FOR_L (value);
616
    case 'M':
617
      return CONST_OK_FOR_M (value);
618
    case 'N':
619
      return CONST_OK_FOR_N (value);
620
    case 'O':
621
      return CONST_OK_FOR_O (value);
622
    case 'P':
623
      return CONST_OK_FOR_P (value);
624
    default:
625
      return false;
626
    }
627
}
628
 
629
/* Implement CONST_DOUBLE_OK_FOR_LETTER_P.  */
630
 
631
bool
632
ia64_const_double_ok_for_letter_p (rtx value, char c)
633
{
634
  switch (c)
635
    {
636
    case 'G':
637
      return CONST_DOUBLE_OK_FOR_G (value);
638
    default:
639
      return false;
640
    }
641
}
642
 
643
/* Implement EXTRA_CONSTRAINT.  */
644
 
645
bool
646
ia64_extra_constraint (rtx value, char c)
647
{
648
  switch (c)
649
    {
650
    case 'Q':
651
      /* Non-volatile memory for FP_REG loads/stores.  */
652
      return memory_operand(value, VOIDmode) && !MEM_VOLATILE_P (value);
653
 
654
    case 'R':
655
      /* 1..4 for shladd arguments.  */
656
      return (GET_CODE (value) == CONST_INT
657
              && INTVAL (value) >= 1 && INTVAL (value) <= 4);
658
 
659
    case 'S':
660
      /* Non-post-inc memory for asms and other unsavory creatures.  */
661
      return (GET_CODE (value) == MEM
662
              && GET_RTX_CLASS (GET_CODE (XEXP (value, 0))) != RTX_AUTOINC
663
              && (reload_in_progress || memory_operand (value, VOIDmode)));
664
 
665
    case 'T':
666
      /* Symbol ref to small-address-area.  */
667
      return small_addr_symbolic_operand (value, VOIDmode);
668
 
669
    case 'U':
670
      /* Vector zero.  */
671
      return value == CONST0_RTX (GET_MODE (value));
672
 
673
    case 'W':
674
      /* An integer vector, such that conversion to an integer yields a
675
         value appropriate for an integer 'J' constraint.  */
676
      if (GET_CODE (value) == CONST_VECTOR
677
          && GET_MODE_CLASS (GET_MODE (value)) == MODE_VECTOR_INT)
678
        {
679
          value = simplify_subreg (DImode, value, GET_MODE (value), 0);
680
          return ia64_const_ok_for_letter_p (INTVAL (value), 'J');
681
        }
682
      return false;
683
 
684
    case 'Y':
685
      /* A V2SF vector containing elements that satisfy 'G'.  */
686
      return
687
        (GET_CODE (value) == CONST_VECTOR
688
         && GET_MODE (value) == V2SFmode
689
         && ia64_const_double_ok_for_letter_p (XVECEXP (value, 0, 0), 'G')
690
         && ia64_const_double_ok_for_letter_p (XVECEXP (value, 0, 1), 'G'));
691
 
692
    default:
693
      return false;
694
    }
695
}
696
 
697
/* Return 1 if the operands of a move are ok.  */
698
 
699
int
700
ia64_move_ok (rtx dst, rtx src)
701
{
702
  /* If we're under init_recog_no_volatile, we'll not be able to use
703
     memory_operand.  So check the code directly and don't worry about
704
     the validity of the underlying address, which should have been
705
     checked elsewhere anyway.  */
706
  if (GET_CODE (dst) != MEM)
707
    return 1;
708
  if (GET_CODE (src) == MEM)
709
    return 0;
710
  if (register_operand (src, VOIDmode))
711
    return 1;
712
 
713
  /* Otherwise, this must be a constant, and it must be either 0 or 0.0 or 1.0.  */
714
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
715
    return src == const0_rtx;
716
  else
717
    return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
718
}
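/* Note on the constant case above (editorial annotation, not original source
   text): integral zero and the floating constants 0.0 and 1.0 can be stored
   directly because the IA-64 registers r0, f0 and f1 are hardwired to exactly
   those values, so no separate load of the constant is needed.  */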
719
 
720
/* Return 1 if the operands are ok for a floating point load pair.  */
721
 
722
int
723
ia64_load_pair_ok (rtx dst, rtx src)
724
{
725
  if (GET_CODE (dst) != REG || !FP_REGNO_P (REGNO (dst)))
726
    return 0;
727
  if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
728
    return 0;
729
  switch (GET_CODE (XEXP (src, 0)))
730
    {
731
    case REG:
732
    case POST_INC:
733
      break;
734
    case POST_DEC:
735
      return 0;
736
    case POST_MODIFY:
737
      {
738
        rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);
739
 
740
        if (GET_CODE (adjust) != CONST_INT
741
            || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
742
          return 0;
743
      }
744
      break;
745
    default:
746
      abort ();
747
    }
748
  return 1;
749
}
750
 
751
int
752
addp4_optimize_ok (rtx op1, rtx op2)
753
{
754
  return (basereg_operand (op1, GET_MODE(op1)) !=
755
          basereg_operand (op2, GET_MODE(op2)));
756
}
757
 
758
/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
759
   Return the length of the field, or <= 0 on failure.  */
760
 
761
int
762
ia64_depz_field_mask (rtx rop, rtx rshift)
763
{
764
  unsigned HOST_WIDE_INT op = INTVAL (rop);
765
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);
766
 
767
  /* Get rid of the zero bits we're shifting in.  */
768
  op >>= shift;
769
 
770
  /* We must now have a solid block of 1's at bit 0.  */
771
  return exact_log2 (op + 1);
772
}
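/* Worked example for ia64_depz_field_mask (illustrative values, not from the
   original source): with rop = 0x7f8 and rshift = 3, the shift leaves
   op = 0xff, so exact_log2 (0x100) returns 8 and the mask describes an 8-bit
   dep.z field.  A mask such as 0x6f8 shifted by 3 leaves 0xdf, which is not
   of the form 2^n - 1, so the function returns -1 and the caller must reject
   the combination.  */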
773
 
774
/* Return the TLS model to use for ADDR.  */
775
 
776
static enum tls_model
777
tls_symbolic_operand_type (rtx addr)
778
{
779
  enum tls_model tls_kind = 0;
780
 
781
  if (GET_CODE (addr) == CONST)
782
    {
783
      if (GET_CODE (XEXP (addr, 0)) == PLUS
784
          && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
785
        tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
786
    }
787
  else if (GET_CODE (addr) == SYMBOL_REF)
788
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);
789
 
790
  return tls_kind;
791
}
792
 
793
/* Return true if X is a constant that is valid for some immediate
794
   field in an instruction.  */
795
 
796
bool
797
ia64_legitimate_constant_p (rtx x)
798
{
799
  switch (GET_CODE (x))
800
    {
801
    case CONST_INT:
802
    case LABEL_REF:
803
      return true;
804
 
805
    case CONST_DOUBLE:
806
      if (GET_MODE (x) == VOIDmode)
807
        return true;
808
      return CONST_DOUBLE_OK_FOR_G (x);
809
 
810
    case CONST:
811
    case SYMBOL_REF:
812
      /* ??? Short term workaround for PR 28490.  We must make the code here
813
         match the code in ia64_expand_move and move_operand, even though they
814
         are both technically wrong.  */
815
      if (tls_symbolic_operand_type (x) == 0)
816
        {
817
          HOST_WIDE_INT addend = 0;
818
          rtx op = x;
819
 
820
          if (GET_CODE (op) == CONST
821
              && GET_CODE (XEXP (op, 0)) == PLUS
822
              && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
823
            {
824
              addend = INTVAL (XEXP (XEXP (op, 0), 1));
825
              op = XEXP (XEXP (op, 0), 0);
826
            }
827
 
828
          if (any_offset_symbol_operand (op, GET_MODE (op))
829
              || function_operand (op, GET_MODE (op)))
830
            return true;
831
          if (aligned_offset_symbol_operand (op, GET_MODE (op)))
832
            return (addend & 0x3fff) == 0;
833
          return false;
834
        }
835
      return false;
836
 
837
    case CONST_VECTOR:
838
      {
839
        enum machine_mode mode = GET_MODE (x);
840
 
841
        if (mode == V2SFmode)
842
          return ia64_extra_constraint (x, 'Y');
843
 
844
        return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
845
                && GET_MODE_SIZE (mode) <= 8);
846
      }
847
 
848
    default:
849
      return false;
850
    }
851
}
852
 
853
/* Don't allow TLS addresses to get spilled to memory.  */
854
 
855
static bool
856
ia64_cannot_force_const_mem (rtx x)
857
{
858
  return tls_symbolic_operand_type (x) != 0;
859
}
860
 
861
/* Expand a symbolic constant load.  */
862
 
863
bool
864
ia64_expand_load_address (rtx dest, rtx src)
865
{
866
  gcc_assert (GET_CODE (dest) == REG);
867
 
868
  /* ILP32 mode still loads 64 bits of data from the GOT.  This avoids
869
     having to pointer-extend the value afterward.  Other forms of address
870
     computation below are also more natural to compute as 64-bit quantities.
871
     If we've been given an SImode destination register, change it.  */
872
  if (GET_MODE (dest) != Pmode)
873
    dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest), 0);
874
 
875
  if (TARGET_NO_PIC)
876
    return false;
877
  if (small_addr_symbolic_operand (src, VOIDmode))
878
    return false;
879
 
880
  if (TARGET_AUTO_PIC)
881
    emit_insn (gen_load_gprel64 (dest, src));
882
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
883
    emit_insn (gen_load_fptr (dest, src));
884
  else if (sdata_symbolic_operand (src, VOIDmode))
885
    emit_insn (gen_load_gprel (dest, src));
886
  else
887
    {
888
      HOST_WIDE_INT addend = 0;
889
      rtx tmp;
890
 
891
      /* We did split constant offsets in ia64_expand_move, and we did try
892
         to keep them split in move_operand, but we also allowed reload to
893
         rematerialize arbitrary constants rather than spill the value to
894
         the stack and reload it.  So we have to be prepared here to split
895
         them apart again.  */
896
      if (GET_CODE (src) == CONST)
897
        {
898
          HOST_WIDE_INT hi, lo;
899
 
900
          hi = INTVAL (XEXP (XEXP (src, 0), 1));
901
          lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
902
          hi = hi - lo;
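          /* Illustration with a hypothetical offset (not from the original
             source): for hi = 0x12345 the sign-extended low 14 bits give
             lo = 0x345 - 0x2000 = -0x1cbb, and hi becomes 0x14000, a multiple
             of 0x4000.  hi + lo still equals the original offset and lo now
             fits in a signed 14-bit immediate.  */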
903
 
904
          if (lo != 0)
905
            {
906
              addend = lo;
907
              src = plus_constant (XEXP (XEXP (src, 0), 0), hi);
908
            }
909
        }
910
 
911
      tmp = gen_rtx_HIGH (Pmode, src);
912
      tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
913
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
914
 
915
      tmp = gen_rtx_LO_SUM (Pmode, dest, src);
916
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
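      /* Editorial annotation, not original source text: the two sets above
         form the usual split of a symbolic address, adding the "high" part to
         the global pointer (pic_offset_table_rtx) and then letting the LO_SUM
         supply the low bits of SRC.  Any addend peeled off above is applied
         separately below.  */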
917
 
918
      if (addend)
919
        {
920
          tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
921
          emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
922
        }
923
    }
924
 
925
  return true;
926
}
927
 
928
static GTY(()) rtx gen_tls_tga;
929
static rtx
930
gen_tls_get_addr (void)
931
{
932
  if (!gen_tls_tga)
933
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
934
  return gen_tls_tga;
935
}
936
 
937
static GTY(()) rtx thread_pointer_rtx;
938
static rtx
939
gen_thread_pointer (void)
940
{
941
  if (!thread_pointer_rtx)
942
    thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
943
  return thread_pointer_rtx;
944
}
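/* Editorial annotation, not original source text: in the IA-64 software
   conventions r13 holds the thread pointer, which is why gen_thread_pointer
   above hands back a hard register rtx for register 13.  */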
945
 
946
static rtx
947
ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
948
                         rtx orig_op1, HOST_WIDE_INT addend)
949
{
950
  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
951
  rtx orig_op0 = op0;
952
  HOST_WIDE_INT addend_lo, addend_hi;
953
 
954
  switch (tls_kind)
955
    {
956
    case TLS_MODEL_GLOBAL_DYNAMIC:
957
      start_sequence ();
958
 
959
      tga_op1 = gen_reg_rtx (Pmode);
960
      emit_insn (gen_load_dtpmod (tga_op1, op1));
961
 
962
      tga_op2 = gen_reg_rtx (Pmode);
963
      emit_insn (gen_load_dtprel (tga_op2, op1));
964
 
965
      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
966
                                         LCT_CONST, Pmode, 2, tga_op1,
967
                                         Pmode, tga_op2, Pmode);
968
 
969
      insns = get_insns ();
970
      end_sequence ();
971
 
972
      if (GET_MODE (op0) != Pmode)
973
        op0 = tga_ret;
974
      emit_libcall_block (insns, op0, tga_ret, op1);
975
      break;
976
 
977
    case TLS_MODEL_LOCAL_DYNAMIC:
978
      /* ??? This isn't the completely proper way to do local-dynamic TLS.
979
         If the call to __tls_get_addr is used only by a single symbol,
980
         then we should (somehow) move the dtprel to the second arg
981
         to avoid the extra add.  */
982
      start_sequence ();
983
 
984
      tga_op1 = gen_reg_rtx (Pmode);
985
      emit_insn (gen_load_dtpmod (tga_op1, op1));
986
 
987
      tga_op2 = const0_rtx;
988
 
989
      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
990
                                         LCT_CONST, Pmode, 2, tga_op1,
991
                                         Pmode, tga_op2, Pmode);
992
 
993
      insns = get_insns ();
994
      end_sequence ();
995
 
996
      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
997
                                UNSPEC_LD_BASE);
998
      tmp = gen_reg_rtx (Pmode);
999
      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
1000
 
1001
      if (!register_operand (op0, Pmode))
1002
        op0 = gen_reg_rtx (Pmode);
1003
      if (TARGET_TLS64)
1004
        {
1005
          emit_insn (gen_load_dtprel (op0, op1));
1006
          emit_insn (gen_adddi3 (op0, tmp, op0));
1007
        }
1008
      else
1009
        emit_insn (gen_add_dtprel (op0, op1, tmp));
1010
      break;
1011
 
1012
    case TLS_MODEL_INITIAL_EXEC:
1013
      addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1014
      addend_hi = addend - addend_lo;
1015
 
1016
      op1 = plus_constant (op1, addend_hi);
1017
      addend = addend_lo;
1018
 
1019
      tmp = gen_reg_rtx (Pmode);
1020
      emit_insn (gen_load_tprel (tmp, op1));
1021
 
1022
      if (!register_operand (op0, Pmode))
1023
        op0 = gen_reg_rtx (Pmode);
1024
      emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
1025
      break;
1026
 
1027
    case TLS_MODEL_LOCAL_EXEC:
1028
      if (!register_operand (op0, Pmode))
1029
        op0 = gen_reg_rtx (Pmode);
1030
 
1031
      op1 = orig_op1;
1032
      addend = 0;
1033
      if (TARGET_TLS64)
1034
        {
1035
          emit_insn (gen_load_tprel (op0, op1));
1036
          emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
1037
        }
1038
      else
1039
        emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
1040
      break;
1041
 
1042
    default:
1043
      gcc_unreachable ();
1044
    }
1045
 
1046
  if (addend)
1047
    op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
1048
                               orig_op0, 1, OPTAB_DIRECT);
1049
  if (orig_op0 == op0)
1050
    return NULL_RTX;
1051
  if (GET_MODE (orig_op0) == Pmode)
1052
    return op0;
1053
  return gen_lowpart (GET_MODE (orig_op0), op0);
1054
}
1055
 
1056
rtx
1057
ia64_expand_move (rtx op0, rtx op1)
1058
{
1059
  enum machine_mode mode = GET_MODE (op0);
1060
 
1061
  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
1062
    op1 = force_reg (mode, op1);
1063
 
1064
  if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
1065
    {
1066
      HOST_WIDE_INT addend = 0;
1067
      enum tls_model tls_kind;
1068
      rtx sym = op1;
1069
 
1070
      if (GET_CODE (op1) == CONST
1071
          && GET_CODE (XEXP (op1, 0)) == PLUS
1072
          && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
1073
        {
1074
          addend = INTVAL (XEXP (XEXP (op1, 0), 1));
1075
          sym = XEXP (XEXP (op1, 0), 0);
1076
        }
1077
 
1078
      tls_kind = tls_symbolic_operand_type (sym);
1079
      if (tls_kind)
1080
        return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);
1081
 
1082
      if (any_offset_symbol_operand (sym, mode))
1083
        addend = 0;
1084
      else if (aligned_offset_symbol_operand (sym, mode))
1085
        {
1086
          HOST_WIDE_INT addend_lo, addend_hi;
1087
 
1088
          addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1089
          addend_hi = addend - addend_lo;
1090
 
1091
          if (addend_lo != 0)
1092
            {
1093
              op1 = plus_constant (sym, addend_hi);
1094
              addend = addend_lo;
1095
            }
1096
          else
1097
            addend = 0;
1098
        }
1099
      else
1100
        op1 = sym;
1101
 
1102
      if (reload_completed)
1103
        {
1104
          /* We really should have taken care of this offset earlier.  */
1105
          gcc_assert (addend == 0);
1106
          if (ia64_expand_load_address (op0, op1))
1107
            return NULL_RTX;
1108
        }
1109
 
1110
      if (addend)
1111
        {
1112
          rtx subtarget = no_new_pseudos ? op0 : gen_reg_rtx (mode);
1113
 
1114
          emit_insn (gen_rtx_SET (VOIDmode, subtarget, op1));
1115
 
1116
          op1 = expand_simple_binop (mode, PLUS, subtarget,
1117
                                     GEN_INT (addend), op0, 1, OPTAB_DIRECT);
1118
          if (op0 == op1)
1119
            return NULL_RTX;
1120
        }
1121
    }
1122
 
1123
  return op1;
1124
}
1125
 
1126
/* Split a move from OP1 to OP0 conditional on COND.  */
1127
 
1128
void
1129
ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
1130
{
1131
  rtx insn, first = get_last_insn ();
1132
 
1133
  emit_move_insn (op0, op1);
1134
 
1135
  for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
1136
    if (INSN_P (insn))
1137
      PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
1138
                                          PATTERN (insn));
1139
}
1140
 
1141
/* Split a post-reload TImode or TFmode reference into two DImode
1142
   components.  This is made extra difficult by the fact that we do
1143
   not get any scratch registers to work with, because reload cannot
1144
   be prevented from giving us a scratch that overlaps the register
1145
   pair involved.  So instead, when addressing memory, we tweak the
1146
   pointer register up and back down with POST_INCs.  Or up and not
1147
   back down when we can get away with it.
1148
 
1149
   REVERSED is true when the loads must be done in reversed order
1150
   (high word first) for correctness.  DEAD is true when the pointer
1151
   dies with the second insn we generate and therefore the second
1152
   address must not carry a postmodify.
1153
 
1154
   May return an insn which is to be emitted after the moves.  */
1155
 
1156
static rtx
1157
ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
1158
{
1159
  rtx fixup = 0;
1160
 
1161
  switch (GET_CODE (in))
1162
    {
1163
    case REG:
1164
      out[reversed] = gen_rtx_REG (DImode, REGNO (in));
1165
      out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
1166
      break;
1167
 
1168
    case CONST_INT:
1169
    case CONST_DOUBLE:
1170
      /* Cannot occur reversed.  */
1171
      gcc_assert (!reversed);
1172
 
1173
      if (GET_MODE (in) != TFmode)
1174
        split_double (in, &out[0], &out[1]);
1175
      else
1176
        /* split_double does not understand how to split a TFmode
1177
           quantity into a pair of DImode constants.  */
1178
        {
1179
          REAL_VALUE_TYPE r;
1180
          unsigned HOST_WIDE_INT p[2];
1181
          long l[4];  /* TFmode is 128 bits */
1182
 
1183
          REAL_VALUE_FROM_CONST_DOUBLE (r, in);
1184
          real_to_target (l, &r, TFmode);
1185
 
1186
          if (FLOAT_WORDS_BIG_ENDIAN)
1187
            {
1188
              p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
1189
              p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
1190
            }
1191
          else
1192
            {
1193
              p[0] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
1194
              p[1] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
1195
            }
1196
          out[0] = GEN_INT (p[0]);
1197
          out[1] = GEN_INT (p[1]);
1198
        }
1199
      break;
1200
 
1201
    case MEM:
1202
      {
1203
        rtx base = XEXP (in, 0);
1204
        rtx offset;
1205
 
1206
        switch (GET_CODE (base))
1207
          {
1208
          case REG:
1209
            if (!reversed)
1210
              {
1211
                out[0] = adjust_automodify_address
1212
                  (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1213
                out[1] = adjust_automodify_address
1214
                  (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
1215
              }
1216
            else
1217
              {
1218
                /* Reversal requires a pre-increment, which can only
1219
                   be done as a separate insn.  */
1220
                emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
1221
                out[0] = adjust_automodify_address
1222
                  (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
1223
                out[1] = adjust_address (in, DImode, 0);
1224
              }
1225
            break;
1226
 
1227
          case POST_INC:
1228
            gcc_assert (!reversed && !dead);
1229
 
1230
            /* Just do the increment in two steps.  */
1231
            out[0] = adjust_automodify_address (in, DImode, 0, 0);
1232
            out[1] = adjust_automodify_address (in, DImode, 0, 8);
1233
            break;
1234
 
1235
          case POST_DEC:
1236
            gcc_assert (!reversed && !dead);
1237
 
1238
            /* Add 8, subtract 24.  */
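            /* Editorial annotation, not original source text: the original
               address was a POST_DEC of a 16-byte quantity, a net change of
               -16.  The first word is read with a +8 post-increment and the
               second with a -24 post-modify, leaving the pointer 16 bytes
               below its starting value as required.  */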
1239
            base = XEXP (base, 0);
1240
            out[0] = adjust_automodify_address
1241
              (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1242
            out[1] = adjust_automodify_address
1243
              (in, DImode,
1244
               gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),
1245
               8);
1246
            break;
1247
 
1248
          case POST_MODIFY:
1249
            gcc_assert (!reversed && !dead);
1250
 
1251
            /* Extract and adjust the modification.  This case is
1252
               trickier than the others, because we might have an
1253
               index register, or we might have a combined offset that
1254
               doesn't fit a signed 9-bit displacement field.  We can
1255
               assume the incoming expression is already legitimate.  */
1256
            offset = XEXP (base, 1);
1257
            base = XEXP (base, 0);
1258
 
1259
            out[0] = adjust_automodify_address
1260
              (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1261
 
1262
            if (GET_CODE (XEXP (offset, 1)) == REG)
1263
              {
1264
                /* Can't adjust the postmodify to match.  Emit the
1265
                   original, then a separate addition insn.  */
1266
                out[1] = adjust_automodify_address (in, DImode, 0, 8);
1267
                fixup = gen_adddi3 (base, base, GEN_INT (-8));
1268
              }
1269
            else
1270
              {
1271
                gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
1272
                if (INTVAL (XEXP (offset, 1)) < -256 + 8)
1273
                  {
1274
                    /* Again the postmodify cannot be made to match,
1275
                       but in this case it's more efficient to get rid
1276
                       of the postmodify entirely and fix up with an
1277
                       add insn.  */
1278
                    out[1] = adjust_automodify_address (in, DImode, base, 8);
1279
                    fixup = gen_adddi3
1280
                      (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
1281
                  }
1282
                else
1283
                  {
1284
                    /* Combined offset still fits in the displacement field.
1285
                       (We cannot overflow it at the high end.)  */
1286
                    out[1] = adjust_automodify_address
1287
                      (in, DImode, gen_rtx_POST_MODIFY
1288
                       (Pmode, base, gen_rtx_PLUS
1289
                        (Pmode, base,
1290
                         GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
1291
                       8);
1292
                  }
1293
              }
1294
            break;
1295
 
1296
          default:
1297
            gcc_unreachable ();
1298
          }
1299
        break;
1300
      }
1301
 
1302
    default:
1303
      gcc_unreachable ();
1304
    }
1305
 
1306
  return fixup;
1307
}
1308
 
1309
/* Split a TImode or TFmode move instruction after reload.
1310
   This is used by *movtf_internal and *movti_internal.  */
1311
void
1312
ia64_split_tmode_move (rtx operands[])
1313
{
1314
  rtx in[2], out[2], insn;
1315
  rtx fixup[2];
1316
  bool dead = false;
1317
  bool reversed = false;
1318
 
1319
  /* It is possible for reload to decide to overwrite a pointer with
1320
     the value it points to.  In that case we have to do the loads in
1321
     the appropriate order so that the pointer is not destroyed too
1322
     early.  Also we must not generate a postmodify for that second
1323
     load, or rws_access_regno will die.  */
1324
  if (GET_CODE (operands[1]) == MEM
1325
      && reg_overlap_mentioned_p (operands[0], operands[1]))
1326
    {
1327
      rtx base = XEXP (operands[1], 0);
1328
      while (GET_CODE (base) != REG)
1329
        base = XEXP (base, 0);
1330
 
1331
      if (REGNO (base) == REGNO (operands[0]))
1332
        reversed = true;
1333
      dead = true;
1334
    }
1335
  /* Another reason to do the moves in reversed order is if the first
1336
     element of the target register pair is also the second element of
1337
     the source register pair.  */
1338
  if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
1339
      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1340
    reversed = true;
1341
 
1342
  fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
1343
  fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
1344
 
1345
#define MAYBE_ADD_REG_INC_NOTE(INSN, EXP)                               \
1346
  if (GET_CODE (EXP) == MEM                                             \
1347
      && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY                        \
1348
          || GET_CODE (XEXP (EXP, 0)) == POST_INC                        \
1349
          || GET_CODE (XEXP (EXP, 0)) == POST_DEC))                      \
1350
    REG_NOTES (INSN) = gen_rtx_EXPR_LIST (REG_INC,                      \
1351
                                          XEXP (XEXP (EXP, 0), 0),        \
1352
                                          REG_NOTES (INSN))
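  /* Editorial annotation, not original source text: the macro above attaches
     a REG_INC note whenever one of the split memory references uses a
     POST_INC, POST_DEC or POST_MODIFY address, so that later passes see the
     side effect on the base register.  */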
1353
 
1354
  insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
1355
  MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
1356
  MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
1357
 
1358
  insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
1359
  MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
1360
  MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
1361
 
1362
  if (fixup[0])
1363
    emit_insn (fixup[0]);
1364
  if (fixup[1])
1365
    emit_insn (fixup[1]);
1366
 
1367
#undef MAYBE_ADD_REG_INC_NOTE
1368
}
1369
 
1370
/* ??? Fixing GR->FR XFmode moves during reload is hard.  You need to go
1371
   through memory plus an extra GR scratch register.  Except that you can
1372
   either get the first from SECONDARY_MEMORY_NEEDED or the second from
1373
   SECONDARY_RELOAD_CLASS, but not both.
1374
 
1375
   We got into problems in the first place by allowing a construct like
1376
   (subreg:XF (reg:TI)), which we got from a union containing a long double.
1377
   This solution attempts to prevent this situation from occurring.  When
1378
   we see something like the above, we spill the inner register to memory.  */
1379
 
1380
static rtx
1381
spill_xfmode_rfmode_operand (rtx in, int force, enum machine_mode mode)
1382
{
1383
  if (GET_CODE (in) == SUBREG
1384
      && GET_MODE (SUBREG_REG (in)) == TImode
1385
      && GET_CODE (SUBREG_REG (in)) == REG)
1386
    {
1387
      rtx memt = assign_stack_temp (TImode, 16, 0);
1388
      emit_move_insn (memt, SUBREG_REG (in));
1389
      return adjust_address (memt, mode, 0);
1390
    }
1391
  else if (force && GET_CODE (in) == REG)
1392
    {
1393
      rtx memx = assign_stack_temp (mode, 16, 0);
1394
      emit_move_insn (memx, in);
1395
      return memx;
1396
    }
1397
  else
1398
    return in;
1399
}
1400
 
1401
/* Expand the movxf or movrf pattern (MODE says which) with the given
1402
   OPERANDS, returning true if the pattern should then invoke
1403
   DONE.  */
1404
 
1405
bool
1406
ia64_expand_movxf_movrf (enum machine_mode mode, rtx operands[])
1407
{
1408
  rtx op0 = operands[0];
1409
 
1410
  if (GET_CODE (op0) == SUBREG)
1411
    op0 = SUBREG_REG (op0);
1412
 
1413
  /* We must support XFmode loads into general registers for stdarg/vararg,
1414
     unprototyped calls, and a rare case where a long double is passed as
1415
     an argument after a float HFA fills the FP registers.  We split them into
1416
     DImode loads for convenience.  We also need to support XFmode stores
1417
     for the last case.  This case does not happen for stdarg/vararg routines,
1418
     because we do a block store to memory of unnamed arguments.  */
1419
 
1420
  if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
1421
    {
1422
      rtx out[2];
1423
 
1424
      /* We're hoping to transform everything that deals with XFmode
1425
         quantities and GR registers early in the compiler.  */
1426
      gcc_assert (!no_new_pseudos);
1427
 
1428
      /* Struct to register can just use TImode instead.  */
1429
      if ((GET_CODE (operands[1]) == SUBREG
1430
           && GET_MODE (SUBREG_REG (operands[1])) == TImode)
1431
          || (GET_CODE (operands[1]) == REG
1432
              && GR_REGNO_P (REGNO (operands[1]))))
1433
        {
1434
          rtx op1 = operands[1];
1435
 
1436
          if (GET_CODE (op1) == SUBREG)
1437
            op1 = SUBREG_REG (op1);
1438
          else
1439
            op1 = gen_rtx_REG (TImode, REGNO (op1));
1440
 
1441
          emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
1442
          return true;
1443
        }
1444
 
1445
      if (GET_CODE (operands[1]) == CONST_DOUBLE)
1446
        {
1447
          /* Don't word-swap when reading in the constant.  */
1448
          emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
1449
                          operand_subword (operands[1], WORDS_BIG_ENDIAN,
1450
                                           0, mode));
1451
          emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
1452
                          operand_subword (operands[1], !WORDS_BIG_ENDIAN,
1453
                                           0, mode));
1454
          return true;
1455
        }
1456
 
1457
      /* If the quantity is in a register not known to be GR, spill it.  */
1458
      if (register_operand (operands[1], mode))
1459
        operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);
1460
 
1461
      gcc_assert (GET_CODE (operands[1]) == MEM);
1462
 
1463
      /* Don't word-swap when reading in the value.  */
1464
      out[0] = gen_rtx_REG (DImode, REGNO (op0));
1465
      out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);
1466
 
1467
      emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
1468
      emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
1469
      return true;
1470
    }
1471
 
1472
  if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
1473
    {
1474
      /* We're hoping to transform everything that deals with XFmode
1475
         quantities and GR registers early in the compiler.  */
1476
      gcc_assert (!no_new_pseudos);
1477
 
1478
      /* Op0 can't be a GR_REG here, as that case is handled above.
1479
         If op0 is a register, then we spill op1, so that we now have a
1480
         MEM operand.  This requires creating an XFmode subreg of a TImode reg
1481
         to force the spill.  */
1482
      if (register_operand (operands[0], mode))
1483
        {
1484
          rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
1485
          op1 = gen_rtx_SUBREG (mode, op1, 0);
1486
          operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
1487
        }
1488
 
1489
      else
1490
        {
1491
          rtx in[2];
1492
 
1493
          gcc_assert (GET_CODE (operands[0]) == MEM);
1494
 
1495
          /* Don't word-swap when writing out the value.  */
1496
          in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
1497
          in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
1498
 
1499
          emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
1500
          emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
1501
          return true;
1502
        }
1503
    }
1504
 
1505
  if (!reload_in_progress && !reload_completed)
1506
    {
1507
      operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);
1508
 
1509
      if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
1510
        {
1511
          rtx memt, memx, in = operands[1];
1512
          if (CONSTANT_P (in))
1513
            in = validize_mem (force_const_mem (mode, in));
1514
          if (GET_CODE (in) == MEM)
1515
            memt = adjust_address (in, TImode, 0);
1516
          else
1517
            {
1518
              memt = assign_stack_temp (TImode, 16, 0);
1519
              memx = adjust_address (memt, mode, 0);
1520
              emit_move_insn (memx, in);
1521
            }
1522
          emit_move_insn (op0, memt);
1523
          return true;
1524
        }
1525
 
1526
      if (!ia64_move_ok (operands[0], operands[1]))
1527
        operands[1] = force_reg (mode, operands[1]);
1528
    }
1529
 
1530
  return false;
1531
}
1532
 
1533
/* Emit comparison instruction if necessary, returning the expression
1534
   that holds the compare result in the proper mode.  */
1535
 
1536
static GTY(()) rtx cmptf_libfunc;
1537
 
1538
rtx
1539
ia64_expand_compare (enum rtx_code code, enum machine_mode mode)
1540
{
1541
  rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
1542
  rtx cmp;
1543
 
1544
  /* If we have a BImode input, then we already have a compare result, and
1545
     do not need to emit another comparison.  */
1546
  if (GET_MODE (op0) == BImode)
1547
    {
1548
      gcc_assert ((code == NE || code == EQ) && op1 == const0_rtx);
1549
      cmp = op0;
1550
    }
1551
  /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
1552
     magic number as its third argument that indicates what to do.
1553
     The return value is an integer to be compared against zero.  */
1554
  else if (GET_MODE (op0) == TFmode)
1555
    {
1556
      enum qfcmp_magic {
1557
        QCMP_INV = 1,   /* Raise FP_INVALID on SNaN as a side effect.  */
1558
        QCMP_UNORD = 2,
1559
        QCMP_EQ = 4,
1560
        QCMP_LT = 8,
1561
        QCMP_GT = 16
1562
      } magic;
1563
      enum rtx_code ncode;
1564
      rtx ret, insns;
1565
 
1566
      gcc_assert (cmptf_libfunc && GET_MODE (op1) == TFmode);
1567
      switch (code)
1568
        {
1569
          /* 1 = equal, 0 = not equal.  Equality operators do
1570
             not raise FP_INVALID when given an SNaN operand.  */
1571
        case EQ:        magic = QCMP_EQ;                  ncode = NE; break;
1572
        case NE:        magic = QCMP_EQ;                  ncode = EQ; break;
1573
          /* isunordered() from C99.  */
1574
        case UNORDERED: magic = QCMP_UNORD;               ncode = NE; break;
1575
        case ORDERED:   magic = QCMP_UNORD;               ncode = EQ; break;
1576
          /* Relational operators raise FP_INVALID when given
1577
             an SNaN operand.  */
1578
        case LT:        magic = QCMP_LT        |QCMP_INV; ncode = NE; break;
1579
        case LE:        magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1580
        case GT:        magic = QCMP_GT        |QCMP_INV; ncode = NE; break;
1581
        case GE:        magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1582
          /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
1583
             Expanders for buneq etc. would have to be added to ia64.md
1584
             for this to be useful.  */
1585
        default: gcc_unreachable ();
1586
        }
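      /* For example, LE becomes a call equivalent to
         _U_Qfcmp (op0, op1, QCMP_LT|QCMP_EQ|QCMP_INV), and the BImode
         result below tests that return value against zero with NE.  */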
1587
 
1588
      start_sequence ();
1589
 
1590
      ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
1591
                                     op0, TFmode, op1, TFmode,
1592
                                     GEN_INT (magic), DImode);
1593
      cmp = gen_reg_rtx (BImode);
1594
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
1595
                              gen_rtx_fmt_ee (ncode, BImode,
1596
                                              ret, const0_rtx)));
1597
 
1598
      insns = get_insns ();
1599
      end_sequence ();
1600
 
1601
      emit_libcall_block (insns, cmp, cmp,
1602
                          gen_rtx_fmt_ee (code, BImode, op0, op1));
1603
      code = NE;
1604
    }
1605
  else
1606
    {
1607
      cmp = gen_reg_rtx (BImode);
1608
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
1609
                              gen_rtx_fmt_ee (code, BImode, op0, op1)));
1610
      code = NE;
1611
    }
1612
 
1613
  return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
1614
}
1615
 
1616
/* Generate an integral vector comparison.  Return true if the condition has
1617
   been reversed, and so the sense of the comparison should be inverted.  */
1618
 
1619
static bool
1620
ia64_expand_vecint_compare (enum rtx_code code, enum machine_mode mode,
1621
                            rtx dest, rtx op0, rtx op1)
1622
{
1623
  bool negate = false;
1624
  rtx x;
1625
 
1626
  /* Canonicalize the comparison to EQ, GT, GTU.  */
1627
  switch (code)
1628
    {
1629
    case EQ:
1630
    case GT:
1631
    case GTU:
1632
      break;
1633
 
1634
    case NE:
1635
    case LE:
1636
    case LEU:
1637
      code = reverse_condition (code);
1638
      negate = true;
1639
      break;
1640
 
1641
    case GE:
1642
    case GEU:
1643
      code = reverse_condition (code);
1644
      negate = true;
1645
      /* FALLTHRU */
1646
 
1647
    case LT:
1648
    case LTU:
1649
      code = swap_condition (code);
1650
      x = op0, op0 = op1, op1 = x;
1651
      break;
1652
 
1653
    default:
1654
      gcc_unreachable ();
1655
    }
1656
 
1657
  /* Unsigned parallel compare is not supported by the hardware.  Play some
1658
     tricks to implement it with the comparisons that are available.  */
1659
  if (code == GTU)
1660
    {
1661
      switch (mode)
1662
        {
1663
        case V2SImode:
1664
          {
1665
            rtx t1, t2, mask;
1666
 
1667
            /* The hardware has no parallel unsigned compare for V2SI.
               Flip the sign bit of both operands: a >u b is equivalent
               to (a ^ 0x80000000) >s (b ^ 0x80000000), so a signed
               parallel compare of the adjusted operands gives GTU.  */
            mask = GEN_INT (-0x80000000);
            mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask));
            mask = force_reg (V2SImode, mask);

            t1 = gen_reg_rtx (V2SImode);
            emit_insn (gen_xorv2si3 (t1, op0, mask));
            t2 = gen_reg_rtx (V2SImode);
            emit_insn (gen_xorv2si3 (t2, op1, mask));

            code = GT;
            op0 = t1;
            op1 = t2;
1686
          }
1687
          break;
1688
 
1689
        case V8QImode:
1690
        case V4HImode:
1691
          /* Perform a parallel unsigned saturating subtraction.  */
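          /* op0 -us op1 is zero exactly when op0 <=u op1, so testing the
             saturating difference for equality with zero (and flipping
             NEGATE) yields the GTU result.  */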
1692
          x = gen_reg_rtx (mode);
1693
          emit_insn (gen_rtx_SET (VOIDmode, x,
1694
                                  gen_rtx_US_MINUS (mode, op0, op1)));
1695
 
1696
          code = EQ;
1697
          op0 = x;
1698
          op1 = CONST0_RTX (mode);
1699
          negate = !negate;
1700
          break;
1701
 
1702
        default:
1703
          gcc_unreachable ();
1704
        }
1705
    }
1706
 
1707
  x = gen_rtx_fmt_ee (code, mode, op0, op1);
1708
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
1709
 
1710
  return negate;
1711
}
1712
 
1713
/* Emit an integral vector conditional move.  */
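/* The result is formed as (cmp AND true-arm) IOR (NOT cmp AND false-arm),
   with the corresponding half dropped when either arm is the zero vector.  */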
1714
 
1715
void
1716
ia64_expand_vecint_cmov (rtx operands[])
1717
{
1718
  enum machine_mode mode = GET_MODE (operands[0]);
1719
  enum rtx_code code = GET_CODE (operands[3]);
1720
  bool negate;
1721
  rtx cmp, x, ot, of;
1722
 
1723
  cmp = gen_reg_rtx (mode);
1724
  negate = ia64_expand_vecint_compare (code, mode, cmp,
1725
                                       operands[4], operands[5]);
1726
 
1727
  ot = operands[1+negate];
1728
  of = operands[2-negate];
1729
 
1730
  if (ot == CONST0_RTX (mode))
1731
    {
1732
      if (of == CONST0_RTX (mode))
1733
        {
1734
          emit_move_insn (operands[0], ot);
1735
          return;
1736
        }
1737
 
1738
      x = gen_rtx_NOT (mode, cmp);
1739
      x = gen_rtx_AND (mode, x, of);
1740
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1741
    }
1742
  else if (of == CONST0_RTX (mode))
1743
    {
1744
      x = gen_rtx_AND (mode, cmp, ot);
1745
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1746
    }
1747
  else
1748
    {
1749
      rtx t, f;
1750
 
1751
      t = gen_reg_rtx (mode);
1752
      x = gen_rtx_AND (mode, cmp, operands[1+negate]);
1753
      emit_insn (gen_rtx_SET (VOIDmode, t, x));
1754
 
1755
      f = gen_reg_rtx (mode);
1756
      x = gen_rtx_NOT (mode, cmp);
1757
      x = gen_rtx_AND (mode, x, operands[2-negate]);
1758
      emit_insn (gen_rtx_SET (VOIDmode, f, x));
1759
 
1760
      x = gen_rtx_IOR (mode, t, f);
1761
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1762
    }
1763
}
1764
 
1765
/* Emit an integral vector min or max operation.  Return true if all done.  */
1766
 
1767
bool
1768
ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
1769
                           rtx operands[])
1770
{
1771
  rtx xops[6];
1772
 
1773
  /* These four combinations are supported directly.  */
1774
  if (mode == V8QImode && (code == UMIN || code == UMAX))
1775
    return false;
1776
  if (mode == V4HImode && (code == SMIN || code == SMAX))
1777
    return false;
1778
 
1779
  /* This combination can be implemented with only saturating subtraction.  */
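  /* Specifically, umax (a, b) == (a -us b) + b, since the saturating
     difference is a - b when a >u b and zero otherwise.  */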
1780
  if (mode == V4HImode && code == UMAX)
1781
    {
1782
      rtx x, tmp = gen_reg_rtx (mode);
1783
 
1784
      x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
1785
      emit_insn (gen_rtx_SET (VOIDmode, tmp, x));
1786
 
1787
      emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
1788
      return true;
1789
    }
1790
 
1791
  /* Everything else is implemented via vector comparisons.  */
1792
  xops[0] = operands[0];
1793
  xops[4] = xops[1] = operands[1];
1794
  xops[5] = xops[2] = operands[2];
1795
 
1796
  switch (code)
1797
    {
1798
    case UMIN:
1799
      code = LTU;
1800
      break;
1801
    case UMAX:
1802
      code = GTU;
1803
      break;
1804
    case SMIN:
1805
      code = LT;
1806
      break;
1807
    case SMAX:
1808
      code = GT;
1809
      break;
1810
    default:
1811
      gcc_unreachable ();
1812
    }
1813
  xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
1814
 
1815
  ia64_expand_vecint_cmov (xops);
1816
  return true;
1817
}
1818
 
1819
/* Emit an integral vector widening sum operation.  */
1820
 
1821
void
1822
ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
1823
{
1824
  rtx l, h, x, s;
1825
  enum machine_mode wmode, mode;
1826
  rtx (*unpack_l) (rtx, rtx, rtx);
1827
  rtx (*unpack_h) (rtx, rtx, rtx);
1828
  rtx (*plus) (rtx, rtx, rtx);
1829
 
1830
  wmode = GET_MODE (operands[0]);
1831
  mode = GET_MODE (operands[1]);
1832
 
1833
  switch (mode)
1834
    {
1835
    case V8QImode:
1836
      unpack_l = gen_unpack1_l;
1837
      unpack_h = gen_unpack1_h;
1838
      plus = gen_addv4hi3;
1839
      break;
1840
    case V4HImode:
1841
      unpack_l = gen_unpack2_l;
1842
      unpack_h = gen_unpack2_h;
1843
      plus = gen_addv2si3;
1844
      break;
1845
    default:
1846
      gcc_unreachable ();
1847
    }
1848
 
1849
  /* Fill in x with the sign extension of each element in op1.  */
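  /* For the signed case this is done with an LT compare against zero,
     which leaves all-ones in each element where op1 is negative; those
     all-ones elements provide the sign-extension halves in the unpack
     insns below.  */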
1850
  if (unsignedp)
1851
    x = CONST0_RTX (mode);
1852
  else
1853
    {
1854
      bool neg;
1855
 
1856
      x = gen_reg_rtx (mode);
1857
 
1858
      neg = ia64_expand_vecint_compare (LT, mode, x, operands[1],
1859
                                        CONST0_RTX (mode));
1860
      gcc_assert (!neg);
1861
    }
1862
 
1863
  l = gen_reg_rtx (wmode);
1864
  h = gen_reg_rtx (wmode);
1865
  s = gen_reg_rtx (wmode);
1866
 
1867
  emit_insn (unpack_l (gen_lowpart (mode, l), operands[1], x));
1868
  emit_insn (unpack_h (gen_lowpart (mode, h), operands[1], x));
1869
  emit_insn (plus (s, l, operands[2]));
1870
  emit_insn (plus (operands[0], h, s));
1871
}
1872
 
1873
/* Emit a signed or unsigned V8QI dot product operation.  */
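/* The two V8QI operands are unpacked into low and high V4HI halves,
   multiplied into four V2SI partial products with gen_pmpy2_r/gen_pmpy2_l,
   and the partial products are then summed together with the accumulator
   in operands[3].  */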
1874
 
1875
void
1876
ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp)
1877
{
1878
  rtx l1, l2, h1, h2, x1, x2, p1, p2, p3, p4, s1, s2, s3;
1879
 
1880
  /* Fill in x1 and x2 with the sign extension of each element.  */
1881
  if (unsignedp)
1882
    x1 = x2 = CONST0_RTX (V8QImode);
1883
  else
1884
    {
1885
      bool neg;
1886
 
1887
      x1 = gen_reg_rtx (V8QImode);
1888
      x2 = gen_reg_rtx (V8QImode);
1889
 
1890
      neg = ia64_expand_vecint_compare (LT, V8QImode, x1, operands[1],
1891
                                        CONST0_RTX (V8QImode));
1892
      gcc_assert (!neg);
1893
      neg = ia64_expand_vecint_compare (LT, V8QImode, x2, operands[2],
1894
                                        CONST0_RTX (V8QImode));
1895
      gcc_assert (!neg);
1896
    }
1897
 
1898
  l1 = gen_reg_rtx (V4HImode);
1899
  l2 = gen_reg_rtx (V4HImode);
1900
  h1 = gen_reg_rtx (V4HImode);
1901
  h2 = gen_reg_rtx (V4HImode);
1902
 
1903
  emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l1), operands[1], x1));
1904
  emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l2), operands[2], x2));
1905
  emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h1), operands[1], x1));
1906
  emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h2), operands[2], x2));
1907
 
1908
  p1 = gen_reg_rtx (V2SImode);
1909
  p2 = gen_reg_rtx (V2SImode);
1910
  p3 = gen_reg_rtx (V2SImode);
1911
  p4 = gen_reg_rtx (V2SImode);
1912
  emit_insn (gen_pmpy2_r (p1, l1, l2));
1913
  emit_insn (gen_pmpy2_l (p2, l1, l2));
1914
  emit_insn (gen_pmpy2_r (p3, h1, h2));
1915
  emit_insn (gen_pmpy2_l (p4, h1, h2));
1916
 
1917
  s1 = gen_reg_rtx (V2SImode);
1918
  s2 = gen_reg_rtx (V2SImode);
1919
  s3 = gen_reg_rtx (V2SImode);
1920
  emit_insn (gen_addv2si3 (s1, p1, p2));
1921
  emit_insn (gen_addv2si3 (s2, p3, p4));
1922
  emit_insn (gen_addv2si3 (s3, s1, operands[3]));
1923
  emit_insn (gen_addv2si3 (operands[0], s2, s3));
1924
}
1925
 
1926
/* Emit the appropriate sequence for a call.  */
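/* With TARGET_NO_PIC or TARGET_AUTO_PIC the nogp call patterns are used;
   otherwise the gp-using patterns are emitted and the call records a use
   of pic_offset_table_rtx so the GP remains live across the call.  */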
1927
 
1928
void
1929
ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
1930
                  int sibcall_p)
1931
{
1932
  rtx insn, b0;
1933
 
1934
  addr = XEXP (addr, 0);
1935
  addr = convert_memory_address (DImode, addr);
1936
  b0 = gen_rtx_REG (DImode, R_BR (0));
1937
 
1938
  /* ??? Should do this for functions known to bind local too.  */
1939
  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
1940
    {
1941
      if (sibcall_p)
1942
        insn = gen_sibcall_nogp (addr);
1943
      else if (! retval)
1944
        insn = gen_call_nogp (addr, b0);
1945
      else
1946
        insn = gen_call_value_nogp (retval, addr, b0);
1947
      insn = emit_call_insn (insn);
1948
    }
1949
  else
1950
    {
1951
      if (sibcall_p)
1952
        insn = gen_sibcall_gp (addr);
1953
      else if (! retval)
1954
        insn = gen_call_gp (addr, b0);
1955
      else
1956
        insn = gen_call_value_gp (retval, addr, b0);
1957
      insn = emit_call_insn (insn);
1958
 
1959
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
1960
    }
1961
 
1962
  if (sibcall_p)
1963
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
1964
}
1965
 
1966
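/* Reload the global pointer after a call that may have changed it, either
   from the general register it was saved in or from its memory save slot
   addressed relative to the frame or stack pointer.  */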
void
1967
ia64_reload_gp (void)
1968
{
1969
  rtx tmp;
1970
 
1971
  if (current_frame_info.reg_save_gp)
1972
    tmp = gen_rtx_REG (DImode, current_frame_info.reg_save_gp);
1973
  else
1974
    {
1975
      HOST_WIDE_INT offset;
1976
 
1977
      offset = (current_frame_info.spill_cfa_off
1978
                + current_frame_info.spill_size);
1979
      if (frame_pointer_needed)
1980
        {
1981
          tmp = hard_frame_pointer_rtx;
1982
          offset = -offset;
1983
        }
1984
      else
1985
        {
1986
          tmp = stack_pointer_rtx;
1987
          offset = current_frame_info.total_size - offset;
1988
        }
1989
 
1990
      if (CONST_OK_FOR_I (offset))
1991
        emit_insn (gen_adddi3 (pic_offset_table_rtx,
1992
                               tmp, GEN_INT (offset)));
1993
      else
1994
        {
1995
          emit_move_insn (pic_offset_table_rtx, GEN_INT (offset));
1996
          emit_insn (gen_adddi3 (pic_offset_table_rtx,
1997
                                 pic_offset_table_rtx, tmp));
1998
        }
1999
 
2000
      tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
2001
    }
2002
 
2003
  emit_move_insn (pic_offset_table_rtx, tmp);
2004
}
2005
 
2006
void
2007
ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
2008
                 rtx scratch_b, int noreturn_p, int sibcall_p)
2009
{
2010
  rtx insn;
2011
  bool is_desc = false;
2012
 
2013
  /* If we find we're calling through a register, then we're actually
2014
     calling through a descriptor, so load up the values.  */
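  /* A function descriptor is two 8-byte words: the code entry point
     followed by the callee's GP value; both are loaded below, with a
     POST_INC stepping from the first word to the second.  */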
2015
  if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
2016
    {
2017
      rtx tmp;
2018
      bool addr_dead_p;
2019
 
2020
      /* ??? We are currently constrained to *not* use peep2, because
2021
         we can legitimately change the global lifetime of the GP
2022
         (in the form of killing it where it was previously live).  This is
2023
         because a call through a descriptor doesn't use the previous
2024
         value of the GP, while a direct call does, and we do not
2025
         commit to either form until the split here.
2026
 
2027
         That said, this means that we lack precise life info for
2028
         whether ADDR is dead after this call.  This is not terribly
2029
         important, since we can fix things up essentially for free
2030
         with the POST_DEC below, but it's nice to not use it when we
2031
         can immediately tell it's not necessary.  */
2032
      addr_dead_p = ((noreturn_p || sibcall_p
2033
                      || TEST_HARD_REG_BIT (regs_invalidated_by_call,
2034
                                            REGNO (addr)))
2035
                     && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
2036
 
2037
      /* Load the code address into scratch_b.  */
2038
      tmp = gen_rtx_POST_INC (Pmode, addr);
2039
      tmp = gen_rtx_MEM (Pmode, tmp);
2040
      emit_move_insn (scratch_r, tmp);
2041
      emit_move_insn (scratch_b, scratch_r);
2042
 
2043
      /* Load the GP address.  If ADDR is not dead here, then we must
2044
         revert the change made above via the POST_INCREMENT.  */
2045
      if (!addr_dead_p)
2046
        tmp = gen_rtx_POST_DEC (Pmode, addr);
2047
      else
2048
        tmp = addr;
2049
      tmp = gen_rtx_MEM (Pmode, tmp);
2050
      emit_move_insn (pic_offset_table_rtx, tmp);
2051
 
2052
      is_desc = true;
2053
      addr = scratch_b;
2054
    }
2055
 
2056
  if (sibcall_p)
2057
    insn = gen_sibcall_nogp (addr);
2058
  else if (retval)
2059
    insn = gen_call_value_nogp (retval, addr, retaddr);
2060
  else
2061
    insn = gen_call_nogp (addr, retaddr);
2062
  emit_call_insn (insn);
2063
 
2064
  if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
2065
    ia64_reload_gp ();
2066
}
2067
 
2068
/* Expand an atomic operation.  We want to perform MEM <CODE>= VAL atomically.
2069
 
2070
   This differs from the generic code in that we know about the zero-extending
2071
   properties of cmpxchg, and the zero-extending requirements of ar.ccv.  We
2072
   also know that ld.acq+cmpxchg.rel equals a full barrier.
2073
 
2074
   The loop we want to generate looks like
2075
 
2076
        cmp_reg = mem;
2077
      label:
2078
        old_reg = cmp_reg;
2079
        new_reg = cmp_reg op val;
2080
        cmp_reg = compare-and-swap(mem, old_reg, new_reg)
2081
        if (cmp_reg != old_reg)
2082
          goto label;
2083
 
2084
   Note that we only do the plain load from memory once.  Subsequent
2085
   iterations use the value loaded by the compare-and-swap pattern.  */
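/* The cmpxchg instruction compares the memory operand against the ar.ccv
   application register, which is why CMP_REG is copied into ar.ccv before
   each cmpxchg.rel below.  */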
2086
 
2087
void
2088
ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
2089
                       rtx old_dst, rtx new_dst)
2090
{
2091
  enum machine_mode mode = GET_MODE (mem);
2092
  rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
2093
  enum insn_code icode;
2094
 
2095
  /* Special case for using fetchadd.  */
2096
  if ((mode == SImode || mode == DImode)
2097
      && (code == PLUS || code == MINUS)
2098
      && fetchadd_operand (val, mode))
2099
    {
2100
      if (code == MINUS)
2101
        val = GEN_INT (-INTVAL (val));
2102
 
2103
      if (!old_dst)
2104
        old_dst = gen_reg_rtx (mode);
2105
 
2106
      emit_insn (gen_memory_barrier ());
2107
 
2108
      if (mode == SImode)
2109
        icode = CODE_FOR_fetchadd_acq_si;
2110
      else
2111
        icode = CODE_FOR_fetchadd_acq_di;
2112
      emit_insn (GEN_FCN (icode) (old_dst, mem, val));
2113
 
2114
      if (new_dst)
2115
        {
2116
          new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
2117
                                         true, OPTAB_WIDEN);
2118
          if (new_reg != new_dst)
2119
            emit_move_insn (new_dst, new_reg);
2120
        }
2121
      return;
2122
    }
2123
 
2124
  /* Because of the volatile mem read, we get an ld.acq, which is the
2125
     front half of the full barrier.  The end half is the cmpxchg.rel.  */
2126
  gcc_assert (MEM_VOLATILE_P (mem));
2127
 
2128
  old_reg = gen_reg_rtx (DImode);
2129
  cmp_reg = gen_reg_rtx (DImode);
2130
  label = gen_label_rtx ();
2131
 
2132
  if (mode != DImode)
2133
    {
2134
      val = simplify_gen_subreg (DImode, val, mode, 0);
2135
      emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
2136
    }
2137
  else
2138
    emit_move_insn (cmp_reg, mem);
2139
 
2140
  emit_label (label);
2141
 
2142
  ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
2143
  emit_move_insn (old_reg, cmp_reg);
2144
  emit_move_insn (ar_ccv, cmp_reg);
2145
 
2146
  if (old_dst)
2147
    emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
2148
 
2149
  new_reg = cmp_reg;
2150
  if (code == NOT)
2151
    {
2152
      new_reg = expand_simple_unop (DImode, NOT, new_reg, NULL_RTX, true);
2153
      code = AND;
2154
    }
2155
  new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
2156
                                 true, OPTAB_DIRECT);
2157
 
2158
  if (mode != DImode)
2159
    new_reg = gen_lowpart (mode, new_reg);
2160
  if (new_dst)
2161
    emit_move_insn (new_dst, new_reg);
2162
 
2163
  switch (mode)
2164
    {
2165
    case QImode:  icode = CODE_FOR_cmpxchg_rel_qi;  break;
2166
    case HImode:  icode = CODE_FOR_cmpxchg_rel_hi;  break;
2167
    case SImode:  icode = CODE_FOR_cmpxchg_rel_si;  break;
2168
    case DImode:  icode = CODE_FOR_cmpxchg_rel_di;  break;
2169
    default:
2170
      gcc_unreachable ();
2171
    }
2172
 
2173
  emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
2174
 
2175
  emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
2176
}
2177
 
2178
/* Begin the assembly file.  */
2179
 
2180
static void
2181
ia64_file_start (void)
2182
{
2183
  /* Variable tracking should be run after all optimizations which change order
2184
     of insns.  It also needs a valid CFG.  This can't be done in
2185
     ia64_override_options, because flag_var_tracking is finalized after
2186
     that.  */
2187
  ia64_flag_var_tracking = flag_var_tracking;
2188
  flag_var_tracking = 0;
2189
 
2190
  default_file_start ();
2191
  emit_safe_across_calls ();
2192
}
2193
 
2194
void
2195
emit_safe_across_calls (void)
2196
{
2197
  unsigned int rs, re;
2198
  int out_state;
2199
 
2200
  rs = 1;
2201
  out_state = 0;
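  /* Scan for maximal runs of call-preserved predicate registers and emit
     each run as part of a single directive, e.g.
         .pred.safe_across_calls p1-p5,p16-p63
     (the exact ranges depend on how call_used_regs is configured).  */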
2202
  while (1)
2203
    {
2204
      while (rs < 64 && call_used_regs[PR_REG (rs)])
2205
        rs++;
2206
      if (rs >= 64)
2207
        break;
2208
      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
2209
        continue;
2210
      if (out_state == 0)
2211
        {
2212
          fputs ("\t.pred.safe_across_calls ", asm_out_file);
2213
          out_state = 1;
2214
        }
2215
      else
2216
        fputc (',', asm_out_file);
2217
      if (re == rs + 1)
2218
        fprintf (asm_out_file, "p%u", rs);
2219
      else
2220
        fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
2221
      rs = re + 1;
2222
    }
2223
  if (out_state)
2224
    fputc ('\n', asm_out_file);
2225
}
2226
 
2227
/* Helper function for ia64_compute_frame_size: find an appropriate general
2228
   register to spill some special register to.  SPECIAL_SPILL_MASK contains
2229
   bits in GR0 to GR31 that have already been allocated by this routine.
2230
   TRY_LOCALS is true if we should attempt to locate a local regnum.  */
2231
 
2232
static int
2233
find_gr_spill (int try_locals)
2234
{
2235
  int regno;
2236
 
2237
  /* If this is a leaf function, first try an otherwise unused
2238
     call-clobbered register.  */
2239
  if (current_function_is_leaf)
2240
    {
2241
      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2242
        if (! regs_ever_live[regno]
2243
            && call_used_regs[regno]
2244
            && ! fixed_regs[regno]
2245
            && ! global_regs[regno]
2246
            && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2247
          {
2248
            current_frame_info.gr_used_mask |= 1 << regno;
2249
            return regno;
2250
          }
2251
    }
2252
 
2253
  if (try_locals)
2254
    {
2255
      regno = current_frame_info.n_local_regs;
2256
      /* If there is a frame pointer, then we can't use loc79, because
2257
         that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
2258
         reg_name switching code in ia64_expand_prologue.  */
2259
      if (regno < (80 - frame_pointer_needed))
2260
        {
2261
          current_frame_info.n_local_regs = regno + 1;
2262
          return LOC_REG (0) + regno;
2263
        }
2264
    }
2265
 
2266
  /* Failed to find a general register to spill to.  Must use stack.  */
2267
  return 0;
2268
}
2269
 
2270
/* In order to make for nice schedules, we try to allocate every temporary
2271
   to a different register.  We must of course stay away from call-saved,
2272
   fixed, and global registers.  We must also stay away from registers
2273
   allocated in current_frame_info.gr_used_mask, since those include regs
2274
   used all through the prologue.
2275
 
2276
   Any register allocated here must be used immediately.  The idea is to
2277
   aid scheduling, not to solve data flow problems.  */
2278
 
2279
static int last_scratch_gr_reg;
2280
 
2281
static int
2282
next_scratch_gr_reg (void)
2283
{
2284
  int i, regno;
2285
 
2286
  for (i = 0; i < 32; ++i)
2287
    {
2288
      regno = (last_scratch_gr_reg + i + 1) & 31;
2289
      if (call_used_regs[regno]
2290
          && ! fixed_regs[regno]
2291
          && ! global_regs[regno]
2292
          && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2293
        {
2294
          last_scratch_gr_reg = regno;
2295
          return regno;
2296
        }
2297
    }
2298
 
2299
  /* There must be _something_ available.  */
2300
  gcc_unreachable ();
2301
}
2302
 
2303
/* Helper function for ia64_compute_frame_size, called through
2304
   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */
2305
 
2306
static void
2307
mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
2308
{
2309
  unsigned int regno = REGNO (reg);
2310
  if (regno < 32)
2311
    {
2312
      unsigned int i, n = hard_regno_nregs[regno][GET_MODE (reg)];
2313
      for (i = 0; i < n; ++i)
2314
        current_frame_info.gr_used_mask |= 1 << (regno + i);
2315
    }
2316
}
2317
 
2318
/* Returns the number of bytes offset between the frame pointer and the stack
2319
   pointer for the current function.  SIZE is the number of bytes of space
2320
   needed for local variables.  */
2321
 
2322
static void
2323
ia64_compute_frame_size (HOST_WIDE_INT size)
2324
{
2325
  HOST_WIDE_INT total_size;
2326
  HOST_WIDE_INT spill_size = 0;
2327
  HOST_WIDE_INT extra_spill_size = 0;
2328
  HOST_WIDE_INT pretend_args_size;
2329
  HARD_REG_SET mask;
2330
  int n_spilled = 0;
2331
  int spilled_gr_p = 0;
2332
  int spilled_fr_p = 0;
2333
  unsigned int regno;
2334
  int i;
2335
 
2336
  if (current_frame_info.initialized)
2337
    return;
2338
 
2339
  memset (&current_frame_info, 0, sizeof current_frame_info);
2340
  CLEAR_HARD_REG_SET (mask);
2341
 
2342
  /* Don't allocate scratches to the return register.  */
2343
  diddle_return_value (mark_reg_gr_used_mask, NULL);
2344
 
2345
  /* Don't allocate scratches to the EH scratch registers.  */
2346
  if (cfun->machine->ia64_eh_epilogue_sp)
2347
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2348
  if (cfun->machine->ia64_eh_epilogue_bsp)
2349
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
2350
 
2351
  /* Find the size of the register stack frame.  We have only 80 local
2352
     registers, because we reserve 8 for the inputs and 8 for the
2353
     outputs.  */
2354
 
2355
  /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2356
     since we'll be adjusting that down later.  */
2357
  regno = LOC_REG (78) + ! frame_pointer_needed;
2358
  for (; regno >= LOC_REG (0); regno--)
2359
    if (regs_ever_live[regno])
2360
      break;
2361
  current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2362
 
2363
  /* For functions marked with the syscall_linkage attribute, we must mark
2364
     all eight input registers as in use, so that locals aren't visible to
2365
     the caller.  */
2366
 
2367
  if (cfun->machine->n_varargs > 0
2368
      || lookup_attribute ("syscall_linkage",
2369
                           TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
2370
    current_frame_info.n_input_regs = 8;
2371
  else
2372
    {
2373
      for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
2374
        if (regs_ever_live[regno])
2375
          break;
2376
      current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2377
    }
2378
 
2379
  for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
2380
    if (regs_ever_live[regno])
2381
      break;
2382
  i = regno - OUT_REG (0) + 1;
2383
 
2384
#ifndef PROFILE_HOOK
2385
  /* When -p profiling, we need one output register for the mcount argument.
2386
     Likewise for -a profiling for the bb_init_func argument.  For -ax
2387
     profiling, we need two output registers for the two bb_init_trace_func
2388
     arguments.  */
2389
  if (current_function_profile)
2390
    i = MAX (i, 1);
2391
#endif
2392
  current_frame_info.n_output_regs = i;
2393
 
2394
  /* ??? No rotating register support yet.  */
2395
  current_frame_info.n_rotate_regs = 0;
2396
 
2397
  /* Discover which registers need spilling, and how much room that
2398
     will take.  Begin with floating point and general registers,
2399
     which will always wind up on the stack.  */
2400
 
2401
  for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
2402
    if (regs_ever_live[regno] && ! call_used_regs[regno])
2403
      {
2404
        SET_HARD_REG_BIT (mask, regno);
2405
        spill_size += 16;
2406
        n_spilled += 1;
2407
        spilled_fr_p = 1;
2408
      }
2409
 
2410
  for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2411
    if (regs_ever_live[regno] && ! call_used_regs[regno])
2412
      {
2413
        SET_HARD_REG_BIT (mask, regno);
2414
        spill_size += 8;
2415
        n_spilled += 1;
2416
        spilled_gr_p = 1;
2417
      }
2418
 
2419
  for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
2420
    if (regs_ever_live[regno] && ! call_used_regs[regno])
2421
      {
2422
        SET_HARD_REG_BIT (mask, regno);
2423
        spill_size += 8;
2424
        n_spilled += 1;
2425
      }
2426
 
2427
  /* Now come all special registers that might get saved in other
2428
     general registers.  */
2429
 
2430
  if (frame_pointer_needed)
2431
    {
2432
      current_frame_info.reg_fp = find_gr_spill (1);
2433
      /* If we did not get a register, then we take LOC79.  This is guaranteed
2434
         to be free, even if regs_ever_live is already set, because this is
2435
         HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
2436
         as we don't count loc79 above.  */
2437
      if (current_frame_info.reg_fp == 0)
2438
        {
2439
          current_frame_info.reg_fp = LOC_REG (79);
2440
          current_frame_info.n_local_regs++;
2441
        }
2442
    }
2443
 
2444
  if (! current_function_is_leaf)
2445
    {
2446
      /* Emit a save of BR0 if we call other functions.  Do this even
2447
         if this function doesn't return, as EH depends on this to be
2448
         able to unwind the stack.  */
2449
      SET_HARD_REG_BIT (mask, BR_REG (0));
2450
 
2451
      current_frame_info.reg_save_b0 = find_gr_spill (1);
2452
      if (current_frame_info.reg_save_b0 == 0)
2453
        {
2454
          extra_spill_size += 8;
2455
          n_spilled += 1;
2456
        }
2457
 
2458
      /* Similarly for ar.pfs.  */
2459
      SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2460
      current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
2461
      if (current_frame_info.reg_save_ar_pfs == 0)
2462
        {
2463
          extra_spill_size += 8;
2464
          n_spilled += 1;
2465
        }
2466
 
2467
      /* Similarly for gp.  Note that if we're calling setjmp, the stacked
2468
         registers are clobbered, so we fall back to the stack.  */
2469
      current_frame_info.reg_save_gp
2470
        = (current_function_calls_setjmp ? 0 : find_gr_spill (1));
2471
      if (current_frame_info.reg_save_gp == 0)
2472
        {
2473
          SET_HARD_REG_BIT (mask, GR_REG (1));
2474
          spill_size += 8;
2475
          n_spilled += 1;
2476
        }
2477
    }
2478
  else
2479
    {
2480
      if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
2481
        {
2482
          SET_HARD_REG_BIT (mask, BR_REG (0));
2483
          extra_spill_size += 8;
2484
          n_spilled += 1;
2485
        }
2486
 
2487
      if (regs_ever_live[AR_PFS_REGNUM])
2488
        {
2489
          SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2490
          current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
2491
          if (current_frame_info.reg_save_ar_pfs == 0)
2492
            {
2493
              extra_spill_size += 8;
2494
              n_spilled += 1;
2495
            }
2496
        }
2497
    }
2498
 
2499
  /* Unwind descriptor hackery: things are most efficient if we allocate
2500
     consecutive GR save registers for RP, PFS, FP in that order. However,
2501
     it is absolutely critical that FP get the only hard register that's
2502
     guaranteed to be free, so we allocated it first.  If all three did
2503
     happen to be allocated hard regs, and are consecutive, rearrange them
2504
     into the preferred order now.  */
2505
  if (current_frame_info.reg_fp != 0
2506
      && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
2507
      && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
2508
    {
2509
      current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
2510
      current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
2511
      current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
2512
    }
2513
 
2514
  /* See if we need to store the predicate register block.  */
2515
  for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2516
    if (regs_ever_live[regno] && ! call_used_regs[regno])
2517
      break;
2518
  if (regno <= PR_REG (63))
2519
    {
2520
      SET_HARD_REG_BIT (mask, PR_REG (0));
2521
      current_frame_info.reg_save_pr = find_gr_spill (1);
2522
      if (current_frame_info.reg_save_pr == 0)
2523
        {
2524
          extra_spill_size += 8;
2525
          n_spilled += 1;
2526
        }
2527
 
2528
      /* ??? Mark them all as used so that register renaming and such
2529
         are free to use them.  */
2530
      for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2531
        regs_ever_live[regno] = 1;
2532
    }
2533
 
2534
  /* If we're forced to use st8.spill, we're forced to save and restore
2535
     ar.unat as well.  The check for existing liveness allows inline asm
2536
     to touch ar.unat.  */
2537
  if (spilled_gr_p || cfun->machine->n_varargs
2538
      || regs_ever_live[AR_UNAT_REGNUM])
2539
    {
2540
      regs_ever_live[AR_UNAT_REGNUM] = 1;
2541
      SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2542
      current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
2543
      if (current_frame_info.reg_save_ar_unat == 0)
2544
        {
2545
          extra_spill_size += 8;
2546
          n_spilled += 1;
2547
        }
2548
    }
2549
 
2550
  if (regs_ever_live[AR_LC_REGNUM])
2551
    {
2552
      SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2553
      current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
2554
      if (current_frame_info.reg_save_ar_lc == 0)
2555
        {
2556
          extra_spill_size += 8;
2557
          n_spilled += 1;
2558
        }
2559
    }
2560
 
2561
  /* If we have an odd number of words of pretend arguments written to
2562
     the stack, then the FR save area will be unaligned.  We round the
2563
     size of this area up to keep things 16 byte aligned.  */
2564
  if (spilled_fr_p)
2565
    pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
2566
  else
2567
    pretend_args_size = current_function_pretend_args_size;
2568
 
2569
  total_size = (spill_size + extra_spill_size + size + pretend_args_size
2570
                + current_function_outgoing_args_size);
2571
  total_size = IA64_STACK_ALIGN (total_size);
2572
 
2573
  /* We always use the 16-byte scratch area provided by the caller, but
2574
     if we are a leaf function, there's no one to which we need to provide
2575
     a scratch area.  */
2576
  if (current_function_is_leaf)
2577
    total_size = MAX (0, total_size - 16);
2578
 
2579
  current_frame_info.total_size = total_size;
2580
  current_frame_info.spill_cfa_off = pretend_args_size - 16;
2581
  current_frame_info.spill_size = spill_size;
2582
  current_frame_info.extra_spill_size = extra_spill_size;
2583
  COPY_HARD_REG_SET (current_frame_info.mask, mask);
2584
  current_frame_info.n_spilled = n_spilled;
2585
  current_frame_info.initialized = reload_completed;
2586
}
2587
 
2588
/* Compute the initial difference between the specified pair of registers.  */
2589
 
2590
HOST_WIDE_INT
2591
ia64_initial_elimination_offset (int from, int to)
2592
{
2593
  HOST_WIDE_INT offset;
2594
 
2595
  ia64_compute_frame_size (get_frame_size ());
2596
  switch (from)
2597
    {
2598
    case FRAME_POINTER_REGNUM:
2599
      switch (to)
2600
        {
2601
        case HARD_FRAME_POINTER_REGNUM:
2602
          if (current_function_is_leaf)
2603
            offset = -current_frame_info.total_size;
2604
          else
2605
            offset = -(current_frame_info.total_size
2606
                       - current_function_outgoing_args_size - 16);
2607
          break;
2608
 
2609
        case STACK_POINTER_REGNUM:
2610
          if (current_function_is_leaf)
2611
            offset = 0;
2612
          else
2613
            offset = 16 + current_function_outgoing_args_size;
2614
          break;
2615
 
2616
        default:
2617
          gcc_unreachable ();
2618
        }
2619
      break;
2620
 
2621
    case ARG_POINTER_REGNUM:
2622
      /* Arguments start above the 16 byte save area, unless this is stdarg,
2623
         in which case we store through the 16 byte save area.  */
2624
      switch (to)
2625
        {
2626
        case HARD_FRAME_POINTER_REGNUM:
2627
          offset = 16 - current_function_pretend_args_size;
2628
          break;
2629
 
2630
        case STACK_POINTER_REGNUM:
2631
          offset = (current_frame_info.total_size
2632
                    + 16 - current_function_pretend_args_size);
2633
          break;
2634
 
2635
        default:
2636
          gcc_unreachable ();
2637
        }
2638
      break;
2639
 
2640
    default:
2641
      gcc_unreachable ();
2642
    }
2643
 
2644
  return offset;
2645
}
2646
 
2647
/* If there are more than a trivial number of register spills, we use
2648
   two interleaved iterators so that we can get two memory references
2649
   per insn group.
2650
 
2651
   In order to simplify things in the prologue and epilogue expanders,
2652
   we use helper functions to fix up the memory references after the
2653
   fact with the appropriate offsets to a POST_MODIFY memory mode.
2654
   The following data structure tracks the state of the two iterators
2655
   while insns are being emitted.  */
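/* Because the iterators alternate, consecutive spills use different base
   registers and can issue in the same instruction group; each memory
   reference may later be retro-fitted with a POST_MODIFY that advances
   its iterator past the slot just used.  */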
2656
 
2657
struct spill_fill_data
2658
{
2659
  rtx init_after;               /* point at which to emit initializations */
2660
  rtx init_reg[2];              /* initial base register */
2661
  rtx iter_reg[2];              /* the iterator registers */
2662
  rtx *prev_addr[2];            /* address of last memory use */
2663
  rtx prev_insn[2];             /* the insn corresponding to prev_addr */
2664
  HOST_WIDE_INT prev_off[2];    /* last offset */
2665
  int n_iter;                   /* number of iterators in use */
2666
  int next_iter;                /* next iterator to use */
2667
  unsigned int save_gr_used_mask;
2668
};
2669
 
2670
static struct spill_fill_data spill_fill_data;
2671
 
2672
static void
2673
setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
2674
{
2675
  int i;
2676
 
2677
  spill_fill_data.init_after = get_last_insn ();
2678
  spill_fill_data.init_reg[0] = init_reg;
2679
  spill_fill_data.init_reg[1] = init_reg;
2680
  spill_fill_data.prev_addr[0] = NULL;
2681
  spill_fill_data.prev_addr[1] = NULL;
2682
  spill_fill_data.prev_insn[0] = NULL;
2683
  spill_fill_data.prev_insn[1] = NULL;
2684
  spill_fill_data.prev_off[0] = cfa_off;
2685
  spill_fill_data.prev_off[1] = cfa_off;
2686
  spill_fill_data.next_iter = 0;
2687
  spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
2688
 
2689
  spill_fill_data.n_iter = 1 + (n_spills > 2);
2690
  for (i = 0; i < spill_fill_data.n_iter; ++i)
2691
    {
2692
      int regno = next_scratch_gr_reg ();
2693
      spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
2694
      current_frame_info.gr_used_mask |= 1 << regno;
2695
    }
2696
}
2697
 
2698
static void
2699
finish_spill_pointers (void)
2700
{
2701
  current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
2702
}
2703
 
2704
static rtx
2705
spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
2706
{
2707
  int iter = spill_fill_data.next_iter;
2708
  HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
2709
  rtx disp_rtx = GEN_INT (disp);
2710
  rtx mem;
2711
 
2712
  if (spill_fill_data.prev_addr[iter])
2713
    {
2714
      if (CONST_OK_FOR_N (disp))
2715
        {
2716
          *spill_fill_data.prev_addr[iter]
2717
            = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
2718
                                   gen_rtx_PLUS (DImode,
2719
                                                 spill_fill_data.iter_reg[iter],
2720
                                                 disp_rtx));
2721
          REG_NOTES (spill_fill_data.prev_insn[iter])
2722
            = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
2723
                                 REG_NOTES (spill_fill_data.prev_insn[iter]));
2724
        }
2725
      else
2726
        {
2727
          /* ??? Could use register post_modify for loads.  */
2728
          if (! CONST_OK_FOR_I (disp))
2729
            {
2730
              rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2731
              emit_move_insn (tmp, disp_rtx);
2732
              disp_rtx = tmp;
2733
            }
2734
          emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2735
                                 spill_fill_data.iter_reg[iter], disp_rtx));
2736
        }
2737
    }
2738
  /* Micro-optimization: if we've created a frame pointer, it's at
2739
     CFA 0, which may allow the real iterator to be initialized lower,
2740
     slightly increasing parallelism.  Also, if there are few saves
2741
     it may eliminate the iterator entirely.  */
2742
  else if (disp == 0
2743
           && spill_fill_data.init_reg[iter] == stack_pointer_rtx
2744
           && frame_pointer_needed)
2745
    {
2746
      mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
2747
      set_mem_alias_set (mem, get_varargs_alias_set ());
2748
      return mem;
2749
    }
2750
  else
2751
    {
2752
      rtx seq, insn;
2753
 
2754
      if (disp == 0)
2755
        seq = gen_movdi (spill_fill_data.iter_reg[iter],
2756
                         spill_fill_data.init_reg[iter]);
2757
      else
2758
        {
2759
          start_sequence ();
2760
 
2761
          if (! CONST_OK_FOR_I (disp))
2762
            {
2763
              rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2764
              emit_move_insn (tmp, disp_rtx);
2765
              disp_rtx = tmp;
2766
            }
2767
 
2768
          emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2769
                                 spill_fill_data.init_reg[iter],
2770
                                 disp_rtx));
2771
 
2772
          seq = get_insns ();
2773
          end_sequence ();
2774
        }
2775
 
2776
      /* Be careful about being the first insn in a sequence.  */
2777
      if (spill_fill_data.init_after)
2778
        insn = emit_insn_after (seq, spill_fill_data.init_after);
2779
      else
2780
        {
2781
          rtx first = get_insns ();
2782
          if (first)
2783
            insn = emit_insn_before (seq, first);
2784
          else
2785
            insn = emit_insn (seq);
2786
        }
2787
      spill_fill_data.init_after = insn;
2788
 
2789
      /* If DISP is 0, we may or may not have a further adjustment
2790
         afterward.  If we do, then the load/store insn may be modified
2791
         to be a post-modify.  If we don't, then this copy may be
2792
         eliminated by copyprop_hardreg_forward, which makes this
2793
         insn garbage, which runs afoul of the sanity check in
2794
         propagate_one_insn.  So mark this insn as legal to delete.  */
2795
      if (disp == 0)
2796
        REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
2797
                                             REG_NOTES (insn));
2798
    }
2799
 
2800
  mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
2801
 
2802
  /* ??? Not all of the spills are for varargs, but some of them are.
2803
     The rest of the spills belong in an alias set of their own.  But
2804
     it doesn't actually hurt to include them here.  */
2805
  set_mem_alias_set (mem, get_varargs_alias_set ());
2806
 
2807
  spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
2808
  spill_fill_data.prev_off[iter] = cfa_off;
2809
 
2810
  if (++iter >= spill_fill_data.n_iter)
2811
    iter = 0;
2812
  spill_fill_data.next_iter = iter;
2813
 
2814
  return mem;
2815
}
2816
 
2817
static void
2818
do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
2819
          rtx frame_reg)
2820
{
2821
  int iter = spill_fill_data.next_iter;
2822
  rtx mem, insn;
2823
 
2824
  mem = spill_restore_mem (reg, cfa_off);
2825
  insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
2826
  spill_fill_data.prev_insn[iter] = insn;
2827
 
2828
  if (frame_reg)
2829
    {
2830
      rtx base;
2831
      HOST_WIDE_INT off;
2832
 
2833
      RTX_FRAME_RELATED_P (insn) = 1;
2834
 
2835
      /* Don't even pretend that the unwind code can intuit its way
2836
         through a pair of interleaved post_modify iterators.  Just
2837
         provide the correct answer.  */
2838
 
2839
      if (frame_pointer_needed)
2840
        {
2841
          base = hard_frame_pointer_rtx;
2842
          off = - cfa_off;
2843
        }
2844
      else
2845
        {
2846
          base = stack_pointer_rtx;
2847
          off = current_frame_info.total_size - cfa_off;
2848
        }
2849
 
2850
      REG_NOTES (insn)
2851
        = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2852
                gen_rtx_SET (VOIDmode,
2853
                             gen_rtx_MEM (GET_MODE (reg),
2854
                                          plus_constant (base, off)),
2855
                             frame_reg),
2856
                REG_NOTES (insn));
2857
    }
2858
}
2859
 
2860
static void
2861
do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
2862
{
2863
  int iter = spill_fill_data.next_iter;
2864
  rtx insn;
2865
 
2866
  insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
2867
                                GEN_INT (cfa_off)));
2868
  spill_fill_data.prev_insn[iter] = insn;
2869
}
2870
 
2871
/* Wrapper functions that discard the CONST_INT spill offset.  These
2872
   exist so that we can give gr_spill/gr_fill the offset they need and
2873
   use a consistent function interface.  */
2874
 
2875
static rtx
2876
gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2877
{
2878
  return gen_movdi (dest, src);
2879
}
2880
 
2881
static rtx
2882
gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2883
{
2884
  return gen_fr_spill (dest, src);
2885
}
2886
 
2887
static rtx
2888
gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2889
{
2890
  return gen_fr_restore (dest, src);
2891
}
2892
 
2893
/* Called after register allocation to add any instructions needed for the
2894
   prologue.  Using a prologue insn is favored compared to putting all of the
2895
   instructions in output_function_prologue(), since it allows the scheduler
2896
   to intermix instructions with the saves of the caller saved registers.  In
2897
   some cases, it might be necessary to emit a barrier instruction as the last
2898
   insn to prevent such scheduling.
2899
 
2900
   Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
2901
   so that the debug info generation code can handle them properly.
2902
 
2903
   The register save area is laid out like so:
2904
   cfa+16
2905
        [ varargs spill area ]
2906
        [ fr register spill area ]
2907
        [ br register spill area ]
2908
        [ ar register spill area ]
2909
        [ pr register spill area ]
2910
        [ gr register spill area ] */
2911
 
2912
/* ??? We get inefficient code when the frame size is larger than can fit in an
2913
   adds instruction.  */
2914
 
2915
void
2916
ia64_expand_prologue (void)
2917
{
2918
  rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
2919
  int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
2920
  rtx reg, alt_reg;
2921
 
2922
  ia64_compute_frame_size (get_frame_size ());
2923
  last_scratch_gr_reg = 15;
2924
 
2925
  /* If there is no epilogue, then we don't need some prologue insns.
2926
     We need to avoid emitting the dead prologue insns, because flow
2927
     will complain about them.  */
2928
  if (optimize)
2929
    {
2930
      edge e;
2931
      edge_iterator ei;
2932
 
2933
      FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
2934
        if ((e->flags & EDGE_FAKE) == 0
2935
            && (e->flags & EDGE_FALLTHRU) != 0)
2936
          break;
2937
      epilogue_p = (e != NULL);
2938
    }
2939
  else
2940
    epilogue_p = 1;
2941
 
2942
  /* Set the local, input, and output register names.  We need to do this
2943
     for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
2944
     half.  If we use in/loc/out register names, then we get assembler errors
2945
     in crtn.S because there is no alloc insn or regstk directive in there.  */
2946
  if (! TARGET_REG_NAMES)
2947
    {
2948
      int inputs = current_frame_info.n_input_regs;
2949
      int locals = current_frame_info.n_local_regs;
2950
      int outputs = current_frame_info.n_output_regs;
2951
 
2952
      for (i = 0; i < inputs; i++)
2953
        reg_names[IN_REG (i)] = ia64_reg_numbers[i];
2954
      for (i = 0; i < locals; i++)
2955
        reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
2956
      for (i = 0; i < outputs; i++)
2957
        reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
2958
    }
2959
 
2960
  /* Set the frame pointer register name.  The regnum is logically loc79,
2961
     but of course we'll not have allocated that many locals.  Rather than
2962
     worrying about renumbering the existing rtxs, we adjust the name.  */
2963
  /* ??? This code means that we can never use one local register when
2964
     there is a frame pointer.  loc79 gets wasted in this case, as it is
2965
     renamed to a register that will never be used.  See also the try_locals
2966
     code in find_gr_spill.  */
2967
  if (current_frame_info.reg_fp)
2968
    {
2969
      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2970
      reg_names[HARD_FRAME_POINTER_REGNUM]
2971
        = reg_names[current_frame_info.reg_fp];
2972
      reg_names[current_frame_info.reg_fp] = tmp;
2973
    }
2974
 
2975
  /* We don't need an alloc instruction if we've used no outputs or locals.  */
2976
  if (current_frame_info.n_local_regs == 0
2977
      && current_frame_info.n_output_regs == 0
2978
      && current_frame_info.n_input_regs <= current_function_args_info.int_regs
2979
      && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
2980
    {
2981
      /* If there is no alloc, but there are input registers used, then we
2982
         need a .regstk directive.  */
2983
      current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
2984
      ar_pfs_save_reg = NULL_RTX;
2985
    }
2986
  else
2987
    {
2988
      current_frame_info.need_regstk = 0;
2989
 
2990
      if (current_frame_info.reg_save_ar_pfs)
2991
        regno = current_frame_info.reg_save_ar_pfs;
2992
      else
2993
        regno = next_scratch_gr_reg ();
2994
      ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
2995
 
2996
      insn = emit_insn (gen_alloc (ar_pfs_save_reg,
2997
                                   GEN_INT (current_frame_info.n_input_regs),
2998
                                   GEN_INT (current_frame_info.n_local_regs),
2999
                                   GEN_INT (current_frame_info.n_output_regs),
3000
                                   GEN_INT (current_frame_info.n_rotate_regs)));
3001
      RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
3002
    }
3003
 
3004
  /* Set up frame pointer, stack pointer, and spill iterators.  */
3005
 
3006
  n_varargs = cfun->machine->n_varargs;
3007
  setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
3008
                        stack_pointer_rtx, 0);
3009
 
3010
  if (frame_pointer_needed)
3011
    {
3012
      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3013
      RTX_FRAME_RELATED_P (insn) = 1;
3014
    }
3015
 
3016
  if (current_frame_info.total_size != 0)
3017
    {
3018
      rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
3019
      rtx offset;
3020
 
3021
      if (CONST_OK_FOR_I (- current_frame_info.total_size))
3022
        offset = frame_size_rtx;
3023
      else
3024
        {
3025
          regno = next_scratch_gr_reg ();
3026
          offset = gen_rtx_REG (DImode, regno);
3027
          emit_move_insn (offset, frame_size_rtx);
3028
        }
3029
 
3030
      insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
3031
                                    stack_pointer_rtx, offset));
3032
 
3033
      if (! frame_pointer_needed)
3034
        {
3035
          RTX_FRAME_RELATED_P (insn) = 1;
3036
          if (GET_CODE (offset) != CONST_INT)
3037
            {
3038
              REG_NOTES (insn)
3039
                = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3040
                        gen_rtx_SET (VOIDmode,
3041
                                     stack_pointer_rtx,
3042
                                     gen_rtx_PLUS (DImode,
3043
                                                   stack_pointer_rtx,
3044
                                                   frame_size_rtx)),
3045
                        REG_NOTES (insn));
3046
            }
3047
        }
3048
 
3049
      /* ??? At this point we must generate a magic insn that appears to
3050
         modify the stack pointer, the frame pointer, and all spill
3051
         iterators.  This would allow the most scheduling freedom.  For
3052
         now, just hard stop.  */
3053
      emit_insn (gen_blockage ());
3054
    }
3055
 
3056
  /* Must copy out ar.unat before doing any integer spills.  */
3057
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3058
    {
3059
      if (current_frame_info.reg_save_ar_unat)
3060
        ar_unat_save_reg
3061
          = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
3062
      else
3063
        {
3064
          alt_regno = next_scratch_gr_reg ();
3065
          ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3066
          current_frame_info.gr_used_mask |= 1 << alt_regno;
3067
        }
3068
 
3069
      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3070
      insn = emit_move_insn (ar_unat_save_reg, reg);
3071
      RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
3072
 
3073
      /* Even if we're not going to generate an epilogue, we still
3074
         need to save the register so that EH works.  */
3075
      if (! epilogue_p && current_frame_info.reg_save_ar_unat)
3076
        emit_insn (gen_prologue_use (ar_unat_save_reg));
3077
    }
3078
  else
3079
    ar_unat_save_reg = NULL_RTX;
3080
 
3081
  /* Spill all varargs registers.  Do this before spilling any GR registers,
3082
     since we want the UNAT bits for the GR registers to override the UNAT
3083
     bits from varargs, which we don't care about.  */
3084
 
3085
  cfa_off = -16;
3086
  for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
3087
    {
3088
      reg = gen_rtx_REG (DImode, regno);
3089
      do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
3090
    }
3091
 
3092
  /* Locate the bottom of the register save area.  */
3093
  cfa_off = (current_frame_info.spill_cfa_off
3094
             + current_frame_info.spill_size
3095
             + current_frame_info.extra_spill_size);
3096
 
3097
  /* Save the predicate register block either in a register or in memory.  */
3098
  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3099
    {
3100
      reg = gen_rtx_REG (DImode, PR_REG (0));
3101
      if (current_frame_info.reg_save_pr != 0)
3102
        {
3103
          alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
3104
          insn = emit_move_insn (alt_reg, reg);
3105
 
3106
          /* ??? Denote pr spill/fill by a DImode move that modifies all
3107
             64 hard registers.  */
3108
          RTX_FRAME_RELATED_P (insn) = 1;
3109
          REG_NOTES (insn)
3110
            = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3111
                        gen_rtx_SET (VOIDmode, alt_reg, reg),
3112
                        REG_NOTES (insn));
3113
 
3114
          /* Even if we're not going to generate an epilogue, we still
3115
             need to save the register so that EH works.  */
3116
          if (! epilogue_p)
3117
            emit_insn (gen_prologue_use (alt_reg));
3118
        }
3119
      else
3120
        {
3121
          alt_regno = next_scratch_gr_reg ();
3122
          alt_reg = gen_rtx_REG (DImode, alt_regno);
3123
          insn = emit_move_insn (alt_reg, reg);
3124
          do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3125
          cfa_off -= 8;
3126
        }
3127
    }
3128
 
3129
  /* Handle AR regs in numerical order.  All of them get special handling.  */
3130
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
3131
      && current_frame_info.reg_save_ar_unat == 0)
3132
    {
3133
      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3134
      do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
3135
      cfa_off -= 8;
3136
    }
3137
 
3138
  /* The alloc insn already copied ar.pfs into a general register.  The
3139
     only thing we have to do now is copy that register to a stack slot
3140
     if we'd not allocated a local register for the job.  */
3141
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
3142
      && current_frame_info.reg_save_ar_pfs == 0)
3143
    {
3144
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3145
      do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
3146
      cfa_off -= 8;
3147
    }
3148
 
3149
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3150
    {
3151
      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3152
      if (current_frame_info.reg_save_ar_lc != 0)
3153
        {
3154
          alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
3155
          insn = emit_move_insn (alt_reg, reg);
3156
          RTX_FRAME_RELATED_P (insn) = 1;
3157
 
3158
          /* Even if we're not going to generate an epilogue, we still
3159
             need to save the register so that EH works.  */
3160
          if (! epilogue_p)
3161
            emit_insn (gen_prologue_use (alt_reg));
3162
        }
3163
      else
3164
        {
3165
          alt_regno = next_scratch_gr_reg ();
3166
          alt_reg = gen_rtx_REG (DImode, alt_regno);
3167
          emit_move_insn (alt_reg, reg);
3168
          do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3169
          cfa_off -= 8;
3170
        }
3171
    }
3172
 
3173
  /* Save the return pointer.  */
3174
  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3175
    {
3176
      reg = gen_rtx_REG (DImode, BR_REG (0));
3177
      if (current_frame_info.reg_save_b0 != 0)
3178
        {
3179
          alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
3180
          insn = emit_move_insn (alt_reg, reg);
3181
          RTX_FRAME_RELATED_P (insn) = 1;
3182
 
3183
          /* Even if we're not going to generate an epilogue, we still
3184
             need to save the register so that EH works.  */
3185
          if (! epilogue_p)
3186
            emit_insn (gen_prologue_use (alt_reg));
3187
        }
3188
      else
3189
        {
3190
          alt_regno = next_scratch_gr_reg ();
3191
          alt_reg = gen_rtx_REG (DImode, alt_regno);
3192
          emit_move_insn (alt_reg, reg);
3193
          do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3194
          cfa_off -= 8;
3195
        }
3196
    }
3197
 
3198
  if (current_frame_info.reg_save_gp)
3199
    {
3200
      insn = emit_move_insn (gen_rtx_REG (DImode,
3201
                                          current_frame_info.reg_save_gp),
3202
                             pic_offset_table_rtx);
3203
      /* We don't know for sure yet if this is actually needed, since
3204
         we've not split the PIC call patterns.  If all of the calls
3205
         are indirect, and not followed by any uses of the gp, then
3206
         this save is dead.  Allow it to go away.  */
3207
      REG_NOTES (insn)
3208
        = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, REG_NOTES (insn));
3209
    }
3210
 
3211
  /* We should now be at the base of the gr/br/fr spill area.  */
3212
  gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3213
                          + current_frame_info.spill_size));
3214
 
3215
  /* Spill all general registers.  */
3216
  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3217
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3218
      {
3219
        reg = gen_rtx_REG (DImode, regno);
3220
        do_spill (gen_gr_spill, reg, cfa_off, reg);
3221
        cfa_off -= 8;
3222
      }
3223
 
3224
  /* Spill the rest of the BR registers.  */
3225
  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3226
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3227
      {
3228
        alt_regno = next_scratch_gr_reg ();
3229
        alt_reg = gen_rtx_REG (DImode, alt_regno);
3230
        reg = gen_rtx_REG (DImode, regno);
3231
        emit_move_insn (alt_reg, reg);
3232
        do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3233
        cfa_off -= 8;
3234
      }
3235
 
3236
  /* Align the frame and spill all FR registers.  */
3237
  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3238
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3239
      {
3240
        gcc_assert (!(cfa_off & 15));
3241
        reg = gen_rtx_REG (XFmode, regno);
3242
        do_spill (gen_fr_spill_x, reg, cfa_off, reg);
3243
        cfa_off -= 16;
3244
      }
3245
 
3246
  gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3247
 
3248
  finish_spill_pointers ();
3249
}
3250
 
3251
/* Called after register allocation to add any instructions needed for the
3252
   epilogue.  Using an epilogue insn is favored compared to putting all of the
3253
   instructions in output_function_epilogue(), since it allows the scheduler
3254
   to intermix instructions with the restores of the caller-saved registers.  In
3255
   some cases, it might be necessary to emit a barrier instruction as the last
3256
   insn to prevent such scheduling.  */
3257
 
3258
void
3259
ia64_expand_epilogue (int sibcall_p)
3260
{
3261
  rtx insn, reg, alt_reg, ar_unat_save_reg;
3262
  int regno, alt_regno, cfa_off;
3263
 
3264
  ia64_compute_frame_size (get_frame_size ());
3265
 
3266
  /* If there is a frame pointer, then we use it instead of the stack
3267
     pointer, so that the stack pointer does not need to be valid when
3268
     the epilogue starts.  See EXIT_IGNORE_STACK.  */
3269
  if (frame_pointer_needed)
3270
    setup_spill_pointers (current_frame_info.n_spilled,
3271
                          hard_frame_pointer_rtx, 0);
3272
  else
3273
    setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
3274
                          current_frame_info.total_size);
3275
 
3276
  if (current_frame_info.total_size != 0)
3277
    {
3278
      /* ??? At this point we must generate a magic insn that appears to
3279
         modify the spill iterators and the frame pointer.  This would
3280
         allow the most scheduling freedom.  For now, just hard stop.  */
3281
      emit_insn (gen_blockage ());
3282
    }
3283
 
3284
  /* Locate the bottom of the register save area.  */
3285
  cfa_off = (current_frame_info.spill_cfa_off
3286
             + current_frame_info.spill_size
3287
             + current_frame_info.extra_spill_size);
3288
 
3289
  /* Restore the predicate registers.  */
3290
  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3291
    {
3292
      if (current_frame_info.reg_save_pr != 0)
3293
        alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
3294
      else
3295
        {
3296
          alt_regno = next_scratch_gr_reg ();
3297
          alt_reg = gen_rtx_REG (DImode, alt_regno);
3298
          do_restore (gen_movdi_x, alt_reg, cfa_off);
3299
          cfa_off -= 8;
3300
        }
3301
      reg = gen_rtx_REG (DImode, PR_REG (0));
3302
      emit_move_insn (reg, alt_reg);
3303
    }
3304
 
3305
  /* Restore the application registers.  */
3306
 
3307
  /* Load the saved unat from the stack, but do not restore it until
3308
     after the GRs have been restored.  */
3309
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3310
    {
3311
      if (current_frame_info.reg_save_ar_unat != 0)
3312
        ar_unat_save_reg
3313
          = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
3314
      else
3315
        {
3316
          alt_regno = next_scratch_gr_reg ();
3317
          ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3318
          current_frame_info.gr_used_mask |= 1 << alt_regno;
3319
          do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
3320
          cfa_off -= 8;
3321
        }
3322
    }
3323
  else
3324
    ar_unat_save_reg = NULL_RTX;
3325
 
3326
  if (current_frame_info.reg_save_ar_pfs != 0)
3327
    {
3328
      alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
3329
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3330
      emit_move_insn (reg, alt_reg);
3331
    }
3332
  else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3333
    {
3334
      alt_regno = next_scratch_gr_reg ();
3335
      alt_reg = gen_rtx_REG (DImode, alt_regno);
3336
      do_restore (gen_movdi_x, alt_reg, cfa_off);
3337
      cfa_off -= 8;
3338
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3339
      emit_move_insn (reg, alt_reg);
3340
    }
3341
 
3342
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3343
    {
3344
      if (current_frame_info.reg_save_ar_lc != 0)
3345
        alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
3346
      else
3347
        {
3348
          alt_regno = next_scratch_gr_reg ();
3349
          alt_reg = gen_rtx_REG (DImode, alt_regno);
3350
          do_restore (gen_movdi_x, alt_reg, cfa_off);
3351
          cfa_off -= 8;
3352
        }
3353
      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3354
      emit_move_insn (reg, alt_reg);
3355
    }
3356
 
3357
  /* Restore the return pointer.  */
3358
  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3359
    {
3360
      if (current_frame_info.reg_save_b0 != 0)
3361
        alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
3362
      else
3363
        {
3364
          alt_regno = next_scratch_gr_reg ();
3365
          alt_reg = gen_rtx_REG (DImode, alt_regno);
3366
          do_restore (gen_movdi_x, alt_reg, cfa_off);
3367
          cfa_off -= 8;
3368
        }
3369
      reg = gen_rtx_REG (DImode, BR_REG (0));
3370
      emit_move_insn (reg, alt_reg);
3371
    }
3372
 
3373
  /* We should now be at the base of the gr/br/fr spill area.  */
3374
  gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3375
                          + current_frame_info.spill_size));
3376
 
3377
  /* The GP may be stored on the stack in the prologue, but it's
3378
     never restored in the epilogue.  Skip the stack slot.  */
3379
  if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
3380
    cfa_off -= 8;
3381
 
3382
  /* Restore all general registers.  */
3383
  for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
3384
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3385
      {
3386
        reg = gen_rtx_REG (DImode, regno);
3387
        do_restore (gen_gr_restore, reg, cfa_off);
3388
        cfa_off -= 8;
3389
      }
3390
 
3391
  /* Restore the branch registers.  */
3392
  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3393
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3394
      {
3395
        alt_regno = next_scratch_gr_reg ();
3396
        alt_reg = gen_rtx_REG (DImode, alt_regno);
3397
        do_restore (gen_movdi_x, alt_reg, cfa_off);
3398
        cfa_off -= 8;
3399
        reg = gen_rtx_REG (DImode, regno);
3400
        emit_move_insn (reg, alt_reg);
3401
      }
3402
 
3403
  /* Restore floating point registers.  */
3404
  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3405
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3406
      {
3407
        gcc_assert (!(cfa_off & 15));
3408
        reg = gen_rtx_REG (XFmode, regno);
3409
        do_restore (gen_fr_restore_x, reg, cfa_off);
3410
        cfa_off -= 16;
3411
      }
3412
 
3413
  /* Restore ar.unat for real.  */
3414
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3415
    {
3416
      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3417
      emit_move_insn (reg, ar_unat_save_reg);
3418
    }
3419
 
3420
  gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3421
 
3422
  finish_spill_pointers ();
3423
 
3424
  if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
3425
    {
3426
      /* ??? At this point we must generate a magic insn that appears to
3427
         modify the spill iterators, the stack pointer, and the frame
3428
         pointer.  This would allow the most scheduling freedom.  For now,
3429
         just hard stop.  */
3430
      emit_insn (gen_blockage ());
3431
    }
3432
 
3433
  if (cfun->machine->ia64_eh_epilogue_sp)
3434
    emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
3435
  else if (frame_pointer_needed)
3436
    {
3437
      insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
3438
      RTX_FRAME_RELATED_P (insn) = 1;
3439
    }
3440
  else if (current_frame_info.total_size)
3441
    {
3442
      rtx offset, frame_size_rtx;
3443
 
3444
      frame_size_rtx = GEN_INT (current_frame_info.total_size);
3445
      if (CONST_OK_FOR_I (current_frame_info.total_size))
3446
        offset = frame_size_rtx;
3447
      else
3448
        {
3449
          regno = next_scratch_gr_reg ();
3450
          offset = gen_rtx_REG (DImode, regno);
3451
          emit_move_insn (offset, frame_size_rtx);
3452
        }
3453
 
3454
      insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
3455
                                    offset));
3456
 
3457
      RTX_FRAME_RELATED_P (insn) = 1;
3458
      if (GET_CODE (offset) != CONST_INT)
3459
        {
3460
          REG_NOTES (insn)
3461
            = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3462
                        gen_rtx_SET (VOIDmode,
3463
                                     stack_pointer_rtx,
3464
                                     gen_rtx_PLUS (DImode,
3465
                                                   stack_pointer_rtx,
3466
                                                   frame_size_rtx)),
3467
                        REG_NOTES (insn));
3468
        }
3469
    }
3470
 
3471
  if (cfun->machine->ia64_eh_epilogue_bsp)
3472
    emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
3473
 
3474
  if (! sibcall_p)
3475
    emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
3476
  else
3477
    {
3478
      int fp = GR_REG (2);
3479
      /* We need a throwaway register here; r0 and r1 are reserved, so r2 is the
3480
         first available call-clobbered register.  If there was a frame pointer
3481
         register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
3482
         so we have to make sure we're using the string "r2" when emitting
3483
         the register name for the assembler.  */
3484
      if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
3485
        fp = HARD_FRAME_POINTER_REGNUM;
3486
 
3487
      /* We must emit an alloc to force the input registers to become output
3488
         registers.  Otherwise, if the callee tries to pass its parameters
3489
         through to another call without an intervening alloc, then these
3490
         values get lost.  */
3491
      /* ??? We don't need to preserve all input registers.  We only need to
3492
         preserve those input registers used as arguments to the sibling call.
3493
         It is unclear how to compute that number here.  */
3494
      if (current_frame_info.n_input_regs != 0)
3495
        {
3496
          rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
3497
          insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
3498
                                const0_rtx, const0_rtx,
3499
                                n_inputs, const0_rtx));
3500
          RTX_FRAME_RELATED_P (insn) = 1;
3501
        }
3502
    }
3503
}
3504
 
3505
/* Return 1 if br.ret can do all the work required to return from a
3506
   function.  */
3507
 
3508
int
3509
ia64_direct_return (void)
3510
{
3511
  if (reload_completed && ! frame_pointer_needed)
3512
    {
3513
      ia64_compute_frame_size (get_frame_size ());
3514
 
3515
      return (current_frame_info.total_size == 0
3516
              && current_frame_info.n_spilled == 0
3517
              && current_frame_info.reg_save_b0 == 0
3518
              && current_frame_info.reg_save_pr == 0
3519
              && current_frame_info.reg_save_ar_pfs == 0
3520
              && current_frame_info.reg_save_ar_unat == 0
3521
              && current_frame_info.reg_save_ar_lc == 0);
3522
    }
3523
  return 0;
3524
}
3525
 
3526
/* Return the magic cookie that we use to hold the return address
3527
   during early compilation.  */
3528
 
3529
rtx
3530
ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
3531
{
3532
  if (count != 0)
3533
    return NULL;
3534
  return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
3535
}
3536
 
3537
/* Split this value after reload, now that we know where the return
3538
   address is saved.  */
3539
 
3540
void
3541
ia64_split_return_addr_rtx (rtx dest)
3542
{
3543
  rtx src;
3544
 
3545
  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3546
    {
3547
      if (current_frame_info.reg_save_b0 != 0)
3548
        src = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
3549
      else
3550
        {
3551
          HOST_WIDE_INT off;
3552
          unsigned int regno;
3553
 
3554
          /* Compute offset from CFA for BR0.  */
3555
          /* ??? Must be kept in sync with ia64_expand_prologue.  */
3556
          off = (current_frame_info.spill_cfa_off
3557
                 + current_frame_info.spill_size);
3558
          for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3559
            if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3560
              off -= 8;
3561
 
3562
          /* Convert CFA offset to a register based offset.  */
3563
          if (frame_pointer_needed)
3564
            src = hard_frame_pointer_rtx;
3565
          else
3566
            {
3567
              src = stack_pointer_rtx;
3568
              off += current_frame_info.total_size;
3569
            }
3570
 
3571
          /* Load address into scratch register.  */
3572
          if (CONST_OK_FOR_I (off))
3573
            emit_insn (gen_adddi3 (dest, src, GEN_INT (off)));
3574
          else
3575
            {
3576
              emit_move_insn (dest, GEN_INT (off));
3577
              emit_insn (gen_adddi3 (dest, src, dest));
3578
            }
3579
 
3580
          src = gen_rtx_MEM (Pmode, dest);
3581
        }
3582
    }
3583
  else
3584
    src = gen_rtx_REG (DImode, BR_REG (0));
3585
 
3586
  emit_move_insn (dest, src);
3587
}
3588
 
3589
int
3590
ia64_hard_regno_rename_ok (int from, int to)
3591
{
3592
  /* Don't clobber any of the registers we reserved for the prologue.  */
3593
  if (to == current_frame_info.reg_fp
3594
      || to == current_frame_info.reg_save_b0
3595
      || to == current_frame_info.reg_save_pr
3596
      || to == current_frame_info.reg_save_ar_pfs
3597
      || to == current_frame_info.reg_save_ar_unat
3598
      || to == current_frame_info.reg_save_ar_lc)
3599
    return 0;
3600
 
3601
  if (from == current_frame_info.reg_fp
3602
      || from == current_frame_info.reg_save_b0
3603
      || from == current_frame_info.reg_save_pr
3604
      || from == current_frame_info.reg_save_ar_pfs
3605
      || from == current_frame_info.reg_save_ar_unat
3606
      || from == current_frame_info.reg_save_ar_lc)
3607
    return 0;
3608
 
3609
  /* Don't use output registers outside the register frame.  */
3610
  if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
3611
    return 0;
3612
 
3613
  /* Retain even/oddness on predicate register pairs.  */
3614
  if (PR_REGNO_P (from) && PR_REGNO_P (to))
3615
    return (from & 1) == (to & 1);
3616
 
3617
  return 1;
3618
}
3619
 
3620
/* Target hook for assembling integer objects.  Handle word-sized
3621
   aligned objects and detect the cases when @fptr is needed.  */
3622
 
3623
static bool
3624
ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
3625
{
3626
  if (size == POINTER_SIZE / BITS_PER_UNIT
3627
      && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
3628
      && GET_CODE (x) == SYMBOL_REF
3629
      && SYMBOL_REF_FUNCTION_P (x))
3630
    {
3631
      static const char * const directive[2][2] = {
3632
          /* 64-bit pointer */  /* 32-bit pointer */
3633
        { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("},  /* unaligned */
3634
        { "\tdata8\t@fptr(",    "\tdata4\t@fptr("}      /* aligned */
3635
      };
3636
      fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
3637
      output_addr_const (asm_out_file, x);
3638
      fputs (")\n", asm_out_file);
3639
      return true;
3640
    }
3641
  return default_assemble_integer (x, size, aligned_p);
3642
}
3643
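
/* Worked example (illustrative note, not part of the original source): with
   64-bit pointers, an aligned pointer-sized reference to a function "foo"
   (a placeholder name) selects directive[1][0] above and the output is

       data8   @fptr(foo)

   while an unaligned 32-bit reference selects directive[0][1] and the
   output is "data4.ua  @fptr(foo)".  */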
 
3644
/* Emit the function prologue.  */
3645
 
3646
static void
3647
ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3648
{
3649
  int mask, grsave, grsave_prev;
3650
 
3651
  if (current_frame_info.need_regstk)
3652
    fprintf (file, "\t.regstk %d, %d, %d, %d\n",
3653
             current_frame_info.n_input_regs,
3654
             current_frame_info.n_local_regs,
3655
             current_frame_info.n_output_regs,
3656
             current_frame_info.n_rotate_regs);
3657
 
3658
  if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3659
    return;
3660
 
3661
  /* Emit the .prologue directive.  */
3662
 
3663
  mask = 0;
3664
  grsave = grsave_prev = 0;
3665
  if (current_frame_info.reg_save_b0 != 0)
3666
    {
3667
      mask |= 8;
3668
      grsave = grsave_prev = current_frame_info.reg_save_b0;
3669
    }
3670
  if (current_frame_info.reg_save_ar_pfs != 0
3671
      && (grsave_prev == 0
3672
          || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
3673
    {
3674
      mask |= 4;
3675
      if (grsave_prev == 0)
3676
        grsave = current_frame_info.reg_save_ar_pfs;
3677
      grsave_prev = current_frame_info.reg_save_ar_pfs;
3678
    }
3679
  if (current_frame_info.reg_fp != 0
3680
      && (grsave_prev == 0
3681
          || current_frame_info.reg_fp == grsave_prev + 1))
3682
    {
3683
      mask |= 2;
3684
      if (grsave_prev == 0)
3685
        grsave = HARD_FRAME_POINTER_REGNUM;
3686
      grsave_prev = current_frame_info.reg_fp;
3687
    }
3688
  if (current_frame_info.reg_save_pr != 0
3689
      && (grsave_prev == 0
3690
          || current_frame_info.reg_save_pr == grsave_prev + 1))
3691
    {
3692
      mask |= 1;
3693
      if (grsave_prev == 0)
3694
        grsave = current_frame_info.reg_save_pr;
3695
    }
3696
 
3697
  if (mask && TARGET_GNU_AS)
3698
    fprintf (file, "\t.prologue %d, %d\n", mask,
3699
             ia64_dbx_register_number (grsave));
3700
  else
3701
    fputs ("\t.prologue\n", file);
3702
 
3703
  /* Emit a .spill directive, if necessary, to relocate the base of
3704
     the register spill area.  */
3705
  if (current_frame_info.spill_cfa_off != -16)
3706
    fprintf (file, "\t.spill %ld\n",
3707
             (long) (current_frame_info.spill_cfa_off
3708
                     + current_frame_info.spill_size));
3709
}
3710
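
/* Worked example (illustrative note, not part of the original source): if
   b0 is saved in some general register and ar.pfs is saved in the very next
   one, the code above computes mask = 8 | 4 = 12 and grsave is the register
   holding b0, so with the GNU assembler the directive emitted is

       .prologue 12, <ia64_dbx_register_number (grsave)>

   If mask ends up zero, or the Intel assembler is in use, a plain
   ".prologue" is emitted instead.  */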
 
3711
/* Emit the .body directive at the scheduled end of the prologue.  */
3712
 
3713
static void
3714
ia64_output_function_end_prologue (FILE *file)
3715
{
3716
  if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3717
    return;
3718
 
3719
  fputs ("\t.body\n", file);
3720
}
3721
 
3722
/* Emit the function epilogue.  */
3723
 
3724
static void
3725
ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
3726
                               HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3727
{
3728
  int i;
3729
 
3730
  if (current_frame_info.reg_fp)
3731
    {
3732
      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3733
      reg_names[HARD_FRAME_POINTER_REGNUM]
3734
        = reg_names[current_frame_info.reg_fp];
3735
      reg_names[current_frame_info.reg_fp] = tmp;
3736
    }
3737
  if (! TARGET_REG_NAMES)
3738
    {
3739
      for (i = 0; i < current_frame_info.n_input_regs; i++)
3740
        reg_names[IN_REG (i)] = ia64_input_reg_names[i];
3741
      for (i = 0; i < current_frame_info.n_local_regs; i++)
3742
        reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
3743
      for (i = 0; i < current_frame_info.n_output_regs; i++)
3744
        reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
3745
    }
3746
 
3747
  current_frame_info.initialized = 0;
3748
}
3749
 
3750
int
3751
ia64_dbx_register_number (int regno)
3752
{
3753
  /* In ia64_expand_prologue we quite literally renamed the frame pointer
3754
     from its home at loc79 to something inside the register frame.  We
3755
     must perform the same renumbering here for the debug info.  */
3756
  if (current_frame_info.reg_fp)
3757
    {
3758
      if (regno == HARD_FRAME_POINTER_REGNUM)
3759
        regno = current_frame_info.reg_fp;
3760
      else if (regno == current_frame_info.reg_fp)
3761
        regno = HARD_FRAME_POINTER_REGNUM;
3762
    }
3763
 
3764
  if (IN_REGNO_P (regno))
3765
    return 32 + regno - IN_REG (0);
3766
  else if (LOC_REGNO_P (regno))
3767
    return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
3768
  else if (OUT_REGNO_P (regno))
3769
    return (32 + current_frame_info.n_input_regs
3770
            + current_frame_info.n_local_regs + regno - OUT_REG (0));
3771
  else
3772
    return regno;
3773
}
3774
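
/* Worked example (illustrative note, not part of the original source): with
   2 input registers and 3 local registers, and leaving the frame-pointer
   swap aside, the mapping above gives

       in1  -> 32 + 1          = 33
       loc2 -> 32 + 2 + 2      = 36
       out0 -> 32 + 2 + 3 + 0  = 37

   i.e. the stacked registers are renumbered densely from 32 in the order
   inputs, locals, outputs; all other registers keep their own numbers.  */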
 
3775
void
3776
ia64_initialize_trampoline (rtx addr, rtx fnaddr, rtx static_chain)
3777
{
3778
  rtx addr_reg, eight = GEN_INT (8);
3779
 
3780
  /* The Intel assembler requires that the global __ia64_trampoline symbol
3781
     be declared explicitly.  */
3782
  if (!TARGET_GNU_AS)
3783
    {
3784
      static bool declared_ia64_trampoline = false;
3785
 
3786
      if (!declared_ia64_trampoline)
3787
        {
3788
          declared_ia64_trampoline = true;
3789
          (*targetm.asm_out.globalize_label) (asm_out_file,
3790
                                              "__ia64_trampoline");
3791
        }
3792
    }
3793
 
3794
  /* Make sure addresses are Pmode even if we are in ILP32 mode. */
3795
  addr = convert_memory_address (Pmode, addr);
3796
  fnaddr = convert_memory_address (Pmode, fnaddr);
3797
  static_chain = convert_memory_address (Pmode, static_chain);
3798
 
3799
  /* Load up our iterator.  */
3800
  addr_reg = gen_reg_rtx (Pmode);
3801
  emit_move_insn (addr_reg, addr);
3802
 
3803
  /* The first two words are the fake descriptor:
3804
     __ia64_trampoline, ADDR+16.  */
3805
  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3806
                  gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
3807
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3808
 
3809
  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3810
                  copy_to_reg (plus_constant (addr, 16)));
3811
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3812
 
3813
  /* The third word is the target descriptor.  */
3814
  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
3815
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3816
 
3817
  /* The fourth word is the static chain.  */
3818
  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
3819
}
3820
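
/* Layout summary (illustrative note, not part of the original source): the
   stores above write four words, 8 bytes apart, starting at ADDR:

       ADDR +  0:  __ia64_trampoline   \  fake function descriptor
       ADDR +  8:  ADDR + 16           /  (entry point, gp)
       ADDR + 16:  fnaddr
       ADDR + 24:  static_chain

   so an indirect call through the descriptor at ADDR enters
   __ia64_trampoline with gp = ADDR + 16, where the real target and the
   static chain can be found.  */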
 
3821
/* Do any needed setup for a variadic function.  CUM has not been updated
3822
   for the last named argument which has type TYPE and mode MODE.
3823
 
3824
   We generate the actual spill instructions during prologue generation.  */
3825
 
3826
static void
3827
ia64_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3828
                             tree type, int * pretend_size,
3829
                             int second_time ATTRIBUTE_UNUSED)
3830
{
3831
  CUMULATIVE_ARGS next_cum = *cum;
3832
 
3833
  /* Skip the current argument.  */
3834
  ia64_function_arg_advance (&next_cum, mode, type, 1);
3835
 
3836
  if (next_cum.words < MAX_ARGUMENT_SLOTS)
3837
    {
3838
      int n = MAX_ARGUMENT_SLOTS - next_cum.words;
3839
      *pretend_size = n * UNITS_PER_WORD;
3840
      cfun->machine->n_varargs = n;
3841
    }
3842
}
3843
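
/* Worked example (illustrative note, not part of the original source): for
   a variadic function whose named arguments occupy 2 of the 8 argument
   slots, next_cum.words is 2 after the advance above, so n = 8 - 2 = 6 and,
   with 8-byte words, 48 bytes of pretend arguments are reported; the
   prologue later spills those 6 remaining incoming GR argument registers
   where va_arg can find them.  */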
 
3844
/* Check whether TYPE is a homogeneous floating point aggregate.  If
3845
   it is, return the mode of the floating point type that appears
3846
   in all leafs.  If it is not, return VOIDmode.
3847
 
3848
   An aggregate is a homogeneous floating point aggregate if all
3849
   fields/elements in it have the same floating point type (e.g.,
3850
   SFmode).  128-bit quad-precision floats are excluded.
3851
 
3852
   Variable sized aggregates should never arrive here, since we should
3853
   have already decided to pass them by reference.  Top-level zero-sized
3854
   aggregates are excluded because our parallels crash the middle-end.  */
3855
 
3856
static enum machine_mode
3857
hfa_element_mode (tree type, bool nested)
3858
{
3859
  enum machine_mode element_mode = VOIDmode;
3860
  enum machine_mode mode;
3861
  enum tree_code code = TREE_CODE (type);
3862
  int know_element_mode = 0;
3863
  tree t;
3864
 
3865
  if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
3866
    return VOIDmode;
3867
 
3868
  switch (code)
3869
    {
3870
    case VOID_TYPE:     case INTEGER_TYPE:      case ENUMERAL_TYPE:
3871
    case BOOLEAN_TYPE:  case POINTER_TYPE:
3872
    case OFFSET_TYPE:   case REFERENCE_TYPE:    case METHOD_TYPE:
3873
    case LANG_TYPE:             case FUNCTION_TYPE:
3874
      return VOIDmode;
3875
 
3876
      /* Fortran complex types are supposed to be HFAs, so we need to handle
3877
         gcc's COMPLEX_TYPEs as HFAs.  We need to exclude the integral complex
3878
         types though.  */
3879
    case COMPLEX_TYPE:
3880
      if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
3881
          && TYPE_MODE (type) != TCmode)
3882
        return GET_MODE_INNER (TYPE_MODE (type));
3883
      else
3884
        return VOIDmode;
3885
 
3886
    case REAL_TYPE:
3887
      /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
3888
         mode if this is contained within an aggregate.  */
3889
      if (nested && TYPE_MODE (type) != TFmode)
3890
        return TYPE_MODE (type);
3891
      else
3892
        return VOIDmode;
3893
 
3894
    case ARRAY_TYPE:
3895
      return hfa_element_mode (TREE_TYPE (type), 1);
3896
 
3897
    case RECORD_TYPE:
3898
    case UNION_TYPE:
3899
    case QUAL_UNION_TYPE:
3900
      for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
3901
        {
3902
          if (TREE_CODE (t) != FIELD_DECL)
3903
            continue;
3904
 
3905
          mode = hfa_element_mode (TREE_TYPE (t), 1);
3906
          if (know_element_mode)
3907
            {
3908
              if (mode != element_mode)
3909
                return VOIDmode;
3910
            }
3911
          else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
3912
            return VOIDmode;
3913
          else
3914
            {
3915
              know_element_mode = 1;
3916
              element_mode = mode;
3917
            }
3918
        }
3919
      return element_mode;
3920
 
3921
    default:
3922
      /* If we reach here, we probably have some front-end specific type
3923
         that the backend doesn't know about.  This can happen via the
3924
         aggregate_value_p call in init_function_start.  All we can do is
3925
         ignore unknown tree types.  */
3926
      return VOIDmode;
3927
    }
3928
 
3929
  return VOIDmode;
3930
}
3931
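
/* Illustrative examples (not part of the original source) of how the
   classification above behaves on common C types:

     struct { float x, y, z; }                -> SFmode   (all leaves float)
     struct { double r; _Complex double c; }  -> DFmode   (complex leaves use
                                                            their inner mode)
     struct { float x; double y; }            -> VOIDmode (leaf modes differ)
     struct { int a; float b; }               -> VOIDmode (non-FP leaf)  */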
 
3932
/* Return the number of words required to hold a quantity of TYPE and MODE
3933
   when passed as an argument.  */
3934
static int
3935
ia64_function_arg_words (tree type, enum machine_mode mode)
3936
{
3937
  int words;
3938
 
3939
  if (mode == BLKmode)
3940
    words = int_size_in_bytes (type);
3941
  else
3942
    words = GET_MODE_SIZE (mode);
3943
 
3944
  return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;  /* round up */
3945
}
3946
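
/* Illustrative sketch, not part of the original source: the return
   expression above is plain ceiling division by the word size.  The
   disabled fragment below restates it in isolation, assuming the IA-64
   value UNITS_PER_WORD == 8; "example_arg_words" is a name invented for
   this example only.  */
#if 0
static int
example_arg_words (int byte_size)
{
  /* E.g. a 20-byte aggregate needs (20 + 7) / 8 == 3 argument slots.  */
  return (byte_size + 8 - 1) / 8;
}
#endif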
 
3947
/* Return the number of registers that should be skipped so the current
3948
   argument (described by TYPE and WORDS) will be properly aligned.
3949
 
3950
   Integer and float arguments larger than 8 bytes start at the next
3951
   even boundary.  Aggregates larger than 8 bytes start at the next
3952
   even boundary if the aggregate has 16 byte alignment.  Note that
3953
   in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
3954
   but are still to be aligned in registers.
3955
 
3956
   ??? The ABI does not specify how to handle aggregates with
3957
   alignment from 9 to 15 bytes, or greater than 16.  We handle them
3958
   all as if they had 16 byte alignment.  Such aggregates can occur
3959
   only if gcc extensions are used.  */
3960
static int
3961
ia64_function_arg_offset (CUMULATIVE_ARGS *cum, tree type, int words)
3962
{
3963
  if ((cum->words & 1) == 0)
3964
    return 0;
3965
 
3966
  if (type
3967
      && TREE_CODE (type) != INTEGER_TYPE
3968
      && TREE_CODE (type) != REAL_TYPE)
3969
    return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
3970
  else
3971
    return words > 1;
3972
}
3973
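
/* Worked examples (illustrative note, not part of the original source):
   with cum->words odd, a 16-byte-aligned aggregate returns 1 here and is
   pushed to the next even slot; a DImode integer (words == 1) returns 0 and
   stays put; a TFmode value or 16-byte integer (words == 2) also returns 1.
   When cum->words is already even, nothing is ever skipped.  */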
 
3974
/* Return rtx for register where argument is passed, or zero if it is passed
3975
   on the stack.  */
3976
/* ??? 128-bit quad-precision floats are always passed in general
3977
   registers.  */
3978
 
3979
rtx
3980
ia64_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
3981
                   int named, int incoming)
3982
{
3983
  int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
3984
  int words = ia64_function_arg_words (type, mode);
3985
  int offset = ia64_function_arg_offset (cum, type, words);
3986
  enum machine_mode hfa_mode = VOIDmode;
3987
 
3988
  /* If all argument slots are used, then it must go on the stack.  */
3989
  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3990
    return 0;
3991
 
3992
  /* Check for and handle homogeneous FP aggregates.  */
3993
  if (type)
3994
    hfa_mode = hfa_element_mode (type, 0);
3995
 
3996
  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
3997
     and unprototyped hfas are passed specially.  */
3998
  if (hfa_mode != VOIDmode && (! cum->prototype || named))
3999
    {
4000
      rtx loc[16];
4001
      int i = 0;
4002
      int fp_regs = cum->fp_regs;
4003
      int int_regs = cum->words + offset;
4004
      int hfa_size = GET_MODE_SIZE (hfa_mode);
4005
      int byte_size;
4006
      int args_byte_size;
4007
 
4008
      /* If prototyped, pass it in FR regs then GR regs.
4009
         If not prototyped, pass it in both FR and GR regs.
4010
 
4011
         If this is an SFmode aggregate, then it is possible to run out of
4012
         FR regs while GR regs are still left.  In that case, we pass the
4013
         remaining part in the GR regs.  */
4014
 
4015
      /* Fill the FP regs.  We do this always.  We stop if we reach the end
4016
         of the argument, the last FP register, or the last argument slot.  */
4017
 
4018
      byte_size = ((mode == BLKmode)
4019
                   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4020
      args_byte_size = int_regs * UNITS_PER_WORD;
4021
      offset = 0;
4022
      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4023
              && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
4024
        {
4025
          loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4026
                                      gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
4027
                                                              + fp_regs)),
4028
                                      GEN_INT (offset));
4029
          offset += hfa_size;
4030
          args_byte_size += hfa_size;
4031
          fp_regs++;
4032
        }
4033
 
4034
      /* If no prototype, then the whole thing must go in GR regs.  */
4035
      if (! cum->prototype)
4036
        offset = 0;
4037
      /* If this is an SFmode aggregate, then we might have some left over
4038
         that needs to go in GR regs.  */
4039
      else if (byte_size != offset)
4040
        int_regs += offset / UNITS_PER_WORD;
4041
 
4042
      /* Fill in the GR regs.  We must use DImode here, not the hfa mode.  */
4043
 
4044
      for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
4045
        {
4046
          enum machine_mode gr_mode = DImode;
4047
          unsigned int gr_size;
4048
 
4049
          /* If we have an odd 4 byte hunk because we ran out of FR regs,
4050
             then this goes in a GR reg left adjusted/little endian, right
4051
             adjusted/big endian.  */
4052
          /* ??? Currently this is handled wrong, because 4-byte hunks are
4053
             always right adjusted/little endian.  */
4054
          if (offset & 0x4)
4055
            gr_mode = SImode;
4056
          /* If we have an even 4 byte hunk because the aggregate is a
4057
             multiple of 4 bytes in size, then this goes in a GR reg right
4058
             adjusted/little endian.  */
4059
          else if (byte_size - offset == 4)
4060
            gr_mode = SImode;
4061
 
4062
          loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4063
                                      gen_rtx_REG (gr_mode, (basereg
4064
                                                             + int_regs)),
4065
                                      GEN_INT (offset));
4066
 
4067
          gr_size = GET_MODE_SIZE (gr_mode);
4068
          offset += gr_size;
4069
          if (gr_size == UNITS_PER_WORD
4070
              || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
4071
            int_regs++;
4072
          else if (gr_size > UNITS_PER_WORD)
4073
            int_regs += gr_size / UNITS_PER_WORD;
4074
        }
4075
      return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4076
    }
4077
 
4078
  /* Integral types and aggregates go in general registers.  If we have run
4079
     out of FR registers, then FP values must also go in general registers.
4080
     This can happen when we have an SFmode HFA.  */
4081
  else if (mode == TFmode || mode == TCmode
4082
           || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4083
    {
4084
      int byte_size = ((mode == BLKmode)
4085
                       ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4086
      if (BYTES_BIG_ENDIAN
4087
        && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4088
        && byte_size < UNITS_PER_WORD
4089
        && byte_size > 0)
4090
        {
4091
          rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4092
                                          gen_rtx_REG (DImode,
4093
                                                       (basereg + cum->words
4094
                                                        + offset)),
4095
                                          const0_rtx);
4096
          return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4097
        }
4098
      else
4099
        return gen_rtx_REG (mode, basereg + cum->words + offset);
4100
 
4101
    }
4102
 
4103
  /* If there is a prototype, then FP values go in a FR register when
4104
     named, and in a GR register when unnamed.  */
4105
  else if (cum->prototype)
4106
    {
4107
      if (named)
4108
        return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
4109
      /* In big-endian mode, an anonymous SFmode value must be represented
4110
         as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
4111
         the value into the high half of the general register.  */
4112
      else if (BYTES_BIG_ENDIAN && mode == SFmode)
4113
        return gen_rtx_PARALLEL (mode,
4114
                 gen_rtvec (1,
4115
                   gen_rtx_EXPR_LIST (VOIDmode,
4116
                     gen_rtx_REG (DImode, basereg + cum->words + offset),
4117
                                      const0_rtx)));
4118
      else
4119
        return gen_rtx_REG (mode, basereg + cum->words + offset);
4120
    }
4121
  /* If there is no prototype, then FP values go in both FR and GR
4122
     registers.  */
4123
  else
4124
    {
4125
      /* See comment above.  */
4126
      enum machine_mode inner_mode =
4127
        (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
4128
 
4129
      rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
4130
                                      gen_rtx_REG (mode, (FR_ARG_FIRST
4131
                                                          + cum->fp_regs)),
4132
                                      const0_rtx);
4133
      rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4134
                                      gen_rtx_REG (inner_mode,
4135
                                                   (basereg + cum->words
4136
                                                    + offset)),
4137
                                      const0_rtx);
4138
 
4139
      return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
4140
    }
4141
}
4142
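
/* Worked example (illustrative note, not part of the original source): for
   a named, prototyped HFA of four floats (16 bytes) arriving with
   cum->words == 0 and cum->fp_regs == 0, the FR loop above fills the first
   four FP argument registers at offsets 0, 4, 8 and 12 and the function
   returns, schematically,

       (parallel [(expr_list (reg:SF <FR_ARG_FIRST+0>) (const_int 0))
                  (expr_list (reg:SF <FR_ARG_FIRST+1>) (const_int 4))
                  (expr_list (reg:SF <FR_ARG_FIRST+2>) (const_int 8))
                  (expr_list (reg:SF <FR_ARG_FIRST+3>) (const_int 12))])

   with no GR part, since byte_size == offset once the FR loop finishes.  */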
 
4143
/* Return number of bytes, at the beginning of the argument, that must be
4144
   put in registers.  0 if the argument is entirely in registers or entirely
4145
   in memory.  */
4146
 
4147
static int
4148
ia64_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4149
                        tree type, bool named ATTRIBUTE_UNUSED)
4150
{
4151
  int words = ia64_function_arg_words (type, mode);
4152
  int offset = ia64_function_arg_offset (cum, type, words);
4153
 
4154
  /* If all argument slots are used, then it must go on the stack.  */
4155
  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4156
    return 0;
4157
 
4158
  /* It doesn't matter whether the argument goes in FR or GR regs.  If
4159
     it fits within the 8 argument slots, then it goes entirely in
4160
     registers.  If it extends past the last argument slot, then the rest
4161
     goes on the stack.  */
4162
 
4163
  if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
4164
    return 0;
4165
 
4166
  return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
4167
}
4168
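
/* Worked example (illustrative note, not part of the original source): with
   cum->words == 6, no alignment skip, and a 32-byte aggregate (4 words),
   6 + 4 exceeds the 8 argument slots, so (8 - 6) * 8 = 16 bytes travel in
   registers and the remaining 16 bytes go to the stack.  An argument that
   fits entirely within the slots, or one that starts past them, yields 0.  */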
 
4169
/* Update CUM to point after this argument.  This is patterned after
4170
   ia64_function_arg.  */
4171
 
4172
void
4173
ia64_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4174
                           tree type, int named)
4175
{
4176
  int words = ia64_function_arg_words (type, mode);
4177
  int offset = ia64_function_arg_offset (cum, type, words);
4178
  enum machine_mode hfa_mode = VOIDmode;
4179
 
4180
  /* If all arg slots are already full, then there is nothing to do.  */
4181
  if (cum->words >= MAX_ARGUMENT_SLOTS)
4182
    return;
4183
 
4184
  cum->words += words + offset;
4185
 
4186
  /* Check for and handle homogeneous FP aggregates.  */
4187
  if (type)
4188
    hfa_mode = hfa_element_mode (type, 0);
4189
 
4190
  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
4191
     and unprototyped hfas are passed specially.  */
4192
  if (hfa_mode != VOIDmode && (! cum->prototype || named))
4193
    {
4194
      int fp_regs = cum->fp_regs;
4195
      /* This is the original value of cum->words + offset.  */
4196
      int int_regs = cum->words - words;
4197
      int hfa_size = GET_MODE_SIZE (hfa_mode);
4198
      int byte_size;
4199
      int args_byte_size;
4200
 
4201
      /* If prototyped, pass it in FR regs then GR regs.
4202
         If not prototyped, pass it in both FR and GR regs.
4203
 
4204
         If this is an SFmode aggregate, then it is possible to run out of
4205
         FR regs while GR regs are still left.  In that case, we pass the
4206
         remaining part in the GR regs.  */
4207
 
4208
      /* Fill the FP regs.  We do this always.  We stop if we reach the end
4209
         of the argument, the last FP register, or the last argument slot.  */
4210
 
4211
      byte_size = ((mode == BLKmode)
4212
                   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4213
      args_byte_size = int_regs * UNITS_PER_WORD;
4214
      offset = 0;
4215
      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4216
              && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
4217
        {
4218
          offset += hfa_size;
4219
          args_byte_size += hfa_size;
4220
          fp_regs++;
4221
        }
4222
 
4223
      cum->fp_regs = fp_regs;
4224
    }
4225
 
4226
  /* Integral types and aggregates go in general registers.  So do TFmode FP
4227
     values.  If we have run out of FR registers, then other FP values must also
4228
     go in general registers.  This can happen when we have an SFmode HFA.  */
4229
  else if (mode == TFmode || mode == TCmode
4230
           || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4231
    cum->int_regs = cum->words;
4232
 
4233
  /* If there is a prototype, then FP values go in a FR register when
4234
     named, and in a GR register when unnamed.  */
4235
  else if (cum->prototype)
4236
    {
4237
      if (! named)
4238
        cum->int_regs = cum->words;
4239
      else
4240
        /* ??? Complex types should not reach here.  */
4241
        cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4242
    }
4243
  /* If there is no prototype, then FP values go in both FR and GR
4244
     registers.  */
4245
  else
4246
    {
4247
      /* ??? Complex types should not reach here.  */
4248
      cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4249
      cum->int_regs = cum->words;
4250
    }
4251
}
4252
 
4253
/* Arguments with alignment larger than 8 bytes start at the next even
4254
   boundary.  On ILP32 HPUX, TFmode arguments start on the next even boundary
4255
   even though their normal alignment is 8 bytes.  See ia64_function_arg.  */
4256
 
4257
int
4258
ia64_function_arg_boundary (enum machine_mode mode, tree type)
4259
{
4260
 
4261
  if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
4262
    return PARM_BOUNDARY * 2;
4263
 
4264
  if (type)
4265
    {
4266
      if (TYPE_ALIGN (type) > PARM_BOUNDARY)
4267
        return PARM_BOUNDARY * 2;
4268
      else
4269
        return PARM_BOUNDARY;
4270
    }
4271
 
4272
  if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
4273
    return PARM_BOUNDARY * 2;
4274
  else
4275
    return PARM_BOUNDARY;
4276
}
4277
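
/* Worked examples (illustrative note, not part of the original source):
   with PARM_BOUNDARY of 64 bits, a 16-byte-aligned struct, or a TFmode
   argument on ILP32 HP-UX, gets a 128-bit boundary here, while an ordinary
   int, double or 8-byte-aligned struct keeps the default 64-bit boundary.  */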
 
4278
/* True if it is OK to do sibling call optimization for the specified
4279
   call expression EXP.  DECL will be the called function, or NULL if
4280
   this is an indirect call.  */
4281
static bool
4282
ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
4283
{
4284
  /* We can't perform a sibcall if the current function has the syscall_linkage
4285
     attribute.  */
4286
  if (lookup_attribute ("syscall_linkage",
4287
                        TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
4288
    return false;
4289
 
4290
  /* We must always return with our current GP.  This means we can
4291
     only sibcall to functions defined in the current module.  */
4292
  return decl && (*targetm.binds_local_p) (decl);
4293
}
4294
 
4295
 
4296
/* Implement va_arg.  */
4297
 
4298
static tree
4299
ia64_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4300
{
4301
  /* Variable sized types are passed by reference.  */
4302
  if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
4303
    {
4304
      tree ptrtype = build_pointer_type (type);
4305
      tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
4306
      return build_va_arg_indirect_ref (addr);
4307
    }
4308
 
4309
  /* Aggregate arguments with alignment larger than 8 bytes start at
4310
     the next even boundary.  Integer and floating point arguments
4311
     do so if they are larger than 8 bytes, whether or not they are
4312
     also aligned larger than 8 bytes.  */
4313
  if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
4314
      ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
4315
    {
4316
      tree t = build2 (PLUS_EXPR, TREE_TYPE (valist), valist,
4317
                       build_int_cst (NULL_TREE, 2 * UNITS_PER_WORD - 1));
4318
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4319
                  build_int_cst (NULL_TREE, -2 * UNITS_PER_WORD));
4320
      t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
4321
      gimplify_and_add (t, pre_p);
4322
    }
4323
 
4324
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4325
}
4326
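
/* Worked example (illustrative note, not part of the original source): for
   a 16-byte-aligned aggregate, the statements above gimplify to the pointer
   arithmetic

       valist = (valist + 15) & -16;

   since 2 * UNITS_PER_WORD == 16, so a va_list currently 8 mod 16 is bumped
   to the next 16-byte boundary before the standard va_arg expansion runs.  */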
 
4327
/* Return 1 if the function return value is returned in memory.  Return 0 if
4328
   it is in a register.  */
4329
 
4330
static bool
4331
ia64_return_in_memory (tree valtype, tree fntype ATTRIBUTE_UNUSED)
4332
{
4333
  enum machine_mode mode;
4334
  enum machine_mode hfa_mode;
4335
  HOST_WIDE_INT byte_size;
4336
 
4337
  mode = TYPE_MODE (valtype);
4338
  byte_size = GET_MODE_SIZE (mode);
4339
  if (mode == BLKmode)
4340
    {
4341
      byte_size = int_size_in_bytes (valtype);
4342
      if (byte_size < 0)
4343
        return true;
4344
    }
4345
 
4346
  /* HFAs with up to 8 elements are returned in the FP argument registers.  */
4347
 
4348
  hfa_mode = hfa_element_mode (valtype, 0);
4349
  if (hfa_mode != VOIDmode)
4350
    {
4351
      int hfa_size = GET_MODE_SIZE (hfa_mode);
4352
 
4353
      if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
4354
        return true;
4355
      else
4356
        return false;
4357
    }
4358
  else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
4359
    return true;
4360
  else
4361
    return false;
4362
}
4363
 
4364
/* Return rtx for register that holds the function return value.  */
4365
 
4366
rtx
4367
ia64_function_value (tree valtype, tree func ATTRIBUTE_UNUSED)
4368
{
4369
  enum machine_mode mode;
4370
  enum machine_mode hfa_mode;
4371
 
4372
  mode = TYPE_MODE (valtype);
4373
  hfa_mode = hfa_element_mode (valtype, 0);
4374
 
4375
  if (hfa_mode != VOIDmode)
4376
    {
4377
      rtx loc[8];
4378
      int i;
4379
      int hfa_size;
4380
      int byte_size;
4381
      int offset;
4382
 
4383
      hfa_size = GET_MODE_SIZE (hfa_mode);
4384
      byte_size = ((mode == BLKmode)
4385
                   ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
4386
      offset = 0;
4387
      for (i = 0; offset < byte_size; i++)
4388
        {
4389
          loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4390
                                      gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
4391
                                      GEN_INT (offset));
4392
          offset += hfa_size;
4393
        }
4394
      return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4395
    }
4396
  else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
4397
    return gen_rtx_REG (mode, FR_ARG_FIRST);
4398
  else
4399
    {
4400
      bool need_parallel = false;
4401
 
4402
      /* In big-endian mode, we need to manage the layout of aggregates
4403
         in the registers so that we get the bits properly aligned in
4404
         the highpart of the registers.  */
4405
      if (BYTES_BIG_ENDIAN
4406
          && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
4407
        need_parallel = true;
4408
 
4409
      /* Something like struct S { long double x; char a[0] } is not an
4410
         HFA structure, and therefore doesn't go in fp registers.  But
4411
         the middle-end will give it XFmode anyway, and XFmode values
4412
         don't normally fit in integer registers.  So we need to smuggle
4413
         the value inside a parallel.  */
4414
      else if (mode == XFmode || mode == XCmode || mode == RFmode)
4415
        need_parallel = true;
4416
 
4417
      if (need_parallel)
4418
        {
4419
          rtx loc[8];
4420
          int offset;
4421
          int bytesize;
4422
          int i;
4423
 
4424
          offset = 0;
4425
          bytesize = int_size_in_bytes (valtype);
4426
          /* An empty PARALLEL is invalid here, but the return value
4427
             doesn't matter for empty structs.  */
4428
          if (bytesize == 0)
4429
            return gen_rtx_REG (mode, GR_RET_FIRST);
4430
          for (i = 0; offset < bytesize; i++)
4431
            {
4432
              loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4433
                                          gen_rtx_REG (DImode,
4434
                                                       GR_RET_FIRST + i),
4435
                                          GEN_INT (offset));
4436
              offset += UNITS_PER_WORD;
4437
            }
4438
          return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4439
        }
4440
 
4441
      return gen_rtx_REG (mode, GR_RET_FIRST);
4442
    }
4443
}
4444
 
4445
/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
4446
   We need to emit DTP-relative relocations.  */
4447
 
4448
static void
4449
ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
4450
{
4451
  gcc_assert (size == 4 || size == 8);
4452
  if (size == 4)
4453
    fputs ("\tdata4.ua\t@dtprel(", file);
4454
  else
4455
    fputs ("\tdata8.ua\t@dtprel(", file);
4456
  output_addr_const (file, x);
4457
  fputs (")", file);
4458
}
4459
 
4460
/* Print a memory address as an operand to reference that memory location.  */
4461
 
4462
/* ??? Do we need this?  It gets used only for 'a' operands.  We could perhaps
4463
   also call this from ia64_print_operand for memory addresses.  */
4464
 
4465
void
4466
ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
4467
                            rtx address ATTRIBUTE_UNUSED)
4468
{
4469
}
4470
 
4471
/* Print an operand to an assembler instruction.
4472
   C    Swap and print a comparison operator.
4473
   D    Print an FP comparison operator.
4474
   E    Print 32 - constant, for SImode shifts as extract.
4475
   e    Print 64 - constant, for DImode rotates.
4476
   F    A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
4477
        a floating point register emitted normally.
4478
   I    Invert a predicate register by adding 1.
4479
   J    Select the proper predicate register for a condition.
4480
   j    Select the inverse predicate register for a condition.
4481
   O    Append .acq for volatile load.
4482
   P    Postincrement of a MEM.
4483
   Q    Append .rel for volatile store.
4484
   S    Shift amount for shladd instruction.
4485
   T    Print an 8-bit sign extended number (K) as a 32-bit unsigned number
4486
        for Intel assembler.
4487
   U    Print an 8-bit sign extended number (K) as a 64-bit unsigned number
4488
        for Intel assembler.
4489
   X    A pair of floating point registers.
4490
   r    Print register name, or constant 0 as r0.  HP compatibility for
4491
        Linux kernel.
4492
   v    Print vector constant value as an 8-byte integer value.  */
4493
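/* A few illustrative expansions of the codes above (register numbers are
   arbitrary): a volatile DImode load through a POST_INC address prints
   as "ld8.acq r8 = [r9], 8", the ".acq" coming from the O code and the
   ", 8" from the P code; for a NE condition on p6, the J code prints
   "p6" and the j code prints its complement "p7".  */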
 
4494
void
4495
ia64_print_operand (FILE * file, rtx x, int code)
4496
{
4497
  const char *str;
4498
 
4499
  switch (code)
4500
    {
4501
    case 0:
4502
      /* Handled below.  */
4503
      break;
4504
 
4505
    case 'C':
4506
      {
4507
        enum rtx_code c = swap_condition (GET_CODE (x));
4508
        fputs (GET_RTX_NAME (c), file);
4509
        return;
4510
      }
4511
 
4512
    case 'D':
4513
      switch (GET_CODE (x))
4514
        {
4515
        case NE:
4516
          str = "neq";
4517
          break;
4518
        case UNORDERED:
4519
          str = "unord";
4520
          break;
4521
        case ORDERED:
4522
          str = "ord";
4523
          break;
4524
        default:
4525
          str = GET_RTX_NAME (GET_CODE (x));
4526
          break;
4527
        }
4528
      fputs (str, file);
4529
      return;
4530
 
4531
    case 'E':
4532
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
4533
      return;
4534
 
4535
    case 'e':
4536
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
4537
      return;
4538
 
4539
    case 'F':
4540
      if (x == CONST0_RTX (GET_MODE (x)))
4541
        str = reg_names [FR_REG (0)];
4542
      else if (x == CONST1_RTX (GET_MODE (x)))
4543
        str = reg_names [FR_REG (1)];
4544
      else
4545
        {
4546
          gcc_assert (GET_CODE (x) == REG);
4547
          str = reg_names [REGNO (x)];
4548
        }
4549
      fputs (str, file);
4550
      return;
4551
 
4552
    case 'I':
4553
      fputs (reg_names [REGNO (x) + 1], file);
4554
      return;
4555
 
4556
    case 'J':
4557
    case 'j':
4558
      {
4559
        unsigned int regno = REGNO (XEXP (x, 0));
4560
        if (GET_CODE (x) == EQ)
4561
          regno += 1;
4562
        if (code == 'j')
4563
          regno ^= 1;
4564
        fputs (reg_names [regno], file);
4565
      }
4566
      return;
4567
 
4568
    case 'O':
4569
      if (MEM_VOLATILE_P (x))
4570
        fputs(".acq", file);
4571
      return;
4572
 
4573
    case 'P':
4574
      {
4575
        HOST_WIDE_INT value;
4576
 
4577
        switch (GET_CODE (XEXP (x, 0)))
4578
          {
4579
          default:
4580
            return;
4581
 
4582
          case POST_MODIFY:
4583
            x = XEXP (XEXP (XEXP (x, 0), 1), 1);
4584
            if (GET_CODE (x) == CONST_INT)
4585
              value = INTVAL (x);
4586
            else
4587
              {
4588
                gcc_assert (GET_CODE (x) == REG);
4589
                fprintf (file, ", %s", reg_names[REGNO (x)]);
4590
                return;
4591
              }
4592
            break;
4593
 
4594
          case POST_INC:
4595
            value = GET_MODE_SIZE (GET_MODE (x));
4596
            break;
4597
 
4598
          case POST_DEC:
4599
            value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
4600
            break;
4601
          }
4602
 
4603
        fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
4604
        return;
4605
      }
4606
 
4607
    case 'Q':
4608
      if (MEM_VOLATILE_P (x))
4609
        fputs(".rel", file);
4610
      return;
4611
 
4612
    case 'S':
4613
      fprintf (file, "%d", exact_log2 (INTVAL (x)));
4614
      return;
4615
 
4616
    case 'T':
4617
      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4618
        {
4619
          fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
4620
          return;
4621
        }
4622
      break;
4623
 
4624
    case 'U':
4625
      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4626
        {
4627
          const char *prefix = "0x";
4628
          if (INTVAL (x) & 0x80000000)
4629
            {
4630
              fprintf (file, "0xffffffff");
4631
              prefix = "";
4632
            }
4633
          fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
4634
          return;
4635
        }
4636
      break;
4637
 
4638
    case 'X':
4639
      {
4640
        unsigned int regno = REGNO (x);
4641
        fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]);
4642
      }
4643
      return;
4644
 
4645
    case 'r':
4646
      /* If this operand is the constant zero, write it as register zero.
4647
         Any register, zero, or CONST_INT value is OK here.  */
4648
      if (GET_CODE (x) == REG)
4649
        fputs (reg_names[REGNO (x)], file);
4650
      else if (x == CONST0_RTX (GET_MODE (x)))
4651
        fputs ("r0", file);
4652
      else if (GET_CODE (x) == CONST_INT)
4653
        output_addr_const (file, x);
4654
      else
4655
        output_operand_lossage ("invalid %%r value");
4656
      return;
4657
 
4658
    case 'v':
4659
      gcc_assert (GET_CODE (x) == CONST_VECTOR);
4660
      x = simplify_subreg (DImode, x, GET_MODE (x), 0);
4661
      break;
4662
 
4663
    case '+':
4664
      {
4665
        const char *which;
4666
 
4667
        /* For conditional branches, returns or calls, substitute
4668
           sptk, dptk, dpnt, or spnt for %s.  */
4669
        x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
4670
        if (x)
4671
          {
4672
            int pred_val = INTVAL (XEXP (x, 0));
4673
 
4674
            /* Guess branches in the bottom or top 2% to be statically predicted.  */
4675
            if (pred_val < REG_BR_PROB_BASE / 50
4676
                && br_prob_note_reliable_p (x))
4677
              which = ".spnt";
4678
            else if (pred_val < REG_BR_PROB_BASE / 2)
4679
              which = ".dpnt";
4680
            else if (pred_val < REG_BR_PROB_BASE / 100 * 98
4681
                     || !br_prob_note_reliable_p (x))
4682
              which = ".dptk";
4683
            else
4684
              which = ".sptk";
4685
          }
4686
        else if (GET_CODE (current_output_insn) == CALL_INSN)
4687
          which = ".sptk";
4688
        else
4689
          which = ".dptk";
4690
 
4691
        fputs (which, file);
4692
        return;
4693
      }
4694
 
4695
    case ',':
4696
      x = current_insn_predicate;
4697
      if (x)
4698
        {
4699
          unsigned int regno = REGNO (XEXP (x, 0));
4700
          if (GET_CODE (x) == EQ)
4701
            regno += 1;
4702
          fprintf (file, "(%s) ", reg_names [regno]);
4703
        }
4704
      return;
4705
 
4706
    default:
4707
      output_operand_lossage ("ia64_print_operand: unknown code");
4708
      return;
4709
    }
4710
 
4711
  switch (GET_CODE (x))
4712
    {
4713
      /* This happens for the spill/restore instructions.  */
4714
    case POST_INC:
4715
    case POST_DEC:
4716
    case POST_MODIFY:
4717
      x = XEXP (x, 0);
4718
      /* ... fall through ...  */
4719
 
4720
    case REG:
4721
      fputs (reg_names [REGNO (x)], file);
4722
      break;
4723
 
4724
    case MEM:
4725
      {
4726
        rtx addr = XEXP (x, 0);
4727
        if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
4728
          addr = XEXP (addr, 0);
4729
        fprintf (file, "[%s]", reg_names [REGNO (addr)]);
4730
        break;
4731
      }
4732
 
4733
    default:
4734
      output_addr_const (file, x);
4735
      break;
4736
    }
4737
 
4738
  return;
4739
}
4740
 
4741
/* Compute a (partial) cost for rtx X.  Return true if the complete
4742
   cost has been computed, and false if subexpressions should be
4743
   scanned.  In either case, *TOTAL contains the cost result.  */
4744
/* ??? This is incomplete.  */
4745
 
4746
static bool
4747
ia64_rtx_costs (rtx x, int code, int outer_code, int *total)
4748
{
4749
  switch (code)
4750
    {
4751
    case CONST_INT:
4752
      switch (outer_code)
4753
        {
4754
        case SET:
4755
          *total = CONST_OK_FOR_J (INTVAL (x)) ? 0 : COSTS_N_INSNS (1);
4756
          return true;
4757
        case PLUS:
4758
          if (CONST_OK_FOR_I (INTVAL (x)))
4759
            *total = 0;
4760
          else if (CONST_OK_FOR_J (INTVAL (x)))
4761
            *total = 1;
4762
          else
4763
            *total = COSTS_N_INSNS (1);
4764
          return true;
4765
        default:
4766
          if (CONST_OK_FOR_K (INTVAL (x)) || CONST_OK_FOR_L (INTVAL (x)))
4767
            *total = 0;
4768
          else
4769
            *total = COSTS_N_INSNS (1);
4770
          return true;
4771
        }
4772
 
4773
    case CONST_DOUBLE:
4774
      *total = COSTS_N_INSNS (1);
4775
      return true;
4776
 
4777
    case CONST:
4778
    case SYMBOL_REF:
4779
    case LABEL_REF:
4780
      *total = COSTS_N_INSNS (3);
4781
      return true;
4782
 
4783
    case MULT:
4784
      /* For multiplies wider than HImode, we have to go to the FPU,
4785
         which normally involves copies.  Plus there's the latency
4786
         of the multiply itself, and the latency of the instructions to
4787
         transfer integer regs to FP regs.  */
4788
      /* ??? Check for FP mode.  */
4789
      if (GET_MODE_SIZE (GET_MODE (x)) > 2)
4790
        *total = COSTS_N_INSNS (10);
4791
      else
4792
        *total = COSTS_N_INSNS (2);
4793
      return true;
4794
 
4795
    case PLUS:
4796
    case MINUS:
4797
    case ASHIFT:
4798
    case ASHIFTRT:
4799
    case LSHIFTRT:
4800
      *total = COSTS_N_INSNS (1);
4801
      return true;
4802
 
4803
    case DIV:
4804
    case UDIV:
4805
    case MOD:
4806
    case UMOD:
4807
      /* We make divide expensive, so that divide-by-constant will be
4808
         optimized to a multiply.  */
4809
      *total = COSTS_N_INSNS (60);
4810
      return true;
4811
 
4812
    default:
4813
      return false;
4814
    }
4815
}
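/* Some illustrative numbers from the table above, assuming the usual
   ia64 constraint ranges (I = 14-bit, J = 22-bit signed add immediates):
   adding 1000 to a register is free, adding 100000 costs 1 (it still
   fits an addl), adding 0x10000000 costs a full insn to materialize the
   constant, and a DImode multiply is charged 10 insns because it has to
   round-trip through the FP unit.  */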
4816
 
4817
/* Calculate the cost of moving data from a register in class FROM to
4818
   one in class TO, using MODE.  */
4819
 
4820
int
4821
ia64_register_move_cost (enum machine_mode mode, enum reg_class from,
4822
                         enum reg_class to)
4823
{
4824
  /* ADDL_REGS is the same as GR_REGS for movement purposes.  */
4825
  if (to == ADDL_REGS)
4826
    to = GR_REGS;
4827
  if (from == ADDL_REGS)
4828
    from = GR_REGS;
4829
 
4830
  /* All costs are symmetric, so reduce cases by putting the
4831
     lower number class as the destination.  */
4832
  if (from < to)
4833
    {
4834
      enum reg_class tmp = to;
4835
      to = from, from = tmp;
4836
    }
4837
 
4838
  /* Moving from FR<->GR in XFmode must be more expensive than 2,
4839
     so that we get secondary memory reloads.  Between FR_REGS,
4840
     we have to make this at least as expensive as MEMORY_MOVE_COST
4841
     to avoid spectacularly poor register class preferencing.  */
4842
  if (mode == XFmode || mode == RFmode)
4843
    {
4844
      if (to != GR_REGS || from != GR_REGS)
4845
        return MEMORY_MOVE_COST (mode, to, 0);
4846
      else
4847
        return 3;
4848
    }
4849
 
4850
  switch (to)
4851
    {
4852
    case PR_REGS:
4853
      /* Moving between PR registers takes two insns.  */
4854
      if (from == PR_REGS)
4855
        return 3;
4856
      /* Moving between PR and anything but GR is impossible.  */
4857
      if (from != GR_REGS)
4858
        return MEMORY_MOVE_COST (mode, to, 0);
4859
      break;
4860
 
4861
    case BR_REGS:
4862
      /* Moving between BR and anything but GR is impossible.  */
4863
      if (from != GR_REGS && from != GR_AND_BR_REGS)
4864
        return MEMORY_MOVE_COST (mode, to, 0);
4865
      break;
4866
 
4867
    case AR_I_REGS:
4868
    case AR_M_REGS:
4869
      /* Moving between AR and anything but GR is impossible.  */
4870
      if (from != GR_REGS)
4871
        return MEMORY_MOVE_COST (mode, to, 0);
4872
      break;
4873
 
4874
    case GR_REGS:
4875
    case FR_REGS:
4876
    case FP_REGS:
4877
    case GR_AND_FR_REGS:
4878
    case GR_AND_BR_REGS:
4879
    case ALL_REGS:
4880
      break;
4881
 
4882
    default:
4883
      gcc_unreachable ();
4884
    }
4885
 
4886
  return 2;
4887
}
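/* For example, a DImode GR<->GR or GR<->FR move costs 2, a PR<->PR copy
   costs 3 (it takes two insns), and any XFmode or RFmode copy between FR
   and GR is charged the full MEMORY_MOVE_COST so that reload routes it
   through a stack slot.  */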
4888
 
4889
/* Implement PREFERRED_RELOAD_CLASS.  Place additional restrictions on CLASS
4890
   to use when copying X into that class.  */
4891
 
4892
enum reg_class
4893
ia64_preferred_reload_class (rtx x, enum reg_class class)
4894
{
4895
  switch (class)
4896
    {
4897
    case FR_REGS:
4898
    case FP_REGS:
4899
      /* Don't allow volatile mem reloads into floating point registers.
4900
         This is defined to force reload to choose the r/m case instead
4901
         of the f/f case when reloading (set (reg fX) (mem/v)).  */
4902
      if (MEM_P (x) && MEM_VOLATILE_P (x))
4903
        return NO_REGS;
4904
 
4905
      /* Force all unrecognized constants into the constant pool.  */
4906
      if (CONSTANT_P (x))
4907
        return NO_REGS;
4908
      break;
4909
 
4910
    case AR_M_REGS:
4911
    case AR_I_REGS:
4912
      if (!OBJECT_P (x))
4913
        return NO_REGS;
4914
      break;
4915
 
4916
    default:
4917
      break;
4918
    }
4919
 
4920
  return class;
4921
}
4922
 
4923
/* This function returns the register class required for a secondary
4924
   register when copying between one of the registers in CLASS, and X,
4925
   using MODE.  A return value of NO_REGS means that no secondary register
4926
   is required.  */
4927
 
4928
enum reg_class
4929
ia64_secondary_reload_class (enum reg_class class,
4930
                             enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
4931
{
4932
  int regno = -1;
4933
 
4934
  if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
4935
    regno = true_regnum (x);
4936
 
4937
  switch (class)
4938
    {
4939
    case BR_REGS:
4940
    case AR_M_REGS:
4941
    case AR_I_REGS:
4942
      /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
4943
         interaction.  We end up with two pseudos with overlapping lifetimes
4944
         both of which are equiv to the same constant, and both of which need
4945
         to be in BR_REGS.  This seems to be a cse bug.  cse_basic_block_end
4946
         changes depending on the path length, which means the qty_first_reg
4947
         check in make_regs_eqv can give different answers at different times.
4948
         At some point I'll probably need a reload_indi pattern to handle
4949
         this.
4950
 
4951
         We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
4952
         wound up with a FP register from GR_AND_FR_REGS.  Extend that to all
4953
         non-general registers for good measure.  */
4954
      if (regno >= 0 && ! GENERAL_REGNO_P (regno))
4955
        return GR_REGS;
4956
 
4957
      /* This is needed if a pseudo used as a call_operand gets spilled to a
4958
         stack slot.  */
4959
      if (GET_CODE (x) == MEM)
4960
        return GR_REGS;
4961
      break;
4962
 
4963
    case FR_REGS:
4964
    case FP_REGS:
4965
      /* Need to go through general registers to get to other class regs.  */
4966
      if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
4967
        return GR_REGS;
4968
 
4969
      /* This can happen when a paradoxical subreg is an operand to the
4970
         muldi3 pattern.  */
4971
      /* ??? This shouldn't be necessary after instruction scheduling is
4972
         enabled, because paradoxical subregs are not accepted by
4973
         register_operand when INSN_SCHEDULING is defined.  Or alternatively,
4974
         stop the paradoxical subreg stupidity in the *_operand functions
4975
         in recog.c.  */
4976
      if (GET_CODE (x) == MEM
4977
          && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
4978
              || GET_MODE (x) == QImode))
4979
        return GR_REGS;
4980
 
4981
      /* This can happen because of the ior/and/etc patterns that accept FP
4982
         registers as operands.  If the third operand is a constant, then it
4983
         needs to be reloaded into a FP register.  */
4984
      if (GET_CODE (x) == CONST_INT)
4985
        return GR_REGS;
4986
 
4987
      /* This can happen because of register elimination in a muldi3 insn.
4988
         E.g. `26107 * (unsigned long)&u'.  */
4989
      if (GET_CODE (x) == PLUS)
4990
        return GR_REGS;
4991
      break;
4992
 
4993
    case PR_REGS:
4994
      /* ??? This happens if we cse/gcse a BImode value across a call,
4995
         and the function has a nonlocal goto.  This is because global
4996
         does not allocate call crossing pseudos to hard registers when
4997
         current_function_has_nonlocal_goto is true.  This is relatively
4998
         common for C++ programs that use exceptions.  To reproduce,
4999
         return NO_REGS and compile libstdc++.  */
5000
      if (GET_CODE (x) == MEM)
5001
        return GR_REGS;
5002
 
5003
      /* This can happen when we take a BImode subreg of a DImode value,
5004
         and that DImode value winds up in some non-GR register.  */
5005
      if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
5006
        return GR_REGS;
5007
      break;
5008
 
5009
    default:
5010
      break;
5011
    }
5012
 
5013
  return NO_REGS;
5014
}
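/* A typical case from above: (set (reg:DI f6) (const_int 42)) cannot be
   done directly, so this returns GR_REGS and reload first materializes
   the constant in a general register and then copies it into the FP
   register with a setf-style GR->FR move.  */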
5015
 
5016
 
5017
/* Emit text to declare externally defined variables and functions, because
5018
   the Intel assembler does not support undefined externals.  */
5019
 
5020
void
5021
ia64_asm_output_external (FILE *file, tree decl, const char *name)
5022
{
5023
  int save_referenced;
5024
 
5025
  /* GNU as does not need anything here, but the HP linker does need
5026
     something for external functions.  */
5027
 
5028
  if (TARGET_GNU_AS
5029
      && (!TARGET_HPUX_LD
5030
          || TREE_CODE (decl) != FUNCTION_DECL
5031
          || strstr (name, "__builtin_") == name))
5032
    return;
5033
 
5034
  /* ??? The Intel assembler creates a reference that needs to be satisfied by
5035
     the linker when we do this, so we need to be careful not to do this for
5036
     builtin functions which have no library equivalent.  Unfortunately, we
5037
     can't tell here whether or not a function will actually be called by
5038
     expand_expr, so we pull in library functions even if we may not need
5039
     them later.  */
5040
  if (! strcmp (name, "__builtin_next_arg")
5041
      || ! strcmp (name, "alloca")
5042
      || ! strcmp (name, "__builtin_constant_p")
5043
      || ! strcmp (name, "__builtin_args_info"))
5044
    return;
5045
 
5046
  if (TARGET_HPUX_LD)
5047
    ia64_hpux_add_extern_decl (decl);
5048
  else
5049
    {
5050
      /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
5051
         restore it.  */
5052
      save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
5053
      if (TREE_CODE (decl) == FUNCTION_DECL)
5054
        ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
5055
      (*targetm.asm_out.globalize_label) (file, name);
5056
      TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
5057
    }
5058
}
5059
 
5060
/* Parse the -mfixed-range= option string.  */
5061
 
5062
static void
5063
fix_range (const char *const_str)
5064
{
5065
  int i, first, last;
5066
  char *str, *dash, *comma;
5067
 
5068
  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
5069
     REG2 are either register names or register numbers.  The effect
5070
     of this option is to mark the registers in the range from REG1 to
5071
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
5072
     used, e.g., to ensure that kernel mode code doesn't use f32-f127.  */
5073
 
5074
  i = strlen (const_str);
5075
  str = (char *) alloca (i + 1);
5076
  memcpy (str, const_str, i + 1);
5077
 
5078
  while (1)
5079
    {
5080
      dash = strchr (str, '-');
5081
      if (!dash)
5082
        {
5083
          warning (0, "value of -mfixed-range must have form REG1-REG2");
5084
          return;
5085
        }
5086
      *dash = '\0';
5087
 
5088
      comma = strchr (dash + 1, ',');
5089
      if (comma)
5090
        *comma = '\0';
5091
 
5092
      first = decode_reg_name (str);
5093
      if (first < 0)
5094
        {
5095
          warning (0, "unknown register name: %s", str);
5096
          return;
5097
        }
5098
 
5099
      last = decode_reg_name (dash + 1);
5100
      if (last < 0)
5101
        {
5102
          warning (0, "unknown register name: %s", dash + 1);
5103
          return;
5104
        }
5105
 
5106
      *dash = '-';
5107
 
5108
      if (first > last)
5109
        {
5110
          warning (0, "%s-%s is an empty range", str, dash + 1);
5111
          return;
5112
        }
5113
 
5114
      for (i = first; i <= last; ++i)
5115
        fixed_regs[i] = call_used_regs[i] = 1;
5116
 
5117
      if (!comma)
5118
        break;
5119
 
5120
      *comma = ',';
5121
      str = comma + 1;
5122
    }
5123
}
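/* For example, -mfixed-range=f32-f127 marks f32 through f127 as fixed
   (and call-used), and several ranges may be given at once, e.g.
   -mfixed-range=f12-f15,f32-f127.  */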
5124
 
5125
/* Implement TARGET_HANDLE_OPTION.  */
5126
 
5127
static bool
5128
ia64_handle_option (size_t code, const char *arg, int value)
5129
{
5130
  switch (code)
5131
    {
5132
    case OPT_mfixed_range_:
5133
      fix_range (arg);
5134
      return true;
5135
 
5136
    case OPT_mtls_size_:
5137
      if (value != 14 && value != 22 && value != 64)
5138
        error ("bad value %<%s%> for -mtls-size= switch", arg);
5139
      return true;
5140
 
5141
    case OPT_mtune_:
5142
      {
5143
        static struct pta
5144
          {
5145
            const char *name;           /* processor name or nickname.  */
5146
            enum processor_type processor;
5147
          }
5148
        const processor_alias_table[] =
5149
          {
5150
            {"itanium", PROCESSOR_ITANIUM},
5151
            {"itanium1", PROCESSOR_ITANIUM},
5152
            {"merced", PROCESSOR_ITANIUM},
5153
            {"itanium2", PROCESSOR_ITANIUM2},
5154
            {"mckinley", PROCESSOR_ITANIUM2},
5155
          };
5156
        int const pta_size = ARRAY_SIZE (processor_alias_table);
5157
        int i;
5158
 
5159
        for (i = 0; i < pta_size; i++)
5160
          if (!strcmp (arg, processor_alias_table[i].name))
5161
            {
5162
              ia64_tune = processor_alias_table[i].processor;
5163
              break;
5164
            }
5165
        if (i == pta_size)
5166
          error ("bad value %<%s%> for -mtune= switch", arg);
5167
        return true;
5168
      }
5169
 
5170
    default:
5171
      return true;
5172
    }
5173
}
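/* So, for instance, -mtune=mckinley and -mtune=itanium2 both select
   PROCESSOR_ITANIUM2, while -mtls-size accepts only 14, 22 or 64 and
   rejects any other value with an error.  */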
5174
 
5175
/* Implement OVERRIDE_OPTIONS.  */
5176
 
5177
void
5178
ia64_override_options (void)
5179
{
5180
  if (TARGET_AUTO_PIC)
5181
    target_flags |= MASK_CONST_GP;
5182
 
5183
  if (TARGET_INLINE_SQRT == INL_MIN_LAT)
5184
    {
5185
      warning (0, "not yet implemented: latency-optimized inline square root");
5186
      TARGET_INLINE_SQRT = INL_MAX_THR;
5187
    }
5188
 
5189
  ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
5190
  flag_schedule_insns_after_reload = 0;
5191
 
5192
  ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
5193
 
5194
  init_machine_status = ia64_init_machine_status;
5195
}
5196
 
5197
static struct machine_function *
5198
ia64_init_machine_status (void)
5199
{
5200
  return ggc_alloc_cleared (sizeof (struct machine_function));
5201
}
5202
 
5203
static enum attr_itanium_class ia64_safe_itanium_class (rtx);
5204
static enum attr_type ia64_safe_type (rtx);
5205
 
5206
static enum attr_itanium_class
5207
ia64_safe_itanium_class (rtx insn)
5208
{
5209
  if (recog_memoized (insn) >= 0)
5210
    return get_attr_itanium_class (insn);
5211
  else
5212
    return ITANIUM_CLASS_UNKNOWN;
5213
}
5214
 
5215
static enum attr_type
5216
ia64_safe_type (rtx insn)
5217
{
5218
  if (recog_memoized (insn) >= 0)
5219
    return get_attr_type (insn);
5220
  else
5221
    return TYPE_UNKNOWN;
5222
}
5223
 
5224
/* The following collection of routines emit instruction group stop bits as
5225
   necessary to avoid dependencies.  */
5226
 
5227
/* Need to track some additional registers as far as serialization is
5228
   concerned so we can properly handle br.call and br.ret.  We could
5229
   make these registers visible to gcc, but since these registers are
5230
   never explicitly used in gcc generated code, it seems wasteful to
5231
   do so (plus it would make the call and return patterns needlessly
5232
   complex).  */
5233
#define REG_RP          (BR_REG (0))
5234
#define REG_AR_CFM      (FIRST_PSEUDO_REGISTER + 1)
5235
/* This is used for volatile asms which may require a stop bit immediately
5236
   before and after them.  */
5237
#define REG_VOLATILE    (FIRST_PSEUDO_REGISTER + 2)
5238
#define AR_UNAT_BIT_0   (FIRST_PSEUDO_REGISTER + 3)
5239
#define NUM_REGS        (AR_UNAT_BIT_0 + 64)
5240
 
5241
/* For each register, we keep track of how it has been written in the
5242
   current instruction group.
5243
 
5244
   If a register is written unconditionally (no qualifying predicate),
5245
   WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
5246
 
5247
   If a register is written if its qualifying predicate P is true, we
5248
   set WRITE_COUNT to 1 and FIRST_PRED to P.  Later on, the same register
5249
   may be written again by the complement of P (P^1) and when this happens,
5250
   WRITE_COUNT gets set to 2.
5251
 
5252
   The result of this is that whenever an insn attempts to write a register
5253
   whose WRITE_COUNT is two, we need to issue an insn group barrier first.
5254
 
5255
   If a predicate register is written by a floating-point insn, we set
5256
   WRITTEN_BY_FP to true.
5257
 
5258
   If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
5259
   to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true.  */
5260
 
5261
struct reg_write_state
5262
{
5263
  unsigned int write_count : 2;
5264
  unsigned int first_pred : 16;
5265
  unsigned int written_by_fp : 1;
5266
  unsigned int written_by_and : 1;
5267
  unsigned int written_by_or : 1;
5268
};
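/* As an example of the bookkeeping above: after "(p6) mov r8 = 1" the
   entry for r8 has write_count 1 and first_pred p6; a following
   "(p7) mov r8 = 2" in the same group is accepted without a stop bit
   only if p7 is p6's complement (p6 ^ 1), and it bumps write_count to 2;
   any further write of r8 in the group then needs a stop bit first.  */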
5269
 
5270
/* Cumulative info for the current instruction group.  */
5271
struct reg_write_state rws_sum[NUM_REGS];
5272
/* Info for the current instruction.  This gets copied to rws_sum after a
5273
   stop bit is emitted.  */
5274
struct reg_write_state rws_insn[NUM_REGS];
5275
 
5276
/* Indicates whether this is the first instruction after a stop bit,
5277
   in which case we don't need another stop bit.  Without this,
5278
   ia64_variable_issue will die when scheduling an alloc.  */
5279
static int first_instruction;
5280
 
5281
/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
5282
   RTL for one instruction.  */
5283
struct reg_flags
5284
{
5285
  unsigned int is_write : 1;    /* Is register being written?  */
5286
  unsigned int is_fp : 1;       /* Is register used as part of an fp op?  */
5287
  unsigned int is_branch : 1;   /* Is register used as part of a branch?  */
5288
  unsigned int is_and : 1;      /* Is register used as part of and.orcm?  */
5289
  unsigned int is_or : 1;       /* Is register used as part of or.andcm?  */
5290
  unsigned int is_sibcall : 1;  /* Is this a sibling or normal call?  */
5291
};
5292
 
5293
static void rws_update (struct reg_write_state *, int, struct reg_flags, int);
5294
static int rws_access_regno (int, struct reg_flags, int);
5295
static int rws_access_reg (rtx, struct reg_flags, int);
5296
static void update_set_flags (rtx, struct reg_flags *);
5297
static int set_src_needs_barrier (rtx, struct reg_flags, int);
5298
static int rtx_needs_barrier (rtx, struct reg_flags, int);
5299
static void init_insn_group_barriers (void);
5300
static int group_barrier_needed (rtx);
5301
static int safe_group_barrier_needed (rtx);
5302
 
5303
/* Update *RWS for REGNO, which is being written by the current instruction,
5304
   with predicate PRED, and associated register flags in FLAGS.  */
5305
 
5306
static void
5307
rws_update (struct reg_write_state *rws, int regno, struct reg_flags flags, int pred)
5308
{
5309
  if (pred)
5310
    rws[regno].write_count++;
5311
  else
5312
    rws[regno].write_count = 2;
5313
  rws[regno].written_by_fp |= flags.is_fp;
5314
  /* ??? Not tracking and/or across differing predicates.  */
5315
  rws[regno].written_by_and = flags.is_and;
5316
  rws[regno].written_by_or = flags.is_or;
5317
  rws[regno].first_pred = pred;
5318
}
5319
 
5320
/* Handle an access to register REGNO of type FLAGS using predicate register
5321
   PRED.  Update rws_insn and rws_sum arrays.  Return 1 if this access creates
5322
   a dependency with an earlier instruction in the same group.  */
5323
 
5324
static int
5325
rws_access_regno (int regno, struct reg_flags flags, int pred)
5326
{
5327
  int need_barrier = 0;
5328
 
5329
  gcc_assert (regno < NUM_REGS);
5330
 
5331
  if (! PR_REGNO_P (regno))
5332
    flags.is_and = flags.is_or = 0;
5333
 
5334
  if (flags.is_write)
5335
    {
5336
      int write_count;
5337
 
5338
      /* One insn writes same reg multiple times?  */
5339
      gcc_assert (!rws_insn[regno].write_count);
5340
 
5341
      /* Update info for current instruction.  */
5342
      rws_update (rws_insn, regno, flags, pred);
5343
      write_count = rws_sum[regno].write_count;
5344
 
5345
      switch (write_count)
5346
        {
5347
        case 0:
5348
          /* The register has not been written yet.  */
5349
          rws_update (rws_sum, regno, flags, pred);
5350
          break;
5351
 
5352
        case 1:
5353
          /* The register has been written via a predicate.  If this is
5354
             not a complementary predicate, then we need a barrier.  */
5355
          /* ??? This assumes that P and P+1 are always complementary
5356
             predicates for P even.  */
5357
          if (flags.is_and && rws_sum[regno].written_by_and)
5358
            ;
5359
          else if (flags.is_or && rws_sum[regno].written_by_or)
5360
            ;
5361
          else if ((rws_sum[regno].first_pred ^ 1) != pred)
5362
            need_barrier = 1;
5363
          rws_update (rws_sum, regno, flags, pred);
5364
          break;
5365
 
5366
        case 2:
5367
          /* The register has been unconditionally written already.  We
5368
             need a barrier.  */
5369
          if (flags.is_and && rws_sum[regno].written_by_and)
5370
            ;
5371
          else if (flags.is_or && rws_sum[regno].written_by_or)
5372
            ;
5373
          else
5374
            need_barrier = 1;
5375
          rws_sum[regno].written_by_and = flags.is_and;
5376
          rws_sum[regno].written_by_or = flags.is_or;
5377
          break;
5378
 
5379
        default:
5380
          gcc_unreachable ();
5381
        }
5382
    }
5383
  else
5384
    {
5385
      if (flags.is_branch)
5386
        {
5387
          /* Branches have several RAW exceptions that allow us to avoid
5388
             barriers.  */
5389
 
5390
          if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
5391
            /* RAW dependencies on branch regs are permissible as long
5392
               as the writer is a non-branch instruction.  Since we
5393
               never generate code that uses a branch register written
5394
               by a branch instruction, handling this case is
5395
               easy.  */
5396
            return 0;
5397
 
5398
          if (REGNO_REG_CLASS (regno) == PR_REGS
5399
              && ! rws_sum[regno].written_by_fp)
5400
            /* The predicates of a branch are available within the
5401
               same insn group as long as the predicate was written by
5402
               something other than a floating-point instruction.  */
5403
            return 0;
5404
        }
5405
 
5406
      if (flags.is_and && rws_sum[regno].written_by_and)
5407
        return 0;
5408
      if (flags.is_or && rws_sum[regno].written_by_or)
5409
        return 0;
5410
 
5411
      switch (rws_sum[regno].write_count)
5412
        {
5413
        case 0:
5414
          /* The register has not been written yet.  */
5415
          break;
5416
 
5417
        case 1:
5418
          /* The register has been written via a predicate.  If this is
5419
             not a complementary predicate, then we need a barrier.  */
5420
          /* ??? This assumes that P and P+1 are always complementary
5421
             predicates for P even.  */
5422
          if ((rws_sum[regno].first_pred ^ 1) != pred)
5423
            need_barrier = 1;
5424
          break;
5425
 
5426
        case 2:
5427
          /* The register has been unconditionally written already.  We
5428
             need a barrier.  */
5429
          need_barrier = 1;
5430
          break;
5431
 
5432
        default:
5433
          gcc_unreachable ();
5434
        }
5435
    }
5436
 
5437
  return need_barrier;
5438
}
5439
 
5440
static int
5441
rws_access_reg (rtx reg, struct reg_flags flags, int pred)
5442
{
5443
  int regno = REGNO (reg);
5444
  int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
5445
 
5446
  if (n == 1)
5447
    return rws_access_regno (regno, flags, pred);
5448
  else
5449
    {
5450
      int need_barrier = 0;
5451
      while (--n >= 0)
5452
        need_barrier |= rws_access_regno (regno + n, flags, pred);
5453
      return need_barrier;
5454
    }
5455
}
5456
 
5457
/* Examine X, which is a SET rtx, and update the flags stored in *PFLAGS
5458
   to reflect its source (FP comparison, and.orcm, or or.andcm).  */
5459
 
5460
static void
5461
update_set_flags (rtx x, struct reg_flags *pflags)
5462
{
5463
  rtx src = SET_SRC (x);
5464
 
5465
  switch (GET_CODE (src))
5466
    {
5467
    case CALL:
5468
      return;
5469
 
5470
    case IF_THEN_ELSE:
5471
      /* There are four cases here:
5472
         (1) The destination is (pc), in which case this is a branch,
5473
         nothing here applies.
5474
         (2) The destination is ar.lc, in which case this is a
5475
         doloop_end_internal,
5476
         (3) The destination is an fp register, in which case this is
5477
         an fselect instruction.
5478
         (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
5479
         this is a check load.
5480
         In all cases, nothing we do in this function applies.  */
5481
      return;
5482
 
5483
    default:
5484
      if (COMPARISON_P (src)
5485
          && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
5486
        /* Set pflags->is_fp to 1 so that we know we're dealing
5487
           with a floating point comparison when processing the
5488
           destination of the SET.  */
5489
        pflags->is_fp = 1;
5490
 
5491
      /* Discover if this is a parallel comparison.  We only handle
5492
         and.orcm and or.andcm at present, since we must retain a
5493
         strict inverse on the predicate pair.  */
5494
      else if (GET_CODE (src) == AND)
5495
        pflags->is_and = 1;
5496
      else if (GET_CODE (src) == IOR)
5497
        pflags->is_or = 1;
5498
 
5499
      break;
5500
    }
5501
}
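/* For instance, a parallel compare of the rough form
   (set (reg p6) (and (eq (reg r8) (const_int 0)) (reg p7))) sets
   pflags->is_and, so a second and.orcm-style write of p6 in the same
   group is tolerated by the write-state machinery instead of forcing a
   stop bit.  */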
5502
 
5503
/* Subroutine of rtx_needs_barrier; this function determines whether the
5504
   source of a given SET rtx found in X needs a barrier.  FLAGS and PRED
5505
   are as in rtx_needs_barrier and give the access flags and qualifying
5506
   predicate for this insn.  */
5507
 
5508
static int
5509
set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
5510
{
5511
  int need_barrier = 0;
5512
  rtx dst;
5513
  rtx src = SET_SRC (x);
5514
 
5515
  if (GET_CODE (src) == CALL)
5516
    /* We don't need to worry about the result registers that
5517
       get written by subroutine call.  */
5518
    return rtx_needs_barrier (src, flags, pred);
5519
  else if (SET_DEST (x) == pc_rtx)
5520
    {
5521
      /* X is a conditional branch.  */
5522
      /* ??? This seems redundant, as the caller sets this bit for
5523
         all JUMP_INSNs.  */
5524
      if (!ia64_spec_check_src_p (src))
5525
        flags.is_branch = 1;
5526
      return rtx_needs_barrier (src, flags, pred);
5527
    }
5528
 
5529
  if (ia64_spec_check_src_p (src))
5530
    /* Avoid checking one register twice (in condition
5531
       and in 'then' section) for ldc pattern.  */
5532
    {
5533
      gcc_assert (REG_P (XEXP (src, 2)));
5534
      need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);
5535
 
5536
      /* We process MEM below.  */
5537
      src = XEXP (src, 1);
5538
    }
5539
 
5540
  need_barrier |= rtx_needs_barrier (src, flags, pred);
5541
 
5542
  dst = SET_DEST (x);
5543
  if (GET_CODE (dst) == ZERO_EXTRACT)
5544
    {
5545
      need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
5546
      need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
5547
    }
5548
  return need_barrier;
5549
}
5550
 
5551
/* Handle an access to rtx X of type FLAGS using predicate register
5552
   PRED.  Return 1 if this access creates a dependency with an earlier
5553
   instruction in the same group.  */
5554
 
5555
static int
5556
rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
5557
{
5558
  int i, j;
5559
  int is_complemented = 0;
5560
  int need_barrier = 0;
5561
  const char *format_ptr;
5562
  struct reg_flags new_flags;
5563
  rtx cond;
5564
 
5565
  if (! x)
5566
    return 0;
5567
 
5568
  new_flags = flags;
5569
 
5570
  switch (GET_CODE (x))
5571
    {
5572
    case SET:
5573
      update_set_flags (x, &new_flags);
5574
      need_barrier = set_src_needs_barrier (x, new_flags, pred);
5575
      if (GET_CODE (SET_SRC (x)) != CALL)
5576
        {
5577
          new_flags.is_write = 1;
5578
          need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
5579
        }
5580
      break;
5581
 
5582
    case CALL:
5583
      new_flags.is_write = 0;
5584
      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5585
 
5586
      /* Avoid multiple register writes, in case this is a pattern with
5587
         multiple CALL rtx.  This avoids a failure in rws_access_reg.  */
5588
      if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
5589
        {
5590
          new_flags.is_write = 1;
5591
          need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
5592
          need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
5593
          need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5594
        }
5595
      break;
5596
 
5597
    case COND_EXEC:
5598
      /* X is a predicated instruction.  */
5599
 
5600
      cond = COND_EXEC_TEST (x);
5601
      gcc_assert (!pred);
5602
      need_barrier = rtx_needs_barrier (cond, flags, 0);
5603
 
5604
      if (GET_CODE (cond) == EQ)
5605
        is_complemented = 1;
5606
      cond = XEXP (cond, 0);
5607
      gcc_assert (GET_CODE (cond) == REG
5608
                  && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
5609
      pred = REGNO (cond);
5610
      if (is_complemented)
5611
        ++pred;
5612
 
5613
      need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
5614
      return need_barrier;
5615
 
5616
    case CLOBBER:
5617
    case USE:
5618
      /* Clobber & use are for earlier compiler-phases only.  */
5619
      break;
5620
 
5621
    case ASM_OPERANDS:
5622
    case ASM_INPUT:
5623
      /* We always emit stop bits for traditional asms.  We emit stop bits
5624
         for volatile extended asms if TARGET_VOL_ASM_STOP is true.  */
5625
      if (GET_CODE (x) != ASM_OPERANDS
5626
          || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
5627
        {
5628
          /* Avoid writing the register multiple times if we have multiple
5629
             asm outputs.  This avoids a failure in rws_access_reg.  */
5630
          if (! rws_insn[REG_VOLATILE].write_count)
5631
            {
5632
              new_flags.is_write = 1;
5633
              rws_access_regno (REG_VOLATILE, new_flags, pred);
5634
            }
5635
          return 1;
5636
        }
5637
 
5638
      /* For all ASM_OPERANDS, we must traverse the vector of input operands.
5639
         We cannot just fall through here since then we would be confused
5640
         by the ASM_INPUT rtx inside ASM_OPERANDS, which does not indicate
5641
         a traditional asm, unlike its normal usage.  */
5642
 
5643
      for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
5644
        if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
5645
          need_barrier = 1;
5646
      break;
5647
 
5648
    case PARALLEL:
5649
      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5650
        {
5651
          rtx pat = XVECEXP (x, 0, i);
5652
          switch (GET_CODE (pat))
5653
            {
5654
            case SET:
5655
              update_set_flags (pat, &new_flags);
5656
              need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
5657
              break;
5658
 
5659
            case USE:
5660
            case CALL:
5661
            case ASM_OPERANDS:
5662
              need_barrier |= rtx_needs_barrier (pat, flags, pred);
5663
              break;
5664
 
5665
            case CLOBBER:
5666
            case RETURN:
5667
              break;
5668
 
5669
            default:
5670
              gcc_unreachable ();
5671
            }
5672
        }
5673
      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5674
        {
5675
          rtx pat = XVECEXP (x, 0, i);
5676
          if (GET_CODE (pat) == SET)
5677
            {
5678
              if (GET_CODE (SET_SRC (pat)) != CALL)
5679
                {
5680
                  new_flags.is_write = 1;
5681
                  need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
5682
                                                     pred);
5683
                }
5684
            }
5685
          else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
5686
            need_barrier |= rtx_needs_barrier (pat, flags, pred);
5687
        }
5688
      break;
5689
 
5690
    case SUBREG:
5691
      need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
5692
      break;
5693
    case REG:
5694
      if (REGNO (x) == AR_UNAT_REGNUM)
5695
        {
5696
          for (i = 0; i < 64; ++i)
5697
            need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
5698
        }
5699
      else
5700
        need_barrier = rws_access_reg (x, flags, pred);
5701
      break;
5702
 
5703
    case MEM:
5704
      /* Find the regs used in memory address computation.  */
5705
      new_flags.is_write = 0;
5706
      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5707
      break;
5708
 
5709
    case CONST_INT:   case CONST_DOUBLE:  case CONST_VECTOR:
5710
    case SYMBOL_REF:  case LABEL_REF:     case CONST:
5711
      break;
5712
 
5713
      /* Operators with side-effects.  */
5714
    case POST_INC:    case POST_DEC:
5715
      gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
5716
 
5717
      new_flags.is_write = 0;
5718
      need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
5719
      new_flags.is_write = 1;
5720
      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5721
      break;
5722
 
5723
    case POST_MODIFY:
5724
      gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
5725
 
5726
      new_flags.is_write = 0;
5727
      need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
5728
      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5729
      new_flags.is_write = 1;
5730
      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5731
      break;
5732
 
5733
      /* Handle common unary and binary ops for efficiency.  */
5734
    case COMPARE:  case PLUS:    case MINUS:   case MULT:      case DIV:
5735
    case MOD:      case UDIV:    case UMOD:    case AND:       case IOR:
5736
    case XOR:      case ASHIFT:  case ROTATE:  case ASHIFTRT:  case LSHIFTRT:
5737
    case ROTATERT: case SMIN:    case SMAX:    case UMIN:      case UMAX:
5738
    case NE:       case EQ:      case GE:      case GT:        case LE:
5739
    case LT:       case GEU:     case GTU:     case LEU:       case LTU:
5740
      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5741
      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5742
      break;
5743
 
5744
    case NEG:      case NOT:            case SIGN_EXTEND:     case ZERO_EXTEND:
5745
    case TRUNCATE: case FLOAT_EXTEND:   case FLOAT_TRUNCATE:  case FLOAT:
5746
    case FIX:      case UNSIGNED_FLOAT: case UNSIGNED_FIX:    case ABS:
5747
    case SQRT:     case FFS:            case POPCOUNT:
5748
      need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
5749
      break;
5750
 
5751
    case VEC_SELECT:
5752
      /* VEC_SELECT's second argument is a PARALLEL with integers that
5753
         describe the elements selected.  On ia64, those integers are
5754
         always constants.  Avoid walking the PARALLEL so that we don't
5755
         get confused with "normal" parallels and then die.  */
5756
      need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
5757
      break;
5758
 
5759
    case UNSPEC:
5760
      switch (XINT (x, 1))
5761
        {
5762
        case UNSPEC_LTOFF_DTPMOD:
5763
        case UNSPEC_LTOFF_DTPREL:
5764
        case UNSPEC_DTPREL:
5765
        case UNSPEC_LTOFF_TPREL:
5766
        case UNSPEC_TPREL:
5767
        case UNSPEC_PRED_REL_MUTEX:
5768
        case UNSPEC_PIC_CALL:
5769
        case UNSPEC_MF:
5770
        case UNSPEC_FETCHADD_ACQ:
5771
        case UNSPEC_BSP_VALUE:
5772
        case UNSPEC_FLUSHRS:
5773
        case UNSPEC_BUNDLE_SELECTOR:
5774
          break;
5775
 
5776
        case UNSPEC_GR_SPILL:
5777
        case UNSPEC_GR_RESTORE:
5778
          {
5779
            HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
5780
            HOST_WIDE_INT bit = (offset >> 3) & 63;
5781
 
5782
            need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5783
            new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
5784
            need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
5785
                                              new_flags, pred);
5786
            break;
5787
          }
5788
 
5789
        case UNSPEC_FR_SPILL:
5790
        case UNSPEC_FR_RESTORE:
5791
        case UNSPEC_GETF_EXP:
5792
        case UNSPEC_SETF_EXP:
5793
        case UNSPEC_ADDP4:
5794
        case UNSPEC_FR_SQRT_RECIP_APPROX:
5795
        case UNSPEC_LDA:
5796
        case UNSPEC_LDS:
5797
        case UNSPEC_LDSA:
5798
        case UNSPEC_CHKACLR:
5799
        case UNSPEC_CHKS:
5800
          need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5801
          break;
5802
 
5803
        case UNSPEC_FR_RECIP_APPROX:
5804
        case UNSPEC_SHRP:
5805
        case UNSPEC_COPYSIGN:
5806
          need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5807
          need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5808
          break;
5809
 
5810
        case UNSPEC_CMPXCHG_ACQ:
5811
          need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5812
          need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
5813
          break;
5814
 
5815
        default:
5816
          gcc_unreachable ();
5817
        }
5818
      break;
5819
 
5820
    case UNSPEC_VOLATILE:
5821
      switch (XINT (x, 1))
5822
        {
5823
        case UNSPECV_ALLOC:
5824
          /* Alloc must always be the first instruction of a group.
5825
             We force this by always returning true.  */
5826
          /* ??? We might get better scheduling if we explicitly check for
5827
             input/local/output register dependencies, and modify the
5828
             scheduler so that alloc is always reordered to the start of
5829
             the current group.  We could then eliminate all of the
5830
             first_instruction code.  */
5831
          rws_access_regno (AR_PFS_REGNUM, flags, pred);
5832
 
5833
          new_flags.is_write = 1;
5834
          rws_access_regno (REG_AR_CFM, new_flags, pred);
5835
          return 1;
5836
 
5837
        case UNSPECV_SET_BSP:
5838
          need_barrier = 1;
5839
          break;
5840
 
5841
        case UNSPECV_BLOCKAGE:
5842
        case UNSPECV_INSN_GROUP_BARRIER:
5843
        case UNSPECV_BREAK:
5844
        case UNSPECV_PSAC_ALL:
5845
        case UNSPECV_PSAC_NORMAL:
5846
          return 0;
5847
 
5848
        default:
5849
          gcc_unreachable ();
5850
        }
5851
      break;
5852
 
5853
    case RETURN:
5854
      new_flags.is_write = 0;
5855
      need_barrier  = rws_access_regno (REG_RP, flags, pred);
5856
      need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
5857
 
5858
      new_flags.is_write = 1;
5859
      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5860
      need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5861
      break;
5862
 
5863
    default:
5864
      format_ptr = GET_RTX_FORMAT (GET_CODE (x));
5865
      for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5866
        switch (format_ptr[i])
5867
          {
5868
          case '0':     /* unused field */
5869
          case 'i':     /* integer */
5870
          case 'n':     /* note */
5871
          case 'w':     /* wide integer */
5872
          case 's':     /* pointer to string */
5873
          case 'S':     /* optional pointer to string */
5874
            break;
5875
 
5876
          case 'e':
5877
            if (rtx_needs_barrier (XEXP (x, i), flags, pred))
5878
              need_barrier = 1;
5879
            break;
5880
 
5881
          case 'E':
5882
            for (j = XVECLEN (x, i) - 1; j >= 0; --j)
5883
              if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
5884
                need_barrier = 1;
5885
            break;
5886
 
5887
          default:
5888
            gcc_unreachable ();
5889
          }
5890
      break;
5891
    }
5892
  return need_barrier;
5893
}
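/* A small example of how this is used: if some insn in the current group
   has already written r15 unconditionally (write_count == 2), then
   scanning (set (reg r14) (plus (reg r15) (reg r16))) reads r15 through
   the PLUS case above, rws_access_regno reports the RAW dependency, and
   the caller inserts a stop bit before the add.  */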
5894
 
5895
/* Clear out the state for group_barrier_needed at the start of a
5896
   sequence of insns.  */
5897
 
5898
static void
5899
init_insn_group_barriers (void)
5900
{
5901
  memset (rws_sum, 0, sizeof (rws_sum));
5902
  first_instruction = 1;
5903
}
5904
 
5905
/* Given the current state, determine whether a group barrier (a stop bit) is
5906
   necessary before INSN.  Return nonzero if so.  This modifies the state to
5907
   include the effects of INSN as a side-effect.  */
5908
 
5909
static int
5910
group_barrier_needed (rtx insn)
5911
{
5912
  rtx pat;
5913
  int need_barrier = 0;
5914
  struct reg_flags flags;
5915
 
5916
  memset (&flags, 0, sizeof (flags));
5917
  switch (GET_CODE (insn))
5918
    {
5919
    case NOTE:
5920
      break;
5921
 
5922
    case BARRIER:
5923
      /* A barrier doesn't imply an instruction group boundary.  */
5924
      break;
5925
 
5926
    case CODE_LABEL:
5927
      memset (rws_insn, 0, sizeof (rws_insn));
5928
      return 1;
5929
 
5930
    case CALL_INSN:
5931
      flags.is_branch = 1;
5932
      flags.is_sibcall = SIBLING_CALL_P (insn);
5933
      memset (rws_insn, 0, sizeof (rws_insn));
5934
 
5935
      /* Don't bundle a call following another call.  */
5936
      if ((pat = prev_active_insn (insn))
5937
          && GET_CODE (pat) == CALL_INSN)
5938
        {
5939
          need_barrier = 1;
5940
          break;
5941
        }
5942
 
5943
      need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
5944
      break;
5945
 
5946
    case JUMP_INSN:
5947
      if (!ia64_spec_check_p (insn))
5948
        flags.is_branch = 1;
5949
 
5950
      /* Don't bundle a jump following a call.  */
5951
      if ((pat = prev_active_insn (insn))
5952
          && GET_CODE (pat) == CALL_INSN)
5953
        {
5954
          need_barrier = 1;
5955
          break;
5956
        }
5957
      /* FALLTHRU */
5958
 
5959
    case INSN:
5960
      if (GET_CODE (PATTERN (insn)) == USE
5961
          || GET_CODE (PATTERN (insn)) == CLOBBER)
5962
        /* Don't care about USE and CLOBBER "insns"---those are used to
5963
           indicate to the optimizer that it shouldn't get rid of
5964
           certain operations.  */
5965
        break;
5966
 
5967
      pat = PATTERN (insn);
5968
 
5969
      /* Ug.  Hack hacks hacked elsewhere.  */
5970
      switch (recog_memoized (insn))
5971
        {
5972
          /* We play dependency tricks with the epilogue in order
5973
             to get proper schedules.  Undo this for dv analysis.  */
5974
        case CODE_FOR_epilogue_deallocate_stack:
5975
        case CODE_FOR_prologue_allocate_stack:
5976
          pat = XVECEXP (pat, 0, 0);
5977
          break;
5978
 
5979
          /* The pattern we use for br.cloop confuses the code above.
5980
             The second element of the vector is representative.  */
5981
        case CODE_FOR_doloop_end_internal:
5982
          pat = XVECEXP (pat, 0, 1);
5983
          break;
5984
 
5985
          /* Doesn't generate code.  */
5986
        case CODE_FOR_pred_rel_mutex:
5987
        case CODE_FOR_prologue_use:
5988
          return 0;
5989
 
5990
        default:
5991
          break;
5992
        }
5993
 
5994
      memset (rws_insn, 0, sizeof (rws_insn));
5995
      need_barrier = rtx_needs_barrier (pat, flags, 0);
5996
 
5997
      /* Check to see if the previous instruction was a volatile
5998
         asm.  */
5999
      if (! need_barrier)
6000
        need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
6001
      break;
6002
 
6003
    default:
6004
      gcc_unreachable ();
6005
    }
6006
 
6007
  if (first_instruction && INSN_P (insn)
6008
      && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
6009
      && GET_CODE (PATTERN (insn)) != USE
6010
      && GET_CODE (PATTERN (insn)) != CLOBBER)
6011
    {
6012
      need_barrier = 0;
6013
      first_instruction = 0;
6014
    }
6015
 
6016
  return need_barrier;
6017
}
6018
 
6019
/* Like group_barrier_needed, but do not clobber the current state.  */
6020
 
6021
static int
6022
safe_group_barrier_needed (rtx insn)
6023
{
6024
  struct reg_write_state rws_saved[NUM_REGS];
6025
  int saved_first_instruction;
6026
  int t;
6027
 
6028
  memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
6029
  saved_first_instruction = first_instruction;
6030
 
6031
  t = group_barrier_needed (insn);
6032
 
6033
  memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
6034
  first_instruction = saved_first_instruction;
6035
 
6036
  return t;
6037
}
6038
 
6039
/* Scan the current function and insert stop bits as necessary to
6040
   eliminate dependencies.  This function assumes that a final
6041
   instruction scheduling pass has been run which has already
6042
   inserted most of the necessary stop bits.  This function only
6043
   inserts new ones at basic block boundaries, since these are
6044
   invisible to the scheduler.  */
6045
 
6046
static void
6047
emit_insn_group_barriers (FILE *dump)
6048
{
6049
  rtx insn;
6050
  rtx last_label = 0;
6051
  int insns_since_last_label = 0;
6052
 
6053
  init_insn_group_barriers ();
6054
 
6055
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6056
    {
6057
      if (GET_CODE (insn) == CODE_LABEL)
6058
        {
6059
          if (insns_since_last_label)
6060
            last_label = insn;
6061
          insns_since_last_label = 0;
6062
        }
6063
      else if (GET_CODE (insn) == NOTE
6064
               && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
6065
        {
6066
          if (insns_since_last_label)
6067
            last_label = insn;
6068
          insns_since_last_label = 0;
6069
        }
6070
      else if (GET_CODE (insn) == INSN
6071
               && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
6072
               && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
6073
        {
6074
          init_insn_group_barriers ();
6075
          last_label = 0;
6076
        }
6077
      else if (INSN_P (insn))
6078
        {
6079
          insns_since_last_label = 1;
6080
 
6081
          if (group_barrier_needed (insn))
6082
            {
6083
              if (last_label)
6084
                {
6085
                  if (dump)
6086
                    fprintf (dump, "Emitting stop before label %d\n",
6087
                             INSN_UID (last_label));
6088
                  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
6089
                  insn = last_label;
6090
 
6091
                  init_insn_group_barriers ();
6092
                  last_label = 0;
6093
                }
6094
            }
6095
        }
6096
    }
6097
}
6098
 
6099
/* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
6100
   This function has to emit all necessary group barriers.  */
6101
 
6102
static void
6103
emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
6104
{
6105
  rtx insn;
6106
 
6107
  init_insn_group_barriers ();
6108
 
6109
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6110
    {
6111
      if (GET_CODE (insn) == BARRIER)
6112
        {
6113
          rtx last = prev_active_insn (insn);
6114
 
6115
          if (! last)
6116
            continue;
6117
          if (GET_CODE (last) == JUMP_INSN
6118
              && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
6119
            last = prev_active_insn (last);
6120
          if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
6121
            emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
6122
 
6123
          init_insn_group_barriers ();
6124
        }
6125
      else if (INSN_P (insn))
6126
        {
6127
          if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
6128
            init_insn_group_barriers ();
6129
          else if (group_barrier_needed (insn))
6130
            {
6131
              emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
6132
              init_insn_group_barriers ();
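              /* Re-scan INSN so that its own register accesses are
                 recorded in the just-reset group-barrier state
                 (presumably why the return value is ignored here).  */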
6133
              group_barrier_needed (insn);
6134
            }
6135
        }
6136
    }
6137
}
6138
 
6139
 
6140
 
6141
/* Instruction scheduling support.  */
6142
 
6143
#define NR_BUNDLES 10
6144
 
6145
/* A list of names of all available bundles.  */
6146
 
6147
static const char *bundle_name [NR_BUNDLES] =
6148
{
6149
  ".mii",
6150
  ".mmi",
6151
  ".mfi",
6152
  ".mmf",
6153
#if NR_BUNDLES == 10
6154
  ".bbb",
6155
  ".mbb",
6156
#endif
6157
  ".mib",
6158
  ".mmb",
6159
  ".mfb",
6160
  ".mlx"
6161
};
6162
 
6163
/* Nonzero if we should insert stop bits into the schedule.  */
6164
 
6165
int ia64_final_schedule = 0;
6166
 
6167
/* Codes of the corresponding queried units: */
6168
 
6169
static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
6170
static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
6171
 
6172
static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
6173
static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
6174
 
6175
static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
6176
 
6177
/* The following variable value is an insn group barrier.  */
6178
 
6179
static rtx dfa_stop_insn;
6180
 
6181
/* The following variable value is the last issued insn.  */
6182
 
6183
static rtx last_scheduled_insn;
6184
 
6185
/* The following variable value is size of the DFA state.  */
6186
 
6187
static size_t dfa_state_size;
6188
 
6189
/* The following variable value is a pointer to a DFA state used as a
6190
   temporary variable.  */
6191
 
6192
static state_t temp_dfa_state = NULL;
6193
 
6194
/* The following variable value is DFA state after issuing the last
6195
   insn.  */
6196
 
6197
static state_t prev_cycle_state = NULL;
6198
 
6199
/* The following array element values are TRUE if the corresponding
6200
   insn requires stop bits to be added before it.  */
6201
 
6202
static char *stops_p = NULL;
6203
 
6204
/* The following array element values are ZERO for non-speculative
6205
   instructions and hold the corresponding speculation check number for
6206
   speculative instructions.  */
6207
static int *spec_check_no = NULL;
6208
 
6209
/* Size of spec_check_no array.  */
6210
static int max_uid = 0;
6211
 
6212
/* The following variable is used to set up the array mentioned above.  */
6213
 
6214
static int stop_before_p = 0;
6215
 
6216
/* The following variable value is the length of the arrays `clocks' and
6217
   `add_cycles'. */
6218
 
6219
static int clocks_length;
6220
 
6221
/* The following array element values are cycles on which the
6222
   corresponding insn will be issued.  The array is used only for
6223
   Itanium1.  */
6224
 
6225
static int *clocks;
6226
 
6227
/* The following array element values are the numbers of cycles that should be
6228
   added to improve insn scheduling for MM_insns for Itanium1.  */
6229
 
6230
static int *add_cycles;
6231
 
6232
/* The following variable value is number of data speculations in progress.  */
6233
static int pending_data_specs = 0;
6234
 
6235
static rtx ia64_single_set (rtx);
6236
static void ia64_emit_insn_before (rtx, rtx);
6237
 
6238
/* Map a bundle number to its pseudo-op.  */
6239
 
6240
const char *
6241
get_bundle_name (int b)
6242
{
6243
  return bundle_name[b];
6244
}
6245
 
6246
 
6247
/* Return the maximum number of instructions a cpu can issue.  */
6248
 
6249
static int
6250
ia64_issue_rate (void)
6251
{
6252
  return 6;
6253
}
6254
 
6255
/* Helper function - like single_set, but look inside COND_EXEC.  */
6256
 
6257
static rtx
6258
ia64_single_set (rtx insn)
6259
{
6260
  rtx x = PATTERN (insn), ret;
6261
  if (GET_CODE (x) == COND_EXEC)
6262
    x = COND_EXEC_CODE (x);
6263
  if (GET_CODE (x) == SET)
6264
    return x;
6265
 
6266
  /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
6267
     Although they are not classical single set, the second set is there just
6268
     to protect it from moving past FP-relative stack accesses.  */
6269
  switch (recog_memoized (insn))
6270
    {
6271
    case CODE_FOR_prologue_allocate_stack:
6272
    case CODE_FOR_epilogue_deallocate_stack:
6273
      ret = XVECEXP (x, 0, 0);
6274
      break;
6275
 
6276
    default:
6277
      ret = single_set_2 (insn, x);
6278
      break;
6279
    }
6280
 
6281
  return ret;
6282
}
6283
 
6284
/* Adjust the cost of a scheduling dependency.
6285
   Return the new cost of a dependency of type DEP_TYPE or INSN on DEP_INSN.
6286
   COST is the current cost.  */
6287
 
6288
static int
6289
ia64_adjust_cost_2 (rtx insn, int dep_type1, rtx dep_insn, int cost)
6290
{
6291
  enum reg_note dep_type = (enum reg_note) dep_type1;
6292
  enum attr_itanium_class dep_class;
6293
  enum attr_itanium_class insn_class;
6294
 
6295
  if (dep_type != REG_DEP_OUTPUT)
6296
    return cost;
6297
 
6298
  insn_class = ia64_safe_itanium_class (insn);
6299
  dep_class = ia64_safe_itanium_class (dep_insn);
6300
  if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
6301
      || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
6302
    return 0;
6303
 
6304
  return cost;
6305
}
6306
 
6307
/* Like emit_insn_before, but skip cycle_display notes.
6308
   ??? When cycle display notes are implemented, update this.  */
6309
 
6310
static void
6311
ia64_emit_insn_before (rtx insn, rtx before)
6312
{
6313
  emit_insn_before (insn, before);
6314
}
6315
 
6316
/* The following function marks insns that produce addresses for load
6317
   and store insns.  Such insns will be placed into M slots because it
6318
   decreases latency for Itanium1 (see function
6319
   `ia64_produce_address_p' and the DFA descriptions).  */
6320
 
6321
static void
6322
ia64_dependencies_evaluation_hook (rtx head, rtx tail)
6323
{
6324
  rtx insn, link, next, next_tail;
6325
 
6326
  /* Before reload, which_alternative is not set, which means that
6327
     ia64_safe_itanium_class will produce wrong results for (at least)
6328
     move instructions.  */
6329
  if (!reload_completed)
6330
    return;
6331
 
6332
  next_tail = NEXT_INSN (tail);
6333
  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6334
    if (INSN_P (insn))
6335
      insn->call = 0;
6336
  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6337
    if (INSN_P (insn)
6338
        && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
6339
      {
6340
        for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1))
6341
          {
6342
            enum attr_itanium_class c;
6343
 
6344
            if (REG_NOTE_KIND (link) != REG_DEP_TRUE)
6345
              continue;
6346
            next = XEXP (link, 0);
6347
            c = ia64_safe_itanium_class (next);
6348
            if ((c == ITANIUM_CLASS_ST
6349
                 || c == ITANIUM_CLASS_STF)
6350
                && ia64_st_address_bypass_p (insn, next))
6351
              break;
6352
            else if ((c == ITANIUM_CLASS_LD
6353
                      || c == ITANIUM_CLASS_FLD
6354
                      || c == ITANIUM_CLASS_FLDP)
6355
                     && ia64_ld_address_bypass_p (insn, next))
6356
              break;
6357
          }
6358
        insn->call = link != 0;
6359
      }
6360
}
6361
 
6362
/* We're beginning a new block.  Initialize data structures as necessary.  */
6363
 
6364
static void
6365
ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
6366
                 int sched_verbose ATTRIBUTE_UNUSED,
6367
                 int max_ready ATTRIBUTE_UNUSED)
6368
{
6369
#ifdef ENABLE_CHECKING
6370
  rtx insn;
6371
 
6372
  if (reload_completed)
6373
    for (insn = NEXT_INSN (current_sched_info->prev_head);
6374
         insn != current_sched_info->next_tail;
6375
         insn = NEXT_INSN (insn))
6376
      gcc_assert (!SCHED_GROUP_P (insn));
6377
#endif
6378
  last_scheduled_insn = NULL_RTX;
6379
  init_insn_group_barriers ();
6380
}
6381
 
6382
/* We're beginning a scheduling pass.  Check assertion.  */
6383
 
6384
static void
6385
ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
6386
                        int sched_verbose ATTRIBUTE_UNUSED,
6387
                        int max_ready ATTRIBUTE_UNUSED)
6388
{
6389
  gcc_assert (!pending_data_specs);
6390
}
6391
 
6392
/* Scheduling pass is now finished.  Free/reset static variable.  */
6393
static void
6394
ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
6395
                          int sched_verbose ATTRIBUTE_UNUSED)
6396
{
6397
  free (spec_check_no);
6398
  spec_check_no = 0;
6399
  max_uid = 0;
6400
}
6401
 
6402
/* We are about to begin issuing insns for this clock cycle.
6403
   Override the default sort algorithm to better slot instructions.  */
6404
 
6405
static int
6406
ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
6407
                        int *pn_ready, int clock_var ATTRIBUTE_UNUSED,
6408
                        int reorder_type)
6409
{
6410
  int n_asms;
6411
  int n_ready = *pn_ready;
6412
  rtx *e_ready = ready + n_ready;
6413
  rtx *insnp;
6414
 
6415
  if (sched_verbose)
6416
    fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
6417
 
6418
  if (reorder_type == 0)
6419
    {
6420
      /* First, move all USEs, CLOBBERs and other crud out of the way.  */
6421
      n_asms = 0;
6422
      for (insnp = ready; insnp < e_ready; insnp++)
6423
        if (insnp < e_ready)
6424
          {
6425
            rtx insn = *insnp;
6426
            enum attr_type t = ia64_safe_type (insn);
6427
            if (t == TYPE_UNKNOWN)
6428
              {
6429
                if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6430
                    || asm_noperands (PATTERN (insn)) >= 0)
6431
                  {
6432
                    rtx lowest = ready[n_asms];
6433
                    ready[n_asms] = insn;
6434
                    *insnp = lowest;
6435
                    n_asms++;
6436
                  }
6437
                else
6438
                  {
6439
                    rtx highest = ready[n_ready - 1];
6440
                    ready[n_ready - 1] = insn;
6441
                    *insnp = highest;
6442
                    return 1;
6443
                  }
6444
              }
6445
          }
6446
 
6447
      if (n_asms < n_ready)
6448
        {
6449
          /* Some normal insns to process.  Skip the asms.  */
6450
          ready += n_asms;
6451
          n_ready -= n_asms;
6452
        }
6453
      else if (n_ready > 0)
6454
        return 1;
6455
    }
6456
 
6457
  if (ia64_final_schedule)
6458
    {
6459
      int deleted = 0;
6460
      int nr_need_stop = 0;
6461
 
6462
      for (insnp = ready; insnp < e_ready; insnp++)
6463
        if (safe_group_barrier_needed (*insnp))
6464
          nr_need_stop++;
6465
 
6466
      if (reorder_type == 1 && n_ready == nr_need_stop)
6467
        return 0;
6468
      if (reorder_type == 0)
6469
        return 1;
6470
      insnp = e_ready;
6471
      /* Move down everything that needs a stop bit, preserving
6472
         relative order.  */
6473
      while (insnp-- > ready + deleted)
6474
        while (insnp >= ready + deleted)
6475
          {
6476
            rtx insn = *insnp;
6477
            if (! safe_group_barrier_needed (insn))
6478
              break;
6479
            memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6480
            *ready = insn;
6481
            deleted++;
6482
          }
6483
      n_ready -= deleted;
6484
      ready += deleted;
6485
    }
6486
 
6487
  return 1;
6488
}
6489
 
6490
/* We are about to begin issuing insns for this clock cycle.  Override
6491
   the default sort algorithm to better slot instructions.  */
6492
 
6493
static int
6494
ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
6495
                    int clock_var)
6496
{
6497
  return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
6498
                                 pn_ready, clock_var, 0);
6499
}
6500
 
6501
/* Like ia64_sched_reorder, but called after issuing each insn.
6502
   Override the default sort algorithm to better slot instructions.  */
6503
 
6504
static int
6505
ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
6506
                     int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
6507
                     int *pn_ready, int clock_var)
6508
{
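  /* Record the cycle on which the last scheduled insn was issued;
     ia64_dfa_new_cycle later consults this `clocks' entry when computing
     extra cycles (add_cycles) for Itanium1 MM insns.  */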
6509
  if (ia64_tune == PROCESSOR_ITANIUM && reload_completed && last_scheduled_insn)
6510
    clocks [INSN_UID (last_scheduled_insn)] = clock_var;
6511
  return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
6512
                                 clock_var, 1);
6513
}
6514
 
6515
/* We are about to issue INSN.  Return the number of insns left on the
6516
   ready queue that can be issued this cycle.  */
6517
 
6518
static int
6519
ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
6520
                     int sched_verbose ATTRIBUTE_UNUSED,
6521
                     rtx insn ATTRIBUTE_UNUSED,
6522
                     int can_issue_more ATTRIBUTE_UNUSED)
6523
{
6524
  if (current_sched_info->flags & DO_SPECULATION)
6525
    /* Modulo scheduling does not extend h_i_d when emitting
6526
       new instructions.  Deal with it.  */
6527
    {
6528
      if (DONE_SPEC (insn) & BEGIN_DATA)
6529
        pending_data_specs++;
6530
      if (CHECK_SPEC (insn) & BEGIN_DATA)
6531
        pending_data_specs--;
6532
    }
6533
 
6534
  last_scheduled_insn = insn;
6535
  memcpy (prev_cycle_state, curr_state, dfa_state_size);
6536
  if (reload_completed)
6537
    {
6538
      int needed = group_barrier_needed (insn);
6539
 
6540
      gcc_assert (!needed);
6541
      if (GET_CODE (insn) == CALL_INSN)
6542
        init_insn_group_barriers ();
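      /* ia64_dfa_new_cycle sets stop_before_p when a stop bit has to be
         emitted before this insn; record that in stops_p for later use
         and clear the flag.  */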
6543
      stops_p [INSN_UID (insn)] = stop_before_p;
6544
      stop_before_p = 0;
6545
    }
6546
  return 1;
6547
}
6548
 
6549
/* We are choosing insn from the ready queue.  Return nonzero if INSN
6550
   can be chosen.  */
6551
 
6552
static int
6553
ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
6554
{
6555
  gcc_assert (insn  && INSN_P (insn));
6556
  return ((!reload_completed
6557
           || !safe_group_barrier_needed (insn))
6558
          && ia64_first_cycle_multipass_dfa_lookahead_guard_spec (insn));
6559
}
6560
 
6561
/* We are choosing insn from the ready queue.  Return nonzero if INSN
6562
   can be chosen.  */
6563
 
6564
static bool
6565
ia64_first_cycle_multipass_dfa_lookahead_guard_spec (rtx insn)
6566
{
6567
  gcc_assert (insn  && INSN_P (insn));
6568
  /* The size of the ALAT is 32.  Since we perform conservative data speculation,
6569
     we keep the ALAT half-empty.  */
6570
  return (pending_data_specs < 16
6571
          || !(TODO_SPEC (insn) & BEGIN_DATA));
6572
}
6573
 
6574
/* The following variable value is a pseudo-insn used by the DFA insn
6575
   scheduler to change the DFA state when the simulated clock is
6576
   increased.  */
6577
 
6578
static rtx dfa_pre_cycle_insn;
6579
 
6580
/* We are about to begin issuing INSN.  Return nonzero if we cannot
6581
   issue it on given cycle CLOCK and return zero if we should not sort
6582
   the ready queue on the next clock start.  */
6583
 
6584
static int
6585
ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
6586
                    int clock, int *sort_p)
6587
{
6588
  int setup_clocks_p = FALSE;
6589
 
6590
  gcc_assert (insn && INSN_P (insn));
6591
  if ((reload_completed && safe_group_barrier_needed (insn))
6592
      || (last_scheduled_insn
6593
          && (GET_CODE (last_scheduled_insn) == CALL_INSN
6594
              || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
6595
              || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
6596
    {
6597
      init_insn_group_barriers ();
6598
      if (verbose && dump)
6599
        fprintf (dump, "//    Stop should be before %d%s\n", INSN_UID (insn),
6600
                 last_clock == clock ? " + cycle advance" : "");
6601
      stop_before_p = 1;
6602
      if (last_clock == clock)
6603
        {
6604
          state_transition (curr_state, dfa_stop_insn);
6605
          if (TARGET_EARLY_STOP_BITS)
6606
            *sort_p = (last_scheduled_insn == NULL_RTX
6607
                       || GET_CODE (last_scheduled_insn) != CALL_INSN);
6608
          else
6609
            *sort_p = 0;
6610
          return 1;
6611
        }
6612
      else if (reload_completed)
6613
        setup_clocks_p = TRUE;
6614
      if (GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
6615
          || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)
6616
        state_reset (curr_state);
6617
      else
6618
        {
6619
          memcpy (curr_state, prev_cycle_state, dfa_state_size);
6620
          state_transition (curr_state, dfa_stop_insn);
6621
          state_transition (curr_state, dfa_pre_cycle_insn);
6622
          state_transition (curr_state, NULL);
6623
        }
6624
    }
6625
  else if (reload_completed)
6626
    setup_clocks_p = TRUE;
6627
  if (setup_clocks_p && ia64_tune == PROCESSOR_ITANIUM
6628
      && GET_CODE (PATTERN (insn)) != ASM_INPUT
6629
      && asm_noperands (PATTERN (insn)) < 0)
6630
    {
6631
      enum attr_itanium_class c = ia64_safe_itanium_class (insn);
6632
 
6633
      if (c != ITANIUM_CLASS_MMMUL && c != ITANIUM_CLASS_MMSHF)
6634
        {
6635
          rtx link;
6636
          int d = -1;
6637
 
6638
          for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6639
            if (REG_NOTE_KIND (link) == 0)
6640
              {
6641
                enum attr_itanium_class dep_class;
6642
                rtx dep_insn = XEXP (link, 0);
6643
 
6644
                dep_class = ia64_safe_itanium_class (dep_insn);
6645
                if ((dep_class == ITANIUM_CLASS_MMMUL
6646
                     || dep_class == ITANIUM_CLASS_MMSHF)
6647
                    && last_clock - clocks [INSN_UID (dep_insn)] < 4
6648
                    && (d < 0
6649
                        || last_clock - clocks [INSN_UID (dep_insn)] < d))
6650
                  d = last_clock - clocks [INSN_UID (dep_insn)];
6651
              }
6652
          if (d >= 0)
6653
            add_cycles [INSN_UID (insn)] = 3 - d;
6654
        }
6655
    }
6656
  return 0;
6657
}
6658
 
6659
/* Implement targetm.sched.h_i_d_extended hook.
6660
   Extend internal data structures.  */
6661
static void
6662
ia64_h_i_d_extended (void)
6663
{
6664
  if (current_sched_info->flags & DO_SPECULATION)
6665
    {
6666
      int new_max_uid = get_max_uid () + 1;
6667
 
6668
      spec_check_no = xrecalloc (spec_check_no, new_max_uid,
6669
                                 max_uid, sizeof (*spec_check_no));
6670
      max_uid = new_max_uid;
6671
    }
6672
 
6673
  if (stops_p != NULL)
6674
    {
6675
      int new_clocks_length = get_max_uid () + 1;
6676
 
6677
      stops_p = xrecalloc (stops_p, new_clocks_length, clocks_length, 1);
6678
 
6679
      if (ia64_tune == PROCESSOR_ITANIUM)
6680
        {
6681
          clocks = xrecalloc (clocks, new_clocks_length, clocks_length,
6682
                              sizeof (int));
6683
          add_cycles = xrecalloc (add_cycles, new_clocks_length, clocks_length,
6684
                                  sizeof (int));
6685
        }
6686
 
6687
      clocks_length = new_clocks_length;
6688
    }
6689
}
6690
 
6691
/* Constants that help map 'enum machine_mode' to int.  */
6692
enum SPEC_MODES
6693
  {
6694
    SPEC_MODE_INVALID = -1,
6695
    SPEC_MODE_FIRST = 0,
6696
    SPEC_MODE_FOR_EXTEND_FIRST = 1,
6697
    SPEC_MODE_FOR_EXTEND_LAST = 3,
6698
    SPEC_MODE_LAST = 8
6699
  };
6700
 
6701
/* Return index of the MODE.  */
6702
static int
6703
ia64_mode_to_int (enum machine_mode mode)
6704
{
6705
  switch (mode)
6706
    {
6707
    case BImode: return 0; /* SPEC_MODE_FIRST  */
6708
    case QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST  */
6709
    case HImode: return 2;
6710
    case SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST  */
6711
    case DImode: return 4;
6712
    case SFmode: return 5;
6713
    case DFmode: return 6;
6714
    case XFmode: return 7;
6715
    case TImode:
6716
      /* ??? This mode needs testing.  Bypasses for ldfp8 instruction are not
6717
         mentioned in itanium[12].md.  Predicate fp_register_operand also
6718
         needs to be defined.  Bottom line: better disable for now.  */
6719
      return SPEC_MODE_INVALID;
6720
    default:     return SPEC_MODE_INVALID;
6721
    }
6722
}
6723
 
6724
/* Provide information about speculation capabilities.  */
6725
static void
6726
ia64_set_sched_flags (spec_info_t spec_info)
6727
{
6728
  unsigned int *flags = &(current_sched_info->flags);
6729
 
6730
  if (*flags & SCHED_RGN
6731
      || *flags & SCHED_EBB)
6732
    {
6733
      int mask = 0;
6734
 
6735
      if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0)
6736
          || (mflag_sched_ar_data_spec && reload_completed))
6737
        {
6738
          mask |= BEGIN_DATA;
6739
 
6740
          if ((mflag_sched_br_in_data_spec && !reload_completed)
6741
              || (mflag_sched_ar_in_data_spec && reload_completed))
6742
            mask |= BE_IN_DATA;
6743
        }
6744
 
6745
      if (mflag_sched_control_spec)
6746
        {
6747
          mask |= BEGIN_CONTROL;
6748
 
6749
          if (mflag_sched_in_control_spec)
6750
            mask |= BE_IN_CONTROL;
6751
        }
6752
 
6753
      gcc_assert (*flags & USE_GLAT);
6754
 
6755
      if (mask)
6756
        {
6757
          *flags |= USE_DEPS_LIST | DETACH_LIFE_INFO | DO_SPECULATION;
6758
 
6759
          spec_info->mask = mask;
6760
          spec_info->flags = 0;
6761
 
6762
          if ((mask & DATA_SPEC) && mflag_sched_prefer_non_data_spec_insns)
6763
            spec_info->flags |= PREFER_NON_DATA_SPEC;
6764
 
6765
          if ((mask & CONTROL_SPEC)
6766
              && mflag_sched_prefer_non_control_spec_insns)
6767
            spec_info->flags |= PREFER_NON_CONTROL_SPEC;
6768
 
6769
          if (mflag_sched_spec_verbose)
6770
            {
6771
              if (sched_verbose >= 1)
6772
                spec_info->dump = sched_dump;
6773
              else
6774
                spec_info->dump = stderr;
6775
            }
6776
          else
6777
            spec_info->dump = 0;
6778
 
6779
          if (mflag_sched_count_spec_in_critical_path)
6780
            spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH;
6781
        }
6782
    }
6783
}
6784
 
6785
/* Implement targetm.sched.speculate_insn hook.
6786
   Check if the INSN can be TS speculative.
6787
   If 'no' - return -1.
6788
   If 'yes' - generate speculative pattern in the NEW_PAT and return 1.
6789
   If current pattern of the INSN already provides TS speculation, return 0.  */
6790
static int
6791
ia64_speculate_insn (rtx insn, ds_t ts, rtx *new_pat)
6792
{
6793
  rtx pat, reg, mem, mem_reg;
6794
  int mode_no, gen_p = 1;
6795
  bool extend_p;
6796
 
6797
  gcc_assert (!(ts & ~BEGIN_SPEC) && ts);
6798
 
6799
  pat = PATTERN (insn);
6800
 
6801
  if (GET_CODE (pat) == COND_EXEC)
6802
    pat = COND_EXEC_CODE (pat);
6803
 
6804
  /* This should be a SET ...  */
6805
  if (GET_CODE (pat) != SET)
6806
    return -1;
6807
 
6808
  reg = SET_DEST (pat);
6809
  /* ... to the general/fp register ...  */
6810
  if (!REG_P (reg) || !(GR_REGNO_P (REGNO (reg)) || FP_REGNO_P (REGNO (reg))))
6811
    return -1;
6812
 
6813
  /* ... from the mem ...  */
6814
  mem = SET_SRC (pat);
6815
 
6816
  /* ... that can, possibly, be a zero_extend ...  */
6817
  if (GET_CODE (mem) == ZERO_EXTEND)
6818
    {
6819
      mem = XEXP (mem, 0);
6820
      extend_p = true;
6821
    }
6822
  else
6823
    extend_p = false;
6824
 
6825
  /* ... or a speculative load.  */
6826
  if (GET_CODE (mem) == UNSPEC)
6827
    {
6828
      int code;
6829
 
6830
      code = XINT (mem, 1);
6831
      if (code != UNSPEC_LDA && code != UNSPEC_LDS && code != UNSPEC_LDSA)
6832
        return -1;
6833
 
6834
      if ((code == UNSPEC_LDA && !(ts & BEGIN_CONTROL))
6835
          || (code == UNSPEC_LDS && !(ts & BEGIN_DATA))
6836
          || code == UNSPEC_LDSA)
6837
        gen_p = 0;
6838
 
6839
      mem = XVECEXP (mem, 0, 0);
6840
      gcc_assert (MEM_P (mem));
6841
    }
6842
 
6843
  /* Source should be a mem ...  */
6844
  if (!MEM_P (mem))
6845
    return -1;
6846
 
6847
  /* ... addressed by a register.  */
6848
  mem_reg = XEXP (mem, 0);
6849
  if (!REG_P (mem_reg))
6850
    return -1;
6851
 
6852
  /* We should use MEM's mode since REG's mode in presence of ZERO_EXTEND
6853
     will always be DImode.  */
6854
  mode_no = ia64_mode_to_int (GET_MODE (mem));
6855
 
6856
  if (mode_no == SPEC_MODE_INVALID
6857
      || (extend_p
6858
          && !(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
6859
               && mode_no <= SPEC_MODE_FOR_EXTEND_LAST)))
6860
    return -1;
6861
 
6862
  extract_insn_cached (insn);
6863
  gcc_assert (reg == recog_data.operand[0] && mem == recog_data.operand[1]);
6864
 
6865
  *new_pat = ia64_gen_spec_insn (insn, ts, mode_no, gen_p != 0, extend_p);
6866
 
6867
  return gen_p;
6868
}
6869
 
6870
enum
6871
  {
6872
    /* Offset to reach ZERO_EXTEND patterns.  */
6873
    SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1,
6874
    /* Number of patterns for each speculation mode.  */
6875
    SPEC_N = (SPEC_MODE_LAST
6876
              + SPEC_MODE_FOR_EXTEND_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 2)
6877
  };
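/* With the SPEC_MODES values above, SPEC_GEN_EXTEND_OFFSET is 8 and
   SPEC_N is 12: nine ordinary machine modes plus three zero-extend
   variants per kind of speculative load.  */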
6878
 
6879
enum SPEC_GEN_LD_MAP
6880
  {
6881
    /* Offset to ld.a patterns.  */
6882
    SPEC_GEN_A = 0 * SPEC_N,
6883
    /* Offset to ld.s patterns.  */
6884
    SPEC_GEN_S = 1 * SPEC_N,
6885
    /* Offset to ld.sa patterns.  */
6886
    SPEC_GEN_SA = 2 * SPEC_N,
6887
    /* Offset to ld.sa patterns.  For these patterns the corresponding ld.c will
6888
       mutate to chk.s.  */
6889
    SPEC_GEN_SA_FOR_S = 3 * SPEC_N
6890
  };
6891
 
6892
/* These offsets are added to a load index so that the resulting check
   index falls into the block at (4 * SPEC_N).  */
6893
enum SPEC_GEN_CHECK_OFFSET
6894
  {
6895
    SPEC_GEN_CHKA_FOR_A_OFFSET = 4 * SPEC_N - SPEC_GEN_A,
6896
    SPEC_GEN_CHKA_FOR_SA_OFFSET = 4 * SPEC_N - SPEC_GEN_SA
6897
  };
6898
 
6899
/* If GEN_P is true, calculate the index of needed speculation check and return
6900
   speculative pattern for INSN with speculative mode TS, machine mode
6901
   MODE_NO and with ZERO_EXTEND (if EXTEND_P is true).
6902
   If GEN_P is false, just calculate the index of needed speculation check.  */
6903
static rtx
6904
ia64_gen_spec_insn (rtx insn, ds_t ts, int mode_no, bool gen_p, bool extend_p)
6905
{
6906
  rtx pat, new_pat;
6907
  int load_no;
6908
  int shift = 0;
6909
 
6910
  static rtx (* const gen_load[]) (rtx, rtx) = {
6911
    gen_movbi_advanced,
6912
    gen_movqi_advanced,
6913
    gen_movhi_advanced,
6914
    gen_movsi_advanced,
6915
    gen_movdi_advanced,
6916
    gen_movsf_advanced,
6917
    gen_movdf_advanced,
6918
    gen_movxf_advanced,
6919
    gen_movti_advanced,
6920
    gen_zero_extendqidi2_advanced,
6921
    gen_zero_extendhidi2_advanced,
6922
    gen_zero_extendsidi2_advanced,
6923
 
6924
    gen_movbi_speculative,
6925
    gen_movqi_speculative,
6926
    gen_movhi_speculative,
6927
    gen_movsi_speculative,
6928
    gen_movdi_speculative,
6929
    gen_movsf_speculative,
6930
    gen_movdf_speculative,
6931
    gen_movxf_speculative,
6932
    gen_movti_speculative,
6933
    gen_zero_extendqidi2_speculative,
6934
    gen_zero_extendhidi2_speculative,
6935
    gen_zero_extendsidi2_speculative,
6936
 
6937
    gen_movbi_speculative_advanced,
6938
    gen_movqi_speculative_advanced,
6939
    gen_movhi_speculative_advanced,
6940
    gen_movsi_speculative_advanced,
6941
    gen_movdi_speculative_advanced,
6942
    gen_movsf_speculative_advanced,
6943
    gen_movdf_speculative_advanced,
6944
    gen_movxf_speculative_advanced,
6945
    gen_movti_speculative_advanced,
6946
    gen_zero_extendqidi2_speculative_advanced,
6947
    gen_zero_extendhidi2_speculative_advanced,
6948
    gen_zero_extendsidi2_speculative_advanced,
6949
 
6950
    gen_movbi_speculative_advanced,
6951
    gen_movqi_speculative_advanced,
6952
    gen_movhi_speculative_advanced,
6953
    gen_movsi_speculative_advanced,
6954
    gen_movdi_speculative_advanced,
6955
    gen_movsf_speculative_advanced,
6956
    gen_movdf_speculative_advanced,
6957
    gen_movxf_speculative_advanced,
6958
    gen_movti_speculative_advanced,
6959
    gen_zero_extendqidi2_speculative_advanced,
6960
    gen_zero_extendhidi2_speculative_advanced,
6961
    gen_zero_extendsidi2_speculative_advanced
6962
  };
6963
 
6964
  load_no = extend_p ? mode_no + SPEC_GEN_EXTEND_OFFSET : mode_no;
6965
 
6966
  if (ts & BEGIN_DATA)
6967
    {
6968
      /* We don't need recovery because even if this is ld.sa
6969
         ALAT entry will be allocated only if NAT bit is set to zero.
6970
         So it is enough to use ld.c here.  */
6971
 
6972
      if (ts & BEGIN_CONTROL)
6973
        {
6974
          load_no += SPEC_GEN_SA;
6975
 
6976
          if (!mflag_sched_ldc)
6977
            shift = SPEC_GEN_CHKA_FOR_SA_OFFSET;
6978
        }
6979
      else
6980
        {
6981
          load_no += SPEC_GEN_A;
6982
 
6983
          if (!mflag_sched_ldc)
6984
            shift = SPEC_GEN_CHKA_FOR_A_OFFSET;
6985
        }
6986
    }
6987
  else if (ts & BEGIN_CONTROL)
6988
    {
6989
      /* ld.sa can be used instead of ld.s to avoid basic block splitting.  */
6990
      if (!mflag_control_ldc)
6991
        load_no += SPEC_GEN_S;
6992
      else
6993
        {
6994
          gcc_assert (mflag_sched_ldc);
6995
          load_no += SPEC_GEN_SA_FOR_S;
6996
        }
6997
    }
6998
  else
6999
    gcc_unreachable ();
7000
 
7001
  /* Set the desired check index.  We add '1' because a zero element in this
7002
     array means that the instruction with that uid is non-speculative.  */
7003
  spec_check_no[INSN_UID (insn)] = load_no + shift + 1;
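  /* For instance, a control-speculative DImode load with mflag_control_ldc
     clear: mode_no is 4, load_no becomes 4 + SPEC_GEN_S == 16
     (gen_movdi_speculative, i.e. an ld.s), and 17 is recorded here.  */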
7004
 
7005
  if (!gen_p)
7006
    return 0;
7007
 
7008
  new_pat = gen_load[load_no] (copy_rtx (recog_data.operand[0]),
7009
                               copy_rtx (recog_data.operand[1]));
7010
 
7011
  pat = PATTERN (insn);
7012
  if (GET_CODE (pat) == COND_EXEC)
7013
    new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx
7014
                                 (COND_EXEC_TEST (pat)), new_pat);
7015
 
7016
  return new_pat;
7017
}
7018
 
7019
/* Offset to branchy checks.  */
7020
enum { SPEC_GEN_CHECK_MUTATION_OFFSET = 5 * SPEC_N };
7021
 
7022
/* Return nonzero, if INSN needs branchy recovery check.  */
7023
static bool
7024
ia64_needs_block_p (rtx insn)
7025
{
7026
  int check_no;
7027
 
7028
  check_no = spec_check_no[INSN_UID(insn)] - 1;
7029
  gcc_assert (0 <= check_no && check_no < SPEC_GEN_CHECK_MUTATION_OFFSET);
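  /* Checks in the SPEC_GEN_S block are chk.s and those in the block at
     4 * SPEC_N are chk.a (see SPEC_GEN_CHECK_OFFSET); both branch to
     recovery code, unlike the ld.c-style checks in the other blocks.  */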
7030
 
7031
  return ((SPEC_GEN_S <= check_no && check_no < SPEC_GEN_S + SPEC_N)
7032
          || (4 * SPEC_N <= check_no && check_no < 4 * SPEC_N + SPEC_N));
7033
}
7034
 
7035
/* Generate (or regenerate, if (MUTATE_P)) recovery check for INSN.
7036
   If (LABEL != 0 || MUTATE_P), generate branchy recovery check.
7037
   Otherwise, generate a simple check.  */
7038
static rtx
7039
ia64_gen_check (rtx insn, rtx label, bool mutate_p)
7040
{
7041
  rtx op1, pat, check_pat;
7042
 
7043
  static rtx (* const gen_check[]) (rtx, rtx) = {
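    /* Indexed by spec_check_no - 1: one block of SPEC_N entries for each
       of SPEC_GEN_A, SPEC_GEN_S, SPEC_GEN_SA and SPEC_GEN_SA_FOR_S, then
       the chk.a block at 4 * SPEC_N, followed by the mutated variants
       starting at SPEC_GEN_CHECK_MUTATION_OFFSET.  */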
7044
    gen_movbi_clr,
7045
    gen_movqi_clr,
7046
    gen_movhi_clr,
7047
    gen_movsi_clr,
7048
    gen_movdi_clr,
7049
    gen_movsf_clr,
7050
    gen_movdf_clr,
7051
    gen_movxf_clr,
7052
    gen_movti_clr,
7053
    gen_zero_extendqidi2_clr,
7054
    gen_zero_extendhidi2_clr,
7055
    gen_zero_extendsidi2_clr,
7056
 
7057
    gen_speculation_check_bi,
7058
    gen_speculation_check_qi,
7059
    gen_speculation_check_hi,
7060
    gen_speculation_check_si,
7061
    gen_speculation_check_di,
7062
    gen_speculation_check_sf,
7063
    gen_speculation_check_df,
7064
    gen_speculation_check_xf,
7065
    gen_speculation_check_ti,
7066
    gen_speculation_check_di,
7067
    gen_speculation_check_di,
7068
    gen_speculation_check_di,
7069
 
7070
    gen_movbi_clr,
7071
    gen_movqi_clr,
7072
    gen_movhi_clr,
7073
    gen_movsi_clr,
7074
    gen_movdi_clr,
7075
    gen_movsf_clr,
7076
    gen_movdf_clr,
7077
    gen_movxf_clr,
7078
    gen_movti_clr,
7079
    gen_zero_extendqidi2_clr,
7080
    gen_zero_extendhidi2_clr,
7081
    gen_zero_extendsidi2_clr,
7082
 
7083
    gen_movbi_clr,
7084
    gen_movqi_clr,
7085
    gen_movhi_clr,
7086
    gen_movsi_clr,
7087
    gen_movdi_clr,
7088
    gen_movsf_clr,
7089
    gen_movdf_clr,
7090
    gen_movxf_clr,
7091
    gen_movti_clr,
7092
    gen_zero_extendqidi2_clr,
7093
    gen_zero_extendhidi2_clr,
7094
    gen_zero_extendsidi2_clr,
7095
 
7096
    gen_advanced_load_check_clr_bi,
7097
    gen_advanced_load_check_clr_qi,
7098
    gen_advanced_load_check_clr_hi,
7099
    gen_advanced_load_check_clr_si,
7100
    gen_advanced_load_check_clr_di,
7101
    gen_advanced_load_check_clr_sf,
7102
    gen_advanced_load_check_clr_df,
7103
    gen_advanced_load_check_clr_xf,
7104
    gen_advanced_load_check_clr_ti,
7105
    gen_advanced_load_check_clr_di,
7106
    gen_advanced_load_check_clr_di,
7107
    gen_advanced_load_check_clr_di,
7108
 
7109
    /* Following checks are generated during mutation.  */
7110
    gen_advanced_load_check_clr_bi,
7111
    gen_advanced_load_check_clr_qi,
7112
    gen_advanced_load_check_clr_hi,
7113
    gen_advanced_load_check_clr_si,
7114
    gen_advanced_load_check_clr_di,
7115
    gen_advanced_load_check_clr_sf,
7116
    gen_advanced_load_check_clr_df,
7117
    gen_advanced_load_check_clr_xf,
7118
    gen_advanced_load_check_clr_ti,
7119
    gen_advanced_load_check_clr_di,
7120
    gen_advanced_load_check_clr_di,
7121
    gen_advanced_load_check_clr_di,
7122
 
7123
    0,0,0,0,0,0,0,0,0,0,0,0,
7124
 
7125
    gen_advanced_load_check_clr_bi,
7126
    gen_advanced_load_check_clr_qi,
7127
    gen_advanced_load_check_clr_hi,
7128
    gen_advanced_load_check_clr_si,
7129
    gen_advanced_load_check_clr_di,
7130
    gen_advanced_load_check_clr_sf,
7131
    gen_advanced_load_check_clr_df,
7132
    gen_advanced_load_check_clr_xf,
7133
    gen_advanced_load_check_clr_ti,
7134
    gen_advanced_load_check_clr_di,
7135
    gen_advanced_load_check_clr_di,
7136
    gen_advanced_load_check_clr_di,
7137
 
7138
    gen_speculation_check_bi,
7139
    gen_speculation_check_qi,
7140
    gen_speculation_check_hi,
7141
    gen_speculation_check_si,
7142
    gen_speculation_check_di,
7143
    gen_speculation_check_sf,
7144
    gen_speculation_check_df,
7145
    gen_speculation_check_xf,
7146
    gen_speculation_check_ti,
7147
    gen_speculation_check_di,
7148
    gen_speculation_check_di,
7149
    gen_speculation_check_di
7150
  };
7151
 
7152
  extract_insn_cached (insn);
7153
 
7154
  if (label)
7155
    {
7156
      gcc_assert (mutate_p || ia64_needs_block_p (insn));
7157
      op1 = label;
7158
    }
7159
  else
7160
    {
7161
      gcc_assert (!mutate_p && !ia64_needs_block_p (insn));
7162
      op1 = copy_rtx (recog_data.operand[1]);
7163
    }
7164
 
7165
  if (mutate_p)
7166
    /* INSN is ld.c.
7167
       Find the speculation check number by searching for original
7168
       speculative load in the RESOLVED_DEPS list of INSN.
7169
       As long as patterns are unique for each instruction, this can be
7170
       accomplished by matching ORIG_PAT fields.  */
7171
    {
7172
      rtx link;
7173
      int check_no = 0;
7174
      rtx orig_pat = ORIG_PAT (insn);
7175
 
7176
      for (link = RESOLVED_DEPS (insn); link; link = XEXP (link, 1))
7177
        {
7178
          rtx x = XEXP (link, 0);
7179
 
7180
          if (ORIG_PAT (x) == orig_pat)
7181
            check_no = spec_check_no[INSN_UID (x)];
7182
        }
7183
      gcc_assert (check_no);
7184
 
7185
      spec_check_no[INSN_UID (insn)] = (check_no
7186
                                        + SPEC_GEN_CHECK_MUTATION_OFFSET);
7187
    }
7188
 
7189
  check_pat = (gen_check[spec_check_no[INSN_UID (insn)] - 1]
7190
               (copy_rtx (recog_data.operand[0]), op1));
7191
 
7192
  pat = PATTERN (insn);
7193
  if (GET_CODE (pat) == COND_EXEC)
7194
    check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
7195
                                   check_pat);
7196
 
7197
  return check_pat;
7198
}
7199
 
7200
/* Return nonzero, if X is branchy recovery check.  */
7201
static int
7202
ia64_spec_check_p (rtx x)
7203
{
7204
  x = PATTERN (x);
7205
  if (GET_CODE (x) == COND_EXEC)
7206
    x = COND_EXEC_CODE (x);
7207
  if (GET_CODE (x) == SET)
7208
    return ia64_spec_check_src_p (SET_SRC (x));
7209
  return 0;
7210
}
7211
 
7212
/* Return nonzero, if SRC belongs to recovery check.  */
7213
static int
7214
ia64_spec_check_src_p (rtx src)
7215
{
7216
  if (GET_CODE (src) == IF_THEN_ELSE)
7217
    {
7218
      rtx t;
7219
 
7220
      t = XEXP (src, 0);
7221
      if (GET_CODE (t) == NE)
7222
        {
7223
          t = XEXP (t, 0);
7224
 
7225
          if (GET_CODE (t) == UNSPEC)
7226
            {
7227
              int code;
7228
 
7229
              code = XINT (t, 1);
7230
 
7231
              if (code == UNSPEC_CHKACLR
7232
                  || code == UNSPEC_CHKS
7233
                  || code == UNSPEC_LDCCLR)
7234
                {
7235
                  gcc_assert (code != 0);
7236
                  return code;
7237
                }
7238
            }
7239
        }
7240
    }
7241
  return 0;
7242
}
7243
 
7244
 
7245
/* The following page contains abstract data `bundle states' which are
7246
   used for bundling insns (inserting nops and template generation).  */
7247
 
7248
/* The following describes the state of insn bundling.  */
7249
 
7250
struct bundle_state
7251
{
7252
  /* Unique bundle state number to identify them in the debugging
7253
     output  */
7254
  int unique_num;
7255
  rtx insn;     /* corresponding insn, NULL for the 1st and the last state  */
7256
  /* number of nops before and after the insn  */
7257
  short before_nops_num, after_nops_num;
7258
  int insn_num; /* insn number (0 - for initial state, 1 - for the 1st
7259
                   insn)  */
7260
  int cost;     /* cost of the state in cycles */
7261
  int accumulated_insns_num; /* number of all previous insns including
7262
                                nops.  L is considered as 2 insns */
7263
  int branch_deviation; /* deviation of previous branches from 3rd slots  */
7264
  struct bundle_state *next;  /* next state with the same insn_num  */
7265
  struct bundle_state *originator; /* originator (previous insn state)  */
7266
  /* All bundle states are in the following chain.  */
7267
  struct bundle_state *allocated_states_chain;
7268
  /* The DFA State after issuing the insn and the nops.  */
7269
  state_t dfa_state;
7270
};
7271
 
7272
/* The following array maps an insn number to the corresponding bundle state.  */
7273
 
7274
static struct bundle_state **index_to_bundle_states;
7275
 
7276
/* The unique number of the next bundle state.  */
7277
 
7278
static int bundle_states_num;
7279
 
7280
/* All allocated bundle states are in the following chain.  */
7281
 
7282
static struct bundle_state *allocated_bundle_states_chain;
7283
 
7284
/* All allocated but not used bundle states are in the following
7285
   chain.  */
7286
 
7287
static struct bundle_state *free_bundle_state_chain;
7288
 
7289
 
7290
/* The following function returns a free bundle state.  */
7291
 
7292
static struct bundle_state *
7293
get_free_bundle_state (void)
7294
{
7295
  struct bundle_state *result;
7296
 
7297
  if (free_bundle_state_chain != NULL)
7298
    {
7299
      result = free_bundle_state_chain;
7300
      free_bundle_state_chain = result->next;
7301
    }
7302
  else
7303
    {
7304
      result = xmalloc (sizeof (struct bundle_state));
7305
      result->dfa_state = xmalloc (dfa_state_size);
7306
      result->allocated_states_chain = allocated_bundle_states_chain;
7307
      allocated_bundle_states_chain = result;
7308
    }
7309
  result->unique_num = bundle_states_num++;
7310
  return result;
7311
 
7312
}
7313
 
7314
/* The following function frees the given bundle state.  */
7315
 
7316
static void
7317
free_bundle_state (struct bundle_state *state)
7318
{
7319
  state->next = free_bundle_state_chain;
7320
  free_bundle_state_chain = state;
7321
}
7322
 
7323
/* Start work with abstract data `bundle states'.  */
7324
 
7325
static void
7326
initiate_bundle_states (void)
7327
{
7328
  bundle_states_num = 0;
7329
  free_bundle_state_chain = NULL;
7330
  allocated_bundle_states_chain = NULL;
7331
}
7332
 
7333
/* Finish work with abstract data `bundle states'.  */
7334
 
7335
static void
7336
finish_bundle_states (void)
7337
{
7338
  struct bundle_state *curr_state, *next_state;
7339
 
7340
  for (curr_state = allocated_bundle_states_chain;
7341
       curr_state != NULL;
7342
       curr_state = next_state)
7343
    {
7344
      next_state = curr_state->allocated_states_chain;
7345
      free (curr_state->dfa_state);
7346
      free (curr_state);
7347
    }
7348
}
7349
 
7350
/* Hash table of the bundle states.  The key is dfa_state and insn_num
7351
   of the bundle states.  */
7352
 
7353
static htab_t bundle_state_table;
7354
 
7355
/* The function returns hash of BUNDLE_STATE.  */
7356
 
7357
static unsigned
7358
bundle_state_hash (const void *bundle_state)
7359
{
7360
  const struct bundle_state *state = (struct bundle_state *) bundle_state;
7361
  unsigned result, i;
7362
 
7363
  for (result = i = 0; i < dfa_state_size; i++)
7364
    result += (((unsigned char *) state->dfa_state) [i]
7365
               << ((i % CHAR_BIT) * 3 + CHAR_BIT));
7366
  return result + state->insn_num;
7367
}
7368
 
7369
/* The function returns nonzero if the bundle state keys are equal.  */
7370
 
7371
static int
7372
bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
7373
{
7374
  const struct bundle_state * state1 = (struct bundle_state *) bundle_state_1;
7375
  const struct bundle_state * state2 = (struct bundle_state *) bundle_state_2;
7376
 
7377
  return (state1->insn_num == state2->insn_num
7378
          && memcmp (state1->dfa_state, state2->dfa_state,
7379
                     dfa_state_size) == 0);
7380
}
7381
 
7382
/* The function inserts the BUNDLE_STATE into the hash table.  The
7383
   function returns nonzero if the bundle has been inserted into the
7384
   table.  The table contains the best bundle state with the given key.  */
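/* A state is considered better if it has a lower cost; ties are broken
   first by a smaller number of accumulated insns (i.e. fewer nops) and
   then by a smaller branch deviation.  */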
7385
 
7386
static int
7387
insert_bundle_state (struct bundle_state *bundle_state)
7388
{
7389
  void **entry_ptr;
7390
 
7391
  entry_ptr = htab_find_slot (bundle_state_table, bundle_state, 1);
7392
  if (*entry_ptr == NULL)
7393
    {
7394
      bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
7395
      index_to_bundle_states [bundle_state->insn_num] = bundle_state;
7396
      *entry_ptr = (void *) bundle_state;
7397
      return TRUE;
7398
    }
7399
  else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
7400
           || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
7401
               && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
7402
                   > bundle_state->accumulated_insns_num
7403
                   || (((struct bundle_state *)
7404
                        *entry_ptr)->accumulated_insns_num
7405
                       == bundle_state->accumulated_insns_num
7406
                       && ((struct bundle_state *)
7407
                           *entry_ptr)->branch_deviation
7408
                       > bundle_state->branch_deviation))))
7409
 
7410
    {
7411
      struct bundle_state temp;
7412
 
7413
      temp = *(struct bundle_state *) *entry_ptr;
7414
      *(struct bundle_state *) *entry_ptr = *bundle_state;
7415
      ((struct bundle_state *) *entry_ptr)->next = temp.next;
7416
      *bundle_state = temp;
7417
    }
7418
  return FALSE;
7419
}
7420
 
7421
/* Start work with the hash table.  */
7422
 
7423
static void
7424
initiate_bundle_state_table (void)
7425
{
7426
  bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
7427
                                    (htab_del) 0);
7428
}
7429
 
7430
/* Finish work with the hash table.  */
7431
 
7432
static void
7433
finish_bundle_state_table (void)
7434
{
7435
  htab_delete (bundle_state_table);
7436
}
7437
 
7438
 
7439
 
7440
/* The following variable is an insn `nop' used to check bundle states
7441
   with different numbers of inserted nops.  */
7442
 
7443
static rtx ia64_nop;
7444
 
7445
/* The following function tries to issue NOPS_NUM nops for the current
7446
   state without advancing the processor cycle.  If it fails, the
7447
   function returns FALSE and frees the current state.  */
7448
 
7449
static int
7450
try_issue_nops (struct bundle_state *curr_state, int nops_num)
7451
{
7452
  int i;
7453
 
7454
  for (i = 0; i < nops_num; i++)
7455
    if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
7456
      {
7457
        free_bundle_state (curr_state);
7458
        return FALSE;
7459
      }
7460
  return TRUE;
7461
}
7462
 
7463
/* The following function tries to issue INSN for the current
7464
   state without advancing the processor cycle.  If it fails, the
7465
   function returns FALSE and frees the current state.  */
7466
 
7467
static int
7468
try_issue_insn (struct bundle_state *curr_state, rtx insn)
7469
{
7470
  if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
7471
    {
7472
      free_bundle_state (curr_state);
7473
      return FALSE;
7474
    }
7475
  return TRUE;
7476
}
7477
 
7478
/* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
7479
   starting with ORIGINATOR without advancing processor cycle.  If
7480
   TRY_BUNDLE_END_P is TRUE, the function also/only (if
7481
   ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole bundle.
7482
   If it was successful, the function creates a new bundle state and
7483
   inserts it into the hash table and into `index_to_bundle_states'.  */
7484
 
7485
static void
7486
issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
7487
                     rtx insn, int try_bundle_end_p, int only_bundle_end_p)
7488
{
7489
  struct bundle_state *curr_state;
7490
 
7491
  curr_state = get_free_bundle_state ();
7492
  memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
7493
  curr_state->insn = insn;
7494
  curr_state->insn_num = originator->insn_num + 1;
7495
  curr_state->cost = originator->cost;
7496
  curr_state->originator = originator;
7497
  curr_state->before_nops_num = before_nops_num;
7498
  curr_state->after_nops_num = 0;
7499
  curr_state->accumulated_insns_num
7500
    = originator->accumulated_insns_num + before_nops_num;
7501
  curr_state->branch_deviation = originator->branch_deviation;
7502
  gcc_assert (insn);
7503
  if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
7504
    {
7505
      gcc_assert (GET_MODE (insn) != TImode);
7506
      if (!try_issue_nops (curr_state, before_nops_num))
7507
        return;
7508
      if (!try_issue_insn (curr_state, insn))
7509
        return;
7510
      memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
7511
      if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
7512
          && curr_state->accumulated_insns_num % 3 != 0)
7513
        {
7514
          free_bundle_state (curr_state);
7515
          return;
7516
        }
7517
    }
7518
  else if (GET_MODE (insn) != TImode)
7519
    {
7520
      if (!try_issue_nops (curr_state, before_nops_num))
7521
        return;
7522
      if (!try_issue_insn (curr_state, insn))
7523
        return;
7524
      curr_state->accumulated_insns_num++;
7525
      gcc_assert (GET_CODE (PATTERN (insn)) != ASM_INPUT
7526
                  && asm_noperands (PATTERN (insn)) < 0);
7527
 
7528
      if (ia64_safe_type (insn) == TYPE_L)
7529
        curr_state->accumulated_insns_num++;
7530
    }
7531
  else
7532
    {
7533
      /* If this is an insn that must be first in a group, then don't allow
7534
         nops to be emitted before it.  Currently, alloc is the only such
7535
         supported instruction.  */
7536
      /* ??? The bundling automatons should handle this for us, but they do
7537
         not yet have support for the first_insn attribute.  */
7538
      if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
7539
        {
7540
          free_bundle_state (curr_state);
7541
          return;
7542
        }
7543
 
7544
      state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
7545
      state_transition (curr_state->dfa_state, NULL);
7546
      curr_state->cost++;
7547
      if (!try_issue_nops (curr_state, before_nops_num))
7548
        return;
7549
      if (!try_issue_insn (curr_state, insn))
7550
        return;
7551
      curr_state->accumulated_insns_num++;
7552
      if (GET_CODE (PATTERN (insn)) == ASM_INPUT
7553
          || asm_noperands (PATTERN (insn)) >= 0)
7554
        {
7555
          /* Finish bundle containing asm insn.  */
7556
          curr_state->after_nops_num
7557
            = 3 - curr_state->accumulated_insns_num % 3;
7558
          curr_state->accumulated_insns_num
7559
            += 3 - curr_state->accumulated_insns_num % 3;
7560
        }
7561
      else if (ia64_safe_type (insn) == TYPE_L)
7562
        curr_state->accumulated_insns_num++;
7563
    }
7564
  if (ia64_safe_type (insn) == TYPE_B)
7565
    curr_state->branch_deviation
7566
      += 2 - (curr_state->accumulated_insns_num - 1) % 3;
7567
  if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
7568
    {
7569
      if (!only_bundle_end_p && insert_bundle_state (curr_state))
7570
        {
7571
          state_t dfa_state;
7572
          struct bundle_state *curr_state1;
7573
          struct bundle_state *allocated_states_chain;
7574
 
7575
          curr_state1 = get_free_bundle_state ();
7576
          dfa_state = curr_state1->dfa_state;
7577
          allocated_states_chain = curr_state1->allocated_states_chain;
7578
          *curr_state1 = *curr_state;
7579
          curr_state1->dfa_state = dfa_state;
7580
          curr_state1->allocated_states_chain = allocated_states_chain;
7581
          memcpy (curr_state1->dfa_state, curr_state->dfa_state,
7582
                  dfa_state_size);
7583
          curr_state = curr_state1;
7584
        }
7585
      if (!try_issue_nops (curr_state,
7586
                           3 - curr_state->accumulated_insns_num % 3))
7587
        return;
7588
      curr_state->after_nops_num
7589
        = 3 - curr_state->accumulated_insns_num % 3;
7590
      curr_state->accumulated_insns_num
7591
        += 3 - curr_state->accumulated_insns_num % 3;
7592
    }
7593
  if (!insert_bundle_state (curr_state))
7594
    free_bundle_state (curr_state);
7595
  return;
7596
}
7597
 
7598
/* The following function returns the position in the two-bundle window
7599
   for the given STATE.  */
7600
 
7601
static int
7602
get_max_pos (state_t state)
7603
{
7604
  if (cpu_unit_reservation_p (state, pos_6))
7605
    return 6;
7606
  else if (cpu_unit_reservation_p (state, pos_5))
7607
    return 5;
7608
  else if (cpu_unit_reservation_p (state, pos_4))
7609
    return 4;
7610
  else if (cpu_unit_reservation_p (state, pos_3))
7611
    return 3;
7612
  else if (cpu_unit_reservation_p (state, pos_2))
7613
    return 2;
7614
  else if (cpu_unit_reservation_p (state, pos_1))
7615
    return 1;
7616
  else
7617
    return 0;
7618
}
7619
 
7620
/* The function returns the code of a possible template for the given position
7621
   and state.  The function should be called only with position values
7622
   3 or 6.  We avoid generating F NOPs by putting
7623
   templates containing F insns at the end of the template search
7624
   because of an undocumented anomaly in McKinley-derived cores that can
7625
   cause stalls if an F-unit insn (including a NOP) is issued within a
7626
   six-cycle window after reading certain application registers (such
7627
   as ar.bsp).  Furthermore, power considerations also argue against
7628
   the use of F-unit instructions unless they're really needed.  */
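/* The returned code indexes bundle_name: 0 is .mii, 1 is .mmi, ..., 9 is
   .mlx; ia64_add_bundle_selector_before relies on codes 4 and 5 denoting
   the .bbb and .mbb templates.  */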
7629
 
7630
static int
7631
get_template (state_t state, int pos)
7632
{
7633
  switch (pos)
7634
    {
7635
    case 3:
7636
      if (cpu_unit_reservation_p (state, _0mmi_))
7637
        return 1;
7638
      else if (cpu_unit_reservation_p (state, _0mii_))
7639
        return 0;
7640
      else if (cpu_unit_reservation_p (state, _0mmb_))
7641
        return 7;
7642
      else if (cpu_unit_reservation_p (state, _0mib_))
7643
        return 6;
7644
      else if (cpu_unit_reservation_p (state, _0mbb_))
7645
        return 5;
7646
      else if (cpu_unit_reservation_p (state, _0bbb_))
7647
        return 4;
7648
      else if (cpu_unit_reservation_p (state, _0mmf_))
7649
        return 3;
7650
      else if (cpu_unit_reservation_p (state, _0mfi_))
7651
        return 2;
7652
      else if (cpu_unit_reservation_p (state, _0mfb_))
7653
        return 8;
7654
      else if (cpu_unit_reservation_p (state, _0mlx_))
7655
        return 9;
7656
      else
7657
        gcc_unreachable ();
7658
    case 6:
7659
      if (cpu_unit_reservation_p (state, _1mmi_))
7660
        return 1;
7661
      else if (cpu_unit_reservation_p (state, _1mii_))
7662
        return 0;
7663
      else if (cpu_unit_reservation_p (state, _1mmb_))
7664
        return 7;
7665
      else if (cpu_unit_reservation_p (state, _1mib_))
7666
        return 6;
7667
      else if (cpu_unit_reservation_p (state, _1mbb_))
7668
        return 5;
7669
      else if (cpu_unit_reservation_p (state, _1bbb_))
7670
        return 4;
7671
      else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
7672
        return 3;
7673
      else if (cpu_unit_reservation_p (state, _1mfi_))
7674
        return 2;
7675
      else if (cpu_unit_reservation_p (state, _1mfb_))
7676
        return 8;
7677
      else if (cpu_unit_reservation_p (state, _1mlx_))
7678
        return 9;
7679
      else
7680
        gcc_unreachable ();
7681
    default:
7682
      gcc_unreachable ();
7683
    }
7684
}
7685
 
7686
/* The following function returns an insn important for insn bundling
7687
   followed by INSN and before TAIL.  */
7688
 
7689
static rtx
7690
get_next_important_insn (rtx insn, rtx tail)
7691
{
7692
  for (; insn && insn != tail; insn = NEXT_INSN (insn))
7693
    if (INSN_P (insn)
7694
        && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
7695
        && GET_CODE (PATTERN (insn)) != USE
7696
        && GET_CODE (PATTERN (insn)) != CLOBBER)
7697
      return insn;
7698
  return NULL_RTX;
7699
}
7700
 
7701
/* Add a bundle selector TEMPLATE0 before INSN.  */
7702
 
7703
static void
7704
ia64_add_bundle_selector_before (int template0, rtx insn)
7705
{
7706
  rtx b = gen_bundle_selector (GEN_INT (template0));
7707
 
7708
  ia64_emit_insn_before (b, insn);
7709
#if NR_BUNDLES == 10
7710
  if ((template0 == 4 || template0 == 5)
7711
      && (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
7712
    {
7713
      int i;
7714
      rtx note = NULL_RTX;
7715
 
7716
      /* In .mbb and .bbb bundles, check if CALL_INSN isn't in the
7717
         first or second slot.  If it is and has REG_EH_NOTE set, copy it
7718
         to following nops, as br.call sets rp to the address of following
7719
         bundle and therefore an EH region end must be on a bundle
7720
         boundary.  */
7721
      insn = PREV_INSN (insn);
7722
      for (i = 0; i < 3; i++)
7723
        {
7724
          do
7725
            insn = next_active_insn (insn);
7726
          while (GET_CODE (insn) == INSN
7727
                 && get_attr_empty (insn) == EMPTY_YES);
7728
          if (GET_CODE (insn) == CALL_INSN)
7729
            note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
7730
          else if (note)
7731
            {
7732
              int code;
7733
 
7734
              gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
7735
                          || code == CODE_FOR_nop_b);
7736
              if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
7737
                note = NULL_RTX;
7738
              else
7739
                REG_NOTES (insn)
7740
                  = gen_rtx_EXPR_LIST (REG_EH_REGION, XEXP (note, 0),
7741
                                       REG_NOTES (insn));
7742
            }
7743
        }
7744
    }
7745
#endif
7746
}
7747
 
7748
/* The following function does insn bundling.  Bundling means
7749
   inserting templates and nop insns to fit insn groups into permitted
7750
   templates.  Instruction scheduling uses an NDFA (non-deterministic
7751
   finite automaton) encoding information about the templates and the
7752
   inserted nops.  The nondeterminism of the automaton permits following
7753
   all possible insn sequences very quickly.
7754
 
7755
   Unfortunately it is not possible to get information about inserting
7756
   nop insns and used templates from the automaton states.  The
7757
   automaton only says that we can issue an insn, possibly inserting
7758
   some nops before it and using some template.  Therefore insn
7759
   bundling in this function is implemented by using a DFA
7760
   (deterministic finite automaton).  We follow all possible insn
7761
   sequences by inserting 0-2 nops (that is what the NDFA describes for
7762
   insn scheduling) before/after each insn being bundled.  We know the
7763
   start of a simulated processor cycle from insn scheduling (an insn
7764
   starting a new cycle has TImode).
7765
 
7766
   A simple implementation of insn bundling would create an enormous
7767
   number of possible insn sequences satisfying the information about new
7768
   cycle ticks taken from insn scheduling.  To make the algorithm
7769
   practical we use dynamic programming.  Each decision (about
7770
   inserting nops, and implicitly about previous decisions) is described
7771
   by the structure bundle_state (see above).  If we generate the same
7772
   bundle state (the key is the automaton state after issuing the insns
7773
   and nops for it), we reuse the already generated one.  As a consequence
7774
   we reject some decisions which cannot improve the solution and
7775
   reduce the memory used by the algorithm.
7776
 
7777
   When we reach the end of the EBB (extended basic block), we choose
7778
   the best sequence and then, moving backward through the EBB, insert
7779
   templates for the best alternative.  The templates are obtained by
7780
   querying the automaton state for each insn in the chosen bundle states.
7781
 
7782
   So the algorithm makes two (forward and backward) passes through the
7783
   EBB.  There is an additional forward pass through the EBB for the
7784
   Itanium 1 processor.  This pass inserts more nops to make the dependency
7785
   between a producer insn and MMMUL/MMSHF insns at least 4 cycles long.  */
7786
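
/* Roughly, the dynamic programming described above can be sketched as
   follows (illustrative pseudo-code, not the literal implementation):

     for each important insn I (number k in the EBB)
       for each bundle state S in index_to_bundle_states[k - 1]
         for n in {2, 1, 0}
           issue_nops_and_insn (S, n, I, ...);

   States reaching the same automaton state are merged, keeping the
   better one, which is what keeps the search space small.  */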
 
7787
static void
7788
bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
7789
{
7790
  struct bundle_state *curr_state, *next_state, *best_state;
7791
  rtx insn, next_insn;
7792
  int insn_num;
7793
  int i, bundle_end_p, only_bundle_end_p, asm_p;
7794
  int pos = 0, max_pos, template0, template1;
7795
  rtx b;
7796
  rtx nop;
7797
  enum attr_type type;
7798
 
7799
  insn_num = 0;
7800
  /* Count insns in the EBB.  */
7801
  for (insn = NEXT_INSN (prev_head_insn);
7802
       insn && insn != tail;
7803
       insn = NEXT_INSN (insn))
7804
    if (INSN_P (insn))
7805
      insn_num++;
7806
  if (insn_num == 0)
7807
    return;
7808
  bundling_p = 1;
7809
  dfa_clean_insn_cache ();
7810
  initiate_bundle_state_table ();
7811
  index_to_bundle_states = xmalloc ((insn_num + 2)
7812
                                    * sizeof (struct bundle_state *));
7813
  /* First (forward) pass -- generation of bundle states.  */
7814
  curr_state = get_free_bundle_state ();
7815
  curr_state->insn = NULL;
7816
  curr_state->before_nops_num = 0;
7817
  curr_state->after_nops_num = 0;
7818
  curr_state->insn_num = 0;
7819
  curr_state->cost = 0;
7820
  curr_state->accumulated_insns_num = 0;
7821
  curr_state->branch_deviation = 0;
7822
  curr_state->next = NULL;
7823
  curr_state->originator = NULL;
7824
  state_reset (curr_state->dfa_state);
7825
  index_to_bundle_states [0] = curr_state;
7826
  insn_num = 0;
7827
  /* Shift cycle mark if it is put on insn which could be ignored.  */
7828
  for (insn = NEXT_INSN (prev_head_insn);
7829
       insn != tail;
7830
       insn = NEXT_INSN (insn))
7831
    if (INSN_P (insn)
7832
        && (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
7833
            || GET_CODE (PATTERN (insn)) == USE
7834
            || GET_CODE (PATTERN (insn)) == CLOBBER)
7835
        && GET_MODE (insn) == TImode)
7836
      {
7837
        PUT_MODE (insn, VOIDmode);
7838
        for (next_insn = NEXT_INSN (insn);
7839
             next_insn != tail;
7840
             next_insn = NEXT_INSN (next_insn))
7841
          if (INSN_P (next_insn)
7842
              && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
7843
              && GET_CODE (PATTERN (next_insn)) != USE
7844
              && GET_CODE (PATTERN (next_insn)) != CLOBBER)
7845
            {
7846
              PUT_MODE (next_insn, TImode);
7847
              break;
7848
            }
7849
      }
7850
  /* Forward pass: generation of bundle states.  */
7851
  for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
7852
       insn != NULL_RTX;
7853
       insn = next_insn)
7854
    {
7855
      gcc_assert (INSN_P (insn)
7856
                  && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
7857
                  && GET_CODE (PATTERN (insn)) != USE
7858
                  && GET_CODE (PATTERN (insn)) != CLOBBER);
7859
      type = ia64_safe_type (insn);
7860
      next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
7861
      insn_num++;
7862
      index_to_bundle_states [insn_num] = NULL;
7863
      for (curr_state = index_to_bundle_states [insn_num - 1];
7864
           curr_state != NULL;
7865
           curr_state = next_state)
7866
        {
7867
          pos = curr_state->accumulated_insns_num % 3;
7868
          next_state = curr_state->next;
7869
          /* We must fill up the current bundle in order to start a
7870
             subsequent asm insn in a new bundle.  An asm insn is always
7871
             placed in a separate bundle.  */
7872
          only_bundle_end_p
7873
            = (next_insn != NULL_RTX
7874
               && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
7875
               && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
7876
          /* We may fill up the current bundle if it is the cycle end
7877
             without a group barrier.  */
7878
          bundle_end_p
7879
            = (only_bundle_end_p || next_insn == NULL_RTX
7880
               || (GET_MODE (next_insn) == TImode
7881
                   && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
7882
          if (type == TYPE_F || type == TYPE_B || type == TYPE_L
7883
              || type == TYPE_S
7884
              /* We need to insert 2 nops for cases like M_MII.  To
7885
                 guarantee issuing all insns on the same cycle for
7886
                 Itanium 1, we need to issue 2 nops after the first M
7887
                 insn (MnnMII where n is a nop insn).  */
7888
              || ((type == TYPE_M || type == TYPE_A)
7889
                  && ia64_tune == PROCESSOR_ITANIUM
7890
                  && !bundle_end_p && pos == 1))
7891
            issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
7892
                                 only_bundle_end_p);
7893
          issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
7894
                               only_bundle_end_p);
7895
          issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
7896
                               only_bundle_end_p);
7897
        }
7898
      gcc_assert (index_to_bundle_states [insn_num]);
7899
      for (curr_state = index_to_bundle_states [insn_num];
7900
           curr_state != NULL;
7901
           curr_state = curr_state->next)
7902
        if (verbose >= 2 && dump)
7903
          {
7904
            /* This structure is taken from generated code of the
7905
               pipeline hazard recognizer (see file insn-attrtab.c).
7906
               Please don't forget to change the structure if a new
7907
               automaton is added to .md file.  */
7908
            struct DFA_chip
7909
            {
7910
              unsigned short one_automaton_state;
7911
              unsigned short oneb_automaton_state;
7912
              unsigned short two_automaton_state;
7913
              unsigned short twob_automaton_state;
7914
            };
7915
 
7916
            fprintf
7917
              (dump,
7918
               "//    Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
7919
               curr_state->unique_num,
7920
               (curr_state->originator == NULL
7921
                ? -1 : curr_state->originator->unique_num),
7922
               curr_state->cost,
7923
               curr_state->before_nops_num, curr_state->after_nops_num,
7924
               curr_state->accumulated_insns_num, curr_state->branch_deviation,
7925
               (ia64_tune == PROCESSOR_ITANIUM
7926
                ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
7927
                : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
7928
               INSN_UID (insn));
7929
          }
7930
    }
7931
 
7932
  /* We should find a solution because the 2nd insn scheduling has
7933
     found one.  */
7934
  gcc_assert (index_to_bundle_states [insn_num]);
7935
  /* Find a state corresponding to the best insn sequence.  */
7936
  best_state = NULL;
7937
  for (curr_state = index_to_bundle_states [insn_num];
7938
       curr_state != NULL;
7939
       curr_state = curr_state->next)
7940
    /* We only consider states whose last bundle is completely filled.
7941
       First we prefer insn sequences with minimal cost, then those with
7942
       the fewest inserted nops, and finally those with branch insns
7943
       placed in the 3rd slots.  */
7944
    if (curr_state->accumulated_insns_num % 3 == 0
7945
        && (best_state == NULL || best_state->cost > curr_state->cost
7946
            || (best_state->cost == curr_state->cost
7947
                && (curr_state->accumulated_insns_num
7948
                    < best_state->accumulated_insns_num
7949
                    || (curr_state->accumulated_insns_num
7950
                        == best_state->accumulated_insns_num
7951
                        && curr_state->branch_deviation
7952
                        < best_state->branch_deviation)))))
7953
      best_state = curr_state;
7954
  /* Second (backward) pass: adding nops and templates.  */
7955
  insn_num = best_state->before_nops_num;
7956
  template0 = template1 = -1;
7957
  for (curr_state = best_state;
7958
       curr_state->originator != NULL;
7959
       curr_state = curr_state->originator)
7960
    {
7961
      insn = curr_state->insn;
7962
      asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
7963
               || asm_noperands (PATTERN (insn)) >= 0);
7964
      insn_num++;
7965
      if (verbose >= 2 && dump)
7966
        {
7967
          struct DFA_chip
7968
          {
7969
            unsigned short one_automaton_state;
7970
            unsigned short oneb_automaton_state;
7971
            unsigned short two_automaton_state;
7972
            unsigned short twob_automaton_state;
7973
          };
7974
 
7975
          fprintf
7976
            (dump,
7977
             "//    Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
7978
             curr_state->unique_num,
7979
             (curr_state->originator == NULL
7980
              ? -1 : curr_state->originator->unique_num),
7981
             curr_state->cost,
7982
             curr_state->before_nops_num, curr_state->after_nops_num,
7983
             curr_state->accumulated_insns_num, curr_state->branch_deviation,
7984
             (ia64_tune == PROCESSOR_ITANIUM
7985
              ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
7986
              : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
7987
             INSN_UID (insn));
7988
        }
7989
      /* Find the position in the current bundle window.  The window can
7990
         contain at most two bundles.  A two-bundle window means that
7991
         the processor will make two bundle rotations.  */
7992
      max_pos = get_max_pos (curr_state->dfa_state);
7993
      if (max_pos == 6
7994
          /* The following (negative template number) means that the
7995
             processor did one bundle rotation.  */
7996
          || (max_pos == 3 && template0 < 0))
7997
        {
7998
          /* We are at the end of the window -- find template(s) for
7999
             its bundle(s).  */
8000
          pos = max_pos;
8001
          if (max_pos == 3)
8002
            template0 = get_template (curr_state->dfa_state, 3);
8003
          else
8004
            {
8005
              template1 = get_template (curr_state->dfa_state, 3);
8006
              template0 = get_template (curr_state->dfa_state, 6);
8007
            }
8008
        }
8009
      if (max_pos > 3 && template1 < 0)
8010
        /* This may happen when we have a stop inside a bundle.  */
8011
        {
8012
          gcc_assert (pos <= 3);
8013
          template1 = get_template (curr_state->dfa_state, 3);
8014
          pos += 3;
8015
        }
8016
      if (!asm_p)
8017
        /* Emit nops after the current insn.  */
8018
        for (i = 0; i < curr_state->after_nops_num; i++)
8019
          {
8020
            nop = gen_nop ();
8021
            emit_insn_after (nop, insn);
8022
            pos--;
8023
            gcc_assert (pos >= 0);
8024
            if (pos % 3 == 0)
8025
              {
8026
                /* We are at the start of a bundle: emit the template
8027
                   (it should be defined).  */
8028
                gcc_assert (template0 >= 0);
8029
                ia64_add_bundle_selector_before (template0, nop);
8030
                /* If we have a two-bundle window, we make one bundle
8031
                   rotation.  Otherwise template0 will be undefined
8032
                   (a negative value).  */
8033
                template0 = template1;
8034
                template1 = -1;
8035
              }
8036
          }
8037
      /* Move the position backward in the window.  A group barrier has
8038
         no slot.  An asm insn takes a whole bundle.  */
8039
      if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
8040
          && GET_CODE (PATTERN (insn)) != ASM_INPUT
8041
          && asm_noperands (PATTERN (insn)) < 0)
8042
        pos--;
8043
      /* Long insn takes 2 slots.  */
8044
      if (ia64_safe_type (insn) == TYPE_L)
8045
        pos--;
8046
      gcc_assert (pos >= 0);
8047
      if (pos % 3 == 0
8048
          && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
8049
          && GET_CODE (PATTERN (insn)) != ASM_INPUT
8050
          && asm_noperands (PATTERN (insn)) < 0)
8051
        {
8052
          /* The current insn is at the bundle start: emit the
8053
             template.  */
8054
          gcc_assert (template0 >= 0);
8055
          ia64_add_bundle_selector_before (template0, insn);
8056
          b = PREV_INSN (insn);
8057
          insn = b;
8058
          /* See comment above in analogous place for emitting nops
8059
             after the insn.  */
8060
          template0 = template1;
8061
          template1 = -1;
8062
        }
8063
      /* Emit nops before the current insn.  */
8064
      for (i = 0; i < curr_state->before_nops_num; i++)
8065
        {
8066
          nop = gen_nop ();
8067
          ia64_emit_insn_before (nop, insn);
8068
          nop = PREV_INSN (insn);
8069
          insn = nop;
8070
          pos--;
8071
          gcc_assert (pos >= 0);
8072
          if (pos % 3 == 0)
8073
            {
8074
              /* See comment above in analogous place for emitting nops
8075
                 after the insn.  */
8076
              gcc_assert (template0 >= 0);
8077
              ia64_add_bundle_selector_before (template0, insn);
8078
              b = PREV_INSN (insn);
8079
              insn = b;
8080
              template0 = template1;
8081
              template1 = -1;
8082
            }
8083
        }
8084
    }
8085
  if (ia64_tune == PROCESSOR_ITANIUM)
8086
    /* Insert additional cycles for MM-insns (MMMUL and MMSHF).
8087
       Itanium 1 has a strange design: if the distance between an insn
8088
       and a dependent MM-insn is less than 4 cycles, there is an
8089
       additional 6-cycle stall.  So we make the distance equal to 4
8090
       cycles if it is less.  */
8091
    for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
8092
         insn != NULL_RTX;
8093
         insn = next_insn)
8094
      {
8095
        gcc_assert (INSN_P (insn)
8096
                    && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
8097
                    && GET_CODE (PATTERN (insn)) != USE
8098
                    && GET_CODE (PATTERN (insn)) != CLOBBER);
8099
        next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
8100
        if (INSN_UID (insn) < clocks_length && add_cycles [INSN_UID (insn)])
8101
          /* We found an MM-insn which needs additional cycles.  */
8102
          {
8103
            rtx last;
8104
            int i, j, n;
8105
            int pred_stop_p;
8106
 
8107
            /* Now we search for the template of the bundle in which
8108
               the MM-insn is placed and for the position of the insn
8109
               in the bundle (0, 1, or 2).  We also check whether there
8110
               is a stop before the insn.  */
8111
            last = prev_active_insn (insn);
8112
            pred_stop_p = recog_memoized (last) == CODE_FOR_insn_group_barrier;
8113
            if (pred_stop_p)
8114
              last = prev_active_insn (last);
8115
            n = 0;
8116
            for (;; last = prev_active_insn (last))
8117
              if (recog_memoized (last) == CODE_FOR_bundle_selector)
8118
                {
8119
                  template0 = XINT (XVECEXP (PATTERN (last), 0, 0), 0);
8120
                  if (template0 == 9)
8121
                    /* The insn is in an MLX bundle.  Change the template
8122
                       to MFI because we will add nops before the
8123
                       insn.  This simplifies the subsequent code a lot.  */
8124
                    PATTERN (last)
8125
                      = gen_bundle_selector (const2_rtx); /* -> MFI */
8126
                  break;
8127
                }
8128
              else if (recog_memoized (last) != CODE_FOR_insn_group_barrier
8129
                       && (ia64_safe_itanium_class (last)
8130
                           != ITANIUM_CLASS_IGNORE))
8131
                n++;
8132
            /* Sanity check: the stop is not at the bundle start,
8133
               there are no more than 3 insns in the bundle, and the
8134
               MM-insn is not at the start of a bundle with
8135
               template MLX.  */
8136
            gcc_assert ((!pred_stop_p || n)
8137
                        && n <= 2
8138
                        && (template0 != 9 || !n));
8139
            /* Fill the rest of the original bundle with nops (emitted
               before the insn).  */
8140
            for (j = 3 - n; j > 0; j --)
8141
              ia64_emit_insn_before (gen_nop (), insn);
8142
            /* This takes into account that we will add N more nops
8143
               before the insn later -- please see the code below.  */
8144
            add_cycles [INSN_UID (insn)]--;
8145
            if (!pred_stop_p || add_cycles [INSN_UID (insn)])
8146
              ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
8147
                                     insn);
8148
            if (pred_stop_p)
8149
              add_cycles [INSN_UID (insn)]--;
8150
            for (i = add_cycles [INSN_UID (insn)]; i > 0; i--)
8151
              {
8152
                /* Insert "MII;" template.  */
8153
                ia64_emit_insn_before (gen_bundle_selector (const0_rtx),
8154
                                       insn);
8155
                ia64_emit_insn_before (gen_nop (), insn);
8156
                ia64_emit_insn_before (gen_nop (), insn);
8157
                if (i > 1)
8158
                  {
8159
                    /* To decrease code size, we use the "MI;I;"
8160
                       template.  */
8161
                    ia64_emit_insn_before
8162
                      (gen_insn_group_barrier (GEN_INT (3)), insn);
8163
                    i--;
8164
                  }
8165
                ia64_emit_insn_before (gen_nop (), insn);
8166
                ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
8167
                                       insn);
8168
              }
8169
            /* Put the MM-insn in the same slot of a bundle with the
8170
               same template as the original one.  */
8171
            ia64_add_bundle_selector_before (template0, insn);
8172
            /* To put the insn in the same slot, add the necessary
8173
               number of nops.  */
8174
            for (j = n; j > 0; j --)
8175
              ia64_emit_insn_before (gen_nop (), insn);
8176
            /* Put back the stop if the original bundle had one.  */
8177
            if (pred_stop_p)
8178
              ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
8179
                                     insn);
8180
          }
8181
      }
8182
  free (index_to_bundle_states);
8183
  finish_bundle_state_table ();
8184
  bundling_p = 0;
8185
  dfa_clean_insn_cache ();
8186
}
8187
 
8188
/* The following function is called at the end of scheduling BB or
8189
   EBB.  After reload, it inserts stop bits and does insn bundling.  */
8190
 
8191
static void
8192
ia64_sched_finish (FILE *dump, int sched_verbose)
8193
{
8194
  if (sched_verbose)
8195
    fprintf (dump, "// Finishing schedule.\n");
8196
  if (!reload_completed)
8197
    return;
8198
  if (reload_completed)
8199
    {
8200
      final_emit_insn_group_barriers (dump);
8201
      bundling (dump, sched_verbose, current_sched_info->prev_head,
8202
                current_sched_info->next_tail);
8203
      if (sched_verbose && dump)
8204
        fprintf (dump, "//    finishing %d-%d\n",
8205
                 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
8206
                 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
8207
 
8208
      return;
8209
    }
8210
}
8211
 
8212
/* The following function inserts stop bits in scheduled BB or EBB.  */
8213
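
/* A note on the mechanics: the gen_insn_group_barrier (GEN_INT (3))
   insns emitted below are what ultimately become the ";;" stop bits
   in the assembly output (see the insn_group_barrier pattern in
   ia64.md).  */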
 
8214
static void
8215
final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
8216
{
8217
  rtx insn;
8218
  int need_barrier_p = 0;
8219
  rtx prev_insn = NULL_RTX;
8220
 
8221
  init_insn_group_barriers ();
8222
 
8223
  for (insn = NEXT_INSN (current_sched_info->prev_head);
8224
       insn != current_sched_info->next_tail;
8225
       insn = NEXT_INSN (insn))
8226
    {
8227
      if (GET_CODE (insn) == BARRIER)
8228
        {
8229
          rtx last = prev_active_insn (insn);
8230
 
8231
          if (! last)
8232
            continue;
8233
          if (GET_CODE (last) == JUMP_INSN
8234
              && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
8235
            last = prev_active_insn (last);
8236
          if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
8237
            emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
8238
 
8239
          init_insn_group_barriers ();
8240
          need_barrier_p = 0;
8241
          prev_insn = NULL_RTX;
8242
        }
8243
      else if (INSN_P (insn))
8244
        {
8245
          if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
8246
            {
8247
              init_insn_group_barriers ();
8248
              need_barrier_p = 0;
8249
              prev_insn = NULL_RTX;
8250
            }
8251
          else if (need_barrier_p || group_barrier_needed (insn))
8252
            {
8253
              if (TARGET_EARLY_STOP_BITS)
8254
                {
8255
                  rtx last;
8256
 
8257
                  for (last = insn;
8258
                       last != current_sched_info->prev_head;
8259
                       last = PREV_INSN (last))
8260
                    if (INSN_P (last) && GET_MODE (last) == TImode
8261
                        && stops_p [INSN_UID (last)])
8262
                      break;
8263
                  if (last == current_sched_info->prev_head)
8264
                    last = insn;
8265
                  last = prev_active_insn (last);
8266
                  if (last
8267
                      && recog_memoized (last) != CODE_FOR_insn_group_barrier)
8268
                    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
8269
                                     last);
8270
                  init_insn_group_barriers ();
8271
                  for (last = NEXT_INSN (last);
8272
                       last != insn;
8273
                       last = NEXT_INSN (last))
8274
                    if (INSN_P (last))
8275
                      group_barrier_needed (last);
8276
                }
8277
              else
8278
                {
8279
                  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
8280
                                    insn);
8281
                  init_insn_group_barriers ();
8282
                }
8283
              group_barrier_needed (insn);
8284
              prev_insn = NULL_RTX;
8285
            }
8286
          else if (recog_memoized (insn) >= 0)
8287
            prev_insn = insn;
8288
          need_barrier_p = (GET_CODE (insn) == CALL_INSN
8289
                            || GET_CODE (PATTERN (insn)) == ASM_INPUT
8290
                            || asm_noperands (PATTERN (insn)) >= 0);
8291
        }
8292
    }
8293
}
8294
 
8295
 
8296
 
8297
/* The following function returns the lookahead depth for the first-cycle
8298
   multipass DFA scheduling; a nonzero value enables this lookahead.  */
8299
 
8300
static int
8301
ia64_first_cycle_multipass_dfa_lookahead (void)
8302
{
8303
  return (reload_completed ? 6 : 4);
8304
}
8305
 
8306
/* The following function initializes the variables `dfa_pre_cycle_insn'
   and `dfa_stop_insn'.  */
8307
 
8308
static void
8309
ia64_init_dfa_pre_cycle_insn (void)
8310
{
8311
  if (temp_dfa_state == NULL)
8312
    {
8313
      dfa_state_size = state_size ();
8314
      temp_dfa_state = xmalloc (dfa_state_size);
8315
      prev_cycle_state = xmalloc (dfa_state_size);
8316
    }
8317
  dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
8318
  PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
8319
  recog_memoized (dfa_pre_cycle_insn);
8320
  dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
8321
  PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
8322
  recog_memoized (dfa_stop_insn);
8323
}
8324
 
8325
/* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
8326
   used by the DFA insn scheduler.  */
8327
 
8328
static rtx
8329
ia64_dfa_pre_cycle_insn (void)
8330
{
8331
  return dfa_pre_cycle_insn;
8332
}
8333
 
8334
/* The following function returns TRUE if PRODUCER (of type ilog or
8335
   ld) produces an address for CONSUMER (of type st or stf).  */
8336
 
8337
int
8338
ia64_st_address_bypass_p (rtx producer, rtx consumer)
8339
{
8340
  rtx dest, reg, mem;
8341
 
8342
  gcc_assert (producer && consumer);
8343
  dest = ia64_single_set (producer);
8344
  gcc_assert (dest);
8345
  reg = SET_DEST (dest);
8346
  gcc_assert (reg);
8347
  if (GET_CODE (reg) == SUBREG)
8348
    reg = SUBREG_REG (reg);
8349
  gcc_assert (GET_CODE (reg) == REG);
8350
 
8351
  dest = ia64_single_set (consumer);
8352
  gcc_assert (dest);
8353
  mem = SET_DEST (dest);
8354
  gcc_assert (mem && GET_CODE (mem) == MEM);
8355
  return reg_mentioned_p (reg, mem);
8356
}
8357
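
/* For example, in the (illustrative) sequence

       add r14 = r32, r33     // producer (ilog)
       st8 [r14] = r35        // consumer (st)

   the producer defines r14, which appears in the consumer's address,
   so this bypass applies.  */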
 
8358
/* The following function returns TRUE if PRODUCER (of type ilog or
8359
   ld) produces an address for CONSUMER (of type ld or fld).  */
8360
 
8361
int
8362
ia64_ld_address_bypass_p (rtx producer, rtx consumer)
8363
{
8364
  rtx dest, src, reg, mem;
8365
 
8366
  gcc_assert (producer && consumer);
8367
  dest = ia64_single_set (producer);
8368
  gcc_assert (dest);
8369
  reg = SET_DEST (dest);
8370
  gcc_assert (reg);
8371
  if (GET_CODE (reg) == SUBREG)
8372
    reg = SUBREG_REG (reg);
8373
  gcc_assert (GET_CODE (reg) == REG);
8374
 
8375
  src = ia64_single_set (consumer);
8376
  gcc_assert (src);
8377
  mem = SET_SRC (src);
8378
  gcc_assert (mem);
8379
 
8380
  if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
8381
    mem = XVECEXP (mem, 0, 0);
8382
  else if (GET_CODE (mem) == IF_THEN_ELSE)
8383
    /* ??? Is this bypass necessary for ld.c?  */
8384
    {
8385
      gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR);
8386
      mem = XEXP (mem, 1);
8387
    }
8388
 
8389
  while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
8390
    mem = XEXP (mem, 0);
8391
 
8392
  if (GET_CODE (mem) == UNSPEC)
8393
    {
8394
      int c = XINT (mem, 1);
8395
 
8396
      gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDSA);
8397
      mem = XVECEXP (mem, 0, 0);
8398
    }
8399
 
8400
  /* Note that LO_SUM is used for GOT loads.  */
8401
  gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);
8402
 
8403
  return reg_mentioned_p (reg, mem);
8404
}
8405
 
8406
/* The following function returns TRUE if INSN produces an address for a
8407
   load/store insn.  We place such insns into an M slot because that
8408
   decreases their latency.  */
8409
 
8410
int
8411
ia64_produce_address_p (rtx insn)
8412
{
8413
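  /* The insn's `call' flag bit is reused here as a marker: it is set
     elsewhere in this file (during dependency evaluation) for insns
     whose result feeds the address of a memory access.  */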
  return insn->call;
8414
}
8415
 
8416
 
8417
/* Emit pseudo-ops for the assembler to describe predicate relations.
8418
   At present this assumes that we only consider predicate pairs to
8419
   be mutex, and that the assembler can deduce proper values from
8420
   straight-line code.  */
8421
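
/* As an illustration, the insns emitted below expand to assembler
   pseudo-ops roughly of this shape (the exact predicate operands and
   ranges depend on the machine description and on which predicates
   are live):

       .pred.rel.mutex p6, p7
       .pred.safe_across_calls p1-p5,p16-p63

   The first records a mutex predicate pair for the assembler; the
   safe_across_calls directives bracket conditional no-return calls.  */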
 
8422
static void
8423
emit_predicate_relation_info (void)
8424
{
8425
  basic_block bb;
8426
 
8427
  FOR_EACH_BB_REVERSE (bb)
8428
    {
8429
      int r;
8430
      rtx head = BB_HEAD (bb);
8431
 
8432
      /* We only need such notes at code labels.  */
8433
      if (GET_CODE (head) != CODE_LABEL)
8434
        continue;
8435
      if (GET_CODE (NEXT_INSN (head)) == NOTE
8436
          && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
8437
        head = NEXT_INSN (head);
8438
 
8439
      /* Skip p0, which may be thought to be live due to (reg:DI p0)
8440
         grabbing the entire block of predicate registers.  */
8441
      for (r = PR_REG (2); r < PR_REG (64); r += 2)
8442
        if (REGNO_REG_SET_P (bb->il.rtl->global_live_at_start, r))
8443
          {
8444
            rtx p = gen_rtx_REG (BImode, r);
8445
            rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
8446
            if (head == BB_END (bb))
8447
              BB_END (bb) = n;
8448
            head = n;
8449
          }
8450
    }
8451
 
8452
  /* Look for conditional calls that do not return, and protect predicate
8453
     relations around them.  Otherwise the assembler will assume the call
8454
     returns, and complain about uses of call-clobbered predicates after
8455
     the call.  */
8456
  FOR_EACH_BB_REVERSE (bb)
8457
    {
8458
      rtx insn = BB_HEAD (bb);
8459
 
8460
      while (1)
8461
        {
8462
          if (GET_CODE (insn) == CALL_INSN
8463
              && GET_CODE (PATTERN (insn)) == COND_EXEC
8464
              && find_reg_note (insn, REG_NORETURN, NULL_RTX))
8465
            {
8466
              rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
8467
              rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
8468
              if (BB_HEAD (bb) == insn)
8469
                BB_HEAD (bb) = b;
8470
              if (BB_END (bb) == insn)
8471
                BB_END (bb) = a;
8472
            }
8473
 
8474
          if (insn == BB_END (bb))
8475
            break;
8476
          insn = NEXT_INSN (insn);
8477
        }
8478
    }
8479
}
8480
 
8481
/* Perform machine dependent operations on the rtl chain INSNS.  */
8482
 
8483
static void
8484
ia64_reorg (void)
8485
{
8486
  /* We are freeing block_for_insn in the toplev to keep compatibility
8487
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
8488
  compute_bb_for_insn ();
8489
 
8490
  /* If optimizing, we'll have split before scheduling.  */
8491
  if (optimize == 0)
8492
    split_all_insns (0);
8493
 
8494
  /* ??? update_life_info_in_dirty_blocks fails to terminate during
8495
     non-optimizing bootstrap.  */
8496
  update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES, PROP_DEATH_NOTES);
8497
 
8498
  if (optimize && ia64_flag_schedule_insns2)
8499
    {
8500
      timevar_push (TV_SCHED2);
8501
      ia64_final_schedule = 1;
8502
 
8503
      initiate_bundle_states ();
8504
      ia64_nop = make_insn_raw (gen_nop ());
8505
      PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
8506
      recog_memoized (ia64_nop);
8507
      clocks_length = get_max_uid () + 1;
8508
      stops_p = xcalloc (1, clocks_length);
8509
      if (ia64_tune == PROCESSOR_ITANIUM)
8510
        {
8511
          clocks = xcalloc (clocks_length, sizeof (int));
8512
          add_cycles = xcalloc (clocks_length, sizeof (int));
8513
        }
8514
      if (ia64_tune == PROCESSOR_ITANIUM2)
8515
        {
8516
          pos_1 = get_cpu_unit_code ("2_1");
8517
          pos_2 = get_cpu_unit_code ("2_2");
8518
          pos_3 = get_cpu_unit_code ("2_3");
8519
          pos_4 = get_cpu_unit_code ("2_4");
8520
          pos_5 = get_cpu_unit_code ("2_5");
8521
          pos_6 = get_cpu_unit_code ("2_6");
8522
          _0mii_ = get_cpu_unit_code ("2b_0mii.");
8523
          _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
8524
          _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
8525
          _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
8526
          _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
8527
          _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
8528
          _0mib_ = get_cpu_unit_code ("2b_0mib.");
8529
          _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
8530
          _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
8531
          _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
8532
          _1mii_ = get_cpu_unit_code ("2b_1mii.");
8533
          _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
8534
          _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
8535
          _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
8536
          _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
8537
          _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
8538
          _1mib_ = get_cpu_unit_code ("2b_1mib.");
8539
          _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
8540
          _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
8541
          _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
8542
        }
8543
      else
8544
        {
8545
          pos_1 = get_cpu_unit_code ("1_1");
8546
          pos_2 = get_cpu_unit_code ("1_2");
8547
          pos_3 = get_cpu_unit_code ("1_3");
8548
          pos_4 = get_cpu_unit_code ("1_4");
8549
          pos_5 = get_cpu_unit_code ("1_5");
8550
          pos_6 = get_cpu_unit_code ("1_6");
8551
          _0mii_ = get_cpu_unit_code ("1b_0mii.");
8552
          _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
8553
          _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
8554
          _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
8555
          _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
8556
          _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
8557
          _0mib_ = get_cpu_unit_code ("1b_0mib.");
8558
          _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
8559
          _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
8560
          _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
8561
          _1mii_ = get_cpu_unit_code ("1b_1mii.");
8562
          _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
8563
          _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
8564
          _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
8565
          _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
8566
          _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
8567
          _1mib_ = get_cpu_unit_code ("1b_1mib.");
8568
          _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
8569
          _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
8570
          _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
8571
        }
8572
      schedule_ebbs ();
8573
      finish_bundle_states ();
8574
      if (ia64_tune == PROCESSOR_ITANIUM)
8575
        {
8576
          free (add_cycles);
8577
          free (clocks);
8578
        }
8579
      free (stops_p);
8580
      stops_p = NULL;
8581
      emit_insn_group_barriers (dump_file);
8582
 
8583
      ia64_final_schedule = 0;
8584
      timevar_pop (TV_SCHED2);
8585
    }
8586
  else
8587
    emit_all_insn_group_barriers (dump_file);
8588
 
8589
  /* A call must not be the last instruction in a function, so that the
8590
     return address is still within the function, so that unwinding works
8591
     properly.  Note that IA-64 differs from dwarf2 on this point.  */
8592
  if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
8593
    {
8594
      rtx insn;
8595
      int saw_stop = 0;
8596
 
8597
      insn = get_last_insn ();
8598
      if (! INSN_P (insn))
8599
        insn = prev_active_insn (insn);
8600
      /* Skip over insns that expand to nothing.  */
8601
      while (GET_CODE (insn) == INSN && get_attr_empty (insn) == EMPTY_YES)
8602
        {
8603
          if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
8604
              && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
8605
            saw_stop = 1;
8606
          insn = prev_active_insn (insn);
8607
        }
8608
      if (GET_CODE (insn) == CALL_INSN)
8609
        {
8610
          if (! saw_stop)
8611
            emit_insn (gen_insn_group_barrier (GEN_INT (3)));
8612
          emit_insn (gen_break_f ());
8613
          emit_insn (gen_insn_group_barrier (GEN_INT (3)));
8614
        }
8615
    }
8616
 
8617
  emit_predicate_relation_info ();
8618
 
8619
  if (ia64_flag_var_tracking)
8620
    {
8621
      timevar_push (TV_VAR_TRACKING);
8622
      variable_tracking_main ();
8623
      timevar_pop (TV_VAR_TRACKING);
8624
    }
8625
}
8626
 
8627
/* Return true if REGNO is used by the epilogue.  */
8628
 
8629
int
8630
ia64_epilogue_uses (int regno)
8631
{
8632
  switch (regno)
8633
    {
8634
    case R_GR (1):
8635
      /* With a call to a function in another module, we will write a new
8636
         value to "gp".  After returning from such a call, we need to make
8637
         sure the function restores the original gp-value, even if the
8638
         function itself does not use the gp anymore.  */
8639
      return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
8640
 
8641
    case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
8642
    case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
8643
      /* For functions defined with the syscall_linkage attribute, all
8644
         input registers are marked as live at all function exits.  This
8645
         prevents the register allocator from using the input registers,
8646
         which in turn makes it possible to restart a system call after
8647
         an interrupt without having to save/restore the input registers.
8648
         This also prevents kernel data from leaking to application code.  */
8649
      return lookup_attribute ("syscall_linkage",
8650
           TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
8651
 
8652
    case R_BR (0):
8653
      /* Conditional return patterns can't represent the use of `b0' as
8654
         the return address, so we force the value live this way.  */
8655
      return 1;
8656
 
8657
    case AR_PFS_REGNUM:
8658
      /* Likewise for ar.pfs, which is used by br.ret.  */
8659
      return 1;
8660
 
8661
    default:
8662
      return 0;
8663
    }
8664
}
8665
 
8666
/* Return true if REGNO is used by the frame unwinder.  */
8667
 
8668
int
8669
ia64_eh_uses (int regno)
8670
{
8671
  if (! reload_completed)
8672
    return 0;
8673
 
8674
  if (current_frame_info.reg_save_b0
8675
      && regno == current_frame_info.reg_save_b0)
8676
    return 1;
8677
  if (current_frame_info.reg_save_pr
8678
      && regno == current_frame_info.reg_save_pr)
8679
    return 1;
8680
  if (current_frame_info.reg_save_ar_pfs
8681
      && regno == current_frame_info.reg_save_ar_pfs)
8682
    return 1;
8683
  if (current_frame_info.reg_save_ar_unat
8684
      && regno == current_frame_info.reg_save_ar_unat)
8685
    return 1;
8686
  if (current_frame_info.reg_save_ar_lc
8687
      && regno == current_frame_info.reg_save_ar_lc)
8688
    return 1;
8689
 
8690
  return 0;
8691
}
8692
 
8693
/* Return true if this goes in small data/bss.  */
8694
 
8695
/* ??? We could also support own long data here.  Generating movl/add/ld8
8696
   instead of addl,ld8/ld8.  This makes the code bigger, but should make the
8697
   code faster because there is one less load.  This also includes incomplete
8698
   types which can't go in sdata/sbss.  */
8699
 
8700
static bool
8701
ia64_in_small_data_p (tree exp)
8702
{
8703
  if (TARGET_NO_SDATA)
8704
    return false;
8705
 
8706
  /* We want to merge strings, so we never consider them small data.  */
8707
  if (TREE_CODE (exp) == STRING_CST)
8708
    return false;
8709
 
8710
  /* Functions are never small data.  */
8711
  if (TREE_CODE (exp) == FUNCTION_DECL)
8712
    return false;
8713
 
8714
  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
8715
    {
8716
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
8717
 
8718
      if (strcmp (section, ".sdata") == 0
8719
          || strncmp (section, ".sdata.", 7) == 0
8720
          || strncmp (section, ".gnu.linkonce.s.", 16) == 0
8721
          || strcmp (section, ".sbss") == 0
8722
          || strncmp (section, ".sbss.", 6) == 0
8723
          || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
8724
        return true;
8725
    }
8726
  else
8727
    {
8728
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
8729
 
8730
      /* If this is an incomplete type with size 0, then we can't put it
8731
         in sdata because it might be too big when completed.  */
8732
      if (size > 0 && size <= ia64_section_threshold)
8733
        return true;
8734
    }
8735
 
8736
  return false;
8737
}
8738
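
/* For example, assuming a typical small-data size threshold, a small
   scalar such as `static int counter;' ends up in .sbss and can be
   addressed gp-relative with a short addl/load sequence, whereas a
   large array is accessed through the longer addl,ld8/ld8 sequence
   mentioned in the ??? note above.  */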
 
8739
/* Output assembly directives for prologue regions.  */
8740
 
8741
/* The current basic block number.  */
8742
 
8743
static bool last_block;
8744
 
8745
/* True if we need a copy_state command at the start of the next block.  */
8746
 
8747
static bool need_copy_state;
8748
 
8749
#ifndef MAX_ARTIFICIAL_LABEL_BYTES
8750
# define MAX_ARTIFICIAL_LABEL_BYTES 30
8751
#endif
8752
 
8753
/* Emit a debugging label after a call-frame-related insn.  We'd
8754
   rather output the label right away, but we'd have to output it
8755
   after, not before, the instruction, and the instruction has not
8756
   been output yet.  So we emit the label after the insn, delete it to
8757
   avoid introducing basic blocks, and mark it as preserved, such that
8758
   it is still output, given that it is referenced in debug info.  */
8759
 
8760
static const char *
8761
ia64_emit_deleted_label_after_insn (rtx insn)
8762
{
8763
  char label[MAX_ARTIFICIAL_LABEL_BYTES];
8764
  rtx lb = gen_label_rtx ();
8765
  rtx label_insn = emit_label_after (lb, insn);
8766
 
8767
  LABEL_PRESERVE_P (lb) = 1;
8768
 
8769
  delete_insn (label_insn);
8770
 
8771
  ASM_GENERATE_INTERNAL_LABEL (label, "L", CODE_LABEL_NUMBER (label_insn));
8772
 
8773
  return xstrdup (label);
8774
}
8775
 
8776
/* Define the CFA after INSN with the steady-state definition.  */
8777
 
8778
static void
8779
ia64_dwarf2out_def_steady_cfa (rtx insn)
8780
{
8781
  rtx fp = frame_pointer_needed
8782
    ? hard_frame_pointer_rtx
8783
    : stack_pointer_rtx;
8784
 
8785
  dwarf2out_def_cfa
8786
    (ia64_emit_deleted_label_after_insn (insn),
8787
     REGNO (fp),
8788
     ia64_initial_elimination_offset
8789
     (REGNO (arg_pointer_rtx), REGNO (fp))
8790
     + ARG_POINTER_CFA_OFFSET (current_function_decl));
8791
}
8792
 
8793
/* The generic dwarf2 frame debug info generator does not define a
8794
   separate region for the very end of the epilogue, so refrain from
8795
   doing so in the IA64-specific code as well.  */
8796
 
8797
#define IA64_CHANGE_CFA_IN_EPILOGUE 0
8798
 
8799
/* The function emits unwind directives for the start of an epilogue.  */
8800
 
8801
static void
8802
process_epilogue (FILE *asm_out_file, rtx insn, bool unwind, bool frame)
8803
{
8804
  /* If this isn't the last block of the function, then we need to label the
8805
     current state, and copy it back in at the start of the next block.  */
8806
 
8807
  if (!last_block)
8808
    {
8809
      if (unwind)
8810
        fprintf (asm_out_file, "\t.label_state %d\n",
8811
                 ++cfun->machine->state_num);
8812
      need_copy_state = true;
8813
    }
8814
 
8815
  if (unwind)
8816
    fprintf (asm_out_file, "\t.restore sp\n");
8817
  if (IA64_CHANGE_CFA_IN_EPILOGUE && frame)
8818
    dwarf2out_def_cfa (ia64_emit_deleted_label_after_insn (insn),
8819
                       STACK_POINTER_REGNUM, INCOMING_FRAME_SP_OFFSET);
8820
}
8821
 
8822
/* This function processes a SET pattern looking for specific patterns
8823
   which result in emitting an assembly directive required for unwinding.  */
8824
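
/* As an illustration, the directives emitted from here include (with
   made-up register numbers and offsets):

       .save ar.pfs, r34      // alloc insn saving ar.pfs
       .fframe 32             // fixed-size memory stack frame
       .vframe r33            // frame pointer established
       .save rp, r32          // return pointer saved in a GR
       .savesp pr, 16         // register saved on the memory stack
       .restore sp            // start of the epilogue

   matching the fprintf calls in process_set and process_epilogue.  */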
 
8825
static int
8826
process_set (FILE *asm_out_file, rtx pat, rtx insn, bool unwind, bool frame)
8827
{
8828
  rtx src = SET_SRC (pat);
8829
  rtx dest = SET_DEST (pat);
8830
  int src_regno, dest_regno;
8831
 
8832
  /* Look for the ALLOC insn.  */
8833
  if (GET_CODE (src) == UNSPEC_VOLATILE
8834
      && XINT (src, 1) == UNSPECV_ALLOC
8835
      && GET_CODE (dest) == REG)
8836
    {
8837
      dest_regno = REGNO (dest);
8838
 
8839
      /* If this is the final destination for ar.pfs, then this must
8840
         be the alloc in the prologue.  */
8841
      if (dest_regno == current_frame_info.reg_save_ar_pfs)
8842
        {
8843
          if (unwind)
8844
            fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
8845
                     ia64_dbx_register_number (dest_regno));
8846
        }
8847
      else
8848
        {
8849
          /* This must be an alloc before a sibcall.  We must drop the
8850
             old frame info.  The easiest way to drop the old frame
8851
             info is to ensure we had a ".restore sp" directive
8852
             followed by a new prologue.  If the procedure doesn't
8853
             have a memory-stack frame, we'll issue a dummy ".restore
8854
             sp" now.  */
8855
          if (current_frame_info.total_size == 0 && !frame_pointer_needed)
8856
            /* If we haven't done process_epilogue () yet, do it now.  */
8857
            process_epilogue (asm_out_file, insn, unwind, frame);
8858
          if (unwind)
8859
            fprintf (asm_out_file, "\t.prologue\n");
8860
        }
8861
      return 1;
8862
    }
8863
 
8864
  /* Look for SP = ....  */
8865
  if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
8866
    {
8867
      if (GET_CODE (src) == PLUS)
8868
        {
8869
          rtx op0 = XEXP (src, 0);
8870
          rtx op1 = XEXP (src, 1);
8871
 
8872
          gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);
8873
 
8874
          if (INTVAL (op1) < 0)
8875
            {
8876
              gcc_assert (!frame_pointer_needed);
8877
              if (unwind)
8878
                fprintf (asm_out_file, "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
8879
                         -INTVAL (op1));
8880
              if (frame)
8881
                ia64_dwarf2out_def_steady_cfa (insn);
8882
            }
8883
          else
8884
            process_epilogue (asm_out_file, insn, unwind, frame);
8885
        }
8886
      else
8887
        {
8888
          gcc_assert (GET_CODE (src) == REG
8889
                      && REGNO (src) == HARD_FRAME_POINTER_REGNUM);
8890
          process_epilogue (asm_out_file, insn, unwind, frame);
8891
        }
8892
 
8893
      return 1;
8894
    }
8895
 
8896
  /* Register move we need to look at.  */
8897
  if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
8898
    {
8899
      src_regno = REGNO (src);
8900
      dest_regno = REGNO (dest);
8901
 
8902
      switch (src_regno)
8903
        {
8904
        case BR_REG (0):
8905
          /* Saving return address pointer.  */
8906
          gcc_assert (dest_regno == current_frame_info.reg_save_b0);
8907
          if (unwind)
8908
            fprintf (asm_out_file, "\t.save rp, r%d\n",
8909
                     ia64_dbx_register_number (dest_regno));
8910
          return 1;
8911
 
8912
        case PR_REG (0):
8913
          gcc_assert (dest_regno == current_frame_info.reg_save_pr);
8914
          if (unwind)
8915
            fprintf (asm_out_file, "\t.save pr, r%d\n",
8916
                     ia64_dbx_register_number (dest_regno));
8917
          return 1;
8918
 
8919
        case AR_UNAT_REGNUM:
8920
          gcc_assert (dest_regno == current_frame_info.reg_save_ar_unat);
8921
          if (unwind)
8922
            fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
8923
                     ia64_dbx_register_number (dest_regno));
8924
          return 1;
8925
 
8926
        case AR_LC_REGNUM:
8927
          gcc_assert (dest_regno == current_frame_info.reg_save_ar_lc);
8928
          if (unwind)
8929
            fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
8930
                     ia64_dbx_register_number (dest_regno));
8931
          return 1;
8932
 
8933
        case STACK_POINTER_REGNUM:
8934
          gcc_assert (dest_regno == HARD_FRAME_POINTER_REGNUM
8935
                      && frame_pointer_needed);
8936
          if (unwind)
8937
            fprintf (asm_out_file, "\t.vframe r%d\n",
8938
                     ia64_dbx_register_number (dest_regno));
8939
          if (frame)
8940
            ia64_dwarf2out_def_steady_cfa (insn);
8941
          return 1;
8942
 
8943
        default:
8944
          /* Everything else should indicate being stored to memory.  */
8945
          gcc_unreachable ();
8946
        }
8947
    }
8948
 
8949
  /* Memory store we need to look at.  */
8950
  if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
8951
    {
8952
      long off;
8953
      rtx base;
8954
      const char *saveop;
8955
 
8956
      if (GET_CODE (XEXP (dest, 0)) == REG)
8957
        {
8958
          base = XEXP (dest, 0);
8959
          off = 0;
8960
        }
8961
      else
8962
        {
8963
          gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
8964
                      && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
8965
          base = XEXP (XEXP (dest, 0), 0);
8966
          off = INTVAL (XEXP (XEXP (dest, 0), 1));
8967
        }
8968
 
8969
      if (base == hard_frame_pointer_rtx)
8970
        {
8971
          saveop = ".savepsp";
8972
          off = - off;
8973
        }
8974
      else
8975
        {
8976
          gcc_assert (base == stack_pointer_rtx);
8977
          saveop = ".savesp";
8978
        }
8979
 
8980
      src_regno = REGNO (src);
8981
      switch (src_regno)
8982
        {
8983
        case BR_REG (0):
8984
          gcc_assert (!current_frame_info.reg_save_b0);
8985
          if (unwind)
8986
            fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
8987
          return 1;
8988
 
8989
        case PR_REG (0):
8990
          gcc_assert (!current_frame_info.reg_save_pr);
8991
          if (unwind)
8992
            fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
8993
          return 1;
8994
 
8995
        case AR_LC_REGNUM:
8996
          gcc_assert (!current_frame_info.reg_save_ar_lc);
8997
          if (unwind)
8998
            fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
8999
          return 1;
9000
 
9001
        case AR_PFS_REGNUM:
9002
          gcc_assert (!current_frame_info.reg_save_ar_pfs);
9003
          if (unwind)
9004
            fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
9005
          return 1;
9006
 
9007
        case AR_UNAT_REGNUM:
9008
          gcc_assert (!current_frame_info.reg_save_ar_unat);
9009
          if (unwind)
9010
            fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
9011
          return 1;
9012
 
9013
        case GR_REG (4):
9014
        case GR_REG (5):
9015
        case GR_REG (6):
9016
        case GR_REG (7):
9017
          if (unwind)
9018
            fprintf (asm_out_file, "\t.save.g 0x%x\n",
9019
                     1 << (src_regno - GR_REG (4)));
9020
          return 1;
9021
 
9022
        case BR_REG (1):
9023
        case BR_REG (2):
9024
        case BR_REG (3):
9025
        case BR_REG (4):
9026
        case BR_REG (5):
9027
          if (unwind)
9028
            fprintf (asm_out_file, "\t.save.b 0x%x\n",
9029
                     1 << (src_regno - BR_REG (1)));
9030
          return 1;
9031
 
9032
        case FR_REG (2):
9033
        case FR_REG (3):
9034
        case FR_REG (4):
9035
        case FR_REG (5):
9036
          if (unwind)
9037
            fprintf (asm_out_file, "\t.save.f 0x%x\n",
9038
                     1 << (src_regno - FR_REG (2)));
9039
          return 1;
9040
 
9041
        case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
9042
        case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
9043
        case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
9044
        case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
9045
          if (unwind)
9046
            fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
9047
                     1 << (src_regno - FR_REG (12)));
9048
          return 1;
9049
 
9050
        default:
9051
          return 0;
9052
        }
9053
    }
9054
 
9055
  return 0;
9056
}
9057
 
9058
 
9059
/* This function looks at a single insn and emits any directives
9060
   required to unwind this insn.  */
9061
void
9062
process_for_unwind_directive (FILE *asm_out_file, rtx insn)
9063
{
9064
  bool unwind = (flag_unwind_tables
9065
                 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS));
9066
  bool frame = dwarf2out_do_frame ();
9067
 
9068
  if (unwind || frame)
9069
    {
9070
      rtx pat;
9071
 
9072
      if (GET_CODE (insn) == NOTE
9073
          && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
9074
        {
9075
          last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
9076
 
9077
          /* Restore unwind state from immediately before the epilogue.  */
9078
          if (need_copy_state)
9079
            {
9080
              if (unwind)
9081
                {
9082
                  fprintf (asm_out_file, "\t.body\n");
9083
                  fprintf (asm_out_file, "\t.copy_state %d\n",
9084
                           cfun->machine->state_num);
9085
                }
9086
              if (IA64_CHANGE_CFA_IN_EPILOGUE && frame)
9087
                ia64_dwarf2out_def_steady_cfa (insn);
9088
              need_copy_state = false;
9089
            }
9090
        }
9091
 
9092
      if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
9093
        return;
9094
 
9095
      pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
9096
      if (pat)
9097
        pat = XEXP (pat, 0);
9098
      else
9099
        pat = PATTERN (insn);
9100
 
9101
      switch (GET_CODE (pat))
9102
        {
9103
        case SET:
9104
          process_set (asm_out_file, pat, insn, unwind, frame);
9105
          break;
9106
 
9107
        case PARALLEL:
9108
          {
9109
            int par_index;
9110
            int limit = XVECLEN (pat, 0);
9111
            for (par_index = 0; par_index < limit; par_index++)
9112
              {
9113
                rtx x = XVECEXP (pat, 0, par_index);
9114
                if (GET_CODE (x) == SET)
9115
                  process_set (asm_out_file, x, insn, unwind, frame);
9116
              }
9117
            break;
9118
          }
9119
 
9120
        default:
9121
          gcc_unreachable ();
9122
        }
9123
    }
9124
}
9125
 
9126
 
9127
enum ia64_builtins
{
  IA64_BUILTIN_BSP,
  IA64_BUILTIN_FLUSHRS
};

void
ia64_init_builtins (void)
{
  tree fpreg_type;
  tree float80_type;

  /* The __fpreg type.  */
  fpreg_type = make_node (REAL_TYPE);
  TYPE_PRECISION (fpreg_type) = 82;
  layout_type (fpreg_type);
  (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");

  /* The __float80 type.  */
  float80_type = make_node (REAL_TYPE);
  TYPE_PRECISION (float80_type) = 80;
  layout_type (float80_type);
  (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");

  /* The __float128 type.  */
  if (!TARGET_HPUX)
    {
      tree float128_type = make_node (REAL_TYPE);
      TYPE_PRECISION (float128_type) = 128;
      layout_type (float128_type);
      (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
    }
  else
    /* Under HPUX, this is a synonym for "long double".  */
    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
                                               "__float128");

#define def_builtin(name, type, code)                                   \
  lang_hooks.builtin_function ((name), (type), (code), BUILT_IN_MD,     \
                               NULL, NULL_TREE)

  def_builtin ("__builtin_ia64_bsp",
               build_function_type (ptr_type_node, void_list_node),
               IA64_BUILTIN_BSP);

  def_builtin ("__builtin_ia64_flushrs",
               build_function_type (void_type_node, void_list_node),
               IA64_BUILTIN_FLUSHRS);

#undef def_builtin
}

rtx
ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
                     enum machine_mode mode ATTRIBUTE_UNUSED,
                     int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case IA64_BUILTIN_BSP:
      if (! target || ! register_operand (target, DImode))
        target = gen_reg_rtx (DImode);
      emit_insn (gen_bsp_value (target));
#ifdef POINTERS_EXTEND_UNSIGNED
      target = convert_memory_address (ptr_mode, target);
#endif
      return target;

    case IA64_BUILTIN_FLUSHRS:
      emit_insn (gen_flushrs ());
      return const0_rtx;

    default:
      break;
    }

  return NULL_RTX;
}

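/* Usage sketch (illustrative): the two builtins registered and expanded
   above can be called directly from user code, e.g.

     void *bsp = __builtin_ia64_bsp ();    reads the RSE backing store pointer
     __builtin_ia64_flushrs ();            flushes the register stack to memory

   IA64_BUILTIN_BSP expands through gen_bsp_value and yields a pointer
   value; IA64_BUILTIN_FLUSHRS expands through gen_flushrs and returns
   nothing.  */
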
/* On HP-UX IA64, aggregate parameters are passed in the most
   significant bits of the stack slot.  */

enum direction
ia64_hpux_function_arg_padding (enum machine_mode mode, tree type)
{
   /* Exception to normal case for structures/unions/etc.  */

   if (type && AGGREGATE_TYPE_P (type)
       && int_size_in_bytes (type) < UNITS_PER_WORD)
     return upward;

   /* Fall back to the default.  */
   return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
}

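/* Illustrative example: for a parameter of type struct { char c[3]; },
   int_size_in_bytes is 3, which is less than UNITS_PER_WORD (8 on IA64),
   so the hook above returns upward and the value occupies the most
   significant bits of its stack slot; aggregates of a full word or more
   fall back to DEFAULT_FUNCTION_ARG_PADDING.  */
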
/* Linked list of all external functions that are to be emitted by GCC.
   We output the name if and only if TREE_SYMBOL_REFERENCED is set in
   order to avoid putting out names that are never really used.  */

struct extern_func_list GTY(())
{
  struct extern_func_list *next;
  tree decl;
};

static GTY(()) struct extern_func_list *extern_func_head;

static void
ia64_hpux_add_extern_decl (tree decl)
{
  struct extern_func_list *p = ggc_alloc (sizeof (struct extern_func_list));

  p->decl = decl;
  p->next = extern_func_head;
  extern_func_head = p;
}

/* Print out the list of used global functions.  */

static void
ia64_hpux_file_end (void)
{
  struct extern_func_list *p;

  for (p = extern_func_head; p; p = p->next)
    {
      tree decl = p->decl;
      tree id = DECL_ASSEMBLER_NAME (decl);

      gcc_assert (id);

      if (!TREE_ASM_WRITTEN (decl) && TREE_SYMBOL_REFERENCED (id))
        {
          const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);

          TREE_ASM_WRITTEN (decl) = 1;
          (*targetm.asm_out.globalize_label) (asm_out_file, name);
          fputs (TYPE_ASM_OP, asm_out_file);
          assemble_name (asm_out_file, name);
          fprintf (asm_out_file, "," TYPE_OPERAND_FMT "\n", "function");
        }
    }

  extern_func_head = 0;
}

/* Set SImode div/mod functions, since init_integral_libfuncs only
   initializes modes of word_mode and larger.  Rename the TFmode libfuncs
   using the HPUX conventions.  __divtf3 is still used for XFmode, so we
   need to keep it for backward compatibility.  */

static void
ia64_init_libfuncs (void)
{
  set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
  set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
  set_optab_libfunc (smod_optab, SImode, "__modsi3");
  set_optab_libfunc (umod_optab, SImode, "__umodsi3");

  set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
  set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
  set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
  set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
  set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");

  set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
  set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
  set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
  set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
  set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
  set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");

  set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
  set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
  set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad");
  set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
  set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");

  set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
  set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad");
  /* HP-UX 11.23 libc does not have a function for unsigned
     SImode-to-TFmode conversion.  */
  set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
}

/* Rename all the TFmode libfuncs using the HPUX conventions.  */

static void
ia64_hpux_init_libfuncs (void)
{
  ia64_init_libfuncs ();

  /* The HP SI millicode division and mod functions expect DI arguments.
     By turning them off completely we avoid using both libgcc and the
     non-standard millicode routines and use the HP DI millicode routines
     instead.  */

  set_optab_libfunc (sdiv_optab, SImode, 0);
  set_optab_libfunc (udiv_optab, SImode, 0);
  set_optab_libfunc (smod_optab, SImode, 0);
  set_optab_libfunc (umod_optab, SImode, 0);

  set_optab_libfunc (sdiv_optab, DImode, "__milli_divI");
  set_optab_libfunc (udiv_optab, DImode, "__milli_divU");
  set_optab_libfunc (smod_optab, DImode, "__milli_remI");
  set_optab_libfunc (umod_optab, DImode, "__milli_remU");

  /* HP-UX libc has TF min/max/abs routines in it.  */
  set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
  set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
  set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");

  /* ia64_expand_compare uses this.  */
  cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");

  /* These should never be used.  */
  set_optab_libfunc (eq_optab, TFmode, 0);
  set_optab_libfunc (ne_optab, TFmode, 0);
  set_optab_libfunc (gt_optab, TFmode, 0);
  set_optab_libfunc (ge_optab, TFmode, 0);
  set_optab_libfunc (lt_optab, TFmode, 0);
  set_optab_libfunc (le_optab, TFmode, 0);
}

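/* Illustrative consequence of the tables above: on HP-UX, a TFmode (quad
   precision) multiply that cannot be open-coded becomes a call to
   _U_Qfmpy, a signed DImode division becomes a call to the HP millicode
   routine __milli_divI, and the SImode entries are cleared so that the
   DImode millicode routines are used for 32-bit division as well.  */
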
/* Rename the division and modulus functions in VMS.  */

static void
ia64_vms_init_libfuncs (void)
{
  set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
  set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
  set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
  set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
  set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
  set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
  set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
  set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
}

/* Rename the TFmode libfuncs available from soft-fp in glibc using
   the HPUX conventions.  */

static void
ia64_sysv4_init_libfuncs (void)
{
  ia64_init_libfuncs ();

  /* These functions are not part of the HPUX TFmode interface.  We
     use them instead of _U_Qfcmp, which doesn't work the way we
     expect.  */
  set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
  set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
  set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
  set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
  set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
  set_optab_libfunc (le_optab, TFmode, "_U_Qfle");

  /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
     glibc doesn't have them.  */
}

/* For HPUX, it is illegal to have relocations in shared segments.  */

static int
ia64_hpux_reloc_rw_mask (void)
{
  return 3;
}

/* For others, relax this so that relocations to local data go in
   read-only segments, but we still cannot allow global relocations
   in read-only segments.  */

static int
ia64_reloc_rw_mask (void)
{
  return flag_pic ? 3 : 2;
}

/* Return the section to use for X.  The only special thing we do here
   is to honor small data.  */

static section *
ia64_select_rtx_section (enum machine_mode mode, rtx x,
                         unsigned HOST_WIDE_INT align)
{
  if (GET_MODE_SIZE (mode) > 0
      && GET_MODE_SIZE (mode) <= ia64_section_threshold
      && !TARGET_NO_SDATA)
    return sdata_section;
  else
    return default_elf_select_rtx_section (mode, x, align);
}

static unsigned int
ia64_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags = 0;

  if (strcmp (name, ".sdata") == 0
      || strncmp (name, ".sdata.", 7) == 0
      || strncmp (name, ".gnu.linkonce.s.", 16) == 0
      || strncmp (name, ".sdata2.", 8) == 0
      || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
      || strcmp (name, ".sbss") == 0
      || strncmp (name, ".sbss.", 6) == 0
      || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
    flags = SECTION_SMALL;

  flags |= default_section_type_flags (decl, name, reloc);
  return flags;
}

/* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
   structure type and the address of that return value should be passed
   in out0, rather than in r8.  */

static bool
ia64_struct_retval_addr_is_first_parm_p (tree fntype)
{
  tree ret_type = TREE_TYPE (fntype);

  /* The Itanium C++ ABI requires that out0, rather than r8, be used
     as the structure return address parameter, if the return value
     type has a non-trivial copy constructor or destructor.  It is not
     clear if this same convention should be used for other
     programming languages.  Until G++ 3.4, we incorrectly used r8 for
     these return values.  */
  return (abi_version_at_least (2)
          && ret_type
          && TYPE_MODE (ret_type) == BLKmode
          && TREE_ADDRESSABLE (ret_type)
          && strcmp (lang_hooks.name, "GNU C++") == 0);
}

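/* Illustrative C++ example: given

     struct S { ~S (); int i; };
     S f ();

   S has a non-trivial destructor, so the Itanium C++ ABI returns it in
   memory; with -fabi-version=2 or later the predicate above is true for
   f's type, and the return-slot address is passed in out0 rather than
   in r8.  */
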
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
                      HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
                      tree function)
{
  rtx this, insn, funexp;
  unsigned int this_parmno;
  unsigned int this_regno;

  reload_completed = 1;
  epilogue_completed = 1;
  no_new_pseudos = 1;
  reset_block_changes ();

  /* Set things up as ia64_expand_prologue might.  */
  last_scratch_gr_reg = 15;

  memset (&current_frame_info, 0, sizeof (current_frame_info));
  current_frame_info.spill_cfa_off = -16;
  current_frame_info.n_input_regs = 1;
  current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);

  /* Mark the end of the (empty) prologue.  */
  emit_note (NOTE_INSN_PROLOGUE_END);

  /* Figure out whether "this" will be the first parameter (the
     typical case) or the second parameter (as happens when the
     virtual function returns certain class objects).  */
  this_parmno
    = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
       ? 1 : 0);
  this_regno = IN_REG (this_parmno);
  if (!TARGET_REG_NAMES)
    reg_names[this_regno] = ia64_reg_numbers[this_parmno];

  this = gen_rtx_REG (Pmode, this_regno);
  if (TARGET_ILP32)
    {
      rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
      REG_POINTER (tmp) = 1;
      if (delta && CONST_OK_FOR_I (delta))
        {
          emit_insn (gen_ptr_extend_plus_imm (this, tmp, GEN_INT (delta)));
          delta = 0;
        }
      else
        emit_insn (gen_ptr_extend (this, tmp));
    }

  /* Apply the constant offset, if required.  */
  if (delta)
    {
      rtx delta_rtx = GEN_INT (delta);

      if (!CONST_OK_FOR_I (delta))
        {
          rtx tmp = gen_rtx_REG (Pmode, 2);
          emit_move_insn (tmp, delta_rtx);
          delta_rtx = tmp;
        }
      emit_insn (gen_adddi3 (this, this, delta_rtx));
    }

  /* Apply the offset from the vtable, if required.  */
  if (vcall_offset)
    {
      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
      rtx tmp = gen_rtx_REG (Pmode, 2);

      if (TARGET_ILP32)
        {
          rtx t = gen_rtx_REG (ptr_mode, 2);
          REG_POINTER (t) = 1;
          emit_move_insn (t, gen_rtx_MEM (ptr_mode, this));
          if (CONST_OK_FOR_I (vcall_offset))
            {
              emit_insn (gen_ptr_extend_plus_imm (tmp, t,
                                                  vcall_offset_rtx));
              vcall_offset = 0;
            }
          else
            emit_insn (gen_ptr_extend (tmp, t));
        }
      else
        emit_move_insn (tmp, gen_rtx_MEM (Pmode, this));

      if (vcall_offset)
        {
          if (!CONST_OK_FOR_J (vcall_offset))
            {
              rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
              emit_move_insn (tmp2, vcall_offset_rtx);
              vcall_offset_rtx = tmp2;
            }
          emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
        }

      if (TARGET_ILP32)
        emit_move_insn (gen_rtx_REG (ptr_mode, 2),
                        gen_rtx_MEM (ptr_mode, tmp));
      else
        emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));

      emit_insn (gen_adddi3 (this, this, tmp));
    }

  /* Generate a tail call to the target function.  */
  if (! TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
  insn = get_last_insn ();
  SIBLING_CALL_P (insn) = 1;

  /* Code generation for calls relies on splitting.  */
  reload_completed = 1;
  epilogue_completed = 1;
  try_split (PATTERN (insn), insn, 0);

  emit_barrier ();

  /* Run just enough of rest_of_compilation to get the insns emitted.
     There's not really enough bulk here to make other passes such as
     instruction scheduling worth while.  Note that use_thunk calls
     assemble_start_function and assemble_end_function.  */

  insn_locators_initialize ();
  emit_all_insn_group_barriers (NULL);
  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();

  reload_completed = 0;
  epilogue_completed = 0;
  no_new_pseudos = 0;
}

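/* Illustrative example: for a multiple-inheritance thunk with DELTA == -8
   and VCALL_OFFSET == 0, the code above adjusts "this" in its incoming
   register (r32 when it is the first parameter), roughly an
   "adds r32 = -8, r32", and then emits a sibling call to the target
   function.  A nonzero VCALL_OFFSET additionally loads the vtable pointer
   through "this" and adds the word found at that offset before the tail
   call.  */
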
/* Worker function for TARGET_STRUCT_VALUE_RTX.  */

static rtx
ia64_struct_value_rtx (tree fntype,
                       int incoming ATTRIBUTE_UNUSED)
{
  if (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype))
    return NULL_RTX;
  return gen_rtx_REG (Pmode, GR_REG (8));
}

static bool
ia64_scalar_mode_supported_p (enum machine_mode mode)
{
  switch (mode)
    {
    case QImode:
    case HImode:
    case SImode:
    case DImode:
    case TImode:
      return true;

    case SFmode:
    case DFmode:
    case XFmode:
    case RFmode:
      return true;

    case TFmode:
      return TARGET_HPUX;

    default:
      return false;
    }
}

static bool
ia64_vector_mode_supported_p (enum machine_mode mode)
{
  switch (mode)
    {
    case V8QImode:
    case V4HImode:
    case V2SImode:
      return true;

    case V2SFmode:
      return true;

    default:
      return false;
    }
}

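/* Illustrative example: the vector hook above accepts the 64-bit vector
   modes, so user code such as

     typedef int   v2si __attribute__ ((vector_size (8)));
     typedef short v4hi __attribute__ ((vector_size (8)));

   maps onto V2SImode and V4HImode respectively and can use GCC's generic
   vector arithmetic on this target.  */
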
/* Implement the FUNCTION_PROFILER macro.  */

void
ia64_output_function_profiler (FILE *file, int labelno)
{
  bool indirect_call;

  /* If the function needs a static chain and the static chain
     register is r15, we use an indirect call so as to bypass
     the PLT stub in case the executable is dynamically linked,
     because the stub clobbers r15 as per 5.3.6 of the psABI.
     We don't need to do that in non-canonical PIC mode.  */

  if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
    {
      gcc_assert (STATIC_CHAIN_REGNUM == 15);
      indirect_call = true;
    }
  else
    indirect_call = false;

  if (TARGET_GNU_AS)
    fputs ("\t.prologue 4, r40\n", file);
  else
    fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
  fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);

  if (NO_PROFILE_COUNTERS)
    fputs ("\tmov out3 = r0\n", file);
  else
    {
      char buf[20];
      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);

      if (TARGET_AUTO_PIC)
        fputs ("\tmovl out3 = @gprel(", file);
      else
        fputs ("\taddl out3 = @ltoff(", file);
      assemble_name (file, buf);
      if (TARGET_AUTO_PIC)
        fputs (")\n", file);
      else
        fputs ("), r1\n", file);
    }

  if (indirect_call)
    fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
  fputs ("\t;;\n", file);

  fputs ("\t.save rp, r42\n", file);
  fputs ("\tmov out2 = b0\n", file);
  if (indirect_call)
    fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
  fputs ("\t.body\n", file);
  fputs ("\tmov out1 = r1\n", file);
  if (indirect_call)
    {
      fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
      fputs ("\tmov b6 = r16\n", file);
      fputs ("\tld8 r1 = [r14]\n", file);
      fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
    }
  else
    fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
}

static GTY(()) rtx mcount_func_rtx;
static rtx
gen_mcount_func_rtx (void)
{
  if (!mcount_func_rtx)
    mcount_func_rtx = init_one_libfunc ("_mcount");
  return mcount_func_rtx;
}

void
ia64_profile_hook (int labelno)
{
  rtx label, ip;

  if (NO_PROFILE_COUNTERS)
    label = const0_rtx;
  else
    {
      char buf[30];
      const char *label_name;
      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
      label_name = (*targetm.strip_name_encoding) (ggc_strdup (buf));
      label = gen_rtx_SYMBOL_REF (Pmode, label_name);
      SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
    }
  ip = gen_reg_rtx (Pmode);
  emit_insn (gen_ip_value (ip));
  emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
                     VOIDmode, 3,
                     gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
                     ip, Pmode,
                     label, Pmode);
}

/* Return the mangling of TYPE if it is an extended fundamental type.  */

static const char *
ia64_mangle_fundamental_type (tree type)
{
  /* On HP-UX, __float128 is a synonym for "long double" and is mangled
     as "e" by the default rules; elsewhere, mangle TFmode as "g".  */
  if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
    return "g";
  /* On HP-UX, "e" is not available as a mangling of __float80 so use
     an extended mangling.  Elsewhere, "e" is available since long
     double is 80 bits.  */
  if (TYPE_MODE (type) == XFmode)
    return TARGET_HPUX ? "u9__float80" : "e";
  if (TYPE_MODE (type) == RFmode)
    return "u7__fpreg";
  return NULL;
}

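/* Illustrative manglings implied by the hook above (Itanium C++ ABI):

     void f (__fpreg);      mangles to  _Z1fu7__fpreg
     void g (__float80);    mangles to  _Z1ge            (non-HP-UX)
     void g (__float80);    mangles to  _Z1gu9__float80  (HP-UX)
     void h (__float128);   mangles to  _Z1hg            (non-HP-UX)

   On HP-UX, __float128 is long double and therefore gets the standard
   "e" mangling without going through this hook.  */
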
/* Return the diagnostic message string if conversion from FROMTYPE to
   TOTYPE is not allowed, NULL otherwise.  */
static const char *
ia64_invalid_conversion (tree fromtype, tree totype)
{
  /* Reject nontrivial conversion to or from __fpreg.  */
  if (TYPE_MODE (fromtype) == RFmode
      && TYPE_MODE (totype) != RFmode
      && TYPE_MODE (totype) != VOIDmode)
    return N_("invalid conversion from %<__fpreg%>");
  if (TYPE_MODE (totype) == RFmode
      && TYPE_MODE (fromtype) != RFmode)
    return N_("invalid conversion to %<__fpreg%>");
  return NULL;
}

/* Return the diagnostic message string if the unary operation OP is
   not permitted on TYPE, NULL otherwise.  */
static const char *
ia64_invalid_unary_op (int op, tree type)
{
  /* Reject operations on __fpreg other than unary + or &.  */
  if (TYPE_MODE (type) == RFmode
      && op != CONVERT_EXPR
      && op != ADDR_EXPR)
    return N_("invalid operation on %<__fpreg%>");
  return NULL;
}

/* Return the diagnostic message string if the binary operation OP is
   not permitted on TYPE1 and TYPE2, NULL otherwise.  */
static const char *
ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, tree type1, tree type2)
{
  /* Reject operations on __fpreg.  */
  if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
    return N_("invalid operation on %<__fpreg%>");
  return NULL;
}

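/* Illustrative diagnostics produced through the hooks above:

     __fpreg r;
     double  d = r;        error: invalid conversion from '__fpreg'
     __fpreg s = 1.0;      error: invalid conversion to '__fpreg'
     r = r + r;            error: invalid operation on '__fpreg'
     __fpreg *p = &r;      accepted: unary & is allowed
     r = +r;               accepted: unary + (CONVERT_EXPR) is allowed

   Only copies between __fpreg objects and taking their address are
   meant to be valid.  */
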
/* Implement overriding of the optimization options.  */
void
ia64_optimization_options (int level ATTRIBUTE_UNUSED,
                           int size ATTRIBUTE_UNUSED)
{
  /* Let the scheduler form additional regions.  */
  set_param_value ("max-sched-extend-regions-iters", 2);
}

#include "gt-ia64.h"
