/* Output routines for GCC for ARM.
   Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "obstack.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "reload.h"
#include "function.h"
#include "expr.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "recog.h"
#include "cgraph.h"
#include "ggc.h"
#include "except.h"
#include "c-family/c-pragma.h"  /* ??? */
#include "integrate.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "debug.h"
#include "langhooks.h"
#include "df.h"
#include "intl.h"
#include "libfuncs.h"
#include "params.h"
#include "opts.h"

/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);

struct four_ints
{
  int i[4];
};

/* Forward function declarations.  */
static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
                             HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static rtx emit_sfm (int, int);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static rtx is_jump_table (rtx);
static const char *output_multi_immediate (rtx *, const char *, const char *,
                                           int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static rtx is_jump_table (rtx);
static HOST_WIDE_INT get_jump_table_size (rtx);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx);
static int arm_barrier_cost (rtx);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx, HOST_WIDE_INT);
static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
                               rtx);
static void arm_reorg (void);
static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int);
static int optimal_immediate_sequence (enum rtx_code code,
                                       unsigned HOST_WIDE_INT val,
                                       struct four_ints *return_sequence);
static int optimal_immediate_sequence_1 (enum rtx_code code,
                                         unsigned HOST_WIDE_INT val,
                                         struct four_ints *return_sequence,
                                         int i);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static enum machine_mode arm_promote_function_mode (const_tree,
                                                    enum machine_mode, int *,
                                                    const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value_1 (enum machine_mode);
static rtx arm_libcall_value (enum machine_mode, const_rtx);
static bool arm_function_value_regno_p (const unsigned int);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
                                 tree);
static bool arm_have_conditional_execution (void);
static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
static bool arm_legitimate_constant_p (enum machine_mode, rtx);
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
static int arm_address_cost (rtx, bool);
static int arm_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
static int arm_memory_move_cost (enum machine_mode, reg_class_t, bool);
static bool arm_memory_load_p (rtx);
static bool arm_cirrus_insn_p (rtx);
static void cirrus_reorg (rtx);
static void arm_init_builtins (void);
static void arm_init_iwmmxt_builtins (void);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static tree arm_builtin_decl (unsigned, bool);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx emit_set_insn (rtx, rtx);
static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
                                  tree, bool);
static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
                             const_tree, bool);
static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
                                      const_tree, bool);
static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
                                      const_tree);
static rtx aapcs_libcall_value (enum machine_mode);
static int aapcs_select_return_coproc (const_tree, const_tree);

#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
#ifndef ARM_PE
static void arm_encode_section_info (tree, rtx, int);
#endif

static void arm_file_end (void);
static void arm_file_start (void);

static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
                                        tree, int *, int);
static bool arm_pass_by_reference (cumulative_args_t,
                                   enum machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
static bool arm_return_in_memory (const_tree, const_tree);
#if ARM_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
static void arm_asm_init_sections (void);
#endif
static rtx arm_dwarf_register_span (rtx);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
static bool arm_cannot_copy_insn_p (rtx);
static bool arm_tls_symbol_p (rtx x);
static int arm_issue_rate (void);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static const char *arm_invalid_parameter_type (const_tree t);
static const char *arm_invalid_return_type (const_tree t);
static tree arm_promoted_type (const_tree t);
static tree arm_convert_to_type (tree type, tree expr);
static bool arm_scalar_mode_supported_p (enum machine_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool arm_array_mode_supported_p (enum machine_mode,
                                        unsigned HOST_WIDE_INT);
static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
                                                     const_tree type,
                                                     int misalignment,
                                                     bool is_packed);
static void arm_conditional_register_usage (void);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static unsigned int arm_autovectorize_vector_sizes (void);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);

static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
                                             const unsigned char *sel);


/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL, false },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL, false },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute,
    false },
  /* Interrupt Service Routines have special prologue and epilogue requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.
  */
  { "dllimport",    0, 0, true,  false, false, NULL, false },
  { "dllexport",    0, 0, true,  false, false, NULL, false },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "notshared",    0, 0, false, true, false, arm_handle_notshared_attribute,
    false },
#endif
  { NULL,           0, 0, false, false, false, NULL, false }
};
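
/* Illustrative usage: in user code these attributes are attached with
   GCC's __attribute__ syntax, e.g. (hypothetical declarations)

     void far_handler (void) __attribute__ ((long_call));
     void uart_rx (void) __attribute__ ((interrupt ("IRQ")));

   The table above only describes and validates each attribute; the code
   that acts on them lives elsewhere in this file.  */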

/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef  TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef  TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef  TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef  TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef  TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef  TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef  TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override

#undef  TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef  TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef  TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST arm_register_move_cost

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST arm_memory_move_cost

#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO  arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO  arm_encode_section_info
#endif

#undef  TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef  TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef  TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef  TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef  TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p

#undef  TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef  TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef  TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef  TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  arm_autovectorize_vector_sizes

#undef  TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef  TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS  arm_init_builtins
#undef  TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin
#undef  TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL arm_builtin_decl

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef  TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack

#if ARM_UNWIND_INFO
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
#endif /* ARM_UNWIND_INFO */

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef  TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is -4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
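
/* In other words, valid anchored offsets run from -4088 up to 4095,
   i.e. 4088 + 1 + 4095 = 8184 bytes in total, and 8184 == 8 * 1023,
   so the block covered by one anchor is a whole number of 8-byte units.  */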

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P     arm_legitimate_address_p

#undef TARGET_INVALID_PARAMETER_TYPE
#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type

#undef TARGET_INVALID_RETURN_TYPE
#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_CONVERT_TO_TYPE
#define TARGET_CONVERT_TO_TYPE arm_convert_to_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

#undef TARGET_PREFERRED_RENAME_CLASS
#define TARGET_PREFERRED_RENAME_CLASS \
  arm_preferred_rename_class

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  arm_vectorize_vec_perm_const_ok

struct gcc_target targetm = TARGET_INITIALIZER;

/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *         minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;

/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;

/* Which floating point hardware to use.  */
const struct arm_fpu_desc *arm_fpu_desc;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;

/* Bit values used to identify processor capabilities.  */
#define FL_CO_PROC    (1 << 0)        /* Has external co-processor bus */
#define FL_ARCH3M     (1 << 1)        /* Extended multiply */
#define FL_MODE26     (1 << 2)        /* 26-bit mode support */
#define FL_MODE32     (1 << 3)        /* 32-bit mode support */
#define FL_ARCH4      (1 << 4)        /* Architecture rel 4 */
#define FL_ARCH5      (1 << 5)        /* Architecture rel 5 */
#define FL_THUMB      (1 << 6)        /* Thumb aware */
#define FL_LDSCHED    (1 << 7)        /* Load scheduling necessary */
#define FL_STRONG     (1 << 8)        /* StrongARM */
#define FL_ARCH5E     (1 << 9)        /* DSP extensions to v5 */
#define FL_XSCALE     (1 << 10)       /* XScale */
#define FL_CIRRUS     (1 << 11)       /* Cirrus/DSP.  */
#define FL_ARCH6      (1 << 12)       /* Architecture rel 6.  Adds
                                         media instructions.  */
#define FL_VFPV2      (1 << 13)       /* Vector Floating Point V2.  */
#define FL_WBUF       (1 << 14)       /* Schedule for write buffer ops.
                                         Note: ARM6 & 7 derivatives only.  */
#define FL_ARCH6K     (1 << 15)       /* Architecture rel 6 K extensions.  */
#define FL_THUMB2     (1 << 16)       /* Thumb-2.  */
#define FL_NOTM       (1 << 17)       /* Instructions not present in the 'M'
                                         profile.  */
#define FL_THUMB_DIV  (1 << 18)       /* Hardware divide (Thumb mode).  */
#define FL_VFPV3      (1 << 19)       /* Vector Floating Point V3.  */
#define FL_NEON       (1 << 20)       /* Neon instructions.  */
#define FL_ARCH7EM    (1 << 21)       /* Instructions present in the ARMv7E-M
                                         architecture.  */
#define FL_ARCH7      (1 << 22)       /* Architecture 7.  */
#define FL_ARM_DIV    (1 << 23)       /* Hardware divide (ARM mode).  */

#define FL_IWMMXT     (1 << 29)       /* XScale v2 or "Intel Wireless MMX technology".  */

/* Flags that only affect tuning, not available instructions.  */
#define FL_TUNE         (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
                         | FL_CO_PROC)

#define FL_FOR_ARCH2    FL_NOTM
#define FL_FOR_ARCH3    (FL_FOR_ARCH2 | FL_MODE32)
#define FL_FOR_ARCH3M   (FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4    (FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T   (FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5    (FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T   (FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E   (FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE  (FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
#define FL_FOR_ARCH6    (FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J   FL_FOR_ARCH6
#define FL_FOR_ARCH6K   (FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z   FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK  FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2  (FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M   (FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7    ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
#define FL_FOR_ARCH7A   (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
#define FL_FOR_ARCH7R   (FL_FOR_ARCH7A | FL_THUMB_DIV)
#define FL_FOR_ARCH7M   (FL_FOR_ARCH7 | FL_THUMB_DIV)
#define FL_FOR_ARCH7EM  (FL_FOR_ARCH7M | FL_ARCH7EM)
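
/* As a worked example, FL_FOR_ARCH4T expands to
   FL_NOTM | FL_MODE32 | FL_ARCH3M | FL_ARCH4 | FL_THUMB: each
   architecture level adds its new capability bits to those of its
   predecessor (or masks some out, as the ARCH6M and ARCH7 entries do
   with FL_NOTM).  */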

/* The bits in this mask specify which
   instructions we are allowed to generate.  */
static unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
static unsigned long tune_flags = 0;

/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if this chip supports the ARM 7 extensions.  */
int arm_arch7 = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip is a Cirrus variant.  */
int arm_arch_cirrus = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;

/* Nonzero if generating Thumb instructions.  */
int thumb_code = 0;

/* Nonzero if generating Thumb-1 instructions.  */
int thumb1_code = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;

/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
   we must report the mode of the memory reference from
   TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS.  */
enum machine_mode output_memory_reference_mode;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

/* Set to 1 after arm_reorg has started.  Reset to start at the start of
   the next function.  */
static int after_arm_reorg = 0;

enum arm_pcs arm_pcs_default;

/* For an explanation of these variables, see final_prescan_insn below.  */
int arm_ccfsm_state;
/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn. */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;

/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};

#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
                                   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
                                   | (1 << PIC_OFFSET_TABLE_REGNUM)))

/* Initialization code.  */

struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};


#define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
#define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
  prefetch_slots, \
  l1_size, \
  l1_line_size

const struct tune_params arm_slowmul_tune =
{
  arm_slowmul_rtx_costs,
  NULL,
  3,                                            /* Constant limit.  */
  5,                                            /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                                         /* Prefer constant pool.  */
  arm_default_branch_cost
};

const struct tune_params arm_fastmul_tune =
{
  arm_fastmul_rtx_costs,
  NULL,
  1,                                            /* Constant limit.  */
  5,                                            /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                                         /* Prefer constant pool.  */
  arm_default_branch_cost
};

/* StrongARM has early execution of branches, so a sequence that is worth
   skipping is shorter.  Set max_insns_skipped to a lower value.  */

const struct tune_params arm_strongarm_tune =
{
  arm_fastmul_rtx_costs,
  NULL,
  1,                                            /* Constant limit.  */
  3,                                            /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                                         /* Prefer constant pool.  */
  arm_default_branch_cost
};

const struct tune_params arm_xscale_tune =
{
  arm_xscale_rtx_costs,
  xscale_sched_adjust_cost,
  2,                                            /* Constant limit.  */
  3,                                            /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                                         /* Prefer constant pool.  */
  arm_default_branch_cost
};

const struct tune_params arm_9e_tune =
{
  arm_9e_rtx_costs,
  NULL,
  1,                                            /* Constant limit.  */
  5,                                            /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                                         /* Prefer constant pool.  */
  arm_default_branch_cost
};

const struct tune_params arm_v6t2_tune =
{
  arm_9e_rtx_costs,
  NULL,
  1,                                            /* Constant limit.  */
  5,                                            /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,                                        /* Prefer constant pool.  */
  arm_default_branch_cost
};

/* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
const struct tune_params arm_cortex_tune =
{
  arm_9e_rtx_costs,
  NULL,
  1,                                            /* Constant limit.  */
  5,                                            /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,                                        /* Prefer constant pool.  */
  arm_default_branch_cost
};

/* Branches can be dual-issued on Cortex-A5, so conditional execution is
   less appealing.  Set max_insns_skipped to a low value.  */

const struct tune_params arm_cortex_a5_tune =
{
  arm_9e_rtx_costs,
  NULL,
  1,                                            /* Constant limit.  */
  1,                                            /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,                                        /* Prefer constant pool.  */
  arm_cortex_a5_branch_cost
};

const struct tune_params arm_cortex_a9_tune =
{
  arm_9e_rtx_costs,
  cortex_a9_sched_adjust_cost,
  1,                                            /* Constant limit.  */
  5,                                            /* Max cond insns.  */
  ARM_PREFETCH_BENEFICIAL(4,32,32),
  false,                                        /* Prefer constant pool.  */
  arm_default_branch_cost
};

const struct tune_params arm_fa726te_tune =
{
  arm_9e_rtx_costs,
  fa726te_sched_adjust_cost,
  1,                                            /* Constant limit.  */
  5,                                            /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                                         /* Prefer constant pool.  */
  arm_default_branch_cost
};


/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
  /* ARM Cores */
#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
#include "arm-cores.def"
#undef ARM_CORE
  {NULL, arm_none, NULL, 0, NULL}
};
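
/* Illustrative expansion (hypothetical entry; the real ones live in
   arm-cores.def): an entry such as

     ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, cortex_a9)

   would expand to

     {"cortex-a9", cortexa9, "7A", FL_LDSCHED | FL_FOR_ARCH7A,
      &arm_cortex_a9_tune},

   i.e. the architecture token selects one of the FL_FOR_ARCH* flag sets
   and the costs token selects one of the tune_params structures above.  */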

static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify tuning costs here as it will be figured out
     from the core.  */

#define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, FLAGS, NULL},
#include "arm-arches.def"
#undef ARM_ARCH
  {NULL, arm_none, NULL, 0 , NULL}
};


/* These are populated as commandline arguments are processed, or NULL
   if not specified.  */
static const struct processors *arm_selected_arch;
static const struct processors *arm_selected_cpu;
static const struct processors *arm_selected_tune;

/* The name of the preprocessor macro to define for this architecture.  */

char arm_arch_name[] = "__ARM_ARCH_0UNK__";

/* Available values for -mfpu=.  */

static const struct arm_fpu_desc all_fpus[] =
{
#define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16) \
  { NAME, MODEL, REV, VFP_REGS, NEON, FP16 },
#include "arm-fpus.def"
#undef ARM_FPU
};


/* Supported TLS relocations.  */

enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32,
  TLS_DESCSEQ   /* GNU scheme */
};

/* The maximum number of insns to be used when loading a constant.  */
inline static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}

/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}

/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
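
/* Worked example: bit_count (0x2A) sees 0b101010 and clears one set bit
   per iteration (0b101010 -> 0b101000 -> 0b100000 -> 0), returning 3.  */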

typedef struct
{
  enum machine_mode mode;
  const char *name;
} arm_fixed_mode_set;

/* A small helper for setting fixed-point library libfuncs.  */

static void
arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
                             const char *funcname, const char *modename,
                             int num_suffix)
{
  char buffer[50];

  if (num_suffix == 0)
    sprintf (buffer, "__gnu_%s%s", funcname, modename);
  else
    sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);

  set_optab_libfunc (optable, mode, buffer);
}
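
/* For example, the call made below in arm_init_libfuncs,
     arm_set_fixed_optab_libfunc (add_optab, QQmode, "add", "qq", 3);
   registers the libcall name "__gnu_addqq3" for QQmode addition.  */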

static void
arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
                            enum machine_mode from, const char *funcname,
                            const char *toname, const char *fromname)
{
  char buffer[50];
  const char *maybe_suffix_2 = "";

  /* Follow the logic for selecting a "2" suffix in fixed-bit.h.  */
  if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
      && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
      && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
    maybe_suffix_2 = "2";

  sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
           maybe_suffix_2);

  set_conv_libfunc (optable, to, from, buffer);
}
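
/* For example, registering the QQmode -> HQmode "fract" conversion (both
   signed fract modes, so the "2" suffix applies) produces the libcall
   name "__gnu_fractqqhq2", whereas SFmode -> QQmode produces
   "__gnu_fractsfqq" with no suffix.  */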

/* Set up library functions unique to ARM.  */

static void
arm_init_libfuncs (void)
{
  /* For Linux, we have access to kernel support for atomic operations.  */
  if (arm_abi == ARM_ABI_AAPCS_LINUX)
    init_sync_libfuncs (2 * UNITS_PER_WORD);

  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");

  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");

  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);

  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
                        (arm_fp16_format == ARM_FP16_FORMAT_IEEE
                         ? "__gnu_f2h_ieee"
                         : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
                        (arm_fp16_format == ARM_FP16_FORMAT_IEEE
                         ? "__gnu_h2f_ieee"
                         : "__gnu_h2f_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }

  /* Use names prefixed with __gnu_ for fixed-point helper functions.  */
  {
    const arm_fixed_mode_set fixed_arith_modes[] =
      {
        { QQmode, "qq" },
        { UQQmode, "uqq" },
        { HQmode, "hq" },
        { UHQmode, "uhq" },
        { SQmode, "sq" },
        { USQmode, "usq" },
        { DQmode, "dq" },
        { UDQmode, "udq" },
        { TQmode, "tq" },
        { UTQmode, "utq" },
        { HAmode, "ha" },
        { UHAmode, "uha" },
        { SAmode, "sa" },
        { USAmode, "usa" },
        { DAmode, "da" },
        { UDAmode, "uda" },
        { TAmode, "ta" },
        { UTAmode, "uta" }
      };
    const arm_fixed_mode_set fixed_conv_modes[] =
      {
        { QQmode, "qq" },
        { UQQmode, "uqq" },
        { HQmode, "hq" },
        { UHQmode, "uhq" },
        { SQmode, "sq" },
        { USQmode, "usq" },
        { DQmode, "dq" },
        { UDQmode, "udq" },
        { TQmode, "tq" },
        { UTQmode, "utq" },
        { HAmode, "ha" },
        { UHAmode, "uha" },
        { SAmode, "sa" },
        { USAmode, "usa" },
        { DAmode, "da" },
        { UDAmode, "uda" },
        { TAmode, "ta" },
        { UTAmode, "uta" },
        { QImode, "qi" },
        { HImode, "hi" },
        { SImode, "si" },
        { DImode, "di" },
        { TImode, "ti" },
        { SFmode, "sf" },
        { DFmode, "df" }
      };
    unsigned int i, j;

    for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
      {
        arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
                                     "add", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
                                     "ssadd", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
                                     "usadd", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
                                     "sub", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
                                     "sssub", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
                                     "ussub", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
                                     "mul", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
                                     "ssmul", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
                                     "usmul", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
                                     "div", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
                                     "udiv", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
                                     "ssdiv", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
                                     "usdiv", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
                                     "neg", fixed_arith_modes[i].name, 2);
        arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
                                     "ssneg", fixed_arith_modes[i].name, 2);
        arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
                                     "usneg", fixed_arith_modes[i].name, 2);
        arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
                                     "ashl", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
                                     "ashr", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
                                     "lshr", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
                                     "ssashl", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
                                     "usashl", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
                                     "cmp", fixed_arith_modes[i].name, 2);
      }

    for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
      for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
        {
          if (i == j
              || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
                  && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
            continue;

          arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
                                      fixed_conv_modes[j].mode, "fract",
                                      fixed_conv_modes[i].name,
                                      fixed_conv_modes[j].name);
          arm_set_fixed_conv_libfunc (satfract_optab,
                                      fixed_conv_modes[i].mode,
                                      fixed_conv_modes[j].mode, "satfract",
                                      fixed_conv_modes[i].name,
                                      fixed_conv_modes[j].name);
          arm_set_fixed_conv_libfunc (fractuns_optab,
                                      fixed_conv_modes[i].mode,
                                      fixed_conv_modes[j].mode, "fractuns",
                                      fixed_conv_modes[i].name,
                                      fixed_conv_modes[j].name);
          arm_set_fixed_conv_libfunc (satfractuns_optab,
                                      fixed_conv_modes[i].mode,
                                      fixed_conv_modes[j].mode, "satfractuns",
1389
                                      fixed_conv_modes[i].name,
1390
                                      fixed_conv_modes[j].name);
1391
        }
1392
  }
1393
 
1394
  if (TARGET_AAPCS_BASED)
1395
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
1396
}
1397
 
1398
/* On AAPCS systems, this is the "struct __va_list".  */
1399
static GTY(()) tree va_list_type;
1400
 
1401
/* Return the type to use as __builtin_va_list.  */
1402
static tree
1403
arm_build_builtin_va_list (void)
1404
{
1405
  tree va_list_name;
1406
  tree ap_field;
1407
 
1408
  if (!TARGET_AAPCS_BASED)
1409
    return std_build_builtin_va_list ();
1410
 
1411
  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
1412
     defined as:
1413
 
1414
       struct __va_list
1415
       {
1416
         void *__ap;
1417
       };
1418
 
1419
     The C Library ABI further reinforces this definition in \S
1420
     4.1.
1421
 
1422
     We must follow this definition exactly.  The structure tag
1423
     name is visible in C++ mangled names, and thus forms a part
1424
     of the ABI.  The field name may be used by people who
1425
     #include <stdarg.h>.  */
1426
  /* Create the type.  */
1427
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
1428
  /* Give it the required name.  */
1429
  va_list_name = build_decl (BUILTINS_LOCATION,
1430
                             TYPE_DECL,
1431
                             get_identifier ("__va_list"),
1432
                             va_list_type);
1433
  DECL_ARTIFICIAL (va_list_name) = 1;
1434
  TYPE_NAME (va_list_type) = va_list_name;
1435
  TYPE_STUB_DECL (va_list_type) = va_list_name;
1436
  /* Create the __ap field.  */
1437
  ap_field = build_decl (BUILTINS_LOCATION,
1438
                         FIELD_DECL,
1439
                         get_identifier ("__ap"),
1440
                         ptr_type_node);
1441
  DECL_ARTIFICIAL (ap_field) = 1;
1442
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
1443
  TYPE_FIELDS (va_list_type) = ap_field;
1444
  /* Compute its layout.  */
1445
  layout_type (va_list_type);
1446
 
1447
  return va_list_type;
1448
}
1449
 
1450
/* Return an expression of type "void *" pointing to the next
1451
   available argument in a variable-argument list.  VALIST is the
1452
   user-level va_list object, of type __builtin_va_list.  */
1453
static tree
1454
arm_extract_valist_ptr (tree valist)
1455
{
1456
  if (TREE_TYPE (valist) == error_mark_node)
1457
    return error_mark_node;
1458
 
1459
  /* On an AAPCS target, the pointer is stored within "struct
1460
     va_list".  */
1461
  if (TARGET_AAPCS_BASED)
1462
    {
1463
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
1464
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
1465
                       valist, ap_field, NULL_TREE);
1466
    }
1467
 
1468
  return valist;
1469
}
1470
 
1471
/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
1472
static void
1473
arm_expand_builtin_va_start (tree valist, rtx nextarg)
1474
{
1475
  valist = arm_extract_valist_ptr (valist);
1476
  std_expand_builtin_va_start (valist, nextarg);
1477
}
1478
 
1479
/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
1480
static tree
1481
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
1482
                          gimple_seq *post_p)
1483
{
1484
  valist = arm_extract_valist_ptr (valist);
1485
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
1486
}
1487
 
1488
/* Fix up any incompatible options that the user has specified.  */
1489
static void
1490
arm_option_override (void)
1491
{
1492
  if (global_options_set.x_arm_arch_option)
1493
    arm_selected_arch = &all_architectures[arm_arch_option];
1494
 
1495
  if (global_options_set.x_arm_cpu_option)
1496
    arm_selected_cpu = &all_cores[(int) arm_cpu_option];
1497
 
1498
  if (global_options_set.x_arm_tune_option)
1499
    arm_selected_tune = &all_cores[(int) arm_tune_option];
1500
 
1501
#ifdef SUBTARGET_OVERRIDE_OPTIONS
1502
  SUBTARGET_OVERRIDE_OPTIONS;
1503
#endif
1504
 
1505
  if (arm_selected_arch)
1506
    {
1507
      if (arm_selected_cpu)
1508
        {
1509
          /* Check for conflict between mcpu and march.  */
1510
          if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
1511
            {
1512
              warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
1513
                       arm_selected_cpu->name, arm_selected_arch->name);
1514
              /* -march wins for code generation.
1515
                 -mcpu wins for default tuning.  */
1516
              if (!arm_selected_tune)
1517
                arm_selected_tune = arm_selected_cpu;
1518
 
1519
              arm_selected_cpu = arm_selected_arch;
1520
            }
1521
          else
1522
            /* -mcpu wins.  */
1523
            arm_selected_arch = NULL;
1524
        }
1525
      else
1526
        /* Pick a CPU based on the architecture.  */
1527
        arm_selected_cpu = arm_selected_arch;
1528
    }
1529
 
1530
  /* If the user did not specify a processor, choose one for them.  */
1531
  if (!arm_selected_cpu)
1532
    {
1533
      const struct processors * sel;
1534
      unsigned int        sought;
1535
 
1536
      arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
1537
      if (!arm_selected_cpu->name)
1538
        {
1539
#ifdef SUBTARGET_CPU_DEFAULT
1540
          /* Use the subtarget default CPU if none was specified by
1541
             configure.  */
1542
          arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
1543
#endif
1544
          /* Default to ARM6.  */
1545
          if (!arm_selected_cpu->name)
1546
            arm_selected_cpu = &all_cores[arm6];
1547
        }
1548
 
1549
      sel = arm_selected_cpu;
1550
      insn_flags = sel->flags;
1551
 
1552
      /* Now check to see if the user has specified some command line
1553
         switches that require certain abilities from the CPU.  */
1554
      sought = 0;
1555
 
1556
      if (TARGET_INTERWORK || TARGET_THUMB)
1557
        {
1558
          sought |= (FL_THUMB | FL_MODE32);
1559
 
1560
          /* There are no ARM processors that support both APCS-26 and
1561
             interworking.  Therefore we force FL_MODE26 to be removed
1562
             from insn_flags here (if it was set), so that the search
1563
             below will always be able to find a compatible processor.  */
1564
          insn_flags &= ~FL_MODE26;
1565
        }
1566
 
1567
      if (sought != 0 && ((sought & insn_flags) != sought))
1568
        {
1569
          /* Try to locate a CPU type that supports all of the abilities
1570
             of the default CPU, plus the extra abilities requested by
1571
             the user.  */
1572
          for (sel = all_cores; sel->name != NULL; sel++)
1573
            if ((sel->flags & sought) == (sought | insn_flags))
1574
              break;
1575
 
1576
          if (sel->name == NULL)
1577
            {
1578
              unsigned current_bit_count = 0;
1579
              const struct processors * best_fit = NULL;
1580
 
1581
              /* Ideally we would like to issue an error message here
1582
                 saying that it was not possible to find a CPU compatible
1583
                 with the default CPU, but which also supports the command
1584
                 line options specified by the programmer, and so they
1585
                 ought to use the -mcpu=<name> command line option to
1586
                 override the default CPU type.
1587
 
1588
                 If we cannot find a cpu that has both the
1589
                 characteristics of the default cpu and the given
1590
                 command line options we scan the array again looking
1591
                 for a best match.  */
1592
              for (sel = all_cores; sel->name != NULL; sel++)
1593
                if ((sel->flags & sought) == sought)
1594
                  {
1595
                    unsigned count;
1596
 
1597
                    count = bit_count (sel->flags & insn_flags);
1598
 
1599
                    if (count >= current_bit_count)
1600
                      {
1601
                        best_fit = sel;
1602
                        current_bit_count = count;
1603
                      }
1604
                  }
1605
 
1606
              gcc_assert (best_fit);
1607
              sel = best_fit;
1608
            }
1609
 
1610
          arm_selected_cpu = sel;
1611
        }
1612
    }
1613
 
1614
  gcc_assert (arm_selected_cpu);
1615
  /* The selected cpu may be an architecture, so look up tuning by core ID.  */
1616
  if (!arm_selected_tune)
1617
    arm_selected_tune = &all_cores[arm_selected_cpu->core];
1618
 
1619
  sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
1620
  insn_flags = arm_selected_cpu->flags;
1621
 
1622
  arm_tune = arm_selected_tune->core;
1623
  tune_flags = arm_selected_tune->flags;
1624
  current_tune = arm_selected_tune->tune;
1625
 
1626
  /* Make sure that the processor choice does not conflict with any of the
1627
     other command line choices.  */
1628
  if (TARGET_ARM && !(insn_flags & FL_NOTM))
1629
    error ("target CPU does not support ARM mode");
1630
 
1631
  /* BPABI targets use linker tricks to allow interworking on cores
1632
     without thumb support.  */
1633
  if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
1634
    {
1635
      warning (0, "target CPU does not support interworking" );
1636
      target_flags &= ~MASK_INTERWORK;
1637
    }
1638
 
1639
  if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1640
    {
1641
      warning (0, "target CPU does not support THUMB instructions");
1642
      target_flags &= ~MASK_THUMB;
1643
    }
1644
 
1645
  if (TARGET_APCS_FRAME && TARGET_THUMB)
1646
    {
1647
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1648
      target_flags &= ~MASK_APCS_FRAME;
1649
    }
1650
 
1651
  /* Callee super interworking implies thumb interworking.  Adding
1652
     this to the flags here simplifies the logic elsewhere.  */
1653
  if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1654
    target_flags |= MASK_INTERWORK;
1655
 
1656
  /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1657
     from here where no function is being compiled currently.  */
1658
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1659
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1660
 
1661
  if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1662
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1663
 
1664
  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1665
    {
1666
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1667
      target_flags |= MASK_APCS_FRAME;
1668
    }
1669
 
1670
  if (TARGET_POKE_FUNCTION_NAME)
1671
    target_flags |= MASK_APCS_FRAME;
1672
 
1673
  if (TARGET_APCS_REENT && flag_pic)
1674
    error ("-fpic and -mapcs-reent are incompatible");
1675
 
1676
  if (TARGET_APCS_REENT)
1677
    warning (0, "APCS reentrant code not supported.  Ignored");
1678
 
1679
  /* If this target is normally configured to use APCS frames, warn if they
1680
     are turned off and debugging is turned on.  */
1681
  if (TARGET_ARM
1682
      && write_symbols != NO_DEBUG
1683
      && !TARGET_APCS_FRAME
1684
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
1685
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1686
 
1687
  if (TARGET_APCS_FLOAT)
1688
    warning (0, "passing floating point arguments in fp regs not yet supported");
1689
 
1690
  if (TARGET_LITTLE_WORDS)
1691
    warning (OPT_Wdeprecated, "%<mwords-little-endian%> is deprecated and "
1692
             "will be removed in a future release");
1693
 
1694
  /* Initialize boolean versions of the flags, for use in the arm.md file.  */
1695
  arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1696
  arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1697
  arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1698
  arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1699
  arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1700
  arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1701
  arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1702
  arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1703
  arm_arch7 = (insn_flags & FL_ARCH7) != 0;
1704
  arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
1705
  arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1706
  arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1707
  arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
1708
 
1709
  arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1710
  arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1711
  thumb_code = TARGET_ARM == 0;
1712
  thumb1_code = TARGET_THUMB1 != 0;
1713
  arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1714
  arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1715
  arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1716
  arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
1717
  arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
1718
  arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1719
 
1720
  /* If we are not using the default (ARM mode) section anchor offset
1721
     ranges, then set the correct ranges now.  */
1722
  if (TARGET_THUMB1)
1723
    {
1724
      /* Thumb-1 LDR instructions cannot have negative offsets.
1725
         Permissible positive offset ranges are 5-bit (for byte loads),
1726
         6-bit (for halfword loads), or 7-bit (for word loads).
1727
         Empirical results suggest a 7-bit anchor range gives the best
1728
         overall code size.  */
1729
      targetm.min_anchor_offset = 0;
1730
      targetm.max_anchor_offset = 127;
1731
    }
1732
  else if (TARGET_THUMB2)
1733
    {
1734
      /* The minimum is set such that the total size of the block
1735
         for a particular anchor is 248 + 1 + 4095 bytes, which is
1736
         divisible by eight, ensuring natural spacing of anchors.  */
1737
      targetm.min_anchor_offset = -248;
1738
      targetm.max_anchor_offset = 4095;
1739
    }
1740
 
1741
  /* V5 code we generate is completely interworking capable, so we turn off
1742
     TARGET_INTERWORK here to avoid many tests later on.  */
1743
 
1744
  /* XXX However, we must pass the right pre-processor defines to CPP
1745
     or GLD can get confused.  This is a hack.  */
1746
  if (TARGET_INTERWORK)
1747
    arm_cpp_interwork = 1;
1748
 
1749
  if (arm_arch5)
1750
    target_flags &= ~MASK_INTERWORK;
1751
 
1752
  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1753
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1754
 
1755
  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1756
    error ("iwmmxt abi requires an iwmmxt capable cpu");
1757
 
1758
  if (!global_options_set.x_arm_fpu_index)
1759
    {
1760
      const char *target_fpu_name;
1761
      bool ok;
1762
 
1763
#ifdef FPUTYPE_DEFAULT
1764
      target_fpu_name = FPUTYPE_DEFAULT;
1765
#else
1766
      if (arm_arch_cirrus)
1767
        target_fpu_name = "maverick";
1768
      else
1769
        target_fpu_name = "fpe2";
1770
#endif
1771
 
1772
      ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
1773
                                  CL_TARGET);
1774
      gcc_assert (ok);
1775
    }
1776
 
1777
  arm_fpu_desc = &all_fpus[arm_fpu_index];
1778
 
1779
  switch (arm_fpu_desc->model)
1780
    {
1781
    case ARM_FP_MODEL_FPA:
1782
      if (arm_fpu_desc->rev == 2)
1783
        arm_fpu_attr = FPU_FPE2;
1784
      else if (arm_fpu_desc->rev == 3)
1785
        arm_fpu_attr = FPU_FPE3;
1786
      else
1787
        arm_fpu_attr = FPU_FPA;
1788
      break;
1789
 
1790
    case ARM_FP_MODEL_MAVERICK:
1791
      arm_fpu_attr = FPU_MAVERICK;
1792
      break;
1793
 
1794
    case ARM_FP_MODEL_VFP:
1795
      arm_fpu_attr = FPU_VFP;
1796
      break;
1797
 
1798
    default:
1799
      gcc_unreachable();
1800
    }
1801
 
1802
  if (TARGET_AAPCS_BASED
1803
      && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
1804
    error ("FPA is unsupported in the AAPCS");
1805
 
1806
  if (TARGET_AAPCS_BASED)
1807
    {
1808
      if (TARGET_CALLER_INTERWORKING)
1809
        error ("AAPCS does not support -mcaller-super-interworking");
1810
      else
1811
        if (TARGET_CALLEE_INTERWORKING)
1812
          error ("AAPCS does not support -mcallee-super-interworking");
1813
    }
1814
 
1815
  /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1816
     VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1817
     will ever exist.  GCC makes no attempt to support this combination.  */
1818
  if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1819
    sorry ("iWMMXt and hardware floating point");
1820
 
1821
  /* ??? iWMMXt insn patterns need auditing for Thumb-2.  */
1822
  if (TARGET_THUMB2 && TARGET_IWMMXT)
1823
    sorry ("Thumb-2 iWMMXt");
1824
 
1825
  /* __fp16 support currently assumes the core has ldrh.  */
1826
  if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1827
    sorry ("__fp16 and no ldrh");
1828
 
1829
  /* If soft-float is specified then don't use FPU.  */
1830
  if (TARGET_SOFT_FLOAT)
1831
    arm_fpu_attr = FPU_NONE;
1832
 
1833
  if (TARGET_AAPCS_BASED)
1834
    {
1835
      if (arm_abi == ARM_ABI_IWMMXT)
1836
        arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1837
      else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1838
               && TARGET_HARD_FLOAT
1839
               && TARGET_VFP)
1840
        arm_pcs_default = ARM_PCS_AAPCS_VFP;
1841
      else
1842
        arm_pcs_default = ARM_PCS_AAPCS;
1843
    }
1844
  else
1845
    {
1846
      if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1847
        sorry ("-mfloat-abi=hard and VFP");
1848
 
1849
      if (arm_abi == ARM_ABI_APCS)
1850
        arm_pcs_default = ARM_PCS_APCS;
1851
      else
1852
        arm_pcs_default = ARM_PCS_ATPCS;
1853
    }
1854
 
1855
  /* For arm2/3 there is no need to do any scheduling if there is only
1856
     a floating point emulator, or we are doing software floating-point.  */
1857
  if ((TARGET_SOFT_FLOAT
1858
       || (TARGET_FPA && arm_fpu_desc->rev))
1859
      && (tune_flags & FL_MODE32) == 0)
1860
    flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1861
 
1862
  /* Use the cp15 method if it is available.  */
1863
  if (target_thread_pointer == TP_AUTO)
1864
    {
1865
      if (arm_arch6k && !TARGET_THUMB1)
1866
        target_thread_pointer = TP_CP15;
1867
      else
1868
        target_thread_pointer = TP_SOFT;
1869
    }
1870
 
1871
  if (TARGET_HARD_TP && TARGET_THUMB1)
1872
    error ("can not use -mtp=cp15 with 16-bit Thumb");
1873
 
1874
  /* Override the default structure alignment for AAPCS ABI.  */
1875
  if (!global_options_set.x_arm_structure_size_boundary)
1876
    {
1877
      if (TARGET_AAPCS_BASED)
1878
        arm_structure_size_boundary = 8;
1879
    }
1880
  else
1881
    {
1882
      if (arm_structure_size_boundary != 8
1883
          && arm_structure_size_boundary != 32
1884
          && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
1885
        {
1886
          if (ARM_DOUBLEWORD_ALIGN)
1887
            warning (0,
1888
                     "structure size boundary can only be set to 8, 32 or 64");
1889
          else
1890
            warning (0, "structure size boundary can only be set to 8 or 32");
1891
          arm_structure_size_boundary
1892
            = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
1893
        }
1894
    }
1895
 
1896
  if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1897
    {
1898
      error ("RTP PIC is incompatible with Thumb");
1899
      flag_pic = 0;
1900
    }
1901
 
1902
  /* If stack checking is disabled, we can use r10 as the PIC register,
1903
     which keeps r9 available.  The EABI specifies r9 as the PIC register.  */
1904
  if (flag_pic && TARGET_SINGLE_PIC_BASE)
1905
    {
1906
      if (TARGET_VXWORKS_RTP)
1907
        warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1908
      arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1909
    }
1910
 
1911
  if (flag_pic && TARGET_VXWORKS_RTP)
1912
    arm_pic_register = 9;
1913
 
1914
  if (arm_pic_register_string != NULL)
1915
    {
1916
      int pic_register = decode_reg_name (arm_pic_register_string);
1917
 
1918
      if (!flag_pic)
1919
        warning (0, "-mpic-register= is useless without -fpic");
1920
 
1921
      /* Prevent the user from choosing an obviously stupid PIC register.  */
1922
      else if (pic_register < 0 || call_used_regs[pic_register]
1923
               || pic_register == HARD_FRAME_POINTER_REGNUM
1924
               || pic_register == STACK_POINTER_REGNUM
1925
               || pic_register >= PC_REGNUM
1926
               || (TARGET_VXWORKS_RTP
1927
                   && (unsigned int) pic_register != arm_pic_register))
1928
        error ("unable to use '%s' for PIC register", arm_pic_register_string);
1929
      else
1930
        arm_pic_register = pic_register;
1931
    }
1932
 
1933
  /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores.  */
1934
  if (fix_cm3_ldrd == 2)
1935
    {
1936
      if (arm_selected_cpu->core == cortexm3)
1937
        fix_cm3_ldrd = 1;
1938
      else
1939
        fix_cm3_ldrd = 0;
1940
    }
1941
 
1942
  /* Enable -munaligned-access by default for
1943
     - all ARMv6 architecture-based processors
1944
     - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
1945
 
1946
     Disable -munaligned-access by default for
1947
     - all pre-ARMv6 architecture-based processors
1948
     - ARMv6-M architecture-based processors.  */
1949
 
1950
  if (unaligned_access == 2)
1951
    {
1952
      if (arm_arch6 && (arm_arch_notm || arm_arch7))
1953
        unaligned_access = 1;
1954
      else
1955
        unaligned_access = 0;
1956
    }
1957
  else if (unaligned_access == 1
1958
           && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
1959
    {
1960
      warning (0, "target CPU does not support unaligned accesses");
1961
      unaligned_access = 0;
1962
    }
1963
 
1964
  if (TARGET_THUMB1 && flag_schedule_insns)
1965
    {
1966
      /* Don't warn since it's on by default in -O2.  */
1967
      flag_schedule_insns = 0;
1968
    }
1969
 
1970
  if (optimize_size)
1971
    {
1972
      /* If optimizing for size, bump the number of instructions that we
1973
         are prepared to conditionally execute (even on a StrongARM).  */
1974
      max_insns_skipped = 6;
1975
    }
1976
  else
1977
    max_insns_skipped = current_tune->max_insns_skipped;
1978
 
1979
  /* Hot/Cold partitioning is not currently supported, since we can't
1980
     handle literal pool placement in that case.  */
1981
  if (flag_reorder_blocks_and_partition)
1982
    {
1983
      inform (input_location,
1984
              "-freorder-blocks-and-partition not supported on this architecture");
1985
      flag_reorder_blocks_and_partition = 0;
1986
      flag_reorder_blocks = 1;
1987
    }
1988
 
1989
  if (flag_pic)
1990
    /* Hoisting PIC address calculations more aggressively provides a small,
1991
       but measurable, size reduction for PIC code.  Therefore, we decrease
1992
       the bar for unrestricted expression hoisting to the cost of PIC address
1993
       calculation, which is 2 instructions.  */
1994
    maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
1995
                           global_options.x_param_values,
1996
                           global_options_set.x_param_values);
1997
 
1998
  /* ARM EABI defaults to strict volatile bitfields.  */
1999
  if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
2000
      && abi_version_at_least(2))
2001
    flag_strict_volatile_bitfields = 1;
2002
 
2003
  /* Enable software prefetching at -O3 for CPUs that have prefetch, and we have deemed
2004
     it beneficial (signified by setting num_prefetch_slots to 1 or more).  */
2005
  if (flag_prefetch_loop_arrays < 0
2006
      && HAVE_prefetch
2007
      && optimize >= 3
2008
      && current_tune->num_prefetch_slots > 0)
2009
    flag_prefetch_loop_arrays = 1;
2010
 
2011
  /* Set up parameters to be used in the prefetching algorithm.  Do not override the
2012
     defaults unless we are tuning for a core we have researched values for.  */
2013
  if (current_tune->num_prefetch_slots > 0)
2014
    maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2015
                           current_tune->num_prefetch_slots,
2016
                           global_options.x_param_values,
2017
                           global_options_set.x_param_values);
2018
  if (current_tune->l1_cache_line_size >= 0)
2019
    maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2020
                           current_tune->l1_cache_line_size,
2021
                           global_options.x_param_values,
2022
                           global_options_set.x_param_values);
2023
  if (current_tune->l1_cache_size >= 0)
2024
    maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2025
                           current_tune->l1_cache_size,
2026
                           global_options.x_param_values,
2027
                           global_options_set.x_param_values);
2028
 
2029
  /* Register global variables with the garbage collector.  */
2030
  arm_add_gc_roots ();
2031
}
2032
 
2033
static void
2034
arm_add_gc_roots (void)
2035
{
2036
  gcc_obstack_init(&minipool_obstack);
2037
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
2038
}
2039
 
2040
/* A table of known ARM exception types.
2041
   For use with the interrupt function attribute.  */
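/* A handler is typically declared as, for example,
     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
   and the string argument is matched against this table.  */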
2042
 
2043
typedef struct
2044
{
2045
  const char *const arg;
2046
  const unsigned long return_value;
2047
}
2048
isr_attribute_arg;
2049
 
2050
static const isr_attribute_arg isr_attribute_args [] =
2051
{
2052
  { "IRQ",   ARM_FT_ISR },
2053
  { "irq",   ARM_FT_ISR },
2054
  { "FIQ",   ARM_FT_FIQ },
2055
  { "fiq",   ARM_FT_FIQ },
2056
  { "ABORT", ARM_FT_ISR },
2057
  { "abort", ARM_FT_ISR },
2058
  { "ABORT", ARM_FT_ISR },
2059
  { "abort", ARM_FT_ISR },
2060
  { "UNDEF", ARM_FT_EXCEPTION },
2061
  { "undef", ARM_FT_EXCEPTION },
2062
  { "SWI",   ARM_FT_EXCEPTION },
2063
  { "swi",   ARM_FT_EXCEPTION },
2064
  { NULL,    ARM_FT_NORMAL }
2065
};
2066
 
2067
/* Returns the (interrupt) function type of the current
2068
   function, or ARM_FT_UNKNOWN if the type cannot be determined.  */
2069
 
2070
static unsigned long
2071
arm_isr_value (tree argument)
2072
{
2073
  const isr_attribute_arg * ptr;
2074
  const char *              arg;
2075
 
2076
  if (!arm_arch_notm)
2077
    return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
2078
 
2079
  /* No argument - default to IRQ.  */
2080
  if (argument == NULL_TREE)
2081
    return ARM_FT_ISR;
2082
 
2083
  /* Get the value of the argument.  */
2084
  if (TREE_VALUE (argument) == NULL_TREE
2085
      || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
2086
    return ARM_FT_UNKNOWN;
2087
 
2088
  arg = TREE_STRING_POINTER (TREE_VALUE (argument));
2089
 
2090
  /* Check it against the list of known arguments.  */
2091
  for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
2092
    if (streq (arg, ptr->arg))
2093
      return ptr->return_value;
2094
 
2095
  /* An unrecognized interrupt type.  */
2096
  return ARM_FT_UNKNOWN;
2097
}
2098
 
2099
/* Computes the type of the current function.  */
2100
 
2101
static unsigned long
2102
arm_compute_func_type (void)
2103
{
2104
  unsigned long type = ARM_FT_UNKNOWN;
2105
  tree a;
2106
  tree attr;
2107
 
2108
  gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
2109
 
2110
  /* Decide if the current function is volatile.  Such functions
2111
     never return, and many memory cycles can be saved by not storing
2112
     register values that will never be needed again.  This optimization
2113
     was added to speed up context switching in a kernel application.  */
2114
  if (optimize > 0
2115
      && (TREE_NOTHROW (current_function_decl)
2116
          || !(flag_unwind_tables
2117
               || (flag_exceptions
2118
                   && arm_except_unwind_info (&global_options) != UI_SJLJ)))
2119
      && TREE_THIS_VOLATILE (current_function_decl))
2120
    type |= ARM_FT_VOLATILE;
2121
 
2122
  if (cfun->static_chain_decl != NULL)
2123
    type |= ARM_FT_NESTED;
2124
 
2125
  attr = DECL_ATTRIBUTES (current_function_decl);
2126
 
2127
  a = lookup_attribute ("naked", attr);
2128
  if (a != NULL_TREE)
2129
    type |= ARM_FT_NAKED;
2130
 
2131
  a = lookup_attribute ("isr", attr);
2132
  if (a == NULL_TREE)
2133
    a = lookup_attribute ("interrupt", attr);
2134
 
2135
  if (a == NULL_TREE)
2136
    type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2137
  else
2138
    type |= arm_isr_value (TREE_VALUE (a));
2139
 
2140
  return type;
2141
}
2142
 
2143
/* Returns the type of the current function.  */
2144
 
2145
unsigned long
2146
arm_current_func_type (void)
2147
{
2148
  if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2149
    cfun->machine->func_type = arm_compute_func_type ();
2150
 
2151
  return cfun->machine->func_type;
2152
}
2153
 
2154
bool
2155
arm_allocate_stack_slots_for_args (void)
2156
{
2157
  /* Naked functions should not allocate stack slots for arguments.  */
2158
  return !IS_NAKED (arm_current_func_type ());
2159
}
2160
 
2161
 
2162
/* Output assembler code for a block containing the constant parts
2163
   of a trampoline, leaving space for the variable parts.
2164
 
2165
   On the ARM, (if r8 is the static chain regnum, and remembering that
2166
   referencing pc adds an offset of 8) the trampoline looks like:
2167
           ldr          r8, [pc, #0]
2168
           ldr          pc, [pc]
2169
           .word        static chain value
2170
           .word        function's address
2171
   XXX FIXME: When the trampoline returns, r8 will be clobbered.  */
2172
 
2173
static void
2174
arm_asm_trampoline_template (FILE *f)
2175
{
2176
  if (TARGET_ARM)
2177
    {
2178
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2179
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2180
    }
2181
  else if (TARGET_THUMB2)
2182
    {
2183
      /* The Thumb-2 trampoline is similar to the arm implementation.
2184
         Unlike 16-bit Thumb, we enter the stub in thumb mode.  */
2185
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2186
                   STATIC_CHAIN_REGNUM, PC_REGNUM);
2187
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2188
    }
2189
  else
2190
    {
2191
      ASM_OUTPUT_ALIGN (f, 2);
2192
      fprintf (f, "\t.code\t16\n");
2193
      fprintf (f, ".Ltrampoline_start:\n");
2194
      asm_fprintf (f, "\tpush\t{r0, r1}\n");
2195
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2196
      asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2197
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2198
      asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2199
      asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2200
    }
2201
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2202
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2203
}
2204
 
2205
/* Emit RTL insns to initialize the variable parts of a trampoline.  */
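/* The offsets used below address the two data words that follow the code
   part of the template emitted above: 8 and 12 bytes in for 32-bit (ARM or
   Thumb-2) code, 12 and 16 bytes in for the longer 16-bit Thumb-1 stub.  */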
2206
 
2207
static void
2208
arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2209
{
2210
  rtx fnaddr, mem, a_tramp;
2211
 
2212
  emit_block_move (m_tramp, assemble_trampoline_template (),
2213
                   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2214
 
2215
  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2216
  emit_move_insn (mem, chain_value);
2217
 
2218
  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2219
  fnaddr = XEXP (DECL_RTL (fndecl), 0);
2220
  emit_move_insn (mem, fnaddr);
2221
 
2222
  a_tramp = XEXP (m_tramp, 0);
2223
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2224
                     LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2225
                     plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
2226
}
2227
 
2228
/* Thumb trampolines should be entered in thumb mode, so set
2229
   the bottom bit of the address.  */
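/* (An interworking branch such as BX or BLX switches to Thumb state when
   bit 0 of the target address is set.)  */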
2230
 
2231
static rtx
2232
arm_trampoline_adjust_address (rtx addr)
2233
{
2234
  if (TARGET_THUMB)
2235
    addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2236
                                NULL, 0, OPTAB_LIB_WIDEN);
2237
  return addr;
2238
}
2239
 
2240
/* Return 1 if it is possible to return using a single instruction.
2241
   If SIBLING is non-null, this is a test for a return before a sibling
2242
   call.  SIBLING is the call insn, so we can examine its register usage.  */
2243
 
2244
int
2245
use_return_insn (int iscond, rtx sibling)
2246
{
2247
  int regno;
2248
  unsigned int func_type;
2249
  unsigned long saved_int_regs;
2250
  unsigned HOST_WIDE_INT stack_adjust;
2251
  arm_stack_offsets *offsets;
2252
 
2253
  /* Never use a return instruction before reload has run.  */
2254
  if (!reload_completed)
2255
    return 0;
2256
 
2257
  func_type = arm_current_func_type ();
2258
 
2259
  /* Naked, volatile and stack alignment functions need special
2260
     consideration.  */
2261
  if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2262
    return 0;
2263
 
2264
  /* So do interrupt functions that use the frame pointer and Thumb
2265
     interrupt functions.  */
2266
  if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2267
    return 0;
2268
 
2269
  offsets = arm_get_frame_offsets ();
2270
  stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2271
 
2272
  /* As do variadic functions.  */
2273
  if (crtl->args.pretend_args_size
2274
      || cfun->machine->uses_anonymous_args
2275
      /* Or if the function calls __builtin_eh_return () */
2276
      || crtl->calls_eh_return
2277
      /* Or if the function calls alloca */
2278
      || cfun->calls_alloca
2279
      /* Or if there is a stack adjustment.  However, if the stack pointer
2280
         is saved on the stack, we can use a pre-incrementing stack load.  */
2281
      || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2282
                                 && stack_adjust == 4)))
2283
    return 0;
2284
 
2285
  saved_int_regs = offsets->saved_regs_mask;
2286
 
2287
  /* Unfortunately, the insn
2288
 
2289
       ldmib sp, {..., sp, ...}
2290
 
2291
     triggers a bug on most SA-110 based devices, such that the stack
2292
     pointer won't be correctly restored if the instruction takes a
2293
     page fault.  We work around this problem by popping r3 along with
2294
     the other registers, since that is never slower than executing
2295
     another instruction.
2296
 
2297
     We test for !arm_arch5 here, because code for any architecture
2298
     less than this could potentially be run on one of the buggy
2299
     chips.  */
2300
  if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2301
    {
2302
      /* Validate that r3 is a call-clobbered register (always true in
2303
         the default abi) ...  */
2304
      if (!call_used_regs[3])
2305
        return 0;
2306
 
2307
      /* ... that it isn't being used for a return value ... */
2308
      if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2309
        return 0;
2310
 
2311
      /* ... or for a tail-call argument ...  */
2312
      if (sibling)
2313
        {
2314
          gcc_assert (GET_CODE (sibling) == CALL_INSN);
2315
 
2316
          if (find_regno_fusage (sibling, USE, 3))
2317
            return 0;
2318
        }
2319
 
2320
      /* ... and that there are no call-saved registers in r0-r2
2321
         (always true in the default ABI).  */
2322
      if (saved_int_regs & 0x7)
2323
        return 0;
2324
    }
2325
 
2326
  /* Can't be done if interworking with Thumb, and any registers have been
2327
     stacked.  */
2328
  if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2329
    return 0;
2330
 
2331
  /* On StrongARM, conditional returns are expensive if they aren't
2332
     taken and multiple registers have been stacked.  */
2333
  if (iscond && arm_tune_strongarm)
2334
    {
2335
      /* Conditional return when just the LR is stored is a simple
2336
         conditional-load instruction, that's not expensive.  */
2337
      if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2338
        return 0;
2339
 
2340
      if (flag_pic
2341
          && arm_pic_register != INVALID_REGNUM
2342
          && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2343
        return 0;
2344
    }
2345
 
2346
  /* If there are saved registers but the LR isn't saved, then we need
2347
     two instructions for the return.  */
2348
  if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2349
    return 0;
2350
 
2351
  /* Can't be done if any of the FPA regs are pushed,
2352
     since this also requires an insn.  */
2353
  if (TARGET_HARD_FLOAT && TARGET_FPA)
2354
    for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
2355
      if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2356
        return 0;
2357
 
2358
  /* Likewise VFP regs.  */
2359
  if (TARGET_HARD_FLOAT && TARGET_VFP)
2360
    for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2361
      if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2362
        return 0;
2363
 
2364
  if (TARGET_REALLY_IWMMXT)
2365
    for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2366
      if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2367
        return 0;
2368
 
2369
  return 1;
2370
}
2371
 
2372
/* Return TRUE if int I is a valid immediate ARM constant.  */
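/* In ARM state such a constant is an 8-bit value rotated right by an even
   amount: 0x0000ff00 and 0xf000000f are encodable, 0x00000101 is not.  The
   !TARGET_ARM path below additionally accepts the Thumb-2 replicated byte
   patterns such as 0x00XY00XY and 0xXYXYXYXY.  */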
2373
 
2374
int
2375
const_ok_for_arm (HOST_WIDE_INT i)
2376
{
2377
  int lowbit;
2378
 
2379
  /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2380
     be all zero, or all one.  */
2381
  if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2382
      && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2383
          != ((~(unsigned HOST_WIDE_INT) 0)
2384
              & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2385
    return FALSE;
2386
 
2387
  i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2388
 
2389
  /* Fast return for 0 and small values.  We must do this for zero, since
2390
     the code below can't handle that one case.  */
2391
  if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2392
    return TRUE;
2393
 
2394
  /* Get the number of trailing zeros.  */
2395
  lowbit = ffs((int) i) - 1;
2396
 
2397
  /* Only even shifts are allowed in ARM mode so round down to the
2398
     nearest even number.  */
2399
  if (TARGET_ARM)
2400
    lowbit &= ~1;
2401
 
2402
  if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2403
    return TRUE;
2404
 
2405
  if (TARGET_ARM)
2406
    {
2407
      /* Allow rotated constants in ARM mode.  */
2408
      if (lowbit <= 4
2409
           && ((i & ~0xc000003f) == 0
2410
               || (i & ~0xf000000f) == 0
2411
               || (i & ~0xfc000003) == 0))
2412
        return TRUE;
2413
    }
2414
  else
2415
    {
2416
      HOST_WIDE_INT v;
2417
 
2418
      /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY.  */
2419
      v = i & 0xff;
2420
      v |= v << 16;
2421
      if (i == v || i == (v | (v << 8)))
2422
        return TRUE;
2423
 
2424
      /* Allow repeated pattern 0xXY00XY00.  */
2425
      v = i & 0xff00;
2426
      v |= v << 16;
2427
      if (i == v)
2428
        return TRUE;
2429
    }
2430
 
2431
  return FALSE;
2432
}
2433
 
2434
/* Return true if I is a valid constant for the operation CODE.  */
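/* For example, a SET of 0xffff00ff is accepted because its complement
   0x0000ff00 is a valid immediate (a single MVN), and a Thumb-2 PLUS of
   0x00000abc is accepted via ADDW even though the value is not a rotated
   8-bit constant.  */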
2435
int
2436
const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2437
{
2438
  if (const_ok_for_arm (i))
2439
    return 1;
2440
 
2441
  switch (code)
2442
    {
2443
    case SET:
2444
      /* See if we can use movw.  */
2445
      if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
2446
        return 1;
2447
      else
2448
        /* Otherwise, try mvn.  */
2449
        return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2450
 
2451
    case PLUS:
2452
      /* See if we can use addw or subw.  */
2453
      if (TARGET_THUMB2
2454
          && ((i & 0xfffff000) == 0
2455
              || ((-i) & 0xfffff000) == 0))
2456
        return 1;
2457
      /* else fall through.  */
2458
 
2459
    case COMPARE:
2460
    case EQ:
2461
    case NE:
2462
    case GT:
2463
    case LE:
2464
    case LT:
2465
    case GE:
2466
    case GEU:
2467
    case LTU:
2468
    case GTU:
2469
    case LEU:
2470
    case UNORDERED:
2471
    case ORDERED:
2472
    case UNEQ:
2473
    case UNGE:
2474
    case UNLT:
2475
    case UNGT:
2476
    case UNLE:
2477
      return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2478
 
2479
    case MINUS:         /* Should only occur with (MINUS I reg) => rsb */
2480
    case XOR:
2481
      return 0;
2482
 
2483
    case IOR:
2484
      if (TARGET_THUMB2)
2485
        return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2486
      return 0;
2487
 
2488
    case AND:
2489
      return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2490
 
2491
    default:
2492
      gcc_unreachable ();
2493
    }
2494
}
2495
 
2496
/* Emit a sequence of insns to handle a large constant.
2497
   CODE is the code of the operation required, it can be any of SET, PLUS,
2498
   IOR, AND, XOR, MINUS;
2499
   MODE is the mode in which the operation is being performed;
2500
   VAL is the integer to operate on;
2501
   SOURCE is the other operand (a register, or a null-pointer for SET);
2502
   SUBTARGETS means it is safe to create scratch registers if that will
2503
   either produce a simpler sequence, or we will want to cse the values.
2504
   Return value is the number of insns emitted.  */
2505
 
2506
/* ??? Tweak this for thumb2.  */
2507
int
2508
arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2509
                    HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2510
{
2511
  rtx cond;
2512
 
2513
  if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2514
    cond = COND_EXEC_TEST (PATTERN (insn));
2515
  else
2516
    cond = NULL_RTX;
2517
 
2518
  if (subtargets || code == SET
2519
      || (GET_CODE (target) == REG && GET_CODE (source) == REG
2520
          && REGNO (target) != REGNO (source)))
2521
    {
2522
      /* After arm_reorg has been called, we can't fix up expensive
2523
         constants by pushing them into memory so we must synthesize
2524
         them in-line, regardless of the cost.  This is only likely to
2525
         be more costly on chips that have load delay slots and we are
2526
         compiling without running the scheduler (so no splitting
2527
         occurred before the final instruction emission).
2528
 
2529
         Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2530
      */
2531
      if (!after_arm_reorg
2532
          && !cond
2533
          && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2534
                                1, 0)
2535
              > (arm_constant_limit (optimize_function_for_size_p (cfun))
2536
                 + (code != SET))))
2537
        {
2538
          if (code == SET)
2539
            {
2540
              /* Currently SET is the only monadic value for CODE, all
2541
                 the rest are dyadic.  */
2542
              if (TARGET_USE_MOVT)
2543
                arm_emit_movpair (target, GEN_INT (val));
2544
              else
2545
                emit_set_insn (target, GEN_INT (val));
2546
 
2547
              return 1;
2548
            }
2549
          else
2550
            {
2551
              rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2552
 
2553
              if (TARGET_USE_MOVT)
2554
                arm_emit_movpair (temp, GEN_INT (val));
2555
              else
2556
                emit_set_insn (temp, GEN_INT (val));
2557
 
2558
              /* For MINUS, the constant is the minuend (SOURCE is subtracted
2559
                 from it), since we never have subtraction of a constant.  */
2560
              if (code == MINUS)
2561
                emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2562
              else
2563
                emit_set_insn (target,
2564
                               gen_rtx_fmt_ee (code, mode, source, temp));
2565
              return 2;
2566
            }
2567
        }
2568
    }
2569
 
2570
  return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2571
                           1);
2572
}
2573
 
2574
/* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
2575
   ARM/THUMB2 immediates, and add up to VAL.
2576
   The function's return value gives the number of insns required.  */
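/* For example, VAL = 0x00ff0230 can be covered by the two valid immediates
   0x00ff0000 and 0x00000230, so a two-insn sequence suffices.  */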
2577
static int
2578
optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
2579
                            struct four_ints *return_sequence)
2580
{
2581
  int best_consecutive_zeros = 0;
2582
  int i;
2583
  int best_start = 0;
2584
  int insns1, insns2;
2585
  struct four_ints tmp_sequence;
2586
 
2587
  /* If we aren't targeting ARM, the best place to start is always at
2588
     the bottom, otherwise look more closely.  */
2589
  if (TARGET_ARM)
2590
    {
2591
      for (i = 0; i < 32; i += 2)
2592
        {
2593
          int consecutive_zeros = 0;
2594
 
2595
          if (!(val & (3 << i)))
2596
            {
2597
              while ((i < 32) && !(val & (3 << i)))
2598
                {
2599
                  consecutive_zeros += 2;
2600
                  i += 2;
2601
                }
2602
              if (consecutive_zeros > best_consecutive_zeros)
2603
                {
2604
                  best_consecutive_zeros = consecutive_zeros;
2605
                  best_start = i - consecutive_zeros;
2606
                }
2607
              i -= 2;
2608
            }
2609
        }
2610
    }
2611
 
2612
  /* So long as it won't require any more insns to do so, it's
2613
     desirable to emit a small constant (in bits 0...9) in the last
2614
     insn.  This way there is more chance that it can be combined with
2615
     a later addressing insn to form a pre-indexed load or store
2616
     operation.  Consider:
2617
 
2618
           *((volatile int *)0xe0000100) = 1;
2619
           *((volatile int *)0xe0000110) = 2;
2620
 
2621
     We want this to wind up as:
2622
 
2623
            mov rA, #0xe0000000
2624
            mov rB, #1
2625
            str rB, [rA, #0x100]
2626
            mov rB, #2
2627
            str rB, [rA, #0x110]
2628
 
2629
     rather than having to synthesize both large constants from scratch.
2630
 
2631
     Therefore, we calculate how many insns would be required to emit
2632
     the constant starting from `best_start', and also starting from
2633
     zero (i.e. with bit 31 first to be output).  If `best_start' doesn't
2634
     yield a shorter sequence, we may as well use zero.  */
2635
  insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
2636
  if (best_start != 0
2637
      && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
2638
    {
2639
      insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
2640
      if (insns2 <= insns1)
2641
        {
2642
          *return_sequence = tmp_sequence;
2643
          insns1 = insns2;
2644
        }
2645
    }
2646
 
2647
  return insns1;
2648
}
2649
 
2650
/* As for optimal_immediate_sequence, but starting at bit-position I.  */
2651
static int
2652
optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
2653
                             struct four_ints *return_sequence, int i)
2654
{
2655
  int remainder = val & 0xffffffff;
2656
  int insns = 0;
2657
 
2658
  /* Try and find a way of doing the job in either two or three
2659
     instructions.
2660
 
2661
     In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
2662
     location.  We start at position I.  This may be the MSB, or
2663
     optimal_immediate_sequence may have positioned it at the largest block
2664
     of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
2665
     wrapping around to the top of the word when we drop off the bottom.
2666
     In the worst case this code should produce no more than four insns.
2667
 
2668
     In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
2669
     constants, shifted to any arbitrary location.  We should always start
2670
     at the MSB.  */
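  /* (The replicated forms are the Thumb-2 encodings 0x00XY00XY, 0xXY00XY00
     and 0xXYXYXYXY, e.g. 0x00120012 or 0x01010101.)  */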
2671
  do
2672
    {
2673
      int end;
2674
      unsigned int b1, b2, b3, b4;
2675
      unsigned HOST_WIDE_INT result;
2676
      int loc;
2677
 
2678
      gcc_assert (insns < 4);
2679
 
2680
      if (i <= 0)
2681
        i += 32;
2682
 
2683
      /* First, find the next normal 12/8-bit shifted/rotated immediate.  */
2684
      if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
2685
        {
2686
          loc = i;
2687
          if (i <= 12 && TARGET_THUMB2 && code == PLUS)
2688
            /* We can use addw/subw for the last 12 bits.  */
2689
            result = remainder;
2690
          else
2691
            {
2692
              /* Use an 8-bit shifted/rotated immediate.  */
2693
              end = i - 8;
2694
              if (end < 0)
2695
                end += 32;
2696
              result = remainder & ((0x0ff << end)
2697
                                   | ((i < end) ? (0xff >> (32 - end))
2698
                                                : 0));
2699
              i -= 8;
2700
            }
2701
        }
2702
      else
2703
        {
2704
          /* Arm allows rotates by a multiple of two. Thumb-2 allows
2705
             arbitrary shifts.  */
2706
          i -= TARGET_ARM ? 2 : 1;
2707
          continue;
2708
        }
2709
 
2710
      /* Next, see if we can do a better job with a thumb2 replicated
2711
         constant.
2712
 
2713
         We do it this way around to catch the cases like 0x01F001E0 where
2714
         two 8-bit immediates would work, but a replicated constant would
2715
         make it worse.
2716
 
2717
         TODO: 16-bit constants that don't clear all the bits, but still win.
2718
         TODO: Arithmetic splitting for set/add/sub, rather than bitwise.  */
2719
      if (TARGET_THUMB2)
2720
        {
2721
          b1 = (remainder & 0xff000000) >> 24;
2722
          b2 = (remainder & 0x00ff0000) >> 16;
2723
          b3 = (remainder & 0x0000ff00) >> 8;
2724
          b4 = remainder & 0xff;
2725
 
2726
          if (loc > 24)
2727
            {
2728
              /* The 8-bit immediate already found clears b1 (and maybe b2),
2729
                 but must leave b3 and b4 alone.  */
2730
 
2731
              /* First try to find a 32-bit replicated constant that clears
2732
                 almost everything.  We can assume that we can't do it in one,
2733
                 or else we wouldn't be here.  */
2734
              unsigned int tmp = b1 & b2 & b3 & b4;
2735
              unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
2736
                                  + (tmp << 24);
2737
              unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
2738
                                            + (tmp == b3) + (tmp == b4);
2739
              if (tmp
2740
                  && (matching_bytes >= 3
2741
                      || (matching_bytes == 2
2742
                          && const_ok_for_op (remainder & ~tmp2, code))))
2743
                {
2744
                  /* At least 3 of the bytes match, and the fourth has at
2745
                     least as many bits set, or two of the bytes match
2746
                     and it will only require one more insn to finish.  */
2747
                  result = tmp2;
2748
                  i = tmp != b1 ? 32
2749
                      : tmp != b2 ? 24
2750
                      : tmp != b3 ? 16
2751
                      : 8;
2752
                }
2753
 
2754
              /* Second, try to find a 16-bit replicated constant that can
2755
                 leave three of the bytes clear.  If b2 or b4 is already
2756
                 zero, then we can.  If the 8-bit from above would not
2757
                 clear b2 anyway, then we still win.  */
2758
              else if (b1 == b3 && (!b2 || !b4
2759
                               || (remainder & 0x00ff0000 & ~result)))
2760
                {
2761
                  result = remainder & 0xff00ff00;
2762
                  i = 24;
2763
                }
2764
            }
2765
          else if (loc > 16)
2766
            {
2767
              /* The 8-bit immediate already found clears b2 (and maybe b3)
2768
                 and we don't get here unless b1 is already clear, but it will
2769
                 leave b4 unchanged.  */
2770
 
2771
              /* If we can clear b2 and b4 at once, then we win, since the
2772
                 8-bits couldn't possibly reach that far.  */
2773
              if (b2 == b4)
2774
                {
2775
                  result = remainder & 0x00ff00ff;
2776
                  i = 16;
2777
                }
2778
            }
2779
        }
2780
 
2781
      return_sequence->i[insns++] = result;
2782
      remainder &= ~result;
2783
 
2784
      if (code == SET || code == MINUS)
2785
        code = PLUS;
2786
    }
2787
  while (remainder);
2788
 
2789
  return insns;
2790
}
2791
 
2792
/* Emit an instruction with the indicated PATTERN.  If COND is
2793
   non-NULL, conditionalize the execution of the instruction on COND
2794
   being true.  */
2795
 
2796
static void
2797
emit_constant_insn (rtx cond, rtx pattern)
2798
{
2799
  if (cond)
2800
    pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2801
  emit_insn (pattern);
2802
}
2803
 
2804
/* As above, but extra parameter GENERATE which, if clear, suppresses
2805
   RTL generation.  */
2806
 
2807
static int
2808
arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2809
                  HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2810
                  int generate)
2811
{
2812
  int can_invert = 0;
2813
  int can_negate = 0;
2814
  int final_invert = 0;
2815
  int i;
2816
  int set_sign_bit_copies = 0;
2817
  int clear_sign_bit_copies = 0;
2818
  int clear_zero_bit_copies = 0;
2819
  int set_zero_bit_copies = 0;
2820
  int insns = 0, neg_insns, inv_insns;
2821
  unsigned HOST_WIDE_INT temp1, temp2;
2822
  unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2823
  struct four_ints *immediates;
2824
  struct four_ints pos_immediates, neg_immediates, inv_immediates;
2825
 
2826
  /* Find out which operations are safe for a given CODE.  Also do a quick
2827
     check for degenerate cases; these can occur when DImode operations
2828
     are split.  */
2829
  switch (code)
2830
    {
2831
    case SET:
2832
      can_invert = 1;
2833
      break;
2834
 
2835
    case PLUS:
2836
      can_negate = 1;
2837
      break;
2838
 
2839
    case IOR:
2840
      if (remainder == 0xffffffff)
2841
        {
2842
          if (generate)
2843
            emit_constant_insn (cond,
2844
                                gen_rtx_SET (VOIDmode, target,
2845
                                             GEN_INT (ARM_SIGN_EXTEND (val))));
2846
          return 1;
2847
        }
2848
 
2849
      if (remainder == 0)
2850
        {
2851
          if (reload_completed && rtx_equal_p (target, source))
2852
            return 0;
2853
 
2854
          if (generate)
2855
            emit_constant_insn (cond,
2856
                                gen_rtx_SET (VOIDmode, target, source));
2857
          return 1;
2858
        }
2859
      break;
2860
 
2861
    case AND:
2862
      if (remainder == 0)
2863
        {
2864
          if (generate)
2865
            emit_constant_insn (cond,
2866
                                gen_rtx_SET (VOIDmode, target, const0_rtx));
2867
          return 1;
2868
        }
2869
      if (remainder == 0xffffffff)
2870
        {
2871
          if (reload_completed && rtx_equal_p (target, source))
2872
            return 0;
2873
          if (generate)
2874
            emit_constant_insn (cond,
2875
                                gen_rtx_SET (VOIDmode, target, source));
2876
          return 1;
2877
        }
2878
      can_invert = 1;
2879
      break;
2880
 
2881
    case XOR:
2882
      if (remainder == 0)
2883
        {
2884
          if (reload_completed && rtx_equal_p (target, source))
2885
            return 0;
2886
          if (generate)
2887
            emit_constant_insn (cond,
2888
                                gen_rtx_SET (VOIDmode, target, source));
2889
          return 1;
2890
        }
2891
 
2892
      if (remainder == 0xffffffff)
2893
        {
2894
          if (generate)
2895
            emit_constant_insn (cond,
2896
                                gen_rtx_SET (VOIDmode, target,
2897
                                             gen_rtx_NOT (mode, source)));
2898
          return 1;
2899
        }
2900
      final_invert = 1;
2901
      break;
2902
 
2903
    case MINUS:
2904
      /* We treat MINUS as (val - source), since (source - val) is always
2905
         passed as (source + (-val)).  */
2906
      if (remainder == 0)
2907
        {
2908
          if (generate)
2909
            emit_constant_insn (cond,
2910
                                gen_rtx_SET (VOIDmode, target,
2911
                                             gen_rtx_NEG (mode, source)));
2912
          return 1;
2913
        }
2914
      if (const_ok_for_arm (val))
2915
        {
2916
          if (generate)
2917
            emit_constant_insn (cond,
2918
                                gen_rtx_SET (VOIDmode, target,
2919
                                             gen_rtx_MINUS (mode, GEN_INT (val),
2920
                                                            source)));
2921
          return 1;
2922
        }
2923
 
2924
      break;
2925
 
2926
    default:
2927
      gcc_unreachable ();
2928
    }
2929
 
2930
  /* If we can do it in one insn get out quickly.  */
2931
  if (const_ok_for_op (val, code))
2932
    {
2933
      if (generate)
2934
        emit_constant_insn (cond,
2935
                            gen_rtx_SET (VOIDmode, target,
2936
                                         (source
2937
                                          ? gen_rtx_fmt_ee (code, mode, source,
2938
                                                            GEN_INT (val))
2939
                                          : GEN_INT (val))));
2940
      return 1;
2941
    }
2942
 
2943
  /* Calculate a few attributes that may be useful for specific
2944
     optimizations.  */
2945
  /* Count number of leading zeros.  */
2946
  for (i = 31; i >= 0; i--)
2947
    {
2948
      if ((remainder & (1 << i)) == 0)
2949
        clear_sign_bit_copies++;
2950
      else
2951
        break;
2952
    }
2953
 
2954
  /* Count number of leading 1's.  */
2955
  for (i = 31; i >= 0; i--)
2956
    {
2957
      if ((remainder & (1 << i)) != 0)
2958
        set_sign_bit_copies++;
2959
      else
2960
        break;
2961
    }
2962
 
2963
  /* Count number of trailing zero's.  */
2964
  for (i = 0; i <= 31; i++)
2965
    {
2966
      if ((remainder & (1 << i)) == 0)
2967
        clear_zero_bit_copies++;
2968
      else
2969
        break;
2970
    }
2971
 
2972
  /* Count number of trailing 1's.  */
2973
  for (i = 0; i <= 31; i++)
2974
    {
2975
      if ((remainder & (1 << i)) != 0)
2976
        set_zero_bit_copies++;
2977
      else
2978
        break;
2979
    }
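  /* For example, remainder = 0x0000fff0 gives clear_sign_bit_copies = 16,
     set_sign_bit_copies = 0, clear_zero_bit_copies = 4 and
     set_zero_bit_copies = 0.  */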
2980
 
2981
  switch (code)
2982
    {
2983
    case SET:
2984
      /* See if we can do this by sign_extending a constant that is known
2985
         to be negative.  This is a good way of doing it, since the shift
2986
         may well merge into a subsequent insn.  */
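      /* For example, remainder = 0xfffff000 has set_sign_bit_copies = 20;
         shifting it left by 19 yields 0x80000000, a valid immediate, so
         the value can be built with a MOV of 0x80000000 followed by an
         arithmetic shift right by 19.  */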
2987
      if (set_sign_bit_copies > 1)
2988
        {
2989
          if (const_ok_for_arm
2990
              (temp1 = ARM_SIGN_EXTEND (remainder
2991
                                        << (set_sign_bit_copies - 1))))
2992
            {
2993
              if (generate)
2994
                {
2995
                  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2996
                  emit_constant_insn (cond,
2997
                                      gen_rtx_SET (VOIDmode, new_src,
2998
                                                   GEN_INT (temp1)));
2999
                  emit_constant_insn (cond,
3000
                                      gen_ashrsi3 (target, new_src,
3001
                                                   GEN_INT (set_sign_bit_copies - 1)));
3002
                }
3003
              return 2;
3004
            }
3005
          /* For an inverted constant, we will need to set the low bits,
3006
             these will be shifted out of harm's way.  */
3007
          temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
3008
          if (const_ok_for_arm (~temp1))
3009
            {
3010
              if (generate)
3011
                {
3012
                  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3013
                  emit_constant_insn (cond,
3014
                                      gen_rtx_SET (VOIDmode, new_src,
3015
                                                   GEN_INT (temp1)));
3016
                  emit_constant_insn (cond,
3017
                                      gen_ashrsi3 (target, new_src,
3018
                                                   GEN_INT (set_sign_bit_copies - 1)));
3019
                }
3020
              return 2;
3021
            }
3022
        }
3023
 
3024
      /* See if we can calculate the value as the difference between two
3025
         valid immediates.  */
3026
      if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
3027
        {
3028
          int topshift = clear_sign_bit_copies & ~1;
3029
 
3030
          temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
3031
                                   & (0xff000000 >> topshift));
3032
 
3033
          /* If temp1 is zero, then that means the 9 most significant
3034
             bits of remainder were 1 and we've caused it to overflow.
3035
             When topshift is 0 we don't need to do anything since we
3036
             can borrow from 'bit 32'.  */
3037
          if (temp1 == 0 && topshift != 0)
3038
            temp1 = 0x80000000 >> (topshift - 1);
3039
 
3040
          temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
3041
 
3042
          if (const_ok_for_arm (temp2))
3043
            {
3044
              if (generate)
3045
                {
3046
                  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3047
                  emit_constant_insn (cond,
3048
                                      gen_rtx_SET (VOIDmode, new_src,
3049
                                                   GEN_INT (temp1)));
3050
                  emit_constant_insn (cond,
3051
                                      gen_addsi3 (target, new_src,
3052
                                                  GEN_INT (-temp2)));
3053
                }
3054
 
3055
              return 2;
3056
            }
3057
        }
3058
 
3059
      /* See if we can generate this by setting the bottom (or the top)
3060
         16 bits, and then shifting these into the other half of the
3061
         word.  We only look for the simplest cases, to do more would cost
3062
         too much.  Be careful, however, not to generate this when the
3063
         alternative would take fewer insns.  */
3064
      if (val & 0xffff0000)
3065
        {
3066
          temp1 = remainder & 0xffff0000;
3067
          temp2 = remainder & 0x0000ffff;
3068
 
3069
          /* Overlaps outside this range are best done using other methods.  */
3070
          for (i = 9; i < 24; i++)
3071
            {
3072
              if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
3073
                  && !const_ok_for_arm (temp2))
3074
                {
3075
                  rtx new_src = (subtargets
3076
                                 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3077
                                 : target);
3078
                  insns = arm_gen_constant (code, mode, cond, temp2, new_src,
3079
                                            source, subtargets, generate);
3080
                  source = new_src;
3081
                  if (generate)
3082
                    emit_constant_insn
3083
                      (cond,
3084
                       gen_rtx_SET
3085
                       (VOIDmode, target,
3086
                        gen_rtx_IOR (mode,
3087
                                     gen_rtx_ASHIFT (mode, source,
3088
                                                     GEN_INT (i)),
3089
                                     source)));
3090
                  return insns + 1;
3091
                }
3092
            }
3093
 
3094
          /* Don't duplicate cases already considered.  */
3095
          for (i = 17; i < 24; i++)
3096
            {
3097
              if (((temp1 | (temp1 >> i)) == remainder)
3098
                  && !const_ok_for_arm (temp1))
3099
                {
3100
                  rtx new_src = (subtargets
3101
                                 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3102
                                 : target);
3103
                  insns = arm_gen_constant (code, mode, cond, temp1, new_src,
3104
                                            source, subtargets, generate);
3105
                  source = new_src;
3106
                  if (generate)
3107
                    emit_constant_insn
3108
                      (cond,
3109
                       gen_rtx_SET (VOIDmode, target,
3110
                                    gen_rtx_IOR
3111
                                    (mode,
3112
                                     gen_rtx_LSHIFTRT (mode, source,
3113
                                                       GEN_INT (i)),
3114
                                     source)));
3115
                  return insns + 1;
3116
                }
3117
            }
3118
        }
3119
      break;
3120
 
3121
    case IOR:
3122
    case XOR:
3123
      /* If we have IOR or XOR, and the constant can be loaded in a
3124
         single instruction, and we can find a temporary to put it in,
3125
         then this can be done in two instructions instead of 3-4.  */
3126
      if (subtargets
3127
          /* TARGET can't be NULL if SUBTARGETS is 0 */
3128
          || (reload_completed && !reg_mentioned_p (target, source)))
3129
        {
3130
          if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
3131
            {
3132
              if (generate)
3133
                {
3134
                  rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3135
 
3136
                  emit_constant_insn (cond,
3137
                                      gen_rtx_SET (VOIDmode, sub,
3138
                                                   GEN_INT (val)));
3139
                  emit_constant_insn (cond,
3140
                                      gen_rtx_SET (VOIDmode, target,
3141
                                                   gen_rtx_fmt_ee (code, mode,
3142
                                                                   source, sub)));
3143
                }
3144
              return 2;
3145
            }
3146
        }
3147
 
3148
      if (code == XOR)
3149
        break;
3150
 
3151
      /*  Convert.
3152
          x = y | constant (which is composed of set_sign_bit_copies of leading 1s
3153
                            and the remaining bits 0, e.g. 0xfff00000) to
3154
          x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
3155
 
3156
          This can be done in 2 instructions by using shifts with mov or mvn.
3157
          e.g. for
3158
          x = x | 0xfff00000;
3159
          we generate.
3160
          mvn   r0, r0, asl #12
3161
          mvn   r0, r0, lsr #12  */
3162
      if (set_sign_bit_copies > 8
3163
          && (val & (-1 << (32 - set_sign_bit_copies))) == val)
3164
        {
3165
          if (generate)
3166
            {
3167
              rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3168
              rtx shift = GEN_INT (set_sign_bit_copies);
3169
 
3170
              emit_constant_insn
3171
                (cond,
3172
                 gen_rtx_SET (VOIDmode, sub,
3173
                              gen_rtx_NOT (mode,
3174
                                           gen_rtx_ASHIFT (mode,
3175
                                                           source,
3176
                                                           shift))));
3177
              emit_constant_insn
3178
                (cond,
3179
                 gen_rtx_SET (VOIDmode, target,
3180
                              gen_rtx_NOT (mode,
3181
                                           gen_rtx_LSHIFTRT (mode, sub,
3182
                                                             shift))));
3183
            }
3184
          return 2;
3185
        }
3186
 
3187
      /* Convert
3188
          x = y | constant (which has set_zero_bit_copies number of trailing ones).
3189
           to
3190
          x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
3191
 
3192
          For example, r0 = r0 | 0xfff
3193
               mvn      r0, r0, lsr #12
3194
               mvn      r0, r0, asl #12
3195
 
3196
      */
3197
      if (set_zero_bit_copies > 8
3198
          && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
3199
        {
3200
          if (generate)
3201
            {
3202
              rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3203
              rtx shift = GEN_INT (set_zero_bit_copies);
3204
 
3205
              emit_constant_insn
3206
                (cond,
3207
                 gen_rtx_SET (VOIDmode, sub,
3208
                              gen_rtx_NOT (mode,
3209
                                           gen_rtx_LSHIFTRT (mode,
3210
                                                             source,
3211
                                                             shift))));
3212
              emit_constant_insn
3213
                (cond,
3214
                 gen_rtx_SET (VOIDmode, target,
3215
                              gen_rtx_NOT (mode,
3216
                                           gen_rtx_ASHIFT (mode, sub,
3217
                                                           shift))));
3218
            }
3219
          return 2;
3220
        }
3221
 
3222
      /* This will never be reached for Thumb2 because orn is a valid
3223
         instruction.  This is for Thumb-1 and the 32-bit ARM cases.
3224
 
3225
         x = y | constant (such that ~constant is a valid constant)
3226
         Transform this to
3227
         x = ~(~y & ~constant).
3228
      */
3229
      if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
3230
        {
3231
          if (generate)
3232
            {
3233
              rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3234
              emit_constant_insn (cond,
3235
                                  gen_rtx_SET (VOIDmode, sub,
3236
                                               gen_rtx_NOT (mode, source)));
3237
              source = sub;
3238
              if (subtargets)
3239
                sub = gen_reg_rtx (mode);
3240
              emit_constant_insn (cond,
3241
                                  gen_rtx_SET (VOIDmode, sub,
3242
                                               gen_rtx_AND (mode, source,
3243
                                                            GEN_INT (temp1))));
3244
              emit_constant_insn (cond,
3245
                                  gen_rtx_SET (VOIDmode, target,
3246
                                               gen_rtx_NOT (mode, sub)));
3247
            }
3248
          return 3;
3249
        }
3250
      break;
3251
 
3252
    case AND:
3253
      /* See if two shifts will do 2 or more insns' worth of work.  */
3254
      if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
3255
        {
3256
          HOST_WIDE_INT shift_mask = ((0xffffffff
3257
                                       << (32 - clear_sign_bit_copies))
3258
                                      & 0xffffffff);
3259
 
3260
          if ((remainder | shift_mask) != 0xffffffff)
3261
            {
3262
              if (generate)
3263
                {
3264
                  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3265
                  insns = arm_gen_constant (AND, mode, cond,
3266
                                            remainder | shift_mask,
3267
                                            new_src, source, subtargets, 1);
3268
                  source = new_src;
3269
                }
3270
              else
3271
                {
3272
                  rtx targ = subtargets ? NULL_RTX : target;
3273
                  insns = arm_gen_constant (AND, mode, cond,
3274
                                            remainder | shift_mask,
3275
                                            targ, source, subtargets, 0);
3276
                }
3277
            }
3278
 
3279
          if (generate)
3280
            {
3281
              rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3282
              rtx shift = GEN_INT (clear_sign_bit_copies);
3283
 
3284
              emit_insn (gen_ashlsi3 (new_src, source, shift));
3285
              emit_insn (gen_lshrsi3 (target, new_src, shift));
3286
            }
3287
 
3288
          return insns + 2;
3289
        }
3290
 
3291
      if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3292
        {
3293
          HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3294
 
3295
          if ((remainder | shift_mask) != 0xffffffff)
3296
            {
3297
              if (generate)
3298
                {
3299
                  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3300
 
3301
                  insns = arm_gen_constant (AND, mode, cond,
3302
                                            remainder | shift_mask,
3303
                                            new_src, source, subtargets, 1);
3304
                  source = new_src;
3305
                }
3306
              else
3307
                {
3308
                  rtx targ = subtargets ? NULL_RTX : target;
3309
 
3310
                  insns = arm_gen_constant (AND, mode, cond,
3311
                                            remainder | shift_mask,
3312
                                            targ, source, subtargets, 0);
3313
                }
3314
            }
3315
 
3316
          if (generate)
3317
            {
3318
              rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3319
              rtx shift = GEN_INT (clear_zero_bit_copies);
3320
 
3321
              emit_insn (gen_lshrsi3 (new_src, source, shift));
3322
              emit_insn (gen_ashlsi3 (target, new_src, shift));
3323
            }
3324
 
3325
          return insns + 2;
3326
        }
3327
 
3328
      break;
3329
 
3330
    default:
3331
      break;
3332
    }
3333
 
3334
  /* Calculate what the instruction sequences would be if we generated it
3335
     normally, negated, or inverted.  */
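  /* For example, for PLUS with val = -251 the positive form 0xffffff05
     needs several immediates, whereas the negated value 251 is a single
     immediate, so the negated sequence (one instruction, which the add
     patterns can emit as a subtract) is preferred.  */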
3336
  if (code == AND)
3337
    /* AND cannot be split into multiple insns, so invert and use BIC.  */
3338
    insns = 99;
3339
  else
3340
    insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
3341
 
3342
  if (can_negate)
3343
    neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
3344
                                            &neg_immediates);
3345
  else
3346
    neg_insns = 99;
3347
 
3348
  if (can_invert || final_invert)
3349
    inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
3350
                                            &inv_immediates);
3351
  else
3352
    inv_insns = 99;
3353
 
3354
  immediates = &pos_immediates;
3355
 
3356
  /* Is the negated immediate sequence more efficient?  */
3357
  if (neg_insns < insns && neg_insns <= inv_insns)
3358
    {
3359
      insns = neg_insns;
3360
      immediates = &neg_immediates;
3361
    }
3362
  else
3363
    can_negate = 0;
3364
 
3365
  /* Is the inverted immediate sequence more efficient?
3366
     We must allow for an extra NOT instruction for XOR operations, although
3367
     there is some chance that the final 'mvn' will get optimized later.  */
3368
  if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
3369
    {
3370
      insns = inv_insns;
3371
      immediates = &inv_immediates;
3372
    }
3373
  else
3374
    {
3375
      can_invert = 0;
3376
      final_invert = 0;
3377
    }
3378
 
3379
  /* Now output the chosen sequence as instructions.  */
3380
  if (generate)
3381
    {
3382
      for (i = 0; i < insns; i++)
3383
        {
3384
          rtx new_src, temp1_rtx;
3385
 
3386
          temp1 = immediates->i[i];
3387
 
3388
          if (code == SET || code == MINUS)
3389
            new_src = (subtargets ? gen_reg_rtx (mode) : target);
3390
          else if ((final_invert || i < (insns - 1)) && subtargets)
3391
            new_src = gen_reg_rtx (mode);
3392
          else
3393
            new_src = target;
3394
 
3395
          if (can_invert)
3396
            temp1 = ~temp1;
3397
          else if (can_negate)
3398
            temp1 = -temp1;
3399
 
3400
          temp1 = trunc_int_for_mode (temp1, mode);
3401
          temp1_rtx = GEN_INT (temp1);
3402
 
3403
          if (code == SET)
3404
            ;
3405
          else if (code == MINUS)
3406
            temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3407
          else
3408
            temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3409
 
3410
          emit_constant_insn (cond,
3411
                              gen_rtx_SET (VOIDmode, new_src,
3412
                                           temp1_rtx));
3413
          source = new_src;
3414
 
3415
          if (code == SET)
3416
            {
3417
              can_negate = can_invert;
3418
              can_invert = 0;
3419
              code = PLUS;
3420
            }
3421
          else if (code == MINUS)
3422
            code = PLUS;
3423
        }
3424
    }
3425
 
3426
  if (final_invert)
3427
    {
3428
      if (generate)
3429
        emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3430
                                               gen_rtx_NOT (mode, source)));
3431
      insns++;
3432
    }
3433
 
3434
  return insns;
3435
}
3436
 
3437
/* Canonicalize a comparison so that we are more likely to recognize it.
3438
   This can be done for a few constant compares, where we can make the
3439
   immediate value easier to load.  */
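/* For example, neither 65535 nor -65535 is a valid ARM immediate, but
   65536 is, so (gt x 65535) is rewritten below as (ge x 65536).  */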
3440
 
3441
enum rtx_code
3442
arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
3443
{
3444
  enum machine_mode mode;
3445
  unsigned HOST_WIDE_INT i, maxval;
3446
 
3447
  mode = GET_MODE (*op0);
3448
  if (mode == VOIDmode)
3449
    mode = GET_MODE (*op1);
3450
 
3451
  maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3452
 
3453
  /* For DImode, we have GE/LT/GEU/LTU comparisons.  In ARM mode
3454
     we can also use cmp/cmpeq for GTU/LEU.  GT/LE must be either
3455
     reversed or (for constant OP1) adjusted to GE/LT.  Similarly
3456
     for GTU/LEU in Thumb mode.  */
3457
  if (mode == DImode)
3458
    {
3459
      rtx tem;
3460
 
3461
      /* To keep things simple, always use the Cirrus cfcmp64 if it is
3462
         available.  */
3463
      if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
3464
        return code;
3465
 
3466
      if (code == GT || code == LE
3467
          || (!TARGET_ARM && (code == GTU || code == LEU)))
3468
        {
3469
          /* Missing comparison.  First try to use an available
3470
             comparison.  */
3471
          if (GET_CODE (*op1) == CONST_INT)
3472
            {
3473
              i = INTVAL (*op1);
3474
              switch (code)
3475
                {
3476
                case GT:
3477
                case LE:
3478
                  if (i != maxval
3479
                      && arm_const_double_by_immediates (GEN_INT (i + 1)))
3480
                    {
3481
                      *op1 = GEN_INT (i + 1);
3482
                      return code == GT ? GE : LT;
3483
                    }
3484
                  break;
3485
                case GTU:
3486
                case LEU:
3487
                  if (i != ~((unsigned HOST_WIDE_INT) 0)
3488
                      && arm_const_double_by_immediates (GEN_INT (i + 1)))
3489
                    {
3490
                      *op1 = GEN_INT (i + 1);
3491
                      return code == GTU ? GEU : LTU;
3492
                    }
3493
                  break;
3494
                default:
3495
                  gcc_unreachable ();
3496
                }
3497
            }
3498
 
3499
          /* If that did not work, reverse the condition.  */
3500
          tem = *op0;
3501
          *op0 = *op1;
3502
          *op1 = tem;
3503
          return swap_condition (code);
3504
        }
3505
 
3506
      return code;
3507
    }
3508
 
3509
  /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
3510
     with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
3511
     to facilitate possible combining with a cmp into 'ands'.  */
3512
  if (mode == SImode
3513
      && GET_CODE (*op0) == ZERO_EXTEND
3514
      && GET_CODE (XEXP (*op0, 0)) == SUBREG
3515
      && GET_MODE (XEXP (*op0, 0)) == QImode
3516
      && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
3517
      && subreg_lowpart_p (XEXP (*op0, 0))
3518
      && *op1 == const0_rtx)
3519
    *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
3520
                        GEN_INT (255));
3521
 
3522
  /* Comparisons smaller than DImode.  Only adjust comparisons against
3523
     an out-of-range constant.  */
3524
  if (GET_CODE (*op1) != CONST_INT
3525
      || const_ok_for_arm (INTVAL (*op1))
3526
      || const_ok_for_arm (- INTVAL (*op1)))
3527
    return code;
3528
 
3529
  i = INTVAL (*op1);
3530
 
3531
  switch (code)
3532
    {
3533
    case EQ:
3534
    case NE:
3535
      return code;
3536
 
3537
    case GT:
3538
    case LE:
3539
      if (i != maxval
3540
          && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3541
        {
3542
          *op1 = GEN_INT (i + 1);
3543
          return code == GT ? GE : LT;
3544
        }
3545
      break;
3546
 
3547
    case GE:
3548
    case LT:
3549
      if (i != ~maxval
3550
          && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3551
        {
3552
          *op1 = GEN_INT (i - 1);
3553
          return code == GE ? GT : LE;
3554
        }
3555
      break;
3556
 
3557
    case GTU:
3558
    case LEU:
3559
      if (i != ~((unsigned HOST_WIDE_INT) 0)
3560
          && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3561
        {
3562
          *op1 = GEN_INT (i + 1);
3563
          return code == GTU ? GEU : LTU;
3564
        }
3565
      break;
3566
 
3567
    case GEU:
3568
    case LTU:
3569
      if (i != 0
3570
          && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3571
        {
3572
          *op1 = GEN_INT (i - 1);
3573
          return code == GEU ? GTU : LEU;
3574
        }
3575
      break;
3576
 
3577
    default:
3578
      gcc_unreachable ();
3579
    }
3580
 
3581
  return code;
3582
}
3583
 
3584
 
3585
/* Define how to find the value returned by a function.  */
3586
 
3587
static rtx
3588
arm_function_value(const_tree type, const_tree func,
3589
                   bool outgoing ATTRIBUTE_UNUSED)
3590
{
3591
  enum machine_mode mode;
3592
  int unsignedp ATTRIBUTE_UNUSED;
3593
  rtx r ATTRIBUTE_UNUSED;
3594
 
3595
  mode = TYPE_MODE (type);
3596
 
3597
  if (TARGET_AAPCS_BASED)
3598
    return aapcs_allocate_return_reg (mode, type, func);
3599
 
3600
  /* Promote integer types.  */
3601
  if (INTEGRAL_TYPE_P (type))
3602
    mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3603
 
3604
  /* Promotes small structs returned in a register to full-word size
3605
     for big-endian AAPCS.  */
3606
  if (arm_return_in_msb (type))
3607
    {
3608
      HOST_WIDE_INT size = int_size_in_bytes (type);
3609
      if (size % UNITS_PER_WORD != 0)
3610
        {
3611
          size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3612
          mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3613
        }
3614
    }
3615
 
3616
  return arm_libcall_value_1 (mode);
3617
}
3618
 
3619
static int
3620
libcall_eq (const void *p1, const void *p2)
3621
{
3622
  return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3623
}
3624
 
3625
static hashval_t
3626
libcall_hash (const void *p1)
3627
{
3628
  return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3629
}
3630
 
3631
static void
3632
add_libcall (htab_t htab, rtx libcall)
3633
{
3634
  *htab_find_slot (htab, libcall, INSERT) = libcall;
3635
}
3636
 
3637
static bool
3638
arm_libcall_uses_aapcs_base (const_rtx libcall)
3639
{
3640
  static bool init_done = false;
3641
  static htab_t libcall_htab;
3642
 
3643
  if (!init_done)
3644
    {
3645
      init_done = true;
3646
 
3647
      libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3648
                                  NULL);
3649
      add_libcall (libcall_htab,
3650
                   convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3651
      add_libcall (libcall_htab,
3652
                   convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3653
      add_libcall (libcall_htab,
3654
                   convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3655
      add_libcall (libcall_htab,
3656
                   convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3657
 
3658
      add_libcall (libcall_htab,
3659
                   convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3660
      add_libcall (libcall_htab,
3661
                   convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3662
      add_libcall (libcall_htab,
3663
                   convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3664
      add_libcall (libcall_htab,
3665
                   convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3666
 
3667
      add_libcall (libcall_htab,
3668
                   convert_optab_libfunc (sext_optab, SFmode, HFmode));
3669
      add_libcall (libcall_htab,
3670
                   convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3671
      add_libcall (libcall_htab,
3672
                   convert_optab_libfunc (sfix_optab, SImode, DFmode));
3673
      add_libcall (libcall_htab,
3674
                   convert_optab_libfunc (ufix_optab, SImode, DFmode));
3675
      add_libcall (libcall_htab,
3676
                   convert_optab_libfunc (sfix_optab, DImode, DFmode));
3677
      add_libcall (libcall_htab,
3678
                   convert_optab_libfunc (ufix_optab, DImode, DFmode));
3679
      add_libcall (libcall_htab,
3680
                   convert_optab_libfunc (sfix_optab, DImode, SFmode));
3681
      add_libcall (libcall_htab,
3682
                   convert_optab_libfunc (ufix_optab, DImode, SFmode));
3683
 
3684
      /* Values from double-precision helper functions are returned in core
3685
         registers if the selected core only supports single-precision
3686
         arithmetic, even if we are using the hard-float ABI.  The same is
3687
         true for single-precision helpers, but we will never be using the
3688
         hard-float ABI on a CPU which doesn't support single-precision
3689
         operations in hardware.  */
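      /* For instance, the double-precision addition helper (__aeabi_dadd
         under the AAPCS run-time ABI) falls into this category: on such a
         core its result comes back in r0/r1 rather than a VFP register.  */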
3690
      add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
3691
      add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
3692
      add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
3693
      add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
3694
      add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
3695
      add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
3696
      add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
3697
      add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
3698
      add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
3699
      add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
3700
      add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
3701
      add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
3702
                                                        SFmode));
3703
      add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
3704
                                                        DFmode));
3705
    }
3706
 
3707
  return libcall && htab_find (libcall_htab, libcall) != NULL;
3708
}
3709
 
3710
static rtx
3711
arm_libcall_value_1 (enum machine_mode mode)
3712
{
3713
  if (TARGET_AAPCS_BASED)
3714
    return aapcs_libcall_value (mode);
3715
  else if (TARGET_32BIT
3716
           && TARGET_HARD_FLOAT_ABI
3717
           && TARGET_FPA
3718
           && GET_MODE_CLASS (mode) == MODE_FLOAT)
3719
    return gen_rtx_REG (mode, FIRST_FPA_REGNUM);
3720
  else if (TARGET_32BIT
3721
           && TARGET_HARD_FLOAT_ABI
3722
           && TARGET_MAVERICK
3723
           && GET_MODE_CLASS (mode) == MODE_FLOAT)
3724
    return gen_rtx_REG (mode, FIRST_CIRRUS_FP_REGNUM);
3725
  else if (TARGET_IWMMXT_ABI
3726
           && arm_vector_mode_supported_p (mode))
3727
    return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
3728
  else
3729
    return gen_rtx_REG (mode, ARG_REGISTER (1));
3730
}
3731
 
3732
/* Define how to find the value returned by a library function
3733
   assuming the value has mode MODE.  */
3734
 
3735
static rtx
3736
arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3737
{
3738
  if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3739
      && GET_MODE_CLASS (mode) == MODE_FLOAT)
3740
    {
3741
      /* The following libcalls return their result in integer registers,
3742
         even though they return a floating point value.  */
3743
      if (arm_libcall_uses_aapcs_base (libcall))
3744
        return gen_rtx_REG (mode, ARG_REGISTER(1));
3745
 
3746
    }
3747
 
3748
  return arm_libcall_value_1 (mode);
3749
}
3750
 
3751
/* Implement TARGET_FUNCTION_VALUE_REGNO_P.  */
3752
 
3753
static bool
3754
arm_function_value_regno_p (const unsigned int regno)
3755
{
3756
  if (regno == ARG_REGISTER (1)
3757
      || (TARGET_32BIT
3758
          && TARGET_AAPCS_BASED
3759
          && TARGET_VFP
3760
          && TARGET_HARD_FLOAT
3761
          && regno == FIRST_VFP_REGNUM)
3762
      || (TARGET_32BIT
3763
          && TARGET_HARD_FLOAT_ABI
3764
          && TARGET_MAVERICK
3765
          && regno == FIRST_CIRRUS_FP_REGNUM)
3766
      || (TARGET_IWMMXT_ABI
3767
          && regno == FIRST_IWMMXT_REGNUM)
3768
      || (TARGET_32BIT
3769
          && TARGET_HARD_FLOAT_ABI
3770
          && TARGET_FPA
3771
          && regno == FIRST_FPA_REGNUM))
3772
    return true;
3773
 
3774
  return false;
3775
}
3776
 
3777
/* Determine the amount of memory needed to store the possible return
3778
   registers of an untyped call.  */
3779
int
3780
arm_apply_result_size (void)
3781
{
3782
  int size = 16;
3783
 
3784
  if (TARGET_32BIT)
3785
    {
3786
      if (TARGET_HARD_FLOAT_ABI)
3787
        {
3788
          if (TARGET_VFP)
3789
            size += 32;
3790
          if (TARGET_FPA)
3791
            size += 12;
3792
          if (TARGET_MAVERICK)
3793
            size += 8;
3794
        }
3795
      if (TARGET_IWMMXT_ABI)
3796
        size += 8;
3797
    }
3798
 
3799
  return size;
3800
}
3801
 
3802
/* Decide whether TYPE should be returned in memory (true)
3803
   or in a register (false).  FNTYPE is the type of the function making
3804
   the call.  */
3805
static bool
3806
arm_return_in_memory (const_tree type, const_tree fntype)
3807
{
3808
  HOST_WIDE_INT size;
3809
 
3810
  size = int_size_in_bytes (type);  /* Negative if not fixed size.  */
3811
 
3812
  if (TARGET_AAPCS_BASED)
3813
    {
3814
      /* Simple, non-aggregate types (i.e. not including vectors and
3815
         complex) are always returned in a register (or registers).
3816
         We don't care about which register here, so we can short-cut
3817
         some of the detail.  */
3818
      if (!AGGREGATE_TYPE_P (type)
3819
          && TREE_CODE (type) != VECTOR_TYPE
3820
          && TREE_CODE (type) != COMPLEX_TYPE)
3821
        return false;
3822
 
3823
      /* Any return value that is no larger than one word can be
3824
         returned in r0.  */
3825
      if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3826
        return false;
3827
 
3828
      /* Check any available co-processors to see if they accept the
3829
         type as a register candidate (VFP, for example, can return
3830
         some aggregates in consecutive registers).  These aren't
3831
         available if the call is variadic.  */
3832
      if (aapcs_select_return_coproc (type, fntype) >= 0)
3833
        return false;
3834
 
3835
      /* Vector values should be returned using ARM registers, not
3836
         memory (unless they're over 16 bytes, which will break since
3837
         we only have four call-clobbered registers to play with).  */
3838
      if (TREE_CODE (type) == VECTOR_TYPE)
3839
        return (size < 0 || size > (4 * UNITS_PER_WORD));
3840
 
3841
      /* The rest go in memory.  */
3842
      return true;
3843
    }
3844
 
3845
  if (TREE_CODE (type) == VECTOR_TYPE)
3846
    return (size < 0 || size > (4 * UNITS_PER_WORD));
3847
 
3848
  if (!AGGREGATE_TYPE_P (type) &&
3849
      (TREE_CODE (type) != VECTOR_TYPE))
3850
    /* All simple types are returned in registers.  */
3851
    return false;
3852
 
3853
  if (arm_abi != ARM_ABI_APCS)
3854
    {
3855
      /* ATPCS and later return aggregate types in memory only if they are
3856
         larger than a word (or are variable size).  */
3857
      return (size < 0 || size > UNITS_PER_WORD);
3858
    }
3859
 
3860
  /* For the arm-wince targets we choose to be compatible with Microsoft's
3861
     ARM and Thumb compilers, which always return aggregates in memory.  */
3862
#ifndef ARM_WINCE
3863
  /* All structures/unions bigger than one word are returned in memory.
3864
     Also catch the case where int_size_in_bytes returns -1.  In this case
3865
     the aggregate is either huge or of variable size, and in either case
3866
     we will want to return it via memory and not in a register.  */
3867
  if (size < 0 || size > UNITS_PER_WORD)
3868
    return true;
3869
 
3870
  if (TREE_CODE (type) == RECORD_TYPE)
3871
    {
3872
      tree field;
3873
 
3874
      /* For a struct the APCS says that we only return in a register
3875
         if the type is 'integer like' and every addressable element
3876
         has an offset of zero.  For practical purposes this means
3877
         that the structure can have at most one non bit-field element
3878
         and that this element must be the first one in the structure.  */
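      /* For example, struct { int i; } can be returned in r0, whereas
         struct { float f; } or struct { char a; char b; } must be
         returned in memory under these rules.  */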
3879
 
3880
      /* Find the first field, ignoring non FIELD_DECL things which will
3881
         have been created by C++.  */
3882
      for (field = TYPE_FIELDS (type);
3883
           field && TREE_CODE (field) != FIELD_DECL;
3884
           field = DECL_CHAIN (field))
3885
        continue;
3886
 
3887
      if (field == NULL)
3888
        return false; /* An empty structure.  Allowed by an extension to ANSI C.  */
3889
 
3890
      /* Check that the first field is valid for returning in a register.  */
3891
 
3892
      /* ... Floats are not allowed */
3893
      if (FLOAT_TYPE_P (TREE_TYPE (field)))
3894
        return true;
3895
 
3896
      /* ... Aggregates that are not themselves valid for returning in
3897
         a register are not allowed.  */
3898
      if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3899
        return true;
3900
 
3901
      /* Now check the remaining fields, if any.  Only bitfields are allowed,
3902
         since they are not addressable.  */
3903
      for (field = DECL_CHAIN (field);
3904
           field;
3905
           field = DECL_CHAIN (field))
3906
        {
3907
          if (TREE_CODE (field) != FIELD_DECL)
3908
            continue;
3909
 
3910
          if (!DECL_BIT_FIELD_TYPE (field))
3911
            return true;
3912
        }
3913
 
3914
      return false;
3915
    }
3916
 
3917
  if (TREE_CODE (type) == UNION_TYPE)
3918
    {
3919
      tree field;
3920
 
3921
      /* Unions can be returned in registers if every element is
3922
         integral, or can be returned in an integer register.  */
3923
      for (field = TYPE_FIELDS (type);
3924
           field;
3925
           field = DECL_CHAIN (field))
3926
        {
3927
          if (TREE_CODE (field) != FIELD_DECL)
3928
            continue;
3929
 
3930
          if (FLOAT_TYPE_P (TREE_TYPE (field)))
3931
            return true;
3932
 
3933
          if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3934
            return true;
3935
        }
3936
 
3937
      return false;
3938
    }
3939
#endif /* not ARM_WINCE */
3940
 
3941
  /* Return all other types in memory.  */
3942
  return true;
3943
}
3944
 
3945
/* Indicate whether or not words of a double are in big-endian order.  */
3946
 
3947
int
3948
arm_float_words_big_endian (void)
3949
{
3950
  if (TARGET_MAVERICK)
3951
    return 0;
3952
 
3953
  /* For FPA, float words are always big-endian.  For VFP, float words
3954
     follow the memory system mode.  */
3955
 
3956
  if (TARGET_FPA)
3957
    {
3958
      return 1;
3959
    }
3960
 
3961
  if (TARGET_VFP)
3962
    return (TARGET_BIG_END ? 1 : 0);
3963
 
3964
  return 1;
3965
}
3966
 
3967
const struct pcs_attribute_arg
3968
{
3969
  const char *arg;
3970
  enum arm_pcs value;
3971
} pcs_attribute_args[] =
3972
  {
3973
    {"aapcs", ARM_PCS_AAPCS},
3974
    {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3975
#if 0
3976
    /* We could recognize these, but changes would be needed elsewhere
3977
     * to implement them.  */
3978
    {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3979
    {"atpcs", ARM_PCS_ATPCS},
3980
    {"apcs", ARM_PCS_APCS},
3981
#endif
3982
    {NULL, ARM_PCS_UNKNOWN}
3983
  };
3984
 
3985
static enum arm_pcs
3986
arm_pcs_from_attribute (tree attr)
3987
{
3988
  const struct pcs_attribute_arg *ptr;
3989
  const char *arg;
3990
 
3991
  /* Get the value of the argument.  */
3992
  if (TREE_VALUE (attr) == NULL_TREE
3993
      || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
3994
    return ARM_PCS_UNKNOWN;
3995
 
3996
  arg = TREE_STRING_POINTER (TREE_VALUE (attr));
3997
 
3998
  /* Check it against the list of known arguments.  */
3999
  for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
4000
    if (streq (arg, ptr->arg))
4001
      return ptr->value;
4002
 
4003
  /* An unrecognized PCS variant.  */
4004
  return ARM_PCS_UNKNOWN;
4005
}
4006
 
4007
/* Get the PCS variant to use for this call.  TYPE is the function's type
4008
   specification, DECL is the specific declaration.  DECL may be null if
4009
   the call could be indirect or if this is a library call.  */
4010
static enum arm_pcs
4011
arm_get_pcs_model (const_tree type, const_tree decl)
4012
{
4013
  bool user_convention = false;
4014
  enum arm_pcs user_pcs = arm_pcs_default;
4015
  tree attr;
4016
 
4017
  gcc_assert (type);
4018
 
4019
  attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
4020
  if (attr)
4021
    {
4022
      user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
4023
      user_convention = true;
4024
    }
4025
 
4026
  if (TARGET_AAPCS_BASED)
4027
    {
4028
      /* Detect varargs functions.  These always use the base rules
4029
         (no argument is ever a candidate for a co-processor
4030
         register).  */
4031
      bool base_rules = stdarg_p (type);
4032
 
4033
      if (user_convention)
4034
        {
4035
          if (user_pcs > ARM_PCS_AAPCS_LOCAL)
4036
            sorry ("non-AAPCS derived PCS variant");
4037
          else if (base_rules && user_pcs != ARM_PCS_AAPCS)
4038
            error ("variadic functions must use the base AAPCS variant");
4039
        }
4040
 
4041
      if (base_rules)
4042
        return ARM_PCS_AAPCS;
4043
      else if (user_convention)
4044
        return user_pcs;
4045
      else if (decl && flag_unit_at_a_time)
4046
        {
4047
          /* Local functions never leak outside this compilation unit,
4048
             so we are free to use whatever conventions are
4049
             appropriate.  */
4050
          /* FIXME: remove CONST_CAST_TREE when cgraph is constified.  */
4051
          struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4052
          if (i && i->local)
4053
            return ARM_PCS_AAPCS_LOCAL;
4054
        }
4055
    }
4056
  else if (user_convention && user_pcs != arm_pcs_default)
4057
    sorry ("PCS variant");
4058
 
4059
  /* For everything else we use the target's default.  */
4060
  return arm_pcs_default;
4061
}
4062
 
4063
 
4064
static void
4065
aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum  ATTRIBUTE_UNUSED,
4066
                    const_tree fntype ATTRIBUTE_UNUSED,
4067
                    rtx libcall ATTRIBUTE_UNUSED,
4068
                    const_tree fndecl ATTRIBUTE_UNUSED)
4069
{
4070
  /* Record the unallocated VFP registers.  */
4071
  pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
4072
  pcum->aapcs_vfp_reg_alloc = 0;
4073
}
4074
 
4075
/* Walk down the type tree of TYPE counting consecutive base elements.
4076
   If *MODEP is VOIDmode, then set it to the first valid floating point
4077
   type.  If a non-floating point type is found, or if a floating point
4078
   type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
4079
   otherwise return the count in the sub-tree.  */
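/* For example, struct { double x, y, z; } yields 3 with *MODEP set to
   DFmode, while struct { float f; double d; } yields -1 because the
   element modes differ.  */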
4080
static int
4081
aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
4082
{
4083
  enum machine_mode mode;
4084
  HOST_WIDE_INT size;
4085
 
4086
  switch (TREE_CODE (type))
4087
    {
4088
    case REAL_TYPE:
4089
      mode = TYPE_MODE (type);
4090
      if (mode != DFmode && mode != SFmode)
4091
        return -1;
4092
 
4093
      if (*modep == VOIDmode)
4094
        *modep = mode;
4095
 
4096
      if (*modep == mode)
4097
        return 1;
4098
 
4099
      break;
4100
 
4101
    case COMPLEX_TYPE:
4102
      mode = TYPE_MODE (TREE_TYPE (type));
4103
      if (mode != DFmode && mode != SFmode)
4104
        return -1;
4105
 
4106
      if (*modep == VOIDmode)
4107
        *modep = mode;
4108
 
4109
      if (*modep == mode)
4110
        return 2;
4111
 
4112
      break;
4113
 
4114
    case VECTOR_TYPE:
4115
      /* Use V2SImode and V4SImode as representatives of all 64-bit
4116
         and 128-bit vector types, whether or not those modes are
4117
         supported with the present options.  */
4118
      size = int_size_in_bytes (type);
4119
      switch (size)
4120
        {
4121
        case 8:
4122
          mode = V2SImode;
4123
          break;
4124
        case 16:
4125
          mode = V4SImode;
4126
          break;
4127
        default:
4128
          return -1;
4129
        }
4130
 
4131
      if (*modep == VOIDmode)
4132
        *modep = mode;
4133
 
4134
      /* Vector modes are considered to be opaque: two vectors are
4135
         equivalent for the purposes of being homogeneous aggregates
4136
         if they are the same size.  */
4137
      if (*modep == mode)
4138
        return 1;
4139
 
4140
      break;
4141
 
4142
    case ARRAY_TYPE:
4143
      {
4144
        int count;
4145
        tree index = TYPE_DOMAIN (type);
4146
 
4147
        /* Can't handle incomplete types.  */
4148
        if (!COMPLETE_TYPE_P(type))
4149
          return -1;
4150
 
4151
        count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
4152
        if (count == -1
4153
            || !index
4154
            || !TYPE_MAX_VALUE (index)
4155
            || !host_integerp (TYPE_MAX_VALUE (index), 1)
4156
            || !TYPE_MIN_VALUE (index)
4157
            || !host_integerp (TYPE_MIN_VALUE (index), 1)
4158
            || count < 0)
4159
          return -1;
4160
 
4161
        count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
4162
                      - tree_low_cst (TYPE_MIN_VALUE (index), 1));
4163
 
4164
        /* There must be no padding.  */
4165
        if (!host_integerp (TYPE_SIZE (type), 1)
4166
            || (tree_low_cst (TYPE_SIZE (type), 1)
4167
                != count * GET_MODE_BITSIZE (*modep)))
4168
          return -1;
4169
 
4170
        return count;
4171
      }
4172
 
4173
    case RECORD_TYPE:
4174
      {
4175
        int count = 0;
4176
        int sub_count;
4177
        tree field;
4178
 
4179
        /* Can't handle incomplete types.  */
4180
        if (!COMPLETE_TYPE_P(type))
4181
          return -1;
4182
 
4183
        for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4184
          {
4185
            if (TREE_CODE (field) != FIELD_DECL)
4186
              continue;
4187
 
4188
            sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4189
            if (sub_count < 0)
4190
              return -1;
4191
            count += sub_count;
4192
          }
4193
 
4194
        /* There must be no padding.  */
4195
        if (!host_integerp (TYPE_SIZE (type), 1)
4196
            || (tree_low_cst (TYPE_SIZE (type), 1)
4197
                != count * GET_MODE_BITSIZE (*modep)))
4198
          return -1;
4199
 
4200
        return count;
4201
      }
4202
 
4203
    case UNION_TYPE:
4204
    case QUAL_UNION_TYPE:
4205
      {
4206
        /* These aren't very interesting except in a degenerate case.  */
4207
        int count = 0;
4208
        int sub_count;
4209
        tree field;
4210
 
4211
        /* Can't handle incomplete types.  */
4212
        if (!COMPLETE_TYPE_P(type))
4213
          return -1;
4214
 
4215
        for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4216
          {
4217
            if (TREE_CODE (field) != FIELD_DECL)
4218
              continue;
4219
 
4220
            sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4221
            if (sub_count < 0)
4222
              return -1;
4223
            count = count > sub_count ? count : sub_count;
4224
          }
4225
 
4226
        /* There must be no padding.  */
4227
        if (!host_integerp (TYPE_SIZE (type), 1)
4228
            || (tree_low_cst (TYPE_SIZE (type), 1)
4229
                != count * GET_MODE_BITSIZE (*modep)))
4230
          return -1;
4231
 
4232
        return count;
4233
      }
4234
 
4235
    default:
4236
      break;
4237
    }
4238
 
4239
  return -1;
4240
}
4241
 
4242
/* Return true if PCS_VARIANT should use VFP registers.  */
4243
static bool
4244
use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
4245
{
4246
  if (pcs_variant == ARM_PCS_AAPCS_VFP)
4247
    {
4248
      static bool seen_thumb1_vfp = false;
4249
 
4250
      if (TARGET_THUMB1 && !seen_thumb1_vfp)
4251
        {
4252
          sorry ("Thumb-1 hard-float VFP ABI");
4253
          /* sorry() is not immediately fatal, so only display this once.  */
4254
          seen_thumb1_vfp = true;
4255
        }
4256
 
4257
      return true;
4258
    }
4259
 
4260
  if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
4261
    return false;
4262
 
4263
  return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
4264
          (TARGET_VFP_DOUBLE || !is_double));
4265
}
4266
 
4267
/* Return true if an argument whose type is TYPE, or mode is MODE, is
4268
   suitable for passing or returning in VFP registers for the PCS
4269
   variant selected.  If it is, then *BASE_MODE is updated to contain
4270
   a machine mode describing each element of the argument's type and
4271
   *COUNT to hold the number of such elements.  */
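/* For instance, a structure of four floats is a candidate (SFmode with a
   count of 4), but a structure of five floats is rejected because more
   than four elements never qualify.  */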
4272
static bool
4273
aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
4274
                                       enum machine_mode mode, const_tree type,
4275
                                       enum machine_mode *base_mode, int *count)
4276
{
4277
  enum machine_mode new_mode = VOIDmode;
4278
 
4279
  /* If we have the type information, prefer that to working things
4280
     out from the mode.  */
4281
  if (type)
4282
    {
4283
      int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
4284
 
4285
      if (ag_count > 0 && ag_count <= 4)
4286
        *count = ag_count;
4287
      else
4288
        return false;
4289
    }
4290
  else if (GET_MODE_CLASS (mode) == MODE_FLOAT
4291
           || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
4292
           || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
4293
    {
4294
      *count = 1;
4295
      new_mode = mode;
4296
    }
4297
  else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4298
    {
4299
      *count = 2;
4300
      new_mode = (mode == DCmode ? DFmode : SFmode);
4301
    }
4302
  else
4303
    return false;
4304
 
4305
 
4306
  if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
4307
    return false;
4308
 
4309
  *base_mode = new_mode;
4310
  return true;
4311
}
4312
 
4313
static bool
4314
aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
4315
                               enum machine_mode mode, const_tree type)
4316
{
4317
  int count ATTRIBUTE_UNUSED;
4318
  enum machine_mode ag_mode ATTRIBUTE_UNUSED;
4319
 
4320
  if (!use_vfp_abi (pcs_variant, false))
4321
    return false;
4322
  return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4323
                                                &ag_mode, &count);
4324
}
4325
 
4326
static bool
4327
aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4328
                             const_tree type)
4329
{
4330
  if (!use_vfp_abi (pcum->pcs_variant, false))
4331
    return false;
4332
 
4333
  return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
4334
                                                &pcum->aapcs_vfp_rmode,
4335
                                                &pcum->aapcs_vfp_rcount);
4336
}
4337
 
4338
static bool
4339
aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4340
                    const_tree type  ATTRIBUTE_UNUSED)
4341
{
4342
  int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
4343
  unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
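  /* SHIFT is the number of S registers each element occupies (1 for
     SFmode, 2 for DFmode and 64-bit vectors, 4 for 128-bit vectors);
     MASK covers the SHIFT * RCOUNT consecutive S registers needed for
     the whole argument.  The loop below walks the free-register bitmap
     in steps of SHIFT so that candidate blocks stay naturally
     aligned.  */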
4344
  int regno;
4345
 
4346
  for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
4347
    if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
4348
      {
4349
        pcum->aapcs_vfp_reg_alloc = mask << regno;
4350
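        /* When there is no single hard register of mode MODE
           (aggregates in BLKmode, or TImode without NEON), describe the
           location as a PARALLEL of element-mode registers with
           explicit byte offsets instead.  */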
        if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4351
          {
4352
            int i;
4353
            int rcount = pcum->aapcs_vfp_rcount;
4354
            int rshift = shift;
4355
            enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4356
            rtx par;
4357
            if (!TARGET_NEON)
4358
              {
4359
                /* Avoid using unsupported vector modes.  */
4360
                if (rmode == V2SImode)
4361
                  rmode = DImode;
4362
                else if (rmode == V4SImode)
4363
                  {
4364
                    rmode = DImode;
4365
                    rcount *= 2;
4366
                    rshift /= 2;
4367
                  }
4368
              }
4369
            par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4370
            for (i = 0; i < rcount; i++)
4371
              {
4372
                rtx tmp = gen_rtx_REG (rmode,
4373
                                       FIRST_VFP_REGNUM + regno + i * rshift);
4374
                tmp = gen_rtx_EXPR_LIST
4375
                  (VOIDmode, tmp,
4376
                   GEN_INT (i * GET_MODE_SIZE (rmode)));
4377
                XVECEXP (par, 0, i) = tmp;
4378
              }
4379
 
4380
            pcum->aapcs_reg = par;
4381
          }
4382
        else
4383
          pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
4384
        return true;
4385
      }
4386
  return false;
4387
}
4388
 
4389
static rtx
4390
aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
4391
                               enum machine_mode mode,
4392
                               const_tree type ATTRIBUTE_UNUSED)
4393
{
4394
  if (!use_vfp_abi (pcs_variant, false))
4395
    return NULL;
4396
 
4397
  if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4398
    {
4399
      int count;
4400
      enum machine_mode ag_mode;
4401
      int i;
4402
      rtx par;
4403
      int shift;
4404
 
4405
      aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4406
                                             &ag_mode, &count);
4407
 
4408
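      /* As in aapcs_vfp_allocate, avoid describing the value with
         vector modes the target does not support; use DImode pieces
         instead.  */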
      if (!TARGET_NEON)
4409
        {
4410
          if (ag_mode == V2SImode)
4411
            ag_mode = DImode;
4412
          else if (ag_mode == V4SImode)
4413
            {
4414
              ag_mode = DImode;
4415
              count *= 2;
4416
            }
4417
        }
4418
      shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
4419
      par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4420
      for (i = 0; i < count; i++)
4421
        {
4422
          rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4423
          tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4424
                                   GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4425
          XVECEXP (par, 0, i) = tmp;
4426
        }
4427
 
4428
      return par;
4429
    }
4430
 
4431
  return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4432
}
4433
 
4434
static void
4435
aapcs_vfp_advance (CUMULATIVE_ARGS *pcum  ATTRIBUTE_UNUSED,
4436
                   enum machine_mode mode  ATTRIBUTE_UNUSED,
4437
                   const_tree type  ATTRIBUTE_UNUSED)
4438
{
4439
  pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4440
  pcum->aapcs_vfp_reg_alloc = 0;
4441
  return;
4442
}
4443
 
4444
#define AAPCS_CP(X)                             \
4445
  {                                             \
4446
    aapcs_ ## X ## _cum_init,                   \
4447
    aapcs_ ## X ## _is_call_candidate,          \
4448
    aapcs_ ## X ## _allocate,                   \
4449
    aapcs_ ## X ## _is_return_candidate,        \
4450
    aapcs_ ## X ## _allocate_return_reg,        \
4451
    aapcs_ ## X ## _advance                     \
4452
  }
4453
 
4454
/* Table of co-processors that can be used to pass arguments in
4455
   registers.  Ideally no argument should be a candidate for more than
4456
   one co-processor table entry, but the table is processed in order
4457
   and stops after the first match.  If that entry then fails to put
4458
   the argument into a co-processor register, the argument will go on
4459
   the stack.  */
4460
static struct
4461
{
4462
  /* Initialize co-processor related state in CUMULATIVE_ARGS structure.  */
4463
  void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4464
 
4465
  /* Return true if an argument of mode MODE (or type TYPE if MODE is
4466
     BLKmode) is a candidate for this co-processor's registers; this
4467
     function should ignore any position-dependent state in
4468
     CUMULATIVE_ARGS and only use call-type dependent information.  */
4469
  bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4470
 
4471
  /* Return true if the argument does get a co-processor register; it
4472
     should set aapcs_reg to an RTX of the register allocated as is
4473
     required for a return from FUNCTION_ARG.  */
4474
  bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4475
 
4476
  /* Return true if a result of mode MODE (or type TYPE if MODE is
4477
     BLKmode) can be returned in this co-processor's registers.  */
4478
  bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4479
 
4480
  /* Allocate and return an RTX element to hold the return type of a
4481
     call.  This routine must not fail and will only be called if
4482
     is_return_candidate returned true with the same parameters.  */
4483
  rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4484
 
4485
  /* Finish processing this argument and prepare to start processing
4486
     the next one.  */
4487
  void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4488
} aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4489
  {
4490
    AAPCS_CP(vfp)
4491
  };
4492
 
4493
#undef AAPCS_CP
4494
 
4495
static int
4496
aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4497
                          const_tree type)
4498
{
4499
  int i;
4500
 
4501
  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4502
    if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4503
      return i;
4504
 
4505
  return -1;
4506
}
4507
 
4508
static int
4509
aapcs_select_return_coproc (const_tree type, const_tree fntype)
4510
{
4511
  /* We aren't passed a decl, so we can't check that a call is local.
4512
     However, it isn't clear that that would be a win anyway, since it
4513
     might limit some tail-calling opportunities.  */
4514
  enum arm_pcs pcs_variant;
4515
 
4516
  if (fntype)
4517
    {
4518
      const_tree fndecl = NULL_TREE;
4519
 
4520
      if (TREE_CODE (fntype) == FUNCTION_DECL)
4521
        {
4522
          fndecl = fntype;
4523
          fntype = TREE_TYPE (fntype);
4524
        }
4525
 
4526
      pcs_variant = arm_get_pcs_model (fntype, fndecl);
4527
    }
4528
  else
4529
    pcs_variant = arm_pcs_default;
4530
 
4531
  if (pcs_variant != ARM_PCS_AAPCS)
4532
    {
4533
      int i;
4534
 
4535
      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4536
        if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4537
                                                        TYPE_MODE (type),
4538
                                                        type))
4539
          return i;
4540
    }
4541
  return -1;
4542
}
4543
 
4544
static rtx
4545
aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4546
                           const_tree fntype)
4547
{
4548
  /* We aren't passed a decl, so we can't check that a call is local.
4549
     However, it isn't clear that that would be a win anyway, since it
4550
     might limit some tail-calling opportunities.  */
4551
  enum arm_pcs pcs_variant;
4552
  int unsignedp ATTRIBUTE_UNUSED;
4553
 
4554
  if (fntype)
4555
    {
4556
      const_tree fndecl = NULL_TREE;
4557
 
4558
      if (TREE_CODE (fntype) == FUNCTION_DECL)
4559
        {
4560
          fndecl = fntype;
4561
          fntype = TREE_TYPE (fntype);
4562
        }
4563
 
4564
      pcs_variant = arm_get_pcs_model (fntype, fndecl);
4565
    }
4566
  else
4567
    pcs_variant = arm_pcs_default;
4568
 
4569
  /* Promote integer types.  */
4570
  if (type && INTEGRAL_TYPE_P (type))
4571
    mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4572
 
4573
  if (pcs_variant != ARM_PCS_AAPCS)
4574
    {
4575
      int i;
4576
 
4577
      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4578
        if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4579
                                                        type))
4580
          return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4581
                                                             mode, type);
4582
    }
4583
 
4584
  /* Promote small structs returned in a register to full-word size
4585
     for big-endian AAPCS.  */
4586
  if (type && arm_return_in_msb (type))
4587
    {
4588
      HOST_WIDE_INT size = int_size_in_bytes (type);
4589
      if (size % UNITS_PER_WORD != 0)
4590
        {
4591
          size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4592
          mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4593
        }
4594
    }
4595
 
4596
  return gen_rtx_REG (mode, R0_REGNUM);
4597
}
4598
 
4599
static rtx
4600
aapcs_libcall_value (enum machine_mode mode)
4601
{
4602
  if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
4603
      && GET_MODE_SIZE (mode) <= 4)
4604
    mode = SImode;
4605
 
4606
  return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4607
}
4608
 
4609
/* Lay out a function argument using the AAPCS rules.  The rule
4610
   numbers referred to here are those in the AAPCS.  */
4611
static void
4612
aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4613
                  const_tree type, bool named)
4614
{
4615
  int nregs, nregs2;
4616
  int ncrn;
4617
 
4618
  /* We only need to do this once per argument.  */
4619
  if (pcum->aapcs_arg_processed)
4620
    return;
4621
 
4622
  pcum->aapcs_arg_processed = true;
4623
 
4624
  /* Special case: if named is false then we are handling an incoming
4625
     anonymous argument which is on the stack.  */
4626
  if (!named)
4627
    return;
4628
 
4629
  /* Is this a potential co-processor register candidate?  */
4630
  if (pcum->pcs_variant != ARM_PCS_AAPCS)
4631
    {
4632
      int slot = aapcs_select_call_coproc (pcum, mode, type);
4633
      pcum->aapcs_cprc_slot = slot;
4634
 
4635
      /* We don't have to apply any of the rules from part B of the
4636
         preparation phase, these are handled elsewhere in the
4637
         compiler.  */
4638
 
4639
      if (slot >= 0)
4640
        {
4641
          /* A Co-processor register candidate goes either in its own
4642
             class of registers or on the stack.  */
4643
          if (!pcum->aapcs_cprc_failed[slot])
4644
            {
4645
              /* C1.cp - Try to allocate the argument to co-processor
4646
                 registers.  */
4647
              if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4648
                return;
4649
 
4650
              /* C2.cp - Put the argument on the stack and note that we
4651
                 can't assign any more candidates in this slot.  We also
4652
                 need to note that we have allocated stack space, so that
4653
                 we won't later try to split a non-cprc candidate between
4654
                 core registers and the stack.  */
4655
              pcum->aapcs_cprc_failed[slot] = true;
4656
              pcum->can_split = false;
4657
            }
4658
 
4659
          /* We didn't get a register, so this argument goes on the
4660
             stack.  */
4661
          gcc_assert (pcum->can_split == false);
4662
          return;
4663
        }
4664
    }
4665
 
4666
  /* C3 - For double-word aligned arguments, round the NCRN up to the
4667
     next even number.  */
4668
  ncrn = pcum->aapcs_ncrn;
4669
  if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4670
    ncrn++;
4671
 
4672
  nregs = ARM_NUM_REGS2(mode, type);
4673
 
4674
  /* Sigh, this test should really assert that nregs > 0, but a GCC
4675
     extension allows empty structs and then gives them empty size; it
4676
     then allows such a structure to be passed by value.  For some of
4677
     the code below we have to pretend that such an argument has
4678
     non-zero size so that we 'locate' it correctly either in
4679
     registers or on the stack.  */
4680
  gcc_assert (nregs >= 0);
4681
 
4682
  nregs2 = nregs ? nregs : 1;
4683
 
4684
  /* C4 - Argument fits entirely in core registers.  */
4685
  if (ncrn + nregs2 <= NUM_ARG_REGS)
4686
    {
4687
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4688
      pcum->aapcs_next_ncrn = ncrn + nregs;
4689
      return;
4690
    }
4691
 
4692
  /* C5 - Some core registers left and there are no arguments already
4693
     on the stack: split this argument between the remaining core
4694
     registers and the stack.  */
4695
  if (ncrn < NUM_ARG_REGS && pcum->can_split)
4696
    {
4697
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4698
      pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4699
      pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4700
      return;
4701
    }
4702
 
4703
  /* C6 - NCRN is set to 4.  */
4704
  pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4705
 
4706
  /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
4707
  return;
4708
}
4709
 
4710
/* Initialize a variable CUM of type CUMULATIVE_ARGS
4711
   for a call to a function whose data type is FNTYPE.
4712
   For a library call, FNTYPE is NULL.  */
4713
void
4714
arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4715
                          rtx libname,
4716
                          tree fndecl ATTRIBUTE_UNUSED)
4717
{
4718
  /* Long call handling.  */
4719
  if (fntype)
4720
    pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4721
  else
4722
    pcum->pcs_variant = arm_pcs_default;
4723
 
4724
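  /* Values up to ARM_PCS_AAPCS_LOCAL are the AAPCS-based calling
     conventions; anything beyond them is handled by the legacy
     APCS/ATPCS code further down.  */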
  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4725
    {
4726
      if (arm_libcall_uses_aapcs_base (libname))
4727
        pcum->pcs_variant = ARM_PCS_AAPCS;
4728
 
4729
      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4730
      pcum->aapcs_reg = NULL_RTX;
4731
      pcum->aapcs_partial = 0;
4732
      pcum->aapcs_arg_processed = false;
4733
      pcum->aapcs_cprc_slot = -1;
4734
      pcum->can_split = true;
4735
 
4736
      if (pcum->pcs_variant != ARM_PCS_AAPCS)
4737
        {
4738
          int i;
4739
 
4740
          for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4741
            {
4742
              pcum->aapcs_cprc_failed[i] = false;
4743
              aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4744
            }
4745
        }
4746
      return;
4747
    }
4748
 
4749
  /* Legacy ABIs */
4750
 
4751
  /* On the ARM, the offset starts at 0.  */
4752
  pcum->nregs = 0;
4753
  pcum->iwmmxt_nregs = 0;
4754
  pcum->can_split = true;
4755
 
4756
  /* Varargs vectors are treated the same as long long.
4757
     named_count avoids having to change the way arm handles 'named'.  */
4758
  pcum->named_count = 0;
4759
  pcum->nargs = 0;
4760
 
4761
  if (TARGET_REALLY_IWMMXT && fntype)
4762
    {
4763
      tree fn_arg;
4764
 
4765
      for (fn_arg = TYPE_ARG_TYPES (fntype);
4766
           fn_arg;
4767
           fn_arg = TREE_CHAIN (fn_arg))
4768
        pcum->named_count += 1;
4769
 
4770
      if (! pcum->named_count)
4771
        pcum->named_count = INT_MAX;
4772
    }
4773
}
4774
 
4775
 
4776
/* Return true if mode/type need doubleword alignment.  */
4777
static bool
4778
arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
4779
{
4780
  return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4781
          || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
4782
}
4783
 
4784
 
4785
/* Determine where to put an argument to a function.
4786
   Value is zero to push the argument on the stack,
4787
   or a hard register in which to store the argument.
4788
 
4789
   MODE is the argument's machine mode.
4790
   TYPE is the data type of the argument (as a tree).
4791
    This is null for libcalls where that information may
4792
    not be available.
4793
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
4794
    the preceding args and about the function being called.
4795
   NAMED is nonzero if this argument is a named parameter
4796
    (otherwise it is an extra parameter matching an ellipsis).
4797
 
4798
   On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4799
   other arguments are passed on the stack.  If (NAMED == 0) (which happens
4800
   only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4801
   defined), say it is passed in the stack (function_prologue will
4802
   indeed make it pass in the stack if necessary).  */
4803
 
4804
static rtx
4805
arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
4806
                  const_tree type, bool named)
4807
{
4808
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4809
  int nregs;
4810
 
4811
  /* Handle the special case quickly.  Pick an arbitrary value for op2 of
4812
     a call insn (op3 of a call_value insn).  */
4813
  if (mode == VOIDmode)
4814
    return const0_rtx;
4815
 
4816
  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4817
    {
4818
      aapcs_layout_arg (pcum, mode, type, named);
4819
      return pcum->aapcs_reg;
4820
    }
4821
 
4822
  /* Varargs vectors are treated the same as long long.
4823
     named_count avoids having to change the way arm handles 'named'.  */
4824
  if (TARGET_IWMMXT_ABI
4825
      && arm_vector_mode_supported_p (mode)
4826
      && pcum->named_count > pcum->nargs + 1)
4827
    {
4828
      if (pcum->iwmmxt_nregs <= 9)
4829
        return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4830
      else
4831
        {
4832
          pcum->can_split = false;
4833
          return NULL_RTX;
4834
        }
4835
    }
4836
 
4837
  /* Put doubleword aligned quantities in even register pairs.  */
4838
  if (pcum->nregs & 1
4839
      && ARM_DOUBLEWORD_ALIGN
4840
      && arm_needs_doubleword_align (mode, type))
4841
    pcum->nregs++;
4842
 
4843
  /* Only allow splitting an arg between regs and memory if all preceding
4844
     args were allocated to regs.  For args passed by reference we only count
4845
     the reference pointer.  */
4846
  if (pcum->can_split)
4847
    nregs = 1;
4848
  else
4849
    nregs = ARM_NUM_REGS2 (mode, type);
4850
 
4851
  if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4852
    return NULL_RTX;
4853
 
4854
  return gen_rtx_REG (mode, pcum->nregs);
4855
}
4856
 
4857
static unsigned int
4858
arm_function_arg_boundary (enum machine_mode mode, const_tree type)
4859
{
4860
  return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
4861
          ? DOUBLEWORD_ALIGNMENT
4862
          : PARM_BOUNDARY);
4863
}
4864
 
4865
static int
4866
arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
4867
                       tree type, bool named)
4868
{
4869
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4870
  int nregs = pcum->nregs;
4871
 
4872
  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4873
    {
4874
      aapcs_layout_arg (pcum, mode, type, named);
4875
      return pcum->aapcs_partial;
4876
    }
4877
 
4878
  if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4879
    return 0;
4880
 
4881
  if (NUM_ARG_REGS > nregs
4882
      && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4883
      && pcum->can_split)
4884
    return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
4885
 
4886
  return 0;
4887
}
4888
 
4889
/* Update the data in PCUM to advance over an argument
4890
   of mode MODE and data type TYPE.
4891
   (TYPE is null for libcalls where that information may not be available.)  */
4892
 
4893
static void
4894
arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
4895
                          const_tree type, bool named)
4896
{
4897
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4898
 
4899
  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4900
    {
4901
      aapcs_layout_arg (pcum, mode, type, named);
4902
 
4903
      if (pcum->aapcs_cprc_slot >= 0)
4904
        {
4905
          aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4906
                                                              type);
4907
          pcum->aapcs_cprc_slot = -1;
4908
        }
4909
 
4910
      /* Generic stuff.  */
4911
      pcum->aapcs_arg_processed = false;
4912
      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4913
      pcum->aapcs_reg = NULL_RTX;
4914
      pcum->aapcs_partial = 0;
4915
    }
4916
  else
4917
    {
4918
      pcum->nargs += 1;
4919
      if (arm_vector_mode_supported_p (mode)
4920
          && pcum->named_count > pcum->nargs
4921
          && TARGET_IWMMXT_ABI)
4922
        pcum->iwmmxt_nregs += 1;
4923
      else
4924
        pcum->nregs += ARM_NUM_REGS2 (mode, type);
4925
    }
4926
}
4927
 
4928
/* Variable sized types are passed by reference.  This is a GCC
4929
   extension to the ARM ABI.  */
4930
 
4931
static bool
4932
arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
4933
                       enum machine_mode mode ATTRIBUTE_UNUSED,
4934
                       const_tree type, bool named ATTRIBUTE_UNUSED)
4935
{
4936
  return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4937
}
4938
 
4939
/* Encode the current state of the #pragma [no_]long_calls.  */
4940
typedef enum
4941
{
4942
  OFF,          /* No #pragma [no_]long_calls is in effect.  */
4943
  LONG,         /* #pragma long_calls is in effect.  */
4944
  SHORT         /* #pragma no_long_calls is in effect.  */
4945
} arm_pragma_enum;
4946
 
4947
static arm_pragma_enum arm_pragma_long_calls = OFF;
4948
 
4949
void
4950
arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4951
{
4952
  arm_pragma_long_calls = LONG;
4953
}
4954
 
4955
void
4956
arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4957
{
4958
  arm_pragma_long_calls = SHORT;
4959
}
4960
 
4961
void
4962
arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4963
{
4964
  arm_pragma_long_calls = OFF;
4965
}
4966
 
4967
/* Handle an attribute requiring a FUNCTION_DECL;
4968
   arguments as in struct attribute_spec.handler.  */
4969
static tree
4970
arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4971
                             int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4972
{
4973
  if (TREE_CODE (*node) != FUNCTION_DECL)
4974
    {
4975
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
4976
               name);
4977
      *no_add_attrs = true;
4978
    }
4979
 
4980
  return NULL_TREE;
4981
}
4982
 
4983
/* Handle an "interrupt" or "isr" attribute;
4984
   arguments as in struct attribute_spec.handler.  */
4985
static tree
4986
arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4987
                          bool *no_add_attrs)
4988
{
4989
  if (DECL_P (*node))
4990
    {
4991
      if (TREE_CODE (*node) != FUNCTION_DECL)
4992
        {
4993
          warning (OPT_Wattributes, "%qE attribute only applies to functions",
4994
                   name);
4995
          *no_add_attrs = true;
4996
        }
4997
      /* FIXME: the argument if any is checked for type attributes;
4998
         should it be checked for decl ones?  */
4999
    }
5000
  else
5001
    {
5002
      if (TREE_CODE (*node) == FUNCTION_TYPE
5003
          || TREE_CODE (*node) == METHOD_TYPE)
5004
        {
5005
          if (arm_isr_value (args) == ARM_FT_UNKNOWN)
5006
            {
5007
              warning (OPT_Wattributes, "%qE attribute ignored",
5008
                       name);
5009
              *no_add_attrs = true;
5010
            }
5011
        }
5012
      else if (TREE_CODE (*node) == POINTER_TYPE
5013
               && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
5014
                   || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
5015
               && arm_isr_value (args) != ARM_FT_UNKNOWN)
5016
        {
5017
          *node = build_variant_type_copy (*node);
5018
          TREE_TYPE (*node) = build_type_attribute_variant
5019
            (TREE_TYPE (*node),
5020
             tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
5021
          *no_add_attrs = true;
5022
        }
5023
      else
5024
        {
5025
          /* Possibly pass this attribute on from the type to a decl.  */
5026
          if (flags & ((int) ATTR_FLAG_DECL_NEXT
5027
                       | (int) ATTR_FLAG_FUNCTION_NEXT
5028
                       | (int) ATTR_FLAG_ARRAY_NEXT))
5029
            {
5030
              *no_add_attrs = true;
5031
              return tree_cons (name, args, NULL_TREE);
5032
            }
5033
          else
5034
            {
5035
              warning (OPT_Wattributes, "%qE attribute ignored",
5036
                       name);
5037
            }
5038
        }
5039
    }
5040
 
5041
  return NULL_TREE;
5042
}
5043
 
5044
/* Handle a "pcs" attribute; arguments as in struct
5045
   attribute_spec.handler.  */
5046
static tree
5047
arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
5048
                          int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5049
{
5050
  if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
5051
    {
5052
      warning (OPT_Wattributes, "%qE attribute ignored", name);
5053
      *no_add_attrs = true;
5054
    }
5055
  return NULL_TREE;
5056
}
5057
 
5058
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
5059
/* Handle the "notshared" attribute.  This attribute is another way of
5060
   requesting hidden visibility.  ARM's compiler supports
5061
   "__declspec(notshared)"; we support the same thing via an
5062
   attribute.  */
5063
 
5064
static tree
5065
arm_handle_notshared_attribute (tree *node,
5066
                                tree name ATTRIBUTE_UNUSED,
5067
                                tree args ATTRIBUTE_UNUSED,
5068
                                int flags ATTRIBUTE_UNUSED,
5069
                                bool *no_add_attrs)
5070
{
5071
  tree decl = TYPE_NAME (*node);
5072
 
5073
  if (decl)
5074
    {
5075
      DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
5076
      DECL_VISIBILITY_SPECIFIED (decl) = 1;
5077
      *no_add_attrs = false;
5078
    }
5079
  return NULL_TREE;
5080
}
5081
#endif
5082
 
5083
/* Return 0 if the attributes for two types are incompatible, 1 if they
5084
   are compatible, and 2 if they are nearly compatible (which causes a
5085
   warning to be generated).  */
5086
static int
5087
arm_comp_type_attributes (const_tree type1, const_tree type2)
5088
{
5089
  int l1, l2, s1, s2;
5090
 
5091
  /* Check for mismatch of non-default calling convention.  */
5092
  if (TREE_CODE (type1) != FUNCTION_TYPE)
5093
    return 1;
5094
 
5095
  /* Check for mismatched call attributes.  */
5096
  l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
5097
  l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
5098
  s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
5099
  s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
5100
 
5101
  /* Only bother to check if an attribute is defined.  */
5102
  if (l1 | l2 | s1 | s2)
5103
    {
5104
      /* If one type has an attribute, the other must have the same attribute.  */
5105
      if ((l1 != l2) || (s1 != s2))
5106
        return 0;
5107
 
5108
      /* Disallow mixed attributes.  */
5109
      if ((l1 & s2) || (l2 & s1))
5110
        return 0;
5111
    }
5112
 
5113
  /* Check for mismatched ISR attribute.  */
5114
  l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
5115
  if (! l1)
5116
    l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
5117
  l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
5118
  if (! l2)
5119
    l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
5120
  if (l1 != l2)
5121
    return 0;
5122
 
5123
  return 1;
5124
}
5125
 
5126
/* Assign default attributes to a newly defined type.  This is used to
5127
    set short_call/long_call attributes for function types of
5128
    functions defined inside corresponding #pragma scopes.  */
5129
static void
5130
arm_set_default_type_attributes (tree type)
5131
{
5132
  /* Add __attribute__ ((long_call)) to all functions, when
5133
     inside #pragma long_calls or __attribute__ ((short_call)),
5134
     when inside #pragma no_long_calls.  */
5135
  if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
5136
    {
5137
      tree type_attr_list, attr_name;
5138
      type_attr_list = TYPE_ATTRIBUTES (type);
5139
 
5140
      if (arm_pragma_long_calls == LONG)
5141
        attr_name = get_identifier ("long_call");
5142
      else if (arm_pragma_long_calls == SHORT)
5143
        attr_name = get_identifier ("short_call");
5144
      else
5145
        return;
5146
 
5147
      type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
5148
      TYPE_ATTRIBUTES (type) = type_attr_list;
5149
    }
5150
}
5151
 
5152
/* Return true if DECL is known to be linked into section SECTION.  */
5153
 
5154
static bool
5155
arm_function_in_section_p (tree decl, section *section)
5156
{
5157
  /* We can only be certain about functions defined in the same
5158
     compilation unit.  */
5159
  if (!TREE_STATIC (decl))
5160
    return false;
5161
 
5162
  /* Make sure that SYMBOL always binds to the definition in this
5163
     compilation unit.  */
5164
  if (!targetm.binds_local_p (decl))
5165
    return false;
5166
 
5167
  /* If DECL_SECTION_NAME is set, assume it is trustworthy.  */
5168
  if (!DECL_SECTION_NAME (decl))
5169
    {
5170
      /* Make sure that we will not create a unique section for DECL.  */
5171
      if (flag_function_sections || DECL_ONE_ONLY (decl))
5172
        return false;
5173
    }
5174
 
5175
  return function_section (decl) == section;
5176
}
5177
 
5178
/* Return nonzero if a 32-bit "long_call" should be generated for
5179
   a call from the current function to DECL.  We generate a long_call
5180
   if the function:
5181
 
5182
        a.  has an __attribute__ ((long_call))
5183
     or b.  is within the scope of a #pragma long_calls
5184
     or c.  the -mlong-calls command line switch has been specified
5185
 
5186
   However we do not generate a long call if the function:
5187
 
5188
        d.  has an __attribute__ ((short_call))
5189
     or e.  is inside the scope of a #pragma no_long_calls
5190
     or f.  is defined in the same section as the current function.  */
5191
 
5192
bool
5193
arm_is_long_call_p (tree decl)
5194
{
5195
  tree attrs;
5196
 
5197
  if (!decl)
5198
    return TARGET_LONG_CALLS;
5199
 
5200
  attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
5201
  if (lookup_attribute ("short_call", attrs))
5202
    return false;
5203
 
5204
  /* For "f", be conservative, and only cater for cases in which the
5205
     whole of the current function is placed in the same section.  */
5206
  if (!flag_reorder_blocks_and_partition
5207
      && TREE_CODE (decl) == FUNCTION_DECL
5208
      && arm_function_in_section_p (decl, current_function_section ()))
5209
    return false;
5210
 
5211
  if (lookup_attribute ("long_call", attrs))
5212
    return true;
5213
 
5214
  return TARGET_LONG_CALLS;
5215
}
5216
 
5217
/* Return nonzero if it is ok to make a tail-call to DECL.  */
5218
static bool
5219
arm_function_ok_for_sibcall (tree decl, tree exp)
5220
{
5221
  unsigned long func_type;
5222
 
5223
  if (cfun->machine->sibcall_blocked)
5224
    return false;
5225
 
5226
  /* Never tailcall something for which we have no decl, or if we
5227
     are generating code for Thumb-1.  */
5228
  if (decl == NULL || TARGET_THUMB1)
5229
    return false;
5230
 
5231
  /* The PIC register is live on entry to VxWorks PLT entries, so we
5232
     must make the call before restoring the PIC register.  */
5233
  if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
5234
    return false;
5235
 
5236
  /* Cannot tail-call to long calls, since these are out of range of
5237
     a branch instruction.  */
5238
  if (arm_is_long_call_p (decl))
5239
    return false;
5240
 
5241
  /* If we are interworking and the function is not declared static
5242
     then we can't tail-call it unless we know that it exists in this
5243
     compilation unit (since it might be a Thumb routine).  */
5244
  if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
5245
    return false;
5246
 
5247
  func_type = arm_current_func_type ();
5248
  /* Never tailcall from an ISR routine - it needs a special exit sequence.  */
5249
  if (IS_INTERRUPT (func_type))
5250
    return false;
5251
 
5252
  if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5253
    {
5254
      /* Check that the return value locations are the same.  For
5255
         example that we aren't returning a value from the sibling in
5256
         a VFP register but then need to transfer it to a core
5257
         register.  */
5258
      rtx a, b;
5259
 
5260
      a = arm_function_value (TREE_TYPE (exp), decl, false);
5261
      b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5262
                              cfun->decl, false);
5263
      if (!rtx_equal_p (a, b))
5264
        return false;
5265
    }
5266
 
5267
  /* Never tailcall if function may be called with a misaligned SP.  */
5268
  if (IS_STACKALIGN (func_type))
5269
    return false;
5270
 
5271
  /* The AAPCS says that, on bare-metal, calls to unresolved weak
5272
     references should become a NOP.  Don't convert such calls into
5273
     sibling calls.  */
5274
  if (TARGET_AAPCS_BASED
5275
      && arm_abi == ARM_ABI_AAPCS
5276
      && DECL_WEAK (decl))
5277
    return false;
5278
 
5279
  /* Everything else is ok.  */
5280
  return true;
5281
}
5282
 
5283
 
5284
/* Addressing mode support functions.  */
5285
 
5286
/* Return nonzero if X is a legitimate immediate operand when compiling
5287
   for PIC.  We know that X satisfies CONSTANT_P and flag_pic is true.  */
5288
int
5289
legitimate_pic_operand_p (rtx x)
5290
{
5291
  if (GET_CODE (x) == SYMBOL_REF
5292
      || (GET_CODE (x) == CONST
5293
          && GET_CODE (XEXP (x, 0)) == PLUS
5294
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5295
    return 0;
5296
 
5297
  return 1;
5298
}
5299
 
5300
/* Record that the current function needs a PIC register.  Initialize
5301
   cfun->machine->pic_reg if we have not already done so.  */
5302
 
5303
static void
5304
require_pic_register (void)
5305
{
5306
  /* A lot of the logic here is made obscure by the fact that this
5307
     routine gets called as part of the rtx cost estimation process.
5308
     We don't want those calls to affect any assumptions about the real
5309
     function; and further, we can't call entry_of_function() until we
5310
     start the real expansion process.  */
5311
  if (!crtl->uses_pic_offset_table)
5312
    {
5313
      gcc_assert (can_create_pseudo_p ());
5314
      if (arm_pic_register != INVALID_REGNUM)
5315
        {
5316
          if (!cfun->machine->pic_reg)
5317
            cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
5318
 
5319
          /* Play games to avoid marking the function as needing pic
5320
             if we are being called as part of the cost-estimation
5321
             process.  */
5322
          if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5323
            crtl->uses_pic_offset_table = 1;
5324
        }
5325
      else
5326
        {
5327
          rtx seq, insn;
5328
 
5329
          if (!cfun->machine->pic_reg)
5330
            cfun->machine->pic_reg = gen_reg_rtx (Pmode);
5331
 
5332
          /* Play games to avoid marking the function as needing pic
5333
             if we are being called as part of the cost-estimation
5334
             process.  */
5335
          if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5336
            {
5337
              crtl->uses_pic_offset_table = 1;
5338
              start_sequence ();
5339
 
5340
              arm_load_pic_register (0UL);
5341
 
5342
              seq = get_insns ();
5343
              end_sequence ();
5344
 
5345
              for (insn = seq; insn; insn = NEXT_INSN (insn))
5346
                if (INSN_P (insn))
5347
                  INSN_LOCATOR (insn) = prologue_locator;
5348
 
5349
              /* We can be called during expansion of PHI nodes, where
5350
                 we can't yet emit instructions directly in the final
5351
                 insn stream.  Queue the insns on the entry edge, they will
5352
                 be committed after everything else is expanded.  */
5353
              insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
5354
            }
5355
        }
5356
    }
5357
}
5358
 
5359
rtx
5360
legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
5361
{
5362
  if (GET_CODE (orig) == SYMBOL_REF
5363
      || GET_CODE (orig) == LABEL_REF)
5364
    {
5365
      rtx insn;
5366
 
5367
      if (reg == 0)
5368
        {
5369
          gcc_assert (can_create_pseudo_p ());
5370
          reg = gen_reg_rtx (Pmode);
5371
        }
5372
 
5373
      /* VxWorks does not impose a fixed gap between segments; the run-time
5374
         gap can be different from the object-file gap.  We therefore can't
5375
         use GOTOFF unless we are absolutely sure that the symbol is in the
5376
         same segment as the GOT.  Unfortunately, the flexibility of linker
5377
         scripts means that we can't be sure of that in general, so assume
5378
         that GOTOFF is never valid on VxWorks.  */
5379
      if ((GET_CODE (orig) == LABEL_REF
5380
           || (GET_CODE (orig) == SYMBOL_REF &&
5381
               SYMBOL_REF_LOCAL_P (orig)))
5382
          && NEED_GOT_RELOC
5383
          && !TARGET_VXWORKS_RTP)
5384
        insn = arm_pic_static_addr (orig, reg);
5385
      else
5386
        {
5387
          rtx pat;
5388
          rtx mem;
5389
 
5390
          /* If this function doesn't have a pic register, create one now.  */
5391
          require_pic_register ();
5392
 
5393
          pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
5394
 
5395
          /* Make the MEM as close to a constant as possible.  */
5396
          mem = SET_SRC (pat);
5397
          gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
5398
          MEM_READONLY_P (mem) = 1;
5399
          MEM_NOTRAP_P (mem) = 1;
5400
 
5401
          insn = emit_insn (pat);
5402
        }
5403
 
5404
      /* Put a REG_EQUAL note on this insn, so that it can be optimized
5405
         by loop.  */
5406
      set_unique_reg_note (insn, REG_EQUAL, orig);
5407
 
5408
      return reg;
5409
    }
5410
  else if (GET_CODE (orig) == CONST)
5411
    {
5412
      rtx base, offset;
5413
 
5414
      if (GET_CODE (XEXP (orig, 0)) == PLUS
5415
          && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
5416
        return orig;
5417
 
5418
      /* Handle the case where we have: const (UNSPEC_TLS).  */
5419
      if (GET_CODE (XEXP (orig, 0)) == UNSPEC
5420
          && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
5421
        return orig;
5422
 
5423
      /* Handle the case where we have:
5424
         const (plus (UNSPEC_TLS) (ADDEND)).  The ADDEND must be a
5425
         CONST_INT.  */
5426
      if (GET_CODE (XEXP (orig, 0)) == PLUS
5427
          && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5428
          && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5429
        {
5430
          gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
5431
          return orig;
5432
        }
5433
 
5434
      if (reg == 0)
5435
        {
5436
          gcc_assert (can_create_pseudo_p ());
5437
          reg = gen_reg_rtx (Pmode);
5438
        }
5439
 
5440
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5441
 
5442
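      /* Legitimize both halves of the PLUS, re-using REG for the base
         and, when the base did not consume it, for the offset as
         well.  */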
      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5443
      offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5444
                                       base == reg ? 0 : reg);
5445
 
5446
      if (GET_CODE (offset) == CONST_INT)
5447
        {
5448
          /* The base register doesn't really matter, we only want to
5449
             test the index for the appropriate mode.  */
5450
          if (!arm_legitimate_index_p (mode, offset, SET, 0))
5451
            {
5452
              gcc_assert (can_create_pseudo_p ());
5453
              offset = force_reg (Pmode, offset);
5454
            }
5455
 
5456
          if (GET_CODE (offset) == CONST_INT)
5457
            return plus_constant (base, INTVAL (offset));
5458
        }
5459
 
5460
      if (GET_MODE_SIZE (mode) > 4
5461
          && (GET_MODE_CLASS (mode) == MODE_INT
5462
              || TARGET_SOFT_FLOAT))
5463
        {
5464
          emit_insn (gen_addsi3 (reg, base, offset));
5465
          return reg;
5466
        }
5467
 
5468
      return gen_rtx_PLUS (Pmode, base, offset);
5469
    }
5470
 
5471
  return orig;
5472
}
5473
 
5474
 
5475
/* Find a spare register to use during the prologue of a function.  */
5476
 
5477
static int
5478
thumb_find_work_register (unsigned long pushed_regs_mask)
5479
{
5480
  int reg;
5481
 
5482
  /* Check the argument registers first as these are call-used.  The
5483
     register allocation order means that sometimes r3 might be used
5484
     but earlier argument registers might not, so check them all.  */
5485
  for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5486
    if (!df_regs_ever_live_p (reg))
5487
      return reg;
5488
 
5489
  /* Before going on to check the call-saved registers we can try a couple
5490
     more ways of deducing that r3 is available.  The first is when we are
5491
     pushing anonymous arguments onto the stack and we have less than 4
5492
     registers worth of fixed arguments(*).  In this case r3 will be part of
5493
     the variable argument list and so we can be sure that it will be
5494
     pushed right at the start of the function.  Hence it will be available
5495
     for the rest of the prologue.
5496
     (*): i.e. crtl->args.pretend_args_size is greater than 0.  */
5497
  if (cfun->machine->uses_anonymous_args
5498
      && crtl->args.pretend_args_size > 0)
5499
    return LAST_ARG_REGNUM;
5500
 
5501
  /* The other case is when we have fixed arguments but less than 4 registers
5502
     worth.  In this case r3 might be used in the body of the function, but
5503
     it is not being used to convey an argument into the function.  In theory
5504
     we could just check crtl->args.size to see how many bytes are
5505
     being passed in argument registers, but it seems that it is unreliable.
5506
     Sometimes it will have the value 0 when in fact arguments are being
5507
     passed.  (See testcase execute/20021111-1.c for an example).  So we also
5508
     check the args_info.nregs field as well.  The problem with this field is
5509
     that it makes no allowances for arguments that are passed to the
5510
     function but which are not used.  Hence we could miss an opportunity
5511
     when a function has an unused argument in r3.  But it is better to be
5512
     safe than to be sorry.  */
5513
  if (! cfun->machine->uses_anonymous_args
5514
      && crtl->args.size >= 0
5515
      && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5516
      && crtl->args.info.nregs < 4)
5517
    return LAST_ARG_REGNUM;
5518
 
5519
  /* Otherwise look for a call-saved register that is going to be pushed.  */
5520
  for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5521
    if (pushed_regs_mask & (1 << reg))
5522
      return reg;
5523
 
5524
  if (TARGET_THUMB2)
5525
    {
5526
      /* Thumb-2 can use high regs.  */
5527
      for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5528
        if (pushed_regs_mask & (1 << reg))
5529
          return reg;
5530
    }
5531
  /* Something went wrong - thumb_compute_save_reg_mask()
5532
     should have arranged for a suitable register to be pushed.  */
5533
  gcc_unreachable ();
5534
}
5535
 
5536
static GTY(()) int pic_labelno;
5537
 
5538
/* Generate code to load the PIC register.  In thumb mode SCRATCH is a
5539
   low register.  */
5540
 
5541
void
5542
arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5543
{
5544
  rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5545
 
5546
  if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5547
    return;
5548
 
5549
  gcc_assert (flag_pic);
5550
 
5551
  pic_reg = cfun->machine->pic_reg;
5552
  if (TARGET_VXWORKS_RTP)
5553
    {
5554
      pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5555
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5556
      emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5557
 
5558
      emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5559
 
5560
      pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5561
      emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5562
    }
5563
  else
5564
    {
5565
      /* We use an UNSPEC rather than a LABEL_REF because this label
5566
         never appears in the code stream.  */
5567
 
5568
      labelno = GEN_INT (pic_labelno++);
5569
      l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5570
      l1 = gen_rtx_CONST (VOIDmode, l1);
5571
 
5572
      /* On the ARM the PC register contains 'dot + 8' at the time of the
5573
         addition, on the Thumb it is 'dot + 4'.  */
5574
      pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5575
      pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5576
                                UNSPEC_GOTSYM_OFF);
5577
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5578
 
5579
      if (TARGET_32BIT)
5580
        {
5581
          emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
5582
        }
5583
      else /* TARGET_THUMB1 */
5584
        {
5585
          if (arm_pic_register != INVALID_REGNUM
5586
              && REGNO (pic_reg) > LAST_LO_REGNUM)
5587
            {
5588
              /* We will have pushed the pic register, so we should always be
5589
                 able to find a work register.  */
5590
              pic_tmp = gen_rtx_REG (SImode,
5591
                                     thumb_find_work_register (saved_regs));
5592
              emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5593
              emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5594
              emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5595
            }
5596
          else
5597
            emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
5598
        }
5599
    }
5600
 
5601
  /* Need to emit this whether or not we obey regdecls,
5602
     since setjmp/longjmp can cause life info to screw up.  */
5603
  emit_use (pic_reg);
5604
}
5605
 
5606
/* Generate code to load the address of a static var when flag_pic is set.  */
5607
static rtx
5608
arm_pic_static_addr (rtx orig, rtx reg)
5609
{
5610
  rtx l1, labelno, offset_rtx, insn;
5611
 
5612
  gcc_assert (flag_pic);
5613
 
5614
  /* We use an UNSPEC rather than a LABEL_REF because this label
5615
     never appears in the code stream.  */
5616
  labelno = GEN_INT (pic_labelno++);
5617
  l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5618
  l1 = gen_rtx_CONST (VOIDmode, l1);
5619
 
5620
  /* On the ARM the PC register contains 'dot + 8' at the time of the
5621
     addition, on the Thumb it is 'dot + 4'.  */
5622
  offset_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5623
  offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5624
                               UNSPEC_SYMBOL_OFFSET);
5625
  offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5626
 
5627
  insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
5628
  return insn;
5629
}
5630
 
5631
/* Return nonzero if X is valid as an ARM state addressing register.  */
5632
static int
5633
arm_address_register_rtx_p (rtx x, int strict_p)
5634
{
5635
  int regno;
5636
 
5637
  if (GET_CODE (x) != REG)
5638
    return 0;
5639
 
5640
  regno = REGNO (x);
5641
 
5642
  if (strict_p)
5643
    return ARM_REGNO_OK_FOR_BASE_P (regno);
5644
 
5645
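  /* In the non-strict case also accept pseudo registers and the frame
     and argument pointers; these will be replaced by hard base
     registers during reload and register elimination.  */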
  return (regno <= LAST_ARM_REGNUM
5646
          || regno >= FIRST_PSEUDO_REGISTER
5647
          || regno == FRAME_POINTER_REGNUM
5648
          || regno == ARG_POINTER_REGNUM);
5649
}
5650
 
5651
/* Return TRUE if this rtx is the difference of a symbol and a label,
5652
   and will reduce to a PC-relative relocation in the object file.
5653
   Expressions like this can be left alone when generating PIC, rather
5654
   than forced through the GOT.  */
5655
static int
5656
pcrel_constant_p (rtx x)
5657
{
5658
  if (GET_CODE (x) == MINUS)
5659
    return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5660
 
5661
  return FALSE;
5662
}
5663
 
5664
/* Return true if X will surely end up in an index register after next
5665
   splitting pass.  */
5666
static bool
5667
will_be_in_index_register (const_rtx x)
5668
{
5669
  /* arm.md: calculate_pic_address will split this into a register.  */
5670
  return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
5671
}
5672
 
5673
/* Return nonzero if X is a valid ARM state address operand.  */
5674
int
5675
arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5676
                                int strict_p)
5677
{
5678
  bool use_ldrd;
5679
  enum rtx_code code = GET_CODE (x);
5680
 
5681
  if (arm_address_register_rtx_p (x, strict_p))
5682
    return 1;
5683
 
5684
  use_ldrd = (TARGET_LDRD
5685
              && (mode == DImode
5686
                  || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5687
 
5688
  if (code == POST_INC || code == PRE_DEC
5689
      || ((code == PRE_INC || code == POST_DEC)
5690
          && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5691
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5692
 
5693
  else if ((code == POST_MODIFY || code == PRE_MODIFY)
5694
           && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5695
           && GET_CODE (XEXP (x, 1)) == PLUS
5696
           && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5697
    {
5698
      rtx addend = XEXP (XEXP (x, 1), 1);
5699
 
5700
      /* Don't allow ldrd post increment by register because it's hard
5701
         to fixup invalid register choices.  */
5702
      if (use_ldrd
5703
          && GET_CODE (x) == POST_MODIFY
5704
          && GET_CODE (addend) == REG)
5705
        return 0;
5706
 
5707
      return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5708
              && arm_legitimate_index_p (mode, addend, outer, strict_p));
5709
    }
5710
 
5711
  /* After reload constants split into minipools will have addresses
5712
     from a LABEL_REF.  */
5713
  else if (reload_completed
5714
           && (code == LABEL_REF
5715
               || (code == CONST
5716
                   && GET_CODE (XEXP (x, 0)) == PLUS
5717
                   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5718
                   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5719
    return 1;
5720
 
5721
  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5722
    return 0;
5723
 
5724
  else if (code == PLUS)
5725
    {
5726
      rtx xop0 = XEXP (x, 0);
5727
      rtx xop1 = XEXP (x, 1);
5728
 
5729
      return ((arm_address_register_rtx_p (xop0, strict_p)
5730
               && ((GET_CODE(xop1) == CONST_INT
5731
                    && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5732
                   || (!strict_p && will_be_in_index_register (xop1))))
5733
              || (arm_address_register_rtx_p (xop1, strict_p)
5734
                  && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5735
    }
5736
 
5737
#if 0
5738
  /* Reload currently can't handle MINUS, so disable this for now */
5739
  else if (GET_CODE (x) == MINUS)
5740
    {
5741
      rtx xop0 = XEXP (x, 0);
5742
      rtx xop1 = XEXP (x, 1);
5743
 
5744
      return (arm_address_register_rtx_p (xop0, strict_p)
5745
              && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5746
    }
5747
#endif
5748
 
5749
  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5750
           && code == SYMBOL_REF
5751
           && CONSTANT_POOL_ADDRESS_P (x)
5752
           && ! (flag_pic
5753
                 && symbol_mentioned_p (get_pool_constant (x))
5754
                 && ! pcrel_constant_p (get_pool_constant (x))))
5755
    return 1;
5756
 
5757
  return 0;
5758
}
5759
 
5760
/* Return nonzero if X is a valid Thumb-2 address operand.  */
5761
static int
5762
thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5763
{
5764
  bool use_ldrd;
5765
  enum rtx_code code = GET_CODE (x);
5766
 
5767
  if (arm_address_register_rtx_p (x, strict_p))
5768
    return 1;
5769
 
5770
  use_ldrd = (TARGET_LDRD
5771
              && (mode == DImode
5772
                  || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5773
 
5774
  if (code == POST_INC || code == PRE_DEC
5775
      || ((code == PRE_INC || code == POST_DEC)
5776
          && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5777
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5778
 
5779
  else if ((code == POST_MODIFY || code == PRE_MODIFY)
5780
           && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5781
           && GET_CODE (XEXP (x, 1)) == PLUS
5782
           && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5783
    {
5784
      /* Thumb-2 only has autoincrement by constant.  */
5785
      rtx addend = XEXP (XEXP (x, 1), 1);
5786
      HOST_WIDE_INT offset;
5787
 
5788
      if (GET_CODE (addend) != CONST_INT)
5789
        return 0;
5790
 
5791
      offset = INTVAL (addend);
5792
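      /* Word and sub-word accesses allow an 8-bit writeback offset;
         LDRD/STRD need a word-aligned offset of at most +/-1020.  */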
      if (GET_MODE_SIZE (mode) <= 4)
5793
        return (offset > -256 && offset < 256);
5794
 
5795
      return (use_ldrd && offset > -1024 && offset < 1024
5796
              && (offset & 3) == 0);
5797
    }
5798
 
5799
  /* After reload constants split into minipools will have addresses
5800
     from a LABEL_REF.  */
5801
  else if (reload_completed
5802
           && (code == LABEL_REF
5803
               || (code == CONST
5804
                   && GET_CODE (XEXP (x, 0)) == PLUS
5805
                   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5806
                   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5807
    return 1;
5808
 
5809
  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5810
    return 0;
5811
 
5812
  else if (code == PLUS)
5813
    {
5814
      rtx xop0 = XEXP (x, 0);
5815
      rtx xop1 = XEXP (x, 1);
5816
 
5817
      return ((arm_address_register_rtx_p (xop0, strict_p)
5818
               && (thumb2_legitimate_index_p (mode, xop1, strict_p)
5819
                   || (!strict_p && will_be_in_index_register (xop1))))
5820
              || (arm_address_register_rtx_p (xop1, strict_p)
5821
                  && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5822
    }
5823
 
5824
  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5825
           && code == SYMBOL_REF
5826
           && CONSTANT_POOL_ADDRESS_P (x)
5827
           && ! (flag_pic
5828
                 && symbol_mentioned_p (get_pool_constant (x))
5829
                 && ! pcrel_constant_p (get_pool_constant (x))))
5830
    return 1;
5831
 
5832
  return 0;
5833
}
5834
 
5835
/* Return nonzero if INDEX is valid for an address index operand in
5836
   ARM state.  */
5837
static int
5838
arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5839
                        int strict_p)
5840
{
5841
  HOST_WIDE_INT range;
5842
  enum rtx_code code = GET_CODE (index);
5843
 
5844
  /* Standard coprocessor addressing modes.  */
5845
  if (TARGET_HARD_FLOAT
5846
      && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5847
      && (mode == SFmode || mode == DFmode
5848
          || (TARGET_MAVERICK && mode == DImode)))
5849
    return (code == CONST_INT && INTVAL (index) < 1024
5850
            && INTVAL (index) > -1024
5851
            && (INTVAL (index) & 3) == 0);
5852
 
5853
  /* For quad modes, we restrict the constant offset to be slightly less
5854
     than what the instruction format permits.  We do this because for
5855
     quad mode moves, we will actually decompose them into two separate
5856
     double-mode reads or writes.  INDEX must therefore be a valid
5857
     (double-mode) offset and so should INDEX+8.  */
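  /* For instance, with the limit below a quad-word offset of 1012 is
     accepted (both 1012 and 1012 + 8 = 1020 fit the double-word range),
     whereas 1016 is rejected because 1016 + 8 = 1024 would overflow it.  */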
5858
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5859
    return (code == CONST_INT
5860
            && INTVAL (index) < 1016
5861
            && INTVAL (index) > -1024
5862
            && (INTVAL (index) & 3) == 0);
5863
 
5864
  /* We have no such constraint on double mode offsets, so we permit the
5865
     full range of the instruction format.  */
5866
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5867
    return (code == CONST_INT
5868
            && INTVAL (index) < 1024
5869
            && INTVAL (index) > -1024
5870
            && (INTVAL (index) & 3) == 0);
5871
 
5872
  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5873
    return (code == CONST_INT
5874
            && INTVAL (index) < 1024
5875
            && INTVAL (index) > -1024
5876
            && (INTVAL (index) & 3) == 0);
5877
 
5878
  if (arm_address_register_rtx_p (index, strict_p)
5879
      && (GET_MODE_SIZE (mode) <= 4))
5880
    return 1;
5881
 
5882
  if (mode == DImode || mode == DFmode)
5883
    {
5884
      if (code == CONST_INT)
5885
        {
5886
          HOST_WIDE_INT val = INTVAL (index);
5887
 
5888
          if (TARGET_LDRD)
5889
            return val > -256 && val < 256;
5890
          else
5891
            return val > -4096 && val < 4092;
5892
        }
5893
 
5894
      return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5895
    }
5896
 
5897
  if (GET_MODE_SIZE (mode) <= 4
5898
      && ! (arm_arch4
5899
            && (mode == HImode
5900
                || mode == HFmode
5901
                || (mode == QImode && outer == SIGN_EXTEND))))
5902
    {
5903
      if (code == MULT)
5904
        {
5905
          rtx xiop0 = XEXP (index, 0);
5906
          rtx xiop1 = XEXP (index, 1);
5907
 
5908
          return ((arm_address_register_rtx_p (xiop0, strict_p)
5909
                   && power_of_two_operand (xiop1, SImode))
5910
                  || (arm_address_register_rtx_p (xiop1, strict_p)
5911
                      && power_of_two_operand (xiop0, SImode)));
5912
        }
5913
      else if (code == LSHIFTRT || code == ASHIFTRT
5914
               || code == ASHIFT || code == ROTATERT)
5915
        {
5916
          rtx op = XEXP (index, 1);
5917
 
5918
          return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5919
                  && GET_CODE (op) == CONST_INT
5920
                  && INTVAL (op) > 0
5921
                  && INTVAL (op) <= 31);
5922
        }
5923
    }
5924
 
5925
  /* For ARM v4 we may be doing a sign-extend operation during the
5926
     load.  */
5927
  if (arm_arch4)
5928
    {
5929
      if (mode == HImode
5930
          || mode == HFmode
5931
          || (outer == SIGN_EXTEND && mode == QImode))
5932
        range = 256;
5933
      else
5934
        range = 4096;
5935
    }
5936
  else
5937
    range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5938
 
5939
  return (code == CONST_INT
5940
          && INTVAL (index) < range
5941
          && INTVAL (index) > -range);
5942
}
5943
 
5944
/* Return true if OP is a valid index scaling factor for Thumb-2 address
5945
   index operand, i.e. 1, 2, 4 or 8.  */
5946
static bool
5947
thumb2_index_mul_operand (rtx op)
5948
{
5949
  HOST_WIDE_INT val;
5950
 
5951
  if (GET_CODE(op) != CONST_INT)
5952
    return false;
5953
 
5954
  val = INTVAL(op);
5955
  return (val == 1 || val == 2 || val == 4 || val == 8);
5956
}
5957
 
5958
/* Return nonzero if INDEX is a valid Thumb-2 address index operand.  */
5959
static int
5960
thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5961
{
5962
  enum rtx_code code = GET_CODE (index);
5963
 
5964
  /* ??? Combine arm and thumb2 coprocessor addressing modes.  */
5965
  /* Standard coprocessor addressing modes.  */
5966
  if (TARGET_HARD_FLOAT
5967
      && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5968
      && (mode == SFmode || mode == DFmode
5969
          || (TARGET_MAVERICK && mode == DImode)))
5970
    return (code == CONST_INT && INTVAL (index) < 1024
5971
            /* Thumb-2 allows only > -256 index range for its core register
5972
               load/stores. Since we allow SF/DF in core registers, we have
5973
               to use the intersection between -256~4096 (core) and -1024~1024
5974
               (coprocessor).  */
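            /* E.g. a DFmode offset of -260 is rejected here even though the
               coprocessor side could encode it, because the core-register
               fallback described above cannot reach below -255; -252 is the
               most negative offset accepted.  */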
5975
            && INTVAL (index) > -256
5976
            && (INTVAL (index) & 3) == 0);
5977
 
5978
  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5979
    {
5980
      /* For DImode assume values will usually live in core regs
5981
         and only allow LDRD addressing modes.  */
5982
      if (!TARGET_LDRD || mode != DImode)
5983
        return (code == CONST_INT
5984
                && INTVAL (index) < 1024
5985
                && INTVAL (index) > -1024
5986
                && (INTVAL (index) & 3) == 0);
5987
    }
5988
 
5989
  /* For quad modes, we restrict the constant offset to be slightly less
5990
     than what the instruction format permits.  We do this because for
5991
     quad mode moves, we will actually decompose them into two separate
5992
     double-mode reads or writes.  INDEX must therefore be a valid
5993
     (double-mode) offset and so should INDEX+8.  */
5994
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5995
    return (code == CONST_INT
5996
            && INTVAL (index) < 1016
5997
            && INTVAL (index) > -1024
5998
            && (INTVAL (index) & 3) == 0);
5999
 
6000
  /* We have no such constraint on double mode offsets, so we permit the
6001
     full range of the instruction format.  */
6002
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6003
    return (code == CONST_INT
6004
            && INTVAL (index) < 1024
6005
            && INTVAL (index) > -1024
6006
            && (INTVAL (index) & 3) == 0);
6007
 
6008
  if (arm_address_register_rtx_p (index, strict_p)
6009
      && (GET_MODE_SIZE (mode) <= 4))
6010
    return 1;
6011
 
6012
  if (mode == DImode || mode == DFmode)
6013
    {
6014
      if (code == CONST_INT)
6015
        {
6016
          HOST_WIDE_INT val = INTVAL (index);
6017
          /* ??? Can we assume ldrd for thumb2?  */
6018
          /* Thumb-2 ldrd only has reg+const addressing modes.  */
6019
          /* ldrd supports offsets of +-1020.
6020
             However the ldr fallback does not.  */
6021
          return val > -256 && val < 256 && (val & 3) == 0;
6022
        }
6023
      else
6024
        return 0;
6025
    }
6026
 
6027
  if (code == MULT)
6028
    {
6029
      rtx xiop0 = XEXP (index, 0);
6030
      rtx xiop1 = XEXP (index, 1);
6031
 
6032
      return ((arm_address_register_rtx_p (xiop0, strict_p)
6033
               && thumb2_index_mul_operand (xiop1))
6034
              || (arm_address_register_rtx_p (xiop1, strict_p)
6035
                  && thumb2_index_mul_operand (xiop0)));
6036
    }
6037
  else if (code == ASHIFT)
6038
    {
6039
      rtx op = XEXP (index, 1);
6040
 
6041
      return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6042
              && GET_CODE (op) == CONST_INT
6043
              && INTVAL (op) > 0
6044
              && INTVAL (op) <= 3);
6045
    }
6046
 
6047
  return (code == CONST_INT
6048
          && INTVAL (index) < 4096
6049
          && INTVAL (index) > -256);
6050
}
6051
 
6052
/* Return nonzero if X is valid as a 16-bit Thumb state base register.  */
6053
static int
6054
thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
6055
{
6056
  int regno;
6057
 
6058
  if (GET_CODE (x) != REG)
6059
    return 0;
6060
 
6061
  regno = REGNO (x);
6062
 
6063
  if (strict_p)
6064
    return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
6065
 
6066
  return (regno <= LAST_LO_REGNUM
6067
          || regno > LAST_VIRTUAL_REGISTER
6068
          || regno == FRAME_POINTER_REGNUM
6069
          || (GET_MODE_SIZE (mode) >= 4
6070
              && (regno == STACK_POINTER_REGNUM
6071
                  || regno >= FIRST_PSEUDO_REGISTER
6072
                  || x == hard_frame_pointer_rtx
6073
                  || x == arg_pointer_rtx)));
6074
}
6075
 
6076
/* Return nonzero if x is a legitimate index register.  This is the case
6077
   for any base register that can access a QImode object.  */
6078
inline static int
6079
thumb1_index_register_rtx_p (rtx x, int strict_p)
6080
{
6081
  return thumb1_base_register_rtx_p (x, QImode, strict_p);
6082
}
6083
 
6084
/* Return nonzero if x is a legitimate 16-bit Thumb-state address.
6085
 
6086
   The AP may be eliminated to either the SP or the FP, so we use the
6087
   least common denominator, e.g. SImode, and offsets from 0 to 64.
6088
 
6089
   ??? Verify whether the above is the right approach.
6090
 
6091
   ??? Also, the FP may be eliminated to the SP, so perhaps that
6092
   needs special handling also.
6093
 
6094
   ??? Look at how the mips16 port solves this problem.  It probably uses
6095
   better ways to solve some of these problems.
6096
 
6097
   Although it is not incorrect, we don't accept QImode and HImode
6098
   addresses based on the frame pointer or arg pointer until the
6099
   reload pass starts.  This is so that eliminating such addresses
6100
   into stack based ones won't produce impossible code.  */
6101
int
6102
thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6103
{
6104
  /* ??? Not clear if this is right.  Experiment.  */
6105
  if (GET_MODE_SIZE (mode) < 4
6106
      && !(reload_in_progress || reload_completed)
6107
      && (reg_mentioned_p (frame_pointer_rtx, x)
6108
          || reg_mentioned_p (arg_pointer_rtx, x)
6109
          || reg_mentioned_p (virtual_incoming_args_rtx, x)
6110
          || reg_mentioned_p (virtual_outgoing_args_rtx, x)
6111
          || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
6112
          || reg_mentioned_p (virtual_stack_vars_rtx, x)))
6113
    return 0;
6114
 
6115
  /* Accept any base register.  SP only in SImode or larger.  */
6116
  else if (thumb1_base_register_rtx_p (x, mode, strict_p))
6117
    return 1;
6118
 
6119
  /* This is PC relative data before arm_reorg runs.  */
6120
  else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
6121
           && GET_CODE (x) == SYMBOL_REF
6122
           && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
6123
    return 1;
6124
 
6125
  /* This is PC relative data after arm_reorg runs.  */
6126
  else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
6127
           && reload_completed
6128
           && (GET_CODE (x) == LABEL_REF
6129
               || (GET_CODE (x) == CONST
6130
                   && GET_CODE (XEXP (x, 0)) == PLUS
6131
                   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6132
                   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
6133
    return 1;
6134
 
6135
  /* Post-inc indexing only supported for SImode and larger.  */
6136
  else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
6137
           && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
6138
    return 1;
6139
 
6140
  else if (GET_CODE (x) == PLUS)
6141
    {
6142
      /* REG+REG address can be any two index registers.  */
6143
      /* We disallow FRAME+REG addressing since we know that FRAME
6144
         will be replaced with STACK, and SP relative addressing only
6145
         permits SP+OFFSET.  */
6146
      if (GET_MODE_SIZE (mode) <= 4
6147
          && XEXP (x, 0) != frame_pointer_rtx
6148
          && XEXP (x, 1) != frame_pointer_rtx
6149
          && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6150
          && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
6151
              || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
6152
        return 1;
6153
 
6154
      /* REG+const has 5-7 bit offset for non-SP registers.  */
6155
      else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6156
                || XEXP (x, 0) == arg_pointer_rtx)
6157
               && GET_CODE (XEXP (x, 1)) == CONST_INT
6158
               && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6159
        return 1;
6160
 
6161
      /* REG+const has 10-bit offset for SP, but only SImode and
6162
         larger are supported.  */
6163
      /* ??? Should probably check for DI/DFmode overflow here
6164
         just like GO_IF_LEGITIMATE_OFFSET does.  */
6165
      else if (GET_CODE (XEXP (x, 0)) == REG
6166
               && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
6167
               && GET_MODE_SIZE (mode) >= 4
6168
               && GET_CODE (XEXP (x, 1)) == CONST_INT
6169
               && INTVAL (XEXP (x, 1)) >= 0
6170
               && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
6171
               && (INTVAL (XEXP (x, 1)) & 3) == 0)
6172
        return 1;
6173
 
6174
      else if (GET_CODE (XEXP (x, 0)) == REG
6175
               && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
6176
                   || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
6177
                   || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
6178
                       && REGNO (XEXP (x, 0))
6179
                          <= LAST_VIRTUAL_POINTER_REGISTER))
6180
               && GET_MODE_SIZE (mode) >= 4
6181
               && GET_CODE (XEXP (x, 1)) == CONST_INT
6182
               && (INTVAL (XEXP (x, 1)) & 3) == 0)
6183
        return 1;
6184
    }
6185
 
6186
  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6187
           && GET_MODE_SIZE (mode) == 4
6188
           && GET_CODE (x) == SYMBOL_REF
6189
           && CONSTANT_POOL_ADDRESS_P (x)
6190
           && ! (flag_pic
6191
                 && symbol_mentioned_p (get_pool_constant (x))
6192
                 && ! pcrel_constant_p (get_pool_constant (x))))
6193
    return 1;
6194
 
6195
  return 0;
6196
}
6197
 
6198
/* Return nonzero if VAL can be used as an offset in a Thumb-state address
6199
   instruction of mode MODE.  */
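/* The ranges checked below roughly correspond to the 16-bit Thumb
   immediate-offset forms: byte accesses take #0..#31, halfword accesses
   #0..#62 (even), and word accesses #0..#124 (a multiple of four).  */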
6200
int
6201
thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
6202
{
6203
  switch (GET_MODE_SIZE (mode))
6204
    {
6205
    case 1:
6206
      return val >= 0 && val < 32;
6207
 
6208
    case 2:
6209
      return val >= 0 && val < 64 && (val & 1) == 0;
6210
 
6211
    default:
6212
      return (val >= 0
6213
              && (val + GET_MODE_SIZE (mode)) <= 128
6214
              && (val & 3) == 0);
6215
    }
6216
}
6217
 
6218
bool
6219
arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
6220
{
6221
  if (TARGET_ARM)
6222
    return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
6223
  else if (TARGET_THUMB2)
6224
    return thumb2_legitimate_address_p (mode, x, strict_p);
6225
  else /* if (TARGET_THUMB1) */
6226
    return thumb1_legitimate_address_p (mode, x, strict_p);
6227
}
6228
 
6229
/* Build the SYMBOL_REF for __tls_get_addr.  */
6230
 
6231
static GTY(()) rtx tls_get_addr_libfunc;
6232
 
6233
static rtx
6234
get_tls_get_addr (void)
6235
{
6236
  if (!tls_get_addr_libfunc)
6237
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
6238
  return tls_get_addr_libfunc;
6239
}
6240
 
6241
static rtx
6242
arm_load_tp (rtx target)
6243
{
6244
  if (!target)
6245
    target = gen_reg_rtx (SImode);
6246
 
6247
  if (TARGET_HARD_TP)
6248
    {
6249
      /* Can return in any reg.  */
6250
      emit_insn (gen_load_tp_hard (target));
6251
    }
6252
  else
6253
    {
6254
      /* Always returned in r0.  Immediately copy the result into a pseudo,
6255
         otherwise other uses of r0 (e.g. setting up function arguments) may
6256
         clobber the value.  */
6257
 
6258
      rtx tmp;
6259
 
6260
      emit_insn (gen_load_tp_soft ());
6261
 
6262
      tmp = gen_rtx_REG (SImode, 0);
6263
      emit_move_insn (target, tmp);
6264
    }
6265
  return target;
6266
}
6267
 
6268
static rtx
6269
load_tls_operand (rtx x, rtx reg)
6270
{
6271
  rtx tmp;
6272
 
6273
  if (reg == NULL_RTX)
6274
    reg = gen_reg_rtx (SImode);
6275
 
6276
  tmp = gen_rtx_CONST (SImode, x);
6277
 
6278
  emit_move_insn (reg, tmp);
6279
 
6280
  return reg;
6281
}
6282
 
6283
static rtx
6284
arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
6285
{
6286
  rtx insns, label, labelno, sum;
6287
 
6288
  gcc_assert (reloc != TLS_DESCSEQ);
6289
  start_sequence ();
6290
 
6291
  labelno = GEN_INT (pic_labelno++);
6292
  label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6293
  label = gen_rtx_CONST (VOIDmode, label);
6294
 
6295
  sum = gen_rtx_UNSPEC (Pmode,
6296
                        gen_rtvec (4, x, GEN_INT (reloc), label,
6297
                                   GEN_INT (TARGET_ARM ? 8 : 4)),
6298
                        UNSPEC_TLS);
6299
  reg = load_tls_operand (sum, reg);
6300
 
6301
  if (TARGET_ARM)
6302
    emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
6303
  else
6304
    emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6305
 
6306
  *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
6307
                                     LCT_PURE, /* LCT_CONST?  */
6308
                                     Pmode, 1, reg, Pmode);
6309
 
6310
  insns = get_insns ();
6311
  end_sequence ();
6312
 
6313
  return insns;
6314
}
6315
 
6316
static rtx
6317
arm_tls_descseq_addr (rtx x, rtx reg)
6318
{
6319
  rtx labelno = GEN_INT (pic_labelno++);
6320
  rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6321
  rtx sum = gen_rtx_UNSPEC (Pmode,
6322
                            gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
6323
                                       gen_rtx_CONST (VOIDmode, label),
6324
                                       GEN_INT (!TARGET_ARM)),
6325
                            UNSPEC_TLS);
6326
  rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
6327
 
6328
  emit_insn (gen_tlscall (x, labelno));
6329
  if (!reg)
6330
    reg = gen_reg_rtx (SImode);
6331
  else
6332
    gcc_assert (REGNO (reg) != 0);
6333
 
6334
  emit_move_insn (reg, reg0);
6335
 
6336
  return reg;
6337
}
6338
 
6339
rtx
6340
legitimize_tls_address (rtx x, rtx reg)
6341
{
6342
  rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
6343
  unsigned int model = SYMBOL_REF_TLS_MODEL (x);
6344
 
6345
  switch (model)
6346
    {
6347
    case TLS_MODEL_GLOBAL_DYNAMIC:
6348
      if (TARGET_GNU2_TLS)
6349
        {
6350
          reg = arm_tls_descseq_addr (x, reg);
6351
 
6352
          tp = arm_load_tp (NULL_RTX);
6353
 
6354
          dest = gen_rtx_PLUS (Pmode, tp, reg);
6355
        }
6356
      else
6357
        {
6358
          /* Original scheme */
6359
          insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
6360
          dest = gen_reg_rtx (Pmode);
6361
          emit_libcall_block (insns, dest, ret, x);
6362
        }
6363
      return dest;
6364
 
6365
    case TLS_MODEL_LOCAL_DYNAMIC:
6366
      if (TARGET_GNU2_TLS)
6367
        {
6368
          reg = arm_tls_descseq_addr (x, reg);
6369
 
6370
          tp = arm_load_tp (NULL_RTX);
6371
 
6372
          dest = gen_rtx_PLUS (Pmode, tp, reg);
6373
        }
6374
      else
6375
        {
6376
          insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
6377
 
6378
          /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
6379
             share the LDM result with other LD model accesses.  */
6380
          eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
6381
                                UNSPEC_TLS);
6382
          dest = gen_reg_rtx (Pmode);
6383
          emit_libcall_block (insns, dest, ret, eqv);
6384
 
6385
          /* Load the addend.  */
6386
          addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
6387
                                                     GEN_INT (TLS_LDO32)),
6388
                                   UNSPEC_TLS);
6389
          addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
6390
          dest = gen_rtx_PLUS (Pmode, dest, addend);
6391
        }
6392
      return dest;
6393
 
6394
    case TLS_MODEL_INITIAL_EXEC:
6395
      labelno = GEN_INT (pic_labelno++);
6396
      label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6397
      label = gen_rtx_CONST (VOIDmode, label);
6398
      sum = gen_rtx_UNSPEC (Pmode,
6399
                            gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
6400
                                       GEN_INT (TARGET_ARM ? 8 : 4)),
6401
                            UNSPEC_TLS);
6402
      reg = load_tls_operand (sum, reg);
6403
 
6404
      if (TARGET_ARM)
6405
        emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
6406
      else if (TARGET_THUMB2)
6407
        emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
6408
      else
6409
        {
6410
          emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6411
          emit_move_insn (reg, gen_const_mem (SImode, reg));
6412
        }
6413
 
6414
      tp = arm_load_tp (NULL_RTX);
6415
 
6416
      return gen_rtx_PLUS (Pmode, tp, reg);
6417
 
6418
    case TLS_MODEL_LOCAL_EXEC:
6419
      tp = arm_load_tp (NULL_RTX);
6420
 
6421
      reg = gen_rtx_UNSPEC (Pmode,
6422
                            gen_rtvec (2, x, GEN_INT (TLS_LE32)),
6423
                            UNSPEC_TLS);
6424
      reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
6425
 
6426
      return gen_rtx_PLUS (Pmode, tp, reg);
6427
 
6428
    default:
6429
      abort ();
6430
    }
6431
}
6432
 
6433
/* Try machine-dependent ways of modifying an illegitimate address
6434
   to be legitimate.  If we find one, return the new, valid address.  */
6435
rtx
6436
arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6437
{
6438
  if (!TARGET_ARM)
6439
    {
6440
      /* TODO: legitimize_address for Thumb2.  */
6441
      if (TARGET_THUMB2)
6442
        return x;
6443
      return thumb_legitimize_address (x, orig_x, mode);
6444
    }
6445
 
6446
  if (arm_tls_symbol_p (x))
6447
    return legitimize_tls_address (x, NULL_RTX);
6448
 
6449
  if (GET_CODE (x) == PLUS)
6450
    {
6451
      rtx xop0 = XEXP (x, 0);
6452
      rtx xop1 = XEXP (x, 1);
6453
 
6454
      if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
6455
        xop0 = force_reg (SImode, xop0);
6456
 
6457
      if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
6458
        xop1 = force_reg (SImode, xop1);
6459
 
6460
      if (ARM_BASE_REGISTER_RTX_P (xop0)
6461
          && GET_CODE (xop1) == CONST_INT)
6462
        {
6463
          HOST_WIDE_INT n, low_n;
6464
          rtx base_reg, val;
6465
          n = INTVAL (xop1);
6466
 
6467
          /* VFP addressing modes actually allow greater offsets, but for
6468
             now we just stick with the lowest common denominator.  */
6469
          if (mode == DImode
6470
              || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
6471
            {
6472
              low_n = n & 0x0f;
6473
              n &= ~0x0f;
6474
              if (low_n > 4)
6475
                {
6476
                  n += 16;
6477
                  low_n -= 16;
6478
                }
6479
            }
6480
          else
6481
            {
6482
              low_n = ((mode) == TImode ? 0
6483
                       : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
6484
              n -= low_n;
6485
            }
6486
 
6487
          base_reg = gen_reg_rtx (SImode);
6488
          val = force_operand (plus_constant (xop0, n), NULL_RTX);
6489
          emit_move_insn (base_reg, val);
6490
          x = plus_constant (base_reg, low_n);
6491
        }
6492
      else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6493
        x = gen_rtx_PLUS (SImode, xop0, xop1);
6494
    }
6495
 
6496
  /* XXX We don't allow MINUS any more -- see comment in
6497
     arm_legitimate_address_outer_p ().  */
6498
  else if (GET_CODE (x) == MINUS)
6499
    {
6500
      rtx xop0 = XEXP (x, 0);
6501
      rtx xop1 = XEXP (x, 1);
6502
 
6503
      if (CONSTANT_P (xop0))
6504
        xop0 = force_reg (SImode, xop0);
6505
 
6506
      if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6507
        xop1 = force_reg (SImode, xop1);
6508
 
6509
      if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6510
        x = gen_rtx_MINUS (SImode, xop0, xop1);
6511
    }
6512
 
6513
  /* Make sure to take full advantage of the pre-indexed addressing mode
6514
     with absolute addresses which often allows for the base register to
6515
     be factorized for multiple adjacent memory references, and it might
6516
     even allow for the minipool to be avoided entirely.  */
6517
  else if (GET_CODE (x) == CONST_INT && optimize > 0)
6518
    {
6519
      unsigned int bits;
6520
      HOST_WIDE_INT mask, base, index;
6521
      rtx base_reg;
6522
 
6523
      /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
6524
         use an 8-bit index.  So let's use a 12-bit index for SImode only and
6525
         hope that arm_gen_constant will enable ldrb to use more bits. */
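      /* Illustrative example for SImode: the absolute address 0x30FFF4
         splits into base 0x30F000 and index 0xFF4, so a neighbouring access
         such as 0x30FFF8 can reuse the same base register.  */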
6526
      bits = (mode == SImode) ? 12 : 8;
6527
      mask = (1 << bits) - 1;
6528
      base = INTVAL (x) & ~mask;
6529
      index = INTVAL (x) & mask;
6530
      if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6531
        {
6532
          /* It'll most probably be more efficient to generate the base
6533
             with more bits set and use a negative index instead. */
6534
          base |= mask;
6535
          index -= mask;
6536
        }
6537
      base_reg = force_reg (SImode, GEN_INT (base));
6538
      x = plus_constant (base_reg, index);
6539
    }
6540
 
6541
  if (flag_pic)
6542
    {
6543
      /* We need to find and carefully transform any SYMBOL and LABEL
6544
         references; so go back to the original address expression.  */
6545
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6546
 
6547
      if (new_x != orig_x)
6548
        x = new_x;
6549
    }
6550
 
6551
  return x;
6552
}
6553
 
6554
 
6555
/* Try machine-dependent ways of modifying an illegitimate Thumb address
6556
   to be legitimate.  If we find one, return the new, valid address.  */
6557
rtx
6558
thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6559
{
6560
  if (arm_tls_symbol_p (x))
6561
    return legitimize_tls_address (x, NULL_RTX);
6562
 
6563
  if (GET_CODE (x) == PLUS
6564
      && GET_CODE (XEXP (x, 1)) == CONST_INT
6565
      && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6566
          || INTVAL (XEXP (x, 1)) < 0))
6567
    {
6568
      rtx xop0 = XEXP (x, 0);
6569
      rtx xop1 = XEXP (x, 1);
6570
      HOST_WIDE_INT offset = INTVAL (xop1);
6571
 
6572
      /* Try and fold the offset into a biasing of the base register and
6573
         then offsetting that.  Don't do this when optimizing for space
6574
         since it can cause too many CSEs.  */
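      /* For instance, when the folding below is applied an SImode offset of
         260 becomes (reg + 252) plus a residual offset of 8, which fits the
         0..124 word-offset range.  */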
6575
      if (optimize_size && offset >= 0
6576
          && offset < 256 + 31 * GET_MODE_SIZE (mode))
6577
        {
6578
          HOST_WIDE_INT delta;
6579
 
6580
          if (offset >= 256)
6581
            delta = offset - (256 - GET_MODE_SIZE (mode));
6582
          else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6583
            delta = 31 * GET_MODE_SIZE (mode);
6584
          else
6585
            delta = offset & (~31 * GET_MODE_SIZE (mode));
6586
 
6587
          xop0 = force_operand (plus_constant (xop0, offset - delta),
6588
                                NULL_RTX);
6589
          x = plus_constant (xop0, delta);
6590
        }
6591
      else if (offset < 0 && offset > -256)
6592
        /* Small negative offsets are best done with a subtract before the
6593
           dereference, forcing these into a register normally takes two
6594
           instructions.  */
6595
        x = force_operand (x, NULL_RTX);
6596
      else
6597
        {
6598
          /* For the remaining cases, force the constant into a register.  */
6599
          xop1 = force_reg (SImode, xop1);
6600
          x = gen_rtx_PLUS (SImode, xop0, xop1);
6601
        }
6602
    }
6603
  else if (GET_CODE (x) == PLUS
6604
           && s_register_operand (XEXP (x, 1), SImode)
6605
           && !s_register_operand (XEXP (x, 0), SImode))
6606
    {
6607
      rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6608
 
6609
      x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6610
    }
6611
 
6612
  if (flag_pic)
6613
    {
6614
      /* We need to find and carefully transform any SYMBOL and LABEL
6615
         references; so go back to the original address expression.  */
6616
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6617
 
6618
      if (new_x != orig_x)
6619
        x = new_x;
6620
    }
6621
 
6622
  return x;
6623
}
6624
 
6625
bool
6626
arm_legitimize_reload_address (rtx *p,
6627
                               enum machine_mode mode,
6628
                               int opnum, int type,
6629
                               int ind_levels ATTRIBUTE_UNUSED)
6630
{
6631
  /* We must recognize output that we have already generated ourselves.  */
6632
  if (GET_CODE (*p) == PLUS
6633
      && GET_CODE (XEXP (*p, 0)) == PLUS
6634
      && GET_CODE (XEXP (XEXP (*p, 0), 0)) == REG
6635
      && GET_CODE (XEXP (XEXP (*p, 0), 1)) == CONST_INT
6636
      && GET_CODE (XEXP (*p, 1)) == CONST_INT)
6637
    {
6638
      push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6639
                   MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6640
                   VOIDmode, 0, 0, opnum, (enum reload_type) type);
6641
      return true;
6642
    }
6643
 
6644
  if (GET_CODE (*p) == PLUS
6645
      && GET_CODE (XEXP (*p, 0)) == REG
6646
      && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
6647
      /* If the base register is equivalent to a constant, let the generic
6648
         code handle it.  Otherwise we will run into problems if a future
6649
         reload pass decides to rematerialize the constant.  */
6650
      && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
6651
      && GET_CODE (XEXP (*p, 1)) == CONST_INT)
6652
    {
6653
      HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
6654
      HOST_WIDE_INT low, high;
6655
 
6656
      /* Detect coprocessor load/stores.  */
6657
      bool coproc_p = ((TARGET_HARD_FLOAT
6658
                        && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
6659
                        && (mode == SFmode || mode == DFmode
6660
                            || (mode == DImode && TARGET_MAVERICK)))
6661
                       || (TARGET_REALLY_IWMMXT
6662
                           && VALID_IWMMXT_REG_MODE (mode))
6663
                       || (TARGET_NEON
6664
                           && (VALID_NEON_DREG_MODE (mode)
6665
                               || VALID_NEON_QREG_MODE (mode))));
6666
 
6667
      /* For some conditions, bail out when lower two bits are unaligned.  */
6668
      if ((val & 0x3) != 0
6669
          /* Coprocessor load/store indexes are 8-bits + '00' appended.  */
6670
          && (coproc_p
6671
              /* For DI, and DF under soft-float: */
6672
              || ((mode == DImode || mode == DFmode)
6673
                  /* Without ldrd, we use stm/ldm, which does not
6674
                     fare well with unaligned offsets.  */
6675
                  && (! TARGET_LDRD
6676
                      /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4.  */
6677
                      || TARGET_THUMB2))))
6678
        return false;
6679
 
6680
      /* When breaking down a [reg+index] reload address into [(reg+high)+low],
6681
         of which the (reg+high) gets turned into a reload add insn,
6682
         we try to decompose the index into high/low values that can often
6683
         also lead to better reload CSE.
6684
         For example:
6685
                 ldr r0, [r2, #4100]  // Offset too large
6686
                 ldr r1, [r2, #4104]  // Offset too large
6687
 
6688
         is best reloaded as:
6689
                 add t1, r2, #4096
6690
                 ldr r0, [t1, #4]
6691
                 add t2, r2, #4096
6692
                 ldr r1, [t2, #8]
6693
 
6694
         which post-reload CSE can simplify in most cases to eliminate the
6695
         second add instruction:
6696
                 add t1, r2, #4096
6697
                 ldr r0, [t1, #4]
6698
                 ldr r1, [t1, #8]
6699
 
6700
         The idea here is that we want to split out the bits of the constant
6701
         as a mask, rather than by subtracting the maximum offset that the
6702
         respective type of load/store used can handle.
6703
 
6704
         When encountering negative offsets, we can still utilize them even if
6705
         the overall offset is positive; sometimes this may lead to an immediate
6706
         that can be constructed with fewer instructions.
6707
         For example:
6708
                 ldr r0, [r2, #0x3FFFFC]
6709
 
6710
         This is best reloaded as:
6711
                 add t1, r2, #0x400000
6712
                 ldr r0, [t1, #-4]
6713
 
6714
         The trick for spotting this for a load insn with N bits of offset
6715
         (i.e. bits N-1:0) is to look at bit N; if it is set, then chose a
6716
         negative offset that is going to make bit N and all the bits below
6717
         it become zero in the remainder part.
6718
 
6719
         The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
6720
         to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
6721
         used in most cases of ARM load/store instructions.  */
6722
 
6723
#define SIGN_MAG_LOW_ADDR_BITS(VAL, N)                                  \
6724
      (((VAL) & ((1 << (N)) - 1))                                       \
6725
       ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N))   \
6726
       : 0)
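
      /* For example, SIGN_MAG_LOW_ADDR_BITS (0x3FFFFC, 12) yields -4, and
         the remaining high part becomes 0x400000 -- exactly the add/ldr
         pair shown in the comment above.  */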
6727
 
6728
      if (coproc_p)
6729
        {
6730
          low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
6731
 
6732
          /* NEON quad-word load/stores are made of two double-word accesses,
6733
             so the valid index range is reduced by 8. Treat as 9-bit range if
6734
             we go over it.  */
6735
          if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
6736
            low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
6737
        }
6738
      else if (GET_MODE_SIZE (mode) == 8)
6739
        {
6740
          if (TARGET_LDRD)
6741
            low = (TARGET_THUMB2
6742
                   ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
6743
                   : SIGN_MAG_LOW_ADDR_BITS (val, 8));
6744
          else
6745
            /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
6746
               to access doublewords. The supported load/store offsets are
6747
               -8, -4, and 4, which we try to produce here.  */
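             /* E.g. an offset ending in 0x8 yields LOW = -8 and one ending
                in 0xC yields LOW = -4; the rest of the offset goes into
                HIGH below.  */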
6748
            low = ((val & 0xf) ^ 0x8) - 0x8;
6749
        }
6750
      else if (GET_MODE_SIZE (mode) < 8)
6751
        {
6752
          /* NEON element load/stores do not have an offset.  */
6753
          if (TARGET_NEON_FP16 && mode == HFmode)
6754
            return false;
6755
 
6756
          if (TARGET_THUMB2)
6757
            {
6758
              /* Thumb-2 has an asymmetrical index range of (-256,4096).
6759
                 Try the wider 12-bit range first, and re-try if the result
6760
                 is out of range.  */
6761
              low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6762
              if (low < -255)
6763
                low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6764
            }
6765
          else
6766
            {
6767
              if (mode == HImode || mode == HFmode)
6768
                {
6769
                  if (arm_arch4)
6770
                    low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6771
                  else
6772
                    {
6773
                      /* The storehi/movhi_bytes fallbacks can use only
6774
                         [-4094,+4094] of the full ldrb/strb index range.  */
6775
                      low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6776
                      if (low == 4095 || low == -4095)
6777
                        return false;
6778
                    }
6779
                }
6780
              else
6781
                low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6782
            }
6783
        }
6784
      else
6785
        return false;
6786
 
6787
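      /* Compute VAL - LOW as a signed 32-bit value (truncate and
         sign-extend), giving the high part that is reloaded into the base
         register below.  */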
      high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
6788
               ^ (unsigned HOST_WIDE_INT) 0x80000000)
6789
              - (unsigned HOST_WIDE_INT) 0x80000000);
6790
      /* Check for overflow or zero */
6791
      if (low == 0 || high == 0 || (high + low != val))
6792
        return false;
6793
 
6794
      /* Reload the high part into a base reg; leave the low part
6795
         in the mem.  */
6796
      *p = gen_rtx_PLUS (GET_MODE (*p),
6797
                         gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
6798
                                       GEN_INT (high)),
6799
                         GEN_INT (low));
6800
      push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6801
                   MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6802
                   VOIDmode, 0, 0, opnum, (enum reload_type) type);
6803
      return true;
6804
    }
6805
 
6806
  return false;
6807
}
6808
 
6809
rtx
6810
thumb_legitimize_reload_address (rtx *x_p,
6811
                                 enum machine_mode mode,
6812
                                 int opnum, int type,
6813
                                 int ind_levels ATTRIBUTE_UNUSED)
6814
{
6815
  rtx x = *x_p;
6816
 
6817
  if (GET_CODE (x) == PLUS
6818
      && GET_MODE_SIZE (mode) < 4
6819
      && REG_P (XEXP (x, 0))
6820
      && XEXP (x, 0) == stack_pointer_rtx
6821
      && GET_CODE (XEXP (x, 1)) == CONST_INT
6822
      && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6823
    {
6824
      rtx orig_x = x;
6825
 
6826
      x = copy_rtx (x);
6827
      push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6828
                   Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6829
      return x;
6830
    }
6831
 
6832
  /* If both registers are hi-regs, then it's better to reload the
6833
     entire expression rather than each register individually.  That
6834
     only requires one reload register rather than two.  */
6835
  if (GET_CODE (x) == PLUS
6836
      && REG_P (XEXP (x, 0))
6837
      && REG_P (XEXP (x, 1))
6838
      && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6839
      && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6840
    {
6841
      rtx orig_x = x;
6842
 
6843
      x = copy_rtx (x);
6844
      push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6845
                   Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6846
      return x;
6847
    }
6848
 
6849
  return NULL;
6850
}
6851
 
6852
/* Test for various thread-local symbols.  */
6853
 
6854
/* Return TRUE if X is a thread-local symbol.  */
6855
 
6856
static bool
6857
arm_tls_symbol_p (rtx x)
6858
{
6859
  if (! TARGET_HAVE_TLS)
6860
    return false;
6861
 
6862
  if (GET_CODE (x) != SYMBOL_REF)
6863
    return false;
6864
 
6865
  return SYMBOL_REF_TLS_MODEL (x) != 0;
6866
}
6867
 
6868
/* Helper for arm_tls_referenced_p.  */
6869
 
6870
static int
6871
arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6872
{
6873
  if (GET_CODE (*x) == SYMBOL_REF)
6874
    return SYMBOL_REF_TLS_MODEL (*x) != 0;
6875
 
6876
  /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6877
     TLS offsets, not real symbol references.  */
6878
  if (GET_CODE (*x) == UNSPEC
6879
      && XINT (*x, 1) == UNSPEC_TLS)
6880
    return -1;
6881
 
6882
  return 0;
6883
}
6884
 
6885
/* Return TRUE if X contains any TLS symbol references.  */
6886
 
6887
bool
6888
arm_tls_referenced_p (rtx x)
6889
{
6890
  if (! TARGET_HAVE_TLS)
6891
    return false;
6892
 
6893
  return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6894
}
6895
 
6896
/* Implement TARGET_LEGITIMATE_CONSTANT_P.
6897
 
6898
   On the ARM, allow any integer (invalid ones are removed later by insn
6899
   patterns), nice doubles and symbol_refs which refer to the function's
6900
   constant pool XXX.
6901
 
6902
   When generating pic allow anything.  */
6903
 
6904
static bool
6905
arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
6906
{
6907
  /* At present, we have no support for Neon structure constants, so forbid
6908
     them here.  It might be possible to handle simple cases like 0 and -1
6909
     in future.  */
6910
  if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
6911
    return false;
6912
 
6913
  return flag_pic || !label_mentioned_p (x);
6914
}
6915
 
6916
static bool
6917
thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6918
{
6919
  return (GET_CODE (x) == CONST_INT
6920
          || GET_CODE (x) == CONST_DOUBLE
6921
          || CONSTANT_ADDRESS_P (x)
6922
          || flag_pic);
6923
}
6924
 
6925
static bool
6926
arm_legitimate_constant_p (enum machine_mode mode, rtx x)
6927
{
6928
  return (!arm_cannot_force_const_mem (mode, x)
6929
          && (TARGET_32BIT
6930
              ? arm_legitimate_constant_p_1 (mode, x)
6931
              : thumb_legitimate_constant_p (mode, x)));
6932
}
6933
 
6934
/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */
6935
 
6936
static bool
6937
arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6938
{
6939
  rtx base, offset;
6940
 
6941
  if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6942
    {
6943
      split_const (x, &base, &offset);
6944
      if (GET_CODE (base) == SYMBOL_REF
6945
          && !offset_within_block_p (base, INTVAL (offset)))
6946
        return true;
6947
    }
6948
  return arm_tls_referenced_p (x);
6949
}
6950
 
6951
#define REG_OR_SUBREG_REG(X)                                            \
6952
  (GET_CODE (X) == REG                                                  \
6953
   || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6954
 
6955
#define REG_OR_SUBREG_RTX(X)                    \
6956
   (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6957
 
6958
static inline int
6959
thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6960
{
6961
  enum machine_mode mode = GET_MODE (x);
6962
  int total;
6963
 
6964
  switch (code)
6965
    {
6966
    case ASHIFT:
6967
    case ASHIFTRT:
6968
    case LSHIFTRT:
6969
    case ROTATERT:
6970
    case PLUS:
6971
    case MINUS:
6972
    case COMPARE:
6973
    case NEG:
6974
    case NOT:
6975
      return COSTS_N_INSNS (1);
6976
 
6977
    case MULT:
6978
      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6979
        {
6980
          int cycles = 0;
6981
          unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
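          /* Cost roughly one cycle for every two significant bits of the
             constant multiplier; e.g. multiplying by 100 (7 bits) adds 4
             cycles.  */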
6982
 
6983
          while (i)
6984
            {
6985
              i >>= 2;
6986
              cycles++;
6987
            }
6988
          return COSTS_N_INSNS (2) + cycles;
6989
        }
6990
      return COSTS_N_INSNS (1) + 16;
6991
 
6992
    case SET:
6993
      return (COSTS_N_INSNS (1)
6994
              + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6995
                     + (GET_CODE (SET_DEST (x)) == MEM)));
6996
 
6997
    case CONST_INT:
6998
      if (outer == SET)
6999
        {
7000
          if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7001
            return 0;
7002
          if (thumb_shiftable_const (INTVAL (x)))
7003
            return COSTS_N_INSNS (2);
7004
          return COSTS_N_INSNS (3);
7005
        }
7006
      else if ((outer == PLUS || outer == COMPARE)
7007
               && INTVAL (x) < 256 && INTVAL (x) > -256)
7008
        return 0;
7009
      else if ((outer == IOR || outer == XOR || outer == AND)
7010
               && INTVAL (x) < 256 && INTVAL (x) >= -256)
7011
        return COSTS_N_INSNS (1);
7012
      else if (outer == AND)
7013
        {
7014
          int i;
7015
          /* This duplicates the tests in the andsi3 expander.  */
7016
          for (i = 9; i <= 31; i++)
7017
            if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7018
                || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7019
              return COSTS_N_INSNS (2);
7020
        }
7021
      else if (outer == ASHIFT || outer == ASHIFTRT
7022
               || outer == LSHIFTRT)
7023
        return 0;
7024
      return COSTS_N_INSNS (2);
7025
 
7026
    case CONST:
7027
    case CONST_DOUBLE:
7028
    case LABEL_REF:
7029
    case SYMBOL_REF:
7030
      return COSTS_N_INSNS (3);
7031
 
7032
    case UDIV:
7033
    case UMOD:
7034
    case DIV:
7035
    case MOD:
7036
      return 100;
7037
 
7038
    case TRUNCATE:
7039
      return 99;
7040
 
7041
    case AND:
7042
    case XOR:
7043
    case IOR:
7044
      /* XXX guess.  */
7045
      return 8;
7046
 
7047
    case MEM:
7048
      /* XXX another guess.  */
7049
      /* Memory costs quite a lot for the first word, but subsequent words
7050
         load at the equivalent of a single insn each.  */
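      /* E.g. an SImode load scores 10 and a DImode load 14 in these units,
         with 4 more added for a constant-pool reference.  */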
7051
      return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7052
              + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7053
                 ? 4 : 0));
7054
 
7055
    case IF_THEN_ELSE:
7056
      /* XXX a guess.  */
7057
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7058
        return 14;
7059
      return 2;
7060
 
7061
    case SIGN_EXTEND:
7062
    case ZERO_EXTEND:
7063
      total = mode == DImode ? COSTS_N_INSNS (1) : 0;
7064
      total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
7065
 
7066
      if (mode == SImode)
7067
        return total;
7068
 
7069
      if (arm_arch6)
7070
        return total + COSTS_N_INSNS (1);
7071
 
7072
      /* Assume a two-shift sequence.  Increase the cost slightly so
7073
         we prefer actual shifts over an extend operation.  */
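      /* Without the v6 extend instructions this is typically a left shift
         followed by a right shift, e.g. lsls/asrs by #24 for a QImode
         sign-extend.  */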
7074
      return total + 1 + COSTS_N_INSNS (2);
7075
 
7076
    default:
7077
      return 99;
7078
    }
7079
}
7080
 
7081
static inline bool
7082
arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
7083
{
7084
  enum machine_mode mode = GET_MODE (x);
7085
  enum rtx_code subcode;
7086
  rtx operand;
7087
  enum rtx_code code = GET_CODE (x);
7088
  *total = 0;
7089
 
7090
  switch (code)
7091
    {
7092
    case MEM:
7093
      /* Memory costs quite a lot for the first word, but subsequent words
7094
         load at the equivalent of a single insn each.  */
7095
      *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
7096
      return true;
7097
 
7098
    case DIV:
7099
    case MOD:
7100
    case UDIV:
7101
    case UMOD:
7102
      if (TARGET_HARD_FLOAT && mode == SFmode)
7103
        *total = COSTS_N_INSNS (2);
7104
      else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
7105
        *total = COSTS_N_INSNS (4);
7106
      else
7107
        *total = COSTS_N_INSNS (20);
7108
      return false;
7109
 
7110
    case ROTATE:
7111
      if (GET_CODE (XEXP (x, 1)) == REG)
7112
        *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
7113
      else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7114
        *total = rtx_cost (XEXP (x, 1), code, 1, speed);
7115
 
7116
      /* Fall through */
7117
    case ROTATERT:
7118
      if (mode != SImode)
7119
        {
7120
          *total += COSTS_N_INSNS (4);
7121
          return true;
7122
        }
7123
 
7124
      /* Fall through */
7125
    case ASHIFT: case LSHIFTRT: case ASHIFTRT:
7126
      *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7127
      if (mode == DImode)
7128
        {
7129
          *total += COSTS_N_INSNS (3);
7130
          return true;
7131
        }
7132
 
7133
      *total += COSTS_N_INSNS (1);
7134
      /* Increase the cost of complex shifts because they aren't any faster,
7135
         and they reduce dual-issue opportunities.  */
7136
      if (arm_tune_cortex_a9
7137
          && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
7138
        ++*total;
7139
 
7140
      return true;
7141
 
7142
    case MINUS:
7143
      if (mode == DImode)
7144
        {
7145
          *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7146
          if (GET_CODE (XEXP (x, 0)) == CONST_INT
7147
              && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7148
            {
7149
              *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7150
              return true;
7151
            }
7152
 
7153
          if (GET_CODE (XEXP (x, 1)) == CONST_INT
7154
              && const_ok_for_arm (INTVAL (XEXP (x, 1))))
7155
            {
7156
              *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7157
              return true;
7158
            }
7159
 
7160
          return false;
7161
        }
7162
 
7163
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7164
        {
7165
          if (TARGET_HARD_FLOAT
7166
              && (mode == SFmode
7167
                  || (mode == DFmode && !TARGET_VFP_SINGLE)))
7168
            {
7169
              *total = COSTS_N_INSNS (1);
7170
              if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
7171
                  && arm_const_double_rtx (XEXP (x, 0)))
7172
                {
7173
                  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7174
                  return true;
7175
                }
7176
 
7177
              if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
7178
                  && arm_const_double_rtx (XEXP (x, 1)))
7179
                {
7180
                  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7181
                  return true;
7182
                }
7183
 
7184
              return false;
7185
            }
7186
          *total = COSTS_N_INSNS (20);
7187
          return false;
7188
        }
7189
 
7190
      *total = COSTS_N_INSNS (1);
7191
      if (GET_CODE (XEXP (x, 0)) == CONST_INT
7192
          && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7193
        {
7194
          *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7195
          return true;
7196
        }
7197
 
7198
      subcode = GET_CODE (XEXP (x, 1));
7199
      if (subcode == ASHIFT || subcode == ASHIFTRT
7200
          || subcode == LSHIFTRT
7201
          || subcode == ROTATE || subcode == ROTATERT)
7202
        {
7203
          *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7204
          *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7205
          return true;
7206
        }
7207
 
7208
      /* A shift as a part of RSB costs no more than RSB itself.  */
7209
      if (GET_CODE (XEXP (x, 0)) == MULT
7210
          && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7211
        {
7212
          *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
7213
          *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7214
          return true;
7215
        }
7216
 
7217
      if (subcode == MULT
7218
          && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
7219
        {
7220
          *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7221
          *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7222
          return true;
7223
        }
7224
 
7225
      if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
7226
          || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
7227
        {
7228
          *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7229
          if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
7230
              && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
7231
            *total += COSTS_N_INSNS (1);
7232
 
7233
          return true;
7234
        }
7235
 
7236
      /* Fall through */
7237
 
7238
    case PLUS:
7239
      if (code == PLUS && arm_arch6 && mode == SImode
7240
          && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7241
              || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7242
        {
7243
          *total = COSTS_N_INSNS (1);
7244
          *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
7245
                              0, speed);
7246
          *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7247
          return true;
7248
        }
7249
 
7250
      /* MLA: All arguments must be registers.  We filter out
7251
         multiplication by a power of two, so that we fall through to
7252
         the code below.  */
7253
      if (GET_CODE (XEXP (x, 0)) == MULT
7254
          && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7255
        {
7256
          /* The cost comes from the cost of the multiply.  */
7257
          return false;
7258
        }
7259
 
7260
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7261
        {
7262
          if (TARGET_HARD_FLOAT
7263
              && (mode == SFmode
7264
                  || (mode == DFmode && !TARGET_VFP_SINGLE)))
7265
            {
7266
              *total = COSTS_N_INSNS (1);
7267
              if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
7268
                  && arm_const_double_rtx (XEXP (x, 1)))
7269
                {
7270
                  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7271
                  return true;
7272
                }
7273
 
7274
              return false;
7275
            }
7276
 
7277
          *total = COSTS_N_INSNS (20);
7278
          return false;
7279
        }
7280
 
7281
      if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
7282
          || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
7283
        {
7284
          *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
7285
          if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
7286
              && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
7287
            *total += COSTS_N_INSNS (1);
7288
          return true;
7289
        }
7290
 
7291
      /* Fall through */
7292
 
7293
    case AND: case XOR: case IOR:
7294
 
7295
      /* Normally the frame registers will be spilt into reg+const during
7296
         reload, so it is a bad idea to combine them with other instructions,
7297
         since then they might not be moved outside of loops.  As a compromise
7298
         we allow integration with ops that have a constant as their second
7299
         operand.  */
7300
      if (REG_OR_SUBREG_REG (XEXP (x, 0))
7301
          && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
7302
          && GET_CODE (XEXP (x, 1)) != CONST_INT)
7303
        *total = COSTS_N_INSNS (1);
7304
 
7305
      if (mode == DImode)
7306
        {
7307
          *total += COSTS_N_INSNS (2);
7308
          if (GET_CODE (XEXP (x, 1)) == CONST_INT
7309
              && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7310
            {
7311
              *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7312
              return true;
7313
            }
7314
 
7315
          return false;
7316
        }
7317
 
7318
      *total += COSTS_N_INSNS (1);
7319
      if (GET_CODE (XEXP (x, 1)) == CONST_INT
7320
          && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7321
        {
7322
          *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7323
          return true;
7324
        }
7325
      subcode = GET_CODE (XEXP (x, 0));
7326
      if (subcode == ASHIFT || subcode == ASHIFTRT
7327
          || subcode == LSHIFTRT
7328
          || subcode == ROTATE || subcode == ROTATERT)
7329
        {
7330
          *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7331
          *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7332
          return true;
7333
        }
7334
 
7335
      if (subcode == MULT
7336
          && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7337
        {
7338
          *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7339
          *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7340
          return true;
7341
        }
7342
 
7343
      if (subcode == UMIN || subcode == UMAX
7344
          || subcode == SMIN || subcode == SMAX)
7345
        {
7346
          *total = COSTS_N_INSNS (3);
7347
          return true;
7348
        }
7349
 
7350
      return false;
7351
 
7352
    case MULT:
7353
      /* This should have been handled by the CPU specific routines.  */
7354
      gcc_unreachable ();
7355
 
7356
    case TRUNCATE:
7357
      if (arm_arch3m && mode == SImode
7358
          && GET_CODE (XEXP (x, 0)) == LSHIFTRT
7359
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7360
          && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
7361
              == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
7362
          && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
7363
              || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
7364
        {
7365
          *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
7366
          return true;
7367
        }
7368
      *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
7369
      return false;
7370
 
7371
    case NEG:
7372
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7373
        {
7374
          if (TARGET_HARD_FLOAT
7375
              && (mode == SFmode
7376
                  || (mode == DFmode && !TARGET_VFP_SINGLE)))
7377
            {
7378
              *total = COSTS_N_INSNS (1);
7379
              return false;
7380
            }
7381
          *total = COSTS_N_INSNS (2);
7382
          return false;
7383
        }
7384
 
7385
      /* Fall through */
7386
    case NOT:
7387
      *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
7388
      if (mode == SImode && code == NOT)
7389
        {
7390
          subcode = GET_CODE (XEXP (x, 0));
7391
          if (subcode == ASHIFT || subcode == ASHIFTRT
7392
              || subcode == LSHIFTRT
7393
              || subcode == ROTATE || subcode == ROTATERT
7394
              || (subcode == MULT
7395
                  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
7396
            {
7397
              *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7398
              /* Register shifts cost an extra cycle.  */
7399
              if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
7400
                *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
7401
                                                        subcode, 1, speed);
7402
              return true;
7403
            }
7404
        }
7405
 
7406
      return false;
7407
 
7408
    case IF_THEN_ELSE:
7409
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7410
        {
7411
          *total = COSTS_N_INSNS (4);
7412
          return true;
7413
        }
7414
 
7415
      operand = XEXP (x, 0);
7416
 
7417
      if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
7418
             || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
7419
            && GET_CODE (XEXP (operand, 0)) == REG
7420
            && REGNO (XEXP (operand, 0)) == CC_REGNUM))
7421
        *total += COSTS_N_INSNS (1);
7422
      *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
7423
                 + rtx_cost (XEXP (x, 2), code, 2, speed));
7424
      return true;
7425
 
7426
    case NE:
7427
      if (mode == SImode && XEXP (x, 1) == const0_rtx)
7428
        {
7429
          *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7430
          return true;
7431
        }
7432
      goto scc_insn;
7433
 
7434
    case GE:
7435
      if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
7436
          && mode == SImode && XEXP (x, 1) == const0_rtx)
7437
        {
7438
          *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7439
          return true;
7440
        }
7441
      goto scc_insn;
7442
 
7443
    case LT:
7444
      if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
7445
          && mode == SImode && XEXP (x, 1) == const0_rtx)
7446
        {
7447
          *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7448
          return true;
7449
        }
7450
      goto scc_insn;
7451
 
7452
    case EQ:
7453
    case GT:
7454
    case LE:
7455
    case GEU:
7456
    case LTU:
7457
    case GTU:
7458
    case LEU:
7459
    case UNORDERED:
7460
    case ORDERED:
7461
    case UNEQ:
7462
    case UNGE:
7463
    case UNLT:
7464
    case UNGT:
7465
    case UNLE:
7466
    scc_insn:
7467
      /* SCC insns.  If the comparison has already been performed, they
         cost 2 instructions.  Otherwise they need an additional comparison
         before them.  */
7470
      *total = COSTS_N_INSNS (2);
7471
      if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7472
        {
7473
          return true;
7474
        }
7475
 
7476
      /* Fall through */
7477
    case COMPARE:
7478
      if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7479
        {
7480
          *total = 0;
7481
          return true;
7482
        }
7483
 
7484
      *total += COSTS_N_INSNS (1);
7485
      if (GET_CODE (XEXP (x, 1)) == CONST_INT
7486
          && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7487
        {
7488
          *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7489
          return true;
7490
        }
7491
 
7492
      subcode = GET_CODE (XEXP (x, 0));
7493
      if (subcode == ASHIFT || subcode == ASHIFTRT
7494
          || subcode == LSHIFTRT
7495
          || subcode == ROTATE || subcode == ROTATERT)
7496
        {
7497
          *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7498
          *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7499
          return true;
7500
        }
7501
 
7502
      if (subcode == MULT
7503
          && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7504
        {
7505
          *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7506
          *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7507
          return true;
7508
        }
7509
 
7510
      return false;
7511
 
7512
    case UMIN:
7513
    case UMAX:
7514
    case SMIN:
7515
    case SMAX:
7516
      *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7517
      if (GET_CODE (XEXP (x, 1)) != CONST_INT
7518
          || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
7519
        *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7520
      return true;
7521
 
7522
    case ABS:
7523
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7524
        {
7525
          if (TARGET_HARD_FLOAT
7526
              && (mode == SFmode
7527
                  || (mode == DFmode && !TARGET_VFP_SINGLE)))
7528
            {
7529
              *total = COSTS_N_INSNS (1);
7530
              return false;
7531
            }
7532
          *total = COSTS_N_INSNS (20);
7533
          return false;
7534
        }
7535
      *total = COSTS_N_INSNS (1);
7536
      if (mode == DImode)
7537
        *total += COSTS_N_INSNS (3);
7538
      return false;
7539
 
7540
    case SIGN_EXTEND:
7541
    case ZERO_EXTEND:
7542
      *total = 0;
7543
      if (GET_MODE_CLASS (mode) == MODE_INT)
7544
        {
7545
          rtx op = XEXP (x, 0);
7546
          enum machine_mode opmode = GET_MODE (op);
7547
 
7548
          if (mode == DImode)
7549
            *total += COSTS_N_INSNS (1);
7550
 
7551
          if (opmode != SImode)
7552
            {
7553
              if (MEM_P (op))
7554
                {
7555
                  /* If !arm_arch4, we use one of the extendhisi2_mem
7556
                     or movhi_bytes patterns for HImode.  For a QImode
7557
                     sign extension, we first zero-extend from memory
7558
                     and then perform a shift sequence.  */
7559
                  if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
7560
                    *total += COSTS_N_INSNS (2);
7561
                }
7562
              else if (arm_arch6)
7563
                *total += COSTS_N_INSNS (1);
7564
 
7565
              /* We don't have the necessary insn, so we need to perform some
7566
                 other operation.  */
7567
              else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
7568
                /* An and with constant 255.  */
7569
                *total += COSTS_N_INSNS (1);
7570
              else
7571
                /* A shift sequence.  Increase costs slightly to avoid
7572
                   combining two shifts into an extend operation.  */
7573
                *total += COSTS_N_INSNS (2) + 1;
7574
            }
7575
 
7576
          return false;
7577
        }
7578
 
7579
      switch (GET_MODE (XEXP (x, 0)))
7580
        {
7581
        case V8QImode:
7582
        case V4HImode:
7583
        case V2SImode:
7584
        case V4QImode:
7585
        case V2HImode:
7586
          *total = COSTS_N_INSNS (1);
7587
          return false;
7588
 
7589
        default:
7590
          gcc_unreachable ();
7591
        }
7592
      gcc_unreachable ();
7593
 
7594
    case ZERO_EXTRACT:
7595
    case SIGN_EXTRACT:
7596
      *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7597
      return true;
7598
 
7599
    case CONST_INT:
7600
      if (const_ok_for_arm (INTVAL (x))
7601
          || const_ok_for_arm (~INTVAL (x)))
7602
        *total = COSTS_N_INSNS (1);
7603
      else
7604
        *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
7605
                                                  INTVAL (x), NULL_RTX,
7606
                                                  NULL_RTX, 0, 0));
7607
      return true;
7608
 
7609
    case CONST:
7610
    case LABEL_REF:
7611
    case SYMBOL_REF:
7612
      *total = COSTS_N_INSNS (3);
7613
      return true;
7614
 
7615
    case HIGH:
7616
      *total = COSTS_N_INSNS (1);
7617
      return true;
7618
 
7619
    case LO_SUM:
7620
      *total = COSTS_N_INSNS (1);
7621
      *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7622
      return true;
7623
 
7624
    case CONST_DOUBLE:
7625
      if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
7626
          && (mode == SFmode || !TARGET_VFP_SINGLE))
7627
        *total = COSTS_N_INSNS (1);
7628
      else
7629
        *total = COSTS_N_INSNS (4);
7630
      return true;
7631
 
7632
    case SET:
7633
      return false;
7634
 
7635
    case UNSPEC:
7636
      /* We cost this as high as our memory costs so that it can be
         hoisted out of loops.  */
7638
      if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
7639
        {
7640
          *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
7641
        }
7642
      return true;
7643
 
7644
    default:
7645
      *total = COSTS_N_INSNS (4);
7646
      return false;
7647
    }
7648
}
7649
 
7650
/* Estimates the size cost of thumb1 instructions.
7651
   For now most of the code is copied from thumb1_rtx_costs. We need more
7652
   fine-grained tuning when we have more related test cases.  */
7653
static inline int
7654
thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7655
{
7656
  enum machine_mode mode = GET_MODE (x);
7657
 
7658
  switch (code)
7659
    {
7660
    case ASHIFT:
7661
    case ASHIFTRT:
7662
    case LSHIFTRT:
7663
    case ROTATERT:
7664
    case PLUS:
7665
    case MINUS:
7666
    case COMPARE:
7667
    case NEG:
7668
    case NOT:
7669
      return COSTS_N_INSNS (1);
7670
 
7671
    case MULT:
7672
      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7673
        {
7674
          /* The Thumb-1 mul instruction can't operate on a constant.  We
             must load it into a register first.  */
7676
          int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
7677
          return COSTS_N_INSNS (1) + const_size;
7678
        }
7679
      return COSTS_N_INSNS (1);
7680
 
7681
    case SET:
7682
      return (COSTS_N_INSNS (1)
7683
              + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
7684
                     + (GET_CODE (SET_DEST (x)) == MEM)));
7685
 
7686
    case CONST_INT:
7687
      if (outer == SET)
7688
        {
7689
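          /* For example: 200 fits a single MOVS, so cost 1; -100 needs
             MOVS + NEGS, cost 2; a shifted 8-bit value such as 0xFF00
             needs MOVS + LSLS, cost 2; anything else, e.g. 0x12345, is
             costed at 3.  */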
          if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7690
            return COSTS_N_INSNS (1);
7691
          /* See split "TARGET_THUMB1 && satisfies_constraint_J".  */
7692
          if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
7693
            return COSTS_N_INSNS (2);
7694
          /* See split "TARGET_THUMB1 && satisfies_constraint_K".  */
7695
          if (thumb_shiftable_const (INTVAL (x)))
7696
            return COSTS_N_INSNS (2);
7697
          return COSTS_N_INSNS (3);
7698
        }
7699
      else if ((outer == PLUS || outer == COMPARE)
7700
               && INTVAL (x) < 256 && INTVAL (x) > -256)
7701
        return 0;
7702
      else if ((outer == IOR || outer == XOR || outer == AND)
7703
               && INTVAL (x) < 256 && INTVAL (x) >= -256)
7704
        return COSTS_N_INSNS (1);
7705
      else if (outer == AND)
7706
        {
7707
          int i;
7708
          /* This duplicates the tests in the andsi3 expander.  */
7709
          for (i = 9; i <= 31; i++)
7710
            if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7711
                || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7712
              return COSTS_N_INSNS (2);
7713
        }
7714
      else if (outer == ASHIFT || outer == ASHIFTRT
7715
               || outer == LSHIFTRT)
7716
        return 0;
7717
      return COSTS_N_INSNS (2);
7718
 
7719
    case CONST:
7720
    case CONST_DOUBLE:
7721
    case LABEL_REF:
7722
    case SYMBOL_REF:
7723
      return COSTS_N_INSNS (3);
7724
 
7725
    case UDIV:
7726
    case UMOD:
7727
    case DIV:
7728
    case MOD:
7729
      return 100;
7730
 
7731
    case TRUNCATE:
7732
      return 99;
7733
 
7734
    case AND:
7735
    case XOR:
7736
    case IOR:
7737
      /* XXX guess.  */
7738
      return 8;
7739
 
7740
    case MEM:
7741
      /* XXX another guess.  */
7742
      /* Memory costs quite a lot for the first word, but subsequent words
7743
         load at the equivalent of a single insn each.  */
7744
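      /* For example: an SImode access costs 10, a DImode access 14, and a
         load from the constant pool adds another 4 on top of that.  */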
      return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7745
              + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7746
                 ? 4 : 0));
7747
 
7748
    case IF_THEN_ELSE:
7749
      /* XXX a guess.  */
7750
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7751
        return 14;
7752
      return 2;
7753
 
7754
    case ZERO_EXTEND:
7755
      /* XXX still guessing.  */
7756
      switch (GET_MODE (XEXP (x, 0)))
7757
        {
7758
          case QImode:
7759
            return (1 + (mode == DImode ? 4 : 0)
7760
                    + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7761
 
7762
          case HImode:
7763
            return (4 + (mode == DImode ? 4 : 0)
7764
                    + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7765
 
7766
          case SImode:
7767
            return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7768
 
7769
          default:
7770
            return 99;
7771
        }
7772
 
7773
    default:
7774
      return 99;
7775
    }
7776
}
7777
 
7778
/* RTX costs when optimizing for size.  */
7779
static bool
7780
arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7781
                    int *total)
7782
{
7783
  enum machine_mode mode = GET_MODE (x);
7784
  if (TARGET_THUMB1)
7785
    {
7786
      *total = thumb1_size_rtx_costs (x, code, outer_code);
7787
      return true;
7788
    }
7789
 
7790
  /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions.  */
7791
  switch (code)
7792
    {
7793
    case MEM:
7794
      /* A memory access costs 1 insn if the mode is small, or the address is
7795
         a single register; otherwise it costs one insn per word.  */
7796
      if (REG_P (XEXP (x, 0)))
7797
        *total = COSTS_N_INSNS (1);
7798
      else if (flag_pic
7799
               && GET_CODE (XEXP (x, 0)) == PLUS
7800
               && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
7801
        /* This will be split into two instructions.
7802
           See arm.md:calculate_pic_address.  */
7803
        *total = COSTS_N_INSNS (2);
7804
      else
7805
        *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7806
      return true;
7807
 
7808
    case DIV:
7809
    case MOD:
7810
    case UDIV:
7811
    case UMOD:
7812
      /* Needs a libcall, so it costs about this.  */
7813
      *total = COSTS_N_INSNS (2);
7814
      return false;
7815
 
7816
    case ROTATE:
7817
      if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
7818
        {
7819
          *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
7820
          return true;
7821
        }
7822
      /* Fall through */
7823
    case ROTATERT:
7824
    case ASHIFT:
7825
    case LSHIFTRT:
7826
    case ASHIFTRT:
7827
      if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
7828
        {
7829
          *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
7830
          return true;
7831
        }
7832
      else if (mode == SImode)
7833
        {
7834
          *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
7835
          /* Slightly disparage register shifts, but not by much.  */
7836
          if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7837
            *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
7838
          return true;
7839
        }
7840
 
7841
      /* Needs a libcall.  */
7842
      *total = COSTS_N_INSNS (2);
7843
      return false;
7844
 
7845
    case MINUS:
7846
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7847
          && (mode == SFmode || !TARGET_VFP_SINGLE))
7848
        {
7849
          *total = COSTS_N_INSNS (1);
7850
          return false;
7851
        }
7852
 
7853
      if (mode == SImode)
7854
        {
7855
          enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
7856
          enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
7857
 
7858
          if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
7859
              || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
7860
              || subcode1 == ROTATE || subcode1 == ROTATERT
7861
              || subcode1 == ASHIFT || subcode1 == LSHIFTRT
7862
              || subcode1 == ASHIFTRT)
7863
            {
7864
              /* It's just the cost of the two operands.  */
7865
              *total = 0;
7866
              return false;
7867
            }
7868
 
7869
          *total = COSTS_N_INSNS (1);
7870
          return false;
7871
        }
7872
 
7873
      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7874
      return false;
7875
 
7876
    case PLUS:
7877
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7878
          && (mode == SFmode || !TARGET_VFP_SINGLE))
7879
        {
7880
          *total = COSTS_N_INSNS (1);
7881
          return false;
7882
        }
7883
 
7884
      /* A shift as a part of ADD costs nothing.  */
7885
      if (GET_CODE (XEXP (x, 0)) == MULT
7886
          && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7887
        {
7888
          *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
7889
          *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
7890
          *total += rtx_cost (XEXP (x, 1), code, 1, false);
7891
          return true;
7892
        }
7893
 
7894
      /* Fall through */
7895
    case AND: case XOR: case IOR:
7896
      if (mode == SImode)
7897
        {
7898
          enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7899
 
7900
          if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7901
              || subcode == LSHIFTRT || subcode == ASHIFTRT
7902
              || (code == AND && subcode == NOT))
7903
            {
7904
              /* It's just the cost of the two operands.  */
7905
              *total = 0;
7906
              return false;
7907
            }
7908
        }
7909
 
7910
      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7911
      return false;
7912
 
7913
    case MULT:
7914
      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7915
      return false;
7916
 
7917
    case NEG:
7918
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7919
          && (mode == SFmode || !TARGET_VFP_SINGLE))
7920
        {
7921
          *total = COSTS_N_INSNS (1);
7922
          return false;
7923
        }
7924
 
7925
      /* Fall through */
7926
    case NOT:
7927
      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7928
 
7929
      return false;
7930
 
7931
    case IF_THEN_ELSE:
7932
      *total = 0;
7933
      return false;
7934
 
7935
    case COMPARE:
7936
      if (cc_register (XEXP (x, 0), VOIDmode))
7937
        *total = 0;
7938
      else
7939
        *total = COSTS_N_INSNS (1);
7940
      return false;
7941
 
7942
    case ABS:
7943
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7944
          && (mode == SFmode || !TARGET_VFP_SINGLE))
7945
        *total = COSTS_N_INSNS (1);
7946
      else
7947
        *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7948
      return false;
7949
 
7950
    case SIGN_EXTEND:
7951
    case ZERO_EXTEND:
7952
      return arm_rtx_costs_1 (x, outer_code, total, 0);
7953
 
7954
    case CONST_INT:
7955
      if (const_ok_for_arm (INTVAL (x)))
7956
        /* A multiplication by a constant requires another instruction
7957
           to load the constant to a register.  */
7958
        *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7959
                                ? 1 : 0);
7960
      else if (const_ok_for_arm (~INTVAL (x)))
7961
        *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7962
      else if (const_ok_for_arm (-INTVAL (x)))
7963
        {
7964
          if (outer_code == COMPARE || outer_code == PLUS
7965
              || outer_code == MINUS)
7966
            *total = 0;
7967
          else
7968
            *total = COSTS_N_INSNS (1);
7969
        }
7970
      else
7971
        *total = COSTS_N_INSNS (2);
7972
      return true;
7973
 
7974
    case CONST:
7975
    case LABEL_REF:
7976
    case SYMBOL_REF:
7977
      *total = COSTS_N_INSNS (2);
7978
      return true;
7979
 
7980
    case CONST_DOUBLE:
7981
      *total = COSTS_N_INSNS (4);
7982
      return true;
7983
 
7984
    case HIGH:
7985
    case LO_SUM:
7986
      /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7987
         cost of these slightly.  */
7988
      *total = COSTS_N_INSNS (1) + 1;
7989
      return true;
7990
 
7991
    case SET:
7992
      return false;
7993
 
7994
    default:
7995
      if (mode != VOIDmode)
7996
        *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7997
      else
7998
        *total = COSTS_N_INSNS (4); /* Who knows?  */
7999
      return false;
8000
    }
8001
}
8002
 
8003
/* RTX costs.  Dispatch between the size costs and the per-core
   speed costs.  */
8004
static bool
8005
arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
8006
               int *total, bool speed)
8007
{
8008
  if (!speed)
8009
    return arm_size_rtx_costs (x, (enum rtx_code) code,
8010
                               (enum rtx_code) outer_code, total);
8011
  else
8012
    return current_tune->rtx_costs (x, (enum rtx_code) code,
8013
                                    (enum rtx_code) outer_code,
8014
                                    total, speed);
8015
}
8016
 
8017
/* RTX costs for cores with a slow MUL implementation.  Thumb-2 is not
8018
   supported on any "slowmul" cores, so it can be ignored.  */
8019
 
8020
static bool
8021
arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8022
                       int *total, bool speed)
8023
{
8024
  enum machine_mode mode = GET_MODE (x);
8025
 
8026
  if (TARGET_THUMB)
8027
    {
8028
      *total = thumb1_rtx_costs (x, code, outer_code);
8029
      return true;
8030
    }
8031
 
8032
  switch (code)
8033
    {
8034
    case MULT:
8035
      if (GET_MODE_CLASS (mode) == MODE_FLOAT
8036
          || mode == DImode)
8037
        {
8038
          *total = COSTS_N_INSNS (20);
8039
          return false;
8040
        }
8041
 
8042
      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8043
        {
8044
          unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
8045
                                      & (unsigned HOST_WIDE_INT) 0xffffffff);
8046
          int cost, const_ok = const_ok_for_arm (i);
8047
          int j, booth_unit_size;
8048
 
8049
          /* Tune as appropriate.  */
8050
          cost = const_ok ? 4 : 8;
8051
          booth_unit_size = 2;
8052
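          /* The loop below models a Booth multiplier that retires two bits
             of the constant per cycle: e.g. the constant 100 (seven
             significant bits) adds four cycles, for COSTS_N_INSNS (8)
             once the base cost of 4 is included.  */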
          for (j = 0; i && j < 32; j += booth_unit_size)
8053
            {
8054
              i >>= booth_unit_size;
8055
              cost++;
8056
            }
8057
 
8058
          *total = COSTS_N_INSNS (cost);
8059
          *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8060
          return true;
8061
        }
8062
 
8063
      *total = COSTS_N_INSNS (20);
8064
      return false;
8065
 
8066
    default:
8067
      return arm_rtx_costs_1 (x, outer_code, total, speed);
8068
    }
8069
}
8070
 
8071
 
8072
/* RTX cost for cores with a fast multiply unit (M variants).  */
8073
 
8074
static bool
8075
arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8076
                       int *total, bool speed)
8077
{
8078
  enum machine_mode mode = GET_MODE (x);
8079
 
8080
  if (TARGET_THUMB1)
8081
    {
8082
      *total = thumb1_rtx_costs (x, code, outer_code);
8083
      return true;
8084
    }
8085
 
8086
  /* ??? should thumb2 use different costs?  */
8087
  switch (code)
8088
    {
8089
    case MULT:
8090
      /* There is no point basing this on the tuning, since it is always the
8091
         fast variant if it exists at all.  */
8092
      if (mode == DImode
8093
          && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8094
          && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8095
              || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8096
        {
8097
          *total = COSTS_N_INSNS(2);
8098
          return false;
8099
        }
8100
 
8101
 
8102
      if (mode == DImode)
8103
        {
8104
          *total = COSTS_N_INSNS (5);
8105
          return false;
8106
        }
8107
 
8108
      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8109
        {
8110
          unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
8111
                                      & (unsigned HOST_WIDE_INT) 0xffffffff);
8112
          int cost, const_ok = const_ok_for_arm (i);
8113
          int j, booth_unit_size;
8114
 
8115
          /* Tune as appropriate.  */
8116
          cost = const_ok ? 4 : 8;
8117
          booth_unit_size = 8;
8118
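          /* As for the "slowmul" costing above, but this multiplier retires
             eight bits per step, so any constant adds at most four cycles:
             e.g. 100 adds one, giving COSTS_N_INSNS (5).  */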
          for (j = 0; i && j < 32; j += booth_unit_size)
8119
            {
8120
              i >>= booth_unit_size;
8121
              cost++;
8122
            }
8123
 
8124
          *total = COSTS_N_INSNS(cost);
8125
          return false;
8126
        }
8127
 
8128
      if (mode == SImode)
8129
        {
8130
          *total = COSTS_N_INSNS (4);
8131
          return false;
8132
        }
8133
 
8134
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8135
        {
8136
          if (TARGET_HARD_FLOAT
8137
              && (mode == SFmode
8138
                  || (mode == DFmode && !TARGET_VFP_SINGLE)))
8139
            {
8140
              *total = COSTS_N_INSNS (1);
8141
              return false;
8142
            }
8143
        }
8144
 
8145
      /* Requires a lib call */
8146
      *total = COSTS_N_INSNS (20);
8147
      return false;
8148
 
8149
    default:
8150
      return arm_rtx_costs_1 (x, outer_code, total, speed);
8151
    }
8152
}
8153
 
8154
 
8155
/* RTX cost for XScale CPUs.  Thumb-2 is not supported on any xscale cores,
8156
   so it can be ignored.  */
8157
 
8158
static bool
8159
arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8160
                      int *total, bool speed)
8161
{
8162
  enum machine_mode mode = GET_MODE (x);
8163
 
8164
  if (TARGET_THUMB)
8165
    {
8166
      *total = thumb1_rtx_costs (x, code, outer_code);
8167
      return true;
8168
    }
8169
 
8170
  switch (code)
8171
    {
8172
    case COMPARE:
8173
      if (GET_CODE (XEXP (x, 0)) != MULT)
8174
        return arm_rtx_costs_1 (x, outer_code, total, speed);
8175
 
8176
      /* A COMPARE of a MULT is slow on XScale; the muls instruction
8177
         will stall until the multiplication is complete.  */
8178
      *total = COSTS_N_INSNS (3);
8179
      return false;
8180
 
8181
    case MULT:
8182
      /* There is no point basing this on the tuning, since it is always the
8183
         fast variant if it exists at all.  */
8184
      if (mode == DImode
8185
          && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8186
          && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8187
              || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8188
        {
8189
          *total = COSTS_N_INSNS (2);
8190
          return false;
8191
        }
8192
 
8193
 
8194
      if (mode == DImode)
8195
        {
8196
          *total = COSTS_N_INSNS (5);
8197
          return false;
8198
        }
8199
 
8200
      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8201
        {
8202
          /* If operand 1 is a constant we can more accurately
8203
             calculate the cost of the multiply.  The multiplier can
8204
             retire 15 bits on the first cycle and a further 12 on the
8205
             second.  We do, of course, have to load the constant into
8206
             a register first.  */
8207
          unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8208
          /* There's a general overhead of one cycle.  */
8209
          int cost = 1;
8210
          unsigned HOST_WIDE_INT masked_const;
8211
 
8212
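          /* So constants whose significant bits fit in the low 15 bits cost
             one cycle, those that also use bits 15-26 cost two, and those
             touching the top five bits cost three: e.g. 0x4000 -> 1,
             0x12345 -> 2, 0x40000000 -> 3.  Negative values are
             complemented first, so -2 is costed like 1.  */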
          if (i & 0x80000000)
8213
            i = ~i;
8214
 
8215
          i &= (unsigned HOST_WIDE_INT) 0xffffffff;
8216
 
8217
          masked_const = i & 0xffff8000;
8218
          if (masked_const != 0)
8219
            {
8220
              cost++;
8221
              masked_const = i & 0xf8000000;
8222
              if (masked_const != 0)
8223
                cost++;
8224
            }
8225
          *total = COSTS_N_INSNS (cost);
8226
          return false;
8227
        }
8228
 
8229
      if (mode == SImode)
8230
        {
8231
          *total = COSTS_N_INSNS (3);
8232
          return false;
8233
        }
8234
 
8235
      /* Requires a lib call */
8236
      *total = COSTS_N_INSNS (20);
8237
      return false;
8238
 
8239
    default:
8240
      return arm_rtx_costs_1 (x, outer_code, total, speed);
8241
    }
8242
}
8243
 
8244
 
8245
/* RTX costs for 9e (and later) cores.  */
8246
 
8247
static bool
8248
arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8249
                  int *total, bool speed)
8250
{
8251
  enum machine_mode mode = GET_MODE (x);
8252
 
8253
  if (TARGET_THUMB1)
8254
    {
8255
      switch (code)
8256
        {
8257
        case MULT:
8258
          *total = COSTS_N_INSNS (3);
8259
          return true;
8260
 
8261
        default:
8262
          *total = thumb1_rtx_costs (x, code, outer_code);
8263
          return true;
8264
        }
8265
    }
8266
 
8267
  switch (code)
8268
    {
8269
    case MULT:
8270
      /* There is no point basing this on the tuning, since it is always the
8271
         fast variant if it exists at all.  */
8272
      if (mode == DImode
8273
          && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8274
          && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8275
              || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8276
        {
8277
          *total = COSTS_N_INSNS (2);
8278
          return false;
8279
        }
8280
 
8281
 
8282
      if (mode == DImode)
8283
        {
8284
          *total = COSTS_N_INSNS (5);
8285
          return false;
8286
        }
8287
 
8288
      if (mode == SImode)
8289
        {
8290
          *total = COSTS_N_INSNS (2);
8291
          return false;
8292
        }
8293
 
8294
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8295
        {
8296
          if (TARGET_HARD_FLOAT
8297
              && (mode == SFmode
8298
                  || (mode == DFmode && !TARGET_VFP_SINGLE)))
8299
            {
8300
              *total = COSTS_N_INSNS (1);
8301
              return false;
8302
            }
8303
        }
8304
 
8305
      *total = COSTS_N_INSNS (20);
8306
      return false;
8307
 
8308
    default:
8309
      return arm_rtx_costs_1 (x, outer_code, total, speed);
8310
    }
8311
}
8312
/* All address computations that can be done are free, but rtx cost returns
8313
   the same for practically all of them.  So we weight the different types
8314
   of address here in the order (most pref first):
8315
   PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL.  */
8316
static inline int
8317
arm_arm_address_cost (rtx x)
8318
{
8319
  enum rtx_code c  = GET_CODE (x);
8320
 
8321
  if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
8322
    return 0;
8323
  if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
8324
    return 10;
8325
 
8326
  if (c == PLUS)
8327
    {
8328
      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8329
        return 2;
8330
 
8331
      if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
8332
        return 3;
8333
 
8334
      return 4;
8335
    }
8336
 
8337
  return 6;
8338
}
8339
 
8340
static inline int
8341
arm_thumb_address_cost (rtx x)
8342
{
8343
  enum rtx_code c  = GET_CODE (x);
8344
 
8345
  if (c == REG)
8346
    return 1;
8347
  if (c == PLUS
8348
      && GET_CODE (XEXP (x, 0)) == REG
8349
      && GET_CODE (XEXP (x, 1)) == CONST_INT)
8350
    return 1;
8351
 
8352
  return 2;
8353
}
8354
 
8355
static int
8356
arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
8357
{
8358
  return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
8359
}
8360
 
8361
/* Adjust cost hook for XScale.  */
8362
static bool
8363
xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8364
{
8365
  /* Some true dependencies can have a higher cost depending
8366
     on precisely how certain input operands are used.  */
8367
  if (REG_NOTE_KIND(link) == 0
8368
      && recog_memoized (insn) >= 0
8369
      && recog_memoized (dep) >= 0)
8370
    {
8371
      int shift_opnum = get_attr_shift (insn);
8372
      enum attr_type attr_type = get_attr_type (dep);
8373
 
8374
      /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
8375
         operand for INSN.  If we have a shifted input operand and the
8376
         instruction we depend on is another ALU instruction, then we may
8377
         have to account for an additional stall.  */
8378
      if (shift_opnum != 0
8379
          && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
8380
        {
8381
          rtx shifted_operand;
8382
          int opno;
8383
 
8384
          /* Get the shifted operand.  */
8385
          extract_insn (insn);
8386
          shifted_operand = recog_data.operand[shift_opnum];
8387
 
8388
          /* Iterate over all the operands in DEP.  If we write an operand
8389
             that overlaps with SHIFTED_OPERAND, then we have to increase the
8390
             cost of this dependency.  */
8391
          extract_insn (dep);
8392
          preprocess_constraints ();
8393
          for (opno = 0; opno < recog_data.n_operands; opno++)
8394
            {
8395
              /* We can ignore strict inputs.  */
8396
              if (recog_data.operand_type[opno] == OP_IN)
8397
                continue;
8398
 
8399
              if (reg_overlap_mentioned_p (recog_data.operand[opno],
8400
                                           shifted_operand))
8401
                {
8402
                  *cost = 2;
8403
                  return false;
8404
                }
8405
            }
8406
        }
8407
    }
8408
  return true;
8409
}
8410
 
8411
/* Adjust cost hook for Cortex A9.  */
8412
static bool
8413
cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8414
{
8415
  switch (REG_NOTE_KIND (link))
8416
    {
8417
    case REG_DEP_ANTI:
8418
      *cost = 0;
8419
      return false;
8420
 
8421
    case REG_DEP_TRUE:
8422
    case REG_DEP_OUTPUT:
8423
        if (recog_memoized (insn) >= 0
8424
            && recog_memoized (dep) >= 0)
8425
          {
8426
            if (GET_CODE (PATTERN (insn)) == SET)
8427
              {
8428
                if (GET_MODE_CLASS
8429
                    (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
8430
                  || GET_MODE_CLASS
8431
                    (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
8432
                  {
8433
                    enum attr_type attr_type_insn = get_attr_type (insn);
8434
                    enum attr_type attr_type_dep = get_attr_type (dep);
8435
 
8436
                    /* By default all dependencies of the form
8437
                       s0 = s0 <op> s1
8438
                       s0 = s0 <op> s2
8439
                       have an extra latency of 1 cycle because
8440
                       of the input and output dependency in this
8441
                       case.  However this gets modeled as a true
8442
                       dependency and hence all these checks.  */
8443
                    if (REG_P (SET_DEST (PATTERN (insn)))
8444
                        && REG_P (SET_DEST (PATTERN (dep)))
8445
                        && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
8446
                                                    SET_DEST (PATTERN (dep))))
8447
                      {
8448
                        /* FMACS is a special case where the dependent
8449
                           instruction can be issued 3 cycles before
8450
                           the normal latency in case of an output
8451
                           dependency.  */
8452
                        if ((attr_type_insn == TYPE_FMACS
8453
                             || attr_type_insn == TYPE_FMACD)
8454
                            && (attr_type_dep == TYPE_FMACS
8455
                                || attr_type_dep == TYPE_FMACD))
8456
                          {
8457
                            if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8458
                              *cost = insn_default_latency (dep) - 3;
8459
                            else
8460
                              *cost = insn_default_latency (dep);
8461
                            return false;
8462
                          }
8463
                        else
8464
                          {
8465
                            if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8466
                              *cost = insn_default_latency (dep) + 1;
8467
                            else
8468
                              *cost = insn_default_latency (dep);
8469
                          }
8470
                        return false;
8471
                      }
8472
                  }
8473
              }
8474
          }
8475
        break;
8476
 
8477
    default:
8478
      gcc_unreachable ();
8479
    }
8480
 
8481
  return true;
8482
}
8483
 
8484
/* Adjust cost hook for FA726TE.  */
8485
static bool
8486
fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8487
{
8488
  /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting instruction
     followed by a predicated one) has a penalty of 3.  */
8490
  if (REG_NOTE_KIND (link) == REG_DEP_TRUE
8491
      && recog_memoized (insn) >= 0
8492
      && recog_memoized (dep) >= 0
8493
      && get_attr_conds (dep) == CONDS_SET)
8494
    {
8495
      /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency.  */
8496
      if (get_attr_conds (insn) == CONDS_USE
8497
          && get_attr_type (insn) != TYPE_BRANCH)
8498
        {
8499
          *cost = 3;
8500
          return false;
8501
        }
8502
 
8503
      if (GET_CODE (PATTERN (insn)) == COND_EXEC
8504
          || get_attr_conds (insn) == CONDS_USE)
8505
        {
8506
          *cost = 0;
8507
          return false;
8508
        }
8509
    }
8510
 
8511
  return true;
8512
}
8513
 
8514
/* Implement TARGET_REGISTER_MOVE_COST.
8515
 
8516
   Moves between FPA_REGS and GENERAL_REGS are two memory insns.
8517
   Moves between VFP_REGS and GENERAL_REGS are a single insn, but
8518
   it is typically more expensive than a single memory access.  We set
8519
   the cost to less than two memory accesses so that floating
8520
   point to integer conversion does not go through memory.  */
8521
 
8522
int
8523
arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
8524
                        reg_class_t from, reg_class_t to)
8525
{
8526
  if (TARGET_32BIT)
8527
    {
8528
      if ((from == FPA_REGS && to != FPA_REGS)
8529
          || (from != FPA_REGS && to == FPA_REGS))
8530
        return 20;
8531
      else if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
8532
               || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
8533
        return 15;
8534
      else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
8535
               || (from != IWMMXT_REGS && to == IWMMXT_REGS))
8536
        return 4;
8537
      else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
8538
        return 20;
8539
      else if ((from == CIRRUS_REGS && to != CIRRUS_REGS)
8540
               || (from != CIRRUS_REGS && to == CIRRUS_REGS))
8541
        return 20;
8542
      else
8543
        return 2;
8544
    }
8545
  else
8546
    {
8547
      if (from == HI_REGS || to == HI_REGS)
8548
        return 4;
8549
      else
8550
        return 2;
8551
    }
8552
}
8553
 
8554
/* Implement TARGET_MEMORY_MOVE_COST.  */
8555
 
8556
int
8557
arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
8558
                      bool in ATTRIBUTE_UNUSED)
8559
{
8560
  if (TARGET_32BIT)
8561
    return 10;
8562
  else
8563
    {
8564
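      /* Thumb-1: sub-word accesses cost 8; otherwise the cost is two per
         byte for LO_REGS and four per byte for other classes, e.g. 8 for
         an SImode move via LO_REGS and 32 for a DImode move elsewhere.  */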
      if (GET_MODE_SIZE (mode) < 4)
8565
        return 8;
8566
      else
8567
        return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
8568
    }
8569
}
8570
 
8571
/* This function implements the target macro TARGET_SCHED_ADJUST_COST.
8572
   It corrects the value of COST based on the relationship between
8573
   INSN and DEP through the dependence LINK.  It returns the new
8574
   value. There is a per-core adjust_cost hook to adjust scheduler costs
8575
   and the per-core hook can choose to completely override the generic
8576
   adjust_cost function. Only put bits of code into arm_adjust_cost that
8577
   are common across all cores.  */
8578
static int
8579
arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
8580
{
8581
  rtx i_pat, d_pat;
8582
 
8583
 /* When generating Thumb-1 code, we want to place flag-setting operations
8584
    close to a conditional branch which depends on them, so that we can
8585
    omit the comparison. */
8586
  if (TARGET_THUMB1
8587
      && REG_NOTE_KIND (link) == 0
8588
      && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
8589
      && recog_memoized (dep) >= 0
8590
      && get_attr_conds (dep) == CONDS_SET)
8591
    return 0;
8592
 
8593
  if (current_tune->sched_adjust_cost != NULL)
8594
    {
8595
      if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
8596
        return cost;
8597
    }
8598
 
8599
  /* XXX This is not strictly true for the FPA.  */
8600
  if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8601
      || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8602
    return 0;
8603
 
8604
  /* Call insns don't incur a stall, even if they follow a load.  */
8605
  if (REG_NOTE_KIND (link) == 0
8606
      && GET_CODE (insn) == CALL_INSN)
8607
    return 1;
8608
 
8609
  if ((i_pat = single_set (insn)) != NULL
8610
      && GET_CODE (SET_SRC (i_pat)) == MEM
8611
      && (d_pat = single_set (dep)) != NULL
8612
      && GET_CODE (SET_DEST (d_pat)) == MEM)
8613
    {
8614
      rtx src_mem = XEXP (SET_SRC (i_pat), 0);
8615
      /* This is a load after a store; there is no conflict if the load reads
8616
         from a cached area.  Assume that loads from the stack, and from the
8617
         constant pool are cached, and that others will miss.  This is a
8618
         hack.  */
8619
 
8620
      if ((GET_CODE (src_mem) == SYMBOL_REF
8621
           && CONSTANT_POOL_ADDRESS_P (src_mem))
8622
          || reg_mentioned_p (stack_pointer_rtx, src_mem)
8623
          || reg_mentioned_p (frame_pointer_rtx, src_mem)
8624
          || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
8625
        return 1;
8626
    }
8627
 
8628
  return cost;
8629
}
8630
 
8631
static int
8632
arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
8633
{
8634
  if (TARGET_32BIT)
8635
    return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
8636
  else
8637
    return (optimize > 0) ? 2 : 0;
8638
}
8639
 
8640
static int
8641
arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
8642
{
8643
  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
8644
}
8645
 
8646
static int fp_consts_inited = 0;
8647
 
8648
/* Only zero is valid for VFP.  Other values are also valid for FPA.  */
8649
static const char * const strings_fp[8] =
8650
{
8651
  "0",   "1",   "2",   "3",
8652
  "4",   "5",   "0.5", "10"
8653
};
8654
 
8655
static REAL_VALUE_TYPE values_fp[8];
8656
 
8657
static void
8658
init_fp_table (void)
8659
{
8660
  int i;
8661
  REAL_VALUE_TYPE r;
8662
 
8663
  if (TARGET_VFP)
8664
    fp_consts_inited = 1;
8665
  else
8666
    fp_consts_inited = 8;
8667
 
8668
  for (i = 0; i < fp_consts_inited; i++)
8669
    {
8670
      r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
8671
      values_fp[i] = r;
8672
    }
8673
}
8674
 
8675
/* Return TRUE if rtx X is a valid immediate FP constant.  */
8676
int
8677
arm_const_double_rtx (rtx x)
8678
{
8679
  REAL_VALUE_TYPE r;
8680
  int i;
8681
 
8682
  if (!fp_consts_inited)
8683
    init_fp_table ();
8684
 
8685
  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8686
  if (REAL_VALUE_MINUS_ZERO (r))
8687
    return 0;
8688
 
8689
  for (i = 0; i < fp_consts_inited; i++)
8690
    if (REAL_VALUES_EQUAL (r, values_fp[i]))
8691
      return 1;
8692
 
8693
  return 0;
8694
}
8695
 
8696
/* Return TRUE if the negation of rtx X is a valid immediate FPA constant.  */
8697
int
8698
neg_const_double_rtx_ok_for_fpa (rtx x)
8699
{
8700
  REAL_VALUE_TYPE r;
8701
  int i;
8702
 
8703
  if (!fp_consts_inited)
8704
    init_fp_table ();
8705
 
8706
  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8707
  r = real_value_negate (&r);
8708
  if (REAL_VALUE_MINUS_ZERO (r))
8709
    return 0;
8710
 
8711
  for (i = 0; i < 8; i++)
8712
    if (REAL_VALUES_EQUAL (r, values_fp[i]))
8713
      return 1;
8714
 
8715
  return 0;
8716
}
8717
 
8718
 
8719
/* VFPv3 has a fairly wide range of representable immediates, formed from
8720
   "quarter-precision" floating-point values. These can be evaluated using this
8721
   formula (with ^ for exponentiation):
8722
 
8723
     -1^s * n * 2^-r
8724
 
8725
   Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
8726
   16 <= n <= 31 and 0 <= r <= 7.
8727
 
8728
   These values are mapped onto an 8-bit integer ABCDEFGH s.t.
8729
 
8730
     - A (most-significant) is the sign bit.
8731
     - BCD are the exponent (encoded as r XOR 3).
8732
     - EFGH are the mantissa (encoded as n - 16).
8733
*/
8734
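/* For example, 0.5 is +16 * 2^-5, so s = 0, n = 16 and r = 5, giving
   ABCDEFGH = 0 110 0000 = 0x60; 31.0 is +31 * 2^0 and encodes as 0x3f.
   The representable magnitudes therefore run from 0.125 (16 * 2^-7) up
   to 31.0.

   The helper below is purely illustrative (it is not part of the original
   arm.c and is never called): it decodes the 8-bit form back into a value
   using nothing but the formula above.  */

static ATTRIBUTE_UNUSED double
vfp3_decode_quarter_precision_example (unsigned int abcdefgh)
{
  int s = (abcdefgh >> 7) & 1;          /* A: sign bit.  */
  int r = ((abcdefgh >> 4) & 7) ^ 3;    /* BCD: exponent, stored as r XOR 3.  */
  int n = (abcdefgh & 0xf) + 16;        /* EFGH: mantissa, stored as n - 16.  */
  double value = (double) n / (double) (1 << r);

  return s ? -value : value;
}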
 
8735
/* Return an integer index for a VFPv3 immediate operand X suitable for the
8736
   fconst[sd] instruction, or -1 if X isn't suitable.  */
8737
static int
8738
vfp3_const_double_index (rtx x)
8739
{
8740
  REAL_VALUE_TYPE r, m;
8741
  int sign, exponent;
8742
  unsigned HOST_WIDE_INT mantissa, mant_hi;
8743
  unsigned HOST_WIDE_INT mask;
8744
  HOST_WIDE_INT m1, m2;
8745
  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8746
 
8747
  if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
8748
    return -1;
8749
 
8750
  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8751
 
8752
  /* We can't represent these things, so detect them first.  */
8753
  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
8754
    return -1;
8755
 
8756
  /* Extract sign, exponent and mantissa.  */
8757
  sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
8758
  r = real_value_abs (&r);
8759
  exponent = REAL_EXP (&r);
8760
  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8761
     highest (sign) bit, with a fixed binary point at bit point_pos.
8762
     WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
8763
     bits for the mantissa, this may fail (low bits would be lost).  */
8764
  real_ldexp (&m, &r, point_pos - exponent);
8765
  REAL_VALUE_TO_INT (&m1, &m2, m);
8766
  mantissa = m1;
8767
  mant_hi = m2;
8768
 
8769
  /* If there are bits set in the low part of the mantissa, we can't
8770
     represent this value.  */
8771
  if (mantissa != 0)
8772
    return -1;
8773
 
8774
  /* Now make it so that mantissa contains the most-significant bits, and move
8775
     the point_pos to indicate that the least-significant bits have been
8776
     discarded.  */
8777
  point_pos -= HOST_BITS_PER_WIDE_INT;
8778
  mantissa = mant_hi;
8779
 
8780
  /* We can permit four significant bits of mantissa only, plus a high bit
8781
     which is always 1.  */
8782
  mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8783
  if ((mantissa & mask) != 0)
8784
    return -1;
8785
 
8786
  /* Now we know the mantissa is in range, chop off the unneeded bits.  */
8787
  mantissa >>= point_pos - 5;
8788
 
8789
  /* The mantissa may be zero. Disallow that case. (It's possible to load the
8790
     floating-point immediate zero with Neon using an integer-zero load, but
8791
     that case is handled elsewhere.)  */
8792
  if (mantissa == 0)
8793
    return -1;
8794
 
8795
  gcc_assert (mantissa >= 16 && mantissa <= 31);
8796
 
8797
  /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
8798
     normalized significands are in the range [1, 2). (Our mantissa is shifted
8799
     left 4 places at this point relative to normalized IEEE754 values).  GCC
8800
     internally uses [0.5, 1) (see real.c), so the exponent returned from
8801
     REAL_EXP must be altered.  */
8802
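  /* For example, 1.0 is 0.5 * 2^1 internally, so REAL_EXP gives 1 and r
     becomes 4, matching 1.0 = 16 * 2^-4; for 0.5, REAL_EXP gives 0 and r
     becomes 5.  */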
  exponent = 5 - exponent;
8803
 
8804
  if (exponent < 0 || exponent > 7)
8805
    return -1;
8806
 
8807
  /* Sign, mantissa and exponent are now in the correct form to plug into the
8808
     formula described in the comment above.  */
8809
  return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
8810
}
8811
 
8812
/* Return TRUE if rtx X is a valid immediate VFPv3 constant.  */
8813
int
8814
vfp3_const_double_rtx (rtx x)
8815
{
8816
  if (!TARGET_VFP3)
8817
    return 0;
8818
 
8819
  return vfp3_const_double_index (x) != -1;
8820
}
8821
 
8822
/* Recognize immediates which can be used in various Neon instructions. Legal
8823
   immediates are described by the following table (for VMVN variants, the
8824
   bitwise inverse of the constant shown is recognized. In either case, VMOV
8825
   is output and the correct instruction to use for a given constant is chosen
8826
   by the assembler). The constant shown is replicated across all elements of
8827
   the destination vector.
8828
 
8829
   insn elems variant constant (binary)
8830
   ---- ----- ------- -----------------
8831
   vmov  i32     0    00000000 00000000 00000000 abcdefgh
8832
   vmov  i32     1    00000000 00000000 abcdefgh 00000000
8833
   vmov  i32     2    00000000 abcdefgh 00000000 00000000
8834
   vmov  i32     3    abcdefgh 00000000 00000000 00000000
8835
   vmov  i16     4    00000000 abcdefgh
8836
   vmov  i16     5    abcdefgh 00000000
8837
   vmvn  i32     6    00000000 00000000 00000000 abcdefgh
8838
   vmvn  i32     7    00000000 00000000 abcdefgh 00000000
8839
   vmvn  i32     8    00000000 abcdefgh 00000000 00000000
8840
   vmvn  i32     9    abcdefgh 00000000 00000000 00000000
8841
   vmvn  i16    10    00000000 abcdefgh
8842
   vmvn  i16    11    abcdefgh 00000000
8843
   vmov  i32    12    00000000 00000000 abcdefgh 11111111
8844
   vmvn  i32    13    00000000 00000000 abcdefgh 11111111
8845
   vmov  i32    14    00000000 abcdefgh 11111111 11111111
8846
   vmvn  i32    15    00000000 abcdefgh 11111111 11111111
8847
   vmov   i8    16    abcdefgh
8848
   vmov  i64    17    aaaaaaaa bbbbbbbb cccccccc dddddddd
8849
                      eeeeeeee ffffffff gggggggg hhhhhhhh
8850
   vmov  f32    18    aBbbbbbc defgh000 00000000 00000000
8851
 
8852
   For case 18, B = !b. Representable values are exactly those accepted by
8853
   vfp3_const_double_index, but are output as floating-point numbers rather
8854
   than indices.
8855
 
8856
   Variants 0-5 (inclusive) may also be used as immediates for the second
8857
   operand of VORR/VBIC instructions.
8858
 
8859
   The INVERSE argument causes the bitwise inverse of the given operand to be
8860
   recognized instead (used for recognizing legal immediates for the VAND/VORN
8861
   pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
8862
   *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
8863
   output, rather than the real insns vbic/vorr).
8864
 
8865
   INVERSE makes no difference to the recognition of float vectors.
8866
 
8867
   The return value is the variant of immediate as shown in the above table, or
8868
   -1 if the given value doesn't match any of the listed patterns.
8869
*/
8870
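/* For example, a V4SImode constant with every element equal to 0x5A is
   variant 0 (abcdefgh = 0x5a, element width 32), while elements of
   0xFFFFFF12 match the VMVN form, variant 6: the value VMVN actually moves
   is the bitwise inverse, 0x000000ED, which has only its low byte set.  */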
static int
8871
neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
8872
                      rtx *modconst, int *elementwidth)
8873
{
8874
#define CHECK(STRIDE, ELSIZE, CLASS, TEST)      \
8875
  matches = 1;                                  \
8876
  for (i = 0; i < idx; i += (STRIDE))            \
8877
    if (!(TEST))                                \
8878
      matches = 0;                               \
8879
  if (matches)                                  \
8880
    {                                           \
8881
      immtype = (CLASS);                        \
8882
      elsize = (ELSIZE);                        \
8883
      break;                                    \
8884
    }
8885
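/* Each CHECK below tests one row of the table above against the byte image
   built further down: if every STRIDE'th group of bytes satisfies TEST, the
   variant number and element size are recorded and the do-while is exited,
   so the first row that matches wins.  */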
 
8886
  unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
8887
  unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8888
  unsigned char bytes[16];
8889
  int immtype = -1, matches;
8890
  unsigned int invmask = inverse ? 0xff : 0;
8891
 
8892
  /* Vectors of float constants.  */
8893
  if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8894
    {
8895
      rtx el0 = CONST_VECTOR_ELT (op, 0);
8896
      REAL_VALUE_TYPE r0;
8897
 
8898
      if (!vfp3_const_double_rtx (el0))
8899
        return -1;
8900
 
8901
      REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
8902
 
8903
      for (i = 1; i < n_elts; i++)
8904
        {
8905
          rtx elt = CONST_VECTOR_ELT (op, i);
8906
          REAL_VALUE_TYPE re;
8907
 
8908
          REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
8909
 
8910
          if (!REAL_VALUES_EQUAL (r0, re))
8911
            return -1;
8912
        }
8913
 
8914
      if (modconst)
8915
        *modconst = CONST_VECTOR_ELT (op, 0);
8916
 
8917
      if (elementwidth)
8918
        *elementwidth = 0;
8919
 
8920
      return 18;
8921
    }
8922
 
8923
  /* Splat vector constant out into a byte vector.  */
8924
  for (i = 0; i < n_elts; i++)
8925
    {
8926
      rtx el = CONST_VECTOR_ELT (op, i);
8927
      unsigned HOST_WIDE_INT elpart;
8928
      unsigned int part, parts;
8929
 
8930
      if (GET_CODE (el) == CONST_INT)
8931
        {
8932
          elpart = INTVAL (el);
8933
          parts = 1;
8934
        }
8935
      else if (GET_CODE (el) == CONST_DOUBLE)
8936
        {
8937
          elpart = CONST_DOUBLE_LOW (el);
8938
          parts = 2;
8939
        }
8940
      else
8941
        gcc_unreachable ();
8942
 
8943
      for (part = 0; part < parts; part++)
8944
        {
8945
          unsigned int byte;
8946
          for (byte = 0; byte < innersize; byte++)
8947
            {
8948
              bytes[idx++] = (elpart & 0xff) ^ invmask;
8949
              elpart >>= BITS_PER_UNIT;
8950
            }
8951
          if (GET_CODE (el) == CONST_DOUBLE)
8952
            elpart = CONST_DOUBLE_HIGH (el);
8953
        }
8954
    }
8955
 
8956
  /* Sanity check.  */
8957
  gcc_assert (idx == GET_MODE_SIZE (mode));
8958
 
8959
  do
8960
    {
8961
      CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8962
                       && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8963
 
8964
      CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8965
                       && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8966
 
8967
      CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8968
                       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8969
 
8970
      CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8971
                       && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
8972
 
8973
      CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
8974
 
8975
      CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
8976
 
8977
      CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8978
                       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8979
 
8980
      CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8981
                       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8982
 
8983
      CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8984
                       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8985
 
8986
      CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8987
                       && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
8988
 
8989
      CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
8990
 
8991
      CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
8992
 
8993
      CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8994
                        && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8995
 
8996
      CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8997
                        && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8998
 
8999
      CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
9000
                        && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
9001
 
9002
      CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
9003
                        && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
9004
 
9005
      CHECK (1, 8, 16, bytes[i] == bytes[0]);
9006
 
9007
      CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
9008
                        && bytes[i] == bytes[(i + 8) % idx]);
9009
    }
9010
  while (0);
9011
 
9012
  if (immtype == -1)
9013
    return -1;
9014
 
9015
  if (elementwidth)
9016
    *elementwidth = elsize;
9017
 
9018
  if (modconst)
9019
    {
9020
      unsigned HOST_WIDE_INT imm = 0;
9021
 
9022
      /* Un-invert bytes of recognized vector, if necessary.  */
9023
      if (invmask != 0)
9024
        for (i = 0; i < idx; i++)
9025
          bytes[i] ^= invmask;
9026
 
9027
      if (immtype == 17)
9028
        {
9029
          /* FIXME: Broken on 32-bit H_W_I hosts.  */
9030
          gcc_assert (sizeof (HOST_WIDE_INT) == 8);
9031
 
9032
          for (i = 0; i < 8; i++)
9033
            imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
9034
                   << (i * BITS_PER_UNIT);
9035
 
9036
          *modconst = GEN_INT (imm);
9037
        }
9038
      else
9039
        {
9040
          unsigned HOST_WIDE_INT imm = 0;
9041
 
9042
          for (i = 0; i < elsize / BITS_PER_UNIT; i++)
9043
            imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
9044
 
9045
          *modconst = GEN_INT (imm);
9046
        }
9047
    }
9048
 
9049
  return immtype;
9050
#undef CHECK
9051
}
9052
 
9053
/* Return TRUE if rtx OP is legal for use as either a Neon VMOV (or, implicitly,
9054
   VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
9055
   float elements), and a modified constant (whatever should be output for a
9056
   VMOV) in *MODCONST.  */
9057
 
9058
int
9059
neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
9060
                               rtx *modconst, int *elementwidth)
9061
{
9062
  rtx tmpconst;
9063
  int tmpwidth;
9064
  int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
9065
 
9066
  if (retval == -1)
9067
    return 0;
9068
 
9069
  if (modconst)
9070
    *modconst = tmpconst;
9071
 
9072
  if (elementwidth)
9073
    *elementwidth = tmpwidth;
9074
 
9075
  return 1;
9076
}
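
/* A rough usage sketch for the wrapper above (the local variable names are
   only illustrative, not taken from any particular caller):

     rtx imm;
     int width;
     if (neon_immediate_valid_for_move (operands[1], mode, &imm, &width))
       ... emit a vmov.i<width> (or float vmov) of IMM ...
     else
       ... fall back to loading the constant from the literal pool ...

   For the float-vector case neon_valid_immediate returns 18 and reports a
   width of zero, which is why callers must treat WIDTH == 0 specially.  */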
9077
 
9078
/* Return TRUE if rtx OP is legal for use in a VORR or VBIC instruction.  If
9079
   the immediate is valid, write a constant suitable for using as an operand
9080
   to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
9081
   *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE.  */
9082
 
9083
int
9084
neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
9085
                                rtx *modconst, int *elementwidth)
9086
{
9087
  rtx tmpconst;
9088
  int tmpwidth;
9089
  int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
9090
 
9091
  if (retval < 0 || retval > 5)
9092
    return 0;
9093
 
9094
  if (modconst)
9095
    *modconst = tmpconst;
9096
 
9097
  if (elementwidth)
9098
    *elementwidth = tmpwidth;
9099
 
9100
  return 1;
9101
}
9102
 
9103
/* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction.  If
9104
   the immediate is valid, write a constant suitable for using as an operand
9105
   to VSHR/VSHL to *MODCONST and the corresponding element width to
9106
   *ELEMENTWIDTH.  ISLEFTSHIFT says whether this is a left or a right shift,
9107
   since the two have different immediate ranges.  */
9108
 
9109
int
9110
neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
9111
                                rtx *modconst, int *elementwidth,
9112
                                bool isleftshift)
9113
{
9114
  unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
9115
  unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
9116
  unsigned HOST_WIDE_INT last_elt = 0;
9117
  unsigned HOST_WIDE_INT maxshift;
9118
 
9119
  /* Split vector constant out into a byte vector.  */
9120
  for (i = 0; i < n_elts; i++)
9121
    {
9122
      rtx el = CONST_VECTOR_ELT (op, i);
9123
      unsigned HOST_WIDE_INT elpart;
9124
 
9125
      if (GET_CODE (el) == CONST_INT)
9126
        elpart = INTVAL (el);
9127
      else if (GET_CODE (el) == CONST_DOUBLE)
9128
        return 0;
9129
      else
9130
        gcc_unreachable ();
9131
 
9132
      if (i != 0 && elpart != last_elt)
9133
        return 0;
9134
 
9135
      last_elt = elpart;
9136
    }
9137
 
9138
  /* Shift less than element size.  */
9139
  maxshift = innersize * 8;
9140
 
9141
  if (isleftshift)
9142
    {
9143
      /* Left shift immediate value can be from 0 to <size>-1.  */
9144
      if (last_elt >= maxshift)
9145
        return 0;
9146
    }
9147
  else
9148
    {
9149
      /* Right shift immediate value can be from 1 to <size>.  */
9150
      if (last_elt == 0 || last_elt > maxshift)
9151
        return 0;
9152
    }
9153
 
9154
  if (elementwidth)
9155
    *elementwidth = innersize * 8;
9156
 
9157
  if (modconst)
9158
    *modconst = CONST_VECTOR_ELT (op, 0);
9159
 
9160
  return 1;
9161
}
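
/* As a concrete example of the range checks above: for V8QImode (8-bit
   elements, so MAXSHIFT is 8) a constant vector of all 3s is accepted for
   either shift direction, a vector of all 8s only as a right shift, and a
   vector of all 0s only as a left shift.  Vectors whose elements differ, or
   whose elements are CONST_DOUBLEs, are rejected outright.  */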
9162
 
9163
/* Return a string suitable for output of Neon immediate logic operation
9164
   MNEM.  */
9165
 
9166
char *
9167
neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
9168
                             int inverse, int quad)
9169
{
9170
  int width, is_valid;
9171
  static char templ[40];
9172
 
9173
  is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
9174
 
9175
  gcc_assert (is_valid != 0);
9176
 
9177
  if (quad)
9178
    sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
9179
  else
9180
    sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
9181
 
9182
  return templ;
9183
}
9184
 
9185
/* Return a string suitable for output of Neon immediate shift operation
9186
   (VSHR or VSHL) MNEM.  */
9187
 
9188
char *
9189
neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
9190
                             enum machine_mode mode, int quad,
9191
                             bool isleftshift)
9192
{
9193
  int width, is_valid;
9194
  static char templ[40];
9195
 
9196
  is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
9197
  gcc_assert (is_valid != 0);
9198
 
9199
  if (quad)
9200
    sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
9201
  else
9202
    sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
9203
 
9204
  return templ;
9205
}
9206
 
9207
/* Output a sequence of pairwise operations to implement a reduction.
9208
   NOTE: We do "too much work" here, because pairwise operations work on two
9209
   registers' worth of operands in one go.  Unfortunately we do not see a way
9210
   to exploit those extra calculations to do the full operation in fewer steps.
9211
   Although all vector elements of the result but the first are ignored, we
9212
   actually calculate the same result in each of the elements. An alternative
9213
   such as initially loading a vector with zero to use as each of the second
9214
   operands would use up an additional register and take an extra instruction,
9215
   for no particular gain.  */
9216
 
9217
void
9218
neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
9219
                      rtx (*reduc) (rtx, rtx, rtx))
9220
{
9221
  enum machine_mode inner = GET_MODE_INNER (mode);
9222
  unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
9223
  rtx tmpsum = op1;
9224
 
9225
  for (i = parts / 2; i >= 1; i /= 2)
9226
    {
9227
      rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
9228
      emit_insn (reduc (dest, tmpsum, tmpsum));
9229
      tmpsum = dest;
9230
    }
9231
}
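
/* A short worked example of the loop above, assuming REDUC generates a
   pairwise addition: for a V4SI value {a, b, c, d} the first step (i == 2)
   produces {a+b, c+d, a+b, c+d} and the second step (i == 1) writes
   {a+b+c+d, ...} into OP0, so every element of the final vector holds the
   full sum but only element 0 is of interest to the caller.  */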
9232
 
9233
/* If VALS is a vector constant that can be loaded into a register
9234
   using VDUP, generate instructions to do so and return an RTX to
9235
   assign to the register.  Otherwise return NULL_RTX.  */
9236
 
9237
static rtx
9238
neon_vdup_constant (rtx vals)
9239
{
9240
  enum machine_mode mode = GET_MODE (vals);
9241
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
9242
  int n_elts = GET_MODE_NUNITS (mode);
9243
  bool all_same = true;
9244
  rtx x;
9245
  int i;
9246
 
9247
  if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
9248
    return NULL_RTX;
9249
 
9250
  for (i = 0; i < n_elts; ++i)
9251
    {
9252
      x = XVECEXP (vals, 0, i);
9253
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9254
        all_same = false;
9255
    }
9256
 
9257
  if (!all_same)
9258
    /* The elements are not all the same.  We could handle repeating
9259
       patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
9260
       {0, C, 0, C, 0, C, 0, C} which can be loaded using
9261
       vdup.i16).  */
9262
    return NULL_RTX;
9263
 
9264
  /* We can load this constant by using VDUP and a constant in a
9265
     single ARM register.  This will be cheaper than a vector
9266
     load.  */
9267
 
9268
  x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9269
  return gen_rtx_VEC_DUPLICATE (mode, x);
9270
}
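
/* For instance, a V4SImode CONST_VECTOR whose elements are all 5 is handled
   here: the scalar 5 is copied into a core register and the result is a
   (vec_duplicate:V4SI (reg ...)) that the vdup patterns can match.  A V2DI
   constant is always rejected because its inner mode is wider than 4
   bytes.  */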
9271
 
9272
/* Generate code to load VALS, which is a PARALLEL containing only
9273
   constants (for vec_init) or CONST_VECTOR, efficiently into a
9274
   register.  Returns an RTX to copy into the register, or NULL_RTX
9275
   for a PARALLEL that cannot be converted into a CONST_VECTOR.  */
9276
 
9277
rtx
9278
neon_make_constant (rtx vals)
9279
{
9280
  enum machine_mode mode = GET_MODE (vals);
9281
  rtx target;
9282
  rtx const_vec = NULL_RTX;
9283
  int n_elts = GET_MODE_NUNITS (mode);
9284
  int n_const = 0;
9285
  int i;
9286
 
9287
  if (GET_CODE (vals) == CONST_VECTOR)
9288
    const_vec = vals;
9289
  else if (GET_CODE (vals) == PARALLEL)
9290
    {
9291
      /* A CONST_VECTOR must contain only CONST_INTs and
9292
         CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
9293
         Only store valid constants in a CONST_VECTOR.  */
9294
      for (i = 0; i < n_elts; ++i)
9295
        {
9296
          rtx x = XVECEXP (vals, 0, i);
9297
          if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
9298
            n_const++;
9299
        }
9300
      if (n_const == n_elts)
9301
        const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
9302
    }
9303
  else
9304
    gcc_unreachable ();
9305
 
9306
  if (const_vec != NULL
9307
      && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
9308
    /* Load using VMOV.  On Cortex-A8 this takes one cycle.  */
9309
    return const_vec;
9310
  else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
9311
    /* Loaded using VDUP.  On Cortex-A8 the VDUP takes one NEON
9312
       pipeline cycle; creating the constant takes one or two ARM
9313
       pipeline cycles.  */
9314
    return target;
9315
  else if (const_vec != NULL_RTX)
9316
    /* Load from constant pool.  On Cortex-A8 this takes two cycles
9317
       (for either double or quad vectors).  We cannot take advantage
9318
       of single-cycle VLD1 because we need a PC-relative addressing
9319
       mode.  */
9320
    return const_vec;
9321
  else
9322
    /* A PARALLEL containing something not valid inside CONST_VECTOR.
9323
       We cannot construct an initializer.  */
9324
    return NULL_RTX;
9325
}
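
/* To illustrate the cases above: an all-zeros or all-ones CONST_VECTOR is a
   valid VMOV immediate and is returned unchanged; a V4SI vector with every
   element equal to, say, 0x12345678 is not a VMOV immediate but is all-same,
   so neon_vdup_constant turns it into a VDUP from a core register; a V4SI
   constant such as {1, 2, 3, 4} is returned as a CONST_VECTOR and ends up in
   the literal pool; and a PARALLEL containing a SYMBOL_REF element yields
   NULL_RTX.  */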
9326
 
9327
/* Initialize vector TARGET to VALS.  */
9328
 
9329
void
9330
neon_expand_vector_init (rtx target, rtx vals)
9331
{
9332
  enum machine_mode mode = GET_MODE (target);
9333
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
9334
  int n_elts = GET_MODE_NUNITS (mode);
9335
  int n_var = 0, one_var = -1;
9336
  bool all_same = true;
9337
  rtx x, mem;
9338
  int i;
9339
 
9340
  for (i = 0; i < n_elts; ++i)
9341
    {
9342
      x = XVECEXP (vals, 0, i);
9343
      if (!CONSTANT_P (x))
9344
        ++n_var, one_var = i;
9345
 
9346
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9347
        all_same = false;
9348
    }
9349
 
9350
  if (n_var == 0)
9351
    {
9352
      rtx constant = neon_make_constant (vals);
9353
      if (constant != NULL_RTX)
9354
        {
9355
          emit_move_insn (target, constant);
9356
          return;
9357
        }
9358
    }
9359
 
9360
  /* Splat a single non-constant element if we can.  */
9361
  if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
9362
    {
9363
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9364
      emit_insn (gen_rtx_SET (VOIDmode, target,
9365
                              gen_rtx_VEC_DUPLICATE (mode, x)));
9366
      return;
9367
    }
9368
 
9369
  /* One field is non-constant.  Load constant then overwrite varying
9370
     field.  This is more efficient than using the stack.  */
9371
  if (n_var == 1)
9372
    {
9373
      rtx copy = copy_rtx (vals);
9374
      rtx index = GEN_INT (one_var);
9375
 
9376
      /* Load constant part of vector, substitute neighboring value for
9377
         varying element.  */
9378
      XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
9379
      neon_expand_vector_init (target, copy);
9380
 
9381
      /* Insert variable.  */
9382
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
9383
      switch (mode)
9384
        {
9385
        case V8QImode:
9386
          emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
9387
          break;
9388
        case V16QImode:
9389
          emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
9390
          break;
9391
        case V4HImode:
9392
          emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
9393
          break;
9394
        case V8HImode:
9395
          emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
9396
          break;
9397
        case V2SImode:
9398
          emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
9399
          break;
9400
        case V4SImode:
9401
          emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
9402
          break;
9403
        case V2SFmode:
9404
          emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
9405
          break;
9406
        case V4SFmode:
9407
          emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
9408
          break;
9409
        case V2DImode:
9410
          emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
9411
          break;
9412
        default:
9413
          gcc_unreachable ();
9414
        }
9415
      return;
9416
    }
9417
 
9418
  /* Construct the vector in memory one field at a time
9419
     and load the whole vector.  */
9420
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
9421
  for (i = 0; i < n_elts; i++)
9422
    emit_move_insn (adjust_address_nv (mem, inner_mode,
9423
                                    i * GET_MODE_SIZE (inner_mode)),
9424
                    XVECEXP (vals, 0, i));
9425
  emit_move_insn (target, mem);
9426
}
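
/* Examples of the four strategies above for a V4SImode target: {1, 1, 1, 1}
   becomes a single constant move; {x, x, x, x} with X variable becomes a
   VEC_DUPLICATE (vdup) of X; {1, 2, 3, x} is first initialised with the
   constant {1, 2, 3, 1} (the varying lane temporarily takes a neighbouring
   value) after which X is inserted into lane 3 with vset_lane; and a vector
   with two or more variable elements is written to a stack temporary one
   element at a time and loaded back whole.  */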
9427
 
9428
/* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Raise
9429
   ERR if it doesn't.  FIXME: NEON bounds checks occur late in compilation, so
9430
   reported source locations are bogus.  */
9431
 
9432
static void
9433
bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
9434
              const char *err)
9435
{
9436
  HOST_WIDE_INT lane;
9437
 
9438
  gcc_assert (GET_CODE (operand) == CONST_INT);
9439
 
9440
  lane = INTVAL (operand);
9441
 
9442
  if (lane < low || lane >= high)
9443
    error (err);
9444
}
9445
 
9446
/* Bounds-check lanes.  */
9447
 
9448
void
9449
neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9450
{
9451
  bounds_check (operand, low, high, "lane out of range");
9452
}
9453
 
9454
/* Bounds-check constants.  */
9455
 
9456
void
9457
neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9458
{
9459
  bounds_check (operand, low, high, "constant out of range");
9460
}
9461
 
9462
HOST_WIDE_INT
9463
neon_element_bits (enum machine_mode mode)
9464
{
9465
  if (mode == DImode)
9466
    return GET_MODE_BITSIZE (mode);
9467
  else
9468
    return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
9469
}
9470
 
9471
 
9472
/* Predicates for `match_operand' and `match_operator'.  */
9473
 
9474
/* Return nonzero if OP is a valid Cirrus memory address pattern.  */
9475
int
9476
cirrus_memory_offset (rtx op)
9477
{
9478
  /* Reject eliminable registers.  */
9479
  if (! (reload_in_progress || reload_completed)
9480
      && (   reg_mentioned_p (frame_pointer_rtx, op)
9481
          || reg_mentioned_p (arg_pointer_rtx, op)
9482
          || reg_mentioned_p (virtual_incoming_args_rtx, op)
9483
          || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9484
          || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9485
          || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9486
    return 0;
9487
 
9488
  if (GET_CODE (op) == MEM)
9489
    {
9490
      rtx ind;
9491
 
9492
      ind = XEXP (op, 0);
9493
 
9494
      /* Match: (mem (reg)).  */
9495
      if (GET_CODE (ind) == REG)
9496
        return 1;
9497
 
9498
      /* Match:
9499
         (mem (plus (reg)
9500
                    (const))).  */
9501
      if (GET_CODE (ind) == PLUS
9502
          && GET_CODE (XEXP (ind, 0)) == REG
9503
          && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9504
          && GET_CODE (XEXP (ind, 1)) == CONST_INT)
9505
        return 1;
9506
    }
9507
 
9508
  return 0;
9509
}
9510
 
9511
/* Return TRUE if OP is a valid coprocessor memory address pattern.
9512
   WB is true if full writeback address modes are allowed and is false
9513
   if limited writeback address modes (POST_INC and PRE_DEC) are
9514
   allowed.  */
9515
 
9516
int
9517
arm_coproc_mem_operand (rtx op, bool wb)
9518
{
9519
  rtx ind;
9520
 
9521
  /* Reject eliminable registers.  */
9522
  if (! (reload_in_progress || reload_completed)
9523
      && (   reg_mentioned_p (frame_pointer_rtx, op)
9524
          || reg_mentioned_p (arg_pointer_rtx, op)
9525
          || reg_mentioned_p (virtual_incoming_args_rtx, op)
9526
          || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9527
          || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9528
          || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9529
    return FALSE;
9530
 
9531
  /* Constants are converted into offsets from labels.  */
9532
  if (GET_CODE (op) != MEM)
9533
    return FALSE;
9534
 
9535
  ind = XEXP (op, 0);
9536
 
9537
  if (reload_completed
9538
      && (GET_CODE (ind) == LABEL_REF
9539
          || (GET_CODE (ind) == CONST
9540
              && GET_CODE (XEXP (ind, 0)) == PLUS
9541
              && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9542
              && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9543
    return TRUE;
9544
 
9545
  /* Match: (mem (reg)).  */
9546
  if (GET_CODE (ind) == REG)
9547
    return arm_address_register_rtx_p (ind, 0);
9548
 
9549
  /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
9550
     acceptable in any case (subject to verification by
9551
     arm_address_register_rtx_p).  We need WB to be true to accept
9552
     PRE_INC and POST_DEC.  */
9553
  if (GET_CODE (ind) == POST_INC
9554
      || GET_CODE (ind) == PRE_DEC
9555
      || (wb
9556
          && (GET_CODE (ind) == PRE_INC
9557
              || GET_CODE (ind) == POST_DEC)))
9558
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9559
 
9560
  if (wb
9561
      && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
9562
      && arm_address_register_rtx_p (XEXP (ind, 0), 0)
9563
      && GET_CODE (XEXP (ind, 1)) == PLUS
9564
      && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
9565
    ind = XEXP (ind, 1);
9566
 
9567
  /* Match:
9568
     (plus (reg)
9569
           (const)).  */
9570
  if (GET_CODE (ind) == PLUS
9571
      && GET_CODE (XEXP (ind, 0)) == REG
9572
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9573
      && GET_CODE (XEXP (ind, 1)) == CONST_INT
9574
      && INTVAL (XEXP (ind, 1)) > -1024
9575
      && INTVAL (XEXP (ind, 1)) <  1024
9576
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
9577
    return TRUE;
9578
 
9579
  return FALSE;
9580
}
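
/* By way of example, the checks above accept (mem (reg)), post-increment and
   pre-decrement addresses, and (mem (plus (reg) (const_int -256))); an
   offset of 1024 or one that is not a multiple of 4 is rejected, and the
   PRE_INC/POST_DEC and {POST,PRE}_MODIFY forms are only considered when WB
   is true.  */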
9581
 
9582
/* Return TRUE if OP is a memory operand which we can load or store a vector
9583
   to/from. TYPE is one of the following values:
9584
 
9585
    0 - Vector load/store (vldr)
    1 - Core registers (ldm)
9586
    2 - Element/structure loads (vld1)
9587
 */
9588
int
9589
neon_vector_mem_operand (rtx op, int type)
9590
{
9591
  rtx ind;
9592
 
9593
  /* Reject eliminable registers.  */
9594
  if (! (reload_in_progress || reload_completed)
9595
      && (   reg_mentioned_p (frame_pointer_rtx, op)
9596
          || reg_mentioned_p (arg_pointer_rtx, op)
9597
          || reg_mentioned_p (virtual_incoming_args_rtx, op)
9598
          || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9599
          || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9600
          || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9601
    return FALSE;
9602
 
9603
  /* Constants are converted into offsets from labels.  */
9604
  if (GET_CODE (op) != MEM)
9605
    return FALSE;
9606
 
9607
  ind = XEXP (op, 0);
9608
 
9609
  if (reload_completed
9610
      && (GET_CODE (ind) == LABEL_REF
9611
          || (GET_CODE (ind) == CONST
9612
              && GET_CODE (XEXP (ind, 0)) == PLUS
9613
              && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9614
              && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9615
    return TRUE;
9616
 
9617
  /* Match: (mem (reg)).  */
9618
  if (GET_CODE (ind) == REG)
9619
    return arm_address_register_rtx_p (ind, 0);
9620
 
9621
  /* Allow post-increment with Neon registers.  */
9622
  if ((type != 1 && GET_CODE (ind) == POST_INC)
9623
      || (type == 0 && GET_CODE (ind) == PRE_DEC))
9624
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9625
 
9626
  /* FIXME: vld1 allows register post-modify.  */
9627
 
9628
  /* Match:
9629
     (plus (reg)
9630
          (const)).  */
9631
  if (type == 0
9632
      && GET_CODE (ind) == PLUS
9633
      && GET_CODE (XEXP (ind, 0)) == REG
9634
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9635
      && GET_CODE (XEXP (ind, 1)) == CONST_INT
9636
      && INTVAL (XEXP (ind, 1)) > -1024
9637
      && INTVAL (XEXP (ind, 1)) < 1016
9638
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
9639
    return TRUE;
9640
 
9641
  return FALSE;
9642
}
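
/* For example, the reg-plus-offset form above is accepted for TYPE 0 with
   offsets such as -1020 or 1012, while 1016 or any offset that is not a
   multiple of 4 is rejected; post-increment is accepted for every TYPE
   except 1, and pre-decrement only for TYPE 0.  */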
9643
 
9644
/* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
9645
   type.  */
9646
int
9647
neon_struct_mem_operand (rtx op)
9648
{
9649
  rtx ind;
9650
 
9651
  /* Reject eliminable registers.  */
9652
  if (! (reload_in_progress || reload_completed)
9653
      && (   reg_mentioned_p (frame_pointer_rtx, op)
9654
          || reg_mentioned_p (arg_pointer_rtx, op)
9655
          || reg_mentioned_p (virtual_incoming_args_rtx, op)
9656
          || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9657
          || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9658
          || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9659
    return FALSE;
9660
 
9661
  /* Constants are converted into offsets from labels.  */
9662
  if (GET_CODE (op) != MEM)
9663
    return FALSE;
9664
 
9665
  ind = XEXP (op, 0);
9666
 
9667
  if (reload_completed
9668
      && (GET_CODE (ind) == LABEL_REF
9669
          || (GET_CODE (ind) == CONST
9670
              && GET_CODE (XEXP (ind, 0)) == PLUS
9671
              && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9672
              && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9673
    return TRUE;
9674
 
9675
  /* Match: (mem (reg)).  */
9676
  if (GET_CODE (ind) == REG)
9677
    return arm_address_register_rtx_p (ind, 0);
9678
 
9679
  /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db).  */
9680
  if (GET_CODE (ind) == POST_INC
9681
      || GET_CODE (ind) == PRE_DEC)
9682
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9683
 
9684
  return FALSE;
9685
}
9686
 
9687
/* Return true if X is a register that will be eliminated later on.  */
9688
int
9689
arm_eliminable_register (rtx x)
9690
{
9691
  return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
9692
                       || REGNO (x) == ARG_POINTER_REGNUM
9693
                       || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
9694
                           && REGNO (x) <= LAST_VIRTUAL_REGISTER));
9695
}
9696
 
9697
/* Return GENERAL_REGS if a scratch register is required to reload X to/from
9698
   coprocessor registers.  Otherwise return NO_REGS.  */
9699
 
9700
enum reg_class
9701
coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
9702
{
9703
  if (mode == HFmode)
9704
    {
9705
      if (!TARGET_NEON_FP16)
9706
        return GENERAL_REGS;
9707
      if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
9708
        return NO_REGS;
9709
      return GENERAL_REGS;
9710
    }
9711
 
9712
  /* The neon move patterns handle all legitimate vector and struct
9713
     addresses.  */
9714
  if (TARGET_NEON
9715
      && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
9716
      && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
9717
          || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
9718
          || VALID_NEON_STRUCT_MODE (mode)))
9719
    return NO_REGS;
9720
 
9721
  if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
9722
    return NO_REGS;
9723
 
9724
  return GENERAL_REGS;
9725
}
9726
 
9727
/* Values which must be returned in the most-significant end of the return
9728
   register.  */
9729
 
9730
static bool
9731
arm_return_in_msb (const_tree valtype)
9732
{
9733
  return (TARGET_AAPCS_BASED
9734
          && BYTES_BIG_ENDIAN
9735
          && (AGGREGATE_TYPE_P (valtype)
9736
              || TREE_CODE (valtype) == COMPLEX_TYPE
9737
              || FIXED_POINT_TYPE_P (valtype)));
9738
}
9739
 
9740
/* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
9741
   Used by the Cirrus Maverick code which has to work around
9742
   a hardware bug triggered by such instructions.  */
9743
static bool
9744
arm_memory_load_p (rtx insn)
9745
{
9746
  rtx body, lhs, rhs;
9747
 
9748
  if (insn == NULL_RTX || GET_CODE (insn) != INSN)
9749
    return false;
9750
 
9751
  body = PATTERN (insn);
9752
 
9753
  if (GET_CODE (body) != SET)
9754
    return false;
9755
 
9756
  lhs = XEXP (body, 0);
9757
  rhs = XEXP (body, 1);
9758
 
9759
  lhs = REG_OR_SUBREG_RTX (lhs);
9760
 
9761
  /* If the destination is not a general purpose
9762
     register we do not have to worry.  */
9763
  if (GET_CODE (lhs) != REG
9764
      || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
9765
    return false;
9766
 
9767
  /* As well as loads from memory we also have to react
9768
     to loads of invalid constants which will be turned
9769
     into loads from the minipool.  */
9770
  return (GET_CODE (rhs) == MEM
9771
          || GET_CODE (rhs) == SYMBOL_REF
9772
          || note_invalid_constants (insn, -1, false));
9773
}
9774
 
9775
/* Return TRUE if INSN is a Cirrus instruction.  */
9776
static bool
9777
arm_cirrus_insn_p (rtx insn)
9778
{
9779
  enum attr_cirrus attr;
9780
 
9781
  /* get_attr cannot accept USE or CLOBBER.  */
9782
  if (!insn
9783
      || GET_CODE (insn) != INSN
9784
      || GET_CODE (PATTERN (insn)) == USE
9785
      || GET_CODE (PATTERN (insn)) == CLOBBER)
9786
    return 0;
9787
 
9788
  attr = get_attr_cirrus (insn);
9789
 
9790
  return attr != CIRRUS_NOT;
9791
}
9792
 
9793
/* Cirrus reorg for invalid instruction combinations.  */
9794
static void
9795
cirrus_reorg (rtx first)
9796
{
9797
  enum attr_cirrus attr;
9798
  rtx body = PATTERN (first);
9799
  rtx t;
9800
  int nops;
9801
 
9802
  /* Any branch must be followed by 2 non-Cirrus instructions.  */
9803
  if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
9804
    {
9805
      nops = 0;
9806
      t = next_nonnote_insn (first);
9807
 
9808
      if (arm_cirrus_insn_p (t))
9809
        ++ nops;
9810
 
9811
      if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9812
        ++ nops;
9813
 
9814
      while (nops --)
9815
        emit_insn_after (gen_nop (), first);
9816
 
9817
      return;
9818
    }
9819
 
9820
  /* (float (blah)) is in parallel with a clobber.  */
9821
  if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
9822
    body = XVECEXP (body, 0, 0);
9823
 
9824
  if (GET_CODE (body) == SET)
9825
    {
9826
      rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
9827
 
9828
      /* cfldrd, cfldr64, cfstrd, cfstr64 must
9829
         be followed by a non-Cirrus insn.  */
9830
      if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
9831
        {
9832
          if (arm_cirrus_insn_p (next_nonnote_insn (first)))
9833
            emit_insn_after (gen_nop (), first);
9834
 
9835
          return;
9836
        }
9837
      else if (arm_memory_load_p (first))
9838
        {
9839
          unsigned int arm_regno;
9840
 
9841
          /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
9842
             ldr/cfmv64hr combination where the Rd field is the same
9843
             in both instructions must be split with a non-Cirrus
9844
             insn.  Example:
9845
 
9846
             ldr r0, blah
9847
             nop
9848
             cfmvsr mvf0, r0.  */
9849
 
9850
          /* Get Arm register number for ldr insn.  */
9851
          if (GET_CODE (lhs) == REG)
9852
            arm_regno = REGNO (lhs);
9853
          else
9854
            {
9855
              gcc_assert (GET_CODE (rhs) == REG);
9856
              arm_regno = REGNO (rhs);
9857
            }
9858
 
9859
          /* Next insn.  */
9860
          first = next_nonnote_insn (first);
9861
 
9862
          if (! arm_cirrus_insn_p (first))
9863
            return;
9864
 
9865
          body = PATTERN (first);
9866
 
9867
          /* (float (blah)) is in parallel with a clobber.  */
9868
          if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
9869
            body = XVECEXP (body, 0, 0);
9870
 
9871
          if (GET_CODE (body) == FLOAT)
9872
            body = XEXP (body, 0);
9873
 
9874
          if (get_attr_cirrus (first) == CIRRUS_MOVE
9875
              && GET_CODE (XEXP (body, 1)) == REG
9876
              && arm_regno == REGNO (XEXP (body, 1)))
9877
            emit_insn_after (gen_nop (), first);
9878
 
9879
          return;
9880
        }
9881
    }
9882
 
9883
  /* get_attr cannot accept USE or CLOBBER.  */
9884
  if (!first
9885
      || GET_CODE (first) != INSN
9886
      || GET_CODE (PATTERN (first)) == USE
9887
      || GET_CODE (PATTERN (first)) == CLOBBER)
9888
    return;
9889
 
9890
  attr = get_attr_cirrus (first);
9891
 
9892
  /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
9893
     must be followed by a non-coprocessor instruction.  */
9894
  if (attr == CIRRUS_COMPARE)
9895
    {
9896
      nops = 0;
9897
 
9898
      t = next_nonnote_insn (first);
9899
 
9900
      if (arm_cirrus_insn_p (t))
9901
        ++ nops;
9902
 
9903
      if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9904
        ++ nops;
9905
 
9906
      while (nops --)
9907
        emit_insn_after (gen_nop (), first);
9908
 
9909
      return;
9910
    }
9911
}
9912
 
9913
/* Return TRUE if X references a SYMBOL_REF.  */
9914
int
9915
symbol_mentioned_p (rtx x)
9916
{
9917
  const char * fmt;
9918
  int i;
9919
 
9920
  if (GET_CODE (x) == SYMBOL_REF)
9921
    return 1;
9922
 
9923
  /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
9924
     are constant offsets, not symbols.  */
9925
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9926
    return 0;
9927
 
9928
  fmt = GET_RTX_FORMAT (GET_CODE (x));
9929
 
9930
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9931
    {
9932
      if (fmt[i] == 'E')
9933
        {
9934
          int j;
9935
 
9936
          for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9937
            if (symbol_mentioned_p (XVECEXP (x, i, j)))
9938
              return 1;
9939
        }
9940
      else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
9941
        return 1;
9942
    }
9943
 
9944
  return 0;
9945
}
9946
 
9947
/* Return TRUE if X references a LABEL_REF.  */
9948
int
9949
label_mentioned_p (rtx x)
9950
{
9951
  const char * fmt;
9952
  int i;
9953
 
9954
  if (GET_CODE (x) == LABEL_REF)
9955
    return 1;
9956
 
9957
  /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
9958
     instruction, but they are constant offsets, not symbols.  */
9959
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9960
    return 0;
9961
 
9962
  fmt = GET_RTX_FORMAT (GET_CODE (x));
9963
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9964
    {
9965
      if (fmt[i] == 'E')
9966
        {
9967
          int j;
9968
 
9969
          for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9970
            if (label_mentioned_p (XVECEXP (x, i, j)))
9971
              return 1;
9972
        }
9973
      else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
9974
        return 1;
9975
    }
9976
 
9977
  return 0;
9978
}
9979
 
9980
int
9981
tls_mentioned_p (rtx x)
9982
{
9983
  switch (GET_CODE (x))
9984
    {
9985
    case CONST:
9986
      return tls_mentioned_p (XEXP (x, 0));
9987
 
9988
    case UNSPEC:
9989
      if (XINT (x, 1) == UNSPEC_TLS)
9990
        return 1;
9991
 
9992
    default:
9993
      return 0;
9994
    }
9995
}
9996
 
9997
/* Must not copy any rtx that uses a pc-relative address.  */
9998
 
9999
static int
10000
arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
10001
{
10002
  if (GET_CODE (*x) == UNSPEC
10003
      && (XINT (*x, 1) == UNSPEC_PIC_BASE
10004
          || XINT (*x, 1) == UNSPEC_PIC_UNIFIED))
10005
    return 1;
10006
  return 0;
10007
}
10008
 
10009
static bool
10010
arm_cannot_copy_insn_p (rtx insn)
10011
{
10012
  /* The tls call insn cannot be copied, as it is paired with a data
10013
     word.  */
10014
  if (recog_memoized (insn) == CODE_FOR_tlscall)
10015
    return true;
10016
 
10017
  return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
10018
}
10019
 
10020
enum rtx_code
10021
minmax_code (rtx x)
10022
{
10023
  enum rtx_code code = GET_CODE (x);
10024
 
10025
  switch (code)
10026
    {
10027
    case SMAX:
10028
      return GE;
10029
    case SMIN:
10030
      return LE;
10031
    case UMIN:
10032
      return LEU;
10033
    case UMAX:
10034
      return GEU;
10035
    default:
10036
      gcc_unreachable ();
10037
    }
10038
}
10039
 
10040
/* Return 1 if memory locations are adjacent.  */
10041
int
10042
adjacent_mem_locations (rtx a, rtx b)
10043
{
10044
  /* We don't guarantee to preserve the order of these memory refs.  */
10045
  if (volatile_refs_p (a) || volatile_refs_p (b))
10046
    return 0;
10047
 
10048
  if ((GET_CODE (XEXP (a, 0)) == REG
10049
       || (GET_CODE (XEXP (a, 0)) == PLUS
10050
           && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
10051
      && (GET_CODE (XEXP (b, 0)) == REG
10052
          || (GET_CODE (XEXP (b, 0)) == PLUS
10053
              && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
10054
    {
10055
      HOST_WIDE_INT val0 = 0, val1 = 0;
10056
      rtx reg0, reg1;
10057
      int val_diff;
10058
 
10059
      if (GET_CODE (XEXP (a, 0)) == PLUS)
10060
        {
10061
          reg0 = XEXP (XEXP (a, 0), 0);
10062
          val0 = INTVAL (XEXP (XEXP (a, 0), 1));
10063
        }
10064
      else
10065
        reg0 = XEXP (a, 0);
10066
 
10067
      if (GET_CODE (XEXP (b, 0)) == PLUS)
10068
        {
10069
          reg1 = XEXP (XEXP (b, 0), 0);
10070
          val1 = INTVAL (XEXP (XEXP (b, 0), 1));
10071
        }
10072
      else
10073
        reg1 = XEXP (b, 0);
10074
 
10075
      /* Don't accept any offset that will require multiple
10076
         instructions to handle, since this would cause the
10077
         arith_adjacentmem pattern to output an overlong sequence.  */
10078
      if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
10079
        return 0;
10080
 
10081
      /* Don't allow an eliminable register: register elimination can make
10082
         the offset too large.  */
10083
      if (arm_eliminable_register (reg0))
10084
        return 0;
10085
 
10086
      val_diff = val1 - val0;
10087
 
10088
      if (arm_ld_sched)
10089
        {
10090
          /* If the target has load delay slots, then there's no benefit
10091
             to using an ldm instruction unless the offset is zero and
10092
             we are optimizing for size.  */
10093
          return (optimize_size && (REGNO (reg0) == REGNO (reg1))
10094
                  && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
10095
                  && (val_diff == 4 || val_diff == -4));
10096
        }
10097
 
10098
      return ((REGNO (reg0) == REGNO (reg1))
10099
              && (val_diff == 4 || val_diff == -4));
10100
    }
10101
 
10102
  return 0;
10103
}
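
/* For example, [r4, #4] and [r4, #8] (in either order) count as adjacent
   here, while [r4, #4] paired with [r5, #8] or with [r4, #16] does not; the
   register number r4 is of course arbitrary.  On arm_ld_sched cores such a
   pair is only accepted when optimising for size and one of the two offsets
   is 0 or 4.  */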
10104
 
10105
/* Return true iff it would be profitable to turn a sequence of NOPS loads
10106
   or stores (depending on IS_STORE) into a load-multiple or store-multiple
10107
   instruction.  ADD_OFFSET is nonzero if the base address register needs
10108
   to be modified with an add instruction before we can use it.  */
10109
 
10110
static bool
10111
multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
10112
                                 int nops, HOST_WIDE_INT add_offset)
10113
 {
10114
  /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
10115
     if the offset isn't small enough.  The reason 2 ldrs are faster
10116
     is because these ARMs are able to do more than one cache access
10117
     in a single cycle.  The ARM9 and StrongARM have Harvard caches,
10118
     whilst the ARM8 has a double bandwidth cache.  This means that
10119
     these cores can do both an instruction fetch and a data fetch in
10120
     a single cycle, so the trick of calculating the address into a
10121
     scratch register (one of the result regs) and then doing a load
10122
     multiple actually becomes slower (and no smaller in code size).
10123
     That is the transformation
10124
 
10125
        ldr     rd1, [rbase + offset]
10126
        ldr     rd2, [rbase + offset + 4]
10127
 
10128
     to
10129
 
10130
        add     rd1, rbase, offset
10131
        ldmia   rd1, {rd1, rd2}
10132
 
10133
     produces worse code -- '3 cycles + any stalls on rd2' instead of
10134
     '2 cycles + any stalls on rd2'.  On ARMs with only one cache
10135
     access per cycle, the first sequence could never complete in less
10136
     than 6 cycles, whereas the ldm sequence would only take 5 and
10137
     would make better use of sequential accesses if not hitting the
10138
     cache.
10139
 
10140
     We cheat here and test 'arm_ld_sched' which we currently know to
10141
     only be true for the ARM8, ARM9 and StrongARM.  If this ever
10142
     changes, then the test below needs to be reworked.  */
10143
  if (nops == 2 && arm_ld_sched && add_offset != 0)
10144
    return false;
10145
 
10146
  /* XScale has load-store double instructions, but they have stricter
10147
     alignment requirements than load-store multiple, so we cannot
10148
     use them.
10149
 
10150
     For XScale ldm requires 2 + NREGS cycles to complete and blocks
10151
     the pipeline until completion.
10152
 
10153
        NREGS           CYCLES
10154
          1               3
10155
          2               4
10156
          3               5
10157
          4               6
10158
 
10159
     An ldr instruction takes 1-3 cycles, but does not block the
10160
     pipeline.
10161
 
10162
        NREGS           CYCLES
10163
          1              1-3
10164
          2              2-6
10165
          3              3-9
10166
          4              4-12
10167
 
10168
     Best case ldr will always win.  However, the more ldr instructions
10169
     we issue, the less likely we are to be able to schedule them well.
10170
     Using ldr instructions also increases code size.
10171
 
10172
     As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
10173
     for counts of 3 or 4 regs.  */
10174
  if (nops <= 2 && arm_tune_xscale && !optimize_size)
10175
    return false;
10176
  return true;
10177
}
10178
 
10179
/* Subroutine of load_multiple_sequence and store_multiple_sequence.
10180
   Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
10181
   an array ORDER which describes the sequence to use when accessing the
10182
   offsets that produces an ascending order.  In this sequence, each
10183
   offset must be larger by exactly 4 than the previous one.  ORDER[0]
10184
   must have been filled in with the lowest offset by the caller.
10185
   If UNSORTED_REGS is nonnull, it is an array of register numbers that
10186
   we use to verify that ORDER produces an ascending order of registers.
10187
   Return true if it was possible to construct such an order, false if
10188
   not.  */
10189
 
10190
static bool
10191
compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
10192
                      int *unsorted_regs)
10193
{
10194
  int i;
10195
  for (i = 1; i < nops; i++)
10196
    {
10197
      int j;
10198
 
10199
      order[i] = order[i - 1];
10200
      for (j = 0; j < nops; j++)
10201
        if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
10202
          {
10203
            /* We must find exactly one offset that is higher than the
10204
               previous one by 4.  */
10205
            if (order[i] != order[i - 1])
10206
              return false;
10207
            order[i] = j;
10208
          }
10209
      if (order[i] == order[i - 1])
10210
        return false;
10211
      /* The register numbers must be ascending.  */
10212
      if (unsorted_regs != NULL
10213
          && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
10214
        return false;
10215
    }
10216
  return true;
10217
}
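
/* A short worked example: for UNSORTED_OFFSETS = {8, 0, 4, 12} the caller
   seeds ORDER[0] = 1 (the index of the lowest offset) and the loop above
   fills in ORDER = {1, 2, 0, 3}.  Offset sets such as {0, 4, 4, 8} or
   {0, 4, 12, 16} make it return false, because an offset is either
   duplicated or is not exactly 4 larger than its predecessor.  */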
10218
 
10219
/* Used to determine in a peephole whether a sequence of load
10220
   instructions can be changed into a load-multiple instruction.
10221
   NOPS is the number of separate load instructions we are examining.  The
10222
   first NOPS entries in OPERANDS are the destination registers, the
10223
   next NOPS entries are memory operands.  If this function is
10224
   successful, *BASE is set to the common base register of the memory
10225
   accesses; *LOAD_OFFSET is set to the first memory location's offset
10226
   from that base register.
10227
   REGS is an array filled in with the destination register numbers.
10228
   SAVED_ORDER (if nonnull), is an array filled in with an order that maps
10229
   insn numbers to an ascending order of stores.  If CHECK_REGS is true,
10230
   the sequence of registers in REGS matches the loads from ascending memory
10231
   locations, and the function verifies that the register numbers are
10232
   themselves ascending.  If CHECK_REGS is false, the register numbers
10233
   are stored in the order they are found in the operands.  */
10234
static int
10235
load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
10236
                        int *base, HOST_WIDE_INT *load_offset, bool check_regs)
10237
{
10238
  int unsorted_regs[MAX_LDM_STM_OPS];
10239
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10240
  int order[MAX_LDM_STM_OPS];
10241
  rtx base_reg_rtx = NULL;
10242
  int base_reg = -1;
10243
  int i, ldm_case;
10244
 
10245
  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10246
     easily extended if required.  */
10247
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10248
 
10249
  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10250
 
10251
  /* Loop over the operands and check that the memory references are
10252
     suitable (i.e. immediate offsets from the same base register).  At
10253
     the same time, extract the target register, and the memory
10254
     offsets.  */
10255
  for (i = 0; i < nops; i++)
10256
    {
10257
      rtx reg;
10258
      rtx offset;
10259
 
10260
      /* Convert a subreg of a mem into the mem itself.  */
10261
      if (GET_CODE (operands[nops + i]) == SUBREG)
10262
        operands[nops + i] = alter_subreg (operands + (nops + i));
10263
 
10264
      gcc_assert (GET_CODE (operands[nops + i]) == MEM);
10265
 
10266
      /* Don't reorder volatile memory references; it doesn't seem worth
10267
         looking for the case where the order is ok anyway.  */
10268
      if (MEM_VOLATILE_P (operands[nops + i]))
10269
        return 0;
10270
 
10271
      offset = const0_rtx;
10272
 
10273
      if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
10274
           || (GET_CODE (reg) == SUBREG
10275
               && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10276
          || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10277
              && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
10278
                   == REG)
10279
                  || (GET_CODE (reg) == SUBREG
10280
                      && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10281
              && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
10282
                  == CONST_INT)))
10283
        {
10284
          if (i == 0)
10285
            {
10286
              base_reg = REGNO (reg);
10287
              base_reg_rtx = reg;
10288
              if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10289
                return 0;
10290
            }
10291
          else if (base_reg != (int) REGNO (reg))
10292
            /* Not addressed from the same base register.  */
10293
            return 0;
10294
 
10295
          unsorted_regs[i] = (GET_CODE (operands[i]) == REG
10296
                              ? REGNO (operands[i])
10297
                              : REGNO (SUBREG_REG (operands[i])));
10298
 
10299
          /* If it isn't an integer register, or if it overwrites the
10300
             base register but isn't the last insn in the list, then
10301
             we can't do this.  */
10302
          if (unsorted_regs[i] < 0
10303
              || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10304
              || unsorted_regs[i] > 14
10305
              || (i != nops - 1 && unsorted_regs[i] == base_reg))
10306
            return 0;
10307
 
10308
          unsorted_offsets[i] = INTVAL (offset);
10309
          if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
10310
            order[0] = i;
10311
        }
10312
      else
10313
        /* Not a suitable memory address.  */
10314
        return 0;
10315
    }
10316
 
10317
  /* All the useful information has now been extracted from the
10318
     operands into unsorted_regs and unsorted_offsets; additionally,
10319
     order[0] has been set to the lowest offset in the list.  Sort
10320
     the offsets into order, verifying that they are adjacent, and
10321
     check that the register numbers are ascending.  */
10322
  if (!compute_offset_order (nops, unsorted_offsets, order,
10323
                             check_regs ? unsorted_regs : NULL))
10324
    return 0;
10325
 
10326
  if (saved_order)
10327
    memcpy (saved_order, order, sizeof order);
10328
 
10329
  if (base)
10330
    {
10331
      *base = base_reg;
10332
 
10333
      for (i = 0; i < nops; i++)
10334
        regs[i] = unsorted_regs[check_regs ? order[i] : i];
10335
 
10336
      *load_offset = unsorted_offsets[order[0]];
10337
    }
10338
 
10339
  if (TARGET_THUMB1
10340
      && !peep2_reg_dead_p (nops, base_reg_rtx))
10341
    return 0;
10342
 
10343
  if (unsorted_offsets[order[0]] == 0)
10344
    ldm_case = 1; /* ldmia */
10345
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
10346
    ldm_case = 2; /* ldmib */
10347
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
10348
    ldm_case = 3; /* ldmda */
10349
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
10350
    ldm_case = 4; /* ldmdb */
10351
  else if (const_ok_for_arm (unsorted_offsets[order[0]])
10352
           || const_ok_for_arm (-unsorted_offsets[order[0]]))
10353
    ldm_case = 5;
10354
  else
10355
    return 0;
10356
 
10357
  if (!multiple_operation_profitable_p (false, nops,
10358
                                        ldm_case == 5
10359
                                        ? unsorted_offsets[order[0]] : 0))
10360
    return 0;
10361
 
10362
  return ldm_case;
10363
}
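
/* Roughly, the return values above correspond to these situations (the
   register and base numbers are arbitrary): loads of r1 from [r9] and r2
   from [r9, #4] give case 1 (ldmia); offsets 4 and 8 give case 2 (ldmib,
   ARM only); offsets -4 and 0 give case 3 (ldmda, ARM only); offsets -8 and
   -4 give case 4 (ldmdb); and a larger starting offset such as 64 gives
   case 5, where the caller must first add the offset into a scratch
   register, provided multiple_operation_profitable_p does not veto the
   transformation.  */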
10364
 
10365
/* Used to determine in a peephole whether a sequence of store instructions can
10366
   be changed into a store-multiple instruction.
10367
   NOPS is the number of separate store instructions we are examining.
10368
   NOPS_TOTAL is the total number of instructions recognized by the peephole
10369
   pattern.
10370
   The first NOPS entries in OPERANDS are the source registers, the next
10371
   NOPS entries are memory operands.  If this function is successful, *BASE is
10372
   set to the common base register of the memory accesses; *LOAD_OFFSET is set
10373
   to the first memory location's offset from that base register.  REGS is an
10374
   array filled in with the source register numbers, REG_RTXS (if nonnull) is
10375
   likewise filled with the corresponding rtx's.
10376
   SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
10377
   numbers to an ascending order of stores.
10378
   If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
10379
   from ascending memory locations, and the function verifies that the register
10380
   numbers are themselves ascending.  If CHECK_REGS is false, the register
10381
   numbers are stored in the order they are found in the operands.  */
10382
static int
10383
store_multiple_sequence (rtx *operands, int nops, int nops_total,
10384
                         int *regs, rtx *reg_rtxs, int *saved_order, int *base,
10385
                         HOST_WIDE_INT *load_offset, bool check_regs)
10386
{
10387
  int unsorted_regs[MAX_LDM_STM_OPS];
10388
  rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
10389
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10390
  int order[MAX_LDM_STM_OPS];
10391
  int base_reg = -1;
10392
  rtx base_reg_rtx = NULL;
10393
  int i, stm_case;
10394
 
10395
  /* Write back of base register is currently only supported for Thumb 1.  */
10396
  int base_writeback = TARGET_THUMB1;
10397
 
10398
  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10399
     easily extended if required.  */
10400
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10401
 
10402
  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10403
 
10404
  /* Loop over the operands and check that the memory references are
10405
     suitable (i.e. immediate offsets from the same base register).  At
10406
     the same time, extract the target register, and the memory
10407
     offsets.  */
10408
  for (i = 0; i < nops; i++)
10409
    {
10410
      rtx reg;
10411
      rtx offset;
10412
 
10413
      /* Convert a subreg of a mem into the mem itself.  */
10414
      if (GET_CODE (operands[nops + i]) == SUBREG)
10415
        operands[nops + i] = alter_subreg (operands + (nops + i));
10416
 
10417
      gcc_assert (GET_CODE (operands[nops + i]) == MEM);
10418
 
10419
      /* Don't reorder volatile memory references; it doesn't seem worth
10420
         looking for the case where the order is ok anyway.  */
10421
      if (MEM_VOLATILE_P (operands[nops + i]))
10422
        return 0;
10423
 
10424
      offset = const0_rtx;
10425
 
10426
      if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
10427
           || (GET_CODE (reg) == SUBREG
10428
               && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10429
          || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10430
              && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
10431
                   == REG)
10432
                  || (GET_CODE (reg) == SUBREG
10433
                      && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10434
              && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
10435
                  == CONST_INT)))
10436
        {
10437
          unsorted_reg_rtxs[i] = (GET_CODE (operands[i]) == REG
10438
                                  ? operands[i] : SUBREG_REG (operands[i]));
10439
          unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
10440
 
10441
          if (i == 0)
10442
            {
10443
              base_reg = REGNO (reg);
10444
              base_reg_rtx = reg;
10445
              if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10446
                return 0;
10447
            }
10448
          else if (base_reg != (int) REGNO (reg))
10449
            /* Not addressed from the same base register.  */
10450
            return 0;
10451
 
10452
          /* If it isn't an integer register, then we can't do this.  */
10453
          if (unsorted_regs[i] < 0
10454
              || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10455
              /* The effects are unpredictable if the base register is
10456
                 both updated and stored.  */
10457
              || (base_writeback && unsorted_regs[i] == base_reg)
10458
              || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
10459
              || unsorted_regs[i] > 14)
10460
            return 0;
10461
 
10462
          unsorted_offsets[i] = INTVAL (offset);
10463
          if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
10464
            order[0] = i;
10465
        }
10466
      else
10467
        /* Not a suitable memory address.  */
10468
        return 0;
10469
    }
10470
 
10471
  /* All the useful information has now been extracted from the
10472
     operands into unsorted_regs and unsorted_offsets; additionally,
10473
     order[0] has been set to the lowest offset in the list.  Sort
10474
     the offsets into order, verifying that they are adjacent, and
10475
     check that the register numbers are ascending.  */
10476
  if (!compute_offset_order (nops, unsorted_offsets, order,
10477
                             check_regs ? unsorted_regs : NULL))
10478
    return 0;
10479
 
10480
  if (saved_order)
10481
    memcpy (saved_order, order, sizeof order);
10482
 
10483
  if (base)
10484
    {
10485
      *base = base_reg;
10486
 
10487
      for (i = 0; i < nops; i++)
10488
        {
10489
          regs[i] = unsorted_regs[check_regs ? order[i] : i];
10490
          if (reg_rtxs)
10491
            reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
10492
        }
10493
 
10494
      *load_offset = unsorted_offsets[order[0]];
10495
    }
10496
 
10497
  if (TARGET_THUMB1
10498
      && !peep2_reg_dead_p (nops_total, base_reg_rtx))
10499
    return 0;
10500
 
10501
  if (unsorted_offsets[order[0]] == 0)
10502
    stm_case = 1; /* stmia */
10503
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
10504
    stm_case = 2; /* stmib */
10505
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
10506
    stm_case = 3; /* stmda */
10507
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
10508
    stm_case = 4; /* stmdb */
10509
  else
10510
    return 0;
10511
 
10512
  if (!multiple_operation_profitable_p (false, nops, 0))
10513
    return 0;
10514
 
10515
  return stm_case;
10516
}
10517
 
10518
/* Routines for use in generating RTL.  */
10519
 
10520
/* Generate a load-multiple instruction.  COUNT is the number of loads in
10521
   the instruction; REGS and MEMS are arrays containing the operands.
10522
   BASEREG is the base register to be used in addressing the memory operands.
10523
   WBACK_OFFSET is nonzero if the instruction should update the base
10524
   register.  */
10525
 
10526
static rtx
10527
arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
10528
                         HOST_WIDE_INT wback_offset)
10529
{
10530
  int i = 0, j;
10531
  rtx result;
10532
 
10533
  if (!multiple_operation_profitable_p (false, count, 0))
10534
    {
10535
      rtx seq;
10536
 
10537
      start_sequence ();
10538
 
10539
      for (i = 0; i < count; i++)
10540
        emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
10541
 
10542
      if (wback_offset != 0)
10543
        emit_move_insn (basereg, plus_constant (basereg, wback_offset));
10544
 
10545
      seq = get_insns ();
10546
      end_sequence ();
10547
 
10548
      return seq;
10549
    }
10550
 
10551
  result = gen_rtx_PARALLEL (VOIDmode,
10552
                             rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
10553
  if (wback_offset != 0)
10554
    {
10555
      XVECEXP (result, 0, 0)
10556
        = gen_rtx_SET (VOIDmode, basereg,
10557
                       plus_constant (basereg, wback_offset));
10558
      i = 1;
10559
      count++;
10560
    }
10561
 
10562
  for (j = 0; i < count; i++, j++)
10563
    XVECEXP (result, 0, i)
10564
      = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
10565
 
10566
  return result;
10567
}
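/* Rough example (illustrative only): with COUNT == 3, REGS == {4, 5, 6},
   MEMS addressing rB, rB+4 and rB+8, and WBACK_OFFSET == 12, the PARALLEL
   built above corresponds to "ldmia rB!, {r4, r5, r6}": r4-r6 are loaded
   from the three addresses and rB is advanced by 12.  */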
 
10569
/* Generate a store-multiple instruction.  COUNT is the number of stores in
10570
   the instruction; REGS and MEMS are arrays containing the operands.
10571
   BASEREG is the base register to be used in addressing the memory operands.
10572
   WBACK_OFFSET, if nonzero, is the amount by which the instruction should
   update the base register.  */
10574
 
10575
static rtx
10576
arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
10577
                          HOST_WIDE_INT wback_offset)
10578
{
10579
  int i = 0, j;
10580
  rtx result;
10581
 
10582
  if (GET_CODE (basereg) == PLUS)
10583
    basereg = XEXP (basereg, 0);
10584
 
10585
  if (!multiple_operation_profitable_p (false, count, 0))
10586
    {
10587
      rtx seq;
10588
 
10589
      start_sequence ();
10590
 
10591
      for (i = 0; i < count; i++)
10592
        emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
10593
 
10594
      if (wback_offset != 0)
10595
        emit_move_insn (basereg, plus_constant (basereg, wback_offset));
10596
 
10597
      seq = get_insns ();
10598
      end_sequence ();
10599
 
10600
      return seq;
10601
    }
10602
 
10603
  result = gen_rtx_PARALLEL (VOIDmode,
10604
                             rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
10605
  if (wback_offset != 0)
10606
    {
10607
      XVECEXP (result, 0, 0)
10608
        = gen_rtx_SET (VOIDmode, basereg,
10609
                       plus_constant (basereg, wback_offset));
10610
      i = 1;
10611
      count++;
10612
    }
10613
 
10614
  for (j = 0; i < count; i++, j++)
10615
    XVECEXP (result, 0, i)
10616
      = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
10617
 
10618
  return result;
10619
}
10620
 
10621
/* Generate either a load-multiple or a store-multiple instruction.  This
10622
   function can be used in situations where we can start with a single MEM
10623
   rtx and adjust its address upwards.
10624
   COUNT is the number of operations in the instruction, not counting a
10625
   possible update of the base register.  REGS is an array containing the
10626
   register operands.
10627
   BASEREG is the base register to be used in addressing the memory operands,
10628
   which are constructed from BASEMEM.
10629
   WRITE_BACK specifies whether the generated instruction should include an
10630
   update of the base register.
10631
   OFFSETP is used to pass an offset to and from this function; this offset
   is not used when constructing the address (instead BASEMEM should have an
   appropriate offset in its address); it is used only for setting
   MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
10635
 
10636
static rtx
10637
arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
10638
                     bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
10639
{
10640
  rtx mems[MAX_LDM_STM_OPS];
10641
  HOST_WIDE_INT offset = *offsetp;
10642
  int i;
10643
 
10644
  gcc_assert (count <= MAX_LDM_STM_OPS);
10645
 
10646
  if (GET_CODE (basereg) == PLUS)
10647
    basereg = XEXP (basereg, 0);
10648
 
10649
  for (i = 0; i < count; i++)
10650
    {
10651
      rtx addr = plus_constant (basereg, i * 4);
10652
      mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
10653
      offset += 4;
10654
    }
10655
 
10656
  if (write_back)
10657
    *offsetp = offset;
10658
 
10659
  if (is_load)
10660
    return arm_gen_load_multiple_1 (count, regs, mems, basereg,
10661
                                    write_back ? 4 * count : 0);
10662
  else
10663
    return arm_gen_store_multiple_1 (count, regs, mems, basereg,
10664
                                     write_back ? 4 * count : 0);
10665
}
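/* Usage sketch (illustrative, not from the original code): a caller with a
   word-aligned BASEMEM and four consecutive words to fetch might emit
   arm_gen_load_multiple (regnos, 4, src, TRUE, srcbase, &srcoffset), which
   builds MEMS at SRC, SRC+4, SRC+8 and SRC+12 and, because WRITE_BACK is
   true, also updates SRC by 16 in the generated instruction and adds 16 to
   *OFFSETP.  This mirrors how the block-move code later in this file uses
   these helpers.  */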
 
10667
rtx
10668
arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
10669
                       rtx basemem, HOST_WIDE_INT *offsetp)
10670
{
10671
  return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
10672
                              offsetp);
10673
}
10674
 
10675
rtx
10676
arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
10677
                        rtx basemem, HOST_WIDE_INT *offsetp)
10678
{
10679
  return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
10680
                              offsetp);
10681
}
10682
 
10683
/* Called from a peephole2 expander to turn a sequence of loads into an
10684
   LDM instruction.  OPERANDS are the operands found by the peephole matcher;
10685
   NOPS indicates how many separate loads we are trying to combine.  SORT_REGS
10686
   is true if we can reorder the registers because they are used commutatively
10687
   subsequently.
10688
   Returns true iff we could generate a new instruction.  */
10689
 
10690
bool
10691
gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
10692
{
10693
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10694
  rtx mems[MAX_LDM_STM_OPS];
10695
  int i, j, base_reg;
10696
  rtx base_reg_rtx;
10697
  HOST_WIDE_INT offset;
10698
  int write_back = FALSE;
10699
  int ldm_case;
10700
  rtx addr;
10701
 
10702
  ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
10703
                                     &base_reg, &offset, !sort_regs);
10704
 
10705
  if (ldm_case == 0)
10706
    return false;
10707
 
10708
  if (sort_regs)
10709
    for (i = 0; i < nops - 1; i++)
10710
      for (j = i + 1; j < nops; j++)
10711
        if (regs[i] > regs[j])
10712
          {
10713
            int t = regs[i];
10714
            regs[i] = regs[j];
10715
            regs[j] = t;
10716
          }
10717
  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10718
 
10719
  if (TARGET_THUMB1)
10720
    {
10721
      gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
10722
      gcc_assert (ldm_case == 1 || ldm_case == 5);
10723
      write_back = TRUE;
10724
    }
10725
 
10726
  if (ldm_case == 5)
10727
    {
10728
      rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
10729
      emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
10730
      offset = 0;
10731
      if (!TARGET_THUMB1)
10732
        {
10733
          base_reg = regs[0];
10734
          base_reg_rtx = newbase;
10735
        }
10736
    }
10737
 
10738
  for (i = 0; i < nops; i++)
10739
    {
10740
      addr = plus_constant (base_reg_rtx, offset + i * 4);
10741
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10742
                                              SImode, addr, 0);
10743
    }
10744
  emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
10745
                                      write_back ? offset + i * 4 : 0));
10746
  return true;
10747
}
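/* Illustrative note (added): LDM_CASE 5 appears to be the variant where the
   common offset cannot be folded into an LDM addressing mode, so it is added
   explicitly first; e.g. loads from rB+16, rB+20 and rB+24 become
   "add rX, rB, #16" followed by "ldmia rX, {...}", with rX chosen as the
   lowest destination register in ARM/Thumb-2 mode and as the (dead) base
   register itself in Thumb-1 mode.  */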
 
10749
/* Called from a peephole2 expander to turn a sequence of stores into an
10750
   STM instruction.  OPERANDS are the operands found by the peephole matcher;
10751
   NOPS indicates how many separate stores we are trying to combine.
10752
   Returns true iff we could generate a new instruction.  */
10753
 
10754
bool
10755
gen_stm_seq (rtx *operands, int nops)
10756
{
10757
  int i;
10758
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10759
  rtx mems[MAX_LDM_STM_OPS];
10760
  int base_reg;
10761
  rtx base_reg_rtx;
10762
  HOST_WIDE_INT offset;
10763
  int write_back = FALSE;
10764
  int stm_case;
10765
  rtx addr;
10766
  bool base_reg_dies;
10767
 
10768
  stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
10769
                                      mem_order, &base_reg, &offset, true);
10770
 
10771
  if (stm_case == 0)
10772
    return false;
10773
 
10774
  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10775
 
10776
  base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
10777
  if (TARGET_THUMB1)
10778
    {
10779
      gcc_assert (base_reg_dies);
10780
      write_back = TRUE;
10781
    }
10782
 
10783
  if (stm_case == 5)
10784
    {
10785
      gcc_assert (base_reg_dies);
10786
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10787
      offset = 0;
10788
    }
10789
 
10790
  addr = plus_constant (base_reg_rtx, offset);
10791
 
10792
  for (i = 0; i < nops; i++)
10793
    {
10794
      addr = plus_constant (base_reg_rtx, offset + i * 4);
10795
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10796
                                              SImode, addr, 0);
10797
    }
10798
  emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
10799
                                       write_back ? offset + i * 4 : 0));
10800
  return true;
10801
}
10802
 
10803
/* Called from a peephole2 expander to turn a sequence of stores that are
10804
   preceded by constant loads into an STM instruction.  OPERANDS are the
10805
   operands found by the peephole matcher; NOPS indicates how many
10806
   separate stores we are trying to combine; there are 2 * NOPS
10807
   instructions in the peephole.
10808
   Returns true iff we could generate a new instruction.  */
10809
 
10810
bool
10811
gen_const_stm_seq (rtx *operands, int nops)
10812
{
10813
  int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
10814
  int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10815
  rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
10816
  rtx mems[MAX_LDM_STM_OPS];
10817
  int base_reg;
10818
  rtx base_reg_rtx;
10819
  HOST_WIDE_INT offset;
10820
  int write_back = FALSE;
10821
  int stm_case;
10822
  rtx addr;
10823
  bool base_reg_dies;
10824
  int i, j;
10825
  HARD_REG_SET allocated;
10826
 
10827
  stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
10828
                                      mem_order, &base_reg, &offset, false);
10829
 
10830
  if (stm_case == 0)
10831
    return false;
10832
 
10833
  memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
10834
 
10835
  /* If the same register is used more than once, try to find a free
10836
     register.  */
10837
  CLEAR_HARD_REG_SET (allocated);
10838
  for (i = 0; i < nops; i++)
10839
    {
10840
      for (j = i + 1; j < nops; j++)
10841
        if (regs[i] == regs[j])
10842
          {
10843
            rtx t = peep2_find_free_register (0, nops * 2,
10844
                                              TARGET_THUMB1 ? "l" : "r",
10845
                                              SImode, &allocated);
10846
            if (t == NULL_RTX)
10847
              return false;
10848
            reg_rtxs[i] = t;
10849
            regs[i] = REGNO (t);
10850
          }
10851
    }
10852
 
10853
  /* Compute an ordering that maps the register numbers to an ascending
10854
     sequence.  */
10855
  reg_order[0] = 0;
10856
  for (i = 0; i < nops; i++)
10857
    if (regs[i] < regs[reg_order[0]])
10858
      reg_order[0] = i;
10859
 
10860
  for (i = 1; i < nops; i++)
10861
    {
10862
      int this_order = reg_order[i - 1];
10863
      for (j = 0; j < nops; j++)
10864
        if (regs[j] > regs[reg_order[i - 1]]
10865
            && (this_order == reg_order[i - 1]
10866
                || regs[j] < regs[this_order]))
10867
          this_order = j;
10868
      reg_order[i] = this_order;
10869
    }
10870
 
10871
  /* Ensure that registers that must be live after the instruction end
10872
     up with the correct value.  */
10873
  for (i = 0; i < nops; i++)
10874
    {
10875
      int this_order = reg_order[i];
10876
      if ((this_order != mem_order[i]
10877
           || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
10878
          && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
10879
        return false;
10880
    }
10881
 
10882
  /* Load the constants.  */
10883
  for (i = 0; i < nops; i++)
10884
    {
10885
      rtx op = operands[2 * nops + mem_order[i]];
10886
      sorted_regs[i] = regs[reg_order[i]];
10887
      emit_move_insn (reg_rtxs[reg_order[i]], op);
10888
    }
10889
 
10890
  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10891
 
10892
  base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
10893
  if (TARGET_THUMB1)
10894
    {
10895
      gcc_assert (base_reg_dies);
10896
      write_back = TRUE;
10897
    }
10898
 
10899
  if (stm_case == 5)
10900
    {
10901
      gcc_assert (base_reg_dies);
10902
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10903
      offset = 0;
10904
    }
10905
 
10906
  addr = plus_constant (base_reg_rtx, offset);
10907
 
10908
  for (i = 0; i < nops; i++)
10909
    {
10910
      addr = plus_constant (base_reg_rtx, offset + i * 4);
10911
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10912
                                              SImode, addr, 0);
10913
    }
10914
  emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
10915
                                       write_back ? offset + i * 4 : 0));
10916
  return true;
10917
}
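/* Example (for illustration only): storing the constants 1, 2, 3 and 4 to
   rB, rB+4, rB+8 and rB+12 needs the scratch registers chosen so that
   ascending register numbers line up with ascending addresses, e.g.
   "mov r1, #1; mov r2, #2; mov r3, #3; mov r4, #4; stmia rB, {r1-r4}";
   the reordering logic above exists to satisfy that constraint.  */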
 
10919
/* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
10920
   unaligned copies on processors which support unaligned semantics for those
10921
   instructions.  INTERLEAVE_FACTOR can be used to attempt to hide load latency
10922
   (using more registers) by doing e.g. load/load/store/store for a factor of 2.
10923
   An interleave factor of 1 (the minimum) will perform no interleaving.
10924
   Load/store multiple are used for aligned addresses where possible.  */
10925
 
10926
static void
10927
arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
10928
                                   HOST_WIDE_INT length,
10929
                                   unsigned int interleave_factor)
10930
{
10931
  rtx *regs = XALLOCAVEC (rtx, interleave_factor);
10932
  int *regnos = XALLOCAVEC (int, interleave_factor);
10933
  HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
10934
  HOST_WIDE_INT i, j;
10935
  HOST_WIDE_INT remaining = length, words;
10936
  rtx halfword_tmp = NULL, byte_tmp = NULL;
10937
  rtx dst, src;
10938
  bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
10939
  bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
10940
  HOST_WIDE_INT srcoffset, dstoffset;
10941
  HOST_WIDE_INT src_autoinc, dst_autoinc;
10942
  rtx mem, addr;
10943
 
10944
  gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
10945
 
10946
  /* Use hard registers if we have aligned source or destination so we can use
10947
     load/store multiple with contiguous registers.  */
10948
  if (dst_aligned || src_aligned)
10949
    for (i = 0; i < interleave_factor; i++)
10950
      regs[i] = gen_rtx_REG (SImode, i);
10951
  else
10952
    for (i = 0; i < interleave_factor; i++)
10953
      regs[i] = gen_reg_rtx (SImode);
10954
 
10955
  dst = copy_addr_to_reg (XEXP (dstbase, 0));
10956
  src = copy_addr_to_reg (XEXP (srcbase, 0));
10957
 
10958
  srcoffset = dstoffset = 0;
10959
 
10960
  /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
10961
     For copying the last bytes we want to subtract this offset again.  */
10962
  src_autoinc = dst_autoinc = 0;
10963
 
10964
  for (i = 0; i < interleave_factor; i++)
10965
    regnos[i] = i;
10966
 
10967
  /* Copy BLOCK_SIZE_BYTES chunks.  */
10968
 
10969
  for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
10970
    {
10971
      /* Load words.  */
10972
      if (src_aligned && interleave_factor > 1)
10973
        {
10974
          emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
10975
                                            TRUE, srcbase, &srcoffset));
10976
          src_autoinc += UNITS_PER_WORD * interleave_factor;
10977
        }
10978
      else
10979
        {
10980
          for (j = 0; j < interleave_factor; j++)
10981
            {
10982
              addr = plus_constant (src, srcoffset + j * UNITS_PER_WORD
10983
                                         - src_autoinc);
10984
              mem = adjust_automodify_address (srcbase, SImode, addr,
10985
                                               srcoffset + j * UNITS_PER_WORD);
10986
              emit_insn (gen_unaligned_loadsi (regs[j], mem));
10987
            }
10988
          srcoffset += block_size_bytes;
10989
        }
10990
 
10991
      /* Store words.  */
10992
      if (dst_aligned && interleave_factor > 1)
10993
        {
10994
          emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
10995
                                             TRUE, dstbase, &dstoffset));
10996
          dst_autoinc += UNITS_PER_WORD * interleave_factor;
10997
        }
10998
      else
10999
        {
11000
          for (j = 0; j < interleave_factor; j++)
11001
            {
11002
              addr = plus_constant (dst, dstoffset + j * UNITS_PER_WORD
11003
                                         - dst_autoinc);
11004
              mem = adjust_automodify_address (dstbase, SImode, addr,
11005
                                               dstoffset + j * UNITS_PER_WORD);
11006
              emit_insn (gen_unaligned_storesi (mem, regs[j]));
11007
            }
11008
          dstoffset += block_size_bytes;
11009
        }
11010
 
11011
      remaining -= block_size_bytes;
11012
    }
11013
 
11014
  /* Copy any whole words left (note these aren't interleaved with any
11015
     subsequent halfword/byte load/stores in the interests of simplicity).  */
11016
 
11017
  words = remaining / UNITS_PER_WORD;
11018
 
11019
  gcc_assert (words < interleave_factor);
11020
 
11021
  if (src_aligned && words > 1)
11022
    {
11023
      emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
11024
                                        &srcoffset));
11025
      src_autoinc += UNITS_PER_WORD * words;
11026
    }
11027
  else
11028
    {
11029
      for (j = 0; j < words; j++)
11030
        {
11031
          addr = plus_constant (src,
11032
                                srcoffset + j * UNITS_PER_WORD - src_autoinc);
11033
          mem = adjust_automodify_address (srcbase, SImode, addr,
11034
                                           srcoffset + j * UNITS_PER_WORD);
11035
          emit_insn (gen_unaligned_loadsi (regs[j], mem));
11036
        }
11037
      srcoffset += words * UNITS_PER_WORD;
11038
    }
11039
 
11040
  if (dst_aligned && words > 1)
11041
    {
11042
      emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
11043
                                         &dstoffset));
11044
      dst_autoinc += words * UNITS_PER_WORD;
11045
    }
11046
  else
11047
    {
11048
      for (j = 0; j < words; j++)
11049
        {
11050
          addr = plus_constant (dst,
11051
                                dstoffset + j * UNITS_PER_WORD - dst_autoinc);
11052
          mem = adjust_automodify_address (dstbase, SImode, addr,
11053
                                           dstoffset + j * UNITS_PER_WORD);
11054
          emit_insn (gen_unaligned_storesi (mem, regs[j]));
11055
        }
11056
      dstoffset += words * UNITS_PER_WORD;
11057
    }
11058
 
11059
  remaining -= words * UNITS_PER_WORD;
11060
 
11061
  gcc_assert (remaining < 4);
11062
 
11063
  /* Copy a halfword if necessary.  */
11064
 
11065
  if (remaining >= 2)
11066
    {
11067
      halfword_tmp = gen_reg_rtx (SImode);
11068
 
11069
      addr = plus_constant (src, srcoffset - src_autoinc);
11070
      mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
11071
      emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
11072
 
11073
      /* Either write out immediately, or delay until we've loaded the last
11074
         byte, depending on interleave factor.  */
11075
      if (interleave_factor == 1)
11076
        {
11077
          addr = plus_constant (dst, dstoffset - dst_autoinc);
11078
          mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
11079
          emit_insn (gen_unaligned_storehi (mem,
11080
                       gen_lowpart (HImode, halfword_tmp)));
11081
          halfword_tmp = NULL;
11082
          dstoffset += 2;
11083
        }
11084
 
11085
      remaining -= 2;
11086
      srcoffset += 2;
11087
    }
11088
 
11089
  gcc_assert (remaining < 2);
11090
 
11091
  /* Copy last byte.  */
11092
 
11093
  if ((remaining & 1) != 0)
11094
    {
11095
      byte_tmp = gen_reg_rtx (SImode);
11096
 
11097
      addr = plus_constant (src, srcoffset - src_autoinc);
11098
      mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
11099
      emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
11100
 
11101
      if (interleave_factor == 1)
11102
        {
11103
          addr = plus_constant (dst, dstoffset - dst_autoinc);
11104
          mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
11105
          emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
11106
          byte_tmp = NULL;
11107
          dstoffset++;
11108
        }
11109
 
11110
      remaining--;
11111
      srcoffset++;
11112
    }
11113
 
11114
  /* Store last halfword if we haven't done so already.  */
11115
 
11116
  if (halfword_tmp)
11117
    {
11118
      addr = plus_constant (dst, dstoffset - dst_autoinc);
11119
      mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
11120
      emit_insn (gen_unaligned_storehi (mem,
11121
                   gen_lowpart (HImode, halfword_tmp)));
11122
      dstoffset += 2;
11123
    }
11124
 
11125
  /* Likewise for last byte.  */
11126
 
11127
  if (byte_tmp)
11128
    {
11129
      addr = plus_constant (dst, dstoffset - dst_autoinc);
11130
      mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
11131
      emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
11132
      dstoffset++;
11133
    }
11134
 
11135
  gcc_assert (remaining == 0 && srcoffset == dstoffset);
11136
}
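/* Worked example (illustrative only): a 23-byte copy with
   INTERLEAVE_FACTOR == 4 and neither end word-aligned is split by the code
   above into one 16-byte block of four unaligned word loads and stores,
   one further whole word (bytes 16-19), a halfword (bytes 20-21) and a
   final byte (byte 22); the trailing halfword and byte stores are deferred
   because the interleave factor is greater than 1.  */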
 
11138
/* From mips_adjust_block_mem:
11139
 
11140
   Helper function for doing a loop-based block operation on memory
11141
   reference MEM.  Each iteration of the loop will operate on LENGTH
11142
   bytes of MEM.
11143
 
11144
   Create a new base register for use within the loop and point it to
11145
   the start of MEM.  Create a new memory reference that uses this
11146
   register.  Store them in *LOOP_REG and *LOOP_MEM respectively.  */
11147
 
11148
static void
11149
arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
11150
                      rtx *loop_mem)
11151
{
11152
  *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
11153
 
11154
  /* Although the new mem does not refer to a known location,
11155
     it does keep up to LENGTH bytes of alignment.  */
11156
  *loop_mem = change_address (mem, BLKmode, *loop_reg);
11157
  set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
11158
}
11159
 
11160
/* From mips_block_move_loop:
11161
 
11162
   Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
11163
   bytes at a time.  LENGTH must be at least BYTES_PER_ITER.  Assume that
11164
   the memory regions do not overlap.  */
11165
 
11166
static void
11167
arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
11168
                               unsigned int interleave_factor,
11169
                               HOST_WIDE_INT bytes_per_iter)
11170
{
11171
  rtx label, src_reg, dest_reg, final_src, test;
11172
  HOST_WIDE_INT leftover;
11173
 
11174
  leftover = length % bytes_per_iter;
11175
  length -= leftover;
11176
 
11177
  /* Create registers and memory references for use within the loop.  */
11178
  arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
11179
  arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
11180
 
11181
  /* Calculate the value that SRC_REG should have after the last iteration of
11182
     the loop.  */
11183
  final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
11184
                                   0, 0, OPTAB_WIDEN);
11185
 
11186
  /* Emit the start of the loop.  */
11187
  label = gen_label_rtx ();
11188
  emit_label (label);
11189
 
11190
  /* Emit the loop body.  */
11191
  arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
11192
                                     interleave_factor);
11193
 
11194
  /* Move on to the next block.  */
11195
  emit_move_insn (src_reg, plus_constant (src_reg, bytes_per_iter));
11196
  emit_move_insn (dest_reg, plus_constant (dest_reg, bytes_per_iter));
11197
 
11198
  /* Emit the loop condition.  */
11199
  test = gen_rtx_NE (VOIDmode, src_reg, final_src);
11200
  emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
11201
 
11202
  /* Mop up any left-over bytes.  */
11203
  if (leftover)
11204
    arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
11205
}
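/* Illustrative example (added): for LENGTH == 100 and BYTES_PER_ITER == 16
   the loop above runs six times (96 bytes) and the remaining 4 bytes are
   handled by the final straight copy.  */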
 
11207
/* Emit a block move when either the source or destination is unaligned (not
11208
   aligned to a four-byte boundary).  This may need further tuning depending on
11209
   core type, optimize_size setting, etc.  */
11210
 
11211
static int
11212
arm_movmemqi_unaligned (rtx *operands)
11213
{
11214
  HOST_WIDE_INT length = INTVAL (operands[2]);
11215
 
11216
  if (optimize_size)
11217
    {
11218
      bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
11219
      bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
11220
      /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
11221
         size of code if optimizing for size.  We'll use ldm/stm if src_aligned
11222
         or dst_aligned though: allow more interleaving in those cases since the
11223
         resulting code can be smaller.  */
11224
      unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
11225
      HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
11226
 
11227
      if (length > 12)
11228
        arm_block_move_unaligned_loop (operands[0], operands[1], length,
11229
                                       interleave_factor, bytes_per_iter);
11230
      else
11231
        arm_block_move_unaligned_straight (operands[0], operands[1], length,
11232
                                           interleave_factor);
11233
    }
11234
  else
11235
    {
11236
      /* Note that the loop created by arm_block_move_unaligned_loop may be
11237
         subject to loop unrolling, which makes tuning this condition a little
11238
         redundant.  */
11239
      if (length > 32)
11240
        arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
11241
      else
11242
        arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
11243
    }
11244
 
11245
  return 1;
11246
}
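/* Example of the tuning above (illustrative): when optimizing for size with
   both operands unaligned, a 20-byte copy exceeds the 12-byte threshold and
   so uses the loop at 4 bytes per iteration, whereas when not optimizing
   for size the same copy is below the 32-byte threshold and is emitted
   straight with an interleave factor of 4.  */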
 
11248
int
11249
arm_gen_movmemqi (rtx *operands)
11250
{
11251
  HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
11252
  HOST_WIDE_INT srcoffset, dstoffset;
11253
  int i;
11254
  rtx src, dst, srcbase, dstbase;
11255
  rtx part_bytes_reg = NULL;
11256
  rtx mem;
11257
 
11258
  if (GET_CODE (operands[2]) != CONST_INT
11259
      || GET_CODE (operands[3]) != CONST_INT
11260
      || INTVAL (operands[2]) > 64)
11261
    return 0;
11262
 
11263
  if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
11264
    return arm_movmemqi_unaligned (operands);
11265
 
11266
  if (INTVAL (operands[3]) & 3)
11267
    return 0;
11268
 
11269
  dstbase = operands[0];
11270
  srcbase = operands[1];
11271
 
11272
  dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
11273
  src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
11274
 
11275
  in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
11276
  out_words_to_go = INTVAL (operands[2]) / 4;
11277
  last_bytes = INTVAL (operands[2]) & 3;
11278
  dstoffset = srcoffset = 0;
11279
 
11280
  if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
11281
    part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
11282
 
11283
  for (i = 0; in_words_to_go >= 2; i+=4)
11284
    {
11285
      if (in_words_to_go > 4)
11286
        emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
11287
                                          TRUE, srcbase, &srcoffset));
11288
      else
11289
        emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
11290
                                          src, FALSE, srcbase,
11291
                                          &srcoffset));
11292
 
11293
      if (out_words_to_go)
11294
        {
11295
          if (out_words_to_go > 4)
11296
            emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
11297
                                               TRUE, dstbase, &dstoffset));
11298
          else if (out_words_to_go != 1)
11299
            emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
11300
                                               out_words_to_go, dst,
11301
                                               (last_bytes == 0
11302
                                                ? FALSE : TRUE),
11303
                                               dstbase, &dstoffset));
11304
          else
11305
            {
11306
              mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
11307
              emit_move_insn (mem, gen_rtx_REG (SImode, 0));
11308
              if (last_bytes != 0)
11309
                {
11310
                  emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
11311
                  dstoffset += 4;
11312
                }
11313
            }
11314
        }
11315
 
11316
      in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
11317
      out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
11318
    }
11319
 
11320
  /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do.  */
11321
  if (out_words_to_go)
11322
    {
11323
      rtx sreg;
11324
 
11325
      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
11326
      sreg = copy_to_reg (mem);
11327
 
11328
      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
11329
      emit_move_insn (mem, sreg);
11330
      in_words_to_go--;
11331
 
11332
      gcc_assert (!in_words_to_go);     /* Sanity check */
11333
    }
11334
 
11335
  if (in_words_to_go)
11336
    {
11337
      gcc_assert (in_words_to_go > 0);
11338
 
11339
      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
11340
      part_bytes_reg = copy_to_mode_reg (SImode, mem);
11341
    }
11342
 
11343
  gcc_assert (!last_bytes || part_bytes_reg);
11344
 
11345
  if (BYTES_BIG_ENDIAN && last_bytes)
11346
    {
11347
      rtx tmp = gen_reg_rtx (SImode);
11348
 
11349
      /* The bytes we want are in the top end of the word.  */
11350
      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
11351
                              GEN_INT (8 * (4 - last_bytes))));
11352
      part_bytes_reg = tmp;
11353
 
11354
      while (last_bytes)
11355
        {
11356
          mem = adjust_automodify_address (dstbase, QImode,
11357
                                           plus_constant (dst, last_bytes - 1),
11358
                                           dstoffset + last_bytes - 1);
11359
          emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
11360
 
11361
          if (--last_bytes)
11362
            {
11363
              tmp = gen_reg_rtx (SImode);
11364
              emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
11365
              part_bytes_reg = tmp;
11366
            }
11367
        }
11368
 
11369
    }
11370
  else
11371
    {
11372
      if (last_bytes > 1)
11373
        {
11374
          mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
11375
          emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
11376
          last_bytes -= 2;
11377
          if (last_bytes)
11378
            {
11379
              rtx tmp = gen_reg_rtx (SImode);
11380
              emit_insn (gen_addsi3 (dst, dst, const2_rtx));
11381
              emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
11382
              part_bytes_reg = tmp;
11383
              dstoffset += 2;
11384
            }
11385
        }
11386
 
11387
      if (last_bytes)
11388
        {
11389
          mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
11390
          emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
11391
        }
11392
    }
11393
 
11394
  return 1;
11395
}
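/* Worked example (added for illustration): a word-aligned 14-byte copy
   loads four words into r0-r3, stores the first three with a single
   store-multiple, and then writes the remaining two bytes from the
   partially used last word, via a halfword store on a little-endian
   target.  */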
 
11397
/* Select a dominance comparison mode if possible for a test of the general
11398
   form (OP (COND_OR (X) (Y)) (const_int 0)).  We support three forms.
11399
   COND_OR == DOM_CC_X_AND_Y => (X && Y)
11400
   COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
11401
   COND_OR == DOM_CC_X_OR_Y => (X || Y)
11402
   In all cases OP will be either EQ or NE, but we don't need to know which
11403
   here.  If we are unable to support a dominance comparison we return
11404
   CCmode.  This will then fail to match for the RTL expressions that
11405
   generate this call.  */
11406
enum machine_mode
11407
arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
11408
{
11409
  enum rtx_code cond1, cond2;
11410
  int swapped = 0;
11411
 
11412
  /* Currently we will probably get the wrong result if the individual
11413
     comparisons are not simple.  This also ensures that it is safe to
11414
     reverse a comparison if necessary.  */
11415
  if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
11416
       != CCmode)
11417
      || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
11418
          != CCmode))
11419
    return CCmode;
11420
 
11421
  /* The if_then_else variant of this tests the second condition if the
11422
     first passes, but is true if the first fails.  Reverse the first
11423
     condition to get a true "inclusive-or" expression.  */
11424
  if (cond_or == DOM_CC_NX_OR_Y)
11425
    cond1 = reverse_condition (cond1);
11426
 
11427
  /* If the comparisons are not equal, and one doesn't dominate the other,
11428
     then we can't do this.  */
11429
  if (cond1 != cond2
11430
      && !comparison_dominates_p (cond1, cond2)
11431
      && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
11432
    return CCmode;
11433
 
11434
  if (swapped)
11435
    {
11436
      enum rtx_code temp = cond1;
11437
      cond1 = cond2;
11438
      cond2 = temp;
11439
    }
11440
 
11441
  switch (cond1)
11442
    {
11443
    case EQ:
11444
      if (cond_or == DOM_CC_X_AND_Y)
11445
        return CC_DEQmode;
11446
 
11447
      switch (cond2)
11448
        {
11449
        case EQ: return CC_DEQmode;
11450
        case LE: return CC_DLEmode;
11451
        case LEU: return CC_DLEUmode;
11452
        case GE: return CC_DGEmode;
11453
        case GEU: return CC_DGEUmode;
11454
        default: gcc_unreachable ();
11455
        }
11456
 
11457
    case LT:
11458
      if (cond_or == DOM_CC_X_AND_Y)
11459
        return CC_DLTmode;
11460
 
11461
      switch (cond2)
11462
        {
11463
        case LT:
          return CC_DLTmode;
11465
        case LE:
11466
          return CC_DLEmode;
11467
        case NE:
11468
          return CC_DNEmode;
11469
        default:
11470
          gcc_unreachable ();
11471
        }
11472
 
11473
    case GT:
11474
      if (cond_or == DOM_CC_X_AND_Y)
11475
        return CC_DGTmode;
11476
 
11477
      switch (cond2)
11478
        {
11479
        case GT:
11480
          return CC_DGTmode;
11481
        case GE:
11482
          return CC_DGEmode;
11483
        case NE:
11484
          return CC_DNEmode;
11485
        default:
11486
          gcc_unreachable ();
11487
        }
11488
 
11489
    case LTU:
11490
      if (cond_or == DOM_CC_X_AND_Y)
11491
        return CC_DLTUmode;
11492
 
11493
      switch (cond2)
11494
        {
11495
        case LTU:
11496
          return CC_DLTUmode;
11497
        case LEU:
11498
          return CC_DLEUmode;
11499
        case NE:
11500
          return CC_DNEmode;
11501
        default:
11502
          gcc_unreachable ();
11503
        }
11504
 
11505
    case GTU:
11506
      if (cond_or == DOM_CC_X_AND_Y)
11507
        return CC_DGTUmode;
11508
 
11509
      switch (cond2)
11510
        {
11511
        case GTU:
11512
          return CC_DGTUmode;
11513
        case GEU:
11514
          return CC_DGEUmode;
11515
        case NE:
11516
          return CC_DNEmode;
11517
        default:
11518
          gcc_unreachable ();
11519
        }
11520
 
11521
    /* The remaining cases only occur when both comparisons are the
11522
       same.  */
11523
    case NE:
11524
      gcc_assert (cond1 == cond2);
11525
      return CC_DNEmode;
11526
 
11527
    case LE:
11528
      gcc_assert (cond1 == cond2);
11529
      return CC_DLEmode;
11530
 
11531
    case GE:
11532
      gcc_assert (cond1 == cond2);
11533
      return CC_DGEmode;
11534
 
11535
    case LEU:
11536
      gcc_assert (cond1 == cond2);
11537
      return CC_DLEUmode;
11538
 
11539
    case GEU:
11540
      gcc_assert (cond1 == cond2);
11541
      return CC_DGEUmode;
11542
 
11543
    default:
11544
      gcc_unreachable ();
11545
    }
11546
}
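/* Illustrative example (added): for a test such as "(x < y) && (a <= b)"
   (COND_OR == DOM_CC_X_AND_Y) the LT case above is reached and CC_DLTmode
   is returned; for LT combined with GT neither condition dominates the
   other, so CCmode is returned and the combined pattern simply fails to
   match.  */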
 
11548
enum machine_mode
11549
arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
11550
{
11551
  /* All floating point compares return CCFP if it is an equality
11552
     comparison, and CCFPE otherwise.  */
11553
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
11554
    {
11555
      switch (op)
11556
        {
11557
        case EQ:
11558
        case NE:
11559
        case UNORDERED:
11560
        case ORDERED:
11561
        case UNLT:
11562
        case UNLE:
11563
        case UNGT:
11564
        case UNGE:
11565
        case UNEQ:
11566
        case LTGT:
11567
          return CCFPmode;
11568
 
11569
        case LT:
11570
        case LE:
11571
        case GT:
11572
        case GE:
11573
          if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
11574
            return CCFPmode;
11575
          return CCFPEmode;
11576
 
11577
        default:
11578
          gcc_unreachable ();
11579
        }
11580
    }
11581
 
11582
  /* A compare with a shifted operand.  Because of canonicalization, the
11583
     comparison will have to be swapped when we output the assembly.  */
11584
  if (GET_MODE (y) == SImode
11585
      && (REG_P (y) || (GET_CODE (y) == SUBREG))
11586
      && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
11587
          || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
11588
          || GET_CODE (x) == ROTATERT))
11589
    return CC_SWPmode;
11590
 
11591
  /* This operation is performed swapped, but since we only rely on the Z
11592
     flag we don't need an additional mode.  */
11593
  if (GET_MODE (y) == SImode
11594
      && (REG_P (y) || (GET_CODE (y) == SUBREG))
11595
      && GET_CODE (x) == NEG
11596
      && (op == EQ || op == NE))
11597
    return CC_Zmode;
11598
 
11599
  /* This is a special case that is used by combine to allow a
11600
     comparison of a shifted byte load to be split into a zero-extend
11601
     followed by a comparison of the shifted integer (only valid for
11602
     equalities and unsigned inequalities).  */
11603
  if (GET_MODE (x) == SImode
11604
      && GET_CODE (x) == ASHIFT
11605
      && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
11606
      && GET_CODE (XEXP (x, 0)) == SUBREG
11607
      && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
11608
      && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
11609
      && (op == EQ || op == NE
11610
          || op == GEU || op == GTU || op == LTU || op == LEU)
11611
      && GET_CODE (y) == CONST_INT)
11612
    return CC_Zmode;
11613
 
11614
  /* A construct for a conditional compare: if the false arm contains
     0, then both conditions must be true; otherwise either condition
11616
     must be true.  Not all conditions are possible, so CCmode is
11617
     returned if it can't be done.  */
11618
  if (GET_CODE (x) == IF_THEN_ELSE
11619
      && (XEXP (x, 2) == const0_rtx
11620
          || XEXP (x, 2) == const1_rtx)
11621
      && COMPARISON_P (XEXP (x, 0))
11622
      && COMPARISON_P (XEXP (x, 1)))
11623
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
11624
                                         INTVAL (XEXP (x, 2)));
11625
 
11626
  /* Alternate canonicalizations of the above.  These are somewhat cleaner.  */
11627
  if (GET_CODE (x) == AND
11628
      && (op == EQ || op == NE)
11629
      && COMPARISON_P (XEXP (x, 0))
11630
      && COMPARISON_P (XEXP (x, 1)))
11631
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
11632
                                         DOM_CC_X_AND_Y);
11633
 
11634
  if (GET_CODE (x) == IOR
11635
      && (op == EQ || op == NE)
11636
      && COMPARISON_P (XEXP (x, 0))
11637
      && COMPARISON_P (XEXP (x, 1)))
11638
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
11639
                                         DOM_CC_X_OR_Y);
11640
 
11641
  /* An operation (on Thumb) where we want to test for a single bit.
11642
     This is done by shifting that bit up into the top bit of a
11643
     scratch register; we can then branch on the sign bit.  */
11644
  if (TARGET_THUMB1
11645
      && GET_MODE (x) == SImode
11646
      && (op == EQ || op == NE)
11647
      && GET_CODE (x) == ZERO_EXTRACT
11648
      && XEXP (x, 1) == const1_rtx)
11649
    return CC_Nmode;
11650
 
11651
  /* For an operation that sets the condition codes as a side-effect, the
11652
     V flag is not set correctly, so we can only use comparisons where
11653
     this doesn't matter.  (For LT and GE we can use "mi" and "pl"
11654
     instead.)  */
11655
  /* ??? Does the ZERO_EXTRACT case really apply to thumb2?  */
11656
  if (GET_MODE (x) == SImode
11657
      && y == const0_rtx
11658
      && (op == EQ || op == NE || op == LT || op == GE)
11659
      && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
11660
          || GET_CODE (x) == AND || GET_CODE (x) == IOR
11661
          || GET_CODE (x) == XOR || GET_CODE (x) == MULT
11662
          || GET_CODE (x) == NOT || GET_CODE (x) == NEG
11663
          || GET_CODE (x) == LSHIFTRT
11664
          || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
11665
          || GET_CODE (x) == ROTATERT
11666
          || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
11667
    return CC_NOOVmode;
11668
 
11669
  if (GET_MODE (x) == QImode && (op == EQ || op == NE))
11670
    return CC_Zmode;
11671
 
11672
  if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
11673
      && GET_CODE (x) == PLUS
11674
      && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
11675
    return CC_Cmode;
11676
 
11677
  if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
11678
    {
11679
      /* To keep things simple, always use the Cirrus cfcmp64 if it is
11680
         available.  */
11681
      if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
11682
        return CCmode;
11683
 
11684
      switch (op)
11685
        {
11686
        case EQ:
11687
        case NE:
11688
          /* A DImode comparison against zero can be implemented by
11689
             or'ing the two halves together.  */
11690
          if (y == const0_rtx)
11691
            return CC_Zmode;
11692
 
11693
          /* We can do an equality test in three Thumb instructions.  */
11694
          if (!TARGET_32BIT)
11695
            return CC_Zmode;
11696
 
11697
          /* FALLTHROUGH */
11698
 
11699
        case LTU:
11700
        case LEU:
11701
        case GTU:
11702
        case GEU:
11703
          /* DImode unsigned comparisons can be implemented by cmp +
11704
             cmpeq without a scratch register.  Not worth doing in
11705
             Thumb-2.  */
11706
          if (TARGET_32BIT)
11707
            return CC_CZmode;
11708
 
11709
          /* FALLTHROUGH */
11710
 
11711
        case LT:
11712
        case LE:
11713
        case GT:
11714
        case GE:
11715
          /* DImode signed and unsigned comparisons can be implemented
11716
             by cmp + sbcs with a scratch register, but that does not
11717
             set the Z flag - we must reverse GT/LE/GTU/LEU.  */
11718
          gcc_assert (op != EQ && op != NE);
11719
          return CC_NCVmode;
11720
 
11721
        default:
11722
          gcc_unreachable ();
11723
        }
11724
    }
11725
 
11726
  return CCmode;
11727
}
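/* Example (illustrative, added): the common unsigned overflow check
   "(a + b) < a" on SImode operands matches the PLUS/LTU case above and
   selects CC_Cmode, so only the carry flag needs to be valid after the
   comparison.  */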
 
11729
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for the condition code register in the proper mode.
   SCRATCH is an SImode scratch register needed for some DImode comparisons
   after reload.  */
11732
rtx
11733
arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
11734
{
11735
  enum machine_mode mode;
11736
  rtx cc_reg;
11737
  int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
11738
 
11739
  /* We might have X as a constant, Y as a register because of the predicates
11740
     used for cmpdi.  If so, force X to a register here.  */
11741
  if (dimode_comparison && !REG_P (x))
11742
    x = force_reg (DImode, x);
11743
 
11744
  mode = SELECT_CC_MODE (code, x, y);
11745
  cc_reg = gen_rtx_REG (mode, CC_REGNUM);
11746
 
11747
  if (dimode_comparison
11748
      && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)
11749
      && mode != CC_CZmode)
11750
    {
11751
      rtx clobber, set;
11752
 
11753
      /* To compare two non-zero values for equality, XOR them and
11754
         then compare against zero.  Not used for ARM mode; there
11755
         CC_CZmode is cheaper.  */
11756
      if (mode == CC_Zmode && y != const0_rtx)
11757
        {
11758
          gcc_assert (!reload_completed);
11759
          x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
11760
          y = const0_rtx;
11761
        }
11762
 
11763
      /* A scratch register is required.  */
11764
      if (reload_completed)
11765
        gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
11766
      else
11767
        scratch = gen_rtx_SCRATCH (SImode);
11768
 
11769
      clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
11770
      set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
11771
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
11772
    }
11773
  else
11774
    emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
11775
 
11776
  return cc_reg;
11777
}
11778
 
11779
/* Generate a sequence of insns that will generate the correct return
11780
   address mask depending on the physical architecture that the program
11781
   is running on.  */
11782
rtx
11783
arm_gen_return_addr_mask (void)
11784
{
11785
  rtx reg = gen_reg_rtx (Pmode);
11786
 
11787
  emit_insn (gen_return_addr_mask (reg));
11788
  return reg;
11789
}
11790
 
11791
void
11792
arm_reload_in_hi (rtx *operands)
11793
{
11794
  rtx ref = operands[1];
11795
  rtx base, scratch;
11796
  HOST_WIDE_INT offset = 0;
11797
 
11798
  if (GET_CODE (ref) == SUBREG)
11799
    {
11800
      offset = SUBREG_BYTE (ref);
11801
      ref = SUBREG_REG (ref);
11802
    }
11803
 
11804
  if (GET_CODE (ref) == REG)
11805
    {
11806
      /* We have a pseudo which has been spilt onto the stack; there
11807
         are two cases here: the first where there is a simple
11808
         stack-slot replacement and a second where the stack-slot is
11809
         out of range, or is used as a subreg.  */
11810
      if (reg_equiv_mem (REGNO (ref)))
11811
        {
11812
          ref = reg_equiv_mem (REGNO (ref));
11813
          base = find_replacement (&XEXP (ref, 0));
11814
        }
11815
      else
11816
        /* The slot is out of range, or was dressed up in a SUBREG.  */
11817
        base = reg_equiv_address (REGNO (ref));
11818
    }
11819
  else
11820
    base = find_replacement (&XEXP (ref, 0));
11821
 
11822
  /* Handle the case where the address is too complex to be offset by 1.  */
11823
  if (GET_CODE (base) == MINUS
11824
      || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
11825
    {
11826
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11827
 
11828
      emit_set_insn (base_plus, base);
11829
      base = base_plus;
11830
    }
11831
  else if (GET_CODE (base) == PLUS)
11832
    {
11833
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
11834
      HOST_WIDE_INT hi, lo;
11835
 
11836
      offset += INTVAL (XEXP (base, 1));
11837
      base = XEXP (base, 0);
11838
 
11839
      /* Rework the address into a legal sequence of insns.  */
11840
      /* Valid range for lo is -4095 -> 4095 */
11841
      lo = (offset >= 0
11842
            ? (offset & 0xfff)
11843
            : -((-offset) & 0xfff));
11844
 
11845
      /* Corner case: if lo is the max offset then we would be out of range
11846
         once we have added the additional 1 below, so bump the msb into the
11847
         pre-loading insn(s).  */
11848
      if (lo == 4095)
11849
        lo &= 0x7ff;
11850
 
11851
      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
11852
             ^ (HOST_WIDE_INT) 0x80000000)
11853
            - (HOST_WIDE_INT) 0x80000000);
11854
 
11855
      gcc_assert (hi + lo == offset);
11856
 
11857
      if (hi != 0)
11858
        {
11859
          rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11860
 
11861
          /* Get the base address; addsi3 knows how to handle constants
11862
             that require more than one insn.  */
11863
          emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
11864
          base = base_plus;
11865
          offset = lo;
11866
        }
11867
    }
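  /* Worked example of the HI/LO split above (illustrative): for
     OFFSET == 0x1005, LO is 0x005 and HI is 0x1000, so the large offset is
     materialized with a single add of 0x1000 and the two byte loads then
     use the small offsets 5 and 6.  For OFFSET == 4095, LO is trimmed to
     0x7ff so that LO + 1 remains in range.  */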
 
11869
  /* Operands[2] may overlap operands[0] (though it won't overlap
11870
     operands[1]), that's why we asked for a DImode reg -- so we can
11871
     use the half that does not overlap.  */
11872
  if (REGNO (operands[2]) == REGNO (operands[0]))
11873
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11874
  else
11875
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
11876
 
11877
  emit_insn (gen_zero_extendqisi2 (scratch,
11878
                                   gen_rtx_MEM (QImode,
11879
                                                plus_constant (base,
11880
                                                               offset))));
11881
  emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
11882
                                   gen_rtx_MEM (QImode,
11883
                                                plus_constant (base,
11884
                                                               offset + 1))));
11885
  if (!BYTES_BIG_ENDIAN)
11886
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
11887
                   gen_rtx_IOR (SImode,
11888
                                gen_rtx_ASHIFT
11889
                                (SImode,
11890
                                 gen_rtx_SUBREG (SImode, operands[0], 0),
11891
                                 GEN_INT (8)),
11892
                                scratch));
11893
  else
11894
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
11895
                   gen_rtx_IOR (SImode,
11896
                                gen_rtx_ASHIFT (SImode, scratch,
11897
                                                GEN_INT (8)),
11898
                                gen_rtx_SUBREG (SImode, operands[0], 0)));
11899
}
11900
 
11901
/* Handle storing a half-word to memory during reload by synthesizing it as
   two byte stores.  Take care not to clobber the input values until after we
11903
   have moved them somewhere safe.  This code assumes that if the DImode
11904
   scratch in operands[2] overlaps either the input value or output address
11905
   in some way, then that value must die in this insn (we absolutely need
11906
   two scratch registers for some corner cases).  */
11907
void
11908
arm_reload_out_hi (rtx *operands)
11909
{
11910
  rtx ref = operands[0];
11911
  rtx outval = operands[1];
11912
  rtx base, scratch;
11913
  HOST_WIDE_INT offset = 0;
11914
 
11915
  if (GET_CODE (ref) == SUBREG)
11916
    {
11917
      offset = SUBREG_BYTE (ref);
11918
      ref = SUBREG_REG (ref);
11919
    }
11920
 
11921
  if (GET_CODE (ref) == REG)
11922
    {
11923
      /* We have a pseudo which has been spilt onto the stack; there
11924
         are two cases here: the first where there is a simple
11925
         stack-slot replacement and a second where the stack-slot is
11926
         out of range, or is used as a subreg.  */
11927
      if (reg_equiv_mem (REGNO (ref)))
11928
        {
11929
          ref = reg_equiv_mem (REGNO (ref));
11930
          base = find_replacement (&XEXP (ref, 0));
11931
        }
11932
      else
11933
        /* The slot is out of range, or was dressed up in a SUBREG.  */
11934
        base = reg_equiv_address (REGNO (ref));
11935
    }
11936
  else
11937
    base = find_replacement (&XEXP (ref, 0));
11938
 
11939
  scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
11940
 
11941
  /* Handle the case where the address is too complex to be offset by 1.  */
11942
  if (GET_CODE (base) == MINUS
11943
      || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
11944
    {
11945
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11946
 
11947
      /* Be careful not to destroy OUTVAL.  */
11948
      if (reg_overlap_mentioned_p (base_plus, outval))
11949
        {
11950
          /* Updating base_plus might destroy outval, see if we can
11951
             swap the scratch and base_plus.  */
11952
          if (!reg_overlap_mentioned_p (scratch, outval))
11953
            {
11954
              rtx tmp = scratch;
11955
              scratch = base_plus;
11956
              base_plus = tmp;
11957
            }
11958
          else
11959
            {
11960
              rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
11961
 
11962
              /* Be conservative and copy OUTVAL into the scratch now,
11963
                 this should only be necessary if outval is a subreg
11964
                 of something larger than a word.  */
11965
              /* XXX Might this clobber base?  I can't see how it can,
11966
                 since scratch is known to overlap with OUTVAL, and
11967
                 must be wider than a word.  */
11968
              emit_insn (gen_movhi (scratch_hi, outval));
11969
              outval = scratch_hi;
11970
            }
11971
        }
11972
 
11973
      emit_set_insn (base_plus, base);
11974
      base = base_plus;
11975
    }
11976
  else if (GET_CODE (base) == PLUS)
11977
    {
11978
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
11979
      HOST_WIDE_INT hi, lo;
11980
 
11981
      offset += INTVAL (XEXP (base, 1));
11982
      base = XEXP (base, 0);
11983
 
11984
      /* Rework the address into a legal sequence of insns.  */
11985
      /* Valid range for lo is -4095 -> 4095 */
11986
      lo = (offset >= 0
11987
            ? (offset & 0xfff)
11988
            : -((-offset) & 0xfff));
11989
 
11990
      /* Corner case: if lo is the max offset then we would be out of range
11991
         once we have added the additional 1 below, so bump the msb into the
11992
         pre-loading insn(s).  */
11993
      if (lo == 4095)
11994
        lo &= 0x7ff;
11995
 
11996
      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
11997
             ^ (HOST_WIDE_INT) 0x80000000)
11998
            - (HOST_WIDE_INT) 0x80000000);
11999
 
12000
      gcc_assert (hi + lo == offset);
12001
 
12002
      if (hi != 0)
12003
        {
12004
          rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
12005
 
12006
          /* Be careful not to destroy OUTVAL.  */
12007
          if (reg_overlap_mentioned_p (base_plus, outval))
12008
            {
12009
              /* Updating base_plus might destroy outval, see if we
12010
                 can swap the scratch and base_plus.  */
12011
              if (!reg_overlap_mentioned_p (scratch, outval))
12012
                {
12013
                  rtx tmp = scratch;
12014
                  scratch = base_plus;
12015
                  base_plus = tmp;
12016
                }
12017
              else
12018
                {
12019
                  rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
12020
 
12021
                  /* Be conservative and copy outval into scratch now,
12022
                     this should only be necessary if outval is a
12023
                     subreg of something larger than a word.  */
12024
                  /* XXX Might this clobber base?  I can't see how it
12025
                     can, since scratch is known to overlap with
12026
                     outval.  */
12027
                  emit_insn (gen_movhi (scratch_hi, outval));
12028
                  outval = scratch_hi;
12029
                }
12030
            }
12031
 
12032
          /* Get the base address; addsi3 knows how to handle constants
12033
             that require more than one insn.  */
12034
          emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
12035
          base = base_plus;
12036
          offset = lo;
12037
        }
12038
    }
12039
 
12040
  if (BYTES_BIG_ENDIAN)
12041
    {
12042
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
12043
                                         plus_constant (base, offset + 1)),
12044
                            gen_lowpart (QImode, outval)));
12045
      emit_insn (gen_lshrsi3 (scratch,
12046
                              gen_rtx_SUBREG (SImode, outval, 0),
12047
                              GEN_INT (8)));
12048
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
12049
                            gen_lowpart (QImode, scratch)));
12050
    }
12051
  else
12052
    {
12053
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
12054
                            gen_lowpart (QImode, outval)));
12055
      emit_insn (gen_lshrsi3 (scratch,
12056
                              gen_rtx_SUBREG (SImode, outval, 0),
12057
                              GEN_INT (8)));
12058
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
12059
                                         plus_constant (base, offset + 1)),
12060
                            gen_lowpart (QImode, scratch)));
12061
    }
12062
}
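 
/* Illustrative sketch (not part of the original source; all names here are
   hypothetical): the byte ordering produced by the two QImode stores above,
   expressed as plain C on a byte buffer.  Kept out of the build.  */
#if 0
static void
store_halfword_bytewise (unsigned char *base, long offset,
                         unsigned short outval, int big_endian)
{
  unsigned char low = outval & 0xff;          /* gen_lowpart (QImode, ...)  */
  unsigned char high = (outval >> 8) & 0xff;  /* the lshrsi3 by 8 into scratch  */

  if (big_endian)
    {
      base[offset] = high;          /* MSB at the lower address.  */
      base[offset + 1] = low;
    }
  else
    {
      base[offset] = low;           /* LSB at the lower address.  */
      base[offset + 1] = high;
    }
}
#endif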
12063
 
12064
/* Return true if a type must be passed in memory. For AAPCS, small aggregates
12065
   (padded to the size of a word) should be passed in a register.  */
12066
 
12067
static bool
12068
arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
12069
{
12070
  if (TARGET_AAPCS_BASED)
12071
    return must_pass_in_stack_var_size (mode, type);
12072
  else
12073
    return must_pass_in_stack_var_size_or_pad (mode, type);
12074
}
12075
 
12076
 
12077
/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
12078
   Return true if an argument passed on the stack should be padded upwards,
12079
   i.e. if the least-significant byte has useful data.
12080
   For legacy APCS ABIs we use the default.  For AAPCS based ABIs small
12081
   aggregate types are placed in the lowest memory address.  */
12082
 
12083
bool
12084
arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
12085
{
12086
  if (!TARGET_AAPCS_BASED)
12087
    return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
12088
 
12089
  if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
12090
    return false;
12091
 
12092
  return true;
12093
}
12094
 
12095
 
12096
/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
12097
   Return !BYTES_BIG_ENDIAN if the least significant byte of the
12098
   register has useful data, and return the opposite if the most
12099
   significant byte does.  */
12100
 
12101
bool
12102
arm_pad_reg_upward (enum machine_mode mode,
12103
                    tree type, int first ATTRIBUTE_UNUSED)
12104
{
12105
  if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
12106
    {
12107
      /* For AAPCS, small aggregates, small fixed-point types,
12108
         and small complex types are always padded upwards.  */
12109
      if (type)
12110
        {
12111
          if ((AGGREGATE_TYPE_P (type)
12112
               || TREE_CODE (type) == COMPLEX_TYPE
12113
               || FIXED_POINT_TYPE_P (type))
12114
              && int_size_in_bytes (type) <= 4)
12115
            return true;
12116
        }
12117
      else
12118
        {
12119
          if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
12120
              && GET_MODE_SIZE (mode) <= 4)
12121
            return true;
12122
        }
12123
    }
12124
 
12125
  /* Otherwise, use default padding.  */
12126
  return !BYTES_BIG_ENDIAN;
12127
}
12128
 
12129
 
12130
/* Print a symbolic form of X to the debug file, F.  */
12131
static void
12132
arm_print_value (FILE *f, rtx x)
12133
{
12134
  switch (GET_CODE (x))
12135
    {
12136
    case CONST_INT:
12137
      fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
12138
      return;
12139
 
12140
    case CONST_DOUBLE:
12141
      fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
12142
      return;
12143
 
12144
    case CONST_VECTOR:
12145
      {
12146
        int i;
12147
 
12148
        fprintf (f, "<");
12149
        for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
12150
          {
12151
            fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
12152
            if (i < (CONST_VECTOR_NUNITS (x) - 1))
12153
              fputc (',', f);
12154
          }
12155
        fprintf (f, ">");
12156
      }
12157
      return;
12158
 
12159
    case CONST_STRING:
12160
      fprintf (f, "\"%s\"", XSTR (x, 0));
12161
      return;
12162
 
12163
    case SYMBOL_REF:
12164
      fprintf (f, "`%s'", XSTR (x, 0));
12165
      return;
12166
 
12167
    case LABEL_REF:
12168
      fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
12169
      return;
12170
 
12171
    case CONST:
12172
      arm_print_value (f, XEXP (x, 0));
12173
      return;
12174
 
12175
    case PLUS:
12176
      arm_print_value (f, XEXP (x, 0));
12177
      fprintf (f, "+");
12178
      arm_print_value (f, XEXP (x, 1));
12179
      return;
12180
 
12181
    case PC:
12182
      fprintf (f, "pc");
12183
      return;
12184
 
12185
    default:
12186
      fprintf (f, "????");
12187
      return;
12188
    }
12189
}
12190
 
12191
/* Routines for manipulation of the constant pool.  */
12192
 
12193
/* ARM instructions cannot load a large constant directly into a
12194
   register; it has to come from a pc-relative load.  The constant
12195
   must therefore be placed in the addressable range of the
12196
   pc-relative load.  Depending on the precise pc-relative load
12197
   instruction the range is somewhere between 256 bytes and 4k.  This
12198
   means that we often have to dump a constant inside a function, and
12199
   generate code to branch around it.
12200
 
12201
   It is important to minimize this, since the branches will slow
12202
   things down and make the code larger.
12203
 
12204
   Normally we can hide the table after an existing unconditional
12205
   branch so that there is no interruption of the flow, but in the
12206
   worst case the code looks like this:
12207
 
12208
        ldr     rn, L1
12209
        ...
12210
        b       L2
12211
        align
12212
        L1:     .long value
12213
        L2:
12214
        ...
12215
 
12216
        ldr     rn, L3
12217
        ...
12218
        b       L4
12219
        align
12220
        L3:     .long value
12221
        L4:
12222
        ...
12223
 
12224
   We fix this by performing a scan after scheduling, which notices
12225
   which instructions need to have their operands fetched from the
12226
   constant table and builds the table.
12227
 
12228
   The algorithm starts by building a table of all the constants that
12229
   need fixing up and all the natural barriers in the function (places
12230
   where a constant table can be dropped without breaking the flow).
12231
   For each fixup we note how far the pc-relative replacement will be
12232
   able to reach and the offset of the instruction into the function.
12233
 
12234
   Having built the table we then group the fixes together to form
12235
   tables that are as large as possible (subject to addressing
12236
   constraints) and emit each table of constants after the last
12237
   barrier that is within range of all the instructions in the group.
12238
   If a group does not contain a barrier, then we forcibly create one
12239
   by inserting a jump instruction into the flow.  Once the table has
12240
   been inserted, the insns are then modified to reference the
12241
   relevant entry in the pool.
12242
 
12243
   Possible enhancements to the algorithm (not implemented) are:
12244
 
12245
   1) For some processors and object formats, there may be benefit in
12246
   aligning the pools to the start of cache lines; this alignment
12247
   would need to be taken into account when calculating addressability
12248
   of a pool.  */
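 
/* Minimal sketch (illustrative, not part of the original source; all names
   are hypothetical): the reachability test that the grouping described above
   relies on.  A fix at FIX_ADDRESS whose pc-relative load can reach FORWARDS
   bytes ahead and BACKWARDS bytes behind can use a pool entry placed at
   POOL_ADDRESS only if that entry lies inside the window.  */
#if 0
static int
pool_entry_reachable_p (long fix_address, long forwards, long backwards,
                        long pool_address)
{
  return (pool_address >= fix_address - backwards
          && pool_address <= fix_address + forwards);
}
#endif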
12249
 
12250
/* These typedefs are located at the start of this file, so that
12251
   they can be used in the prototypes there.  This comment is to
12252
   remind readers of that fact so that the following structures
12253
   can be understood more easily.
12254
 
12255
     typedef struct minipool_node    Mnode;
12256
     typedef struct minipool_fixup   Mfix;  */
12257
 
12258
struct minipool_node
12259
{
12260
  /* Doubly linked chain of entries.  */
12261
  Mnode * next;
12262
  Mnode * prev;
12263
  /* The maximum offset into the code at which this entry can be placed.  While
12264
     pushing fixes for forward references, all entries are sorted in order
12265
     of increasing max_address.  */
12266
  HOST_WIDE_INT max_address;
12267
  /* Similarly for an entry inserted for a backwards ref.  */
12268
  HOST_WIDE_INT min_address;
12269
  /* The number of fixes referencing this entry.  This can become zero
12270
     if we "unpush" an entry.  In this case we ignore the entry when we
12271
     come to emit the code.  */
12272
  int refcount;
12273
  /* The offset from the start of the minipool.  */
12274
  HOST_WIDE_INT offset;
12275
  /* The value in the table.  */
12276
  rtx value;
12277
  /* The mode of value.  */
12278
  enum machine_mode mode;
12279
  /* The size of the value.  With iWMMXt enabled
12280
     sizes > 4 also imply an alignment of 8 bytes.  */
12281
  int fix_size;
12282
};
12283
 
12284
struct minipool_fixup
12285
{
12286
  Mfix *            next;
12287
  rtx               insn;
12288
  HOST_WIDE_INT     address;
12289
  rtx *             loc;
12290
  enum machine_mode mode;
12291
  int               fix_size;
12292
  rtx               value;
12293
  Mnode *           minipool;
12294
  HOST_WIDE_INT     forwards;
12295
  HOST_WIDE_INT     backwards;
12296
};
12297
 
12298
/* Fixes less than a word need padding out to a word boundary.  */
12299
#define MINIPOOL_FIX_SIZE(mode) \
12300
  (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
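/* For example (illustrative): a QImode or HImode constant still occupies a
   full 4-byte slot, while DImode and larger constants use their natural
   GET_MODE_SIZE.  */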
12301
 
12302
static Mnode *  minipool_vector_head;
12303
static Mnode *  minipool_vector_tail;
12304
static rtx      minipool_vector_label;
12305
static int      minipool_pad;
12306
 
12307
/* The linked list of all minipool fixes required for this function.  */
12308
Mfix *          minipool_fix_head;
12309
Mfix *          minipool_fix_tail;
12310
/* The fix entry for the current minipool, once it has been placed.  */
12311
Mfix *          minipool_barrier;
12312
 
12313
/* Determines if INSN is the start of a jump table.  Returns the end
12314
   of the TABLE or NULL_RTX.  */
12315
static rtx
12316
is_jump_table (rtx insn)
12317
{
12318
  rtx table;
12319
 
12320
  if (jump_to_label_p (insn)
12321
      && ((table = next_real_insn (JUMP_LABEL (insn)))
12322
          == next_real_insn (insn))
12323
      && table != NULL
12324
      && GET_CODE (table) == JUMP_INSN
12325
      && (GET_CODE (PATTERN (table)) == ADDR_VEC
12326
          || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
12327
    return table;
12328
 
12329
  return NULL_RTX;
12330
}
12331
 
12332
#ifndef JUMP_TABLES_IN_TEXT_SECTION
12333
#define JUMP_TABLES_IN_TEXT_SECTION 0
12334
#endif
12335
 
12336
static HOST_WIDE_INT
12337
get_jump_table_size (rtx insn)
12338
{
12339
  /* ADDR_VECs only take room if read-only data goes into the text
12340
     section.  */
12341
  if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
12342
    {
12343
      rtx body = PATTERN (insn);
12344
      int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
12345
      HOST_WIDE_INT size;
12346
      HOST_WIDE_INT modesize;
12347
 
12348
      modesize = GET_MODE_SIZE (GET_MODE (body));
12349
      size = modesize * XVECLEN (body, elt);
12350
      switch (modesize)
12351
        {
12352
        case 1:
12353
          /* Round up the size of the TBB table to a halfword boundary.  */
12354
          size = (size + 1) & ~(HOST_WIDE_INT)1;
12355
          break;
12356
        case 2:
12357
          /* No padding necessary for TBH.  */
12358
          break;
12359
        case 4:
12360
          /* Add two bytes for alignment on Thumb.  */
12361
          if (TARGET_THUMB)
12362
            size += 2;
12363
          break;
12364
        default:
12365
          gcc_unreachable ();
12366
        }
12367
      return size;
12368
    }
12369
 
12370
  return 0;
12371
}
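 
/* Worked example (illustrative): when jump tables live in the text section, a
   TBB-style table (modesize 1) with 5 entries counts as 6 bytes after rounding
   up to a halfword boundary, and a word-sized table (modesize 4) with 3
   entries on Thumb counts as 12 + 2 = 14 bytes to allow for alignment
   padding.  */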
12372
 
12373
/* Return the maximum amount of padding that will be inserted before
12374
   label LABEL.  */
12375
 
12376
static HOST_WIDE_INT
12377
get_label_padding (rtx label)
12378
{
12379
  HOST_WIDE_INT align, min_insn_size;
12380
 
12381
  align = 1 << label_to_alignment (label);
12382
  min_insn_size = TARGET_THUMB ? 2 : 4;
12383
  return align > min_insn_size ? align - min_insn_size : 0;
12384
}
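 
/* Worked example (illustrative): a label aligned to 8 bytes on Thumb
   (minimum insn size 2) can be preceded by at most 8 - 2 = 6 bytes of
   padding, which is what this function returns for it.  */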
12385
 
12386
/* Move a minipool fix MP from its current location to before MAX_MP.
12387
   If MAX_MP is NULL, then MP doesn't need moving, but the addressing
12388
   constraints may need updating.  */
12389
static Mnode *
12390
move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
12391
                               HOST_WIDE_INT max_address)
12392
{
12393
  /* The code below assumes these are different.  */
12394
  gcc_assert (mp != max_mp);
12395
 
12396
  if (max_mp == NULL)
12397
    {
12398
      if (max_address < mp->max_address)
12399
        mp->max_address = max_address;
12400
    }
12401
  else
12402
    {
12403
      if (max_address > max_mp->max_address - mp->fix_size)
12404
        mp->max_address = max_mp->max_address - mp->fix_size;
12405
      else
12406
        mp->max_address = max_address;
12407
 
12408
      /* Unlink MP from its current position.  Since max_mp is non-null,
12409
       mp->prev must be non-null.  */
12410
      mp->prev->next = mp->next;
12411
      if (mp->next != NULL)
12412
        mp->next->prev = mp->prev;
12413
      else
12414
        minipool_vector_tail = mp->prev;
12415
 
12416
      /* Re-insert it before MAX_MP.  */
12417
      mp->next = max_mp;
12418
      mp->prev = max_mp->prev;
12419
      max_mp->prev = mp;
12420
 
12421
      if (mp->prev != NULL)
12422
        mp->prev->next = mp;
12423
      else
12424
        minipool_vector_head = mp;
12425
    }
12426
 
12427
  /* Save the new entry.  */
12428
  max_mp = mp;
12429
 
12430
  /* Scan over the preceding entries and adjust their addresses as
12431
     required.  */
12432
  while (mp->prev != NULL
12433
         && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
12434
    {
12435
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
12436
      mp = mp->prev;
12437
    }
12438
 
12439
  return max_mp;
12440
}
12441
 
12442
/* Add a constant to the minipool for a forward reference.  Returns the
12443
   node added or NULL if the constant will not fit in this pool.  */
12444
static Mnode *
12445
add_minipool_forward_ref (Mfix *fix)
12446
{
12447
  /* If set, max_mp is the first pool_entry that has a lower
12448
     constraint than the one we are trying to add.  */
12449
  Mnode *       max_mp = NULL;
12450
  HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
12451
  Mnode *       mp;
12452
 
12453
  /* If the minipool starts before the end of FIX->INSN then this FIX
12454
     cannot be placed into the current pool.  Furthermore, adding the
12455
     new constant pool entry may cause the pool to start FIX_SIZE bytes
12456
     earlier.  */
12457
  if (minipool_vector_head &&
12458
      (fix->address + get_attr_length (fix->insn)
12459
       >= minipool_vector_head->max_address - fix->fix_size))
12460
    return NULL;
12461
 
12462
  /* Scan the pool to see if a constant with the same value has
12463
     already been added.  While we are doing this, also note the
12464
     location where we must insert the constant if it doesn't already
12465
     exist.  */
12466
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12467
    {
12468
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
12469
          && fix->mode == mp->mode
12470
          && (GET_CODE (fix->value) != CODE_LABEL
12471
              || (CODE_LABEL_NUMBER (fix->value)
12472
                  == CODE_LABEL_NUMBER (mp->value)))
12473
          && rtx_equal_p (fix->value, mp->value))
12474
        {
12475
          /* More than one fix references this entry.  */
12476
          mp->refcount++;
12477
          return move_minipool_fix_forward_ref (mp, max_mp, max_address);
12478
        }
12479
 
12480
      /* Note the insertion point if necessary.  */
12481
      if (max_mp == NULL
12482
          && mp->max_address > max_address)
12483
        max_mp = mp;
12484
 
12485
      /* If we are inserting an 8-byte aligned quantity and
12486
         we have not already found an insertion point, then
12487
         make sure that all such 8-byte aligned quantities are
12488
         placed at the start of the pool.  */
12489
      if (ARM_DOUBLEWORD_ALIGN
12490
          && max_mp == NULL
12491
          && fix->fix_size >= 8
12492
          && mp->fix_size < 8)
12493
        {
12494
          max_mp = mp;
12495
          max_address = mp->max_address;
12496
        }
12497
    }
12498
 
12499
  /* The value is not currently in the minipool, so we need to create
12500
     a new entry for it.  If MAX_MP is NULL, the entry will be put on
12501
     the end of the list since the placement is less constrained than
12502
     any existing entry.  Otherwise, we insert the new fix before
12503
     MAX_MP and, if necessary, adjust the constraints on the other
12504
     entries.  */
12505
  mp = XNEW (Mnode);
12506
  mp->fix_size = fix->fix_size;
12507
  mp->mode = fix->mode;
12508
  mp->value = fix->value;
12509
  mp->refcount = 1;
12510
  /* Not yet required for a backwards ref.  */
12511
  mp->min_address = -65536;
12512
 
12513
  if (max_mp == NULL)
12514
    {
12515
      mp->max_address = max_address;
12516
      mp->next = NULL;
12517
      mp->prev = minipool_vector_tail;
12518
 
12519
      if (mp->prev == NULL)
12520
        {
12521
          minipool_vector_head = mp;
12522
          minipool_vector_label = gen_label_rtx ();
12523
        }
12524
      else
12525
        mp->prev->next = mp;
12526
 
12527
      minipool_vector_tail = mp;
12528
    }
12529
  else
12530
    {
12531
      if (max_address > max_mp->max_address - mp->fix_size)
12532
        mp->max_address = max_mp->max_address - mp->fix_size;
12533
      else
12534
        mp->max_address = max_address;
12535
 
12536
      mp->next = max_mp;
12537
      mp->prev = max_mp->prev;
12538
      max_mp->prev = mp;
12539
      if (mp->prev != NULL)
12540
        mp->prev->next = mp;
12541
      else
12542
        minipool_vector_head = mp;
12543
    }
12544
 
12545
  /* Save the new entry.  */
12546
  max_mp = mp;
12547
 
12548
  /* Scan over the preceding entries and adjust their addresses as
12549
     required.  */
12550
  while (mp->prev != NULL
12551
         && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
12552
    {
12553
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
12554
      mp = mp->prev;
12555
    }
12556
 
12557
  return max_mp;
12558
}
12559
 
12560
static Mnode *
12561
move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
12562
                                HOST_WIDE_INT  min_address)
12563
{
12564
  HOST_WIDE_INT offset;
12565
 
12566
  /* The code below assumes these are different.  */
12567
  gcc_assert (mp != min_mp);
12568
 
12569
  if (min_mp == NULL)
12570
    {
12571
      if (min_address > mp->min_address)
12572
        mp->min_address = min_address;
12573
    }
12574
  else
12575
    {
12576
      /* We will adjust this below if it is too loose.  */
12577
      mp->min_address = min_address;
12578
 
12579
      /* Unlink MP from its current position.  Since min_mp is non-null,
12580
         mp->next must be non-null.  */
12581
      mp->next->prev = mp->prev;
12582
      if (mp->prev != NULL)
12583
        mp->prev->next = mp->next;
12584
      else
12585
        minipool_vector_head = mp->next;
12586
 
12587
      /* Reinsert it after MIN_MP.  */
12588
      mp->prev = min_mp;
12589
      mp->next = min_mp->next;
12590
      min_mp->next = mp;
12591
      if (mp->next != NULL)
12592
        mp->next->prev = mp;
12593
      else
12594
        minipool_vector_tail = mp;
12595
    }
12596
 
12597
  min_mp = mp;
12598
 
12599
  offset = 0;
12600
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12601
    {
12602
      mp->offset = offset;
12603
      if (mp->refcount > 0)
12604
        offset += mp->fix_size;
12605
 
12606
      if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
12607
        mp->next->min_address = mp->min_address + mp->fix_size;
12608
    }
12609
 
12610
  return min_mp;
12611
}
12612
 
12613
/* Add a constant to the minipool for a backward reference.  Returns the
12614
   node added or NULL if the constant will not fit in this pool.
12615
 
12616
   Note that the code for insertion for a backwards reference can be
12617
   somewhat confusing because the calculated offsets for each fix do
12618
   not take into account the size of the pool (which is still under
12619
   construction).  */
12620
static Mnode *
12621
add_minipool_backward_ref (Mfix *fix)
12622
{
12623
  /* If set, min_mp is the last pool_entry that has a lower constraint
12624
     than the one we are trying to add.  */
12625
  Mnode *min_mp = NULL;
12626
  /* This can be negative, since it is only a constraint.  */
12627
  HOST_WIDE_INT  min_address = fix->address - fix->backwards;
12628
  Mnode *mp;
12629
 
12630
  /* If we can't reach the current pool from this insn, or if we can't
12631
     insert this entry at the end of the pool without pushing other
12632
     fixes out of range, then we don't try.  This ensures that we
12633
     can't fail later on.  */
12634
  if (min_address >= minipool_barrier->address
12635
      || (minipool_vector_tail->min_address + fix->fix_size
12636
          >= minipool_barrier->address))
12637
    return NULL;
12638
 
12639
  /* Scan the pool to see if a constant with the same value has
12640
     already been added.  While we are doing this, also note the
12641
     location where we must insert the constant if it doesn't already
12642
     exist.  */
12643
  for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
12644
    {
12645
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
12646
          && fix->mode == mp->mode
12647
          && (GET_CODE (fix->value) != CODE_LABEL
12648
              || (CODE_LABEL_NUMBER (fix->value)
12649
                  == CODE_LABEL_NUMBER (mp->value)))
12650
          && rtx_equal_p (fix->value, mp->value)
12651
          /* Check that there is enough slack to move this entry to the
12652
             end of the table (this is conservative).  */
12653
          && (mp->max_address
12654
              > (minipool_barrier->address
12655
                 + minipool_vector_tail->offset
12656
                 + minipool_vector_tail->fix_size)))
12657
        {
12658
          mp->refcount++;
12659
          return move_minipool_fix_backward_ref (mp, min_mp, min_address);
12660
        }
12661
 
12662
      if (min_mp != NULL)
12663
        mp->min_address += fix->fix_size;
12664
      else
12665
        {
12666
          /* Note the insertion point if necessary.  */
12667
          if (mp->min_address < min_address)
12668
            {
12669
              /* For now, we do not allow the insertion of 8-byte alignment
12670
                 requiring nodes anywhere but at the start of the pool.  */
12671
              if (ARM_DOUBLEWORD_ALIGN
12672
                  && fix->fix_size >= 8 && mp->fix_size < 8)
12673
                return NULL;
12674
              else
12675
                min_mp = mp;
12676
            }
12677
          else if (mp->max_address
12678
                   < minipool_barrier->address + mp->offset + fix->fix_size)
12679
            {
12680
              /* Inserting before this entry would push the fix beyond
12681
                 its maximum address (which can happen if we have
12682
                 re-located a forwards fix); force the new fix to come
12683
                 after it.  */
12684
              if (ARM_DOUBLEWORD_ALIGN
12685
                  && fix->fix_size >= 8 && mp->fix_size < 8)
12686
                return NULL;
12687
              else
12688
                {
12689
                  min_mp = mp;
12690
                  min_address = mp->min_address + fix->fix_size;
12691
                }
12692
            }
12693
          /* Do not insert a non-8-byte aligned quantity before 8-byte
12694
             aligned quantities.  */
12695
          else if (ARM_DOUBLEWORD_ALIGN
12696
                   && fix->fix_size < 8
12697
                   && mp->fix_size >= 8)
12698
            {
12699
              min_mp = mp;
12700
              min_address = mp->min_address + fix->fix_size;
12701
            }
12702
        }
12703
    }
12704
 
12705
  /* We need to create a new entry.  */
12706
  mp = XNEW (Mnode);
12707
  mp->fix_size = fix->fix_size;
12708
  mp->mode = fix->mode;
12709
  mp->value = fix->value;
12710
  mp->refcount = 1;
12711
  mp->max_address = minipool_barrier->address + 65536;
12712
 
12713
  mp->min_address = min_address;
12714
 
12715
  if (min_mp == NULL)
12716
    {
12717
      mp->prev = NULL;
12718
      mp->next = minipool_vector_head;
12719
 
12720
      if (mp->next == NULL)
12721
        {
12722
          minipool_vector_tail = mp;
12723
          minipool_vector_label = gen_label_rtx ();
12724
        }
12725
      else
12726
        mp->next->prev = mp;
12727
 
12728
      minipool_vector_head = mp;
12729
    }
12730
  else
12731
    {
12732
      mp->next = min_mp->next;
12733
      mp->prev = min_mp;
12734
      min_mp->next = mp;
12735
 
12736
      if (mp->next != NULL)
12737
        mp->next->prev = mp;
12738
      else
12739
        minipool_vector_tail = mp;
12740
    }
12741
 
12742
  /* Save the new entry.  */
12743
  min_mp = mp;
12744
 
12745
  if (mp->prev)
12746
    mp = mp->prev;
12747
  else
12748
    mp->offset = 0;
12749
 
12750
  /* Scan over the following entries and adjust their offsets.  */
12751
  while (mp->next != NULL)
12752
    {
12753
      if (mp->next->min_address < mp->min_address + mp->fix_size)
12754
        mp->next->min_address = mp->min_address + mp->fix_size;
12755
 
12756
      if (mp->refcount)
12757
        mp->next->offset = mp->offset + mp->fix_size;
12758
      else
12759
        mp->next->offset = mp->offset;
12760
 
12761
      mp = mp->next;
12762
    }
12763
 
12764
  return min_mp;
12765
}
12766
 
12767
static void
12768
assign_minipool_offsets (Mfix *barrier)
12769
{
12770
  HOST_WIDE_INT offset = 0;
12771
  Mnode *mp;
12772
 
12773
  minipool_barrier = barrier;
12774
 
12775
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12776
    {
12777
      mp->offset = offset;
12778
 
12779
      if (mp->refcount > 0)
12780
        offset += mp->fix_size;
12781
    }
12782
}
12783
 
12784
/* Output the literal table.  */
12785
static void
12786
dump_minipool (rtx scan)
12787
{
12788
  Mnode * mp;
12789
  Mnode * nmp;
12790
  int align64 = 0;
12791
 
12792
  if (ARM_DOUBLEWORD_ALIGN)
12793
    for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12794
      if (mp->refcount > 0 && mp->fix_size >= 8)
12795
        {
12796
          align64 = 1;
12797
          break;
12798
        }
12799
 
12800
  if (dump_file)
12801
    fprintf (dump_file,
12802
             ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
12803
             INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
12804
 
12805
  scan = emit_label_after (gen_label_rtx (), scan);
12806
  scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
12807
  scan = emit_label_after (minipool_vector_label, scan);
12808
 
12809
  for (mp = minipool_vector_head; mp != NULL; mp = nmp)
12810
    {
12811
      if (mp->refcount > 0)
12812
        {
12813
          if (dump_file)
12814
            {
12815
              fprintf (dump_file,
12816
                       ";;  Offset %u, min %ld, max %ld ",
12817
                       (unsigned) mp->offset, (unsigned long) mp->min_address,
12818
                       (unsigned long) mp->max_address);
12819
              arm_print_value (dump_file, mp->value);
12820
              fputc ('\n', dump_file);
12821
            }
12822
 
12823
          switch (mp->fix_size)
12824
            {
12825
#ifdef HAVE_consttable_1
12826
            case 1:
12827
              scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
12828
              break;
12829
 
12830
#endif
12831
#ifdef HAVE_consttable_2
12832
            case 2:
12833
              scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
12834
              break;
12835
 
12836
#endif
12837
#ifdef HAVE_consttable_4
12838
            case 4:
12839
              scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
12840
              break;
12841
 
12842
#endif
12843
#ifdef HAVE_consttable_8
12844
            case 8:
12845
              scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
12846
              break;
12847
 
12848
#endif
12849
#ifdef HAVE_consttable_16
12850
            case 16:
12851
              scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
12852
              break;
12853
 
12854
#endif
12855
            default:
12856
              gcc_unreachable ();
12857
            }
12858
        }
12859
 
12860
      nmp = mp->next;
12861
      free (mp);
12862
    }
12863
 
12864
  minipool_vector_head = minipool_vector_tail = NULL;
12865
  scan = emit_insn_after (gen_consttable_end (), scan);
12866
  scan = emit_barrier_after (scan);
12867
}
12868
 
12869
/* Return the cost of forcibly inserting a barrier after INSN.  */
12870
static int
12871
arm_barrier_cost (rtx insn)
12872
{
12873
  /* Basing the location of the pool on the loop depth is preferable,
12874
     but at the moment, the basic block information seems to be
12875
     corrupt by this stage of the compilation.  */
12876
  int base_cost = 50;
12877
  rtx next = next_nonnote_insn (insn);
12878
 
12879
  if (next != NULL && GET_CODE (next) == CODE_LABEL)
12880
    base_cost -= 20;
12881
 
12882
  switch (GET_CODE (insn))
12883
    {
12884
    case CODE_LABEL:
12885
      /* It will always be better to place the table before the label, rather
12886
         than after it.  */
12887
      return 50;
12888
 
12889
    case INSN:
12890
    case CALL_INSN:
12891
      return base_cost;
12892
 
12893
    case JUMP_INSN:
12894
      return base_cost - 10;
12895
 
12896
    default:
12897
      return base_cost + 10;
12898
    }
12899
}
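 
/* Worked example (illustrative): a JUMP_INSN whose next non-note insn is a
   CODE_LABEL costs 50 - 20 - 10 = 20, the cheapest ordinary case, so forced
   barriers gravitate to points just after a branch and before a label.  */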
12900
 
12901
/* Find the best place in the insn stream in the range
12902
   (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
12903
   Create the barrier by inserting a jump and add a new fix entry for
12904
   it.  */
12905
static Mfix *
12906
create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
12907
{
12908
  HOST_WIDE_INT count = 0;
12909
  rtx barrier;
12910
  rtx from = fix->insn;
12911
  /* The instruction after which we will insert the jump.  */
12912
  rtx selected = NULL;
12913
  int selected_cost;
12914
  /* The address at which the jump instruction will be placed.  */
12915
  HOST_WIDE_INT selected_address;
12916
  Mfix * new_fix;
12917
  HOST_WIDE_INT max_count = max_address - fix->address;
12918
  rtx label = gen_label_rtx ();
12919
 
12920
  selected_cost = arm_barrier_cost (from);
12921
  selected_address = fix->address;
12922
 
12923
  while (from && count < max_count)
12924
    {
12925
      rtx tmp;
12926
      int new_cost;
12927
 
12928
      /* This code shouldn't have been called if there was a natural barrier
12929
         within range.  */
12930
      gcc_assert (GET_CODE (from) != BARRIER);
12931
 
12932
      /* Count the length of this insn.  This must stay in sync with the
12933
         code that pushes minipool fixes.  */
12934
      if (LABEL_P (from))
12935
        count += get_label_padding (from);
12936
      else
12937
        count += get_attr_length (from);
12938
 
12939
      /* If there is a jump table, add its length.  */
12940
      tmp = is_jump_table (from);
12941
      if (tmp != NULL)
12942
        {
12943
          count += get_jump_table_size (tmp);
12944
 
12945
          /* Jump tables aren't in a basic block, so base the cost on
12946
             the dispatch insn.  If we select this location, we will
12947
             still put the pool after the table.  */
12948
          new_cost = arm_barrier_cost (from);
12949
 
12950
          if (count < max_count
12951
              && (!selected || new_cost <= selected_cost))
12952
            {
12953
              selected = tmp;
12954
              selected_cost = new_cost;
12955
              selected_address = fix->address + count;
12956
            }
12957
 
12958
          /* Continue after the dispatch table.  */
12959
          from = NEXT_INSN (tmp);
12960
          continue;
12961
        }
12962
 
12963
      new_cost = arm_barrier_cost (from);
12964
 
12965
      if (count < max_count
12966
          && (!selected || new_cost <= selected_cost))
12967
        {
12968
          selected = from;
12969
          selected_cost = new_cost;
12970
          selected_address = fix->address + count;
12971
        }
12972
 
12973
      from = NEXT_INSN (from);
12974
    }
12975
 
12976
  /* Make sure that we found a place to insert the jump.  */
12977
  gcc_assert (selected);
12978
 
12979
  /* Make sure we do not split a call and its corresponding
12980
     CALL_ARG_LOCATION note.  */
12981
  if (CALL_P (selected))
12982
    {
12983
      rtx next = NEXT_INSN (selected);
12984
      if (next && NOTE_P (next)
12985
          && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
12986
        selected = next;
12987
    }
12988
 
12989
  /* Create a new JUMP_INSN that branches around a barrier.  */
12990
  from = emit_jump_insn_after (gen_jump (label), selected);
12991
  JUMP_LABEL (from) = label;
12992
  barrier = emit_barrier_after (from);
12993
  emit_label_after (label, barrier);
12994
 
12995
  /* Create a minipool barrier entry for the new barrier.  */
12996
  new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
12997
  new_fix->insn = barrier;
12998
  new_fix->address = selected_address;
12999
  new_fix->next = fix->next;
13000
  fix->next = new_fix;
13001
 
13002
  return new_fix;
13003
}
13004
 
13005
/* Record that there is a natural barrier in the insn stream at
13006
   ADDRESS.  */
13007
static void
13008
push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
13009
{
13010
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
13011
 
13012
  fix->insn = insn;
13013
  fix->address = address;
13014
 
13015
  fix->next = NULL;
13016
  if (minipool_fix_head != NULL)
13017
    minipool_fix_tail->next = fix;
13018
  else
13019
    minipool_fix_head = fix;
13020
 
13021
  minipool_fix_tail = fix;
13022
}
13023
 
13024
/* Record INSN, which will need fixing up to load a value from the
13025
   minipool.  ADDRESS is the offset of the insn since the start of the
13026
   function; LOC is a pointer to the part of the insn which requires
13027
   fixing; VALUE is the constant that must be loaded, which is of type
13028
   MODE.  */
13029
static void
13030
push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
13031
                   enum machine_mode mode, rtx value)
13032
{
13033
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
13034
 
13035
  fix->insn = insn;
13036
  fix->address = address;
13037
  fix->loc = loc;
13038
  fix->mode = mode;
13039
  fix->fix_size = MINIPOOL_FIX_SIZE (mode);
13040
  fix->value = value;
13041
  fix->forwards = get_attr_pool_range (insn);
13042
  fix->backwards = get_attr_neg_pool_range (insn);
13043
  fix->minipool = NULL;
13044
 
13045
  /* If an insn doesn't have a range defined for it, then it isn't
13046
     expecting to be reworked by this code.  Better to stop now than
13047
     to generate duff assembly code.  */
13048
  gcc_assert (fix->forwards || fix->backwards);
13049
 
13050
  /* If an entry requires 8-byte alignment then assume all constant pools
13051
     require 4 bytes of padding.  Trying to do this later on a per-pool
13052
     basis is awkward because existing pool entries have to be modified.  */
13053
  if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
13054
    minipool_pad = 4;
13055
 
13056
  if (dump_file)
13057
    {
13058
      fprintf (dump_file,
13059
               ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
13060
               GET_MODE_NAME (mode),
13061
               INSN_UID (insn), (unsigned long) address,
13062
               -1 * (long)fix->backwards, (long)fix->forwards);
13063
      arm_print_value (dump_file, fix->value);
13064
      fprintf (dump_file, "\n");
13065
    }
13066
 
13067
  /* Add it to the chain of fixes.  */
13068
  fix->next = NULL;
13069
 
13070
  if (minipool_fix_head != NULL)
13071
    minipool_fix_tail->next = fix;
13072
  else
13073
    minipool_fix_head = fix;
13074
 
13075
  minipool_fix_tail = fix;
13076
}
13077
 
13078
/* Return the cost of synthesizing a 64-bit constant VAL inline.
13079
   Returns the number of insns needed, or 99 if we don't know how to
13080
   do it.  */
13081
int
13082
arm_const_double_inline_cost (rtx val)
13083
{
13084
  rtx lowpart, highpart;
13085
  enum machine_mode mode;
13086
 
13087
  mode = GET_MODE (val);
13088
 
13089
  if (mode == VOIDmode)
13090
    mode = DImode;
13091
 
13092
  gcc_assert (GET_MODE_SIZE (mode) == 8);
13093
 
13094
  lowpart = gen_lowpart (SImode, val);
13095
  highpart = gen_highpart_mode (SImode, mode, val);
13096
 
13097
  gcc_assert (GET_CODE (lowpart) == CONST_INT);
13098
  gcc_assert (GET_CODE (highpart) == CONST_INT);
13099
 
13100
  return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
13101
                            NULL_RTX, NULL_RTX, 0, 0)
13102
          + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
13103
                              NULL_RTX, NULL_RTX, 0, 0));
13104
}
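 
/* For example (illustrative): a 64-bit constant such as 0x000000ff000000ff
   splits into two SImode halves of 0xff, each a valid ARM immediate, so this
   function returns 1 + 1 = 2.  */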
13105
 
13106
/* Return true if it is worthwhile to split a 64-bit constant into two
13107
   32-bit operations.  This is the case if optimizing for size, or
13108
   if we have load delay slots, or if one 32-bit part can be done with
13109
   a single data operation.  */
13110
bool
13111
arm_const_double_by_parts (rtx val)
13112
{
13113
  enum machine_mode mode = GET_MODE (val);
13114
  rtx part;
13115
 
13116
  if (optimize_size || arm_ld_sched)
13117
    return true;
13118
 
13119
  if (mode == VOIDmode)
13120
    mode = DImode;
13121
 
13122
  part = gen_highpart_mode (SImode, mode, val);
13123
 
13124
  gcc_assert (GET_CODE (part) == CONST_INT);
13125
 
13126
  if (const_ok_for_arm (INTVAL (part))
13127
      || const_ok_for_arm (~INTVAL (part)))
13128
    return true;
13129
 
13130
  part = gen_lowpart (SImode, val);
13131
 
13132
  gcc_assert (GET_CODE (part) == CONST_INT);
13133
 
13134
  if (const_ok_for_arm (INTVAL (part))
13135
      || const_ok_for_arm (~INTVAL (part)))
13136
    return true;
13137
 
13138
  return false;
13139
}
13140
 
13141
/* Return true if it is possible to inline both the high and low parts
13142
   of a 64-bit constant into 32-bit data processing instructions.  */
13143
bool
13144
arm_const_double_by_immediates (rtx val)
13145
{
13146
  enum machine_mode mode = GET_MODE (val);
13147
  rtx part;
13148
 
13149
  if (mode == VOIDmode)
13150
    mode = DImode;
13151
 
13152
  part = gen_highpart_mode (SImode, mode, val);
13153
 
13154
  gcc_assert (GET_CODE (part) == CONST_INT);
13155
 
13156
  if (!const_ok_for_arm (INTVAL (part)))
13157
    return false;
13158
 
13159
  part = gen_lowpart (SImode, val);
13160
 
13161
  gcc_assert (GET_CODE (part) == CONST_INT);
13162
 
13163
  if (!const_ok_for_arm (INTVAL (part)))
13164
    return false;
13165
 
13166
  return true;
13167
}
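 
/* For example (illustrative): 0x0000001200000034 satisfies this test, since
   both 32-bit halves (0x12 and 0x34) are valid ARM data-processing
   immediates and can each be loaded with a single MOV.  */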
13168
 
13169
/* Scan INSN and note any of its operands that need fixing.
13170
   If DO_PUSHES is false we do not actually push any of the fixups
13171
   needed.  The function returns TRUE if any fixups were needed/pushed.
13172
   This is used by arm_memory_load_p() which needs to know about loads
13173
   of constants that will be converted into minipool loads.  */
13174
static bool
13175
note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
13176
{
13177
  bool result = false;
13178
  int opno;
13179
 
13180
  extract_insn (insn);
13181
 
13182
  if (!constrain_operands (1))
13183
    fatal_insn_not_found (insn);
13184
 
13185
  if (recog_data.n_alternatives == 0)
13186
    return false;
13187
 
13188
  /* Fill in recog_op_alt with information about the constraints of
13189
     this insn.  */
13190
  preprocess_constraints ();
13191
 
13192
  for (opno = 0; opno < recog_data.n_operands; opno++)
13193
    {
13194
      /* Things we need to fix can only occur in inputs.  */
13195
      if (recog_data.operand_type[opno] != OP_IN)
13196
        continue;
13197
 
13198
      /* If this alternative is a memory reference, then any mention
13199
         of constants in this alternative is really to fool reload
13200
         into allowing us to accept one there.  We need to fix them up
13201
         now so that we output the right code.  */
13202
      if (recog_op_alt[opno][which_alternative].memory_ok)
13203
        {
13204
          rtx op = recog_data.operand[opno];
13205
 
13206
          if (CONSTANT_P (op))
13207
            {
13208
              if (do_pushes)
13209
                push_minipool_fix (insn, address, recog_data.operand_loc[opno],
13210
                                   recog_data.operand_mode[opno], op);
13211
              result = true;
13212
            }
13213
          else if (GET_CODE (op) == MEM
13214
                   && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
13215
                   && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
13216
            {
13217
              if (do_pushes)
13218
                {
13219
                  rtx cop = avoid_constant_pool_reference (op);
13220
 
13221
                  /* Casting the address of something to a mode narrower
13222
                     than a word can cause avoid_constant_pool_reference()
13223
                     to return the pool reference itself.  That's no good to
13224
                     us here.  Lets just hope that we can use the
13225
                     constant pool value directly.  */
13226
                  if (op == cop)
13227
                    cop = get_pool_constant (XEXP (op, 0));
13228
 
13229
                  push_minipool_fix (insn, address,
13230
                                     recog_data.operand_loc[opno],
13231
                                     recog_data.operand_mode[opno], cop);
13232
                }
13233
 
13234
              result = true;
13235
            }
13236
        }
13237
    }
13238
 
13239
  return result;
13240
}
13241
 
13242
/* Convert instructions to their cc-clobbering variant if possible, since
13243
   that allows us to use smaller encodings.  */
13244
 
13245
static void
13246
thumb2_reorg (void)
13247
{
13248
  basic_block bb;
13249
  regset_head live;
13250
 
13251
  INIT_REG_SET (&live);
13252
 
13253
  /* We are freeing block_for_insn in the toplev to keep compatibility
13254
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
13255
  compute_bb_for_insn ();
13256
  df_analyze ();
13257
 
13258
  FOR_EACH_BB (bb)
13259
    {
13260
      rtx insn;
13261
 
13262
      COPY_REG_SET (&live, DF_LR_OUT (bb));
13263
      df_simulate_initialize_backwards (bb, &live);
13264
      FOR_BB_INSNS_REVERSE (bb, insn)
13265
        {
13266
          if (NONJUMP_INSN_P (insn)
13267
              && !REGNO_REG_SET_P (&live, CC_REGNUM))
13268
            {
13269
              rtx pat = PATTERN (insn);
13270
              if (GET_CODE (pat) == SET
13271
                  && low_register_operand (XEXP (pat, 0), SImode)
13272
                  && thumb_16bit_operator (XEXP (pat, 1), SImode)
13273
                  && low_register_operand (XEXP (XEXP (pat, 1), 0), SImode)
13274
                  && low_register_operand (XEXP (XEXP (pat, 1), 1), SImode))
13275
                {
13276
                  rtx dst = XEXP (pat, 0);
13277
                  rtx src = XEXP (pat, 1);
13278
                  rtx op0 = XEXP (src, 0);
13279
                  rtx op1 = (GET_RTX_CLASS (GET_CODE (src)) == RTX_COMM_ARITH
13280
                             ? XEXP (src, 1) : NULL);
13281
 
13282
                  if (rtx_equal_p (dst, op0)
13283
                      || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
13284
                    {
13285
                      rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
13286
                      rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
13287
                      rtvec vec = gen_rtvec (2, pat, clobber);
13288
 
13289
                      PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
13290
                      INSN_CODE (insn) = -1;
13291
                    }
13292
                  /* We can also handle a commutative operation where the
13293
                     second operand matches the destination.  */
13294
                  else if (op1 && rtx_equal_p (dst, op1))
13295
                    {
13296
                      rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
13297
                      rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
13298
                      rtvec vec;
13299
 
13300
                      src = copy_rtx (src);
13301
                      XEXP (src, 0) = op1;
13302
                      XEXP (src, 1) = op0;
13303
                      pat = gen_rtx_SET (VOIDmode, dst, src);
13304
                      vec = gen_rtvec (2, pat, clobber);
13305
                      PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
13306
                      INSN_CODE (insn) = -1;
13307
                    }
13308
                }
13309
            }
13310
 
13311
          if (NONDEBUG_INSN_P (insn))
13312
            df_simulate_one_insn_backwards (bb, insn, &live);
13313
        }
13314
    }
13315
 
13316
  CLEAR_REG_SET (&live);
13317
}
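 
/* Illustrative example (not part of the original source): with r0 and r1 low
   registers and the condition codes dead, the loop above rewrites

     (set (reg:SI r0) (plus:SI (reg:SI r0) (reg:SI r1)))

   into

     (parallel [(set (reg:SI r0) (plus:SI (reg:SI r0) (reg:SI r1)))
                (clobber (reg:CC CC_REGNUM))])

   which matches the flag-setting pattern and can be assembled as the 16-bit
   "adds r0, r0, r1" rather than the 32-bit "add.w r0, r0, r1".  */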
13318
 
13319
/* GCC puts the pool in the wrong place for ARM, since we can only
13320
   load addresses a limited distance around the pc.  We do some
13321
   special munging to move the constant pool values to the correct
13322
   point in the code.  */
13323
static void
13324
arm_reorg (void)
13325
{
13326
  rtx insn;
13327
  HOST_WIDE_INT address = 0;
13328
  Mfix * fix;
13329
 
13330
  if (TARGET_THUMB2)
13331
    thumb2_reorg ();
13332
 
13333
  minipool_fix_head = minipool_fix_tail = NULL;
13334
 
13335
  /* The first insn must always be a note, or the code below won't
13336
     scan it properly.  */
13337
  insn = get_insns ();
13338
  gcc_assert (GET_CODE (insn) == NOTE);
13339
  minipool_pad = 0;
13340
 
13341
  /* Scan all the insns and record the operands that will need fixing.  */
13342
  for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
13343
    {
13344
      if (TARGET_CIRRUS_FIX_INVALID_INSNS
13345
          && (arm_cirrus_insn_p (insn)
13346
              || GET_CODE (insn) == JUMP_INSN
13347
              || arm_memory_load_p (insn)))
13348
        cirrus_reorg (insn);
13349
 
13350
      if (GET_CODE (insn) == BARRIER)
13351
        push_minipool_barrier (insn, address);
13352
      else if (INSN_P (insn))
13353
        {
13354
          rtx table;
13355
 
13356
          note_invalid_constants (insn, address, true);
13357
          address += get_attr_length (insn);
13358
 
13359
          /* If the insn is a vector jump, add the size of the table
13360
             and skip the table.  */
13361
          if ((table = is_jump_table (insn)) != NULL)
13362
            {
13363
              address += get_jump_table_size (table);
13364
              insn = table;
13365
            }
13366
        }
13367
      else if (LABEL_P (insn))
13368
        /* Add the worst-case padding due to alignment.  We don't add
13369
           the _current_ padding because the minipool insertions
13370
           themselves might change it.  */
13371
        address += get_label_padding (insn);
13372
    }
13373
 
13374
  fix = minipool_fix_head;
13375
 
13376
  /* Now scan the fixups and perform the required changes.  */
13377
  while (fix)
13378
    {
13379
      Mfix * ftmp;
13380
      Mfix * fdel;
13381
      Mfix *  last_added_fix;
13382
      Mfix * last_barrier = NULL;
13383
      Mfix * this_fix;
13384
 
13385
      /* Skip any further barriers before the next fix.  */
13386
      while (fix && GET_CODE (fix->insn) == BARRIER)
13387
        fix = fix->next;
13388
 
13389
      /* No more fixes.  */
13390
      if (fix == NULL)
13391
        break;
13392
 
13393
      last_added_fix = NULL;
13394
 
13395
      for (ftmp = fix; ftmp; ftmp = ftmp->next)
13396
        {
13397
          if (GET_CODE (ftmp->insn) == BARRIER)
13398
            {
13399
              if (ftmp->address >= minipool_vector_head->max_address)
13400
                break;
13401
 
13402
              last_barrier = ftmp;
13403
            }
13404
          else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
13405
            break;
13406
 
13407
          last_added_fix = ftmp;  /* Keep track of the last fix added.  */
13408
        }
13409
 
13410
      /* If we found a barrier, drop back to that; any fixes that we
13411
         could have reached but come after the barrier will now go in
13412
         the next mini-pool.  */
13413
      if (last_barrier != NULL)
13414
        {
13415
          /* Reduce the refcount for those fixes that won't go into this
13416
             pool after all.  */
13417
          for (fdel = last_barrier->next;
13418
               fdel && fdel != ftmp;
13419
               fdel = fdel->next)
13420
            {
13421
              fdel->minipool->refcount--;
13422
              fdel->minipool = NULL;
13423
            }
13424
 
13425
          ftmp = last_barrier;
13426
        }
13427
      else
13428
        {
13429
          /* ftmp is the first fix that we can't fit into this pool and
13430
             there are no natural barriers that we could use.  Insert a
13431
             new barrier in the code somewhere between the previous
13432
             fix and this one, and arrange to jump around it.  */
13433
          HOST_WIDE_INT max_address;
13434
 
13435
          /* The last item on the list of fixes must be a barrier, so
13436
             we can never run off the end of the list of fixes without
13437
             last_barrier being set.  */
13438
          gcc_assert (ftmp);
13439
 
13440
          max_address = minipool_vector_head->max_address;
13441
          /* Check that there isn't another fix that is in range that
13442
             we couldn't fit into this pool because the pool was
13443
             already too large: we need to put the pool before such an
13444
             instruction.  The pool itself may come just after the
13445
             fix because create_fix_barrier also allows space for a
13446
             jump instruction.  */
13447
          if (ftmp->address < max_address)
13448
            max_address = ftmp->address + 1;
13449
 
13450
          last_barrier = create_fix_barrier (last_added_fix, max_address);
13451
        }
13452
 
13453
      assign_minipool_offsets (last_barrier);
13454
 
13455
      while (ftmp)
13456
        {
13457
          if (GET_CODE (ftmp->insn) != BARRIER
13458
              && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
13459
                  == NULL))
13460
            break;
13461
 
13462
          ftmp = ftmp->next;
13463
        }
13464
 
13465
      /* Scan over the fixes we have identified for this pool, fixing them
13466
         up and adding the constants to the pool itself.  */
13467
      for (this_fix = fix; this_fix && ftmp != this_fix;
13468
           this_fix = this_fix->next)
13469
        if (GET_CODE (this_fix->insn) != BARRIER)
13470
          {
13471
            rtx addr
13472
              = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
13473
                                                  minipool_vector_label),
13474
                               this_fix->minipool->offset);
13475
            *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
13476
          }
13477
 
13478
      dump_minipool (last_barrier->insn);
13479
      fix = ftmp;
13480
    }
13481
 
13482
  /* From now on we must synthesize any constants that we can't handle
13483
     directly.  This can happen if the RTL gets split during final
13484
     instruction generation.  */
13485
  after_arm_reorg = 1;
13486
 
13487
  /* Free the minipool memory.  */
13488
  obstack_free (&minipool_obstack, minipool_startobj);
13489
}
13490
 
13491
/* Routines to output assembly language.  */
13492
 
13493
/* If the rtx is the correct value then return the string of the number.
13494
   In this way we can ensure that valid double constants are generated even
13495
   when cross compiling.  */
13496
const char *
13497
fp_immediate_constant (rtx x)
13498
{
13499
  REAL_VALUE_TYPE r;
13500
  int i;
13501
 
13502
  if (!fp_consts_inited)
13503
    init_fp_table ();
13504
 
13505
  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
13506
  for (i = 0; i < 8; i++)
13507
    if (REAL_VALUES_EQUAL (r, values_fp[i]))
13508
      return strings_fp[i];
13509
 
13510
  gcc_unreachable ();
13511
}
13512
 
13513
/* As for fp_immediate_constant, but value is passed directly, not in rtx.  */
13514
static const char *
13515
fp_const_from_val (REAL_VALUE_TYPE *r)
13516
{
13517
  int i;
13518
 
13519
  if (!fp_consts_inited)
13520
    init_fp_table ();
13521
 
13522
  for (i = 0; i < 8; i++)
13523
    if (REAL_VALUES_EQUAL (*r, values_fp[i]))
13524
      return strings_fp[i];
13525
 
13526
  gcc_unreachable ();
13527
}
13528
 
13529
/* Output the operands of a LDM/STM instruction to STREAM.
13530
   MASK is the ARM register set mask of which only bits 0-15 are important.
13531
   REG is the base register, either the frame pointer or the stack pointer,
13532
   INSTR is the possibly suffixed load or store instruction.
13533
   RFE is nonzero if the instruction should also copy spsr to cpsr.  */
13534
 
13535
static void
13536
print_multi_reg (FILE *stream, const char *instr, unsigned reg,
13537
                 unsigned long mask, int rfe)
13538
{
13539
  unsigned i;
13540
  bool not_first = FALSE;
13541
 
13542
  gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
13543
  fputc ('\t', stream);
13544
  asm_fprintf (stream, instr, reg);
13545
  fputc ('{', stream);
13546
 
13547
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
13548
    if (mask & (1 << i))
13549
      {
13550
        if (not_first)
13551
          fprintf (stream, ", ");
13552
 
13553
        asm_fprintf (stream, "%r", i);
13554
        not_first = TRUE;
13555
      }
13556
 
13557
  if (rfe)
13558
    fprintf (stream, "}^\n");
13559
  else
13560
    fprintf (stream, "}\n");
13561
}
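 
/* For example (illustrative): a MASK selecting r4, r5 and lr makes the loop
   above print "{r4, r5, lr}", with "}^" substituted for the closing brace
   when RFE is set.  */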
13562
 
13563
 
13564
/* Output a FLDMD instruction to STREAM.
13565
   BASE is the register containing the address.
13566
   REG and COUNT specify the register range.
13567
   Extra registers may be added to avoid hardware bugs.
13568
 
13569
   We output FLDMD even for ARMv5 VFP implementations.  Although
13570
   FLDMD is technically not supported until ARMv6, it is believed
13571
   that all VFP implementations support its use in this context.  */
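/* As an illustration of the output, BASE = SP_REGNUM, REG = 8 and
   COUNT = 3 would produce something like

        fldmfdd sp!, {d8, d9, d10}

   (the ARM10 workaround below may widen the register range by one).  */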
13572
 
13573
static void
13574
vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
13575
{
13576
  int i;
13577
 
13578
  /* Workaround ARM10 VFPr1 bug.  */
13579
  if (count == 2 && !arm_arch6)
13580
    {
13581
      if (reg == 15)
13582
        reg--;
13583
      count++;
13584
    }
13585
 
13586
  /* FLDMD may not load more than 16 doubleword registers at a time. Split the
13587
     load into multiple parts if we have to handle more than 16 registers.  */
13588
  if (count > 16)
13589
    {
13590
      vfp_output_fldmd (stream, base, reg, 16);
13591
      vfp_output_fldmd (stream, base, reg + 16, count - 16);
13592
      return;
13593
    }
13594
 
13595
  fputc ('\t', stream);
13596
  asm_fprintf (stream, "fldmfdd\t%r!, {", base);
13597
 
13598
  for (i = reg; i < reg + count; i++)
13599
    {
13600
      if (i > reg)
13601
        fputs (", ", stream);
13602
      asm_fprintf (stream, "d%d", i);
13603
    }
13604
  fputs ("}\n", stream);
13605
 
13606
}
13607
 
13608
 
13609
/* Output the assembly for a store multiple.  */
13610
 
13611
const char *
13612
vfp_output_fstmd (rtx * operands)
13613
{
13614
  char pattern[100];
13615
  int p;
13616
  int base;
13617
  int i;
13618
 
13619
  strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
13620
  p = strlen (pattern);
13621
 
13622
  gcc_assert (GET_CODE (operands[1]) == REG);
13623
 
13624
  base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
13625
  for (i = 1; i < XVECLEN (operands[2], 0); i++)
13626
    {
13627
      p += sprintf (&pattern[p], ", d%d", base + i);
13628
    }
13629
  strcpy (&pattern[p], "}");
13630
 
13631
  output_asm_insn (pattern, operands);
13632
  return "";
13633
}
13634
 
13635
 
13636
/* Emit RTL to save block of VFP register pairs to the stack.  Returns the
13637
   number of bytes pushed.  */
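/* A note on the RTL shape used below: the push itself is one PARALLEL
   wrapping an UNSPEC_PUSH_MULT store, while the REG_FRAME_RELATED_EXPR
   note attached to it spells out the stack pointer adjustment and each
   individual DFmode store, so the unwinder sees the precise effect of
   the instruction.  */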
13638
 
13639
static int
13640
vfp_emit_fstmd (int base_reg, int count)
13641
{
13642
  rtx par;
13643
  rtx dwarf;
13644
  rtx tmp, reg;
13645
  int i;
13646
 
13647
  /* Workaround ARM10 VFPr1 bug.  Data corruption can occur when exactly two
13648
     register pairs are stored by a store multiple insn.  We avoid this
13649
     by pushing an extra pair.  */
13650
  if (count == 2 && !arm_arch6)
13651
    {
13652
      if (base_reg == LAST_VFP_REGNUM - 3)
13653
        base_reg -= 2;
13654
      count++;
13655
    }
13656
 
13657
  /* FSTMD may not store more than 16 doubleword registers at once.  Split
13658
     larger stores into multiple parts (up to a maximum of two, in
13659
     practice).  */
13660
  if (count > 16)
13661
    {
13662
      int saved;
13663
      /* NOTE: base_reg is an internal register number, so each D register
13664
         counts as 2.  */
13665
      saved = vfp_emit_fstmd (base_reg + 32, count - 16);
13666
      saved += vfp_emit_fstmd (base_reg, 16);
13667
      return saved;
13668
    }
13669
 
13670
  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
13671
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
13672
 
13673
  reg = gen_rtx_REG (DFmode, base_reg);
13674
  base_reg += 2;
13675
 
13676
  XVECEXP (par, 0, 0)
13677
    = gen_rtx_SET (VOIDmode,
13678
                   gen_frame_mem
13679
                   (BLKmode,
13680
                    gen_rtx_PRE_MODIFY (Pmode,
13681
                                        stack_pointer_rtx,
13682
                                        plus_constant
13683
                                        (stack_pointer_rtx,
13684
                                         - (count * 8)))
13685
                    ),
13686
                   gen_rtx_UNSPEC (BLKmode,
13687
                                   gen_rtvec (1, reg),
13688
                                   UNSPEC_PUSH_MULT));
13689
 
13690
  tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13691
                     plus_constant (stack_pointer_rtx, -(count * 8)));
13692
  RTX_FRAME_RELATED_P (tmp) = 1;
13693
  XVECEXP (dwarf, 0, 0) = tmp;
13694
 
13695
  tmp = gen_rtx_SET (VOIDmode,
13696
                     gen_frame_mem (DFmode, stack_pointer_rtx),
13697
                     reg);
13698
  RTX_FRAME_RELATED_P (tmp) = 1;
13699
  XVECEXP (dwarf, 0, 1) = tmp;
13700
 
13701
  for (i = 1; i < count; i++)
13702
    {
13703
      reg = gen_rtx_REG (DFmode, base_reg);
13704
      base_reg += 2;
13705
      XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
13706
 
13707
      tmp = gen_rtx_SET (VOIDmode,
13708
                         gen_frame_mem (DFmode,
13709
                                        plus_constant (stack_pointer_rtx,
13710
                                                       i * 8)),
13711
                         reg);
13712
      RTX_FRAME_RELATED_P (tmp) = 1;
13713
      XVECEXP (dwarf, 0, i + 1) = tmp;
13714
    }
13715
 
13716
  par = emit_insn (par);
13717
  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
13718
  RTX_FRAME_RELATED_P (par) = 1;
13719
 
13720
  return count * 8;
13721
}
13722
 
13723
/* Emit a call instruction with pattern PAT.  ADDR is the address of
13724
   the call target.  */
13725
 
13726
void
13727
arm_emit_call_insn (rtx pat, rtx addr)
13728
{
13729
  rtx insn;
13730
 
13731
  insn = emit_call_insn (pat);
13732
 
13733
  /* The PIC register is live on entry to VxWorks PIC PLT entries.
13734
     If the call might use such an entry, add a use of the PIC register
13735
     to the instruction's CALL_INSN_FUNCTION_USAGE.  */
13736
  if (TARGET_VXWORKS_RTP
13737
      && flag_pic
13738
      && GET_CODE (addr) == SYMBOL_REF
13739
      && (SYMBOL_REF_DECL (addr)
13740
          ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
13741
          : !SYMBOL_REF_LOCAL_P (addr)))
13742
    {
13743
      require_pic_register ();
13744
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
13745
    }
13746
}
13747
 
13748
/* Output a 'call' insn.  */
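/* The sequence emitted below relies on the ARM-state convention that
   reading the pc yields the address of the current instruction plus 8,
   so the "mov lr, pc" placed just before the branch leaves lr holding
   the address of the instruction after the branch, i.e. the return
   address.  */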
13749
const char *
13750
output_call (rtx *operands)
13751
{
13752
  gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly.  */
13753
 
13754
  /* Handle calls to lr using ip (which may be clobbered in subr anyway).  */
13755
  if (REGNO (operands[0]) == LR_REGNUM)
13756
    {
13757
      operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
13758
      output_asm_insn ("mov%?\t%0, %|lr", operands);
13759
    }
13760
 
13761
  output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13762
 
13763
  if (TARGET_INTERWORK || arm_arch4t)
13764
    output_asm_insn ("bx%?\t%0", operands);
13765
  else
13766
    output_asm_insn ("mov%?\t%|pc, %0", operands);
13767
 
13768
  return "";
13769
}
13770
 
13771
/* Output a 'call' insn whose target is a reference in memory.  This is
13772
   disabled for ARMv5 and later, where a blx is preferred because otherwise
13773
   there's a significant performance overhead.  */
13774
const char *
13775
output_call_mem (rtx *operands)
13776
{
13777
  gcc_assert (!arm_arch5);
13778
  if (TARGET_INTERWORK)
13779
    {
13780
      output_asm_insn ("ldr%?\t%|ip, %0", operands);
13781
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13782
      output_asm_insn ("bx%?\t%|ip", operands);
13783
    }
13784
  else if (regno_use_in (LR_REGNUM, operands[0]))
13785
    {
13786
      /* LR is used in the memory address.  We load the address in the
13787
         first instruction.  It's safe to use IP as the target of the
13788
         load since the call will kill it anyway.  */
13789
      output_asm_insn ("ldr%?\t%|ip, %0", operands);
13790
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13791
      if (arm_arch4t)
13792
        output_asm_insn ("bx%?\t%|ip", operands);
13793
      else
13794
        output_asm_insn ("mov%?\t%|pc, %|ip", operands);
13795
    }
13796
  else
13797
    {
13798
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13799
      output_asm_insn ("ldr%?\t%|pc, %0", operands);
13800
    }
13801
 
13802
  return "";
13803
}
13804
 
13805
 
13806
/* Output a move from arm registers to an fpa register.
13807
   OPERANDS[0] is an fpa register.
13808
   OPERANDS[1] is the first register of an arm register triple.  */
13809
const char *
13810
output_mov_long_double_fpa_from_arm (rtx *operands)
13811
{
13812
  int arm_reg0 = REGNO (operands[1]);
13813
  rtx ops[3];
13814
 
13815
  gcc_assert (arm_reg0 != IP_REGNUM);
13816
 
13817
  ops[0] = gen_rtx_REG (SImode, arm_reg0);
13818
  ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
13819
  ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
13820
 
13821
  output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
13822
  output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
13823
 
13824
  return "";
13825
}
13826
 
13827
/* Output a move from an fpa register to arm registers.
13828
   OPERANDS[0] is the first register of an arm register triple.
13829
   OPERANDS[1] is an fpa register.  */
13830
const char *
13831
output_mov_long_double_arm_from_fpa (rtx *operands)
13832
{
13833
  int arm_reg0 = REGNO (operands[0]);
13834
  rtx ops[3];
13835
 
13836
  gcc_assert (arm_reg0 != IP_REGNUM);
13837
 
13838
  ops[0] = gen_rtx_REG (SImode, arm_reg0);
13839
  ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
13840
  ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
13841
 
13842
  output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
13843
  output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
13844
  return "";
13845
}
13846
 
13847
/* Output a move from arm registers to arm registers of a long double.
13848
   OPERANDS[0] is the destination.
13849
   OPERANDS[1] is the source.  */
13850
const char *
13851
output_mov_long_double_arm_from_arm (rtx *operands)
13852
{
13853
  /* We have to be careful here because the two might overlap.  */
13854
  int dest_start = REGNO (operands[0]);
13855
  int src_start = REGNO (operands[1]);
13856
  rtx ops[2];
13857
  int i;
13858
 
13859
  if (dest_start < src_start)
13860
    {
13861
      for (i = 0; i < 3; i++)
13862
        {
13863
          ops[0] = gen_rtx_REG (SImode, dest_start + i);
13864
          ops[1] = gen_rtx_REG (SImode, src_start + i);
13865
          output_asm_insn ("mov%?\t%0, %1", ops);
13866
        }
13867
    }
13868
  else
13869
    {
13870
      for (i = 2; i >= 0; i--)
13871
        {
13872
          ops[0] = gen_rtx_REG (SImode, dest_start + i);
13873
          ops[1] = gen_rtx_REG (SImode, src_start + i);
13874
          output_asm_insn ("mov%?\t%0, %1", ops);
13875
        }
13876
    }
13877
 
13878
  return "";
13879
}
13880
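/* Emit a pair of SETs loading SRC into DEST.  For a CONST_INT the low 16
   bits are set first and the high 16 bits are then inserted with a
   ZERO_EXTRACT (skipped when the high half is zero); on targets with
   movw/movt these typically assemble to that pair.  For a symbolic SRC a
   HIGH/LO_SUM pair is emitted instead.  */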
 
13881
void
13882
arm_emit_movpair (rtx dest, rtx src)
13883
 {
13884
  /* If the src is an immediate, simplify it.  */
13885
  if (CONST_INT_P (src))
13886
    {
13887
      HOST_WIDE_INT val = INTVAL (src);
13888
      emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
13889
      if ((val >> 16) & 0x0000ffff)
13890
        emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
13891
                                             GEN_INT (16)),
13892
                       GEN_INT ((val >> 16) & 0x0000ffff));
13893
      return;
13894
    }
13895
   emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
13896
   emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
13897
 }
13898
 
13899
/* Output a move from arm registers to an fpa register.
13900
   OPERANDS[0] is an fpa register.
13901
   OPERANDS[1] is the first register of an arm register pair.  */
13902
const char *
13903
output_mov_double_fpa_from_arm (rtx *operands)
13904
{
13905
  int arm_reg0 = REGNO (operands[1]);
13906
  rtx ops[2];
13907
 
13908
  gcc_assert (arm_reg0 != IP_REGNUM);
13909
 
13910
  ops[0] = gen_rtx_REG (SImode, arm_reg0);
13911
  ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
13912
  output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
13913
  output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
13914
  return "";
13915
}
13916
 
13917
/* Output a move from an fpa register to arm registers.
13918
   OPERANDS[0] is the first register of an arm register pair.
13919
   OPERANDS[1] is an fpa register.  */
13920
const char *
13921
output_mov_double_arm_from_fpa (rtx *operands)
13922
{
13923
  int arm_reg0 = REGNO (operands[0]);
13924
  rtx ops[2];
13925
 
13926
  gcc_assert (arm_reg0 != IP_REGNUM);
13927
 
13928
  ops[0] = gen_rtx_REG (SImode, arm_reg0);
13929
  ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
13930
  output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
13931
  output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
13932
  return "";
13933
}
13934
 
13935
/* Output a move between double words.  It must be REG<-MEM
13936
   or MEM<-REG.  */
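/* When EMIT is false nothing is printed; the routine is then only being
   used to compute the length of the move, returned through COUNT as the
   number of instructions (1 or 2) for the benefit of length attributes.  */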
13937
const char *
13938
output_move_double (rtx *operands, bool emit, int *count)
13939
{
13940
  enum rtx_code code0 = GET_CODE (operands[0]);
13941
  enum rtx_code code1 = GET_CODE (operands[1]);
13942
  rtx otherops[3];
13943
  if (count)
13944
    *count = 1;
13945
 
13946
  /* The only case when this might happen is when
13947
     you are looking at the length of a DImode instruction
13948
     that has an invalid constant in it.  */
13949
  if (code0 == REG && code1 != MEM)
13950
    {
13951
      gcc_assert (!emit);
13952
      *count = 2;
13953
      return "";
13954
    }
13955
 
13956
  if (code0 == REG)
13957
    {
13958
      unsigned int reg0 = REGNO (operands[0]);
13959
 
13960
      otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
13961
 
13962
      gcc_assert (code1 == MEM);  /* Constraints should ensure this.  */
13963
 
13964
      switch (GET_CODE (XEXP (operands[1], 0)))
13965
        {
13966
        case REG:
13967
 
13968
          if (emit)
13969
            {
13970
              if (TARGET_LDRD
13971
                  && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
13972
                output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
13973
              else
13974
                output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
13975
            }
13976
          break;
13977
 
13978
        case PRE_INC:
13979
          gcc_assert (TARGET_LDRD);
13980
          if (emit)
13981
            output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
13982
          break;
13983
 
13984
        case PRE_DEC:
13985
          if (emit)
13986
            {
13987
              if (TARGET_LDRD)
13988
                output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
13989
              else
13990
                output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
13991
            }
13992
          break;
13993
 
13994
        case POST_INC:
13995
          if (emit)
13996
            {
13997
              if (TARGET_LDRD)
13998
                output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
13999
              else
14000
                output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
14001
            }
14002
          break;
14003
 
14004
        case POST_DEC:
14005
          gcc_assert (TARGET_LDRD);
14006
          if (emit)
14007
            output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
14008
          break;
14009
 
14010
        case PRE_MODIFY:
14011
        case POST_MODIFY:
14012
          /* Autoincrement addressing modes should never have overlapping
14013
             base and destination registers, and overlapping index registers
14014
             are already prohibited, so this doesn't need to worry about
14015
             fix_cm3_ldrd.  */
14016
          otherops[0] = operands[0];
14017
          otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
14018
          otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
14019
 
14020
          if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
14021
            {
14022
              if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
14023
                {
14024
                  /* Registers overlap so split out the increment.  */
14025
                  if (emit)
14026
                    {
14027
                      output_asm_insn ("add%?\t%1, %1, %2", otherops);
14028
                      output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
14029
                    }
14030
                  if (count)
14031
                    *count = 2;
14032
                }
14033
              else
14034
                {
14035
                  /* Use a single insn if we can.
14036
                     FIXME: IWMMXT allows offsets larger than ldrd can
14037
                     handle, fix these up with a pair of ldr.  */
14038
                  if (TARGET_THUMB2
14039
                      || GET_CODE (otherops[2]) != CONST_INT
14040
                      || (INTVAL (otherops[2]) > -256
14041
                          && INTVAL (otherops[2]) < 256))
14042
                    {
14043
                      if (emit)
14044
                        output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
14045
                    }
14046
                  else
14047
                    {
14048
                      if (emit)
14049
                        {
14050
                          output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
14051
                          output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
14052
                        }
14053
                      if (count)
14054
                        *count = 2;
14055
 
14056
                    }
14057
                }
14058
            }
14059
          else
14060
            {
14061
              /* Use a single insn if we can.
14062
                 FIXME: IWMMXT allows offsets larger than ldrd can handle,
14063
                 fix these up with a pair of ldr.  */
14064
              if (TARGET_THUMB2
14065
                  || GET_CODE (otherops[2]) != CONST_INT
14066
                  || (INTVAL (otherops[2]) > -256
14067
                      && INTVAL (otherops[2]) < 256))
14068
                {
14069
                  if (emit)
14070
                    output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
14071
                }
14072
              else
14073
                {
14074
                  if (emit)
14075
                    {
14076
                      output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
14077
                      output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
14078
                    }
14079
                  if (count)
14080
                    *count = 2;
14081
                }
14082
            }
14083
          break;
14084
 
14085
        case LABEL_REF:
14086
        case CONST:
14087
          /* We might be able to use ldrd %0, %1 here.  However the range is
14088
             different to ldr/adr, and it is broken on some ARMv7-M
14089
             implementations.  */
14090
          /* Use the second register of the pair to avoid problematic
14091
             overlap.  */
14092
          otherops[1] = operands[1];
14093
          if (emit)
14094
            output_asm_insn ("adr%?\t%0, %1", otherops);
14095
          operands[1] = otherops[0];
14096
          if (emit)
14097
            {
14098
              if (TARGET_LDRD)
14099
                output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
14100
              else
14101
                output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
14102
            }
14103
 
14104
          if (count)
14105
            *count = 2;
14106
          break;
14107
 
14108
          /* ??? This needs checking for thumb2.  */
14109
        default:
14110
          if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
14111
                               GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
14112
            {
14113
              otherops[0] = operands[0];
14114
              otherops[1] = XEXP (XEXP (operands[1], 0), 0);
14115
              otherops[2] = XEXP (XEXP (operands[1], 0), 1);
14116
 
14117
              if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
14118
                {
14119
                  if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
14120
                    {
14121
                      switch ((int) INTVAL (otherops[2]))
14122
                        {
14123
                        case -8:
14124
                          if (emit)
14125
                            output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
14126
                          return "";
14127
                        case -4:
14128
                          if (TARGET_THUMB2)
14129
                            break;
14130
                          if (emit)
14131
                            output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
14132
                          return "";
14133
                        case 4:
14134
                          if (TARGET_THUMB2)
14135
                            break;
14136
                          if (emit)
14137
                            output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
14138
                          return "";
14139
                        }
14140
                    }
14141
                  otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
14142
                  operands[1] = otherops[0];
14143
                  if (TARGET_LDRD
14144
                      && (GET_CODE (otherops[2]) == REG
14145
                          || TARGET_THUMB2
14146
                          || (GET_CODE (otherops[2]) == CONST_INT
14147
                              && INTVAL (otherops[2]) > -256
14148
                              && INTVAL (otherops[2]) < 256)))
14149
                    {
14150
                      if (reg_overlap_mentioned_p (operands[0],
14151
                                                   otherops[2]))
14152
                        {
14153
                          rtx tmp;
14154
                          /* Swap base and index registers over to
14155
                             avoid a conflict.  */
14156
                          tmp = otherops[1];
14157
                          otherops[1] = otherops[2];
14158
                          otherops[2] = tmp;
14159
                        }
14160
                      /* If both registers conflict, it will usually
14161
                         have been fixed by a splitter.  */
14162
                      if (reg_overlap_mentioned_p (operands[0], otherops[2])
14163
                          || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
14164
                        {
14165
                          if (emit)
14166
                            {
14167
                              output_asm_insn ("add%?\t%0, %1, %2", otherops);
14168
                              output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
14169
                            }
14170
                          if (count)
14171
                            *count = 2;
14172
                        }
14173
                      else
14174
                        {
14175
                          otherops[0] = operands[0];
14176
                          if (emit)
14177
                            output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
14178
                        }
14179
                      return "";
14180
                    }
14181
 
14182
                  if (GET_CODE (otherops[2]) == CONST_INT)
14183
                    {
14184
                      if (emit)
14185
                        {
14186
                          if (!(const_ok_for_arm (INTVAL (otherops[2]))))
14187
                            output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
14188
                          else
14189
                            output_asm_insn ("add%?\t%0, %1, %2", otherops);
14190
                        }
14191
                    }
14192
                  else
14193
                    {
14194
                      if (emit)
14195
                        output_asm_insn ("add%?\t%0, %1, %2", otherops);
14196
                    }
14197
                }
14198
              else
14199
                {
14200
                  if (emit)
14201
                    output_asm_insn ("sub%?\t%0, %1, %2", otherops);
14202
                }
14203
 
14204
              if (count)
14205
                *count = 2;
14206
 
14207
              if (TARGET_LDRD)
14208
                return "ldr%(d%)\t%0, [%1]";
14209
 
14210
              return "ldm%(ia%)\t%1, %M0";
14211
            }
14212
          else
14213
            {
14214
              otherops[1] = adjust_address (operands[1], SImode, 4);
14215
              /* Take care of overlapping base/data reg.  */
14216
              if (reg_mentioned_p (operands[0], operands[1]))
14217
                {
14218
                  if (emit)
14219
                    {
14220
                      output_asm_insn ("ldr%?\t%0, %1", otherops);
14221
                      output_asm_insn ("ldr%?\t%0, %1", operands);
14222
                    }
14223
                  if (count)
14224
                    *count = 2;
14225
 
14226
                }
14227
              else
14228
                {
14229
                  if (emit)
14230
                    {
14231
                      output_asm_insn ("ldr%?\t%0, %1", operands);
14232
                      output_asm_insn ("ldr%?\t%0, %1", otherops);
14233
                    }
14234
                  if (count)
14235
                    *count = 2;
14236
                }
14237
            }
14238
        }
14239
    }
14240
  else
14241
    {
14242
      /* Constraints should ensure this.  */
14243
      gcc_assert (code0 == MEM && code1 == REG);
14244
      gcc_assert (REGNO (operands[1]) != IP_REGNUM);
14245
 
14246
      switch (GET_CODE (XEXP (operands[0], 0)))
14247
        {
14248
        case REG:
14249
          if (emit)
14250
            {
14251
              if (TARGET_LDRD)
14252
                output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
14253
              else
14254
                output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
14255
            }
14256
          break;
14257
 
14258
        case PRE_INC:
14259
          gcc_assert (TARGET_LDRD);
14260
          if (emit)
14261
            output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
14262
          break;
14263
 
14264
        case PRE_DEC:
14265
          if (emit)
14266
            {
14267
              if (TARGET_LDRD)
14268
                output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
14269
              else
14270
                output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
14271
            }
14272
          break;
14273
 
14274
        case POST_INC:
14275
          if (emit)
14276
            {
14277
              if (TARGET_LDRD)
14278
                output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
14279
              else
14280
                output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
14281
            }
14282
          break;
14283
 
14284
        case POST_DEC:
14285
          gcc_assert (TARGET_LDRD);
14286
          if (emit)
14287
            output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
14288
          break;
14289
 
14290
        case PRE_MODIFY:
14291
        case POST_MODIFY:
14292
          otherops[0] = operands[1];
14293
          otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
14294
          otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
14295
 
14296
          /* IWMMXT allows offsets larger than ldrd can handle,
14297
             fix these up with a pair of ldr.  */
14298
          if (!TARGET_THUMB2
14299
              && GET_CODE (otherops[2]) == CONST_INT
14300
              && (INTVAL(otherops[2]) <= -256
14301
                  || INTVAL(otherops[2]) >= 256))
14302
            {
14303
              if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
14304
                {
14305
                  if (emit)
14306
                    {
14307
                      output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
14308
                      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
14309
                    }
14310
                  if (count)
14311
                    *count = 2;
14312
                }
14313
              else
14314
                {
14315
                  if (emit)
14316
                    {
14317
                      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
14318
                      output_asm_insn ("str%?\t%0, [%1], %2", otherops);
14319
                    }
14320
                  if (count)
14321
                    *count = 2;
14322
                }
14323
            }
14324
          else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
14325
            {
14326
              if (emit)
14327
                output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
14328
            }
14329
          else
14330
            {
14331
              if (emit)
14332
                output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
14333
            }
14334
          break;
14335
 
14336
        case PLUS:
14337
          otherops[2] = XEXP (XEXP (operands[0], 0), 1);
14338
          if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
14339
            {
14340
              switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
14341
                {
14342
                case -8:
14343
                  if (emit)
14344
                    output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
14345
                  return "";
14346
 
14347
                case -4:
14348
                  if (TARGET_THUMB2)
14349
                    break;
14350
                  if (emit)
14351
                    output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
14352
                  return "";
14353
 
14354
                case 4:
14355
                  if (TARGET_THUMB2)
14356
                    break;
14357
                  if (emit)
14358
                    output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
14359
                  return "";
14360
                }
14361
            }
14362
          if (TARGET_LDRD
14363
              && (GET_CODE (otherops[2]) == REG
14364
                  || TARGET_THUMB2
14365
                  || (GET_CODE (otherops[2]) == CONST_INT
14366
                      && INTVAL (otherops[2]) > -256
14367
                      && INTVAL (otherops[2]) < 256)))
14368
            {
14369
              otherops[0] = operands[1];
14370
              otherops[1] = XEXP (XEXP (operands[0], 0), 0);
14371
              if (emit)
14372
                output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
14373
              return "";
14374
            }
14375
          /* Fall through */
14376
 
14377
        default:
14378
          otherops[0] = adjust_address (operands[0], SImode, 4);
14379
          otherops[1] = operands[1];
14380
          if (emit)
14381
            {
14382
              output_asm_insn ("str%?\t%1, %0", operands);
14383
              output_asm_insn ("str%?\t%H1, %0", otherops);
14384
            }
14385
          if (count)
14386
            *count = 2;
14387
        }
14388
    }
14389
 
14390
  return "";
14391
}
14392
 
14393
/* Output a move, load or store for quad-word vectors in ARM registers.  Only
14394
   handles MEMs accepted by neon_vector_mem_operand with TYPE=1.  */
14395
 
14396
const char *
14397
output_move_quad (rtx *operands)
14398
{
14399
  if (REG_P (operands[0]))
14400
    {
14401
      /* Load, or reg->reg move.  */
14402
 
14403
      if (MEM_P (operands[1]))
14404
        {
14405
          switch (GET_CODE (XEXP (operands[1], 0)))
14406
            {
14407
            case REG:
14408
              output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
14409
              break;
14410
 
14411
            case LABEL_REF:
14412
            case CONST:
14413
              output_asm_insn ("adr%?\t%0, %1", operands);
14414
              output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
14415
              break;
14416
 
14417
            default:
14418
              gcc_unreachable ();
14419
            }
14420
        }
14421
      else
14422
        {
14423
          rtx ops[2];
14424
          int dest, src, i;
14425
 
14426
          gcc_assert (REG_P (operands[1]));
14427
 
14428
          dest = REGNO (operands[0]);
14429
          src = REGNO (operands[1]);
14430
 
14431
          /* This seems pretty dumb, but hopefully GCC won't try to do it
14432
             very often.  */
14433
          if (dest < src)
14434
            for (i = 0; i < 4; i++)
14435
              {
14436
                ops[0] = gen_rtx_REG (SImode, dest + i);
14437
                ops[1] = gen_rtx_REG (SImode, src + i);
14438
                output_asm_insn ("mov%?\t%0, %1", ops);
14439
              }
14440
          else
14441
            for (i = 3; i >= 0; i--)
14442
              {
14443
                ops[0] = gen_rtx_REG (SImode, dest + i);
14444
                ops[1] = gen_rtx_REG (SImode, src + i);
14445
                output_asm_insn ("mov%?\t%0, %1", ops);
14446
              }
14447
        }
14448
    }
14449
  else
14450
    {
14451
      gcc_assert (MEM_P (operands[0]));
14452
      gcc_assert (REG_P (operands[1]));
14453
      gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
14454
 
14455
      switch (GET_CODE (XEXP (operands[0], 0)))
14456
        {
14457
        case REG:
14458
          output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
14459
          break;
14460
 
14461
        default:
14462
          gcc_unreachable ();
14463
        }
14464
    }
14465
 
14466
  return "";
14467
}
14468
 
14469
/* Output a VFP load or store instruction.  */
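/* A rough sketch of the strings assembled below: a DFmode load from a
   plain register or register-plus-offset address comes out as
   "fldd%?\t%P0, %1", an SFmode store as "fsts%?\t%0, %1", while the
   PRE_DEC and POST_INC addressing forms use the fstmdb/fldmia style
   multiple-register mnemonics with a single register in the list.  */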
14470
 
14471
const char *
14472
output_move_vfp (rtx *operands)
14473
{
14474
  rtx reg, mem, addr, ops[2];
14475
  int load = REG_P (operands[0]);
14476
  int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
14477
  int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
14478
  const char *templ;
14479
  char buff[50];
14480
  enum machine_mode mode;
14481
 
14482
  reg = operands[!load];
14483
  mem = operands[load];
14484
 
14485
  mode = GET_MODE (reg);
14486
 
14487
  gcc_assert (REG_P (reg));
14488
  gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
14489
  gcc_assert (mode == SFmode
14490
              || mode == DFmode
14491
              || mode == SImode
14492
              || mode == DImode
14493
              || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
14494
  gcc_assert (MEM_P (mem));
14495
 
14496
  addr = XEXP (mem, 0);
14497
 
14498
  switch (GET_CODE (addr))
14499
    {
14500
    case PRE_DEC:
14501
      templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
14502
      ops[0] = XEXP (addr, 0);
14503
      ops[1] = reg;
14504
      break;
14505
 
14506
    case POST_INC:
14507
      templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
14508
      ops[0] = XEXP (addr, 0);
14509
      ops[1] = reg;
14510
      break;
14511
 
14512
    default:
14513
      templ = "f%s%c%%?\t%%%s0, %%1%s";
14514
      ops[0] = reg;
14515
      ops[1] = mem;
14516
      break;
14517
    }
14518
 
14519
  sprintf (buff, templ,
14520
           load ? "ld" : "st",
14521
           dp ? 'd' : 's',
14522
           dp ? "P" : "",
14523
           integer_p ? "\t%@ int" : "");
14524
  output_asm_insn (buff, ops);
14525
 
14526
  return "";
14527
}
14528
 
14529
/* Output a Neon quad-word load or store, or a load or store for
14530
   larger structure modes.
14531
 
14532
   WARNING: The ordering of elements is weird in big-endian mode,
14533
   because we use VSTM, as required by the EABI.  GCC RTL defines
14534
   element ordering based on in-memory order.  This can be differ
14535
   from the architectural ordering of elements within a NEON register.
14536
   The intrinsics defined in arm_neon.h use the NEON register element
14537
   ordering, not the GCC RTL element ordering.
14538
 
14539
   For example, the in-memory ordering of a big-endian quadword
14540
   vector with 16-bit elements when stored from register pair {d0,d1}
14541
   will be (lowest address first, d0[N] is NEON register element N):
14542
 
14543
     [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
14544
 
14545
   When necessary, quadword registers (dN, dN+1) are moved to ARM
14546
   registers from rN in the order:
14547
 
14548
     dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
14549
 
14550
   So that STM/LDM can be used on vectors in ARM registers, and the
14551
   same memory layout will result as if VSTM/VLDM were used.  */
14552
 
14553
const char *
14554
output_move_neon (rtx *operands)
14555
{
14556
  rtx reg, mem, addr, ops[2];
14557
  int regno, load = REG_P (operands[0]);
14558
  const char *templ;
14559
  char buff[50];
14560
  enum machine_mode mode;
14561
 
14562
  reg = operands[!load];
14563
  mem = operands[load];
14564
 
14565
  mode = GET_MODE (reg);
14566
 
14567
  gcc_assert (REG_P (reg));
14568
  regno = REGNO (reg);
14569
  gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
14570
              || NEON_REGNO_OK_FOR_QUAD (regno));
14571
  gcc_assert (VALID_NEON_DREG_MODE (mode)
14572
              || VALID_NEON_QREG_MODE (mode)
14573
              || VALID_NEON_STRUCT_MODE (mode));
14574
  gcc_assert (MEM_P (mem));
14575
 
14576
  addr = XEXP (mem, 0);
14577
 
14578
  /* Strip off const from addresses like (const (plus (...))).  */
14579
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
14580
    addr = XEXP (addr, 0);
14581
 
14582
  switch (GET_CODE (addr))
14583
    {
14584
    case POST_INC:
14585
      templ = "v%smia%%?\t%%0!, %%h1";
14586
      ops[0] = XEXP (addr, 0);
14587
      ops[1] = reg;
14588
      break;
14589
 
14590
    case PRE_DEC:
14591
      /* FIXME: We should be using vld1/vst1 here in BE mode?  */
14592
      templ = "v%smdb%%?\t%%0!, %%h1";
14593
      ops[0] = XEXP (addr, 0);
14594
      ops[1] = reg;
14595
      break;
14596
 
14597
    case POST_MODIFY:
14598
      /* FIXME: Not currently enabled in neon_vector_mem_operand.  */
14599
      gcc_unreachable ();
14600
 
14601
    case LABEL_REF:
14602
    case PLUS:
14603
      {
14604
        int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
14605
        int i;
14606
        int overlap = -1;
14607
        for (i = 0; i < nregs; i++)
14608
          {
14609
            /* We're only using DImode here because it's a convenient size.  */
14610
            ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
14611
            ops[1] = adjust_address (mem, DImode, 8 * i);
14612
            if (reg_overlap_mentioned_p (ops[0], mem))
14613
              {
14614
                gcc_assert (overlap == -1);
14615
                overlap = i;
14616
              }
14617
            else
14618
              {
14619
                sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
14620
                output_asm_insn (buff, ops);
14621
              }
14622
          }
14623
        if (overlap != -1)
14624
          {
14625
            ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
14626
            ops[1] = adjust_address (mem, SImode, 8 * overlap);
14627
            sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
14628
            output_asm_insn (buff, ops);
14629
          }
14630
 
14631
        return "";
14632
      }
14633
 
14634
    default:
14635
      templ = "v%smia%%?\t%%m0, %%h1";
14636
      ops[0] = mem;
14637
      ops[1] = reg;
14638
    }
14639
 
14640
  sprintf (buff, templ, load ? "ld" : "st");
14641
  output_asm_insn (buff, ops);
14642
 
14643
  return "";
14644
}
14645
 
14646
/* Compute and return the length of neon_mov<mode>, where <mode> is
14647
   one of VSTRUCT modes: EI, OI, CI or XI.  */
14648
int
14649
arm_attr_length_move_neon (rtx insn)
14650
{
14651
  rtx reg, mem, addr;
14652
  int load;
14653
  enum machine_mode mode;
14654
 
14655
  extract_insn_cached (insn);
14656
 
14657
  if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
14658
    {
14659
      mode = GET_MODE (recog_data.operand[0]);
14660
      switch (mode)
14661
        {
14662
        case EImode:
14663
        case OImode:
14664
          return 8;
14665
        case CImode:
14666
          return 12;
14667
        case XImode:
14668
          return 16;
14669
        default:
14670
          gcc_unreachable ();
14671
        }
14672
    }
14673
 
14674
  load = REG_P (recog_data.operand[0]);
14675
  reg = recog_data.operand[!load];
14676
  mem = recog_data.operand[load];
14677
 
14678
  gcc_assert (MEM_P (mem));
14679
 
14680
  mode = GET_MODE (reg);
14681
  addr = XEXP (mem, 0);
14682
 
14683
  /* Strip off const from addresses like (const (plus (...))).  */
14684
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
14685
    addr = XEXP (addr, 0);
14686
 
14687
  if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
14688
    {
14689
      int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
14690
      return insns * 4;
14691
    }
14692
  else
14693
    return 4;
14694
}
14695
 
14696
/* Return nonzero if the offset in the address is an immediate.  Otherwise,
14697
   return zero.  */
14698
 
14699
int
14700
arm_address_offset_is_imm (rtx insn)
14701
{
14702
  rtx mem, addr;
14703
 
14704
  extract_insn_cached (insn);
14705
 
14706
  if (REG_P (recog_data.operand[0]))
14707
    return 0;
14708
 
14709
  mem = recog_data.operand[0];
14710
 
14711
  gcc_assert (MEM_P (mem));
14712
 
14713
  addr = XEXP (mem, 0);
14714
 
14715
  if (GET_CODE (addr) == REG
14716
      || (GET_CODE (addr) == PLUS
14717
          && GET_CODE (XEXP (addr, 0)) == REG
14718
          && GET_CODE (XEXP (addr, 1)) == CONST_INT))
14719
    return 1;
14720
  else
14721
    return 0;
14722
}
14723
 
14724
/* Output an ADD r, s, #n where n may be too big for one instruction.
14725
   If adding zero to one register, output nothing.  */
14726
const char *
14727
output_add_immediate (rtx *operands)
14728
{
14729
  HOST_WIDE_INT n = INTVAL (operands[2]);
14730
 
14731
  if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
14732
    {
14733
      if (n < 0)
14734
        output_multi_immediate (operands,
14735
                                "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
14736
                                -n);
14737
      else
14738
        output_multi_immediate (operands,
14739
                                "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
14740
                                n);
14741
    }
14742
 
14743
  return "";
14744
}
14745
 
14746
/* Output a multiple immediate operation.
14747
   OPERANDS is the vector of operands referred to in the output patterns.
14748
   INSTR1 is the output pattern to use for the first constant.
14749
   INSTR2 is the output pattern to use for subsequent constants.
14750
   IMMED_OP is the index of the constant slot in OPERANDS.
14751
   N is the constant value.  */
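/* A worked example (a sketch, not taken from real output): splitting
   N = 0x10004 scans the value in byte-sized chunks anchored at even bit
   positions, so an addition would be emitted roughly as

        add     rd, rs, #4
        add     rd, rd, #65536

   one instruction per nonzero chunk, with INSTR1 used for the first and
   INSTR2 for the rest.  */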
14752
static const char *
14753
output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
14754
                        int immed_op, HOST_WIDE_INT n)
14755
{
14756
#if HOST_BITS_PER_WIDE_INT > 32
14757
  n &= 0xffffffff;
14758
#endif
14759
 
14760
  if (n == 0)
14761
    {
14762
      /* Quick and easy output.  */
14763
      operands[immed_op] = const0_rtx;
14764
      output_asm_insn (instr1, operands);
14765
    }
14766
  else
14767
    {
14768
      int i;
14769
      const char * instr = instr1;
14770
 
14771
      /* Note that n is never zero here (which would give no output).  */
14772
      for (i = 0; i < 32; i += 2)
14773
        {
14774
          if (n & (3 << i))
14775
            {
14776
              operands[immed_op] = GEN_INT (n & (255 << i));
14777
              output_asm_insn (instr, operands);
14778
              instr = instr2;
14779
              i += 6;
14780
            }
14781
        }
14782
    }
14783
 
14784
  return "";
14785
}
14786
 
14787
/* Return the name of a shifter operation.  */
14788
static const char *
14789
arm_shift_nmem(enum rtx_code code)
14790
{
14791
  switch (code)
14792
    {
14793
    case ASHIFT:
14794
      return ARM_LSL_NAME;
14795
 
14796
    case ASHIFTRT:
14797
      return "asr";
14798
 
14799
    case LSHIFTRT:
14800
      return "lsr";
14801
 
14802
    case ROTATERT:
14803
      return "ror";
14804
 
14805
    default:
14806
      abort();
14807
    }
14808
}
14809
 
14810
/* Return the appropriate ARM instruction for the operation code.
14811
   The returned result should not be overwritten.  OP is the rtx of the
14812
   operation.  SHIFT_FIRST_ARG is TRUE if the first argument of the operator
14813
   was shifted.  */
14814
const char *
14815
arithmetic_instr (rtx op, int shift_first_arg)
14816
{
14817
  switch (GET_CODE (op))
14818
    {
14819
    case PLUS:
14820
      return "add";
14821
 
14822
    case MINUS:
14823
      return shift_first_arg ? "rsb" : "sub";
14824
 
14825
    case IOR:
14826
      return "orr";
14827
 
14828
    case XOR:
14829
      return "eor";
14830
 
14831
    case AND:
14832
      return "and";
14833
 
14834
    case ASHIFT:
14835
    case ASHIFTRT:
14836
    case LSHIFTRT:
14837
    case ROTATERT:
14838
      return arm_shift_nmem(GET_CODE(op));
14839
 
14840
    default:
14841
      gcc_unreachable ();
14842
    }
14843
}
14844
 
14845
/* Ensure valid constant shifts and return the appropriate shift mnemonic
14846
   for the operation code.  The returned result should not be overwritten.
14847
   OP is the rtx of the shift.
14848
   On exit, *AMOUNTP will be -1 if the shift is by a register, otherwise it
14849
   will be the constant shift amount.  */
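/* For instance, (ashiftrt x (const_int 3)) yields "asr" with *AMOUNTP set
   to 3, while (mult x (const_int 8)) is reported as a left shift
   (ARM_LSL_NAME) also with *AMOUNTP set to 3, since multiplication by a
   power of two is folded into the shifter operand.  */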
14850
static const char *
14851
shift_op (rtx op, HOST_WIDE_INT *amountp)
14852
{
14853
  const char * mnem;
14854
  enum rtx_code code = GET_CODE (op);
14855
 
14856
  switch (GET_CODE (XEXP (op, 1)))
14857
    {
14858
    case REG:
14859
    case SUBREG:
14860
      *amountp = -1;
14861
      break;
14862
 
14863
    case CONST_INT:
14864
      *amountp = INTVAL (XEXP (op, 1));
14865
      break;
14866
 
14867
    default:
14868
      gcc_unreachable ();
14869
    }
14870
 
14871
  switch (code)
14872
    {
14873
    case ROTATE:
14874
      gcc_assert (*amountp != -1);
14875
      *amountp = 32 - *amountp;
14876
      code = ROTATERT;
14877
 
14878
      /* Fall through.  */
14879
 
14880
    case ASHIFT:
14881
    case ASHIFTRT:
14882
    case LSHIFTRT:
14883
    case ROTATERT:
14884
      mnem = arm_shift_nmem(code);
14885
      break;
14886
 
14887
    case MULT:
14888
      /* We never have to worry about the amount being other than a
14889
         power of 2, since this case can never be reloaded from a reg.  */
14890
      gcc_assert (*amountp != -1);
14891
      *amountp = int_log2 (*amountp);
14892
      return ARM_LSL_NAME;
14893
 
14894
    default:
14895
      gcc_unreachable ();
14896
    }
14897
 
14898
  if (*amountp != -1)
14899
    {
14900
      /* This is not 100% correct, but follows from the desire to merge
14901
         multiplication by a power of 2 with the recognizer for a
14902
         shift.  >=32 is not a valid shift for "lsl", so we must try and
14903
         output a shift that produces the correct arithmetical result.
14904
         Using lsr #32 is identical except for the fact that the carry bit
14905
         is not set correctly if we set the flags; but we never use the
14906
         carry bit from such an operation, so we can ignore that.  */
14907
      if (code == ROTATERT)
14908
        /* Rotate is just modulo 32.  */
14909
        *amountp &= 31;
14910
      else if (*amountp != (*amountp & 31))
14911
        {
14912
          if (code == ASHIFT)
14913
            mnem = "lsr";
14914
          *amountp = 32;
14915
        }
14916
 
14917
      /* Shifts of 0 are no-ops.  */
14918
      if (*amountp == 0)
14919
        return NULL;
14920
    }
14921
 
14922
  return mnem;
14923
}
14924
 
14925
/* Obtain the shift count from POWER, which must be a power of two.  */
14926
 
14927
static HOST_WIDE_INT
14928
int_log2 (HOST_WIDE_INT power)
14929
{
14930
  HOST_WIDE_INT shift = 0;
14931
 
14932
  while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
14933
    {
14934
      gcc_assert (shift <= 31);
14935
      shift++;
14936
    }
14937
 
14938
  return shift;
14939
}
14940
 
14941
/* Output a .ascii pseudo-op, keeping track of lengths.  This is
14942
   because /bin/as is horribly restrictive.  The judgement about
14943
   whether or not each character is 'printable' (and can be output as
14944
   is) or not (and must be printed with an octal escape) must be made
14945
   with reference to the *host* character set -- the situation is
14946
   similar to that discussed in the comments above pp_c_char in
14947
   c-pretty-print.c.  */
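/* For example, the three input bytes 'a', '"' and newline would be
   emitted roughly as

        .ascii  "a\"\012"

   with the string closed and a fresh .ascii directive started once
   MAX_ASCII_LEN characters have been written on the current line.  */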
14948
 
14949
#define MAX_ASCII_LEN 51
14950
 
14951
void
14952
output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
14953
{
14954
  int i;
14955
  int len_so_far = 0;
14956
 
14957
  fputs ("\t.ascii\t\"", stream);
14958
 
14959
  for (i = 0; i < len; i++)
14960
    {
14961
      int c = p[i];
14962
 
14963
      if (len_so_far >= MAX_ASCII_LEN)
14964
        {
14965
          fputs ("\"\n\t.ascii\t\"", stream);
14966
          len_so_far = 0;
14967
        }
14968
 
14969
      if (ISPRINT (c))
14970
        {
14971
          if (c == '\\' || c == '\"')
14972
            {
14973
              putc ('\\', stream);
14974
              len_so_far++;
14975
            }
14976
          putc (c, stream);
14977
          len_so_far++;
14978
        }
14979
      else
14980
        {
14981
          fprintf (stream, "\\%03o", c);
14982
          len_so_far += 4;
14983
        }
14984
    }
14985
 
14986
  fputs ("\"\n", stream);
14987
}
14988
 
14989
/* Compute the register save mask for registers 0 through 12
14990
   inclusive.  This code is used by arm_compute_save_reg_mask.  */
14991
 
14992
static unsigned long
14993
arm_compute_save_reg0_reg12_mask (void)
14994
{
14995
  unsigned long func_type = arm_current_func_type ();
14996
  unsigned long save_reg_mask = 0;
14997
  unsigned int reg;
14998
 
14999
  if (IS_INTERRUPT (func_type))
15000
    {
15001
      unsigned int max_reg;
15002
      /* Interrupt functions must not corrupt any registers,
15003
         even call clobbered ones.  If this is a leaf function
15004
         we can just examine the registers used by the RTL, but
15005
         otherwise we have to assume that whatever function is
15006
         called might clobber anything, and so we have to save
15007
         all the call-clobbered registers as well.  */
15008
      if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
15009
        /* FIQ handlers have registers r8 - r12 banked, so
15010
           we only need to check r0 - r7, Normal ISRs only
15011
           bank r14 and SPSR, so we must check up to r12.
15012
           r13 is the stack pointer which is always preserved,
15013
           so we do not need to consider it here.  */
15014
        max_reg = 7;
15015
      else
15016
        max_reg = 12;
15017
 
15018
      for (reg = 0; reg <= max_reg; reg++)
15019
        if (df_regs_ever_live_p (reg)
15020
            || (! current_function_is_leaf && call_used_regs[reg]))
15021
          save_reg_mask |= (1 << reg);
15022
 
15023
      /* Also save the pic base register if necessary.  */
15024
      if (flag_pic
15025
          && !TARGET_SINGLE_PIC_BASE
15026
          && arm_pic_register != INVALID_REGNUM
15027
          && crtl->uses_pic_offset_table)
15028
        save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15029
    }
15030
  else if (IS_VOLATILE(func_type))
15031
    {
15032
      /* For noreturn functions we historically omitted register saves
15033
         altogether.  However this really messes up debugging.  As a
15034
         compromise save just the frame pointers.  Combined with the link
15035
         register saved elsewhere this should be sufficient to get
15036
         a backtrace.  */
15037
      if (frame_pointer_needed)
15038
        save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
15039
      if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
15040
        save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
15041
      if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
15042
        save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
15043
    }
15044
  else
15045
    {
15046
      /* In the normal case we only need to save those registers
15047
         which are call saved and which are used by this function.  */
15048
      for (reg = 0; reg <= 11; reg++)
15049
        if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
15050
          save_reg_mask |= (1 << reg);
15051
 
15052
      /* Handle the frame pointer as a special case.  */
15053
      if (frame_pointer_needed)
15054
        save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
15055
 
15056
      /* If we aren't loading the PIC register,
15057
         don't stack it even though it may be live.  */
15058
      if (flag_pic
15059
          && !TARGET_SINGLE_PIC_BASE
15060
          && arm_pic_register != INVALID_REGNUM
15061
          && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
15062
              || crtl->uses_pic_offset_table))
15063
        save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15064
 
15065
      /* The prologue will copy SP into R0, so save it.  */
15066
      if (IS_STACKALIGN (func_type))
15067
        save_reg_mask |= 1;
15068
    }
15069
 
15070
  /* Save registers so the exception handler can modify them.  */
15071
  if (crtl->calls_eh_return)
15072
    {
15073
      unsigned int i;
15074
 
15075
      for (i = 0; ; i++)
15076
        {
15077
          reg = EH_RETURN_DATA_REGNO (i);
15078
          if (reg == INVALID_REGNUM)
15079
            break;
15080
          save_reg_mask |= 1 << reg;
15081
        }
15082
    }
15083
 
15084
  return save_reg_mask;
15085
}
15086
 
15087
 
15088
/* Compute the number of bytes used to store the static chain register on the
15089
   stack, above the stack frame. We need to know this accurately to get the
15090
   alignment of the rest of the stack frame correct. */
15091
 
15092
static int arm_compute_static_chain_stack_bytes (void)
15093
{
15094
  unsigned long func_type = arm_current_func_type ();
15095
  int static_chain_stack_bytes = 0;
15096
 
15097
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM &&
15098
      IS_NESTED (func_type) &&
15099
      df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
15100
    static_chain_stack_bytes = 4;
15101
 
15102
  return static_chain_stack_bytes;
15103
}
15104
 
15105
 
15106
/* Compute a bit mask of which registers need to be
15107
   saved on the stack for the current function.
15108
   This is used by arm_get_frame_offsets, which may add extra registers.  */
15109
 
15110
static unsigned long
15111
arm_compute_save_reg_mask (void)
15112
{
15113
  unsigned int save_reg_mask = 0;
15114
  unsigned long func_type = arm_current_func_type ();
15115
  unsigned int reg;
15116
 
15117
  if (IS_NAKED (func_type))
15118
    /* This should never really happen.  */
15119
    return 0;
15120
 
15121
  /* If we are creating a stack frame, then we must save the frame pointer,
15122
     IP (which will hold the old stack pointer), LR and the PC.  */
15123
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15124
    save_reg_mask |=
15125
      (1 << ARM_HARD_FRAME_POINTER_REGNUM)
15126
      | (1 << IP_REGNUM)
15127
      | (1 << LR_REGNUM)
15128
      | (1 << PC_REGNUM);
15129
 
15130
  save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
15131
 
15132
  /* Decide if we need to save the link register.
15133
     Interrupt routines have their own banked link register,
15134
     so they never need to save it.
15135
     Otherwise if we do not use the link register we do not need to save
15136
     it.  If we are pushing other registers onto the stack however, we
15137
     can save an instruction in the epilogue by pushing the link register
15138
     now and then popping it back into the PC.  This incurs extra memory
15139
     accesses though, so we only do it when optimizing for size, and only
15140
     if we know that we will not need a fancy return sequence.  */
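  /* For example, a function that already pushes r4 and r5, does not
     otherwise use lr, and is compiled for size would push lr here as
     well, so that the epilogue can be a single "ldmfd sp!, {r4, r5, pc}"
     rather than a separate restore followed by "bx lr" (or similar).  */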
15141
  if (df_regs_ever_live_p (LR_REGNUM)
15142
      || (save_reg_mask
15143
          && optimize_size
15144
          && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
15145
          && !crtl->calls_eh_return))
15146
    save_reg_mask |= 1 << LR_REGNUM;
15147
 
15148
  if (cfun->machine->lr_save_eliminated)
15149
    save_reg_mask &= ~ (1 << LR_REGNUM);
15150
 
15151
  if (TARGET_REALLY_IWMMXT
15152
      && ((bit_count (save_reg_mask)
15153
           + ARM_NUM_INTS (crtl->args.pretend_args_size +
15154
                           arm_compute_static_chain_stack_bytes())
15155
           ) % 2) != 0)
15156
    {
15157
      /* The total number of registers that are going to be pushed
15158
         onto the stack is odd.  We need to ensure that the stack
15159
         is 64-bit aligned before we start to save iWMMXt registers,
15160
         and also before we start to create locals.  (A local variable
15161
         might be a double or long long which we will load/store using
15162
         an iWMMXt instruction).  Therefore we need to push another
15163
         ARM register, so that the stack will be 64-bit aligned.  We
15164
         try to avoid using the arg registers (r0-r3) as they might be
15165
         used to pass values in a tail call.  */
15166
      for (reg = 4; reg <= 12; reg++)
15167
        if ((save_reg_mask & (1 << reg)) == 0)
15168
          break;
15169
 
15170
      if (reg <= 12)
15171
        save_reg_mask |= (1 << reg);
15172
      else
15173
        {
15174
          cfun->machine->sibcall_blocked = 1;
15175
          save_reg_mask |= (1 << 3);
15176
        }
15177
    }
15178
 
15179
  /* We may need to push an additional register for use initializing the
15180
     PIC base register.  */
15181
  if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
15182
      && (save_reg_mask & THUMB2_WORK_REGS) == 0)
15183
    {
15184
      reg = thumb_find_work_register (1 << 4);
15185
      if (!call_used_regs[reg])
15186
        save_reg_mask |= (1 << reg);
15187
    }
15188
 
15189
  return save_reg_mask;
15190
}
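
   The mask computed above is a plain bitmask over core register numbers, and
   the surrounding code sizes the save area as 4 bytes per set bit.  Below is a
   minimal standalone sketch (not part of arm.c, using a hypothetical mask) of
   how such a mask can be counted; mask_bit_count is a portable stand-in for
   the bit_count helper used elsewhere in this file.

#include <stdio.h>

/* Portable stand-in for the bit_count helper: counts set bits in a
   save-register mask.  */
static unsigned
mask_bit_count (unsigned long mask)
{
  unsigned count = 0;
  while (mask)
    {
      mask &= mask - 1;   /* Clear the lowest set bit.  */
      count++;
    }
  return count;
}

int
main (void)
{
  /* Hypothetical mask: r4-r7, fp (r11) and lr (r14) saved.  */
  unsigned long save_reg_mask = (0xfUL << 4) | (1UL << 11) | (1UL << 14);

  printf ("registers pushed: %u (frame bytes: %u)\n",
          mask_bit_count (save_reg_mask),
          4 * mask_bit_count (save_reg_mask));
  return 0;
}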
15191
 
15192
 
15193
/* Compute a bit mask of which registers need to be
15194
   saved on the stack for the current function.  */
15195
static unsigned long
15196
thumb1_compute_save_reg_mask (void)
15197
{
15198
  unsigned long mask;
15199
  unsigned reg;
15200
 
15201
  mask = 0;
15202
  for (reg = 0; reg < 12; reg ++)
15203
    if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15204
      mask |= 1 << reg;
15205
 
15206
  if (flag_pic
15207
      && !TARGET_SINGLE_PIC_BASE
15208
      && arm_pic_register != INVALID_REGNUM
15209
      && crtl->uses_pic_offset_table)
15210
    mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15211
 
15212
  /* See if we might need r11 for calls to _interwork_r11_call_via_rN().  */
15213
  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
15214
    mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
15215
 
15216
  /* LR will also be pushed if any lo regs are pushed.  */
15217
  if (mask & 0xff || thumb_force_lr_save ())
15218
    mask |= (1 << LR_REGNUM);
15219
 
15220
  /* Make sure we have a low work register if we need one.
15221
     We will need one if we are going to push a high register,
15222
     but we are not currently intending to push a low register.  */
15223
  if ((mask & 0xff) == 0
15224
      && ((mask & 0x0f00) || TARGET_BACKTRACE))
15225
    {
15226
      /* Use thumb_find_work_register to choose which register
15227
         we will use.  If the register is live then we will
15228
         have to push it.  Use LAST_LO_REGNUM as our fallback
15229
         choice for the register to select.  */
15230
      reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
15231
      /* Make sure the register returned by thumb_find_work_register is
15232
         not part of the return value.  */
15233
      if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
15234
        reg = LAST_LO_REGNUM;
15235
 
15236
      if (! call_used_regs[reg])
15237
        mask |= 1 << reg;
15238
    }
15239
 
15240
  /* The 504 below is 8 bytes less than 512 because there are two possible
15241
     alignment words.  We can't tell here if they will be present or not so we
15242
     have to play it safe and assume that they are. */
15243
  if ((CALLER_INTERWORKING_SLOT_SIZE +
15244
       ROUND_UP_WORD (get_frame_size ()) +
15245
       crtl->outgoing_args_size) >= 504)
15246
    {
15247
      /* This is the same as the code in thumb1_expand_prologue() which
15248
         determines which register to use for stack decrement. */
15249
      for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
15250
        if (mask & (1 << reg))
15251
          break;
15252
 
15253
      if (reg > LAST_LO_REGNUM)
15254
        {
15255
          /* Make sure we have a register available for stack decrement. */
15256
          mask |= 1 << LAST_LO_REGNUM;
15257
        }
15258
    }
15259
 
15260
  return mask;
15261
}
15262
 
15263
 
15264
/* Return the number of bytes required to save VFP registers.  */
15265
static int
15266
arm_get_vfp_saved_size (void)
15267
{
15268
  unsigned int regno;
15269
  int count;
15270
  int saved;
15271
 
15272
  saved = 0;
15273
  /* Space for saved VFP registers.  */
15274
  if (TARGET_HARD_FLOAT && TARGET_VFP)
15275
    {
15276
      count = 0;
15277
      for (regno = FIRST_VFP_REGNUM;
15278
           regno < LAST_VFP_REGNUM;
15279
           regno += 2)
15280
        {
15281
          if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
15282
              && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
15283
            {
15284
              if (count > 0)
15285
                {
15286
                  /* Workaround ARM10 VFPr1 bug.  */
15287
                  if (count == 2 && !arm_arch6)
15288
                    count++;
15289
                  saved += count * 8;
15290
                }
15291
              count = 0;
15292
            }
15293
          else
15294
            count++;
15295
        }
15296
      if (count > 0)
15297
        {
15298
          if (count == 2 && !arm_arch6)
15299
            count++;
15300
          saved += count * 8;
15301
        }
15302
    }
15303
  return saved;
15304
}
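
   A minimal standalone sketch (not part of arm.c) of the sizing logic above:
   runs of consecutive live D-register pairs are flushed at 8 bytes each, and a
   run of exactly two is bumped to three on pre-ARMv6 cores to work around the
   ARM10 VFPr1 erratum.  The liveness array below is hypothetical.

#include <stdbool.h>
#include <stdio.h>

static int
vfp_saved_size_sketch (const bool *pair_live, int num_pairs, bool arm_arch6)
{
  int saved = 0, count = 0;

  for (int i = 0; i < num_pairs; i++)
    {
      if (pair_live[i])
        count++;
      else if (count > 0)
        {
          if (count == 2 && !arm_arch6)
            count++;            /* Workaround: store one extra D register.  */
          saved += count * 8;   /* 8 bytes per D register.  */
          count = 0;
        }
    }
  if (count > 0)
    {
      if (count == 2 && !arm_arch6)
        count++;
      saved += count * 8;
    }
  return saved;
}

int
main (void)
{
  bool live[8] = { true, true, false, true, false, false, true, true };
  printf ("pre-ARMv6: %d bytes, ARMv6+: %d bytes\n",
          vfp_saved_size_sketch (live, 8, false),
          vfp_saved_size_sketch (live, 8, true));
  return 0;
}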
15305
 
15306
 
15307
/* Generate a function exit sequence.  If REALLY_RETURN is false, then do
15308
   everything bar the final return instruction.  */
15309
const char *
15310
output_return_instruction (rtx operand, int really_return, int reverse)
15311
{
15312
  char conditional[10];
15313
  char instr[100];
15314
  unsigned reg;
15315
  unsigned long live_regs_mask;
15316
  unsigned long func_type;
15317
  arm_stack_offsets *offsets;
15318
 
15319
  func_type = arm_current_func_type ();
15320
 
15321
  if (IS_NAKED (func_type))
15322
    return "";
15323
 
15324
  if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
15325
    {
15326
      /* If this function was declared non-returning, and we have
15327
         found a tail call, then we have to trust that the called
15328
         function won't return.  */
15329
      if (really_return)
15330
        {
15331
          rtx ops[2];
15332
 
15333
          /* Otherwise, trap an attempted return by aborting.  */
15334
          ops[0] = operand;
15335
          ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
15336
                                       : "abort");
15337
          assemble_external_libcall (ops[1]);
15338
          output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
15339
        }
15340
 
15341
      return "";
15342
    }
15343
 
15344
  gcc_assert (!cfun->calls_alloca || really_return);
15345
 
15346
  sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
15347
 
15348
  cfun->machine->return_used_this_function = 1;
15349
 
15350
  offsets = arm_get_frame_offsets ();
15351
  live_regs_mask = offsets->saved_regs_mask;
15352
 
15353
  if (live_regs_mask)
15354
    {
15355
      const char * return_reg;
15356
 
15357
      /* If we do not have any special requirements for function exit
15358
         (e.g. interworking) then we can load the return address
15359
         directly into the PC.  Otherwise we must load it into LR.  */
15360
      if (really_return
15361
          && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
15362
        return_reg = reg_names[PC_REGNUM];
15363
      else
15364
        return_reg = reg_names[LR_REGNUM];
15365
 
15366
      if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
15367
        {
15368
          /* There are three possible reasons for the IP register
15369
             being saved.  1) a stack frame was created, in which case
15370
             IP contains the old stack pointer, or 2) an ISR routine
15371
             corrupted it, or 3) it was saved to align the stack on
15372
             iWMMXt.  In case 1, restore IP into SP, otherwise just
15373
             restore IP.  */
15374
          if (frame_pointer_needed)
15375
            {
15376
              live_regs_mask &= ~ (1 << IP_REGNUM);
15377
              live_regs_mask |=   (1 << SP_REGNUM);
15378
            }
15379
          else
15380
            gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
15381
        }
15382
 
15383
      /* On some ARM architectures it is faster to use LDR rather than
15384
         LDM to load a single register.  On other architectures, the
15385
         cost is the same.  In 26 bit mode, or for exception handlers,
15386
         we have to use LDM to load the PC so that the CPSR is also
15387
         restored.  */
15388
      for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
15389
        if (live_regs_mask == (1U << reg))
15390
          break;
15391
 
15392
      if (reg <= LAST_ARM_REGNUM
15393
          && (reg != LR_REGNUM
15394
              || ! really_return
15395
              || ! IS_INTERRUPT (func_type)))
15396
        {
15397
          sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
15398
                   (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
15399
        }
15400
      else
15401
        {
15402
          char *p;
15403
          int first = 1;
15404
 
15405
          /* Generate the load multiple instruction to restore the
15406
             registers.  Note we can get here, even if
15407
             frame_pointer_needed is true, but only if sp already
15408
             points to the base of the saved core registers.  */
15409
          if (live_regs_mask & (1 << SP_REGNUM))
15410
            {
15411
              unsigned HOST_WIDE_INT stack_adjust;
15412
 
15413
              stack_adjust = offsets->outgoing_args - offsets->saved_regs;
15414
              gcc_assert (stack_adjust == 0 || stack_adjust == 4);
15415
 
15416
              if (stack_adjust && arm_arch5 && TARGET_ARM)
15417
                if (TARGET_UNIFIED_ASM)
15418
                  sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
15419
                else
15420
                  sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
15421
              else
15422
                {
15423
                  /* If we can't use ldmib (SA110 bug),
15424
                     then try to pop r3 instead.  */
15425
                  if (stack_adjust)
15426
                    live_regs_mask |= 1 << 3;
15427
 
15428
                  if (TARGET_UNIFIED_ASM)
15429
                    sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
15430
                  else
15431
                    sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
15432
                }
15433
            }
15434
          else
15435
            if (TARGET_UNIFIED_ASM)
15436
              sprintf (instr, "pop%s\t{", conditional);
15437
            else
15438
              sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
15439
 
15440
          p = instr + strlen (instr);
15441
 
15442
          for (reg = 0; reg <= SP_REGNUM; reg++)
15443
            if (live_regs_mask & (1 << reg))
15444
              {
15445
                int l = strlen (reg_names[reg]);
15446
 
15447
                if (first)
15448
                  first = 0;
15449
                else
15450
                  {
15451
                    memcpy (p, ", ", 2);
15452
                    p += 2;
15453
                  }
15454
 
15455
                memcpy (p, "%|", 2);
15456
                memcpy (p + 2, reg_names[reg], l);
15457
                p += l + 2;
15458
              }
15459
 
15460
          if (live_regs_mask & (1 << LR_REGNUM))
15461
            {
15462
              sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
15463
              /* If returning from an interrupt, restore the CPSR.  */
15464
              if (IS_INTERRUPT (func_type))
15465
                strcat (p, "^");
15466
            }
15467
          else
15468
            strcpy (p, "}");
15469
        }
15470
 
15471
      output_asm_insn (instr, & operand);
15472
 
15473
      /* See if we need to generate an extra instruction to
15474
         perform the actual function return.  */
15475
      if (really_return
15476
          && func_type != ARM_FT_INTERWORKED
15477
          && (live_regs_mask & (1 << LR_REGNUM)) != 0)
15478
        {
15479
          /* The return has already been handled
15480
             by loading the LR into the PC.  */
15481
          really_return = 0;
15482
        }
15483
    }
15484
 
15485
  if (really_return)
15486
    {
15487
      switch ((int) ARM_FUNC_TYPE (func_type))
15488
        {
15489
        case ARM_FT_ISR:
15490
        case ARM_FT_FIQ:
15491
          /* ??? This is wrong for unified assembly syntax.  */
15492
          sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
15493
          break;
15494
 
15495
        case ARM_FT_INTERWORKED:
15496
          sprintf (instr, "bx%s\t%%|lr", conditional);
15497
          break;
15498
 
15499
        case ARM_FT_EXCEPTION:
15500
          /* ??? This is wrong for unified assembly syntax.  */
15501
          sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
15502
          break;
15503
 
15504
        default:
15505
          /* Use bx if it's available.  */
15506
          if (arm_arch5 || arm_arch4t)
15507
            sprintf (instr, "bx%s\t%%|lr", conditional);
15508
          else
15509
            sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
15510
          break;
15511
        }
15512
 
15513
      output_asm_insn (instr, & operand);
15514
    }
15515
 
15516
  return "";
15517
}
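
   A minimal standalone sketch (not part of arm.c) of the central decision
   above: restore a single register with LDR, otherwise emit an LDMFD register
   list.  The register names and masks are illustrative only; the real code
   also handles conditional execution, interworking and CPSR restoration.

#include <stdio.h>

static const char *reg_names_sketch[16] = {
  "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
  "r8", "r9", "r10", "fp", "ip", "sp", "lr", "pc" };

static void
print_epilogue_sketch (unsigned long live_regs_mask)
{
  int only_reg = -1, nregs = 0;

  for (int reg = 0; reg < 16; reg++)
    if (live_regs_mask & (1UL << reg))
      {
        only_reg = reg;
        nregs++;
      }

  if (nregs == 1)
    {
      /* A single register is cheaper to reload with LDR.  */
      printf ("\tldr\t%s, [sp], #4\n", reg_names_sketch[only_reg]);
      return;
    }

  printf ("\tldmfd\tsp!, {");
  for (int reg = 0, first = 1; reg < 16; reg++)
    if (live_regs_mask & (1UL << reg))
      {
        printf ("%s%s", first ? "" : ", ", reg_names_sketch[reg]);
        first = 0;
      }
  printf ("}\n");
}

int
main (void)
{
  print_epilogue_sketch (1UL << 14);                   /* lr only.  */
  print_epilogue_sketch ((0xfUL << 4) | (1UL << 15));  /* r4-r7, pc.  */
  return 0;
}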
15518
 
15519
/* Write the function name into the code section, directly preceding
15520
   the function prologue.
15521
 
15522
   Code will be output similar to this:
15523
     t0
15524
         .ascii "arm_poke_function_name", 0
15525
         .align
15526
     t1
15527
         .word 0xff000000 + (t1 - t0)
15528
     arm_poke_function_name
15529
         mov     ip, sp
15530
         stmfd   sp!, {fp, ip, lr, pc}
15531
         sub     fp, ip, #4
15532
 
15533
   When performing a stack backtrace, code can inspect the value
15534
   of 'pc' stored at 'fp' + 0.  If the trace function then looks
15535
   at location pc - 12 and the top 8 bits are set, then we know
15536
   that there is a function name embedded immediately preceding this
15537
   location and has length ((pc[-3]) & ~0xff000000).
15538
 
15539
   We assume that pc is declared as a pointer to an unsigned long.
15540
 
15541
   It is of no benefit to output the function name if we are assembling
15542
   a leaf function.  These function types will not contain a stack
15543
   backtrace structure, therefore it is not possible to determine the
15544
   function name.  */
15545
void
15546
arm_poke_function_name (FILE *stream, const char *name)
15547
{
15548
  unsigned long alignlength;
15549
  unsigned long length;
15550
  rtx           x;
15551
 
15552
  length      = strlen (name) + 1;
15553
  alignlength = ROUND_UP_WORD (length);
15554
 
15555
  ASM_OUTPUT_ASCII (stream, name, length);
15556
  ASM_OUTPUT_ALIGN (stream, 2);
15557
  x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
15558
  assemble_aligned_integer (UNITS_PER_WORD, x);
15559
}
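
   A minimal standalone sketch (not part of arm.c) of the consumer side
   described in the comment above: given the saved pc, check the marker word at
   pc - 12 and, if its top 8 bits are set, step back over the recorded
   (word-aligned) length to find the poked name.  This is illustrative host
   code under the stated layout assumptions, not target code.

#include <stddef.h>

static const char *
poked_function_name (const unsigned long *pc)
{
  unsigned long marker = pc[-3];            /* Word at pc - 12.  */

  if ((marker & 0xff000000UL) != 0xff000000UL)
    return NULL;                            /* No name was poked.  */

  unsigned long alignlength = marker & 0x00ffffffUL;

  /* The name (plus padding) immediately precedes the marker word.  */
  return (const char *) &pc[-3] - alignlength;
}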
15560
 
15561
/* Place some comments into the assembler stream
15562
   describing the current function.  */
15563
static void
15564
arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
15565
{
15566
  unsigned long func_type;
15567
 
15568
  /* ??? Do we want to print some of the below anyway?  */
15569
  if (TARGET_THUMB1)
15570
    return;
15571
 
15572
  /* Sanity check.  */
15573
  gcc_assert (!arm_ccfsm_state && !arm_target_insn);
15574
 
15575
  func_type = arm_current_func_type ();
15576
 
15577
  switch ((int) ARM_FUNC_TYPE (func_type))
15578
    {
15579
    default:
15580
    case ARM_FT_NORMAL:
15581
      break;
15582
    case ARM_FT_INTERWORKED:
15583
      asm_fprintf (f, "\t%@ Function supports interworking.\n");
15584
      break;
15585
    case ARM_FT_ISR:
15586
      asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
15587
      break;
15588
    case ARM_FT_FIQ:
15589
      asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
15590
      break;
15591
    case ARM_FT_EXCEPTION:
15592
      asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
15593
      break;
15594
    }
15595
 
15596
  if (IS_NAKED (func_type))
15597
    asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
15598
 
15599
  if (IS_VOLATILE (func_type))
15600
    asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
15601
 
15602
  if (IS_NESTED (func_type))
15603
    asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
15604
  if (IS_STACKALIGN (func_type))
15605
    asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
15606
 
15607
  asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
15608
               crtl->args.size,
15609
               crtl->args.pretend_args_size, frame_size);
15610
 
15611
  asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
15612
               frame_pointer_needed,
15613
               cfun->machine->uses_anonymous_args);
15614
 
15615
  if (cfun->machine->lr_save_eliminated)
15616
    asm_fprintf (f, "\t%@ link register save eliminated.\n");
15617
 
15618
  if (crtl->calls_eh_return)
15619
    asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
15620
 
15621
}
15622
 
15623
const char *
15624
arm_output_epilogue (rtx sibling)
15625
{
15626
  int reg;
15627
  unsigned long saved_regs_mask;
15628
  unsigned long func_type;
15629
  /* Floats_offset is the offset from the "virtual" frame.  In an APCS
15630
     frame that is $fp + 4 for a non-variadic function.  */
15631
  int floats_offset = 0;
15632
  rtx operands[3];
15633
  FILE * f = asm_out_file;
15634
  unsigned int lrm_count = 0;
15635
  int really_return = (sibling == NULL);
15636
  int start_reg;
15637
  arm_stack_offsets *offsets;
15638
 
15639
  /* If we have already generated the return instruction
15640
     then it is futile to generate anything else.  */
15641
  if (use_return_insn (FALSE, sibling) &&
15642
      (cfun->machine->return_used_this_function != 0))
15643
    return "";
15644
 
15645
  func_type = arm_current_func_type ();
15646
 
15647
  if (IS_NAKED (func_type))
15648
    /* Naked functions don't have epilogues.  */
15649
    return "";
15650
 
15651
  if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
15652
    {
15653
      rtx op;
15654
 
15655
      /* A volatile function should never return.  Call abort.  */
15656
      op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
15657
      assemble_external_libcall (op);
15658
      output_asm_insn ("bl\t%a0", &op);
15659
 
15660
      return "";
15661
    }
15662
 
15663
  /* If we are throwing an exception, then we really must be doing a
15664
     return, so we can't tail-call.  */
15665
  gcc_assert (!crtl->calls_eh_return || really_return);
15666
 
15667
  offsets = arm_get_frame_offsets ();
15668
  saved_regs_mask = offsets->saved_regs_mask;
15669
 
15670
  if (TARGET_IWMMXT)
15671
    lrm_count = bit_count (saved_regs_mask);
15672
 
15673
  floats_offset = offsets->saved_args;
15674
  /* Compute how far away the floats will be.  */
15675
  for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
15676
    if (saved_regs_mask & (1 << reg))
15677
      floats_offset += 4;
15678
 
15679
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15680
    {
15681
      /* This variable is for the Virtual Frame Pointer, not VFP regs.  */
15682
      int vfp_offset = offsets->frame;
15683
 
15684
      if (TARGET_FPA_EMU2)
15685
        {
15686
          for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15687
            if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15688
              {
15689
                floats_offset += 12;
15690
                asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
15691
                             reg, FP_REGNUM, floats_offset - vfp_offset);
15692
              }
15693
        }
15694
      else
15695
        {
15696
          start_reg = LAST_FPA_REGNUM;
15697
 
15698
          for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15699
            {
15700
              if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15701
                {
15702
                  floats_offset += 12;
15703
 
15704
                  /* We can't unstack more than four registers at once.  */
15705
                  if (start_reg - reg == 3)
15706
                    {
15707
                      asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
15708
                                   reg, FP_REGNUM, floats_offset - vfp_offset);
15709
                      start_reg = reg - 1;
15710
                    }
15711
                }
15712
              else
15713
                {
15714
                  if (reg != start_reg)
15715
                    asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
15716
                                 reg + 1, start_reg - reg,
15717
                                 FP_REGNUM, floats_offset - vfp_offset);
15718
                  start_reg = reg - 1;
15719
                }
15720
            }
15721
 
15722
          /* Just in case the last register checked also needs unstacking.  */
15723
          if (reg != start_reg)
15724
            asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
15725
                         reg + 1, start_reg - reg,
15726
                         FP_REGNUM, floats_offset - vfp_offset);
15727
        }
15728
 
15729
      if (TARGET_HARD_FLOAT && TARGET_VFP)
15730
        {
15731
          int saved_size;
15732
 
15733
          /* The fldmd insns do not have base+offset addressing
15734
             modes, so we use IP to hold the address.  */
15735
          saved_size = arm_get_vfp_saved_size ();
15736
 
15737
          if (saved_size > 0)
15738
            {
15739
              floats_offset += saved_size;
15740
              asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
15741
                           FP_REGNUM, floats_offset - vfp_offset);
15742
            }
15743
          start_reg = FIRST_VFP_REGNUM;
15744
          for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
15745
            {
15746
              if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
15747
                  && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
15748
                {
15749
                  if (start_reg != reg)
15750
                    vfp_output_fldmd (f, IP_REGNUM,
15751
                                      (start_reg - FIRST_VFP_REGNUM) / 2,
15752
                                      (reg - start_reg) / 2);
15753
                  start_reg = reg + 2;
15754
                }
15755
            }
15756
          if (start_reg != reg)
15757
            vfp_output_fldmd (f, IP_REGNUM,
15758
                              (start_reg - FIRST_VFP_REGNUM) / 2,
15759
                              (reg - start_reg) / 2);
15760
        }
15761
 
15762
      if (TARGET_IWMMXT)
15763
        {
15764
          /* The frame pointer is guaranteed to be non-double-word aligned.
15765
             This is because it is set to (old_stack_pointer - 4) and the
15766
             old_stack_pointer was double word aligned.  Thus the offset to
15767
             the iWMMXt registers to be loaded must also be non-double-word
15768
             sized, so that the resultant address *is* double-word aligned.
15769
             We can ignore floats_offset since that was already included in
15770
             the live_regs_mask.  */
15771
          lrm_count += (lrm_count % 2 ? 2 : 1);
15772
 
15773
          for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
15774
            if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15775
              {
15776
                asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
15777
                             reg, FP_REGNUM, lrm_count * 4);
15778
                lrm_count += 2;
15779
              }
15780
        }
15781
 
15782
      /* saved_regs_mask should contain the IP, which at the time of stack
15783
         frame generation actually contains the old stack pointer.  So a
15784
         quick way to unwind the stack is just pop the IP register directly
15785
         into the stack pointer.  */
15786
      gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
15787
      saved_regs_mask &= ~ (1 << IP_REGNUM);
15788
      saved_regs_mask |=   (1 << SP_REGNUM);
15789
 
15790
      /* There are two registers left in saved_regs_mask - LR and PC.  We
15791
         only need to restore the LR register (the return address), but to
15792
         save time we can load it directly into the PC, unless we need a
15793
         special function exit sequence, or we are not really returning.  */
15794
      if (really_return
15795
          && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
15796
          && !crtl->calls_eh_return)
15797
        /* Delete the LR from the register mask, so that the LR on
15798
           the stack is loaded into the PC in the register mask.  */
15799
        saved_regs_mask &= ~ (1 << LR_REGNUM);
15800
      else
15801
        saved_regs_mask &= ~ (1 << PC_REGNUM);
15802
 
15803
      /* We must use SP as the base register, because SP is one of the
15804
         registers being restored.  If an interrupt or page fault
15805
         happens in the ldm instruction, the SP might or might not
15806
         have been restored.  That would be bad, as then SP will no
15807
         longer indicate the safe area of stack, and we can get stack
15808
         corruption.  Using SP as the base register means that it will
15809
         be reset correctly to the original value, should an interrupt
15810
         occur.  If the stack pointer already points at the right
15811
         place, then omit the subtraction.  */
15812
      if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
15813
          || cfun->calls_alloca)
15814
        asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
15815
                     4 * bit_count (saved_regs_mask));
15816
      print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
15817
 
15818
      if (IS_INTERRUPT (func_type))
15819
        /* Interrupt handlers will have pushed the
15820
           IP onto the stack, so restore it now.  */
15821
        print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
15822
    }
15823
  else
15824
    {
15825
      /* This branch is executed for ARM mode (non-apcs frames) and
15826
         Thumb-2 mode. Frame layout is essentially the same for those
15827
         cases, except that in ARM mode frame pointer points to the
15828
         first saved register, while in Thumb-2 mode the frame pointer points
15829
         to the last saved register.
15830
 
15831
         It is possible to make frame pointer point to last saved
15832
         register in both cases, and remove some conditionals below.
15833
         That means that fp setup in prologue would be just "mov fp, sp"
15834
         and sp restore in epilogue would be just "mov sp, fp", whereas
15835
         now we have to use add/sub in those cases. However, the value
15836
         of that would be marginal, as both mov and add/sub are 32-bit
15837
         in ARM mode, and it would require extra conditionals
15838
         in arm_expand_prologue to distinguish ARM-apcs-frame case
15839
         (where frame pointer is required to point at first register)
15840
         and ARM-non-apcs-frame. Therefore, such change is postponed
15841
         until a real need arises.  */
15842
      unsigned HOST_WIDE_INT amount;
15843
      int rfe;
15844
      /* Restore stack pointer if necessary.  */
15845
      if (TARGET_ARM && frame_pointer_needed)
15846
        {
15847
          operands[0] = stack_pointer_rtx;
15848
          operands[1] = hard_frame_pointer_rtx;
15849
 
15850
          operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
15851
          output_add_immediate (operands);
15852
        }
15853
      else
15854
        {
15855
          if (frame_pointer_needed)
15856
            {
15857
              /* For Thumb-2 restore sp from the frame pointer.
15858
                 Operand restrictions mean we have to increment FP, then copy
15859
                 to SP.  */
15860
              amount = offsets->locals_base - offsets->saved_regs;
15861
              operands[0] = hard_frame_pointer_rtx;
15862
            }
15863
          else
15864
            {
15865
              unsigned long count;
15866
              operands[0] = stack_pointer_rtx;
15867
              amount = offsets->outgoing_args - offsets->saved_regs;
15868
              /* Pop call-clobbered registers if it avoids a
15869
                 separate stack adjustment.  */
15870
              count = offsets->saved_regs - offsets->saved_args;
15871
              if (optimize_size
15872
                  && count != 0
15873
                  && !crtl->calls_eh_return
15874
                  && bit_count(saved_regs_mask) * 4 == count
15875
                  && !IS_INTERRUPT (func_type)
15876
                  && !IS_STACKALIGN (func_type)
15877
                  && !crtl->tail_call_emit)
15878
                {
15879
                  unsigned long mask;
15880
                  /* Preserve return values, of any size.  */
15881
                  mask = (1 << ((arm_size_return_regs() + 3) / 4)) - 1;
15882
                  mask ^= 0xf;
15883
                  mask &= ~saved_regs_mask;
15884
                  reg = 0;
15885
                  while (bit_count (mask) * 4 > amount)
15886
                    {
15887
                      while ((mask & (1 << reg)) == 0)
15888
                        reg++;
15889
                      mask &= ~(1 << reg);
15890
                    }
15891
                  if (bit_count (mask) * 4 == amount) {
15892
                      amount = 0;
15893
                      saved_regs_mask |= mask;
15894
                  }
15895
                }
15896
            }
15897
 
15898
          if (amount)
15899
            {
15900
              operands[1] = operands[0];
15901
              operands[2] = GEN_INT (amount);
15902
              output_add_immediate (operands);
15903
            }
15904
          if (frame_pointer_needed)
15905
            asm_fprintf (f, "\tmov\t%r, %r\n",
15906
                         SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
15907
        }
15908
 
15909
      if (TARGET_FPA_EMU2)
15910
        {
15911
          for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
15912
            if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15913
              asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
15914
                           reg, SP_REGNUM);
15915
        }
15916
      else
15917
        {
15918
          start_reg = FIRST_FPA_REGNUM;
15919
 
15920
          for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
15921
            {
15922
              if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15923
                {
15924
                  if (reg - start_reg == 3)
15925
                    {
15926
                      asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
15927
                                   start_reg, SP_REGNUM);
15928
                      start_reg = reg + 1;
15929
                    }
15930
                }
15931
              else
15932
                {
15933
                  if (reg != start_reg)
15934
                    asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
15935
                                 start_reg, reg - start_reg,
15936
                                 SP_REGNUM);
15937
 
15938
                  start_reg = reg + 1;
15939
                }
15940
            }
15941
 
15942
          /* Just in case the last register checked also needs unstacking.  */
15943
          if (reg != start_reg)
15944
            asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
15945
                         start_reg, reg - start_reg, SP_REGNUM);
15946
        }
15947
 
15948
      if (TARGET_HARD_FLOAT && TARGET_VFP)
15949
        {
15950
          int end_reg = LAST_VFP_REGNUM + 1;
15951
 
15952
          /* Scan the registers in reverse order.  We need to match
15953
             any groupings made in the prologue and generate matching
15954
             pop operations.  */
15955
          for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
15956
            {
15957
              if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
15958
                  && (!df_regs_ever_live_p (reg + 1)
15959
                      || call_used_regs[reg + 1]))
15960
                {
15961
                  if (end_reg > reg + 2)
15962
                    vfp_output_fldmd (f, SP_REGNUM,
15963
                                      (reg + 2 - FIRST_VFP_REGNUM) / 2,
15964
                                      (end_reg - (reg + 2)) / 2);
15965
                  end_reg = reg;
15966
                }
15967
            }
15968
          if (end_reg > reg + 2)
15969
            vfp_output_fldmd (f, SP_REGNUM, 0,
15970
                              (end_reg - (reg + 2)) / 2);
15971
        }
15972
 
15973
      if (TARGET_IWMMXT)
15974
        for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
15975
          if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15976
            asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
15977
 
15978
      /* If we can, restore the LR into the PC.  */
15979
      if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
15980
          && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
15981
          && !IS_STACKALIGN (func_type)
15982
          && really_return
15983
          && crtl->args.pretend_args_size == 0
15984
          && saved_regs_mask & (1 << LR_REGNUM)
15985
          && !crtl->calls_eh_return)
15986
        {
15987
          saved_regs_mask &= ~ (1 << LR_REGNUM);
15988
          saved_regs_mask |=   (1 << PC_REGNUM);
15989
          rfe = IS_INTERRUPT (func_type);
15990
        }
15991
      else
15992
        rfe = 0;
15993
 
15994
      /* Load the registers off the stack.  If we only have one register
15995
         to load use the LDR instruction - it is faster.  For Thumb-2
15996
         always use pop and the assembler will pick the best instruction.  */
15997
      if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
15998
          && !IS_INTERRUPT(func_type))
15999
        {
16000
          asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
16001
        }
16002
      else if (saved_regs_mask)
16003
        {
16004
          if (saved_regs_mask & (1 << SP_REGNUM))
16005
            /* Note - write back to the stack register is not enabled
16006
               (i.e. "ldmfd sp!...").  We know that the stack pointer is
16007
               in the list of registers and if we add writeback the
16008
               instruction becomes UNPREDICTABLE.  */
16009
            print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
16010
                             rfe);
16011
          else if (TARGET_ARM)
16012
            print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
16013
                             rfe);
16014
          else
16015
            print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
16016
        }
16017
 
16018
      if (crtl->args.pretend_args_size)
16019
        {
16020
          /* Unwind the pre-pushed regs.  */
16021
          operands[0] = operands[1] = stack_pointer_rtx;
16022
          operands[2] = GEN_INT (crtl->args.pretend_args_size);
16023
          output_add_immediate (operands);
16024
        }
16025
    }
16026
 
16027
  /* We may have already restored PC directly from the stack.  */
16028
  if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
16029
    return "";
16030
 
16031
  /* Stack adjustment for exception handler.  */
16032
  if (crtl->calls_eh_return)
16033
    asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
16034
                 ARM_EH_STACKADJ_REGNUM);
16035
 
16036
  /* Generate the return instruction.  */
16037
  switch ((int) ARM_FUNC_TYPE (func_type))
16038
    {
16039
    case ARM_FT_ISR:
16040
    case ARM_FT_FIQ:
16041
      asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
16042
      break;
16043
 
16044
    case ARM_FT_EXCEPTION:
16045
      asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
16046
      break;
16047
 
16048
    case ARM_FT_INTERWORKED:
16049
      asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
16050
      break;
16051
 
16052
    default:
16053
      if (IS_STACKALIGN (func_type))
16054
        {
16055
          /* See comment in arm_expand_prologue.  */
16056
          asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
16057
        }
16058
      if (arm_arch5 || arm_arch4t)
16059
        asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
16060
      else
16061
        asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
16062
      break;
16063
    }
16064
 
16065
  return "";
16066
}
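
   A minimal standalone sketch (not part of arm.c) of the size optimization
   used above when no frame pointer is needed: pop otherwise-unused argument
   registers (r0-r3) so that the pop itself performs the final stack
   adjustment.  The parameter values in main are hypothetical.

#include <stdio.h>

static int
popcount_ul (unsigned long x)
{
  int n = 0;
  while (x)
    {
      x &= x - 1;
      n++;
    }
  return n;
}

static unsigned long
scratch_pop_mask (int return_bytes, unsigned long saved_regs_mask,
                  unsigned long amount)
{
  /* Registers holding the return value must be preserved; keep only the
     unused ones among r0-r3, and nothing that is already being popped.  */
  unsigned long mask = (1UL << ((return_bytes + 3) / 4)) - 1;
  mask ^= 0xf;
  mask &= ~saved_regs_mask;

  /* Drop registers until the pop size no longer exceeds the adjustment.  */
  int reg = 0;
  while (4UL * popcount_ul (mask) > amount)
    {
      while ((mask & (1UL << reg)) == 0)
        reg++;
      mask &= ~(1UL << reg);
    }

  /* Only worthwhile if the pop replaces the adjustment exactly.  */
  return (4UL * popcount_ul (mask) == amount) ? mask : 0;
}

int
main (void)
{
  /* A function returning a 4-byte value with 8 bytes of stack to release
     could pop r2 and r3 instead of adjusting sp: prints mask = 0xc.  */
  printf ("mask = 0x%lx\n", scratch_pop_mask (4, 0, 8));
  return 0;
}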
16067
 
16068
static void
16069
arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
16070
                              HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
16071
{
16072
  arm_stack_offsets *offsets;
16073
 
16074
  if (TARGET_THUMB1)
16075
    {
16076
      int regno;
16077
 
16078
      /* Emit any call-via-reg trampolines that are needed for v4t support
16079
         of call_reg and call_value_reg type insns.  */
16080
      for (regno = 0; regno < LR_REGNUM; regno++)
16081
        {
16082
          rtx label = cfun->machine->call_via[regno];
16083
 
16084
          if (label != NULL)
16085
            {
16086
              switch_to_section (function_section (current_function_decl));
16087
              targetm.asm_out.internal_label (asm_out_file, "L",
16088
                                              CODE_LABEL_NUMBER (label));
16089
              asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
16090
            }
16091
        }
16092
 
16093
      /* ??? Probably not safe to set this here, since it assumes that a
16094
         function will be emitted as assembly immediately after we generate
16095
         RTL for it.  This does not happen for inline functions.  */
16096
      cfun->machine->return_used_this_function = 0;
16097
    }
16098
  else /* TARGET_32BIT */
16099
    {
16100
      /* We need to take into account any stack-frame rounding.  */
16101
      offsets = arm_get_frame_offsets ();
16102
 
16103
      gcc_assert (!use_return_insn (FALSE, NULL)
16104
                  || (cfun->machine->return_used_this_function != 0)
16105
                  || offsets->saved_regs == offsets->outgoing_args
16106
                  || frame_pointer_needed);
16107
 
16108
      /* Reset the ARM-specific per-function variables.  */
16109
      after_arm_reorg = 0;
16110
    }
16111
}
16112
 
16113
/* Generate and emit an insn that we will recognize as a push_multi.
16114
   Unfortunately, since this insn does not reflect very well the actual
16115
   semantics of the operation, we need to annotate the insn for the benefit
16116
   of DWARF2 frame unwind information.  */
16117
static rtx
16118
emit_multi_reg_push (unsigned long mask)
16119
{
16120
  int num_regs = 0;
16121
  int num_dwarf_regs;
16122
  int i, j;
16123
  rtx par;
16124
  rtx dwarf;
16125
  int dwarf_par_index;
16126
  rtx tmp, reg;
16127
 
16128
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
16129
    if (mask & (1 << i))
16130
      num_regs++;
16131
 
16132
  gcc_assert (num_regs && num_regs <= 16);
16133
 
16134
  /* We don't record the PC in the dwarf frame information.  */
16135
  num_dwarf_regs = num_regs;
16136
  if (mask & (1 << PC_REGNUM))
16137
    num_dwarf_regs--;
16138
 
16139
  /* For the body of the insn we are going to generate an UNSPEC in
16140
     parallel with several USEs.  This allows the insn to be recognized
16141
     by the push_multi pattern in the arm.md file.
16142
 
16143
     The body of the insn looks something like this:
16144
 
16145
       (parallel [
16146
           (set (mem:BLK (pre_modify:SI (reg:SI sp)
16147
                                        (const_int:SI <num>)))
16148
                (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
16149
           (use (reg:SI XX))
16150
           (use (reg:SI YY))
16151
           ...
16152
        ])
16153
 
16154
     For the frame note however, we try to be more explicit and actually
16155
     show each register being stored into the stack frame, plus a (single)
16156
     decrement of the stack pointer.  We do it this way in order to be
16157
     friendly to the stack unwinding code, which only wants to see a single
16158
     stack decrement per instruction.  The RTL we generate for the note looks
16159
     something like this:
16160
 
16161
      (sequence [
16162
           (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
16163
           (set (mem:SI (reg:SI sp)) (reg:SI r4))
16164
           (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
16165
           (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
16166
           ...
16167
        ])
16168
 
16169
     FIXME:: In an ideal world the PRE_MODIFY would not exist and
16170
     instead we'd have a parallel expression detailing all
16171
     the stores to the various memory addresses so that debug
16172
     information is more up-to-date. Remember however while writing
16173
     this to take care of the constraints with the push instruction.
16174
 
16175
     Note also that this has to be taken care of for the VFP registers.
16176
 
16177
     For more see PR43399.  */
16178
 
16179
  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
16180
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
16181
  dwarf_par_index = 1;
16182
 
16183
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
16184
    {
16185
      if (mask & (1 << i))
16186
        {
16187
          reg = gen_rtx_REG (SImode, i);
16188
 
16189
          XVECEXP (par, 0, 0)
16190
            = gen_rtx_SET (VOIDmode,
16191
                           gen_frame_mem
16192
                           (BLKmode,
16193
                            gen_rtx_PRE_MODIFY (Pmode,
16194
                                                stack_pointer_rtx,
16195
                                                plus_constant
16196
                                                (stack_pointer_rtx,
16197
                                                 -4 * num_regs))
16198
                            ),
16199
                           gen_rtx_UNSPEC (BLKmode,
16200
                                           gen_rtvec (1, reg),
16201
                                           UNSPEC_PUSH_MULT));
16202
 
16203
          if (i != PC_REGNUM)
16204
            {
16205
              tmp = gen_rtx_SET (VOIDmode,
16206
                                 gen_frame_mem (SImode, stack_pointer_rtx),
16207
                                 reg);
16208
              RTX_FRAME_RELATED_P (tmp) = 1;
16209
              XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
16210
              dwarf_par_index++;
16211
            }
16212
 
16213
          break;
16214
        }
16215
    }
16216
 
16217
  for (j = 1, i++; j < num_regs; i++)
16218
    {
16219
      if (mask & (1 << i))
16220
        {
16221
          reg = gen_rtx_REG (SImode, i);
16222
 
16223
          XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
16224
 
16225
          if (i != PC_REGNUM)
16226
            {
16227
              tmp
16228
                = gen_rtx_SET (VOIDmode,
16229
                               gen_frame_mem
16230
                               (SImode,
16231
                                plus_constant (stack_pointer_rtx,
16232
                                               4 * j)),
16233
                               reg);
16234
              RTX_FRAME_RELATED_P (tmp) = 1;
16235
              XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
16236
            }
16237
 
16238
          j++;
16239
        }
16240
    }
16241
 
16242
  par = emit_insn (par);
16243
 
16244
  tmp = gen_rtx_SET (VOIDmode,
16245
                     stack_pointer_rtx,
16246
                     plus_constant (stack_pointer_rtx, -4 * num_regs));
16247
  RTX_FRAME_RELATED_P (tmp) = 1;
16248
  XVECEXP (dwarf, 0, 0) = tmp;
16249
 
16250
  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
16251
 
16252
  return par;
16253
}
16254
 
16255
/* Calculate the size of the return value that is passed in registers.  */
16256
static unsigned
16257
arm_size_return_regs (void)
16258
{
16259
  enum machine_mode mode;
16260
 
16261
  if (crtl->return_rtx != 0)
16262
    mode = GET_MODE (crtl->return_rtx);
16263
  else
16264
    mode = DECL_MODE (DECL_RESULT (current_function_decl));
16265
 
16266
  return GET_MODE_SIZE (mode);
16267
}
16268
 
16269
static rtx
16270
emit_sfm (int base_reg, int count)
16271
{
16272
  rtx par;
16273
  rtx dwarf;
16274
  rtx tmp, reg;
16275
  int i;
16276
 
16277
  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
16278
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
16279
 
16280
  reg = gen_rtx_REG (XFmode, base_reg++);
16281
 
16282
  XVECEXP (par, 0, 0)
16283
    = gen_rtx_SET (VOIDmode,
16284
                   gen_frame_mem
16285
                   (BLKmode,
16286
                    gen_rtx_PRE_MODIFY (Pmode,
16287
                                        stack_pointer_rtx,
16288
                                        plus_constant
16289
                                        (stack_pointer_rtx,
16290
                                         -12 * count))
16291
                    ),
16292
                   gen_rtx_UNSPEC (BLKmode,
16293
                                   gen_rtvec (1, reg),
16294
                                   UNSPEC_PUSH_MULT));
16295
  tmp = gen_rtx_SET (VOIDmode,
16296
                     gen_frame_mem (XFmode, stack_pointer_rtx), reg);
16297
  RTX_FRAME_RELATED_P (tmp) = 1;
16298
  XVECEXP (dwarf, 0, 1) = tmp;
16299
 
16300
  for (i = 1; i < count; i++)
16301
    {
16302
      reg = gen_rtx_REG (XFmode, base_reg++);
16303
      XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
16304
 
16305
      tmp = gen_rtx_SET (VOIDmode,
16306
                         gen_frame_mem (XFmode,
16307
                                        plus_constant (stack_pointer_rtx,
16308
                                                       i * 12)),
16309
                         reg);
16310
      RTX_FRAME_RELATED_P (tmp) = 1;
16311
      XVECEXP (dwarf, 0, i + 1) = tmp;
16312
    }
16313
 
16314
  tmp = gen_rtx_SET (VOIDmode,
16315
                     stack_pointer_rtx,
16316
                     plus_constant (stack_pointer_rtx, -12 * count));
16317
 
16318
  RTX_FRAME_RELATED_P (tmp) = 1;
16319
  XVECEXP (dwarf, 0, 0) = tmp;
16320
 
16321
  par = emit_insn (par);
16322
  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
16323
 
16324
  return par;
16325
}
16326
 
16327
 
16328
/* Return true if the current function needs to save/restore LR.  */
16329
 
16330
static bool
16331
thumb_force_lr_save (void)
16332
{
16333
  return !cfun->machine->lr_save_eliminated
16334
         && (!leaf_function_p ()
16335
             || thumb_far_jump_used_p ()
16336
             || df_regs_ever_live_p (LR_REGNUM));
16337
}
16338
 
16339
 
16340
/* Return true if r3 is used by any of the tail call insns in the
16341
   current function.  */
16342
 
16343
static bool
16344
any_sibcall_uses_r3 (void)
16345
{
16346
  edge_iterator ei;
16347
  edge e;
16348
 
16349
  if (!crtl->tail_call_emit)
16350
    return false;
16351
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
16352
    if (e->flags & EDGE_SIBCALL)
16353
      {
16354
        rtx call = BB_END (e->src);
16355
        if (!CALL_P (call))
16356
          call = prev_nonnote_nondebug_insn (call);
16357
        gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
16358
        if (find_regno_fusage (call, USE, 3))
16359
          return true;
16360
      }
16361
  return false;
16362
}
16363
 
16364
 
16365
/* Compute the distance from register FROM to register TO.
16366
   These can be the arg pointer (26), the soft frame pointer (25),
16367
   the stack pointer (13) or the hard frame pointer (11).
16368
   In thumb mode r7 is used as the soft frame pointer, if needed.
16369
   Typical stack layout looks like this:
16370
 
16371
       old stack pointer -> |    |
16372
                             ----
16373
                            |    | \
16374
                            |    |   saved arguments for
16375
                            |    |   vararg functions
16376
                            |    | /
16377
                              --
16378
   hard FP & arg pointer -> |    | \
16379
                            |    |   stack
16380
                            |    |   frame
16381
                            |    | /
16382
                              --
16383
                            |    | \
16384
                            |    |   call saved
16385
                            |    |   registers
16386
      soft frame pointer -> |    | /
16387
                              --
16388
                            |    | \
16389
                            |    |   local
16390
                            |    |   variables
16391
     locals base pointer -> |    | /
16392
                              --
16393
                            |    | \
16394
                            |    |   outgoing
16395
                            |    |   arguments
16396
   current stack pointer -> |    | /
16397
                              --
16398
 
16399
  For a given function some or all of these stack components
16400
  may not be needed, giving rise to the possibility of
16401
  eliminating some of the registers.
16402
 
16403
  The values returned by this function must reflect the behavior
16404
  of arm_expand_prologue() and arm_compute_save_reg_mask().
16405
 
16406
  The sign of the number returned reflects the direction of stack
16407
  growth, so the values are positive for all eliminations except
16408
  from the soft frame pointer to the hard frame pointer.
16409
 
16410
  SFP may point just inside the local variables block to ensure correct
16411
  alignment.  */
16412
 
16413
 
16414
/* Calculate stack offsets.  These are used to calculate register elimination
16415
   offsets and in prologue/epilogue code.  Also calculates which registers
16416
   should be saved.  */
16417
 
16418
static arm_stack_offsets *
16419
arm_get_frame_offsets (void)
16420
{
16421
  struct arm_stack_offsets *offsets;
16422
  unsigned long func_type;
16423
  int leaf;
16424
  int saved;
16425
  int core_saved;
16426
  HOST_WIDE_INT frame_size;
16427
  int i;
16428
 
16429
  offsets = &cfun->machine->stack_offsets;
16430
 
16431
  /* We need to know if we are a leaf function.  Unfortunately, it
16432
     is possible to be called after start_sequence has been called,
16433
     which causes get_insns to return the insns for the sequence,
16434
     not the function, which will cause leaf_function_p to return
16435
     the incorrect result.
16436
 
16437
     We only need to know about leaf functions once reload has completed, and the
16438
     frame size cannot be changed after that time, so we can safely
16439
     use the cached value.  */
16440
 
16441
  if (reload_completed)
16442
    return offsets;
16443
 
16444
  /* Initially this is the size of the local variables.  It will be translated
16445
     into an offset once we have determined the size of preceding data.  */
16446
  frame_size = ROUND_UP_WORD (get_frame_size ());
16447
 
16448
  leaf = leaf_function_p ();
16449
 
16450
  /* Space for variadic functions.  */
16451
  offsets->saved_args = crtl->args.pretend_args_size;
16452
 
16453
  /* In Thumb mode this is incorrect, but never used.  */
16454
  offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
16455
                   arm_compute_static_chain_stack_bytes();
16456
 
16457
  if (TARGET_32BIT)
16458
    {
16459
      unsigned int regno;
16460
 
16461
      offsets->saved_regs_mask = arm_compute_save_reg_mask ();
16462
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
16463
      saved = core_saved;
16464
 
16465
      /* We know that SP will be doubleword aligned on entry, and we must
16466
         preserve that condition at any subroutine call.  We also require the
16467
         soft frame pointer to be doubleword aligned.  */
16468
 
16469
      if (TARGET_REALLY_IWMMXT)
16470
        {
16471
          /* Check for the call-saved iWMMXt registers.  */
16472
          for (regno = FIRST_IWMMXT_REGNUM;
16473
               regno <= LAST_IWMMXT_REGNUM;
16474
               regno++)
16475
            if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
16476
              saved += 8;
16477
        }
16478
 
16479
      func_type = arm_current_func_type ();
16480
      if (! IS_VOLATILE (func_type))
16481
        {
16482
          /* Space for saved FPA registers.  */
16483
          for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
16484
            if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
16485
            saved += 12;
16486
 
16487
          /* Space for saved VFP registers.  */
16488
          if (TARGET_HARD_FLOAT && TARGET_VFP)
16489
            saved += arm_get_vfp_saved_size ();
16490
        }
16491
    }
16492
  else /* TARGET_THUMB1 */
16493
    {
16494
      offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
16495
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
16496
      saved = core_saved;
16497
      if (TARGET_BACKTRACE)
16498
        saved += 16;
16499
    }
16500
 
16501
  /* Saved registers include the stack frame.  */
16502
  offsets->saved_regs = offsets->saved_args + saved +
16503
                        arm_compute_static_chain_stack_bytes();
16504
  offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
16505
  /* A leaf function does not need any stack alignment if it has nothing
16506
     on the stack.  */
16507
  if (leaf && frame_size == 0
16508
      /* However if it calls alloca(), we have a dynamically allocated
16509
         block of BIGGEST_ALIGNMENT on stack, so still do stack alignment.  */
16510
      && ! cfun->calls_alloca)
16511
    {
16512
      offsets->outgoing_args = offsets->soft_frame;
16513
      offsets->locals_base = offsets->soft_frame;
16514
      return offsets;
16515
    }
16516
 
16517
  /* Ensure SFP has the correct alignment.  */
16518
  if (ARM_DOUBLEWORD_ALIGN
16519
      && (offsets->soft_frame & 7))
16520
    {
16521
      offsets->soft_frame += 4;
16522
      /* Try to align stack by pushing an extra reg.  Don't bother doing this
16523
         when there is a stack frame as the alignment will be rolled into
16524
         the normal stack adjustment.  */
16525
      if (frame_size + crtl->outgoing_args_size == 0)
16526
        {
16527
          int reg = -1;
16528
 
16529
          /* If it is safe to use r3, then do so.  This sometimes
16530
             generates better code on Thumb-2 by avoiding the need to
16531
             use 32-bit push/pop instructions.  */
16532
          if (! any_sibcall_uses_r3 ()
16533
              && arm_size_return_regs () <= 12
16534
              && (offsets->saved_regs_mask & (1 << 3)) == 0)
16535
            {
16536
              reg = 3;
16537
            }
16538
          else
16539
            for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
16540
              {
16541
                if ((offsets->saved_regs_mask & (1 << i)) == 0)
16542
                  {
16543
                    reg = i;
16544
                    break;
16545
                  }
16546
              }
16547
 
16548
          if (reg != -1)
16549
            {
16550
              offsets->saved_regs += 4;
16551
              offsets->saved_regs_mask |= (1 << reg);
16552
            }
16553
        }
16554
    }
16555
 
16556
  offsets->locals_base = offsets->soft_frame + frame_size;
16557
  offsets->outgoing_args = (offsets->locals_base
16558
                            + crtl->outgoing_args_size);
16559
 
16560
  if (ARM_DOUBLEWORD_ALIGN)
16561
    {
16562
      /* Ensure SP remains doubleword aligned.  */
16563
      if (offsets->outgoing_args & 7)
16564
        offsets->outgoing_args += 4;
16565
      gcc_assert (!(offsets->outgoing_args & 7));
16566
    }
16567
 
16568
  return offsets;
16569
}
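
   A minimal standalone sketch (not part of arm.c) of the doubleword-alignment
   rule applied above to both the soft frame pointer and the outgoing-arguments
   boundary: offsets in this layout are always word multiples, so a single
   extra word restores 8-byte alignment.

#include <assert.h>

static int
align_doubleword (int offset)
{
  return (offset & 7) ? offset + 4 : offset;
}

int
main (void)
{
  assert (align_doubleword (20) == 24);   /* Misaligned: round up one word.  */
  assert (align_doubleword (24) == 24);   /* Already doubleword aligned.  */
  return 0;
}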
16570
 
16571
 
16572
/* Calculate the relative offsets for the different stack pointers.  Positive
16573
   offsets are in the direction of stack growth.  */
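/* A rough sketch of the frame layout these offsets describe (an
   illustrative aid only; several of the boundaries below can coincide
   when the corresponding area is empty):

	offsets->saved_args	start of the incoming argument area
	offsets->frame		the APCS (ARM) hard frame pointer
	offsets->saved_regs	end of the core/coprocessor register saves
	offsets->soft_frame	the soft frame pointer
	offsets->locals_base	start of the local variables
	offsets->outgoing_args	end of the outgoing argument area (SP)

   Positive offsets grow towards the outgoing arguments, and each
   elimination offset returned below is just the difference between two
   of these values.  */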
16574
 
16575
HOST_WIDE_INT
16576
arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
16577
{
16578
  arm_stack_offsets *offsets;
16579
 
16580
  offsets = arm_get_frame_offsets ();
16581
 
16582
  /* OK, now we have enough information to compute the distances.
16583
     There must be an entry in these switch tables for each pair
16584
     of registers in ELIMINABLE_REGS, even if some of the entries
16585
     seem to be redundant or useless.  */
16586
  switch (from)
16587
    {
16588
    case ARG_POINTER_REGNUM:
16589
      switch (to)
16590
        {
16591
        case THUMB_HARD_FRAME_POINTER_REGNUM:
16592
          return 0;
16593
 
16594
        case FRAME_POINTER_REGNUM:
16595
          /* This is the reverse of the soft frame pointer
16596
             to hard frame pointer elimination below.  */
16597
          return offsets->soft_frame - offsets->saved_args;
16598
 
16599
        case ARM_HARD_FRAME_POINTER_REGNUM:
16600
          /* This is only non-zero in the case where the static chain register
16601
             is stored above the frame.  */
16602
          return offsets->frame - offsets->saved_args - 4;
16603
 
16604
        case STACK_POINTER_REGNUM:
16605
          /* If nothing has been pushed on the stack at all
16606
             then this will return -4.  This *is* correct!  */
16607
          return offsets->outgoing_args - (offsets->saved_args + 4);
16608
 
16609
        default:
16610
          gcc_unreachable ();
16611
        }
16612
      gcc_unreachable ();
16613
 
16614
    case FRAME_POINTER_REGNUM:
16615
      switch (to)
16616
        {
16617
        case THUMB_HARD_FRAME_POINTER_REGNUM:
16618
          return 0;
16619
 
16620
        case ARM_HARD_FRAME_POINTER_REGNUM:
16621
          /* The hard frame pointer points to the top entry in the
16622
             stack frame.  The soft frame pointer points to the bottom entry
16623
             in the stack frame.  If there is no stack frame at all,
16624
             then they are identical.  */
16625
 
16626
          return offsets->frame - offsets->soft_frame;
16627
 
16628
        case STACK_POINTER_REGNUM:
16629
          return offsets->outgoing_args - offsets->soft_frame;
16630
 
16631
        default:
16632
          gcc_unreachable ();
16633
        }
16634
      gcc_unreachable ();
16635
 
16636
    default:
16637
      /* You cannot eliminate from the stack pointer.
16638
         In theory you could eliminate from the hard frame
16639
         pointer to the stack pointer, but this will never
16640
         happen, since if a stack frame is not needed the
16641
         hard frame pointer will never be used.  */
16642
      gcc_unreachable ();
16643
    }
16644
}
16645
 
16646
/* Given FROM and TO register numbers, say whether this elimination is
16647
   allowed.  Frame pointer elimination is automatically handled.
16648
 
16649
   All eliminations are permissible.  Note that ARG_POINTER_REGNUM and
16650
   HARD_FRAME_POINTER_REGNUM are in fact the same thing.  If we need a frame
16651
   pointer, we must eliminate FRAME_POINTER_REGNUM into
16652
   HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
16653
   ARG_POINTER_REGNUM.  */
16654
 
16655
bool
16656
arm_can_eliminate (const int from, const int to)
16657
{
16658
  return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
16659
          (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
16660
          (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
16661
          (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
16662
           true);
16663
}
16664
 
16665
/* Emit RTL to save coprocessor registers on function entry.  Returns the
16666
   number of bytes pushed.  */
16667
 
16668
static int
16669
arm_save_coproc_regs(void)
16670
{
16671
  int saved_size = 0;
16672
  unsigned reg;
16673
  unsigned start_reg;
16674
  rtx insn;
16675
 
16676
  for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
16677
    if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
16678
      {
16679
        insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
16680
        insn = gen_rtx_MEM (V2SImode, insn);
16681
        insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
16682
        RTX_FRAME_RELATED_P (insn) = 1;
16683
        saved_size += 8;
16684
      }
16685
 
16686
  /* Save any floating point call-saved registers used by this
16687
     function.  */
16688
  if (TARGET_FPA_EMU2)
16689
    {
16690
      for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
16691
        if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
16692
          {
16693
            insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
16694
            insn = gen_rtx_MEM (XFmode, insn);
16695
            insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
16696
            RTX_FRAME_RELATED_P (insn) = 1;
16697
            saved_size += 12;
16698
          }
16699
    }
16700
  else
16701
    {
16702
      start_reg = LAST_FPA_REGNUM;
16703
 
16704
      for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
16705
        {
16706
          if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
16707
            {
16708
              if (start_reg - reg == 3)
16709
                {
16710
                  insn = emit_sfm (reg, 4);
16711
                  RTX_FRAME_RELATED_P (insn) = 1;
16712
                  saved_size += 48;
16713
                  start_reg = reg - 1;
16714
                }
16715
            }
16716
          else
16717
            {
16718
              if (start_reg != reg)
16719
                {
16720
                  insn = emit_sfm (reg + 1, start_reg - reg);
16721
                  RTX_FRAME_RELATED_P (insn) = 1;
16722
                  saved_size += (start_reg - reg) * 12;
16723
                }
16724
              start_reg = reg - 1;
16725
            }
16726
        }
16727
 
16728
      if (start_reg != reg)
16729
        {
16730
          insn = emit_sfm (reg + 1, start_reg - reg);
16731
          saved_size += (start_reg - reg) * 12;
16732
          RTX_FRAME_RELATED_P (insn) = 1;
16733
        }
16734
    }
16735
  if (TARGET_HARD_FLOAT && TARGET_VFP)
16736
    {
16737
      start_reg = FIRST_VFP_REGNUM;
16738
 
16739
      for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
16740
        {
16741
          if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
16742
              && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
16743
            {
16744
              if (start_reg != reg)
16745
                saved_size += vfp_emit_fstmd (start_reg,
16746
                                              (reg - start_reg) / 2);
16747
              start_reg = reg + 2;
16748
            }
16749
        }
16750
      if (start_reg != reg)
16751
        saved_size += vfp_emit_fstmd (start_reg,
16752
                                      (reg - start_reg) / 2);
16753
    }
16754
  return saved_size;
16755
}
16756
 
16757
 
16758
/* Set the Thumb frame pointer from the stack pointer.  */
16759
 
16760
static void
16761
thumb_set_frame_pointer (arm_stack_offsets *offsets)
16762
{
16763
  HOST_WIDE_INT amount;
16764
  rtx insn, dwarf;
16765
 
16766
  amount = offsets->outgoing_args - offsets->locals_base;
16767
  if (amount < 1024)
16768
    insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16769
                                  stack_pointer_rtx, GEN_INT (amount)));
16770
  else
16771
    {
16772
      emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
16773
      /* Thumb-2 RTL patterns expect sp as the first input.  Thumb-1
16774
         expects the first two operands to be the same.  */
16775
      if (TARGET_THUMB2)
16776
        {
16777
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16778
                                        stack_pointer_rtx,
16779
                                        hard_frame_pointer_rtx));
16780
        }
16781
      else
16782
        {
16783
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16784
                                        hard_frame_pointer_rtx,
16785
                                        stack_pointer_rtx));
16786
        }
16787
      dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
16788
                           plus_constant (stack_pointer_rtx, amount));
16789
      RTX_FRAME_RELATED_P (dwarf) = 1;
16790
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
16791
    }
16792
 
16793
  RTX_FRAME_RELATED_P (insn) = 1;
16794
}
16795
 
16796
/* Generate the prologue instructions for entry into an ARM or Thumb-2
16797
   function.  */
16798
void
16799
arm_expand_prologue (void)
16800
{
16801
  rtx amount;
16802
  rtx insn;
16803
  rtx ip_rtx;
16804
  unsigned long live_regs_mask;
16805
  unsigned long func_type;
16806
  int fp_offset = 0;
16807
  int saved_pretend_args = 0;
16808
  int saved_regs = 0;
16809
  unsigned HOST_WIDE_INT args_to_push;
16810
  arm_stack_offsets *offsets;
16811
 
16812
  func_type = arm_current_func_type ();
16813
 
16814
  /* Naked functions don't have prologues.  */
16815
  if (IS_NAKED (func_type))
16816
    return;
16817
 
16818
  /* Make a copy of crtl->args.pretend_args_size as we may need to modify it locally.  */
16819
  args_to_push = crtl->args.pretend_args_size;
16820
 
16821
  /* Compute which registers we will have to save onto the stack.  */
16822
  offsets = arm_get_frame_offsets ();
16823
  live_regs_mask = offsets->saved_regs_mask;
16824
 
16825
  ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
16826
 
16827
  if (IS_STACKALIGN (func_type))
16828
    {
16829
      rtx r0, r1;
16830
 
16831
      /* Handle a word-aligned stack pointer.  We generate the following:
16832
 
16833
          mov r0, sp
16834
          bic r1, r0, #7
16835
          mov sp, r1
16836
          <save and restore r0 in normal prologue/epilogue>
16837
          mov sp, r0
16838
          bx lr
16839
 
16840
         The unwinder doesn't need to know about the stack realignment.
16841
         Just tell it we saved SP in r0.  */
16842
      gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
16843
 
16844
      r0 = gen_rtx_REG (SImode, 0);
16845
      r1 = gen_rtx_REG (SImode, 1);
16846
 
16847
      insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
16848
      RTX_FRAME_RELATED_P (insn) = 1;
16849
      add_reg_note (insn, REG_CFA_REGISTER, NULL);
16850
 
16851
      emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
16852
 
16853
      /* ??? The CFA changes here, which may cause GDB to conclude that it
16854
         has entered a different function.  That said, the unwind info is
16855
         correct, individually, before and after this instruction because
16856
         we've described the save of SP, which will override the default
16857
         handling of SP as restoring from the CFA.  */
16858
      emit_insn (gen_movsi (stack_pointer_rtx, r1));
16859
    }
16860
 
16861
  /* For APCS frames, if the IP register is clobbered
16862
     when creating the frame, save that register in a special
16863
     way.  */
16864
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
16865
    {
16866
      if (IS_INTERRUPT (func_type))
16867
        {
16868
          /* Interrupt functions must not corrupt any registers.
16869
             Creating a frame pointer however, corrupts the IP
16870
             register, so we must push it first.  */
16871
          emit_multi_reg_push (1 << IP_REGNUM);
16872
 
16873
          /* Do not set RTX_FRAME_RELATED_P on this insn.
16874
             The dwarf stack unwinding code only wants to see one
16875
             stack decrement per function, and this is not it.  If
16876
             this instruction is labeled as being part of the frame
16877
             creation sequence then dwarf2out_frame_debug_expr will
16878
             die when it encounters the assignment of IP to FP
16879
             later on, since the use of SP here establishes SP as
16880
             the CFA register and not IP.
16881
 
16882
             Anyway this instruction is not really part of the stack
16883
             frame creation although it is part of the prologue.  */
16884
        }
16885
      else if (IS_NESTED (func_type))
16886
        {
16887
          /* The static chain register is the same as the IP register
16888
             used as a scratch register during stack frame creation.
16889
             To get around this we need to find somewhere to store IP
16890
             whilst the frame is being created.  We try the following
16891
             places in order:
16892
 
16893
               1. The last argument register.
16894
               2. A slot on the stack above the frame.  (This only
16895
                  works if the function is not a varargs function).
16896
               3. Register r3, after pushing the argument registers
16897
                  onto the stack.
16898
 
16899
             Note - we only need to tell the dwarf2 backend about the SP
16900
             adjustment in the second variant; the static chain register
16901
             doesn't need to be unwound, as it doesn't contain a value
16902
             inherited from the caller.  */
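          /* As an illustration only (the exact code depends on the target
             and options), variant 2 above typically produces a prologue
             fragment along the lines of:

                str     ip, [sp, #-4]!   @ stash IP just above the frame
                add     ip, sp, #4       @ IP = original SP
                ...frame creation...
                ldr     ip, [fp, #4]     @ recover the static chain

             while variant 1 is just a "mov r3, ip" / "mov ip, r3" pair.  */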
16903
 
16904
          if (df_regs_ever_live_p (3) == false)
16905
            insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
16906
          else if (args_to_push == 0)
16907
            {
16908
              rtx dwarf;
16909
 
16910
              gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
16911
              saved_regs += 4;
16912
 
16913
              insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
16914
              insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
16915
              fp_offset = 4;
16916
 
16917
              /* Just tell the dwarf backend that we adjusted SP.  */
16918
              dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16919
                                   plus_constant (stack_pointer_rtx,
16920
                                                  -fp_offset));
16921
              RTX_FRAME_RELATED_P (insn) = 1;
16922
              add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
16923
            }
16924
          else
16925
            {
16926
              /* Store the args on the stack.  */
16927
              if (cfun->machine->uses_anonymous_args)
16928
                insn = emit_multi_reg_push
16929
                  ((0xf0 >> (args_to_push / 4)) & 0xf);
16930
              else
16931
                insn = emit_insn
16932
                  (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16933
                               GEN_INT (- args_to_push)));
16934
 
16935
              RTX_FRAME_RELATED_P (insn) = 1;
16936
 
16937
              saved_pretend_args = 1;
16938
              fp_offset = args_to_push;
16939
              args_to_push = 0;
16940
 
16941
              /* Now reuse r3 to preserve IP.  */
16942
              emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
16943
            }
16944
        }
16945
 
16946
      insn = emit_set_insn (ip_rtx,
16947
                            plus_constant (stack_pointer_rtx, fp_offset));
16948
      RTX_FRAME_RELATED_P (insn) = 1;
16949
    }
16950
 
16951
  if (args_to_push)
16952
    {
16953
      /* Push the argument registers, or reserve space for them.  */
16954
      if (cfun->machine->uses_anonymous_args)
16955
        insn = emit_multi_reg_push
16956
          ((0xf0 >> (args_to_push / 4)) & 0xf);
16957
      else
16958
        insn = emit_insn
16959
          (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16960
                       GEN_INT (- args_to_push)));
16961
      RTX_FRAME_RELATED_P (insn) = 1;
16962
    }
16963
 
16964
  /* If this is an interrupt service routine, and the link register
16965
     is going to be pushed, and we're not generating extra
16966
     push of IP (needed when a frame is needed and the frame layout is APCS),
16967
     subtracting four from LR now will mean that the function return
16968
     can be done with a single instruction.  */
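  /* Illustrative sketch (not taken from actual generated code): without
     this adjustment an ISR that pushes LR has to return with

        ldmfd   sp!, {..., lr}
        subs    pc, lr, #4

     whereas after pre-subtracting 4 from LR the epilogue can simply use

        ldmfd   sp!, {..., pc}^

     to return and restore the processor state in one instruction.  */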
16969
  if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
16970
      && (live_regs_mask & (1 << LR_REGNUM)) != 0
16971
      && !(frame_pointer_needed && TARGET_APCS_FRAME)
16972
      && TARGET_ARM)
16973
    {
16974
      rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
16975
 
16976
      emit_set_insn (lr, plus_constant (lr, -4));
16977
    }
16978
 
16979
  if (live_regs_mask)
16980
    {
16981
      saved_regs += bit_count (live_regs_mask) * 4;
16982
      if (optimize_size && !frame_pointer_needed
16983
          && saved_regs == offsets->saved_regs - offsets->saved_args)
16984
        {
16985
          /* If no coprocessor registers are being pushed and we don't have
16986
             to worry about a frame pointer then push extra registers to
16987
             create the stack frame.  This is done in a way that does not
16988
             alter the frame layout, so is independent of the epilogue.  */
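          /* For example (illustrative): with an 8-byte local frame and
             r0-r1 dead, "push {r0, r1, r4, lr}" both saves the live
             registers and allocates the locals, replacing a separate
             "sub sp, sp, #8" in the prologue.  */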
16989
          int n;
16990
          int frame;
16991
          n = 0;
16992
          while (n < 8 && (live_regs_mask & (1 << n)) == 0)
16993
            n++;
16994
          frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
16995
          if (frame && n * 4 >= frame)
16996
            {
16997
              n = frame / 4;
16998
              live_regs_mask |= (1 << n) - 1;
16999
              saved_regs += frame;
17000
            }
17001
        }
17002
      insn = emit_multi_reg_push (live_regs_mask);
17003
      RTX_FRAME_RELATED_P (insn) = 1;
17004
    }
17005
 
17006
  if (! IS_VOLATILE (func_type))
17007
    saved_regs += arm_save_coproc_regs ();
17008
 
17009
  if (frame_pointer_needed && TARGET_ARM)
17010
    {
17011
      /* Create the new frame pointer.  */
17012
      if (TARGET_APCS_FRAME)
17013
        {
17014
          insn = GEN_INT (-(4 + args_to_push + fp_offset));
17015
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
17016
          RTX_FRAME_RELATED_P (insn) = 1;
17017
 
17018
          if (IS_NESTED (func_type))
17019
            {
17020
              /* Recover the static chain register.  */
17021
              if (!df_regs_ever_live_p (3)
17022
                  || saved_pretend_args)
17023
                insn = gen_rtx_REG (SImode, 3);
17024
              else /* if (crtl->args.pretend_args_size == 0) */
17025
                {
17026
                  insn = plus_constant (hard_frame_pointer_rtx, 4);
17027
                  insn = gen_frame_mem (SImode, insn);
17028
                }
17029
              emit_set_insn (ip_rtx, insn);
17030
              /* Add a USE to stop propagate_one_insn() from barfing.  */
17031
              emit_insn (gen_prologue_use (ip_rtx));
17032
            }
17033
        }
17034
      else
17035
        {
17036
          insn = GEN_INT (saved_regs - 4);
17037
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
17038
                                        stack_pointer_rtx, insn));
17039
          RTX_FRAME_RELATED_P (insn) = 1;
17040
        }
17041
    }
17042
 
17043
  if (flag_stack_usage_info)
17044
    current_function_static_stack_size
17045
      = offsets->outgoing_args - offsets->saved_args;
17046
 
17047
  if (offsets->outgoing_args != offsets->saved_args + saved_regs)
17048
    {
17049
      /* This add can produce multiple insns for a large constant, so we
17050
         need to get tricky.  */
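      /* E.g. (illustrative) a 4104-byte adjustment is not a valid ARM
         immediate and may be split into something like

            sub     sp, sp, #4096
            sub     sp, sp, #8

         so every insn emitted here must be marked frame-related.  */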
17051
      rtx last = get_last_insn ();
17052
 
17053
      amount = GEN_INT (offsets->saved_args + saved_regs
17054
                        - offsets->outgoing_args);
17055
 
17056
      insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
17057
                                    amount));
17058
      do
17059
        {
17060
          last = last ? NEXT_INSN (last) : get_insns ();
17061
          RTX_FRAME_RELATED_P (last) = 1;
17062
        }
17063
      while (last != insn);
17064
 
17065
      /* If the frame pointer is needed, emit a special barrier that
17066
         will prevent the scheduler from moving stores to the frame
17067
         before the stack adjustment.  */
17068
      if (frame_pointer_needed)
17069
        insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
17070
                                         hard_frame_pointer_rtx));
17071
    }
17072
 
17073
 
17074
  if (frame_pointer_needed && TARGET_THUMB2)
17075
    thumb_set_frame_pointer (offsets);
17076
 
17077
  if (flag_pic && arm_pic_register != INVALID_REGNUM)
17078
    {
17079
      unsigned long mask;
17080
 
17081
      mask = live_regs_mask;
17082
      mask &= THUMB2_WORK_REGS;
17083
      if (!IS_NESTED (func_type))
17084
        mask |= (1 << IP_REGNUM);
17085
      arm_load_pic_register (mask);
17086
    }
17087
 
17088
  /* If we are profiling, make sure no instructions are scheduled before
17089
     the call to mcount.  Similarly if the user has requested no
17090
     scheduling in the prolog.  Similarly if we want non-call exceptions
17091
     using the EABI unwinder, to prevent faulting instructions from being
17092
     swapped with a stack adjustment.  */
17093
  if (crtl->profile || !TARGET_SCHED_PROLOG
17094
      || (arm_except_unwind_info (&global_options) == UI_TARGET
17095
          && cfun->can_throw_non_call_exceptions))
17096
    emit_insn (gen_blockage ());
17097
 
17098
  /* If the link register is being kept alive, with the return address in it,
17099
     then make sure that it does not get reused by the ce2 pass.  */
17100
  if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
17101
    cfun->machine->lr_save_eliminated = 1;
17102
}
17103
 
17104
/* Print condition code to STREAM.  Helper function for arm_print_operand.  */
17105
static void
17106
arm_print_condition (FILE *stream)
17107
{
17108
  if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
17109
    {
17110
      /* Branch conversion is not implemented for Thumb-2.  */
17111
      if (TARGET_THUMB)
17112
        {
17113
          output_operand_lossage ("predicated Thumb instruction");
17114
          return;
17115
        }
17116
      if (current_insn_predicate != NULL)
17117
        {
17118
          output_operand_lossage
17119
            ("predicated instruction in conditional sequence");
17120
          return;
17121
        }
17122
 
17123
      fputs (arm_condition_codes[arm_current_cc], stream);
17124
    }
17125
  else if (current_insn_predicate)
17126
    {
17127
      enum arm_cond_code code;
17128
 
17129
      if (TARGET_THUMB1)
17130
        {
17131
          output_operand_lossage ("predicated Thumb instruction");
17132
          return;
17133
        }
17134
 
17135
      code = get_arm_condition_code (current_insn_predicate);
17136
      fputs (arm_condition_codes[code], stream);
17137
    }
17138
}
17139
 
17140
 
17141
/* If CODE is 'd', then X is a condition operand and the instruction
17142
   should only be executed if the condition is true.
17143
   If CODE is 'D', then X is a condition operand and the instruction
17144
   should only be executed if the condition is false: however, if the mode
17145
   of the comparison is CCFPEmode, then always execute the instruction -- we
17146
   do this because in these circumstances !GE does not necessarily imply LT;
17147
   in these cases the instruction pattern will take care to make sure that
17148
   an instruction containing %d will follow, thereby undoing the effects of
17149
   doing this instruction unconditionally.
17150
   If CODE is 'N' then X is a floating point operand that must be negated
17151
   before output.
17152
   If CODE is 'B' then output a bitwise inverted value of X (a const int).
17153
   If X is a REG and CODE is `M', output a ldm/stm style multi-reg.  */
17154
static void
17155
arm_print_operand (FILE *stream, rtx x, int code)
17156
{
17157
  switch (code)
17158
    {
17159
    case '@':
17160
      fputs (ASM_COMMENT_START, stream);
17161
      return;
17162
 
17163
    case '_':
17164
      fputs (user_label_prefix, stream);
17165
      return;
17166
 
17167
    case '|':
17168
      fputs (REGISTER_PREFIX, stream);
17169
      return;
17170
 
17171
    case '?':
17172
      arm_print_condition (stream);
17173
      return;
17174
 
17175
    case '(':
17176
      /* Nothing in unified syntax, otherwise the current condition code.  */
17177
      if (!TARGET_UNIFIED_ASM)
17178
        arm_print_condition (stream);
17179
      break;
17180
 
17181
    case ')':
17182
      /* The current condition code in unified syntax, otherwise nothing.  */
17183
      if (TARGET_UNIFIED_ASM)
17184
        arm_print_condition (stream);
17185
      break;
17186
 
17187
    case '.':
17188
      /* The current condition code for a condition code setting instruction.
17189
         Preceded by 's' in unified syntax, otherwise followed by 's'.  */
17190
      if (TARGET_UNIFIED_ASM)
17191
        {
17192
          fputc('s', stream);
17193
          arm_print_condition (stream);
17194
        }
17195
      else
17196
        {
17197
          arm_print_condition (stream);
17198
          fputc('s', stream);
17199
        }
17200
      return;
17201
 
17202
    case '!':
17203
      /* If the instruction is conditionally executed then print
17204
         the current condition code, otherwise print 's'.  */
17205
      gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
17206
      if (current_insn_predicate)
17207
        arm_print_condition (stream);
17208
      else
17209
        fputc('s', stream);
17210
      break;
17211
 
17212
    /* %# is a "break" sequence. It doesn't output anything, but is used to
17213
       separate e.g. operand numbers from following text, if that text consists
17214
       of further digits which we don't want to be part of the operand
17215
       number.  */
17216
    case '#':
17217
      return;
17218
 
17219
    case 'N':
17220
      {
17221
        REAL_VALUE_TYPE r;
17222
        REAL_VALUE_FROM_CONST_DOUBLE (r, x);
17223
        r = real_value_negate (&r);
17224
        fprintf (stream, "%s", fp_const_from_val (&r));
17225
      }
17226
      return;
17227
 
17228
    /* An integer or symbol address without a preceding # sign.  */
17229
    case 'c':
17230
      switch (GET_CODE (x))
17231
        {
17232
        case CONST_INT:
17233
          fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
17234
          break;
17235
 
17236
        case SYMBOL_REF:
17237
          output_addr_const (stream, x);
17238
          break;
17239
 
17240
        case CONST:
17241
          if (GET_CODE (XEXP (x, 0)) == PLUS
17242
              && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
17243
            {
17244
              output_addr_const (stream, x);
17245
              break;
17246
            }
17247
          /* Fall through.  */
17248
 
17249
        default:
17250
          output_operand_lossage ("Unsupported operand for code '%c'", code);
17251
        }
17252
      return;
17253
 
17254
    case 'B':
17255
      if (GET_CODE (x) == CONST_INT)
17256
        {
17257
          HOST_WIDE_INT val;
17258
          val = ARM_SIGN_EXTEND (~INTVAL (x));
17259
          fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
17260
        }
17261
      else
17262
        {
17263
          putc ('~', stream);
17264
          output_addr_const (stream, x);
17265
        }
17266
      return;
17267
 
17268
    case 'L':
17269
      /* The low 16 bits of an immediate constant.  */
17270
      fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
17271
      return;
17272
 
17273
    case 'i':
17274
      fprintf (stream, "%s", arithmetic_instr (x, 1));
17275
      return;
17276
 
17277
    /* Truncate Cirrus shift counts.  */
17278
    case 's':
17279
      if (GET_CODE (x) == CONST_INT)
17280
        {
17281
          fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
17282
          return;
17283
        }
17284
      arm_print_operand (stream, x, 0);
17285
      return;
17286
 
17287
    case 'I':
17288
      fprintf (stream, "%s", arithmetic_instr (x, 0));
17289
      return;
17290
 
17291
    case 'S':
17292
      {
17293
        HOST_WIDE_INT val;
17294
        const char *shift;
17295
 
17296
        if (!shift_operator (x, SImode))
17297
          {
17298
            output_operand_lossage ("invalid shift operand");
17299
            break;
17300
          }
17301
 
17302
        shift = shift_op (x, &val);
17303
 
17304
        if (shift)
17305
          {
17306
            fprintf (stream, ", %s ", shift);
17307
            if (val == -1)
17308
              arm_print_operand (stream, XEXP (x, 1), 0);
17309
            else
17310
              fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
17311
          }
17312
      }
17313
      return;
17314
 
17315
      /* An explanation of the 'Q', 'R' and 'H' register operands:
17316
 
17317
         In a pair of registers containing a DI or DF value the 'Q'
17318
         operand returns the register number of the register containing
17319
         the least significant part of the value.  The 'R' operand returns
17320
         the register number of the register containing the most
17321
         significant part of the value.
17322
 
17323
         The 'H' operand returns the higher of the two register numbers.
17324
         On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
17325
         same as the 'Q' operand, since the most significant part of the
17326
         value is held in the lower number register.  The reverse is true
17327
         on systems where WORDS_BIG_ENDIAN is false.
17328
 
17329
         The purpose of these operands is to distinguish between cases
17330
         where the endian-ness of the values is important (for example
17331
         when they are added together), and cases where the endian-ness
17332
         is irrelevant, but the order of register operations is important.
17333
         For example when loading a value from memory into a register
17334
         pair, the endian-ness does not matter.  Provided that the value
17335
         from the lower memory address is put into the lower numbered
17336
         register, and the value from the higher address is put into the
17337
         higher numbered register, the load will work regardless of whether
17338
         the value being loaded is big-wordian or little-wordian.  The
17339
         order of the two register loads can matter however, if the address
17340
         of the memory location is actually held in one of the registers
17341
         being overwritten by the load.
17342
 
17343
         The 'Q' and 'R' constraints are also available for 64-bit
17344
         constants.  */
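      /* A worked example (illustrative): for a little-endian DImode value
         held in the register pair {r2, r3}, %Q prints r2 (the low word),
         %R prints r3 (the high word) and %H prints r3 (the higher-numbered
         register).  With WORDS_BIG_ENDIAN the roles of %Q and %R swap,
         while %H still prints r3.  */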
17345
    case 'Q':
17346
      if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
17347
        {
17348
          rtx part = gen_lowpart (SImode, x);
17349
          fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
17350
          return;
17351
        }
17352
 
17353
      if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
17354
        {
17355
          output_operand_lossage ("invalid operand for code '%c'", code);
17356
          return;
17357
        }
17358
 
17359
      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
17360
      return;
17361
 
17362
    case 'R':
17363
      if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
17364
        {
17365
          enum machine_mode mode = GET_MODE (x);
17366
          rtx part;
17367
 
17368
          if (mode == VOIDmode)
17369
            mode = DImode;
17370
          part = gen_highpart_mode (SImode, mode, x);
17371
          fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
17372
          return;
17373
        }
17374
 
17375
      if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
17376
        {
17377
          output_operand_lossage ("invalid operand for code '%c'", code);
17378
          return;
17379
        }
17380
 
17381
      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
17382
      return;
17383
 
17384
    case 'H':
17385
      if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
17386
        {
17387
          output_operand_lossage ("invalid operand for code '%c'", code);
17388
          return;
17389
        }
17390
 
17391
      asm_fprintf (stream, "%r", REGNO (x) + 1);
17392
      return;
17393
 
17394
    case 'J':
17395
      if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
17396
        {
17397
          output_operand_lossage ("invalid operand for code '%c'", code);
17398
          return;
17399
        }
17400
 
17401
      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
17402
      return;
17403
 
17404
    case 'K':
17405
      if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
17406
        {
17407
          output_operand_lossage ("invalid operand for code '%c'", code);
17408
          return;
17409
        }
17410
 
17411
      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
17412
      return;
17413
 
17414
    case 'm':
17415
      asm_fprintf (stream, "%r",
17416
                   GET_CODE (XEXP (x, 0)) == REG
17417
                   ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
17418
      return;
17419
 
17420
    case 'M':
17421
      asm_fprintf (stream, "{%r-%r}",
17422
                   REGNO (x),
17423
                   REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
17424
      return;
17425
 
17426
    /* Like 'M', but writing doubleword vector registers, for use by Neon
17427
       insns.  */
17428
    case 'h':
17429
      {
17430
        int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
17431
        int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
17432
        if (numregs == 1)
17433
          asm_fprintf (stream, "{d%d}", regno);
17434
        else
17435
          asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
17436
      }
17437
      return;
17438
 
17439
    case 'd':
17440
      /* CONST_TRUE_RTX means always -- that's the default.  */
17441
      if (x == const_true_rtx)
17442
        return;
17443
 
17444
      if (!COMPARISON_P (x))
17445
        {
17446
          output_operand_lossage ("invalid operand for code '%c'", code);
17447
          return;
17448
        }
17449
 
17450
      fputs (arm_condition_codes[get_arm_condition_code (x)],
17451
             stream);
17452
      return;
17453
 
17454
    case 'D':
17455
      /* CONST_TRUE_RTX means not always -- i.e. never.  We shouldn't ever
17456
         want to do that.  */
17457
      if (x == const_true_rtx)
17458
        {
17459
          output_operand_lossage ("instruction never executed");
17460
          return;
17461
        }
17462
      if (!COMPARISON_P (x))
17463
        {
17464
          output_operand_lossage ("invalid operand for code '%c'", code);
17465
          return;
17466
        }
17467
 
17468
      fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
17469
                                 (get_arm_condition_code (x))],
17470
             stream);
17471
      return;
17472
 
17473
    /* Cirrus registers can be accessed in a variety of ways:
17474
         single floating point (f)
17475
         double floating point (d)
17476
         32bit integer         (fx)
17477
         64bit integer         (dx).  */
17478
    case 'W':                   /* Cirrus register in F mode.  */
17479
    case 'X':                   /* Cirrus register in D mode.  */
17480
    case 'Y':                   /* Cirrus register in FX mode.  */
17481
    case 'Z':                   /* Cirrus register in DX mode.  */
17482
      gcc_assert (GET_CODE (x) == REG
17483
                  && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
17484
 
17485
      fprintf (stream, "mv%s%s",
17486
               code == 'W' ? "f"
17487
               : code == 'X' ? "d"
17488
               : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
17489
 
17490
      return;
17491
 
17492
    /* Print cirrus register in the mode specified by the register's mode.  */
17493
    case 'V':
17494
      {
17495
        int mode = GET_MODE (x);
17496
 
17497
        if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
17498
          {
17499
            output_operand_lossage ("invalid operand for code '%c'", code);
17500
            return;
17501
          }
17502
 
17503
        fprintf (stream, "mv%s%s",
17504
                 mode == DFmode ? "d"
17505
                 : mode == SImode ? "fx"
17506
                 : mode == DImode ? "dx"
17507
                 : "f", reg_names[REGNO (x)] + 2);
17508
 
17509
        return;
17510
      }
17511
 
17512
    case 'U':
17513
      if (GET_CODE (x) != REG
17514
          || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
17515
          || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
17516
        /* Bad value for wCG register number.  */
17517
        {
17518
          output_operand_lossage ("invalid operand for code '%c'", code);
17519
          return;
17520
        }
17521
 
17522
      else
17523
        fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
17524
      return;
17525
 
17526
      /* Print an iWMMXt control register name.  */
17527
    case 'w':
17528
      if (GET_CODE (x) != CONST_INT
17529
          || INTVAL (x) < 0
17530
          || INTVAL (x) >= 16)
17531
        /* Bad value for wC register number.  */
17532
        {
17533
          output_operand_lossage ("invalid operand for code '%c'", code);
17534
          return;
17535
        }
17536
 
17537
      else
17538
        {
17539
          static const char * wc_reg_names [16] =
17540
            {
17541
              "wCID",  "wCon",  "wCSSF", "wCASF",
17542
              "wC4",   "wC5",   "wC6",   "wC7",
17543
              "wCGR0", "wCGR1", "wCGR2", "wCGR3",
17544
              "wC12",  "wC13",  "wC14",  "wC15"
17545
            };
17546
 
17547
          fputs (wc_reg_names [INTVAL (x)], stream);
17548
        }
17549
      return;
17550
 
17551
    /* Print the high single-precision register of a VFP double-precision
17552
       register.  */
17553
    case 'p':
17554
      {
17555
        int mode = GET_MODE (x);
17556
        int regno;
17557
 
17558
        if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
17559
          {
17560
            output_operand_lossage ("invalid operand for code '%c'", code);
17561
            return;
17562
          }
17563
 
17564
        regno = REGNO (x);
17565
        if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
17566
          {
17567
            output_operand_lossage ("invalid operand for code '%c'", code);
17568
            return;
17569
          }
17570
 
17571
        fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
17572
      }
17573
      return;
17574
 
17575
    /* Print a VFP/Neon double precision or quad precision register name.  */
17576
    case 'P':
17577
    case 'q':
17578
      {
17579
        int mode = GET_MODE (x);
17580
        int is_quad = (code == 'q');
17581
        int regno;
17582
 
17583
        if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
17584
          {
17585
            output_operand_lossage ("invalid operand for code '%c'", code);
17586
            return;
17587
          }
17588
 
17589
        if (GET_CODE (x) != REG
17590
            || !IS_VFP_REGNUM (REGNO (x)))
17591
          {
17592
            output_operand_lossage ("invalid operand for code '%c'", code);
17593
            return;
17594
          }
17595
 
17596
        regno = REGNO (x);
17597
        if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
17598
            || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
17599
          {
17600
            output_operand_lossage ("invalid operand for code '%c'", code);
17601
            return;
17602
          }
17603
 
17604
        fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
17605
          (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
17606
      }
17607
      return;
17608
 
17609
    /* These two codes print the low/high doubleword register of a Neon quad
17610
       register, respectively.  For pair-structure types, can also print
17611
       low/high quadword registers.  */
17612
    case 'e':
17613
    case 'f':
17614
      {
17615
        int mode = GET_MODE (x);
17616
        int regno;
17617
 
17618
        if ((GET_MODE_SIZE (mode) != 16
17619
             && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
17620
          {
17621
            output_operand_lossage ("invalid operand for code '%c'", code);
17622
            return;
17623
          }
17624
 
17625
        regno = REGNO (x);
17626
        if (!NEON_REGNO_OK_FOR_QUAD (regno))
17627
          {
17628
            output_operand_lossage ("invalid operand for code '%c'", code);
17629
            return;
17630
          }
17631
 
17632
        if (GET_MODE_SIZE (mode) == 16)
17633
          fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
17634
                                  + (code == 'f' ? 1 : 0));
17635
        else
17636
          fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
17637
                                  + (code == 'f' ? 1 : 0));
17638
      }
17639
      return;
17640
 
17641
    /* Print a VFPv3 floating-point constant, represented as an integer
17642
       index.  */
17643
    case 'G':
17644
      {
17645
        int index = vfp3_const_double_index (x);
17646
        gcc_assert (index != -1);
17647
        fprintf (stream, "%d", index);
17648
      }
17649
      return;
17650
 
17651
    /* Print bits representing opcode features for Neon.
17652
 
17653
       Bit 0 is 1 for signed, 0 for unsigned.  Floats count as signed
17654
       and polynomials as unsigned.
17655
 
17656
       Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
17657
 
17658
       Bit 2 is 1 for rounding functions, 0 otherwise.  */
17659
 
17660
    /* Identify the type as 's', 'u', 'p' or 'f'.  */
17661
    case 'T':
17662
      {
17663
        HOST_WIDE_INT bits = INTVAL (x);
17664
        fputc ("uspf"[bits & 3], stream);
17665
      }
17666
      return;
17667
 
17668
    /* Likewise, but signed and unsigned integers are both 'i'.  */
17669
    case 'F':
17670
      {
17671
        HOST_WIDE_INT bits = INTVAL (x);
17672
        fputc ("iipf"[bits & 3], stream);
17673
      }
17674
      return;
17675
 
17676
    /* As for 'T', but emit 'u' instead of 'p'.  */
17677
    case 't':
17678
      {
17679
        HOST_WIDE_INT bits = INTVAL (x);
17680
        fputc ("usuf"[bits & 3], stream);
17681
      }
17682
      return;
17683
 
17684
    /* Bit 2: rounding (vs none).  */
17685
    case 'O':
17686
      {
17687
        HOST_WIDE_INT bits = INTVAL (x);
17688
        fputs ((bits & 4) != 0 ? "r" : "", stream);
17689
      }
17690
      return;
17691
 
17692
    /* Memory operand for vld1/vst1 instruction.  */
17693
    case 'A':
17694
      {
17695
        rtx addr;
17696
        bool postinc = FALSE;
17697
        unsigned align, memsize, align_bits;
17698
 
17699
        gcc_assert (GET_CODE (x) == MEM);
17700
        addr = XEXP (x, 0);
17701
        if (GET_CODE (addr) == POST_INC)
17702
          {
17703
            postinc = 1;
17704
            addr = XEXP (addr, 0);
17705
          }
17706
        asm_fprintf (stream, "[%r", REGNO (addr));
17707
 
17708
        /* We know the alignment of this access, so we can emit a hint in the
17709
           instruction (for some alignments) as an aid to the memory subsystem
17710
           of the target.  */
17711
        align = MEM_ALIGN (x) >> 3;
17712
        memsize = MEM_SIZE (x);
17713
 
17714
        /* Only certain alignment specifiers are supported by the hardware.  */
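        /* For instance (illustrative): a 16-byte access known to be
           32-byte aligned is printed as "[rN:256]", one with 16-byte
           alignment as "[rN:128]", an 8-byte-aligned access of at least
           8 bytes as "[rN:64]", and anything else gets no ":bits" hint.  */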
17715
        if (memsize == 16 && (align % 32) == 0)
17716
          align_bits = 256;
17717
        else if (memsize == 16 && (align % 16) == 0)
17718
          align_bits = 128;
17719
        else if (memsize >= 8 && (align % 8) == 0)
17720
          align_bits = 64;
17721
        else
17722
          align_bits = 0;
17723
 
17724
        if (align_bits != 0)
17725
          asm_fprintf (stream, ":%d", align_bits);
17726
 
17727
        asm_fprintf (stream, "]");
17728
 
17729
        if (postinc)
17730
          fputs("!", stream);
17731
      }
17732
      return;
17733
 
17734
    case 'C':
17735
      {
17736
        rtx addr;
17737
 
17738
        gcc_assert (GET_CODE (x) == MEM);
17739
        addr = XEXP (x, 0);
17740
        gcc_assert (GET_CODE (addr) == REG);
17741
        asm_fprintf (stream, "[%r]", REGNO (addr));
17742
      }
17743
      return;
17744
 
17745
    /* Translate an S register number into a D register number and element index.  */
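    /* e.g. (illustrative) s4 becomes d2[0] and s5 becomes d2[1].  */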
17746
    case 'y':
17747
      {
17748
        int mode = GET_MODE (x);
17749
        int regno;
17750
 
17751
        if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
17752
          {
17753
            output_operand_lossage ("invalid operand for code '%c'", code);
17754
            return;
17755
          }
17756
 
17757
        regno = REGNO (x);
17758
        if (!VFP_REGNO_OK_FOR_SINGLE (regno))
17759
          {
17760
            output_operand_lossage ("invalid operand for code '%c'", code);
17761
            return;
17762
          }
17763
 
17764
        regno = regno - FIRST_VFP_REGNUM;
17765
        fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
17766
      }
17767
      return;
17768
 
17769
    case 'v':
17770
        gcc_assert (GET_CODE (x) == CONST_DOUBLE);
17771
        fprintf (stream, "#%d", vfp3_const_double_for_fract_bits (x));
17772
        return;
17773
 
17774
    /* Register specifier for vld1.16/vst1.16.  Translate the S register
17775
       number into a D register number and element index.  */
17776
    case 'z':
17777
      {
17778
        int mode = GET_MODE (x);
17779
        int regno;
17780
 
17781
        if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
17782
          {
17783
            output_operand_lossage ("invalid operand for code '%c'", code);
17784
            return;
17785
          }
17786
 
17787
        regno = REGNO (x);
17788
        if (!VFP_REGNO_OK_FOR_SINGLE (regno))
17789
          {
17790
            output_operand_lossage ("invalid operand for code '%c'", code);
17791
            return;
17792
          }
17793
 
17794
        regno = regno - FIRST_VFP_REGNUM;
17795
        fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
17796
      }
17797
      return;
17798
 
17799
    default:
17800
      if (x == 0)
17801
        {
17802
          output_operand_lossage ("missing operand");
17803
          return;
17804
        }
17805
 
17806
      switch (GET_CODE (x))
17807
        {
17808
        case REG:
17809
          asm_fprintf (stream, "%r", REGNO (x));
17810
          break;
17811
 
17812
        case MEM:
17813
          output_memory_reference_mode = GET_MODE (x);
17814
          output_address (XEXP (x, 0));
17815
          break;
17816
 
17817
        case CONST_DOUBLE:
17818
          if (TARGET_NEON)
17819
            {
17820
              char fpstr[20];
17821
              real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
17822
                               sizeof (fpstr), 0, 1);
17823
              fprintf (stream, "#%s", fpstr);
17824
            }
17825
          else
17826
            fprintf (stream, "#%s", fp_immediate_constant (x));
17827
          break;
17828
 
17829
        default:
17830
          gcc_assert (GET_CODE (x) != NEG);
17831
          fputc ('#', stream);
17832
          if (GET_CODE (x) == HIGH)
17833
            {
17834
              fputs (":lower16:", stream);
17835
              x = XEXP (x, 0);
17836
            }
17837
 
17838
          output_addr_const (stream, x);
17839
          break;
17840
        }
17841
    }
17842
}
17843
 
17844
/* Target hook for printing a memory address.  */
17845
static void
17846
arm_print_operand_address (FILE *stream, rtx x)
17847
{
17848
  if (TARGET_32BIT)
17849
    {
17850
      int is_minus = GET_CODE (x) == MINUS;
17851
 
17852
      if (GET_CODE (x) == REG)
17853
        asm_fprintf (stream, "[%r, #0]", REGNO (x));
17854
      else if (GET_CODE (x) == PLUS || is_minus)
17855
        {
17856
          rtx base = XEXP (x, 0);
17857
          rtx index = XEXP (x, 1);
17858
          HOST_WIDE_INT offset = 0;
17859
          if (GET_CODE (base) != REG
17860
              || (GET_CODE (index) == REG && REGNO (index) == SP_REGNUM))
17861
            {
17862
              /* Ensure that BASE is a register.  */
17863
              /* (one of them must be).  */
17864
              /* Also ensure that SP is not used as an index register.  */
17865
              rtx temp = base;
17866
              base = index;
17867
              index = temp;
17868
            }
17869
          switch (GET_CODE (index))
17870
            {
17871
            case CONST_INT:
17872
              offset = INTVAL (index);
17873
              if (is_minus)
17874
                offset = -offset;
17875
              asm_fprintf (stream, "[%r, #%wd]",
17876
                           REGNO (base), offset);
17877
              break;
17878
 
17879
            case REG:
17880
              asm_fprintf (stream, "[%r, %s%r]",
17881
                           REGNO (base), is_minus ? "-" : "",
17882
                           REGNO (index));
17883
              break;
17884
 
17885
            case MULT:
17886
            case ASHIFTRT:
17887
            case LSHIFTRT:
17888
            case ASHIFT:
17889
            case ROTATERT:
17890
              {
17891
                asm_fprintf (stream, "[%r, %s%r",
17892
                             REGNO (base), is_minus ? "-" : "",
17893
                             REGNO (XEXP (index, 0)));
17894
                arm_print_operand (stream, index, 'S');
17895
                fputs ("]", stream);
17896
                break;
17897
              }
17898
 
17899
            default:
17900
              gcc_unreachable ();
17901
            }
17902
        }
17903
      else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
17904
               || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
17905
        {
17906
          extern enum machine_mode output_memory_reference_mode;
17907
 
17908
          gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
17909
 
17910
          if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
17911
            asm_fprintf (stream, "[%r, #%s%d]!",
17912
                         REGNO (XEXP (x, 0)),
17913
                         GET_CODE (x) == PRE_DEC ? "-" : "",
17914
                         GET_MODE_SIZE (output_memory_reference_mode));
17915
          else
17916
            asm_fprintf (stream, "[%r], #%s%d",
17917
                         REGNO (XEXP (x, 0)),
17918
                         GET_CODE (x) == POST_DEC ? "-" : "",
17919
                         GET_MODE_SIZE (output_memory_reference_mode));
17920
        }
17921
      else if (GET_CODE (x) == PRE_MODIFY)
17922
        {
17923
          asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
17924
          if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
17925
            asm_fprintf (stream, "#%wd]!",
17926
                         INTVAL (XEXP (XEXP (x, 1), 1)));
17927
          else
17928
            asm_fprintf (stream, "%r]!",
17929
                         REGNO (XEXP (XEXP (x, 1), 1)));
17930
        }
17931
      else if (GET_CODE (x) == POST_MODIFY)
17932
        {
17933
          asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
17934
          if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
17935
            asm_fprintf (stream, "#%wd",
17936
                         INTVAL (XEXP (XEXP (x, 1), 1)));
17937
          else
17938
            asm_fprintf (stream, "%r",
17939
                         REGNO (XEXP (XEXP (x, 1), 1)));
17940
        }
17941
      else output_addr_const (stream, x);
17942
    }
17943
  else
17944
    {
17945
      if (GET_CODE (x) == REG)
17946
        asm_fprintf (stream, "[%r]", REGNO (x));
17947
      else if (GET_CODE (x) == POST_INC)
17948
        asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
17949
      else if (GET_CODE (x) == PLUS)
17950
        {
17951
          gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
17952
          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17953
            asm_fprintf (stream, "[%r, #%wd]",
17954
                         REGNO (XEXP (x, 0)),
17955
                         INTVAL (XEXP (x, 1)));
17956
          else
17957
            asm_fprintf (stream, "[%r, %r]",
17958
                         REGNO (XEXP (x, 0)),
17959
                         REGNO (XEXP (x, 1)));
17960
        }
17961
      else
17962
        output_addr_const (stream, x);
17963
    }
17964
}
17965
 
17966
/* Target hook for indicating whether a punctuation character for
17967
   TARGET_PRINT_OPERAND is valid.  */
17968
static bool
17969
arm_print_operand_punct_valid_p (unsigned char code)
17970
{
17971
  return (code == '@' || code == '|' || code == '.'
17972
          || code == '(' || code == ')' || code == '#'
17973
          || (TARGET_32BIT && (code == '?'))
17974
          || (TARGET_THUMB2 && (code == '!'))
17975
          || (TARGET_THUMB && (code == '_')));
17976
}
17977
 
17978
/* Target hook for assembling integer objects.  The ARM version needs to
17979
   handle word-sized values specially.  */
17980
static bool
17981
arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
17982
{
17983
  enum machine_mode mode;
17984
 
17985
  if (size == UNITS_PER_WORD && aligned_p)
17986
    {
17987
      fputs ("\t.word\t", asm_out_file);
17988
      output_addr_const (asm_out_file, x);
17989
 
17990
      /* Mark symbols as position independent.  We only do this in the
17991
         .text segment, not in the .data segment.  */
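      /* For example (illustrative), with -fPIC a constant-pool entry
         referring to a global symbol ends up as ".word sym(GOT)", while
         a local symbol is emitted as ".word sym(GOTOFF)".  */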
17992
      if (NEED_GOT_RELOC && flag_pic && making_const_table &&
17993
          (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
17994
        {
17995
          /* See legitimize_pic_address for an explanation of the
17996
             TARGET_VXWORKS_RTP check.  */
17997
          if (TARGET_VXWORKS_RTP
17998
              || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
17999
            fputs ("(GOT)", asm_out_file);
18000
          else
18001
            fputs ("(GOTOFF)", asm_out_file);
18002
        }
18003
      fputc ('\n', asm_out_file);
18004
      return true;
18005
    }
18006
 
18007
  mode = GET_MODE (x);
18008
 
18009
  if (arm_vector_mode_supported_p (mode))
18010
    {
18011
      int i, units;
18012
 
18013
      gcc_assert (GET_CODE (x) == CONST_VECTOR);
18014
 
18015
      units = CONST_VECTOR_NUNITS (x);
18016
      size = GET_MODE_SIZE (GET_MODE_INNER (mode));
18017
 
18018
      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
18019
        for (i = 0; i < units; i++)
18020
          {
18021
            rtx elt = CONST_VECTOR_ELT (x, i);
18022
            assemble_integer
18023
              (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
18024
          }
18025
      else
18026
        for (i = 0; i < units; i++)
18027
          {
18028
            rtx elt = CONST_VECTOR_ELT (x, i);
18029
            REAL_VALUE_TYPE rval;
18030
 
18031
            REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
18032
 
18033
            assemble_real
18034
              (rval, GET_MODE_INNER (mode),
18035
              i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
18036
          }
18037
 
18038
      return true;
18039
    }
18040
 
18041
  return default_assemble_integer (x, size, aligned_p);
18042
}
18043
 
18044
static void
18045
arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
18046
{
18047
  section *s;
18048
 
18049
  if (!TARGET_AAPCS_BASED)
18050
    {
18051
      (is_ctor ?
18052
       default_named_section_asm_out_constructor
18053
       : default_named_section_asm_out_destructor) (symbol, priority);
18054
      return;
18055
    }
18056
 
18057
  /* Put these in the .init_array section, using a special relocation.  */
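  /* For example (an illustrative sketch), a constructor registered with
     priority 101 on an AAPCS target is emitted roughly as

        .section  .init_array.00101
        .align    2
        .word     my_ctor(target1)

     where "my_ctor" stands for the actual symbol and "(target1)" asks the
     linker to apply an R_ARM_TARGET1 relocation.  */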
18058
  if (priority != DEFAULT_INIT_PRIORITY)
18059
    {
18060
      char buf[18];
18061
      sprintf (buf, "%s.%.5u",
18062
               is_ctor ? ".init_array" : ".fini_array",
18063
               priority);
18064
      s = get_section (buf, SECTION_WRITE, NULL_TREE);
18065
    }
18066
  else if (is_ctor)
18067
    s = ctors_section;
18068
  else
18069
    s = dtors_section;
18070
 
18071
  switch_to_section (s);
18072
  assemble_align (POINTER_SIZE);
18073
  fputs ("\t.word\t", asm_out_file);
18074
  output_addr_const (asm_out_file, symbol);
18075
  fputs ("(target1)\n", asm_out_file);
18076
}
18077
 
18078
/* Add a function to the list of static constructors.  */
18079
 
18080
static void
18081
arm_elf_asm_constructor (rtx symbol, int priority)
18082
{
18083
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
18084
}
18085
 
18086
/* Add a function to the list of static destructors.  */
18087
 
18088
static void
18089
arm_elf_asm_destructor (rtx symbol, int priority)
18090
{
18091
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
18092
}
18093
 
18094
/* A finite state machine takes care of noticing whether or not instructions
18095
   can be conditionally executed, and thus decreases execution time and code
18096
   size by deleting branch instructions.  The fsm is controlled by
18097
   final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE.  */
18098
 
18099
/* The states of the fsm controlling condition codes are:
18100
   0: normal, do nothing special
18101
   1: make ASM_OUTPUT_OPCODE not output this instruction
18102
   2: make ASM_OUTPUT_OPCODE not output this instruction
18103
   3: make instructions conditional
18104
   4: make instructions conditional
18105
 
18106
   State transitions (state->state by whom under condition):
18107
   0 -> 1 final_prescan_insn if the conditional branch is eliminated.
18108
   0 -> 2 final_prescan_insn if the conditional branch is preserved.
18109
   1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
18110
   2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
18111
   3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
18112
          (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
18113
   4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
18114
          (the target insn is arm_target_insn).
18115
 
18116
   If the jump clobbers the conditions then we use states 2 and 4.
18117
 
18118
   A similar thing can be done with conditional return insns.
18119
 
18120
   XXX In case the `target' is an unconditional branch, this conditionalising
18121
   of the instructions always reduces code size, but not always execution
18122
   time.  But then, I want to reduce the code size to somewhere near what
18123
   /bin/cc produces.  */
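/* As a sketch of the transformation (hypothetical code), the sequence

        cmp     r0, #0
        beq     .L1
        add     r1, r1, #1
        mov     r2, #0
     .L1:

   can instead be emitted with the branch suppressed and the two skipped
   instructions made conditional on the inverse condition, i.e. as
   "addne r1, r1, #1" followed by "movne r2, #0".  */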
18124
 
18125
/* In addition to this, state is maintained for Thumb-2 COND_EXEC
18126
   instructions.  When a COND_EXEC instruction is seen the subsequent
18127
   instructions are scanned so that multiple conditional instructions can be
18128
   combined into a single IT block.  arm_condexec_count and arm_condexec_mask
18129
   specify the length and true/false mask for the IT block.  These will be
18130
   decremented/zeroed by arm_asm_output_opcode as the insns are output.  */
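/* For instance (a sketch), two adjacent COND_EXEC insns predicated on EQ
   and NE can be merged into a single two-instruction IT block and emitted
   as

        ite     eq
        moveq   r0, #1
        movne   r0, #0

   instead of as two separate one-instruction blocks.  */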
18131
 
18132
/* Returns the index of the ARM condition code string in
18133
   `arm_condition_codes', or ARM_NV if the comparison is invalid.
18134
   COMPARISON should be an rtx like `(eq (...) (...))'.  */
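/* For example, an rtx such as (eq (reg:CC CC_REGNUM) (const_int 0)) maps to
   ARM_EQ, while comparisons with no ARM encoding, such as UNEQ or LTGT on a
   CCFPmode value, yield ARM_NV.  */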
18135
 
18136
enum arm_cond_code
18137
maybe_get_arm_condition_code (rtx comparison)
18138
{
18139
  enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
18140
  enum arm_cond_code code;
18141
  enum rtx_code comp_code = GET_CODE (comparison);
18142
 
18143
  if (GET_MODE_CLASS (mode) != MODE_CC)
18144
    mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
18145
                           XEXP (comparison, 1));
18146
 
18147
  switch (mode)
18148
    {
18149
    case CC_DNEmode: code = ARM_NE; goto dominance;
18150
    case CC_DEQmode: code = ARM_EQ; goto dominance;
18151
    case CC_DGEmode: code = ARM_GE; goto dominance;
18152
    case CC_DGTmode: code = ARM_GT; goto dominance;
18153
    case CC_DLEmode: code = ARM_LE; goto dominance;
18154
    case CC_DLTmode: code = ARM_LT; goto dominance;
18155
    case CC_DGEUmode: code = ARM_CS; goto dominance;
18156
    case CC_DGTUmode: code = ARM_HI; goto dominance;
18157
    case CC_DLEUmode: code = ARM_LS; goto dominance;
18158
    case CC_DLTUmode: code = ARM_CC;
18159
 
18160
    dominance:
18161
      if (comp_code == EQ)
18162
        return ARM_INVERSE_CONDITION_CODE (code);
18163
      if (comp_code == NE)
18164
        return code;
18165
      return ARM_NV;
18166
 
18167
    case CC_NOOVmode:
18168
      switch (comp_code)
18169
        {
18170
        case NE: return ARM_NE;
18171
        case EQ: return ARM_EQ;
18172
        case GE: return ARM_PL;
18173
        case LT: return ARM_MI;
18174
        default: return ARM_NV;
18175
        }
18176
 
18177
    case CC_Zmode:
18178
      switch (comp_code)
18179
        {
18180
        case NE: return ARM_NE;
18181
        case EQ: return ARM_EQ;
18182
        default: return ARM_NV;
18183
        }
18184
 
18185
    case CC_Nmode:
18186
      switch (comp_code)
18187
        {
18188
        case NE: return ARM_MI;
18189
        case EQ: return ARM_PL;
18190
        default: return ARM_NV;
18191
        }
18192
 
18193
    case CCFPEmode:
18194
    case CCFPmode:
18195
      /* These encodings assume that AC=1 in the FPA system control
18196
         byte.  This allows us to handle all cases except UNEQ and
18197
         LTGT.  */
18198
      switch (comp_code)
18199
        {
18200
        case GE: return ARM_GE;
18201
        case GT: return ARM_GT;
18202
        case LE: return ARM_LS;
18203
        case LT: return ARM_MI;
18204
        case NE: return ARM_NE;
18205
        case EQ: return ARM_EQ;
18206
        case ORDERED: return ARM_VC;
18207
        case UNORDERED: return ARM_VS;
18208
        case UNLT: return ARM_LT;
18209
        case UNLE: return ARM_LE;
18210
        case UNGT: return ARM_HI;
18211
        case UNGE: return ARM_PL;
18212
          /* UNEQ and LTGT do not have a representation.  */
18213
        case UNEQ: /* Fall through.  */
18214
        case LTGT: /* Fall through.  */
18215
        default: return ARM_NV;
18216
        }
18217
 
18218
    case CC_SWPmode:
18219
      switch (comp_code)
18220
        {
18221
        case NE: return ARM_NE;
18222
        case EQ: return ARM_EQ;
18223
        case GE: return ARM_LE;
18224
        case GT: return ARM_LT;
18225
        case LE: return ARM_GE;
18226
        case LT: return ARM_GT;
18227
        case GEU: return ARM_LS;
18228
        case GTU: return ARM_CC;
18229
        case LEU: return ARM_CS;
18230
        case LTU: return ARM_HI;
18231
        default: return ARM_NV;
18232
        }
18233
 
18234
    case CC_Cmode:
18235
      switch (comp_code)
18236
        {
18237
        case LTU: return ARM_CS;
18238
        case GEU: return ARM_CC;
18239
        default: return ARM_NV;
18240
        }
18241
 
18242
    case CC_CZmode:
18243
      switch (comp_code)
18244
        {
18245
        case NE: return ARM_NE;
18246
        case EQ: return ARM_EQ;
18247
        case GEU: return ARM_CS;
18248
        case GTU: return ARM_HI;
18249
        case LEU: return ARM_LS;
18250
        case LTU: return ARM_CC;
18251
        default: return ARM_NV;
18252
        }
18253
 
18254
    case CC_NCVmode:
18255
      switch (comp_code)
18256
        {
18257
        case GE: return ARM_GE;
18258
        case LT: return ARM_LT;
18259
        case GEU: return ARM_CS;
18260
        case LTU: return ARM_CC;
18261
        default: return ARM_NV;
18262
        }
18263
 
18264
    case CCmode:
18265
      switch (comp_code)
18266
        {
18267
        case NE: return ARM_NE;
18268
        case EQ: return ARM_EQ;
18269
        case GE: return ARM_GE;
18270
        case GT: return ARM_GT;
18271
        case LE: return ARM_LE;
18272
        case LT: return ARM_LT;
18273
        case GEU: return ARM_CS;
18274
        case GTU: return ARM_HI;
18275
        case LEU: return ARM_LS;
18276
        case LTU: return ARM_CC;
18277
        default: return ARM_NV;
18278
        }
18279
 
18280
    default: gcc_unreachable ();
18281
    }
18282
}
18283
 
18284
/* Like maybe_get_arm_condition_code, but never return ARM_NV.  */
18285
static enum arm_cond_code
18286
get_arm_condition_code (rtx comparison)
18287
{
18288
  enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
18289
  gcc_assert (code != ARM_NV);
18290
  return code;
18291
}
18292
 
18293
/* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
18294
   instructions.  */
18295
void
18296
thumb2_final_prescan_insn (rtx insn)
18297
{
18298
  rtx first_insn = insn;
18299
  rtx body = PATTERN (insn);
18300
  rtx predicate;
18301
  enum arm_cond_code code;
18302
  int n;
18303
  int mask;
18304
 
18305
  /* Remove the previous insn from the count of insns to be output.  */
18306
  if (arm_condexec_count)
18307
      arm_condexec_count--;
18308
 
18309
  /* Nothing to do if we are already inside a conditional block.  */
18310
  if (arm_condexec_count)
18311
    return;
18312
 
18313
  if (GET_CODE (body) != COND_EXEC)
18314
    return;
18315
 
18316
  /* Conditional jumps are implemented directly.  */
18317
  if (GET_CODE (insn) == JUMP_INSN)
18318
    return;
18319
 
18320
  predicate = COND_EXEC_TEST (body);
18321
  arm_current_cc = get_arm_condition_code (predicate);
18322
 
18323
  n = get_attr_ce_count (insn);
18324
  arm_condexec_count = 1;
18325
  arm_condexec_mask = (1 << n) - 1;
18326
  arm_condexec_masklen = n;
18327
  /* See if subsequent instructions can be combined into the same block.  */
18328
  for (;;)
18329
    {
18330
      insn = next_nonnote_insn (insn);
18331
 
18332
      /* Jumping into the middle of an IT block is illegal, so a label or
18333
         barrier terminates the block.  */
18334
      if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
18335
        break;
18336
 
18337
      body = PATTERN (insn);
18338
      /* USE and CLOBBER aren't really insns, so just skip them.  */
18339
      if (GET_CODE (body) == USE
18340
          || GET_CODE (body) == CLOBBER)
18341
        continue;
18342
 
18343
      /* ??? Recognize conditional jumps, and combine them with IT blocks.  */
18344
      if (GET_CODE (body) != COND_EXEC)
18345
        break;
18346
      /* Allow up to 4 conditionally executed instructions in a block.  */
18347
      n = get_attr_ce_count (insn);
18348
      if (arm_condexec_masklen + n > 4)
18349
        break;
18350
 
18351
      predicate = COND_EXEC_TEST (body);
18352
      code = get_arm_condition_code (predicate);
18353
      mask = (1 << n) - 1;
18354
      if (arm_current_cc == code)
18355
        arm_condexec_mask |= (mask << arm_condexec_masklen);
18356
      else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
18357
        break;
18358
 
18359
      arm_condexec_count++;
18360
      arm_condexec_masklen += n;
18361
 
18362
      /* A jump must be the last instruction in a conditional block.  */
18363
      if (GET_CODE(insn) == JUMP_INSN)
18364
        break;
18365
    }
18366
  /* Restore recog_data (getting the attributes of other insns can
18367
     destroy this array, but final.c assumes that it remains intact
18368
     across this call).  */
18369
  extract_constrain_insn_cached (first_insn);
18370
}
18371
 
18372
void
18373
arm_final_prescan_insn (rtx insn)
18374
{
18375
  /* BODY will hold the body of INSN.  */
18376
  rtx body = PATTERN (insn);
18377
 
18378
  /* This will be 1 if trying to repeat the trick, and things need to be
18379
     reversed if it appears to fail.  */
18380
  int reverse = 0;
18381
 
18382
  /* If we start with a return insn, we only succeed if we find another one.  */
18383
  int seeking_return = 0;
18384
  enum rtx_code return_code = UNKNOWN;
18385
 
18386
  /* START_INSN will hold the insn from where we start looking.  This is the
18387
     first insn after the following code_label if REVERSE is true.  */
18388
  rtx start_insn = insn;
18389
 
18390
  /* If in state 4, check if the target branch is reached, in order to
18391
     change back to state 0.  */
18392
  if (arm_ccfsm_state == 4)
18393
    {
18394
      if (insn == arm_target_insn)
18395
        {
18396
          arm_target_insn = NULL;
18397
          arm_ccfsm_state = 0;
18398
        }
18399
      return;
18400
    }
18401
 
18402
  /* If in state 3, it is possible to repeat the trick, if this insn is an
18403
     unconditional branch to a label, and immediately following this branch
18404
     is the previous target label which is only used once, and the label this
18405
     branch jumps to is not too far off.  */
18406
  if (arm_ccfsm_state == 3)
18407
    {
18408
      if (simplejump_p (insn))
18409
        {
18410
          start_insn = next_nonnote_insn (start_insn);
18411
          if (GET_CODE (start_insn) == BARRIER)
18412
            {
18413
              /* XXX Isn't this always a barrier?  */
18414
              start_insn = next_nonnote_insn (start_insn);
18415
            }
18416
          if (GET_CODE (start_insn) == CODE_LABEL
18417
              && CODE_LABEL_NUMBER (start_insn) == arm_target_label
18418
              && LABEL_NUSES (start_insn) == 1)
18419
            reverse = TRUE;
18420
          else
18421
            return;
18422
        }
18423
      else if (ANY_RETURN_P (body))
18424
        {
18425
          start_insn = next_nonnote_insn (start_insn);
18426
          if (GET_CODE (start_insn) == BARRIER)
18427
            start_insn = next_nonnote_insn (start_insn);
18428
          if (GET_CODE (start_insn) == CODE_LABEL
18429
              && CODE_LABEL_NUMBER (start_insn) == arm_target_label
18430
              && LABEL_NUSES (start_insn) == 1)
18431
            {
18432
              reverse = TRUE;
18433
              seeking_return = 1;
18434
              return_code = GET_CODE (body);
18435
            }
18436
          else
18437
            return;
18438
        }
18439
      else
18440
        return;
18441
    }
18442
 
18443
  gcc_assert (!arm_ccfsm_state || reverse);
18444
  if (GET_CODE (insn) != JUMP_INSN)
18445
    return;
18446
 
18447
  /* This jump might be paralleled with a clobber of the condition codes;
18448
     the jump should always come first.  */
18449
  if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
18450
    body = XVECEXP (body, 0, 0);
18451
 
18452
  if (reverse
18453
      || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
18454
          && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
18455
    {
18456
      int insns_skipped;
18457
      int fail = FALSE, succeed = FALSE;
18458
      /* Flag which part of the IF_THEN_ELSE is the LABEL_REF.  */
18459
      int then_not_else = TRUE;
18460
      rtx this_insn = start_insn, label = 0;
18461
 
18462
      /* Register the insn jumped to.  */
18463
      if (reverse)
18464
        {
18465
          if (!seeking_return)
18466
            label = XEXP (SET_SRC (body), 0);
18467
        }
18468
      else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
18469
        label = XEXP (XEXP (SET_SRC (body), 1), 0);
18470
      else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
18471
        {
18472
          label = XEXP (XEXP (SET_SRC (body), 2), 0);
18473
          then_not_else = FALSE;
18474
        }
18475
      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
18476
        {
18477
          seeking_return = 1;
18478
          return_code = GET_CODE (XEXP (SET_SRC (body), 1));
18479
        }
18480
      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
18481
        {
18482
          seeking_return = 1;
18483
          return_code = GET_CODE (XEXP (SET_SRC (body), 2));
18484
          then_not_else = FALSE;
18485
        }
18486
      else
18487
        gcc_unreachable ();
18488
 
18489
      /* See how many insns this branch skips, and what kind of insns.  If all
18490
         insns are okay, and the label or unconditional branch to the same
18491
         label is not too far away, succeed.  */
18492
      for (insns_skipped = 0;
18493
           !fail && !succeed && insns_skipped++ < max_insns_skipped;)
18494
        {
18495
          rtx scanbody;
18496
 
18497
          this_insn = next_nonnote_insn (this_insn);
18498
          if (!this_insn)
18499
            break;
18500
 
18501
          switch (GET_CODE (this_insn))
18502
            {
18503
            case CODE_LABEL:
18504
              /* Succeed if it is the target label, otherwise fail since
18505
                 control falls in from somewhere else.  */
18506
              if (this_insn == label)
18507
                {
18508
                  arm_ccfsm_state = 1;
18509
                  succeed = TRUE;
18510
                }
18511
              else
18512
                fail = TRUE;
18513
              break;
18514
 
18515
            case BARRIER:
18516
              /* Succeed if the following insn is the target label.
18517
                 Otherwise fail.
18518
                 If return insns are used then the last insn in a function
18519
                 will be a barrier.  */
18520
              this_insn = next_nonnote_insn (this_insn);
18521
              if (this_insn && this_insn == label)
18522
                {
18523
                  arm_ccfsm_state = 1;
18524
                  succeed = TRUE;
18525
                }
18526
              else
18527
                fail = TRUE;
18528
              break;
18529
 
18530
            case CALL_INSN:
18531
              /* The AAPCS says that conditional calls should not be
18532
                 used since they make interworking inefficient (the
18533
                 linker can't transform BL<cond> into BLX).  That's
18534
                 only a problem if the machine has BLX.  */
18535
              if (arm_arch5)
18536
                {
18537
                  fail = TRUE;
18538
                  break;
18539
                }
18540
 
18541
              /* Succeed if the following insn is the target label, or
18542
                 if the following two insns are a barrier and the
18543
                 target label.  */
18544
              this_insn = next_nonnote_insn (this_insn);
18545
              if (this_insn && GET_CODE (this_insn) == BARRIER)
18546
                this_insn = next_nonnote_insn (this_insn);
18547
 
18548
              if (this_insn && this_insn == label
18549
                  && insns_skipped < max_insns_skipped)
18550
                {
18551
                  arm_ccfsm_state = 1;
18552
                  succeed = TRUE;
18553
                }
18554
              else
18555
                fail = TRUE;
18556
              break;
18557
 
18558
            case JUMP_INSN:
18559
              /* If this is an unconditional branch to the same label, succeed.
18560
                 If it is to another label, do nothing.  If it is conditional,
18561
                 fail.  */
18562
              /* XXX Probably, the tests for SET and the PC are
18563
                 unnecessary.  */
18564
 
18565
              scanbody = PATTERN (this_insn);
18566
              if (GET_CODE (scanbody) == SET
18567
                  && GET_CODE (SET_DEST (scanbody)) == PC)
18568
                {
18569
                  if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
18570
                      && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
18571
                    {
18572
                      arm_ccfsm_state = 2;
18573
                      succeed = TRUE;
18574
                    }
18575
                  else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
18576
                    fail = TRUE;
18577
                }
18578
              /* Fail if a conditional return is undesirable (e.g. on a
18579
                 StrongARM), but still allow this if optimizing for size.  */
18580
              else if (GET_CODE (scanbody) == return_code
18581
                       && !use_return_insn (TRUE, NULL)
18582
                       && !optimize_size)
18583
                fail = TRUE;
18584
              else if (GET_CODE (scanbody) == return_code)
18585
                {
18586
                  arm_ccfsm_state = 2;
18587
                  succeed = TRUE;
18588
                }
18589
              else if (GET_CODE (scanbody) == PARALLEL)
18590
                {
18591
                  switch (get_attr_conds (this_insn))
18592
                    {
18593
                    case CONDS_NOCOND:
18594
                      break;
18595
                    default:
18596
                      fail = TRUE;
18597
                      break;
18598
                    }
18599
                }
18600
              else
18601
                fail = TRUE;    /* Unrecognized jump (e.g. epilogue).  */
18602
 
18603
              break;
18604
 
18605
            case INSN:
18606
              /* Instructions using or affecting the condition codes make it
18607
                 fail.  */
18608
              scanbody = PATTERN (this_insn);
18609
              if (!(GET_CODE (scanbody) == SET
18610
                    || GET_CODE (scanbody) == PARALLEL)
18611
                  || get_attr_conds (this_insn) != CONDS_NOCOND)
18612
                fail = TRUE;
18613
 
18614
              /* A conditional cirrus instruction must be followed by
18615
                 a non Cirrus instruction.  However, since we
18616
                 conditionalize instructions in this function and by
18617
                 the time we get here we can't add instructions
18618
                 (nops), because shorten_branches() has already been
18619
                 called, we will disable conditionalizing Cirrus
18620
                 instructions to be safe.  */
18621
              if (GET_CODE (scanbody) != USE
18622
                  && GET_CODE (scanbody) != CLOBBER
18623
                  && get_attr_cirrus (this_insn) != CIRRUS_NOT)
18624
                fail = TRUE;
18625
              break;
18626
 
18627
            default:
18628
              break;
18629
            }
18630
        }
18631
      if (succeed)
18632
        {
18633
          if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
18634
            arm_target_label = CODE_LABEL_NUMBER (label);
18635
          else
18636
            {
18637
              gcc_assert (seeking_return || arm_ccfsm_state == 2);
18638
 
18639
              while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
18640
                {
18641
                  this_insn = next_nonnote_insn (this_insn);
18642
                  gcc_assert (!this_insn
18643
                              || (GET_CODE (this_insn) != BARRIER
18644
                                  && GET_CODE (this_insn) != CODE_LABEL));
18645
                }
18646
              if (!this_insn)
18647
                {
18648
                  /* Oh, dear! we ran off the end.. give up.  */
18649
                  extract_constrain_insn_cached (insn);
18650
                  arm_ccfsm_state = 0;
18651
                  arm_target_insn = NULL;
18652
                  return;
18653
                }
18654
              arm_target_insn = this_insn;
18655
            }
18656
 
18657
          /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
18658
             what it was.  */
18659
          if (!reverse)
18660
            arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
18661
 
18662
          if (reverse || then_not_else)
18663
            arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
18664
        }
18665
 
18666
      /* Restore recog_data (getting the attributes of other insns can
18667
         destroy this array, but final.c assumes that it remains intact
18668
         across this call).  */
18669
      extract_constrain_insn_cached (insn);
18670
    }
18671
}
18672
 
18673
/* Output IT instructions.  */
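/* For example, with arm_current_cc == ARM_EQ, arm_condexec_masklen == 3 and
   arm_condexec_mask == 0x5, the loop below builds the string "tet" and
   emits "itet eq" in front of the first instruction of the block.  */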
18674
void
18675
thumb2_asm_output_opcode (FILE * stream)
18676
{
18677
  char buff[5];
18678
  int n;
18679
 
18680
  if (arm_condexec_mask)
18681
    {
18682
      for (n = 0; n < arm_condexec_masklen; n++)
18683
        buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
18684
      buff[n] = 0;
18685
      asm_fprintf(stream, "i%s\t%s\n\t", buff,
18686
                  arm_condition_codes[arm_current_cc]);
18687
      arm_condexec_mask = 0;
18688
    }
18689
}
18690
 
18691
/* Returns true if REGNO is a valid register
18692
   for holding a quantity of type MODE.  */
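/* For example, with TARGET_LDRD a DImode value may live in the core
   registers only if it starts on an even register number (so that ldrd and
   strd can be used), while a DFmode value is accepted in a VFP register
   only when VFP_REGNO_OK_FOR_DOUBLE holds for that register.  */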
18693
int
18694
arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
18695
{
18696
  if (GET_MODE_CLASS (mode) == MODE_CC)
18697
    return (regno == CC_REGNUM
18698
            || (TARGET_HARD_FLOAT && TARGET_VFP
18699
                && regno == VFPCC_REGNUM));
18700
 
18701
  if (TARGET_THUMB1)
18702
    /* For the Thumb we only allow values bigger than SImode in
18703
       registers 0 - 6, so that there is always a second low
18704
       register available to hold the upper part of the value.
18705
       We probably ought to ensure that the register is the
18706
       start of an even numbered register pair.  */
18707
    return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
18708
 
18709
  if (TARGET_HARD_FLOAT && TARGET_MAVERICK
18710
      && IS_CIRRUS_REGNUM (regno))
18711
    /* We have outlawed SI values in Cirrus registers because they
18712
       reside in the lower 32 bits, but SF values reside in the
18713
       upper 32 bits.  This causes gcc all sorts of grief.  We can't
18714
       even split the registers into pairs because Cirrus SI values
18715
       get sign extended to 64 bits -- aldyh.  */
18716
    return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
18717
 
18718
  if (TARGET_HARD_FLOAT && TARGET_VFP
18719
      && IS_VFP_REGNUM (regno))
18720
    {
18721
      if (mode == SFmode || mode == SImode)
18722
        return VFP_REGNO_OK_FOR_SINGLE (regno);
18723
 
18724
      if (mode == DFmode)
18725
        return VFP_REGNO_OK_FOR_DOUBLE (regno);
18726
 
18727
      /* VFP registers can hold HFmode values, but there is no point in
18728
         putting them there unless we have hardware conversion insns. */
18729
      if (mode == HFmode)
18730
        return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
18731
 
18732
      if (TARGET_NEON)
18733
        return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
18734
               || (VALID_NEON_QREG_MODE (mode)
18735
                   && NEON_REGNO_OK_FOR_QUAD (regno))
18736
               || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
18737
               || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
18738
               || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
18739
               || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
18740
               || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
18741
 
18742
      return FALSE;
18743
    }
18744
 
18745
  if (TARGET_REALLY_IWMMXT)
18746
    {
18747
      if (IS_IWMMXT_GR_REGNUM (regno))
18748
        return mode == SImode;
18749
 
18750
      if (IS_IWMMXT_REGNUM (regno))
18751
        return VALID_IWMMXT_REG_MODE (mode);
18752
    }
18753
 
18754
  /* We allow almost any value to be stored in the general registers.
18755
     Restrict doubleword quantities to even register pairs so that we can
18756
     use ldrd.  Do not allow very large Neon structure opaque modes in
18757
     general registers; they would use too many.  */
18758
  if (regno <= LAST_ARM_REGNUM)
18759
    return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
18760
      && ARM_NUM_REGS (mode) <= 4;
18761
 
18762
  if (regno == FRAME_POINTER_REGNUM
18763
      || regno == ARG_POINTER_REGNUM)
18764
    /* We only allow integers in the fake hard registers.  */
18765
    return GET_MODE_CLASS (mode) == MODE_INT;
18766
 
18767
  /* The only registers left are the FPA registers
18768
     which we only allow to hold FP values.  */
18769
  return (TARGET_HARD_FLOAT && TARGET_FPA
18770
          && GET_MODE_CLASS (mode) == MODE_FLOAT
18771
          && regno >= FIRST_FPA_REGNUM
18772
          && regno <= LAST_FPA_REGNUM);
18773
}
18774
 
18775
/* Implement MODES_TIEABLE_P.  */
18776
 
18777
bool
18778
arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
18779
{
18780
  if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
18781
    return true;
18782
 
18783
  /* We specifically want to allow elements of "structure" modes to
18784
     be tieable to the structure.  This more general condition allows
18785
     other rarer situations too.  */
18786
  if (TARGET_NEON
18787
      && (VALID_NEON_DREG_MODE (mode1)
18788
          || VALID_NEON_QREG_MODE (mode1)
18789
          || VALID_NEON_STRUCT_MODE (mode1))
18790
      && (VALID_NEON_DREG_MODE (mode2)
18791
          || VALID_NEON_QREG_MODE (mode2)
18792
          || VALID_NEON_STRUCT_MODE (mode2)))
18793
    return true;
18794
 
18795
  return false;
18796
}
18797
 
18798
/* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
18799
   not used in arm mode.  */
18800
 
18801
enum reg_class
18802
arm_regno_class (int regno)
18803
{
18804
  if (TARGET_THUMB1)
18805
    {
18806
      if (regno == STACK_POINTER_REGNUM)
18807
        return STACK_REG;
18808
      if (regno == CC_REGNUM)
18809
        return CC_REG;
18810
      if (regno < 8)
18811
        return LO_REGS;
18812
      return HI_REGS;
18813
    }
18814
 
18815
  if (TARGET_THUMB2 && regno < 8)
18816
    return LO_REGS;
18817
 
18818
  if (   regno <= LAST_ARM_REGNUM
18819
      || regno == FRAME_POINTER_REGNUM
18820
      || regno == ARG_POINTER_REGNUM)
18821
    return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
18822
 
18823
  if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
18824
    return TARGET_THUMB2 ? CC_REG : NO_REGS;
18825
 
18826
  if (IS_CIRRUS_REGNUM (regno))
18827
    return CIRRUS_REGS;
18828
 
18829
  if (IS_VFP_REGNUM (regno))
18830
    {
18831
      if (regno <= D7_VFP_REGNUM)
18832
        return VFP_D0_D7_REGS;
18833
      else if (regno <= LAST_LO_VFP_REGNUM)
18834
        return VFP_LO_REGS;
18835
      else
18836
        return VFP_HI_REGS;
18837
    }
18838
 
18839
  if (IS_IWMMXT_REGNUM (regno))
18840
    return IWMMXT_REGS;
18841
 
18842
  if (IS_IWMMXT_GR_REGNUM (regno))
18843
    return IWMMXT_GR_REGS;
18844
 
18845
  return FPA_REGS;
18846
}
18847
 
18848
/* Handle a special case when computing the offset
18849
   of an argument from the frame pointer.  */
18850
int
18851
arm_debugger_arg_offset (int value, rtx addr)
18852
{
18853
  rtx insn;
18854
 
18855
  /* We are only interested if dbxout_parms() failed to compute the offset.  */
18856
  if (value != 0)
18857
    return 0;
18858
 
18859
  /* We can only cope with the case where the address is held in a register.  */
18860
  if (GET_CODE (addr) != REG)
18861
    return 0;
18862
 
18863
  /* If we are using the frame pointer to point at the argument, then
18864
     an offset of 0 is correct.  */
18865
  if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
18866
    return 0;
18867
 
18868
  /* If we are using the stack pointer to point at the
18869
     argument, then an offset of 0 is correct.  */
18870
  /* ??? Check this is consistent with thumb2 frame layout.  */
18871
  if ((TARGET_THUMB || !frame_pointer_needed)
18872
      && REGNO (addr) == SP_REGNUM)
18873
    return 0;
18874
 
18875
  /* Oh dear.  The argument is pointed to by a register rather
18876
     than being held in a register, or being stored at a known
18877
     offset from the frame pointer.  Since GDB only understands
18878
     those two kinds of argument we must translate the address
18879
     held in the register into an offset from the frame pointer.
18880
     We do this by searching through the insns for the function
18881
     looking to see where this register gets its value.  If the
18882
     register is initialized from the frame pointer plus an offset
18883
     then we are in luck and we can continue, otherwise we give up.
18884
 
18885
     This code is exercised by producing debugging information
18886
     for a function with arguments like this:
18887
 
18888
           double func (double a, double b, int c, double d) {return d;}
18889
 
18890
     Without this code the stab for parameter 'd' will be set to
18891
     an offset of 0 from the frame pointer, rather than 8.  */
18892
 
18893
  /* The if() statement says:
18894
 
18895
     If the insn is a normal instruction
18896
     and if the insn is setting the value in a register
18897
     and if the register being set is the register holding the address of the argument
18898
     and if the address is computed by an addition
18899
     that involves adding to a register
18900
     which is the frame pointer
18901
     a constant integer
18902
 
18903
     then...  */
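  /* In RTL terms the test below looks for an insn of the shape

         (set (reg Rn)
              (plus (reg HARD_FRAME_POINTER_REGNUM) (const_int OFFSET)))

     where Rn is the (hypothetical) register ADDR that holds the argument's
     address; OFFSET then becomes the value reported to the debugger.  */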
18904
 
18905
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
18906
    {
18907
      if (   GET_CODE (insn) == INSN
18908
          && GET_CODE (PATTERN (insn)) == SET
18909
          && REGNO    (XEXP (PATTERN (insn), 0)) == REGNO (addr)
18910
          && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
18911
          && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
18912
          && REGNO    (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
18913
          && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
18914
             )
18915
        {
18916
          value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
18917
 
18918
          break;
18919
        }
18920
    }
18921
 
18922
  if (value == 0)
18923
    {
18924
      debug_rtx (addr);
18925
      warning (0, "unable to compute real location of stacked parameter");
18926
      value = 8; /* XXX magic hack */
18927
    }
18928
 
18929
  return value;
18930
}
18931
 
18932
typedef enum {
18933
  T_V8QI,
18934
  T_V4HI,
18935
  T_V2SI,
18936
  T_V2SF,
18937
  T_DI,
18938
  T_V16QI,
18939
  T_V8HI,
18940
  T_V4SI,
18941
  T_V4SF,
18942
  T_V2DI,
18943
  T_TI,
18944
  T_EI,
18945
  T_OI,
18946
  T_MAX         /* Size of enum.  Keep last.  */
18947
} neon_builtin_type_mode;
18948
 
18949
#define TYPE_MODE_BIT(X) (1 << (X))
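/* TB_DREG and TB_QREG below collect the type-mode bits for, respectively,
   the 64-bit (D register) and 128-bit (Q register) vector modes above.  */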
18950
 
18951
#define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI)        \
18952
                 | TYPE_MODE_BIT (T_V2SI) | TYPE_MODE_BIT (T_V2SF)      \
18953
                 | TYPE_MODE_BIT (T_DI))
18954
#define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI)       \
18955
                 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF)      \
18956
                 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
18957
 
18958
#define v8qi_UP  T_V8QI
18959
#define v4hi_UP  T_V4HI
18960
#define v2si_UP  T_V2SI
18961
#define v2sf_UP  T_V2SF
18962
#define di_UP    T_DI
18963
#define v16qi_UP T_V16QI
18964
#define v8hi_UP  T_V8HI
18965
#define v4si_UP  T_V4SI
18966
#define v4sf_UP  T_V4SF
18967
#define v2di_UP  T_V2DI
18968
#define ti_UP    T_TI
18969
#define ei_UP    T_EI
18970
#define oi_UP    T_OI
18971
 
18972
#define UP(X) X##_UP
18973
 
18974
typedef enum {
18975
  NEON_BINOP,
18976
  NEON_TERNOP,
18977
  NEON_UNOP,
18978
  NEON_GETLANE,
18979
  NEON_SETLANE,
18980
  NEON_CREATE,
18981
  NEON_DUP,
18982
  NEON_DUPLANE,
18983
  NEON_COMBINE,
18984
  NEON_SPLIT,
18985
  NEON_LANEMUL,
18986
  NEON_LANEMULL,
18987
  NEON_LANEMULH,
18988
  NEON_LANEMAC,
18989
  NEON_SCALARMUL,
18990
  NEON_SCALARMULL,
18991
  NEON_SCALARMULH,
18992
  NEON_SCALARMAC,
18993
  NEON_CONVERT,
18994
  NEON_FIXCONV,
18995
  NEON_SELECT,
18996
  NEON_RESULTPAIR,
18997
  NEON_REINTERP,
18998
  NEON_VTBL,
18999
  NEON_VTBX,
19000
  NEON_LOAD1,
19001
  NEON_LOAD1LANE,
19002
  NEON_STORE1,
19003
  NEON_STORE1LANE,
19004
  NEON_LOADSTRUCT,
19005
  NEON_LOADSTRUCTLANE,
19006
  NEON_STORESTRUCT,
19007
  NEON_STORESTRUCTLANE,
19008
  NEON_LOGICBINOP,
19009
  NEON_SHIFTINSERT,
19010
  NEON_SHIFTIMM,
19011
  NEON_SHIFTACC
19012
} neon_itype;
19013
 
19014
typedef struct {
19015
  const char *name;
19016
  const neon_itype itype;
19017
  const neon_builtin_type_mode mode;
19018
  const enum insn_code code;
19019
  unsigned int fcode;
19020
} neon_builtin_datum;
19021
 
19022
#define CF(N,X) CODE_FOR_neon_##N##X
19023
 
19024
#define VAR1(T, N, A) \
19025
  {#N, NEON_##T, UP (A), CF (N, A), 0}
19026
#define VAR2(T, N, A, B) \
19027
  VAR1 (T, N, A), \
19028
  {#N, NEON_##T, UP (B), CF (N, B), 0}
19029
#define VAR3(T, N, A, B, C) \
19030
  VAR2 (T, N, A, B), \
19031
  {#N, NEON_##T, UP (C), CF (N, C), 0}
19032
#define VAR4(T, N, A, B, C, D) \
19033
  VAR3 (T, N, A, B, C), \
19034
  {#N, NEON_##T, UP (D), CF (N, D), 0}
19035
#define VAR5(T, N, A, B, C, D, E) \
19036
  VAR4 (T, N, A, B, C, D), \
19037
  {#N, NEON_##T, UP (E), CF (N, E), 0}
19038
#define VAR6(T, N, A, B, C, D, E, F) \
19039
  VAR5 (T, N, A, B, C, D, E), \
19040
  {#N, NEON_##T, UP (F), CF (N, F), 0}
19041
#define VAR7(T, N, A, B, C, D, E, F, G) \
19042
  VAR6 (T, N, A, B, C, D, E, F), \
19043
  {#N, NEON_##T, UP (G), CF (N, G), 0}
19044
#define VAR8(T, N, A, B, C, D, E, F, G, H) \
19045
  VAR7 (T, N, A, B, C, D, E, F, G), \
19046
  {#N, NEON_##T, UP (H), CF (N, H), 0}
19047
#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
19048
  VAR8 (T, N, A, B, C, D, E, F, G, H), \
19049
  {#N, NEON_##T, UP (I), CF (N, I), 0}
19050
#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
19051
  VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
19052
  {#N, NEON_##T, UP (J), CF (N, J), 0}
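/* As a sketch of how these macros expand, a hypothetical entry

     VAR2 (BINOP, vadd, v8qi, v4hi)

   would produce the two initializers

     {"vadd", NEON_BINOP, T_V8QI, CODE_FOR_neon_vaddv8qi, 0},
     {"vadd", NEON_BINOP, T_V4HI, CODE_FOR_neon_vaddv4hi, 0}

   i.e. one neon_builtin_datum per listed mode, keyed off the corresponding
   CODE_FOR_neon_<name><mode> insn pattern.  */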
19053
 
19054
/* The mode entries in the following table correspond to the "key" type of the
19055
   instruction variant, i.e. equivalent to that which would be specified after
19056
   the assembler mnemonic, which usually refers to the last vector operand.
19057
   (Signed/unsigned/polynomial types are not differentiated between though, and
19058
   are all mapped onto the same mode for a given element size.) The modes
19059
   listed per instruction should be the same as those defined for that
19060
   instruction's pattern in neon.md.  */
19061
 
19062
static neon_builtin_datum neon_builtin_data[] =
19063
{
19064
  VAR10 (BINOP, vadd,
19065
         v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19066
  VAR3 (BINOP, vaddl, v8qi, v4hi, v2si),
19067
  VAR3 (BINOP, vaddw, v8qi, v4hi, v2si),
19068
  VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19069
  VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19070
  VAR3 (BINOP, vaddhn, v8hi, v4si, v2di),
19071
  VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19072
  VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19073
  VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si),
19074
  VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19075
  VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si),
19076
  VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si),
19077
  VAR2 (TERNOP, vqdmlal, v4hi, v2si),
19078
  VAR2 (TERNOP, vqdmlsl, v4hi, v2si),
19079
  VAR3 (BINOP, vmull, v8qi, v4hi, v2si),
19080
  VAR2 (SCALARMULL, vmull_n, v4hi, v2si),
19081
  VAR2 (LANEMULL, vmull_lane, v4hi, v2si),
19082
  VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si),
19083
  VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si),
19084
  VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si),
19085
  VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si),
19086
  VAR2 (BINOP, vqdmull, v4hi, v2si),
19087
  VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19088
  VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19089
  VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19090
  VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di),
19091
  VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di),
19092
  VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di),
19093
  VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19094
  VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19095
  VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19096
  VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si),
19097
  VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19098
  VAR10 (BINOP, vsub,
19099
         v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19100
  VAR3 (BINOP, vsubl, v8qi, v4hi, v2si),
19101
  VAR3 (BINOP, vsubw, v8qi, v4hi, v2si),
19102
  VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19103
  VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19104
  VAR3 (BINOP, vsubhn, v8hi, v4si, v2di),
19105
  VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19106
  VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19107
  VAR6 (BINOP, vcgeu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19108
  VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19109
  VAR6 (BINOP, vcgtu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19110
  VAR2 (BINOP, vcage, v2sf, v4sf),
19111
  VAR2 (BINOP, vcagt, v2sf, v4sf),
19112
  VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19113
  VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19114
  VAR3 (BINOP, vabdl, v8qi, v4hi, v2si),
19115
  VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19116
  VAR3 (TERNOP, vabal, v8qi, v4hi, v2si),
19117
  VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19118
  VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19119
  VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf),
19120
  VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19121
  VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19122
  VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf),
19123
  VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf),
19124
  VAR2 (BINOP, vrecps, v2sf, v4sf),
19125
  VAR2 (BINOP, vrsqrts, v2sf, v4sf),
19126
  VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19127
  VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19128
  VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19129
  VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19130
  VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19131
  VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19132
  VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19133
  VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19134
  VAR2 (UNOP, vcnt, v8qi, v16qi),
19135
  VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf),
19136
  VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf),
19137
  VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19138
  /* FIXME: vget_lane supports more variants than this!  */
19139
  VAR10 (GETLANE, vget_lane,
19140
         v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19141
  VAR10 (SETLANE, vset_lane,
19142
         v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19143
  VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di),
19144
  VAR10 (DUP, vdup_n,
19145
         v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19146
  VAR10 (DUPLANE, vdup_lane,
19147
         v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19148
  VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di),
19149
  VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di),
19150
  VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di),
19151
  VAR3 (UNOP, vmovn, v8hi, v4si, v2di),
19152
  VAR3 (UNOP, vqmovn, v8hi, v4si, v2di),
19153
  VAR3 (UNOP, vqmovun, v8hi, v4si, v2di),
19154
  VAR3 (UNOP, vmovl, v8qi, v4hi, v2si),
19155
  VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19156
  VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19157
  VAR2 (LANEMAC, vmlal_lane, v4hi, v2si),
19158
  VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si),
19159
  VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19160
  VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si),
19161
  VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si),
19162
  VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19163
  VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19164
  VAR2 (SCALARMAC, vmlal_n, v4hi, v2si),
19165
  VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si),
19166
  VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19167
  VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si),
19168
  VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si),
19169
  VAR10 (BINOP, vext,
19170
         v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19171
  VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19172
  VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi),
19173
  VAR2 (UNOP, vrev16, v8qi, v16qi),
19174
  VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf),
19175
  VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf),
19176
  VAR10 (SELECT, vbsl,
19177
         v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19178
  VAR1 (VTBL, vtbl1, v8qi),
19179
  VAR1 (VTBL, vtbl2, v8qi),
19180
  VAR1 (VTBL, vtbl3, v8qi),
19181
  VAR1 (VTBL, vtbl4, v8qi),
19182
  VAR1 (VTBX, vtbx1, v8qi),
19183
  VAR1 (VTBX, vtbx2, v8qi),
19184
  VAR1 (VTBX, vtbx3, v8qi),
19185
  VAR1 (VTBX, vtbx4, v8qi),
19186
  VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19187
  VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19188
  VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19189
  VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di),
19190
  VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di),
19191
  VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di),
19192
  VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di),
19193
  VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di),
19194
  VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di),
19195
  VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di),
19196
  VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di),
19197
  VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di),
19198
  VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di),
19199
  VAR10 (LOAD1, vld1,
19200
         v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19201
  VAR10 (LOAD1LANE, vld1_lane,
19202
         v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19203
  VAR10 (LOAD1, vld1_dup,
19204
         v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19205
  VAR10 (STORE1, vst1,
19206
         v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19207
  VAR10 (STORE1LANE, vst1_lane,
19208
         v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19209
  VAR9 (LOADSTRUCT,
19210
        vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19211
  VAR7 (LOADSTRUCTLANE, vld2_lane,
19212
        v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19213
  VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di),
19214
  VAR9 (STORESTRUCT, vst2,
19215
        v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19216
  VAR7 (STORESTRUCTLANE, vst2_lane,
19217
        v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19218
  VAR9 (LOADSTRUCT,
19219
        vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19220
  VAR7 (LOADSTRUCTLANE, vld3_lane,
19221
        v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19222
  VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di),
19223
  VAR9 (STORESTRUCT, vst3,
19224
        v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19225
  VAR7 (STORESTRUCTLANE, vst3_lane,
19226
        v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19227
  VAR9 (LOADSTRUCT, vld4,
19228
        v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19229
  VAR7 (LOADSTRUCTLANE, vld4_lane,
19230
        v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19231
  VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di),
19232
  VAR9 (STORESTRUCT, vst4,
19233
        v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19234
  VAR7 (STORESTRUCTLANE, vst4_lane,
19235
        v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19236
  VAR10 (LOGICBINOP, vand,
19237
         v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19238
  VAR10 (LOGICBINOP, vorr,
19239
         v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19240
  VAR10 (BINOP, veor,
19241
         v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19242
  VAR10 (LOGICBINOP, vbic,
19243
         v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19244
  VAR10 (LOGICBINOP, vorn,
19245
         v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
19246
};
19247
 
19248
#undef CF
19249
#undef VAR1
19250
#undef VAR2
19251
#undef VAR3
19252
#undef VAR4
19253
#undef VAR5
19254
#undef VAR6
19255
#undef VAR7
19256
#undef VAR8
19257
#undef VAR9
19258
#undef VAR10
19259
 
19260
/* Neon defines builtins from ARM_BUILTIN_NEON_BASE upwards, though they don't have
19261
   symbolic names defined here (which would require too much duplication).
19262
   FIXME?  */
19263
enum arm_builtins
19264
{
19265
  ARM_BUILTIN_GETWCX,
19266
  ARM_BUILTIN_SETWCX,
19267
 
19268
  ARM_BUILTIN_WZERO,
19269
 
19270
  ARM_BUILTIN_WAVG2BR,
19271
  ARM_BUILTIN_WAVG2HR,
19272
  ARM_BUILTIN_WAVG2B,
19273
  ARM_BUILTIN_WAVG2H,
19274
 
19275
  ARM_BUILTIN_WACCB,
19276
  ARM_BUILTIN_WACCH,
19277
  ARM_BUILTIN_WACCW,
19278
 
19279
  ARM_BUILTIN_WMACS,
19280
  ARM_BUILTIN_WMACSZ,
19281
  ARM_BUILTIN_WMACU,
19282
  ARM_BUILTIN_WMACUZ,
19283
 
19284
  ARM_BUILTIN_WSADB,
19285
  ARM_BUILTIN_WSADBZ,
19286
  ARM_BUILTIN_WSADH,
19287
  ARM_BUILTIN_WSADHZ,
19288
 
19289
  ARM_BUILTIN_WALIGN,
19290
 
19291
  ARM_BUILTIN_TMIA,
19292
  ARM_BUILTIN_TMIAPH,
19293
  ARM_BUILTIN_TMIABB,
19294
  ARM_BUILTIN_TMIABT,
19295
  ARM_BUILTIN_TMIATB,
19296
  ARM_BUILTIN_TMIATT,
19297
 
19298
  ARM_BUILTIN_TMOVMSKB,
19299
  ARM_BUILTIN_TMOVMSKH,
19300
  ARM_BUILTIN_TMOVMSKW,
19301
 
19302
  ARM_BUILTIN_TBCSTB,
19303
  ARM_BUILTIN_TBCSTH,
19304
  ARM_BUILTIN_TBCSTW,
19305
 
19306
  ARM_BUILTIN_WMADDS,
19307
  ARM_BUILTIN_WMADDU,
19308
 
19309
  ARM_BUILTIN_WPACKHSS,
19310
  ARM_BUILTIN_WPACKWSS,
19311
  ARM_BUILTIN_WPACKDSS,
19312
  ARM_BUILTIN_WPACKHUS,
19313
  ARM_BUILTIN_WPACKWUS,
19314
  ARM_BUILTIN_WPACKDUS,
19315
 
19316
  ARM_BUILTIN_WADDB,
19317
  ARM_BUILTIN_WADDH,
19318
  ARM_BUILTIN_WADDW,
19319
  ARM_BUILTIN_WADDSSB,
19320
  ARM_BUILTIN_WADDSSH,
19321
  ARM_BUILTIN_WADDSSW,
19322
  ARM_BUILTIN_WADDUSB,
19323
  ARM_BUILTIN_WADDUSH,
19324
  ARM_BUILTIN_WADDUSW,
19325
  ARM_BUILTIN_WSUBB,
19326
  ARM_BUILTIN_WSUBH,
19327
  ARM_BUILTIN_WSUBW,
19328
  ARM_BUILTIN_WSUBSSB,
19329
  ARM_BUILTIN_WSUBSSH,
19330
  ARM_BUILTIN_WSUBSSW,
19331
  ARM_BUILTIN_WSUBUSB,
19332
  ARM_BUILTIN_WSUBUSH,
19333
  ARM_BUILTIN_WSUBUSW,
19334
 
19335
  ARM_BUILTIN_WAND,
19336
  ARM_BUILTIN_WANDN,
19337
  ARM_BUILTIN_WOR,
19338
  ARM_BUILTIN_WXOR,
19339
 
19340
  ARM_BUILTIN_WCMPEQB,
19341
  ARM_BUILTIN_WCMPEQH,
19342
  ARM_BUILTIN_WCMPEQW,
19343
  ARM_BUILTIN_WCMPGTUB,
19344
  ARM_BUILTIN_WCMPGTUH,
19345
  ARM_BUILTIN_WCMPGTUW,
19346
  ARM_BUILTIN_WCMPGTSB,
19347
  ARM_BUILTIN_WCMPGTSH,
19348
  ARM_BUILTIN_WCMPGTSW,
19349
 
19350
  ARM_BUILTIN_TEXTRMSB,
19351
  ARM_BUILTIN_TEXTRMSH,
19352
  ARM_BUILTIN_TEXTRMSW,
19353
  ARM_BUILTIN_TEXTRMUB,
19354
  ARM_BUILTIN_TEXTRMUH,
19355
  ARM_BUILTIN_TEXTRMUW,
19356
  ARM_BUILTIN_TINSRB,
19357
  ARM_BUILTIN_TINSRH,
19358
  ARM_BUILTIN_TINSRW,
19359
 
19360
  ARM_BUILTIN_WMAXSW,
19361
  ARM_BUILTIN_WMAXSH,
19362
  ARM_BUILTIN_WMAXSB,
19363
  ARM_BUILTIN_WMAXUW,
19364
  ARM_BUILTIN_WMAXUH,
19365
  ARM_BUILTIN_WMAXUB,
19366
  ARM_BUILTIN_WMINSW,
19367
  ARM_BUILTIN_WMINSH,
19368
  ARM_BUILTIN_WMINSB,
19369
  ARM_BUILTIN_WMINUW,
19370
  ARM_BUILTIN_WMINUH,
19371
  ARM_BUILTIN_WMINUB,
19372
 
19373
  ARM_BUILTIN_WMULUM,
19374
  ARM_BUILTIN_WMULSM,
19375
  ARM_BUILTIN_WMULUL,
19376
 
19377
  ARM_BUILTIN_PSADBH,
19378
  ARM_BUILTIN_WSHUFH,
19379
 
19380
  ARM_BUILTIN_WSLLH,
19381
  ARM_BUILTIN_WSLLW,
19382
  ARM_BUILTIN_WSLLD,
19383
  ARM_BUILTIN_WSRAH,
19384
  ARM_BUILTIN_WSRAW,
19385
  ARM_BUILTIN_WSRAD,
19386
  ARM_BUILTIN_WSRLH,
19387
  ARM_BUILTIN_WSRLW,
19388
  ARM_BUILTIN_WSRLD,
19389
  ARM_BUILTIN_WRORH,
19390
  ARM_BUILTIN_WRORW,
19391
  ARM_BUILTIN_WRORD,
19392
  ARM_BUILTIN_WSLLHI,
19393
  ARM_BUILTIN_WSLLWI,
19394
  ARM_BUILTIN_WSLLDI,
19395
  ARM_BUILTIN_WSRAHI,
19396
  ARM_BUILTIN_WSRAWI,
19397
  ARM_BUILTIN_WSRADI,
19398
  ARM_BUILTIN_WSRLHI,
19399
  ARM_BUILTIN_WSRLWI,
19400
  ARM_BUILTIN_WSRLDI,
19401
  ARM_BUILTIN_WRORHI,
19402
  ARM_BUILTIN_WRORWI,
19403
  ARM_BUILTIN_WRORDI,
19404
 
19405
  ARM_BUILTIN_WUNPCKIHB,
19406
  ARM_BUILTIN_WUNPCKIHH,
19407
  ARM_BUILTIN_WUNPCKIHW,
19408
  ARM_BUILTIN_WUNPCKILB,
19409
  ARM_BUILTIN_WUNPCKILH,
19410
  ARM_BUILTIN_WUNPCKILW,
19411
 
19412
  ARM_BUILTIN_WUNPCKEHSB,
19413
  ARM_BUILTIN_WUNPCKEHSH,
19414
  ARM_BUILTIN_WUNPCKEHSW,
19415
  ARM_BUILTIN_WUNPCKEHUB,
19416
  ARM_BUILTIN_WUNPCKEHUH,
19417
  ARM_BUILTIN_WUNPCKEHUW,
19418
  ARM_BUILTIN_WUNPCKELSB,
19419
  ARM_BUILTIN_WUNPCKELSH,
19420
  ARM_BUILTIN_WUNPCKELSW,
19421
  ARM_BUILTIN_WUNPCKELUB,
19422
  ARM_BUILTIN_WUNPCKELUH,
19423
  ARM_BUILTIN_WUNPCKELUW,
19424
 
19425
  ARM_BUILTIN_THREAD_POINTER,
19426
 
19427
  ARM_BUILTIN_NEON_BASE,
19428
 
19429
  ARM_BUILTIN_MAX = ARM_BUILTIN_NEON_BASE + ARRAY_SIZE (neon_builtin_data)
19430
};
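/* Since ARM_BUILTIN_MAX equals ARM_BUILTIN_NEON_BASE plus the number of
   entries in neon_builtin_data, the function codes from ARM_BUILTIN_NEON_BASE
   up to (but not including) ARM_BUILTIN_MAX are reserved for the NEON
   builtins, one code per entry of that table.  */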
19431
 
19432
static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
19433
 
19434
static void
19435
arm_init_neon_builtins (void)
19436
{
19437
  unsigned int i, fcode;
19438
  tree decl;
19439
 
19440
  tree neon_intQI_type_node;
19441
  tree neon_intHI_type_node;
19442
  tree neon_polyQI_type_node;
19443
  tree neon_polyHI_type_node;
19444
  tree neon_intSI_type_node;
19445
  tree neon_intDI_type_node;
19446
  tree neon_float_type_node;
19447
 
19448
  tree intQI_pointer_node;
19449
  tree intHI_pointer_node;
19450
  tree intSI_pointer_node;
19451
  tree intDI_pointer_node;
19452
  tree float_pointer_node;
19453
 
19454
  tree const_intQI_node;
19455
  tree const_intHI_node;
19456
  tree const_intSI_node;
19457
  tree const_intDI_node;
19458
  tree const_float_node;
19459
 
19460
  tree const_intQI_pointer_node;
19461
  tree const_intHI_pointer_node;
19462
  tree const_intSI_pointer_node;
19463
  tree const_intDI_pointer_node;
19464
  tree const_float_pointer_node;
19465
 
19466
  tree V8QI_type_node;
19467
  tree V4HI_type_node;
19468
  tree V2SI_type_node;
19469
  tree V2SF_type_node;
19470
  tree V16QI_type_node;
19471
  tree V8HI_type_node;
19472
  tree V4SI_type_node;
19473
  tree V4SF_type_node;
19474
  tree V2DI_type_node;
19475
 
19476
  tree intUQI_type_node;
19477
  tree intUHI_type_node;
19478
  tree intUSI_type_node;
19479
  tree intUDI_type_node;
19480
 
19481
  tree intEI_type_node;
19482
  tree intOI_type_node;
19483
  tree intCI_type_node;
19484
  tree intXI_type_node;
19485
 
19486
  tree V8QI_pointer_node;
19487
  tree V4HI_pointer_node;
19488
  tree V2SI_pointer_node;
19489
  tree V2SF_pointer_node;
19490
  tree V16QI_pointer_node;
19491
  tree V8HI_pointer_node;
19492
  tree V4SI_pointer_node;
19493
  tree V4SF_pointer_node;
19494
  tree V2DI_pointer_node;
19495
 
19496
  tree void_ftype_pv8qi_v8qi_v8qi;
19497
  tree void_ftype_pv4hi_v4hi_v4hi;
19498
  tree void_ftype_pv2si_v2si_v2si;
19499
  tree void_ftype_pv2sf_v2sf_v2sf;
19500
  tree void_ftype_pdi_di_di;
19501
  tree void_ftype_pv16qi_v16qi_v16qi;
19502
  tree void_ftype_pv8hi_v8hi_v8hi;
19503
  tree void_ftype_pv4si_v4si_v4si;
19504
  tree void_ftype_pv4sf_v4sf_v4sf;
19505
  tree void_ftype_pv2di_v2di_v2di;
19506
 
19507
  tree reinterp_ftype_dreg[5][5];
19508
  tree reinterp_ftype_qreg[5][5];
19509
  tree dreg_types[5], qreg_types[5];
19510
 
19511
  /* Create distinguished type nodes for NEON vector element types,
19512
     and pointers to values of such types, so we can detect them later.  */
19513
  neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
19514
  neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
19515
  neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
19516
  neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
19517
  neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
19518
  neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
19519
  neon_float_type_node = make_node (REAL_TYPE);
19520
  TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
19521
  layout_type (neon_float_type_node);
19522
 
19523
  /* Define typedefs which exactly correspond to the modes we are basing vector
19524
     types on.  If you change these names you'll need to change
19525
     the table used by arm_mangle_type too.  */
19526
  (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
19527
                                             "__builtin_neon_qi");
19528
  (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
19529
                                             "__builtin_neon_hi");
19530
  (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
19531
                                             "__builtin_neon_si");
19532
  (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
19533
                                             "__builtin_neon_sf");
19534
  (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
19535
                                             "__builtin_neon_di");
19536
  (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
19537
                                             "__builtin_neon_poly8");
19538
  (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
19539
                                             "__builtin_neon_poly16");
19540
 
19541
  intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
19542
  intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
19543
  intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
19544
  intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
19545
  float_pointer_node = build_pointer_type (neon_float_type_node);
19546
 
19547
  /* Next create constant-qualified versions of the above types.  */
19548
  const_intQI_node = build_qualified_type (neon_intQI_type_node,
19549
                                           TYPE_QUAL_CONST);
19550
  const_intHI_node = build_qualified_type (neon_intHI_type_node,
19551
                                           TYPE_QUAL_CONST);
19552
  const_intSI_node = build_qualified_type (neon_intSI_type_node,
19553
                                           TYPE_QUAL_CONST);
19554
  const_intDI_node = build_qualified_type (neon_intDI_type_node,
19555
                                           TYPE_QUAL_CONST);
19556
  const_float_node = build_qualified_type (neon_float_type_node,
19557
                                           TYPE_QUAL_CONST);
19558
 
19559
  const_intQI_pointer_node = build_pointer_type (const_intQI_node);
19560
  const_intHI_pointer_node = build_pointer_type (const_intHI_node);
19561
  const_intSI_pointer_node = build_pointer_type (const_intSI_node);
19562
  const_intDI_pointer_node = build_pointer_type (const_intDI_node);
19563
  const_float_pointer_node = build_pointer_type (const_float_node);
19564
 
19565
  /* Now create vector types based on our NEON element types.  */
19566
  /* 64-bit vectors.  */
19567
  V8QI_type_node =
19568
    build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
19569
  V4HI_type_node =
19570
    build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
19571
  V2SI_type_node =
19572
    build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
19573
  V2SF_type_node =
19574
    build_vector_type_for_mode (neon_float_type_node, V2SFmode);
19575
  /* 128-bit vectors.  */
19576
  V16QI_type_node =
19577
    build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
19578
  V8HI_type_node =
19579
    build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
19580
  V4SI_type_node =
19581
    build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
19582
  V4SF_type_node =
19583
    build_vector_type_for_mode (neon_float_type_node, V4SFmode);
19584
  V2DI_type_node =
19585
    build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
19586
 
19587
  /* Unsigned integer types for various mode sizes.  */
19588
  intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
19589
  intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
19590
  intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
19591
  intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
19592
 
19593
  (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
19594
                                             "__builtin_neon_uqi");
19595
  (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
19596
                                             "__builtin_neon_uhi");
19597
  (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
19598
                                             "__builtin_neon_usi");
19599
  (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
19600
                                             "__builtin_neon_udi");
19601
 
19602
  /* Opaque integer types for structures of vectors.  */
19603
  intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
19604
  intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
19605
  intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
19606
  intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
19607
 
19608
  (*lang_hooks.types.register_builtin_type) (intTI_type_node,
19609
                                             "__builtin_neon_ti");
19610
  (*lang_hooks.types.register_builtin_type) (intEI_type_node,
19611
                                             "__builtin_neon_ei");
19612
  (*lang_hooks.types.register_builtin_type) (intOI_type_node,
19613
                                             "__builtin_neon_oi");
19614
  (*lang_hooks.types.register_builtin_type) (intCI_type_node,
19615
                                             "__builtin_neon_ci");
19616
  (*lang_hooks.types.register_builtin_type) (intXI_type_node,
19617
                                             "__builtin_neon_xi");
19618
 
19619
  /* Pointers to vector types.  */
19620
  V8QI_pointer_node = build_pointer_type (V8QI_type_node);
19621
  V4HI_pointer_node = build_pointer_type (V4HI_type_node);
19622
  V2SI_pointer_node = build_pointer_type (V2SI_type_node);
19623
  V2SF_pointer_node = build_pointer_type (V2SF_type_node);
19624
  V16QI_pointer_node = build_pointer_type (V16QI_type_node);
19625
  V8HI_pointer_node = build_pointer_type (V8HI_type_node);
19626
  V4SI_pointer_node = build_pointer_type (V4SI_type_node);
19627
  V4SF_pointer_node = build_pointer_type (V4SF_type_node);
19628
  V2DI_pointer_node = build_pointer_type (V2DI_type_node);
19629
 
19630
  /* Operations which return results as pairs.  */
19631
  void_ftype_pv8qi_v8qi_v8qi =
19632
    build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
19633
                              V8QI_type_node, NULL);
19634
  void_ftype_pv4hi_v4hi_v4hi =
19635
    build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
19636
                              V4HI_type_node, NULL);
19637
  void_ftype_pv2si_v2si_v2si =
19638
    build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
19639
                              V2SI_type_node, NULL);
19640
  void_ftype_pv2sf_v2sf_v2sf =
19641
    build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
19642
                              V2SF_type_node, NULL);
19643
  void_ftype_pdi_di_di =
19644
    build_function_type_list (void_type_node, intDI_pointer_node,
19645
                              neon_intDI_type_node, neon_intDI_type_node, NULL);
19646
  void_ftype_pv16qi_v16qi_v16qi =
19647
    build_function_type_list (void_type_node, V16QI_pointer_node,
19648
                              V16QI_type_node, V16QI_type_node, NULL);
19649
  void_ftype_pv8hi_v8hi_v8hi =
19650
    build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
19651
                              V8HI_type_node, NULL);
19652
  void_ftype_pv4si_v4si_v4si =
19653
    build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
19654
                              V4SI_type_node, NULL);
19655
  void_ftype_pv4sf_v4sf_v4sf =
19656
    build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
19657
                              V4SF_type_node, NULL);
19658
  void_ftype_pv2di_v2di_v2di =
19659
    build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
19660
                              V2DI_type_node, NULL);
19661
 
19662
  dreg_types[0] = V8QI_type_node;
19663
  dreg_types[1] = V4HI_type_node;
19664
  dreg_types[2] = V2SI_type_node;
19665
  dreg_types[3] = V2SF_type_node;
19666
  dreg_types[4] = neon_intDI_type_node;
19667
 
19668
  qreg_types[0] = V16QI_type_node;
19669
  qreg_types[1] = V8HI_type_node;
19670
  qreg_types[2] = V4SI_type_node;
19671
  qreg_types[3] = V4SF_type_node;
19672
  qreg_types[4] = V2DI_type_node;
19673
 
19674
  for (i = 0; i < 5; i++)
19675
    {
19676
      int j;
19677
      for (j = 0; j < 5; j++)
19678
        {
19679
          reinterp_ftype_dreg[i][j]
19680
            = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
19681
          reinterp_ftype_qreg[i][j]
19682
            = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
19683
        }
19684
    }
19685
 
19686
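  /* Register each entry in neon_builtin_data as a builtin function,
     constructing its type from the operand modes of the insn it expands to.  */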
  for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
19687
       i < ARRAY_SIZE (neon_builtin_data);
19688
       i++, fcode++)
19689
    {
19690
      neon_builtin_datum *d = &neon_builtin_data[i];
19691
 
19692
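      /* Mode-name suffixes appended to each builtin's name; one entry per
         neon_builtin_type_mode value (checked against T_MAX below).  */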
      const char* const modenames[] = {
19693
        "v8qi", "v4hi", "v2si", "v2sf", "di",
19694
        "v16qi", "v8hi", "v4si", "v4sf", "v2di",
19695
        "ti", "ei", "oi"
19696
      };
19697
      char namebuf[60];
19698
      tree ftype = NULL;
19699
      int is_load = 0, is_store = 0;
19700
 
19701
      gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
19702
 
19703
      d->fcode = fcode;
19704
 
19705
      switch (d->itype)
19706
        {
19707
        case NEON_LOAD1:
19708
        case NEON_LOAD1LANE:
19709
        case NEON_LOADSTRUCT:
19710
        case NEON_LOADSTRUCTLANE:
19711
          is_load = 1;
19712
          /* Fall through.  */
19713
        case NEON_STORE1:
19714
        case NEON_STORE1LANE:
19715
        case NEON_STORESTRUCT:
19716
        case NEON_STORESTRUCTLANE:
19717
          if (!is_load)
19718
            is_store = 1;
19719
          /* Fall through.  */
19720
        case NEON_UNOP:
19721
        case NEON_BINOP:
19722
        case NEON_LOGICBINOP:
19723
        case NEON_SHIFTINSERT:
19724
        case NEON_TERNOP:
19725
        case NEON_GETLANE:
19726
        case NEON_SETLANE:
19727
        case NEON_CREATE:
19728
        case NEON_DUP:
19729
        case NEON_DUPLANE:
19730
        case NEON_SHIFTIMM:
19731
        case NEON_SHIFTACC:
19732
        case NEON_COMBINE:
19733
        case NEON_SPLIT:
19734
        case NEON_CONVERT:
19735
        case NEON_FIXCONV:
19736
        case NEON_LANEMUL:
19737
        case NEON_LANEMULL:
19738
        case NEON_LANEMULH:
19739
        case NEON_LANEMAC:
19740
        case NEON_SCALARMUL:
19741
        case NEON_SCALARMULL:
19742
        case NEON_SCALARMULH:
19743
        case NEON_SCALARMAC:
19744
        case NEON_SELECT:
19745
        case NEON_VTBL:
19746
        case NEON_VTBX:
19747
          {
19748
            int k;
19749
            tree return_type = void_type_node, args = void_list_node;
19750
 
19751
            /* Build a function type directly from the insn_data for
19752
               this builtin.  The build_function_type() function takes
19753
               care of removing duplicates for us.  */
19754
            for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
19755
              {
19756
                tree eltype;
19757
 
19758
                if (is_load && k == 1)
19759
                  {
19760
                    /* Neon load patterns always have the memory
19761
                       operand in the operand 1 position.  */
19762
                    gcc_assert (insn_data[d->code].operand[k].predicate
19763
                                == neon_struct_operand);
19764
 
19765
                    switch (d->mode)
19766
                      {
19767
                      case T_V8QI:
19768
                      case T_V16QI:
19769
                        eltype = const_intQI_pointer_node;
19770
                        break;
19771
 
19772
                      case T_V4HI:
19773
                      case T_V8HI:
19774
                        eltype = const_intHI_pointer_node;
19775
                        break;
19776
 
19777
                      case T_V2SI:
19778
                      case T_V4SI:
19779
                        eltype = const_intSI_pointer_node;
19780
                        break;
19781
 
19782
                      case T_V2SF:
19783
                      case T_V4SF:
19784
                        eltype = const_float_pointer_node;
19785
                        break;
19786
 
19787
                      case T_DI:
19788
                      case T_V2DI:
19789
                        eltype = const_intDI_pointer_node;
19790
                        break;
19791
 
19792
                      default: gcc_unreachable ();
19793
                      }
19794
                  }
19795
                else if (is_store && k == 0)
19796
                  {
19797
                    /* Similarly, Neon store patterns use operand 0 as
19798
                       the memory location to store to.  */
19799
                    gcc_assert (insn_data[d->code].operand[k].predicate
19800
                                == neon_struct_operand);
19801
 
19802
                    switch (d->mode)
19803
                      {
19804
                      case T_V8QI:
19805
                      case T_V16QI:
19806
                        eltype = intQI_pointer_node;
19807
                        break;
19808
 
19809
                      case T_V4HI:
19810
                      case T_V8HI:
19811
                        eltype = intHI_pointer_node;
19812
                        break;
19813
 
19814
                      case T_V2SI:
19815
                      case T_V4SI:
19816
                        eltype = intSI_pointer_node;
19817
                        break;
19818
 
19819
                      case T_V2SF:
19820
                      case T_V4SF:
19821
                        eltype = float_pointer_node;
19822
                        break;
19823
 
19824
                      case T_DI:
19825
                      case T_V2DI:
19826
                        eltype = intDI_pointer_node;
19827
                        break;
19828
 
19829
                      default: gcc_unreachable ();
19830
                      }
19831
                  }
19832
                else
19833
                  {
19834
                    switch (insn_data[d->code].operand[k].mode)
19835
                      {
19836
                      case VOIDmode: eltype = void_type_node; break;
19837
                        /* Scalars.  */
19838
                      case QImode: eltype = neon_intQI_type_node; break;
19839
                      case HImode: eltype = neon_intHI_type_node; break;
19840
                      case SImode: eltype = neon_intSI_type_node; break;
19841
                      case SFmode: eltype = neon_float_type_node; break;
19842
                      case DImode: eltype = neon_intDI_type_node; break;
19843
                      case TImode: eltype = intTI_type_node; break;
19844
                      case EImode: eltype = intEI_type_node; break;
19845
                      case OImode: eltype = intOI_type_node; break;
19846
                      case CImode: eltype = intCI_type_node; break;
19847
                      case XImode: eltype = intXI_type_node; break;
19848
                        /* 64-bit vectors.  */
19849
                      case V8QImode: eltype = V8QI_type_node; break;
19850
                      case V4HImode: eltype = V4HI_type_node; break;
19851
                      case V2SImode: eltype = V2SI_type_node; break;
19852
                      case V2SFmode: eltype = V2SF_type_node; break;
19853
                        /* 128-bit vectors.  */
19854
                      case V16QImode: eltype = V16QI_type_node; break;
19855
                      case V8HImode: eltype = V8HI_type_node; break;
19856
                      case V4SImode: eltype = V4SI_type_node; break;
19857
                      case V4SFmode: eltype = V4SF_type_node; break;
19858
                      case V2DImode: eltype = V2DI_type_node; break;
19859
                      default: gcc_unreachable ();
19860
                      }
19861
                  }
19862
 
19863
                if (k == 0 && !is_store)
19864
                  return_type = eltype;
19865
                else
19866
                  args = tree_cons (NULL_TREE, eltype, args);
19867
              }
19868
 
19869
            ftype = build_function_type (return_type, args);
19870
          }
19871
          break;
19872
 
19873
        case NEON_RESULTPAIR:
19874
          {
19875
            switch (insn_data[d->code].operand[1].mode)
19876
              {
19877
              case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
19878
              case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
19879
              case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
19880
              case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
19881
              case DImode: ftype = void_ftype_pdi_di_di; break;
19882
              case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
19883
              case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
19884
              case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
19885
              case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
19886
              case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
19887
              default: gcc_unreachable ();
19888
              }
19889
          }
19890
          break;
19891
 
19892
        case NEON_REINTERP:
19893
          {
19894
            /* We iterate over 5 doubleword types, then 5 quadword
19895
               types.  */
19896
            int rhs = d->mode % 5;
19897
            switch (insn_data[d->code].operand[0].mode)
19898
              {
19899
              case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
19900
              case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
19901
              case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
19902
              case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
19903
              case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
19904
              case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
19905
              case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
19906
              case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
19907
              case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
19908
              case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
19909
              default: gcc_unreachable ();
19910
              }
19911
          }
19912
          break;
19913
 
19914
        default:
19915
          gcc_unreachable ();
19916
        }
19917
 
19918
      gcc_assert (ftype != NULL);
19919
 
19920
      sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
19921
 
19922
      decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
19923
                                   NULL_TREE);
19924
      arm_builtin_decls[fcode] = decl;
19925
    }
19926
}
19927
 
19928
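/* Register builtin NAME with type TYPE and function code CODE, but only
   when the target's insn_flags include MASK.  */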
#define def_mbuiltin(MASK, NAME, TYPE, CODE)                            \
19929
  do                                                                    \
19930
    {                                                                   \
19931
      if ((MASK) & insn_flags)                                          \
19932
        {                                                               \
19933
          tree bdecl;                                                   \
19934
          bdecl = add_builtin_function ((NAME), (TYPE), (CODE),         \
19935
                                        BUILT_IN_MD, NULL, NULL_TREE);  \
19936
          arm_builtin_decls[CODE] = bdecl;                              \
19937
        }                                                               \
19938
    }                                                                   \
19939
  while (0)
19940
 
19941
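/* Description of one iWMMXt builtin: the feature mask it requires, the
   insn code used to expand it, its user-visible name, its function code,
   and comparison/flag information.  */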
struct builtin_description
19942
{
19943
  const unsigned int       mask;
19944
  const enum insn_code     icode;
19945
  const char * const       name;
19946
  const enum arm_builtins  code;
19947
  const enum rtx_code      comparison;
19948
  const unsigned int       flag;
19949
};
19950
 
19951
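/* Two-operand iWMMXt builtins.  */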
static const struct builtin_description bdesc_2arg[] =
19952
{
19953
#define IWMMXT_BUILTIN(code, string, builtin) \
19954
  { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
19955
    ARM_BUILTIN_##builtin, UNKNOWN, 0 },
19956
 
19957
  IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
19958
  IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
19959
  IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
19960
  IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
19961
  IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
19962
  IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
19963
  IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
19964
  IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
19965
  IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
19966
  IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
19967
  IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
19968
  IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
19969
  IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
19970
  IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
19971
  IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
19972
  IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
19973
  IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
19974
  IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
19975
  IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
19976
  IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
19977
  IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
19978
  IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
19979
  IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
19980
  IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
19981
  IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
19982
  IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
19983
  IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
19984
  IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
19985
  IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
19986
  IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
19987
  IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
19988
  IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
19989
  IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
19990
  IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
19991
  IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
19992
  IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
19993
  IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
19994
  IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
19995
  IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
19996
  IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
19997
  IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
19998
  IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
19999
  IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
20000
  IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
20001
  IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
20002
  IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
20003
  IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
20004
  IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
20005
  IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
20006
  IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
20007
  IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
20008
  IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
20009
  IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
20010
  IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
20011
  IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
20012
  IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
20013
  IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
20014
  IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
20015
 
20016
#define IWMMXT_BUILTIN2(code, builtin) \
20017
  { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
20018
 
20019
  IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
20020
  IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
20021
  IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
20022
  IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
20023
  IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
20024
  IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
20025
  IWMMXT_BUILTIN2 (ashlv4hi3_di,    WSLLH)
20026
  IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
20027
  IWMMXT_BUILTIN2 (ashlv2si3_di,    WSLLW)
20028
  IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
20029
  IWMMXT_BUILTIN2 (ashldi3_di,      WSLLD)
20030
  IWMMXT_BUILTIN2 (ashldi3_iwmmxt,  WSLLDI)
20031
  IWMMXT_BUILTIN2 (lshrv4hi3_di,    WSRLH)
20032
  IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
20033
  IWMMXT_BUILTIN2 (lshrv2si3_di,    WSRLW)
20034
  IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
20035
  IWMMXT_BUILTIN2 (lshrdi3_di,      WSRLD)
20036
  IWMMXT_BUILTIN2 (lshrdi3_iwmmxt,  WSRLDI)
20037
  IWMMXT_BUILTIN2 (ashrv4hi3_di,    WSRAH)
20038
  IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
20039
  IWMMXT_BUILTIN2 (ashrv2si3_di,    WSRAW)
20040
  IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
20041
  IWMMXT_BUILTIN2 (ashrdi3_di,      WSRAD)
20042
  IWMMXT_BUILTIN2 (ashrdi3_iwmmxt,  WSRADI)
20043
  IWMMXT_BUILTIN2 (rorv4hi3_di,     WRORH)
20044
  IWMMXT_BUILTIN2 (rorv4hi3,        WRORHI)
20045
  IWMMXT_BUILTIN2 (rorv2si3_di,     WRORW)
20046
  IWMMXT_BUILTIN2 (rorv2si3,        WRORWI)
20047
  IWMMXT_BUILTIN2 (rordi3_di,       WRORD)
20048
  IWMMXT_BUILTIN2 (rordi3,          WRORDI)
20049
  IWMMXT_BUILTIN2 (iwmmxt_wmacuz,   WMACUZ)
20050
  IWMMXT_BUILTIN2 (iwmmxt_wmacsz,   WMACSZ)
20051
};
20052
 
20053
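/* One-operand iWMMXt builtins.  */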
static const struct builtin_description bdesc_1arg[] =
20054
{
20055
  IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
20056
  IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
20057
  IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
20058
  IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
20059
  IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
20060
  IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
20061
  IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
20062
  IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
20063
  IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
20064
  IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
20065
  IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
20066
  IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
20067
  IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
20068
  IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
20069
  IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
20070
  IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
20071
  IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
20072
  IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
20073
};
20074
 
20075
/* Set up all the iWMMXt builtins.  This is not called if
20076
   TARGET_IWMMXT is zero.  */
20077
 
20078
static void
20079
arm_init_iwmmxt_builtins (void)
20080
{
20081
  const struct builtin_description * d;
20082
  size_t i;
20083
 
20084
  tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
20085
  tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
20086
  tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
20087
 
20088
  tree int_ftype_int
20089
    = build_function_type_list (integer_type_node,
20090
                                integer_type_node, NULL_TREE);
20091
  tree v8qi_ftype_v8qi_v8qi_int
20092
    = build_function_type_list (V8QI_type_node,
20093
                                V8QI_type_node, V8QI_type_node,
20094
                                integer_type_node, NULL_TREE);
20095
  tree v4hi_ftype_v4hi_int
20096
    = build_function_type_list (V4HI_type_node,
20097
                                V4HI_type_node, integer_type_node, NULL_TREE);
20098
  tree v2si_ftype_v2si_int
20099
    = build_function_type_list (V2SI_type_node,
20100
                                V2SI_type_node, integer_type_node, NULL_TREE);
20101
  tree v2si_ftype_di_di
20102
    = build_function_type_list (V2SI_type_node,
20103
                                long_long_integer_type_node,
20104
                                long_long_integer_type_node,
20105
                                NULL_TREE);
20106
  tree di_ftype_di_int
20107
    = build_function_type_list (long_long_integer_type_node,
20108
                                long_long_integer_type_node,
20109
                                integer_type_node, NULL_TREE);
20110
  tree di_ftype_di_int_int
20111
    = build_function_type_list (long_long_integer_type_node,
20112
                                long_long_integer_type_node,
20113
                                integer_type_node,
20114
                                integer_type_node, NULL_TREE);
20115
  tree int_ftype_v8qi
20116
    = build_function_type_list (integer_type_node,
20117
                                V8QI_type_node, NULL_TREE);
20118
  tree int_ftype_v4hi
20119
    = build_function_type_list (integer_type_node,
20120
                                V4HI_type_node, NULL_TREE);
20121
  tree int_ftype_v2si
20122
    = build_function_type_list (integer_type_node,
20123
                                V2SI_type_node, NULL_TREE);
20124
  tree int_ftype_v8qi_int
20125
    = build_function_type_list (integer_type_node,
20126
                                V8QI_type_node, integer_type_node, NULL_TREE);
20127
  tree int_ftype_v4hi_int
20128
    = build_function_type_list (integer_type_node,
20129
                                V4HI_type_node, integer_type_node, NULL_TREE);
20130
  tree int_ftype_v2si_int
20131
    = build_function_type_list (integer_type_node,
20132
                                V2SI_type_node, integer_type_node, NULL_TREE);
20133
  tree v8qi_ftype_v8qi_int_int
20134
    = build_function_type_list (V8QI_type_node,
20135
                                V8QI_type_node, integer_type_node,
20136
                                integer_type_node, NULL_TREE);
20137
  tree v4hi_ftype_v4hi_int_int
20138
    = build_function_type_list (V4HI_type_node,
20139
                                V4HI_type_node, integer_type_node,
20140
                                integer_type_node, NULL_TREE);
20141
  tree v2si_ftype_v2si_int_int
20142
    = build_function_type_list (V2SI_type_node,
20143
                                V2SI_type_node, integer_type_node,
20144
                                integer_type_node, NULL_TREE);
20145
  /* Miscellaneous.  */
20146
  tree v8qi_ftype_v4hi_v4hi
20147
    = build_function_type_list (V8QI_type_node,
20148
                                V4HI_type_node, V4HI_type_node, NULL_TREE);
20149
  tree v4hi_ftype_v2si_v2si
20150
    = build_function_type_list (V4HI_type_node,
20151
                                V2SI_type_node, V2SI_type_node, NULL_TREE);
20152
  tree v2si_ftype_v4hi_v4hi
20153
    = build_function_type_list (V2SI_type_node,
20154
                                V4HI_type_node, V4HI_type_node, NULL_TREE);
20155
  tree v2si_ftype_v8qi_v8qi
20156
    = build_function_type_list (V2SI_type_node,
20157
                                V8QI_type_node, V8QI_type_node, NULL_TREE);
20158
  tree v4hi_ftype_v4hi_di
20159
    = build_function_type_list (V4HI_type_node,
20160
                                V4HI_type_node, long_long_integer_type_node,
20161
                                NULL_TREE);
20162
  tree v2si_ftype_v2si_di
20163
    = build_function_type_list (V2SI_type_node,
20164
                                V2SI_type_node, long_long_integer_type_node,
20165
                                NULL_TREE);
20166
  tree void_ftype_int_int
20167
    = build_function_type_list (void_type_node,
20168
                                integer_type_node, integer_type_node,
20169
                                NULL_TREE);
20170
  tree di_ftype_void
20171
    = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
20172
  tree di_ftype_v8qi
20173
    = build_function_type_list (long_long_integer_type_node,
20174
                                V8QI_type_node, NULL_TREE);
20175
  tree di_ftype_v4hi
20176
    = build_function_type_list (long_long_integer_type_node,
20177
                                V4HI_type_node, NULL_TREE);
20178
  tree di_ftype_v2si
20179
    = build_function_type_list (long_long_integer_type_node,
20180
                                V2SI_type_node, NULL_TREE);
20181
  tree v2si_ftype_v4hi
20182
    = build_function_type_list (V2SI_type_node,
20183
                                V4HI_type_node, NULL_TREE);
20184
  tree v4hi_ftype_v8qi
20185
    = build_function_type_list (V4HI_type_node,
20186
                                V8QI_type_node, NULL_TREE);
20187
 
20188
  tree di_ftype_di_v4hi_v4hi
20189
    = build_function_type_list (long_long_unsigned_type_node,
20190
                                long_long_unsigned_type_node,
20191
                                V4HI_type_node, V4HI_type_node,
20192
                                NULL_TREE);
20193
 
20194
  tree di_ftype_v4hi_v4hi
20195
    = build_function_type_list (long_long_unsigned_type_node,
20196
                                V4HI_type_node,V4HI_type_node,
20197
                                NULL_TREE);
20198
 
20199
  /* Normal vector binops.  */
20200
  tree v8qi_ftype_v8qi_v8qi
20201
    = build_function_type_list (V8QI_type_node,
20202
                                V8QI_type_node, V8QI_type_node, NULL_TREE);
20203
  tree v4hi_ftype_v4hi_v4hi
20204
    = build_function_type_list (V4HI_type_node,
20205
                                V4HI_type_node,V4HI_type_node, NULL_TREE);
20206
  tree v2si_ftype_v2si_v2si
20207
    = build_function_type_list (V2SI_type_node,
20208
                                V2SI_type_node, V2SI_type_node, NULL_TREE);
20209
  tree di_ftype_di_di
20210
    = build_function_type_list (long_long_unsigned_type_node,
20211
                                long_long_unsigned_type_node,
20212
                                long_long_unsigned_type_node,
20213
                                NULL_TREE);
20214
 
20215
  /* Add all builtins that are more or less simple operations on two
20216
     operands.  */
20217
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
20218
    {
20219
      /* Use one of the operands; the target can have a different mode for
20220
         mask-generating compares.  */
20221
      enum machine_mode mode;
20222
      tree type;
20223
 
20224
      if (d->name == 0)
20225
        continue;
20226
 
20227
      mode = insn_data[d->icode].operand[1].mode;
20228
 
20229
      switch (mode)
20230
        {
20231
        case V8QImode:
20232
          type = v8qi_ftype_v8qi_v8qi;
20233
          break;
20234
        case V4HImode:
20235
          type = v4hi_ftype_v4hi_v4hi;
20236
          break;
20237
        case V2SImode:
20238
          type = v2si_ftype_v2si_v2si;
20239
          break;
20240
        case DImode:
20241
          type = di_ftype_di_di;
20242
          break;
20243
 
20244
        default:
20245
          gcc_unreachable ();
20246
        }
20247
 
20248
      def_mbuiltin (d->mask, d->name, type, d->code);
20249
    }
20250
 
20251
  /* Add the remaining MMX insns with somewhat more complicated types.  */
20252
#define iwmmx_mbuiltin(NAME, TYPE, CODE)                        \
20253
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE),       \
20254
                ARM_BUILTIN_ ## CODE)
20255
 
20256
  iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
20257
  iwmmx_mbuiltin ("setwcx", void_ftype_int_int, SETWCX);
20258
  iwmmx_mbuiltin ("getwcx", int_ftype_int, GETWCX);
20259
 
20260
  iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
20261
  iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
20262
  iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
20263
  iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
20264
  iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
20265
  iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
20266
 
20267
  iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
20268
  iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
20269
  iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
20270
  iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
20271
  iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
20272
  iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
20273
 
20274
  iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
20275
  iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
20276
  iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
20277
  iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
20278
  iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
20279
  iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
20280
 
20281
  iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
20282
  iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
20283
  iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
20284
  iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
20285
  iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
20286
  iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
20287
 
20288
  iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
20289
 
20290
  iwmmx_mbuiltin ("wsadb", v2si_ftype_v8qi_v8qi, WSADB);
20291
  iwmmx_mbuiltin ("wsadh", v2si_ftype_v4hi_v4hi, WSADH);
20292
  iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
20293
  iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
20294
 
20295
  iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
20296
  iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
20297
  iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
20298
  iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
20299
  iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
20300
  iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
20301
  iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
20302
  iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
20303
  iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
20304
 
20305
  iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
20306
  iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
20307
  iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
20308
 
20309
  iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
20310
  iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
20311
  iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
20312
 
20313
  iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
20314
  iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
20315
  iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
20316
  iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
20317
  iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
20318
  iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
20319
 
20320
  iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
20321
  iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
20322
  iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
20323
  iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
20324
  iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
20325
  iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
20326
  iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
20327
  iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
20328
  iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
20329
  iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
20330
  iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
20331
  iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
20332
 
20333
  iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
20334
  iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
20335
  iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
20336
  iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
20337
 
20338
  iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGN);
20339
  iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
20340
  iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
20341
  iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
20342
  iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
20343
  iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
20344
  iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
20345
 
20346
#undef iwmmx_mbuiltin
20347
}
20348
 
20349
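/* Register the __builtin_thread_pointer builtin.  */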
static void
20350
arm_init_tls_builtins (void)
20351
{
20352
  tree ftype, decl;
20353
 
20354
  ftype = build_function_type (ptr_type_node, void_list_node);
20355
  decl = add_builtin_function ("__builtin_thread_pointer", ftype,
20356
                               ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
20357
                               NULL, NULL_TREE);
20358
  TREE_NOTHROW (decl) = 1;
20359
  TREE_READONLY (decl) = 1;
20360
  arm_builtin_decls[ARM_BUILTIN_THREAD_POINTER] = decl;
20361
}
20362
 
20363
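/* Register the __fp16 half-precision floating-point type.  */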
static void
20364
arm_init_fp16_builtins (void)
20365
{
20366
  tree fp16_type = make_node (REAL_TYPE);
20367
  TYPE_PRECISION (fp16_type) = 16;
20368
  layout_type (fp16_type);
20369
  (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
20370
}
20371
 
20372
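/* Set up all target-specific builtins: TLS, and (when enabled) iWMMXt,
   Neon and __fp16 support.  */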
static void
20373
arm_init_builtins (void)
20374
{
20375
  arm_init_tls_builtins ();
20376
 
20377
  if (TARGET_REALLY_IWMMXT)
20378
    arm_init_iwmmxt_builtins ();
20379
 
20380
  if (TARGET_NEON)
20381
    arm_init_neon_builtins ();
20382
 
20383
  if (arm_fp16_format)
20384
    arm_init_fp16_builtins ();
20385
}
20386
 
20387
/* Return the ARM builtin for CODE.  */
20388
 
20389
static tree
20390
arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
20391
{
20392
  if (code >= ARM_BUILTIN_MAX)
20393
    return error_mark_node;
20394
 
20395
  return arm_builtin_decls[code];
20396
}
20397
 
20398
/* Implement TARGET_INVALID_PARAMETER_TYPE.  */
20399
 
20400
static const char *
20401
arm_invalid_parameter_type (const_tree t)
20402
{
20403
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
20404
    return N_("function parameters cannot have __fp16 type");
20405
  return NULL;
20406
}
20407
 
20408
/* Implement TARGET_INVALID_RETURN_TYPE.  */
20409
 
20410
static const char *
20411
arm_invalid_return_type (const_tree t)
20412
{
20413
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
20414
    return N_("functions cannot return __fp16 type");
20415
  return NULL;
20416
}
20417
 
20418
/* Implement TARGET_PROMOTED_TYPE.  */
20419
 
20420
static tree
20421
arm_promoted_type (const_tree t)
20422
{
20423
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
20424
    return float_type_node;
20425
  return NULL_TREE;
20426
}
20427
 
20428
/* Implement TARGET_CONVERT_TO_TYPE.
20429
   Specifically, this hook implements the peculiarity of the ARM
20430
   half-precision floating-point C semantics that requires conversions between
20431
   __fp16 to or from double to do an intermediate conversion to float.  */
20432
 
20433
static tree
20434
arm_convert_to_type (tree type, tree expr)
20435
{
20436
  tree fromtype = TREE_TYPE (expr);
20437
  if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
20438
    return NULL_TREE;
20439
  if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
20440
      || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
20441
    return convert (type, convert (float_type_node, expr));
20442
  return NULL_TREE;
20443
}
20444
 
20445
/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
20446
   This simply adds HFmode as a supported mode; even though we don't
20447
   implement arithmetic on this type directly, it's supported by
20448
   optabs conversions, much the way the double-word arithmetic is
20449
   special-cased in the default hook.  */
20450
 
20451
static bool
20452
arm_scalar_mode_supported_p (enum machine_mode mode)
20453
{
20454
  if (mode == HFmode)
20455
    return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
20456
  else if (ALL_FIXED_POINT_MODE_P (mode))
20457
    return true;
20458
  else
20459
    return default_scalar_mode_supported_p (mode);
20460
}
20461
 
20462
/* Errors in the source file can cause expand_expr to return const0_rtx
20463
   where we expect a vector.  To avoid crashing, use one of the vector
20464
   clear instructions.  */
20465
 
20466
static rtx
20467
safe_vector_operand (rtx x, enum machine_mode mode)
20468
{
20469
  if (x != const0_rtx)
20470
    return x;
20471
  x = gen_reg_rtx (mode);
20472
 
20473
  emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
20474
                               : gen_rtx_SUBREG (DImode, x, 0)));
20475
  return x;
20476
}
20477
 
20478
/* Subroutine of arm_expand_builtin to take care of binop insns.  */
20479
 
20480
static rtx
20481
arm_expand_binop_builtin (enum insn_code icode,
20482
                          tree exp, rtx target)
20483
{
20484
  rtx pat;
20485
  tree arg0 = CALL_EXPR_ARG (exp, 0);
20486
  tree arg1 = CALL_EXPR_ARG (exp, 1);
20487
  rtx op0 = expand_normal (arg0);
20488
  rtx op1 = expand_normal (arg1);
20489
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
20490
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20491
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;
20492
 
20493
  if (VECTOR_MODE_P (mode0))
20494
    op0 = safe_vector_operand (op0, mode0);
20495
  if (VECTOR_MODE_P (mode1))
20496
    op1 = safe_vector_operand (op1, mode1);
20497
 
20498
  if (! target
20499
      || GET_MODE (target) != tmode
20500
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20501
    target = gen_reg_rtx (tmode);
20502
 
20503
  gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
20504
 
20505
  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20506
    op0 = copy_to_mode_reg (mode0, op0);
20507
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20508
    op1 = copy_to_mode_reg (mode1, op1);
20509
 
20510
  pat = GEN_FCN (icode) (target, op0, op1);
20511
  if (! pat)
20512
    return 0;
20513
  emit_insn (pat);
20514
  return target;
20515
}
20516
 
20517
/* Subroutine of arm_expand_builtin to take care of unop insns.  */
20518
 
20519
static rtx
20520
arm_expand_unop_builtin (enum insn_code icode,
20521
                         tree exp, rtx target, int do_load)
20522
{
20523
  rtx pat;
20524
  tree arg0 = CALL_EXPR_ARG (exp, 0);
20525
  rtx op0 = expand_normal (arg0);
20526
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
20527
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20528
 
20529
  if (! target
20530
      || GET_MODE (target) != tmode
20531
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20532
    target = gen_reg_rtx (tmode);
20533
  if (do_load)
20534
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
20535
  else
20536
    {
20537
      if (VECTOR_MODE_P (mode0))
20538
        op0 = safe_vector_operand (op0, mode0);
20539
 
20540
      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20541
        op0 = copy_to_mode_reg (mode0, op0);
20542
    }
20543
 
20544
  pat = GEN_FCN (icode) (target, op0);
20545
  if (! pat)
20546
    return 0;
20547
  emit_insn (pat);
20548
  return target;
20549
}
20550
 
20551
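/* How arm_expand_neon_args should handle each argument of a Neon builtin.  */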
typedef enum {
20552
  NEON_ARG_COPY_TO_REG,
20553
  NEON_ARG_CONSTANT,
20554
  NEON_ARG_MEMORY,
20555
  NEON_ARG_STOP
20556
} builtin_arg;
20557
 
20558
#define NEON_MAX_BUILTIN_ARGS 5
20559
 
20560
/* EXP is a pointer argument to a Neon load or store intrinsic.  Derive
20561
   and return an expression for the accessed memory.
20562
 
20563
   The intrinsic function operates on a block of registers that has
20564
   mode REG_MODE.  This block contains vectors of type TYPE_MODE.
20565
   The function references the memory at EXP in mode MEM_MODE;
20566
   this mode may be BLKmode if no more suitable mode is available.  */
20567
 
20568
static tree
20569
neon_dereference_pointer (tree exp, enum machine_mode mem_mode,
20570
                          enum machine_mode reg_mode,
20571
                          neon_builtin_type_mode type_mode)
20572
{
20573
  HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
20574
  tree elem_type, upper_bound, array_type;
20575
 
20576
  /* Work out the size of the register block in bytes.  */
20577
  reg_size = GET_MODE_SIZE (reg_mode);
20578
 
20579
  /* Work out the size of each vector in bytes.  */
20580
  gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
20581
  vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
20582
 
20583
  /* Work out how many vectors there are.  */
20584
  gcc_assert (reg_size % vector_size == 0);
20585
  nvectors = reg_size / vector_size;
20586
 
20587
  /* Work out how many elements are being loaded or stored.
20588
     MEM_MODE == REG_MODE implies a one-to-one mapping between register
20589
     and memory elements; anything else implies a lane load or store.  */
20590
  if (mem_mode == reg_mode)
20591
    nelems = vector_size * nvectors;
20592
  else
20593
    nelems = nvectors;
20594
 
20595
  /* Work out the type of each element.  */
20596
  gcc_assert (POINTER_TYPE_P (TREE_TYPE (exp)));
20597
  elem_type = TREE_TYPE (TREE_TYPE (exp));
20598
 
20599
  /* Create a type that describes the full access.  */
20600
  upper_bound = build_int_cst (size_type_node, nelems - 1);
20601
  array_type = build_array_type (elem_type, build_index_type (upper_bound));
20602
 
20603
  /* Dereference EXP using that type.  */
20604
  exp = convert (build_pointer_type (array_type), exp);
20605
  return fold_build2 (MEM_REF, array_type, exp,
20606
                      build_int_cst (TREE_TYPE (exp), 0));
20607
}
20608
 
20609
/* Expand a Neon builtin.  */
20610
static rtx
20611
arm_expand_neon_args (rtx target, int icode, int have_retval,
20612
                      neon_builtin_type_mode type_mode,
20613
                      tree exp, ...)
20614
{
20615
  va_list ap;
20616
  rtx pat;
20617
  tree arg[NEON_MAX_BUILTIN_ARGS];
20618
  rtx op[NEON_MAX_BUILTIN_ARGS];
20619
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
20620
  enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
20621
  enum machine_mode other_mode;
20622
  int argc = 0;
20623
  int opno;
20624
 
20625
  if (have_retval
20626
      && (!target
20627
          || GET_MODE (target) != tmode
20628
          || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
20629
    target = gen_reg_rtx (tmode);
20630
 
20631
  va_start (ap, exp);
20632
 
20633
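  /* Process the NEON_ARG_* codes passed by the caller, expanding and
     legitimizing each call argument in turn until NEON_ARG_STOP.  */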
  for (;;)
20634
    {
20635
      builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
20636
 
20637
      if (thisarg == NEON_ARG_STOP)
20638
        break;
20639
      else
20640
        {
20641
          opno = argc + have_retval;
20642
          mode[argc] = insn_data[icode].operand[opno].mode;
20643
          arg[argc] = CALL_EXPR_ARG (exp, argc);
20644
          if (thisarg == NEON_ARG_MEMORY)
20645
            {
20646
              other_mode = insn_data[icode].operand[1 - opno].mode;
20647
              arg[argc] = neon_dereference_pointer (arg[argc], mode[argc],
20648
                                                    other_mode, type_mode);
20649
            }
20650
          op[argc] = expand_normal (arg[argc]);
20651
 
20652
          switch (thisarg)
20653
            {
20654
            case NEON_ARG_COPY_TO_REG:
20655
              /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
20656
              if (!(*insn_data[icode].operand[opno].predicate)
20657
                     (op[argc], mode[argc]))
20658
                op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
20659
              break;
20660
 
20661
            case NEON_ARG_CONSTANT:
20662
              /* FIXME: This error message is somewhat unhelpful.  */
20663
              if (!(*insn_data[icode].operand[opno].predicate)
20664
                    (op[argc], mode[argc]))
20665
                error ("argument must be a constant");
20666
              break;
20667
 
20668
            case NEON_ARG_MEMORY:
20669
              gcc_assert (MEM_P (op[argc]));
20670
              PUT_MODE (op[argc], mode[argc]);
20671
              /* ??? arm_neon.h uses the same built-in functions for signed
20672
                 and unsigned accesses, casting where necessary.  This isn't
20673
                 alias safe.  */
20674
              set_mem_alias_set (op[argc], 0);
20675
              if (!(*insn_data[icode].operand[opno].predicate)
20676
                    (op[argc], mode[argc]))
20677
                op[argc] = (replace_equiv_address
20678
                            (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
20679
              break;
20680
 
20681
            case NEON_ARG_STOP:
20682
              gcc_unreachable ();
20683
            }
20684
 
20685
          argc++;
20686
        }
20687
    }
20688
 
20689
  va_end (ap);
20690
 
20691
  if (have_retval)
20692
    switch (argc)
20693
      {
20694
      case 1:
20695
        pat = GEN_FCN (icode) (target, op[0]);
20696
        break;
20697
 
20698
      case 2:
20699
        pat = GEN_FCN (icode) (target, op[0], op[1]);
20700
        break;
20701
 
20702
      case 3:
20703
        pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
20704
        break;
20705
 
20706
      case 4:
20707
        pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
20708
        break;
20709
 
20710
      case 5:
20711
        pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
20712
        break;
20713
 
20714
      default:
20715
        gcc_unreachable ();
20716
      }
20717
  else
20718
    switch (argc)
20719
      {
20720
      case 1:
20721
        pat = GEN_FCN (icode) (op[0]);
20722
        break;
20723
 
20724
      case 2:
20725
        pat = GEN_FCN (icode) (op[0], op[1]);
20726
        break;
20727
 
20728
      case 3:
20729
        pat = GEN_FCN (icode) (op[0], op[1], op[2]);
20730
        break;
20731
 
20732
      case 4:
20733
        pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
20734
        break;
20735
 
20736
      case 5:
20737
        pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
20738
        break;
20739
 
20740
      default:
20741
        gcc_unreachable ();
20742
      }
20743
 
20744
  if (!pat)
20745
    return 0;
20746
 
20747
  emit_insn (pat);
20748
 
20749
  return target;
20750
}
20751
 
20752
/* Expand a Neon builtin. These are "special" because they don't have symbolic
20753
   constants defined per-instruction or per instruction-variant. Instead, the
20754
   required info is looked up in the table neon_builtin_data.  */
20755
static rtx
20756
arm_expand_neon_builtin (int fcode, tree exp, rtx target)
20757
{
20758
  neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
20759
  neon_itype itype = d->itype;
20760
  enum insn_code icode = d->code;
20761
  neon_builtin_type_mode type_mode = d->mode;
20762
 
20763
  switch (itype)
20764
    {
20765
    case NEON_UNOP:
20766
    case NEON_CONVERT:
20767
    case NEON_DUPLANE:
20768
      return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20769
        NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
20770
 
20771
    case NEON_BINOP:
20772
    case NEON_SETLANE:
20773
    case NEON_SCALARMUL:
20774
    case NEON_SCALARMULL:
20775
    case NEON_SCALARMULH:
20776
    case NEON_SHIFTINSERT:
20777
    case NEON_LOGICBINOP:
20778
      return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20779
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20780
        NEON_ARG_STOP);
20781
 
20782
    case NEON_TERNOP:
20783
      return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20784
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20785
        NEON_ARG_CONSTANT, NEON_ARG_STOP);
20786
 
20787
    case NEON_GETLANE:
20788
    case NEON_FIXCONV:
20789
    case NEON_SHIFTIMM:
20790
      return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20791
        NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
20792
        NEON_ARG_STOP);
20793
 
20794
    case NEON_CREATE:
20795
      return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20796
        NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20797
 
20798
    case NEON_DUP:
20799
    case NEON_SPLIT:
20800
    case NEON_REINTERP:
20801
      return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20802
        NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20803
 
20804
    case NEON_COMBINE:
20805
    case NEON_VTBL:
20806
      return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20807
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20808
 
20809
    case NEON_RESULTPAIR:
20810
      return arm_expand_neon_args (target, icode, 0, type_mode, exp,
20811
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20812
        NEON_ARG_STOP);
20813
 
20814
    case NEON_LANEMUL:
20815
    case NEON_LANEMULL:
20816
    case NEON_LANEMULH:
20817
      return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20818
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20819
        NEON_ARG_CONSTANT, NEON_ARG_STOP);
20820
 
20821
    case NEON_LANEMAC:
20822
      return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20823
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20824
        NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
20825
 
20826
    case NEON_SHIFTACC:
20827
      return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20828
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20829
        NEON_ARG_CONSTANT, NEON_ARG_STOP);
20830
 
20831
    case NEON_SCALARMAC:
20832
      return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20833
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20834
        NEON_ARG_CONSTANT, NEON_ARG_STOP);
20835
 
20836
    case NEON_SELECT:
20837
    case NEON_VTBX:
20838
      return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20839
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20840
        NEON_ARG_STOP);
20841
 
20842
    case NEON_LOAD1:
20843
    case NEON_LOADSTRUCT:
20844
      return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20845
        NEON_ARG_MEMORY, NEON_ARG_STOP);
20846
 
20847
    case NEON_LOAD1LANE:
20848
    case NEON_LOADSTRUCTLANE:
20849
      return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20850
        NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20851
        NEON_ARG_STOP);
20852
 
20853
    case NEON_STORE1:
20854
    case NEON_STORESTRUCT:
20855
      return arm_expand_neon_args (target, icode, 0, type_mode, exp,
20856
        NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20857
 
20858
    case NEON_STORE1LANE:
20859
    case NEON_STORESTRUCTLANE:
20860
      return arm_expand_neon_args (target, icode, 0, type_mode, exp,
20861
        NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20862
        NEON_ARG_STOP);
20863
    }
20864
 
20865
  gcc_unreachable ();
20866
}
20867
 
20868
/* Emit code to reinterpret one Neon type as another, without altering bits.  */
20869
void
20870
neon_reinterpret (rtx dest, rtx src)
20871
{
20872
  emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
20873
}
20874
 
20875
/* Emit code to place a Neon pair result in memory locations (with equal
20876
   registers).  */
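/* This helper is used by builtins such as the vtrn/vzip/vuzp family: INTFN
   generates a single insn that computes both halves of the pair into fresh
   pseudos, and the two values are then stored to consecutive MODE-sized
   slots starting at DESTADDR.  */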
20877
void
20878
neon_emit_pair_result_insn (enum machine_mode mode,
20879
                            rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
20880
                            rtx op1, rtx op2)
20881
{
20882
  rtx mem = gen_rtx_MEM (mode, destaddr);
20883
  rtx tmp1 = gen_reg_rtx (mode);
20884
  rtx tmp2 = gen_reg_rtx (mode);
20885
 
20886
  emit_insn (intfn (tmp1, op1, op2, tmp2));
20887
 
20888
  emit_move_insn (mem, tmp1);
20889
  mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
20890
  emit_move_insn (mem, tmp2);
20891
}
20892
 
20893
/* Set up OPERANDS for a register copy from SRC to DEST, taking care
20894
   not to early-clobber SRC registers in the process.
20895
 
20896
   We assume that the operands described by SRC and DEST represent a
20897
   decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
20898
   number of components into which the copy has been decomposed.  */
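/* For example, when the destination overlaps the source and has the higher
   register number, copying component 0 first would clobber a source
   component that is still needed, so the component moves are emitted in
   reverse order instead.  */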
20899
void
20900
neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
20901
{
20902
  unsigned int i;
20903
 
20904
  if (!reg_overlap_mentioned_p (operands[0], operands[1])
20905
      || REGNO (operands[0]) < REGNO (operands[1]))
20906
    {
20907
      for (i = 0; i < count; i++)
20908
        {
20909
          operands[2 * i] = dest[i];
20910
          operands[2 * i + 1] = src[i];
20911
        }
20912
    }
20913
  else
20914
    {
20915
      for (i = 0; i < count; i++)
20916
        {
20917
          operands[2 * i] = dest[count - i - 1];
20918
          operands[2 * i + 1] = src[count - i - 1];
20919
        }
20920
    }
20921
}
20922
 
20923
/* Split operands into moves from op[1] + op[2] into op[0].  */
20924
 
20925
void
20926
neon_split_vcombine (rtx operands[3])
20927
{
20928
  unsigned int dest = REGNO (operands[0]);
20929
  unsigned int src1 = REGNO (operands[1]);
20930
  unsigned int src2 = REGNO (operands[2]);
20931
  enum machine_mode halfmode = GET_MODE (operands[1]);
20932
  unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
20933
  rtx destlo, desthi;
20934
 
20935
  if (src1 == dest && src2 == dest + halfregs)
20936
    {
20937
      /* No-op move.  Can't split to nothing; emit something.  */
20938
      emit_note (NOTE_INSN_DELETED);
20939
      return;
20940
    }
20941
 
20942
  /* Preserve register attributes for variable tracking.  */
20943
  destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
20944
  desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
20945
                               GET_MODE_SIZE (halfmode));
20946
 
20947
  /* Special case of reversed high/low parts.  Use VSWP.  */
20948
  if (src2 == dest && src1 == dest + halfregs)
20949
    {
20950
      rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
20951
      rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
20952
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
20953
      return;
20954
    }
20955
 
20956
  if (!reg_overlap_mentioned_p (operands[2], destlo))
20957
    {
20958
      /* Try to avoid unnecessary moves if part of the result
20959
         is in the right place already.  */
20960
      if (src1 != dest)
20961
        emit_move_insn (destlo, operands[1]);
20962
      if (src2 != dest + halfregs)
20963
        emit_move_insn (desthi, operands[2]);
20964
    }
20965
  else
20966
    {
20967
      if (src2 != dest + halfregs)
20968
        emit_move_insn (desthi, operands[2]);
20969
      if (src1 != dest)
20970
        emit_move_insn (destlo, operands[1]);
20971
    }
20972
}
20973
 
20974
/* Expand an expression EXP that calls a built-in function,
20975
   with result going to TARGET if that's convenient
20976
   (and in mode MODE if that's convenient).
20977
   SUBTARGET may be used as the target for computing one of EXP's operands.
20978
   IGNORE is nonzero if the value is to be ignored.  */
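/* Dispatch summary: codes at or above ARM_BUILTIN_NEON_BASE are handed off
   to arm_expand_neon_builtin; the switch below deals with the iWMMXt
   builtins that need special operand checking; whatever remains is looked
   up in the generic bdesc_2arg and bdesc_1arg tables.  */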
20979
 
20980
static rtx
20981
arm_expand_builtin (tree exp,
20982
                    rtx target,
20983
                    rtx subtarget ATTRIBUTE_UNUSED,
20984
                    enum machine_mode mode ATTRIBUTE_UNUSED,
20985
                    int ignore ATTRIBUTE_UNUSED)
20986
{
20987
  const struct builtin_description * d;
20988
  enum insn_code    icode;
20989
  tree              fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
20990
  tree              arg0;
20991
  tree              arg1;
20992
  tree              arg2;
20993
  rtx               op0;
20994
  rtx               op1;
20995
  rtx               op2;
20996
  rtx               pat;
20997
  int               fcode = DECL_FUNCTION_CODE (fndecl);
20998
  size_t            i;
20999
  enum machine_mode tmode;
21000
  enum machine_mode mode0;
21001
  enum machine_mode mode1;
21002
  enum machine_mode mode2;
21003
 
21004
  if (fcode >= ARM_BUILTIN_NEON_BASE)
21005
    return arm_expand_neon_builtin (fcode, exp, target);
21006
 
21007
  switch (fcode)
21008
    {
21009
    case ARM_BUILTIN_TEXTRMSB:
21010
    case ARM_BUILTIN_TEXTRMUB:
21011
    case ARM_BUILTIN_TEXTRMSH:
21012
    case ARM_BUILTIN_TEXTRMUH:
21013
    case ARM_BUILTIN_TEXTRMSW:
21014
    case ARM_BUILTIN_TEXTRMUW:
21015
      icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
21016
               : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
21017
               : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
21018
               : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
21019
               : CODE_FOR_iwmmxt_textrmw);
21020
 
21021
      arg0 = CALL_EXPR_ARG (exp, 0);
21022
      arg1 = CALL_EXPR_ARG (exp, 1);
21023
      op0 = expand_normal (arg0);
21024
      op1 = expand_normal (arg1);
21025
      tmode = insn_data[icode].operand[0].mode;
21026
      mode0 = insn_data[icode].operand[1].mode;
21027
      mode1 = insn_data[icode].operand[2].mode;
21028
 
21029
      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
21030
        op0 = copy_to_mode_reg (mode0, op0);
21031
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
21032
        {
21033
          /* @@@ better error message */
21034
          error ("selector must be an immediate");
21035
          return gen_reg_rtx (tmode);
21036
        }
21037
      if (target == 0
21038
          || GET_MODE (target) != tmode
21039
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21040
        target = gen_reg_rtx (tmode);
21041
      pat = GEN_FCN (icode) (target, op0, op1);
21042
      if (! pat)
21043
        return 0;
21044
      emit_insn (pat);
21045
      return target;
21046
 
21047
    case ARM_BUILTIN_TINSRB:
21048
    case ARM_BUILTIN_TINSRH:
21049
    case ARM_BUILTIN_TINSRW:
21050
      icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
21051
               : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
21052
               : CODE_FOR_iwmmxt_tinsrw);
21053
      arg0 = CALL_EXPR_ARG (exp, 0);
21054
      arg1 = CALL_EXPR_ARG (exp, 1);
21055
      arg2 = CALL_EXPR_ARG (exp, 2);
21056
      op0 = expand_normal (arg0);
21057
      op1 = expand_normal (arg1);
21058
      op2 = expand_normal (arg2);
21059
      tmode = insn_data[icode].operand[0].mode;
21060
      mode0 = insn_data[icode].operand[1].mode;
21061
      mode1 = insn_data[icode].operand[2].mode;
21062
      mode2 = insn_data[icode].operand[3].mode;
21063
 
21064
      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
21065
        op0 = copy_to_mode_reg (mode0, op0);
21066
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
21067
        op1 = copy_to_mode_reg (mode1, op1);
21068
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
21069
        {
21070
          /* @@@ better error message */
21071
          error ("selector must be an immediate");
21072
          return const0_rtx;
21073
        }
21074
      if (target == 0
21075
          || GET_MODE (target) != tmode
21076
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21077
        target = gen_reg_rtx (tmode);
21078
      pat = GEN_FCN (icode) (target, op0, op1, op2);
21079
      if (! pat)
21080
        return 0;
21081
      emit_insn (pat);
21082
      return target;
21083
 
21084
    case ARM_BUILTIN_SETWCX:
21085
      arg0 = CALL_EXPR_ARG (exp, 0);
21086
      arg1 = CALL_EXPR_ARG (exp, 1);
21087
      op0 = force_reg (SImode, expand_normal (arg0));
21088
      op1 = expand_normal (arg1);
21089
      emit_insn (gen_iwmmxt_tmcr (op1, op0));
21090
      return 0;
21091
 
21092
    case ARM_BUILTIN_GETWCX:
21093
      arg0 = CALL_EXPR_ARG (exp, 0);
21094
      op0 = expand_normal (arg0);
21095
      target = gen_reg_rtx (SImode);
21096
      emit_insn (gen_iwmmxt_tmrc (target, op0));
21097
      return target;
21098
 
21099
    case ARM_BUILTIN_WSHUFH:
21100
      icode = CODE_FOR_iwmmxt_wshufh;
21101
      arg0 = CALL_EXPR_ARG (exp, 0);
21102
      arg1 = CALL_EXPR_ARG (exp, 1);
21103
      op0 = expand_normal (arg0);
21104
      op1 = expand_normal (arg1);
21105
      tmode = insn_data[icode].operand[0].mode;
21106
      mode1 = insn_data[icode].operand[1].mode;
21107
      mode2 = insn_data[icode].operand[2].mode;
21108
 
21109
      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21110
        op0 = copy_to_mode_reg (mode1, op0);
21111
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21112
        {
21113
          /* @@@ better error message */
21114
          error ("mask must be an immediate");
21115
          return const0_rtx;
21116
        }
21117
      if (target == 0
21118
          || GET_MODE (target) != tmode
21119
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21120
        target = gen_reg_rtx (tmode);
21121
      pat = GEN_FCN (icode) (target, op0, op1);
21122
      if (! pat)
21123
        return 0;
21124
      emit_insn (pat);
21125
      return target;
21126
 
21127
    case ARM_BUILTIN_WSADB:
21128
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
21129
    case ARM_BUILTIN_WSADH:
21130
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
21131
    case ARM_BUILTIN_WSADBZ:
21132
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
21133
    case ARM_BUILTIN_WSADHZ:
21134
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
21135
 
21136
      /* Several three-argument builtins.  */
21137
    case ARM_BUILTIN_WMACS:
21138
    case ARM_BUILTIN_WMACU:
21139
    case ARM_BUILTIN_WALIGN:
21140
    case ARM_BUILTIN_TMIA:
21141
    case ARM_BUILTIN_TMIAPH:
21142
    case ARM_BUILTIN_TMIATT:
21143
    case ARM_BUILTIN_TMIATB:
21144
    case ARM_BUILTIN_TMIABT:
21145
    case ARM_BUILTIN_TMIABB:
21146
      icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
21147
               : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
21148
               : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
21149
               : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
21150
               : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
21151
               : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
21152
               : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
21153
               : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
21154
               : CODE_FOR_iwmmxt_walign);
21155
      arg0 = CALL_EXPR_ARG (exp, 0);
21156
      arg1 = CALL_EXPR_ARG (exp, 1);
21157
      arg2 = CALL_EXPR_ARG (exp, 2);
21158
      op0 = expand_normal (arg0);
21159
      op1 = expand_normal (arg1);
21160
      op2 = expand_normal (arg2);
21161
      tmode = insn_data[icode].operand[0].mode;
21162
      mode0 = insn_data[icode].operand[1].mode;
21163
      mode1 = insn_data[icode].operand[2].mode;
21164
      mode2 = insn_data[icode].operand[3].mode;
21165
 
21166
      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
21167
        op0 = copy_to_mode_reg (mode0, op0);
21168
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
21169
        op1 = copy_to_mode_reg (mode1, op1);
21170
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
21171
        op2 = copy_to_mode_reg (mode2, op2);
21172
      if (target == 0
21173
          || GET_MODE (target) != tmode
21174
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21175
        target = gen_reg_rtx (tmode);
21176
      pat = GEN_FCN (icode) (target, op0, op1, op2);
21177
      if (! pat)
21178
        return 0;
21179
      emit_insn (pat);
21180
      return target;
21181
 
21182
    case ARM_BUILTIN_WZERO:
21183
      target = gen_reg_rtx (DImode);
21184
      emit_insn (gen_iwmmxt_clrdi (target));
21185
      return target;
21186
 
21187
    case ARM_BUILTIN_THREAD_POINTER:
21188
      return arm_load_tp (target);
21189
 
21190
    default:
21191
      break;
21192
    }
21193
 
21194
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
21195
    if (d->code == (const enum arm_builtins) fcode)
21196
      return arm_expand_binop_builtin (d->icode, exp, target);
21197
 
21198
  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
21199
    if (d->code == (const enum arm_builtins) fcode)
21200
      return arm_expand_unop_builtin (d->icode, exp, target, 0);
21201
 
21202
  /* @@@ Should really do something sensible here.  */
21203
  return NULL_RTX;
21204
}
21205
 
21206
/* Return the number (counting from 0) of
21207
   the least significant set bit in MASK.  */
21208
 
21209
inline static int
21210
number_of_first_bit_set (unsigned mask)
21211
{
21212
  return ctz_hwi (mask);
21213
}
21214
 
21215
/* Like emit_multi_reg_push, but allowing for a different set of
21216
   registers to be described as saved.  MASK is the set of registers
21217
   to be saved; REAL_REGS is the set of registers to be described as
21218
   saved.  If REAL_REGS is 0, only describe the stack adjustment.  */
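/* For example, MASK == 0x4090 (r4, r7 and lr) produces one push insn that
   pre-decrements SP by 12 and stores the three registers; the attached
   REG_FRAME_RELATED_EXPR note describes the saves in terms of REAL_REGS so
   the unwind information reflects the registers we claim to have saved.  */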
21219
 
21220
static rtx
21221
thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
21222
{
21223
  unsigned long regno;
21224
  rtx par[10], tmp, reg, insn;
21225
  int i, j;
21226
 
21227
  /* Build the parallel of the registers actually being stored.  */
21228
  for (i = 0; mask; ++i, mask &= mask - 1)
21229
    {
21230
      regno = ctz_hwi (mask);
21231
      reg = gen_rtx_REG (SImode, regno);
21232
 
21233
      if (i == 0)
21234
        tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
21235
      else
21236
        tmp = gen_rtx_USE (VOIDmode, reg);
21237
 
21238
      par[i] = tmp;
21239
    }
21240
 
21241
  tmp = plus_constant (stack_pointer_rtx, -4 * i);
21242
  tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
21243
  tmp = gen_frame_mem (BLKmode, tmp);
21244
  tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
21245
  par[0] = tmp;
21246
 
21247
  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
21248
  insn = emit_insn (tmp);
21249
 
21250
  /* Always build the stack adjustment note for unwind info.  */
21251
  tmp = plus_constant (stack_pointer_rtx, -4 * i);
21252
  tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
21253
  par[0] = tmp;
21254
 
21255
  /* Build the parallel of the registers recorded as saved for unwind.  */
21256
  for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
21257
    {
21258
      regno = ctz_hwi (real_regs);
21259
      reg = gen_rtx_REG (SImode, regno);
21260
 
21261
      tmp = plus_constant (stack_pointer_rtx, j * 4);
21262
      tmp = gen_frame_mem (SImode, tmp);
21263
      tmp = gen_rtx_SET (VOIDmode, tmp, reg);
21264
      RTX_FRAME_RELATED_P (tmp) = 1;
21265
      par[j + 1] = tmp;
21266
    }
21267
 
21268
  if (j == 0)
21269
    tmp = par[0];
21270
  else
21271
    {
21272
      RTX_FRAME_RELATED_P (par[0]) = 1;
21273
      tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
21274
    }
21275
 
21276
  add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
21277
 
21278
  return insn;
21279
}
21280
 
21281
/* Emit code to push or pop registers to or from the stack.  F is the
21282
   assembly file.  MASK is the registers to pop.  */
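/* For example, MASK == 0x8001 emits "pop {r0, pc}", unless interworking,
   backtracing or an EH return is in effect, in which case the PC is popped
   via thumb_exit instead.  */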
21283
static void
21284
thumb_pop (FILE *f, unsigned long mask)
21285
{
21286
  int regno;
21287
  int lo_mask = mask & 0xFF;
21288
  int pushed_words = 0;
21289
 
21290
  gcc_assert (mask);
21291
 
21292
  if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
21293
    {
21294
      /* Special case.  Do not generate a POP PC statement here; do it in
21295
         thumb_exit ().  */
21296
      thumb_exit (f, -1);
21297
      return;
21298
    }
21299
 
21300
  fprintf (f, "\tpop\t{");
21301
 
21302
  /* Look at the low registers first.  */
21303
  for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
21304
    {
21305
      if (lo_mask & 1)
21306
        {
21307
          asm_fprintf (f, "%r", regno);
21308
 
21309
          if ((lo_mask & ~1) != 0)
21310
            fprintf (f, ", ");
21311
 
21312
          pushed_words++;
21313
        }
21314
    }
21315
 
21316
  if (mask & (1 << PC_REGNUM))
21317
    {
21318
      /* Catch popping the PC.  */
21319
      if (TARGET_INTERWORK || TARGET_BACKTRACE
21320
          || crtl->calls_eh_return)
21321
        {
21322
          /* The PC is never popped directly; instead
21323
             it is popped into r3 and then BX is used.  */
21324
          fprintf (f, "}\n");
21325
 
21326
          thumb_exit (f, -1);
21327
 
21328
          return;
21329
        }
21330
      else
21331
        {
21332
          if (mask & 0xFF)
21333
            fprintf (f, ", ");
21334
 
21335
          asm_fprintf (f, "%r", PC_REGNUM);
21336
        }
21337
    }
21338
 
21339
  fprintf (f, "}\n");
21340
}
21341
 
21342
/* Generate code to return from a thumb function.
21343
   If 'reg_containing_return_addr' is -1, then the return address is
21344
   actually on the stack, at the stack pointer.  */
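/* The strategy is to find argument registers (r0-r3) that are not needed
   for the return value and use them as scratch space when popping the saved
   LR, frame pointer and stack pointer, falling back on shuffling values
   through LR or IP when no low register is free.  */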
21345
static void
21346
thumb_exit (FILE *f, int reg_containing_return_addr)
21347
{
21348
  unsigned regs_available_for_popping;
21349
  unsigned regs_to_pop;
21350
  int pops_needed;
21351
  unsigned available;
21352
  unsigned required;
21353
  int mode;
21354
  int size;
21355
  int restore_a4 = FALSE;
21356
 
21357
  /* Compute the registers we need to pop.  */
21358
  regs_to_pop = 0;
21359
  pops_needed = 0;
21360
 
21361
  if (reg_containing_return_addr == -1)
21362
    {
21363
      regs_to_pop |= 1 << LR_REGNUM;
21364
      ++pops_needed;
21365
    }
21366
 
21367
  if (TARGET_BACKTRACE)
21368
    {
21369
      /* Restore the (ARM) frame pointer and stack pointer.  */
21370
      regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
21371
      pops_needed += 2;
21372
    }
21373
 
21374
  /* If there is nothing to pop then just emit the BX instruction and
21375
     return.  */
21376
  if (pops_needed == 0)
21377
    {
21378
      if (crtl->calls_eh_return)
21379
        asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
21380
 
21381
      asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
21382
      return;
21383
    }
21384
  /* Otherwise if we are not supporting interworking and we have not created
21385
     a backtrace structure and the function was not entered in ARM mode then
21386
     just pop the return address straight into the PC.  */
21387
  else if (!TARGET_INTERWORK
21388
           && !TARGET_BACKTRACE
21389
           && !is_called_in_ARM_mode (current_function_decl)
21390
           && !crtl->calls_eh_return)
21391
    {
21392
      asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
21393
      return;
21394
    }
21395
 
21396
  /* Find out how many of the (return) argument registers we can corrupt.  */
21397
  regs_available_for_popping = 0;
21398
 
21399
  /* If returning via __builtin_eh_return, the bottom three registers
21400
     all contain information needed for the return.  */
21401
  if (crtl->calls_eh_return)
21402
    size = 12;
21403
  else
21404
    {
21405
      /* We can deduce the registers used from the function's
21406
         return value.  This is more reliable than examining
21407
         df_regs_ever_live_p () because that will be set if the register is
21408
         ever used in the function, not just if the register is used
21409
         to hold a return value.  */
21410
 
21411
      if (crtl->return_rtx != 0)
21412
        mode = GET_MODE (crtl->return_rtx);
21413
      else
21414
        mode = DECL_MODE (DECL_RESULT (current_function_decl));
21415
 
21416
      size = GET_MODE_SIZE (mode);
21417
 
21418
      if (size == 0)
21419
        {
21420
          /* In a void function we can use any argument register.
21421
             In a function that returns a structure on the stack
21422
             we can use the second and third argument registers.  */
21423
          if (mode == VOIDmode)
21424
            regs_available_for_popping =
21425
              (1 << ARG_REGISTER (1))
21426
              | (1 << ARG_REGISTER (2))
21427
              | (1 << ARG_REGISTER (3));
21428
          else
21429
            regs_available_for_popping =
21430
              (1 << ARG_REGISTER (2))
21431
              | (1 << ARG_REGISTER (3));
21432
        }
21433
      else if (size <= 4)
21434
        regs_available_for_popping =
21435
          (1 << ARG_REGISTER (2))
21436
          | (1 << ARG_REGISTER (3));
21437
      else if (size <= 8)
21438
        regs_available_for_popping =
21439
          (1 << ARG_REGISTER (3));
21440
    }
21441
 
21442
  /* Match registers to be popped with registers into which we pop them.  */
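  /* (AVAILABLE & -AVAILABLE isolates the lowest set bit, so each iteration
     of this loop pairs off one register that needs popping with one
     register we can pop into.)  */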
21443
  for (available = regs_available_for_popping,
21444
       required  = regs_to_pop;
21445
       required != 0 && available != 0;
21446
       available &= ~(available & - available),
21447
       required  &= ~(required  & - required))
21448
    -- pops_needed;
21449
 
21450
  /* If we have any popping registers left over, remove them.  */
21451
  if (available > 0)
21452
    regs_available_for_popping &= ~available;
21453
 
21454
  /* Otherwise if we need another popping register we can use
21455
     the fourth argument register.  */
21456
  else if (pops_needed)
21457
    {
21458
      /* If we have not found any free argument registers and
21459
         reg a4 contains the return address, we must move it.  */
21460
      if (regs_available_for_popping == 0
21461
          && reg_containing_return_addr == LAST_ARG_REGNUM)
21462
        {
21463
          asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
21464
          reg_containing_return_addr = LR_REGNUM;
21465
        }
21466
      else if (size > 12)
21467
        {
21468
          /* Register a4 is being used to hold part of the return value,
21469
             but we have dire need of a free, low register.  */
21470
          restore_a4 = TRUE;
21471
 
21472
          asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
21473
        }
21474
 
21475
      if (reg_containing_return_addr != LAST_ARG_REGNUM)
21476
        {
21477
          /* The fourth argument register is available.  */
21478
          regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
21479
 
21480
          --pops_needed;
21481
        }
21482
    }
21483
 
21484
  /* Pop as many registers as we can.  */
21485
  thumb_pop (f, regs_available_for_popping);
21486
 
21487
  /* Process the registers we popped.  */
21488
  if (reg_containing_return_addr == -1)
21489
    {
21490
      /* The return address was popped into the lowest numbered register.  */
21491
      regs_to_pop &= ~(1 << LR_REGNUM);
21492
 
21493
      reg_containing_return_addr =
21494
        number_of_first_bit_set (regs_available_for_popping);
21495
 
21496
      /* Remove this register from the mask of available registers, so that
21497
         the return address will not be corrupted by further pops.  */
21498
      regs_available_for_popping &= ~(1 << reg_containing_return_addr);
21499
    }
21500
 
21501
  /* If we popped other registers then handle them here.  */
21502
  if (regs_available_for_popping)
21503
    {
21504
      int frame_pointer;
21505
 
21506
      /* Work out which register currently contains the frame pointer.  */
21507
      frame_pointer = number_of_first_bit_set (regs_available_for_popping);
21508
 
21509
      /* Move it into the correct place.  */
21510
      asm_fprintf (f, "\tmov\t%r, %r\n",
21511
                   ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
21512
 
21513
      /* (Temporarily) remove it from the mask of popped registers.  */
21514
      regs_available_for_popping &= ~(1 << frame_pointer);
21515
      regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
21516
 
21517
      if (regs_available_for_popping)
21518
        {
21519
          int stack_pointer;
21520
 
21521
          /* We popped the stack pointer as well,
21522
             find the register that contains it.  */
21523
          stack_pointer = number_of_first_bit_set (regs_available_for_popping);
21524
 
21525
          /* Move it into the stack register.  */
21526
          asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
21527
 
21528
          /* At this point we have popped all necessary registers, so
21529
             do not worry about restoring regs_available_for_popping
21530
             to its correct value:
21531
 
21532
             assert (pops_needed == 0)
21533
             assert (regs_available_for_popping == (1 << frame_pointer))
21534
             assert (regs_to_pop == (1 << STACK_POINTER))  */
21535
        }
21536
      else
21537
        {
21538
          /* Since we have just moved the popped value into the frame
21539
             pointer, the popping register is available for reuse, and
21540
             we know that we still have the stack pointer left to pop.  */
21541
          regs_available_for_popping |= (1 << frame_pointer);
21542
        }
21543
    }
21544
 
21545
  /* If we still have registers left on the stack, but we no longer have
21546
     any registers into which we can pop them, then we must move the return
21547
     address into the link register and make available the register that
21548
     contained it.  */
21549
  if (regs_available_for_popping == 0 && pops_needed > 0)
21550
    {
21551
      regs_available_for_popping |= 1 << reg_containing_return_addr;
21552
 
21553
      asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
21554
                   reg_containing_return_addr);
21555
 
21556
      reg_containing_return_addr = LR_REGNUM;
21557
    }
21558
 
21559
  /* If we have registers left on the stack then pop some more.
21560
     We know that at most we will want to pop FP and SP.  */
21561
  if (pops_needed > 0)
21562
    {
21563
      int  popped_into;
21564
      int  move_to;
21565
 
21566
      thumb_pop (f, regs_available_for_popping);
21567
 
21568
      /* We have popped either FP or SP.
21569
         Move whichever one it is into the correct register.  */
21570
      popped_into = number_of_first_bit_set (regs_available_for_popping);
21571
      move_to     = number_of_first_bit_set (regs_to_pop);
21572
 
21573
      asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
21574
 
21575
      regs_to_pop &= ~(1 << move_to);
21576
 
21577
      --pops_needed;
21578
    }
21579
 
21580
  /* If we still have not popped everything then we must have only
21581
     had one register available to us and we are now popping the SP.  */
21582
  if (pops_needed > 0)
21583
    {
21584
      int  popped_into;
21585
 
21586
      thumb_pop (f, regs_available_for_popping);
21587
 
21588
      popped_into = number_of_first_bit_set (regs_available_for_popping);
21589
 
21590
      asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
21591
      /*
21592
        assert (regs_to_pop == (1 << STACK_POINTER))
21593
        assert (pops_needed == 1)
21594
      */
21595
    }
21596
 
21597
  /* If necessary restore the a4 register.  */
21598
  if (restore_a4)
21599
    {
21600
      if (reg_containing_return_addr != LR_REGNUM)
21601
        {
21602
          asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
21603
          reg_containing_return_addr = LR_REGNUM;
21604
        }
21605
 
21606
      asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
21607
    }
21608
 
21609
  if (crtl->calls_eh_return)
21610
    asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
21611
 
21612
  /* Return to caller.  */
21613
  asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
21614
}
21615
 
21616
/* Scan INSN just before assembler is output for it.
21617
   For Thumb-1, we track the status of the condition codes; this
21618
   information is used in the cbranchsi4_insn pattern.  */
21619
void
21620
thumb1_final_prescan_insn (rtx insn)
21621
{
21622
  if (flag_print_asm_name)
21623
    asm_fprintf (asm_out_file, "%@ 0x%04x\n",
21624
                 INSN_ADDRESSES (INSN_UID (insn)));
21625
  /* Don't overwrite the previous setter when we get to a cbranch.  */
21626
  if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
21627
    {
21628
      enum attr_conds conds;
21629
 
21630
      if (cfun->machine->thumb1_cc_insn)
21631
        {
21632
          if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
21633
              || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
21634
            CC_STATUS_INIT;
21635
        }
21636
      conds = get_attr_conds (insn);
21637
      if (conds == CONDS_SET)
21638
        {
21639
          rtx set = single_set (insn);
21640
          cfun->machine->thumb1_cc_insn = insn;
21641
          cfun->machine->thumb1_cc_op0 = SET_DEST (set);
21642
          cfun->machine->thumb1_cc_op1 = const0_rtx;
21643
          cfun->machine->thumb1_cc_mode = CC_NOOVmode;
21644
          if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
21645
            {
21646
              rtx src1 = XEXP (SET_SRC (set), 1);
21647
              if (src1 == const0_rtx)
21648
                cfun->machine->thumb1_cc_mode = CCmode;
21649
            }
21650
        }
21651
      else if (conds != CONDS_NOCOND)
21652
        cfun->machine->thumb1_cc_insn = NULL_RTX;
21653
    }
21654
}
21655
 
21656
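/* Return nonzero if VAL, viewed as a 32-bit value, is an 8-bit constant
   shifted left by 0 to 24 bits; e.g. 0x1fe00 (0xff << 9) qualifies, while
   0x101 does not.  */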
int
21657
thumb_shiftable_const (unsigned HOST_WIDE_INT val)
21658
{
21659
  unsigned HOST_WIDE_INT mask = 0xff;
21660
  int i;
21661
 
21662
  val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
21663
  if (val == 0) /* XXX */
21664
    return 0;
21665
 
21666
  for (i = 0; i < 25; i++)
21667
    if ((val & (mask << i)) == val)
21668
      return 1;
21669
 
21670
  return 0;
21671
}
21672
 
21673
/* Returns nonzero if the current function contains,
21674
   or might contain, a far jump.  */
21675
static int
21676
thumb_far_jump_used_p (void)
21677
{
21678
  rtx insn;
21679
 
21680
  /* This test is only important for leaf functions.  */
21681
  /* assert (!leaf_function_p ()); */
21682
 
21683
  /* If we have already decided that far jumps may be used,
21684
     do not bother checking again, and always return true even if
21685
     it turns out that they are not being used.  Once we have made
21686
     the decision that far jumps are present (and that hence the link
21687
     register will be pushed onto the stack) we cannot go back on it.  */
21688
  if (cfun->machine->far_jump_used)
21689
    return 1;
21690
 
21691
  /* If this function is not being called from the prologue/epilogue
21692
     generation code then it must be being called from the
21693
     INITIAL_ELIMINATION_OFFSET macro.  */
21694
  if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
21695
    {
21696
      /* In this case we know that we are being asked about the elimination
21697
         of the arg pointer register.  If that register is not being used,
21698
         then there are no arguments on the stack, and we do not have to
21699
         worry that a far jump might force the prologue to push the link
21700
         register, changing the stack offsets.  In this case we can just
21701
         return false, since the presence of far jumps in the function will
21702
         not affect stack offsets.
21703
 
21704
         If the arg pointer is live (or if it was live, but has now been
21705
         eliminated and so set to dead) then we do have to test to see if
21706
         the function might contain a far jump.  This test can lead to some
21707
         false negatives, since before reload is completed, the length of
21708
         branch instructions is not known, so gcc defaults to returning their
21709
         longest length, which in turn sets the far jump attribute to true.
21710
 
21711
         A false negative will not result in bad code being generated, but it
21712
         will result in a needless push and pop of the link register.  We
21713
         hope that this does not occur too often.
21714
 
21715
         If we need doubleword stack alignment this could affect the other
21716
         elimination offsets so we can't risk getting it wrong.  */
21717
      if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
21718
        cfun->machine->arg_pointer_live = 1;
21719
      else if (!cfun->machine->arg_pointer_live)
21720
        return 0;
21721
    }
21722
 
21723
  /* Check to see if the function contains a branch
21724
     insn with the far jump attribute set.  */
21725
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
21726
    {
21727
      if (GET_CODE (insn) == JUMP_INSN
21728
          /* Ignore tablejump patterns.  */
21729
          && GET_CODE (PATTERN (insn)) != ADDR_VEC
21730
          && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
21731
          && get_attr_far_jump (insn) == FAR_JUMP_YES
21732
          )
21733
        {
21734
          /* Record the fact that we have decided that
21735
             the function does use far jumps.  */
21736
          cfun->machine->far_jump_used = 1;
21737
          return 1;
21738
        }
21739
    }
21740
 
21741
  return 0;
21742
}
21743
 
21744
/* Return nonzero if FUNC must be entered in ARM mode.  */
21745
int
21746
is_called_in_ARM_mode (tree func)
21747
{
21748
  gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
21749
 
21750
  /* Ignore the problem about functions whose address is taken.  */
21751
  if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
21752
    return TRUE;
21753
 
21754
#ifdef ARM_PE
21755
  return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
21756
#else
21757
  return FALSE;
21758
#endif
21759
}
21760
 
21761
/* Given the stack offsets and register mask in OFFSETS, decide how
21762
   many additional registers to push instead of subtracting a constant
21763
   from SP.  For epilogues the principle is the same except we use pop.
21764
   FOR_PROLOGUE indicates which we're generating.  */
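/* The motivation is that a Thumb-1 "sub sp, #imm" can only subtract up to
   508 bytes (a 7-bit immediate scaled by 4).  For example, with a frame of
   512 bytes and one free low register, pushing one extra register brings
   the explicit adjustment down to 508, which fits in a single instruction:
   (512 - 508) / 4 == 1 extra register.  */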
21765
static int
21766
thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
21767
{
21768
  HOST_WIDE_INT amount;
21769
  unsigned long live_regs_mask = offsets->saved_regs_mask;
21770
  /* Extract a mask of the ones we can give to the Thumb's push/pop
21771
     instruction.  */
21772
  unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
21773
  /* Then count how many other high registers will need to be pushed.  */
21774
  unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
21775
  int n_free, reg_base;
21776
 
21777
  if (!for_prologue && frame_pointer_needed)
21778
    amount = offsets->locals_base - offsets->saved_regs;
21779
  else
21780
    amount = offsets->outgoing_args - offsets->saved_regs;
21781
 
21782
  /* If the stack frame size is 512 exactly, we can save one load
21783
     instruction, which should make this a win even when optimizing
21784
     for speed.  */
21785
  if (!optimize_size && amount != 512)
21786
    return 0;
21787
 
21788
  /* Can't do this if there are high registers to push.  */
21789
  if (high_regs_pushed != 0)
21790
    return 0;
21791
 
21792
  /* Shouldn't do it in the prologue if no registers would normally
21793
     be pushed at all.  In the epilogue, also allow it if we'll have
21794
     a pop insn for the PC.  */
21795
  if (l_mask == 0
21796
       && (for_prologue
21797
           || TARGET_BACKTRACE
21798
           || (live_regs_mask & 1 << LR_REGNUM) == 0
21799
           || TARGET_INTERWORK
21800
           || crtl->args.pretend_args_size != 0))
21801
    return 0;
21802
 
21803
  /* Don't do this if thumb_expand_prologue wants to emit instructions
21804
     between the push and the stack frame allocation.  */
21805
  if (for_prologue
21806
      && ((flag_pic && arm_pic_register != INVALID_REGNUM)
21807
          || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
21808
    return 0;
21809
 
21810
  reg_base = 0;
21811
  n_free = 0;
21812
  if (!for_prologue)
21813
    {
21814
      reg_base = arm_size_return_regs () / UNITS_PER_WORD;
21815
      live_regs_mask >>= reg_base;
21816
    }
21817
 
21818
  while (reg_base + n_free < 8 && !(live_regs_mask & 1)
21819
         && (for_prologue || call_used_regs[reg_base + n_free]))
21820
    {
21821
      live_regs_mask >>= 1;
21822
      n_free++;
21823
    }
21824
 
21825
  if (n_free == 0)
21826
    return 0;
21827
  gcc_assert (amount / 4 * 4 == amount);
21828
 
21829
  if (amount >= 512 && (amount - n_free * 4) < 512)
21830
    return (amount - 508) / 4;
21831
  if (amount <= n_free * 4)
21832
    return amount / 4;
21833
  return 0;
21834
}
21835
 
21836
/* The bits which aren't usefully expanded as rtl.  */
21837
const char *
21838
thumb_unexpanded_epilogue (void)
21839
{
21840
  arm_stack_offsets *offsets;
21841
  int regno;
21842
  unsigned long live_regs_mask = 0;
21843
  int high_regs_pushed = 0;
21844
  int extra_pop;
21845
  int had_to_push_lr;
21846
  int size;
21847
 
21848
  if (cfun->machine->return_used_this_function != 0)
21849
    return "";
21850
 
21851
  if (IS_NAKED (arm_current_func_type ()))
21852
    return "";
21853
 
21854
  offsets = arm_get_frame_offsets ();
21855
  live_regs_mask = offsets->saved_regs_mask;
21856
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
21857
 
21858
  /* We can deduce the registers used from the function's return value.
21859
     This is more reliable than examining df_regs_ever_live_p () because that
21860
     will be set if the register is ever used in the function, not just if
21861
     the register is used to hold a return value.  */
21862
  size = arm_size_return_regs ();
21863
 
21864
  extra_pop = thumb1_extra_regs_pushed (offsets, false);
21865
  if (extra_pop > 0)
21866
    {
21867
      unsigned long extra_mask = (1 << extra_pop) - 1;
21868
      live_regs_mask |= extra_mask << ((size + UNITS_PER_WORD - 1)
21869
                                       / UNITS_PER_WORD);
21870
    }
21871
 
21872
  /* The prologue may have pushed some high registers to use as
21873
     work registers; e.g. the testsuite file:
21874
     gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
21875
     compiles to produce:
21876
        push    {r4, r5, r6, r7, lr}
21877
        mov     r7, r9
21878
        mov     r6, r8
21879
        push    {r6, r7}
21880
     as part of the prologue.  We have to undo that pushing here.  */
21881
 
21882
  if (high_regs_pushed)
21883
    {
21884
      unsigned long mask = live_regs_mask & 0xff;
21885
      int next_hi_reg;
21886
 
21887
      /* The available low registers depend on the size of the value we are
21888
         returning.  */
21889
      if (size <= 12)
21890
        mask |=  1 << 3;
21891
      if (size <= 8)
21892
        mask |= 1 << 2;
21893
 
21894
      if (mask == 0)
21895
        /* Oh dear!  We have no low registers into which we can pop
21896
           high registers!  */
21897
        internal_error
21898
          ("no low registers available for popping high registers");
21899
 
21900
      for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
21901
        if (live_regs_mask & (1 << next_hi_reg))
21902
          break;
21903
 
21904
      while (high_regs_pushed)
21905
        {
21906
          /* Find lo register(s) into which the high register(s) can
21907
             be popped.  */
21908
          for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
21909
            {
21910
              if (mask & (1 << regno))
21911
                high_regs_pushed--;
21912
              if (high_regs_pushed == 0)
21913
                break;
21914
            }
21915
 
21916
          mask &= (2 << regno) - 1;     /* A noop if regno == 8 */
21917
 
21918
          /* Pop the values into the low register(s).  */
21919
          thumb_pop (asm_out_file, mask);
21920
 
21921
          /* Move the value(s) into the high registers.  */
21922
          for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
21923
            {
21924
              if (mask & (1 << regno))
21925
                {
21926
                  asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
21927
                               regno);
21928
 
21929
                  for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
21930
                    if (live_regs_mask & (1 << next_hi_reg))
21931
                      break;
21932
                }
21933
            }
21934
        }
21935
      live_regs_mask &= ~0x0f00;
21936
    }
21937
 
21938
  had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
21939
  live_regs_mask &= 0xff;
21940
 
21941
  if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
21942
    {
21943
      /* Pop the return address into the PC.  */
21944
      if (had_to_push_lr)
21945
        live_regs_mask |= 1 << PC_REGNUM;
21946
 
21947
      /* Either no argument registers were pushed or a backtrace
21948
         structure was created which includes an adjusted stack
21949
         pointer, so just pop everything.  */
21950
      if (live_regs_mask)
21951
        thumb_pop (asm_out_file, live_regs_mask);
21952
 
21953
      /* We have either just popped the return address into the
21954
         PC or it was kept in LR for the entire function.
21955
         Note that thumb_pop has already called thumb_exit if the
21956
         PC was in the list.  */
21957
      if (!had_to_push_lr)
21958
        thumb_exit (asm_out_file, LR_REGNUM);
21959
    }
21960
  else
21961
    {
21962
      /* Pop everything but the return address.  */
21963
      if (live_regs_mask)
21964
        thumb_pop (asm_out_file, live_regs_mask);
21965
 
21966
      if (had_to_push_lr)
21967
        {
21968
          if (size > 12)
21969
            {
21970
              /* We have no free low regs, so save one.  */
21971
              asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
21972
                           LAST_ARG_REGNUM);
21973
            }
21974
 
21975
          /* Get the return address into a temporary register.  */
21976
          thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
21977
 
21978
          if (size > 12)
21979
            {
21980
              /* Move the return address to lr.  */
21981
              asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
21982
                           LAST_ARG_REGNUM);
21983
              /* Restore the low register.  */
21984
              asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
21985
                           IP_REGNUM);
21986
              regno = LR_REGNUM;
21987
            }
21988
          else
21989
            regno = LAST_ARG_REGNUM;
21990
        }
21991
      else
21992
        regno = LR_REGNUM;
21993
 
21994
      /* Remove the argument registers that were pushed onto the stack.  */
21995
      asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
21996
                   SP_REGNUM, SP_REGNUM,
21997
                   crtl->args.pretend_args_size);
21998
 
21999
      thumb_exit (asm_out_file, regno);
22000
    }
22001
 
22002
  return "";
22003
}
22004
 
22005
/* Functions to save and restore machine-specific function data.  */
22006
static struct machine_function *
22007
arm_init_machine_status (void)
22008
{
22009
  struct machine_function *machine;
22010
  machine = ggc_alloc_cleared_machine_function ();
22011
 
22012
#if ARM_FT_UNKNOWN != 0
22013
  machine->func_type = ARM_FT_UNKNOWN;
22014
#endif
22015
  return machine;
22016
}
22017
 
22018
/* Return an RTX indicating where the return address to the
22019
   calling function can be found.  */
22020
rtx
22021
arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
22022
{
22023
  if (count != 0)
22024
    return NULL_RTX;
22025
 
22026
  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
22027
}
22028
 
22029
/* Do anything needed before RTL is emitted for each function.  */
22030
void
22031
arm_init_expanders (void)
22032
{
22033
  /* Arrange to initialize and mark the machine per-function status.  */
22034
  init_machine_status = arm_init_machine_status;
22035
 
22036
  /* This is to stop the combine pass optimizing away the alignment
22037
     adjustment of va_arg.  */
22038
  /* ??? It is claimed that this should not be necessary.  */
22039
  if (cfun)
22040
    mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
22041
}
22042
 
22043
 
22044
/* Like arm_compute_initial_elimination_offset.  Simpler because there
22045
   isn't an ABI specified frame pointer for Thumb.  Instead, we set it
22046
   to point at the base of the local variables after static stack
22047
   space for a function has been allocated.  */
22048
 
22049
HOST_WIDE_INT
22050
thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
22051
{
22052
  arm_stack_offsets *offsets;
22053
 
22054
  offsets = arm_get_frame_offsets ();
22055
 
22056
  switch (from)
22057
    {
22058
    case ARG_POINTER_REGNUM:
22059
      switch (to)
22060
        {
22061
        case STACK_POINTER_REGNUM:
22062
          return offsets->outgoing_args - offsets->saved_args;
22063
 
22064
        case FRAME_POINTER_REGNUM:
22065
          return offsets->soft_frame - offsets->saved_args;
22066
 
22067
        case ARM_HARD_FRAME_POINTER_REGNUM:
22068
          return offsets->saved_regs - offsets->saved_args;
22069
 
22070
        case THUMB_HARD_FRAME_POINTER_REGNUM:
22071
          return offsets->locals_base - offsets->saved_args;
22072
 
22073
        default:
22074
          gcc_unreachable ();
22075
        }
22076
      break;
22077
 
22078
    case FRAME_POINTER_REGNUM:
22079
      switch (to)
22080
        {
22081
        case STACK_POINTER_REGNUM:
22082
          return offsets->outgoing_args - offsets->soft_frame;
22083
 
22084
        case ARM_HARD_FRAME_POINTER_REGNUM:
22085
          return offsets->saved_regs - offsets->soft_frame;
22086
 
22087
        case THUMB_HARD_FRAME_POINTER_REGNUM:
22088
          return offsets->locals_base - offsets->soft_frame;
22089
 
22090
        default:
22091
          gcc_unreachable ();
22092
        }
22093
      break;
22094
 
22095
    default:
22096
      gcc_unreachable ();
22097
    }
22098
}
22099
 
22100
/* Generate the function's prologue.  */
22101
 
22102
void
22103
thumb1_expand_prologue (void)
22104
{
22105
  rtx insn;
22106
 
22107
  HOST_WIDE_INT amount;
22108
  arm_stack_offsets *offsets;
22109
  unsigned long func_type;
22110
  int regno;
22111
  unsigned long live_regs_mask;
22112
  unsigned long l_mask;
22113
  unsigned high_regs_pushed = 0;
22114
 
22115
  func_type = arm_current_func_type ();
22116
 
22117
  /* Naked functions don't have prologues.  */
22118
  if (IS_NAKED (func_type))
22119
    return;
22120
 
22121
  if (IS_INTERRUPT (func_type))
22122
    {
22123
      error ("interrupt Service Routines cannot be coded in Thumb mode");
22124
      return;
22125
    }
22126
 
22127
  if (is_called_in_ARM_mode (current_function_decl))
22128
    emit_insn (gen_prologue_thumb1_interwork ());
22129
 
22130
  offsets = arm_get_frame_offsets ();
22131
  live_regs_mask = offsets->saved_regs_mask;
22132
 
22133
  /* Extract a mask of the ones we can give to the Thumb's push instruction.  */
22134
  l_mask = live_regs_mask & 0x40ff;
22135
  /* Then count how many other high registers will need to be pushed.  */
22136
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
22137
 
22138
  if (crtl->args.pretend_args_size)
22139
    {
22140
      rtx x = GEN_INT (-crtl->args.pretend_args_size);
22141
 
22142
      if (cfun->machine->uses_anonymous_args)
22143
        {
22144
          int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
22145
          unsigned long mask;
22146
 
22147
          mask = 1ul << (LAST_ARG_REGNUM + 1);
22148
          mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
22149
 
22150
          insn = thumb1_emit_multi_reg_push (mask, 0);
22151
        }
22152
      else
22153
        {
22154
          insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
22155
                                        stack_pointer_rtx, x));
22156
        }
22157
      RTX_FRAME_RELATED_P (insn) = 1;
22158
    }
22159
 
22160
  if (TARGET_BACKTRACE)
22161
    {
22162
      HOST_WIDE_INT offset = 0;
22163
      unsigned work_register;
22164
      rtx work_reg, x, arm_hfp_rtx;
22165
 
22166
      /* We have been asked to create a stack backtrace structure.
22167
         The code looks like this:
22168
 
22169
         0   .align 2
22170
         0   func:
22171
         0     sub   SP, #16         Reserve space for 4 registers.
22172
         2     push  {R7}            Push low registers.
22173
         4     add   R7, SP, #20     Get the stack pointer before the push.
22174
         6     str   R7, [SP, #8]    Store the stack pointer
22175
                                        (before reserving the space).
22176
         8     mov   R7, PC          Get hold of the start of this code + 12.
22177
        10     str   R7, [SP, #16]   Store it.
22178
        12     mov   R7, FP          Get hold of the current frame pointer.
22179
        14     str   R7, [SP, #4]    Store it.
22180
        16     mov   R7, LR          Get hold of the current return address.
22181
        18     str   R7, [SP, #12]   Store it.
22182
        20     add   R7, SP, #16     Point at the start of the
22183
                                        backtrace structure.
22184
        22     mov   FP, R7          Put this value into the frame pointer.  */
22185
 
22186
      work_register = thumb_find_work_register (live_regs_mask);
22187
      work_reg = gen_rtx_REG (SImode, work_register);
22188
      arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
22189
 
22190
      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
22191
                                    stack_pointer_rtx, GEN_INT (-16)));
22192
      RTX_FRAME_RELATED_P (insn) = 1;
22193
 
22194
      if (l_mask)
22195
        {
22196
          insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
22197
          RTX_FRAME_RELATED_P (insn) = 1;
22198
 
22199
          offset = bit_count (l_mask) * UNITS_PER_WORD;
22200
        }
22201
 
22202
      x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
22203
      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
22204
 
22205
      x = plus_constant (stack_pointer_rtx, offset + 4);
22206
      x = gen_frame_mem (SImode, x);
22207
      emit_move_insn (x, work_reg);
22208
 
22209
      /* Make sure that the instruction fetching the PC is in the right place
22210
         to calculate "start of backtrace creation code + 12".  */
22211
      /* ??? The stores using the common WORK_REG ought to be enough to
22212
         prevent the scheduler from doing anything weird.  Failing that
22213
         we could always move all of the following into an UNSPEC_VOLATILE.  */
22214
      if (l_mask)
22215
        {
22216
          x = gen_rtx_REG (SImode, PC_REGNUM);
22217
          emit_move_insn (work_reg, x);
22218
 
22219
          x = plus_constant (stack_pointer_rtx, offset + 12);
22220
          x = gen_frame_mem (SImode, x);
22221
          emit_move_insn (x, work_reg);
22222
 
22223
          emit_move_insn (work_reg, arm_hfp_rtx);
22224
 
22225
          x = plus_constant (stack_pointer_rtx, offset);
22226
          x = gen_frame_mem (SImode, x);
22227
          emit_move_insn (x, work_reg);
22228
        }
22229
      else
22230
        {
22231
          emit_move_insn (work_reg, arm_hfp_rtx);
22232
 
22233
          x = plus_constant (stack_pointer_rtx, offset);
22234
          x = gen_frame_mem (SImode, x);
22235
          emit_move_insn (x, work_reg);
22236
 
22237
          x = gen_rtx_REG (SImode, PC_REGNUM);
22238
          emit_move_insn (work_reg, x);
22239
 
22240
          x = plus_constant (stack_pointer_rtx, offset + 12);
22241
          x = gen_frame_mem (SImode, x);
22242
          emit_move_insn (x, work_reg);
22243
        }
22244
 
22245
      x = gen_rtx_REG (SImode, LR_REGNUM);
22246
      emit_move_insn (work_reg, x);
22247
 
22248
      x = plus_constant (stack_pointer_rtx, offset + 8);
22249
      x = gen_frame_mem (SImode, x);
22250
      emit_move_insn (x, work_reg);
22251
 
22252
      x = GEN_INT (offset + 12);
22253
      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
22254
 
22255
      emit_move_insn (arm_hfp_rtx, work_reg);
22256
    }
22257
  /* Optimization:  If we are not pushing any low registers but we are going
22258
     to push some high registers then delay our first push.  This will just
22259
     be a push of LR and we can combine it with the push of the first high
22260
     register.  */
22261
  else if ((l_mask & 0xff) != 0
22262
           || (high_regs_pushed == 0 && l_mask))
22263
    {
22264
      unsigned long mask = l_mask;
22265
      mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
22266
      insn = thumb1_emit_multi_reg_push (mask, mask);
22267
      RTX_FRAME_RELATED_P (insn) = 1;
22268
    }
22269
 
22270
  if (high_regs_pushed)
22271
    {
22272
      unsigned pushable_regs;
22273
      unsigned next_hi_reg;
22274
 
22275
      for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
22276
        if (live_regs_mask & (1 << next_hi_reg))
22277
          break;
22278
 
22279
      pushable_regs = l_mask & 0xff;
22280
 
22281
      if (pushable_regs == 0)
22282
        pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
22283
 
22284
      while (high_regs_pushed > 0)
22285
        {
22286
          unsigned long real_regs_mask = 0;
22287
 
22288
          for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
22289
            {
22290
              if (pushable_regs & (1 << regno))
22291
                {
22292
                  emit_move_insn (gen_rtx_REG (SImode, regno),
22293
                                  gen_rtx_REG (SImode, next_hi_reg));
22294
 
22295
                  high_regs_pushed --;
22296
                  real_regs_mask |= (1 << next_hi_reg);
22297
 
22298
                  if (high_regs_pushed)
22299
                    {
22300
                      for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
22301
                           next_hi_reg --)
22302
                        if (live_regs_mask & (1 << next_hi_reg))
22303
                          break;
22304
                    }
22305
                  else
22306
                    {
22307
                      pushable_regs &= ~((1 << regno) - 1);
22308
                      break;
22309
                    }
22310
                }
22311
            }
22312
 
22313
          /* If we had to find a work register and we have not yet
22314
             saved the LR then add it to the list of regs to push.  */
22315
          if (l_mask == (1 << LR_REGNUM))
22316
            {
22317
              pushable_regs |= l_mask;
22318
              real_regs_mask |= l_mask;
22319
              l_mask = 0;
22320
            }
22321
 
22322
          insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
22323
          RTX_FRAME_RELATED_P (insn) = 1;
22324
        }
22325
    }
22326
 
22327
  /* Load the pic register before setting the frame pointer,
22328
     so we can use r7 as a temporary work register.  */
22329
  if (flag_pic && arm_pic_register != INVALID_REGNUM)
22330
    arm_load_pic_register (live_regs_mask);
22331
 
22332
  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
22333
    emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
22334
                    stack_pointer_rtx);
22335
 
22336
  if (flag_stack_usage_info)
22337
    current_function_static_stack_size
22338
      = offsets->outgoing_args - offsets->saved_args;
22339
 
22340
  amount = offsets->outgoing_args - offsets->saved_regs;
22341
  amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
22342
  if (amount)
22343
    {
22344
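      /* A Thumb-1 SP-relative ADD/SUB immediate is 7 bits scaled by 4, so
         adjustments of less than 512 bytes fit in a single instruction.  */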
      if (amount < 512)
22345
        {
22346
          insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
22347
                                        GEN_INT (- amount)));
22348
          RTX_FRAME_RELATED_P (insn) = 1;
22349
        }
22350
      else
22351
        {
22352
          rtx reg, dwarf;
22353
 
22354
          /* The stack decrement is too big for an immediate value in a single
22355
             insn.  In theory we could issue multiple subtracts, but after
22356
             three of them it becomes more space efficient to place the full
22357
             value in the constant pool and load into a register.  (Also the
22358
             ARM debugger really likes to see only one stack decrement per
22359
             function).  So instead we look for a scratch register into which
22360
             we can load the decrement, and then we subtract this from the
22361
             stack pointer.  Unfortunately on the thumb the only available
22362
             scratch registers are the argument registers, and we cannot use
22363
             these as they may hold arguments to the function.  Instead we
22364
             attempt to locate a call preserved register which is used by this
22365
             function.  If we can find one, then we know that it will have
22366
             been pushed at the start of the prologue and so we can corrupt
22367
             it now.  */
22368
          for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
22369
            if (live_regs_mask & (1 << regno))
22370
              break;
22371
 
22372
          gcc_assert (regno <= LAST_LO_REGNUM);
22373
 
22374
          reg = gen_rtx_REG (SImode, regno);
22375
 
22376
          emit_insn (gen_movsi (reg, GEN_INT (- amount)));
22377
 
22378
          insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
22379
                                        stack_pointer_rtx, reg));
22380
 
22381
          dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
22382
                               plus_constant (stack_pointer_rtx,
22383
                                              -amount));
22384
          add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22385
          RTX_FRAME_RELATED_P (insn) = 1;
22386
        }
22387
    }
22388
 
22389
  if (frame_pointer_needed)
22390
    thumb_set_frame_pointer (offsets);
22391
 
22392
  /* If we are profiling, make sure no instructions are scheduled before
22393
     the call to mcount.  Similarly if the user has requested no
22394
     scheduling in the prologue.  Similarly if we want non-call exceptions
22395
     using the EABI unwinder, to prevent faulting instructions from being
22396
     swapped with a stack adjustment.  */
22397
  if (crtl->profile || !TARGET_SCHED_PROLOG
22398
      || (arm_except_unwind_info (&global_options) == UI_TARGET
22399
          && cfun->can_throw_non_call_exceptions))
22400
    emit_insn (gen_blockage ());
22401
 
22402
  cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
22403
  if (live_regs_mask & 0xff)
22404
    cfun->machine->lr_save_eliminated = 0;
22405
}
22406
 
22407
 
22408
void
22409
thumb1_expand_epilogue (void)
22410
{
22411
  HOST_WIDE_INT amount;
22412
  arm_stack_offsets *offsets;
22413
  int regno;
22414
 
22415
  /* Naked functions don't have epilogues.  */
22416
  if (IS_NAKED (arm_current_func_type ()))
22417
    return;
22418
 
22419
  offsets = arm_get_frame_offsets ();
22420
  amount = offsets->outgoing_args - offsets->saved_regs;
22421
 
22422
  if (frame_pointer_needed)
22423
    {
22424
      emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
22425
      amount = offsets->locals_base - offsets->saved_regs;
22426
    }
22427
  amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
22428
 
22429
  gcc_assert (amount >= 0);
22430
  if (amount)
22431
    {
22432
      emit_insn (gen_blockage ());
22433
 
22434
      if (amount < 512)
22435
        emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
22436
                               GEN_INT (amount)));
22437
      else
22438
        {
22439
          /* r3 is always free in the epilogue.  */
22440
          rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
22441
 
22442
          emit_insn (gen_movsi (reg, GEN_INT (amount)));
22443
          emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
22444
        }
22445
    }
22446
 
22447
  /* Emit a USE (stack_pointer_rtx), so that
22448
     the stack adjustment will not be deleted.  */
22449
  emit_insn (gen_prologue_use (stack_pointer_rtx));
22450
 
22451
  if (crtl->profile || !TARGET_SCHED_PROLOG)
22452
    emit_insn (gen_blockage ());
22453
 
22454
  /* Emit a clobber for each insn that will be restored in the epilogue,
22455
     so that flow2 will get register lifetimes correct.  */
22456
  for (regno = 0; regno < 13; regno++)
22457
    if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
22458
      emit_clobber (gen_rtx_REG (SImode, regno));
22459
 
22460
  if (! df_regs_ever_live_p (LR_REGNUM))
22461
    emit_use (gen_rtx_REG (SImode, LR_REGNUM));
22462
}
22463
 
22464
/* Implementation of insn prologue_thumb1_interwork.  This is the first
22465
   "instruction" of a function called in ARM mode.  Swap to thumb mode.  */
22466
 
22467
const char *
22468
thumb1_output_interwork (void)
22469
{
22470
  const char * name;
22471
  FILE *f = asm_out_file;
22472
 
22473
  gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
22474
  gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
22475
              == SYMBOL_REF);
22476
  name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
22477
 
22478
  /* Generate code sequence to switch us into Thumb mode.  */
22479
  /* The .code 32 directive has already been emitted by
22480
     ASM_DECLARE_FUNCTION_NAME.  */
22481
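  /* Setting the low bit of the target address makes the BX below switch
     to Thumb state when it branches.  */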
  asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
22482
  asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
22483
 
22484
  /* Generate a label, so that the debugger will notice the
22485
     change in instruction sets.  This label is also used by
22486
     the assembler to bypass the ARM code when this function
22487
     is called from a Thumb encoded function elsewhere in the
22488
     same file.  Hence the definition of STUB_NAME here must
22489
     agree with the definition in gas/config/tc-arm.c.  */
22490
 
22491
#define STUB_NAME ".real_start_of"
22492
 
22493
  fprintf (f, "\t.code\t16\n");
22494
#ifdef ARM_PE
22495
  if (arm_dllexport_name_p (name))
22496
    name = arm_strip_name_encoding (name);
22497
#endif
22498
  asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
22499
  fprintf (f, "\t.thumb_func\n");
22500
  asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
22501
 
22502
  return "";
22503
}
22504
 
22505
/* Handle the case of a double word load into a low register from
22506
   a computed memory address.  The computed address may involve a
22507
   register which is overwritten by the load.  */
22508
const char *
22509
thumb_load_double_from_address (rtx *operands)
22510
{
22511
  rtx addr;
22512
  rtx base;
22513
  rtx offset;
22514
  rtx arg1;
22515
  rtx arg2;
22516
 
22517
  gcc_assert (GET_CODE (operands[0]) == REG);
22518
  gcc_assert (GET_CODE (operands[1]) == MEM);
22519
 
22520
  /* Get the memory address.  */
22521
  addr = XEXP (operands[1], 0);
22522
 
22523
  /* Work out how the memory address is computed.  */
22524
  switch (GET_CODE (addr))
22525
    {
22526
    case REG:
22527
      operands[2] = adjust_address (operands[1], SImode, 4);
22528
 
22529
      if (REGNO (operands[0]) == REGNO (addr))
22530
        {
22531
          output_asm_insn ("ldr\t%H0, %2", operands);
22532
          output_asm_insn ("ldr\t%0, %1", operands);
22533
        }
22534
      else
22535
        {
22536
          output_asm_insn ("ldr\t%0, %1", operands);
22537
          output_asm_insn ("ldr\t%H0, %2", operands);
22538
        }
22539
      break;
22540
 
22541
    case CONST:
22542
      /* Compute <address> + 4 for the high order load.  */
22543
      operands[2] = adjust_address (operands[1], SImode, 4);
22544
 
22545
      output_asm_insn ("ldr\t%0, %1", operands);
22546
      output_asm_insn ("ldr\t%H0, %2", operands);
22547
      break;
22548
 
22549
    case PLUS:
22550
      arg1   = XEXP (addr, 0);
22551
      arg2   = XEXP (addr, 1);
22552
 
22553
      if (CONSTANT_P (arg1))
22554
        base = arg2, offset = arg1;
22555
      else
22556
        base = arg1, offset = arg2;
22557
 
22558
      gcc_assert (GET_CODE (base) == REG);
22559
 
22560
      /* Catch the case of <address> = <reg> + <reg> */
22561
      if (GET_CODE (offset) == REG)
22562
        {
22563
          int reg_offset = REGNO (offset);
22564
          int reg_base   = REGNO (base);
22565
          int reg_dest   = REGNO (operands[0]);
22566
 
22567
          /* Add the base and offset registers together into the
22568
             higher destination register.  */
22569
          asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
22570
                       reg_dest + 1, reg_base, reg_offset);
22571
 
22572
          /* Load the lower destination register from the address in
22573
             the higher destination register.  */
22574
          asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
22575
                       reg_dest, reg_dest + 1);
22576
 
22577
          /* Load the higher destination register from its own address
22578
             plus 4.  */
22579
          asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
22580
                       reg_dest + 1, reg_dest + 1);
22581
        }
22582
      else
22583
        {
22584
          /* Compute <address> + 4 for the high order load.  */
22585
          operands[2] = adjust_address (operands[1], SImode, 4);
22586
 
22587
          /* If the computed address is held in the low order register
22588
             then load the high order register first, otherwise always
22589
             load the low order register first.  */
22590
          if (REGNO (operands[0]) == REGNO (base))
22591
            {
22592
              output_asm_insn ("ldr\t%H0, %2", operands);
22593
              output_asm_insn ("ldr\t%0, %1", operands);
22594
            }
22595
          else
22596
            {
22597
              output_asm_insn ("ldr\t%0, %1", operands);
22598
              output_asm_insn ("ldr\t%H0, %2", operands);
22599
            }
22600
        }
22601
      break;
22602
 
22603
    case LABEL_REF:
22604
      /* With no registers to worry about we can just load the value
22605
         directly.  */
22606
      operands[2] = adjust_address (operands[1], SImode, 4);
22607
 
22608
      output_asm_insn ("ldr\t%H0, %2", operands);
22609
      output_asm_insn ("ldr\t%0, %1", operands);
22610
      break;
22611
 
22612
    default:
22613
      gcc_unreachable ();
22614
    }
22615
 
22616
  return "";
22617
}
22618
 
22619
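/* Output a copy of N words using LDMIA/STMIA.  The scratch registers are
   sorted first so that the register lists are in ascending order.  */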
const char *
22620
thumb_output_move_mem_multiple (int n, rtx *operands)
22621
{
22622
  rtx tmp;
22623
 
22624
  switch (n)
22625
    {
22626
    case 2:
22627
      if (REGNO (operands[4]) > REGNO (operands[5]))
22628
        {
22629
          tmp = operands[4];
22630
          operands[4] = operands[5];
22631
          operands[5] = tmp;
22632
        }
22633
      output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
22634
      output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
22635
      break;
22636
 
22637
    case 3:
22638
      if (REGNO (operands[4]) > REGNO (operands[5]))
22639
        {
22640
          tmp = operands[4];
22641
          operands[4] = operands[5];
22642
          operands[5] = tmp;
22643
        }
22644
      if (REGNO (operands[5]) > REGNO (operands[6]))
22645
        {
22646
          tmp = operands[5];
22647
          operands[5] = operands[6];
22648
          operands[6] = tmp;
22649
        }
22650
      if (REGNO (operands[4]) > REGNO (operands[5]))
22651
        {
22652
          tmp = operands[4];
22653
          operands[4] = operands[5];
22654
          operands[5] = tmp;
22655
        }
22656
 
22657
      output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
22658
      output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
22659
      break;
22660
 
22661
    default:
22662
      gcc_unreachable ();
22663
    }
22664
 
22665
  return "";
22666
}
22667
 
22668
/* Output a call-via instruction for thumb state.  */
22669
const char *
22670
thumb_call_via_reg (rtx reg)
22671
{
22672
  int regno = REGNO (reg);
22673
  rtx *labelp;
22674
 
22675
  gcc_assert (regno < LR_REGNUM);
22676
 
22677
  /* If we are in the normal text section we can use a single instance
22678
     per compilation unit.  If we are doing function sections, then we need
22679
     an entry per section, since we can't rely on reachability.  */
22680
  if (in_section == text_section)
22681
    {
22682
      thumb_call_reg_needed = 1;
22683
 
22684
      if (thumb_call_via_label[regno] == NULL)
22685
        thumb_call_via_label[regno] = gen_label_rtx ();
22686
      labelp = thumb_call_via_label + regno;
22687
    }
22688
  else
22689
    {
22690
      if (cfun->machine->call_via[regno] == NULL)
22691
        cfun->machine->call_via[regno] = gen_label_rtx ();
22692
      labelp = cfun->machine->call_via + regno;
22693
    }
22694
 
22695
  output_asm_insn ("bl\t%a0", labelp);
22696
  return "";
22697
}
22698
 
22699
/* Routines for generating rtl.  */
22700
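/* Expand a copy of a constant number of bytes, using 12- and 8-byte block
   moves and then word, halfword and byte copies for any remainder.  */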
void
22701
thumb_expand_movmemqi (rtx *operands)
22702
{
22703
  rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
22704
  rtx in  = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
22705
  HOST_WIDE_INT len = INTVAL (operands[2]);
22706
  HOST_WIDE_INT offset = 0;
22707
 
22708
  while (len >= 12)
22709
    {
22710
      emit_insn (gen_movmem12b (out, in, out, in));
22711
      len -= 12;
22712
    }
22713
 
22714
  if (len >= 8)
22715
    {
22716
      emit_insn (gen_movmem8b (out, in, out, in));
22717
      len -= 8;
22718
    }
22719
 
22720
  if (len >= 4)
22721
    {
22722
      rtx reg = gen_reg_rtx (SImode);
22723
      emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
22724
      emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
22725
      len -= 4;
22726
      offset += 4;
22727
    }
22728
 
22729
  if (len >= 2)
22730
    {
22731
      rtx reg = gen_reg_rtx (HImode);
22732
      emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
22733
                                              plus_constant (in, offset))));
22734
      emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
22735
                            reg));
22736
      len -= 2;
22737
      offset += 2;
22738
    }
22739
 
22740
  if (len)
22741
    {
22742
      rtx reg = gen_reg_rtx (QImode);
22743
      emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
22744
                                              plus_constant (in, offset))));
22745
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
22746
                            reg));
22747
    }
22748
}
22749
 
22750
void
22751
thumb_reload_out_hi (rtx *operands)
22752
{
22753
  emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
22754
}
22755
 
22756
/* Handle reading a half-word from memory during reload.  */
22757
void
22758
thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
22759
{
22760
  gcc_unreachable ();
22761
}
22762
 
22763
/* Return the length of a function name prefix
22764
   that starts with the character 'c'.  */
22765
static int
22766
arm_get_strip_length (int c)
22767
{
22768
  switch (c)
22769
    {
22770
    ARM_NAME_ENCODING_LENGTHS
22771
      default: return 0;
22772
    }
22773
}
22774
 
22775
/* Return a pointer to a function's name with any
22776
   and all prefix encodings stripped from it.  */
22777
const char *
22778
arm_strip_name_encoding (const char *name)
22779
{
22780
  int skip;
22781
 
22782
  while ((skip = arm_get_strip_length (* name)))
22783
    name += skip;
22784
 
22785
  return name;
22786
}
22787
 
22788
/* If there is a '*' anywhere in the name's prefix, then
22789
   emit the stripped name verbatim, otherwise prepend an
22790
   underscore if leading underscores are being used.  */
22791
void
22792
arm_asm_output_labelref (FILE *stream, const char *name)
22793
{
22794
  int skip;
22795
  int verbatim = 0;
22796
 
22797
  while ((skip = arm_get_strip_length (* name)))
22798
    {
22799
      verbatim |= (*name == '*');
22800
      name += skip;
22801
    }
22802
 
22803
  if (verbatim)
22804
    fputs (name, stream);
22805
  else
22806
    asm_fprintf (stream, "%U%s", name);
22807
}
22808
 
22809
static void
22810
arm_file_start (void)
22811
{
22812
  int val;
22813
 
22814
  if (TARGET_UNIFIED_ASM)
22815
    asm_fprintf (asm_out_file, "\t.syntax unified\n");
22816
 
22817
  if (TARGET_BPABI)
22818
    {
22819
      const char *fpu_name;
22820
      if (arm_selected_arch)
22821
        asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
22822
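      /* "generic" CPU names have the form "generic-<arch>"; skip the
         "generic-" prefix and emit the architecture name instead.  */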
      else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
22823
        asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
22824
      else
22825
        asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
22826
 
22827
      if (TARGET_SOFT_FLOAT)
22828
        {
22829
          if (TARGET_VFP)
22830
            fpu_name = "softvfp";
22831
          else
22832
            fpu_name = "softfpa";
22833
        }
22834
      else
22835
        {
22836
          fpu_name = arm_fpu_desc->name;
22837
          if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
22838
            {
22839
              if (TARGET_HARD_FLOAT)
22840
                EMIT_EABI_ATTRIBUTE (Tag_ABI_HardFP_use, 27, 3);
22841
              if (TARGET_HARD_FLOAT_ABI)
22842
                EMIT_EABI_ATTRIBUTE (Tag_ABI_VFP_args, 28, 1);
22843
            }
22844
        }
22845
      asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
22846
 
22847
      /* Some of these attributes only apply when the corresponding features
22848
         are used.  However we don't have any easy way of figuring this out.
22849
         Conservatively record the setting that would have been used.  */
22850
 
22851
      if (flag_rounding_math)
22852
        EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_rounding, 19, 1);
22853
 
22854
      if (!flag_unsafe_math_optimizations)
22855
        {
22856
          EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_denormal, 20, 1);
22857
          EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_exceptions, 21, 1);
22858
        }
22859
      if (flag_signaling_nans)
22860
        EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_user_exceptions, 22, 1);
22861
 
22862
      EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_number_model, 23,
22863
                           flag_finite_math_only ? 1 : 3);
22864
 
22865
      EMIT_EABI_ATTRIBUTE (Tag_ABI_align8_needed, 24, 1);
22866
      EMIT_EABI_ATTRIBUTE (Tag_ABI_align8_preserved, 25, 1);
22867
      EMIT_EABI_ATTRIBUTE (Tag_ABI_enum_size, 26, flag_short_enums ? 1 : 2);
22868
 
22869
      /* Tag_ABI_optimization_goals.  */
22870
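      /* Values from the ABI addenda: 1 = prefer speed, 2 = aggressively
         prefer speed, 4 = aggressively prefer size, 6 = aggressively
         prefer debugging.  */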
      if (optimize_size)
22871
        val = 4;
22872
      else if (optimize >= 2)
22873
        val = 2;
22874
      else if (optimize)
22875
        val = 1;
22876
      else
22877
        val = 6;
22878
      EMIT_EABI_ATTRIBUTE (Tag_ABI_optimization_goals, 30, val);
22879
 
22880
      EMIT_EABI_ATTRIBUTE (Tag_CPU_unaligned_access, 34, unaligned_access);
22881
 
22882
      if (arm_fp16_format)
22883
        EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_16bit_format, 38, (int) arm_fp16_format);
22884
 
22885
      if (arm_lang_output_object_attributes_hook)
22886
        arm_lang_output_object_attributes_hook();
22887
    }
22888
 
22889
  default_file_start ();
22890
}
22891
 
22892
static void
22893
arm_file_end (void)
22894
{
22895
  int regno;
22896
 
22897
  if (NEED_INDICATE_EXEC_STACK)
22898
    /* Add .note.GNU-stack.  */
22899
    file_end_indicate_exec_stack ();
22900
 
22901
  if (! thumb_call_reg_needed)
22902
    return;
22903
 
22904
  switch_to_section (text_section);
22905
  asm_fprintf (asm_out_file, "\t.code 16\n");
22906
  ASM_OUTPUT_ALIGN (asm_out_file, 1);
22907
 
22908
  for (regno = 0; regno < LR_REGNUM; regno++)
22909
    {
22910
      rtx label = thumb_call_via_label[regno];
22911
 
22912
      if (label != 0)
22913
        {
22914
          targetm.asm_out.internal_label (asm_out_file, "L",
22915
                                          CODE_LABEL_NUMBER (label));
22916
          asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
22917
        }
22918
    }
22919
}
22920
 
22921
#ifndef ARM_PE
22922
/* Symbols in the text segment can be accessed without indirecting via the
22923
   constant pool; it may take an extra binary operation, but this is still
22924
   faster than indirecting via memory.  Don't do this when not optimizing,
22925
   since we won't be calculating all of the offsets necessary to do this
22926
   simplification.  */
22927
 
22928
static void
22929
arm_encode_section_info (tree decl, rtx rtl, int first)
22930
{
22931
  if (optimize > 0 && TREE_CONSTANT (decl))
22932
    SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
22933
 
22934
  default_encode_section_info (decl, rtl, first);
22935
}
22936
#endif /* !ARM_PE */
22937
 
22938
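/* Output an internal label.  If it is the label that the conditional
   execution state machine is currently tracking, reset that state first.  */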
static void
22939
arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
22940
{
22941
  if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
22942
      && !strcmp (prefix, "L"))
22943
    {
22944
      arm_ccfsm_state = 0;
22945
      arm_target_insn = NULL;
22946
    }
22947
  default_internal_label (stream, prefix, labelno);
22948
}
22949
 
22950
/* Output code to add DELTA to the first argument, and then jump
22951
   to FUNCTION.  Used for C++ multiple inheritance.  */
22952
static void
22953
arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
22954
                     HOST_WIDE_INT delta,
22955
                     HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
22956
                     tree function)
22957
{
22958
  static int thunk_label = 0;
22959
  char label[256];
22960
  char labelpc[256];
22961
  int mi_delta = delta;
22962
  const char *const mi_op = mi_delta < 0 ? "sub" : "add";
22963
  int shift = 0;
22964
  int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
22965
                    ? 1 : 0);
22966
  if (mi_delta < 0)
22967
    mi_delta = - mi_delta;
22968
 
22969
  if (TARGET_THUMB1)
22970
    {
22971
      int labelno = thunk_label++;
22972
      ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
22973
      /* Thunks are entered in ARM mode when available.  */
22974
      if (TARGET_THUMB1_ONLY)
22975
        {
22976
          /* push r3 so we can use it as a temporary.  */
22977
          /* TODO: Omit this save if r3 is not used.  */
22978
          fputs ("\tpush {r3}\n", file);
22979
          fputs ("\tldr\tr3, ", file);
22980
        }
22981
      else
22982
        {
22983
          fputs ("\tldr\tr12, ", file);
22984
        }
22985
      assemble_name (file, label);
22986
      fputc ('\n', file);
22987
      if (flag_pic)
22988
        {
22989
          /* If we are generating PIC, the ldr instruction below loads
22990
             "(target - 7) - .LTHUNKPCn" into r12.  The pc reads as
22991
             the address of the add + 8, so we have:
22992
 
22993
             r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
22994
                 = target + 1.
22995
 
22996
             Note that we have "+ 1" because some versions of GNU ld
22997
             don't set the low bit of the result for R_ARM_REL32
22998
             relocations against thumb function symbols.
22999
             On ARMv6M this is +4, not +8.  */
23000
          ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
23001
          assemble_name (file, labelpc);
23002
          fputs (":\n", file);
23003
          if (TARGET_THUMB1_ONLY)
23004
            {
23005
              /* This is 2 insns after the start of the thunk, so we know it
23006
                 is 4-byte aligned.  */
23007
              fputs ("\tadd\tr3, pc, r3\n", file);
23008
              fputs ("\tmov r12, r3\n", file);
23009
            }
23010
          else
23011
            fputs ("\tadd\tr12, pc, r12\n", file);
23012
        }
23013
      else if (TARGET_THUMB1_ONLY)
23014
        fputs ("\tmov r12, r3\n", file);
23015
    }
23016
  if (TARGET_THUMB1_ONLY)
23017
    {
23018
      if (mi_delta > 255)
23019
        {
23020
          fputs ("\tldr\tr3, ", file);
23021
          assemble_name (file, label);
23022
          fputs ("+4\n", file);
23023
          asm_fprintf (file, "\t%s\t%r, %r, r3\n",
23024
                       mi_op, this_regno, this_regno);
23025
        }
23026
      else if (mi_delta != 0)
23027
        {
23028
          asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
23029
                       mi_op, this_regno, this_regno,
23030
                       mi_delta);
23031
        }
23032
    }
23033
  else
23034
    {
23035
      /* TODO: Use movw/movt for large constants when available.  */
23036
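      /* Add the delta in chunks that fit an ARM data-processing immediate:
         an 8-bit value at an even bit position.  */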
      while (mi_delta != 0)
23037
        {
23038
          if ((mi_delta & (3 << shift)) == 0)
23039
            shift += 2;
23040
          else
23041
            {
23042
              asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
23043
                           mi_op, this_regno, this_regno,
23044
                           mi_delta & (0xff << shift));
23045
              mi_delta &= ~(0xff << shift);
23046
              shift += 8;
23047
            }
23048
        }
23049
    }
23050
  if (TARGET_THUMB1)
23051
    {
23052
      if (TARGET_THUMB1_ONLY)
23053
        fputs ("\tpop\t{r3}\n", file);
23054
 
23055
      fprintf (file, "\tbx\tr12\n");
23056
      ASM_OUTPUT_ALIGN (file, 2);
23057
      assemble_name (file, label);
23058
      fputs (":\n", file);
23059
      if (flag_pic)
23060
        {
23061
          /* Output ".word .LTHUNKn-7-.LTHUNKPCn".  */
23062
          rtx tem = XEXP (DECL_RTL (function), 0);
23063
          tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
23064
          tem = gen_rtx_MINUS (GET_MODE (tem),
23065
                               tem,
23066
                               gen_rtx_SYMBOL_REF (Pmode,
23067
                                                   ggc_strdup (labelpc)));
23068
          assemble_integer (tem, 4, BITS_PER_WORD, 1);
23069
        }
23070
      else
23071
        /* Output ".word .LTHUNKn".  */
23072
        assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
23073
 
23074
      if (TARGET_THUMB1_ONLY && mi_delta > 255)
23075
        assemble_integer (GEN_INT (mi_delta), 4, BITS_PER_WORD, 1);
23076
    }
23077
  else
23078
    {
23079
      fputs ("\tb\t", file);
23080
      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
23081
      if (NEED_PLT_RELOC)
23082
        fputs ("(PLT)", file);
23083
      fputc ('\n', file);
23084
    }
23085
}
23086
 
23087
int
23088
arm_emit_vector_const (FILE *file, rtx x)
23089
{
23090
  int i;
23091
  const char * pattern;
23092
 
23093
  gcc_assert (GET_CODE (x) == CONST_VECTOR);
23094
 
23095
  switch (GET_MODE (x))
23096
    {
23097
    case V2SImode: pattern = "%08x"; break;
23098
    case V4HImode: pattern = "%04x"; break;
23099
    case V8QImode: pattern = "%02x"; break;
23100
    default:       gcc_unreachable ();
23101
    }
23102
 
23103
  fprintf (file, "0x");
23104
  for (i = CONST_VECTOR_NUNITS (x); i--;)
23105
    {
23106
      rtx element;
23107
 
23108
      element = CONST_VECTOR_ELT (x, i);
23109
      fprintf (file, pattern, INTVAL (element));
23110
    }
23111
 
23112
  return 1;
23113
}
23114
 
23115
/* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
23116
   HFmode constant pool entries are actually loaded with ldr.  */
23117
void
23118
arm_emit_fp16_const (rtx c)
23119
{
23120
  REAL_VALUE_TYPE r;
23121
  long bits;
23122
 
23123
  REAL_VALUE_FROM_CONST_DOUBLE (r, c);
23124
  bits = real_to_target (NULL, &r, HFmode);
23125
  if (WORDS_BIG_ENDIAN)
23126
    assemble_zeros (2);
23127
  assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
23128
  if (!WORDS_BIG_ENDIAN)
23129
    assemble_zeros (2);
23130
}
23131
 
23132
const char *
23133
arm_output_load_gr (rtx *operands)
23134
{
23135
  rtx reg;
23136
  rtx offset;
23137
  rtx wcgr;
23138
  rtx sum;
23139
 
23140
  if (GET_CODE (operands [1]) != MEM
23141
      || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
23142
      || GET_CODE (reg = XEXP (sum, 0)) != REG
23143
      || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
23144
      || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
23145
    return "wldrw%?\t%0, %1";
23146
 
23147
  /* Fix up an out-of-range load of a GR register.  */
23148
  output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
23149
  wcgr = operands[0];
23150
  operands[0] = reg;
23151
  output_asm_insn ("ldr%?\t%0, %1", operands);
23152
 
23153
  operands[0] = wcgr;
23154
  operands[1] = reg;
23155
  output_asm_insn ("tmcr%?\t%0, %1", operands);
23156
  output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
23157
 
23158
  return "";
23159
}
23160
 
23161
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.
23162
 
23163
   On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
23164
   named arg and all anonymous args onto the stack.
23165
   XXX I know the prologue shouldn't be pushing registers, but it is faster
23166
   that way.  */
23167
 
23168
static void
23169
arm_setup_incoming_varargs (cumulative_args_t pcum_v,
23170
                            enum machine_mode mode,
23171
                            tree type,
23172
                            int *pretend_size,
23173
                            int second_time ATTRIBUTE_UNUSED)
23174
{
23175
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
23176
  int nregs;
23177
 
23178
  cfun->machine->uses_anonymous_args = 1;
23179
  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
23180
    {
23181
      nregs = pcum->aapcs_ncrn;
23182
      if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
23183
        nregs++;
23184
    }
23185
  else
23186
    nregs = pcum->nregs;
23187
 
23188
  if (nregs < NUM_ARG_REGS)
23189
    *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
23190
}
23191
 
23192
/* Return nonzero if the CONSUMER instruction (a store) does not need
23193
   PRODUCER's value to calculate the address.  */
23194
 
23195
int
23196
arm_no_early_store_addr_dep (rtx producer, rtx consumer)
23197
{
23198
  rtx value = PATTERN (producer);
23199
  rtx addr = PATTERN (consumer);
23200
 
23201
  if (GET_CODE (value) == COND_EXEC)
23202
    value = COND_EXEC_CODE (value);
23203
  if (GET_CODE (value) == PARALLEL)
23204
    value = XVECEXP (value, 0, 0);
23205
  value = XEXP (value, 0);
23206
  if (GET_CODE (addr) == COND_EXEC)
23207
    addr = COND_EXEC_CODE (addr);
23208
  if (GET_CODE (addr) == PARALLEL)
23209
    addr = XVECEXP (addr, 0, 0);
23210
  addr = XEXP (addr, 0);
23211
 
23212
  return !reg_overlap_mentioned_p (value, addr);
23213
}
23214
 
23215
/* Return nonzero if the CONSUMER instruction (a store) does need
23216
   PRODUCER's value to calculate the address.  */
23217
 
23218
int
23219
arm_early_store_addr_dep (rtx producer, rtx consumer)
23220
{
23221
  return !arm_no_early_store_addr_dep (producer, consumer);
23222
}
23223
 
23224
/* Return nonzero if the CONSUMER instruction (a load) does need
23225
   PRODUCER's value to calculate the address.  */
23226
 
23227
int
23228
arm_early_load_addr_dep (rtx producer, rtx consumer)
23229
{
23230
  rtx value = PATTERN (producer);
23231
  rtx addr = PATTERN (consumer);
23232
 
23233
  if (GET_CODE (value) == COND_EXEC)
23234
    value = COND_EXEC_CODE (value);
23235
  if (GET_CODE (value) == PARALLEL)
23236
    value = XVECEXP (value, 0, 0);
23237
  value = XEXP (value, 0);
23238
  if (GET_CODE (addr) == COND_EXEC)
23239
    addr = COND_EXEC_CODE (addr);
23240
  if (GET_CODE (addr) == PARALLEL)
23241
    addr = XVECEXP (addr, 0, 0);
23242
  addr = XEXP (addr, 1);
23243
 
23244
  return reg_overlap_mentioned_p (value, addr);
23245
}
23246
 
23247
/* Return nonzero if the CONSUMER instruction (an ALU op) does not
23248
   have an early register shift value or amount dependency on the
23249
   result of PRODUCER.  */
23250
 
23251
int
23252
arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
23253
{
23254
  rtx value = PATTERN (producer);
23255
  rtx op = PATTERN (consumer);
23256
  rtx early_op;
23257
 
23258
  if (GET_CODE (value) == COND_EXEC)
23259
    value = COND_EXEC_CODE (value);
23260
  if (GET_CODE (value) == PARALLEL)
23261
    value = XVECEXP (value, 0, 0);
23262
  value = XEXP (value, 0);
23263
  if (GET_CODE (op) == COND_EXEC)
23264
    op = COND_EXEC_CODE (op);
23265
  if (GET_CODE (op) == PARALLEL)
23266
    op = XVECEXP (op, 0, 0);
23267
  op = XEXP (op, 1);
23268
 
23269
  early_op = XEXP (op, 0);
23270
  /* This is either an actual independent shift, or a shift applied to
23271
     the first operand of another operation.  We want the whole shift
23272
     operation.  */
23273
  if (GET_CODE (early_op) == REG)
23274
    early_op = op;
23275
 
23276
  return !reg_overlap_mentioned_p (value, early_op);
23277
}
23278
 
23279
/* Return nonzero if the CONSUMER instruction (an ALU op) does not
23280
   have an early register shift value dependency on the result of
23281
   PRODUCER.  */
23282
 
23283
int
23284
arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
23285
{
23286
  rtx value = PATTERN (producer);
23287
  rtx op = PATTERN (consumer);
23288
  rtx early_op;
23289
 
23290
  if (GET_CODE (value) == COND_EXEC)
23291
    value = COND_EXEC_CODE (value);
23292
  if (GET_CODE (value) == PARALLEL)
23293
    value = XVECEXP (value, 0, 0);
23294
  value = XEXP (value, 0);
23295
  if (GET_CODE (op) == COND_EXEC)
23296
    op = COND_EXEC_CODE (op);
23297
  if (GET_CODE (op) == PARALLEL)
23298
    op = XVECEXP (op, 0, 0);
23299
  op = XEXP (op, 1);
23300
 
23301
  early_op = XEXP (op, 0);
23302
 
23303
  /* This is either an actual independent shift, or a shift applied to
23304
     the first operand of another operation.  We want the value being
23305
     shifted, in either case.  */
23306
  if (GET_CODE (early_op) != REG)
23307
    early_op = XEXP (early_op, 0);
23308
 
23309
  return !reg_overlap_mentioned_p (value, early_op);
23310
}
23311
 
23312
/* Return nonzero if the CONSUMER (a mul or mac op) does not
23313
   have an early register mult dependency on the result of
23314
   PRODUCER.  */
23315
 
23316
int
23317
arm_no_early_mul_dep (rtx producer, rtx consumer)
23318
{
23319
  rtx value = PATTERN (producer);
23320
  rtx op = PATTERN (consumer);
23321
 
23322
  if (GET_CODE (value) == COND_EXEC)
23323
    value = COND_EXEC_CODE (value);
23324
  if (GET_CODE (value) == PARALLEL)
23325
    value = XVECEXP (value, 0, 0);
23326
  value = XEXP (value, 0);
23327
  if (GET_CODE (op) == COND_EXEC)
23328
    op = COND_EXEC_CODE (op);
23329
  if (GET_CODE (op) == PARALLEL)
23330
    op = XVECEXP (op, 0, 0);
23331
  op = XEXP (op, 1);
23332
 
23333
  if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
23334
    {
23335
      if (GET_CODE (XEXP (op, 0)) == MULT)
23336
        return !reg_overlap_mentioned_p (value, XEXP (op, 0));
23337
      else
23338
        return !reg_overlap_mentioned_p (value, XEXP (op, 1));
23339
    }
23340
 
23341
  return 0;
23342
}
23343
 
23344
/* We can't rely on the caller doing the proper promotion when
23345
   using APCS or ATPCS.  */
23346
 
23347
static bool
23348
arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
23349
{
23350
    return !TARGET_AAPCS_BASED;
23351
}
23352
 
23353
static enum machine_mode
23354
arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
23355
                           enum machine_mode mode,
23356
                           int *punsignedp ATTRIBUTE_UNUSED,
23357
                           const_tree fntype ATTRIBUTE_UNUSED,
23358
                           int for_return ATTRIBUTE_UNUSED)
23359
{
23360
  if (GET_MODE_CLASS (mode) == MODE_INT
23361
      && GET_MODE_SIZE (mode) < 4)
23362
    return SImode;
23363
 
23364
  return mode;
23365
}
23366
 
23367
/* AAPCS based ABIs use short enums by default.  */
23368
 
23369
static bool
23370
arm_default_short_enums (void)
23371
{
23372
  return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
23373
}
23374
 
23375
 
23376
/* AAPCS requires that anonymous bitfields affect structure alignment.  */
23377
 
23378
static bool
23379
arm_align_anon_bitfield (void)
23380
{
23381
  return TARGET_AAPCS_BASED;
23382
}
23383
 
23384
 
23385
/* The generic C++ ABI says 64-bit (long long).  The EABI says 32-bit.  */
23386
 
23387
static tree
23388
arm_cxx_guard_type (void)
23389
{
23390
  return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
23391
}
23392
 
23393
/* Return non-zero if the consumer (a multiply-accumulate instruction)
23394
   has an accumulator dependency on the result of the producer (a
23395
   multiplication instruction) and no other dependency on that result.  */
23396
int
23397
arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
23398
{
23399
  rtx mul = PATTERN (producer);
23400
  rtx mac = PATTERN (consumer);
23401
  rtx mul_result;
23402
  rtx mac_op0, mac_op1, mac_acc;
23403
 
23404
  if (GET_CODE (mul) == COND_EXEC)
23405
    mul = COND_EXEC_CODE (mul);
23406
  if (GET_CODE (mac) == COND_EXEC)
23407
    mac = COND_EXEC_CODE (mac);
23408
 
23409
  /* Check that mul is of the form (set (...) (mult ...))
23410
     and mla is of the form (set (...) (plus (mult ...) (...))).  */
23411
  if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
23412
      || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
23413
          || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
23414
    return 0;
23415
 
23416
  mul_result = XEXP (mul, 0);
23417
  mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
23418
  mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
23419
  mac_acc = XEXP (XEXP (mac, 1), 1);
23420
 
23421
  return (reg_overlap_mentioned_p (mul_result, mac_acc)
23422
          && !reg_overlap_mentioned_p (mul_result, mac_op0)
23423
          && !reg_overlap_mentioned_p (mul_result, mac_op1));
23424
}
23425
 
23426
 
23427
/* The EABI says test the least significant bit of a guard variable.  */
23428
 
23429
static bool
23430
arm_cxx_guard_mask_bit (void)
23431
{
23432
  return TARGET_AAPCS_BASED;
23433
}
23434
 
23435
 
23436
/* The EABI specifies that all array cookies are 8 bytes long.  */
23437
 
23438
static tree
23439
arm_get_cookie_size (tree type)
23440
{
23441
  tree size;
23442
 
23443
  if (!TARGET_AAPCS_BASED)
23444
    return default_cxx_get_cookie_size (type);
23445
 
23446
  size = build_int_cst (sizetype, 8);
23447
  return size;
23448
}
23449
 
23450
 
23451
/* The EABI says that array cookies should also contain the element size.  */
23452
 
23453
static bool
23454
arm_cookie_has_size (void)
23455
{
23456
  return TARGET_AAPCS_BASED;
23457
}
23458
 
23459
 
23460
/* The EABI says constructors and destructors should return a pointer to
23461
   the object constructed/destroyed.  */
23462
 
23463
static bool
23464
arm_cxx_cdtor_returns_this (void)
23465
{
23466
  return TARGET_AAPCS_BASED;
23467
}
23468
 
23469
/* The EABI says that an inline function may never be the key
23470
   method.  */
23471
 
23472
static bool
23473
arm_cxx_key_method_may_be_inline (void)
23474
{
23475
  return !TARGET_AAPCS_BASED;
23476
}
23477
 
23478
static void
23479
arm_cxx_determine_class_data_visibility (tree decl)
23480
{
23481
  if (!TARGET_AAPCS_BASED
23482
      || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
23483
    return;
23484
 
23485
  /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
23486
     is exported.  However, on systems without dynamic vague linkage,
23487
     \S 3.2.5.6 says that COMDAT class data has hidden linkage.  */
23488
  if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
23489
    DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
23490
  else
23491
    DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
23492
  DECL_VISIBILITY_SPECIFIED (decl) = 1;
23493
}
23494
 
23495
static bool
23496
arm_cxx_class_data_always_comdat (void)
23497
{
23498
  /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
23499
     vague linkage if the class has no key function.  */
23500
  return !TARGET_AAPCS_BASED;
23501
}
23502
 
23503
 
23504
/* The EABI says __aeabi_atexit should be used to register static
23505
   destructors.  */
23506
 
23507
static bool
23508
arm_cxx_use_aeabi_atexit (void)
23509
{
23510
  return TARGET_AAPCS_BASED;
23511
}
23512
 
23513
 
23514
void
23515
arm_set_return_address (rtx source, rtx scratch)
23516
{
23517
  arm_stack_offsets *offsets;
23518
  HOST_WIDE_INT delta;
23519
  rtx addr;
23520
  unsigned long saved_regs;
23521
 
23522
  offsets = arm_get_frame_offsets ();
23523
  saved_regs = offsets->saved_regs_mask;
23524
 
23525
  if ((saved_regs & (1 << LR_REGNUM)) == 0)
23526
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
23527
  else
23528
    {
23529
      if (frame_pointer_needed)
23530
        addr = plus_constant(hard_frame_pointer_rtx, -4);
23531
      else
23532
        {
23533
          /* LR will be the first saved register.  */
23534
          delta = offsets->outgoing_args - (offsets->frame + 4);
23535
 
23536
 
23537
          if (delta >= 4096)
23538
            {
23539
              emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
23540
                                     GEN_INT (delta & ~4095)));
23541
              addr = scratch;
23542
              delta &= 4095;
23543
            }
23544
          else
23545
            addr = stack_pointer_rtx;
23546
 
23547
          addr = plus_constant (addr, delta);
23548
        }
23549
      emit_move_insn (gen_frame_mem (Pmode, addr), source);
23550
    }
23551
}
23552
 
23553
 
23554
void
23555
thumb_set_return_address (rtx source, rtx scratch)
23556
{
23557
  arm_stack_offsets *offsets;
23558
  HOST_WIDE_INT delta;
23559
  HOST_WIDE_INT limit;
23560
  int reg;
23561
  rtx addr;
23562
  unsigned long mask;
23563
 
23564
  emit_use (source);
23565
 
23566
  offsets = arm_get_frame_offsets ();
23567
  mask = offsets->saved_regs_mask;
23568
  if (mask & (1 << LR_REGNUM))
23569
    {
23570
      limit = 1024;
23571
      /* Find the saved regs.  */
23572
      if (frame_pointer_needed)
23573
        {
23574
          delta = offsets->soft_frame - offsets->saved_args;
23575
          reg = THUMB_HARD_FRAME_POINTER_REGNUM;
23576
          if (TARGET_THUMB1)
23577
            limit = 128;
23578
        }
23579
      else
23580
        {
23581
          delta = offsets->outgoing_args - offsets->saved_args;
23582
          reg = SP_REGNUM;
23583
        }
23584
      /* Allow for the stack frame.  */
23585
      if (TARGET_THUMB1 && TARGET_BACKTRACE)
23586
        delta -= 16;
23587
      /* The link register is always the first saved register.  */
23588
      delta -= 4;
23589
 
23590
      /* Construct the address.  */
23591
      addr = gen_rtx_REG (SImode, reg);
23592
      if (delta > limit)
23593
        {
23594
          emit_insn (gen_movsi (scratch, GEN_INT (delta)));
23595
          emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
23596
          addr = scratch;
23597
        }
23598
      else
23599
        addr = plus_constant (addr, delta);
23600
 
23601
      emit_move_insn (gen_frame_mem (Pmode, addr), source);
23602
    }
23603
  else
23604
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
23605
}
23606
 
23607
/* Implements target hook vector_mode_supported_p.  */
23608
bool
23609
arm_vector_mode_supported_p (enum machine_mode mode)
23610
{
23611
  /* Neon also supports V2SImode, etc. listed in the clause below.  */
23612
  if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
23613
      || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
23614
    return true;
23615
 
23616
  if ((TARGET_NEON || TARGET_IWMMXT)
23617
      && ((mode == V2SImode)
23618
          || (mode == V4HImode)
23619
          || (mode == V8QImode)))
23620
    return true;
23621
 
23622
  if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
23623
      || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
23624
      || mode == V2HAmode))
23625
    return true;
23626
 
23627
  return false;
23628
}
23629
 
23630
/* Implements target hook array_mode_supported_p.  */
23631
 
23632
static bool
23633
arm_array_mode_supported_p (enum machine_mode mode,
23634
                            unsigned HOST_WIDE_INT nelems)
23635
{
23636
  if (TARGET_NEON
23637
      && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
23638
      && (nelems >= 2 && nelems <= 4))
23639
    return true;
23640
 
23641
  return false;
23642
}
23643
 
23644
/* Use the option -mvectorize-with-neon-double to override the use of quadword
23645
   registers when autovectorizing for Neon, at least until multiple vector
23646
   widths are supported properly by the middle-end.  */
23647
 
23648
static enum machine_mode
23649
arm_preferred_simd_mode (enum machine_mode mode)
23650
{
23651
  if (TARGET_NEON)
23652
    switch (mode)
23653
      {
23654
      case SFmode:
23655
        return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
23656
      case SImode:
23657
        return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
23658
      case HImode:
23659
        return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
23660
      case QImode:
23661
        return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
23662
      case DImode:
23663
        if (!TARGET_NEON_VECTORIZE_DOUBLE)
23664
          return V2DImode;
23665
        break;
23666
 
23667
      default:;
23668
      }
23669
 
23670
  if (TARGET_REALLY_IWMMXT)
23671
    switch (mode)
23672
      {
23673
      case SImode:
23674
        return V2SImode;
23675
      case HImode:
23676
        return V4HImode;
23677
      case QImode:
23678
        return V8QImode;
23679
 
23680
      default:;
23681
      }
23682
 
23683
  return word_mode;
23684
}
23685
 
23686
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.
23687
 
23688
   We need to define this for LO_REGS on Thumb-1.  Otherwise we can end up
23689
   using r0-r4 for function arguments, r7 for the stack frame and don't have
23690
   enough left over to do doubleword arithmetic.  For Thumb-2 all the
23691
   potentially problematic instructions accept high registers so this is not
23692
   necessary.  Care needs to be taken to avoid adding new Thumb-2 patterns
23693
   that require many low registers.  */
23694
static bool
23695
arm_class_likely_spilled_p (reg_class_t rclass)
23696
{
23697
  if ((TARGET_THUMB1 && rclass == LO_REGS)
23698
      || rclass  == CC_REG)
23699
    return true;
23700
 
23701
  return false;
23702
}
23703
 
23704
/* Implements target hook small_register_classes_for_mode_p.  */
23705
bool
23706
arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
23707
{
23708
  return TARGET_THUMB1;
23709
}
23710
 
23711
/* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
23712
   ARM insns and therefore guarantee that the shift count is modulo 256.
23713
   DImode shifts (those implemented by lib1funcs.S or by optabs.c)
23714
   guarantee no particular behavior for out-of-range counts.  */
23715
 
23716
static unsigned HOST_WIDE_INT
23717
arm_shift_truncation_mask (enum machine_mode mode)
23718
{
23719
  return mode == SImode ? 255 : 0;
23720
}
23721
 
23722
 
23723
/* Map internal gcc register numbers to DWARF2 register numbers.  */
23724
 
23725
unsigned int
23726
arm_dbx_register_number (unsigned int regno)
23727
{
23728
  if (regno < 16)
23729
    return regno;
23730
 
23731
  /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
23732
     compatibility.  The EABI defines them as registers 96-103.  */
23733
  if (IS_FPA_REGNUM (regno))
23734
    return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
23735
 
23736
  if (IS_VFP_REGNUM (regno))
23737
    {
23738
      /* See comment in arm_dwarf_register_span.  */
23739
      if (VFP_REGNO_OK_FOR_SINGLE (regno))
23740
        return 64 + regno - FIRST_VFP_REGNUM;
23741
      else
23742
        return 256 + (regno - FIRST_VFP_REGNUM) / 2;
23743
    }
23744
 
23745
  if (IS_IWMMXT_GR_REGNUM (regno))
23746
    return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
23747
 
23748
  if (IS_IWMMXT_REGNUM (regno))
23749
    return 112 + regno - FIRST_IWMMXT_REGNUM;
23750
 
23751
  gcc_unreachable ();
23752
}
23753
 
23754
/* Dwarf models VFPv3 registers as 32 64-bit registers.
23755
   GCC models them as 64 32-bit registers, so we need to describe this to
23756
   the DWARF generation code.  Other registers can use the default.  */
23757
static rtx
23758
arm_dwarf_register_span (rtx rtl)
23759
{
23760
  unsigned regno;
23761
  int nregs;
23762
  int i;
23763
  rtx p;
23764
 
23765
  regno = REGNO (rtl);
23766
  if (!IS_VFP_REGNUM (regno))
23767
    return NULL_RTX;
23768
 
23769
  /* XXX FIXME: The EABI defines two VFP register ranges:
23770
        64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
23771
        256-287: D0-D31
23772
     The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
23773
     corresponding D register.  Until GDB supports this, we shall use the
23774
     legacy encodings.  We also use these encodings for D0-D15 for
23775
     compatibility with older debuggers.  */
23776
  if (VFP_REGNO_OK_FOR_SINGLE (regno))
23777
    return NULL_RTX;
23778
 
23779
  nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
23780
  p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
23781
  regno = (regno - FIRST_VFP_REGNUM) / 2;
23782
  for (i = 0; i < nregs; i++)
23783
    XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
23784
 
23785
  return p;
23786
}
23787
 
23788
#if ARM_UNWIND_INFO
23789
/* Emit unwind directives for a store-multiple instruction or stack pointer
23790
   push during alignment.
23791
   These should only ever be generated by the function prologue code, so
23792
   expect them to have a particular form.  */
23793
 
23794
static void
23795
arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
23796
{
23797
  int i;
23798
  HOST_WIDE_INT offset;
23799
  HOST_WIDE_INT nregs;
23800
  int reg_size;
23801
  unsigned reg;
23802
  unsigned lastreg;
23803
  rtx e;
23804
 
23805
  e = XVECEXP (p, 0, 0);
23806
  if (GET_CODE (e) != SET)
23807
    abort ();
23808
 
23809
  /* First insn will adjust the stack pointer.  */
23810
  if (GET_CODE (e) != SET
23811
      || GET_CODE (XEXP (e, 0)) != REG
23812
      || REGNO (XEXP (e, 0)) != SP_REGNUM
23813
      || GET_CODE (XEXP (e, 1)) != PLUS)
23814
    abort ();
23815
 
23816
  offset = -INTVAL (XEXP (XEXP (e, 1), 1));
23817
  nregs = XVECLEN (p, 0) - 1;
23818
 
23819
  reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
23820
  if (reg < 16)
23821
    {
23822
      /* The function prologue may also push pc, but not annotate it as it is
23823
         never restored.  We turn this into a stack pointer adjustment.  */
23824
      if (nregs * 4 == offset - 4)
23825
        {
23826
          fprintf (asm_out_file, "\t.pad #4\n");
23827
          offset -= 4;
23828
        }
23829
      reg_size = 4;
23830
      fprintf (asm_out_file, "\t.save {");
23831
    }
23832
  else if (IS_VFP_REGNUM (reg))
23833
    {
23834
      reg_size = 8;
23835
      fprintf (asm_out_file, "\t.vsave {");
23836
    }
23837
  else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
23838
    {
23839
      /* FPA registers are done differently.  */
23840
      asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
23841
      return;
23842
    }
23843
  else
23844
    /* Unknown register type.  */
23845
    abort ();
23846
 
23847
  /* If the stack increment doesn't match the size of the saved registers,
23848
     something has gone horribly wrong.  */
23849
  if (offset != nregs * reg_size)
23850
    abort ();
23851
 
23852
  offset = 0;
23853
  lastreg = 0;
23854
  /* The remaining insns will describe the stores.  */
23855
  for (i = 1; i <= nregs; i++)
23856
    {
23857
      /* Expect (set (mem <addr>) (reg)).
23858
         Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)).  */
23859
      e = XVECEXP (p, 0, i);
23860
      if (GET_CODE (e) != SET
23861
          || GET_CODE (XEXP (e, 0)) != MEM
23862
          || GET_CODE (XEXP (e, 1)) != REG)
23863
        abort ();
23864
 
23865
      reg = REGNO (XEXP (e, 1));
23866
      if (reg < lastreg)
23867
        abort ();
23868
 
23869
      if (i != 1)
23870
        fprintf (asm_out_file, ", ");
23871
      /* We can't use %r for vfp because we need to use the
23872
         double precision register names.  */
23873
      if (IS_VFP_REGNUM (reg))
23874
        asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
23875
      else
23876
        asm_fprintf (asm_out_file, "%r", reg);
23877
 
23878
#ifdef ENABLE_CHECKING
23879
      /* Check that the addresses are consecutive.  */
23880
      e = XEXP (XEXP (e, 0), 0);
23881
      if (GET_CODE (e) == PLUS)
23882
        {
23883
          offset += reg_size;
23884
          if (GET_CODE (XEXP (e, 0)) != REG
23885
              || REGNO (XEXP (e, 0)) != SP_REGNUM
23886
              || GET_CODE (XEXP (e, 1)) != CONST_INT
23887
              || offset != INTVAL (XEXP (e, 1)))
23888
            abort ();
23889
        }
23890
      else if (i != 1
23891
               || GET_CODE (e) != REG
23892
               || REGNO (e) != SP_REGNUM)
23893
        abort ();
23894
#endif
23895
    }
23896
  fprintf (asm_out_file, "}\n");
23897
}
23898
 
23899
/*  Emit unwind directives for a SET.  */
23900
 
23901
static void
23902
arm_unwind_emit_set (FILE * asm_out_file, rtx p)
23903
{
23904
  rtx e0;
23905
  rtx e1;
23906
  unsigned reg;
23907
 
23908
  e0 = XEXP (p, 0);
23909
  e1 = XEXP (p, 1);
23910
  switch (GET_CODE (e0))
23911
    {
23912
    case MEM:
23913
      /* Pushing a single register.  */
23914
      if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
23915
          || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
23916
          || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
23917
        abort ();
23918
 
23919
      asm_fprintf (asm_out_file, "\t.save ");
23920
      if (IS_VFP_REGNUM (REGNO (e1)))
23921
        asm_fprintf(asm_out_file, "{d%d}\n",
23922
                    (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
23923
      else
23924
        asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
23925
      break;
23926
 
23927
    case REG:
23928
      if (REGNO (e0) == SP_REGNUM)
23929
        {
23930
          /* A stack increment.  */
23931
          if (GET_CODE (e1) != PLUS
23932
              || GET_CODE (XEXP (e1, 0)) != REG
23933
              || REGNO (XEXP (e1, 0)) != SP_REGNUM
23934
              || GET_CODE (XEXP (e1, 1)) != CONST_INT)
23935
            abort ();
23936
 
23937
          asm_fprintf (asm_out_file, "\t.pad #%wd\n",
23938
                       -INTVAL (XEXP (e1, 1)));
23939
        }
23940
      else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
23941
        {
23942
          HOST_WIDE_INT offset;
23943
 
23944
          if (GET_CODE (e1) == PLUS)
23945
            {
23946
              if (GET_CODE (XEXP (e1, 0)) != REG
23947
                  || GET_CODE (XEXP (e1, 1)) != CONST_INT)
23948
                abort ();
23949
              reg = REGNO (XEXP (e1, 0));
23950
              offset = INTVAL (XEXP (e1, 1));
23951
              asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
23952
                           HARD_FRAME_POINTER_REGNUM, reg,
23953
                           offset);
23954
            }
23955
          else if (GET_CODE (e1) == REG)
23956
            {
23957
              reg = REGNO (e1);
23958
              asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
23959
                           HARD_FRAME_POINTER_REGNUM, reg);
23960
            }
23961
          else
23962
            abort ();
23963
        }
23964
      else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
23965
        {
23966
          /* Move from sp to reg.  */
23967
          asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
23968
        }
23969
     else if (GET_CODE (e1) == PLUS
23970
              && GET_CODE (XEXP (e1, 0)) == REG
23971
              && REGNO (XEXP (e1, 0)) == SP_REGNUM
23972
              && GET_CODE (XEXP (e1, 1)) == CONST_INT)
23973
        {
23974
          /* Set reg to offset from sp.  */
23975
          asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
23976
                       REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
23977
        }
23978
      else
23979
        abort ();
23980
      break;
23981
 
23982
    default:
23983
      abort ();
23984
    }
23985
}
23986
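/* For example, a prologue stack adjustment "sub sp, sp, #16" is
   annotated as "\t.pad #16", a single-register push such as r4 (or d8
   for VFP) comes out as "\t.save {r4}" or "\t.save {d8}", and setting
   up the frame pointer from the stack pointer produces a ".setfp"
   directive naming the two registers and the offset.  (Illustrative of
   the directives emitted above; the operands come from the RTL being
   annotated.)  */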
 
23987
 
23988
/* Emit unwind directives for the given insn.  */
23989
 
23990
static void
23991
arm_unwind_emit (FILE * asm_out_file, rtx insn)
23992
{
23993
  rtx note, pat;
23994
  bool handled_one = false;
23995
 
23996
  if (arm_except_unwind_info (&global_options) != UI_TARGET)
23997
    return;
23998
 
23999
  if (!(flag_unwind_tables || crtl->uses_eh_lsda)
24000
      && (TREE_NOTHROW (current_function_decl)
24001
          || crtl->all_throwers_are_sibcalls))
24002
    return;
24003
 
24004
  if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
24005
    return;
24006
 
24007
  for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
24008
    {
24009
      pat = XEXP (note, 0);
24010
      switch (REG_NOTE_KIND (note))
24011
        {
24012
        case REG_FRAME_RELATED_EXPR:
24013
          goto found;
24014
 
24015
        case REG_CFA_REGISTER:
24016
          if (pat == NULL)
24017
            {
24018
              pat = PATTERN (insn);
24019
              if (GET_CODE (pat) == PARALLEL)
24020
                pat = XVECEXP (pat, 0, 0);
24021
            }
24022
 
24023
          /* Only emitted for IS_STACKALIGN re-alignment.  */
24024
          {
24025
            rtx dest, src;
24026
            unsigned reg;
24027
 
24028
            src = SET_SRC (pat);
24029
            dest = SET_DEST (pat);
24030
 
24031
            gcc_assert (src == stack_pointer_rtx);
24032
            reg = REGNO (dest);
24033
            asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
24034
                         reg + 0x90, reg);
24035
          }
24036
          handled_one = true;
24037
          break;
24038
 
24039
        case REG_CFA_DEF_CFA:
24040
        case REG_CFA_EXPRESSION:
24041
        case REG_CFA_ADJUST_CFA:
24042
        case REG_CFA_OFFSET:
24043
          /* ??? Only handling here what we actually emit.  */
24044
          gcc_unreachable ();
24045
 
24046
        default:
24047
          break;
24048
        }
24049
    }
24050
  if (handled_one)
24051
    return;
24052
  pat = PATTERN (insn);
24053
 found:
24054
 
24055
  switch (GET_CODE (pat))
24056
    {
24057
    case SET:
24058
      arm_unwind_emit_set (asm_out_file, pat);
24059
      break;
24060
 
24061
    case SEQUENCE:
24062
      /* Store multiple.  */
24063
      arm_unwind_emit_sequence (asm_out_file, pat);
24064
      break;
24065
 
24066
    default:
24067
      abort();
24068
    }
24069
}
24070
 
24071
 
24072
/* Output a reference from a function exception table to the type_info
24073
   object X.  The EABI specifies that the symbol should be relocated by
24074
   an R_ARM_TARGET2 relocation.  */
24075
 
24076
static bool
24077
arm_output_ttype (rtx x)
24078
{
24079
  fputs ("\t.word\t", asm_out_file);
24080
  output_addr_const (asm_out_file, x);
24081
  /* Use special relocations for symbol references.  */
24082
  if (GET_CODE (x) != CONST_INT)
24083
    fputs ("(TARGET2)", asm_out_file);
24084
  fputc ('\n', asm_out_file);
24085
 
24086
  return TRUE;
24087
}
24088
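/* For instance, a reference to the typeinfo symbol _ZTIi (an arbitrary
   example) is emitted as "\t.word\t_ZTIi(TARGET2)", whereas an integer
   constant in the table is emitted without the relocation suffix.  */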
 
24089
/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */
24090
 
24091
static void
24092
arm_asm_emit_except_personality (rtx personality)
24093
{
24094
  fputs ("\t.personality\t", asm_out_file);
24095
  output_addr_const (asm_out_file, personality);
24096
  fputc ('\n', asm_out_file);
24097
}
24098
 
24099
/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */
24100
 
24101
static void
24102
arm_asm_init_sections (void)
24103
{
24104
  exception_section = get_unnamed_section (0, output_section_asm_op,
24105
                                           "\t.handlerdata");
24106
}
24107
#endif /* ARM_UNWIND_INFO */
24108
 
24109
/* Output unwind directives for the start/end of a function.  */
24110
 
24111
void
24112
arm_output_fn_unwind (FILE * f, bool prologue)
24113
{
24114
  if (arm_except_unwind_info (&global_options) != UI_TARGET)
24115
    return;
24116
 
24117
  if (prologue)
24118
    fputs ("\t.fnstart\n", f);
24119
  else
24120
    {
24121
      /* If this function will never be unwound, then mark it as such.
24122
         The same condition is used in arm_unwind_emit to suppress
24123
         the frame annotations.  */
24124
      if (!(flag_unwind_tables || crtl->uses_eh_lsda)
24125
          && (TREE_NOTHROW (current_function_decl)
24126
              || crtl->all_throwers_are_sibcalls))
24127
        fputs("\t.cantunwind\n", f);
24128
 
24129
      fputs ("\t.fnend\n", f);
24130
    }
24131
}
24132
 
24133
static bool
24134
arm_emit_tls_decoration (FILE *fp, rtx x)
24135
{
24136
  enum tls_reloc reloc;
24137
  rtx val;
24138
 
24139
  val = XVECEXP (x, 0, 0);
24140
  reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
24141
 
24142
  output_addr_const (fp, val);
24143
 
24144
  switch (reloc)
24145
    {
24146
    case TLS_GD32:
24147
      fputs ("(tlsgd)", fp);
24148
      break;
24149
    case TLS_LDM32:
24150
      fputs ("(tlsldm)", fp);
24151
      break;
24152
    case TLS_LDO32:
24153
      fputs ("(tlsldo)", fp);
24154
      break;
24155
    case TLS_IE32:
24156
      fputs ("(gottpoff)", fp);
24157
      break;
24158
    case TLS_LE32:
24159
      fputs ("(tpoff)", fp);
24160
      break;
24161
    case TLS_DESCSEQ:
24162
      fputs ("(tlsdesc)", fp);
24163
      break;
24164
    default:
24165
      gcc_unreachable ();
24166
    }
24167
 
24168
  switch (reloc)
24169
    {
24170
    case TLS_GD32:
24171
    case TLS_LDM32:
24172
    case TLS_IE32:
24173
    case TLS_DESCSEQ:
24174
      fputs (" + (. - ", fp);
24175
      output_addr_const (fp, XVECEXP (x, 0, 2));
24176
      /* For DESCSEQ the 3rd operand encodes thumbness, and is added.  */
24177
      fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
24178
      output_addr_const (fp, XVECEXP (x, 0, 3));
24179
      fputc (')', fp);
24180
      break;
24181
    default:
24182
      break;
24183
    }
24184
 
24185
  return TRUE;
24186
}
24187
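/* As an illustration, a global-dynamic reference is printed in the
   form "sym(tlsgd) + (. - .LPICn - 8)", where the label and the
   trailing constant are operands 2 and 3 of the UNSPEC; the exact
   values depend on the surrounding code sequence.  */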
 
24188
/* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  */
24189
 
24190
static void
24191
arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
24192
{
24193
  gcc_assert (size == 4);
24194
  fputs ("\t.word\t", file);
24195
  output_addr_const (file, x);
24196
  fputs ("(tlsldo)", file);
24197
}
24198
 
24199
/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */
24200
 
24201
static bool
24202
arm_output_addr_const_extra (FILE *fp, rtx x)
24203
{
24204
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
24205
    return arm_emit_tls_decoration (fp, x);
24206
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
24207
    {
24208
      char label[256];
24209
      int labelno = INTVAL (XVECEXP (x, 0, 0));
24210
 
24211
      ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
24212
      assemble_name_raw (fp, label);
24213
 
24214
      return TRUE;
24215
    }
24216
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
24217
    {
24218
      assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
24219
      if (GOT_PCREL)
24220
        fputs ("+.", fp);
24221
      fputs ("-(", fp);
24222
      output_addr_const (fp, XVECEXP (x, 0, 0));
24223
      fputc (')', fp);
24224
      return TRUE;
24225
    }
24226
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
24227
    {
24228
      output_addr_const (fp, XVECEXP (x, 0, 0));
24229
      if (GOT_PCREL)
24230
        fputs ("+.", fp);
24231
      fputs ("-(", fp);
24232
      output_addr_const (fp, XVECEXP (x, 0, 1));
24233
      fputc (')', fp);
24234
      return TRUE;
24235
    }
24236
  else if (GET_CODE (x) == CONST_VECTOR)
24237
    return arm_emit_vector_const (fp, x);
24238
 
24239
  return FALSE;
24240
}
24241
 
24242
/* Output assembly for a shift instruction.
24243
   SET_FLAGS determines how the instruction modifies the condition codes.
24244
 
24245
   1 - Set condition codes.
24246
   2 - Use smallest instruction.  */
24247
const char *
24248
arm_output_shift(rtx * operands, int set_flags)
24249
{
24250
  char pattern[100];
24251
  static const char flag_chars[3] = {'?', '.', '!'};
24252
  const char *shift;
24253
  HOST_WIDE_INT val;
24254
  char c;
24255
 
24256
  c = flag_chars[set_flags];
24257
  if (TARGET_UNIFIED_ASM)
24258
    {
24259
      shift = shift_op(operands[3], &val);
24260
      if (shift)
24261
        {
24262
          if (val != -1)
24263
            operands[2] = GEN_INT(val);
24264
          sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
24265
        }
24266
      else
24267
        sprintf (pattern, "mov%%%c\t%%0, %%1", c);
24268
    }
24269
  else
24270
    sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
24271
  output_asm_insn (pattern, operands);
24272
  return "";
24273
}
24274
 
24275
/* Output a Thumb-1 casesi dispatch sequence.  */
24276
const char *
24277
thumb1_output_casesi (rtx *operands)
24278
{
24279
  rtx diff_vec = PATTERN (next_real_insn (operands[0]));
24280
 
24281
  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
24282
 
24283
  switch (GET_MODE(diff_vec))
24284
    {
24285
    case QImode:
24286
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
24287
              "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
24288
    case HImode:
24289
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
24290
              "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
24291
    case SImode:
24292
      return "bl\t%___gnu_thumb1_case_si";
24293
    default:
24294
      gcc_unreachable ();
24295
    }
24296
}
24297
 
24298
/* Output a Thumb-2 casesi instruction.  */
24299
const char *
24300
thumb2_output_casesi (rtx *operands)
24301
{
24302
  rtx diff_vec = PATTERN (next_real_insn (operands[2]));
24303
 
24304
  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
24305
 
24306
  output_asm_insn ("cmp\t%0, %1", operands);
24307
  output_asm_insn ("bhi\t%l3", operands);
24308
  switch (GET_MODE(diff_vec))
24309
    {
24310
    case QImode:
24311
      return "tbb\t[%|pc, %0]";
24312
    case HImode:
24313
      return "tbh\t[%|pc, %0, lsl #1]";
24314
    case SImode:
24315
      if (flag_pic)
24316
        {
24317
          output_asm_insn ("adr\t%4, %l2", operands);
24318
          output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
24319
          output_asm_insn ("add\t%4, %4, %5", operands);
24320
          return "bx\t%4";
24321
        }
24322
      else
24323
        {
24324
          output_asm_insn ("adr\t%4, %l2", operands);
24325
          return "ldr\t%|pc, [%4, %0, lsl #2]";
24326
        }
24327
    default:
24328
      gcc_unreachable ();
24329
    }
24330
}
24331
 
24332
/* Most ARM cores are single issue, but some newer ones can dual issue.
24333
   The scheduler descriptions rely on this being correct.  */
24334
static int
24335
arm_issue_rate (void)
24336
{
24337
  switch (arm_tune)
24338
    {
24339
    case cortexa15:
24340
      return 3;
24341
 
24342
    case cortexr4:
24343
    case cortexr4f:
24344
    case cortexr5:
24345
    case genericv7a:
24346
    case cortexa5:
24347
    case cortexa8:
24348
    case cortexa9:
24349
    case fa726te:
24350
      return 2;
24351
 
24352
    default:
24353
      return 1;
24354
    }
24355
}
24356
 
24357
/* A table and a function to perform ARM-specific name mangling for
24358
   NEON vector types in order to conform to the AAPCS (see "Procedure
24359
   Call Standard for the ARM Architecture", Appendix A).  To qualify
24360
   for emission with the mangled names defined in that document, a
24361
   vector type must not only be of the correct mode but also be
24362
   composed of NEON vector element types (e.g. __builtin_neon_qi).  */
24363
typedef struct
24364
{
24365
  enum machine_mode mode;
24366
  const char *element_type_name;
24367
  const char *aapcs_name;
24368
} arm_mangle_map_entry;
24369
 
24370
static arm_mangle_map_entry arm_mangle_map[] = {
24371
  /* 64-bit containerized types.  */
24372
  { V8QImode,  "__builtin_neon_qi",     "15__simd64_int8_t" },
24373
  { V8QImode,  "__builtin_neon_uqi",    "16__simd64_uint8_t" },
24374
  { V4HImode,  "__builtin_neon_hi",     "16__simd64_int16_t" },
24375
  { V4HImode,  "__builtin_neon_uhi",    "17__simd64_uint16_t" },
24376
  { V2SImode,  "__builtin_neon_si",     "16__simd64_int32_t" },
24377
  { V2SImode,  "__builtin_neon_usi",    "17__simd64_uint32_t" },
24378
  { V2SFmode,  "__builtin_neon_sf",     "18__simd64_float32_t" },
24379
  { V8QImode,  "__builtin_neon_poly8",  "16__simd64_poly8_t" },
24380
  { V4HImode,  "__builtin_neon_poly16", "17__simd64_poly16_t" },
24381
  /* 128-bit containerized types.  */
24382
  { V16QImode, "__builtin_neon_qi",     "16__simd128_int8_t" },
24383
  { V16QImode, "__builtin_neon_uqi",    "17__simd128_uint8_t" },
24384
  { V8HImode,  "__builtin_neon_hi",     "17__simd128_int16_t" },
24385
  { V8HImode,  "__builtin_neon_uhi",    "18__simd128_uint16_t" },
24386
  { V4SImode,  "__builtin_neon_si",     "17__simd128_int32_t" },
24387
  { V4SImode,  "__builtin_neon_usi",    "18__simd128_uint32_t" },
24388
  { V4SFmode,  "__builtin_neon_sf",     "19__simd128_float32_t" },
24389
  { V16QImode, "__builtin_neon_poly8",  "17__simd128_poly8_t" },
24390
  { V8HImode,  "__builtin_neon_poly16", "18__simd128_poly16_t" },
24391
  { VOIDmode, NULL, NULL }
24392
};
24393
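/* For example, a NEON int8x8_t value (V8QImode with __builtin_neon_qi
   elements) is mangled as "15__simd64_int8_t", so that name appears
   verbatim in the mangled signature of any C++ function taking such a
   parameter.  */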
 
24394
const char *
24395
arm_mangle_type (const_tree type)
24396
{
24397
  arm_mangle_map_entry *pos = arm_mangle_map;
24398
 
24399
  /* The ARM ABI documents (10th October 2008) say that "__va_list"
24400
     has to be mangled as if it were in the "std" namespace.  */
24401
  if (TARGET_AAPCS_BASED
24402
      && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
24403
    {
24404
      static bool warned;
24405
      if (!warned && warn_psabi && !in_system_header)
24406
        {
24407
          warned = true;
24408
          inform (input_location,
24409
                  "the mangling of %<va_list%> has changed in GCC 4.4");
24410
        }
24411
      return "St9__va_list";
24412
    }
24413
 
24414
  /* Half-precision float.  */
24415
  if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
24416
    return "Dh";
24417
 
24418
  if (TREE_CODE (type) != VECTOR_TYPE)
24419
    return NULL;
24420
 
24421
  /* Check the mode of the vector type, and the name of the vector
24422
     element type, against the table.  */
24423
  while (pos->mode != VOIDmode)
24424
    {
24425
      tree elt_type = TREE_TYPE (type);
24426
 
24427
      if (pos->mode == TYPE_MODE (type)
24428
          && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
24429
          && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
24430
                      pos->element_type_name))
24431
        return pos->aapcs_name;
24432
 
24433
      pos++;
24434
    }
24435
 
24436
  /* Use the default mangling for unrecognized (possibly user-defined)
24437
     vector types.  */
24438
  return NULL;
24439
}
24440
 
24441
/* Order of allocation of core registers for Thumb: this allocation is
24442
   written over the corresponding initial entries of the array
24443
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
24444
   first.  Saving and restoring a low register is usually cheaper than
24445
   using a call-clobbered high register.  */
24446
 
24447
static const int thumb_core_reg_alloc_order[] =
24448
{
24449
   3,  2,  1,  0,  4,  5,  6,  7,
24450
  14, 12,  8,  9, 10, 11, 13, 15
24451
};
24452
 
24453
/* Adjust register allocation order when compiling for Thumb.  */
24454
 
24455
void
24456
arm_order_regs_for_local_alloc (void)
24457
{
24458
  const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
24459
  memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
24460
  if (TARGET_THUMB)
24461
    memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
24462
            sizeof (thumb_core_reg_alloc_order));
24463
}
24464
 
24465
/* Implement TARGET_FRAME_POINTER_REQUIRED.  */
24466
 
24467
bool
24468
arm_frame_pointer_required (void)
24469
{
24470
  return (cfun->has_nonlocal_label
24471
          || SUBTARGET_FRAME_POINTER_REQUIRED
24472
          || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
24473
}
24474
 
24475
/* Thumb-1 is the only mode that cannot support conditional execution, so
24476
   return true if the target is not Thumb-1.  */
24477
static bool
24478
arm_have_conditional_execution (void)
24479
{
24480
  return !TARGET_THUMB1;
24481
}
24482
 
24483
static unsigned int
24484
arm_autovectorize_vector_sizes (void)
24485
{
24486
  return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
24487
}
24488
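/* I.e. the vectorizer may try both 16-byte and 8-byte vectors; a zero
   return (when only double-word NEON vectorization is requested) tells
   it to stick to the preferred vector size.  */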
 
24489
static bool
24490
arm_vector_alignment_reachable (const_tree type, bool is_packed)
24491
{
24492
  /* Vectors which aren't in packed structures will not be less aligned than
24493
     the natural alignment of their element type, so this is safe.  */
24494
  if (TARGET_NEON && !BYTES_BIG_ENDIAN)
24495
    return !is_packed;
24496
 
24497
  return default_builtin_vector_alignment_reachable (type, is_packed);
24498
}
24499
 
24500
static bool
24501
arm_builtin_support_vector_misalignment (enum machine_mode mode,
24502
                                         const_tree type, int misalignment,
24503
                                         bool is_packed)
24504
{
24505
  if (TARGET_NEON && !BYTES_BIG_ENDIAN)
24506
    {
24507
      HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
24508
 
24509
      if (is_packed)
24510
        return align == 1;
24511
 
24512
      /* If the misalignment is unknown, we should be able to handle the access
24513
         so long as it is not to a member of a packed data structure.  */
24514
      if (misalignment == -1)
24515
        return true;
24516
 
24517
      /* Return true if the misalignment is a multiple of the natural alignment
24518
         of the vector's element type.  This is probably always going to be
24519
         true in practice, since we've already established that this isn't a
24520
         packed access.  */
24521
      return ((misalignment % align) == 0);
24522
    }
24523
 
24524
  return default_builtin_support_vector_misalignment (mode, type, misalignment,
24525
                                                      is_packed);
24526
}
24527
 
24528
static void
24529
arm_conditional_register_usage (void)
24530
{
24531
  int regno;
24532
 
24533
  if (TARGET_SOFT_FLOAT || TARGET_THUMB1 || !TARGET_FPA)
24534
    {
24535
      for (regno = FIRST_FPA_REGNUM;
24536
           regno <= LAST_FPA_REGNUM; ++regno)
24537
        fixed_regs[regno] = call_used_regs[regno] = 1;
24538
    }
24539
 
24540
  if (TARGET_THUMB1 && optimize_size)
24541
    {
24542
      /* When optimizing for size on Thumb-1, it's better not
24543
        to use the HI regs, because of the overhead of
24544
        stacking them.  */
24545
      for (regno = FIRST_HI_REGNUM;
24546
           regno <= LAST_HI_REGNUM; ++regno)
24547
        fixed_regs[regno] = call_used_regs[regno] = 1;
24548
    }
24549
 
24550
  /* The link register can be clobbered by any branch insn,
24551
     but we have no way to track that at present, so mark
24552
     it as unavailable.  */
24553
  if (TARGET_THUMB1)
24554
    fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
24555
 
24556
  if (TARGET_32BIT && TARGET_HARD_FLOAT)
24557
    {
24558
      if (TARGET_MAVERICK)
24559
        {
24560
          for (regno = FIRST_FPA_REGNUM;
24561
               regno <= LAST_FPA_REGNUM; ++ regno)
24562
            fixed_regs[regno] = call_used_regs[regno] = 1;
24563
          for (regno = FIRST_CIRRUS_FP_REGNUM;
24564
               regno <= LAST_CIRRUS_FP_REGNUM; ++ regno)
24565
            {
24566
              fixed_regs[regno] = 0;
24567
              call_used_regs[regno] = regno < FIRST_CIRRUS_FP_REGNUM + 4;
24568
            }
24569
        }
24570
      if (TARGET_VFP)
24571
        {
24572
          /* VFPv3 registers are disabled when earlier VFP
24573
             versions are selected due to the definition of
24574
             LAST_VFP_REGNUM.  */
24575
          for (regno = FIRST_VFP_REGNUM;
24576
               regno <= LAST_VFP_REGNUM; ++ regno)
24577
            {
24578
              fixed_regs[regno] = 0;
24579
              call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
24580
                || regno >= FIRST_VFP_REGNUM + 32;
24581
            }
24582
        }
24583
    }
24584
 
24585
  if (TARGET_REALLY_IWMMXT)
24586
    {
24587
      regno = FIRST_IWMMXT_GR_REGNUM;
24588
      /* The 2002/10/09 revision of the XScale ABI has wCG0
24589
         and wCG1 as call-preserved registers.  The 2002/11/21
24590
         revision changed this so that all wCG registers are
24591
         scratch registers.  */
24592
      for (regno = FIRST_IWMMXT_GR_REGNUM;
24593
           regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
24594
        fixed_regs[regno] = 0;
24595
      /* The XScale ABI has wR0 - wR9 as scratch registers,
24596
         the rest as call-preserved registers.  */
24597
      for (regno = FIRST_IWMMXT_REGNUM;
24598
           regno <= LAST_IWMMXT_REGNUM; ++ regno)
24599
        {
24600
          fixed_regs[regno] = 0;
24601
          call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
24602
        }
24603
    }
24604
 
24605
  if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
24606
    {
24607
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
24608
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
24609
    }
24610
  else if (TARGET_APCS_STACK)
24611
    {
24612
      fixed_regs[10]     = 1;
24613
      call_used_regs[10] = 1;
24614
    }
24615
  /* -mcaller-super-interworking reserves r11 for calls to
24616
     _interwork_r11_call_via_rN().  Making the register global
24617
     is an easy way of ensuring that it remains valid for all
24618
     calls.  */
24619
  if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
24620
      || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
24621
    {
24622
      fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
24623
      call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
24624
      if (TARGET_CALLER_INTERWORKING)
24625
        global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
24626
    }
24627
  SUBTARGET_CONDITIONAL_REGISTER_USAGE
24628
}
24629
 
24630
static reg_class_t
24631
arm_preferred_rename_class (reg_class_t rclass)
24632
{
24633
  /* Thumb-2 instructions using LO_REGS may be smaller than instructions
24634
     using GENERAL_REGS.  During the register rename pass, we prefer LO_REGS,
24635
     and code size can be reduced.  */
24636
  if (TARGET_THUMB2 && rclass == GENERAL_REGS)
24637
    return LO_REGS;
24638
  else
24639
    return NO_REGS;
24640
}
24641
 
24642
/* Compute the attribute "length" of insn "*push_multi".
24643
   So this function MUST be kept in sync with that insn pattern.  */
24644
int
24645
arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
24646
{
24647
  int i, regno, hi_reg;
24648
  int num_saves = XVECLEN (parallel_op, 0);
24649
 
24650
  /* ARM mode.  */
24651
  if (TARGET_ARM)
24652
    return 4;
24653
  /* Thumb1 mode.  */
24654
  if (TARGET_THUMB1)
24655
    return 2;
24656
 
24657
  /* Thumb2 mode.  */
24658
  regno = REGNO (first_op);
24659
  hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
24660
  for (i = 1; i < num_saves && !hi_reg; i++)
24661
    {
24662
      regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
24663
      hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
24664
    }
24665
 
24666
  if (!hi_reg)
24667
    return 2;
24668
  return 4;
24669
}
24670
 
24671
/* Compute the number of instructions emitted by output_move_double.  */
24672
int
24673
arm_count_output_move_double_insns (rtx *operands)
24674
{
24675
  int count;
24676
  rtx ops[2];
24677
  /* output_move_double may modify the operands array, so call it
24678
     here on a copy of the array.  */
24679
  ops[0] = operands[0];
24680
  ops[1] = operands[1];
24681
  output_move_double (ops, false, &count);
24682
  return count;
24683
}
24684
 
24685
int
24686
vfp3_const_double_for_fract_bits (rtx operand)
24687
{
24688
  REAL_VALUE_TYPE r0;
24689
 
24690
  if (GET_CODE (operand) != CONST_DOUBLE)
24691
    return 0;
24692
 
24693
  REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
24694
  if (exact_real_inverse (DFmode, &r0))
24695
    {
24696
      if (exact_real_truncate (DFmode, &r0))
24697
        {
24698
          HOST_WIDE_INT value = real_to_integer (&r0);
24699
          value = value & 0xffffffff;
24700
          if ((value != 0) && ( (value & (value - 1)) == 0))
24701
            return int_log2 (value);
24702
        }
24703
    }
24704
  return 0;
24705
}
24706
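/* For example, the constant 0.125 has the exact inverse 8.0 = 2^3, so
   this returns 3; a constant such as 0.3, whose inverse is not a power
   of two, returns 0.  */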
 
24707
/* Emit a memory barrier around an atomic sequence according to MODEL.  */
24708
 
24709
static void
24710
arm_pre_atomic_barrier (enum memmodel model)
24711
{
24712
  switch (model)
24713
    {
24714
    case MEMMODEL_RELAXED:
24715
    case MEMMODEL_CONSUME:
24716
    case MEMMODEL_ACQUIRE:
24717
      break;
24718
    case MEMMODEL_RELEASE:
24719
    case MEMMODEL_ACQ_REL:
24720
    case MEMMODEL_SEQ_CST:
24721
      emit_insn (gen_memory_barrier ());
24722
      break;
24723
    default:
24724
      gcc_unreachable ();
24725
    }
24726
}
24727
 
24728
static void
24729
arm_post_atomic_barrier (enum memmodel model)
24730
{
24731
  switch (model)
24732
    {
24733
    case MEMMODEL_RELAXED:
24734
    case MEMMODEL_CONSUME:
24735
    case MEMMODEL_RELEASE:
24736
      break;
24737
    case MEMMODEL_ACQUIRE:
24738
    case MEMMODEL_ACQ_REL:
24739
    case MEMMODEL_SEQ_CST:
24740
      emit_insn (gen_memory_barrier ());
24741
      break;
24742
    default:
24743
      gcc_unreachable ();
24744
    }
24745
}
24746
 
24747
/* Emit the load-exclusive and store-exclusive instructions.  */
24748
 
24749
static void
24750
arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem)
24751
{
24752
  rtx (*gen) (rtx, rtx);
24753
 
24754
  switch (mode)
24755
    {
24756
    case QImode: gen = gen_arm_load_exclusiveqi; break;
24757
    case HImode: gen = gen_arm_load_exclusivehi; break;
24758
    case SImode: gen = gen_arm_load_exclusivesi; break;
24759
    case DImode: gen = gen_arm_load_exclusivedi; break;
24760
    default:
24761
      gcc_unreachable ();
24762
    }
24763
 
24764
  emit_insn (gen (rval, mem));
24765
}
24766
 
24767
static void
24768
arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval, rtx mem)
24769
{
24770
  rtx (*gen) (rtx, rtx, rtx);
24771
 
24772
  switch (mode)
24773
    {
24774
    case QImode: gen = gen_arm_store_exclusiveqi; break;
24775
    case HImode: gen = gen_arm_store_exclusivehi; break;
24776
    case SImode: gen = gen_arm_store_exclusivesi; break;
24777
    case DImode: gen = gen_arm_store_exclusivedi; break;
24778
    default:
24779
      gcc_unreachable ();
24780
    }
24781
 
24782
  emit_insn (gen (bval, rval, mem));
24783
}
24784
 
24785
/* Emit the jump pattern INSN and mark it as unlikely to be taken.  */
24786
 
24787
static void
24788
emit_unlikely_jump (rtx insn)
24789
{
24790
  rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
24791
 
24792
  insn = emit_jump_insn (insn);
24793
  add_reg_note (insn, REG_BR_PROB, very_unlikely);
24794
}
24795
 
24796
/* Expand a compare and swap pattern.  */
24797
 
24798
void
24799
arm_expand_compare_and_swap (rtx operands[])
24800
{
24801
  rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
24802
  enum machine_mode mode;
24803
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
24804
 
24805
  bval = operands[0];
24806
  rval = operands[1];
24807
  mem = operands[2];
24808
  oldval = operands[3];
24809
  newval = operands[4];
24810
  is_weak = operands[5];
24811
  mod_s = operands[6];
24812
  mod_f = operands[7];
24813
  mode = GET_MODE (mem);
24814
 
24815
  switch (mode)
24816
    {
24817
    case QImode:
24818
    case HImode:
24819
      /* For narrow modes, we're going to perform the comparison in SImode,
24820
         so do the zero-extension now.  */
24821
      rval = gen_reg_rtx (SImode);
24822
      oldval = convert_modes (SImode, mode, oldval, true);
24823
      /* FALLTHRU */
24824
 
24825
    case SImode:
24826
      /* Force the value into a register if needed.  We waited until after
24827
         the zero-extension above to do this properly.  */
24828
      if (!arm_add_operand (oldval, mode))
24829
        oldval = force_reg (mode, oldval);
24830
      break;
24831
 
24832
    case DImode:
24833
      if (!cmpdi_operand (oldval, mode))
24834
        oldval = force_reg (mode, oldval);
24835
      break;
24836
 
24837
    default:
24838
      gcc_unreachable ();
24839
    }
24840
 
24841
  switch (mode)
24842
    {
24843
    case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
24844
    case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
24845
    case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
24846
    case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
24847
    default:
24848
      gcc_unreachable ();
24849
    }
24850
 
24851
  emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
24852
 
24853
  if (mode == QImode || mode == HImode)
24854
    emit_move_insn (operands[1], gen_lowpart (mode, rval));
24855
 
24856
  /* In all cases, we arrange for success to be signaled by Z set.
24857
     This arrangement allows for the boolean result to be used directly
24858
     in a subsequent branch, post optimization.  */
24859
  x = gen_rtx_REG (CCmode, CC_REGNUM);
24860
  x = gen_rtx_EQ (SImode, x, const0_rtx);
24861
  emit_insn (gen_rtx_SET (VOIDmode, bval, x));
24862
}
24863
 
24864
/* Split a compare and swap pattern.  It is IMPLEMENTATION DEFINED whether
24865
   another memory store between the load-exclusive and store-exclusive can
24866
   reset the monitor from Exclusive to Open state.  This means we must wait
24867
   until after reload to split the pattern, lest we get a register spill in
24868
   the middle of the atomic sequence.  */
24869
 
24870
void
24871
arm_split_compare_and_swap (rtx operands[])
24872
{
24873
  rtx rval, mem, oldval, newval, scratch;
24874
  enum machine_mode mode;
24875
  enum memmodel mod_s, mod_f;
24876
  bool is_weak;
24877
  rtx label1, label2, x, cond;
24878
 
24879
  rval = operands[0];
24880
  mem = operands[1];
24881
  oldval = operands[2];
24882
  newval = operands[3];
24883
  is_weak = (operands[4] != const0_rtx);
24884
  mod_s = (enum memmodel) INTVAL (operands[5]);
24885
  mod_f = (enum memmodel) INTVAL (operands[6]);
24886
  scratch = operands[7];
24887
  mode = GET_MODE (mem);
24888
 
24889
  arm_pre_atomic_barrier (mod_s);
24890
 
24891
  label1 = NULL_RTX;
24892
  if (!is_weak)
24893
    {
24894
      label1 = gen_label_rtx ();
24895
      emit_label (label1);
24896
    }
24897
  label2 = gen_label_rtx ();
24898
 
24899
  arm_emit_load_exclusive (mode, rval, mem);
24900
 
24901
  cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
24902
  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
24903
  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
24904
                            gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
24905
  emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
24906
 
24907
  arm_emit_store_exclusive (mode, scratch, mem, newval);
24908
 
24909
  /* Weak or strong, we want EQ to be true for success, so that we
24910
     match the flags that we got from the compare above.  */
24911
  cond = gen_rtx_REG (CCmode, CC_REGNUM);
24912
  x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
24913
  emit_insn (gen_rtx_SET (VOIDmode, cond, x));
24914
 
24915
  if (!is_weak)
24916
    {
24917
      x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
24918
      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
24919
                                gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
24920
      emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
24921
    }
24922
 
24923
  if (mod_f != MEMMODEL_RELAXED)
24924
    emit_label (label2);
24925
 
24926
  arm_post_atomic_barrier (mod_s);
24927
 
24928
  if (mod_f == MEMMODEL_RELAXED)
24929
    emit_label (label2);
24930
}
24931
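/* For a strong SImode compare-and-swap the split produces, in outline
   (an illustrative sketch, not the literal output):

        1:      ldrex   rval, [mem]
                cmp     rval, oldval
                bne     2f              @ comparison failed
                strex   scratch, newval, [mem]
                cmp     scratch, #0
                bne     1b              @ store-exclusive lost the monitor
        2:

   A weak compare-and-swap omits label 1 and the retry branch.  */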
 
24932
void
24933
arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
24934
                     rtx value, rtx model_rtx, rtx cond)
24935
{
24936
  enum memmodel model = (enum memmodel) INTVAL (model_rtx);
24937
  enum machine_mode mode = GET_MODE (mem);
24938
  enum machine_mode wmode = (mode == DImode ? DImode : SImode);
24939
  rtx label, x;
24940
 
24941
  arm_pre_atomic_barrier (model);
24942
 
24943
  label = gen_label_rtx ();
24944
  emit_label (label);
24945
 
24946
  if (new_out)
24947
    new_out = gen_lowpart (wmode, new_out);
24948
  if (old_out)
24949
    old_out = gen_lowpart (wmode, old_out);
24950
  else
24951
    old_out = new_out;
24952
  value = simplify_gen_subreg (wmode, value, mode, 0);
24953
 
24954
  arm_emit_load_exclusive (mode, old_out, mem);
24955
 
24956
  switch (code)
24957
    {
24958
    case SET:
24959
      new_out = value;
24960
      break;
24961
 
24962
    case NOT:
24963
      x = gen_rtx_AND (wmode, old_out, value);
24964
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
24965
      x = gen_rtx_NOT (wmode, new_out);
24966
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
24967
      break;
24968
 
24969
    case MINUS:
24970
      if (CONST_INT_P (value))
24971
        {
24972
          value = GEN_INT (-INTVAL (value));
24973
          code = PLUS;
24974
        }
24975
      /* FALLTHRU */
24976
 
24977
    case PLUS:
24978
      if (mode == DImode)
24979
        {
24980
          /* DImode plus/minus need to clobber flags.  */
24981
          /* The adddi3 and subdi3 patterns are incorrectly written so that
24982
             they require matching operands, even when we could easily support
24983
             three operands.  Thankfully, this can be fixed up post-splitting,
24984
             as the individual add+adc patterns do accept three operands and
24985
             post-reload cprop can make these moves go away.  */
24986
          emit_move_insn (new_out, old_out);
24987
          if (code == PLUS)
24988
            x = gen_adddi3 (new_out, new_out, value);
24989
          else
24990
            x = gen_subdi3 (new_out, new_out, value);
24991
          emit_insn (x);
24992
          break;
24993
        }
24994
      /* FALLTHRU */
24995
 
24996
    default:
24997
      x = gen_rtx_fmt_ee (code, wmode, old_out, value);
24998
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
24999
      break;
25000
    }
25001
 
25002
  arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out));
25003
 
25004
  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
25005
  emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
25006
 
25007
  arm_post_atomic_barrier (model);
25008
}
25009
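/* E.g. an SImode atomic add (CODE == PLUS) expands, in outline, to:

        1:      ldrex   old, [mem]
                add     new, old, value
                strex   tmp, new, [mem]
                cmp     tmp, #0
                bne     1b

   bracketed by whatever barriers MODEL requires (an illustrative
   sketch rather than the literal emitted assembly).  */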
 
25010
#define MAX_VECT_LEN 16
25011
 
25012
struct expand_vec_perm_d
25013
{
25014
  rtx target, op0, op1;
25015
  unsigned char perm[MAX_VECT_LEN];
25016
  enum machine_mode vmode;
25017
  unsigned char nelt;
25018
  bool one_vector_p;
25019
  bool testing_p;
25020
};
25021
 
25022
/* Generate a variable permutation.  */
25023
 
25024
static void
25025
arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
25026
{
25027
  enum machine_mode vmode = GET_MODE (target);
25028
  bool one_vector_p = rtx_equal_p (op0, op1);
25029
 
25030
  gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
25031
  gcc_checking_assert (GET_MODE (op0) == vmode);
25032
  gcc_checking_assert (GET_MODE (op1) == vmode);
25033
  gcc_checking_assert (GET_MODE (sel) == vmode);
25034
  gcc_checking_assert (TARGET_NEON);
25035
 
25036
  if (one_vector_p)
25037
    {
25038
      if (vmode == V8QImode)
25039
        emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
25040
      else
25041
        emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
25042
    }
25043
  else
25044
    {
25045
      rtx pair;
25046
 
25047
      if (vmode == V8QImode)
25048
        {
25049
          pair = gen_reg_rtx (V16QImode);
25050
          emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
25051
          pair = gen_lowpart (TImode, pair);
25052
          emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
25053
        }
25054
      else
25055
        {
25056
          pair = gen_reg_rtx (OImode);
25057
          emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
25058
          emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
25059
        }
25060
    }
25061
}
25062
 
25063
void
25064
arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
25065
{
25066
  enum machine_mode vmode = GET_MODE (target);
25067
  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
25068
  bool one_vector_p = rtx_equal_p (op0, op1);
25069
  rtx rmask[MAX_VECT_LEN], mask;
25070
 
25071
  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
25072
     numbering of elements for big-endian, we must reverse the order.  */
25073
  gcc_checking_assert (!BYTES_BIG_ENDIAN);
25074
 
25075
  /* The VTBL instruction does not use a modulo index, so we must take care
25076
     of that ourselves.  */
25077
  mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
25078
  for (i = 0; i < nelt; ++i)
25079
    rmask[i] = mask;
25080
  mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
25081
  sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
25082
 
25083
  arm_expand_vec_perm_1 (target, op0, op1, sel);
25084
}
25085
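/* For instance, a variable one-vector V8QImode permutation first ANDs
   the selector with 7 (nelt - 1) so that out-of-range indices wrap as
   VEC_PERM_EXPR requires, and then expands to a single VTBL.  */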
 
25086
/* Generate or test for an insn that supports a constant permutation.  */
25087
 
25088
/* Recognize patterns for the VUZP insns.  */
25089
 
25090
static bool
25091
arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
25092
{
25093
  unsigned int i, odd, mask, nelt = d->nelt;
25094
  rtx out0, out1, in0, in1, x;
25095
  rtx (*gen)(rtx, rtx, rtx, rtx);
25096
 
25097
  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
25098
    return false;
25099
 
25100
  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
25101
  if (d->perm[0] == 0)
25102
    odd = 0;
25103
  else if (d->perm[0] == 1)
25104
    odd = 1;
25105
  else
25106
    return false;
25107
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
25108
 
25109
  for (i = 0; i < nelt; i++)
25110
    {
25111
      unsigned elt = (i * 2 + odd) & mask;
25112
      if (d->perm[i] != elt)
25113
        return false;
25114
    }
25115
 
25116
  /* Success!  */
25117
  if (d->testing_p)
25118
    return true;
25119
 
25120
  switch (d->vmode)
25121
    {
25122
    case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
25123
    case V8QImode:  gen = gen_neon_vuzpv8qi_internal;  break;
25124
    case V8HImode:  gen = gen_neon_vuzpv8hi_internal;  break;
25125
    case V4HImode:  gen = gen_neon_vuzpv4hi_internal;  break;
25126
    case V4SImode:  gen = gen_neon_vuzpv4si_internal;  break;
25127
    case V2SImode:  gen = gen_neon_vuzpv2si_internal;  break;
25128
    case V2SFmode:  gen = gen_neon_vuzpv2sf_internal;  break;
25129
    case V4SFmode:  gen = gen_neon_vuzpv4sf_internal;  break;
25130
    default:
25131
      gcc_unreachable ();
25132
    }
25133
 
25134
  in0 = d->op0;
25135
  in1 = d->op1;
25136
  if (BYTES_BIG_ENDIAN)
25137
    {
25138
      x = in0, in0 = in1, in1 = x;
25139
      odd = !odd;
25140
    }
25141
 
25142
  out0 = d->target;
25143
  out1 = gen_reg_rtx (d->vmode);
25144
  if (odd)
25145
    x = out0, out0 = out1, out1 = x;
25146
 
25147
  emit_insn (gen (out0, in0, in1, out1));
25148
  return true;
25149
}
25150
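/* Example: the V4SImode two-operand selector {0, 2, 4, 6} (the even
   elements of both inputs) matches with odd == 0 and is emitted as a
   VUZP instruction.  */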
 
25151
/* Recognize patterns for the VZIP insns.  */
25152
 
25153
static bool
25154
arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
25155
{
25156
  unsigned int i, high, mask, nelt = d->nelt;
25157
  rtx out0, out1, in0, in1, x;
25158
  rtx (*gen)(rtx, rtx, rtx, rtx);
25159
 
25160
  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
25161
    return false;
25162
 
25163
  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
25164
  high = nelt / 2;
25165
  if (d->perm[0] == high)
25166
    ;
25167
  else if (d->perm[0] == 0)
25168
    high = 0;
25169
  else
25170
    return false;
25171
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
25172
 
25173
  for (i = 0; i < nelt / 2; i++)
25174
    {
25175
      unsigned elt = (i + high) & mask;
25176
      if (d->perm[i * 2] != elt)
25177
        return false;
25178
      elt = (elt + nelt) & mask;
25179
      if (d->perm[i * 2 + 1] != elt)
25180
        return false;
25181
    }
25182
 
25183
  /* Success!  */
25184
  if (d->testing_p)
25185
    return true;
25186
 
25187
  switch (d->vmode)
25188
    {
25189
    case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
25190
    case V8QImode:  gen = gen_neon_vzipv8qi_internal;  break;
25191
    case V8HImode:  gen = gen_neon_vzipv8hi_internal;  break;
25192
    case V4HImode:  gen = gen_neon_vzipv4hi_internal;  break;
25193
    case V4SImode:  gen = gen_neon_vzipv4si_internal;  break;
25194
    case V2SImode:  gen = gen_neon_vzipv2si_internal;  break;
25195
    case V2SFmode:  gen = gen_neon_vzipv2sf_internal;  break;
25196
    case V4SFmode:  gen = gen_neon_vzipv4sf_internal;  break;
25197
    default:
25198
      gcc_unreachable ();
25199
    }
25200
 
25201
  in0 = d->op0;
25202
  in1 = d->op1;
25203
  if (BYTES_BIG_ENDIAN)
25204
    {
25205
      x = in0, in0 = in1, in1 = x;
25206
      high = !high;
25207
    }
25208
 
25209
  out0 = d->target;
25210
  out1 = gen_reg_rtx (d->vmode);
25211
  if (high)
25212
    x = out0, out0 = out1, out1 = x;
25213
 
25214
  emit_insn (gen (out0, in0, in1, out1));
25215
  return true;
25216
}
25217
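/* Example: the V4SImode selector {0, 4, 1, 5}, which interleaves the
   low halves of the two inputs, matches with high == 0 and is emitted
   as a VZIP instruction.  */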
 
25218
/* Recognize patterns for the VREV insns.  */
25219
 
25220
static bool
25221
arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
25222
{
25223
  unsigned int i, j, diff, nelt = d->nelt;
25224
  rtx (*gen)(rtx, rtx, rtx);
25225
 
25226
  if (!d->one_vector_p)
25227
    return false;
25228
 
25229
  diff = d->perm[0];
25230
  switch (diff)
25231
    {
25232
    case 7:
25233
      switch (d->vmode)
25234
        {
25235
        case V16QImode: gen = gen_neon_vrev64v16qi; break;
25236
        case V8QImode:  gen = gen_neon_vrev64v8qi;  break;
25237
        default:
25238
          return false;
25239
        }
25240
      break;
25241
    case 3:
25242
      switch (d->vmode)
25243
        {
25244
        case V16QImode: gen = gen_neon_vrev32v16qi; break;
25245
        case V8QImode:  gen = gen_neon_vrev32v8qi;  break;
25246
        case V8HImode:  gen = gen_neon_vrev64v8hi;  break;
25247
        case V4HImode:  gen = gen_neon_vrev64v4hi;  break;
25248
        default:
25249
          return false;
25250
        }
25251
      break;
25252
    case 1:
25253
      switch (d->vmode)
25254
        {
25255
        case V16QImode: gen = gen_neon_vrev16v16qi; break;
25256
        case V8QImode:  gen = gen_neon_vrev16v8qi;  break;
25257
        case V8HImode:  gen = gen_neon_vrev32v8hi;  break;
25258
        case V4HImode:  gen = gen_neon_vrev32v4hi;  break;
25259
        case V4SImode:  gen = gen_neon_vrev64v4si;  break;
25260
        case V2SImode:  gen = gen_neon_vrev64v2si;  break;
25261
        case V4SFmode:  gen = gen_neon_vrev64v4sf;  break;
25262
        case V2SFmode:  gen = gen_neon_vrev64v2sf;  break;
25263
        default:
25264
          return false;
25265
        }
25266
      break;
25267
    default:
25268
      return false;
25269
    }
25270
 
25271
  for (i = 0; i < nelt; i += diff)
25272
    for (j = 0; j <= diff; j += 1)
25273
      if (d->perm[i + j] != i + diff - j)
25274
        return false;
25275
 
25276
  /* Success! */
25277
  if (d->testing_p)
25278
    return true;
25279
 
25280
  /* ??? The third operand is an artifact of the builtin infrastructure
25281
     and is ignored by the actual instruction.  */
25282
  emit_insn (gen (d->target, d->op0, const0_rtx));
25283
  return true;
25284
}
25285
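/* Example: the V8QImode selector {7, 6, 5, 4, 3, 2, 1, 0} gives
   diff == 7 and is emitted as a VREV64.8 of the single input.  */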
 
25286
/* Recognize patterns for the VTRN insns.  */
25287
 
25288
static bool
25289
arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
25290
{
25291
  unsigned int i, odd, mask, nelt = d->nelt;
25292
  rtx out0, out1, in0, in1, x;
25293
  rtx (*gen)(rtx, rtx, rtx, rtx);
25294
 
25295
  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
25296
    return false;
25297
 
25298
  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
25299
  if (d->perm[0] == 0)
25300
    odd = 0;
25301
  else if (d->perm[0] == 1)
25302
    odd = 1;
25303
  else
25304
    return false;
25305
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
25306
 
25307
  for (i = 0; i < nelt; i += 2)
25308
    {
25309
      if (d->perm[i] != i + odd)
25310
        return false;
25311
      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
25312
        return false;
25313
    }
25314
 
25315
  /* Success!  */
25316
  if (d->testing_p)
25317
    return true;
25318
 
25319
  switch (d->vmode)
25320
    {
25321
    case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
25322
    case V8QImode:  gen = gen_neon_vtrnv8qi_internal;  break;
25323
    case V8HImode:  gen = gen_neon_vtrnv8hi_internal;  break;
25324
    case V4HImode:  gen = gen_neon_vtrnv4hi_internal;  break;
25325
    case V4SImode:  gen = gen_neon_vtrnv4si_internal;  break;
25326
    case V2SImode:  gen = gen_neon_vtrnv2si_internal;  break;
25327
    case V2SFmode:  gen = gen_neon_vtrnv2sf_internal;  break;
25328
    case V4SFmode:  gen = gen_neon_vtrnv4sf_internal;  break;
25329
    default:
25330
      gcc_unreachable ();
25331
    }
25332
 
25333
  in0 = d->op0;
25334
  in1 = d->op1;
25335
  if (BYTES_BIG_ENDIAN)
25336
    {
25337
      x = in0, in0 = in1, in1 = x;
25338
      odd = !odd;
25339
    }
25340
 
25341
  out0 = d->target;
25342
  out1 = gen_reg_rtx (d->vmode);
25343
  if (odd)
25344
    x = out0, out0 = out1, out1 = x;
25345
 
25346
  emit_insn (gen (out0, in0, in1, out1));
25347
  return true;
25348
}
25349
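/* Example: the V4SImode selector {0, 4, 2, 6} matches with odd == 0
   and is emitted as a VTRN instruction, transposing corresponding
   pairs of elements from the two inputs.  */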
 
25350
/* The NEON VTBL instruction is a fully variable permutation that's even
25351
   stronger than what we expose via VEC_PERM_EXPR.  What it doesn't do
25352
   is mask the index operand as VEC_PERM_EXPR requires.  Therefore we
25353
   can do slightly better by expanding this as a constant where we don't
25354
   have to apply a mask.  */
25355
 
25356
static bool
25357
arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
25358
{
25359
  rtx rperm[MAX_VECT_LEN], sel;
25360
  enum machine_mode vmode = d->vmode;
25361
  unsigned int i, nelt = d->nelt;
25362
 
25363
  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
25364
     numbering of elements for big-endian, we must reverse the order.  */
25365
  if (BYTES_BIG_ENDIAN)
25366
    return false;
25367
 
25368
  if (d->testing_p)
25369
    return true;
25370
 
25371
  /* Generic code will try constant permutation twice.  Once with the
25372
     original mode and again with the elements lowered to QImode.
25373
     So wait and don't do the selector expansion ourselves.  */
25374
  if (vmode != V8QImode && vmode != V16QImode)
25375
    return false;
25376
 
25377
  for (i = 0; i < nelt; ++i)
25378
    rperm[i] = GEN_INT (d->perm[i]);
25379
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
25380
  sel = force_reg (vmode, sel);
25381
 
25382
  arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
25383
  return true;
25384
}
25385
 
25386
static bool
25387
arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
25388
{
25389
  /* The pattern matching functions above are written to look for a small
25390
     number to begin the sequence (0, 1, N/2).  If we begin with an index
25391
     from the second operand, we can swap the operands.  */
25392
  if (d->perm[0] >= d->nelt)
25393
    {
25394
      unsigned i, nelt = d->nelt;
25395
      rtx x;
25396
 
25397
      for (i = 0; i < nelt; ++i)
25398
        d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
25399
 
25400
      x = d->op0;
25401
      d->op0 = d->op1;
25402
      d->op1 = x;
25403
    }
25404
 
25405
  if (TARGET_NEON)
25406
    {
25407
      if (arm_evpc_neon_vuzp (d))
25408
        return true;
25409
      if (arm_evpc_neon_vzip (d))
25410
        return true;
25411
      if (arm_evpc_neon_vrev (d))
25412
        return true;
25413
      if (arm_evpc_neon_vtrn (d))
25414
        return true;
25415
      return arm_evpc_neon_vtbl (d);
25416
    }
25417
  return false;
25418
}
25419
 
25420
/* Expand a vec_perm_const pattern.  */
25421
 
25422
bool
25423
arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
25424
{
25425
  struct expand_vec_perm_d d;
25426
  int i, nelt, which;
25427
 
25428
  d.target = target;
25429
  d.op0 = op0;
25430
  d.op1 = op1;
25431
 
25432
  d.vmode = GET_MODE (target);
25433
  gcc_assert (VECTOR_MODE_P (d.vmode));
25434
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
25435
  d.testing_p = false;
25436
 
25437
  for (i = which = 0; i < nelt; ++i)
25438
    {
25439
      rtx e = XVECEXP (sel, 0, i);
25440
      int ei = INTVAL (e) & (2 * nelt - 1);
25441
      which |= (ei < nelt ? 1 : 2);
25442
      d.perm[i] = ei;
25443
    }
25444
 
25445
  switch (which)
25446
    {
25447
    default:
25448
      gcc_unreachable();
25449
 
25450
    case 3:
25451
      d.one_vector_p = false;
25452
      if (!rtx_equal_p (op0, op1))
25453
        break;
25454
 
25455
      /* The elements of PERM do not suggest that only the first operand
25456
         is used, but both operands are identical.  Allow easier matching
25457
         of the permutation by folding the permutation into the single
25458
         input vector.  */
25459
      /* FALLTHRU */
25460
    case 2:
25461
      for (i = 0; i < nelt; ++i)
25462
        d.perm[i] &= nelt - 1;
25463
      d.op0 = op1;
25464
      d.one_vector_p = true;
25465
      break;
25466
 
25467
    case 1:
25468
      d.op1 = op0;
25469
      d.one_vector_p = true;
25470
      break;
25471
    }
25472
 
25473
  return arm_expand_vec_perm_const_1 (&d);
25474
}
25475
 
25476
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK.  */
25477
 
25478
static bool
25479
arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
25480
                                 const unsigned char *sel)
25481
{
25482
  struct expand_vec_perm_d d;
25483
  unsigned int i, nelt, which;
25484
  bool ret;
25485
 
25486
  d.vmode = vmode;
25487
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
25488
  d.testing_p = true;
25489
  memcpy (d.perm, sel, nelt);
25490
 
25491
  /* Categorize the set of elements in the selector.  */
25492
  for (i = which = 0; i < nelt; ++i)
25493
    {
25494
      unsigned char e = d.perm[i];
25495
      gcc_assert (e < 2 * nelt);
25496
      which |= (e < nelt ? 1 : 2);
25497
    }
25498
 
25499
  /* For all elements from second vector, fold the elements to first.  */
25500
  if (which == 2)
25501
    for (i = 0; i < nelt; ++i)
25502
      d.perm[i] -= nelt;
25503
 
25504
  /* Check whether the mask can be applied to the vector type.  */
25505
  d.one_vector_p = (which != 3);
25506
 
25507
  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
25508
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
25509
  if (!d.one_vector_p)
25510
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
25511
 
25512
  start_sequence ();
25513
  ret = arm_expand_vec_perm_const_1 (&d);
25514
  end_sequence ();
25515
 
25516
  return ret;
25517
}
25518
 
25519
 
25520
#include "gt-arm.h"
