OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [gcc/] [config/] [spu/] [spu.c] - Blame information for rev 749

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 709 jeremybenn
/* Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
2
   Free Software Foundation, Inc.
3
 
4
   This file is free software; you can redistribute it and/or modify it under
5
   the terms of the GNU General Public License as published by the Free
6
   Software Foundation; either version 3 of the License, or (at your option)
7
   any later version.
8
 
9
   This file is distributed in the hope that it will be useful, but WITHOUT
10
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12
   for more details.
13
 
14
   You should have received a copy of the GNU General Public License
15
   along with GCC; see the file COPYING3.  If not see
16
   <http://www.gnu.org/licenses/>.  */
17
 
18
#include "config.h"
19
#include "system.h"
20
#include "coretypes.h"
21
#include "tm.h"
22
#include "rtl.h"
23
#include "regs.h"
24
#include "hard-reg-set.h"
25
#include "insn-config.h"
26
#include "conditions.h"
27
#include "insn-attr.h"
28
#include "flags.h"
29
#include "recog.h"
30
#include "obstack.h"
31
#include "tree.h"
32
#include "expr.h"
33
#include "optabs.h"
34
#include "except.h"
35
#include "function.h"
36
#include "output.h"
37
#include "basic-block.h"
38
#include "integrate.h"
39
#include "diagnostic-core.h"
40
#include "ggc.h"
41
#include "hashtab.h"
42
#include "tm_p.h"
43
#include "target.h"
44
#include "target-def.h"
45
#include "langhooks.h"
46
#include "reload.h"
47
#include "cfglayout.h"
48
#include "sched-int.h"
49
#include "params.h"
50
#include "machmode.h"
51
#include "gimple.h"
52
#include "tm-constrs.h"
53
#include "ddg.h"
54
#include "sbitmap.h"
55
#include "timevar.h"
56
#include "df.h"
57
 
58
/* Builtin types, data and prototypes. */
59
 
60
enum spu_builtin_type_index
61
{
62
  SPU_BTI_END_OF_PARAMS,
63
 
64
  /* We create new type nodes for these. */
65
  SPU_BTI_V16QI,
66
  SPU_BTI_V8HI,
67
  SPU_BTI_V4SI,
68
  SPU_BTI_V2DI,
69
  SPU_BTI_V4SF,
70
  SPU_BTI_V2DF,
71
  SPU_BTI_UV16QI,
72
  SPU_BTI_UV8HI,
73
  SPU_BTI_UV4SI,
74
  SPU_BTI_UV2DI,
75
 
76
  /* A 16-byte type. (Implemented with V16QI_type_node) */
77
  SPU_BTI_QUADWORD,
78
 
79
  /* These all correspond to intSI_type_node */
80
  SPU_BTI_7,
81
  SPU_BTI_S7,
82
  SPU_BTI_U7,
83
  SPU_BTI_S10,
84
  SPU_BTI_S10_4,
85
  SPU_BTI_U14,
86
  SPU_BTI_16,
87
  SPU_BTI_S16,
88
  SPU_BTI_S16_2,
89
  SPU_BTI_U16,
90
  SPU_BTI_U16_2,
91
  SPU_BTI_U18,
92
 
93
  /* These correspond to the standard types */
94
  SPU_BTI_INTQI,
95
  SPU_BTI_INTHI,
96
  SPU_BTI_INTSI,
97
  SPU_BTI_INTDI,
98
 
99
  SPU_BTI_UINTQI,
100
  SPU_BTI_UINTHI,
101
  SPU_BTI_UINTSI,
102
  SPU_BTI_UINTDI,
103
 
104
  SPU_BTI_FLOAT,
105
  SPU_BTI_DOUBLE,
106
 
107
  SPU_BTI_VOID,
108
  SPU_BTI_PTR,
109
 
110
  SPU_BTI_MAX
111
};
112
 
113
#define V16QI_type_node               (spu_builtin_types[SPU_BTI_V16QI])
114
#define V8HI_type_node                (spu_builtin_types[SPU_BTI_V8HI])
115
#define V4SI_type_node                (spu_builtin_types[SPU_BTI_V4SI])
116
#define V2DI_type_node                (spu_builtin_types[SPU_BTI_V2DI])
117
#define V4SF_type_node                (spu_builtin_types[SPU_BTI_V4SF])
118
#define V2DF_type_node                (spu_builtin_types[SPU_BTI_V2DF])
119
#define unsigned_V16QI_type_node      (spu_builtin_types[SPU_BTI_UV16QI])
120
#define unsigned_V8HI_type_node       (spu_builtin_types[SPU_BTI_UV8HI])
121
#define unsigned_V4SI_type_node       (spu_builtin_types[SPU_BTI_UV4SI])
122
#define unsigned_V2DI_type_node       (spu_builtin_types[SPU_BTI_UV2DI])
123
 
124
static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
125
 
126
struct spu_builtin_range
127
{
128
  int low, high;
129
};
130
 
131
static struct spu_builtin_range spu_builtin_range[] = {
132
  {-0x40ll, 0x7fll},            /* SPU_BTI_7     */
133
  {-0x40ll, 0x3fll},            /* SPU_BTI_S7    */
134
  {0ll, 0x7fll},                /* SPU_BTI_U7    */
135
  {-0x200ll, 0x1ffll},          /* SPU_BTI_S10   */
136
  {-0x2000ll, 0x1fffll},        /* SPU_BTI_S10_4 */
137
  {0ll, 0x3fffll},              /* SPU_BTI_U14   */
138
  {-0x8000ll, 0xffffll},        /* SPU_BTI_16    */
139
  {-0x8000ll, 0x7fffll},        /* SPU_BTI_S16   */
140
  {-0x20000ll, 0x1ffffll},      /* SPU_BTI_S16_2 */
141
  {0ll, 0xffffll},              /* SPU_BTI_U16   */
142
  {0ll, 0x3ffffll},             /* SPU_BTI_U16_2 */
143
  {0ll, 0x3ffffll},             /* SPU_BTI_U18   */
144
};
145
 
146
 
147
/*  Target specific attribute specifications.  */
148
char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
149
 
150
/*  Prototypes and external defs.  */
151
static void spu_option_override (void);
152
static void spu_init_builtins (void);
153
static tree spu_builtin_decl (unsigned, bool);
154
static bool spu_scalar_mode_supported_p (enum machine_mode mode);
155
static bool spu_vector_mode_supported_p (enum machine_mode mode);
156
static bool spu_legitimate_address_p (enum machine_mode, rtx, bool);
157
static bool spu_addr_space_legitimate_address_p (enum machine_mode, rtx,
158
                                                 bool, addr_space_t);
159
static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
160
static rtx get_pic_reg (void);
161
static int need_to_save_reg (int regno, int saving);
162
static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
163
static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
164
static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
165
                               rtx scratch);
166
static void emit_nop_for_insn (rtx insn);
167
static bool insn_clobbers_hbr (rtx insn);
168
static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
169
                                  int distance, sbitmap blocks);
170
static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
171
                                    enum machine_mode dmode);
172
static rtx get_branch_target (rtx branch);
173
static void spu_machine_dependent_reorg (void);
174
static int spu_sched_issue_rate (void);
175
static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
176
                                     int can_issue_more);
177
static int get_pipe (rtx insn);
178
static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
179
static void spu_sched_init_global (FILE *, int, int);
180
static void spu_sched_init (FILE *, int, int);
181
static int spu_sched_reorder (FILE *, int, rtx *, int *, int);
182
static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
183
                                         int flags,
184
                                         bool *no_add_attrs);
185
static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
186
                                         int flags,
187
                                         bool *no_add_attrs);
188
static int spu_naked_function_p (tree func);
189
static bool spu_pass_by_reference (cumulative_args_t cum,
190
                                   enum machine_mode mode,
191
                                   const_tree type, bool named);
192
static rtx spu_function_arg (cumulative_args_t cum, enum machine_mode mode,
193
                             const_tree type, bool named);
194
static void spu_function_arg_advance (cumulative_args_t cum,
195
                                      enum machine_mode mode,
196
                                      const_tree type, bool named);
197
static tree spu_build_builtin_va_list (void);
198
static void spu_va_start (tree, rtx);
199
static tree spu_gimplify_va_arg_expr (tree valist, tree type,
200
                                      gimple_seq * pre_p, gimple_seq * post_p);
201
static int store_with_one_insn_p (rtx mem);
202
static int mem_is_padded_component_ref (rtx x);
203
static int reg_aligned_for_addr (rtx x);
204
static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
205
static void spu_asm_globalize_label (FILE * file, const char *name);
206
static bool spu_rtx_costs (rtx x, int code, int outer_code, int opno,
207
                           int *total, bool speed);
208
static bool spu_function_ok_for_sibcall (tree decl, tree exp);
209
static void spu_init_libfuncs (void);
210
static bool spu_return_in_memory (const_tree type, const_tree fntype);
211
static void fix_range (const char *);
212
static void spu_encode_section_info (tree, rtx, int);
213
static rtx spu_legitimize_address (rtx, rtx, enum machine_mode);
214
static rtx spu_addr_space_legitimize_address (rtx, rtx, enum machine_mode,
215
                                              addr_space_t);
216
static tree spu_builtin_mul_widen_even (tree);
217
static tree spu_builtin_mul_widen_odd (tree);
218
static tree spu_builtin_mask_for_load (void);
219
static int spu_builtin_vectorization_cost (enum vect_cost_for_stmt, tree, int);
220
static bool spu_vector_alignment_reachable (const_tree, bool);
221
static enum machine_mode spu_addr_space_pointer_mode (addr_space_t);
222
static enum machine_mode spu_addr_space_address_mode (addr_space_t);
223
static bool spu_addr_space_subset_p (addr_space_t, addr_space_t);
224
static rtx spu_addr_space_convert (rtx, tree, tree);
225
static int spu_sms_res_mii (struct ddg *g);
226
static unsigned int spu_section_type_flags (tree, const char *, int);
227
static section *spu_select_section (tree, int, unsigned HOST_WIDE_INT);
228
static void spu_unique_section (tree, int);
229
static rtx spu_expand_load (rtx, rtx, rtx, int);
230
static void spu_trampoline_init (rtx, tree, rtx);
231
static void spu_conditional_register_usage (void);
232
static bool spu_ref_may_alias_errno (ao_ref *);
233
static void spu_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
234
                                 HOST_WIDE_INT, tree);
235
 
236
/* Which instruction set architecture to use.  */
237
int spu_arch;
238
/* Which cpu are we tuning for.  */
239
int spu_tune;
240
 
241
/* The hardware requires 8 insns between a hint and the branch it
242
   effects.  This variable describes how many rtl instructions the
243
   compiler needs to see before inserting a hint, and then the compiler
244
   will insert enough nops to make it at least 8 insns.  The default is
245
   for the compiler to allow up to 2 nops be emitted.  The nops are
246
   inserted in pairs, so we round down. */
247
int spu_hint_dist = (8*4) - (2*4);
248
 
249
enum spu_immediate {
250
  SPU_NONE,
251
  SPU_IL,
252
  SPU_ILA,
253
  SPU_ILH,
254
  SPU_ILHU,
255
  SPU_ORI,
256
  SPU_ORHI,
257
  SPU_ORBI,
258
  SPU_IOHL
259
};
260
enum immediate_class
261
{
262
  IC_POOL,                      /* constant pool */
263
  IC_IL1,                       /* one il* instruction */
264
  IC_IL2,                       /* both ilhu and iohl instructions */
265
  IC_IL1s,                      /* one il* instruction */
266
  IC_IL2s,                      /* both ilhu and iohl instructions */
267
  IC_FSMBI,                     /* the fsmbi instruction */
268
  IC_CPAT,                      /* one of the c*d instructions */
269
  IC_FSMBI2                     /* fsmbi plus 1 other instruction */
270
};
271
 
272
static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
273
static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
274
static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
275
static enum immediate_class classify_immediate (rtx op,
276
                                                enum machine_mode mode);
277
 
278
static enum machine_mode spu_unwind_word_mode (void);
279
 
280
static enum machine_mode
281
spu_libgcc_cmp_return_mode (void);
282
 
283
static enum machine_mode
284
spu_libgcc_shift_count_mode (void);
285
 
286
/* Pointer mode for __ea references.  */
287
#define EAmode (spu_ea_model != 32 ? DImode : SImode)
288
 
289
 
290
/*  Table of machine attributes.  */
291
static const struct attribute_spec spu_attribute_table[] =
292
{
293
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
294
       affects_type_identity } */
295
  { "naked",          0, 0, true,  false, false, spu_handle_fndecl_attribute,
296
    false },
297
  { "spu_vector",     0, 0, false, true,  false, spu_handle_vector_attribute,
298
    false },
299
  { NULL,             0, 0, false, false, false, NULL, false }
300
};
301
 
302
/*  TARGET overrides.  */
303
 
304
#undef TARGET_ADDR_SPACE_POINTER_MODE
305
#define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
306
 
307
#undef TARGET_ADDR_SPACE_ADDRESS_MODE
308
#define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
309
 
310
#undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
311
#define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
312
  spu_addr_space_legitimate_address_p
313
 
314
#undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
315
#define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
316
 
317
#undef TARGET_ADDR_SPACE_SUBSET_P
318
#define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
319
 
320
#undef TARGET_ADDR_SPACE_CONVERT
321
#define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
322
 
323
#undef TARGET_INIT_BUILTINS
324
#define TARGET_INIT_BUILTINS spu_init_builtins
325
#undef TARGET_BUILTIN_DECL
326
#define TARGET_BUILTIN_DECL spu_builtin_decl
327
 
328
#undef TARGET_EXPAND_BUILTIN
329
#define TARGET_EXPAND_BUILTIN spu_expand_builtin
330
 
331
#undef TARGET_UNWIND_WORD_MODE
332
#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
333
 
334
#undef TARGET_LEGITIMIZE_ADDRESS
335
#define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
336
 
337
/* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
338
   and .quad for the debugger.  When it is known that the assembler is fixed,
339
   these can be removed.  */
340
#undef TARGET_ASM_UNALIGNED_SI_OP
341
#define TARGET_ASM_UNALIGNED_SI_OP      "\t.long\t"
342
 
343
#undef TARGET_ASM_ALIGNED_DI_OP
344
#define TARGET_ASM_ALIGNED_DI_OP        "\t.quad\t"
345
 
346
/* The .8byte directive doesn't seem to work well for a 32 bit
347
   architecture. */
348
#undef TARGET_ASM_UNALIGNED_DI_OP
349
#define TARGET_ASM_UNALIGNED_DI_OP NULL
350
 
351
#undef TARGET_RTX_COSTS
352
#define TARGET_RTX_COSTS spu_rtx_costs
353
 
354
#undef TARGET_ADDRESS_COST
355
#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
356
 
357
#undef TARGET_SCHED_ISSUE_RATE
358
#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
359
 
360
#undef TARGET_SCHED_INIT_GLOBAL
361
#define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
362
 
363
#undef TARGET_SCHED_INIT
364
#define TARGET_SCHED_INIT spu_sched_init
365
 
366
#undef TARGET_SCHED_VARIABLE_ISSUE
367
#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
368
 
369
#undef TARGET_SCHED_REORDER
370
#define TARGET_SCHED_REORDER spu_sched_reorder
371
 
372
#undef TARGET_SCHED_REORDER2
373
#define TARGET_SCHED_REORDER2 spu_sched_reorder
374
 
375
#undef TARGET_SCHED_ADJUST_COST
376
#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
377
 
378
#undef  TARGET_ATTRIBUTE_TABLE
379
#define TARGET_ATTRIBUTE_TABLE spu_attribute_table
380
 
381
#undef TARGET_ASM_INTEGER
382
#define TARGET_ASM_INTEGER spu_assemble_integer
383
 
384
#undef TARGET_SCALAR_MODE_SUPPORTED_P
385
#define TARGET_SCALAR_MODE_SUPPORTED_P  spu_scalar_mode_supported_p
386
 
387
#undef TARGET_VECTOR_MODE_SUPPORTED_P
388
#define TARGET_VECTOR_MODE_SUPPORTED_P  spu_vector_mode_supported_p
389
 
390
#undef TARGET_FUNCTION_OK_FOR_SIBCALL
391
#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
392
 
393
#undef TARGET_ASM_GLOBALIZE_LABEL
394
#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
395
 
396
#undef TARGET_PASS_BY_REFERENCE
397
#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
398
 
399
#undef TARGET_FUNCTION_ARG
400
#define TARGET_FUNCTION_ARG spu_function_arg
401
 
402
#undef TARGET_FUNCTION_ARG_ADVANCE
403
#define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
404
 
405
#undef TARGET_MUST_PASS_IN_STACK
406
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
407
 
408
#undef TARGET_BUILD_BUILTIN_VA_LIST
409
#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
410
 
411
#undef TARGET_EXPAND_BUILTIN_VA_START
412
#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
413
 
414
static void spu_setup_incoming_varargs (cumulative_args_t cum,
415
                                        enum machine_mode mode,
416
                                        tree type, int *pretend_size,
417
                                        int no_rtl);
418
#undef TARGET_SETUP_INCOMING_VARARGS
419
#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
420
 
421
#undef TARGET_MACHINE_DEPENDENT_REORG
422
#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
423
 
424
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
425
#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
426
 
427
#undef TARGET_INIT_LIBFUNCS
428
#define TARGET_INIT_LIBFUNCS spu_init_libfuncs
429
 
430
#undef TARGET_RETURN_IN_MEMORY
431
#define TARGET_RETURN_IN_MEMORY spu_return_in_memory
432
 
433
#undef  TARGET_ENCODE_SECTION_INFO
434
#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
435
 
436
#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
437
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
438
 
439
#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
440
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
441
 
442
#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
443
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
444
 
445
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
446
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
447
 
448
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
449
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
450
 
451
#undef TARGET_LIBGCC_CMP_RETURN_MODE
452
#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
453
 
454
#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
455
#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
456
 
457
#undef TARGET_SCHED_SMS_RES_MII
458
#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
459
 
460
#undef TARGET_SECTION_TYPE_FLAGS
461
#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
462
 
463
#undef TARGET_ASM_SELECT_SECTION
464
#define TARGET_ASM_SELECT_SECTION  spu_select_section
465
 
466
#undef TARGET_ASM_UNIQUE_SECTION
467
#define TARGET_ASM_UNIQUE_SECTION  spu_unique_section
468
 
469
#undef TARGET_LEGITIMATE_ADDRESS_P
470
#define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
471
 
472
#undef TARGET_LEGITIMATE_CONSTANT_P
473
#define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p
474
 
475
#undef TARGET_TRAMPOLINE_INIT
476
#define TARGET_TRAMPOLINE_INIT spu_trampoline_init
477
 
478
#undef TARGET_OPTION_OVERRIDE
479
#define TARGET_OPTION_OVERRIDE spu_option_override
480
 
481
#undef TARGET_CONDITIONAL_REGISTER_USAGE
482
#define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage
483
 
484
#undef TARGET_REF_MAY_ALIAS_ERRNO
485
#define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno
486
 
487
#undef TARGET_ASM_OUTPUT_MI_THUNK
488
#define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk
489
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
490
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
491
 
492
/* Variable tracking should be run after all optimizations which
493
   change order of insns.  It also needs a valid CFG.  */
494
#undef TARGET_DELAY_VARTRACK
495
#define TARGET_DELAY_VARTRACK true
496
 
497
struct gcc_target targetm = TARGET_INITIALIZER;
498
 
499
/* Define the structure for the machine field in struct function.  */
500
struct GTY(()) machine_function
501
{
502
  /* Register to use for PIC accesses.  */
503
  rtx pic_reg;
504
};
505
 
506
/* How to allocate a 'struct machine_function'.  */
507
static struct machine_function *
508
spu_init_machine_status (void)
509
{
510
  return ggc_alloc_cleared_machine_function ();
511
}
512
 
513
/* Implement TARGET_OPTION_OVERRIDE.  */
514
static void
515
spu_option_override (void)
516
{
517
  /* Set up function hooks.  */
518
  init_machine_status = spu_init_machine_status;
519
 
520
  /* Small loops will be unpeeled at -O3.  For SPU it is more important
521
     to keep code small by default.  */
522
  if (!flag_unroll_loops && !flag_peel_loops)
523
    maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
524
                           global_options.x_param_values,
525
                           global_options_set.x_param_values);
526
 
527
  flag_omit_frame_pointer = 1;
528
 
529
  /* Functions must be 8 byte aligned so we correctly handle dual issue */
530
  if (align_functions < 8)
531
    align_functions = 8;
532
 
533
  spu_hint_dist = 8*4 - spu_max_nops*4;
534
  if (spu_hint_dist < 0)
535
    spu_hint_dist = 0;
536
 
537
  if (spu_fixed_range_string)
538
    fix_range (spu_fixed_range_string);
539
 
540
  /* Determine processor architectural level.  */
541
  if (spu_arch_string)
542
    {
543
      if (strcmp (&spu_arch_string[0], "cell") == 0)
544
        spu_arch = PROCESSOR_CELL;
545
      else if (strcmp (&spu_arch_string[0], "celledp") == 0)
546
        spu_arch = PROCESSOR_CELLEDP;
547
      else
548
        error ("bad value (%s) for -march= switch", spu_arch_string);
549
    }
550
 
551
  /* Determine processor to tune for.  */
552
  if (spu_tune_string)
553
    {
554
      if (strcmp (&spu_tune_string[0], "cell") == 0)
555
        spu_tune = PROCESSOR_CELL;
556
      else if (strcmp (&spu_tune_string[0], "celledp") == 0)
557
        spu_tune = PROCESSOR_CELLEDP;
558
      else
559
        error ("bad value (%s) for -mtune= switch", spu_tune_string);
560
    }
561
 
562
  /* Change defaults according to the processor architecture.  */
563
  if (spu_arch == PROCESSOR_CELLEDP)
564
    {
565
      /* If no command line option has been otherwise specified, change
566
         the default to -mno-safe-hints on celledp -- only the original
567
         Cell/B.E. processors require this workaround.  */
568
      if (!(target_flags_explicit & MASK_SAFE_HINTS))
569
        target_flags &= ~MASK_SAFE_HINTS;
570
    }
571
 
572
  REAL_MODE_FORMAT (SFmode) = &spu_single_format;
573
}
574
 
575
/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
576
   struct attribute_spec.handler.  */
577
 
578
/* True if MODE is valid for the target.  By "valid", we mean able to
579
   be manipulated in non-trivial ways.  In particular, this means all
580
   the arithmetic is supported.  */
581
static bool
582
spu_scalar_mode_supported_p (enum machine_mode mode)
583
{
584
  switch (mode)
585
    {
586
    case QImode:
587
    case HImode:
588
    case SImode:
589
    case SFmode:
590
    case DImode:
591
    case TImode:
592
    case DFmode:
593
      return true;
594
 
595
    default:
596
      return false;
597
    }
598
}
599
 
600
/* Similarly for vector modes.  "Supported" here is less strict.  At
601
   least some operations are supported; need to check optabs or builtins
602
   for further details.  */
603
static bool
604
spu_vector_mode_supported_p (enum machine_mode mode)
605
{
606
  switch (mode)
607
    {
608
    case V16QImode:
609
    case V8HImode:
610
    case V4SImode:
611
    case V2DImode:
612
    case V4SFmode:
613
    case V2DFmode:
614
      return true;
615
 
616
    default:
617
      return false;
618
    }
619
}
620
 
621
/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
622
   least significant bytes of the outer mode.  This function returns
623
   TRUE for the SUBREG's where this is correct.  */
624
int
625
valid_subreg (rtx op)
626
{
627
  enum machine_mode om = GET_MODE (op);
628
  enum machine_mode im = GET_MODE (SUBREG_REG (op));
629
  return om != VOIDmode && im != VOIDmode
630
    && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
631
        || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
632
        || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
633
}
634
 
635
/* When insv and ext[sz]v ar passed a TI SUBREG, we want to strip it off
636
   and adjust the start offset.  */
637
static rtx
638
adjust_operand (rtx op, HOST_WIDE_INT * start)
639
{
640
  enum machine_mode mode;
641
  int op_size;
642
  /* Strip any paradoxical SUBREG.  */
643
  if (GET_CODE (op) == SUBREG
644
      && (GET_MODE_BITSIZE (GET_MODE (op))
645
          > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
646
    {
647
      if (start)
648
        *start -=
649
          GET_MODE_BITSIZE (GET_MODE (op)) -
650
          GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
651
      op = SUBREG_REG (op);
652
    }
653
  /* If it is smaller than SI, assure a SUBREG */
654
  op_size = GET_MODE_BITSIZE (GET_MODE (op));
655
  if (op_size < 32)
656
    {
657
      if (start)
658
        *start += 32 - op_size;
659
      op_size = 32;
660
    }
661
  /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
662
  mode = mode_for_size (op_size, MODE_INT, 0);
663
  if (mode != GET_MODE (op))
664
    op = gen_rtx_SUBREG (mode, op, 0);
665
  return op;
666
}
667
 
668
void
669
spu_expand_extv (rtx ops[], int unsignedp)
670
{
671
  rtx dst = ops[0], src = ops[1];
672
  HOST_WIDE_INT width = INTVAL (ops[2]);
673
  HOST_WIDE_INT start = INTVAL (ops[3]);
674
  HOST_WIDE_INT align_mask;
675
  rtx s0, s1, mask, r0;
676
 
677
  gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
678
 
679
  if (MEM_P (src))
680
    {
681
      /* First, determine if we need 1 TImode load or 2.  We need only 1
682
         if the bits being extracted do not cross the alignment boundary
683
         as determined by the MEM and its address. */
684
 
685
      align_mask = -MEM_ALIGN (src);
686
      if ((start & align_mask) == ((start + width - 1) & align_mask))
687
        {
688
          /* Alignment is sufficient for 1 load. */
689
          s0 = gen_reg_rtx (TImode);
690
          r0 = spu_expand_load (s0, 0, src, start / 8);
691
          start &= 7;
692
          if (r0)
693
            emit_insn (gen_rotqby_ti (s0, s0, r0));
694
        }
695
      else
696
        {
697
          /* Need 2 loads. */
698
          s0 = gen_reg_rtx (TImode);
699
          s1 = gen_reg_rtx (TImode);
700
          r0 = spu_expand_load (s0, s1, src, start / 8);
701
          start &= 7;
702
 
703
          gcc_assert (start + width <= 128);
704
          if (r0)
705
            {
706
              rtx r1 = gen_reg_rtx (SImode);
707
              mask = gen_reg_rtx (TImode);
708
              emit_move_insn (mask, GEN_INT (-1));
709
              emit_insn (gen_rotqby_ti (s0, s0, r0));
710
              emit_insn (gen_rotqby_ti (s1, s1, r0));
711
              if (GET_CODE (r0) == CONST_INT)
712
                r1 = GEN_INT (INTVAL (r0) & 15);
713
              else
714
                emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
715
              emit_insn (gen_shlqby_ti (mask, mask, r1));
716
              emit_insn (gen_selb (s0, s1, s0, mask));
717
            }
718
        }
719
 
720
    }
721
  else if (GET_CODE (src) == SUBREG)
722
    {
723
      rtx r = SUBREG_REG (src);
724
      gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
725
      s0 = gen_reg_rtx (TImode);
726
      if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
727
        emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r)));
728
      else
729
        emit_move_insn (s0, src);
730
    }
731
  else
732
    {
733
      gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
734
      s0 = gen_reg_rtx (TImode);
735
      emit_move_insn (s0, src);
736
    }
737
 
738
  /* Now s0 is TImode and contains the bits to extract at start. */
739
 
740
  if (start)
741
    emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
742
 
743
  if (128 - width)
744
    s0 = expand_shift (RSHIFT_EXPR, TImode, s0, 128 - width, s0, unsignedp);
745
 
746
  emit_move_insn (dst, s0);
747
}
748
 
749
void
750
spu_expand_insv (rtx ops[])
751
{
752
  HOST_WIDE_INT width = INTVAL (ops[1]);
753
  HOST_WIDE_INT start = INTVAL (ops[2]);
754
  HOST_WIDE_INT maskbits;
755
  enum machine_mode dst_mode;
756
  rtx dst = ops[0], src = ops[3];
757
  int dst_size;
758
  rtx mask;
759
  rtx shift_reg;
760
  int shift;
761
 
762
 
763
  if (GET_CODE (ops[0]) == MEM)
764
    dst = gen_reg_rtx (TImode);
765
  else
766
    dst = adjust_operand (dst, &start);
767
  dst_mode = GET_MODE (dst);
768
  dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
769
 
770
  if (CONSTANT_P (src))
771
    {
772
      enum machine_mode m =
773
        (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
774
      src = force_reg (m, convert_to_mode (m, src, 0));
775
    }
776
  src = adjust_operand (src, 0);
777
 
778
  mask = gen_reg_rtx (dst_mode);
779
  shift_reg = gen_reg_rtx (dst_mode);
780
  shift = dst_size - start - width;
781
 
782
  /* It's not safe to use subreg here because the compiler assumes
783
     that the SUBREG_REG is right justified in the SUBREG. */
784
  convert_move (shift_reg, src, 1);
785
 
786
  if (shift > 0)
787
    {
788
      switch (dst_mode)
789
        {
790
        case SImode:
791
          emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
792
          break;
793
        case DImode:
794
          emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
795
          break;
796
        case TImode:
797
          emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
798
          break;
799
        default:
800
          abort ();
801
        }
802
    }
803
  else if (shift < 0)
804
    abort ();
805
 
806
  switch (dst_size)
807
    {
808
    case 32:
809
      maskbits = (-1ll << (32 - width - start));
810
      if (start)
811
        maskbits += (1ll << (32 - start));
812
      emit_move_insn (mask, GEN_INT (maskbits));
813
      break;
814
    case 64:
815
      maskbits = (-1ll << (64 - width - start));
816
      if (start)
817
        maskbits += (1ll << (64 - start));
818
      emit_move_insn (mask, GEN_INT (maskbits));
819
      break;
820
    case 128:
821
      {
822
        unsigned char arr[16];
823
        int i = start / 8;
824
        memset (arr, 0, sizeof (arr));
825
        arr[i] = 0xff >> (start & 7);
826
        for (i++; i <= (start + width - 1) / 8; i++)
827
          arr[i] = 0xff;
828
        arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
829
        emit_move_insn (mask, array_to_constant (TImode, arr));
830
      }
831
      break;
832
    default:
833
      abort ();
834
    }
835
  if (GET_CODE (ops[0]) == MEM)
836
    {
837
      rtx low = gen_reg_rtx (SImode);
838
      rtx rotl = gen_reg_rtx (SImode);
839
      rtx mask0 = gen_reg_rtx (TImode);
840
      rtx addr;
841
      rtx addr0;
842
      rtx addr1;
843
      rtx mem;
844
 
845
      addr = force_reg (Pmode, XEXP (ops[0], 0));
846
      addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
847
      emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
848
      emit_insn (gen_negsi2 (rotl, low));
849
      emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
850
      emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
851
      mem = change_address (ops[0], TImode, addr0);
852
      set_mem_alias_set (mem, 0);
853
      emit_move_insn (dst, mem);
854
      emit_insn (gen_selb (dst, dst, shift_reg, mask0));
855
      if (start + width > MEM_ALIGN (ops[0]))
856
        {
857
          rtx shl = gen_reg_rtx (SImode);
858
          rtx mask1 = gen_reg_rtx (TImode);
859
          rtx dst1 = gen_reg_rtx (TImode);
860
          rtx mem1;
861
          addr1 = plus_constant (addr, 16);
862
          addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
863
          emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
864
          emit_insn (gen_shlqby_ti (mask1, mask, shl));
865
          mem1 = change_address (ops[0], TImode, addr1);
866
          set_mem_alias_set (mem1, 0);
867
          emit_move_insn (dst1, mem1);
868
          emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
869
          emit_move_insn (mem1, dst1);
870
        }
871
      emit_move_insn (mem, dst);
872
    }
873
  else
874
    emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
875
}
876
 
877
 
878
int
879
spu_expand_block_move (rtx ops[])
880
{
881
  HOST_WIDE_INT bytes, align, offset;
882
  rtx src, dst, sreg, dreg, target;
883
  int i;
884
  if (GET_CODE (ops[2]) != CONST_INT
885
      || GET_CODE (ops[3]) != CONST_INT
886
      || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
887
    return 0;
888
 
889
  bytes = INTVAL (ops[2]);
890
  align = INTVAL (ops[3]);
891
 
892
  if (bytes <= 0)
893
    return 1;
894
 
895
  dst = ops[0];
896
  src = ops[1];
897
 
898
  if (align == 16)
899
    {
900
      for (offset = 0; offset + 16 <= bytes; offset += 16)
901
        {
902
          dst = adjust_address (ops[0], V16QImode, offset);
903
          src = adjust_address (ops[1], V16QImode, offset);
904
          emit_move_insn (dst, src);
905
        }
906
      if (offset < bytes)
907
        {
908
          rtx mask;
909
          unsigned char arr[16] = { 0 };
910
          for (i = 0; i < bytes - offset; i++)
911
            arr[i] = 0xff;
912
          dst = adjust_address (ops[0], V16QImode, offset);
913
          src = adjust_address (ops[1], V16QImode, offset);
914
          mask = gen_reg_rtx (V16QImode);
915
          sreg = gen_reg_rtx (V16QImode);
916
          dreg = gen_reg_rtx (V16QImode);
917
          target = gen_reg_rtx (V16QImode);
918
          emit_move_insn (mask, array_to_constant (V16QImode, arr));
919
          emit_move_insn (dreg, dst);
920
          emit_move_insn (sreg, src);
921
          emit_insn (gen_selb (target, dreg, sreg, mask));
922
          emit_move_insn (dst, target);
923
        }
924
      return 1;
925
    }
926
  return 0;
927
}
928
 
929
enum spu_comp_code
930
{ SPU_EQ, SPU_GT, SPU_GTU };
931
 
932
int spu_comp_icode[12][3] = {
933
 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
934
 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
935
 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
936
 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
937
 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
938
 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
939
 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
940
 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
941
 {CODE_FOR_ceq_v8hi,  CODE_FOR_cgt_v8hi,  CODE_FOR_clgt_v8hi},
942
 {CODE_FOR_ceq_v4si,  CODE_FOR_cgt_v4si,  CODE_FOR_clgt_v4si},
943
 {CODE_FOR_ceq_v4sf,  CODE_FOR_cgt_v4sf, 0},
944
 {CODE_FOR_ceq_v2df,  CODE_FOR_cgt_v2df, 0},
945
};
946
 
947
/* Generate a compare for CODE.  Return a brand-new rtx that represents
948
   the result of the compare.   GCC can figure this out too if we don't
949
   provide all variations of compares, but GCC always wants to use
950
   WORD_MODE, we can generate better code in most cases if we do it
951
   ourselves.  */
952
void
953
spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
954
{
955
  int reverse_compare = 0;
956
  int reverse_test = 0;
957
  rtx compare_result, eq_result;
958
  rtx comp_rtx, eq_rtx;
959
  enum machine_mode comp_mode;
960
  enum machine_mode op_mode;
961
  enum spu_comp_code scode, eq_code;
962
  enum insn_code ior_code;
963
  enum rtx_code code = GET_CODE (cmp);
964
  rtx op0 = XEXP (cmp, 0);
965
  rtx op1 = XEXP (cmp, 1);
966
  int index;
967
  int eq_test = 0;
968
 
969
  /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
970
     and so on, to keep the constant in operand 1. */
971
  if (GET_CODE (op1) == CONST_INT)
972
    {
973
      HOST_WIDE_INT val = INTVAL (op1) - 1;
974
      if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
975
        switch (code)
976
          {
977
          case GE:
978
            op1 = GEN_INT (val);
979
            code = GT;
980
            break;
981
          case LT:
982
            op1 = GEN_INT (val);
983
            code = LE;
984
            break;
985
          case GEU:
986
            op1 = GEN_INT (val);
987
            code = GTU;
988
            break;
989
          case LTU:
990
            op1 = GEN_INT (val);
991
            code = LEU;
992
            break;
993
          default:
994
            break;
995
          }
996
    }
997
 
998
  /* However, if we generate an integer result, performing a reverse test
999
     would require an extra negation, so avoid that where possible.  */
1000
  if (GET_CODE (op1) == CONST_INT && is_set == 1)
1001
    {
1002
      HOST_WIDE_INT val = INTVAL (op1) + 1;
1003
      if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
1004
        switch (code)
1005
          {
1006
          case LE:
1007
            op1 = GEN_INT (val);
1008
            code = LT;
1009
            break;
1010
          case LEU:
1011
            op1 = GEN_INT (val);
1012
            code = LTU;
1013
            break;
1014
          default:
1015
            break;
1016
          }
1017
    }
1018
 
1019
  comp_mode = SImode;
1020
  op_mode = GET_MODE (op0);
1021
 
1022
  switch (code)
1023
    {
1024
    case GE:
1025
      scode = SPU_GT;
1026
      if (HONOR_NANS (op_mode))
1027
        {
1028
          reverse_compare = 0;
1029
          reverse_test = 0;
1030
          eq_test = 1;
1031
          eq_code = SPU_EQ;
1032
        }
1033
      else
1034
        {
1035
          reverse_compare = 1;
1036
          reverse_test = 1;
1037
        }
1038
      break;
1039
    case LE:
1040
      scode = SPU_GT;
1041
      if (HONOR_NANS (op_mode))
1042
        {
1043
          reverse_compare = 1;
1044
          reverse_test = 0;
1045
          eq_test = 1;
1046
          eq_code = SPU_EQ;
1047
        }
1048
      else
1049
        {
1050
          reverse_compare = 0;
1051
          reverse_test = 1;
1052
        }
1053
      break;
1054
    case LT:
1055
      reverse_compare = 1;
1056
      reverse_test = 0;
1057
      scode = SPU_GT;
1058
      break;
1059
    case GEU:
1060
      reverse_compare = 1;
1061
      reverse_test = 1;
1062
      scode = SPU_GTU;
1063
      break;
1064
    case LEU:
1065
      reverse_compare = 0;
1066
      reverse_test = 1;
1067
      scode = SPU_GTU;
1068
      break;
1069
    case LTU:
1070
      reverse_compare = 1;
1071
      reverse_test = 0;
1072
      scode = SPU_GTU;
1073
      break;
1074
    case NE:
1075
      reverse_compare = 0;
1076
      reverse_test = 1;
1077
      scode = SPU_EQ;
1078
      break;
1079
 
1080
    case EQ:
1081
      scode = SPU_EQ;
1082
      break;
1083
    case GT:
1084
      scode = SPU_GT;
1085
      break;
1086
    case GTU:
1087
      scode = SPU_GTU;
1088
      break;
1089
    default:
1090
      scode = SPU_EQ;
1091
      break;
1092
    }
1093
 
1094
  switch (op_mode)
1095
    {
1096
    case QImode:
1097
      index = 0;
1098
      comp_mode = QImode;
1099
      break;
1100
    case HImode:
1101
      index = 1;
1102
      comp_mode = HImode;
1103
      break;
1104
    case SImode:
1105
      index = 2;
1106
      break;
1107
    case DImode:
1108
      index = 3;
1109
      break;
1110
    case TImode:
1111
      index = 4;
1112
      break;
1113
    case SFmode:
1114
      index = 5;
1115
      break;
1116
    case DFmode:
1117
      index = 6;
1118
      break;
1119
    case V16QImode:
1120
      index = 7;
1121
      comp_mode = op_mode;
1122
      break;
1123
    case V8HImode:
1124
      index = 8;
1125
      comp_mode = op_mode;
1126
      break;
1127
    case V4SImode:
1128
      index = 9;
1129
      comp_mode = op_mode;
1130
      break;
1131
    case V4SFmode:
1132
      index = 10;
1133
      comp_mode = V4SImode;
1134
      break;
1135
    case V2DFmode:
1136
      index = 11;
1137
      comp_mode = V2DImode;
1138
      break;
1139
    case V2DImode:
1140
    default:
1141
      abort ();
1142
    }
1143
 
1144
  if (GET_MODE (op1) == DFmode
1145
      && (scode != SPU_GT && scode != SPU_EQ))
1146
    abort ();
1147
 
1148
  if (is_set == 0 && op1 == const0_rtx
1149
      && (GET_MODE (op0) == SImode
1150
          || GET_MODE (op0) == HImode
1151
          || GET_MODE (op0) == QImode) && scode == SPU_EQ)
1152
    {
1153
      /* Don't need to set a register with the result when we are
1154
         comparing against zero and branching. */
1155
      reverse_test = !reverse_test;
1156
      compare_result = op0;
1157
    }
1158
  else
1159
    {
1160
      compare_result = gen_reg_rtx (comp_mode);
1161
 
1162
      if (reverse_compare)
1163
        {
1164
          rtx t = op1;
1165
          op1 = op0;
1166
          op0 = t;
1167
        }
1168
 
1169
      if (spu_comp_icode[index][scode] == 0)
1170
        abort ();
1171
 
1172
      if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
1173
          (op0, op_mode))
1174
        op0 = force_reg (op_mode, op0);
1175
      if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
1176
          (op1, op_mode))
1177
        op1 = force_reg (op_mode, op1);
1178
      comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
1179
                                                         op0, op1);
1180
      if (comp_rtx == 0)
1181
        abort ();
1182
      emit_insn (comp_rtx);
1183
 
1184
      if (eq_test)
1185
        {
1186
          eq_result = gen_reg_rtx (comp_mode);
1187
          eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
1188
                                                             op0, op1);
1189
          if (eq_rtx == 0)
1190
            abort ();
1191
          emit_insn (eq_rtx);
1192
          ior_code = optab_handler (ior_optab, comp_mode);
1193
          gcc_assert (ior_code != CODE_FOR_nothing);
1194
          emit_insn (GEN_FCN (ior_code)
1195
                     (compare_result, compare_result, eq_result));
1196
        }
1197
    }
1198
 
1199
  if (is_set == 0)
1200
    {
1201
      rtx bcomp;
1202
      rtx loc_ref;
1203
 
1204
      /* We don't have branch on QI compare insns, so we convert the
1205
         QI compare result to a HI result. */
1206
      if (comp_mode == QImode)
1207
        {
1208
          rtx old_res = compare_result;
1209
          compare_result = gen_reg_rtx (HImode);
1210
          comp_mode = HImode;
1211
          emit_insn (gen_extendqihi2 (compare_result, old_res));
1212
        }
1213
 
1214
      if (reverse_test)
1215
        bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
1216
      else
1217
        bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
1218
 
1219
      loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
1220
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
1221
                                   gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
1222
                                                         loc_ref, pc_rtx)));
1223
    }
1224
  else if (is_set == 2)
1225
    {
1226
      rtx target = operands[0];
1227
      int compare_size = GET_MODE_BITSIZE (comp_mode);
1228
      int target_size = GET_MODE_BITSIZE (GET_MODE (target));
1229
      enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
1230
      rtx select_mask;
1231
      rtx op_t = operands[2];
1232
      rtx op_f = operands[3];
1233
 
1234
      /* The result of the comparison can be SI, HI or QI mode.  Create a
1235
         mask based on that result. */
1236
      if (target_size > compare_size)
1237
        {
1238
          select_mask = gen_reg_rtx (mode);
1239
          emit_insn (gen_extend_compare (select_mask, compare_result));
1240
        }
1241
      else if (target_size < compare_size)
1242
        select_mask =
1243
          gen_rtx_SUBREG (mode, compare_result,
1244
                          (compare_size - target_size) / BITS_PER_UNIT);
1245
      else if (comp_mode != mode)
1246
        select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
1247
      else
1248
        select_mask = compare_result;
1249
 
1250
      if (GET_MODE (target) != GET_MODE (op_t)
1251
          || GET_MODE (target) != GET_MODE (op_f))
1252
        abort ();
1253
 
1254
      if (reverse_test)
1255
        emit_insn (gen_selb (target, op_t, op_f, select_mask));
1256
      else
1257
        emit_insn (gen_selb (target, op_f, op_t, select_mask));
1258
    }
1259
  else
1260
    {
1261
      rtx target = operands[0];
1262
      if (reverse_test)
1263
        emit_insn (gen_rtx_SET (VOIDmode, compare_result,
1264
                                gen_rtx_NOT (comp_mode, compare_result)));
1265
      if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
1266
        emit_insn (gen_extendhisi2 (target, compare_result));
1267
      else if (GET_MODE (target) == SImode
1268
               && GET_MODE (compare_result) == QImode)
1269
        emit_insn (gen_extend_compare (target, compare_result));
1270
      else
1271
        emit_move_insn (target, compare_result);
1272
    }
1273
}
1274
 
1275
HOST_WIDE_INT
1276
const_double_to_hwint (rtx x)
1277
{
1278
  HOST_WIDE_INT val;
1279
  REAL_VALUE_TYPE rv;
1280
  if (GET_MODE (x) == SFmode)
1281
    {
1282
      REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1283
      REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1284
    }
1285
  else if (GET_MODE (x) == DFmode)
1286
    {
1287
      long l[2];
1288
      REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1289
      REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1290
      val = l[0];
1291
      val = (val << 32) | (l[1] & 0xffffffff);
1292
    }
1293
  else
1294
    abort ();
1295
  return val;
1296
}
1297
 
1298
rtx
1299
hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1300
{
1301
  long tv[2];
1302
  REAL_VALUE_TYPE rv;
1303
  gcc_assert (mode == SFmode || mode == DFmode);
1304
 
1305
  if (mode == SFmode)
1306
    tv[0] = (v << 32) >> 32;
1307
  else if (mode == DFmode)
1308
    {
1309
      tv[1] = (v << 32) >> 32;
1310
      tv[0] = v >> 32;
1311
    }
1312
  real_from_target (&rv, tv, mode);
1313
  return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
1314
}
1315
 
1316
void
1317
print_operand_address (FILE * file, register rtx addr)
1318
{
1319
  rtx reg;
1320
  rtx offset;
1321
 
1322
  if (GET_CODE (addr) == AND
1323
      && GET_CODE (XEXP (addr, 1)) == CONST_INT
1324
      && INTVAL (XEXP (addr, 1)) == -16)
1325
    addr = XEXP (addr, 0);
1326
 
1327
  switch (GET_CODE (addr))
1328
    {
1329
    case REG:
1330
      fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1331
      break;
1332
 
1333
    case PLUS:
1334
      reg = XEXP (addr, 0);
1335
      offset = XEXP (addr, 1);
1336
      if (GET_CODE (offset) == REG)
1337
        {
1338
          fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1339
                   reg_names[REGNO (offset)]);
1340
        }
1341
      else if (GET_CODE (offset) == CONST_INT)
1342
        {
1343
          fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1344
                   INTVAL (offset), reg_names[REGNO (reg)]);
1345
        }
1346
      else
1347
        abort ();
1348
      break;
1349
 
1350
    case CONST:
1351
    case LABEL_REF:
1352
    case SYMBOL_REF:
1353
    case CONST_INT:
1354
      output_addr_const (file, addr);
1355
      break;
1356
 
1357
    default:
1358
      debug_rtx (addr);
1359
      abort ();
1360
    }
1361
}
1362
 
1363
void
1364
print_operand (FILE * file, rtx x, int code)
1365
{
1366
  enum machine_mode mode = GET_MODE (x);
1367
  HOST_WIDE_INT val;
1368
  unsigned char arr[16];
1369
  int xcode = GET_CODE (x);
1370
  int i, info;
1371
  if (GET_MODE (x) == VOIDmode)
1372
    switch (code)
1373
      {
1374
      case 'L':                 /* 128 bits, signed */
1375
      case 'm':                 /* 128 bits, signed */
1376
      case 'T':                 /* 128 bits, signed */
1377
      case 't':                 /* 128 bits, signed */
1378
        mode = TImode;
1379
        break;
1380
      case 'K':                 /* 64 bits, signed */
1381
      case 'k':                 /* 64 bits, signed */
1382
      case 'D':                 /* 64 bits, signed */
1383
      case 'd':                 /* 64 bits, signed */
1384
        mode = DImode;
1385
        break;
1386
      case 'J':                 /* 32 bits, signed */
1387
      case 'j':                 /* 32 bits, signed */
1388
      case 's':                 /* 32 bits, signed */
1389
      case 'S':                 /* 32 bits, signed */
1390
        mode = SImode;
1391
        break;
1392
      }
1393
  switch (code)
1394
    {
1395
 
1396
    case 'j':                   /* 32 bits, signed */
1397
    case 'k':                   /* 64 bits, signed */
1398
    case 'm':                   /* 128 bits, signed */
1399
      if (xcode == CONST_INT
1400
          || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1401
        {
1402
          gcc_assert (logical_immediate_p (x, mode));
1403
          constant_to_array (mode, x, arr);
1404
          val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1405
          val = trunc_int_for_mode (val, SImode);
1406
          switch (which_logical_immediate (val))
1407
          {
1408
          case SPU_ORI:
1409
            break;
1410
          case SPU_ORHI:
1411
            fprintf (file, "h");
1412
            break;
1413
          case SPU_ORBI:
1414
            fprintf (file, "b");
1415
            break;
1416
          default:
1417
            gcc_unreachable();
1418
          }
1419
        }
1420
      else
1421
        gcc_unreachable();
1422
      return;
1423
 
1424
    case 'J':                   /* 32 bits, signed */
1425
    case 'K':                   /* 64 bits, signed */
1426
    case 'L':                   /* 128 bits, signed */
1427
      if (xcode == CONST_INT
1428
          || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1429
        {
1430
          gcc_assert (logical_immediate_p (x, mode)
1431
                      || iohl_immediate_p (x, mode));
1432
          constant_to_array (mode, x, arr);
1433
          val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1434
          val = trunc_int_for_mode (val, SImode);
1435
          switch (which_logical_immediate (val))
1436
          {
1437
          case SPU_ORI:
1438
          case SPU_IOHL:
1439
            break;
1440
          case SPU_ORHI:
1441
            val = trunc_int_for_mode (val, HImode);
1442
            break;
1443
          case SPU_ORBI:
1444
            val = trunc_int_for_mode (val, QImode);
1445
            break;
1446
          default:
1447
            gcc_unreachable();
1448
          }
1449
          fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1450
        }
1451
      else
1452
        gcc_unreachable();
1453
      return;
1454
 
1455
    case 't':                   /* 128 bits, signed */
1456
    case 'd':                   /* 64 bits, signed */
1457
    case 's':                   /* 32 bits, signed */
1458
      if (CONSTANT_P (x))
1459
        {
1460
          enum immediate_class c = classify_immediate (x, mode);
1461
          switch (c)
1462
            {
1463
            case IC_IL1:
1464
              constant_to_array (mode, x, arr);
1465
              val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1466
              val = trunc_int_for_mode (val, SImode);
1467
              switch (which_immediate_load (val))
1468
                {
1469
                case SPU_IL:
1470
                  break;
1471
                case SPU_ILA:
1472
                  fprintf (file, "a");
1473
                  break;
1474
                case SPU_ILH:
1475
                  fprintf (file, "h");
1476
                  break;
1477
                case SPU_ILHU:
1478
                  fprintf (file, "hu");
1479
                  break;
1480
                default:
1481
                  gcc_unreachable ();
1482
                }
1483
              break;
1484
            case IC_CPAT:
1485
              constant_to_array (mode, x, arr);
1486
              cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1487
              if (info == 1)
1488
                fprintf (file, "b");
1489
              else if (info == 2)
1490
                fprintf (file, "h");
1491
              else if (info == 4)
1492
                fprintf (file, "w");
1493
              else if (info == 8)
1494
                fprintf (file, "d");
1495
              break;
1496
            case IC_IL1s:
1497
              if (xcode == CONST_VECTOR)
1498
                {
1499
                  x = CONST_VECTOR_ELT (x, 0);
1500
                  xcode = GET_CODE (x);
1501
                }
1502
              if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1503
                fprintf (file, "a");
1504
              else if (xcode == HIGH)
1505
                fprintf (file, "hu");
1506
              break;
1507
            case IC_FSMBI:
1508
            case IC_FSMBI2:
1509
            case IC_IL2:
1510
            case IC_IL2s:
1511
            case IC_POOL:
1512
              abort ();
1513
            }
1514
        }
1515
      else
1516
        gcc_unreachable ();
1517
      return;
1518
 
1519
    case 'T':                   /* 128 bits, signed */
1520
    case 'D':                   /* 64 bits, signed */
1521
    case 'S':                   /* 32 bits, signed */
1522
      if (CONSTANT_P (x))
1523
        {
1524
          enum immediate_class c = classify_immediate (x, mode);
1525
          switch (c)
1526
            {
1527
            case IC_IL1:
1528
              constant_to_array (mode, x, arr);
1529
              val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1530
              val = trunc_int_for_mode (val, SImode);
1531
              switch (which_immediate_load (val))
1532
                {
1533
                case SPU_IL:
1534
                case SPU_ILA:
1535
                  break;
1536
                case SPU_ILH:
1537
                case SPU_ILHU:
1538
                  val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1539
                  break;
1540
                default:
1541
                  gcc_unreachable ();
1542
                }
1543
              fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1544
              break;
1545
            case IC_FSMBI:
1546
              constant_to_array (mode, x, arr);
1547
              val = 0;
1548
              for (i = 0; i < 16; i++)
1549
                {
1550
                  val <<= 1;
1551
                  val |= arr[i] & 1;
1552
                }
1553
              print_operand (file, GEN_INT (val), 0);
1554
              break;
1555
            case IC_CPAT:
1556
              constant_to_array (mode, x, arr);
1557
              cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1558
              fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
1559
              break;
1560
            case IC_IL1s:
1561
              if (xcode == HIGH)
1562
                x = XEXP (x, 0);
1563
              if (GET_CODE (x) == CONST_VECTOR)
1564
                x = CONST_VECTOR_ELT (x, 0);
1565
              output_addr_const (file, x);
1566
              if (xcode == HIGH)
1567
                fprintf (file, "@h");
1568
              break;
1569
            case IC_IL2:
1570
            case IC_IL2s:
1571
            case IC_FSMBI2:
1572
            case IC_POOL:
1573
              abort ();
1574
            }
1575
        }
1576
      else
1577
        gcc_unreachable ();
1578
      return;
1579
 
1580
    case 'C':
1581
      if (xcode == CONST_INT)
1582
        {
1583
          /* Only 4 least significant bits are relevant for generate
1584
             control word instructions. */
1585
          fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1586
          return;
1587
        }
1588
      break;
1589
 
1590
    case 'M':                   /* print code for c*d */
1591
      if (GET_CODE (x) == CONST_INT)
1592
        switch (INTVAL (x))
1593
          {
1594
          case 1:
1595
            fprintf (file, "b");
1596
            break;
1597
          case 2:
1598
            fprintf (file, "h");
1599
            break;
1600
          case 4:
1601
            fprintf (file, "w");
1602
            break;
1603
          case 8:
1604
            fprintf (file, "d");
1605
            break;
1606
          default:
1607
            gcc_unreachable();
1608
          }
1609
      else
1610
        gcc_unreachable();
1611
      return;
1612
 
1613
    case 'N':                   /* Negate the operand */
1614
      if (xcode == CONST_INT)
1615
        fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1616
      else if (xcode == CONST_VECTOR)
1617
        fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1618
                 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1619
      return;
1620
 
1621
    case 'I':                   /* enable/disable interrupts */
1622
      if (xcode == CONST_INT)
1623
        fprintf (file, "%s",  INTVAL (x) == 0 ? "d" : "e");
1624
      return;
1625
 
1626
    case 'b':                   /* branch modifiers */
1627
      if (xcode == REG)
1628
        fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1629
      else if (COMPARISON_P (x))
1630
        fprintf (file, "%s", xcode == NE ? "n" : "");
1631
      return;
1632
 
1633
    case 'i':                   /* indirect call */
1634
      if (xcode == MEM)
1635
        {
1636
          if (GET_CODE (XEXP (x, 0)) == REG)
1637
            /* Used in indirect function calls. */
1638
            fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1639
          else
1640
            output_address (XEXP (x, 0));
1641
        }
1642
      return;
1643
 
1644
    case 'p':                   /* load/store */
1645
      if (xcode == MEM)
1646
        {
1647
          x = XEXP (x, 0);
1648
          xcode = GET_CODE (x);
1649
        }
1650
      if (xcode == AND)
1651
        {
1652
          x = XEXP (x, 0);
1653
          xcode = GET_CODE (x);
1654
        }
1655
      if (xcode == REG)
1656
        fprintf (file, "d");
1657
      else if (xcode == CONST_INT)
1658
        fprintf (file, "a");
1659
      else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1660
        fprintf (file, "r");
1661
      else if (xcode == PLUS || xcode == LO_SUM)
1662
        {
1663
          if (GET_CODE (XEXP (x, 1)) == REG)
1664
            fprintf (file, "x");
1665
          else
1666
            fprintf (file, "d");
1667
        }
1668
      return;
1669
 
1670
    case 'e':
1671
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1672
      val &= 0x7;
1673
      output_addr_const (file, GEN_INT (val));
1674
      return;
1675
 
1676
    case 'f':
1677
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1678
      val &= 0x1f;
1679
      output_addr_const (file, GEN_INT (val));
1680
      return;
1681
 
1682
    case 'g':
1683
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1684
      val &= 0x3f;
1685
      output_addr_const (file, GEN_INT (val));
1686
      return;
1687
 
1688
    case 'h':
1689
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1690
      val = (val >> 3) & 0x1f;
1691
      output_addr_const (file, GEN_INT (val));
1692
      return;
1693
 
1694
    case 'E':
1695
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1696
      val = -val;
1697
      val &= 0x7;
1698
      output_addr_const (file, GEN_INT (val));
1699
      return;
1700
 
1701
    case 'F':
1702
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1703
      val = -val;
1704
      val &= 0x1f;
1705
      output_addr_const (file, GEN_INT (val));
1706
      return;
1707
 
1708
    case 'G':
1709
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1710
      val = -val;
1711
      val &= 0x3f;
1712
      output_addr_const (file, GEN_INT (val));
1713
      return;
1714
 
1715
    case 'H':
1716
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1717
      val = -(val & -8ll);
1718
      val = (val >> 3) & 0x1f;
1719
      output_addr_const (file, GEN_INT (val));
1720
      return;
1721
 
1722
    case 'v':
1723
    case 'w':
1724
      constant_to_array (mode, x, arr);
1725
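      /* Bits 1..8 of the constant; for an SFmode value this is the biased
         exponent field, so subtracting 127 removes the IEEE bias.  */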
      val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1726
      output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1727
      return;
1728
 
1729
    case 0:
1730
      if (xcode == REG)
1731
        fprintf (file, "%s", reg_names[REGNO (x)]);
1732
      else if (xcode == MEM)
1733
        output_address (XEXP (x, 0));
1734
      else if (xcode == CONST_VECTOR)
1735
        print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
1736
      else
1737
        output_addr_const (file, x);
1738
      return;
1739
 
1740
      /* unused letters
1741
                      o qr  u   yz
1742
        AB            OPQR  UVWXYZ */
1743
    default:
1744
      output_operand_lossage ("invalid %%xn code");
1745
    }
1746
  gcc_unreachable ();
1747
}
1748
 
1749
/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1750
   caller saved register.  For leaf functions it is more efficient to
1751
   use a volatile register because we won't need to save and restore the
1752
   pic register.  This routine is only valid after register allocation
1753
   is completed, so we can pick an unused register.  */
1754
static rtx
1755
get_pic_reg (void)
1756
{
1757
  if (!reload_completed && !reload_in_progress)
1758
    abort ();
1759
 
1760
  /* If we've already made the decision, we need to keep with it.  Once we've
1761
     decided to use LAST_ARG_REGNUM, future calls to df_regs_ever_live_p may
1762
     return true since the register is now live; this should not cause us to
1763
     "switch back" to using pic_offset_table_rtx.  */
1764
  if (!cfun->machine->pic_reg)
1765
    {
1766
      if (current_function_is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
1767
        cfun->machine->pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
1768
      else
1769
        cfun->machine->pic_reg = pic_offset_table_rtx;
1770
    }
1771
 
1772
  return cfun->machine->pic_reg;
1773
}
1774
 
1775
/* Split constant addresses to handle cases that are too large.
1776
   Add in the pic register when in PIC mode.
1777
   Split immediates that require more than 1 instruction. */
1778
int
1779
spu_split_immediate (rtx * ops)
1780
{
1781
  enum machine_mode mode = GET_MODE (ops[0]);
1782
  enum immediate_class c = classify_immediate (ops[1], mode);
1783
 
1784
  switch (c)
1785
    {
1786
    case IC_IL2:
1787
      {
1788
        unsigned char arrhi[16];
1789
        unsigned char arrlo[16];
1790
        rtx to, temp, hi, lo;
1791
        int i;
1792
        enum machine_mode imode = mode;
1793
        /* We need to do reals as ints because the constant used in the
1794
           IOR might not be a legitimate real constant. */
1795
        imode = int_mode_for_mode (mode);
1796
        constant_to_array (mode, ops[1], arrhi);
1797
        if (imode != mode)
1798
          to = simplify_gen_subreg (imode, ops[0], mode, 0);
1799
        else
1800
          to = ops[0];
1801
        temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
1802
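        /* Split each 32-bit word of the constant: arrhi keeps the high
           halfword, arrlo the low halfword.  HI is moved into the register
           first and LO is then combined with an IOR.  */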
        for (i = 0; i < 16; i += 4)
1803
          {
1804
            arrlo[i + 2] = arrhi[i + 2];
1805
            arrlo[i + 3] = arrhi[i + 3];
1806
            arrlo[i + 0] = arrlo[i + 1] = 0;
1807
            arrhi[i + 2] = arrhi[i + 3] = 0;
1808
          }
1809
        hi = array_to_constant (imode, arrhi);
1810
        lo = array_to_constant (imode, arrlo);
1811
        emit_move_insn (temp, hi);
1812
        emit_insn (gen_rtx_SET
1813
                   (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
1814
        return 1;
1815
      }
1816
    case IC_FSMBI2:
1817
      {
1818
        unsigned char arr_fsmbi[16];
1819
        unsigned char arr_andbi[16];
1820
        rtx to, reg_fsmbi, reg_and;
1821
        int i;
1822
        enum machine_mode imode = mode;
1823
        /* We need to do reals as ints because the constant used in the
1824
         * AND might not be a legitimate real constant. */
1825
        imode = int_mode_for_mode (mode);
1826
        constant_to_array (mode, ops[1], arr_fsmbi);
1827
        if (imode != mode)
1828
          to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1829
        else
1830
          to = ops[0];
1831
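        /* Every nonzero byte becomes 0xff for the fsmbi constant; the byte
           value itself is remembered and replicated into arr_andbi for the
           AND that follows.  */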
        for (i = 0; i < 16; i++)
1832
          if (arr_fsmbi[i] != 0)
1833
            {
1834
              arr_andbi[0] = arr_fsmbi[i];
1835
              arr_fsmbi[i] = 0xff;
1836
            }
1837
        for (i = 1; i < 16; i++)
1838
          arr_andbi[i] = arr_andbi[0];
1839
        reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1840
        reg_and = array_to_constant (imode, arr_andbi);
1841
        emit_move_insn (to, reg_fsmbi);
1842
        emit_insn (gen_rtx_SET
1843
                   (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1844
        return 1;
1845
      }
1846
    case IC_POOL:
1847
      if (reload_in_progress || reload_completed)
1848
        {
1849
          rtx mem = force_const_mem (mode, ops[1]);
1850
          if (TARGET_LARGE_MEM)
1851
            {
1852
              rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1853
              emit_move_insn (addr, XEXP (mem, 0));
1854
              mem = replace_equiv_address (mem, addr);
1855
            }
1856
          emit_move_insn (ops[0], mem);
1857
          return 1;
1858
        }
1859
      break;
1860
    case IC_IL1s:
1861
    case IC_IL2s:
1862
      if (reload_completed && GET_CODE (ops[1]) != HIGH)
1863
        {
1864
          if (c == IC_IL2s)
1865
            {
1866
              emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1867
              emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
1868
            }
1869
          else if (flag_pic)
1870
            emit_insn (gen_pic (ops[0], ops[1]));
1871
          if (flag_pic)
1872
            {
1873
              rtx pic_reg = get_pic_reg ();
1874
              emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
1875
            }
1876
          return flag_pic || c == IC_IL2s;
1877
        }
1878
      break;
1879
    case IC_IL1:
1880
    case IC_FSMBI:
1881
    case IC_CPAT:
1882
      break;
1883
    }
1884
  return 0;
1885
}
1886
 
1887
/* SAVING is TRUE when we are generating the actual load and store
1888
   instructions for REGNO.  When determining the size of the stack
1889
   needed for saving registers we must allocate enough space for the
1890
   worst case, because we don't always have the information early enough
1891
   to not allocate it.  But we can at least eliminate the actual loads
1892
   and stores during the prologue/epilogue.  */
1893
static int
1894
need_to_save_reg (int regno, int saving)
1895
{
1896
  if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1897
    return 1;
1898
  if (flag_pic
1899
      && regno == PIC_OFFSET_TABLE_REGNUM
1900
      && (!saving || cfun->machine->pic_reg == pic_offset_table_rtx))
1901
    return 1;
1902
  return 0;
1903
}
1904
 
1905
/* This function is only correct starting with local register
1906
   allocation */
1907
int
1908
spu_saved_regs_size (void)
1909
{
1910
  int reg_save_size = 0;
1911
  int regno;
1912
 
1913
  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1914
    if (need_to_save_reg (regno, 0))
1915
      reg_save_size += 0x10;
1916
  return reg_save_size;
1917
}
1918
 
1919
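/* Emit a store of register REGNO (as a quadword) to the stack slot at
   ADDR + OFFSET.  */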
static rtx
1920
frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1921
{
1922
  rtx reg = gen_rtx_REG (V4SImode, regno);
1923
  rtx mem =
1924
    gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1925
  return emit_insn (gen_movv4si (mem, reg));
1926
}
1927
 
1928
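/* Emit a load of register REGNO (as a quadword) from the stack slot at
   ADDR + OFFSET.  */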
static rtx
1929
frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1930
{
1931
  rtx reg = gen_rtx_REG (V4SImode, regno);
1932
  rtx mem =
1933
    gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1934
  return emit_insn (gen_movv4si (reg, mem));
1935
}
1936
 
1937
/* This happens after reload, so we need to expand it.  */
1938
static rtx
1939
frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1940
{
1941
  rtx insn;
1942
  if (satisfies_constraint_K (GEN_INT (imm)))
1943
    {
1944
      insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1945
    }
1946
  else
1947
    {
1948
      emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1949
      insn = emit_insn (gen_addsi3 (dst, src, scratch));
1950
      if (REGNO (src) == REGNO (scratch))
1951
        abort ();
1952
    }
1953
  return insn;
1954
}
1955
 
1956
/* Return nonzero if this function is known to have a null epilogue.  */
1957
 
1958
int
1959
direct_return (void)
1960
{
1961
  if (reload_completed)
1962
    {
1963
      if (cfun->static_chain_decl == 0
1964
          && (spu_saved_regs_size ()
1965
              + get_frame_size ()
1966
              + crtl->outgoing_args_size
1967
              + crtl->args.pretend_args_size == 0)
1968
          && current_function_is_leaf)
1969
        return 1;
1970
    }
1971
  return 0;
1972
}
1973
 
1974
/*
1975
   The stack frame looks like this:
1976
         +-------------+
1977
         |  incoming   |
1978
         |    args     |
1979
   AP -> +-------------+
1980
         | $lr save    |
1981
         +-------------+
1982
 prev SP | back chain  |
1983
         +-------------+
1984
         |  var args   |
1985
         |  reg save   | crtl->args.pretend_args_size bytes
1986
         +-------------+
1987
         |    ...      |
1988
         | saved regs  | spu_saved_regs_size() bytes
1989
   FP -> +-------------+
1990
         |    ...      |
1991
         |   vars      | get_frame_size()  bytes
1992
  HFP -> +-------------+
1993
         |    ...      |
1994
         |  outgoing   |
1995
         |    args     | crtl->outgoing_args_size bytes
1996
         +-------------+
1997
         | $lr of next |
1998
         |   frame     |
1999
         +-------------+
2000
         | back chain  |
2001
   SP -> +-------------+
2002
 
2003
*/
2004
void
2005
spu_expand_prologue (void)
2006
{
2007
  HOST_WIDE_INT size = get_frame_size (), offset, regno;
2008
  HOST_WIDE_INT total_size;
2009
  HOST_WIDE_INT saved_regs_size;
2010
  rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
2011
  rtx scratch_reg_0, scratch_reg_1;
2012
  rtx insn, real;
2013
 
2014
  if (flag_pic && optimize == 0 && !cfun->machine->pic_reg)
2015
    cfun->machine->pic_reg = pic_offset_table_rtx;
2016
 
2017
  if (spu_naked_function_p (current_function_decl))
2018
    return;
2019
 
2020
  scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
2021
  scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
2022
 
2023
  saved_regs_size = spu_saved_regs_size ();
2024
  total_size = size + saved_regs_size
2025
    + crtl->outgoing_args_size
2026
    + crtl->args.pretend_args_size;
2027
 
2028
  if (!current_function_is_leaf
2029
      || cfun->calls_alloca || total_size > 0)
2030
    total_size += STACK_POINTER_OFFSET;
2031
 
2032
  /* Save this first because code after this might use the link
2033
     register as a scratch register. */
2034
  if (!current_function_is_leaf)
2035
    {
2036
      insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
2037
      RTX_FRAME_RELATED_P (insn) = 1;
2038
    }
2039
 
2040
  if (total_size > 0)
2041
    {
2042
      offset = -crtl->args.pretend_args_size;
2043
      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
2044
        if (need_to_save_reg (regno, 1))
2045
          {
2046
            offset -= 16;
2047
            insn = frame_emit_store (regno, sp_reg, offset);
2048
            RTX_FRAME_RELATED_P (insn) = 1;
2049
          }
2050
    }
2051
 
2052
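  /* Set up the PIC register for this function.  */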
  if (flag_pic && cfun->machine->pic_reg)
2053
    {
2054
      rtx pic_reg = cfun->machine->pic_reg;
2055
      insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
2056
      insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
2057
    }
2058
 
2059
  if (total_size > 0)
2060
    {
2061
      if (flag_stack_check)
2062
        {
2063
          /* We compare against total_size-1 because
2064
             ($sp >= total_size) <=> ($sp > total_size-1) */
2065
          rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
2066
          rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
2067
          rtx size_v4si = spu_const (V4SImode, total_size - 1);
2068
          if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
2069
            {
2070
              emit_move_insn (scratch_v4si, size_v4si);
2071
              size_v4si = scratch_v4si;
2072
            }
2073
          emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
2074
          emit_insn (gen_vec_extractv4si
2075
                     (scratch_reg_0, scratch_v4si, GEN_INT (1)));
2076
          emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
2077
        }
2078
 
2079
      /* Adjust the stack pointer, and make sure scratch_reg_0 contains
2080
         the value of the previous $sp because we save it as the back
2081
         chain. */
2082
      if (total_size <= 2000)
2083
        {
2084
          /* In this case we save the back chain first. */
2085
          insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
2086
          insn =
2087
            frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
2088
        }
2089
      else
2090
        {
2091
          insn = emit_move_insn (scratch_reg_0, sp_reg);
2092
          insn =
2093
            frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
2094
        }
2095
      RTX_FRAME_RELATED_P (insn) = 1;
2096
      real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
2097
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
2098
 
2099
      if (total_size > 2000)
2100
        {
2101
          /* Save the back chain ptr */
2102
          insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
2103
        }
2104
 
2105
      if (frame_pointer_needed)
2106
        {
2107
          rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
2108
          HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
2109
            + crtl->outgoing_args_size;
2110
          /* Set the new frame_pointer */
2111
          insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
2112
          RTX_FRAME_RELATED_P (insn) = 1;
2113
          real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
2114
          add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
2115
          REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
2116
        }
2117
    }
2118
 
2119
  if (flag_stack_usage_info)
2120
    current_function_static_stack_size = total_size;
2121
}
2122
 
2123
void
2124
spu_expand_epilogue (bool sibcall_p)
2125
{
2126
  int size = get_frame_size (), offset, regno;
2127
  HOST_WIDE_INT saved_regs_size, total_size;
2128
  rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
2129
  rtx scratch_reg_0;
2130
 
2131
  if (spu_naked_function_p (current_function_decl))
2132
    return;
2133
 
2134
  scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
2135
 
2136
  saved_regs_size = spu_saved_regs_size ();
2137
  total_size = size + saved_regs_size
2138
    + crtl->outgoing_args_size
2139
    + crtl->args.pretend_args_size;
2140
 
2141
  if (!current_function_is_leaf
2142
      || cfun->calls_alloca || total_size > 0)
2143
    total_size += STACK_POINTER_OFFSET;
2144
 
2145
  if (total_size > 0)
2146
    {
2147
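      /* Restore $sp: reload it from the back chain when alloca may have
         changed it, otherwise just add the frame size back.  */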
      if (cfun->calls_alloca)
2148
        frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
2149
      else
2150
        frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
2151
 
2152
 
2153
      if (saved_regs_size > 0)
2154
        {
2155
          offset = -crtl->args.pretend_args_size;
2156
          for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
2157
            if (need_to_save_reg (regno, 1))
2158
              {
2159
                offset -= 0x10;
2160
                frame_emit_load (regno, sp_reg, offset);
2161
              }
2162
        }
2163
    }
2164
 
2165
  if (!current_function_is_leaf)
2166
    frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
2167
 
2168
  if (!sibcall_p)
2169
    {
2170
      emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
2171
      emit_jump_insn (gen__return ());
2172
    }
2173
}
2174
 
2175
rtx
2176
spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
2177
{
2178
  if (count != 0)
2179
    return 0;
2180
  /* This is inefficient because it ends up copying to a save-register
2181
     which then gets saved even though $lr has already been saved.  But
2182
     it does generate better code for leaf functions and we don't need
2183
     to use RETURN_ADDRESS_POINTER_REGNUM to get it working.  It's only
2184
     used for __builtin_return_address anyway, so maybe we don't care if
2185
     it's inefficient. */
2186
  return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
2187
}
2188
 
2189
 
2190
/* Given VAL, generate a constant appropriate for MODE.
2191
   If MODE is a vector mode, every element will be VAL.
2192
   For TImode, VAL will be zero extended to 128 bits. */
2193
rtx
2194
spu_const (enum machine_mode mode, HOST_WIDE_INT val)
2195
{
2196
  rtx inner;
2197
  rtvec v;
2198
  int units, i;
2199
 
2200
  gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
2201
              || GET_MODE_CLASS (mode) == MODE_FLOAT
2202
              || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
2203
              || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
2204
 
2205
  if (GET_MODE_CLASS (mode) == MODE_INT)
2206
    return immed_double_const (val, 0, mode);
2207
 
2208
  /* val is the bit representation of the float */
2209
  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2210
    return hwint_to_const_double (mode, val);
2211
 
2212
  if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
2213
    inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
2214
  else
2215
    inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
2216
 
2217
  units = GET_MODE_NUNITS (mode);
2218
 
2219
  v = rtvec_alloc (units);
2220
 
2221
  for (i = 0; i < units; ++i)
2222
    RTVEC_ELT (v, i) = inner;
2223
 
2224
  return gen_rtx_CONST_VECTOR (mode, v);
2225
}
2226
 
2227
/* Create a MODE vector constant from 4 ints. */
2228
rtx
2229
spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
2230
{
2231
  unsigned char arr[16];
2232
  arr[0] = (a >> 24) & 0xff;
2233
  arr[1] = (a >> 16) & 0xff;
2234
  arr[2] = (a >> 8) & 0xff;
2235
  arr[3] = (a >> 0) & 0xff;
2236
  arr[4] = (b >> 24) & 0xff;
2237
  arr[5] = (b >> 16) & 0xff;
2238
  arr[6] = (b >> 8) & 0xff;
2239
  arr[7] = (b >> 0) & 0xff;
2240
  arr[8] = (c >> 24) & 0xff;
2241
  arr[9] = (c >> 16) & 0xff;
2242
  arr[10] = (c >> 8) & 0xff;
2243
  arr[11] = (c >> 0) & 0xff;
2244
  arr[12] = (d >> 24) & 0xff;
2245
  arr[13] = (d >> 16) & 0xff;
2246
  arr[14] = (d >> 8) & 0xff;
2247
  arr[15] = (d >> 0) & 0xff;
2248
  return array_to_constant(mode, arr);
2249
}
2250
 
2251
/* branch hint stuff */
2252
 
2253
/* An array of these is used to propagate hints to predecessor blocks. */
2254
struct spu_bb_info
2255
{
2256
  rtx prop_jump; /* propagated from another block */
2257
  int bb_index;  /* the original block. */
2258
};
2259
static struct spu_bb_info *spu_bb_info;
2260
 
2261
#define STOP_HINT_P(INSN) \
2262
                (GET_CODE(INSN) == CALL_INSN \
2263
                 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
2264
                 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
2265
 
2266
/* 1 when RTX is a hinted branch or its target.  We keep track of
2267
   what has been hinted so the safe-hint code can test it easily.  */
2268
#define HINTED_P(RTX)                                           \
2269
  (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
2270
 
2271
/* 1 when RTX is an insn that must be scheduled on an even boundary. */
2272
#define SCHED_ON_EVEN_P(RTX)                                            \
2273
  (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
2274
 
2275
/* Emit a nop for INSN such that the two will dual issue.  This assumes
2276
   INSN is 8-byte aligned.  When INSN is inline asm we emit an lnop.
2277
   We check for TImode to handle a MULTI1 insn which has dual issued its
2278
   first instruction.  get_pipe returns -1 for MULTI0, inline asm, or
2279
   ADDR_VEC insns. */
2280
static void
2281
emit_nop_for_insn (rtx insn)
2282
{
2283
  int p;
2284
  rtx new_insn;
2285
  p = get_pipe (insn);
2286
  if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2287
    new_insn = emit_insn_after (gen_lnop (), insn);
2288
  else if (p == 1 && GET_MODE (insn) == TImode)
2289
    {
2290
      new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2291
      PUT_MODE (new_insn, TImode);
2292
      PUT_MODE (insn, VOIDmode);
2293
    }
2294
  else
2295
    new_insn = emit_insn_after (gen_lnop (), insn);
2296
  recog_memoized (new_insn);
2297
  INSN_LOCATOR (new_insn) = INSN_LOCATOR (insn);
2298
}
2299
 
2300
/* Insert nops in basic blocks to meet dual issue alignment
2301
   requirements.  Also make sure hbrp and hint instructions are at least
2302
   one cycle apart, possibly inserting a nop.  */
2303
static void
2304
pad_bb(void)
2305
{
2306
  rtx insn, next_insn, prev_insn, hbr_insn = 0;
2307
  int length;
2308
  int addr;
2309
 
2310
  /* This sets up INSN_ADDRESSES. */
2311
  shorten_branches (get_insns ());
2312
 
2313
  /* Keep track of length added by nops. */
2314
  length = 0;
2315
 
2316
  prev_insn = 0;
2317
  insn = get_insns ();
2318
  if (!active_insn_p (insn))
2319
    insn = next_active_insn (insn);
2320
  for (; insn; insn = next_insn)
2321
    {
2322
      next_insn = next_active_insn (insn);
2323
      if (INSN_CODE (insn) == CODE_FOR_iprefetch
2324
          || INSN_CODE (insn) == CODE_FOR_hbr)
2325
        {
2326
          if (hbr_insn)
2327
            {
2328
              int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2329
              int a1 = INSN_ADDRESSES (INSN_UID (insn));
2330
              if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2331
                  || (a1 - a0 == 4))
2332
                {
2333
                  prev_insn = emit_insn_before (gen_lnop (), insn);
2334
                  PUT_MODE (prev_insn, GET_MODE (insn));
2335
                  PUT_MODE (insn, TImode);
2336
                  INSN_LOCATOR (prev_insn) = INSN_LOCATOR (insn);
2337
                  length += 4;
2338
                }
2339
            }
2340
          hbr_insn = insn;
2341
        }
2342
      if (INSN_CODE (insn) == CODE_FOR_blockage)
2343
        {
2344
          if (GET_MODE (insn) == TImode)
2345
            PUT_MODE (next_insn, TImode);
2346
          insn = next_insn;
2347
          next_insn = next_active_insn (insn);
2348
        }
2349
      addr = INSN_ADDRESSES (INSN_UID (insn));
2350
      if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2351
        {
2352
          if (((addr + length) & 7) != 0)
2353
            {
2354
              emit_nop_for_insn (prev_insn);
2355
              length += 4;
2356
            }
2357
        }
2358
      else if (GET_MODE (insn) == TImode
2359
               && ((next_insn && GET_MODE (next_insn) != TImode)
2360
                   || get_attr_type (insn) == TYPE_MULTI0)
2361
               && ((addr + length) & 7) != 0)
2362
        {
2363
          /* prev_insn will always be set because the first insn is
2364
             always 8-byte aligned. */
2365
          emit_nop_for_insn (prev_insn);
2366
          length += 4;
2367
        }
2368
      prev_insn = insn;
2369
    }
2370
}
2371
 
2372
 
2373
/* Routines for branch hints. */
2374
 
2375
static void
2376
spu_emit_branch_hint (rtx before, rtx branch, rtx target,
2377
                      int distance, sbitmap blocks)
2378
{
2379
  rtx branch_label = 0;
2380
  rtx hint;
2381
  rtx insn;
2382
  rtx table;
2383
 
2384
  if (before == 0 || branch == 0 || target == 0)
2385
    return;
2386
 
2387
  /* While scheduling we require hints to be no further than 600, so
2388
     we need to enforce that here too */
2389
  if (distance > 600)
2390
    return;
2391
 
2392
  /* If BEFORE is a basic block note, emit the hint after the note.  */
2393
  if (NOTE_INSN_BASIC_BLOCK_P (before))
2394
    before = NEXT_INSN (before);
2395
 
2396
  branch_label = gen_label_rtx ();
2397
  LABEL_NUSES (branch_label)++;
2398
  LABEL_PRESERVE_P (branch_label) = 1;
2399
  insn = emit_label_before (branch_label, branch);
2400
  branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
2401
  SET_BIT (blocks, BLOCK_FOR_INSN (branch)->index);
2402
 
2403
  hint = emit_insn_before (gen_hbr (branch_label, target), before);
2404
  recog_memoized (hint);
2405
  INSN_LOCATOR (hint) = INSN_LOCATOR (branch);
2406
  HINTED_P (branch) = 1;
2407
 
2408
  if (GET_CODE (target) == LABEL_REF)
2409
    HINTED_P (XEXP (target, 0)) = 1;
2410
  else if (tablejump_p (branch, 0, &table))
2411
    {
2412
      rtvec vec;
2413
      int j;
2414
      if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2415
        vec = XVEC (PATTERN (table), 0);
2416
      else
2417
        vec = XVEC (PATTERN (table), 1);
2418
      for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2419
        HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
2420
    }
2421
 
2422
  if (distance >= 588)
2423
    {
2424
      /* Make sure the hint isn't scheduled any earlier than this point,
2425
         which could make it too far for the branch offset to fit */
2426
      insn = emit_insn_before (gen_blockage (), hint);
2427
      recog_memoized (insn);
2428
      INSN_LOCATOR (insn) = INSN_LOCATOR (hint);
2429
    }
2430
  else if (distance <= 8 * 4)
2431
    {
2432
      /* To guarantee at least 8 insns between the hint and branch we
2433
         insert nops. */
2434
      int d;
2435
      for (d = distance; d < 8 * 4; d += 4)
2436
        {
2437
          insn =
2438
            emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2439
          recog_memoized (insn);
2440
          INSN_LOCATOR (insn) = INSN_LOCATOR (hint);
2441
        }
2442
 
2443
      /* Make sure any nops inserted aren't scheduled before the hint. */
2444
      insn = emit_insn_after (gen_blockage (), hint);
2445
      recog_memoized (insn);
2446
      INSN_LOCATOR (insn) = INSN_LOCATOR (hint);
2447
 
2448
      /* Make sure any nops inserted aren't scheduled after the call. */
2449
      if (CALL_P (branch) && distance < 8 * 4)
2450
        {
2451
          insn = emit_insn_before (gen_blockage (), branch);
2452
          recog_memoized (insn);
2453
          INSN_LOCATOR (insn) = INSN_LOCATOR (branch);
2454
        }
2455
    }
2456
}
2457
 
2458
/* Returns 0 if we don't want a hint for this branch.  Otherwise return
2459
   the rtx for the branch target. */
2460
static rtx
2461
get_branch_target (rtx branch)
2462
{
2463
  if (GET_CODE (branch) == JUMP_INSN)
2464
    {
2465
      rtx set, src;
2466
 
2467
      /* Return statements */
2468
      if (GET_CODE (PATTERN (branch)) == RETURN)
2469
        return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2470
 
2471
      /* jump table */
2472
      if (GET_CODE (PATTERN (branch)) == ADDR_VEC
2473
          || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
2474
        return 0;
2475
 
2476
      /* ASM GOTOs. */
2477
      if (extract_asm_operands (PATTERN (branch)) != NULL)
2478
        return NULL;
2479
 
2480
      set = single_set (branch);
2481
      src = SET_SRC (set);
2482
      if (GET_CODE (SET_DEST (set)) != PC)
2483
        abort ();
2484
 
2485
      if (GET_CODE (src) == IF_THEN_ELSE)
2486
        {
2487
          rtx lab = 0;
2488
          rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2489
          if (note)
2490
            {
2491
              /* If the more probable case is not a fall through, then
2492
                 try a branch hint.  */
2493
              HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
2494
              if (prob > (REG_BR_PROB_BASE * 6 / 10)
2495
                  && GET_CODE (XEXP (src, 1)) != PC)
2496
                lab = XEXP (src, 1);
2497
              else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2498
                       && GET_CODE (XEXP (src, 2)) != PC)
2499
                lab = XEXP (src, 2);
2500
            }
2501
          if (lab)
2502
            {
2503
              if (GET_CODE (lab) == RETURN)
2504
                return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2505
              return lab;
2506
            }
2507
          return 0;
2508
        }
2509
 
2510
      return src;
2511
    }
2512
  else if (GET_CODE (branch) == CALL_INSN)
2513
    {
2514
      rtx call;
2515
      /* All of our call patterns are in a PARALLEL and the CALL is
2516
         the first pattern in the PARALLEL. */
2517
      if (GET_CODE (PATTERN (branch)) != PARALLEL)
2518
        abort ();
2519
      call = XVECEXP (PATTERN (branch), 0, 0);
2520
      if (GET_CODE (call) == SET)
2521
        call = SET_SRC (call);
2522
      if (GET_CODE (call) != CALL)
2523
        abort ();
2524
      return XEXP (XEXP (call, 0), 0);
2525
    }
2526
  return 0;
2527
}
2528
 
2529
/* The special $hbr register is used to prevent the insn scheduler from
2530
   moving hbr insns across instructions which invalidate them.  It
2531
   should only be used in a clobber, and this function searches for
2532
   insns which clobber it.  */
2533
static bool
2534
insn_clobbers_hbr (rtx insn)
2535
{
2536
  if (INSN_P (insn)
2537
      && GET_CODE (PATTERN (insn)) == PARALLEL)
2538
    {
2539
      rtx parallel = PATTERN (insn);
2540
      rtx clobber;
2541
      int j;
2542
      for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2543
        {
2544
          clobber = XVECEXP (parallel, 0, j);
2545
          if (GET_CODE (clobber) == CLOBBER
2546
              && GET_CODE (XEXP (clobber, 0)) == REG
2547
              && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2548
            return 1;
2549
        }
2550
    }
2551
  return 0;
2552
}
2553
 
2554
/* Search up to 32 insns starting at FIRST:
2555
   - at any kind of hinted branch, just return
2556
   - at any unconditional branch in the first 15 insns, just return
2557
   - at a call or indirect branch, after the first 15 insns, force it to
2558
     an even address and return
2559
   - at any unconditional branch, after the first 15 insns, force it to
2560
     an even address.
2561
   At the end of the search, insert an hbrp within 4 insns of FIRST,
2562
   and an hbrp within 16 instructions of FIRST.
2563
 */
2564
static void
2565
insert_hbrp_for_ilb_runout (rtx first)
2566
{
2567
  rtx insn, before_4 = 0, before_16 = 0;
2568
  int addr = 0, length, first_addr = -1;
2569
  int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2570
  int insert_lnop_after = 0;
2571
  for (insn = first; insn; insn = NEXT_INSN (insn))
2572
    if (INSN_P (insn))
2573
      {
2574
        if (first_addr == -1)
2575
          first_addr = INSN_ADDRESSES (INSN_UID (insn));
2576
        addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2577
        length = get_attr_length (insn);
2578
 
2579
        if (before_4 == 0 && addr + length >= 4 * 4)
2580
          before_4 = insn;
2581
        /* We test for 14 instructions because the first hbrp will add
2582
           up to 2 instructions. */
2583
        if (before_16 == 0 && addr + length >= 14 * 4)
2584
          before_16 = insn;
2585
 
2586
        if (INSN_CODE (insn) == CODE_FOR_hbr)
2587
          {
2588
            /* Make sure an hbrp is at least 2 cycles away from a hint.
2589
               Insert an lnop after the hbrp when necessary. */
2590
            if (before_4 == 0 && addr > 0)
2591
              {
2592
                before_4 = insn;
2593
                insert_lnop_after |= 1;
2594
              }
2595
            else if (before_4 && addr <= 4 * 4)
2596
              insert_lnop_after |= 1;
2597
            if (before_16 == 0 && addr > 10 * 4)
2598
              {
2599
                before_16 = insn;
2600
                insert_lnop_after |= 2;
2601
              }
2602
            else if (before_16 && addr <= 14 * 4)
2603
              insert_lnop_after |= 2;
2604
          }
2605
 
2606
        if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2607
          {
2608
            if (addr < hbrp_addr0)
2609
              hbrp_addr0 = addr;
2610
            else if (addr < hbrp_addr1)
2611
              hbrp_addr1 = addr;
2612
          }
2613
 
2614
        if (CALL_P (insn) || JUMP_P (insn))
2615
          {
2616
            if (HINTED_P (insn))
2617
              return;
2618
 
2619
            /* Any branch after the first 15 insns should be on an even
2620
               address to avoid a special case branch.  There might be
2621
               some nops and/or hbrps inserted, so we test after 10
2622
               insns. */
2623
            if (addr > 10 * 4)
2624
              SCHED_ON_EVEN_P (insn) = 1;
2625
          }
2626
 
2627
        if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2628
          return;
2629
 
2630
 
2631
        if (addr + length >= 32 * 4)
2632
          {
2633
            gcc_assert (before_4 && before_16);
2634
            if (hbrp_addr0 > 4 * 4)
2635
              {
2636
                insn =
2637
                  emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2638
                recog_memoized (insn);
2639
                INSN_LOCATOR (insn) = INSN_LOCATOR (before_4);
2640
                INSN_ADDRESSES_NEW (insn,
2641
                                    INSN_ADDRESSES (INSN_UID (before_4)));
2642
                PUT_MODE (insn, GET_MODE (before_4));
2643
                PUT_MODE (before_4, TImode);
2644
                if (insert_lnop_after & 1)
2645
                  {
2646
                    insn = emit_insn_before (gen_lnop (), before_4);
2647
                    recog_memoized (insn);
2648
                    INSN_LOCATOR (insn) = INSN_LOCATOR (before_4);
2649
                    INSN_ADDRESSES_NEW (insn,
2650
                                        INSN_ADDRESSES (INSN_UID (before_4)));
2651
                    PUT_MODE (insn, TImode);
2652
                  }
2653
              }
2654
            if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2655
                && hbrp_addr1 > 16 * 4)
2656
              {
2657
                insn =
2658
                  emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2659
                recog_memoized (insn);
2660
                INSN_LOCATOR (insn) = INSN_LOCATOR (before_16);
2661
                INSN_ADDRESSES_NEW (insn,
2662
                                    INSN_ADDRESSES (INSN_UID (before_16)));
2663
                PUT_MODE (insn, GET_MODE (before_16));
2664
                PUT_MODE (before_16, TImode);
2665
                if (insert_lnop_after & 2)
2666
                  {
2667
                    insn = emit_insn_before (gen_lnop (), before_16);
2668
                    recog_memoized (insn);
2669
                    INSN_LOCATOR (insn) = INSN_LOCATOR (before_16);
2670
                    INSN_ADDRESSES_NEW (insn,
2671
                                        INSN_ADDRESSES (INSN_UID
2672
                                                        (before_16)));
2673
                    PUT_MODE (insn, TImode);
2674
                  }
2675
              }
2676
            return;
2677
          }
2678
      }
2679
    else if (BARRIER_P (insn))
2680
      return;
2681
 
2682
}
2683
 
2684
/* The SPU might hang when it executes 48 inline instructions after a
2685
   hinted branch jumps to its hinted target.  The beginning of a
2686
   function and the return from a call might have been hinted, and
2687
   must be handled as well.  To prevent a hang we insert 2 hbrps.  The
2688
   first should be within 6 insns of the branch target.  The second
2689
   should be within 22 insns of the branch target.  When determining
2690
   if hbrps are necessary, we look for only 32 inline instructions,
2691
   because up to 12 nops and 4 hbrps could be inserted.  Similarly,
2692
   when inserting new hbrps, we insert them within 4 and 16 insns of
2693
   the target.  */
2694
static void
2695
insert_hbrp (void)
2696
{
2697
  rtx insn;
2698
  if (TARGET_SAFE_HINTS)
2699
    {
2700
      shorten_branches (get_insns ());
2701
      /* Insert hbrp at beginning of function */
2702
      insn = next_active_insn (get_insns ());
2703
      if (insn)
2704
        insert_hbrp_for_ilb_runout (insn);
2705
      /* Insert hbrp after hinted targets. */
2706
      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2707
        if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2708
          insert_hbrp_for_ilb_runout (next_active_insn (insn));
2709
    }
2710
}
2711
 
2712
static int in_spu_reorg;
2713
 
2714
static void
2715
spu_var_tracking (void)
2716
{
2717
  if (flag_var_tracking)
2718
    {
2719
      df_analyze ();
2720
      timevar_push (TV_VAR_TRACKING);
2721
      variable_tracking_main ();
2722
      timevar_pop (TV_VAR_TRACKING);
2723
      df_finish_pass (false);
2724
    }
2725
}
2726
 
2727
/* Insert branch hints.  There are no branch optimizations after this
2728
   pass, so it's safe to set our branch hints now. */
2729
static void
2730
spu_machine_dependent_reorg (void)
2731
{
2732
  sbitmap blocks;
2733
  basic_block bb;
2734
  rtx branch, insn;
2735
  rtx branch_target = 0;
2736
  int branch_addr = 0, insn_addr, required_dist = 0;
2737
  int i;
2738
  unsigned int j;
2739
 
2740
  if (!TARGET_BRANCH_HINTS || optimize == 0)
2741
    {
2742
      /* We still do it for unoptimized code because an external
2743
         function might have hinted a call or return. */
2744
      compute_bb_for_insn ();
2745
      insert_hbrp ();
2746
      pad_bb ();
2747
      spu_var_tracking ();
2748
      free_bb_for_insn ();
2749
      return;
2750
    }
2751
 
2752
  blocks = sbitmap_alloc (last_basic_block);
2753
  sbitmap_zero (blocks);
2754
 
2755
  in_spu_reorg = 1;
2756
  compute_bb_for_insn ();
2757
 
2758
  compact_blocks ();
2759
 
2760
  spu_bb_info =
2761
    (struct spu_bb_info *) xcalloc (n_basic_blocks,
2762
                                    sizeof (struct spu_bb_info));
2763
 
2764
  /* We need exact insn addresses and lengths.  */
2765
  shorten_branches (get_insns ());
2766
 
2767
  for (i = n_basic_blocks - 1; i >= 0; i--)
2768
    {
2769
      bb = BASIC_BLOCK (i);
2770
      branch = 0;
2771
      if (spu_bb_info[i].prop_jump)
2772
        {
2773
          branch = spu_bb_info[i].prop_jump;
2774
          branch_target = get_branch_target (branch);
2775
          branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2776
          required_dist = spu_hint_dist;
2777
        }
2778
      /* Search from end of a block to beginning.   In this loop, find
2779
         jumps which need a hint and emit the hint only when:
2780
         - it's an indirect branch and we're at the insn which sets
2781
         the register
2782
         - we're at an insn that will invalidate the hint. e.g., a
2783
         call, another hint insn, inline asm that clobbers $hbr, and
2784
         some inlined operations (divmodsi4).  Don't consider jumps
2785
         because they are only at the end of a block and are
2786
         considered when we are deciding whether to propagate
2787
         - we're getting too far away from the branch.  The hbr insns
2788
         only have a signed 10-bit offset.
2789
         We go back as far as possible so the branch will be considered
2790
         for propagation when we get to the beginning of the block.  */
2791
      for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2792
        {
2793
          if (INSN_P (insn))
2794
            {
2795
              insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2796
              if (branch
2797
                  && ((GET_CODE (branch_target) == REG
2798
                       && set_of (branch_target, insn) != NULL_RTX)
2799
                      || insn_clobbers_hbr (insn)
2800
                      || branch_addr - insn_addr > 600))
2801
                {
2802
                  rtx next = NEXT_INSN (insn);
2803
                  int next_addr = INSN_ADDRESSES (INSN_UID (next));
2804
                  if (insn != BB_END (bb)
2805
                      && branch_addr - next_addr >= required_dist)
2806
                    {
2807
                      if (dump_file)
2808
                        fprintf (dump_file,
2809
                                 "hint for %i in block %i before %i\n",
2810
                                 INSN_UID (branch), bb->index,
2811
                                 INSN_UID (next));
2812
                      spu_emit_branch_hint (next, branch, branch_target,
2813
                                            branch_addr - next_addr, blocks);
2814
                    }
2815
                  branch = 0;
2816
                }
2817
 
2818
              /* JUMP_P will only be true at the end of a block.  When
2819
                 branch is already set it means we've previously decided
2820
                 to propagate a hint for that branch into this block. */
2821
              if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2822
                {
2823
                  branch = 0;
2824
                  if ((branch_target = get_branch_target (insn)))
2825
                    {
2826
                      branch = insn;
2827
                      branch_addr = insn_addr;
2828
                      required_dist = spu_hint_dist;
2829
                    }
2830
                }
2831
            }
2832
          if (insn == BB_HEAD (bb))
2833
            break;
2834
        }
2835
 
2836
      if (branch)
2837
        {
2838
          /* If we haven't emitted a hint for this branch yet, it might
2839
             be profitable to emit it in one of the predecessor blocks,
2840
             especially for loops.  */
2841
          rtx bbend;
2842
          basic_block prev = 0, prop = 0, prev2 = 0;
2843
          int loop_exit = 0, simple_loop = 0;
2844
          int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2845
 
2846
          for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2847
            if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2848
              prev = EDGE_PRED (bb, j)->src;
2849
            else
2850
              prev2 = EDGE_PRED (bb, j)->src;
2851
 
2852
          for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2853
            if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2854
              loop_exit = 1;
2855
            else if (EDGE_SUCC (bb, j)->dest == bb)
2856
              simple_loop = 1;
2857
 
2858
          /* If this branch is a loop exit then propagate to previous
2859
             fallthru block. This catches the cases when it is a simple
2860
             loop or when there is an initial branch into the loop. */
2861
          if (prev && (loop_exit || simple_loop)
2862
              && prev->loop_depth <= bb->loop_depth)
2863
            prop = prev;
2864
 
2865
          /* If there is only one adjacent predecessor, don't propagate
2866
             outside this loop.  This loop_depth test isn't perfect, but
2867
             I'm not sure the loop_father member is valid at this point.  */
2868
          else if (prev && single_pred_p (bb)
2869
                   && prev->loop_depth == bb->loop_depth)
2870
            prop = prev;
2871
 
2872
          /* If this is the JOIN block of a simple IF-THEN then
2873
             propagate the hint to the HEADER block. */
2874
          else if (prev && prev2
2875
                   && EDGE_COUNT (bb->preds) == 2
2876
                   && EDGE_COUNT (prev->preds) == 1
2877
                   && EDGE_PRED (prev, 0)->src == prev2
2878
                   && prev2->loop_depth == bb->loop_depth
2879
                   && GET_CODE (branch_target) != REG)
2880
            prop = prev;
2881
 
2882
          /* Don't propagate when:
2883
             - this is a simple loop and the hint would be too far
2884
             - this is not a simple loop and there are 16 insns in
2885
             this block already
2886
             - the predecessor block ends in a branch that will be
2887
             hinted
2888
             - the predecessor block ends in an insn that invalidates
2889
             the hint */
2890
          if (prop
2891
              && prop->index >= 0
2892
              && (bbend = BB_END (prop))
2893
              && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2894
              (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2895
              && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2896
            {
2897
              if (dump_file)
2898
                fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2899
                         "for %i (loop_exit %i simple_loop %i dist %i)\n",
2900
                         bb->index, prop->index, bb->loop_depth,
2901
                         INSN_UID (branch), loop_exit, simple_loop,
2902
                         branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2903
 
2904
              spu_bb_info[prop->index].prop_jump = branch;
2905
              spu_bb_info[prop->index].bb_index = i;
2906
            }
2907
          else if (branch_addr - next_addr >= required_dist)
2908
            {
2909
              if (dump_file)
2910
                fprintf (dump_file, "hint for %i in block %i before %i\n",
2911
                         INSN_UID (branch), bb->index,
2912
                         INSN_UID (NEXT_INSN (insn)));
2913
              spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2914
                                    branch_addr - next_addr, blocks);
2915
            }
2916
          branch = 0;
2917
        }
2918
    }
2919
  free (spu_bb_info);
2920
 
2921
  if (!sbitmap_empty_p (blocks))
2922
    find_many_sub_basic_blocks (blocks);
2923
 
2924
  /* We have to schedule to make sure alignment is ok. */
2925
  FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
2926
 
2927
  /* The hints need to be scheduled, so call it again. */
2928
  schedule_insns ();
2929
  df_finish_pass (true);
2930
 
2931
  insert_hbrp ();
2932
 
2933
  pad_bb ();
2934
 
2935
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2936
    if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2937
      {
2938
        /* Adjust the LABEL_REF in a hint when we have inserted a nop
2939
           between its branch label and the branch.  We don't move the
2940
           label because GCC expects it at the beginning of the block. */
2941
        rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2942
        rtx label_ref = XVECEXP (unspec, 0, 0);
2943
        rtx label = XEXP (label_ref, 0);
2944
        rtx branch;
2945
        int offset = 0;
2946
        for (branch = NEXT_INSN (label);
2947
             !JUMP_P (branch) && !CALL_P (branch);
2948
             branch = NEXT_INSN (branch))
2949
          if (NONJUMP_INSN_P (branch))
2950
            offset += get_attr_length (branch);
2951
        if (offset > 0)
2952
          XVECEXP (unspec, 0, 0) = plus_constant (label_ref, offset);
2953
      }
2954
 
2955
  spu_var_tracking ();
2956
 
2957
  free_bb_for_insn ();
2958
 
2959
  in_spu_reorg = 0;
2960
}
2961
 
2962
 
2963
/* Insn scheduling routines, primarily for dual issue. */
2964
static int
2965
spu_sched_issue_rate (void)
2966
{
2967
  return 2;
2968
}
2969
 
2970
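/* Return 1 if INSN reads or writes memory and therefore needs the
   load/store unit.  */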
static int
2971
uses_ls_unit(rtx insn)
2972
{
2973
  rtx set = single_set (insn);
2974
  if (set != 0
2975
      && (GET_CODE (SET_DEST (set)) == MEM
2976
          || GET_CODE (SET_SRC (set)) == MEM))
2977
    return 1;
2978
  return 0;
2979
}
2980
 
2981
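/* Return the pipeline INSN issues on: 0 for pipe 0 (even), 1 for pipe 1
   (odd), -1 for MULTI0 insns and inline asm, -2 for CONVERT.  */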
static int
2982
get_pipe (rtx insn)
2983
{
2984
  enum attr_type t;
2985
  /* Handle inline asm */
2986
  if (INSN_CODE (insn) == -1)
2987
    return -1;
2988
  t = get_attr_type (insn);
2989
  switch (t)
2990
    {
2991
    case TYPE_CONVERT:
2992
      return -2;
2993
    case TYPE_MULTI0:
2994
      return -1;
2995
 
2996
    case TYPE_FX2:
2997
    case TYPE_FX3:
2998
    case TYPE_SPR:
2999
    case TYPE_NOP:
3000
    case TYPE_FXB:
3001
    case TYPE_FPD:
3002
    case TYPE_FP6:
3003
    case TYPE_FP7:
3004
      return 0;
3005
 
3006
    case TYPE_LNOP:
3007
    case TYPE_SHUF:
3008
    case TYPE_LOAD:
3009
    case TYPE_STORE:
3010
    case TYPE_BR:
3011
    case TYPE_MULTI1:
3012
    case TYPE_HBR:
3013
    case TYPE_IPREFETCH:
3014
      return 1;
3015
    default:
3016
      abort ();
3017
    }
3018
}
3019
 
3020
 
3021
/* haifa-sched.c has a static variable that keeps track of the current
3022
   cycle.  It is passed to spu_sched_reorder, and we record it here for
3023
   use by spu_sched_variable_issue.  It won't be accurate if the
3024
   scheduler updates it's clock_var between the two calls. */
3025
static int clock_var;
3026
 
3027
/* This is used to keep track of insn alignment.  Set to 0 at the
3028
   beginning of each block and increased by the "length" attr of each
3029
   insn scheduled. */
3030
static int spu_sched_length;
3031
 
3032
/* Record when we've issued pipe0 and pipe1 insns so we can reorder the
3033
   ready list appropriately in spu_sched_reorder(). */
3034
static int pipe0_clock;
3035
static int pipe1_clock;
3036
 
3037
static int prev_clock_var;
3038
 
3039
static int prev_priority;
3040
 
3041
/* The SPU needs to load the next ilb sometime during the execution of
3042
   the previous ilb.  There is a potential conflict if every cycle has a
3043
   load or store.  To avoid the conflict we make sure the load/store
3044
   unit is free for at least one cycle during the execution of insns in
3045
   the previous ilb. */
3046
static int spu_ls_first;
3047
static int prev_ls_clock;
3048
 
3049
static void
3050
spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3051
                       int max_ready ATTRIBUTE_UNUSED)
3052
{
3053
  spu_sched_length = 0;
3054
}
3055
 
3056
static void
3057
spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3058
                int max_ready ATTRIBUTE_UNUSED)
3059
{
3060
  if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
3061
    {
3062
      /* When any block might be at least 8-byte aligned, assume they
3063
         will all be at least 8-byte aligned to make sure dual issue
3064
         works out correctly. */
3065
      spu_sched_length = 0;
3066
    }
3067
  spu_ls_first = INT_MAX;
3068
  clock_var = -1;
3069
  prev_ls_clock = -1;
3070
  pipe0_clock = -1;
3071
  pipe1_clock = -1;
3072
  prev_clock_var = -1;
3073
  prev_priority = -1;
3074
}
3075
 
3076
static int
3077
spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
3078
                          int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
3079
{
3080
  int len;
3081
  int p;
3082
  if (GET_CODE (PATTERN (insn)) == USE
3083
      || GET_CODE (PATTERN (insn)) == CLOBBER
3084
      || (len = get_attr_length (insn)) == 0)
3085
    return more;
3086
 
3087
  spu_sched_length += len;
3088
 
3089
  /* Reset on inline asm */
3090
  if (INSN_CODE (insn) == -1)
3091
    {
3092
      spu_ls_first = INT_MAX;
3093
      pipe0_clock = -1;
3094
      pipe1_clock = -1;
3095
      return 0;
3096
    }
3097
  p = get_pipe (insn);
3098
  if (p == 0)
3099
    pipe0_clock = clock_var;
3100
  else
3101
    pipe1_clock = clock_var;
3102
 
3103
  if (in_spu_reorg)
3104
    {
3105
      if (clock_var - prev_ls_clock > 1
3106
          || INSN_CODE (insn) == CODE_FOR_iprefetch)
3107
        spu_ls_first = INT_MAX;
3108
      if (uses_ls_unit (insn))
3109
        {
3110
          if (spu_ls_first == INT_MAX)
3111
            spu_ls_first = spu_sched_length;
3112
          prev_ls_clock = clock_var;
3113
        }
3114
 
3115
      /* The scheduler hasn't inserted the nop, but we will later on.
3116
         Include those nops in spu_sched_length. */
3117
      if (prev_clock_var == clock_var && (spu_sched_length & 7))
3118
        spu_sched_length += 4;
3119
      prev_clock_var = clock_var;
3120
 
3121
      /* more is -1 when called from spu_sched_reorder for new insns
3122
         that don't have INSN_PRIORITY */
3123
      if (more >= 0)
3124
        prev_priority = INSN_PRIORITY (insn);
3125
    }
3126
 
3127
  /* Always try issuing more insns.  spu_sched_reorder will decide
3128
     when the cycle should be advanced. */
3129
  return 1;
3130
}
3131
 
3132
/* This function is called for both TARGET_SCHED_REORDER and
3133
   TARGET_SCHED_REORDER2.  */
3134
static int
3135
spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3136
                   rtx *ready, int *nreadyp, int clock)
3137
{
3138
  int i, nready = *nreadyp;
3139
  int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
3140
  rtx insn;
3141
 
3142
  clock_var = clock;
3143
 
3144
  if (nready <= 0 || pipe1_clock >= clock)
3145
    return 0;
3146
 
3147
  /* Find any rtl insns that don't generate assembly insns and schedule
3148
     them first. */
3149
  for (i = nready - 1; i >= 0; i--)
3150
    {
3151
      insn = ready[i];
3152
      if (INSN_CODE (insn) == -1
3153
          || INSN_CODE (insn) == CODE_FOR_blockage
3154
          || (INSN_P (insn) && get_attr_length (insn) == 0))
3155
        {
3156
          ready[i] = ready[nready - 1];
3157
          ready[nready - 1] = insn;
3158
          return 1;
3159
        }
3160
    }
3161
 
3162
  pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
3163
  for (i = 0; i < nready; i++)
3164
    if (INSN_CODE (ready[i]) != -1)
3165
      {
3166
        insn = ready[i];
3167
        switch (get_attr_type (insn))
3168
          {
3169
          default:
3170
          case TYPE_MULTI0:
3171
          case TYPE_CONVERT:
3172
          case TYPE_FX2:
3173
          case TYPE_FX3:
3174
          case TYPE_SPR:
3175
          case TYPE_NOP:
3176
          case TYPE_FXB:
3177
          case TYPE_FPD:
3178
          case TYPE_FP6:
3179
          case TYPE_FP7:
3180
            pipe_0 = i;
3181
            break;
3182
          case TYPE_LOAD:
3183
          case TYPE_STORE:
3184
            pipe_ls = i;
3185
          case TYPE_LNOP:
3186
          case TYPE_SHUF:
3187
          case TYPE_BR:
3188
          case TYPE_MULTI1:
3189
          case TYPE_HBR:
3190
            pipe_1 = i;
3191
            break;
3192
          case TYPE_IPREFETCH:
3193
            pipe_hbrp = i;
3194
            break;
3195
          }
3196
      }
3197
 
3198
  /* In the first scheduling phase, schedule loads and stores together
3199
     to increase the chance they will get merged during postreload CSE. */
3200
  if (!reload_completed && pipe_ls >= 0)
3201
    {
3202
      insn = ready[pipe_ls];
3203
      ready[pipe_ls] = ready[nready - 1];
3204
      ready[nready - 1] = insn;
3205
      return 1;
3206
    }
3207
 
3208
  /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
3209
  if (pipe_hbrp >= 0)
3210
    pipe_1 = pipe_hbrp;
3211
 
3212
  /* When we have loads/stores in every cycle of the last 15 insns and
3213
     we are about to schedule another load/store, emit an hbrp insn
3214
     instead. */
3215
  if (in_spu_reorg
3216
      && spu_sched_length - spu_ls_first >= 4 * 15
3217
      && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
3218
    {
3219
      insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
3220
      recog_memoized (insn);
3221
      if (pipe0_clock < clock)
3222
        PUT_MODE (insn, TImode);
3223
      spu_sched_variable_issue (file, verbose, insn, -1);
3224
      return 0;
3225
    }
3226
 
3227
  /* In general, we want to emit nops to increase dual issue, but dual
3228
     issue isn't faster when one of the insns could be scheduled later
3229
     without affecting the critical path.  We look at INSN_PRIORITY to
3230
     make a good guess, but it isn't perfect so -mdual-nops=n can be
3231
     used to affect it. */
3232
  if (in_spu_reorg && spu_dual_nops < 10)
3233
    {
3234
      /* When we are at an even address and we are not issuing nops to
3235
         improve scheduling then we need to advance the cycle.  */
3236
      if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
3237
          && (spu_dual_nops == 0
3238
              || (pipe_1 != -1
3239
                  && prev_priority >
3240
                  INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
3241
        return 0;
3242
 
3243
      /* When at an odd address, schedule the highest priority insn
3244
         without considering pipeline. */
3245
      if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
3246
          && (spu_dual_nops == 0
3247
              || (prev_priority >
3248
                  INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
3249
        return 1;
3250
    }
3251
 
3252
 
3253
  /* We haven't issued a pipe0 insn yet this cycle; if there is a
3254
     pipe0 insn in the ready list, schedule it. */
3255
  if (pipe0_clock < clock && pipe_0 >= 0)
3256
    schedule_i = pipe_0;
3257
 
3258
  /* Either we've scheduled a pipe0 insn already or there is no pipe0
3259
     insn to schedule.  Put a pipe1 insn at the front of the ready list. */
3260
  else
3261
    schedule_i = pipe_1;
3262
 
3263
  if (schedule_i > -1)
3264
    {
3265
      insn = ready[schedule_i];
3266
      ready[schedule_i] = ready[nready - 1];
3267
      ready[nready - 1] = insn;
3268
      return 1;
3269
    }
3270
  return 0;
3271
}
3272
 
3273
/* INSN is dependent on DEP_INSN. */
3274
static int
3275
spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
3276
{
3277
  rtx set;
3278
 
3279
  /* The blockage pattern is used to prevent instructions from being
3280
     moved across it and has no cost. */
3281
  if (INSN_CODE (insn) == CODE_FOR_blockage
3282
      || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3283
    return 0;
3284
 
3285
  if ((INSN_P (insn) && get_attr_length (insn) == 0)
3286
      || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
3287
    return 0;
3288
 
3289
  /* Make sure hbrps are spread out. */
3290
  if (INSN_CODE (insn) == CODE_FOR_iprefetch
3291
      && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3292
    return 8;
3293
 
3294
  /* Make sure hints and hbrps are 2 cycles apart. */
3295
  if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3296
       || INSN_CODE (insn) == CODE_FOR_hbr)
3297
       && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3298
           || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3299
    return 2;
3300
 
3301
  /* An hbrp has no real dependency on other insns. */
3302
  if (INSN_CODE (insn) == CODE_FOR_iprefetch
3303
      || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3304
    return 0;
3305
 
3306
  /* Assuming that it is unlikely an argument register will be used in
3307
     the first cycle of the called function, we reduce the cost for
3308
     slightly better scheduling of dep_insn.  When not hinted, the
3309
     mispredicted branch would hide the cost as well.  */
3310
  if (CALL_P (insn))
3311
  {
3312
    rtx target = get_branch_target (insn);
3313
    if (GET_CODE (target) != REG || !set_of (target, insn))
3314
      return cost - 2;
3315
    return cost;
3316
  }
3317
 
3318
  /* And when returning from a function, let's assume the return values
3319
     are completed sooner too. */
3320
  if (CALL_P (dep_insn))
3321
    return cost - 2;
3322
 
3323
  /* Make sure an instruction that loads from the back chain is scheduled
3324
     away from the return instruction so a hint is more likely to get
3325
     issued. */
3326
  if (INSN_CODE (insn) == CODE_FOR__return
3327
      && (set = single_set (dep_insn))
3328
      && GET_CODE (SET_DEST (set)) == REG
3329
      && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3330
    return 20;
3331
 
3332
  /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3333
     scheduler makes every insn in a block anti-dependent on the final
3334
     jump_insn.  We adjust here so higher cost insns will get scheduled
3335
     earlier. */
3336
  if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
3337
    return insn_cost (dep_insn) - 3;
3338
 
3339
  return cost;
3340
}
3341
 
3342
/* Create a CONST_DOUBLE from a string.  */
3343
rtx
3344
spu_float_const (const char *string, enum machine_mode mode)
3345
{
3346
  REAL_VALUE_TYPE value;
3347
  value = REAL_VALUE_ATOF (string, mode);
3348
  return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3349
}
3350
 
3351
int
3352
spu_constant_address_p (rtx x)
3353
{
3354
  return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3355
          || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3356
          || GET_CODE (x) == HIGH);
3357
}
3358
 
3359
static enum spu_immediate
3360
which_immediate_load (HOST_WIDE_INT val)
3361
{
3362
  gcc_assert (val == trunc_int_for_mode (val, SImode));
3363
 
3364
  if (val >= -0x8000 && val <= 0x7fff)
3365
    return SPU_IL;
3366
  if (val >= 0 && val <= 0x3ffff)
3367
    return SPU_ILA;
3368
  if ((val & 0xffff) == ((val >> 16) & 0xffff))
3369
    return SPU_ILH;
3370
  if ((val & 0xffff) == 0)
3371
    return SPU_ILHU;
3372
 
3373
  return SPU_NONE;
3374
}
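 
/* Illustrative values for which_immediate_load (an editorial sketch, not
   part of the original sources):
     0x00001234  -> SPU_IL    (fits the 16-bit signed "il" immediate)
     0x00012345  -> SPU_ILA   (fits the 18-bit unsigned "ila" immediate)
     0x00120012  -> SPU_ILH   (identical halfwords, loadable with "ilh")
     0x12340000  -> SPU_ILHU  (low halfword zero, loadable with "ilhu")
   Any other value falls through to SPU_NONE.  */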
3375
 
3376
/* Return true when OP can be loaded by one of the il instructions, or
3377
   when flow2 is not completed and OP can be loaded using ilhu and iohl. */
3378
int
3379
immediate_load_p (rtx op, enum machine_mode mode)
3380
{
3381
  if (CONSTANT_P (op))
3382
    {
3383
      enum immediate_class c = classify_immediate (op, mode);
3384
      return c == IC_IL1 || c == IC_IL1s
3385
             || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
3386
    }
3387
  return 0;
3388
}
3389
 
3390
/* Return true if the first SIZE bytes of ARR form a constant that can be
3391
   generated with cbd, chd, cwd or cdd.  When non-NULL, PRUN and PSTART
3392
   represent the size and offset of the instruction to use. */
3393
static int
3394
cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3395
{
3396
  int cpat, run, i, start;
3397
  cpat = 1;
3398
  run = 0;
3399
  start = -1;
3400
  for (i = 0; i < size && cpat; i++)
3401
    if (arr[i] != i+16)
3402
      {
3403
        if (!run)
3404
          {
3405
            start = i;
3406
            if (arr[i] == 3)
3407
              run = 1;
3408
            else if (arr[i] == 2 && arr[i+1] == 3)
3409
              run = 2;
3410
            else if (arr[i] == 0)
3411
              {
3412
                while (arr[i+run] == run && i+run < 16)
3413
                  run++;
3414
                if (run != 4 && run != 8)
3415
                  cpat = 0;
3416
              }
3417
            else
3418
              cpat = 0;
3419
            if ((i & (run-1)) != 0)
3420
              cpat = 0;
3421
            i += run;
3422
          }
3423
        else
3424
          cpat = 0;
3425
      }
3426
  if (cpat && (run || size < 16))
3427
    {
3428
      if (run == 0)
3429
        run = 1;
3430
      if (prun)
3431
        *prun = run;
3432
      if (pstart)
3433
        *pstart = start == -1 ? 16-run : start;
3434
      return 1;
3435
    }
3436
  return 0;
3437
}
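 
/* An illustrative case for cpat_info (editorial sketch, not part of the
   original sources): the 16-byte pattern
     { 0x10,0x11,0x12,0x13, 0x00,0x01,0x02,0x03, 0x18,0x19,...,0x1f }
   is the identity pattern 0x10..0x1f with a 4-byte insertion run at
   offset 4, as cwd would generate; cpat_info returns 1 with *prun == 4
   and *pstart == 4.  */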
3438
 
3439
/* OP is a CONSTANT_P.  Determine what instructions can be used to load
3440
   it into a register.  MODE is only valid when OP is a CONST_INT. */
3441
static enum immediate_class
3442
classify_immediate (rtx op, enum machine_mode mode)
3443
{
3444
  HOST_WIDE_INT val;
3445
  unsigned char arr[16];
3446
  int i, j, repeated, fsmbi, repeat;
3447
 
3448
  gcc_assert (CONSTANT_P (op));
3449
 
3450
  if (GET_MODE (op) != VOIDmode)
3451
    mode = GET_MODE (op);
3452
 
3453
  /* A V4SI const_vector with all identical symbols is ok. */
3454
  if (!flag_pic
3455
      && mode == V4SImode
3456
      && GET_CODE (op) == CONST_VECTOR
3457
      && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3458
      && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3459
      && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3460
      && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3461
      && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3462
    op = CONST_VECTOR_ELT (op, 0);
3463
 
3464
  switch (GET_CODE (op))
3465
    {
3466
    case SYMBOL_REF:
3467
    case LABEL_REF:
3468
      return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
3469
 
3470
    case CONST:
3471
      /* We can never know if the resulting address fits in 18 bits and can be
3472
         loaded with ila.  For now, assume the address will not overflow if
3473
         the displacement is "small" (fits 'K' constraint).  */
3474
      if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3475
        {
3476
          rtx sym = XEXP (XEXP (op, 0), 0);
3477
          rtx cst = XEXP (XEXP (op, 0), 1);
3478
 
3479
          if (GET_CODE (sym) == SYMBOL_REF
3480
              && GET_CODE (cst) == CONST_INT
3481
              && satisfies_constraint_K (cst))
3482
            return IC_IL1s;
3483
        }
3484
      return IC_IL2s;
3485
 
3486
    case HIGH:
3487
      return IC_IL1s;
3488
 
3489
    case CONST_VECTOR:
3490
      for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3491
        if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3492
            && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3493
          return IC_POOL;
3494
      /* Fall through. */
3495
 
3496
    case CONST_INT:
3497
    case CONST_DOUBLE:
3498
      constant_to_array (mode, op, arr);
3499
 
3500
      /* Check that each 4-byte slot is identical. */
3501
      repeated = 1;
3502
      for (i = 4; i < 16; i += 4)
3503
        for (j = 0; j < 4; j++)
3504
          if (arr[j] != arr[i + j])
3505
            repeated = 0;
3506
 
3507
      if (repeated)
3508
        {
3509
          val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3510
          val = trunc_int_for_mode (val, SImode);
3511
 
3512
          if (which_immediate_load (val) != SPU_NONE)
3513
            return IC_IL1;
3514
        }
3515
 
3516
      /* Any mode of 2 bytes or smaller can be loaded with an il
3517
         instruction. */
3518
      gcc_assert (GET_MODE_SIZE (mode) > 2);
3519
 
3520
      fsmbi = 1;
3521
      repeat = 0;
3522
      for (i = 0; i < 16 && fsmbi; i++)
3523
        if (arr[i] != 0 && repeat == 0)
3524
          repeat = arr[i];
3525
        else if (arr[i] != 0 && arr[i] != repeat)
3526
          fsmbi = 0;
3527
      if (fsmbi)
3528
        return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
3529
 
3530
      if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3531
        return IC_CPAT;
3532
 
3533
      if (repeated)
3534
        return IC_IL2;
3535
 
3536
      return IC_POOL;
3537
    default:
3538
      break;
3539
    }
3540
  gcc_unreachable ();
3541
}
3542
 
3543
static enum spu_immediate
3544
which_logical_immediate (HOST_WIDE_INT val)
3545
{
3546
  gcc_assert (val == trunc_int_for_mode (val, SImode));
3547
 
3548
  if (val >= -0x200 && val <= 0x1ff)
3549
    return SPU_ORI;
3550
  if (val >= 0 && val <= 0xffff)
3551
    return SPU_IOHL;
3552
  if ((val & 0xffff) == ((val >> 16) & 0xffff))
3553
    {
3554
      val = trunc_int_for_mode (val, HImode);
3555
      if (val >= -0x200 && val <= 0x1ff)
3556
        return SPU_ORHI;
3557
      if ((val & 0xff) == ((val >> 8) & 0xff))
3558
        {
3559
          val = trunc_int_for_mode (val, QImode);
3560
          if (val >= -0x200 && val <= 0x1ff)
3561
            return SPU_ORBI;
3562
        }
3563
    }
3564
  return SPU_NONE;
3565
}
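 
/* Illustrative values for which_logical_immediate (an editorial sketch,
   not part of the original sources):
     0x000001ff  -> SPU_ORI   (fits the 10-bit signed "ori" immediate)
     0x00001234  -> SPU_IOHL  (only the low 16 bits are set)
     0x00050005  -> SPU_ORHI  (identical halfwords, each fitting "orhi")
     0x07070707  -> SPU_ORBI  (identical bytes, each fitting "orbi")
   Any other value falls through to SPU_NONE.  */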
3566
 
3567
/* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3568
   CONST_DOUBLEs. */
3569
static int
3570
const_vector_immediate_p (rtx x)
3571
{
3572
  int i;
3573
  gcc_assert (GET_CODE (x) == CONST_VECTOR);
3574
  for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3575
    if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3576
        && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3577
      return 0;
3578
  return 1;
3579
}
3580
 
3581
int
3582
logical_immediate_p (rtx op, enum machine_mode mode)
3583
{
3584
  HOST_WIDE_INT val;
3585
  unsigned char arr[16];
3586
  int i, j;
3587
 
3588
  gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3589
              || GET_CODE (op) == CONST_VECTOR);
3590
 
3591
  if (GET_CODE (op) == CONST_VECTOR
3592
      && !const_vector_immediate_p (op))
3593
    return 0;
3594
 
3595
  if (GET_MODE (op) != VOIDmode)
3596
    mode = GET_MODE (op);
3597
 
3598
  constant_to_array (mode, op, arr);
3599
 
3600
  /* Check that bytes are repeated. */
3601
  for (i = 4; i < 16; i += 4)
3602
    for (j = 0; j < 4; j++)
3603
      if (arr[j] != arr[i + j])
3604
        return 0;
3605
 
3606
  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3607
  val = trunc_int_for_mode (val, SImode);
3608
 
3609
  i = which_logical_immediate (val);
3610
  return i != SPU_NONE && i != SPU_IOHL;
3611
}
3612
 
3613
int
3614
iohl_immediate_p (rtx op, enum machine_mode mode)
3615
{
3616
  HOST_WIDE_INT val;
3617
  unsigned char arr[16];
3618
  int i, j;
3619
 
3620
  gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3621
              || GET_CODE (op) == CONST_VECTOR);
3622
 
3623
  if (GET_CODE (op) == CONST_VECTOR
3624
      && !const_vector_immediate_p (op))
3625
    return 0;
3626
 
3627
  if (GET_MODE (op) != VOIDmode)
3628
    mode = GET_MODE (op);
3629
 
3630
  constant_to_array (mode, op, arr);
3631
 
3632
  /* Check that bytes are repeated. */
3633
  for (i = 4; i < 16; i += 4)
3634
    for (j = 0; j < 4; j++)
3635
      if (arr[j] != arr[i + j])
3636
        return 0;
3637
 
3638
  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3639
  val = trunc_int_for_mode (val, SImode);
3640
 
3641
  return val >= 0 && val <= 0xffff;
3642
}
3643
 
3644
int
3645
arith_immediate_p (rtx op, enum machine_mode mode,
3646
                   HOST_WIDE_INT low, HOST_WIDE_INT high)
3647
{
3648
  HOST_WIDE_INT val;
3649
  unsigned char arr[16];
3650
  int bytes, i, j;
3651
 
3652
  gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3653
              || GET_CODE (op) == CONST_VECTOR);
3654
 
3655
  if (GET_CODE (op) == CONST_VECTOR
3656
      && !const_vector_immediate_p (op))
3657
    return 0;
3658
 
3659
  if (GET_MODE (op) != VOIDmode)
3660
    mode = GET_MODE (op);
3661
 
3662
  constant_to_array (mode, op, arr);
3663
 
3664
  if (VECTOR_MODE_P (mode))
3665
    mode = GET_MODE_INNER (mode);
3666
 
3667
  bytes = GET_MODE_SIZE (mode);
3668
  mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3669
 
3670
  /* Check that bytes are repeated. */
3671
  for (i = bytes; i < 16; i += bytes)
3672
    for (j = 0; j < bytes; j++)
3673
      if (arr[j] != arr[i + j])
3674
        return 0;
3675
 
3676
  val = arr[0];
3677
  for (j = 1; j < bytes; j++)
3678
    val = (val << 8) | arr[j];
3679
 
3680
  val = trunc_int_for_mode (val, mode);
3681
 
3682
  return val >= low && val <= high;
3683
}
3684
 
3685
/* TRUE when OP is an immediate and an exact power of 2, and given that
3686
   OP is 2^scale, scale >= LOW && scale <= HIGH.  When OP is a vector,
3687
   all entries must be the same. */
3688
bool
3689
exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high)
3690
{
3691
  enum machine_mode int_mode;
3692
  HOST_WIDE_INT val;
3693
  unsigned char arr[16];
3694
  int bytes, i, j;
3695
 
3696
  gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3697
              || GET_CODE (op) == CONST_VECTOR);
3698
 
3699
  if (GET_CODE (op) == CONST_VECTOR
3700
      && !const_vector_immediate_p (op))
3701
    return 0;
3702
 
3703
  if (GET_MODE (op) != VOIDmode)
3704
    mode = GET_MODE (op);
3705
 
3706
  constant_to_array (mode, op, arr);
3707
 
3708
  if (VECTOR_MODE_P (mode))
3709
    mode = GET_MODE_INNER (mode);
3710
 
3711
  bytes = GET_MODE_SIZE (mode);
3712
  int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3713
 
3714
  /* Check that bytes are repeated. */
3715
  for (i = bytes; i < 16; i += bytes)
3716
    for (j = 0; j < bytes; j++)
3717
      if (arr[j] != arr[i + j])
3718
        return 0;
3719
 
3720
  val = arr[0];
3721
  for (j = 1; j < bytes; j++)
3722
    val = (val << 8) | arr[j];
3723
 
3724
  val = trunc_int_for_mode (val, int_mode);
3725
 
3726
  /* Currently, we only handle SFmode */
3727
  gcc_assert (mode == SFmode);
3728
  if (mode == SFmode)
3729
    {
3730
      int exp = (val >> 23) - 127;
3731
      return val > 0 && (val & 0x007fffff) == 0
3732
             &&  exp >= low && exp <= high;
3733
    }
3734
  return FALSE;
3735
}
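 
/* A worked example for exp2_immediate_p (editorial sketch, not part of
   the original sources): the SFmode constant 8.0f has the IEEE-754
   encoding 0x41000000, so the mantissa bits are zero and
     exp = (0x41000000 >> 23) - 127 = 3;
   the function therefore returns true whenever 3 lies within
   [low, high].  */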
3736
 
3737
/* Return true if X is a SYMBOL_REF to an __ea qualified variable.  */
3738
 
3739
static int
3740
ea_symbol_ref (rtx *px, void *data ATTRIBUTE_UNUSED)
3741
{
3742
  rtx x = *px;
3743
  tree decl;
3744
 
3745
  if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3746
    {
3747
      rtx plus = XEXP (x, 0);
3748
      rtx op0 = XEXP (plus, 0);
3749
      rtx op1 = XEXP (plus, 1);
3750
      if (GET_CODE (op1) == CONST_INT)
3751
        x = op0;
3752
    }
3753
 
3754
  return (GET_CODE (x) == SYMBOL_REF
3755
          && (decl = SYMBOL_REF_DECL (x)) != 0
3756
          && TREE_CODE (decl) == VAR_DECL
3757
          && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3758
}
3759
 
3760
/* We accept:
3761
   - any 32-bit constant (SImode, SFmode)
3762
   - any constant that can be generated with fsmbi (any mode)
3763
   - a 64-bit constant where the high and low bits are identical
3764
     (DImode, DFmode)
3765
   - a 128-bit constant where the four 32-bit words match.  */
3766
bool
3767
spu_legitimate_constant_p (enum machine_mode mode, rtx x)
3768
{
3769
  if (GET_CODE (x) == HIGH)
3770
    x = XEXP (x, 0);
3771
 
3772
  /* Reject any __ea qualified reference.  These can't appear in
3773
     instructions but must be forced to the constant pool.  */
3774
  if (for_each_rtx (&x, ea_symbol_ref, 0))
3775
    return 0;
3776
 
3777
  /* V4SI with all identical symbols is valid. */
3778
  if (!flag_pic
3779
      && mode == V4SImode
3780
      && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3781
          || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
3782
          || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
3783
    return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3784
           && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3785
           && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3786
 
3787
  if (GET_CODE (x) == CONST_VECTOR
3788
      && !const_vector_immediate_p (x))
3789
    return 0;
3790
  return 1;
3791
}
3792
 
3793
/* Valid addresses are:
3794
   - symbol_ref, label_ref, const
3795
   - reg
3796
   - reg + const_int, where const_int is 16 byte aligned
3797
   - reg + reg, alignment doesn't matter
3798
  The alignment matters in the reg+const case because lqd and stqd
3799
  ignore the 4 least significant bits of the const.  We only care about
3800
  16 byte modes because the expand phase will change all smaller MEM
3801
  references to TImode.  */
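/* Some illustrative addresses (editorial sketch, not part of the original
   sources), assuming a TImode access so the alignment restriction applies:
     (reg 3)                          accepted
     (plus (reg 3) (const_int 32))    accepted: in range and 16-byte aligned
     (plus (reg 3) (const_int 8))     rejected for TImode, though it would
                                      be accepted for a 4-byte mode such
                                      as SImode
     (plus (reg 3) (reg 4))           accepted; alignment does not matter  */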
3802
static bool
3803
spu_legitimate_address_p (enum machine_mode mode,
3804
                          rtx x, bool reg_ok_strict)
3805
{
3806
  int aligned = GET_MODE_SIZE (mode) >= 16;
3807
  if (aligned
3808
      && GET_CODE (x) == AND
3809
      && GET_CODE (XEXP (x, 1)) == CONST_INT
3810
      && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
3811
    x = XEXP (x, 0);
3812
  switch (GET_CODE (x))
3813
    {
3814
    case LABEL_REF:
3815
      return !TARGET_LARGE_MEM;
3816
 
3817
    case SYMBOL_REF:
3818
    case CONST:
3819
      /* Keep __ea references until reload so that spu_expand_mov can see them
3820
         in MEMs.  */
3821
      if (ea_symbol_ref (&x, 0))
3822
        return !reload_in_progress && !reload_completed;
3823
      return !TARGET_LARGE_MEM;
3824
 
3825
    case CONST_INT:
3826
      return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3827
 
3828
    case SUBREG:
3829
      x = XEXP (x, 0);
3830
      if (REG_P (x))
3831
        return 0;
3832
 
3833
    case REG:
3834
      return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3835
 
3836
    case PLUS:
3837
    case LO_SUM:
3838
      {
3839
        rtx op0 = XEXP (x, 0);
3840
        rtx op1 = XEXP (x, 1);
3841
        if (GET_CODE (op0) == SUBREG)
3842
          op0 = XEXP (op0, 0);
3843
        if (GET_CODE (op1) == SUBREG)
3844
          op1 = XEXP (op1, 0);
3845
        if (GET_CODE (op0) == REG
3846
            && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3847
            && GET_CODE (op1) == CONST_INT
3848
            && ((INTVAL (op1) >= -0x2000 && INTVAL (op1) <= 0x1fff)
3849
                /* If virtual registers are involved, the displacement will
3850
                   change later on anyway, so checking would be premature.
3851
                   Reload will make sure the final displacement after
3852
                   register elimination is OK.  */
3853
                || op0 == arg_pointer_rtx
3854
                || op0 == frame_pointer_rtx
3855
                || op0 == virtual_stack_vars_rtx)
3856
            && (!aligned || (INTVAL (op1) & 15) == 0))
3857
          return TRUE;
3858
        if (GET_CODE (op0) == REG
3859
            && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3860
            && GET_CODE (op1) == REG
3861
            && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
3862
          return TRUE;
3863
      }
3864
      break;
3865
 
3866
    default:
3867
      break;
3868
    }
3869
  return FALSE;
3870
}
3871
 
3872
/* Like spu_legitimate_address_p, except with named addresses.  */
3873
static bool
3874
spu_addr_space_legitimate_address_p (enum machine_mode mode, rtx x,
3875
                                     bool reg_ok_strict, addr_space_t as)
3876
{
3877
  if (as == ADDR_SPACE_EA)
3878
    return (REG_P (x) && (GET_MODE (x) == EAmode));
3879
 
3880
  else if (as != ADDR_SPACE_GENERIC)
3881
    gcc_unreachable ();
3882
 
3883
  return spu_legitimate_address_p (mode, x, reg_ok_strict);
3884
}
3885
 
3886
/* When the address is reg + const_int, force the const_int into a
3887
   register.  */
3888
rtx
3889
spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3890
                        enum machine_mode mode ATTRIBUTE_UNUSED)
3891
{
3892
  rtx op0, op1;
3893
  /* Make sure both operands are registers.  */
3894
  if (GET_CODE (x) == PLUS)
3895
    {
3896
      op0 = XEXP (x, 0);
3897
      op1 = XEXP (x, 1);
3898
      if (ALIGNED_SYMBOL_REF_P (op0))
3899
        {
3900
          op0 = force_reg (Pmode, op0);
3901
          mark_reg_pointer (op0, 128);
3902
        }
3903
      else if (GET_CODE (op0) != REG)
3904
        op0 = force_reg (Pmode, op0);
3905
      if (ALIGNED_SYMBOL_REF_P (op1))
3906
        {
3907
          op1 = force_reg (Pmode, op1);
3908
          mark_reg_pointer (op1, 128);
3909
        }
3910
      else if (GET_CODE (op1) != REG)
3911
        op1 = force_reg (Pmode, op1);
3912
      x = gen_rtx_PLUS (Pmode, op0, op1);
3913
    }
3914
  return x;
3915
}
3916
 
3917
/* Like spu_legitimate_address, except with named address support.  */
3918
static rtx
3919
spu_addr_space_legitimize_address (rtx x, rtx oldx, enum machine_mode mode,
3920
                                   addr_space_t as)
3921
{
3922
  if (as != ADDR_SPACE_GENERIC)
3923
    return x;
3924
 
3925
  return spu_legitimize_address (x, oldx, mode);
3926
}
3927
 
3928
/* Reload reg + const_int for out-of-range displacements.  */
3929
rtx
3930
spu_legitimize_reload_address (rtx ad, enum machine_mode mode ATTRIBUTE_UNUSED,
3931
                               int opnum, int type)
3932
{
3933
  bool removed_and = false;
3934
 
3935
  if (GET_CODE (ad) == AND
3936
      && CONST_INT_P (XEXP (ad, 1))
3937
      && INTVAL (XEXP (ad, 1)) == (HOST_WIDE_INT) - 16)
3938
    {
3939
      ad = XEXP (ad, 0);
3940
      removed_and = true;
3941
    }
3942
 
3943
  if (GET_CODE (ad) == PLUS
3944
      && REG_P (XEXP (ad, 0))
3945
      && CONST_INT_P (XEXP (ad, 1))
3946
      && !(INTVAL (XEXP (ad, 1)) >= -0x2000
3947
           && INTVAL (XEXP (ad, 1)) <= 0x1fff))
3948
    {
3949
      /* Unshare the sum.  */
3950
      ad = copy_rtx (ad);
3951
 
3952
      /* Reload the displacement.  */
3953
      push_reload (XEXP (ad, 1), NULL_RTX, &XEXP (ad, 1), NULL,
3954
                   BASE_REG_CLASS, GET_MODE (ad), VOIDmode, 0, 0,
3955
                   opnum, (enum reload_type) type);
3956
 
3957
      /* Add back AND for alignment if we stripped it.  */
3958
      if (removed_and)
3959
        ad = gen_rtx_AND (GET_MODE (ad), ad, GEN_INT (-16));
3960
 
3961
      return ad;
3962
    }
3963
 
3964
  return NULL_RTX;
3965
}
3966
 
3967
/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3968
   struct attribute_spec.handler.  */
3969
static tree
3970
spu_handle_fndecl_attribute (tree * node,
3971
                             tree name,
3972
                             tree args ATTRIBUTE_UNUSED,
3973
                             int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3974
{
3975
  if (TREE_CODE (*node) != FUNCTION_DECL)
3976
    {
3977
      warning (0, "%qE attribute only applies to functions",
3978
               name);
3979
      *no_add_attrs = true;
3980
    }
3981
 
3982
  return NULL_TREE;
3983
}
3984
 
3985
/* Handle the "vector" attribute.  */
3986
static tree
3987
spu_handle_vector_attribute (tree * node, tree name,
3988
                             tree args ATTRIBUTE_UNUSED,
3989
                             int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3990
{
3991
  tree type = *node, result = NULL_TREE;
3992
  enum machine_mode mode;
3993
  int unsigned_p;
3994
 
3995
  while (POINTER_TYPE_P (type)
3996
         || TREE_CODE (type) == FUNCTION_TYPE
3997
         || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3998
    type = TREE_TYPE (type);
3999
 
4000
  mode = TYPE_MODE (type);
4001
 
4002
  unsigned_p = TYPE_UNSIGNED (type);
4003
  switch (mode)
4004
    {
4005
    case DImode:
4006
      result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
4007
      break;
4008
    case SImode:
4009
      result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
4010
      break;
4011
    case HImode:
4012
      result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
4013
      break;
4014
    case QImode:
4015
      result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
4016
      break;
4017
    case SFmode:
4018
      result = V4SF_type_node;
4019
      break;
4020
    case DFmode:
4021
      result = V2DF_type_node;
4022
      break;
4023
    default:
4024
      break;
4025
    }
4026
 
4027
  /* Propagate qualifiers attached to the element type
4028
     onto the vector type.  */
4029
  if (result && result != type && TYPE_QUALS (type))
4030
    result = build_qualified_type (result, TYPE_QUALS (type));
4031
 
4032
  *no_add_attrs = true;         /* No need to hang on to the attribute.  */
4033
 
4034
  if (!result)
4035
    warning (0, "%qE attribute ignored", name);
4036
  else
4037
    *node = lang_hooks.types.reconstruct_complex_type (*node, result);
4038
 
4039
  return NULL_TREE;
4040
}
4041
 
4042
/* Return nonzero if FUNC is a naked function.  */
4043
static int
4044
spu_naked_function_p (tree func)
4045
{
4046
  tree a;
4047
 
4048
  if (TREE_CODE (func) != FUNCTION_DECL)
4049
    abort ();
4050
 
4051
  a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
4052
  return a != NULL_TREE;
4053
}
4054
 
4055
int
4056
spu_initial_elimination_offset (int from, int to)
4057
{
4058
  int saved_regs_size = spu_saved_regs_size ();
4059
  int sp_offset = 0;
4060
  if (!current_function_is_leaf || crtl->outgoing_args_size
4061
      || get_frame_size () || saved_regs_size)
4062
    sp_offset = STACK_POINTER_OFFSET;
4063
  if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4064
    return get_frame_size () + crtl->outgoing_args_size + sp_offset;
4065
  else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4066
    return get_frame_size ();
4067
  else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4068
    return sp_offset + crtl->outgoing_args_size
4069
      + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
4070
  else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4071
    return get_frame_size () + saved_regs_size + sp_offset;
4072
  else
4073
    gcc_unreachable ();
4074
}
4075
 
4076
rtx
4077
spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
4078
{
4079
  enum machine_mode mode = TYPE_MODE (type);
4080
  int byte_size = ((mode == BLKmode)
4081
                   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4082
 
4083
  /* Make sure small structs are left justified in a register. */
4084
  if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4085
      && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
4086
    {
4087
      enum machine_mode smode;
4088
      rtvec v;
4089
      int i;
4090
      int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4091
      int n = byte_size / UNITS_PER_WORD;
4092
      v = rtvec_alloc (nregs);
4093
      for (i = 0; i < n; i++)
4094
        {
4095
          RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
4096
                                                gen_rtx_REG (TImode,
4097
                                                             FIRST_RETURN_REGNUM
4098
                                                             + i),
4099
                                                GEN_INT (UNITS_PER_WORD * i));
4100
          byte_size -= UNITS_PER_WORD;
4101
        }
4102
 
4103
      if (n < nregs)
4104
        {
4105
          if (byte_size < 4)
4106
            byte_size = 4;
4107
          smode =
4108
            smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
4109
          RTVEC_ELT (v, n) =
4110
            gen_rtx_EXPR_LIST (VOIDmode,
4111
                               gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
4112
                               GEN_INT (UNITS_PER_WORD * n));
4113
        }
4114
      return gen_rtx_PARALLEL (mode, v);
4115
    }
4116
  return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
4117
}
4118
 
4119
static rtx
4120
spu_function_arg (cumulative_args_t cum_v,
4121
                  enum machine_mode mode,
4122
                  const_tree type, bool named ATTRIBUTE_UNUSED)
4123
{
4124
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4125
  int byte_size;
4126
 
4127
  if (*cum >= MAX_REGISTER_ARGS)
4128
    return 0;
4129
 
4130
  byte_size = ((mode == BLKmode)
4131
               ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4132
 
4133
  /* The ABI does not allow parameters to be passed partially in
4134
     reg and partially in stack. */
4135
  if ((*cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
4136
    return 0;
4137
 
4138
  /* Make sure small structs are left justified in a register. */
4139
  if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4140
      && byte_size < UNITS_PER_WORD && byte_size > 0)
4141
    {
4142
      enum machine_mode smode;
4143
      rtx gr_reg;
4144
      if (byte_size < 4)
4145
        byte_size = 4;
4146
      smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
4147
      gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4148
                                  gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum),
4149
                                  const0_rtx);
4150
      return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4151
    }
4152
  else
4153
    return gen_rtx_REG (mode, FIRST_ARG_REGNUM + *cum);
4154
}
4155
 
4156
static void
4157
spu_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
4158
                          const_tree type, bool named ATTRIBUTE_UNUSED)
4159
{
4160
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4161
 
4162
  *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
4163
           ? 1
4164
           : mode == BLKmode
4165
           ? ((int_size_in_bytes (type) + 15) / 16)
4166
           : mode == VOIDmode
4167
           ? 1
4168
           : HARD_REGNO_NREGS (cum, mode));
4169
}
4170
 
4171
/* Variable sized types are passed by reference.  */
4172
static bool
4173
spu_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
4174
                       enum machine_mode mode ATTRIBUTE_UNUSED,
4175
                       const_tree type, bool named ATTRIBUTE_UNUSED)
4176
{
4177
  return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4178
}
4179
 
4180
 
4181
/* Var args. */
4182
 
4183
/* Create and return the va_list datatype.
4184
 
4185
   On SPU, va_list is an array type equivalent to
4186
 
4187
      typedef struct __va_list_tag
4188
        {
4189
            void *__args __attribute__((__aligned(16)));
4190
            void *__skip __attribute__((__aligned(16)));
4191
 
4192
        } va_list[1];
4193
 
4194
   where __args points to the arg that will be returned by the next
4195
   va_arg(), and __skip points to the previous stack frame such that
4196
   when __args == __skip we should advance __args by 32 bytes. */
4197
static tree
4198
spu_build_builtin_va_list (void)
4199
{
4200
  tree f_args, f_skip, record, type_decl;
4201
  bool owp;
4202
 
4203
  record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4204
 
4205
  type_decl =
4206
    build_decl (BUILTINS_LOCATION,
4207
                TYPE_DECL, get_identifier ("__va_list_tag"), record);
4208
 
4209
  f_args = build_decl (BUILTINS_LOCATION,
4210
                       FIELD_DECL, get_identifier ("__args"), ptr_type_node);
4211
  f_skip = build_decl (BUILTINS_LOCATION,
4212
                       FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
4213
 
4214
  DECL_FIELD_CONTEXT (f_args) = record;
4215
  DECL_ALIGN (f_args) = 128;
4216
  DECL_USER_ALIGN (f_args) = 1;
4217
 
4218
  DECL_FIELD_CONTEXT (f_skip) = record;
4219
  DECL_ALIGN (f_skip) = 128;
4220
  DECL_USER_ALIGN (f_skip) = 1;
4221
 
4222
  TYPE_STUB_DECL (record) = type_decl;
4223
  TYPE_NAME (record) = type_decl;
4224
  TYPE_FIELDS (record) = f_args;
4225
  DECL_CHAIN (f_args) = f_skip;
4226
 
4227
  /* We know this is being padded and we want it to be.  It is an internal
4228
     type so hide the warnings from the user. */
4229
  owp = warn_padded;
4230
  warn_padded = false;
4231
 
4232
  layout_type (record);
4233
 
4234
  warn_padded = owp;
4235
 
4236
  /* The correct type is an array type of one element.  */
4237
  return build_array_type (record, build_index_type (size_zero_node));
4238
}
4239
 
4240
/* Implement va_start by filling the va_list structure VALIST.
4241
   NEXTARG points to the first anonymous stack argument.
4242
 
4243
   The following global variables are used to initialize
4244
   the va_list structure:
4245
 
4246
     crtl->args.info;
4247
       the CUMULATIVE_ARGS for this function
4248
 
4249
     crtl->args.arg_offset_rtx:
4250
       holds the offset of the first anonymous stack argument
4251
       (relative to the virtual arg pointer).  */
4252
 
4253
static void
4254
spu_va_start (tree valist, rtx nextarg)
4255
{
4256
  tree f_args, f_skip;
4257
  tree args, skip, t;
4258
 
4259
  f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4260
  f_skip = DECL_CHAIN (f_args);
4261
 
4262
  valist = build_simple_mem_ref (valist);
4263
  args =
4264
    build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4265
  skip =
4266
    build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4267
 
4268
  /* Find the __args area.  */
4269
  t = make_tree (TREE_TYPE (args), nextarg);
4270
  if (crtl->args.pretend_args_size > 0)
4271
    t = fold_build_pointer_plus_hwi (t, -STACK_POINTER_OFFSET);
4272
  t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
4273
  TREE_SIDE_EFFECTS (t) = 1;
4274
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4275
 
4276
  /* Find the __skip area.  */
4277
  t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
4278
  t = fold_build_pointer_plus_hwi (t, (crtl->args.pretend_args_size
4279
                                       - STACK_POINTER_OFFSET));
4280
  t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
4281
  TREE_SIDE_EFFECTS (t) = 1;
4282
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4283
}
4284
 
4285
/* Gimplify va_arg by updating the va_list structure
4286
   VALIST as required to retrieve an argument of type
4287
   TYPE, and returning that argument.
4288
 
4289
   ret = va_arg(VALIST, TYPE);
4290
 
4291
   generates code equivalent to:
4292
 
4293
    paddedsize = (sizeof(TYPE) + 15) & -16;
4294
    if (VALIST.__args + paddedsize > VALIST.__skip
4295
        && VALIST.__args <= VALIST.__skip)
4296
      addr = VALIST.__skip + 32;
4297
    else
4298
      addr = VALIST.__args;
4299
    VALIST.__args = addr + paddedsize;
4300
    ret = *(TYPE *)addr;
4301
 */
4302
static tree
4303
spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4304
                          gimple_seq * post_p ATTRIBUTE_UNUSED)
4305
{
4306
  tree f_args, f_skip;
4307
  tree args, skip;
4308
  HOST_WIDE_INT size, rsize;
4309
  tree addr, tmp;
4310
  bool pass_by_reference_p;
4311
 
4312
  f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4313
  f_skip = DECL_CHAIN (f_args);
4314
 
4315
  valist = build_simple_mem_ref (valist);
4316
  args =
4317
    build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4318
  skip =
4319
    build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4320
 
4321
  addr = create_tmp_var (ptr_type_node, "va_arg");
4322
 
4323
  /* if an object is dynamically sized, a pointer to it is passed
4324
     instead of the object itself. */
4325
  pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type,
4326
                                           false);
4327
  if (pass_by_reference_p)
4328
    type = build_pointer_type (type);
4329
  size = int_size_in_bytes (type);
4330
  rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4331
 
4332
  /* build conditional expression to calculate addr. The expression
4333
     will be gimplified later. */
4334
  tmp = fold_build_pointer_plus_hwi (unshare_expr (args), rsize);
4335
  tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
4336
                build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4337
                build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4338
                unshare_expr (skip)));
4339
 
4340
  tmp = build3 (COND_EXPR, ptr_type_node, tmp,
4341
                fold_build_pointer_plus_hwi (unshare_expr (skip), 32),
4342
                unshare_expr (args));
4343
 
4344
  gimplify_assign (addr, tmp, pre_p);
4345
 
4346
  /* update VALIST.__args */
4347
  tmp = fold_build_pointer_plus_hwi (addr, rsize);
4348
  gimplify_assign (unshare_expr (args), tmp, pre_p);
4349
 
4350
  addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4351
                       addr);
4352
 
4353
  if (pass_by_reference_p)
4354
    addr = build_va_arg_indirect_ref (addr);
4355
 
4356
  return build_va_arg_indirect_ref (addr);
4357
}
4358
 
4359
/* Save parameter registers starting with the register that corresponds
4360
   to the first unnamed parameter.  If the first unnamed parameter is
4361
   in the stack then save no registers.  Set pretend_args_size to the
4362
   amount of space needed to save the registers. */
4363
static void
4364
spu_setup_incoming_varargs (cumulative_args_t cum, enum machine_mode mode,
4365
                            tree type, int *pretend_size, int no_rtl)
4366
{
4367
  if (!no_rtl)
4368
    {
4369
      rtx tmp;
4370
      int regno;
4371
      int offset;
4372
      int ncum = *get_cumulative_args (cum);
4373
 
4374
      /* cum currently points to the last named argument; we want to
4375
         start at the next argument. */
4376
      spu_function_arg_advance (pack_cumulative_args (&ncum), mode, type, true);
4377
 
4378
      offset = -STACK_POINTER_OFFSET;
4379
      for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4380
        {
4381
          tmp = gen_frame_mem (V4SImode,
4382
                               plus_constant (virtual_incoming_args_rtx,
4383
                                              offset));
4384
          emit_move_insn (tmp,
4385
                          gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4386
          offset += 16;
4387
        }
4388
      *pretend_size = offset + STACK_POINTER_OFFSET;
4389
    }
4390
}
4391
 
4392
static void
4393
spu_conditional_register_usage (void)
4394
{
4395
  if (flag_pic)
4396
    {
4397
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4398
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4399
    }
4400
}
4401
 
4402
/* This is called any time we inspect the alignment of a register for
4403
   addresses.  */
4404
static int
4405
reg_aligned_for_addr (rtx x)
4406
{
4407
  int regno =
4408
    REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4409
  return REGNO_POINTER_ALIGN (regno) >= 128;
4410
}
4411
 
4412
/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4413
   into its SYMBOL_REF_FLAGS.  */
4414
static void
4415
spu_encode_section_info (tree decl, rtx rtl, int first)
4416
{
4417
  default_encode_section_info (decl, rtl, first);
4418
 
4419
  /* If a variable has a forced alignment to < 16 bytes, mark it with
4420
     SYMBOL_FLAG_ALIGN1.  */
4421
  if (TREE_CODE (decl) == VAR_DECL
4422
      && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4423
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4424
}
4425
 
4426
/* Return TRUE if we are certain the mem refers to a complete object
4427
   which is both 16-byte aligned and padded to a 16-byte boundary.  This
4428
   would make it safe to store with a single instruction.
4429
   We guarantee the alignment and padding for static objects by aligning
4430
   all of them to 16 bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4431
   FIXME: We currently cannot guarantee this for objects on the stack
4432
   because assign_parm_setup_stack calls assign_stack_local with the
4433
   alignment of the parameter mode and in that case the alignment never
4434
   gets adjusted by LOCAL_ALIGNMENT. */
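/* An illustrative case (editorial sketch, not part of the original
   sources): for a file-scope "static int x;" the symbol is aligned and
   padded to 16 bytes, so storing the full quadword only clobbers padding
   that belongs to x itself; store_with_one_insn_p returns 1 and the store
   can be a single stqd instead of the usual load-quadword, insert (shufb),
   store-quadword sequence.  */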
4435
static int
4436
store_with_one_insn_p (rtx mem)
4437
{
4438
  enum machine_mode mode = GET_MODE (mem);
4439
  rtx addr = XEXP (mem, 0);
4440
  if (mode == BLKmode)
4441
    return 0;
4442
  if (GET_MODE_SIZE (mode) >= 16)
4443
    return 1;
4444
  /* Only static objects. */
4445
  if (GET_CODE (addr) == SYMBOL_REF)
4446
    {
4447
      /* We use the associated declaration to make sure the access is
4448
         referring to the whole object.
4449
         We check both MEM_EXPR and SYMBOL_REF_DECL.  I'm not sure
4450
         if it is necessary.  Will there be cases where one exists, and
4451
         the other does not?  Will there be cases where both exist, but
4452
         have different types?  */
4453
      tree decl = MEM_EXPR (mem);
4454
      if (decl
4455
          && TREE_CODE (decl) == VAR_DECL
4456
          && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4457
        return 1;
4458
      decl = SYMBOL_REF_DECL (addr);
4459
      if (decl
4460
          && TREE_CODE (decl) == VAR_DECL
4461
          && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4462
        return 1;
4463
    }
4464
  return 0;
4465
}
4466
 
4467
/* Return 1 when the address is not valid for a simple load and store as
4468
   required by the '_mov*' patterns.   We could make this less strict
4469
   for loads, but we prefer MEMs to look the same so they are more
4470
   likely to be merged.  */
4471
static int
4472
address_needs_split (rtx mem)
4473
{
4474
  if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4475
      && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4476
          || !(store_with_one_insn_p (mem)
4477
               || mem_is_padded_component_ref (mem))))
4478
    return 1;
4479
 
4480
  return 0;
4481
}
4482
 
4483
static GTY(()) rtx cache_fetch;           /* __cache_fetch function */
4484
static GTY(()) rtx cache_fetch_dirty;     /* __cache_fetch_dirty function */
4485
static alias_set_type ea_alias_set = -1;  /* alias set for __ea memory */
4486
 
4487
/* MEM is known to be an __ea qualified memory access.  Emit a call to
4488
   fetch the ppu memory to local store, and return its address in local
4489
   store.  */
4490
 
4491
static void
4492
ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4493
{
4494
  if (is_store)
4495
    {
4496
      rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4497
      if (!cache_fetch_dirty)
4498
        cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4499
      emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4500
                               2, ea_addr, EAmode, ndirty, SImode);
4501
    }
4502
  else
4503
    {
4504
      if (!cache_fetch)
4505
        cache_fetch = init_one_libfunc ("__cache_fetch");
4506
      emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4507
                               1, ea_addr, EAmode);
4508
    }
4509
}
4510
 
4511
/* Like ea_load_store, but do the cache tag comparison and, for stores,
4512
   dirty bit marking, inline.
4513
 
4514
   The cache control data structure is an array of
4515
 
4516
   struct __cache_tag_array
4517
     {
4518
        unsigned int tag_lo[4];
4519
        unsigned int tag_hi[4];
4520
        void *data_pointer[4];
4521
        int reserved[4];
4522
        vector unsigned short dirty_bits[4];
4523
     }  */
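 
/* A worked example (editorial sketch; the array size is assumed): each
   set of the software cache is one 128-byte __cache_tag_array element.
   If __cache_tag_array_size were 8192, index_mask below is
   8192 - 128 = 0x1f80; for ea_addr 0x12345678 the code computes
   tag_index = 0x1600 (the byte offset of the set within the array) and
   block_off = 0x78 (the offset within the 128-byte cache line).  */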
4524
 
4525
static void
4526
ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4527
{
4528
  rtx ea_addr_si;
4529
  HOST_WIDE_INT v;
4530
  rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4531
  rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4532
  rtx index_mask = gen_reg_rtx (SImode);
4533
  rtx tag_arr = gen_reg_rtx (Pmode);
4534
  rtx splat_mask = gen_reg_rtx (TImode);
4535
  rtx splat = gen_reg_rtx (V4SImode);
4536
  rtx splat_hi = NULL_RTX;
4537
  rtx tag_index = gen_reg_rtx (Pmode);
4538
  rtx block_off = gen_reg_rtx (SImode);
4539
  rtx tag_addr = gen_reg_rtx (Pmode);
4540
  rtx tag = gen_reg_rtx (V4SImode);
4541
  rtx cache_tag = gen_reg_rtx (V4SImode);
4542
  rtx cache_tag_hi = NULL_RTX;
4543
  rtx cache_ptrs = gen_reg_rtx (TImode);
4544
  rtx cache_ptrs_si = gen_reg_rtx (SImode);
4545
  rtx tag_equal = gen_reg_rtx (V4SImode);
4546
  rtx tag_equal_hi = NULL_RTX;
4547
  rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4548
  rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4549
  rtx eq_index = gen_reg_rtx (SImode);
4550
  rtx bcomp, hit_label, hit_ref, cont_label, insn;
4551
 
4552
  if (spu_ea_model != 32)
4553
    {
4554
      splat_hi = gen_reg_rtx (V4SImode);
4555
      cache_tag_hi = gen_reg_rtx (V4SImode);
4556
      tag_equal_hi = gen_reg_rtx (V4SImode);
4557
    }
4558
 
4559
  emit_move_insn (index_mask, plus_constant (tag_size_sym, -128));
4560
  emit_move_insn (tag_arr, tag_arr_sym);
4561
  v = 0x0001020300010203LL;
4562
  emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4563
  ea_addr_si = ea_addr;
4564
  if (spu_ea_model != 32)
4565
    ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4566
 
4567
  /* tag_index = ea_addr & (tag_array_size - 128)  */
4568
  emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4569
 
4570
  /* splat ea_addr to all 4 slots.  */
4571
  emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4572
  /* Similarly for high 32 bits of ea_addr.  */
4573
  if (spu_ea_model != 32)
4574
    emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4575
 
4576
  /* block_off = ea_addr & 127  */
4577
  emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4578
 
4579
  /* tag_addr = tag_arr + tag_index  */
4580
  emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4581
 
4582
  /* Read cache tags.  */
4583
  emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4584
  if (spu_ea_model != 32)
4585
    emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
4586
                                               plus_constant (tag_addr, 16)));
4587
 
4588
  /* tag = ea_addr & -128  */
4589
  emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4590
 
4591
  /* Read all four cache data pointers.  */
4592
  emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
4593
                                           plus_constant (tag_addr, 32)));
4594
 
4595
  /* Compare tags.  */
4596
  emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4597
  if (spu_ea_model != 32)
4598
    {
4599
      emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4600
      emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4601
    }
4602
 
4603
  /* At most one of the tags compare equal, so tag_equal has one
4604
     32-bit slot set to all 1's, with the other slots all zero.
4605
     gbb picks off low bit from each byte in the 128-bit registers,
4606
     so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4607
     we have a hit.  */
4608
  emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4609
  emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4610
 
4611
  /* So counting leading zeros will set eq_index to 16, 20, 24 or 28.  */
4612
  emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4613
 
4614
  /* This allows us to rotate the corresponding cache data pointer to slot0.
4615
     (rotating eq_index mod 16 bytes).  */
4616
  emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4617
  emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
4618
 
4619
  /* Add block offset to form final data address.  */
4620
  emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4621
 
4622
  /* Check that we did hit.  */
4623
  hit_label = gen_label_rtx ();
4624
  hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4625
  bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
4626
  insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4627
                                      gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4628
                                                            hit_ref, pc_rtx)));
4629
  /* Say that this branch is very likely to happen.  */
4630
  v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
4631
  add_reg_note (insn, REG_BR_PROB, GEN_INT (v));
4632
 
4633
  ea_load_store (mem, is_store, ea_addr, data_addr);
4634
  cont_label = gen_label_rtx ();
4635
  emit_jump_insn (gen_jump (cont_label));
4636
  emit_barrier ();
4637
 
4638
  emit_label (hit_label);
4639
 
4640
  if (is_store)
4641
    {
4642
      HOST_WIDE_INT v_hi;
4643
      rtx dirty_bits = gen_reg_rtx (TImode);
4644
      rtx dirty_off = gen_reg_rtx (SImode);
4645
      rtx dirty_128 = gen_reg_rtx (TImode);
4646
      rtx neg_block_off = gen_reg_rtx (SImode);
4647
 
4648
      /* Set up mask with one dirty bit per byte of the mem we are
4649
         writing, starting from top bit.  */
4650
      v_hi = v = -1;
4651
      v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4652
      if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4653
        {
4654
          v_hi = v;
4655
          v = 0;
4656
        }
4657
      emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
4658
 
4659
      /* Form index into cache dirty_bits.  eq_index is one of
4660
         0x10, 0x14, 0x18 or 0x1c.  Multiplying by 4 gives us
4661
         0x40, 0x50, 0x60 or 0x70 which just happens to be the
4662
         offset to each of the four dirty_bits elements.  */
4663
      emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4664
 
4665
      emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4666
 
4667
      /* Rotate bit mask to proper bit.  */
4668
      emit_insn (gen_negsi2 (neg_block_off, block_off));
4669
      emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4670
      emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4671
 
4672
      /* Or in the new dirty bits.  */
4673
      emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4674
 
4675
      /* Store.  */
4676
      emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4677
    }
4678
 
4679
  emit_label (cont_label);
4680
}
4681
 
4682
static rtx
4683
expand_ea_mem (rtx mem, bool is_store)
4684
{
4685
  rtx ea_addr;
4686
  rtx data_addr = gen_reg_rtx (Pmode);
4687
  rtx new_mem;
4688
 
4689
  ea_addr = force_reg (EAmode, XEXP (mem, 0));
4690
  if (optimize_size || optimize == 0)
4691
    ea_load_store (mem, is_store, ea_addr, data_addr);
4692
  else
4693
    ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4694
 
4695
  if (ea_alias_set == -1)
4696
    ea_alias_set = new_alias_set ();
4697
 
4698
  /* We generate a new MEM RTX to refer to the copy of the data
4699
     in the cache.  We do not copy memory attributes (except the
4700
     alignment) from the original MEM, as they may no longer apply
4701
     to the cache copy.  */
4702
  new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4703
  set_mem_alias_set (new_mem, ea_alias_set);
4704
  set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4705
 
4706
  return new_mem;
4707
}
4708
 
4709
int
4710
spu_expand_mov (rtx * ops, enum machine_mode mode)
4711
{
4712
  if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4713
    {
4714
      /* Perform the move in the destination SUBREG's inner mode.  */
4715
      ops[0] = SUBREG_REG (ops[0]);
4716
      mode = GET_MODE (ops[0]);
4717
      ops[1] = gen_lowpart_common (mode, ops[1]);
4718
      gcc_assert (ops[1]);
4719
    }
4720
 
4721
  if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4722
    {
4723
      rtx from = SUBREG_REG (ops[1]);
4724
      enum machine_mode imode = int_mode_for_mode (GET_MODE (from));
4725
 
4726
      gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4727
                  && GET_MODE_CLASS (imode) == MODE_INT
4728
                  && subreg_lowpart_p (ops[1]));
4729
 
4730
      if (GET_MODE_SIZE (imode) < 4)
4731
        imode = SImode;
4732
      if (imode != GET_MODE (from))
4733
        from = gen_rtx_SUBREG (imode, from, 0);
4734
 
4735
      if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4736
        {
4737
          enum insn_code icode = convert_optab_handler (trunc_optab,
4738
                                                        mode, imode);
4739
          emit_insn (GEN_FCN (icode) (ops[0], from));
4740
        }
4741
      else
4742
        emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4743
      return 1;
4744
    }
4745
 
4746
  /* At least one of the operands needs to be a register. */
4747
  if ((reload_in_progress | reload_completed) == 0
4748
      && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4749
    {
4750
      rtx temp = force_reg (mode, ops[1]);
4751
      emit_move_insn (ops[0], temp);
4752
      return 1;
4753
    }
4754
  if (reload_in_progress || reload_completed)
4755
    {
4756
      if (CONSTANT_P (ops[1]))
4757
        return spu_split_immediate (ops);
4758
      return 0;
4759
    }
4760
 
4761
  /* Catch the SImode immediates greater than 0x7fffffff, and sign
4762
     extend them. */
4763
  if (GET_CODE (ops[1]) == CONST_INT)
4764
    {
4765
      HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4766
      if (val != INTVAL (ops[1]))
4767
        {
4768
          emit_move_insn (ops[0], GEN_INT (val));
4769
          return 1;
4770
        }
4771
    }
4772
  if (MEM_P (ops[0]))
4773
    {
4774
      if (MEM_ADDR_SPACE (ops[0]))
4775
        ops[0] = expand_ea_mem (ops[0], true);
4776
      return spu_split_store (ops);
4777
    }
4778
  if (MEM_P (ops[1]))
4779
    {
4780
      if (MEM_ADDR_SPACE (ops[1]))
4781
        ops[1] = expand_ea_mem (ops[1], false);
4782
      return spu_split_load (ops);
4783
    }
4784
 
4785
  return 0;
4786
}
4787
 
4788
static void
4789
spu_convert_move (rtx dst, rtx src)
4790
{
4791
  enum machine_mode mode = GET_MODE (dst);
4792
  enum machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
4793
  rtx reg;
4794
  gcc_assert (GET_MODE (src) == TImode);
4795
  reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
4796
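  /* The value sits in the left-most bytes of the TImode source (the SPU
     preferred slot), so shift it right by 96 bits (64 for DImode) before
     truncating to INT_MODE.  */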
  emit_insn (gen_rtx_SET (VOIDmode, reg,
4797
               gen_rtx_TRUNCATE (int_mode,
4798
                 gen_rtx_LSHIFTRT (TImode, src,
4799
                   GEN_INT (int_mode == DImode ? 64 : 96)))));
4800
  if (int_mode != mode)
4801
    {
4802
      reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4803
      emit_move_insn (dst, reg);
4804
    }
4805
}
4806
 
4807
/* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4808
   the address from SRC and SRC+16.  Return a REG or CONST_INT that
4809
   specifies how many bytes to rotate the loaded registers, plus any
4810
   extra from EXTRA_ROTBY.  The address and rotate amounts are
4811
   normalized to improve merging of loads and rotate computations. */
4812
static rtx
4813
spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4814
{
4815
  rtx addr = XEXP (src, 0);
4816
  rtx p0, p1, rot, addr0, addr1;
4817
  int rot_amt;
4818
 
4819
  rot = 0;
4820
  rot_amt = 0;
4821
 
4822
  if (MEM_ALIGN (src) >= 128)
4823
    /* Address is already aligned; simply perform a TImode load.  */ ;
4824
  else if (GET_CODE (addr) == PLUS)
4825
    {
4826
      /* 8 cases:
4827
         aligned reg   + aligned reg     => lqx
4828
         aligned reg   + unaligned reg   => lqx, rotqby
4829
         aligned reg   + aligned const   => lqd
4830
         aligned reg   + unaligned const => lqd, rotqbyi
4831
         unaligned reg + aligned reg     => lqx, rotqby
4832
         unaligned reg + unaligned reg   => lqx, a, rotqby (1 scratch)
4833
         unaligned reg + aligned const   => lqd, rotqby
4834
         unaligned reg + unaligned const -> not allowed by legitimate address
4835
       */
4836
      p0 = XEXP (addr, 0);
4837
      p1 = XEXP (addr, 1);
4838
      if (!reg_aligned_for_addr (p0))
4839
        {
4840
          if (REG_P (p1) && !reg_aligned_for_addr (p1))
4841
            {
4842
              rot = gen_reg_rtx (SImode);
4843
              emit_insn (gen_addsi3 (rot, p0, p1));
4844
            }
4845
          else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4846
            {
4847
              if (INTVAL (p1) > 0
4848
                  && REG_POINTER (p0)
4849
                  && INTVAL (p1) * BITS_PER_UNIT
4850
                     < REGNO_POINTER_ALIGN (REGNO (p0)))
4851
                {
4852
                  rot = gen_reg_rtx (SImode);
4853
                  emit_insn (gen_addsi3 (rot, p0, p1));
4854
                  addr = p0;
4855
                }
4856
              else
4857
                {
4858
                  rtx x = gen_reg_rtx (SImode);
4859
                  emit_move_insn (x, p1);
4860
                  if (!spu_arith_operand (p1, SImode))
4861
                    p1 = x;
4862
                  rot = gen_reg_rtx (SImode);
4863
                  emit_insn (gen_addsi3 (rot, p0, p1));
4864
                  addr = gen_rtx_PLUS (Pmode, p0, x);
4865
                }
4866
            }
4867
          else
4868
            rot = p0;
4869
        }
4870
      else
4871
        {
4872
          if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4873
            {
4874
              rot_amt = INTVAL (p1) & 15;
4875
              if (INTVAL (p1) & -16)
4876
                {
4877
                  p1 = GEN_INT (INTVAL (p1) & -16);
4878
                  addr = gen_rtx_PLUS (SImode, p0, p1);
4879
                }
4880
              else
4881
                addr = p0;
4882
            }
4883
          else if (REG_P (p1) && !reg_aligned_for_addr (p1))
4884
            rot = p1;
4885
        }
4886
    }
4887
  else if (REG_P (addr))
4888
    {
4889
      if (!reg_aligned_for_addr (addr))
4890
        rot = addr;
4891
    }
4892
  else if (GET_CODE (addr) == CONST)
4893
    {
4894
      if (GET_CODE (XEXP (addr, 0)) == PLUS
4895
          && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4896
          && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4897
        {
4898
          rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4899
          if (rot_amt & -16)
4900
            addr = gen_rtx_CONST (Pmode,
4901
                                  gen_rtx_PLUS (Pmode,
4902
                                                XEXP (XEXP (addr, 0), 0),
4903
                                                GEN_INT (rot_amt & -16)));
4904
          else
4905
            addr = XEXP (XEXP (addr, 0), 0);
4906
        }
4907
      else
4908
        {
4909
          rot = gen_reg_rtx (Pmode);
4910
          emit_move_insn (rot, addr);
4911
        }
4912
    }
4913
  else if (GET_CODE (addr) == CONST_INT)
4914
    {
4915
      rot_amt = INTVAL (addr);
4916
      addr = GEN_INT (rot_amt & -16);
4917
    }
4918
  else if (!ALIGNED_SYMBOL_REF_P (addr))
4919
    {
4920
      rot = gen_reg_rtx (Pmode);
4921
      emit_move_insn (rot, addr);
4922
    }
4923
 
4924
  rot_amt += extra_rotby;
4925
 
4926
  rot_amt &= 15;
4927
 
4928
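  /* Fold any remaining constant byte count into the ROT register so the
     caller gets a single REG or a single CONST_INT.  */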
  if (rot && rot_amt)
4929
    {
4930
      rtx x = gen_reg_rtx (SImode);
4931
      emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4932
      rot = x;
4933
      rot_amt = 0;
4934
    }
4935
  if (!rot && rot_amt)
4936
    rot = GEN_INT (rot_amt);
4937
 
4938
  addr0 = copy_rtx (addr);
4939
  addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4940
  emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4941
 
4942
  if (dst1)
4943
    {
4944
      addr1 = plus_constant (copy_rtx (addr), 16);
4945
      addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4946
      emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4947
    }
4948
 
4949
  return rot;
4950
}
4951
 
4952
int
4953
spu_split_load (rtx * ops)
4954
{
4955
  enum machine_mode mode = GET_MODE (ops[0]);
4956
  rtx addr, load, rot;
4957
  int rot_amt;
4958
 
4959
  if (GET_MODE_SIZE (mode) >= 16)
4960
    return 0;
4961
 
4962
  addr = XEXP (ops[1], 0);
4963
  gcc_assert (GET_CODE (addr) != AND);
4964
 
4965
  if (!address_needs_split (ops[1]))
4966
    {
4967
      ops[1] = change_address (ops[1], TImode, addr);
4968
      load = gen_reg_rtx (TImode);
4969
      emit_insn (gen__movti (load, ops[1]));
4970
      spu_convert_move (ops[0], load);
4971
      return 1;
4972
    }
4973
 
4974
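  /* For values narrower than a word, bias the rotate so the data lands
     right-justified within the preferred word (e.g. -3 bytes for QImode).  */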
  rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4975
 
4976
  load = gen_reg_rtx (TImode);
4977
  rot = spu_expand_load (load, 0, ops[1], rot_amt);
4978
 
4979
  if (rot)
4980
    emit_insn (gen_rotqby_ti (load, load, rot));
4981
 
4982
  spu_convert_move (ops[0], load);
4983
  return 1;
4984
}
4985
 
4986
int
4987
spu_split_store (rtx * ops)
4988
{
4989
  enum machine_mode mode = GET_MODE (ops[0]);
4990
  rtx reg;
4991
  rtx addr, p0, p1, p1_lo, smem;
4992
  int aform;
4993
  int scalar;
4994
 
4995
  if (GET_MODE_SIZE (mode) >= 16)
4996
    return 0;
4997
 
4998
  addr = XEXP (ops[0], 0);
4999
  gcc_assert (GET_CODE (addr) != AND);
5000
 
5001
  if (!address_needs_split (ops[0]))
5002
    {
5003
      reg = gen_reg_rtx (TImode);
5004
      emit_insn (gen_spu_convert (reg, ops[1]));
5005
      ops[0] = change_address (ops[0], TImode, addr);
5006
      emit_move_insn (ops[0], reg);
5007
      return 1;
5008
    }
5009
 
5010
  if (GET_CODE (addr) == PLUS)
5011
    {
5012
      /* 8 cases:
5013
         aligned reg   + aligned reg     => lqx, c?x, shuf, stqx
5014
         aligned reg   + unaligned reg   => lqx, c?x, shuf, stqx
5015
         aligned reg   + aligned const   => lqd, c?d, shuf, stqx
5016
         aligned reg   + unaligned const => lqd, c?d, shuf, stqx
5017
         unaligned reg + aligned reg     => lqx, c?x, shuf, stqx
5018
         unaligned reg + unaligned reg   => lqx, c?x, shuf, stqx
5019
         unaligned reg + aligned const   => lqd, c?d, shuf, stqx
5020
         unaligned reg + unaligned const => lqx, c?d, shuf, stqx
5021
       */
5022
      aform = 0;
5023
      p0 = XEXP (addr, 0);
5024
      p1 = p1_lo = XEXP (addr, 1);
5025
      if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
5026
        {
5027
          p1_lo = GEN_INT (INTVAL (p1) & 15);
5028
          if (reg_aligned_for_addr (p0))
5029
            {
5030
              p1 = GEN_INT (INTVAL (p1) & -16);
5031
              if (p1 == const0_rtx)
5032
                addr = p0;
5033
              else
5034
                addr = gen_rtx_PLUS (SImode, p0, p1);
5035
            }
5036
          else
5037
            {
5038
              rtx x = gen_reg_rtx (SImode);
5039
              emit_move_insn (x, p1);
5040
              addr = gen_rtx_PLUS (SImode, p0, x);
5041
            }
5042
        }
5043
    }
5044
  else if (REG_P (addr))
5045
    {
5046
      aform = 0;
5047
      p0 = addr;
5048
      p1 = p1_lo = const0_rtx;
5049
    }
5050
  else
5051
    {
5052
      aform = 1;
5053
      p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
5054
      p1 = 0;                    /* aform doesn't use p1 */
5055
      p1_lo = addr;
5056
      if (ALIGNED_SYMBOL_REF_P (addr))
5057
        p1_lo = const0_rtx;
5058
      else if (GET_CODE (addr) == CONST
5059
               && GET_CODE (XEXP (addr, 0)) == PLUS
5060
               && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
5061
               && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
5062
        {
5063
          HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
5064
          if ((v & -16) != 0)
5065
            addr = gen_rtx_CONST (Pmode,
5066
                                  gen_rtx_PLUS (Pmode,
5067
                                                XEXP (XEXP (addr, 0), 0),
5068
                                                GEN_INT (v & -16)));
5069
          else
5070
            addr = XEXP (XEXP (addr, 0), 0);
5071
          p1_lo = GEN_INT (v & 15);
5072
        }
5073
      else if (GET_CODE (addr) == CONST_INT)
5074
        {
5075
          p1_lo = GEN_INT (INTVAL (addr) & 15);
5076
          addr = GEN_INT (INTVAL (addr) & -16);
5077
        }
5078
      else
5079
        {
5080
          p1_lo = gen_reg_rtx (SImode);
5081
          emit_move_insn (p1_lo, addr);
5082
        }
5083
    }
5084
 
5085
  gcc_assert (aform == 0 || aform == 1);
5086
  reg = gen_reg_rtx (TImode);
5087
 
5088
  scalar = store_with_one_insn_p (ops[0]);
5089
  if (!scalar)
5090
    {
5091
      /* We could copy the flags from the ops[0] MEM to mem here.
5092
         We don't because we want this load to be optimized away if
5093
         possible, and copying the flags will prevent that in certain
5094
         cases, e.g. consider the volatile flag. */
5095
 
5096
      rtx pat = gen_reg_rtx (TImode);
5097
      rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
5098
      set_mem_alias_set (lmem, 0);
5099
      emit_insn (gen_movti (reg, lmem));
5100
 
5101
      if (!p0 || reg_aligned_for_addr (p0))
5102
        p0 = stack_pointer_rtx;
5103
      if (!p1_lo)
5104
        p1_lo = const0_rtx;
5105
 
5106
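      /* cpat builds the insertion control pattern; shufb then merges the new
         value into the quadword loaded above (read-modify-write).  */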
      emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
5107
      emit_insn (gen_shufb (reg, ops[1], reg, pat));
5108
    }
5109
  else
5110
    {
5111
      if (GET_CODE (ops[1]) == REG)
5112
        emit_insn (gen_spu_convert (reg, ops[1]));
5113
      else if (GET_CODE (ops[1]) == SUBREG)
5114
        emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
5115
      else
5116
        abort ();
5117
    }
5118
 
5119
  if (GET_MODE_SIZE (mode) < 4 && scalar)
5120
    emit_insn (gen_ashlti3
5121
               (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
5122
 
5123
  smem = change_address (ops[0], TImode, copy_rtx (addr));
5124
  /* We can't use the previous alias set because the memory has changed
5125
     size and can potentially overlap objects of other types.  */
5126
  set_mem_alias_set (smem, 0);
5127
 
5128
  emit_insn (gen_movti (smem, reg));
5129
  return 1;
5130
}
5131
 
5132
/* Return TRUE if X is a MEM which is a struct member reference
5133
   and the member can safely be loaded and stored with a single
5134
   instruction because it is padded. */
5135
static int
5136
mem_is_padded_component_ref (rtx x)
5137
{
5138
  tree t = MEM_EXPR (x);
5139
  tree r;
5140
  if (!t || TREE_CODE (t) != COMPONENT_REF)
5141
    return 0;
5142
  t = TREE_OPERAND (t, 1);
5143
  if (!t || TREE_CODE (t) != FIELD_DECL
5144
      || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
5145
    return 0;
5146
  /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
5147
  r = DECL_FIELD_CONTEXT (t);
5148
  if (!r || TREE_CODE (r) != RECORD_TYPE)
5149
    return 0;
5150
  /* Make sure they are the same mode */
5151
  if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
5152
    return 0;
5153
  /* If there are no following fields then the field alignment assures
5154
     the structure is padded to the alignment which means this field is
5155
     padded too.  */
5156
  if (TREE_CHAIN (t) == 0)
5157
    return 1;
5158
  /* If the following field is also aligned then this field will be
5159
     padded. */
5160
  t = TREE_CHAIN (t);
5161
  if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
5162
    return 1;
5163
  return 0;
5164
}
5165
 
5166
/* Parse the -mfixed-range= option string.  */
5167
static void
5168
fix_range (const char *const_str)
5169
{
5170
  int i, first, last;
5171
  char *str, *dash, *comma;
5172
 
5173
  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
5174
     REG2 are either register names or register numbers.  The effect
5175
     of this option is to mark the registers in the range from REG1 to
5176
     REG2 as ``fixed'' so they won't be used by the compiler.  */
5177
 
5178
  i = strlen (const_str);
5179
  str = (char *) alloca (i + 1);
5180
  memcpy (str, const_str, i + 1);
5181
 
5182
  while (1)
5183
    {
5184
      dash = strchr (str, '-');
5185
      if (!dash)
5186
        {
5187
          warning (0, "value of -mfixed-range must have form REG1-REG2");
5188
          return;
5189
        }
5190
      *dash = '\0';
5191
      comma = strchr (dash + 1, ',');
5192
      if (comma)
5193
        *comma = '\0';
5194
 
5195
      first = decode_reg_name (str);
5196
      if (first < 0)
5197
        {
5198
          warning (0, "unknown register name: %s", str);
5199
          return;
5200
        }
5201
 
5202
      last = decode_reg_name (dash + 1);
5203
      if (last < 0)
5204
        {
5205
          warning (0, "unknown register name: %s", dash + 1);
5206
          return;
5207
        }
5208
 
5209
      *dash = '-';
5210
 
5211
      if (first > last)
5212
        {
5213
          warning (0, "%s-%s is an empty range", str, dash + 1);
5214
          return;
5215
        }
5216
 
5217
      for (i = first; i <= last; ++i)
5218
        fixed_regs[i] = call_used_regs[i] = 1;
5219
 
5220
      if (!comma)
5221
        break;
5222
 
5223
      *comma = ',';
5224
      str = comma + 1;
5225
    }
5226
}
5227
 
5228
/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
5229
   can be generated using the fsmbi instruction. */
5230
int
5231
fsmbi_const_p (rtx x)
5232
{
5233
  if (CONSTANT_P (x))
5234
    {
5235
      /* We can always choose TImode for CONST_INT because the high bits
5236
         of an SImode will always be all 1s, i.e., valid for fsmbi. */
5237
      enum immediate_class c = classify_immediate (x, TImode);
5238
      return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
5239
    }
5240
  return 0;
5241
}
5242
 
5243
/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
5244
   can be generated using the cbd, chd, cwd or cdd instruction. */
5245
int
5246
cpat_const_p (rtx x, enum machine_mode mode)
5247
{
5248
  if (CONSTANT_P (x))
5249
    {
5250
      enum immediate_class c = classify_immediate (x, mode);
5251
      return c == IC_CPAT;
5252
    }
5253
  return 0;
5254
}
5255
 
5256
rtx
5257
gen_cpat_const (rtx * ops)
5258
{
5259
  unsigned char dst[16];
5260
  int i, offset, shift, isize;
5261
  if (GET_CODE (ops[3]) != CONST_INT
5262
      || GET_CODE (ops[2]) != CONST_INT
5263
      || (GET_CODE (ops[1]) != CONST_INT
5264
          && GET_CODE (ops[1]) != REG))
5265
    return 0;
5266
  if (GET_CODE (ops[1]) == REG
5267
      && (!REG_POINTER (ops[1])
5268
          || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
5269
    return 0;
5270
 
5271
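  /* Start with the identity pattern for the second shufb operand (selectors
     16..31), then overwrite ISIZE bytes at OFFSET with selectors for the
     value being inserted, mirroring what cbd/chd/cwd/cdd produce.  */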
  for (i = 0; i < 16; i++)
5272
    dst[i] = i + 16;
5273
  isize = INTVAL (ops[3]);
5274
  if (isize == 1)
5275
    shift = 3;
5276
  else if (isize == 2)
5277
    shift = 2;
5278
  else
5279
    shift = 0;
5280
  offset = (INTVAL (ops[2]) +
5281
            (GET_CODE (ops[1]) ==
5282
             CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
5283
  for (i = 0; i < isize; i++)
5284
    dst[offset + i] = i + shift;
5285
  return array_to_constant (TImode, dst);
5286
}
5287
 
5288
/* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5289
   array.  Use MODE for CONST_INT's.  When the constant's mode is smaller
5290
   than 16 bytes, the value is repeated across the rest of the array. */
5291
void
5292
constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
5293
{
5294
  HOST_WIDE_INT val;
5295
  int i, j, first;
5296
 
5297
  memset (arr, 0, 16);
5298
  mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5299
  if (GET_CODE (x) == CONST_INT
5300
      || (GET_CODE (x) == CONST_DOUBLE
5301
          && (mode == SFmode || mode == DFmode)))
5302
    {
5303
      gcc_assert (mode != VOIDmode && mode != BLKmode);
5304
 
5305
      if (GET_CODE (x) == CONST_DOUBLE)
5306
        val = const_double_to_hwint (x);
5307
      else
5308
        val = INTVAL (x);
5309
      first = GET_MODE_SIZE (mode) - 1;
5310
      for (i = first; i >= 0; i--)
5311
        {
5312
          arr[i] = val & 0xff;
5313
          val >>= 8;
5314
        }
5315
      /* Splat the constant across the whole array. */
5316
      for (j = 0, i = first + 1; i < 16; i++)
5317
        {
5318
          arr[i] = arr[j];
5319
          j = (j == first) ? 0 : j + 1;
5320
        }
5321
    }
5322
  else if (GET_CODE (x) == CONST_DOUBLE)
5323
    {
5324
      val = CONST_DOUBLE_LOW (x);
5325
      for (i = 15; i >= 8; i--)
5326
        {
5327
          arr[i] = val & 0xff;
5328
          val >>= 8;
5329
        }
5330
      val = CONST_DOUBLE_HIGH (x);
5331
      for (i = 7; i >= 0; i--)
5332
        {
5333
          arr[i] = val & 0xff;
5334
          val >>= 8;
5335
        }
5336
    }
5337
  else if (GET_CODE (x) == CONST_VECTOR)
5338
    {
5339
      int units;
5340
      rtx elt;
5341
      mode = GET_MODE_INNER (mode);
5342
      units = CONST_VECTOR_NUNITS (x);
5343
      for (i = 0; i < units; i++)
5344
        {
5345
          elt = CONST_VECTOR_ELT (x, i);
5346
          if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5347
            {
5348
              if (GET_CODE (elt) == CONST_DOUBLE)
5349
                val = const_double_to_hwint (elt);
5350
              else
5351
                val = INTVAL (elt);
5352
              first = GET_MODE_SIZE (mode) - 1;
5353
              if (first + i * GET_MODE_SIZE (mode) > 16)
5354
                abort ();
5355
              for (j = first; j >= 0; j--)
5356
                {
5357
                  arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5358
                  val >>= 8;
5359
                }
5360
            }
5361
        }
5362
    }
5363
  else
5364
    gcc_unreachable();
5365
}
5366
 
5367
/* Convert a 16 byte array to a constant of mode MODE.  When MODE is
5368
   smaller than 16 bytes, use the bytes that would represent that value
5369
   in a register, e.g., for QImode return the value of arr[3].  */
5370
rtx
5371
array_to_constant (enum machine_mode mode, const unsigned char arr[16])
5372
{
5373
  enum machine_mode inner_mode;
5374
  rtvec v;
5375
  int units, size, i, j, k;
5376
  HOST_WIDE_INT val;
5377
 
5378
  if (GET_MODE_CLASS (mode) == MODE_INT
5379
      && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5380
    {
5381
      j = GET_MODE_SIZE (mode);
5382
      i = j < 4 ? 4 - j : 0;
5383
      for (val = 0; i < j; i++)
5384
        val = (val << 8) | arr[i];
5385
      val = trunc_int_for_mode (val, mode);
5386
      return GEN_INT (val);
5387
    }
5388
 
5389
  if (mode == TImode)
5390
    {
5391
      HOST_WIDE_INT high;
5392
      for (i = high = 0; i < 8; i++)
5393
        high = (high << 8) | arr[i];
5394
      for (i = 8, val = 0; i < 16; i++)
5395
        val = (val << 8) | arr[i];
5396
      return immed_double_const (val, high, TImode);
5397
    }
5398
  if (mode == SFmode)
5399
    {
5400
      val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5401
      val = trunc_int_for_mode (val, SImode);
5402
      return hwint_to_const_double (SFmode, val);
5403
    }
5404
  if (mode == DFmode)
5405
    {
5406
      for (i = 0, val = 0; i < 8; i++)
5407
        val = (val << 8) | arr[i];
5408
      return hwint_to_const_double (DFmode, val);
5409
    }
5410
 
5411
  if (!VECTOR_MODE_P (mode))
5412
    abort ();
5413
 
5414
  units = GET_MODE_NUNITS (mode);
5415
  size = GET_MODE_UNIT_SIZE (mode);
5416
  inner_mode = GET_MODE_INNER (mode);
5417
  v = rtvec_alloc (units);
5418
 
5419
  for (k = i = 0; i < units; ++i)
5420
    {
5421
      val = 0;
5422
      for (j = 0; j < size; j++, k++)
5423
        val = (val << 8) | arr[k];
5424
 
5425
      if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5426
        RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5427
      else
5428
        RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5429
    }
5430
  if (k > 16)
5431
    abort ();
5432
 
5433
  return gen_rtx_CONST_VECTOR (mode, v);
5434
}
5435
 
5436
static void
5437
reloc_diagnostic (rtx x)
5438
{
5439
  tree decl = 0;
5440
  if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5441
    return;
5442
 
5443
  if (GET_CODE (x) == SYMBOL_REF)
5444
    decl = SYMBOL_REF_DECL (x);
5445
  else if (GET_CODE (x) == CONST
5446
           && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5447
    decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5448
 
5449
  /* SYMBOL_REF_DECL is not necessarily a DECL. */
5450
  if (decl && !DECL_P (decl))
5451
    decl = 0;
5452
 
5453
  /* The decl could be a string constant.  */
5454
  if (decl && DECL_P (decl))
5455
    {
5456
      location_t loc;
5457
      /* We use last_assemble_variable_decl to get line information.  It's
5458
         not always going to be right and might not even be close, but will
5459
         be right for the more common cases. */
5460
      if (!last_assemble_variable_decl || in_section == ctors_section)
5461
        loc = DECL_SOURCE_LOCATION (decl);
5462
      else
5463
        loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
5464
 
5465
      if (TARGET_WARN_RELOC)
5466
        warning_at (loc, 0,
5467
                    "creating run-time relocation for %qD", decl);
5468
      else
5469
        error_at (loc,
5470
                  "creating run-time relocation for %qD", decl);
5471
    }
5472
  else
5473
    {
5474
      if (TARGET_WARN_RELOC)
5475
        warning_at (input_location, 0, "creating run-time relocation");
5476
      else
5477
        error_at (input_location, "creating run-time relocation");
5478
    }
5479
}
5480
 
5481
/* Hook into assemble_integer so we can generate an error for run-time
5482
   relocations.  The SPU ABI disallows them. */
5483
static bool
5484
spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5485
{
5486
  /* By default run-time relocations aren't supported, but we allow them
5487
     in case users support them in their own run-time loader.  And we provide
5488
     a warning for those users that don't.  */
5489
  if ((GET_CODE (x) == SYMBOL_REF)
5490
      || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5491
    reloc_diagnostic (x);
5492
 
5493
  return default_assemble_integer (x, size, aligned_p);
5494
}
5495
 
5496
static void
5497
spu_asm_globalize_label (FILE * file, const char *name)
5498
{
5499
  fputs ("\t.global\t", file);
5500
  assemble_name (file, name);
5501
  fputs ("\n", file);
5502
}
5503
 
5504
static bool
5505
spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED,
5506
               int opno ATTRIBUTE_UNUSED, int *total,
5507
               bool speed ATTRIBUTE_UNUSED)
5508
{
5509
  enum machine_mode mode = GET_MODE (x);
5510
  int cost = COSTS_N_INSNS (2);
5511
 
5512
  /* Folding to a CONST_VECTOR will use extra space but there might
5513
     be only a small savings in cycles.  We'd like to use a CONST_VECTOR
5514
     only if it allows us to fold away multiple insns.  Changing the cost
5515
     of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5516
     because this cost will only be compared against a single insn.
5517
     if (code == CONST_VECTOR)
5518
       return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6);
5519
   */
5520
 
5521
  /* Use defaults for float operations.  Not accurate but good enough. */
5522
  if (mode == DFmode)
5523
    {
5524
      *total = COSTS_N_INSNS (13);
5525
      return true;
5526
    }
5527
  if (mode == SFmode)
5528
    {
5529
      *total = COSTS_N_INSNS (6);
5530
      return true;
5531
    }
5532
  switch (code)
5533
    {
5534
    case CONST_INT:
5535
      if (satisfies_constraint_K (x))
5536
        *total = 0;
5537
      else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5538
        *total = COSTS_N_INSNS (1);
5539
      else
5540
        *total = COSTS_N_INSNS (3);
5541
      return true;
5542
 
5543
    case CONST:
5544
      *total = COSTS_N_INSNS (3);
5545
      return true;
5546
 
5547
    case LABEL_REF:
5548
    case SYMBOL_REF:
5549
      *total = COSTS_N_INSNS (0);
5550
      return true;
5551
 
5552
    case CONST_DOUBLE:
5553
      *total = COSTS_N_INSNS (5);
5554
      return true;
5555
 
5556
    case FLOAT_EXTEND:
5557
    case FLOAT_TRUNCATE:
5558
    case FLOAT:
5559
    case UNSIGNED_FLOAT:
5560
    case FIX:
5561
    case UNSIGNED_FIX:
5562
      *total = COSTS_N_INSNS (7);
5563
      return true;
5564
 
5565
    case PLUS:
5566
      if (mode == TImode)
5567
        {
5568
          *total = COSTS_N_INSNS (9);
5569
          return true;
5570
        }
5571
      break;
5572
 
5573
    case MULT:
5574
      cost =
5575
        GET_CODE (XEXP (x, 0)) ==
5576
        REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5577
      if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5578
        {
5579
          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5580
            {
5581
              HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5582
              cost = COSTS_N_INSNS (14);
5583
              if ((val & 0xffff) == 0)
5584
                cost = COSTS_N_INSNS (9);
5585
              else if (val > 0 && val < 0x10000)
5586
                cost = COSTS_N_INSNS (11);
5587
            }
5588
        }
5589
      *total = cost;
5590
      return true;
5591
    case DIV:
5592
    case UDIV:
5593
    case MOD:
5594
    case UMOD:
5595
      *total = COSTS_N_INSNS (20);
5596
      return true;
5597
    case ROTATE:
5598
    case ROTATERT:
5599
    case ASHIFT:
5600
    case ASHIFTRT:
5601
    case LSHIFTRT:
5602
      *total = COSTS_N_INSNS (4);
5603
      return true;
5604
    case UNSPEC:
5605
      if (XINT (x, 1) == UNSPEC_CONVERT)
5606
        *total = COSTS_N_INSNS (0);
5607
      else
5608
        *total = COSTS_N_INSNS (4);
5609
      return true;
5610
    }
5611
  /* Scale cost by mode size.  Except when initializing (cfun->decl == 0). */
5612
  if (GET_MODE_CLASS (mode) == MODE_INT
5613
      && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5614
    cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5615
      * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5616
  *total = cost;
5617
  return true;
5618
}
5619
 
5620
static enum machine_mode
5621
spu_unwind_word_mode (void)
5622
{
5623
  return SImode;
5624
}
5625
 
5626
/* Decide whether we can make a sibling call to a function.  DECL is the
5627
   declaration of the function being targeted by the call and EXP is the
5628
   CALL_EXPR representing the call.  */
5629
static bool
5630
spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5631
{
5632
  return decl && !TARGET_LARGE_MEM;
5633
}
5634
 
5635
/* We need to correctly update the back chain pointer and the Available
5636
   Stack Size (which is in the second slot of the sp register).  */
5637
void
5638
spu_allocate_stack (rtx op0, rtx op1)
5639
{
5640
  HOST_WIDE_INT v;
5641
  rtx chain = gen_reg_rtx (V4SImode);
5642
  rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5643
  rtx sp = gen_reg_rtx (V4SImode);
5644
  rtx splatted = gen_reg_rtx (V4SImode);
5645
  rtx pat = gen_reg_rtx (TImode);
5646
 
5647
  /* copy the back chain so we can save it back again. */
5648
  emit_move_insn (chain, stack_bot);
5649
 
5650
  op1 = force_reg (SImode, op1);
5651
 
5652
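  /* The shuffle pattern below repeats bytes 00 01 02 03, splatting OP1 (the
     allocation size) into every word so the vector subtract adjusts the
     stack pointer and the Available Stack Size together.  */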
  v = 0x1020300010203ll;
5653
  emit_move_insn (pat, immed_double_const (v, v, TImode));
5654
  emit_insn (gen_shufb (splatted, op1, op1, pat));
5655
 
5656
  emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5657
  emit_insn (gen_subv4si3 (sp, sp, splatted));
5658
 
5659
  if (flag_stack_check)
5660
    {
5661
      rtx avail = gen_reg_rtx(SImode);
5662
      rtx result = gen_reg_rtx(SImode);
5663
      emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5664
      emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5665
      emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5666
    }
5667
 
5668
  emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5669
 
5670
  emit_move_insn (stack_bot, chain);
5671
 
5672
  emit_move_insn (op0, virtual_stack_dynamic_rtx);
5673
}
5674
 
5675
void
5676
spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5677
{
5678
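  /* Shuffle pattern that copies bytes 0-3 (the preferred word) of the first
     operand into every word slot, i.e. a word splat.  */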
  static unsigned char arr[16] =
5679
    { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5680
  rtx temp = gen_reg_rtx (SImode);
5681
  rtx temp2 = gen_reg_rtx (SImode);
5682
  rtx temp3 = gen_reg_rtx (V4SImode);
5683
  rtx temp4 = gen_reg_rtx (V4SImode);
5684
  rtx pat = gen_reg_rtx (TImode);
5685
  rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5686
 
5687
  /* Restore the backchain from the first word, sp from the second.  */
5688
  emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5689
  emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5690
 
5691
  emit_move_insn (pat, array_to_constant (TImode, arr));
5692
 
5693
  /* Compute Available Stack Size for sp */
5694
  emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5695
  emit_insn (gen_shufb (temp3, temp, temp, pat));
5696
 
5697
  /* Compute Available Stack Size for back chain */
5698
  emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5699
  emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5700
  emit_insn (gen_addv4si3 (temp4, sp, temp4));
5701
 
5702
  emit_insn (gen_addv4si3 (sp, sp, temp3));
5703
  emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5704
}
5705
 
5706
static void
5707
spu_init_libfuncs (void)
5708
{
5709
  set_optab_libfunc (smul_optab, DImode, "__muldi3");
5710
  set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5711
  set_optab_libfunc (smod_optab, DImode, "__moddi3");
5712
  set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5713
  set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5714
  set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5715
  set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5716
  set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5717
  set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5718
  set_optab_libfunc (clrsb_optab, DImode, "__clrsbdi2");
5719
  set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5720
  set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5721
 
5722
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5723
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
5724
 
5725
  set_optab_libfunc (addv_optab, SImode, "__addvsi3");
5726
  set_optab_libfunc (subv_optab, SImode, "__subvsi3");
5727
  set_optab_libfunc (smulv_optab, SImode, "__mulvsi3");
5728
  set_optab_libfunc (sdivv_optab, SImode, "__divvsi3");
5729
  set_optab_libfunc (negv_optab, SImode, "__negvsi2");
5730
  set_optab_libfunc (absv_optab, SImode, "__absvsi2");
5731
  set_optab_libfunc (addv_optab, DImode, "__addvdi3");
5732
  set_optab_libfunc (subv_optab, DImode, "__subvdi3");
5733
  set_optab_libfunc (smulv_optab, DImode, "__mulvdi3");
5734
  set_optab_libfunc (sdivv_optab, DImode, "__divvdi3");
5735
  set_optab_libfunc (negv_optab, DImode, "__negvdi2");
5736
  set_optab_libfunc (absv_optab, DImode, "__absvdi2");
5737
 
5738
  set_optab_libfunc (smul_optab, TImode, "__multi3");
5739
  set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5740
  set_optab_libfunc (smod_optab, TImode, "__modti3");
5741
  set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5742
  set_optab_libfunc (umod_optab, TImode, "__umodti3");
5743
  set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
5744
}
5745
 
5746
/* Make a subreg, stripping any existing subreg.  We could possibly just
5747
   call simplify_subreg, but in this case we know what we want. */
5748
rtx
5749
spu_gen_subreg (enum machine_mode mode, rtx x)
5750
{
5751
  if (GET_CODE (x) == SUBREG)
5752
    x = SUBREG_REG (x);
5753
  if (GET_MODE (x) == mode)
5754
    return x;
5755
  return gen_rtx_SUBREG (mode, x, 0);
5756
}
5757
 
5758
static bool
5759
spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5760
{
5761
  return (TYPE_MODE (type) == BLKmode
5762
          && ((type) == 0
5763
              || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5764
              || int_size_in_bytes (type) >
5765
              (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5766
}
5767
 
5768
/* Create the built-in types and functions */
5769
 
5770
enum spu_function_code
5771
{
5772
#define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5773
#include "spu-builtins.def"
5774
#undef DEF_BUILTIN
5775
   NUM_SPU_BUILTINS
5776
};
5777
 
5778
extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5779
 
5780
struct spu_builtin_description spu_builtins[] = {
5781
#define DEF_BUILTIN(fcode, icode, name, type, params) \
5782
  {fcode, icode, name, type, params},
5783
#include "spu-builtins.def"
5784
#undef DEF_BUILTIN
5785
};
5786
 
5787
static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];
5788
 
5789
/* Returns the spu builtin decl for CODE.  */
5790
 
5791
static tree
5792
spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5793
{
5794
  if (code >= NUM_SPU_BUILTINS)
5795
    return error_mark_node;
5796
 
5797
  return spu_builtin_decls[code];
5798
}
5799
 
5800
 
5801
static void
5802
spu_init_builtins (void)
5803
{
5804
  struct spu_builtin_description *d;
5805
  unsigned int i;
5806
 
5807
  V16QI_type_node = build_vector_type (intQI_type_node, 16);
5808
  V8HI_type_node = build_vector_type (intHI_type_node, 8);
5809
  V4SI_type_node = build_vector_type (intSI_type_node, 4);
5810
  V2DI_type_node = build_vector_type (intDI_type_node, 2);
5811
  V4SF_type_node = build_vector_type (float_type_node, 4);
5812
  V2DF_type_node = build_vector_type (double_type_node, 2);
5813
 
5814
  unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5815
  unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5816
  unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5817
  unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5818
 
5819
  spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
5820
 
5821
  spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5822
  spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5823
  spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5824
  spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5825
  spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5826
  spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5827
  spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5828
  spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5829
  spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5830
  spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5831
  spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5832
  spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5833
 
5834
  spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5835
  spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5836
  spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5837
  spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5838
  spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5839
  spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5840
  spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5841
  spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5842
 
5843
  spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5844
  spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5845
 
5846
  spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5847
 
5848
  spu_builtin_types[SPU_BTI_PTR] =
5849
    build_pointer_type (build_qualified_type
5850
                        (void_type_node,
5851
                         TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5852
 
5853
  /* For each builtin we build a new prototype.  The tree code will make
5854
     sure nodes are shared. */
5855
  for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5856
    {
5857
      tree p;
5858
      char name[64];            /* build_function will make a copy. */
5859
      int parm;
5860
 
5861
      if (d->name == 0)
5862
        continue;
5863
 
5864
      /* Find last parm.  */
5865
      for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5866
        ;
5867
 
5868
      p = void_list_node;
5869
      while (parm > 1)
5870
        p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5871
 
5872
      p = build_function_type (spu_builtin_types[d->parm[0]], p);
5873
 
5874
      sprintf (name, "__builtin_%s", d->name);
5875
      spu_builtin_decls[i] =
5876
        add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
5877
      if (d->fcode == SPU_MASK_FOR_LOAD)
5878
        TREE_READONLY (spu_builtin_decls[i]) = 1;
5879
 
5880
      /* These builtins don't throw.  */
5881
      TREE_NOTHROW (spu_builtin_decls[i]) = 1;
5882
    }
5883
}
5884
 
5885
void
5886
spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5887
{
5888
  static unsigned char arr[16] =
5889
    { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5890
 
5891
  rtx temp = gen_reg_rtx (Pmode);
5892
  rtx temp2 = gen_reg_rtx (V4SImode);
5893
  rtx temp3 = gen_reg_rtx (V4SImode);
5894
  rtx pat = gen_reg_rtx (TImode);
5895
  rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5896
 
5897
  emit_move_insn (pat, array_to_constant (TImode, arr));
5898
 
5899
  /* Restore the sp.  */
5900
  emit_move_insn (temp, op1);
5901
  emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5902
 
5903
  /* Compute available stack size for sp.  */
5904
  emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5905
  emit_insn (gen_shufb (temp3, temp, temp, pat));
5906
 
5907
  emit_insn (gen_addv4si3 (sp, sp, temp3));
5908
  emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5909
}
5910
 
5911
int
5912
spu_safe_dma (HOST_WIDE_INT channel)
5913
{
5914
  return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
5915
}
5916
 
5917
void
5918
spu_builtin_splats (rtx ops[])
5919
{
5920
  enum machine_mode mode = GET_MODE (ops[0]);
5921
  if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5922
    {
5923
      unsigned char arr[16];
5924
      constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5925
      emit_move_insn (ops[0], array_to_constant (mode, arr));
5926
    }
5927
  else
5928
    {
5929
      rtx reg = gen_reg_rtx (TImode);
5930
      rtx shuf;
5931
      if (GET_CODE (ops[1]) != REG
5932
          && GET_CODE (ops[1]) != SUBREG)
5933
        ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5934
      switch (mode)
5935
        {
5936
        case V2DImode:
5937
        case V2DFmode:
5938
          shuf =
5939
            immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5940
                                TImode);
5941
          break;
5942
        case V4SImode:
5943
        case V4SFmode:
5944
          shuf =
5945
            immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5946
                                TImode);
5947
          break;
5948
        case V8HImode:
5949
          shuf =
5950
            immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5951
                                TImode);
5952
          break;
5953
        case V16QImode:
5954
          shuf =
5955
            immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5956
                                TImode);
5957
          break;
5958
        default:
5959
          abort ();
5960
        }
5961
      emit_move_insn (reg, shuf);
5962
      emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5963
    }
5964
}
5965
 
5966
void
5967
spu_builtin_extract (rtx ops[])
5968
{
5969
  enum machine_mode mode;
5970
  rtx rot, from, tmp;
5971
 
5972
  mode = GET_MODE (ops[1]);
5973
 
5974
  if (GET_CODE (ops[2]) == CONST_INT)
5975
    {
5976
      switch (mode)
5977
        {
5978
        case V16QImode:
5979
          emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5980
          break;
5981
        case V8HImode:
5982
          emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5983
          break;
5984
        case V4SFmode:
5985
          emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5986
          break;
5987
        case V4SImode:
5988
          emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5989
          break;
5990
        case V2DImode:
5991
          emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5992
          break;
5993
        case V2DFmode:
5994
          emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5995
          break;
5996
        default:
5997
          abort ();
5998
        }
5999
      return;
6000
    }
6001
 
6002
  from = spu_gen_subreg (TImode, ops[1]);
6003
  rot = gen_reg_rtx (TImode);
6004
  tmp = gen_reg_rtx (SImode);
6005
 
6006
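  /* Compute a byte rotate count that brings element ops[2] into the
     preferred slot (right-justified within the first word for sub-word
     elements).  */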
  switch (mode)
6007
    {
6008
    case V16QImode:
6009
      emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
6010
      break;
6011
    case V8HImode:
6012
      emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
6013
      emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
6014
      break;
6015
    case V4SFmode:
6016
    case V4SImode:
6017
      emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
6018
      break;
6019
    case V2DImode:
6020
    case V2DFmode:
6021
      emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
6022
      break;
6023
    default:
6024
      abort ();
6025
    }
6026
  emit_insn (gen_rotqby_ti (rot, from, tmp));
6027
 
6028
  emit_insn (gen_spu_convert (ops[0], rot));
6029
}
6030
 
6031
void
6032
spu_builtin_insert (rtx ops[])
6033
{
6034
  enum machine_mode mode = GET_MODE (ops[0]);
6035
  enum machine_mode imode = GET_MODE_INNER (mode);
6036
  rtx mask = gen_reg_rtx (TImode);
6037
  rtx offset;
6038
 
6039
  if (GET_CODE (ops[3]) == CONST_INT)
6040
    offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
6041
  else
6042
    {
6043
      offset = gen_reg_rtx (SImode);
6044
      emit_insn (gen_mulsi3
6045
                 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
6046
    }
6047
  emit_insn (gen_cpat
6048
             (mask, stack_pointer_rtx, offset,
6049
              GEN_INT (GET_MODE_SIZE (imode))));
6050
  emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
6051
}
6052
 
6053
void
6054
spu_builtin_promote (rtx ops[])
6055
{
6056
  enum machine_mode mode, imode;
6057
  rtx rot, from, offset;
6058
  HOST_WIDE_INT pos;
6059
 
6060
  mode = GET_MODE (ops[0]);
6061
  imode = GET_MODE_INNER (mode);
6062
 
6063
  from = gen_reg_rtx (TImode);
6064
  rot = spu_gen_subreg (TImode, ops[0]);
6065
 
6066
  emit_insn (gen_spu_convert (from, ops[1]));
6067
 
6068
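  /* Compute the byte rotate count that moves the scalar from the preferred
     slot into element ops[2] of the result vector.  */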
  if (GET_CODE (ops[2]) == CONST_INT)
6069
    {
6070
      pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
6071
      if (GET_MODE_SIZE (imode) < 4)
6072
        pos += 4 - GET_MODE_SIZE (imode);
6073
      offset = GEN_INT (pos & 15);
6074
    }
6075
  else
6076
    {
6077
      offset = gen_reg_rtx (SImode);
6078
      switch (mode)
6079
        {
6080
        case V16QImode:
6081
          emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
6082
          break;
6083
        case V8HImode:
6084
          emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
6085
          emit_insn (gen_addsi3 (offset, offset, offset));
6086
          break;
6087
        case V4SFmode:
6088
        case V4SImode:
6089
          emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
6090
          emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
6091
          break;
6092
        case V2DImode:
6093
        case V2DFmode:
6094
          emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
6095
          break;
6096
        default:
6097
          abort ();
6098
        }
6099
    }
6100
  emit_insn (gen_rotqby_ti (rot, from, offset));
6101
}
6102
 
6103
static void
6104
spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
6105
{
6106
  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
6107
  rtx shuf = gen_reg_rtx (V4SImode);
6108
  rtx insn = gen_reg_rtx (V4SImode);
6109
  rtx shufc;
6110
  rtx insnc;
6111
  rtx mem;
6112
 
6113
  fnaddr = force_reg (SImode, fnaddr);
6114
  cxt = force_reg (SImode, cxt);
6115
 
6116
  if (TARGET_LARGE_MEM)
6117
    {
6118
      rtx rotl = gen_reg_rtx (V4SImode);
6119
      rtx mask = gen_reg_rtx (V4SImode);
6120
      rtx bi = gen_reg_rtx (SImode);
6121
      static unsigned char const shufa[16] = {
6122
        2, 3, 0, 1, 18, 19, 16, 17,
6123
        0, 1, 2, 3, 16, 17, 18, 19
6124
      };
6125
      static unsigned char const insna[16] = {
6126
        0x41, 0, 0, 79,
6127
        0x41, 0, 0, STATIC_CHAIN_REGNUM,
6128
        0x60, 0x80, 0, 79,
6129
        0x60, 0x80, 0, STATIC_CHAIN_REGNUM
6130
      };
6131
 
6132
      shufc = force_reg (TImode, array_to_constant (TImode, shufa));
6133
      insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
6134
 
6135
      emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
6136
      emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
6137
      emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
6138
      emit_insn (gen_selb (insn, insnc, rotl, mask));
6139
 
6140
      mem = adjust_address (m_tramp, V4SImode, 0);
6141
      emit_move_insn (mem, insn);
6142
 
6143
      emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
6144
      mem = adjust_address (m_tramp, Pmode, 16);
6145
      emit_move_insn (mem, bi);
6146
    }
6147
  else
6148
    {
6149
      rtx scxt = gen_reg_rtx (SImode);
6150
      rtx sfnaddr = gen_reg_rtx (SImode);
6151
      static unsigned char const insna[16] = {
6152
        0x42, 0, 0, STATIC_CHAIN_REGNUM,
6153
        0x30, 0, 0, 0,
6154
        0, 0, 0, 0,
6155
        0, 0, 0, 0
6156
      };
6157
 
6158
      shufc = gen_reg_rtx (TImode);
6159
      insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
6160
 
6161
      /* By or'ing all of cxt with the ila opcode we are assuming cxt
6162
         fits 18 bits and the last 4 are zeros.  This will be true if
6163
         the stack pointer is initialized to 0x3fff0 at program start;
6164
         otherwise the ila instruction will be garbage. */
6165
 
6166
      emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
6167
      emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
6168
      emit_insn (gen_cpat
6169
                 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
6170
      emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
6171
      emit_insn (gen_iorv4si3 (insn, insnc, shuf));
6172
 
6173
      mem = adjust_address (m_tramp, V4SImode, 0);
6174
      emit_move_insn (mem, insn);
6175
    }
6176
  emit_insn (gen_sync ());
6177
}
6178
 
6179
void
6180
spu_expand_sign_extend (rtx ops[])
6181
{
6182
  unsigned char arr[16];
6183
  rtx pat = gen_reg_rtx (TImode);
6184
  rtx sign, c;
6185
  int i, last;
6186
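  /* ARR holds shufb selectors: 0x00-0x0f pick bytes from ops[1], 0x10-0x1f
     pick bytes from the sign word computed below.  */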
  last = GET_MODE (ops[0]) == DImode ? 7 : 15;
6187
  if (GET_MODE (ops[1]) == QImode)
6188
    {
6189
      sign = gen_reg_rtx (HImode);
6190
      emit_insn (gen_extendqihi2 (sign, ops[1]));
6191
      for (i = 0; i < 16; i++)
6192
        arr[i] = 0x12;
6193
      arr[last] = 0x13;
6194
    }
6195
  else
6196
    {
6197
      for (i = 0; i < 16; i++)
6198
        arr[i] = 0x10;
6199
      switch (GET_MODE (ops[1]))
6200
        {
6201
        case HImode:
6202
          sign = gen_reg_rtx (SImode);
6203
          emit_insn (gen_extendhisi2 (sign, ops[1]));
6204
          arr[last] = 0x03;
6205
          arr[last - 1] = 0x02;
6206
          break;
6207
        case SImode:
6208
          sign = gen_reg_rtx (SImode);
6209
          emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
6210
          for (i = 0; i < 4; i++)
6211
            arr[last - i] = 3 - i;
6212
          break;
6213
        case DImode:
6214
          sign = gen_reg_rtx (SImode);
6215
          c = gen_reg_rtx (SImode);
6216
          emit_insn (gen_spu_convert (c, ops[1]));
6217
          emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
6218
          for (i = 0; i < 8; i++)
6219
            arr[last - i] = 7 - i;
6220
          break;
6221
        default:
6222
          abort ();
6223
        }
6224
    }
6225
  emit_move_insn (pat, array_to_constant (TImode, arr));
6226
  emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
6227
}
6228
 
6229
/* Expand vector initialization. If there are any constant parts,
6230
   load constant parts first. Then load any non-constant parts.  */
6231
void
6232
spu_expand_vector_init (rtx target, rtx vals)
6233
{
6234
  enum machine_mode mode = GET_MODE (target);
6235
  int n_elts = GET_MODE_NUNITS (mode);
6236
  int n_var = 0;
6237
  bool all_same = true;
6238
  rtx first, x = NULL_RTX, first_constant = NULL_RTX;
6239
  int i;
6240
 
6241
  first = XVECEXP (vals, 0, 0);
6242
  for (i = 0; i < n_elts; ++i)
6243
    {
6244
      x = XVECEXP (vals, 0, i);
6245
      if (!(CONST_INT_P (x)
6246
            || GET_CODE (x) == CONST_DOUBLE
6247
            || GET_CODE (x) == CONST_FIXED))
6248
        ++n_var;
6249
      else
6250
        {
6251
          if (first_constant == NULL_RTX)
6252
            first_constant = x;
6253
        }
6254
      if (i > 0 && !rtx_equal_p (x, first))
6255
        all_same = false;
6256
    }
6257
 
6258
  /* if all elements are the same, use splats to repeat elements */
6259
  if (all_same)
6260
    {
6261
      if (!CONSTANT_P (first)
6262
          && !register_operand (first, GET_MODE (x)))
6263
        first = force_reg (GET_MODE (first), first);
6264
      emit_insn (gen_spu_splats (target, first));
6265
      return;
6266
    }
6267
 
6268
  /* load constant parts */
6269
  if (n_var != n_elts)
6270
    {
6271
      if (n_var == 0)
6272
        {
6273
          emit_move_insn (target,
6274
                          gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6275
        }
6276
      else
6277
        {
6278
          rtx constant_parts_rtx = copy_rtx (vals);
6279
 
6280
          gcc_assert (first_constant != NULL_RTX);
6281
          /* Fill empty slots with the first constant; this increases
6282
             our chance of using splats in the recursive call below. */
6283
          for (i = 0; i < n_elts; ++i)
6284
            {
6285
              x = XVECEXP (constant_parts_rtx, 0, i);
6286
              if (!(CONST_INT_P (x)
6287
                    || GET_CODE (x) == CONST_DOUBLE
6288
                    || GET_CODE (x) == CONST_FIXED))
6289
                XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6290
            }
6291
 
6292
          spu_expand_vector_init (target, constant_parts_rtx);
6293
        }
6294
    }
6295
 
6296
  /* load variable parts */
6297
  if (n_var != 0)
6298
    {
6299
      rtx insert_operands[4];
6300
 
6301
      insert_operands[0] = target;
6302
      insert_operands[2] = target;
6303
      for (i = 0; i < n_elts; ++i)
6304
        {
6305
          x = XVECEXP (vals, 0, i);
6306
          if (!(CONST_INT_P (x)
6307
                || GET_CODE (x) == CONST_DOUBLE
6308
                || GET_CODE (x) == CONST_FIXED))
6309
            {
6310
              if (!register_operand (x, GET_MODE (x)))
6311
                x = force_reg (GET_MODE (x), x);
6312
              insert_operands[1] = x;
6313
              insert_operands[3] = GEN_INT (i);
6314
              spu_builtin_insert (insert_operands);
6315
            }
6316
        }
6317
    }
6318
}
6319
 
6320
/* Return the insn index of the vector compare instruction for the given CODE,
6321
   DEST_MODE and OP_MODE.  Return -1 if no valid insn is available.  */
6322
 
6323
static int
6324
get_vec_cmp_insn (enum rtx_code code,
6325
                  enum machine_mode dest_mode,
6326
                  enum machine_mode op_mode)
6327
 
6328
{
6329
  switch (code)
6330
    {
6331
    case EQ:
6332
      if (dest_mode == V16QImode && op_mode == V16QImode)
6333
        return CODE_FOR_ceq_v16qi;
6334
      if (dest_mode == V8HImode && op_mode == V8HImode)
6335
        return CODE_FOR_ceq_v8hi;
6336
      if (dest_mode == V4SImode && op_mode == V4SImode)
6337
        return CODE_FOR_ceq_v4si;
6338
      if (dest_mode == V4SImode && op_mode == V4SFmode)
6339
        return CODE_FOR_ceq_v4sf;
6340
      if (dest_mode == V2DImode && op_mode == V2DFmode)
6341
        return CODE_FOR_ceq_v2df;
6342
      break;
6343
    case GT:
6344
      if (dest_mode == V16QImode && op_mode == V16QImode)
6345
        return CODE_FOR_cgt_v16qi;
6346
      if (dest_mode == V8HImode && op_mode == V8HImode)
6347
        return CODE_FOR_cgt_v8hi;
6348
      if (dest_mode == V4SImode && op_mode == V4SImode)
6349
        return CODE_FOR_cgt_v4si;
6350
      if (dest_mode == V4SImode && op_mode == V4SFmode)
6351
        return CODE_FOR_cgt_v4sf;
6352
      if (dest_mode == V2DImode && op_mode == V2DFmode)
6353
        return CODE_FOR_cgt_v2df;
6354
      break;
6355
    case GTU:
6356
      if (dest_mode == V16QImode && op_mode == V16QImode)
6357
        return CODE_FOR_clgt_v16qi;
6358
      if (dest_mode == V8HImode && op_mode == V8HImode)
6359
        return CODE_FOR_clgt_v8hi;
6360
      if (dest_mode == V4SImode && op_mode == V4SImode)
6361
        return CODE_FOR_clgt_v4si;
6362
      break;
6363
    default:
6364
      break;
6365
    }
6366
  return -1;
6367
}
6368
 
6369
/* Emit vector compare for operands OP0 and OP1 using code RCODE.
6370
   DMODE is expected destination mode. This is a recursive function.  */
6371
 
6372
static rtx
6373
spu_emit_vector_compare (enum rtx_code rcode,
6374
                         rtx op0, rtx op1,
6375
                         enum machine_mode dmode)
6376
{
6377
  int vec_cmp_insn;
6378
  rtx mask;
6379
  enum machine_mode dest_mode;
6380
  enum machine_mode op_mode = GET_MODE (op1);
6381
 
6382
  gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6383
 
6384
  /* Floating point vector compare instructions use destination V4SImode.
6385
     Double floating point vector compare instructions use destination V2DImode.
6386
     Move destination to appropriate mode later.  */
6387
  if (dmode == V4SFmode)
6388
    dest_mode = V4SImode;
6389
  else if (dmode == V2DFmode)
6390
    dest_mode = V2DImode;
6391
  else
6392
    dest_mode = dmode;
6393
 
6394
  mask = gen_reg_rtx (dest_mode);
6395
  vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6396
 
6397
  if (vec_cmp_insn == -1)
6398
    {
6399
      bool swap_operands = false;
6400
      bool try_again = false;
6401
      switch (rcode)
6402
        {
6403
        case LT:
6404
          rcode = GT;
6405
          swap_operands = true;
6406
          try_again = true;
6407
          break;
6408
        case LTU:
6409
          rcode = GTU;
6410
          swap_operands = true;
6411
          try_again = true;
6412
          break;
6413
        case NE:
6414
        case UNEQ:
6415
        case UNLE:
6416
        case UNLT:
6417
        case UNGE:
6418
        case UNGT:
6419
        case UNORDERED:
6420
          /* Treat A != B as ~(A==B).  */
6421
          {
6422
            enum rtx_code rev_code;
6423
            enum insn_code nor_code;
6424
            rtx rev_mask;
6425
 
6426
            rev_code = reverse_condition_maybe_unordered (rcode);
6427
            rev_mask = spu_emit_vector_compare (rev_code, op0, op1, dest_mode);
6428
 
6429
            nor_code = optab_handler (one_cmpl_optab, dest_mode);
6430
            gcc_assert (nor_code != CODE_FOR_nothing);
6431
            emit_insn (GEN_FCN (nor_code) (mask, rev_mask));
6432
            if (dmode != dest_mode)
6433
              {
6434
                rtx temp = gen_reg_rtx (dest_mode);
6435
                convert_move (temp, mask, 0);
6436
                return temp;
6437
              }
6438
            return mask;
6439
          }
6440
          break;
6441
        case GE:
6442
        case GEU:
6443
        case LE:
6444
        case LEU:
6445
          /* Try GT/GTU/LT/LTU OR EQ */
6446
          {
6447
            rtx c_rtx, eq_rtx;
6448
            enum insn_code ior_code;
6449
            enum rtx_code new_code;
6450
 
6451
            switch (rcode)
6452
              {
6453
              case GE:  new_code = GT;  break;
6454
              case GEU: new_code = GTU; break;
6455
              case LE:  new_code = LT;  break;
6456
              case LEU: new_code = LTU; break;
6457
              default:
6458
                gcc_unreachable ();
6459
              }
6460
 
6461
            c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6462
            eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6463
 
6464
            ior_code = optab_handler (ior_optab, dest_mode);
6465
            gcc_assert (ior_code != CODE_FOR_nothing);
6466
            emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6467
            if (dmode != dest_mode)
6468
              {
6469
                rtx temp = gen_reg_rtx (dest_mode);
6470
                convert_move (temp, mask, 0);
6471
                return temp;
6472
              }
6473
            return mask;
6474
          }
6475
          break;
6476
        case LTGT:
6477
          /* Try LT OR GT */
6478
          {
6479
            rtx lt_rtx, gt_rtx;
6480
            enum insn_code ior_code;
6481
 
6482
            lt_rtx = spu_emit_vector_compare (LT, op0, op1, dest_mode);
6483
            gt_rtx = spu_emit_vector_compare (GT, op0, op1, dest_mode);
6484
 
6485
            ior_code = optab_handler (ior_optab, dest_mode);
6486
            gcc_assert (ior_code != CODE_FOR_nothing);
6487
            emit_insn (GEN_FCN (ior_code) (mask, lt_rtx, gt_rtx));
6488
            if (dmode != dest_mode)
6489
              {
6490
                rtx temp = gen_reg_rtx (dest_mode);
6491
                convert_move (temp, mask, 0);
6492
                return temp;
6493
              }
6494
            return mask;
6495
          }
6496
          break;
6497
        case ORDERED:
6498
          /* Implement as (A==A) & (B==B) */
6499
          {
6500
            rtx a_rtx, b_rtx;
6501
            enum insn_code and_code;
6502
 
6503
            a_rtx = spu_emit_vector_compare (EQ, op0, op0, dest_mode);
6504
            b_rtx = spu_emit_vector_compare (EQ, op1, op1, dest_mode);
6505
 
6506
            and_code = optab_handler (and_optab, dest_mode);
6507
            gcc_assert (and_code != CODE_FOR_nothing);
6508
            emit_insn (GEN_FCN (and_code) (mask, a_rtx, b_rtx));
6509
            if (dmode != dest_mode)
6510
              {
6511
                rtx temp = gen_reg_rtx (dest_mode);
6512
                convert_move (temp, mask, 0);
6513
                return temp;
6514
              }
6515
            return mask;
6516
          }
6517
          break;
6518
        default:
6519
          gcc_unreachable ();
6520
        }
6521
 
6522
      /* You only get two chances.  */
6523
      if (try_again)
6524
          vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6525
 
6526
      gcc_assert (vec_cmp_insn != -1);
6527
 
6528
      if (swap_operands)
6529
        {
6530
          rtx tmp;
6531
          tmp = op0;
6532
          op0 = op1;
6533
          op1 = tmp;
6534
        }
6535
    }
6536
 
6537
  emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6538
  if (dmode != dest_mode)
6539
    {
6540
      rtx temp = gen_reg_rtx (dest_mode);
6541
      convert_move (temp, mask, 0);
6542
      return temp;
6543
    }
6544
  return mask;
6545
}
6546
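/* Editorial illustration: only EQ, GT and GTU have direct compare patterns
   in get_vec_cmp_insn, so the code above synthesizes the remaining
   comparisons.  For V4SImode operands, for example,

       a <= b   becomes   (b > a) | (a == b)   (LE -> LT or EQ, with LT
                                                handled by swapping the
                                                operands of a GT compare)
       a != b   becomes   ~(a == b)            (via one_cmpl_optab)

   Floating-point compares produce an integer mask (V4SImode or V2DImode)
   which is converted back to the requested destination mode if needed.  */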
 
6547
 
6548
/* Emit vector conditional expression.
6549
   DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6550
   CC_OP0 and CC_OP1 are the two operands for the relation operation COND.  */
6551
 
6552
int
6553
spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6554
                           rtx cond, rtx cc_op0, rtx cc_op1)
6555
{
6556
  enum machine_mode dest_mode = GET_MODE (dest);
6557
  enum rtx_code rcode = GET_CODE (cond);
6558
  rtx mask;
6559
 
6560
  /* Get the vector mask for the given relational operations.  */
6561
  mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6562
 
6563
  emit_insn (gen_selb (dest, op2, op1, mask));
6564
 
6565
  return 1;
6566
}
6567
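/* Editorial note: a VEC_COND_EXPR of the form (cc_op0 COND cc_op1) ? op1 : op2
   is thus implemented with a single selb on the mask computed above.  The
   operand order (op2, op1, mask) means elements of op1 are selected where the
   comparison held (mask bits all ones) and elements of op2 elsewhere.  */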
 
6568
static rtx
6569
spu_force_reg (enum machine_mode mode, rtx op)
6570
{
6571
  rtx x, r;
6572
  if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6573
    {
6574
      if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6575
          || GET_MODE (op) == BLKmode)
6576
        return force_reg (mode, convert_to_mode (mode, op, 0));
6577
      abort ();
6578
    }
6579
 
6580
  r = force_reg (GET_MODE (op), op);
6581
  if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6582
    {
6583
      x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6584
      if (x)
6585
        return x;
6586
    }
6587
 
6588
  x = gen_reg_rtx (mode);
6589
  emit_insn (gen_spu_convert (x, r));
6590
  return x;
6591
}
6592
 
6593
static void
6594
spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6595
{
6596
  HOST_WIDE_INT v = 0;
6597
  int lsbits;
6598
  /* Check the range of immediate operands. */
6599
  if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6600
    {
6601
      int range = p - SPU_BTI_7;
6602
 
6603
      if (!CONSTANT_P (op))
6604
        error ("%s expects an integer literal in the range [%d, %d]",
6605
               d->name,
6606
               spu_builtin_range[range].low, spu_builtin_range[range].high);
6607
 
6608
      if (GET_CODE (op) == CONST
6609
          && (GET_CODE (XEXP (op, 0)) == PLUS
6610
              || GET_CODE (XEXP (op, 0)) == MINUS))
6611
        {
6612
          v = INTVAL (XEXP (XEXP (op, 0), 1));
6613
          op = XEXP (XEXP (op, 0), 0);
6614
        }
6615
      else if (GET_CODE (op) == CONST_INT)
6616
        v = INTVAL (op);
6617
      else if (GET_CODE (op) == CONST_VECTOR
6618
               && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6619
        v = INTVAL (CONST_VECTOR_ELT (op, 0));
6620
 
6621
      /* The default for v is 0 which is valid in every range. */
6622
      if (v < spu_builtin_range[range].low
6623
          || v > spu_builtin_range[range].high)
6624
        error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
6625
               d->name,
6626
               spu_builtin_range[range].low, spu_builtin_range[range].high,
6627
               v);
6628
 
6629
      switch (p)
6630
        {
6631
        case SPU_BTI_S10_4:
6632
          lsbits = 4;
6633
          break;
6634
        case SPU_BTI_U16_2:
6635
          /* This is only used in lqa and stqa.  Even though the insns
6636
             encode 16 bits of the address (all but the 2 least
6637
             significant), only 14 bits are used because it is masked to
6638
             be 16 byte aligned. */
6639
          lsbits = 4;
6640
          break;
6641
        case SPU_BTI_S16_2:
6642
          /* This is used for lqr and stqr. */
6643
          lsbits = 2;
6644
          break;
6645
        default:
6646
          lsbits = 0;
6647
        }
6648
 
6649
      if (GET_CODE (op) == LABEL_REF
6650
          || (GET_CODE (op) == SYMBOL_REF
6651
              && SYMBOL_REF_FUNCTION_P (op))
6652
          || (v & ((1 << lsbits) - 1)) != 0)
6653
        warning (0, "%d least significant bits of %s are ignored", lsbits,
6654
                 d->name);
6655
    }
6656
}
6657
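/* Editorial illustration (a sketch): a builtin parameter classified as
   SPU_BTI_S10_4, for instance, must be an integer literal inside the signed
   range recorded in spu_builtin_range, and because lsbits is 4 it should
   also be a multiple of 16; otherwise the warning above about ignored least
   significant bits is issued.  */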
 
6658
 
6659
static int
6660
expand_builtin_args (struct spu_builtin_description *d, tree exp,
6661
                     rtx target, rtx ops[])
6662
{
6663
  enum insn_code icode = (enum insn_code) d->icode;
6664
  int i = 0, a;
6665
 
6666
  /* Expand the arguments into rtl. */
6667
 
6668
  if (d->parm[0] != SPU_BTI_VOID)
6669
    ops[i++] = target;
6670
 
6671
  for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6672
    {
6673
      tree arg = CALL_EXPR_ARG (exp, a);
6674
      if (arg == 0)
6675
        abort ();
6676
      ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6677
    }
6678
 
6679
  gcc_assert (i == insn_data[icode].n_generator_args);
6680
  return i;
6681
}
6682
 
6683
static rtx
6684
spu_expand_builtin_1 (struct spu_builtin_description *d,
6685
                      tree exp, rtx target)
6686
{
6687
  rtx pat;
6688
  rtx ops[8];
6689
  enum insn_code icode = (enum insn_code) d->icode;
6690
  enum machine_mode mode, tmode;
6691
  int i, p;
6692
  int n_operands;
6693
  tree return_type;
6694
 
6695
  /* Set up ops[] with values from arglist. */
6696
  n_operands = expand_builtin_args (d, exp, target, ops);
6697
 
6698
  /* Handle the target operand which must be operand 0. */
6699
  i = 0;
6700
  if (d->parm[0] != SPU_BTI_VOID)
6701
    {
6702
 
6703
      /* We prefer the mode specified for the match_operand; otherwise
6704
         use the mode from the builtin function prototype. */
6705
      tmode = insn_data[d->icode].operand[0].mode;
6706
      if (tmode == VOIDmode)
6707
        tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6708
 
6709
      /* Try to use target because not using it can lead to extra copies
6710
         and when we are using all of the registers, extra copies lead
6711
         to extra spills.  */
6712
      if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6713
        ops[0] = target;
6714
      else
6715
        target = ops[0] = gen_reg_rtx (tmode);
6716
 
6717
      if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6718
        abort ();
6719
 
6720
      i++;
6721
    }
6722
 
6723
  if (d->fcode == SPU_MASK_FOR_LOAD)
6724
    {
6725
      enum machine_mode mode = insn_data[icode].operand[1].mode;
6726
      tree arg;
6727
      rtx addr, op, pat;
6728
 
6729
      /* get addr */
6730
      arg = CALL_EXPR_ARG (exp, 0);
6731
      gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
6732
      op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6733
      addr = memory_address (mode, op);
6734
 
6735
      /* negate addr */
6736
      op = gen_reg_rtx (GET_MODE (addr));
6737
      emit_insn (gen_rtx_SET (VOIDmode, op,
6738
                 gen_rtx_NEG (GET_MODE (addr), addr)));
6739
      op = gen_rtx_MEM (mode, op);
6740
 
6741
      pat = GEN_FCN (icode) (target, op);
6742
      if (!pat)
6743
        return 0;
6744
      emit_insn (pat);
6745
      return target;
6746
    }
6747
 
6748
  /* Ignore align_hint, but still expand its args in case they have
6749
     side effects. */
6750
  if (icode == CODE_FOR_spu_align_hint)
6751
    return 0;
6752
 
6753
  /* Handle the rest of the operands. */
6754
  for (p = 1; i < n_operands; i++, p++)
6755
    {
6756
      if (insn_data[d->icode].operand[i].mode != VOIDmode)
6757
        mode = insn_data[d->icode].operand[i].mode;
6758
      else
6759
        mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6760
 
6761
      /* mode can be VOIDmode here for labels */
6762
 
6763
      /* For specific intrinsics with an immediate operand, e.g.,
6764
         si_ai(), we sometimes need to convert the scalar argument to a
6765
         vector argument by splatting the scalar. */
6766
      if (VECTOR_MODE_P (mode)
6767
          && (GET_CODE (ops[i]) == CONST_INT
6768
              || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
6769
              || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6770
        {
6771
          if (GET_CODE (ops[i]) == CONST_INT)
6772
            ops[i] = spu_const (mode, INTVAL (ops[i]));
6773
          else
6774
            {
6775
              rtx reg = gen_reg_rtx (mode);
6776
              enum machine_mode imode = GET_MODE_INNER (mode);
6777
              if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6778
                ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6779
              if (imode != GET_MODE (ops[i]))
6780
                ops[i] = convert_to_mode (imode, ops[i],
6781
                                          TYPE_UNSIGNED (spu_builtin_types
6782
                                                         [d->parm[i]]));
6783
              emit_insn (gen_spu_splats (reg, ops[i]));
6784
              ops[i] = reg;
6785
            }
6786
        }
6787
 
6788
      spu_check_builtin_parm (d, ops[i], d->parm[p]);
6789
 
6790
      if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6791
        ops[i] = spu_force_reg (mode, ops[i]);
6792
    }
6793
 
6794
  switch (n_operands)
6795
    {
6796
    case 0:
6797
      pat = GEN_FCN (icode) (0);
6798
      break;
6799
    case 1:
6800
      pat = GEN_FCN (icode) (ops[0]);
6801
      break;
6802
    case 2:
6803
      pat = GEN_FCN (icode) (ops[0], ops[1]);
6804
      break;
6805
    case 3:
6806
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6807
      break;
6808
    case 4:
6809
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6810
      break;
6811
    case 5:
6812
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6813
      break;
6814
    case 6:
6815
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6816
      break;
6817
    default:
6818
      abort ();
6819
    }
6820
 
6821
  if (!pat)
6822
    abort ();
6823
 
6824
  if (d->type == B_CALL || d->type == B_BISLED)
6825
    emit_call_insn (pat);
6826
  else if (d->type == B_JUMP)
6827
    {
6828
      emit_jump_insn (pat);
6829
      emit_barrier ();
6830
    }
6831
  else
6832
    emit_insn (pat);
6833
 
6834
  return_type = spu_builtin_types[d->parm[0]];
6835
  if (d->parm[0] != SPU_BTI_VOID
6836
      && GET_MODE (target) != TYPE_MODE (return_type))
6837
    {
6838
      /* target is the return value.  It should always be the mode of
6839
         the builtin function prototype. */
6840
      target = spu_force_reg (TYPE_MODE (return_type), target);
6841
    }
6842
 
6843
  return target;
6844
}
6845
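/* Editorial note: two cases above are worth calling out.  SPU_MASK_FOR_LOAD
   is expanded on the negated address, yielding the permutation mask the
   vectorizer later uses in its realignment sequence for unaligned loads
   (see spu_builtin_mask_for_load below).  And when an insn pattern expects
   a vector operand but the builtin prototype passes a scalar, the scalar is
   splatted across a vector with gen_spu_splats, or converted with spu_const
   when it is a CONST_INT.  */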
 
6846
rtx
6847
spu_expand_builtin (tree exp,
6848
                    rtx target,
6849
                    rtx subtarget ATTRIBUTE_UNUSED,
6850
                    enum machine_mode mode ATTRIBUTE_UNUSED,
6851
                    int ignore ATTRIBUTE_UNUSED)
6852
{
6853
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6854
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6855
  struct spu_builtin_description *d;
6856
 
6857
  if (fcode < NUM_SPU_BUILTINS)
6858
    {
6859
      d = &spu_builtins[fcode];
6860
 
6861
      return spu_expand_builtin_1 (d, exp, target);
6862
    }
6863
  abort ();
6864
}
6865
 
6866
/* Implement targetm.vectorize.builtin_mul_widen_even.  */
6867
static tree
6868
spu_builtin_mul_widen_even (tree type)
6869
{
6870
  switch (TYPE_MODE (type))
6871
    {
6872
    case V8HImode:
6873
      if (TYPE_UNSIGNED (type))
6874
        return spu_builtin_decls[SPU_MULE_0];
6875
      else
6876
        return spu_builtin_decls[SPU_MULE_1];
6877
      break;
6878
    default:
6879
      return NULL_TREE;
6880
    }
6881
}
6882
 
6883
/* Implement targetm.vectorize.builtin_mul_widen_odd.  */
6884
static tree
6885
spu_builtin_mul_widen_odd (tree type)
6886
{
6887
  switch (TYPE_MODE (type))
6888
    {
6889
    case V8HImode:
6890
      if (TYPE_UNSIGNED (type))
6891
        return spu_builtin_decls[SPU_MULO_1];
6892
      else
6893
        return spu_builtin_decls[SPU_MULO_0];
6894
      break;
6895
    default:
6896
      return NULL_TREE;
6897
    }
6898
}
6899
 
6900
/* Implement targetm.vectorize.builtin_mask_for_load.  */
6901
static tree
6902
spu_builtin_mask_for_load (void)
6903
{
6904
  return spu_builtin_decls[SPU_MASK_FOR_LOAD];
6905
}
6906
 
6907
/* Implement targetm.vectorize.builtin_vectorization_cost.  */
6908
static int
6909
spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6910
                                tree vectype ATTRIBUTE_UNUSED,
6911
                                int misalign ATTRIBUTE_UNUSED)
6912
{
6913
  switch (type_of_cost)
6914
    {
6915
      case scalar_stmt:
6916
      case vector_stmt:
6917
      case vector_load:
6918
      case vector_store:
6919
      case vec_to_scalar:
6920
      case scalar_to_vec:
6921
      case cond_branch_not_taken:
6922
      case vec_perm:
6923
      case vec_promote_demote:
6924
        return 1;
6925
 
6926
      case scalar_store:
6927
        return 10;
6928
 
6929
      case scalar_load:
6930
        /* Load + rotate.  */
6931
        return 2;
6932
 
6933
      case unaligned_load:
6934
        return 2;
6935
 
6936
      case cond_branch_taken:
6937
        return 6;
6938
 
6939
      default:
6940
        gcc_unreachable ();
6941
    }
6942
}
6943
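/* Editorial note on the costs above: the local store only supports aligned
   quadword accesses, so a scalar load is modelled as a load plus a rotate
   (cost 2), while a scalar store is given a much higher cost (10),
   presumably because it needs a read-modify-write of the surrounding
   quadword.  */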
 
6944
/* Return true iff a data reference of TYPE can reach vector alignment (16)
6945
   after applying a number of iterations.  This routine does not determine
6946
   how many iterations are required to reach the desired alignment.  */
6947
 
6948
static bool
6949
spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
6950
{
6951
  if (is_packed)
6952
    return false;
6953
 
6954
  /* All other types are naturally aligned.  */
6955
  return true;
6956
}
6957
 
6958
/* Return the appropriate mode for a named address pointer.  */
6959
static enum machine_mode
6960
spu_addr_space_pointer_mode (addr_space_t addrspace)
6961
{
6962
  switch (addrspace)
6963
    {
6964
    case ADDR_SPACE_GENERIC:
6965
      return ptr_mode;
6966
    case ADDR_SPACE_EA:
6967
      return EAmode;
6968
    default:
6969
      gcc_unreachable ();
6970
    }
6971
}
6972
 
6973
/* Return the appropriate mode for a named address address.  */
6974
static enum machine_mode
6975
spu_addr_space_address_mode (addr_space_t addrspace)
6976
{
6977
  switch (addrspace)
6978
    {
6979
    case ADDR_SPACE_GENERIC:
6980
      return Pmode;
6981
    case ADDR_SPACE_EA:
6982
      return EAmode;
6983
    default:
6984
      gcc_unreachable ();
6985
    }
6986
}
6987
 
6988
/* Determine if one named address space is a subset of another.  */
6989
 
6990
static bool
6991
spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6992
{
6993
  gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6994
  gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6995
 
6996
  if (subset == superset)
6997
    return true;
6998
 
6999
  /* If we have -mno-address-space-conversion, treat __ea and generic as not
7000
     being subsets but instead as disjoint address spaces.  */
7001
  else if (!TARGET_ADDRESS_SPACE_CONVERSION)
7002
    return false;
7003
 
7004
  else
7005
    return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
7006
}
7007
 
7008
/* Convert from one address space to another.  */
7009
static rtx
7010
spu_addr_space_convert (rtx op, tree from_type, tree to_type)
7011
{
7012
  addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
7013
  addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
7014
 
7015
  gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
7016
  gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
7017
 
7018
  if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
7019
    {
7020
      rtx result, ls;
7021
 
7022
      ls = gen_const_mem (DImode,
7023
                          gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
7024
      set_mem_align (ls, 128);
7025
 
7026
      result = gen_reg_rtx (Pmode);
7027
      ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
7028
      op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
7029
      ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
7030
                                          ls, const0_rtx, Pmode, 1);
7031
 
7032
      emit_insn (gen_subsi3 (result, op, ls));
7033
 
7034
      return result;
7035
    }
7036
 
7037
  else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
7038
    {
7039
      rtx result, ls;
7040
 
7041
      ls = gen_const_mem (DImode,
7042
                          gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
7043
      set_mem_align (ls, 128);
7044
 
7045
      result = gen_reg_rtx (EAmode);
7046
      ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
7047
      op = force_reg (Pmode, op);
7048
      ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
7049
                                          ls, const0_rtx, EAmode, 1);
7050
      op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
7051
 
7052
      if (EAmode == SImode)
7053
        emit_insn (gen_addsi3 (result, op, ls));
7054
      else
7055
        emit_insn (gen_adddi3 (result, op, ls));
7056
 
7057
      return result;
7058
    }
7059
 
7060
  else
7061
    gcc_unreachable ();
7062
}
7063
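/* Editorial illustration: converting a generic (local store) pointer to
   __ea adds the value loaded from __ea_local_store, and the opposite
   conversion subtracts it.  The emit_conditional_move calls above keep the
   adjustment at 0 when the source pointer is 0, so a null pointer converts
   to a null pointer in either direction.  */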
 
7064
 
7065
/* Count the total number of instructions in each pipe and return the
7066
   maximum, which is used as the Minimum Iteration Interval (MII)
7067
   in the modulo scheduler.  get_pipe() will return -2, -1, 0, or 1.
7068
   -2 means the instruction can go in either pipe0 or pipe1.  */
7069
static int
7070
spu_sms_res_mii (struct ddg *g)
7071
{
7072
  int i;
7073
  unsigned t[4] = {0, 0, 0, 0};
7074
 
7075
  for (i = 0; i < g->num_nodes; i++)
7076
    {
7077
      rtx insn = g->nodes[i].insn;
7078
      int p = get_pipe (insn) + 2;
7079
 
7080
      gcc_assert (p >= 0);
7081
      gcc_assert (p < 4);
7082
 
7083
      t[p]++;
7084
      if (dump_file && INSN_P (insn))
7085
            fprintf (dump_file, "i%d %s %d %d\n",
7086
                     INSN_UID (insn),
7087
                     insn_data[INSN_CODE(insn)].name,
7088
                     p, t[p]);
7089
    }
7090
  if (dump_file)
7091
    fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
7092
 
7093
  return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
7094
}
7095
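/* Editorial worked example: with t[0] = 4 instructions that can issue in
   either pipe, t[2] = 3 pipe0 instructions and t[3] = 2 pipe1 instructions,
   the formula above gives
       MAX ((4 + 3 + 2 + 1) / 2, MAX (3, 2)) = MAX (5, 3) = 5
   as the resource-based minimum iteration interval.  */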
 
7096
 
7097
void
7098
spu_init_expanders (void)
7099
{
7100
  if (cfun)
7101
    {
7102
      rtx r0, r1;
7103
      /* HARD_FRAME_POINTER_REGNUM is only 128-bit aligned when
7104
         frame_pointer_needed is true.  We don't know that until we're
7105
         expanding the prologue. */
7106
      REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
7107
 
7108
      /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
7109
         LAST_VIRTUAL_REGISTER+2 to test the back-end.  We want them
7110
         to be treated as aligned, so generate them here. */
7111
      r0 = gen_reg_rtx (SImode);
7112
      r1 = gen_reg_rtx (SImode);
7113
      mark_reg_pointer (r0, 128);
7114
      mark_reg_pointer (r1, 128);
7115
      gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
7116
                  && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
7117
    }
7118
}
7119
 
7120
static enum machine_mode
7121
spu_libgcc_cmp_return_mode (void)
7122
{
7123
 
7124
/* For SPU, word mode is TImode, so it is better to use SImode
7125
   for compare returns.  */
7126
  return SImode;
7127
}
7128
 
7129
static enum machine_mode
7130
spu_libgcc_shift_count_mode (void)
7131
{
7132
/* For SPU, word mode is TImode, so it is better to use SImode
7133
   for shift counts.  */
7134
  return SImode;
7135
}
7136
 
7137
/* Implement targetm.section_type_flags.  */
7138
static unsigned int
7139
spu_section_type_flags (tree decl, const char *name, int reloc)
7140
{
7141
  /* .toe needs to have type @nobits.  */
7142
  if (strcmp (name, ".toe") == 0)
7143
    return SECTION_BSS;
7144
  /* Don't load _ea into the current address space.  */
7145
  if (strcmp (name, "._ea") == 0)
7146
    return SECTION_WRITE | SECTION_DEBUG;
7147
  return default_section_type_flags (decl, name, reloc);
7148
}
7149
 
7150
/* Implement targetm.select_section.  */
7151
static section *
7152
spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
7153
{
7154
  /* Variables and constants defined in the __ea address space
7155
     go into a special section named "._ea".  */
7156
  if (TREE_TYPE (decl) != error_mark_node
7157
      && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
7158
    {
7159
      /* We might get called with string constants, but get_named_section
7160
         doesn't like them as they are not DECLs.  Also, we need to set
7161
         flags in that case.  */
7162
      if (!DECL_P (decl))
7163
        return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
7164
 
7165
      return get_named_section (decl, "._ea", reloc);
7166
    }
7167
 
7168
  return default_elf_select_section (decl, reloc, align);
7169
}
7170
 
7171
/* Implement targetm.unique_section.  */
7172
static void
7173
spu_unique_section (tree decl, int reloc)
7174
{
7175
  /* We don't support unique section names in the __ea address
7176
     space for now.  */
7177
  if (TREE_TYPE (decl) != error_mark_node
7178
      && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
7179
    return;
7180
 
7181
  default_unique_section (decl, reloc);
7182
}
7183
 
7184
/* Generate a constant or register which contains 2^SCALE.  We assume
7185
   the result is valid for MODE.  Currently, MODE must be V4SFmode and
7186
   SCALE must be SImode. */
7187
rtx
7188
spu_gen_exp2 (enum machine_mode mode, rtx scale)
7189
{
7190
  gcc_assert (mode == V4SFmode);
7191
  gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
7192
  if (GET_CODE (scale) != CONST_INT)
7193
    {
7194
      /* unsigned int exp = (127 + scale) << 23;
7195
        __vector float m = (__vector float) spu_splats (exp); */
7196
      rtx reg = force_reg (SImode, scale);
7197
      rtx exp = gen_reg_rtx (SImode);
7198
      rtx mul = gen_reg_rtx (mode);
7199
      emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
7200
      emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
7201
      emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
7202
      return mul;
7203
    }
7204
  else
7205
    {
7206
      HOST_WIDE_INT exp = 127 + INTVAL (scale);
7207
      unsigned char arr[16];
7208
      arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
7209
      arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
7210
      arr[2] = arr[6] = arr[10] = arr[14] = 0;
7211
      arr[3] = arr[7] = arr[11] = arr[15] = 0;
7212
      return array_to_constant (mode, arr);
7213
    }
7214
}
7215
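/* Editorial worked example: for a constant SCALE of 3, exp is 127 + 3 = 130
   (0x82), so each word of the constant built above is 0x41000000
   (arr[n] = 0x82 >> 1 = 0x41, arr[n+1] = (0x82 << 7) & 0xff = 0x00), which
   is the IEEE single-precision encoding of 2^3 = 8.0f.  */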
 
7216
/* After reload, just change the convert into a move instruction
7217
   or a dead instruction. */
7218
void
7219
spu_split_convert (rtx ops[])
7220
{
7221
  if (REGNO (ops[0]) == REGNO (ops[1]))
7222
    emit_note (NOTE_INSN_DELETED);
7223
  else
7224
    {
7225
      /* Use TImode always as this might help hard reg copyprop.  */
7226
      rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
7227
      rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
7228
      emit_insn (gen_move_insn (op0, op1));
7229
    }
7230
}
7231
 
7232
void
7233
spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
7234
{
7235
  fprintf (file, "# profile\n");
7236
  fprintf (file, "brsl $75,  _mcount\n");
7237
}
7238
 
7239
/* Implement targetm.ref_may_alias_errno.  */
7240
static bool
7241
spu_ref_may_alias_errno (ao_ref *ref)
7242
{
7243
  tree base = ao_ref_base (ref);
7244
 
7245
  /* With SPU newlib, errno is defined as something like
7246
         _impure_data._errno
7247
     The default implementation of this target macro does not
7248
     recognize such expressions, so special-case it here.  */
7249
 
7250
  if (TREE_CODE (base) == VAR_DECL
7251
      && !TREE_STATIC (base)
7252
      && DECL_EXTERNAL (base)
7253
      && TREE_CODE (TREE_TYPE (base)) == RECORD_TYPE
7254
      && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base)),
7255
                 "_impure_data") == 0
7256
      /* _errno is the first member of _impure_data.  */
7257
      && ref->offset == 0)
7258
    return true;
7259
 
7260
  return default_ref_may_alias_errno (ref);
7261
}
7262
 
7263
/* Output thunk to FILE that implements a C++ virtual function call (with
7264
   multiple inheritance) to FUNCTION.  The thunk adjusts the this pointer
7265
   by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
7266
   stored at VCALL_OFFSET in the vtable whose address is located at offset 0
7267
   relative to the resulting this pointer.  */
7268
 
7269
static void
7270
spu_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
7271
                     HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
7272
                     tree function)
7273
{
7274
  rtx op[8];
7275
 
7276
  /* Make sure unwind info is emitted for the thunk if needed.  */
7277
  final_start_function (emit_barrier (), file, 1);
7278
 
7279
  /* Operand 0 is the target function.  */
7280
  op[0] = XEXP (DECL_RTL (function), 0);
7281
 
7282
  /* Operand 1 is the 'this' pointer.  */
7283
  if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
7284
    op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM + 1);
7285
  else
7286
    op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM);
7287
 
7288
  /* Operands 2/3 are the low/high halfwords of delta.  */
7289
  op[2] = GEN_INT (trunc_int_for_mode (delta, HImode));
7290
  op[3] = GEN_INT (trunc_int_for_mode (delta >> 16, HImode));
7291
 
7292
  /* Operands 4/5 are the low/high halfwords of vcall_offset.  */
7293
  op[4] = GEN_INT (trunc_int_for_mode (vcall_offset, HImode));
7294
  op[5] = GEN_INT (trunc_int_for_mode (vcall_offset >> 16, HImode));
7295
 
7296
  /* Operands 6/7 are temporary registers.  */
7297
  op[6] = gen_rtx_REG (Pmode, 79);
7298
  op[7] = gen_rtx_REG (Pmode, 78);
7299
 
7300
  /* Add DELTA to this pointer.  */
7301
  if (delta)
7302
    {
7303
      if (delta >= -0x200 && delta < 0x200)
7304
        output_asm_insn ("ai\t%1,%1,%2", op);
7305
      else if (delta >= -0x8000 && delta < 0x8000)
7306
        {
7307
          output_asm_insn ("il\t%6,%2", op);
7308
          output_asm_insn ("a\t%1,%1,%6", op);
7309
        }
7310
      else
7311
        {
7312
          output_asm_insn ("ilhu\t%6,%3", op);
7313
          output_asm_insn ("iohl\t%6,%2", op);
7314
          output_asm_insn ("a\t%1,%1,%6", op);
7315
        }
7316
    }
7317
 
7318
  /* Perform vcall adjustment.  */
7319
  if (vcall_offset)
7320
    {
7321
      output_asm_insn ("lqd\t%7,0(%1)", op);
7322
      output_asm_insn ("rotqby\t%7,%7,%1", op);
7323
 
7324
      if (vcall_offset >= -0x200 && vcall_offset < 0x200)
7325
        output_asm_insn ("ai\t%7,%7,%4", op);
7326
      else if (vcall_offset >= -0x8000 && vcall_offset < 0x8000)
7327
        {
7328
          output_asm_insn ("il\t%6,%4", op);
7329
          output_asm_insn ("a\t%7,%7,%6", op);
7330
        }
7331
      else
7332
        {
7333
          output_asm_insn ("ilhu\t%6,%5", op);
7334
          output_asm_insn ("iohl\t%6,%4", op);
7335
          output_asm_insn ("a\t%7,%7,%6", op);
7336
        }
7337
 
7338
      output_asm_insn ("lqd\t%6,0(%7)", op);
7339
      output_asm_insn ("rotqby\t%6,%6,%7", op);
7340
      output_asm_insn ("a\t%1,%1,%6", op);
7341
    }
7342
 
7343
  /* Jump to target.  */
7344
  output_asm_insn ("br\t%0", op);
7345
 
7346
  final_end_function ();
7347
}
7348
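/* Editorial illustration (a sketch; register numbers are assumptions): for a
   small DELTA such as 16 and a zero VCALL_OFFSET, the thunk emitted above
   reduces to

       ai      $3,$3,16        # adjust the incoming `this' pointer
       br      target          # tail-jump to the real function

   with the `this' pointer arriving in the first argument register ($3), or
   in the second one when the function returns its value in memory.  */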
 
7349
#include "gt-spu.h"
