OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-stable/] [gcc-4.5.1/] [gcc/] [config/] [spu/] [spu.c] - Blame information for rev 826

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 282 jeremybenn
/* Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
2
 
3
   This file is free software; you can redistribute it and/or modify it under
4
   the terms of the GNU General Public License as published by the Free
5
   Software Foundation; either version 3 of the License, or (at your option)
6
   any later version.
7
 
8
   This file is distributed in the hope that it will be useful, but WITHOUT
9
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
11
   for more details.
12
 
13
   You should have received a copy of the GNU General Public License
14
   along with GCC; see the file COPYING3.  If not see
15
   <http://www.gnu.org/licenses/>.  */
16
 
17
#include "config.h"
18
#include "system.h"
19
#include "coretypes.h"
20
#include "tm.h"
21
#include "rtl.h"
22
#include "regs.h"
23
#include "hard-reg-set.h"
24
#include "real.h"
25
#include "insn-config.h"
26
#include "conditions.h"
27
#include "insn-attr.h"
28
#include "flags.h"
29
#include "recog.h"
30
#include "obstack.h"
31
#include "tree.h"
32
#include "expr.h"
33
#include "optabs.h"
34
#include "except.h"
35
#include "function.h"
36
#include "output.h"
37
#include "basic-block.h"
38
#include "integrate.h"
39
#include "toplev.h"
40
#include "ggc.h"
41
#include "hashtab.h"
42
#include "tm_p.h"
43
#include "target.h"
44
#include "target-def.h"
45
#include "langhooks.h"
46
#include "reload.h"
47
#include "cfglayout.h"
48
#include "sched-int.h"
49
#include "params.h"
50
#include "assert.h"
51
#include "machmode.h"
52
#include "gimple.h"
53
#include "tm-constrs.h"
54
#include "ddg.h"
55
#include "sbitmap.h"
56
#include "timevar.h"
57
#include "df.h"
58
 
59
/* Builtin types, data and prototypes. */
60
 
61
enum spu_builtin_type_index
62
{
63
  SPU_BTI_END_OF_PARAMS,
64
 
65
  /* We create new type nodes for these. */
66
  SPU_BTI_V16QI,
67
  SPU_BTI_V8HI,
68
  SPU_BTI_V4SI,
69
  SPU_BTI_V2DI,
70
  SPU_BTI_V4SF,
71
  SPU_BTI_V2DF,
72
  SPU_BTI_UV16QI,
73
  SPU_BTI_UV8HI,
74
  SPU_BTI_UV4SI,
75
  SPU_BTI_UV2DI,
76
 
77
  /* A 16-byte type. (Implemented with V16QI_type_node) */
78
  SPU_BTI_QUADWORD,
79
 
80
  /* These all correspond to intSI_type_node */
81
  SPU_BTI_7,
82
  SPU_BTI_S7,
83
  SPU_BTI_U7,
84
  SPU_BTI_S10,
85
  SPU_BTI_S10_4,
86
  SPU_BTI_U14,
87
  SPU_BTI_16,
88
  SPU_BTI_S16,
89
  SPU_BTI_S16_2,
90
  SPU_BTI_U16,
91
  SPU_BTI_U16_2,
92
  SPU_BTI_U18,
93
 
94
  /* These correspond to the standard types */
95
  SPU_BTI_INTQI,
96
  SPU_BTI_INTHI,
97
  SPU_BTI_INTSI,
98
  SPU_BTI_INTDI,
99
 
100
  SPU_BTI_UINTQI,
101
  SPU_BTI_UINTHI,
102
  SPU_BTI_UINTSI,
103
  SPU_BTI_UINTDI,
104
 
105
  SPU_BTI_FLOAT,
106
  SPU_BTI_DOUBLE,
107
 
108
  SPU_BTI_VOID,
109
  SPU_BTI_PTR,
110
 
111
  SPU_BTI_MAX
112
};
113
 
114
#define V16QI_type_node               (spu_builtin_types[SPU_BTI_V16QI])
115
#define V8HI_type_node                (spu_builtin_types[SPU_BTI_V8HI])
116
#define V4SI_type_node                (spu_builtin_types[SPU_BTI_V4SI])
117
#define V2DI_type_node                (spu_builtin_types[SPU_BTI_V2DI])
118
#define V4SF_type_node                (spu_builtin_types[SPU_BTI_V4SF])
119
#define V2DF_type_node                (spu_builtin_types[SPU_BTI_V2DF])
120
#define unsigned_V16QI_type_node      (spu_builtin_types[SPU_BTI_UV16QI])
121
#define unsigned_V8HI_type_node       (spu_builtin_types[SPU_BTI_UV8HI])
122
#define unsigned_V4SI_type_node       (spu_builtin_types[SPU_BTI_UV4SI])
123
#define unsigned_V2DI_type_node       (spu_builtin_types[SPU_BTI_UV2DI])
124
 
125
static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
126
 
127
/* A pair of bounds (LOW, HIGH) for one of the restricted immediate
   builtin operand types.  */
struct spu_builtin_range
{
  int low;
  int high;
};

/* One entry per immediate operand type, listed in the same order as
   the SPU_BTI_7 .. SPU_BTI_U18 members of enum spu_builtin_type_index.  */
static struct spu_builtin_range spu_builtin_range[] = {
  /* SPU_BTI_7     */ { -0x40,    0x7f    },
  /* SPU_BTI_S7    */ { -0x40,    0x3f    },
  /* SPU_BTI_U7    */ { 0,        0x7f    },
  /* SPU_BTI_S10   */ { -0x200,   0x1ff   },
  /* SPU_BTI_S10_4 */ { -0x2000,  0x1fff  },
  /* SPU_BTI_U14   */ { 0,        0x3fff  },
  /* SPU_BTI_16    */ { -0x8000,  0xffff  },
  /* SPU_BTI_S16   */ { -0x8000,  0x7fff  },
  /* SPU_BTI_S16_2 */ { -0x20000, 0x1ffff },
  /* SPU_BTI_U16   */ { 0,        0xffff  },
  /* SPU_BTI_U16_2 */ { 0,        0x3ffff },
  /* SPU_BTI_U18   */ { 0,        0x3ffff },
};
146
 
147
 
148
/*  Target specific attribute specifications.  */
149
char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
150
 
151
/*  Prototypes and external defs.  */
152
static void spu_init_builtins (void);
153
static tree spu_builtin_decl (unsigned, bool);
154
static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode);
155
static unsigned char spu_vector_mode_supported_p (enum machine_mode mode);
156
static bool spu_legitimate_address_p (enum machine_mode, rtx, bool);
157
static bool spu_addr_space_legitimate_address_p (enum machine_mode, rtx,
158
                                                 bool, addr_space_t);
159
static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
160
static rtx get_pic_reg (void);
161
static int need_to_save_reg (int regno, int saving);
162
static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
163
static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
164
static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
165
                               rtx scratch);
166
static void emit_nop_for_insn (rtx insn);
167
static bool insn_clobbers_hbr (rtx insn);
168
static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
169
                                  int distance, sbitmap blocks);
170
static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
171
                                    enum machine_mode dmode);
172
static rtx get_branch_target (rtx branch);
173
static void spu_machine_dependent_reorg (void);
174
static int spu_sched_issue_rate (void);
175
static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
176
                                     int can_issue_more);
177
static int get_pipe (rtx insn);
178
static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
179
static void spu_sched_init_global (FILE *, int, int);
180
static void spu_sched_init (FILE *, int, int);
181
static int spu_sched_reorder (FILE *, int, rtx *, int *, int);
182
static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
183
                                         int flags,
184
                                         unsigned char *no_add_attrs);
185
static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
186
                                         int flags,
187
                                         unsigned char *no_add_attrs);
188
static int spu_naked_function_p (tree func);
189
static unsigned char spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
190
                                            const_tree type, unsigned char named);
191
static tree spu_build_builtin_va_list (void);
192
static void spu_va_start (tree, rtx);
193
static tree spu_gimplify_va_arg_expr (tree valist, tree type,
194
                                      gimple_seq * pre_p, gimple_seq * post_p);
195
static int store_with_one_insn_p (rtx mem);
196
static int mem_is_padded_component_ref (rtx x);
197
static int reg_aligned_for_addr (rtx x);
198
static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
199
static void spu_asm_globalize_label (FILE * file, const char *name);
200
static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
201
                                    int *total, bool speed);
202
static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp);
203
static void spu_init_libfuncs (void);
204
static bool spu_return_in_memory (const_tree type, const_tree fntype);
205
static void fix_range (const char *);
206
static void spu_encode_section_info (tree, rtx, int);
207
static rtx spu_legitimize_address (rtx, rtx, enum machine_mode);
208
static rtx spu_addr_space_legitimize_address (rtx, rtx, enum machine_mode,
209
                                              addr_space_t);
210
static tree spu_builtin_mul_widen_even (tree);
211
static tree spu_builtin_mul_widen_odd (tree);
212
static tree spu_builtin_mask_for_load (void);
213
static int spu_builtin_vectorization_cost (bool);
214
static bool spu_vector_alignment_reachable (const_tree, bool);
215
static tree spu_builtin_vec_perm (tree, tree *);
216
static enum machine_mode spu_addr_space_pointer_mode (addr_space_t);
217
static enum machine_mode spu_addr_space_address_mode (addr_space_t);
218
static bool spu_addr_space_subset_p (addr_space_t, addr_space_t);
219
static rtx spu_addr_space_convert (rtx, tree, tree);
220
static int spu_sms_res_mii (struct ddg *g);
221
static void asm_file_start (void);
222
static unsigned int spu_section_type_flags (tree, const char *, int);
223
static section *spu_select_section (tree, int, unsigned HOST_WIDE_INT);
224
static void spu_unique_section (tree, int);
225
static rtx spu_expand_load (rtx, rtx, rtx, int);
226
static void spu_trampoline_init (rtx, tree, rtx);
227
 
228
extern const char *reg_names[];
229
 
230
/* Which instruction set architecture to use.  */
231
int spu_arch;
232
/* Which cpu are we tuning for.  */
233
int spu_tune;
234
 
235
/* The hardware requires 8 insns between a hint and the branch it
236
   affects.  This variable describes how many rtl instructions the
237
   compiler needs to see before inserting a hint, and then the compiler
238
   will insert enough nops to make it at least 8 insns.  The default is
239
   for the compiler to allow up to 2 nops to be emitted.  The nops are
240
   inserted in pairs, so we round down. */
241
int spu_hint_dist = (8*4) - (2*4);
242
 
243
/* Determines whether we run variable tracking in machine dependent
244
   reorganization.  */
245
static int spu_flag_var_tracking;
246
 
247
enum spu_immediate {
248
  SPU_NONE,
249
  SPU_IL,
250
  SPU_ILA,
251
  SPU_ILH,
252
  SPU_ILHU,
253
  SPU_ORI,
254
  SPU_ORHI,
255
  SPU_ORBI,
256
  SPU_IOHL
257
};
258
enum immediate_class
259
{
260
  IC_POOL,                      /* constant pool */
261
  IC_IL1,                       /* one il* instruction */
262
  IC_IL2,                       /* both ilhu and iohl instructions */
263
  IC_IL1s,                      /* one il* instruction */
264
  IC_IL2s,                      /* both ilhu and iohl instructions */
265
  IC_FSMBI,                     /* the fsmbi instruction */
266
  IC_CPAT,                      /* one of the c*d instructions */
267
  IC_FSMBI2                     /* fsmbi plus 1 other instruction */
268
};
269
 
270
static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
271
static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
272
static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
273
static enum immediate_class classify_immediate (rtx op,
274
                                                enum machine_mode mode);
275
 
276
static enum machine_mode spu_unwind_word_mode (void);
277
 
278
static enum machine_mode
279
spu_libgcc_cmp_return_mode (void);
280
 
281
static enum machine_mode
282
spu_libgcc_shift_count_mode (void);
283
 
284
/* Pointer mode for __ea references.  */
285
#define EAmode (spu_ea_model != 32 ? DImode : SImode)
286
 
287
 
288
/*  Table of machine attributes.  */
289
static const struct attribute_spec spu_attribute_table[] =
290
{
291
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
292
  { "naked",          0, 0, true,  false, false, spu_handle_fndecl_attribute },
293
  { "spu_vector",     0, 0, false, true,  false, spu_handle_vector_attribute },
294
  { NULL,             0, 0, false, false, false, NULL }
295
};
296
 
297
/*  TARGET overrides.  */
298
 
299
#undef TARGET_ADDR_SPACE_POINTER_MODE
300
#define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
301
 
302
#undef TARGET_ADDR_SPACE_ADDRESS_MODE
303
#define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
304
 
305
#undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
306
#define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
307
  spu_addr_space_legitimate_address_p
308
 
309
#undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
310
#define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
311
 
312
#undef TARGET_ADDR_SPACE_SUBSET_P
313
#define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
314
 
315
#undef TARGET_ADDR_SPACE_CONVERT
316
#define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
317
 
318
#undef TARGET_INIT_BUILTINS
319
#define TARGET_INIT_BUILTINS spu_init_builtins
320
#undef TARGET_BUILTIN_DECL
321
#define TARGET_BUILTIN_DECL spu_builtin_decl
322
 
323
#undef TARGET_EXPAND_BUILTIN
324
#define TARGET_EXPAND_BUILTIN spu_expand_builtin
325
 
326
#undef TARGET_UNWIND_WORD_MODE
327
#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
328
 
329
#undef TARGET_LEGITIMIZE_ADDRESS
330
#define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
331
 
332
/* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
333
   and .quad for the debugger.  When it is known that the assembler is fixed,
334
   these can be removed.  */
335
#undef TARGET_ASM_UNALIGNED_SI_OP
336
#define TARGET_ASM_UNALIGNED_SI_OP      "\t.long\t"
337
 
338
#undef TARGET_ASM_ALIGNED_DI_OP
339
#define TARGET_ASM_ALIGNED_DI_OP        "\t.quad\t"
340
 
341
/* The .8byte directive doesn't seem to work well for a 32 bit
342
   architecture. */
343
#undef TARGET_ASM_UNALIGNED_DI_OP
344
#define TARGET_ASM_UNALIGNED_DI_OP NULL
345
 
346
#undef TARGET_RTX_COSTS
347
#define TARGET_RTX_COSTS spu_rtx_costs
348
 
349
#undef TARGET_ADDRESS_COST
350
#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
351
 
352
#undef TARGET_SCHED_ISSUE_RATE
353
#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
354
 
355
#undef TARGET_SCHED_INIT_GLOBAL
356
#define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
357
 
358
#undef TARGET_SCHED_INIT
359
#define TARGET_SCHED_INIT spu_sched_init
360
 
361
#undef TARGET_SCHED_VARIABLE_ISSUE
362
#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
363
 
364
#undef TARGET_SCHED_REORDER
365
#define TARGET_SCHED_REORDER spu_sched_reorder
366
 
367
#undef TARGET_SCHED_REORDER2
368
#define TARGET_SCHED_REORDER2 spu_sched_reorder
369
 
370
#undef TARGET_SCHED_ADJUST_COST
371
#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
372
 
373
#undef  TARGET_ATTRIBUTE_TABLE
374
#define TARGET_ATTRIBUTE_TABLE spu_attribute_table
375
 
376
#undef TARGET_ASM_INTEGER
377
#define TARGET_ASM_INTEGER spu_assemble_integer
378
 
379
#undef TARGET_SCALAR_MODE_SUPPORTED_P
380
#define TARGET_SCALAR_MODE_SUPPORTED_P  spu_scalar_mode_supported_p
381
 
382
#undef TARGET_VECTOR_MODE_SUPPORTED_P
383
#define TARGET_VECTOR_MODE_SUPPORTED_P  spu_vector_mode_supported_p
384
 
385
#undef TARGET_FUNCTION_OK_FOR_SIBCALL
386
#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
387
 
388
#undef TARGET_ASM_GLOBALIZE_LABEL
389
#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
390
 
391
#undef TARGET_PASS_BY_REFERENCE
392
#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
393
 
394
#undef TARGET_MUST_PASS_IN_STACK
395
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
396
 
397
#undef TARGET_BUILD_BUILTIN_VA_LIST
398
#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
399
 
400
#undef TARGET_EXPAND_BUILTIN_VA_START
401
#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
402
 
403
#undef TARGET_SETUP_INCOMING_VARARGS
404
#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
405
 
406
#undef TARGET_MACHINE_DEPENDENT_REORG
407
#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
408
 
409
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
410
#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
411
 
412
#undef TARGET_DEFAULT_TARGET_FLAGS
413
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
414
 
415
#undef TARGET_INIT_LIBFUNCS
416
#define TARGET_INIT_LIBFUNCS spu_init_libfuncs
417
 
418
#undef TARGET_RETURN_IN_MEMORY
419
#define TARGET_RETURN_IN_MEMORY spu_return_in_memory
420
 
421
#undef  TARGET_ENCODE_SECTION_INFO
422
#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
423
 
424
#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
425
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
426
 
427
#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
428
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
429
 
430
#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
431
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
432
 
433
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
434
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
435
 
436
#undef TARGET_VECTOR_ALIGNMENT_REACHABLE
437
#define TARGET_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
438
 
439
#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
440
#define TARGET_VECTORIZE_BUILTIN_VEC_PERM spu_builtin_vec_perm
441
 
442
#undef TARGET_LIBGCC_CMP_RETURN_MODE
443
#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
444
 
445
#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
446
#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
447
 
448
#undef TARGET_SCHED_SMS_RES_MII
449
#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
450
 
451
#undef TARGET_ASM_FILE_START
452
#define TARGET_ASM_FILE_START asm_file_start
453
 
454
#undef TARGET_SECTION_TYPE_FLAGS
455
#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
456
 
457
#undef TARGET_ASM_SELECT_SECTION
458
#define TARGET_ASM_SELECT_SECTION  spu_select_section
459
 
460
#undef TARGET_ASM_UNIQUE_SECTION
461
#define TARGET_ASM_UNIQUE_SECTION  spu_unique_section
462
 
463
#undef TARGET_LEGITIMATE_ADDRESS_P
464
#define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
465
 
466
#undef TARGET_TRAMPOLINE_INIT
467
#define TARGET_TRAMPOLINE_INIT spu_trampoline_init
468
 
469
struct gcc_target targetm = TARGET_INITIALIZER;
470
 
471
void
472
spu_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
473
{
474
  /* Override some of the default param values.  With so many registers
475
     larger values are better for these params.  */
476
  MAX_PENDING_LIST_LENGTH = 128;
477
 
478
  /* With so many registers this is better on by default. */
479
  flag_rename_registers = 1;
480
}
481
 
482
/* Sometimes certain combinations of command options do not make sense
483
   on a particular target machine.  You can define a macro
484
   OVERRIDE_OPTIONS to take account of this. This macro, if defined, is
485
   executed once just after all the command options have been parsed.  */
486
void
487
spu_override_options (void)
488
{
489
  /* Small loops will be unpeeled at -O3.  For SPU it is more important
490
     to keep code small by default.  */
491
  if (!flag_unroll_loops && !flag_peel_loops
492
      && !PARAM_SET_P (PARAM_MAX_COMPLETELY_PEEL_TIMES))
493
    PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES) = 1;
494
 
495
  flag_omit_frame_pointer = 1;
496
 
497
  /* Functions must be 8 byte aligned so we correctly handle dual issue */
498
  if (align_functions < 8)
499
    align_functions = 8;
500
 
501
  spu_hint_dist = 8*4 - spu_max_nops*4;
502
  if (spu_hint_dist < 0)
503
    spu_hint_dist = 0;
504
 
505
  if (spu_fixed_range_string)
506
    fix_range (spu_fixed_range_string);
507
 
508
  /* Determine processor architectural level.  */
509
  if (spu_arch_string)
510
    {
511
      if (strcmp (&spu_arch_string[0], "cell") == 0)
512
        spu_arch = PROCESSOR_CELL;
513
      else if (strcmp (&spu_arch_string[0], "celledp") == 0)
514
        spu_arch = PROCESSOR_CELLEDP;
515
      else
516
        error ("Unknown architecture '%s'", &spu_arch_string[0]);
517
    }
518
 
519
  /* Determine processor to tune for.  */
520
  if (spu_tune_string)
521
    {
522
      if (strcmp (&spu_tune_string[0], "cell") == 0)
523
        spu_tune = PROCESSOR_CELL;
524
      else if (strcmp (&spu_tune_string[0], "celledp") == 0)
525
        spu_tune = PROCESSOR_CELLEDP;
526
      else
527
        error ("Unknown architecture '%s'", &spu_tune_string[0]);
528
    }
529
 
530
  /* Change defaults according to the processor architecture.  */
531
  if (spu_arch == PROCESSOR_CELLEDP)
532
    {
533
      /* If no command line option has been otherwise specified, change
534
         the default to -mno-safe-hints on celledp -- only the original
535
         Cell/B.E. processors require this workaround.  */
536
      if (!(target_flags_explicit & MASK_SAFE_HINTS))
537
        target_flags &= ~MASK_SAFE_HINTS;
538
    }
539
 
540
  REAL_MODE_FORMAT (SFmode) = &spu_single_format;
541
}
542
 
543
/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
544
   struct attribute_spec.handler.  */
545
 
546
/* True if MODE is valid for the target.  By "valid", we mean able to
547
   be manipulated in non-trivial ways.  In particular, this means all
548
   the arithmetic is supported.  */
549
static bool
550
spu_scalar_mode_supported_p (enum machine_mode mode)
551
{
552
  switch (mode)
553
    {
554
    case QImode:
555
    case HImode:
556
    case SImode:
557
    case SFmode:
558
    case DImode:
559
    case TImode:
560
    case DFmode:
561
      return true;
562
 
563
    default:
564
      return false;
565
    }
566
}
567
 
568
/* Similarly for vector modes.  "Supported" here is less strict.  At
569
   least some operations are supported; need to check optabs or builtins
570
   for further details.  */
571
static bool
572
spu_vector_mode_supported_p (enum machine_mode mode)
573
{
574
  switch (mode)
575
    {
576
    case V16QImode:
577
    case V8HImode:
578
    case V4SImode:
579
    case V2DImode:
580
    case V4SFmode:
581
    case V2DFmode:
582
      return true;
583
 
584
    default:
585
      return false;
586
    }
587
}
588
 
589
/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
590
   least significant bytes of the outer mode.  This function returns
591
   TRUE for the SUBREG's where this is correct.  */
592
int
593
valid_subreg (rtx op)
594
{
595
  enum machine_mode om = GET_MODE (op);
596
  enum machine_mode im = GET_MODE (SUBREG_REG (op));
597
  return om != VOIDmode && im != VOIDmode
598
    && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
599
        || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
600
        || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
601
}
602
 
603
/* When insv and ext[sz]v ar passed a TI SUBREG, we want to strip it off
604
   and adjust the start offset.  */
605
static rtx
606
adjust_operand (rtx op, HOST_WIDE_INT * start)
607
{
608
  enum machine_mode mode;
609
  int op_size;
610
  /* Strip any paradoxical SUBREG.  */
611
  if (GET_CODE (op) == SUBREG
612
      && (GET_MODE_BITSIZE (GET_MODE (op))
613
          > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
614
    {
615
      if (start)
616
        *start -=
617
          GET_MODE_BITSIZE (GET_MODE (op)) -
618
          GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
619
      op = SUBREG_REG (op);
620
    }
621
  /* If it is smaller than SI, assure a SUBREG */
622
  op_size = GET_MODE_BITSIZE (GET_MODE (op));
623
  if (op_size < 32)
624
    {
625
      if (start)
626
        *start += 32 - op_size;
627
      op_size = 32;
628
    }
629
  /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
630
  mode = mode_for_size (op_size, MODE_INT, 0);
631
  if (mode != GET_MODE (op))
632
    op = gen_rtx_SUBREG (mode, op, 0);
633
  return op;
634
}
635
 
636
void
637
spu_expand_extv (rtx ops[], int unsignedp)
638
{
639
  rtx dst = ops[0], src = ops[1];
640
  HOST_WIDE_INT width = INTVAL (ops[2]);
641
  HOST_WIDE_INT start = INTVAL (ops[3]);
642
  HOST_WIDE_INT align_mask;
643
  rtx s0, s1, mask, r0;
644
 
645
  gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
646
 
647
  if (MEM_P (src))
648
    {
649
      /* First, determine if we need 1 TImode load or 2.  We need only 1
650
         if the bits being extracted do not cross the alignment boundary
651
         as determined by the MEM and its address. */
652
 
653
      align_mask = -MEM_ALIGN (src);
654
      if ((start & align_mask) == ((start + width - 1) & align_mask))
655
        {
656
          /* Alignment is sufficient for 1 load. */
657
          s0 = gen_reg_rtx (TImode);
658
          r0 = spu_expand_load (s0, 0, src, start / 8);
659
          start &= 7;
660
          if (r0)
661
            emit_insn (gen_rotqby_ti (s0, s0, r0));
662
        }
663
      else
664
        {
665
          /* Need 2 loads. */
666
          s0 = gen_reg_rtx (TImode);
667
          s1 = gen_reg_rtx (TImode);
668
          r0 = spu_expand_load (s0, s1, src, start / 8);
669
          start &= 7;
670
 
671
          gcc_assert (start + width <= 128);
672
          if (r0)
673
            {
674
              rtx r1 = gen_reg_rtx (SImode);
675
              mask = gen_reg_rtx (TImode);
676
              emit_move_insn (mask, GEN_INT (-1));
677
              emit_insn (gen_rotqby_ti (s0, s0, r0));
678
              emit_insn (gen_rotqby_ti (s1, s1, r0));
679
              if (GET_CODE (r0) == CONST_INT)
680
                r1 = GEN_INT (INTVAL (r0) & 15);
681
              else
682
                emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
683
              emit_insn (gen_shlqby_ti (mask, mask, r1));
684
              emit_insn (gen_selb (s0, s1, s0, mask));
685
            }
686
        }
687
 
688
    }
689
  else if (GET_CODE (src) == SUBREG)
690
    {
691
      rtx r = SUBREG_REG (src);
692
      gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
693
      s0 = gen_reg_rtx (TImode);
694
      if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
695
        emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r)));
696
      else
697
        emit_move_insn (s0, src);
698
    }
699
  else
700
    {
701
      gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
702
      s0 = gen_reg_rtx (TImode);
703
      emit_move_insn (s0, src);
704
    }
705
 
706
  /* Now s0 is TImode and contains the bits to extract at start. */
707
 
708
  if (start)
709
    emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
710
 
711
  if (128 - width)
712
    {
713
      tree c = build_int_cst (NULL_TREE, 128 - width);
714
      s0 = expand_shift (RSHIFT_EXPR, TImode, s0, c, s0, unsignedp);
715
    }
716
 
717
  emit_move_insn (dst, s0);
718
}
719
 
720
void
721
spu_expand_insv (rtx ops[])
722
{
723
  HOST_WIDE_INT width = INTVAL (ops[1]);
724
  HOST_WIDE_INT start = INTVAL (ops[2]);
725
  HOST_WIDE_INT maskbits;
726
  enum machine_mode dst_mode, src_mode;
727
  rtx dst = ops[0], src = ops[3];
728
  int dst_size, src_size;
729
  rtx mask;
730
  rtx shift_reg;
731
  int shift;
732
 
733
 
734
  if (GET_CODE (ops[0]) == MEM)
735
    dst = gen_reg_rtx (TImode);
736
  else
737
    dst = adjust_operand (dst, &start);
738
  dst_mode = GET_MODE (dst);
739
  dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
740
 
741
  if (CONSTANT_P (src))
742
    {
743
      enum machine_mode m =
744
        (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
745
      src = force_reg (m, convert_to_mode (m, src, 0));
746
    }
747
  src = adjust_operand (src, 0);
748
  src_mode = GET_MODE (src);
749
  src_size = GET_MODE_BITSIZE (GET_MODE (src));
750
 
751
  mask = gen_reg_rtx (dst_mode);
752
  shift_reg = gen_reg_rtx (dst_mode);
753
  shift = dst_size - start - width;
754
 
755
  /* It's not safe to use subreg here because the compiler assumes
756
     that the SUBREG_REG is right justified in the SUBREG. */
757
  convert_move (shift_reg, src, 1);
758
 
759
  if (shift > 0)
760
    {
761
      switch (dst_mode)
762
        {
763
        case SImode:
764
          emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
765
          break;
766
        case DImode:
767
          emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
768
          break;
769
        case TImode:
770
          emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
771
          break;
772
        default:
773
          abort ();
774
        }
775
    }
776
  else if (shift < 0)
777
    abort ();
778
 
779
  switch (dst_size)
780
    {
781
    case 32:
782
      maskbits = (-1ll << (32 - width - start));
783
      if (start)
784
        maskbits += (1ll << (32 - start));
785
      emit_move_insn (mask, GEN_INT (maskbits));
786
      break;
787
    case 64:
788
      maskbits = (-1ll << (64 - width - start));
789
      if (start)
790
        maskbits += (1ll << (64 - start));
791
      emit_move_insn (mask, GEN_INT (maskbits));
792
      break;
793
    case 128:
794
      {
795
        unsigned char arr[16];
796
        int i = start / 8;
797
        memset (arr, 0, sizeof (arr));
798
        arr[i] = 0xff >> (start & 7);
799
        for (i++; i <= (start + width - 1) / 8; i++)
800
          arr[i] = 0xff;
801
        arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
802
        emit_move_insn (mask, array_to_constant (TImode, arr));
803
      }
804
      break;
805
    default:
806
      abort ();
807
    }
808
  if (GET_CODE (ops[0]) == MEM)
809
    {
810
      rtx low = gen_reg_rtx (SImode);
811
      rtx rotl = gen_reg_rtx (SImode);
812
      rtx mask0 = gen_reg_rtx (TImode);
813
      rtx addr;
814
      rtx addr0;
815
      rtx addr1;
816
      rtx mem;
817
 
818
      addr = force_reg (Pmode, XEXP (ops[0], 0));
819
      addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
820
      emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
821
      emit_insn (gen_negsi2 (rotl, low));
822
      emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
823
      emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
824
      mem = change_address (ops[0], TImode, addr0);
825
      set_mem_alias_set (mem, 0);
826
      emit_move_insn (dst, mem);
827
      emit_insn (gen_selb (dst, dst, shift_reg, mask0));
828
      if (start + width > MEM_ALIGN (ops[0]))
829
        {
830
          rtx shl = gen_reg_rtx (SImode);
831
          rtx mask1 = gen_reg_rtx (TImode);
832
          rtx dst1 = gen_reg_rtx (TImode);
833
          rtx mem1;
834
          addr1 = plus_constant (addr, 16);
835
          addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
836
          emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
837
          emit_insn (gen_shlqby_ti (mask1, mask, shl));
838
          mem1 = change_address (ops[0], TImode, addr1);
839
          set_mem_alias_set (mem1, 0);
840
          emit_move_insn (dst1, mem1);
841
          emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
842
          emit_move_insn (mem1, dst1);
843
        }
844
      emit_move_insn (mem, dst);
845
    }
846
  else
847
    emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
848
}
849
 
850
 
851
int
852
spu_expand_block_move (rtx ops[])
853
{
854
  HOST_WIDE_INT bytes, align, offset;
855
  rtx src, dst, sreg, dreg, target;
856
  int i;
857
  if (GET_CODE (ops[2]) != CONST_INT
858
      || GET_CODE (ops[3]) != CONST_INT
859
      || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
860
    return 0;
861
 
862
  bytes = INTVAL (ops[2]);
863
  align = INTVAL (ops[3]);
864
 
865
  if (bytes <= 0)
866
    return 1;
867
 
868
  dst = ops[0];
869
  src = ops[1];
870
 
871
  if (align == 16)
872
    {
873
      for (offset = 0; offset + 16 <= bytes; offset += 16)
874
        {
875
          dst = adjust_address (ops[0], V16QImode, offset);
876
          src = adjust_address (ops[1], V16QImode, offset);
877
          emit_move_insn (dst, src);
878
        }
879
      if (offset < bytes)
880
        {
881
          rtx mask;
882
          unsigned char arr[16] = { 0 };
883
          for (i = 0; i < bytes - offset; i++)
884
            arr[i] = 0xff;
885
          dst = adjust_address (ops[0], V16QImode, offset);
886
          src = adjust_address (ops[1], V16QImode, offset);
887
          mask = gen_reg_rtx (V16QImode);
888
          sreg = gen_reg_rtx (V16QImode);
889
          dreg = gen_reg_rtx (V16QImode);
890
          target = gen_reg_rtx (V16QImode);
891
          emit_move_insn (mask, array_to_constant (V16QImode, arr));
892
          emit_move_insn (dreg, dst);
893
          emit_move_insn (sreg, src);
894
          emit_insn (gen_selb (target, dreg, sreg, mask));
895
          emit_move_insn (dst, target);
896
        }
897
      return 1;
898
    }
899
  return 0;
900
}
901
 
902
/* Kind of compare instruction to use.  The numeric values are used as
   the column index into spu_comp_icode, so their order matters.  */
enum spu_comp_code
{
  SPU_EQ = 0,   /* equal */
  SPU_GT = 1,   /* signed greater-than */
  SPU_GTU = 2   /* unsigned greater-than */
};
904
 
905
int spu_comp_icode[12][3] = {
906
 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
907
 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
908
 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
909
 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
910
 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
911
 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
912
 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
913
 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
914
 {CODE_FOR_ceq_v8hi,  CODE_FOR_cgt_v8hi,  CODE_FOR_clgt_v8hi},
915
 {CODE_FOR_ceq_v4si,  CODE_FOR_cgt_v4si,  CODE_FOR_clgt_v4si},
916
 {CODE_FOR_ceq_v4sf,  CODE_FOR_cgt_v4sf, 0},
917
 {CODE_FOR_ceq_v2df,  CODE_FOR_cgt_v2df, 0},
918
};
919
 
920
/* Generate a compare for CODE.  Return a brand-new rtx that represents
921
   the result of the compare.   GCC can figure this out too if we don't
922
   provide all variations of compares, but GCC always wants to use
923
   WORD_MODE, we can generate better code in most cases if we do it
924
   ourselves.  */
925
void
926
spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
927
{
928
  int reverse_compare = 0;
929
  int reverse_test = 0;
930
  rtx compare_result, eq_result;
931
  rtx comp_rtx, eq_rtx;
932
  enum machine_mode comp_mode;
933
  enum machine_mode op_mode;
934
  enum spu_comp_code scode, eq_code;
935
  enum insn_code ior_code;
936
  enum rtx_code code = GET_CODE (cmp);
937
  rtx op0 = XEXP (cmp, 0);
938
  rtx op1 = XEXP (cmp, 1);
939
  int index;
940
  int eq_test = 0;
941
 
942
  /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
943
     and so on, to keep the constant in operand 1. */
944
  if (GET_CODE (op1) == CONST_INT)
945
    {
946
      HOST_WIDE_INT val = INTVAL (op1) - 1;
947
      if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
948
        switch (code)
949
          {
950
          case GE:
951
            op1 = GEN_INT (val);
952
            code = GT;
953
            break;
954
          case LT:
955
            op1 = GEN_INT (val);
956
            code = LE;
957
            break;
958
          case GEU:
959
            op1 = GEN_INT (val);
960
            code = GTU;
961
            break;
962
          case LTU:
963
            op1 = GEN_INT (val);
964
            code = LEU;
965
            break;
966
          default:
967
            break;
968
          }
969
    }
970
 
971
  comp_mode = SImode;
972
  op_mode = GET_MODE (op0);
973
 
974
  switch (code)
975
    {
976
    case GE:
977
      scode = SPU_GT;
978
      if (HONOR_NANS (op_mode))
979
        {
980
          reverse_compare = 0;
981
          reverse_test = 0;
982
          eq_test = 1;
983
          eq_code = SPU_EQ;
984
        }
985
      else
986
        {
987
          reverse_compare = 1;
988
          reverse_test = 1;
989
        }
990
      break;
991
    case LE:
992
      scode = SPU_GT;
993
      if (HONOR_NANS (op_mode))
994
        {
995
          reverse_compare = 1;
996
          reverse_test = 0;
997
          eq_test = 1;
998
          eq_code = SPU_EQ;
999
        }
1000
      else
1001
        {
1002
          reverse_compare = 0;
1003
          reverse_test = 1;
1004
        }
1005
      break;
1006
    case LT:
1007
      reverse_compare = 1;
1008
      reverse_test = 0;
1009
      scode = SPU_GT;
1010
      break;
1011
    case GEU:
1012
      reverse_compare = 1;
1013
      reverse_test = 1;
1014
      scode = SPU_GTU;
1015
      break;
1016
    case LEU:
1017
      reverse_compare = 0;
1018
      reverse_test = 1;
1019
      scode = SPU_GTU;
1020
      break;
1021
    case LTU:
1022
      reverse_compare = 1;
1023
      reverse_test = 0;
1024
      scode = SPU_GTU;
1025
      break;
1026
    case NE:
1027
      reverse_compare = 0;
1028
      reverse_test = 1;
1029
      scode = SPU_EQ;
1030
      break;
1031
 
1032
    case EQ:
1033
      scode = SPU_EQ;
1034
      break;
1035
    case GT:
1036
      scode = SPU_GT;
1037
      break;
1038
    case GTU:
1039
      scode = SPU_GTU;
1040
      break;
1041
    default:
1042
      scode = SPU_EQ;
1043
      break;
1044
    }
1045
 
1046
  switch (op_mode)
1047
    {
1048
    case QImode:
1049
      index = 0;
1050
      comp_mode = QImode;
1051
      break;
1052
    case HImode:
1053
      index = 1;
1054
      comp_mode = HImode;
1055
      break;
1056
    case SImode:
1057
      index = 2;
1058
      break;
1059
    case DImode:
1060
      index = 3;
1061
      break;
1062
    case TImode:
1063
      index = 4;
1064
      break;
1065
    case SFmode:
1066
      index = 5;
1067
      break;
1068
    case DFmode:
1069
      index = 6;
1070
      break;
1071
    case V16QImode:
1072
      index = 7;
1073
      comp_mode = op_mode;
1074
      break;
1075
    case V8HImode:
1076
      index = 8;
1077
      comp_mode = op_mode;
1078
      break;
1079
    case V4SImode:
1080
      index = 9;
1081
      comp_mode = op_mode;
1082
      break;
1083
    case V4SFmode:
1084
      index = 10;
1085
      comp_mode = V4SImode;
1086
      break;
1087
    case V2DFmode:
1088
      index = 11;
1089
      comp_mode = V2DImode;
1090
      break;
1091
    case V2DImode:
1092
    default:
1093
      abort ();
1094
    }
1095
 
1096
  if (GET_MODE (op1) == DFmode
1097
      && (scode != SPU_GT && scode != SPU_EQ))
1098
    abort ();
1099
 
1100
  if (is_set == 0 && op1 == const0_rtx
1101
      && (GET_MODE (op0) == SImode
1102
          || GET_MODE (op0) == HImode) && scode == SPU_EQ)
1103
    {
1104
      /* Don't need to set a register with the result when we are
1105
         comparing against zero and branching. */
1106
      reverse_test = !reverse_test;
1107
      compare_result = op0;
1108
    }
1109
  else
1110
    {
1111
      compare_result = gen_reg_rtx (comp_mode);
1112
 
1113
      if (reverse_compare)
1114
        {
1115
          rtx t = op1;
1116
          op1 = op0;
1117
          op0 = t;
1118
        }
1119
 
1120
      if (spu_comp_icode[index][scode] == 0)
1121
        abort ();
1122
 
1123
      if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
1124
          (op0, op_mode))
1125
        op0 = force_reg (op_mode, op0);
1126
      if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
1127
          (op1, op_mode))
1128
        op1 = force_reg (op_mode, op1);
1129
      comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
1130
                                                         op0, op1);
1131
      if (comp_rtx == 0)
1132
        abort ();
1133
      emit_insn (comp_rtx);
1134
 
1135
      if (eq_test)
1136
        {
1137
          eq_result = gen_reg_rtx (comp_mode);
1138
          eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
1139
                                                             op0, op1);
1140
          if (eq_rtx == 0)
1141
            abort ();
1142
          emit_insn (eq_rtx);
1143
          ior_code = ior_optab->handlers[(int)comp_mode].insn_code;
1144
          gcc_assert (ior_code != CODE_FOR_nothing);
1145
          emit_insn (GEN_FCN (ior_code)
1146
                     (compare_result, compare_result, eq_result));
1147
        }
1148
    }
1149
 
1150
  if (is_set == 0)
1151
    {
1152
      rtx bcomp;
1153
      rtx loc_ref;
1154
 
1155
      /* We don't have branch on QI compare insns, so we convert the
1156
         QI compare result to a HI result. */
1157
      if (comp_mode == QImode)
1158
        {
1159
          rtx old_res = compare_result;
1160
          compare_result = gen_reg_rtx (HImode);
1161
          comp_mode = HImode;
1162
          emit_insn (gen_extendqihi2 (compare_result, old_res));
1163
        }
1164
 
1165
      if (reverse_test)
1166
        bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
1167
      else
1168
        bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
1169
 
1170
      loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
1171
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
1172
                                   gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
1173
                                                         loc_ref, pc_rtx)));
1174
    }
1175
  else if (is_set == 2)
1176
    {
1177
      rtx target = operands[0];
1178
      int compare_size = GET_MODE_BITSIZE (comp_mode);
1179
      int target_size = GET_MODE_BITSIZE (GET_MODE (target));
1180
      enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
1181
      rtx select_mask;
1182
      rtx op_t = operands[2];
1183
      rtx op_f = operands[3];
1184
 
1185
      /* The result of the comparison can be SI, HI or QI mode.  Create a
1186
         mask based on that result. */
1187
      if (target_size > compare_size)
1188
        {
1189
          select_mask = gen_reg_rtx (mode);
1190
          emit_insn (gen_extend_compare (select_mask, compare_result));
1191
        }
1192
      else if (target_size < compare_size)
1193
        select_mask =
1194
          gen_rtx_SUBREG (mode, compare_result,
1195
                          (compare_size - target_size) / BITS_PER_UNIT);
1196
      else if (comp_mode != mode)
1197
        select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
1198
      else
1199
        select_mask = compare_result;
1200
 
1201
      if (GET_MODE (target) != GET_MODE (op_t)
1202
          || GET_MODE (target) != GET_MODE (op_f))
1203
        abort ();
1204
 
1205
      if (reverse_test)
1206
        emit_insn (gen_selb (target, op_t, op_f, select_mask));
1207
      else
1208
        emit_insn (gen_selb (target, op_f, op_t, select_mask));
1209
    }
1210
  else
1211
    {
1212
      rtx target = operands[0];
1213
      if (reverse_test)
1214
        emit_insn (gen_rtx_SET (VOIDmode, compare_result,
1215
                                gen_rtx_NOT (comp_mode, compare_result)));
1216
      if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
1217
        emit_insn (gen_extendhisi2 (target, compare_result));
1218
      else if (GET_MODE (target) == SImode
1219
               && GET_MODE (compare_result) == QImode)
1220
        emit_insn (gen_extend_compare (target, compare_result));
1221
      else
1222
        emit_move_insn (target, compare_result);
1223
    }
1224
}
1225
 
1226
HOST_WIDE_INT
1227
const_double_to_hwint (rtx x)
1228
{
1229
  HOST_WIDE_INT val;
1230
  REAL_VALUE_TYPE rv;
1231
  if (GET_MODE (x) == SFmode)
1232
    {
1233
      REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1234
      REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1235
    }
1236
  else if (GET_MODE (x) == DFmode)
1237
    {
1238
      long l[2];
1239
      REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1240
      REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1241
      val = l[0];
1242
      val = (val << 32) | (l[1] & 0xffffffff);
1243
    }
1244
  else
1245
    abort ();
1246
  return val;
1247
}
1248
 
1249
rtx
1250
hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1251
{
1252
  long tv[2];
1253
  REAL_VALUE_TYPE rv;
1254
  gcc_assert (mode == SFmode || mode == DFmode);
1255
 
1256
  if (mode == SFmode)
1257
    tv[0] = (v << 32) >> 32;
1258
  else if (mode == DFmode)
1259
    {
1260
      tv[1] = (v << 32) >> 32;
1261
      tv[0] = v >> 32;
1262
    }
1263
  real_from_target (&rv, tv, mode);
1264
  return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
1265
}
1266
 
1267
void
1268
print_operand_address (FILE * file, register rtx addr)
1269
{
1270
  rtx reg;
1271
  rtx offset;
1272
 
1273
  if (GET_CODE (addr) == AND
1274
      && GET_CODE (XEXP (addr, 1)) == CONST_INT
1275
      && INTVAL (XEXP (addr, 1)) == -16)
1276
    addr = XEXP (addr, 0);
1277
 
1278
  switch (GET_CODE (addr))
1279
    {
1280
    case REG:
1281
      fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1282
      break;
1283
 
1284
    case PLUS:
1285
      reg = XEXP (addr, 0);
1286
      offset = XEXP (addr, 1);
1287
      if (GET_CODE (offset) == REG)
1288
        {
1289
          fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1290
                   reg_names[REGNO (offset)]);
1291
        }
1292
      else if (GET_CODE (offset) == CONST_INT)
1293
        {
1294
          fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1295
                   INTVAL (offset), reg_names[REGNO (reg)]);
1296
        }
1297
      else
1298
        abort ();
1299
      break;
1300
 
1301
    case CONST:
1302
    case LABEL_REF:
1303
    case SYMBOL_REF:
1304
    case CONST_INT:
1305
      output_addr_const (file, addr);
1306
      break;
1307
 
1308
    default:
1309
      debug_rtx (addr);
1310
      abort ();
1311
    }
1312
}
1313
 
1314
void
1315
print_operand (FILE * file, rtx x, int code)
1316
{
1317
  enum machine_mode mode = GET_MODE (x);
1318
  HOST_WIDE_INT val;
1319
  unsigned char arr[16];
1320
  int xcode = GET_CODE (x);
1321
  int i, info;
1322
  if (GET_MODE (x) == VOIDmode)
1323
    switch (code)
1324
      {
1325
      case 'L':                 /* 128 bits, signed */
1326
      case 'm':                 /* 128 bits, signed */
1327
      case 'T':                 /* 128 bits, signed */
1328
      case 't':                 /* 128 bits, signed */
1329
        mode = TImode;
1330
        break;
1331
      case 'K':                 /* 64 bits, signed */
1332
      case 'k':                 /* 64 bits, signed */
1333
      case 'D':                 /* 64 bits, signed */
1334
      case 'd':                 /* 64 bits, signed */
1335
        mode = DImode;
1336
        break;
1337
      case 'J':                 /* 32 bits, signed */
1338
      case 'j':                 /* 32 bits, signed */
1339
      case 's':                 /* 32 bits, signed */
1340
      case 'S':                 /* 32 bits, signed */
1341
        mode = SImode;
1342
        break;
1343
      }
1344
  switch (code)
1345
    {
1346
 
1347
    case 'j':                   /* 32 bits, signed */
1348
    case 'k':                   /* 64 bits, signed */
1349
    case 'm':                   /* 128 bits, signed */
1350
      if (xcode == CONST_INT
1351
          || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1352
        {
1353
          gcc_assert (logical_immediate_p (x, mode));
1354
          constant_to_array (mode, x, arr);
1355
          val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1356
          val = trunc_int_for_mode (val, SImode);
1357
          switch (which_logical_immediate (val))
1358
          {
1359
          case SPU_ORI:
1360
            break;
1361
          case SPU_ORHI:
1362
            fprintf (file, "h");
1363
            break;
1364
          case SPU_ORBI:
1365
            fprintf (file, "b");
1366
            break;
1367
          default:
1368
            gcc_unreachable();
1369
          }
1370
        }
1371
      else
1372
        gcc_unreachable();
1373
      return;
1374
 
1375
    case 'J':                   /* 32 bits, signed */
1376
    case 'K':                   /* 64 bits, signed */
1377
    case 'L':                   /* 128 bits, signed */
1378
      if (xcode == CONST_INT
1379
          || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1380
        {
1381
          gcc_assert (logical_immediate_p (x, mode)
1382
                      || iohl_immediate_p (x, mode));
1383
          constant_to_array (mode, x, arr);
1384
          val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1385
          val = trunc_int_for_mode (val, SImode);
1386
          switch (which_logical_immediate (val))
1387
          {
1388
          case SPU_ORI:
1389
          case SPU_IOHL:
1390
            break;
1391
          case SPU_ORHI:
1392
            val = trunc_int_for_mode (val, HImode);
1393
            break;
1394
          case SPU_ORBI:
1395
            val = trunc_int_for_mode (val, QImode);
1396
            break;
1397
          default:
1398
            gcc_unreachable();
1399
          }
1400
          fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1401
        }
1402
      else
1403
        gcc_unreachable();
1404
      return;
1405
 
1406
    case 't':                   /* 128 bits, signed */
1407
    case 'd':                   /* 64 bits, signed */
1408
    case 's':                   /* 32 bits, signed */
1409
      if (CONSTANT_P (x))
1410
        {
1411
          enum immediate_class c = classify_immediate (x, mode);
1412
          switch (c)
1413
            {
1414
            case IC_IL1:
1415
              constant_to_array (mode, x, arr);
1416
              val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1417
              val = trunc_int_for_mode (val, SImode);
1418
              switch (which_immediate_load (val))
1419
                {
1420
                case SPU_IL:
1421
                  break;
1422
                case SPU_ILA:
1423
                  fprintf (file, "a");
1424
                  break;
1425
                case SPU_ILH:
1426
                  fprintf (file, "h");
1427
                  break;
1428
                case SPU_ILHU:
1429
                  fprintf (file, "hu");
1430
                  break;
1431
                default:
1432
                  gcc_unreachable ();
1433
                }
1434
              break;
1435
            case IC_CPAT:
1436
              constant_to_array (mode, x, arr);
1437
              cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1438
              if (info == 1)
1439
                fprintf (file, "b");
1440
              else if (info == 2)
1441
                fprintf (file, "h");
1442
              else if (info == 4)
1443
                fprintf (file, "w");
1444
              else if (info == 8)
1445
                fprintf (file, "d");
1446
              break;
1447
            case IC_IL1s:
1448
              if (xcode == CONST_VECTOR)
1449
                {
1450
                  x = CONST_VECTOR_ELT (x, 0);
1451
                  xcode = GET_CODE (x);
1452
                }
1453
              if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1454
                fprintf (file, "a");
1455
              else if (xcode == HIGH)
1456
                fprintf (file, "hu");
1457
              break;
1458
            case IC_FSMBI:
1459
            case IC_FSMBI2:
1460
            case IC_IL2:
1461
            case IC_IL2s:
1462
            case IC_POOL:
1463
              abort ();
1464
            }
1465
        }
1466
      else
1467
        gcc_unreachable ();
1468
      return;
1469
 
1470
    case 'T':                   /* 128 bits, signed */
1471
    case 'D':                   /* 64 bits, signed */
1472
    case 'S':                   /* 32 bits, signed */
1473
      if (CONSTANT_P (x))
1474
        {
1475
          enum immediate_class c = classify_immediate (x, mode);
1476
          switch (c)
1477
            {
1478
            case IC_IL1:
1479
              constant_to_array (mode, x, arr);
1480
              val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1481
              val = trunc_int_for_mode (val, SImode);
1482
              switch (which_immediate_load (val))
1483
                {
1484
                case SPU_IL:
1485
                case SPU_ILA:
1486
                  break;
1487
                case SPU_ILH:
1488
                case SPU_ILHU:
1489
                  val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1490
                  break;
1491
                default:
1492
                  gcc_unreachable ();
1493
                }
1494
              fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1495
              break;
1496
            case IC_FSMBI:
1497
              constant_to_array (mode, x, arr);
1498
              val = 0;
1499
              for (i = 0; i < 16; i++)
1500
                {
1501
                  val <<= 1;
1502
                  val |= arr[i] & 1;
1503
                }
1504
              print_operand (file, GEN_INT (val), 0);
1505
              break;
1506
            case IC_CPAT:
1507
              constant_to_array (mode, x, arr);
1508
              cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1509
              fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
1510
              break;
1511
            case IC_IL1s:
1512
              if (xcode == HIGH)
1513
                x = XEXP (x, 0);
1514
              if (GET_CODE (x) == CONST_VECTOR)
1515
                x = CONST_VECTOR_ELT (x, 0);
1516
              output_addr_const (file, x);
1517
              if (xcode == HIGH)
1518
                fprintf (file, "@h");
1519
              break;
1520
            case IC_IL2:
1521
            case IC_IL2s:
1522
            case IC_FSMBI2:
1523
            case IC_POOL:
1524
              abort ();
1525
            }
1526
        }
1527
      else
1528
        gcc_unreachable ();
1529
      return;
1530
 
1531
    case 'C':
1532
      if (xcode == CONST_INT)
1533
        {
1534
          /* Only 4 least significant bits are relevant for generate
1535
             control word instructions. */
1536
          fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1537
          return;
1538
        }
1539
      break;
1540
 
1541
    case 'M':                   /* print code for c*d */
1542
      if (GET_CODE (x) == CONST_INT)
1543
        switch (INTVAL (x))
1544
          {
1545
          case 1:
1546
            fprintf (file, "b");
1547
            break;
1548
          case 2:
1549
            fprintf (file, "h");
1550
            break;
1551
          case 4:
1552
            fprintf (file, "w");
1553
            break;
1554
          case 8:
1555
            fprintf (file, "d");
1556
            break;
1557
          default:
1558
            gcc_unreachable();
1559
          }
1560
      else
1561
        gcc_unreachable();
1562
      return;
1563
 
1564
    case 'N':                   /* Negate the operand */
1565
      if (xcode == CONST_INT)
1566
        fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1567
      else if (xcode == CONST_VECTOR)
1568
        fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1569
                 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1570
      return;
1571
 
1572
    case 'I':                   /* enable/disable interrupts */
1573
      if (xcode == CONST_INT)
1574
        fprintf (file, "%s",  INTVAL (x) == 0 ? "d" : "e");
1575
      return;
1576
 
1577
    case 'b':                   /* branch modifiers */
1578
      if (xcode == REG)
1579
        fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1580
      else if (COMPARISON_P (x))
1581
        fprintf (file, "%s", xcode == NE ? "n" : "");
1582
      return;
1583
 
1584
    case 'i':                   /* indirect call */
1585
      if (xcode == MEM)
1586
        {
1587
          if (GET_CODE (XEXP (x, 0)) == REG)
1588
            /* Used in indirect function calls. */
1589
            fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1590
          else
1591
            output_address (XEXP (x, 0));
1592
        }
1593
      return;
1594
 
1595
    case 'p':                   /* load/store */
1596
      if (xcode == MEM)
1597
        {
1598
          x = XEXP (x, 0);
1599
          xcode = GET_CODE (x);
1600
        }
1601
      if (xcode == AND)
1602
        {
1603
          x = XEXP (x, 0);
1604
          xcode = GET_CODE (x);
1605
        }
1606
      if (xcode == REG)
1607
        fprintf (file, "d");
1608
      else if (xcode == CONST_INT)
1609
        fprintf (file, "a");
1610
      else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1611
        fprintf (file, "r");
1612
      else if (xcode == PLUS || xcode == LO_SUM)
1613
        {
1614
          if (GET_CODE (XEXP (x, 1)) == REG)
1615
            fprintf (file, "x");
1616
          else
1617
            fprintf (file, "d");
1618
        }
1619
      return;
1620
 
1621
    case 'e':
1622
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1623
      val &= 0x7;
1624
      output_addr_const (file, GEN_INT (val));
1625
      return;
1626
 
1627
    case 'f':
1628
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1629
      val &= 0x1f;
1630
      output_addr_const (file, GEN_INT (val));
1631
      return;
1632
 
1633
    case 'g':
1634
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1635
      val &= 0x3f;
1636
      output_addr_const (file, GEN_INT (val));
1637
      return;
1638
 
1639
    case 'h':
1640
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1641
      val = (val >> 3) & 0x1f;
1642
      output_addr_const (file, GEN_INT (val));
1643
      return;
1644
 
1645
    case 'E':
1646
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1647
      val = -val;
1648
      val &= 0x7;
1649
      output_addr_const (file, GEN_INT (val));
1650
      return;
1651
 
1652
    case 'F':
1653
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1654
      val = -val;
1655
      val &= 0x1f;
1656
      output_addr_const (file, GEN_INT (val));
1657
      return;
1658
 
1659
    case 'G':
1660
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1661
      val = -val;
1662
      val &= 0x3f;
1663
      output_addr_const (file, GEN_INT (val));
1664
      return;
1665
 
1666
    case 'H':
1667
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1668
      val = -(val & -8ll);
1669
      val = (val >> 3) & 0x1f;
1670
      output_addr_const (file, GEN_INT (val));
1671
      return;
1672
 
1673
    case 'v':
1674
    case 'w':
1675
      constant_to_array (mode, x, arr);
1676
      val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1677
      output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1678
      return;
1679
 
1680
    case 0:
1681
      if (xcode == REG)
1682
        fprintf (file, "%s", reg_names[REGNO (x)]);
1683
      else if (xcode == MEM)
1684
        output_address (XEXP (x, 0));
1685
      else if (xcode == CONST_VECTOR)
1686
        print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
1687
      else
1688
        output_addr_const (file, x);
1689
      return;
1690
 
1691
      /* unused letters
1692
                      o qr  u   yz
1693
        AB            OPQR  UVWXYZ */
1694
    default:
1695
      output_operand_lossage ("invalid %%xn code");
1696
    }
1697
  gcc_unreachable ();
1698
}
1699
 
1700
/* Per-register flags indexed by hard register number.  need_to_save_reg
   below requests a save for a register that is ever live and whose
   entry here is zero.  */
extern char call_used_regs[];
1701
 
1702
/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1703
   caller saved register.  For leaf functions it is more efficient to
1704
   use a volatile register because we won't need to save and restore the
1705
   pic register.  This routine is only valid after register allocation
1706
   is completed, so we can pick an unused register.  */
1707
static rtx
1708
get_pic_reg (void)
1709
{
1710
  rtx pic_reg = pic_offset_table_rtx;
1711
  if (!reload_completed && !reload_in_progress)
1712
    abort ();
1713
  if (current_function_is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
1714
    pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
1715
  return pic_reg;
1716
}
1717
 
1718
/* Split constant addresses to handle cases that are too large.
1719
   Add in the pic register when in PIC mode.
1720
   Split immediates that require more than 1 instruction. */
1721
int
1722
spu_split_immediate (rtx * ops)
1723
{
1724
  enum machine_mode mode = GET_MODE (ops[0]);
1725
  enum immediate_class c = classify_immediate (ops[1], mode);
1726
 
1727
  switch (c)
1728
    {
1729
    case IC_IL2:
1730
      {
1731
        unsigned char arrhi[16];
1732
        unsigned char arrlo[16];
1733
        rtx to, temp, hi, lo;
1734
        int i;
1735
        enum machine_mode imode = mode;
1736
        /* We need to do reals as ints because the constant used in the
1737
           IOR might not be a legitimate real constant. */
1738
        imode = int_mode_for_mode (mode);
1739
        constant_to_array (mode, ops[1], arrhi);
1740
        if (imode != mode)
1741
          to = simplify_gen_subreg (imode, ops[0], mode, 0);
1742
        else
1743
          to = ops[0];
1744
        temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
1745
        for (i = 0; i < 16; i += 4)
1746
          {
1747
            arrlo[i + 2] = arrhi[i + 2];
1748
            arrlo[i + 3] = arrhi[i + 3];
1749
            arrlo[i + 0] = arrlo[i + 1] = 0;
1750
            arrhi[i + 2] = arrhi[i + 3] = 0;
1751
          }
1752
        hi = array_to_constant (imode, arrhi);
1753
        lo = array_to_constant (imode, arrlo);
1754
        emit_move_insn (temp, hi);
1755
        emit_insn (gen_rtx_SET
1756
                   (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
1757
        return 1;
1758
      }
1759
    case IC_FSMBI2:
1760
      {
1761
        unsigned char arr_fsmbi[16];
1762
        unsigned char arr_andbi[16];
1763
        rtx to, reg_fsmbi, reg_and;
1764
        int i;
1765
        enum machine_mode imode = mode;
1766
        /* We need to do reals as ints because the constant used in the
1767
         * AND might not be a legitimate real constant. */
1768
        imode = int_mode_for_mode (mode);
1769
        constant_to_array (mode, ops[1], arr_fsmbi);
1770
        if (imode != mode)
1771
          to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1772
        else
1773
          to = ops[0];
1774
        for (i = 0; i < 16; i++)
1775
          if (arr_fsmbi[i] != 0)
1776
            {
1777
              arr_andbi[0] = arr_fsmbi[i];
1778
              arr_fsmbi[i] = 0xff;
1779
            }
1780
        for (i = 1; i < 16; i++)
1781
          arr_andbi[i] = arr_andbi[0];
1782
        reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1783
        reg_and = array_to_constant (imode, arr_andbi);
1784
        emit_move_insn (to, reg_fsmbi);
1785
        emit_insn (gen_rtx_SET
1786
                   (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1787
        return 1;
1788
      }
1789
    case IC_POOL:
1790
      if (reload_in_progress || reload_completed)
1791
        {
1792
          rtx mem = force_const_mem (mode, ops[1]);
1793
          if (TARGET_LARGE_MEM)
1794
            {
1795
              rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1796
              emit_move_insn (addr, XEXP (mem, 0));
1797
              mem = replace_equiv_address (mem, addr);
1798
            }
1799
          emit_move_insn (ops[0], mem);
1800
          return 1;
1801
        }
1802
      break;
1803
    case IC_IL1s:
1804
    case IC_IL2s:
1805
      if (reload_completed && GET_CODE (ops[1]) != HIGH)
1806
        {
1807
          if (c == IC_IL2s)
1808
            {
1809
              emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1810
              emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
1811
            }
1812
          else if (flag_pic)
1813
            emit_insn (gen_pic (ops[0], ops[1]));
1814
          if (flag_pic)
1815
            {
1816
              rtx pic_reg = get_pic_reg ();
1817
              emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
1818
              crtl->uses_pic_offset_table = 1;
1819
            }
1820
          return flag_pic || c == IC_IL2s;
1821
        }
1822
      break;
1823
    case IC_IL1:
1824
    case IC_FSMBI:
1825
    case IC_CPAT:
1826
      break;
1827
    }
1828
  return 0;
1829
}
1830
 
1831
/* SAVING is TRUE when we are generating the actual load and store
1832
   instructions for REGNO.  When determining the size of the stack
1833
   needed for saving register we must allocate enough space for the
1834
   worst case, because we don't always have the information early enough
1835
   to not allocate it.  But we can at least eliminate the actual loads
1836
   and stores during the prologue/epilogue.  */
1837
static int
1838
need_to_save_reg (int regno, int saving)
1839
{
1840
  if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1841
    return 1;
1842
  if (flag_pic
1843
      && regno == PIC_OFFSET_TABLE_REGNUM
1844
      && (!saving || crtl->uses_pic_offset_table)
1845
      && (!saving
1846
          || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
1847
    return 1;
1848
  return 0;
1849
}
1850
 
1851
/* This function is only correct starting with local register
1852
   allocation */
1853
int
1854
spu_saved_regs_size (void)
1855
{
1856
  int reg_save_size = 0;
1857
  int regno;
1858
 
1859
  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1860
    if (need_to_save_reg (regno, 0))
1861
      reg_save_size += 0x10;
1862
  return reg_save_size;
1863
}
1864
 
1865
static rtx
1866
frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1867
{
1868
  rtx reg = gen_rtx_REG (V4SImode, regno);
1869
  rtx mem =
1870
    gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1871
  return emit_insn (gen_movv4si (mem, reg));
1872
}
1873
 
1874
static rtx
1875
frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1876
{
1877
  rtx reg = gen_rtx_REG (V4SImode, regno);
1878
  rtx mem =
1879
    gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1880
  return emit_insn (gen_movv4si (reg, mem));
1881
}
1882
 
1883
/* This happens after reload, so we need to expand it.  */
1884
static rtx
1885
frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1886
{
1887
  rtx insn;
1888
  if (satisfies_constraint_K (GEN_INT (imm)))
1889
    {
1890
      insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1891
    }
1892
  else
1893
    {
1894
      emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1895
      insn = emit_insn (gen_addsi3 (dst, src, scratch));
1896
      if (REGNO (src) == REGNO (scratch))
1897
        abort ();
1898
    }
1899
  return insn;
1900
}
1901
 
1902
/* Return nonzero if this function is known to have a null epilogue.  */
1903
 
1904
int
1905
direct_return (void)
1906
{
1907
  if (reload_completed)
1908
    {
1909
      if (cfun->static_chain_decl == 0
1910
          && (spu_saved_regs_size ()
1911
              + get_frame_size ()
1912
              + crtl->outgoing_args_size
1913
              + crtl->args.pretend_args_size == 0)
1914
          && current_function_is_leaf)
1915
        return 1;
1916
    }
1917
  return 0;
1918
}
1919
 
1920
/*
1921
   The stack frame looks like this:
1922
         +-------------+
1923
         |  incoming   |
1924
         |    args     |
1925
   AP -> +-------------+
1926
         | $lr save    |
1927
         +-------------+
1928
 prev SP | back chain  |
1929
         +-------------+
1930
         |  var args   |
1931
         |  reg save   | crtl->args.pretend_args_size bytes
1932
         +-------------+
1933
         |    ...      |
1934
         | saved regs  | spu_saved_regs_size() bytes
1935
   FP -> +-------------+
1936
         |    ...      |
1937
         |   vars      | get_frame_size()  bytes
1938
  HFP -> +-------------+
1939
         |    ...      |
1940
         |  outgoing   |
1941
         |    args     | crtl->outgoing_args_size bytes
1942
         +-------------+
1943
         | $lr of next |
1944
         |   frame     |
1945
         +-------------+
1946
         | back chain  |
1947
   SP -> +-------------+
1948
 
1949
*/
1950
void
1951
spu_expand_prologue (void)
1952
{
1953
  HOST_WIDE_INT size = get_frame_size (), offset, regno;
1954
  HOST_WIDE_INT total_size;
1955
  HOST_WIDE_INT saved_regs_size;
1956
  rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1957
  rtx scratch_reg_0, scratch_reg_1;
1958
  rtx insn, real;
1959
 
1960
  if (flag_pic && optimize == 0)
1961
    crtl->uses_pic_offset_table = 1;
1962
 
1963
  if (spu_naked_function_p (current_function_decl))
1964
    return;
1965
 
1966
  scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1967
  scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1968
 
1969
  saved_regs_size = spu_saved_regs_size ();
1970
  total_size = size + saved_regs_size
1971
    + crtl->outgoing_args_size
1972
    + crtl->args.pretend_args_size;
1973
 
1974
  if (!current_function_is_leaf
1975
      || cfun->calls_alloca || total_size > 0)
1976
    total_size += STACK_POINTER_OFFSET;
1977
 
1978
  /* Save this first because code after this might use the link
1979
     register as a scratch register. */
1980
  if (!current_function_is_leaf)
1981
    {
1982
      insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1983
      RTX_FRAME_RELATED_P (insn) = 1;
1984
    }
1985
 
1986
  if (total_size > 0)
1987
    {
1988
      offset = -crtl->args.pretend_args_size;
1989
      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1990
        if (need_to_save_reg (regno, 1))
1991
          {
1992
            offset -= 16;
1993
            insn = frame_emit_store (regno, sp_reg, offset);
1994
            RTX_FRAME_RELATED_P (insn) = 1;
1995
          }
1996
    }
1997
 
1998
  if (flag_pic && crtl->uses_pic_offset_table)
1999
    {
2000
      rtx pic_reg = get_pic_reg ();
2001
      insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
2002
      insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
2003
    }
2004
 
2005
  if (total_size > 0)
2006
    {
2007
      if (flag_stack_check)
2008
        {
2009
          /* We compare against total_size-1 because
2010
             ($sp >= total_size) <=> ($sp > total_size-1) */
2011
          rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
2012
          rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
2013
          rtx size_v4si = spu_const (V4SImode, total_size - 1);
2014
          if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
2015
            {
2016
              emit_move_insn (scratch_v4si, size_v4si);
2017
              size_v4si = scratch_v4si;
2018
            }
2019
          emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
2020
          emit_insn (gen_vec_extractv4si
2021
                     (scratch_reg_0, scratch_v4si, GEN_INT (1)));
2022
          emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
2023
        }
2024
 
2025
      /* Adjust the stack pointer, and make sure scratch_reg_0 contains
2026
         the value of the previous $sp because we save it as the back
2027
         chain. */
2028
      if (total_size <= 2000)
2029
        {
2030
          /* In this case we save the back chain first. */
2031
          insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
2032
          insn =
2033
            frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
2034
        }
2035
      else
2036
        {
2037
          insn = emit_move_insn (scratch_reg_0, sp_reg);
2038
          insn =
2039
            frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
2040
        }
2041
      RTX_FRAME_RELATED_P (insn) = 1;
2042
      real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
2043
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
2044
 
2045
      if (total_size > 2000)
2046
        {
2047
          /* Save the back chain ptr */
2048
          insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
2049
        }
2050
 
2051
      if (frame_pointer_needed)
2052
        {
2053
          rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
2054
          HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
2055
            + crtl->outgoing_args_size;
2056
          /* Set the new frame_pointer */
2057
          insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
2058
          RTX_FRAME_RELATED_P (insn) = 1;
2059
          real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
2060
          add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
2061
          REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
2062
        }
2063
    }
2064
 
2065
}
2066
 
2067
void
2068
spu_expand_epilogue (bool sibcall_p)
2069
{
2070
  int size = get_frame_size (), offset, regno;
2071
  HOST_WIDE_INT saved_regs_size, total_size;
2072
  rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
2073
  rtx jump, scratch_reg_0;
2074
 
2075
  if (spu_naked_function_p (current_function_decl))
2076
    return;
2077
 
2078
  scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
2079
 
2080
  saved_regs_size = spu_saved_regs_size ();
2081
  total_size = size + saved_regs_size
2082
    + crtl->outgoing_args_size
2083
    + crtl->args.pretend_args_size;
2084
 
2085
  if (!current_function_is_leaf
2086
      || cfun->calls_alloca || total_size > 0)
2087
    total_size += STACK_POINTER_OFFSET;
2088
 
2089
  if (total_size > 0)
2090
    {
2091
      if (cfun->calls_alloca)
2092
        frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
2093
      else
2094
        frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
2095
 
2096
 
2097
      if (saved_regs_size > 0)
2098
        {
2099
          offset = -crtl->args.pretend_args_size;
2100
          for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
2101
            if (need_to_save_reg (regno, 1))
2102
              {
2103
                offset -= 0x10;
2104
                frame_emit_load (regno, sp_reg, offset);
2105
              }
2106
        }
2107
    }
2108
 
2109
  if (!current_function_is_leaf)
2110
    frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
2111
 
2112
  if (!sibcall_p)
2113
    {
2114
      emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
2115
      jump = emit_jump_insn (gen__return ());
2116
      emit_barrier_after (jump);
2117
    }
2118
 
2119
}
2120
 
2121
rtx
2122
spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
2123
{
2124
  if (count != 0)
2125
    return 0;
2126
  /* This is inefficient because it ends up copying to a save-register
2127
     which then gets saved even though $lr has already been saved.  But
2128
     it does generate better code for leaf functions and we don't need
2129
     to use RETURN_ADDRESS_POINTER_REGNUM to get it working.  It's only
2130
     used for __builtin_return_address anyway, so maybe we don't care if
2131
     it's inefficient. */
2132
  return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
2133
}
2134
 
2135
 
2136
/* Given VAL, generate a constant appropriate for MODE.
2137
   If MODE is a vector mode, every element will be VAL.
2138
   For TImode, VAL will be zero extended to 128 bits. */
2139
rtx
2140
spu_const (enum machine_mode mode, HOST_WIDE_INT val)
2141
{
2142
  rtx inner;
2143
  rtvec v;
2144
  int units, i;
2145
 
2146
  gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
2147
              || GET_MODE_CLASS (mode) == MODE_FLOAT
2148
              || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
2149
              || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
2150
 
2151
  if (GET_MODE_CLASS (mode) == MODE_INT)
2152
    return immed_double_const (val, 0, mode);
2153
 
2154
  /* val is the bit representation of the float */
2155
  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2156
    return hwint_to_const_double (mode, val);
2157
 
2158
  if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
2159
    inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
2160
  else
2161
    inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
2162
 
2163
  units = GET_MODE_NUNITS (mode);
2164
 
2165
  v = rtvec_alloc (units);
2166
 
2167
  for (i = 0; i < units; ++i)
2168
    RTVEC_ELT (v, i) = inner;
2169
 
2170
  return gen_rtx_CONST_VECTOR (mode, v);
2171
}
2172
 
2173
/* Create a MODE vector constant from 4 ints. */
2174
rtx
2175
spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
2176
{
2177
  unsigned char arr[16];
2178
  arr[0] = (a >> 24) & 0xff;
2179
  arr[1] = (a >> 16) & 0xff;
2180
  arr[2] = (a >> 8) & 0xff;
2181
  arr[3] = (a >> 0) & 0xff;
2182
  arr[4] = (b >> 24) & 0xff;
2183
  arr[5] = (b >> 16) & 0xff;
2184
  arr[6] = (b >> 8) & 0xff;
2185
  arr[7] = (b >> 0) & 0xff;
2186
  arr[8] = (c >> 24) & 0xff;
2187
  arr[9] = (c >> 16) & 0xff;
2188
  arr[10] = (c >> 8) & 0xff;
2189
  arr[11] = (c >> 0) & 0xff;
2190
  arr[12] = (d >> 24) & 0xff;
2191
  arr[13] = (d >> 16) & 0xff;
2192
  arr[14] = (d >> 8) & 0xff;
2193
  arr[15] = (d >> 0) & 0xff;
2194
  return array_to_constant(mode, arr);
2195
}
2196
 
2197
/* branch hint stuff */
2198
 
2199
/* An array of these is used to propagate hints to predecessor blocks. */
2200
struct spu_bb_info
2201
{
2202
  rtx prop_jump; /* propagated from another block */
2203
  int bb_index;  /* the original block. */
2204
};
2205
static struct spu_bb_info *spu_bb_info;
2206
 
2207
/* Nonzero when INSN is a call or one of the divmodsi4/udivmodsi4
   insns.  */
#define STOP_HINT_P(INSN)                               \
  (GET_CODE (INSN) == CALL_INSN                         \
   || INSN_CODE (INSN) == CODE_FOR_divmodsi4            \
   || INSN_CODE (INSN) == CODE_FOR_udivmodsi4)

/* 1 when RTX is a hinted branch or its target.  The flag is recorded
   on the rtx itself so the safe-hint code can test it easily.  */
#define HINTED_P(RTX)                                                   \
  (RTL_FLAG_CHECK3 ("HINTED_P", (RTX),                                  \
                    CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)

/* 1 when RTX is an insn that must be scheduled on an even boundary. */
#define SCHED_ON_EVEN_P(RTX)                                            \
  (RTL_FLAG_CHECK2 ("SCHED_ON_EVEN_P", (RTX),                           \
                    JUMP_INSN, CALL_INSN)->in_struct)
2220
 
2221
/* Emit a nop for INSN such that the two will dual issue.  This assumes
2222
   INSN is 8-byte aligned.  When INSN is inline asm we emit an lnop.
2223
   We check for TImode to handle a MULTI1 insn which has dual issued its
2224
   first instruction.  get_pipe returns -1 for MULTI0, inline asm, or
2225
   ADDR_VEC insns. */
2226
static void
2227
emit_nop_for_insn (rtx insn)
2228
{
2229
  int p;
2230
  rtx new_insn;
2231
  p = get_pipe (insn);
2232
  if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2233
    new_insn = emit_insn_after (gen_lnop (), insn);
2234
  else if (p == 1 && GET_MODE (insn) == TImode)
2235
    {
2236
      new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2237
      PUT_MODE (new_insn, TImode);
2238
      PUT_MODE (insn, VOIDmode);
2239
    }
2240
  else
2241
    new_insn = emit_insn_after (gen_lnop (), insn);
2242
  recog_memoized (new_insn);
2243
}
2244
 
2245
/* Insert nops in basic blocks to meet dual issue alignment
2246
   requirements.  Also make sure hbrp and hint instructions are at least
2247
   one cycle apart, possibly inserting a nop.  */
2248
static void
2249
pad_bb(void)
2250
{
2251
  rtx insn, next_insn, prev_insn, hbr_insn = 0;
2252
  int length;
2253
  int addr;
2254
 
2255
  /* This sets up INSN_ADDRESSES. */
2256
  shorten_branches (get_insns ());
2257
 
2258
  /* Keep track of length added by nops. */
2259
  length = 0;
2260
 
2261
  prev_insn = 0;
2262
  insn = get_insns ();
2263
  if (!active_insn_p (insn))
2264
    insn = next_active_insn (insn);
2265
  for (; insn; insn = next_insn)
2266
    {
2267
      next_insn = next_active_insn (insn);
2268
      if (INSN_CODE (insn) == CODE_FOR_iprefetch
2269
          || INSN_CODE (insn) == CODE_FOR_hbr)
2270
        {
2271
          if (hbr_insn)
2272
            {
2273
              int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2274
              int a1 = INSN_ADDRESSES (INSN_UID (insn));
2275
              if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2276
                  || (a1 - a0 == 4))
2277
                {
2278
                  prev_insn = emit_insn_before (gen_lnop (), insn);
2279
                  PUT_MODE (prev_insn, GET_MODE (insn));
2280
                  PUT_MODE (insn, TImode);
2281
                  length += 4;
2282
                }
2283
            }
2284
          hbr_insn = insn;
2285
        }
2286
      if (INSN_CODE (insn) == CODE_FOR_blockage)
2287
        {
2288
          if (GET_MODE (insn) == TImode)
2289
            PUT_MODE (next_insn, TImode);
2290
          insn = next_insn;
2291
          next_insn = next_active_insn (insn);
2292
        }
2293
      addr = INSN_ADDRESSES (INSN_UID (insn));
2294
      if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2295
        {
2296
          if (((addr + length) & 7) != 0)
2297
            {
2298
              emit_nop_for_insn (prev_insn);
2299
              length += 4;
2300
            }
2301
        }
2302
      else if (GET_MODE (insn) == TImode
2303
               && ((next_insn && GET_MODE (next_insn) != TImode)
2304
                   || get_attr_type (insn) == TYPE_MULTI0)
2305
               && ((addr + length) & 7) != 0)
2306
        {
2307
          /* prev_insn will always be set because the first insn is
2308
             always 8-byte aligned. */
2309
          emit_nop_for_insn (prev_insn);
2310
          length += 4;
2311
        }
2312
      prev_insn = insn;
2313
    }
2314
}
2315
 
2316
 
2317
/* Routines for branch hints. */
2318
 
2319
static void
2320
spu_emit_branch_hint (rtx before, rtx branch, rtx target,
2321
                      int distance, sbitmap blocks)
2322
{
2323
  rtx branch_label = 0;
2324
  rtx hint;
2325
  rtx insn;
2326
  rtx table;
2327
 
2328
  if (before == 0 || branch == 0 || target == 0)
2329
    return;
2330
 
2331
  /* While scheduling we require hints to be no further than 600, so
2332
     we need to enforce that here too */
2333
  if (distance > 600)
2334
    return;
2335
 
2336
  /* If we have a Basic block note, emit it after the basic block note.  */
2337 378 julius
  if (NOTE_INSN_BASIC_BLOCK_P (before))
2338 282 jeremybenn
    before = NEXT_INSN (before);
2339
 
2340
  branch_label = gen_label_rtx ();
2341
  LABEL_NUSES (branch_label)++;
2342
  LABEL_PRESERVE_P (branch_label) = 1;
2343
  insn = emit_label_before (branch_label, branch);
2344
  branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
2345
  SET_BIT (blocks, BLOCK_FOR_INSN (branch)->index);
2346
 
2347
  hint = emit_insn_before (gen_hbr (branch_label, target), before);
2348
  recog_memoized (hint);
2349
  HINTED_P (branch) = 1;
2350
 
2351
  if (GET_CODE (target) == LABEL_REF)
2352
    HINTED_P (XEXP (target, 0)) = 1;
2353
  else if (tablejump_p (branch, 0, &table))
2354
    {
2355
      rtvec vec;
2356
      int j;
2357
      if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2358
        vec = XVEC (PATTERN (table), 0);
2359
      else
2360
        vec = XVEC (PATTERN (table), 1);
2361
      for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2362
        HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
2363
    }
2364
 
2365
  if (distance >= 588)
2366
    {
2367
      /* Make sure the hint isn't scheduled any earlier than this point,
2368
         which could make it too far for the branch offest to fit */
2369
      recog_memoized (emit_insn_before (gen_blockage (), hint));
2370
    }
2371
  else if (distance <= 8 * 4)
2372
    {
2373
      /* To guarantee at least 8 insns between the hint and branch we
2374
         insert nops. */
2375
      int d;
2376
      for (d = distance; d < 8 * 4; d += 4)
2377
        {
2378
          insn =
2379
            emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2380
          recog_memoized (insn);
2381
        }
2382
 
2383
      /* Make sure any nops inserted aren't scheduled before the hint. */
2384
      recog_memoized (emit_insn_after (gen_blockage (), hint));
2385
 
2386
      /* Make sure any nops inserted aren't scheduled after the call. */
2387
      if (CALL_P (branch) && distance < 8 * 4)
2388
        recog_memoized (emit_insn_before (gen_blockage (), branch));
2389
    }
2390
}
2391
 
2392
/* Returns 0 if we don't want a hint for this branch.  Otherwise return
2393
   the rtx for the branch target. */
2394
static rtx
2395
get_branch_target (rtx branch)
2396
{
2397
  if (GET_CODE (branch) == JUMP_INSN)
2398
    {
2399
      rtx set, src;
2400
 
2401
      /* Return statements */
2402
      if (GET_CODE (PATTERN (branch)) == RETURN)
2403
        return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2404
 
2405
      /* jump table */
2406
      if (GET_CODE (PATTERN (branch)) == ADDR_VEC
2407
          || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
2408
        return 0;
2409
 
2410
     /* ASM GOTOs. */
2411
     if (extract_asm_operands (PATTERN (branch)) != NULL)
2412
        return NULL;
2413
 
2414
      set = single_set (branch);
2415
      src = SET_SRC (set);
2416
      if (GET_CODE (SET_DEST (set)) != PC)
2417
        abort ();
2418
 
2419
      if (GET_CODE (src) == IF_THEN_ELSE)
2420
        {
2421
          rtx lab = 0;
2422
          rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2423
          if (note)
2424
            {
2425
              /* If the more probable case is not a fall through, then
2426
                 try a branch hint.  */
2427
              HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
2428
              if (prob > (REG_BR_PROB_BASE * 6 / 10)
2429
                  && GET_CODE (XEXP (src, 1)) != PC)
2430
                lab = XEXP (src, 1);
2431
              else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2432
                       && GET_CODE (XEXP (src, 2)) != PC)
2433
                lab = XEXP (src, 2);
2434
            }
2435
          if (lab)
2436
            {
2437
              if (GET_CODE (lab) == RETURN)
2438
                return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2439
              return lab;
2440
            }
2441
          return 0;
2442
        }
2443
 
2444
      return src;
2445
    }
2446
  else if (GET_CODE (branch) == CALL_INSN)
2447
    {
2448
      rtx call;
2449
      /* All of our call patterns are in a PARALLEL and the CALL is
2450
         the first pattern in the PARALLEL. */
2451
      if (GET_CODE (PATTERN (branch)) != PARALLEL)
2452
        abort ();
2453
      call = XVECEXP (PATTERN (branch), 0, 0);
2454
      if (GET_CODE (call) == SET)
2455
        call = SET_SRC (call);
2456
      if (GET_CODE (call) != CALL)
2457
        abort ();
2458
      return XEXP (XEXP (call, 0), 0);
2459
    }
2460
  return 0;
2461
}
2462
 
2463
/* The special $hbr register is used to prevent the insn scheduler from
2464
   moving hbr insns across instructions which invalidate them.  It
2465
   should only be used in a clobber, and this function searches for
2466
   insns which clobber it.  */
2467
static bool
2468
insn_clobbers_hbr (rtx insn)
2469
{
2470
  if (INSN_P (insn)
2471
      && GET_CODE (PATTERN (insn)) == PARALLEL)
2472
    {
2473
      rtx parallel = PATTERN (insn);
2474
      rtx clobber;
2475
      int j;
2476
      for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2477
        {
2478
          clobber = XVECEXP (parallel, 0, j);
2479
          if (GET_CODE (clobber) == CLOBBER
2480
              && GET_CODE (XEXP (clobber, 0)) == REG
2481
              && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2482
            return 1;
2483
        }
2484
    }
2485
  return 0;
2486
}
2487
 
2488
/* Search up to 32 insns starting at FIRST:
2489
   - at any kind of hinted branch, just return
2490
   - at any unconditional branch in the first 15 insns, just return
2491
   - at a call or indirect branch, after the first 15 insns, force it to
2492
     an even address and return
2493
   - at any unconditional branch, after the first 15 insns, force it to
2494
     an even address.
2495
   At then end of the search, insert an hbrp within 4 insns of FIRST,
2496
   and an hbrp within 16 instructions of FIRST.
2497
 */
2498
static void
2499
insert_hbrp_for_ilb_runout (rtx first)
2500
{
2501
  rtx insn, before_4 = 0, before_16 = 0;
2502
  int addr = 0, length, first_addr = -1;
2503
  int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2504
  int insert_lnop_after = 0;
2505
  for (insn = first; insn; insn = NEXT_INSN (insn))
2506
    if (INSN_P (insn))
2507
      {
2508
        if (first_addr == -1)
2509
          first_addr = INSN_ADDRESSES (INSN_UID (insn));
2510
        addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2511
        length = get_attr_length (insn);
2512
 
2513
        if (before_4 == 0 && addr + length >= 4 * 4)
2514
          before_4 = insn;
2515
        /* We test for 14 instructions because the first hbrp will add
2516
           up to 2 instructions. */
2517
        if (before_16 == 0 && addr + length >= 14 * 4)
2518
          before_16 = insn;
2519
 
2520
        if (INSN_CODE (insn) == CODE_FOR_hbr)
2521
          {
2522
            /* Make sure an hbrp is at least 2 cycles away from a hint.
2523
               Insert an lnop after the hbrp when necessary. */
2524
            if (before_4 == 0 && addr > 0)
2525
              {
2526
                before_4 = insn;
2527
                insert_lnop_after |= 1;
2528
              }
2529
            else if (before_4 && addr <= 4 * 4)
2530
              insert_lnop_after |= 1;
2531
            if (before_16 == 0 && addr > 10 * 4)
2532
              {
2533
                before_16 = insn;
2534
                insert_lnop_after |= 2;
2535
              }
2536
            else if (before_16 && addr <= 14 * 4)
2537
              insert_lnop_after |= 2;
2538
          }
2539
 
2540
        if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2541
          {
2542
            if (addr < hbrp_addr0)
2543
              hbrp_addr0 = addr;
2544
            else if (addr < hbrp_addr1)
2545
              hbrp_addr1 = addr;
2546
          }
2547
 
2548
        if (CALL_P (insn) || JUMP_P (insn))
2549
          {
2550
            if (HINTED_P (insn))
2551
              return;
2552
 
2553
            /* Any branch after the first 15 insns should be on an even
2554
               address to avoid a special case branch.  There might be
2555
               some nops and/or hbrps inserted, so we test after 10
2556
               insns. */
2557
            if (addr > 10 * 4)
2558
              SCHED_ON_EVEN_P (insn) = 1;
2559
          }
2560
 
2561
        if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2562
          return;
2563
 
2564
 
2565
        if (addr + length >= 32 * 4)
2566
          {
2567
            gcc_assert (before_4 && before_16);
2568
            if (hbrp_addr0 > 4 * 4)
2569
              {
2570
                insn =
2571
                  emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2572
                recog_memoized (insn);
2573
                INSN_ADDRESSES_NEW (insn,
2574
                                    INSN_ADDRESSES (INSN_UID (before_4)));
2575
                PUT_MODE (insn, GET_MODE (before_4));
2576
                PUT_MODE (before_4, TImode);
2577
                if (insert_lnop_after & 1)
2578
                  {
2579
                    insn = emit_insn_before (gen_lnop (), before_4);
2580
                    recog_memoized (insn);
2581
                    INSN_ADDRESSES_NEW (insn,
2582
                                        INSN_ADDRESSES (INSN_UID (before_4)));
2583
                    PUT_MODE (insn, TImode);
2584
                  }
2585
              }
2586
            if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2587
                && hbrp_addr1 > 16 * 4)
2588
              {
2589
                insn =
2590
                  emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2591
                recog_memoized (insn);
2592
                INSN_ADDRESSES_NEW (insn,
2593
                                    INSN_ADDRESSES (INSN_UID (before_16)));
2594
                PUT_MODE (insn, GET_MODE (before_16));
2595
                PUT_MODE (before_16, TImode);
2596
                if (insert_lnop_after & 2)
2597
                  {
2598
                    insn = emit_insn_before (gen_lnop (), before_16);
2599
                    recog_memoized (insn);
2600
                    INSN_ADDRESSES_NEW (insn,
2601
                                        INSN_ADDRESSES (INSN_UID
2602
                                                        (before_16)));
2603
                    PUT_MODE (insn, TImode);
2604
                  }
2605
              }
2606
            return;
2607
          }
2608
      }
2609
    else if (BARRIER_P (insn))
2610
      return;
2611
 
2612
}
2613
 
2614
/* The SPU might hang when it executes 48 inline instructions after a
2615
   hinted branch jumps to its hinted target.  The beginning of a
2616
   function and the return from a call might have been hinted, and must
2617
   be handled as well.  To prevent a hang we insert 2 hbrps.  The first
2618
   should be within 6 insns of the branch target.  The second should be
2619
   within 22 insns of the branch target.  When determining if hbrps are
2620
   necessary, we look for only 32 inline instructions, because up to to
2621
   12 nops and 4 hbrps could be inserted.  Similarily, when inserting
2622
   new hbrps, we insert them within 4 and 16 insns of the target.  */
2623
static void
2624
insert_hbrp (void)
2625
{
2626
  rtx insn;
2627
  if (TARGET_SAFE_HINTS)
2628
    {
2629
      shorten_branches (get_insns ());
2630
      /* Insert hbrp at beginning of function */
2631
      insn = next_active_insn (get_insns ());
2632
      if (insn)
2633
        insert_hbrp_for_ilb_runout (insn);
2634
      /* Insert hbrp after hinted targets. */
2635
      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2636
        if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2637
          insert_hbrp_for_ilb_runout (next_active_insn (insn));
2638
    }
2639
}
2640
 
2641
/* Assigned 1 by spu_machine_dependent_reorg when it goes on to insert
   branch hints (i.e. it did not take its early-return path).
   NOTE(review): the readers of this flag are not visible in this part
   of the file -- confirm where it is tested and reset.  */
static int in_spu_reorg;
2642
 
2643
/* Insert branch hints.  There are no branch optimizations after this
2644
   pass, so it's safe to set our branch hints now. */
2645
static void
2646
spu_machine_dependent_reorg (void)
2647
{
2648
  sbitmap blocks;
2649
  basic_block bb;
2650
  rtx branch, insn;
2651
  rtx branch_target = 0;
2652
  int branch_addr = 0, insn_addr, required_dist = 0;
2653
  int i;
2654
  unsigned int j;
2655
 
2656
  if (!TARGET_BRANCH_HINTS || optimize == 0)
2657
    {
2658
      /* We still do it for unoptimized code because an external
2659
         function might have hinted a call or return. */
2660
      insert_hbrp ();
2661
      pad_bb ();
2662
      return;
2663
    }
2664
 
2665
  blocks = sbitmap_alloc (last_basic_block);
2666
  sbitmap_zero (blocks);
2667
 
2668
  in_spu_reorg = 1;
2669
  compute_bb_for_insn ();
2670
 
2671
  compact_blocks ();
2672
 
2673
  spu_bb_info =
2674
    (struct spu_bb_info *) xcalloc (n_basic_blocks,
2675
                                    sizeof (struct spu_bb_info));
2676
 
2677
  /* We need exact insn addresses and lengths.  */
2678
  shorten_branches (get_insns ());
2679
 
2680
  for (i = n_basic_blocks - 1; i >= 0; i--)
2681
    {
2682
      bb = BASIC_BLOCK (i);
2683
      branch = 0;
2684
      if (spu_bb_info[i].prop_jump)
2685
        {
2686
          branch = spu_bb_info[i].prop_jump;
2687
          branch_target = get_branch_target (branch);
2688
          branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2689
          required_dist = spu_hint_dist;
2690
        }
2691
      /* Search from end of a block to beginning.   In this loop, find
2692
         jumps which need a branch and emit them only when:
2693
         - it's an indirect branch and we're at the insn which sets
2694
         the register
2695
         - we're at an insn that will invalidate the hint. e.g., a
2696
         call, another hint insn, inline asm that clobbers $hbr, and
2697
         some inlined operations (divmodsi4).  Don't consider jumps
2698
         because they are only at the end of a block and are
2699
         considered when we are deciding whether to propagate
2700
         - we're getting too far away from the branch.  The hbr insns
2701
         only have a signed 10 bit offset
2702
         We go back as far as possible so the branch will be considered
2703
         for propagation when we get to the beginning of the block.  */
2704
      for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2705
        {
2706
          if (INSN_P (insn))
2707
            {
2708
              insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2709
              if (branch
2710
                  && ((GET_CODE (branch_target) == REG
2711
                       && set_of (branch_target, insn) != NULL_RTX)
2712
                      || insn_clobbers_hbr (insn)
2713
                      || branch_addr - insn_addr > 600))
2714
                {
2715
                  rtx next = NEXT_INSN (insn);
2716
                  int next_addr = INSN_ADDRESSES (INSN_UID (next));
2717
                  if (insn != BB_END (bb)
2718
                      && branch_addr - next_addr >= required_dist)
2719
                    {
2720
                      if (dump_file)
2721
                        fprintf (dump_file,
2722
                                 "hint for %i in block %i before %i\n",
2723
                                 INSN_UID (branch), bb->index,
2724
                                 INSN_UID (next));
2725
                      spu_emit_branch_hint (next, branch, branch_target,
2726
                                            branch_addr - next_addr, blocks);
2727
                    }
2728
                  branch = 0;
2729
                }
2730
 
2731
              /* JUMP_P will only be true at the end of a block.  When
2732
                 branch is already set it means we've previously decided
2733
                 to propagate a hint for that branch into this block. */
2734
              if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2735
                {
2736
                  branch = 0;
2737
                  if ((branch_target = get_branch_target (insn)))
2738
                    {
2739
                      branch = insn;
2740
                      branch_addr = insn_addr;
2741
                      required_dist = spu_hint_dist;
2742
                    }
2743
                }
2744
            }
2745
          if (insn == BB_HEAD (bb))
2746
            break;
2747
        }
2748
 
2749
      if (branch)
2750
        {
2751
          /* If we haven't emitted a hint for this branch yet, it might
2752
             be profitable to emit it in one of the predecessor blocks,
2753
             especially for loops.  */
2754
          rtx bbend;
2755
          basic_block prev = 0, prop = 0, prev2 = 0;
2756
          int loop_exit = 0, simple_loop = 0;
2757
          int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2758
 
2759
          for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2760
            if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2761
              prev = EDGE_PRED (bb, j)->src;
2762
            else
2763
              prev2 = EDGE_PRED (bb, j)->src;
2764
 
2765
          for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2766
            if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2767
              loop_exit = 1;
2768
            else if (EDGE_SUCC (bb, j)->dest == bb)
2769
              simple_loop = 1;
2770
 
2771
          /* If this branch is a loop exit then propagate to previous
2772
             fallthru block. This catches the cases when it is a simple
2773
             loop or when there is an initial branch into the loop. */
2774
          if (prev && (loop_exit || simple_loop)
2775
              && prev->loop_depth <= bb->loop_depth)
2776
            prop = prev;
2777
 
2778
          /* If there is only one adjacent predecessor.  Don't propagate
2779
             outside this loop.  This loop_depth test isn't perfect, but
2780
             I'm not sure the loop_father member is valid at this point.  */
2781
          else if (prev && single_pred_p (bb)
2782
                   && prev->loop_depth == bb->loop_depth)
2783
            prop = prev;
2784
 
2785
          /* If this is the JOIN block of a simple IF-THEN then
2786
             propogate the hint to the HEADER block. */
2787
          else if (prev && prev2
2788
                   && EDGE_COUNT (bb->preds) == 2
2789
                   && EDGE_COUNT (prev->preds) == 1
2790
                   && EDGE_PRED (prev, 0)->src == prev2
2791
                   && prev2->loop_depth == bb->loop_depth
2792
                   && GET_CODE (branch_target) != REG)
2793
            prop = prev;
2794
 
2795
          /* Don't propagate when:
2796
             - this is a simple loop and the hint would be too far
2797
             - this is not a simple loop and there are 16 insns in
2798
             this block already
2799
             - the predecessor block ends in a branch that will be
2800
             hinted
2801
             - the predecessor block ends in an insn that invalidates
2802
             the hint */
2803
          if (prop
2804
              && prop->index >= 0
2805
              && (bbend = BB_END (prop))
2806
              && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2807
              (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2808
              && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2809
            {
2810
              if (dump_file)
2811
                fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2812
                         "for %i (loop_exit %i simple_loop %i dist %i)\n",
2813
                         bb->index, prop->index, bb->loop_depth,
2814
                         INSN_UID (branch), loop_exit, simple_loop,
2815
                         branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2816
 
2817
              spu_bb_info[prop->index].prop_jump = branch;
2818
              spu_bb_info[prop->index].bb_index = i;
2819
            }
2820
          else if (branch_addr - next_addr >= required_dist)
2821
            {
2822
              if (dump_file)
2823
                fprintf (dump_file, "hint for %i in block %i before %i\n",
2824
                         INSN_UID (branch), bb->index,
2825
                         INSN_UID (NEXT_INSN (insn)));
2826
              spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2827
                                    branch_addr - next_addr, blocks);
2828
            }
2829
          branch = 0;
2830
        }
2831
    }
2832
  free (spu_bb_info);
2833
 
2834
  if (!sbitmap_empty_p (blocks))
2835
    find_many_sub_basic_blocks (blocks);
2836
 
2837
  /* We have to schedule to make sure alignment is ok. */
2838
  FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
2839
 
2840
  /* The hints need to be scheduled, so call it again. */
2841
  schedule_insns ();
2842
 
2843
  insert_hbrp ();
2844
 
2845
  pad_bb ();
2846
 
2847
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2848
    if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2849
      {
2850
        /* Adjust the LABEL_REF in a hint when we have inserted a nop
2851
           between its branch label and the branch .  We don't move the
2852
           label because GCC expects it at the beginning of the block. */
2853
        rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2854
        rtx label_ref = XVECEXP (unspec, 0, 0);
2855
        rtx label = XEXP (label_ref, 0);
2856
        rtx branch;
2857
        int offset = 0;
2858
        for (branch = NEXT_INSN (label);
2859
             !JUMP_P (branch) && !CALL_P (branch);
2860
             branch = NEXT_INSN (branch))
2861
          if (NONJUMP_INSN_P (branch))
2862
            offset += get_attr_length (branch);
2863
        if (offset > 0)
2864
          XVECEXP (unspec, 0, 0) = plus_constant (label_ref, offset);
2865
      }
2866
 
2867
  if (spu_flag_var_tracking)
2868
    {
2869
      df_analyze ();
2870
      timevar_push (TV_VAR_TRACKING);
2871
      variable_tracking_main ();
2872
      timevar_pop (TV_VAR_TRACKING);
2873
      df_finish_pass (false);
2874
    }
2875
 
2876
  free_bb_for_insn ();
2877
 
2878
  in_spu_reorg = 0;
2879
}
2880
 
2881
 
2882
/* Insn scheduling routines, primarily for dual issue. */

/* Return the issue rate used for scheduling, which is always 2.  */
static int
spu_sched_issue_rate (void)
{
  enum { SPU_ISSUE_WIDTH = 2 };

  return SPU_ISSUE_WIDTH;
}
2888
 
2889
static int
2890
uses_ls_unit(rtx insn)
2891
{
2892
  rtx set = single_set (insn);
2893
  if (set != 0
2894
      && (GET_CODE (SET_DEST (set)) == MEM
2895
          || GET_CODE (SET_SRC (set)) == MEM))
2896
    return 1;
2897
  return 0;
2898
}
2899
 
2900
static int
2901
get_pipe (rtx insn)
2902
{
2903
  enum attr_type t;
2904
  /* Handle inline asm */
2905
  if (INSN_CODE (insn) == -1)
2906
    return -1;
2907
  t = get_attr_type (insn);
2908
  switch (t)
2909
    {
2910
    case TYPE_CONVERT:
2911
      return -2;
2912
    case TYPE_MULTI0:
2913
      return -1;
2914
 
2915
    case TYPE_FX2:
2916
    case TYPE_FX3:
2917
    case TYPE_SPR:
2918
    case TYPE_NOP:
2919
    case TYPE_FXB:
2920
    case TYPE_FPD:
2921
    case TYPE_FP6:
2922
    case TYPE_FP7:
2923
      return 0;
2924
 
2925
    case TYPE_LNOP:
2926
    case TYPE_SHUF:
2927
    case TYPE_LOAD:
2928
    case TYPE_STORE:
2929
    case TYPE_BR:
2930
    case TYPE_MULTI1:
2931
    case TYPE_HBR:
2932
    case TYPE_IPREFETCH:
2933
      return 1;
2934
    default:
2935
      abort ();
2936
    }
2937
}
2938
 
2939
 
2940
/* haifa-sched.c has a static variable that keeps track of the current
   cycle.  It is passed to spu_sched_reorder, and we record it here for
   use by spu_sched_variable_issue.  It won't be accurate if the
   scheduler updates its clock_var between the two calls. */
static int clock_var;

/* Tracks insn alignment.  It is set to 0 at the beginning of each
   block and grows by the "length" attr of each insn scheduled. */
static int spu_sched_length;

/* Clock values at which pipe0 and pipe1 insns were last issued, so
   that spu_sched_reorder() can reorder the ready list
   appropriately. */
static int pipe0_clock, pipe1_clock;

/* Values of clock_var and INSN_PRIORITY remembered by
   spu_sched_variable_issue for the insn issued before the current
   one.  */
static int prev_clock_var, prev_priority;

/* The SPU needs to load the next ilb sometime during the execution of
   the previous ilb.  There is a potential conflict if every cycle has a
   load or store.  To avoid the conflict we make sure the load/store
   unit is free for at least one cycle during the execution of insns in
   the previous ilb. */
static int spu_ls_first, prev_ls_clock;
2967
 
2968
static void
2969
spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2970
                       int max_ready ATTRIBUTE_UNUSED)
2971
{
2972
  spu_sched_length = 0;
2973
}
2974
 
2975
static void
2976
spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2977
                int max_ready ATTRIBUTE_UNUSED)
2978
{
2979
  if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2980
    {
2981
      /* When any block might be at least 8-byte aligned, assume they
2982
         will all be at least 8-byte aligned to make sure dual issue
2983
         works out correctly. */
2984
      spu_sched_length = 0;
2985
    }
2986
  spu_ls_first = INT_MAX;
2987
  clock_var = -1;
2988
  prev_ls_clock = -1;
2989
  pipe0_clock = -1;
2990
  pipe1_clock = -1;
2991
  prev_clock_var = -1;
2992
  prev_priority = -1;
2993
}
2994
 
2995
static int
2996
spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
2997
                          int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
2998
{
2999
  int len;
3000
  int p;
3001
  if (GET_CODE (PATTERN (insn)) == USE
3002
      || GET_CODE (PATTERN (insn)) == CLOBBER
3003
      || (len = get_attr_length (insn)) == 0)
3004
    return more;
3005
 
3006
  spu_sched_length += len;
3007
 
3008
  /* Reset on inline asm */
3009
  if (INSN_CODE (insn) == -1)
3010
    {
3011
      spu_ls_first = INT_MAX;
3012
      pipe0_clock = -1;
3013
      pipe1_clock = -1;
3014
      return 0;
3015
    }
3016
  p = get_pipe (insn);
3017
  if (p == 0)
3018
    pipe0_clock = clock_var;
3019
  else
3020
    pipe1_clock = clock_var;
3021
 
3022
  if (in_spu_reorg)
3023
    {
3024
      if (clock_var - prev_ls_clock > 1
3025
          || INSN_CODE (insn) == CODE_FOR_iprefetch)
3026
        spu_ls_first = INT_MAX;
3027
      if (uses_ls_unit (insn))
3028
        {
3029
          if (spu_ls_first == INT_MAX)
3030
            spu_ls_first = spu_sched_length;
3031
          prev_ls_clock = clock_var;
3032
        }
3033
 
3034
      /* The scheduler hasn't inserted the nop, but we will later on.
3035
         Include those nops in spu_sched_length. */
3036
      if (prev_clock_var == clock_var && (spu_sched_length & 7))
3037
        spu_sched_length += 4;
3038
      prev_clock_var = clock_var;
3039
 
3040
      /* more is -1 when called from spu_sched_reorder for new insns
3041
         that don't have INSN_PRIORITY */
3042
      if (more >= 0)
3043
        prev_priority = INSN_PRIORITY (insn);
3044
    }
3045
 
3046
  /* Always try issueing more insns.  spu_sched_reorder will decide
3047
     when the cycle should be advanced. */
3048
  return 1;
3049
}
3050
 
3051
/* This function is called for both TARGET_SCHED_REORDER and
3052
   TARGET_SCHED_REORDER2.  */
3053
static int
3054
spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3055
                   rtx *ready, int *nreadyp, int clock)
3056
{
3057
  int i, nready = *nreadyp;
3058
  int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
3059
  rtx insn;
3060
 
3061
  clock_var = clock;
3062
 
3063
  if (nready <= 0 || pipe1_clock >= clock)
3064
    return 0;
3065
 
3066
  /* Find any rtl insns that don't generate assembly insns and schedule
3067
     them first. */
3068
  for (i = nready - 1; i >= 0; i--)
3069
    {
3070
      insn = ready[i];
3071
      if (INSN_CODE (insn) == -1
3072
          || INSN_CODE (insn) == CODE_FOR_blockage
3073
          || (INSN_P (insn) && get_attr_length (insn) == 0))
3074
        {
3075
          ready[i] = ready[nready - 1];
3076
          ready[nready - 1] = insn;
3077
          return 1;
3078
        }
3079
    }
3080
 
3081
  pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
3082
  for (i = 0; i < nready; i++)
3083
    if (INSN_CODE (ready[i]) != -1)
3084
      {
3085
        insn = ready[i];
3086
        switch (get_attr_type (insn))
3087
          {
3088
          default:
3089
          case TYPE_MULTI0:
3090
          case TYPE_CONVERT:
3091
          case TYPE_FX2:
3092
          case TYPE_FX3:
3093
          case TYPE_SPR:
3094
          case TYPE_NOP:
3095
          case TYPE_FXB:
3096
          case TYPE_FPD:
3097
          case TYPE_FP6:
3098
          case TYPE_FP7:
3099
            pipe_0 = i;
3100
            break;
3101
          case TYPE_LOAD:
3102
          case TYPE_STORE:
3103
            pipe_ls = i;
3104
          case TYPE_LNOP:
3105
          case TYPE_SHUF:
3106
          case TYPE_BR:
3107
          case TYPE_MULTI1:
3108
          case TYPE_HBR:
3109
            pipe_1 = i;
3110
            break;
3111
          case TYPE_IPREFETCH:
3112
            pipe_hbrp = i;
3113
            break;
3114
          }
3115
      }
3116
 
3117
  /* In the first scheduling phase, schedule loads and stores together
3118
     to increase the chance they will get merged during postreload CSE. */
3119
  if (!reload_completed && pipe_ls >= 0)
3120
    {
3121
      insn = ready[pipe_ls];
3122
      ready[pipe_ls] = ready[nready - 1];
3123
      ready[nready - 1] = insn;
3124
      return 1;
3125
    }
3126
 
3127
  /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
3128
  if (pipe_hbrp >= 0)
3129
    pipe_1 = pipe_hbrp;
3130
 
3131
  /* When we have loads/stores in every cycle of the last 15 insns and
3132
     we are about to schedule another load/store, emit an hbrp insn
3133
     instead. */
3134
  if (in_spu_reorg
3135
      && spu_sched_length - spu_ls_first >= 4 * 15
3136
      && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
3137
    {
3138
      insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
3139
      recog_memoized (insn);
3140
      if (pipe0_clock < clock)
3141
        PUT_MODE (insn, TImode);
3142
      spu_sched_variable_issue (file, verbose, insn, -1);
3143
      return 0;
3144
    }
3145
 
3146
  /* In general, we want to emit nops to increase dual issue, but dual
3147
     issue isn't faster when one of the insns could be scheduled later
3148
     without effecting the critical path.  We look at INSN_PRIORITY to
3149
     make a good guess, but it isn't perfect so -mdual-nops=n can be
3150
     used to effect it. */
3151
  if (in_spu_reorg && spu_dual_nops < 10)
3152
    {
3153
      /* When we are at an even address and we are not issueing nops to
3154
         improve scheduling then we need to advance the cycle.  */
3155
      if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
3156
          && (spu_dual_nops == 0
3157
              || (pipe_1 != -1
3158
                  && prev_priority >
3159
                  INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
3160
        return 0;
3161
 
3162
      /* When at an odd address, schedule the highest priority insn
3163
         without considering pipeline. */
3164
      if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
3165
          && (spu_dual_nops == 0
3166
              || (prev_priority >
3167
                  INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
3168
        return 1;
3169
    }
3170
 
3171
 
3172
  /* We haven't issued a pipe0 insn yet this cycle, if there is a
3173
     pipe0 insn in the ready list, schedule it. */
3174
  if (pipe0_clock < clock && pipe_0 >= 0)
3175
    schedule_i = pipe_0;
3176
 
3177
  /* Either we've scheduled a pipe0 insn already or there is no pipe0
3178
     insn to schedule.  Put a pipe1 insn at the front of the ready list. */
3179
  else
3180
    schedule_i = pipe_1;
3181
 
3182
  if (schedule_i > -1)
3183
    {
3184
      insn = ready[schedule_i];
3185
      ready[schedule_i] = ready[nready - 1];
3186
      ready[nready - 1] = insn;
3187
      return 1;
3188
    }
3189
  return 0;
3190
}
3191
 
3192
/* INSN is dependent on DEP_INSN. */
3193
static int
3194
spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
3195
{
3196
  rtx set;
3197
 
3198
  /* The blockage pattern is used to prevent instructions from being
3199
     moved across it and has no cost. */
3200
  if (INSN_CODE (insn) == CODE_FOR_blockage
3201
      || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3202
    return 0;
3203
 
3204
  if ((INSN_P (insn) && get_attr_length (insn) == 0)
3205
      || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
3206
    return 0;
3207
 
3208
  /* Make sure hbrps are spread out. */
3209
  if (INSN_CODE (insn) == CODE_FOR_iprefetch
3210
      && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3211
    return 8;
3212
 
3213
  /* Make sure hints and hbrps are 2 cycles apart. */
3214
  if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3215
       || INSN_CODE (insn) == CODE_FOR_hbr)
3216
       && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3217
           || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3218
    return 2;
3219
 
3220
  /* An hbrp has no real dependency on other insns. */
3221
  if (INSN_CODE (insn) == CODE_FOR_iprefetch
3222
      || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3223
    return 0;
3224
 
3225
  /* Assuming that it is unlikely an argument register will be used in
3226
     the first cycle of the called function, we reduce the cost for
3227
     slightly better scheduling of dep_insn.  When not hinted, the
3228
     mispredicted branch would hide the cost as well.  */
3229
  if (CALL_P (insn))
3230
  {
3231
    rtx target = get_branch_target (insn);
3232
    if (GET_CODE (target) != REG || !set_of (target, insn))
3233
      return cost - 2;
3234
    return cost;
3235
  }
3236
 
3237
  /* And when returning from a function, let's assume the return values
3238
     are completed sooner too. */
3239
  if (CALL_P (dep_insn))
3240
    return cost - 2;
3241
 
3242
  /* Make sure an instruction that loads from the back chain is schedule
3243
     away from the return instruction so a hint is more likely to get
3244
     issued. */
3245
  if (INSN_CODE (insn) == CODE_FOR__return
3246
      && (set = single_set (dep_insn))
3247
      && GET_CODE (SET_DEST (set)) == REG
3248
      && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3249
    return 20;
3250
 
3251
  /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3252
     scheduler makes every insn in a block anti-dependent on the final
3253
     jump_insn.  We adjust here so higher cost insns will get scheduled
3254
     earlier. */
3255
  if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
3256
    return insn_cost (dep_insn) - 3;
3257
 
3258
  return cost;
3259
}
3260
 
3261
/* Create a CONST_DOUBLE from a string.  */
3262
struct rtx_def *
3263
spu_float_const (const char *string, enum machine_mode mode)
3264
{
3265
  REAL_VALUE_TYPE value;
3266
  value = REAL_VALUE_ATOF (string, mode);
3267
  return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3268
}
3269
 
3270
int
3271
spu_constant_address_p (rtx x)
3272
{
3273
  return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3274
          || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3275
          || GET_CODE (x) == HIGH);
3276
}
3277
 
3278
static enum spu_immediate
3279
which_immediate_load (HOST_WIDE_INT val)
3280
{
3281
  gcc_assert (val == trunc_int_for_mode (val, SImode));
3282
 
3283
  if (val >= -0x8000 && val <= 0x7fff)
3284
    return SPU_IL;
3285
  if (val >= 0 && val <= 0x3ffff)
3286
    return SPU_ILA;
3287
  if ((val & 0xffff) == ((val >> 16) & 0xffff))
3288
    return SPU_ILH;
3289
  if ((val & 0xffff) == 0)
3290
    return SPU_ILHU;
3291
 
3292
  return SPU_NONE;
3293
}
3294
 
3295
/* Return true when OP can be loaded by one of the il instructions, or
3296
   when flow2 is not completed and OP can be loaded using ilhu and iohl. */
3297
int
3298
immediate_load_p (rtx op, enum machine_mode mode)
3299
{
3300
  if (CONSTANT_P (op))
3301
    {
3302
      enum immediate_class c = classify_immediate (op, mode);
3303
      return c == IC_IL1 || c == IC_IL1s
3304
             || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
3305
    }
3306
  return 0;
3307
}
3308
 
3309
/* Return true if the first SIZE bytes of arr is a constant that can be
   generated with cbd, chd, cwd or cdd.  When non-NULL, PRUN and PSTART
   represent the size and offset of the instruction to use.

   ARR is treated as a 16-byte array.  Byte I is "in place" when it
   equals I + 16; the pattern is accepted when the out-of-place bytes
   form exactly one naturally aligned run of the form 3 (1 byte),
   2 3 (2 bytes), 0..3 (4 bytes) or 0..7 (8 bytes).  With no run at all
   the pattern is accepted only when SIZE < 16, in which case *PRUN is
   1 and *PSTART is 15.  *PRUN and *PSTART are written only when 1 is
   returned.

   NOTE(review): after a run is recognized the scan resumes at
   START + RUN + 1, so the byte directly after the run is not
   examined -- confirm whether that is intended.  */
static int
cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
{
  int cpat, run, i, start;
  cpat = 1;
  run = 0;
  start = -1;
  for (i = 0; i < size && cpat; i++)
    if (arr[i] != i+16)
      {
        if (!run)
          {
            start = i;
            if (arr[i] == 3)
              run = 1;
            /* Bound the index before looking at the next byte so that
               arr[16] is never read.  */
            else if (arr[i] == 2 && i+1 < 16 && arr[i+1] == 3)
              run = 2;
            else if (arr[i] == 0)
              {
                /* Likewise, test the bound before the element.  */
                while (i+run < 16 && arr[i+run] == run)
                  run++;
                if (run != 4 && run != 8)
                  cpat = 0;
              }
            else
              cpat = 0;
            /* The run has to be aligned to its own size.  */
            if ((i & (run-1)) != 0)
              cpat = 0;
            i += run;
          }
        else
          /* A second out-of-place byte after the run: not a cpat.  */
          cpat = 0;
      }
  if (cpat && (run || size < 16))
    {
      if (run == 0)
        run = 1;
      if (prun)
        *prun = run;
      if (pstart)
        *pstart = start == -1 ? 16-run : start;
      return 1;
    }
  return 0;
}
3357
 
3358
/* OP is a CONSTANT_P.  Determine what instructions can be used to load
3359
   it into a register.  MODE is only valid when OP is a CONST_INT. */
3360
static enum immediate_class
3361
classify_immediate (rtx op, enum machine_mode mode)
3362
{
3363
  HOST_WIDE_INT val;
3364
  unsigned char arr[16];
3365
  int i, j, repeated, fsmbi, repeat;
3366
 
3367
  gcc_assert (CONSTANT_P (op));
3368
 
3369
  if (GET_MODE (op) != VOIDmode)
3370
    mode = GET_MODE (op);
3371
 
3372
  /* A V4SI const_vector with all identical symbols is ok. */
3373
  if (!flag_pic
3374
      && mode == V4SImode
3375
      && GET_CODE (op) == CONST_VECTOR
3376
      && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3377
      && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3378
      && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3379
      && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3380
      && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3381
    op = CONST_VECTOR_ELT (op, 0);
3382
 
3383
  switch (GET_CODE (op))
3384
    {
3385
    case SYMBOL_REF:
3386
    case LABEL_REF:
3387
      return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
3388
 
3389
    case CONST:
3390
      /* We can never know if the resulting address fits in 18 bits and can be
3391
         loaded with ila.  For now, assume the address will not overflow if
3392
         the displacement is "small" (fits 'K' constraint).  */
3393
      if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3394
        {
3395
          rtx sym = XEXP (XEXP (op, 0), 0);
3396
          rtx cst = XEXP (XEXP (op, 0), 1);
3397
 
3398
          if (GET_CODE (sym) == SYMBOL_REF
3399
              && GET_CODE (cst) == CONST_INT
3400
              && satisfies_constraint_K (cst))
3401
            return IC_IL1s;
3402
        }
3403
      return IC_IL2s;
3404
 
3405
    case HIGH:
3406
      return IC_IL1s;
3407
 
3408
    case CONST_VECTOR:
3409
      for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3410
        if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3411
            && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3412
          return IC_POOL;
3413
      /* Fall through. */
3414
 
3415
    case CONST_INT:
3416
    case CONST_DOUBLE:
3417
      constant_to_array (mode, op, arr);
3418
 
3419
      /* Check that each 4-byte slot is identical. */
3420
      repeated = 1;
3421
      for (i = 4; i < 16; i += 4)
3422
        for (j = 0; j < 4; j++)
3423
          if (arr[j] != arr[i + j])
3424
            repeated = 0;
3425
 
3426
      if (repeated)
3427
        {
3428
          val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3429
          val = trunc_int_for_mode (val, SImode);
3430
 
3431
          if (which_immediate_load (val) != SPU_NONE)
3432
            return IC_IL1;
3433
        }
3434
 
3435
      /* Any mode of 2 bytes or smaller can be loaded with an il
3436
         instruction. */
3437
      gcc_assert (GET_MODE_SIZE (mode) > 2);
3438
 
3439
      fsmbi = 1;
3440
      repeat = 0;
3441
      for (i = 0; i < 16 && fsmbi; i++)
3442
        if (arr[i] != 0 && repeat == 0)
3443
          repeat = arr[i];
3444
        else if (arr[i] != 0 && arr[i] != repeat)
3445
          fsmbi = 0;
3446
      if (fsmbi)
3447
        return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
3448
 
3449
      if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3450
        return IC_CPAT;
3451
 
3452
      if (repeated)
3453
        return IC_IL2;
3454
 
3455
      return IC_POOL;
3456
    default:
3457
      break;
3458
    }
3459
  gcc_unreachable ();
3460
}
3461
 
3462
static enum spu_immediate
3463
which_logical_immediate (HOST_WIDE_INT val)
3464
{
3465
  gcc_assert (val == trunc_int_for_mode (val, SImode));
3466
 
3467
  if (val >= -0x200 && val <= 0x1ff)
3468
    return SPU_ORI;
3469
  if (val >= 0 && val <= 0xffff)
3470
    return SPU_IOHL;
3471
  if ((val & 0xffff) == ((val >> 16) & 0xffff))
3472
    {
3473
      val = trunc_int_for_mode (val, HImode);
3474
      if (val >= -0x200 && val <= 0x1ff)
3475
        return SPU_ORHI;
3476
      if ((val & 0xff) == ((val >> 8) & 0xff))
3477
        {
3478
          val = trunc_int_for_mode (val, QImode);
3479
          if (val >= -0x200 && val <= 0x1ff)
3480
            return SPU_ORBI;
3481
        }
3482
    }
3483
  return SPU_NONE;
3484
}
3485
 
3486
/* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3487
   CONST_DOUBLEs. */
3488
static int
3489
const_vector_immediate_p (rtx x)
3490
{
3491
  int i;
3492
  gcc_assert (GET_CODE (x) == CONST_VECTOR);
3493
  for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3494
    if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3495
        && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3496
      return 0;
3497
  return 1;
3498
}
3499
 
3500
int
3501
logical_immediate_p (rtx op, enum machine_mode mode)
3502
{
3503
  HOST_WIDE_INT val;
3504
  unsigned char arr[16];
3505
  int i, j;
3506
 
3507
  gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3508
              || GET_CODE (op) == CONST_VECTOR);
3509
 
3510
  if (GET_CODE (op) == CONST_VECTOR
3511
      && !const_vector_immediate_p (op))
3512
    return 0;
3513
 
3514
  if (GET_MODE (op) != VOIDmode)
3515
    mode = GET_MODE (op);
3516
 
3517
  constant_to_array (mode, op, arr);
3518
 
3519
  /* Check that bytes are repeated. */
3520
  for (i = 4; i < 16; i += 4)
3521
    for (j = 0; j < 4; j++)
3522
      if (arr[j] != arr[i + j])
3523
        return 0;
3524
 
3525
  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3526
  val = trunc_int_for_mode (val, SImode);
3527
 
3528
  i = which_logical_immediate (val);
3529
  return i != SPU_NONE && i != SPU_IOHL;
3530
}
3531
 
3532
int
3533
iohl_immediate_p (rtx op, enum machine_mode mode)
3534
{
3535
  HOST_WIDE_INT val;
3536
  unsigned char arr[16];
3537
  int i, j;
3538
 
3539
  gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3540
              || GET_CODE (op) == CONST_VECTOR);
3541
 
3542
  if (GET_CODE (op) == CONST_VECTOR
3543
      && !const_vector_immediate_p (op))
3544
    return 0;
3545
 
3546
  if (GET_MODE (op) != VOIDmode)
3547
    mode = GET_MODE (op);
3548
 
3549
  constant_to_array (mode, op, arr);
3550
 
3551
  /* Check that bytes are repeated. */
3552
  for (i = 4; i < 16; i += 4)
3553
    for (j = 0; j < 4; j++)
3554
      if (arr[j] != arr[i + j])
3555
        return 0;
3556
 
3557
  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3558
  val = trunc_int_for_mode (val, SImode);
3559
 
3560
  return val >= 0 && val <= 0xffff;
3561
}
3562
 
3563
int
3564
arith_immediate_p (rtx op, enum machine_mode mode,
3565
                   HOST_WIDE_INT low, HOST_WIDE_INT high)
3566
{
3567
  HOST_WIDE_INT val;
3568
  unsigned char arr[16];
3569
  int bytes, i, j;
3570
 
3571
  gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3572
              || GET_CODE (op) == CONST_VECTOR);
3573
 
3574
  if (GET_CODE (op) == CONST_VECTOR
3575
      && !const_vector_immediate_p (op))
3576
    return 0;
3577
 
3578
  if (GET_MODE (op) != VOIDmode)
3579
    mode = GET_MODE (op);
3580
 
3581
  constant_to_array (mode, op, arr);
3582
 
3583
  if (VECTOR_MODE_P (mode))
3584
    mode = GET_MODE_INNER (mode);
3585
 
3586
  bytes = GET_MODE_SIZE (mode);
3587
  mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3588
 
3589
  /* Check that bytes are repeated. */
3590
  for (i = bytes; i < 16; i += bytes)
3591
    for (j = 0; j < bytes; j++)
3592
      if (arr[j] != arr[i + j])
3593
        return 0;
3594
 
3595
  val = arr[0];
3596
  for (j = 1; j < bytes; j++)
3597
    val = (val << 8) | arr[j];
3598
 
3599
  val = trunc_int_for_mode (val, mode);
3600
 
3601
  return val >= low && val <= high;
3602
}
3603
 
3604
/* TRUE when op is an immediate and an exact power of 2, and given that
3605
   OP is 2^scale, scale >= LOW && scale <= HIGH.  When OP is a vector,
3606
   all entries must be the same. */
3607
bool
3608
exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high)
3609
{
3610
  enum machine_mode int_mode;
3611
  HOST_WIDE_INT val;
3612
  unsigned char arr[16];
3613
  int bytes, i, j;
3614
 
3615
  gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3616
              || GET_CODE (op) == CONST_VECTOR);
3617
 
3618
  if (GET_CODE (op) == CONST_VECTOR
3619
      && !const_vector_immediate_p (op))
3620
    return 0;
3621
 
3622
  if (GET_MODE (op) != VOIDmode)
3623
    mode = GET_MODE (op);
3624
 
3625
  constant_to_array (mode, op, arr);
3626
 
3627
  if (VECTOR_MODE_P (mode))
3628
    mode = GET_MODE_INNER (mode);
3629
 
3630
  bytes = GET_MODE_SIZE (mode);
3631
  int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3632
 
3633
  /* Check that bytes are repeated. */
3634
  for (i = bytes; i < 16; i += bytes)
3635
    for (j = 0; j < bytes; j++)
3636
      if (arr[j] != arr[i + j])
3637
        return 0;
3638
 
3639
  val = arr[0];
3640
  for (j = 1; j < bytes; j++)
3641
    val = (val << 8) | arr[j];
3642
 
3643
  val = trunc_int_for_mode (val, int_mode);
3644
 
3645
  /* Currently, we only handle SFmode */
3646
  gcc_assert (mode == SFmode);
3647
  if (mode == SFmode)
3648
    {
3649
      int exp = (val >> 23) - 127;
3650
      return val > 0 && (val & 0x007fffff) == 0
3651
             &&  exp >= low && exp <= high;
3652
    }
3653
  return FALSE;
3654
}
3655
 
3656
/* Return true if X is a SYMBOL_REF to an __ea qualified variable.  */
3657
 
3658
static int
3659
ea_symbol_ref (rtx *px, void *data ATTRIBUTE_UNUSED)
3660
{
3661
  rtx x = *px;
3662
  tree decl;
3663
 
3664
  if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3665
    {
3666
      rtx plus = XEXP (x, 0);
3667
      rtx op0 = XEXP (plus, 0);
3668
      rtx op1 = XEXP (plus, 1);
3669
      if (GET_CODE (op1) == CONST_INT)
3670
        x = op0;
3671
    }
3672
 
3673
  return (GET_CODE (x) == SYMBOL_REF
3674
          && (decl = SYMBOL_REF_DECL (x)) != 0
3675
          && TREE_CODE (decl) == VAR_DECL
3676
          && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3677
}
3678
 
3679
/* We accept:
3680
   - any 32-bit constant (SImode, SFmode)
3681
   - any constant that can be generated with fsmbi (any mode)
3682
   - a 64-bit constant where the high and low bits are identical
3683
     (DImode, DFmode)
3684
   - a 128-bit constant where the four 32-bit words match.  */
3685
int
3686
spu_legitimate_constant_p (rtx x)
3687
{
3688
  if (GET_CODE (x) == HIGH)
3689
    x = XEXP (x, 0);
3690
 
3691
  /* Reject any __ea qualified reference.  These can't appear in
3692
     instructions but must be forced to the constant pool.  */
3693
  if (for_each_rtx (&x, ea_symbol_ref, 0))
3694
    return 0;
3695
 
3696
  /* V4SI with all identical symbols is valid. */
3697
  if (!flag_pic
3698
      && GET_MODE (x) == V4SImode
3699
      && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3700
          || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
3701
          || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
3702
    return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3703
           && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3704
           && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3705
 
3706
  if (GET_CODE (x) == CONST_VECTOR
3707
      && !const_vector_immediate_p (x))
3708
    return 0;
3709
  return 1;
3710
}
3711
 
3712
/* Valid address are:
3713
   - symbol_ref, label_ref, const
3714
   - reg
3715
   - reg + const_int, where const_int is 16 byte aligned
3716
   - reg + reg, alignment doesn't matter
3717
  The alignment matters in the reg+const case because lqd and stqd
3718
  ignore the 4 least significant bits of the const.  We only care about
3719
  16 byte modes because the expand phase will change all smaller MEM
3720
  references to TImode.  */
3721
static bool
3722
spu_legitimate_address_p (enum machine_mode mode,
3723
                          rtx x, bool reg_ok_strict)
3724
{
3725
  int aligned = GET_MODE_SIZE (mode) >= 16;
3726
  if (aligned
3727
      && GET_CODE (x) == AND
3728
      && GET_CODE (XEXP (x, 1)) == CONST_INT
3729
      && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
3730
    x = XEXP (x, 0);
3731
  switch (GET_CODE (x))
3732
    {
3733
    case LABEL_REF:
3734
      return !TARGET_LARGE_MEM;
3735
 
3736
    case SYMBOL_REF:
3737
    case CONST:
3738
      /* Keep __ea references until reload so that spu_expand_mov can see them
3739
         in MEMs.  */
3740
      if (ea_symbol_ref (&x, 0))
3741
        return !reload_in_progress && !reload_completed;
3742
      return !TARGET_LARGE_MEM;
3743
 
3744
    case CONST_INT:
3745
      return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3746
 
3747
    case SUBREG:
3748
      x = XEXP (x, 0);
3749
      if (REG_P (x))
3750
        return 0;
3751
 
3752
    case REG:
3753
      return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3754
 
3755
    case PLUS:
3756
    case LO_SUM:
3757
      {
3758
        rtx op0 = XEXP (x, 0);
3759
        rtx op1 = XEXP (x, 1);
3760
        if (GET_CODE (op0) == SUBREG)
3761
          op0 = XEXP (op0, 0);
3762
        if (GET_CODE (op1) == SUBREG)
3763
          op1 = XEXP (op1, 0);
3764
        if (GET_CODE (op0) == REG
3765
            && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3766
            && GET_CODE (op1) == CONST_INT
3767
            && INTVAL (op1) >= -0x2000
3768
            && INTVAL (op1) <= 0x1fff
3769
            && (!aligned || (INTVAL (op1) & 15) == 0))
3770
          return TRUE;
3771
        if (GET_CODE (op0) == REG
3772
            && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3773
            && GET_CODE (op1) == REG
3774
            && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
3775
          return TRUE;
3776
      }
3777
      break;
3778
 
3779
    default:
3780
      break;
3781
    }
3782
  return FALSE;
3783
}
3784
 
3785
/* Like spu_legitimate_address_p, except with named addresses.  */
3786
static bool
3787
spu_addr_space_legitimate_address_p (enum machine_mode mode, rtx x,
3788
                                     bool reg_ok_strict, addr_space_t as)
3789
{
3790
  if (as == ADDR_SPACE_EA)
3791
    return (REG_P (x) && (GET_MODE (x) == EAmode));
3792
 
3793
  else if (as != ADDR_SPACE_GENERIC)
3794
    gcc_unreachable ();
3795
 
3796
  return spu_legitimate_address_p (mode, x, reg_ok_strict);
3797
}
3798
 
3799
/* When the address is reg + const_int, force the const_int into a
3800
   register.  */
3801
rtx
3802
spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3803
                        enum machine_mode mode ATTRIBUTE_UNUSED)
3804
{
3805
  rtx op0, op1;
3806
  /* Make sure both operands are registers.  */
3807
  if (GET_CODE (x) == PLUS)
3808
    {
3809
      op0 = XEXP (x, 0);
3810
      op1 = XEXP (x, 1);
3811
      if (ALIGNED_SYMBOL_REF_P (op0))
3812
        {
3813
          op0 = force_reg (Pmode, op0);
3814
          mark_reg_pointer (op0, 128);
3815
        }
3816
      else if (GET_CODE (op0) != REG)
3817
        op0 = force_reg (Pmode, op0);
3818
      if (ALIGNED_SYMBOL_REF_P (op1))
3819
        {
3820
          op1 = force_reg (Pmode, op1);
3821
          mark_reg_pointer (op1, 128);
3822
        }
3823
      else if (GET_CODE (op1) != REG)
3824
        op1 = force_reg (Pmode, op1);
3825
      x = gen_rtx_PLUS (Pmode, op0, op1);
3826
    }
3827
  return x;
3828
}
3829
 
3830
/* Like spu_legitimate_address, except with named address support.  */
3831
static rtx
3832
spu_addr_space_legitimize_address (rtx x, rtx oldx, enum machine_mode mode,
3833
                                   addr_space_t as)
3834
{
3835
  if (as != ADDR_SPACE_GENERIC)
3836
    return x;
3837
 
3838
  return spu_legitimize_address (x, oldx, mode);
3839
}
3840
 
3841
/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3842
   struct attribute_spec.handler.  */
3843
static tree
3844
spu_handle_fndecl_attribute (tree * node,
3845
                             tree name,
3846
                             tree args ATTRIBUTE_UNUSED,
3847
                             int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3848
{
3849
  if (TREE_CODE (*node) != FUNCTION_DECL)
3850
    {
3851
      warning (0, "%qE attribute only applies to functions",
3852
               name);
3853
      *no_add_attrs = true;
3854
    }
3855
 
3856
  return NULL_TREE;
3857
}
3858
 
3859
/* Handle the "vector" attribute.  */
3860
static tree
3861
spu_handle_vector_attribute (tree * node, tree name,
3862
                             tree args ATTRIBUTE_UNUSED,
3863
                             int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3864
{
3865
  tree type = *node, result = NULL_TREE;
3866
  enum machine_mode mode;
3867
  int unsigned_p;
3868
 
3869
  while (POINTER_TYPE_P (type)
3870
         || TREE_CODE (type) == FUNCTION_TYPE
3871
         || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3872
    type = TREE_TYPE (type);
3873
 
3874
  mode = TYPE_MODE (type);
3875
 
3876
  unsigned_p = TYPE_UNSIGNED (type);
3877
  switch (mode)
3878
    {
3879
    case DImode:
3880
      result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3881
      break;
3882
    case SImode:
3883
      result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3884
      break;
3885
    case HImode:
3886
      result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3887
      break;
3888
    case QImode:
3889
      result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3890
      break;
3891
    case SFmode:
3892
      result = V4SF_type_node;
3893
      break;
3894
    case DFmode:
3895
      result = V2DF_type_node;
3896
      break;
3897
    default:
3898
      break;
3899
    }
3900
 
3901
  /* Propagate qualifiers attached to the element type
3902
     onto the vector type.  */
3903
  if (result && result != type && TYPE_QUALS (type))
3904
    result = build_qualified_type (result, TYPE_QUALS (type));
3905
 
3906
  *no_add_attrs = true;         /* No need to hang on to the attribute.  */
3907
 
3908
  if (!result)
3909
    warning (0, "%qE attribute ignored", name);
3910
  else
3911
    *node = lang_hooks.types.reconstruct_complex_type (*node, result);
3912
 
3913
  return NULL_TREE;
3914
}
3915
 
3916
/* Return nonzero if FUNC is a naked function.  */
3917
static int
3918
spu_naked_function_p (tree func)
3919
{
3920
  tree a;
3921
 
3922
  if (TREE_CODE (func) != FUNCTION_DECL)
3923
    abort ();
3924
 
3925
  a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3926
  return a != NULL_TREE;
3927
}
3928
 
3929
int
3930
spu_initial_elimination_offset (int from, int to)
3931
{
3932
  int saved_regs_size = spu_saved_regs_size ();
3933
  int sp_offset = 0;
3934
  if (!current_function_is_leaf || crtl->outgoing_args_size
3935
      || get_frame_size () || saved_regs_size)
3936
    sp_offset = STACK_POINTER_OFFSET;
3937
  if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3938
    return get_frame_size () + crtl->outgoing_args_size + sp_offset;
3939
  else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3940
    return get_frame_size ();
3941
  else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3942
    return sp_offset + crtl->outgoing_args_size
3943
      + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3944
  else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3945
    return get_frame_size () + saved_regs_size + sp_offset;
3946
  else
3947
    gcc_unreachable ();
3948
}
3949
 
3950
rtx
3951
spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
3952
{
3953
  enum machine_mode mode = TYPE_MODE (type);
3954
  int byte_size = ((mode == BLKmode)
3955
                   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3956
 
3957
  /* Make sure small structs are left justified in a register. */
3958
  if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3959
      && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3960
    {
3961
      enum machine_mode smode;
3962
      rtvec v;
3963
      int i;
3964
      int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3965
      int n = byte_size / UNITS_PER_WORD;
3966
      v = rtvec_alloc (nregs);
3967
      for (i = 0; i < n; i++)
3968
        {
3969
          RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3970
                                                gen_rtx_REG (TImode,
3971
                                                             FIRST_RETURN_REGNUM
3972
                                                             + i),
3973
                                                GEN_INT (UNITS_PER_WORD * i));
3974
          byte_size -= UNITS_PER_WORD;
3975
        }
3976
 
3977
      if (n < nregs)
3978
        {
3979
          if (byte_size < 4)
3980
            byte_size = 4;
3981
          smode =
3982
            smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3983
          RTVEC_ELT (v, n) =
3984
            gen_rtx_EXPR_LIST (VOIDmode,
3985
                               gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3986
                               GEN_INT (UNITS_PER_WORD * n));
3987
        }
3988
      return gen_rtx_PARALLEL (mode, v);
3989
    }
3990
  return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3991
}
3992
 
3993
rtx
3994
spu_function_arg (CUMULATIVE_ARGS cum,
3995
                  enum machine_mode mode,
3996
                  tree type, int named ATTRIBUTE_UNUSED)
3997
{
3998
  int byte_size;
3999
 
4000
  if (cum >= MAX_REGISTER_ARGS)
4001
    return 0;
4002
 
4003
  byte_size = ((mode == BLKmode)
4004
               ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4005
 
4006
  /* The ABI does not allow parameters to be passed partially in
4007
     reg and partially in stack. */
4008
  if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
4009
    return 0;
4010
 
4011
  /* Make sure small structs are left justified in a register. */
4012
  if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4013
      && byte_size < UNITS_PER_WORD && byte_size > 0)
4014
    {
4015
      enum machine_mode smode;
4016
      rtx gr_reg;
4017
      if (byte_size < 4)
4018
        byte_size = 4;
4019
      smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
4020
      gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4021
                                  gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
4022
                                  const0_rtx);
4023
      return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4024
    }
4025
  else
4026
    return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
4027
}
4028
 
4029
/* Variable sized types are passed by reference.  */
4030
static bool
4031
spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
4032
                       enum machine_mode mode ATTRIBUTE_UNUSED,
4033
                       const_tree type, bool named ATTRIBUTE_UNUSED)
4034
{
4035
  return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4036
}
4037
 
4038
 
4039
/* Var args. */
4040
 
4041
/* Create and return the va_list datatype.
4042
 
4043
   On SPU, va_list is an array type equivalent to
4044
 
4045
      typedef struct __va_list_tag
4046
        {
4047
            void *__args __attribute__((__aligned(16)));
4048
            void *__skip __attribute__((__aligned(16)));
4049
 
4050
        } va_list[1];
4051
 
4052
   where __args points to the arg that will be returned by the next
4053
   va_arg(), and __skip points to the previous stack frame such that
4054
   when __args == __skip we should advance __args by 32 bytes. */
4055
static tree
4056
spu_build_builtin_va_list (void)
4057
{
4058
  tree f_args, f_skip, record, type_decl;
4059
  bool owp;
4060
 
4061
  record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4062
 
4063
  type_decl =
4064
    build_decl (BUILTINS_LOCATION,
4065
                TYPE_DECL, get_identifier ("__va_list_tag"), record);
4066
 
4067
  f_args = build_decl (BUILTINS_LOCATION,
4068
                       FIELD_DECL, get_identifier ("__args"), ptr_type_node);
4069
  f_skip = build_decl (BUILTINS_LOCATION,
4070
                       FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
4071
 
4072
  DECL_FIELD_CONTEXT (f_args) = record;
4073
  DECL_ALIGN (f_args) = 128;
4074
  DECL_USER_ALIGN (f_args) = 1;
4075
 
4076
  DECL_FIELD_CONTEXT (f_skip) = record;
4077
  DECL_ALIGN (f_skip) = 128;
4078
  DECL_USER_ALIGN (f_skip) = 1;
4079
 
4080
  TREE_CHAIN (record) = type_decl;
4081
  TYPE_NAME (record) = type_decl;
4082
  TYPE_FIELDS (record) = f_args;
4083
  TREE_CHAIN (f_args) = f_skip;
4084
 
4085
  /* We know this is being padded and we want it too.  It is an internal
4086
     type so hide the warnings from the user. */
4087
  owp = warn_padded;
4088
  warn_padded = false;
4089
 
4090
  layout_type (record);
4091
 
4092
  warn_padded = owp;
4093
 
4094
  /* The correct type is an array type of one element.  */
4095
  return build_array_type (record, build_index_type (size_zero_node));
4096
}
4097
 
4098
/* Implement va_start by filling the va_list structure VALIST.
4099
   NEXTARG points to the first anonymous stack argument.
4100
 
4101
   The following global variables are used to initialize
4102
   the va_list structure:
4103
 
4104
     crtl->args.info;
4105
       the CUMULATIVE_ARGS for this function
4106
 
4107
     crtl->args.arg_offset_rtx:
4108
       holds the offset of the first anonymous stack argument
4109
       (relative to the virtual arg pointer).  */
4110
 
4111
static void
4112
spu_va_start (tree valist, rtx nextarg)
4113
{
4114
  tree f_args, f_skip;
4115
  tree args, skip, t;
4116
 
4117
  f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4118
  f_skip = TREE_CHAIN (f_args);
4119
 
4120
  valist = build_va_arg_indirect_ref (valist);
4121
  args =
4122
    build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4123
  skip =
4124
    build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4125
 
4126
  /* Find the __args area.  */
4127
  t = make_tree (TREE_TYPE (args), nextarg);
4128
  if (crtl->args.pretend_args_size > 0)
4129
    t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
4130
                size_int (-STACK_POINTER_OFFSET));
4131
  t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
4132
  TREE_SIDE_EFFECTS (t) = 1;
4133
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4134
 
4135
  /* Find the __skip area.  */
4136
  t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
4137
  t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
4138
              size_int (crtl->args.pretend_args_size
4139
                         - STACK_POINTER_OFFSET));
4140
  t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
4141
  TREE_SIDE_EFFECTS (t) = 1;
4142
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4143
}
4144
 
4145
/* Gimplify va_arg by updating the va_list structure
4146
   VALIST as required to retrieve an argument of type
4147
   TYPE, and returning that argument.
4148
 
4149
   ret = va_arg(VALIST, TYPE);
4150
 
4151
   generates code equivalent to:
4152
 
4153
    paddedsize = (sizeof(TYPE) + 15) & -16;
4154
    if (VALIST.__args + paddedsize > VALIST.__skip
4155
        && VALIST.__args <= VALIST.__skip)
4156
      addr = VALIST.__skip + 32;
4157
    else
4158
      addr = VALIST.__args;
4159
    VALIST.__args = addr + paddedsize;
4160
    ret = *(TYPE *)addr;
4161
 */
4162
static tree
4163
spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4164
                          gimple_seq * post_p ATTRIBUTE_UNUSED)
4165
{
4166
  tree f_args, f_skip;
4167
  tree args, skip;
4168
  HOST_WIDE_INT size, rsize;
4169
  tree paddedsize, addr, tmp;
4170
  bool pass_by_reference_p;
4171
 
4172
  f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4173
  f_skip = TREE_CHAIN (f_args);
4174
 
4175
  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4176
  args =
4177
    build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4178
  skip =
4179
    build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4180
 
4181
  addr = create_tmp_var (ptr_type_node, "va_arg");
4182
 
4183
  /* if an object is dynamically sized, a pointer to it is passed
4184
     instead of the object itself. */
4185
  pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
4186
                                               false);
4187
  if (pass_by_reference_p)
4188
    type = build_pointer_type (type);
4189
  size = int_size_in_bytes (type);
4190
  rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4191
 
4192
  /* build conditional expression to calculate addr. The expression
4193
     will be gimplified later. */
4194
  paddedsize = size_int (rsize);
4195
  tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (args), paddedsize);
4196
  tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
4197
                build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4198
                build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4199
                unshare_expr (skip)));
4200
 
4201
  tmp = build3 (COND_EXPR, ptr_type_node, tmp,
4202
                build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (skip),
4203
                        size_int (32)), unshare_expr (args));
4204
 
4205
  gimplify_assign (addr, tmp, pre_p);
4206
 
4207
  /* update VALIST.__args */
4208
  tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
4209
  gimplify_assign (unshare_expr (args), tmp, pre_p);
4210
 
4211
  addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4212
                       addr);
4213
 
4214
  if (pass_by_reference_p)
4215
    addr = build_va_arg_indirect_ref (addr);
4216
 
4217
  return build_va_arg_indirect_ref (addr);
4218
}
4219
 
4220
/* Save parameter registers starting with the register that corresponds
4221
   to the first unnamed parameters.  If the first unnamed parameter is
4222
   in the stack then save no registers.  Set pretend_args_size to the
4223
   amount of space needed to save the registers. */
4224
void
4225
spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
4226
                            tree type, int *pretend_size, int no_rtl)
4227
{
4228
  if (!no_rtl)
4229
    {
4230
      rtx tmp;
4231
      int regno;
4232
      int offset;
4233
      int ncum = *cum;
4234
 
4235
      /* cum currently points to the last named argument, we want to
4236
         start at the next argument. */
4237
      FUNCTION_ARG_ADVANCE (ncum, mode, type, 1);
4238
 
4239
      offset = -STACK_POINTER_OFFSET;
4240
      for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4241
        {
4242
          tmp = gen_frame_mem (V4SImode,
4243
                               plus_constant (virtual_incoming_args_rtx,
4244
                                              offset));
4245
          emit_move_insn (tmp,
4246
                          gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4247
          offset += 16;
4248
        }
4249
      *pretend_size = offset + STACK_POINTER_OFFSET;
4250
    }
4251
}
4252
 
4253
void
4254
spu_conditional_register_usage (void)
4255
{
4256
  if (flag_pic)
4257
    {
4258
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4259
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4260
    }
4261
}
4262
 
4263
/* This is called any time we inspect the alignment of a register for
4264
   addresses.  */
4265
static int
4266
reg_aligned_for_addr (rtx x)
4267
{
4268
  int regno =
4269
    REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4270
  return REGNO_POINTER_ALIGN (regno) >= 128;
4271
}
4272
 
4273
/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4274
   into its SYMBOL_REF_FLAGS.  */
4275
static void
4276
spu_encode_section_info (tree decl, rtx rtl, int first)
4277
{
4278
  default_encode_section_info (decl, rtl, first);
4279
 
4280
  /* If a variable has a forced alignment to < 16 bytes, mark it with
4281
     SYMBOL_FLAG_ALIGN1.  */
4282
  if (TREE_CODE (decl) == VAR_DECL
4283
      && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4284
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4285
}
4286
 
4287
/* Return TRUE if we are certain the mem refers to a complete object
4288
   which is both 16-byte aligned and padded to a 16-byte boundary.  This
4289
   would make it safe to store with a single instruction.
4290
   We guarantee the alignment and padding for static objects by aligning
4291
   all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4292
   FIXME: We currently cannot guarantee this for objects on the stack
4293
   because assign_parm_setup_stack calls assign_stack_local with the
4294
   alignment of the parameter mode and in that case the alignment never
4295
   gets adjusted by LOCAL_ALIGNMENT. */
4296
static int
4297
store_with_one_insn_p (rtx mem)
4298
{
4299
  enum machine_mode mode = GET_MODE (mem);
4300
  rtx addr = XEXP (mem, 0);
4301
  if (mode == BLKmode)
4302
    return 0;
4303
  if (GET_MODE_SIZE (mode) >= 16)
4304
    return 1;
4305
  /* Only static objects. */
4306
  if (GET_CODE (addr) == SYMBOL_REF)
4307
    {
4308
      /* We use the associated declaration to make sure the access is
4309
         referring to the whole object.
4310
         We check both MEM_EXPR and and SYMBOL_REF_DECL.  I'm not sure
4311
         if it is necessary.  Will there be cases where one exists, and
4312
         the other does not?  Will there be cases where both exist, but
4313
         have different types?  */
4314
      tree decl = MEM_EXPR (mem);
4315
      if (decl
4316
          && TREE_CODE (decl) == VAR_DECL
4317
          && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4318
        return 1;
4319
      decl = SYMBOL_REF_DECL (addr);
4320
      if (decl
4321
          && TREE_CODE (decl) == VAR_DECL
4322
          && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4323
        return 1;
4324
    }
4325
  return 0;
4326
}
4327
 
4328
/* Return 1 when the address is not valid for a simple load and store as
4329
   required by the '_mov*' patterns.   We could make this less strict
4330
   for loads, but we prefer mem's to look the same so they are more
4331
   likely to be merged.  */
4332
static int
4333
address_needs_split (rtx mem)
4334
{
4335
  if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4336
      && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4337
          || !(store_with_one_insn_p (mem)
4338
               || mem_is_padded_component_ref (mem))))
4339
    return 1;
4340
 
4341
  return 0;
4342
}
4343
 
4344
static GTY(()) rtx cache_fetch;           /* __cache_fetch function */
4345
static GTY(()) rtx cache_fetch_dirty;     /* __cache_fetch_dirty function */
4346
static alias_set_type ea_alias_set = -1;  /* alias set for __ea memory */
4347
 
4348
/* MEM is known to be an __ea qualified memory access.  Emit a call to
4349
   fetch the ppu memory to local store, and return its address in local
4350
   store.  */
4351
 
4352
static void
4353
ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4354
{
4355
  if (is_store)
4356
    {
4357
      rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4358
      if (!cache_fetch_dirty)
4359
        cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4360
      emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4361
                               2, ea_addr, EAmode, ndirty, SImode);
4362
    }
4363
  else
4364
    {
4365
      if (!cache_fetch)
4366
        cache_fetch = init_one_libfunc ("__cache_fetch");
4367
      emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4368
                               1, ea_addr, EAmode);
4369
    }
4370
}
4371
 
4372
/* Like ea_load_store, but do the cache tag comparison and, for stores,
4373
   dirty bit marking, inline.
4374
 
4375
   The cache control data structure is an array of
4376
 
4377
   struct __cache_tag_array
4378
     {
4379
        unsigned int tag_lo[4];
4380
        unsigned int tag_hi[4];
4381
        void *data_pointer[4];
4382
        int reserved[4];
4383
        vector unsigned short dirty_bits[4];
4384
     }  */
4385
 
4386
static void
4387
ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4388
{
4389
  rtx ea_addr_si;
4390
  HOST_WIDE_INT v;
4391
  rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4392
  rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4393
  rtx index_mask = gen_reg_rtx (SImode);
4394
  rtx tag_arr = gen_reg_rtx (Pmode);
4395
  rtx splat_mask = gen_reg_rtx (TImode);
4396
  rtx splat = gen_reg_rtx (V4SImode);
4397
  rtx splat_hi = NULL_RTX;
4398
  rtx tag_index = gen_reg_rtx (Pmode);
4399
  rtx block_off = gen_reg_rtx (SImode);
4400
  rtx tag_addr = gen_reg_rtx (Pmode);
4401
  rtx tag = gen_reg_rtx (V4SImode);
4402
  rtx cache_tag = gen_reg_rtx (V4SImode);
4403
  rtx cache_tag_hi = NULL_RTX;
4404
  rtx cache_ptrs = gen_reg_rtx (TImode);
4405
  rtx cache_ptrs_si = gen_reg_rtx (SImode);
4406
  rtx tag_equal = gen_reg_rtx (V4SImode);
4407
  rtx tag_equal_hi = NULL_RTX;
4408
  rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4409
  rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4410
  rtx eq_index = gen_reg_rtx (SImode);
4411
  rtx bcomp, hit_label, hit_ref, cont_label, insn;
4412
 
4413
  if (spu_ea_model != 32)
4414
    {
4415
      splat_hi = gen_reg_rtx (V4SImode);
4416
      cache_tag_hi = gen_reg_rtx (V4SImode);
4417
      tag_equal_hi = gen_reg_rtx (V4SImode);
4418
    }
4419
 
4420
  emit_move_insn (index_mask, plus_constant (tag_size_sym, -128));
4421
  emit_move_insn (tag_arr, tag_arr_sym);
4422
  v = 0x0001020300010203LL;
4423
  emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4424
  ea_addr_si = ea_addr;
4425
  if (spu_ea_model != 32)
4426
    ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4427
 
4428
  /* tag_index = ea_addr & (tag_array_size - 128)  */
4429
  emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4430
 
4431
  /* splat ea_addr to all 4 slots.  */
4432
  emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4433
  /* Similarly for high 32 bits of ea_addr.  */
4434
  if (spu_ea_model != 32)
4435
    emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4436
 
4437
  /* block_off = ea_addr & 127  */
4438
  emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4439
 
4440
  /* tag_addr = tag_arr + tag_index  */
4441
  emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4442
 
4443
  /* Read cache tags.  */
4444
  emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4445
  if (spu_ea_model != 32)
4446
    emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
4447
                                               plus_constant (tag_addr, 16)));
4448
 
4449
  /* tag = ea_addr & -128  */
4450
  emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4451
 
4452
  /* Read all four cache data pointers.  */
4453
  emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
4454
                                           plus_constant (tag_addr, 32)));
4455
 
4456
  /* Compare tags.  */
4457
  emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4458
  if (spu_ea_model != 32)
4459
    {
4460
      emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4461
      emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4462
    }
4463
 
4464
  /* At most one of the tags compare equal, so tag_equal has one
4465
     32-bit slot set to all 1's, with the other slots all zero.
4466
     gbb picks off low bit from each byte in the 128-bit registers,
4467
     so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4468
     we have a hit.  */
4469
  emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4470
  emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4471
 
4472
  /* So counting leading zeros will set eq_index to 16, 20, 24 or 28.  */
4473
  emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4474
 
4475
  /* Allowing us to rotate the corresponding cache data pointer to slot0.
4476
     (rotating eq_index mod 16 bytes).  */
4477
  emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4478
  emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
4479
 
4480
  /* Add block offset to form final data address.  */
4481
  emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4482
 
4483
  /* Check that we did hit.  */
4484
  hit_label = gen_label_rtx ();
4485
  hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4486
  bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
4487
  insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4488
                                      gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4489
                                                            hit_ref, pc_rtx)));
4490
  /* Say that this branch is very likely to happen.  */
4491
  v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
4492
  REG_NOTES (insn)
4493
    = gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (v), REG_NOTES (insn));
4494
 
4495
  ea_load_store (mem, is_store, ea_addr, data_addr);
4496
  cont_label = gen_label_rtx ();
4497
  emit_jump_insn (gen_jump (cont_label));
4498
  emit_barrier ();
4499
 
4500
  emit_label (hit_label);
4501
 
4502
  if (is_store)
4503
    {
4504
      HOST_WIDE_INT v_hi;
4505
      rtx dirty_bits = gen_reg_rtx (TImode);
4506
      rtx dirty_off = gen_reg_rtx (SImode);
4507
      rtx dirty_128 = gen_reg_rtx (TImode);
4508
      rtx neg_block_off = gen_reg_rtx (SImode);
4509
 
4510
      /* Set up mask with one dirty bit per byte of the mem we are
4511
         writing, starting from top bit.  */
4512
      v_hi = v = -1;
4513
      v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4514
      if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4515
        {
4516
          v_hi = v;
4517
          v = 0;
4518
        }
4519
      emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
4520
 
4521
      /* Form index into cache dirty_bits.  eq_index is one of
4522
         0x10, 0x14, 0x18 or 0x1c.  Multiplying by 4 gives us
4523
         0x40, 0x50, 0x60 or 0x70 which just happens to be the
4524
         offset to each of the four dirty_bits elements.  */
4525
      emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4526
 
4527
      emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4528
 
4529
      /* Rotate bit mask to proper bit.  */
4530
      emit_insn (gen_negsi2 (neg_block_off, block_off));
4531
      emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4532
      emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4533
 
4534
      /* Or in the new dirty bits.  */
4535
      emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4536
 
4537
      /* Store.  */
4538
      emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4539
    }
4540
 
4541
  emit_label (cont_label);
4542
}
4543
 
4544
static rtx
4545
expand_ea_mem (rtx mem, bool is_store)
4546
{
4547
  rtx ea_addr;
4548
  rtx data_addr = gen_reg_rtx (Pmode);
4549
  rtx new_mem;
4550
 
4551
  ea_addr = force_reg (EAmode, XEXP (mem, 0));
4552
  if (optimize_size || optimize == 0)
4553
    ea_load_store (mem, is_store, ea_addr, data_addr);
4554
  else
4555
    ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4556
 
4557
  if (ea_alias_set == -1)
4558
    ea_alias_set = new_alias_set ();
4559
 
4560
  /* We generate a new MEM RTX to refer to the copy of the data
4561
     in the cache.  We do not copy memory attributes (except the
4562
     alignment) from the original MEM, as they may no longer apply
4563
     to the cache copy.  */
4564
  new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4565
  set_mem_alias_set (new_mem, ea_alias_set);
4566
  set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4567
 
4568
  return new_mem;
4569
}
4570
 
4571
int
4572
spu_expand_mov (rtx * ops, enum machine_mode mode)
4573
{
4574
  if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4575
    abort ();
4576
 
4577
  if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4578
    {
4579
      rtx from = SUBREG_REG (ops[1]);
4580
      enum machine_mode imode = int_mode_for_mode (GET_MODE (from));
4581
 
4582
      gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4583
                  && GET_MODE_CLASS (imode) == MODE_INT
4584
                  && subreg_lowpart_p (ops[1]));
4585
 
4586
      if (GET_MODE_SIZE (imode) < 4)
4587
        imode = SImode;
4588
      if (imode != GET_MODE (from))
4589
        from = gen_rtx_SUBREG (imode, from, 0);
4590
 
4591
      if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4592
        {
4593
          enum insn_code icode = convert_optab_handler (trunc_optab, mode, imode)->insn_code;
4594
          emit_insn (GEN_FCN (icode) (ops[0], from));
4595
        }
4596
      else
4597
        emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4598
      return 1;
4599
    }
4600
 
4601
  /* At least one of the operands needs to be a register. */
4602
  if ((reload_in_progress | reload_completed) == 0
4603
      && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4604
    {
4605
      rtx temp = force_reg (mode, ops[1]);
4606
      emit_move_insn (ops[0], temp);
4607
      return 1;
4608
    }
4609
  if (reload_in_progress || reload_completed)
4610
    {
4611
      if (CONSTANT_P (ops[1]))
4612
        return spu_split_immediate (ops);
4613
      return 0;
4614
    }
4615
 
4616
  /* Catch the SImode immediates greater than 0x7fffffff, and sign
4617
     extend them. */
4618
  if (GET_CODE (ops[1]) == CONST_INT)
4619
    {
4620
      HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4621
      if (val != INTVAL (ops[1]))
4622
        {
4623
          emit_move_insn (ops[0], GEN_INT (val));
4624
          return 1;
4625
        }
4626
    }
4627
  if (MEM_P (ops[0]))
4628
    {
4629
      if (MEM_ADDR_SPACE (ops[0]))
4630
        ops[0] = expand_ea_mem (ops[0], true);
4631
      return spu_split_store (ops);
4632
    }
4633
  if (MEM_P (ops[1]))
4634
    {
4635
      if (MEM_ADDR_SPACE (ops[1]))
4636
        ops[1] = expand_ea_mem (ops[1], false);
4637
      return spu_split_load (ops);
4638
    }
4639
 
4640
  return 0;
4641
}
4642
 
4643
static void
4644
spu_convert_move (rtx dst, rtx src)
4645
{
4646
  enum machine_mode mode = GET_MODE (dst);
4647
  enum machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
4648
  rtx reg;
4649
  gcc_assert (GET_MODE (src) == TImode);
4650
  reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
4651
  emit_insn (gen_rtx_SET (VOIDmode, reg,
4652
               gen_rtx_TRUNCATE (int_mode,
4653
                 gen_rtx_LSHIFTRT (TImode, src,
4654
                   GEN_INT (int_mode == DImode ? 64 : 96)))));
4655
  if (int_mode != mode)
4656
    {
4657
      reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4658
      emit_move_insn (dst, reg);
4659
    }
4660
}
4661
 
4662
/* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4663
   the address from SRC and SRC+16.  Return a REG or CONST_INT that
4664
   specifies how many bytes to rotate the loaded registers, plus any
4665
   extra from EXTRA_ROTQBY.  The address and rotate amounts are
4666
   normalized to improve merging of loads and rotate computations. */
4667
static rtx
4668
spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4669
{
4670
  rtx addr = XEXP (src, 0);
4671
  rtx p0, p1, rot, addr0, addr1;
4672
  int rot_amt;
4673
 
4674
  rot = 0;
4675
  rot_amt = 0;
4676
 
4677
  if (MEM_ALIGN (src) >= 128)
4678
    /* Address is already aligned; simply perform a TImode load.  */ ;
4679
  else if (GET_CODE (addr) == PLUS)
4680
    {
4681
      /* 8 cases:
4682
         aligned reg   + aligned reg     => lqx
4683
         aligned reg   + unaligned reg   => lqx, rotqby
4684
         aligned reg   + aligned const   => lqd
4685
         aligned reg   + unaligned const => lqd, rotqbyi
4686
         unaligned reg + aligned reg     => lqx, rotqby
4687
         unaligned reg + unaligned reg   => lqx, a, rotqby (1 scratch)
4688
         unaligned reg + aligned const   => lqd, rotqby
4689
         unaligned reg + unaligned const -> not allowed by legitimate address
4690
       */
4691
      p0 = XEXP (addr, 0);
4692
      p1 = XEXP (addr, 1);
4693
      if (!reg_aligned_for_addr (p0))
4694
        {
4695
          if (REG_P (p1) && !reg_aligned_for_addr (p1))
4696
            {
4697
              rot = gen_reg_rtx (SImode);
4698
              emit_insn (gen_addsi3 (rot, p0, p1));
4699
            }
4700
          else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4701
            {
4702
              if (INTVAL (p1) > 0
4703
                  && REG_POINTER (p0)
4704
                  && INTVAL (p1) * BITS_PER_UNIT
4705
                     < REGNO_POINTER_ALIGN (REGNO (p0)))
4706
                {
4707
                  rot = gen_reg_rtx (SImode);
4708
                  emit_insn (gen_addsi3 (rot, p0, p1));
4709
                  addr = p0;
4710
                }
4711
              else
4712
                {
4713
                  rtx x = gen_reg_rtx (SImode);
4714
                  emit_move_insn (x, p1);
4715
                  if (!spu_arith_operand (p1, SImode))
4716
                    p1 = x;
4717
                  rot = gen_reg_rtx (SImode);
4718
                  emit_insn (gen_addsi3 (rot, p0, p1));
4719
                  addr = gen_rtx_PLUS (Pmode, p0, x);
4720
                }
4721
            }
4722
          else
4723
            rot = p0;
4724
        }
4725
      else
4726
        {
4727
          if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4728
            {
4729
              rot_amt = INTVAL (p1) & 15;
4730
              if (INTVAL (p1) & -16)
4731
                {
4732
                  p1 = GEN_INT (INTVAL (p1) & -16);
4733
                  addr = gen_rtx_PLUS (SImode, p0, p1);
4734
                }
4735
              else
4736
                addr = p0;
4737
            }
4738
          else if (REG_P (p1) && !reg_aligned_for_addr (p1))
4739
            rot = p1;
4740
        }
4741
    }
4742
  else if (REG_P (addr))
4743
    {
4744
      if (!reg_aligned_for_addr (addr))
4745
        rot = addr;
4746
    }
4747
  else if (GET_CODE (addr) == CONST)
4748
    {
4749
      if (GET_CODE (XEXP (addr, 0)) == PLUS
4750
          && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4751
          && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4752
        {
4753
          rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4754
          if (rot_amt & -16)
4755
            addr = gen_rtx_CONST (Pmode,
4756
                                  gen_rtx_PLUS (Pmode,
4757
                                                XEXP (XEXP (addr, 0), 0),
4758
                                                GEN_INT (rot_amt & -16)));
4759
          else
4760
            addr = XEXP (XEXP (addr, 0), 0);
4761
        }
4762
      else
4763
        {
4764
          rot = gen_reg_rtx (Pmode);
4765
          emit_move_insn (rot, addr);
4766
        }
4767
    }
4768
  else if (GET_CODE (addr) == CONST_INT)
4769
    {
4770
      rot_amt = INTVAL (addr);
4771
      addr = GEN_INT (rot_amt & -16);
4772
    }
4773
  else if (!ALIGNED_SYMBOL_REF_P (addr))
4774
    {
4775
      rot = gen_reg_rtx (Pmode);
4776
      emit_move_insn (rot, addr);
4777
    }
4778
 
4779
  rot_amt += extra_rotby;
4780
 
4781
  rot_amt &= 15;
4782
 
4783
  if (rot && rot_amt)
4784
    {
4785
      rtx x = gen_reg_rtx (SImode);
4786
      emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4787
      rot = x;
4788
      rot_amt = 0;
4789
    }
4790
  if (!rot && rot_amt)
4791
    rot = GEN_INT (rot_amt);
4792
 
4793
  addr0 = copy_rtx (addr);
4794
  addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4795
  emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4796
 
4797
  if (dst1)
4798
    {
4799
      addr1 = plus_constant (copy_rtx (addr), 16);
4800
      addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4801
      emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4802
    }
4803
 
4804
  return rot;
4805
}
4806
 
4807
int
4808
spu_split_load (rtx * ops)
4809
{
4810
  enum machine_mode mode = GET_MODE (ops[0]);
4811
  rtx addr, load, rot;
4812
  int rot_amt;
4813
 
4814
  if (GET_MODE_SIZE (mode) >= 16)
4815
    return 0;
4816
 
4817
  addr = XEXP (ops[1], 0);
4818
  gcc_assert (GET_CODE (addr) != AND);
4819
 
4820
  if (!address_needs_split (ops[1]))
4821
    {
4822
      ops[1] = change_address (ops[1], TImode, addr);
4823
      load = gen_reg_rtx (TImode);
4824
      emit_insn (gen__movti (load, ops[1]));
4825
      spu_convert_move (ops[0], load);
4826
      return 1;
4827
    }
4828
 
4829
  rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4830
 
4831
  load = gen_reg_rtx (TImode);
4832
  rot = spu_expand_load (load, 0, ops[1], rot_amt);
4833
 
4834
  if (rot)
4835
    emit_insn (gen_rotqby_ti (load, load, rot));
4836
 
4837
  spu_convert_move (ops[0], load);
4838
  return 1;
4839
}
4840
 
4841
int
4842
spu_split_store (rtx * ops)
4843
{
4844
  enum machine_mode mode = GET_MODE (ops[0]);
4845
  rtx reg;
4846
  rtx addr, p0, p1, p1_lo, smem;
4847
  int aform;
4848
  int scalar;
4849
 
4850
  if (GET_MODE_SIZE (mode) >= 16)
4851
    return 0;
4852
 
4853
  addr = XEXP (ops[0], 0);
4854
  gcc_assert (GET_CODE (addr) != AND);
4855
 
4856
  if (!address_needs_split (ops[0]))
4857
    {
4858
      reg = gen_reg_rtx (TImode);
4859
      emit_insn (gen_spu_convert (reg, ops[1]));
4860
      ops[0] = change_address (ops[0], TImode, addr);
4861
      emit_move_insn (ops[0], reg);
4862
      return 1;
4863
    }
4864
 
4865
  if (GET_CODE (addr) == PLUS)
4866
    {
4867
      /* 8 cases:
4868
         aligned reg   + aligned reg     => lqx, c?x, shuf, stqx
4869
         aligned reg   + unaligned reg   => lqx, c?x, shuf, stqx
4870
         aligned reg   + aligned const   => lqd, c?d, shuf, stqx
4871
         aligned reg   + unaligned const => lqd, c?d, shuf, stqx
4872
         unaligned reg + aligned reg     => lqx, c?x, shuf, stqx
4873
         unaligned reg + unaligned reg   => lqx, c?x, shuf, stqx
4874
         unaligned reg + aligned const   => lqd, c?d, shuf, stqx
4875
         unaligned reg + unaligned const -> lqx, c?d, shuf, stqx
4876
       */
4877
      aform = 0;
4878
      p0 = XEXP (addr, 0);
4879
      p1 = p1_lo = XEXP (addr, 1);
4880
      if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
4881
        {
4882
          p1_lo = GEN_INT (INTVAL (p1) & 15);
4883
          if (reg_aligned_for_addr (p0))
4884
            {
4885
              p1 = GEN_INT (INTVAL (p1) & -16);
4886
              if (p1 == const0_rtx)
4887
                addr = p0;
4888
              else
4889
                addr = gen_rtx_PLUS (SImode, p0, p1);
4890
            }
4891
          else
4892
            {
4893
              rtx x = gen_reg_rtx (SImode);
4894
              emit_move_insn (x, p1);
4895
              addr = gen_rtx_PLUS (SImode, p0, x);
4896
            }
4897
        }
4898
    }
4899
  else if (REG_P (addr))
4900
    {
4901
      aform = 0;
4902
      p0 = addr;
4903
      p1 = p1_lo = const0_rtx;
4904
    }
4905
  else
4906
    {
4907
      aform = 1;
4908
      p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4909
      p1 = 0;                    /* aform doesn't use p1 */
4910
      p1_lo = addr;
4911
      if (ALIGNED_SYMBOL_REF_P (addr))
4912
        p1_lo = const0_rtx;
4913
      else if (GET_CODE (addr) == CONST
4914
               && GET_CODE (XEXP (addr, 0)) == PLUS
4915
               && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4916
               && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4917
        {
4918
          HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4919
          if ((v & -16) != 0)
4920
            addr = gen_rtx_CONST (Pmode,
4921
                                  gen_rtx_PLUS (Pmode,
4922
                                                XEXP (XEXP (addr, 0), 0),
4923
                                                GEN_INT (v & -16)));
4924
          else
4925
            addr = XEXP (XEXP (addr, 0), 0);
4926
          p1_lo = GEN_INT (v & 15);
4927
        }
4928
      else if (GET_CODE (addr) == CONST_INT)
4929
        {
4930
          p1_lo = GEN_INT (INTVAL (addr) & 15);
4931
          addr = GEN_INT (INTVAL (addr) & -16);
4932
        }
4933
      else
4934
        {
4935
          p1_lo = gen_reg_rtx (SImode);
4936
          emit_move_insn (p1_lo, addr);
4937
        }
4938
    }
4939
 
4940
  reg = gen_reg_rtx (TImode);
4941
 
4942
  scalar = store_with_one_insn_p (ops[0]);
4943
  if (!scalar)
4944
    {
4945
      /* We could copy the flags from the ops[0] MEM to mem here,
4946
         We don't because we want this load to be optimized away if
4947
         possible, and copying the flags will prevent that in certain
4948
         cases, e.g. consider the volatile flag. */
4949
 
4950
      rtx pat = gen_reg_rtx (TImode);
4951
      rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4952
      set_mem_alias_set (lmem, 0);
4953
      emit_insn (gen_movti (reg, lmem));
4954
 
4955
      if (!p0 || reg_aligned_for_addr (p0))
4956
        p0 = stack_pointer_rtx;
4957
      if (!p1_lo)
4958
        p1_lo = const0_rtx;
4959
 
4960
      emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4961
      emit_insn (gen_shufb (reg, ops[1], reg, pat));
4962
    }
4963
  else
4964
    {
4965
      if (GET_CODE (ops[1]) == REG)
4966
        emit_insn (gen_spu_convert (reg, ops[1]));
4967
      else if (GET_CODE (ops[1]) == SUBREG)
4968
        emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4969
      else
4970
        abort ();
4971
    }
4972
 
4973
  if (GET_MODE_SIZE (mode) < 4 && scalar)
4974
    emit_insn (gen_ashlti3
4975
               (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
4976
 
4977
  smem = change_address (ops[0], TImode, copy_rtx (addr));
4978
  /* We can't use the previous alias set because the memory has changed
4979
     size and can potentially overlap objects of other types.  */
4980
  set_mem_alias_set (smem, 0);
4981
 
4982
  emit_insn (gen_movti (smem, reg));
4983
  return 1;
4984
}
4985
 
4986
/* Return TRUE if X is MEM which is a struct member reference
4987
   and the member can safely be loaded and stored with a single
4988
   instruction because it is padded. */
4989
static int
4990
mem_is_padded_component_ref (rtx x)
4991
{
4992
  tree t = MEM_EXPR (x);
4993
  tree r;
4994
  if (!t || TREE_CODE (t) != COMPONENT_REF)
4995
    return 0;
4996
  t = TREE_OPERAND (t, 1);
4997
  if (!t || TREE_CODE (t) != FIELD_DECL
4998
      || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4999
    return 0;
5000
  /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
5001
  r = DECL_FIELD_CONTEXT (t);
5002
  if (!r || TREE_CODE (r) != RECORD_TYPE)
5003
    return 0;
5004
  /* Make sure they are the same mode */
5005
  if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
5006
    return 0;
5007
  /* If there are no following fields then the field alignment assures
5008
     the structure is padded to the alignment which means this field is
5009
     padded too.  */
5010
  if (TREE_CHAIN (t) == 0)
5011
    return 1;
5012
  /* If the following field is also aligned then this field will be
5013
     padded. */
5014
  t = TREE_CHAIN (t);
5015
  if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
5016
    return 1;
5017
  return 0;
5018
}
5019
 
5020
/* Parse the -mfixed-range= option string.  */
5021
static void
5022
fix_range (const char *const_str)
5023
{
5024
  int i, first, last;
5025
  char *str, *dash, *comma;
5026
 
5027
  /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
5028
     REG2 are either register names or register numbers.  The effect
5029
     of this option is to mark the registers in the range from REG1 to
5030
     REG2 as ``fixed'' so they won't be used by the compiler.  */
5031
 
5032
  i = strlen (const_str);
5033
  str = (char *) alloca (i + 1);
5034
  memcpy (str, const_str, i + 1);
5035
 
5036
  while (1)
5037
    {
5038
      dash = strchr (str, '-');
5039
      if (!dash)
5040
        {
5041
          warning (0, "value of -mfixed-range must have form REG1-REG2");
5042
          return;
5043
        }
5044
      *dash = '\0';
5045
      comma = strchr (dash + 1, ',');
5046
      if (comma)
5047
        *comma = '\0';
5048
 
5049
      first = decode_reg_name (str);
5050
      if (first < 0)
5051
        {
5052
          warning (0, "unknown register name: %s", str);
5053
          return;
5054
        }
5055
 
5056
      last = decode_reg_name (dash + 1);
5057
      if (last < 0)
5058
        {
5059
          warning (0, "unknown register name: %s", dash + 1);
5060
          return;
5061
        }
5062
 
5063
      *dash = '-';
5064
 
5065
      if (first > last)
5066
        {
5067
          warning (0, "%s-%s is an empty range", str, dash + 1);
5068
          return;
5069
        }
5070
 
5071
      for (i = first; i <= last; ++i)
5072
        fixed_regs[i] = call_used_regs[i] = 1;
5073
 
5074
      if (!comma)
5075
        break;
5076
 
5077
      *comma = ',';
5078
      str = comma + 1;
5079
    }
5080
}
5081
 
5082
/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
5083
   can be generated using the fsmbi instruction. */
5084
int
5085
fsmbi_const_p (rtx x)
5086
{
5087
  if (CONSTANT_P (x))
5088
    {
5089
      /* We can always choose TImode for CONST_INT because the high bits
5090
         of an SImode will always be all 1s, i.e., valid for fsmbi. */
5091
      enum immediate_class c = classify_immediate (x, TImode);
5092
      return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
5093
    }
5094
  return 0;
5095
}
5096
 
5097
/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
5098
   can be generated using the cbd, chd, cwd or cdd instruction. */
5099
int
5100
cpat_const_p (rtx x, enum machine_mode mode)
5101
{
5102
  if (CONSTANT_P (x))
5103
    {
5104
      enum immediate_class c = classify_immediate (x, mode);
5105
      return c == IC_CPAT;
5106
    }
5107
  return 0;
5108
}
5109
 
5110
rtx
5111
gen_cpat_const (rtx * ops)
5112
{
5113
  unsigned char dst[16];
5114
  int i, offset, shift, isize;
5115
  if (GET_CODE (ops[3]) != CONST_INT
5116
      || GET_CODE (ops[2]) != CONST_INT
5117
      || (GET_CODE (ops[1]) != CONST_INT
5118
          && GET_CODE (ops[1]) != REG))
5119
    return 0;
5120
  if (GET_CODE (ops[1]) == REG
5121
      && (!REG_POINTER (ops[1])
5122
          || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
5123
    return 0;
5124
 
5125
  for (i = 0; i < 16; i++)
5126
    dst[i] = i + 16;
5127
  isize = INTVAL (ops[3]);
5128
  if (isize == 1)
5129
    shift = 3;
5130
  else if (isize == 2)
5131
    shift = 2;
5132
  else
5133
    shift = 0;
5134
  offset = (INTVAL (ops[2]) +
5135
            (GET_CODE (ops[1]) ==
5136
             CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
5137
  for (i = 0; i < isize; i++)
5138
    dst[offset + i] = i + shift;
5139
  return array_to_constant (TImode, dst);
5140
}
5141
 
5142
/* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5143
   array.  Use MODE for CONST_INT's.  When the constant's mode is smaller
5144
   than 16 bytes, the value is repeated across the rest of the array. */
5145
void
5146
constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
5147
{
5148
  HOST_WIDE_INT val;
5149
  int i, j, first;
5150
 
5151
  memset (arr, 0, 16);
5152
  mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5153
  if (GET_CODE (x) == CONST_INT
5154
      || (GET_CODE (x) == CONST_DOUBLE
5155
          && (mode == SFmode || mode == DFmode)))
5156
    {
5157
      gcc_assert (mode != VOIDmode && mode != BLKmode);
5158
 
5159
      if (GET_CODE (x) == CONST_DOUBLE)
5160
        val = const_double_to_hwint (x);
5161
      else
5162
        val = INTVAL (x);
5163
      first = GET_MODE_SIZE (mode) - 1;
5164
      for (i = first; i >= 0; i--)
5165
        {
5166
          arr[i] = val & 0xff;
5167
          val >>= 8;
5168
        }
5169
      /* Splat the constant across the whole array. */
5170
      for (j = 0, i = first + 1; i < 16; i++)
5171
        {
5172
          arr[i] = arr[j];
5173
          j = (j == first) ? 0 : j + 1;
5174
        }
5175
    }
5176
  else if (GET_CODE (x) == CONST_DOUBLE)
5177
    {
5178
      val = CONST_DOUBLE_LOW (x);
5179
      for (i = 15; i >= 8; i--)
5180
        {
5181
          arr[i] = val & 0xff;
5182
          val >>= 8;
5183
        }
5184
      val = CONST_DOUBLE_HIGH (x);
5185
      for (i = 7; i >= 0; i--)
5186
        {
5187
          arr[i] = val & 0xff;
5188
          val >>= 8;
5189
        }
5190
    }
5191
  else if (GET_CODE (x) == CONST_VECTOR)
5192
    {
5193
      int units;
5194
      rtx elt;
5195
      mode = GET_MODE_INNER (mode);
5196
      units = CONST_VECTOR_NUNITS (x);
5197
      for (i = 0; i < units; i++)
5198
        {
5199
          elt = CONST_VECTOR_ELT (x, i);
5200
          if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5201
            {
5202
              if (GET_CODE (elt) == CONST_DOUBLE)
5203
                val = const_double_to_hwint (elt);
5204
              else
5205
                val = INTVAL (elt);
5206
              first = GET_MODE_SIZE (mode) - 1;
5207
              if (first + i * GET_MODE_SIZE (mode) > 16)
5208
                abort ();
5209
              for (j = first; j >= 0; j--)
5210
                {
5211
                  arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5212
                  val >>= 8;
5213
                }
5214
            }
5215
        }
5216
    }
5217
  else
5218
    gcc_unreachable();
5219
}
5220
 
5221
/* Convert a 16 byte array to a constant of mode MODE.  When MODE is
5222
   smaller than 16 bytes, use the bytes that would represent that value
5223
   in a register, e.g., for QImode return the value of arr[3].  */
5224
rtx
5225
array_to_constant (enum machine_mode mode, const unsigned char arr[16])
5226
{
5227
  enum machine_mode inner_mode;
5228
  rtvec v;
5229
  int units, size, i, j, k;
5230
  HOST_WIDE_INT val;
5231
 
5232
  if (GET_MODE_CLASS (mode) == MODE_INT
5233
      && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5234
    {
5235
      j = GET_MODE_SIZE (mode);
5236
      i = j < 4 ? 4 - j : 0;
5237
      for (val = 0; i < j; i++)
5238
        val = (val << 8) | arr[i];
5239
      val = trunc_int_for_mode (val, mode);
5240
      return GEN_INT (val);
5241
    }
5242
 
5243
  if (mode == TImode)
5244
    {
5245
      HOST_WIDE_INT high;
5246
      for (i = high = 0; i < 8; i++)
5247
        high = (high << 8) | arr[i];
5248
      for (i = 8, val = 0; i < 16; i++)
5249
        val = (val << 8) | arr[i];
5250
      return immed_double_const (val, high, TImode);
5251
    }
5252
  if (mode == SFmode)
5253
    {
5254
      val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5255
      val = trunc_int_for_mode (val, SImode);
5256
      return hwint_to_const_double (SFmode, val);
5257
    }
5258
  if (mode == DFmode)
5259
    {
5260
      for (i = 0, val = 0; i < 8; i++)
5261
        val = (val << 8) | arr[i];
5262
      return hwint_to_const_double (DFmode, val);
5263
    }
5264
 
5265
  if (!VECTOR_MODE_P (mode))
5266
    abort ();
5267
 
5268
  units = GET_MODE_NUNITS (mode);
5269
  size = GET_MODE_UNIT_SIZE (mode);
5270
  inner_mode = GET_MODE_INNER (mode);
5271
  v = rtvec_alloc (units);
5272
 
5273
  for (k = i = 0; i < units; ++i)
5274
    {
5275
      val = 0;
5276
      for (j = 0; j < size; j++, k++)
5277
        val = (val << 8) | arr[k];
5278
 
5279
      if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5280
        RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5281
      else
5282
        RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5283
    }
5284
  if (k > 16)
5285
    abort ();
5286
 
5287
  return gen_rtx_CONST_VECTOR (mode, v);
5288
}
5289
 
5290
static void
5291
reloc_diagnostic (rtx x)
5292
{
5293
  tree decl = 0;
5294
  if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5295
    return;
5296
 
5297
  if (GET_CODE (x) == SYMBOL_REF)
5298
    decl = SYMBOL_REF_DECL (x);
5299
  else if (GET_CODE (x) == CONST
5300
           && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5301
    decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5302
 
5303
  /* SYMBOL_REF_DECL is not necessarily a DECL. */
5304
  if (decl && !DECL_P (decl))
5305
    decl = 0;
5306
 
5307
  /* The decl could be a string constant.  */
5308
  if (decl && DECL_P (decl))
5309
    {
5310
      location_t loc;
5311
      /* We use last_assemble_variable_decl to get line information.  It's
5312
         not always going to be right and might not even be close, but will
5313
         be right for the more common cases. */
5314
      if (!last_assemble_variable_decl || in_section == ctors_section)
5315
        loc = DECL_SOURCE_LOCATION (decl);
5316
      else
5317
        loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
5318
 
5319
      if (TARGET_WARN_RELOC)
5320
        warning_at (loc, 0,
5321
                    "creating run-time relocation for %qD", decl);
5322
      else
5323
        error_at (loc,
5324
                  "creating run-time relocation for %qD", decl);
5325
    }
5326
  else
5327
    {
5328
      if (TARGET_WARN_RELOC)
5329
        warning_at (input_location, 0, "creating run-time relocation");
5330
      else
5331
        error_at (input_location, "creating run-time relocation");
5332
    }
5333
}
5334
 
5335
/* Hook into assemble_integer so we can generate an error for run-time
5336
   relocations.  The SPU ABI disallows them. */
5337
static bool
5338
spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5339
{
5340
  /* By default run-time relocations aren't supported, but we allow them
5341
     in case users support it in their own run-time loader.  And we provide
5342
     a warning for those users that don't.  */
5343
  if ((GET_CODE (x) == SYMBOL_REF)
5344
      || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5345
    reloc_diagnostic (x);
5346
 
5347
  return default_assemble_integer (x, size, aligned_p);
5348
}
5349
 
5350
/* Write to FILE the directive that makes NAME global: a ".global"
   directive, the assembler spelling of NAME as produced by
   assemble_name, and a terminating newline.  */
static void
spu_asm_globalize_label (FILE * file, const char *name)
{
  static const char directive[] = "\t.global\t";
  static const char line_end[] = "\n";

  fputs (directive, file);
  assemble_name (file, name);
  fputs (line_end, file);
}
5357
 
5358
static bool
5359
spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
5360
               bool speed ATTRIBUTE_UNUSED)
5361
{
5362
  enum machine_mode mode = GET_MODE (x);
5363
  int cost = COSTS_N_INSNS (2);
5364
 
5365
  /* Folding to a CONST_VECTOR will use extra space but there might
5366
     be only a small savings in cycles.  We'd like to use a CONST_VECTOR
5367
     only if it allows us to fold away multiple insns.  Changing the cost
5368
     of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5369
     because this cost will only be compared against a single insn.
5370
     if (code == CONST_VECTOR)
5371
       return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
5372
   */
5373
 
5374
  /* Use defaults for float operations.  Not accurate but good enough. */
5375
  if (mode == DFmode)
5376
    {
5377
      *total = COSTS_N_INSNS (13);
5378
      return true;
5379
    }
5380
  if (mode == SFmode)
5381
    {
5382
      *total = COSTS_N_INSNS (6);
5383
      return true;
5384
    }
5385
  switch (code)
5386
    {
5387
    case CONST_INT:
5388
      if (satisfies_constraint_K (x))
5389
        *total = 0;
5390
      else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5391
        *total = COSTS_N_INSNS (1);
5392
      else
5393
        *total = COSTS_N_INSNS (3);
5394
      return true;
5395
 
5396
    case CONST:
5397
      *total = COSTS_N_INSNS (3);
5398
      return true;
5399
 
5400
    case LABEL_REF:
5401
    case SYMBOL_REF:
5402
      *total = COSTS_N_INSNS (0);
5403
      return true;
5404
 
5405
    case CONST_DOUBLE:
5406
      *total = COSTS_N_INSNS (5);
5407
      return true;
5408
 
5409
    case FLOAT_EXTEND:
5410
    case FLOAT_TRUNCATE:
5411
    case FLOAT:
5412
    case UNSIGNED_FLOAT:
5413
    case FIX:
5414
    case UNSIGNED_FIX:
5415
      *total = COSTS_N_INSNS (7);
5416
      return true;
5417
 
5418
    case PLUS:
5419
      if (mode == TImode)
5420
        {
5421
          *total = COSTS_N_INSNS (9);
5422
          return true;
5423
        }
5424
      break;
5425
 
5426
    case MULT:
5427
      cost =
5428
        GET_CODE (XEXP (x, 0)) ==
5429
        REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5430
      if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5431
        {
5432
          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5433
            {
5434
              HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5435
              cost = COSTS_N_INSNS (14);
5436
              if ((val & 0xffff) == 0)
5437
                cost = COSTS_N_INSNS (9);
5438
              else if (val > 0 && val < 0x10000)
5439
                cost = COSTS_N_INSNS (11);
5440
            }
5441
        }
5442
      *total = cost;
5443
      return true;
5444
    case DIV:
5445
    case UDIV:
5446
    case MOD:
5447
    case UMOD:
5448
      *total = COSTS_N_INSNS (20);
5449
      return true;
5450
    case ROTATE:
5451
    case ROTATERT:
5452
    case ASHIFT:
5453
    case ASHIFTRT:
5454
    case LSHIFTRT:
5455
      *total = COSTS_N_INSNS (4);
5456
      return true;
5457
    case UNSPEC:
5458
      if (XINT (x, 1) == UNSPEC_CONVERT)
5459
        *total = COSTS_N_INSNS (0);
5460
      else
5461
        *total = COSTS_N_INSNS (4);
5462
      return true;
5463
    }
5464
  /* Scale cost by mode size.  Except when initializing (cfun->decl == 0). */
5465
  if (GET_MODE_CLASS (mode) == MODE_INT
5466
      && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5467
    cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5468
      * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5469
  *total = cost;
5470
  return true;
5471
}
5472
 
5473
static enum machine_mode
5474
spu_unwind_word_mode (void)
5475
{
5476
  return SImode;
5477
}
5478
 
5479
/* Decide whether we can make a sibling call to a function.  DECL is the
5480
   declaration of the function being targeted by the call and EXP is the
5481
   CALL_EXPR representing the call.  */
5482
static bool
5483
spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5484
{
5485
  return decl && !TARGET_LARGE_MEM;
5486
}
5487
 
5488
/* We need to correctly update the back chain pointer and the Available
5489
   Stack Size (which is in the second slot of the sp register.) */
5490
void
5491
spu_allocate_stack (rtx op0, rtx op1)
5492
{
5493
  HOST_WIDE_INT v;
5494
  rtx chain = gen_reg_rtx (V4SImode);
5495
  rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5496
  rtx sp = gen_reg_rtx (V4SImode);
5497
  rtx splatted = gen_reg_rtx (V4SImode);
5498
  rtx pat = gen_reg_rtx (TImode);
5499
 
5500
  /* copy the back chain so we can save it back again. */
5501
  emit_move_insn (chain, stack_bot);
5502
 
5503
  op1 = force_reg (SImode, op1);
5504
 
5505
  v = 0x1020300010203ll;
5506
  emit_move_insn (pat, immed_double_const (v, v, TImode));
5507
  emit_insn (gen_shufb (splatted, op1, op1, pat));
5508
 
5509
  emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5510
  emit_insn (gen_subv4si3 (sp, sp, splatted));
5511
 
5512
  if (flag_stack_check)
5513
    {
5514
      rtx avail = gen_reg_rtx(SImode);
5515
      rtx result = gen_reg_rtx(SImode);
5516
      emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5517
      emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5518
      emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5519
    }
5520
 
5521
  emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5522
 
5523
  emit_move_insn (stack_bot, chain);
5524
 
5525
  emit_move_insn (op0, virtual_stack_dynamic_rtx);
5526
}
5527
 
5528
void
5529
spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5530
{
5531
  static unsigned char arr[16] =
5532
    { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5533
  rtx temp = gen_reg_rtx (SImode);
5534
  rtx temp2 = gen_reg_rtx (SImode);
5535
  rtx temp3 = gen_reg_rtx (V4SImode);
5536
  rtx temp4 = gen_reg_rtx (V4SImode);
5537
  rtx pat = gen_reg_rtx (TImode);
5538
  rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5539
 
5540
  /* Restore the backchain from the first word, sp from the second.  */
5541
  emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5542
  emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5543
 
5544
  emit_move_insn (pat, array_to_constant (TImode, arr));
5545
 
5546
  /* Compute Available Stack Size for sp */
5547
  emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5548
  emit_insn (gen_shufb (temp3, temp, temp, pat));
5549
 
5550
  /* Compute Available Stack Size for back chain */
5551
  emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5552
  emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5553
  emit_insn (gen_addv4si3 (temp4, sp, temp4));
5554
 
5555
  emit_insn (gen_addv4si3 (sp, sp, temp3));
5556
  emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5557
}
5558
 
5559
static void
5560
spu_init_libfuncs (void)
5561
{
5562
  set_optab_libfunc (smul_optab, DImode, "__muldi3");
5563
  set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5564
  set_optab_libfunc (smod_optab, DImode, "__moddi3");
5565
  set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5566
  set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5567
  set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5568
  set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5569
  set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5570
  set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5571
  set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5572
  set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5573
 
5574
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5575
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
5576
 
5577
  set_optab_libfunc (smul_optab, TImode, "__multi3");
5578
  set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5579
  set_optab_libfunc (smod_optab, TImode, "__modti3");
5580
  set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5581
  set_optab_libfunc (umod_optab, TImode, "__umodti3");
5582
  set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
5583
}
5584
 
5585
/* Make a subreg, stripping any existing subreg.  We could possibly just
5586
   call simplify_subreg, but in this case we know what we want. */
5587
rtx
5588
spu_gen_subreg (enum machine_mode mode, rtx x)
5589
{
5590
  if (GET_CODE (x) == SUBREG)
5591
    x = SUBREG_REG (x);
5592
  if (GET_MODE (x) == mode)
5593
    return x;
5594
  return gen_rtx_SUBREG (mode, x, 0);
5595
}
5596
 
5597
static bool
5598
spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5599
{
5600
  return (TYPE_MODE (type) == BLKmode
5601
          && ((type) == 0
5602
              || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5603
              || int_size_in_bytes (type) >
5604
              (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5605
}
5606
 
5607
/* Create the built-in types and functions.  */

/* One enumerator per DEF_BUILTIN entry of spu-builtins.def, in file
   order, followed by NUM_SPU_BUILTINS which therefore equals the number
   of entries.  */
enum spu_function_code
{
#define DEF_BUILTIN(FCODE, ICODE, NAME, TYPE, PARAMS) FCODE,
#include "spu-builtins.def"
#undef DEF_BUILTIN
   NUM_SPU_BUILTINS
};
5616
 
5617
extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5618
 
5619
struct spu_builtin_description spu_builtins[] = {
5620
#define DEF_BUILTIN(fcode, icode, name, type, params) \
5621
  {fcode, icode, name, type, params, NULL_TREE},
5622
#include "spu-builtins.def"
5623
#undef DEF_BUILTIN
5624
};
5625
 
5626
/* Returns the rs6000 builtin decl for CODE.  */
5627
 
5628
static tree
5629
spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5630
{
5631
  if (code >= NUM_SPU_BUILTINS)
5632
    return error_mark_node;
5633
 
5634
  return spu_builtins[code].fndecl;
5635
}
5636
 
5637
 
5638
static void
5639
spu_init_builtins (void)
5640
{
5641
  struct spu_builtin_description *d;
5642
  unsigned int i;
5643
 
5644
  V16QI_type_node = build_vector_type (intQI_type_node, 16);
5645
  V8HI_type_node = build_vector_type (intHI_type_node, 8);
5646
  V4SI_type_node = build_vector_type (intSI_type_node, 4);
5647
  V2DI_type_node = build_vector_type (intDI_type_node, 2);
5648
  V4SF_type_node = build_vector_type (float_type_node, 4);
5649
  V2DF_type_node = build_vector_type (double_type_node, 2);
5650
 
5651
  unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5652
  unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5653
  unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5654
  unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5655
 
5656
  spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
5657
 
5658
  spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5659
  spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5660
  spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5661
  spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5662
  spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5663
  spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5664
  spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5665
  spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5666
  spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5667
  spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5668
  spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5669
  spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5670
 
5671
  spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5672
  spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5673
  spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5674
  spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5675
  spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5676
  spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5677
  spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5678
  spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5679
 
5680
  spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5681
  spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5682
 
5683
  spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5684
 
5685
  spu_builtin_types[SPU_BTI_PTR] =
5686
    build_pointer_type (build_qualified_type
5687
                        (void_type_node,
5688
                         TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5689
 
5690
  /* For each builtin we build a new prototype.  The tree code will make
5691
     sure nodes are shared. */
5692
  for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5693
    {
5694
      tree p;
5695
      char name[64];            /* build_function will make a copy. */
5696
      int parm;
5697
 
5698
      if (d->name == 0)
5699
        continue;
5700
 
5701
      /* Find last parm.  */
5702
      for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5703
        ;
5704
 
5705
      p = void_list_node;
5706
      while (parm > 1)
5707
        p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5708
 
5709
      p = build_function_type (spu_builtin_types[d->parm[0]], p);
5710
 
5711
      sprintf (name, "__builtin_%s", d->name);
5712
      d->fndecl =
5713
        add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
5714
                              NULL, NULL_TREE);
5715
      if (d->fcode == SPU_MASK_FOR_LOAD)
5716
        TREE_READONLY (d->fndecl) = 1;
5717
 
5718
      /* These builtins don't throw.  */
5719
      TREE_NOTHROW (d->fndecl) = 1;
5720
    }
5721
}
5722
 
5723
void
5724
spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5725
{
5726
  static unsigned char arr[16] =
5727
    { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5728
 
5729
  rtx temp = gen_reg_rtx (Pmode);
5730
  rtx temp2 = gen_reg_rtx (V4SImode);
5731
  rtx temp3 = gen_reg_rtx (V4SImode);
5732
  rtx pat = gen_reg_rtx (TImode);
5733
  rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5734
 
5735
  emit_move_insn (pat, array_to_constant (TImode, arr));
5736
 
5737
  /* Restore the sp.  */
5738
  emit_move_insn (temp, op1);
5739
  emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5740
 
5741
  /* Compute available stack size for sp.  */
5742
  emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5743
  emit_insn (gen_shufb (temp3, temp, temp, pat));
5744
 
5745
  emit_insn (gen_addv4si3 (sp, sp, temp3));
5746
  emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5747
}
5748
 
5749
int
5750
spu_safe_dma (HOST_WIDE_INT channel)
5751
{
5752
  return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
5753
}
5754
 
5755
void
5756
spu_builtin_splats (rtx ops[])
5757
{
5758
  enum machine_mode mode = GET_MODE (ops[0]);
5759
  if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5760
    {
5761
      unsigned char arr[16];
5762
      constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5763
      emit_move_insn (ops[0], array_to_constant (mode, arr));
5764
    }
5765
  else
5766
    {
5767
      rtx reg = gen_reg_rtx (TImode);
5768
      rtx shuf;
5769
      if (GET_CODE (ops[1]) != REG
5770
          && GET_CODE (ops[1]) != SUBREG)
5771
        ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5772
      switch (mode)
5773
        {
5774
        case V2DImode:
5775
        case V2DFmode:
5776
          shuf =
5777
            immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5778
                                TImode);
5779
          break;
5780
        case V4SImode:
5781
        case V4SFmode:
5782
          shuf =
5783
            immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5784
                                TImode);
5785
          break;
5786
        case V8HImode:
5787
          shuf =
5788
            immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5789
                                TImode);
5790
          break;
5791
        case V16QImode:
5792
          shuf =
5793
            immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5794
                                TImode);
5795
          break;
5796
        default:
5797
          abort ();
5798
        }
5799
      emit_move_insn (reg, shuf);
5800
      emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5801
    }
5802
}
5803
 
5804
void
5805
spu_builtin_extract (rtx ops[])
5806
{
5807
  enum machine_mode mode;
5808
  rtx rot, from, tmp;
5809
 
5810
  mode = GET_MODE (ops[1]);
5811
 
5812
  if (GET_CODE (ops[2]) == CONST_INT)
5813
    {
5814
      switch (mode)
5815
        {
5816
        case V16QImode:
5817
          emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5818
          break;
5819
        case V8HImode:
5820
          emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5821
          break;
5822
        case V4SFmode:
5823
          emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5824
          break;
5825
        case V4SImode:
5826
          emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5827
          break;
5828
        case V2DImode:
5829
          emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5830
          break;
5831
        case V2DFmode:
5832
          emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5833
          break;
5834
        default:
5835
          abort ();
5836
        }
5837
      return;
5838
    }
5839
 
5840
  from = spu_gen_subreg (TImode, ops[1]);
5841
  rot = gen_reg_rtx (TImode);
5842
  tmp = gen_reg_rtx (SImode);
5843
 
5844
  switch (mode)
5845
    {
5846
    case V16QImode:
5847
      emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5848
      break;
5849
    case V8HImode:
5850
      emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5851
      emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5852
      break;
5853
    case V4SFmode:
5854
    case V4SImode:
5855
      emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5856
      break;
5857
    case V2DImode:
5858
    case V2DFmode:
5859
      emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5860
      break;
5861
    default:
5862
      abort ();
5863
    }
5864
  emit_insn (gen_rotqby_ti (rot, from, tmp));
5865
 
5866
  emit_insn (gen_spu_convert (ops[0], rot));
5867
}
5868
 
5869
void
5870
spu_builtin_insert (rtx ops[])
5871
{
5872
  enum machine_mode mode = GET_MODE (ops[0]);
5873
  enum machine_mode imode = GET_MODE_INNER (mode);
5874
  rtx mask = gen_reg_rtx (TImode);
5875
  rtx offset;
5876
 
5877
  if (GET_CODE (ops[3]) == CONST_INT)
5878
    offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5879
  else
5880
    {
5881
      offset = gen_reg_rtx (SImode);
5882
      emit_insn (gen_mulsi3
5883
                 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5884
    }
5885
  emit_insn (gen_cpat
5886
             (mask, stack_pointer_rtx, offset,
5887
              GEN_INT (GET_MODE_SIZE (imode))));
5888
  emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5889
}
5890
 
5891
void
5892
spu_builtin_promote (rtx ops[])
5893
{
5894
  enum machine_mode mode, imode;
5895
  rtx rot, from, offset;
5896
  HOST_WIDE_INT pos;
5897
 
5898
  mode = GET_MODE (ops[0]);
5899
  imode = GET_MODE_INNER (mode);
5900
 
5901
  from = gen_reg_rtx (TImode);
5902
  rot = spu_gen_subreg (TImode, ops[0]);
5903
 
5904
  emit_insn (gen_spu_convert (from, ops[1]));
5905
 
5906
  if (GET_CODE (ops[2]) == CONST_INT)
5907
    {
5908
      pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5909
      if (GET_MODE_SIZE (imode) < 4)
5910
        pos += 4 - GET_MODE_SIZE (imode);
5911
      offset = GEN_INT (pos & 15);
5912
    }
5913
  else
5914
    {
5915
      offset = gen_reg_rtx (SImode);
5916
      switch (mode)
5917
        {
5918
        case V16QImode:
5919
          emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5920
          break;
5921
        case V8HImode:
5922
          emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5923
          emit_insn (gen_addsi3 (offset, offset, offset));
5924
          break;
5925
        case V4SFmode:
5926
        case V4SImode:
5927
          emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5928
          emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5929
          break;
5930
        case V2DImode:
5931
        case V2DFmode:
5932
          emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5933
          break;
5934
        default:
5935
          abort ();
5936
        }
5937
    }
5938
  emit_insn (gen_rotqby_ti (rot, from, offset));
5939
}
5940
 
5941
static void
5942
spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
5943
{
5944
  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
5945
  rtx shuf = gen_reg_rtx (V4SImode);
5946
  rtx insn = gen_reg_rtx (V4SImode);
5947
  rtx shufc;
5948
  rtx insnc;
5949
  rtx mem;
5950
 
5951
  fnaddr = force_reg (SImode, fnaddr);
5952
  cxt = force_reg (SImode, cxt);
5953
 
5954
  if (TARGET_LARGE_MEM)
5955
    {
5956
      rtx rotl = gen_reg_rtx (V4SImode);
5957
      rtx mask = gen_reg_rtx (V4SImode);
5958
      rtx bi = gen_reg_rtx (SImode);
5959
      static unsigned char const shufa[16] = {
5960
        2, 3, 0, 1, 18, 19, 16, 17,
5961
        0, 1, 2, 3, 16, 17, 18, 19
5962
      };
5963
      static unsigned char const insna[16] = {
5964
        0x41, 0, 0, 79,
5965
        0x41, 0, 0, STATIC_CHAIN_REGNUM,
5966
        0x60, 0x80, 0, 79,
5967
        0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5968
      };
5969
 
5970
      shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5971
      insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5972
 
5973
      emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
5974
      emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
5975
      emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5976
      emit_insn (gen_selb (insn, insnc, rotl, mask));
5977
 
5978
      mem = adjust_address (m_tramp, V4SImode, 0);
5979
      emit_move_insn (mem, insn);
5980
 
5981
      emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
5982
      mem = adjust_address (m_tramp, Pmode, 16);
5983
      emit_move_insn (mem, bi);
5984
    }
5985
  else
5986
    {
5987
      rtx scxt = gen_reg_rtx (SImode);
5988
      rtx sfnaddr = gen_reg_rtx (SImode);
5989
      static unsigned char const insna[16] = {
5990
        0x42, 0, 0, STATIC_CHAIN_REGNUM,
5991
        0x30, 0, 0, 0,
5992
        0, 0, 0, 0,
5993
        0, 0, 0, 0
5994
      };
5995
 
5996
      shufc = gen_reg_rtx (TImode);
5997
      insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5998
 
5999
      /* By or'ing all of cxt with the ila opcode we are assuming cxt
6000
         fits 18 bits and the last 4 are zeros.  This will be true if
6001
         the stack pointer is initialized to 0x3fff0 at program start,
6002
         otherwise the ila instruction will be garbage. */
6003
 
6004
      emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
6005
      emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
6006
      emit_insn (gen_cpat
6007
                 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
6008
      emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
6009
      emit_insn (gen_iorv4si3 (insn, insnc, shuf));
6010
 
6011
      mem = adjust_address (m_tramp, V4SImode, 0);
6012
      emit_move_insn (mem, insn);
6013
    }
6014
  emit_insn (gen_sync ());
6015
}
6016
 
6017
void
6018
spu_expand_sign_extend (rtx ops[])
6019
{
6020
  unsigned char arr[16];
6021
  rtx pat = gen_reg_rtx (TImode);
6022
  rtx sign, c;
6023
  int i, last;
6024
  last = GET_MODE (ops[0]) == DImode ? 7 : 15;
6025
  if (GET_MODE (ops[1]) == QImode)
6026
    {
6027
      sign = gen_reg_rtx (HImode);
6028
      emit_insn (gen_extendqihi2 (sign, ops[1]));
6029
      for (i = 0; i < 16; i++)
6030
        arr[i] = 0x12;
6031
      arr[last] = 0x13;
6032
    }
6033
  else
6034
    {
6035
      for (i = 0; i < 16; i++)
6036
        arr[i] = 0x10;
6037
      switch (GET_MODE (ops[1]))
6038
        {
6039
        case HImode:
6040
          sign = gen_reg_rtx (SImode);
6041
          emit_insn (gen_extendhisi2 (sign, ops[1]));
6042
          arr[last] = 0x03;
6043
          arr[last - 1] = 0x02;
6044
          break;
6045
        case SImode:
6046
          sign = gen_reg_rtx (SImode);
6047
          emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
6048
          for (i = 0; i < 4; i++)
6049
            arr[last - i] = 3 - i;
6050
          break;
6051
        case DImode:
6052
          sign = gen_reg_rtx (SImode);
6053
          c = gen_reg_rtx (SImode);
6054
          emit_insn (gen_spu_convert (c, ops[1]));
6055
          emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
6056
          for (i = 0; i < 8; i++)
6057
            arr[last - i] = 7 - i;
6058
          break;
6059
        default:
6060
          abort ();
6061
        }
6062
    }
6063
  emit_move_insn (pat, array_to_constant (TImode, arr));
6064
  emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
6065
}
6066
 
6067
/* expand vector initialization. If there are any constant parts,
6068
   load constant parts first. Then load any non-constant parts.  */
6069
void
6070
spu_expand_vector_init (rtx target, rtx vals)
6071
{
6072
  enum machine_mode mode = GET_MODE (target);
6073
  int n_elts = GET_MODE_NUNITS (mode);
6074
  int n_var = 0;
6075
  bool all_same = true;
6076
  rtx first, x = NULL_RTX, first_constant = NULL_RTX;
6077
  int i;
6078
 
6079
  first = XVECEXP (vals, 0, 0);
6080
  for (i = 0; i < n_elts; ++i)
6081
    {
6082
      x = XVECEXP (vals, 0, i);
6083
      if (!(CONST_INT_P (x)
6084
            || GET_CODE (x) == CONST_DOUBLE
6085
            || GET_CODE (x) == CONST_FIXED))
6086
        ++n_var;
6087
      else
6088
        {
6089
          if (first_constant == NULL_RTX)
6090
            first_constant = x;
6091
        }
6092
      if (i > 0 && !rtx_equal_p (x, first))
6093
        all_same = false;
6094
    }
6095
 
6096
  /* if all elements are the same, use splats to repeat elements */
6097
  if (all_same)
6098
    {
6099
      if (!CONSTANT_P (first)
6100
          && !register_operand (first, GET_MODE (x)))
6101
        first = force_reg (GET_MODE (first), first);
6102
      emit_insn (gen_spu_splats (target, first));
6103
      return;
6104
    }
6105
 
6106
  /* load constant parts */
6107
  if (n_var != n_elts)
6108
    {
6109
      if (n_var == 0)
6110
        {
6111
          emit_move_insn (target,
6112
                          gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6113
        }
6114
      else
6115
        {
6116
          rtx constant_parts_rtx = copy_rtx (vals);
6117
 
6118
          gcc_assert (first_constant != NULL_RTX);
6119
          /* fill empty slots with the first constant, this increases
6120
             our chance of using splats in the recursive call below. */
6121
          for (i = 0; i < n_elts; ++i)
6122
            {
6123
              x = XVECEXP (constant_parts_rtx, 0, i);
6124
              if (!(CONST_INT_P (x)
6125
                    || GET_CODE (x) == CONST_DOUBLE
6126
                    || GET_CODE (x) == CONST_FIXED))
6127
                XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6128
            }
6129
 
6130
          spu_expand_vector_init (target, constant_parts_rtx);
6131
        }
6132
    }
6133
 
6134
  /* load variable parts */
6135
  if (n_var != 0)
6136
    {
6137
      rtx insert_operands[4];
6138
 
6139
      insert_operands[0] = target;
6140
      insert_operands[2] = target;
6141
      for (i = 0; i < n_elts; ++i)
6142
        {
6143
          x = XVECEXP (vals, 0, i);
6144
          if (!(CONST_INT_P (x)
6145
                || GET_CODE (x) == CONST_DOUBLE
6146
                || GET_CODE (x) == CONST_FIXED))
6147
            {
6148
              if (!register_operand (x, GET_MODE (x)))
6149
                x = force_reg (GET_MODE (x), x);
6150
              insert_operands[1] = x;
6151
              insert_operands[3] = GEN_INT (i);
6152
              spu_builtin_insert (insert_operands);
6153
            }
6154
        }
6155
    }
6156
}
6157
 
6158
/* Return insn index for the vector compare instruction for given CODE,
6159
   and DEST_MODE, OP_MODE. Return -1 if valid insn is not available.  */
6160
 
6161
static int
6162
get_vec_cmp_insn (enum rtx_code code,
6163
                  enum machine_mode dest_mode,
6164
                  enum machine_mode op_mode)
6165
 
6166
{
6167
  switch (code)
6168
    {
6169
    case EQ:
6170
      if (dest_mode == V16QImode && op_mode == V16QImode)
6171
        return CODE_FOR_ceq_v16qi;
6172
      if (dest_mode == V8HImode && op_mode == V8HImode)
6173
        return CODE_FOR_ceq_v8hi;
6174
      if (dest_mode == V4SImode && op_mode == V4SImode)
6175
        return CODE_FOR_ceq_v4si;
6176
      if (dest_mode == V4SImode && op_mode == V4SFmode)
6177
        return CODE_FOR_ceq_v4sf;
6178
      if (dest_mode == V2DImode && op_mode == V2DFmode)
6179
        return CODE_FOR_ceq_v2df;
6180
      break;
6181
    case GT:
6182
      if (dest_mode == V16QImode && op_mode == V16QImode)
6183
        return CODE_FOR_cgt_v16qi;
6184
      if (dest_mode == V8HImode && op_mode == V8HImode)
6185
        return CODE_FOR_cgt_v8hi;
6186
      if (dest_mode == V4SImode && op_mode == V4SImode)
6187
        return CODE_FOR_cgt_v4si;
6188
      if (dest_mode == V4SImode && op_mode == V4SFmode)
6189
        return CODE_FOR_cgt_v4sf;
6190
      if (dest_mode == V2DImode && op_mode == V2DFmode)
6191
        return CODE_FOR_cgt_v2df;
6192
      break;
6193
    case GTU:
6194
      if (dest_mode == V16QImode && op_mode == V16QImode)
6195
        return CODE_FOR_clgt_v16qi;
6196
      if (dest_mode == V8HImode && op_mode == V8HImode)
6197
        return CODE_FOR_clgt_v8hi;
6198
      if (dest_mode == V4SImode && op_mode == V4SImode)
6199
        return CODE_FOR_clgt_v4si;
6200
      break;
6201
    default:
6202
      break;
6203
    }
6204
  return -1;
6205
}
6206
 
6207
/* Emit vector compare for operands OP0 and OP1 using code RCODE.
6208
   DMODE is expected destination mode. This is a recursive function.  */
6209
 
6210
static rtx
6211
spu_emit_vector_compare (enum rtx_code rcode,
6212
                         rtx op0, rtx op1,
6213
                         enum machine_mode dmode)
6214
{
6215
  int vec_cmp_insn;
6216
  rtx mask;
6217
  enum machine_mode dest_mode;
6218
  enum machine_mode op_mode = GET_MODE (op1);
6219
 
6220
  gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6221
 
6222
  /* Floating point vector compare instructions uses destination V4SImode.
6223
     Double floating point vector compare instructions uses destination V2DImode.
6224
     Move destination to appropriate mode later.  */
6225
  if (dmode == V4SFmode)
6226
    dest_mode = V4SImode;
6227
  else if (dmode == V2DFmode)
6228
    dest_mode = V2DImode;
6229
  else
6230
    dest_mode = dmode;
6231
 
6232
  mask = gen_reg_rtx (dest_mode);
6233
  vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6234
 
6235
  if (vec_cmp_insn == -1)
6236
    {
6237
      bool swap_operands = false;
6238
      bool try_again = false;
6239
      switch (rcode)
6240
        {
6241
        case LT:
6242
          rcode = GT;
6243
          swap_operands = true;
6244
          try_again = true;
6245
          break;
6246
        case LTU:
6247
          rcode = GTU;
6248
          swap_operands = true;
6249
          try_again = true;
6250
          break;
6251
        case NE:
6252
          /* Treat A != B as ~(A==B).  */
6253
          {
6254
            enum insn_code nor_code;
6255
            rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6256
            nor_code = optab_handler (one_cmpl_optab, (int)dest_mode)->insn_code;
6257
            gcc_assert (nor_code != CODE_FOR_nothing);
6258
            emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
6259
            if (dmode != dest_mode)
6260
              {
6261
                rtx temp = gen_reg_rtx (dest_mode);
6262
                convert_move (temp, mask, 0);
6263
                return temp;
6264
              }
6265
            return mask;
6266
          }
6267
          break;
6268
        case GE:
6269
        case GEU:
6270
        case LE:
6271
        case LEU:
6272
          /* Try GT/GTU/LT/LTU OR EQ */
6273
          {
6274
            rtx c_rtx, eq_rtx;
6275
            enum insn_code ior_code;
6276
            enum rtx_code new_code;
6277
 
6278
            switch (rcode)
6279
              {
6280
              case GE:  new_code = GT;  break;
6281
              case GEU: new_code = GTU; break;
6282
              case LE:  new_code = LT;  break;
6283
              case LEU: new_code = LTU; break;
6284
              default:
6285
                gcc_unreachable ();
6286
              }
6287
 
6288
            c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6289
            eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6290
 
6291
            ior_code = optab_handler (ior_optab, (int)dest_mode)->insn_code;
6292
            gcc_assert (ior_code != CODE_FOR_nothing);
6293
            emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6294
            if (dmode != dest_mode)
6295
              {
6296
                rtx temp = gen_reg_rtx (dest_mode);
6297
                convert_move (temp, mask, 0);
6298
                return temp;
6299
              }
6300
            return mask;
6301
          }
6302
          break;
6303
        default:
6304
          gcc_unreachable ();
6305
        }
6306
 
6307
      /* You only get two chances.  */
6308
      if (try_again)
6309
          vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6310
 
6311
      gcc_assert (vec_cmp_insn != -1);
6312
 
6313
      if (swap_operands)
6314
        {
6315
          rtx tmp;
6316
          tmp = op0;
6317
          op0 = op1;
6318
          op1 = tmp;
6319
        }
6320
    }
6321
 
6322
  emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6323
  if (dmode != dest_mode)
6324
    {
6325
      rtx temp = gen_reg_rtx (dest_mode);
6326
      convert_move (temp, mask, 0);
6327
      return temp;
6328
    }
6329
  return mask;
6330
}
6331
 
6332
 
6333
/* Emit vector conditional expression.
6334
   DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6335
   CC_OP0 and CC_OP1 are the two operands for the relation operation COND.  */
6336
 
6337
int
6338
spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6339
                           rtx cond, rtx cc_op0, rtx cc_op1)
6340
{
6341
  enum machine_mode dest_mode = GET_MODE (dest);
6342
  enum rtx_code rcode = GET_CODE (cond);
6343
  rtx mask;
6344
 
6345
  /* Get the vector mask for the given relational operations.  */
6346
  mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6347
 
6348
  emit_insn(gen_selb (dest, op2, op1, mask));
6349
 
6350
  return 1;
6351
}
6352
 
6353
static rtx
6354
spu_force_reg (enum machine_mode mode, rtx op)
6355
{
6356
  rtx x, r;
6357
  if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6358
    {
6359
      if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6360
          || GET_MODE (op) == BLKmode)
6361
        return force_reg (mode, convert_to_mode (mode, op, 0));
6362
      abort ();
6363
    }
6364
 
6365
  r = force_reg (GET_MODE (op), op);
6366
  if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6367
    {
6368
      x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6369
      if (x)
6370
        return x;
6371
    }
6372
 
6373
  x = gen_reg_rtx (mode);
6374
  emit_insn (gen_spu_convert (x, r));
6375
  return x;
6376
}
6377
 
6378
static void
6379
spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6380
{
6381
  HOST_WIDE_INT v = 0;
6382
  int lsbits;
6383
  /* Check the range of immediate operands. */
6384
  if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6385
    {
6386
      int range = p - SPU_BTI_7;
6387
 
6388
      if (!CONSTANT_P (op))
6389
        error ("%s expects an integer literal in the range [%d, %d].",
6390
               d->name,
6391
               spu_builtin_range[range].low, spu_builtin_range[range].high);
6392
 
6393
      if (GET_CODE (op) == CONST
6394
          && (GET_CODE (XEXP (op, 0)) == PLUS
6395
              || GET_CODE (XEXP (op, 0)) == MINUS))
6396
        {
6397
          v = INTVAL (XEXP (XEXP (op, 0), 1));
6398
          op = XEXP (XEXP (op, 0), 0);
6399
        }
6400
      else if (GET_CODE (op) == CONST_INT)
6401
        v = INTVAL (op);
6402
      else if (GET_CODE (op) == CONST_VECTOR
6403
               && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6404
        v = INTVAL (CONST_VECTOR_ELT (op, 0));
6405
 
6406
      /* The default for v is 0 which is valid in every range. */
6407
      if (v < spu_builtin_range[range].low
6408
          || v > spu_builtin_range[range].high)
6409
        error ("%s expects an integer literal in the range [%d, %d]. ("
6410
               HOST_WIDE_INT_PRINT_DEC ")",
6411
               d->name,
6412
               spu_builtin_range[range].low, spu_builtin_range[range].high,
6413
               v);
6414
 
6415
      switch (p)
6416
        {
6417
        case SPU_BTI_S10_4:
6418
          lsbits = 4;
6419
          break;
6420
        case SPU_BTI_U16_2:
6421
          /* This is only used in lqa, and stqa.  Even though the insns
6422
             encode 16 bits of the address (all but the 2 least
6423
             significant), only 14 bits are used because it is masked to
6424
             be 16 byte aligned. */
6425
          lsbits = 4;
6426
          break;
6427
        case SPU_BTI_S16_2:
6428
          /* This is used for lqr and stqr. */
6429
          lsbits = 2;
6430
          break;
6431
        default:
6432
          lsbits = 0;
6433
        }
6434
 
6435
      if (GET_CODE (op) == LABEL_REF
6436
          || (GET_CODE (op) == SYMBOL_REF
6437
              && SYMBOL_REF_FUNCTION_P (op))
6438
          || (v & ((1 << lsbits) - 1)) != 0)
6439
        warning (0, "%d least significant bits of %s are ignored.", lsbits,
6440
                 d->name);
6441
    }
6442
}
6443
 
6444
 
6445
static int
6446
expand_builtin_args (struct spu_builtin_description *d, tree exp,
6447
                     rtx target, rtx ops[])
6448
{
6449
  enum insn_code icode = (enum insn_code) d->icode;
6450
  int i = 0, a;
6451
 
6452
  /* Expand the arguments into rtl. */
6453
 
6454
  if (d->parm[0] != SPU_BTI_VOID)
6455
    ops[i++] = target;
6456
 
6457
  for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6458
    {
6459
      tree arg = CALL_EXPR_ARG (exp, a);
6460
      if (arg == 0)
6461
        abort ();
6462
      ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6463
    }
6464
 
6465
  /* The insn pattern may have additional operands (SCRATCH).
6466
     Return the number of actual non-SCRATCH operands.  */
6467
  gcc_assert (i <= insn_data[icode].n_operands);
6468
  return i;
6469
}
6470
 
6471
static rtx
6472
spu_expand_builtin_1 (struct spu_builtin_description *d,
6473
                      tree exp, rtx target)
6474
{
6475
  rtx pat;
6476
  rtx ops[8];
6477
  enum insn_code icode = (enum insn_code) d->icode;
6478
  enum machine_mode mode, tmode;
6479
  int i, p;
6480
  int n_operands;
6481
  tree return_type;
6482
 
6483
  /* Set up ops[] with values from arglist. */
6484
  n_operands = expand_builtin_args (d, exp, target, ops);
6485
 
6486
  /* Handle the target operand which must be operand 0. */
6487
  i = 0;
6488
  if (d->parm[0] != SPU_BTI_VOID)
6489
    {
6490
 
6491
      /* We prefer the mode specified for the match_operand otherwise
6492
         use the mode from the builtin function prototype. */
6493
      tmode = insn_data[d->icode].operand[0].mode;
6494
      if (tmode == VOIDmode)
6495
        tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6496
 
6497
      /* Try to use target because not using it can lead to extra copies
6498
         and when we are using all of the registers extra copies leads
6499
         to extra spills.  */
6500
      if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6501
        ops[0] = target;
6502
      else
6503
        target = ops[0] = gen_reg_rtx (tmode);
6504
 
6505
      if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6506
        abort ();
6507
 
6508
      i++;
6509
    }
6510
 
6511
  if (d->fcode == SPU_MASK_FOR_LOAD)
6512
    {
6513
      enum machine_mode mode = insn_data[icode].operand[1].mode;
6514
      tree arg;
6515
      rtx addr, op, pat;
6516
 
6517
      /* get addr */
6518
      arg = CALL_EXPR_ARG (exp, 0);
6519
      gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE);
6520
      op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6521
      addr = memory_address (mode, op);
6522
 
6523
      /* negate addr */
6524
      op = gen_reg_rtx (GET_MODE (addr));
6525
      emit_insn (gen_rtx_SET (VOIDmode, op,
6526
                 gen_rtx_NEG (GET_MODE (addr), addr)));
6527
      op = gen_rtx_MEM (mode, op);
6528
 
6529
      pat = GEN_FCN (icode) (target, op);
6530
      if (!pat)
6531
        return 0;
6532
      emit_insn (pat);
6533
      return target;
6534
    }
6535
 
6536
  /* Ignore align_hint, but still expand it's args in case they have
6537
     side effects. */
6538
  if (icode == CODE_FOR_spu_align_hint)
6539
    return 0;
6540
 
6541
  /* Handle the rest of the operands. */
6542
  for (p = 1; i < n_operands; i++, p++)
6543
    {
6544
      if (insn_data[d->icode].operand[i].mode != VOIDmode)
6545
        mode = insn_data[d->icode].operand[i].mode;
6546
      else
6547
        mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6548
 
6549
      /* mode can be VOIDmode here for labels */
6550
 
6551
      /* For specific intrinsics with an immediate operand, e.g.,
6552
         si_ai(), we sometimes need to convert the scalar argument to a
6553
         vector argument by splatting the scalar. */
6554
      if (VECTOR_MODE_P (mode)
6555
          && (GET_CODE (ops[i]) == CONST_INT
6556
              || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
6557
              || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6558
        {
6559
          if (GET_CODE (ops[i]) == CONST_INT)
6560
            ops[i] = spu_const (mode, INTVAL (ops[i]));
6561
          else
6562
            {
6563
              rtx reg = gen_reg_rtx (mode);
6564
              enum machine_mode imode = GET_MODE_INNER (mode);
6565
              if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6566
                ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6567
              if (imode != GET_MODE (ops[i]))
6568
                ops[i] = convert_to_mode (imode, ops[i],
6569
                                          TYPE_UNSIGNED (spu_builtin_types
6570
                                                         [d->parm[i]]));
6571
              emit_insn (gen_spu_splats (reg, ops[i]));
6572
              ops[i] = reg;
6573
            }
6574
        }
6575
 
6576
      spu_check_builtin_parm (d, ops[i], d->parm[p]);
6577
 
6578
      if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6579
        ops[i] = spu_force_reg (mode, ops[i]);
6580
    }
6581
 
6582
  switch (n_operands)
6583
    {
6584
    case 0:
6585
      pat = GEN_FCN (icode) (0);
6586
      break;
6587
    case 1:
6588
      pat = GEN_FCN (icode) (ops[0]);
6589
      break;
6590
    case 2:
6591
      pat = GEN_FCN (icode) (ops[0], ops[1]);
6592
      break;
6593
    case 3:
6594
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6595
      break;
6596
    case 4:
6597
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6598
      break;
6599
    case 5:
6600
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6601
      break;
6602
    case 6:
6603
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6604
      break;
6605
    default:
6606
      abort ();
6607
    }
6608
 
6609
  if (!pat)
6610
    abort ();
6611
 
6612
  if (d->type == B_CALL || d->type == B_BISLED)
6613
    emit_call_insn (pat);
6614
  else if (d->type == B_JUMP)
6615
    {
6616
      emit_jump_insn (pat);
6617
      emit_barrier ();
6618
    }
6619
  else
6620
    emit_insn (pat);
6621
 
6622
  return_type = spu_builtin_types[d->parm[0]];
6623
  if (d->parm[0] != SPU_BTI_VOID
6624
      && GET_MODE (target) != TYPE_MODE (return_type))
6625
    {
6626
      /* target is the return value.  It should always be the mode of
6627
         the builtin function prototype. */
6628
      target = spu_force_reg (TYPE_MODE (return_type), target);
6629
    }
6630
 
6631
  return target;
6632
}
6633
 
6634
rtx
6635
spu_expand_builtin (tree exp,
6636
                    rtx target,
6637
                    rtx subtarget ATTRIBUTE_UNUSED,
6638
                    enum machine_mode mode ATTRIBUTE_UNUSED,
6639
                    int ignore ATTRIBUTE_UNUSED)
6640
{
6641
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6642
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS;
6643
  struct spu_builtin_description *d;
6644
 
6645
  if (fcode < NUM_SPU_BUILTINS)
6646
    {
6647
      d = &spu_builtins[fcode];
6648
 
6649
      return spu_expand_builtin_1 (d, exp, target);
6650
    }
6651
  abort ();
6652
}
6653
 
6654
/* Implement targetm.vectorize.builtin_mul_widen_even.  */
6655
static tree
6656
spu_builtin_mul_widen_even (tree type)
6657
{
6658
  switch (TYPE_MODE (type))
6659
    {
6660
    case V8HImode:
6661
      if (TYPE_UNSIGNED (type))
6662
        return spu_builtins[SPU_MULE_0].fndecl;
6663
      else
6664
        return spu_builtins[SPU_MULE_1].fndecl;
6665
      break;
6666
    default:
6667
      return NULL_TREE;
6668
    }
6669
}
6670
 
6671
/* Implement targetm.vectorize.builtin_mul_widen_odd.  */
6672
static tree
6673
spu_builtin_mul_widen_odd (tree type)
6674
{
6675
  switch (TYPE_MODE (type))
6676
    {
6677
    case V8HImode:
6678
      if (TYPE_UNSIGNED (type))
6679
        return spu_builtins[SPU_MULO_1].fndecl;
6680
      else
6681
        return spu_builtins[SPU_MULO_0].fndecl;
6682
      break;
6683
    default:
6684
      return NULL_TREE;
6685
    }
6686
}
6687
 
6688
/* Implement targetm.vectorize.builtin_mask_for_load.  */
6689
static tree
6690
spu_builtin_mask_for_load (void)
6691
{
6692
  struct spu_builtin_description *d = &spu_builtins[SPU_MASK_FOR_LOAD];
6693
  gcc_assert (d);
6694
  return d->fndecl;
6695
}
6696
 
6697
/* Implement targetm.vectorize.builtin_vectorization_cost.

   When the branch of the runtime test is taken the vectorized version is
   skipped, which costs a misprediction because the vectorized version is
   expected to be the fall-through.  That mispredicted-branch latency is
   therefore subtracted from the cost of running the vectorized version:
   the result is -19 when RUNTIME_TEST is set and 0 otherwise.  */
static int
spu_builtin_vectorization_cost (bool runtime_test)
{
  return runtime_test ? -19 : 0;
}
6711
 
6712
/* Return true iff, data reference of TYPE can reach vector alignment (16)
6713
   after applying N number of iterations.  This routine does not determine
6714
   how may iterations are required to reach desired alignment.  */
6715
 
6716
static bool
6717
spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
6718
{
6719
  if (is_packed)
6720
    return false;
6721
 
6722
  /* All other types are naturally aligned.  */
6723
  return true;
6724
}
6725
 
6726
/* Implement targetm.vectorize.builtin_vec_perm.  */
6727
tree
6728
spu_builtin_vec_perm (tree type, tree *mask_element_type)
6729
{
6730
  struct spu_builtin_description *d;
6731
 
6732
  *mask_element_type = unsigned_char_type_node;
6733
 
6734
  switch (TYPE_MODE (type))
6735
    {
6736
    case V16QImode:
6737
      if (TYPE_UNSIGNED (type))
6738
        d = &spu_builtins[SPU_SHUFFLE_0];
6739
      else
6740
        d = &spu_builtins[SPU_SHUFFLE_1];
6741
      break;
6742
 
6743
    case V8HImode:
6744
      if (TYPE_UNSIGNED (type))
6745
        d = &spu_builtins[SPU_SHUFFLE_2];
6746
      else
6747
        d = &spu_builtins[SPU_SHUFFLE_3];
6748
      break;
6749
 
6750
    case V4SImode:
6751
      if (TYPE_UNSIGNED (type))
6752
        d = &spu_builtins[SPU_SHUFFLE_4];
6753
      else
6754
        d = &spu_builtins[SPU_SHUFFLE_5];
6755
      break;
6756
 
6757
    case V2DImode:
6758
      if (TYPE_UNSIGNED (type))
6759
        d = &spu_builtins[SPU_SHUFFLE_6];
6760
      else
6761
        d = &spu_builtins[SPU_SHUFFLE_7];
6762
      break;
6763
 
6764
    case V4SFmode:
6765
      d = &spu_builtins[SPU_SHUFFLE_8];
6766
      break;
6767
 
6768
    case V2DFmode:
6769
      d = &spu_builtins[SPU_SHUFFLE_9];
6770
      break;
6771
 
6772
    default:
6773
      return NULL_TREE;
6774
    }
6775
 
6776
  gcc_assert (d);
6777
  return d->fndecl;
6778
}
6779
 
6780
/* Return the appropriate mode for a named address pointer.  */
6781
static enum machine_mode
6782
spu_addr_space_pointer_mode (addr_space_t addrspace)
6783
{
6784
  switch (addrspace)
6785
    {
6786
    case ADDR_SPACE_GENERIC:
6787
      return ptr_mode;
6788
    case ADDR_SPACE_EA:
6789
      return EAmode;
6790
    default:
6791
      gcc_unreachable ();
6792
    }
6793
}
6794
 
6795
/* Return the appropriate mode for a named address address.  */
6796
static enum machine_mode
6797
spu_addr_space_address_mode (addr_space_t addrspace)
6798
{
6799
  switch (addrspace)
6800
    {
6801
    case ADDR_SPACE_GENERIC:
6802
      return Pmode;
6803
    case ADDR_SPACE_EA:
6804
      return EAmode;
6805
    default:
6806
      gcc_unreachable ();
6807
    }
6808
}
6809
 
6810
/* Determine if one named address space is a subset of another.  */
6811
 
6812
static bool
6813
spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6814
{
6815
  gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6816
  gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6817
 
6818
  if (subset == superset)
6819
    return true;
6820
 
6821
  /* If we have -mno-address-space-conversion, treat __ea and generic as not
6822
     being subsets but instead as disjoint address spaces.  */
6823
  else if (!TARGET_ADDRESS_SPACE_CONVERSION)
6824
    return false;
6825
 
6826
  else
6827
    return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
6828
}
6829
 
6830
/* Convert from one address space to another.  */
6831
static rtx
6832
spu_addr_space_convert (rtx op, tree from_type, tree to_type)
6833
{
6834
  addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
6835
  addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
6836
 
6837
  gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
6838
  gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
6839
 
6840
  if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
6841
    {
6842
      rtx result, ls;
6843
 
6844
      ls = gen_const_mem (DImode,
6845
                          gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6846
      set_mem_align (ls, 128);
6847
 
6848
      result = gen_reg_rtx (Pmode);
6849
      ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
6850
      op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
6851
      ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6852
                                          ls, const0_rtx, Pmode, 1);
6853
 
6854
      emit_insn (gen_subsi3 (result, op, ls));
6855
 
6856
      return result;
6857
    }
6858
 
6859
  else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
6860
    {
6861
      rtx result, ls;
6862
 
6863
      ls = gen_const_mem (DImode,
6864
                          gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6865
      set_mem_align (ls, 128);
6866
 
6867
      result = gen_reg_rtx (EAmode);
6868
      ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
6869
      op = force_reg (Pmode, op);
6870
      ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6871
                                          ls, const0_rtx, EAmode, 1);
6872
      op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
6873
 
6874
      if (EAmode == SImode)
6875
        emit_insn (gen_addsi3 (result, op, ls));
6876
      else
6877
        emit_insn (gen_adddi3 (result, op, ls));
6878
 
6879
      return result;
6880
    }
6881
 
6882
  else
6883
    gcc_unreachable ();
6884
}
6885
 
6886
 
6887
/* Count the total number of instructions in each pipe and return the
6888
   maximum, which is used as the Minimum Iteration Interval (MII)
6889
   in the modulo scheduler.  get_pipe() will return -2, -1, 0, or 1.
6890
   -2 are instructions that can go in pipe0 or pipe1.  */
6891
static int
6892
spu_sms_res_mii (struct ddg *g)
6893
{
6894
  int i;
6895
  unsigned t[4] = {0, 0, 0, 0};
6896
 
6897
  for (i = 0; i < g->num_nodes; i++)
6898
    {
6899
      rtx insn = g->nodes[i].insn;
6900
      int p = get_pipe (insn) + 2;
6901
 
6902
      assert (p >= 0);
6903
      assert (p < 4);
6904
 
6905
      t[p]++;
6906
      if (dump_file && INSN_P (insn))
6907
            fprintf (dump_file, "i%d %s %d %d\n",
6908
                     INSN_UID (insn),
6909
                     insn_data[INSN_CODE(insn)].name,
6910
                     p, t[p]);
6911
    }
6912
  if (dump_file)
6913
    fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6914
 
6915
  return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
6916
}
6917
 
6918
 
6919
void
6920
spu_init_expanders (void)
6921
{
6922
  if (cfun)
6923
    {
6924
      rtx r0, r1;
6925
      /* HARD_FRAME_REGISTER is only 128 bit aligned when
6926
         frame_pointer_needed is true.  We don't know that until we're
6927
         expanding the prologue. */
6928
      REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6929
 
6930
      /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6931
         LAST_VIRTUAL_REGISTER+2 to test the back-end.  We want them
6932
         to be treated as aligned, so generate them here. */
6933
      r0 = gen_reg_rtx (SImode);
6934
      r1 = gen_reg_rtx (SImode);
6935
      mark_reg_pointer (r0, 128);
6936
      mark_reg_pointer (r1, 128);
6937
      gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
6938
                  && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
6939
    }
6940
}
6941
 
6942
static enum machine_mode
6943
spu_libgcc_cmp_return_mode (void)
6944
{
6945
 
6946
/* For SPU word mode is TI mode so it is better to use SImode
6947
   for compare returns.  */
6948
  return SImode;
6949
}
6950
 
6951
static enum machine_mode
6952
spu_libgcc_shift_count_mode (void)
6953
{
6954
/* For SPU word mode is TI mode so it is better to use SImode
6955
   for shift counts.  */
6956
  return SImode;
6957
}
6958
 
6959
/* An early place to adjust some flags after GCC has finished processing
6960
 * them. */
6961
static void
6962
asm_file_start (void)
6963
{
6964
  /* Variable tracking should be run after all optimizations which
6965
     change order of insns.  It also needs a valid CFG. */
6966
  spu_flag_var_tracking = flag_var_tracking;
6967
  flag_var_tracking = 0;
6968
 
6969
  default_file_start ();
6970
}
6971
 
6972
/* Implement targetm.section_type_flags.  */
6973
static unsigned int
6974
spu_section_type_flags (tree decl, const char *name, int reloc)
6975
{
6976
  /* .toe needs to have type @nobits.  */
6977
  if (strcmp (name, ".toe") == 0)
6978
    return SECTION_BSS;
6979
  /* Don't load _ea into the current address space.  */
6980
  if (strcmp (name, "._ea") == 0)
6981
    return SECTION_WRITE | SECTION_DEBUG;
6982
  return default_section_type_flags (decl, name, reloc);
6983
}
6984
 
6985
/* Implement targetm.select_section.  */
6986
static section *
6987
spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
6988
{
6989
  /* Variables and constants defined in the __ea address space
6990
     go into a special section named "._ea".  */
6991
  if (TREE_TYPE (decl) != error_mark_node
6992
      && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
6993
    {
6994
      /* We might get called with string constants, but get_named_section
6995
         doesn't like them as they are not DECLs.  Also, we need to set
6996
         flags in that case.  */
6997
      if (!DECL_P (decl))
6998
        return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
6999
 
7000
      return get_named_section (decl, "._ea", reloc);
7001
    }
7002
 
7003
  return default_elf_select_section (decl, reloc, align);
7004
}
7005
 
7006
/* Implement targetm.unique_section.  */
7007
static void
7008
spu_unique_section (tree decl, int reloc)
7009
{
7010
  /* We don't support unique section names in the __ea address
7011
     space for now.  */
7012
  if (TREE_TYPE (decl) != error_mark_node
7013
      && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
7014
    return;
7015
 
7016
  default_unique_section (decl, reloc);
7017
}
7018
 
7019
/* Generate a constant or register which contains 2^SCALE.  We assume
7020
   the result is valid for MODE.  Currently, MODE must be V4SFmode and
7021
   SCALE must be SImode. */
7022
rtx
7023
spu_gen_exp2 (enum machine_mode mode, rtx scale)
7024
{
7025
  gcc_assert (mode == V4SFmode);
7026
  gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
7027
  if (GET_CODE (scale) != CONST_INT)
7028
    {
7029
      /* unsigned int exp = (127 + scale) << 23;
7030
        __vector float m = (__vector float) spu_splats (exp); */
7031
      rtx reg = force_reg (SImode, scale);
7032
      rtx exp = gen_reg_rtx (SImode);
7033
      rtx mul = gen_reg_rtx (mode);
7034
      emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
7035
      emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
7036
      emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
7037
      return mul;
7038
    }
7039
  else
7040
    {
7041
      HOST_WIDE_INT exp = 127 + INTVAL (scale);
7042
      unsigned char arr[16];
7043
      arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
7044
      arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
7045
      arr[2] = arr[6] = arr[10] = arr[14] = 0;
7046
      arr[3] = arr[7] = arr[11] = arr[15] = 0;
7047
      return array_to_constant (mode, arr);
7048
    }
7049
}
7050
 
7051
/* After reload, just change the convert into a move instruction
7052
   or a dead instruction. */
7053
void
7054
spu_split_convert (rtx ops[])
7055
{
7056
  if (REGNO (ops[0]) == REGNO (ops[1]))
7057
    emit_note (NOTE_INSN_DELETED);
7058
  else
7059
    {
7060
      /* Use TImode always as this might help hard reg copyprop.  */
7061
      rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
7062
      rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
7063
      emit_insn (gen_move_insn (op0, op1));
7064
    }
7065
}
7066
 
7067
/* Write the profiling call sequence to FILE: a "# profile" comment line
   followed by a "brsl" to _mcount that links through register $75.
   LABELNO is not used.  */
void
spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
{
  fprintf (file, "# profile\n");
  fprintf (file, "brsl $75,  _mcount\n");
}
7073
 
7074
#include "gt-spu.h"

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.