1 |
282 |
jeremybenn |
/* Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
|
2 |
|
|
|
3 |
|
|
This file is free software; you can redistribute it and/or modify it under
|
4 |
|
|
the terms of the GNU General Public License as published by the Free
|
5 |
|
|
Software Foundation; either version 3 of the License, or (at your option)
|
6 |
|
|
any later version.
|
7 |
|
|
|
8 |
|
|
This file is distributed in the hope that it will be useful, but WITHOUT
|
9 |
|
|
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
10 |
|
|
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
11 |
|
|
for more details.
|
12 |
|
|
|
13 |
|
|
You should have received a copy of the GNU General Public License
|
14 |
|
|
along with GCC; see the file COPYING3. If not see
|
15 |
|
|
<http://www.gnu.org/licenses/>. */
|
16 |
|
|
|
17 |
|
|
#include "config.h"
|
18 |
|
|
#include "system.h"
|
19 |
|
|
#include "coretypes.h"
|
20 |
|
|
#include "tm.h"
|
21 |
|
|
#include "rtl.h"
|
22 |
|
|
#include "regs.h"
|
23 |
|
|
#include "hard-reg-set.h"
|
24 |
|
|
#include "real.h"
|
25 |
|
|
#include "insn-config.h"
|
26 |
|
|
#include "conditions.h"
|
27 |
|
|
#include "insn-attr.h"
|
28 |
|
|
#include "flags.h"
|
29 |
|
|
#include "recog.h"
|
30 |
|
|
#include "obstack.h"
|
31 |
|
|
#include "tree.h"
|
32 |
|
|
#include "expr.h"
|
33 |
|
|
#include "optabs.h"
|
34 |
|
|
#include "except.h"
|
35 |
|
|
#include "function.h"
|
36 |
|
|
#include "output.h"
|
37 |
|
|
#include "basic-block.h"
|
38 |
|
|
#include "integrate.h"
|
39 |
|
|
#include "toplev.h"
|
40 |
|
|
#include "ggc.h"
|
41 |
|
|
#include "hashtab.h"
|
42 |
|
|
#include "tm_p.h"
|
43 |
|
|
#include "target.h"
|
44 |
|
|
#include "target-def.h"
|
45 |
|
|
#include "langhooks.h"
|
46 |
|
|
#include "reload.h"
|
47 |
|
|
#include "cfglayout.h"
|
48 |
|
|
#include "sched-int.h"
|
49 |
|
|
#include "params.h"
|
50 |
|
|
#include "assert.h"
|
51 |
|
|
#include "machmode.h"
|
52 |
|
|
#include "gimple.h"
|
53 |
|
|
#include "tm-constrs.h"
|
54 |
|
|
#include "ddg.h"
|
55 |
|
|
#include "sbitmap.h"
|
56 |
|
|
#include "timevar.h"
|
57 |
|
|
#include "df.h"
|
58 |
|
|
|
59 |
|
|
/* Builtin types, data and prototypes. */
|
60 |
|
|
|
61 |
|
|
/* Indices into spu_builtin_types[].  The enumerators are numbered
   consecutively from zero, and SPU_BTI_MAX is the number of entries.  */
enum spu_builtin_type_index
{
  SPU_BTI_END_OF_PARAMS,

  /* Vector types; fresh type nodes are created for these.  */
  SPU_BTI_V16QI,
  SPU_BTI_V8HI,
  SPU_BTI_V4SI,
  SPU_BTI_V2DI,
  SPU_BTI_V4SF,
  SPU_BTI_V2DF,
  SPU_BTI_UV16QI,
  SPU_BTI_UV8HI,
  SPU_BTI_UV4SI,
  SPU_BTI_UV2DI,

  /* A 16-byte type (implemented with V16QI_type_node).  */
  SPU_BTI_QUADWORD,

  /* Immediate operand kinds; every one of them corresponds to
     intSI_type_node.  spu_builtin_range[] lists one entry per
     enumerator of this group, in this order.  */
  SPU_BTI_7,
  SPU_BTI_S7,
  SPU_BTI_U7,
  SPU_BTI_S10,
  SPU_BTI_S10_4,
  SPU_BTI_U14,
  SPU_BTI_16,
  SPU_BTI_S16,
  SPU_BTI_S16_2,
  SPU_BTI_U16,
  SPU_BTI_U16_2,
  SPU_BTI_U18,

  /* The standard scalar types: signed integers...  */
  SPU_BTI_INTQI,
  SPU_BTI_INTHI,
  SPU_BTI_INTSI,
  SPU_BTI_INTDI,

  /* ...unsigned integers...  */
  SPU_BTI_UINTQI,
  SPU_BTI_UINTHI,
  SPU_BTI_UINTSI,
  SPU_BTI_UINTDI,

  /* ...floating point...  */
  SPU_BTI_FLOAT,
  SPU_BTI_DOUBLE,

  /* ...and void / pointer.  */
  SPU_BTI_VOID,
  SPU_BTI_PTR,

  /* Number of entries; used to size spu_builtin_types[].  */
  SPU_BTI_MAX
};
|
113 |
|
|
|
114 |
|
|
#define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
|
115 |
|
|
#define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
|
116 |
|
|
#define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
|
117 |
|
|
#define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
|
118 |
|
|
#define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
|
119 |
|
|
#define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
|
120 |
|
|
#define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
|
121 |
|
|
#define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
|
122 |
|
|
#define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
|
123 |
|
|
#define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
|
124 |
|
|
|
125 |
|
|
static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
|
126 |
|
|
|
127 |
|
|
/* Inclusive bounds [low, high] for one kind of immediate operand.  */
struct spu_builtin_range
{
  int low, high;
};

/* One entry per immediate kind, in the order SPU_BTI_7 .. SPU_BTI_U18
   of enum spu_builtin_type_index.  */
static struct spu_builtin_range spu_builtin_range[] = {
  {-0x40, 0x7f},        /* SPU_BTI_7     */
  {-0x40, 0x3f},        /* SPU_BTI_S7    */
  {0, 0x7f},            /* SPU_BTI_U7    */
  {-0x200, 0x1ff},      /* SPU_BTI_S10   */
  {-0x2000, 0x1fff},    /* SPU_BTI_S10_4 */
  {0, 0x3fff},          /* SPU_BTI_U14   */
  {-0x8000, 0xffff},    /* SPU_BTI_16    */
  {-0x8000, 0x7fff},    /* SPU_BTI_S16   */
  {-0x20000, 0x1ffff},  /* SPU_BTI_S16_2 */
  {0, 0xffff},          /* SPU_BTI_U16   */
  {0, 0x3ffff},         /* SPU_BTI_U16_2 */
  {0, 0x3ffff},         /* SPU_BTI_U18   */
};
|
146 |
|
|
|
147 |
|
|
|
148 |
|
|
/* Target specific attribute specifications. */
|
149 |
|
|
char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
|
150 |
|
|
|
151 |
|
|
/* Prototypes and external defs. */
|
152 |
|
|
static void spu_init_builtins (void);
|
153 |
|
|
static tree spu_builtin_decl (unsigned, bool);
|
154 |
|
|
static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode);
|
155 |
|
|
static unsigned char spu_vector_mode_supported_p (enum machine_mode mode);
|
156 |
|
|
static bool spu_legitimate_address_p (enum machine_mode, rtx, bool);
|
157 |
|
|
static bool spu_addr_space_legitimate_address_p (enum machine_mode, rtx,
|
158 |
|
|
bool, addr_space_t);
|
159 |
|
|
static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
|
160 |
|
|
static rtx get_pic_reg (void);
|
161 |
|
|
static int need_to_save_reg (int regno, int saving);
|
162 |
|
|
static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
|
163 |
|
|
static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
|
164 |
|
|
static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
|
165 |
|
|
rtx scratch);
|
166 |
|
|
static void emit_nop_for_insn (rtx insn);
|
167 |
|
|
static bool insn_clobbers_hbr (rtx insn);
|
168 |
|
|
static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
|
169 |
|
|
int distance, sbitmap blocks);
|
170 |
|
|
static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
|
171 |
|
|
enum machine_mode dmode);
|
172 |
|
|
static rtx get_branch_target (rtx branch);
|
173 |
|
|
static void spu_machine_dependent_reorg (void);
|
174 |
|
|
static int spu_sched_issue_rate (void);
|
175 |
|
|
static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
|
176 |
|
|
int can_issue_more);
|
177 |
|
|
static int get_pipe (rtx insn);
|
178 |
|
|
static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
|
179 |
|
|
static void spu_sched_init_global (FILE *, int, int);
|
180 |
|
|
static void spu_sched_init (FILE *, int, int);
|
181 |
|
|
static int spu_sched_reorder (FILE *, int, rtx *, int *, int);
|
182 |
|
|
static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
|
183 |
|
|
int flags,
|
184 |
|
|
unsigned char *no_add_attrs);
|
185 |
|
|
static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
|
186 |
|
|
int flags,
|
187 |
|
|
unsigned char *no_add_attrs);
|
188 |
|
|
static int spu_naked_function_p (tree func);
|
189 |
|
|
static unsigned char spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
|
190 |
|
|
const_tree type, unsigned char named);
|
191 |
|
|
static tree spu_build_builtin_va_list (void);
|
192 |
|
|
static void spu_va_start (tree, rtx);
|
193 |
|
|
static tree spu_gimplify_va_arg_expr (tree valist, tree type,
|
194 |
|
|
gimple_seq * pre_p, gimple_seq * post_p);
|
195 |
|
|
static int store_with_one_insn_p (rtx mem);
|
196 |
|
|
static int mem_is_padded_component_ref (rtx x);
|
197 |
|
|
static int reg_aligned_for_addr (rtx x);
|
198 |
|
|
static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
|
199 |
|
|
static void spu_asm_globalize_label (FILE * file, const char *name);
|
200 |
|
|
static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
|
201 |
|
|
int *total, bool speed);
|
202 |
|
|
static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp);
|
203 |
|
|
static void spu_init_libfuncs (void);
|
204 |
|
|
static bool spu_return_in_memory (const_tree type, const_tree fntype);
|
205 |
|
|
static void fix_range (const char *);
|
206 |
|
|
static void spu_encode_section_info (tree, rtx, int);
|
207 |
|
|
static rtx spu_legitimize_address (rtx, rtx, enum machine_mode);
|
208 |
|
|
static rtx spu_addr_space_legitimize_address (rtx, rtx, enum machine_mode,
|
209 |
|
|
addr_space_t);
|
210 |
|
|
static tree spu_builtin_mul_widen_even (tree);
|
211 |
|
|
static tree spu_builtin_mul_widen_odd (tree);
|
212 |
|
|
static tree spu_builtin_mask_for_load (void);
|
213 |
|
|
static int spu_builtin_vectorization_cost (bool);
|
214 |
|
|
static bool spu_vector_alignment_reachable (const_tree, bool);
|
215 |
|
|
static tree spu_builtin_vec_perm (tree, tree *);
|
216 |
|
|
static enum machine_mode spu_addr_space_pointer_mode (addr_space_t);
|
217 |
|
|
static enum machine_mode spu_addr_space_address_mode (addr_space_t);
|
218 |
|
|
static bool spu_addr_space_subset_p (addr_space_t, addr_space_t);
|
219 |
|
|
static rtx spu_addr_space_convert (rtx, tree, tree);
|
220 |
|
|
static int spu_sms_res_mii (struct ddg *g);
|
221 |
|
|
static void asm_file_start (void);
|
222 |
|
|
static unsigned int spu_section_type_flags (tree, const char *, int);
|
223 |
|
|
static section *spu_select_section (tree, int, unsigned HOST_WIDE_INT);
|
224 |
|
|
static void spu_unique_section (tree, int);
|
225 |
|
|
static rtx spu_expand_load (rtx, rtx, rtx, int);
|
226 |
|
|
static void spu_trampoline_init (rtx, tree, rtx);
|
227 |
|
|
|
228 |
|
|
extern const char *reg_names[];
|
229 |
|
|
|
230 |
|
|
/* The instruction set architecture being targeted.  */
int spu_arch;

/* The cpu being tuned for.  */
int spu_tune;

/* The hardware requires 8 insns between a hint and the branch it
   affects.  This variable describes how many rtl instructions the
   compiler needs to see before inserting a hint; the compiler then
   inserts enough nops to bring the distance up to at least 8 insns.
   By default the compiler may emit up to 2 nops.  The nops are
   inserted in pairs, so we round down.
   NOTE(review): the factor of 4 looks like a bytes-per-insn scale --
   confirm against the users of this variable.  */
int spu_hint_dist = (8 - 2) * 4;

/* Determines whether we run variable tracking in machine dependent
   reorganization.  */
static int spu_flag_var_tracking;
|
246 |
|
|
|
247 |
|
|
/* Result type of which_immediate_load and which_logical_immediate.
   NOTE(review): apart from SPU_NONE the enumerators appear to be named
   after SPU instruction mnemonics -- confirm against those functions.  */
enum spu_immediate
{
  SPU_NONE,
  SPU_IL,
  SPU_ILA,
  SPU_ILH,
  SPU_ILHU,
  SPU_ORI,
  SPU_ORHI,
  SPU_ORBI,
  SPU_IOHL
};
|
258 |
|
|
/* Result type of classify_immediate: how a constant gets materialized.
   NOTE(review): IC_IL1s / IC_IL2s carry the same descriptions as
   IC_IL1 / IC_IL2 here; what the `s' suffix distinguishes is not
   visible from this declaration -- confirm in classify_immediate.  */
enum immediate_class
{
  IC_POOL,      /* constant pool */
  IC_IL1,       /* one il* instruction */
  IC_IL2,       /* both ilhu and iohl instructions */
  IC_IL1s,      /* one il* instruction */
  IC_IL2s,      /* both ilhu and iohl instructions */
  IC_FSMBI,     /* the fsmbi instruction */
  IC_CPAT,      /* one of the c*d instructions */
  IC_FSMBI2     /* fsmbi plus 1 other instruction */
};
|
269 |
|
|
|
270 |
|
|
static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
|
271 |
|
|
static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
|
272 |
|
|
static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
|
273 |
|
|
static enum immediate_class classify_immediate (rtx op,
|
274 |
|
|
enum machine_mode mode);
|
275 |
|
|
|
276 |
|
|
static enum machine_mode spu_unwind_word_mode (void);
|
277 |
|
|
|
278 |
|
|
static enum machine_mode
|
279 |
|
|
spu_libgcc_cmp_return_mode (void);
|
280 |
|
|
|
281 |
|
|
static enum machine_mode
|
282 |
|
|
spu_libgcc_shift_count_mode (void);
|
283 |
|
|
|
284 |
|
|
/* Pointer mode for __ea references: SImode when spu_ea_model is 32,
   DImode for any other value.  */
#define EAmode (spu_ea_model == 32 ? SImode : DImode)
|
286 |
|
|
|
287 |
|
|
|
288 |
|
|
/* Table of machine attributes. */
|
289 |
|
|
static const struct attribute_spec spu_attribute_table[] =
|
290 |
|
|
{
|
291 |
|
|
/* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
|
292 |
|
|
{ "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute },
|
293 |
|
|
{ "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute },
|
294 |
|
|
{ NULL, 0, 0, false, false, false, NULL }
|
295 |
|
|
};
|
296 |
|
|
|
297 |
|
|
/* TARGET overrides. */
|
298 |
|
|
|
299 |
|
|
#undef TARGET_ADDR_SPACE_POINTER_MODE
|
300 |
|
|
#define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
|
301 |
|
|
|
302 |
|
|
#undef TARGET_ADDR_SPACE_ADDRESS_MODE
|
303 |
|
|
#define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
|
304 |
|
|
|
305 |
|
|
#undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
|
306 |
|
|
#define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
|
307 |
|
|
spu_addr_space_legitimate_address_p
|
308 |
|
|
|
309 |
|
|
#undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
|
310 |
|
|
#define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
|
311 |
|
|
|
312 |
|
|
#undef TARGET_ADDR_SPACE_SUBSET_P
|
313 |
|
|
#define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
|
314 |
|
|
|
315 |
|
|
#undef TARGET_ADDR_SPACE_CONVERT
|
316 |
|
|
#define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
|
317 |
|
|
|
318 |
|
|
#undef TARGET_INIT_BUILTINS
|
319 |
|
|
#define TARGET_INIT_BUILTINS spu_init_builtins
|
320 |
|
|
#undef TARGET_BUILTIN_DECL
|
321 |
|
|
#define TARGET_BUILTIN_DECL spu_builtin_decl
|
322 |
|
|
|
323 |
|
|
#undef TARGET_EXPAND_BUILTIN
|
324 |
|
|
#define TARGET_EXPAND_BUILTIN spu_expand_builtin
|
325 |
|
|
|
326 |
|
|
#undef TARGET_UNWIND_WORD_MODE
|
327 |
|
|
#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
|
328 |
|
|
|
329 |
|
|
#undef TARGET_LEGITIMIZE_ADDRESS
|
330 |
|
|
#define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
|
331 |
|
|
|
332 |
|
|
/* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
|
333 |
|
|
and .quad for the debugger. When it is known that the assembler is fixed,
|
334 |
|
|
these can be removed. */
|
335 |
|
|
#undef TARGET_ASM_UNALIGNED_SI_OP
|
336 |
|
|
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
|
337 |
|
|
|
338 |
|
|
#undef TARGET_ASM_ALIGNED_DI_OP
|
339 |
|
|
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
|
340 |
|
|
|
341 |
|
|
/* The .8byte directive doesn't seem to work well for a 32 bit
|
342 |
|
|
architecture. */
|
343 |
|
|
#undef TARGET_ASM_UNALIGNED_DI_OP
|
344 |
|
|
#define TARGET_ASM_UNALIGNED_DI_OP NULL
|
345 |
|
|
|
346 |
|
|
#undef TARGET_RTX_COSTS
|
347 |
|
|
#define TARGET_RTX_COSTS spu_rtx_costs
|
348 |
|
|
|
349 |
|
|
#undef TARGET_ADDRESS_COST
|
350 |
|
|
#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
|
351 |
|
|
|
352 |
|
|
#undef TARGET_SCHED_ISSUE_RATE
|
353 |
|
|
#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
|
354 |
|
|
|
355 |
|
|
#undef TARGET_SCHED_INIT_GLOBAL
|
356 |
|
|
#define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
|
357 |
|
|
|
358 |
|
|
#undef TARGET_SCHED_INIT
|
359 |
|
|
#define TARGET_SCHED_INIT spu_sched_init
|
360 |
|
|
|
361 |
|
|
#undef TARGET_SCHED_VARIABLE_ISSUE
|
362 |
|
|
#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
|
363 |
|
|
|
364 |
|
|
#undef TARGET_SCHED_REORDER
|
365 |
|
|
#define TARGET_SCHED_REORDER spu_sched_reorder
|
366 |
|
|
|
367 |
|
|
#undef TARGET_SCHED_REORDER2
|
368 |
|
|
#define TARGET_SCHED_REORDER2 spu_sched_reorder
|
369 |
|
|
|
370 |
|
|
#undef TARGET_SCHED_ADJUST_COST
|
371 |
|
|
#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
|
372 |
|
|
|
373 |
|
|
#undef TARGET_ATTRIBUTE_TABLE
|
374 |
|
|
#define TARGET_ATTRIBUTE_TABLE spu_attribute_table
|
375 |
|
|
|
376 |
|
|
#undef TARGET_ASM_INTEGER
|
377 |
|
|
#define TARGET_ASM_INTEGER spu_assemble_integer
|
378 |
|
|
|
379 |
|
|
#undef TARGET_SCALAR_MODE_SUPPORTED_P
|
380 |
|
|
#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
|
381 |
|
|
|
382 |
|
|
#undef TARGET_VECTOR_MODE_SUPPORTED_P
|
383 |
|
|
#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
|
384 |
|
|
|
385 |
|
|
#undef TARGET_FUNCTION_OK_FOR_SIBCALL
|
386 |
|
|
#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
|
387 |
|
|
|
388 |
|
|
#undef TARGET_ASM_GLOBALIZE_LABEL
|
389 |
|
|
#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
|
390 |
|
|
|
391 |
|
|
#undef TARGET_PASS_BY_REFERENCE
|
392 |
|
|
#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
|
393 |
|
|
|
394 |
|
|
#undef TARGET_MUST_PASS_IN_STACK
|
395 |
|
|
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
|
396 |
|
|
|
397 |
|
|
#undef TARGET_BUILD_BUILTIN_VA_LIST
|
398 |
|
|
#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
|
399 |
|
|
|
400 |
|
|
#undef TARGET_EXPAND_BUILTIN_VA_START
|
401 |
|
|
#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
|
402 |
|
|
|
403 |
|
|
#undef TARGET_SETUP_INCOMING_VARARGS
|
404 |
|
|
#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
|
405 |
|
|
|
406 |
|
|
#undef TARGET_MACHINE_DEPENDENT_REORG
|
407 |
|
|
#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
|
408 |
|
|
|
409 |
|
|
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
|
410 |
|
|
#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
|
411 |
|
|
|
412 |
|
|
#undef TARGET_DEFAULT_TARGET_FLAGS
|
413 |
|
|
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
|
414 |
|
|
|
415 |
|
|
#undef TARGET_INIT_LIBFUNCS
|
416 |
|
|
#define TARGET_INIT_LIBFUNCS spu_init_libfuncs
|
417 |
|
|
|
418 |
|
|
#undef TARGET_RETURN_IN_MEMORY
|
419 |
|
|
#define TARGET_RETURN_IN_MEMORY spu_return_in_memory
|
420 |
|
|
|
421 |
|
|
#undef TARGET_ENCODE_SECTION_INFO
|
422 |
|
|
#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
|
423 |
|
|
|
424 |
|
|
#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
|
425 |
|
|
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
|
426 |
|
|
|
427 |
|
|
#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
|
428 |
|
|
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
|
429 |
|
|
|
430 |
|
|
#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
|
431 |
|
|
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
|
432 |
|
|
|
433 |
|
|
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
|
434 |
|
|
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
|
435 |
|
|
|
436 |
|
|
#undef TARGET_VECTOR_ALIGNMENT_REACHABLE
|
437 |
|
|
#define TARGET_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
|
438 |
|
|
|
439 |
|
|
#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
|
440 |
|
|
#define TARGET_VECTORIZE_BUILTIN_VEC_PERM spu_builtin_vec_perm
|
441 |
|
|
|
442 |
|
|
#undef TARGET_LIBGCC_CMP_RETURN_MODE
|
443 |
|
|
#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
|
444 |
|
|
|
445 |
|
|
#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
|
446 |
|
|
#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
|
447 |
|
|
|
448 |
|
|
#undef TARGET_SCHED_SMS_RES_MII
|
449 |
|
|
#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
|
450 |
|
|
|
451 |
|
|
#undef TARGET_ASM_FILE_START
|
452 |
|
|
#define TARGET_ASM_FILE_START asm_file_start
|
453 |
|
|
|
454 |
|
|
#undef TARGET_SECTION_TYPE_FLAGS
|
455 |
|
|
#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
|
456 |
|
|
|
457 |
|
|
#undef TARGET_ASM_SELECT_SECTION
|
458 |
|
|
#define TARGET_ASM_SELECT_SECTION spu_select_section
|
459 |
|
|
|
460 |
|
|
#undef TARGET_ASM_UNIQUE_SECTION
|
461 |
|
|
#define TARGET_ASM_UNIQUE_SECTION spu_unique_section
|
462 |
|
|
|
463 |
|
|
#undef TARGET_LEGITIMATE_ADDRESS_P
|
464 |
|
|
#define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
|
465 |
|
|
|
466 |
|
|
#undef TARGET_TRAMPOLINE_INIT
|
467 |
|
|
#define TARGET_TRAMPOLINE_INIT spu_trampoline_init
|
468 |
|
|
|
469 |
|
|
struct gcc_target targetm = TARGET_INITIALIZER;
|
470 |
|
|
|
471 |
|
|
void
|
472 |
|
|
spu_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
|
473 |
|
|
{
|
474 |
|
|
/* Override some of the default param values. With so many registers
|
475 |
|
|
larger values are better for these params. */
|
476 |
|
|
MAX_PENDING_LIST_LENGTH = 128;
|
477 |
|
|
|
478 |
|
|
/* With so many registers this is better on by default. */
|
479 |
|
|
flag_rename_registers = 1;
|
480 |
|
|
}
|
481 |
|
|
|
482 |
|
|
/* Sometimes certain combinations of command options do not make sense
|
483 |
|
|
on a particular target machine. You can define a macro
|
484 |
|
|
OVERRIDE_OPTIONS to take account of this. This macro, if defined, is
|
485 |
|
|
executed once just after all the command options have been parsed. */
|
486 |
|
|
void
|
487 |
|
|
spu_override_options (void)
|
488 |
|
|
{
|
489 |
|
|
/* Small loops will be unpeeled at -O3. For SPU it is more important
|
490 |
|
|
to keep code small by default. */
|
491 |
|
|
if (!flag_unroll_loops && !flag_peel_loops
|
492 |
|
|
&& !PARAM_SET_P (PARAM_MAX_COMPLETELY_PEEL_TIMES))
|
493 |
|
|
PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES) = 1;
|
494 |
|
|
|
495 |
|
|
flag_omit_frame_pointer = 1;
|
496 |
|
|
|
497 |
|
|
/* Functions must be 8 byte aligned so we correctly handle dual issue */
|
498 |
|
|
if (align_functions < 8)
|
499 |
|
|
align_functions = 8;
|
500 |
|
|
|
501 |
|
|
spu_hint_dist = 8*4 - spu_max_nops*4;
|
502 |
|
|
if (spu_hint_dist < 0)
|
503 |
|
|
spu_hint_dist = 0;
|
504 |
|
|
|
505 |
|
|
if (spu_fixed_range_string)
|
506 |
|
|
fix_range (spu_fixed_range_string);
|
507 |
|
|
|
508 |
|
|
/* Determine processor architectural level. */
|
509 |
|
|
if (spu_arch_string)
|
510 |
|
|
{
|
511 |
|
|
if (strcmp (&spu_arch_string[0], "cell") == 0)
|
512 |
|
|
spu_arch = PROCESSOR_CELL;
|
513 |
|
|
else if (strcmp (&spu_arch_string[0], "celledp") == 0)
|
514 |
|
|
spu_arch = PROCESSOR_CELLEDP;
|
515 |
|
|
else
|
516 |
|
|
error ("Unknown architecture '%s'", &spu_arch_string[0]);
|
517 |
|
|
}
|
518 |
|
|
|
519 |
|
|
/* Determine processor to tune for. */
|
520 |
|
|
if (spu_tune_string)
|
521 |
|
|
{
|
522 |
|
|
if (strcmp (&spu_tune_string[0], "cell") == 0)
|
523 |
|
|
spu_tune = PROCESSOR_CELL;
|
524 |
|
|
else if (strcmp (&spu_tune_string[0], "celledp") == 0)
|
525 |
|
|
spu_tune = PROCESSOR_CELLEDP;
|
526 |
|
|
else
|
527 |
|
|
error ("Unknown architecture '%s'", &spu_tune_string[0]);
|
528 |
|
|
}
|
529 |
|
|
|
530 |
|
|
/* Change defaults according to the processor architecture. */
|
531 |
|
|
if (spu_arch == PROCESSOR_CELLEDP)
|
532 |
|
|
{
|
533 |
|
|
/* If no command line option has been otherwise specified, change
|
534 |
|
|
the default to -mno-safe-hints on celledp -- only the original
|
535 |
|
|
Cell/B.E. processors require this workaround. */
|
536 |
|
|
if (!(target_flags_explicit & MASK_SAFE_HINTS))
|
537 |
|
|
target_flags &= ~MASK_SAFE_HINTS;
|
538 |
|
|
}
|
539 |
|
|
|
540 |
|
|
REAL_MODE_FORMAT (SFmode) = &spu_single_format;
|
541 |
|
|
}
|
542 |
|
|
|
543 |
|
|
/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
|
544 |
|
|
struct attribute_spec.handler. */
|
545 |
|
|
|
546 |
|
|
/* True if MODE is valid for the target. By "valid", we mean able to
|
547 |
|
|
be manipulated in non-trivial ways. In particular, this means all
|
548 |
|
|
the arithmetic is supported. */
|
549 |
|
|
static bool
|
550 |
|
|
spu_scalar_mode_supported_p (enum machine_mode mode)
|
551 |
|
|
{
|
552 |
|
|
switch (mode)
|
553 |
|
|
{
|
554 |
|
|
case QImode:
|
555 |
|
|
case HImode:
|
556 |
|
|
case SImode:
|
557 |
|
|
case SFmode:
|
558 |
|
|
case DImode:
|
559 |
|
|
case TImode:
|
560 |
|
|
case DFmode:
|
561 |
|
|
return true;
|
562 |
|
|
|
563 |
|
|
default:
|
564 |
|
|
return false;
|
565 |
|
|
}
|
566 |
|
|
}
|
567 |
|
|
|
568 |
|
|
/* Similarly for vector modes. "Supported" here is less strict. At
|
569 |
|
|
least some operations are supported; need to check optabs or builtins
|
570 |
|
|
for further details. */
|
571 |
|
|
static bool
|
572 |
|
|
spu_vector_mode_supported_p (enum machine_mode mode)
|
573 |
|
|
{
|
574 |
|
|
switch (mode)
|
575 |
|
|
{
|
576 |
|
|
case V16QImode:
|
577 |
|
|
case V8HImode:
|
578 |
|
|
case V4SImode:
|
579 |
|
|
case V2DImode:
|
580 |
|
|
case V4SFmode:
|
581 |
|
|
case V2DFmode:
|
582 |
|
|
return true;
|
583 |
|
|
|
584 |
|
|
default:
|
585 |
|
|
return false;
|
586 |
|
|
}
|
587 |
|
|
}
|
588 |
|
|
|
589 |
|
|
/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
|
590 |
|
|
least significant bytes of the outer mode. This function returns
|
591 |
|
|
TRUE for the SUBREG's where this is correct. */
|
592 |
|
|
int
|
593 |
|
|
valid_subreg (rtx op)
|
594 |
|
|
{
|
595 |
|
|
enum machine_mode om = GET_MODE (op);
|
596 |
|
|
enum machine_mode im = GET_MODE (SUBREG_REG (op));
|
597 |
|
|
return om != VOIDmode && im != VOIDmode
|
598 |
|
|
&& (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
|
599 |
|
|
|| (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
|
600 |
|
|
|| (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
|
601 |
|
|
}
|
602 |
|
|
|
603 |
|
|
/* When insv and ext[sz]v ar passed a TI SUBREG, we want to strip it off
|
604 |
|
|
and adjust the start offset. */
|
605 |
|
|
static rtx
|
606 |
|
|
adjust_operand (rtx op, HOST_WIDE_INT * start)
|
607 |
|
|
{
|
608 |
|
|
enum machine_mode mode;
|
609 |
|
|
int op_size;
|
610 |
|
|
/* Strip any paradoxical SUBREG. */
|
611 |
|
|
if (GET_CODE (op) == SUBREG
|
612 |
|
|
&& (GET_MODE_BITSIZE (GET_MODE (op))
|
613 |
|
|
> GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
|
614 |
|
|
{
|
615 |
|
|
if (start)
|
616 |
|
|
*start -=
|
617 |
|
|
GET_MODE_BITSIZE (GET_MODE (op)) -
|
618 |
|
|
GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
|
619 |
|
|
op = SUBREG_REG (op);
|
620 |
|
|
}
|
621 |
|
|
/* If it is smaller than SI, assure a SUBREG */
|
622 |
|
|
op_size = GET_MODE_BITSIZE (GET_MODE (op));
|
623 |
|
|
if (op_size < 32)
|
624 |
|
|
{
|
625 |
|
|
if (start)
|
626 |
|
|
*start += 32 - op_size;
|
627 |
|
|
op_size = 32;
|
628 |
|
|
}
|
629 |
|
|
/* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
|
630 |
|
|
mode = mode_for_size (op_size, MODE_INT, 0);
|
631 |
|
|
if (mode != GET_MODE (op))
|
632 |
|
|
op = gen_rtx_SUBREG (mode, op, 0);
|
633 |
|
|
return op;
|
634 |
|
|
}
|
635 |
|
|
|
636 |
|
|
void
|
637 |
|
|
spu_expand_extv (rtx ops[], int unsignedp)
|
638 |
|
|
{
|
639 |
|
|
rtx dst = ops[0], src = ops[1];
|
640 |
|
|
HOST_WIDE_INT width = INTVAL (ops[2]);
|
641 |
|
|
HOST_WIDE_INT start = INTVAL (ops[3]);
|
642 |
|
|
HOST_WIDE_INT align_mask;
|
643 |
|
|
rtx s0, s1, mask, r0;
|
644 |
|
|
|
645 |
|
|
gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
|
646 |
|
|
|
647 |
|
|
if (MEM_P (src))
|
648 |
|
|
{
|
649 |
|
|
/* First, determine if we need 1 TImode load or 2. We need only 1
|
650 |
|
|
if the bits being extracted do not cross the alignment boundary
|
651 |
|
|
as determined by the MEM and its address. */
|
652 |
|
|
|
653 |
|
|
align_mask = -MEM_ALIGN (src);
|
654 |
|
|
if ((start & align_mask) == ((start + width - 1) & align_mask))
|
655 |
|
|
{
|
656 |
|
|
/* Alignment is sufficient for 1 load. */
|
657 |
|
|
s0 = gen_reg_rtx (TImode);
|
658 |
|
|
r0 = spu_expand_load (s0, 0, src, start / 8);
|
659 |
|
|
start &= 7;
|
660 |
|
|
if (r0)
|
661 |
|
|
emit_insn (gen_rotqby_ti (s0, s0, r0));
|
662 |
|
|
}
|
663 |
|
|
else
|
664 |
|
|
{
|
665 |
|
|
/* Need 2 loads. */
|
666 |
|
|
s0 = gen_reg_rtx (TImode);
|
667 |
|
|
s1 = gen_reg_rtx (TImode);
|
668 |
|
|
r0 = spu_expand_load (s0, s1, src, start / 8);
|
669 |
|
|
start &= 7;
|
670 |
|
|
|
671 |
|
|
gcc_assert (start + width <= 128);
|
672 |
|
|
if (r0)
|
673 |
|
|
{
|
674 |
|
|
rtx r1 = gen_reg_rtx (SImode);
|
675 |
|
|
mask = gen_reg_rtx (TImode);
|
676 |
|
|
emit_move_insn (mask, GEN_INT (-1));
|
677 |
|
|
emit_insn (gen_rotqby_ti (s0, s0, r0));
|
678 |
|
|
emit_insn (gen_rotqby_ti (s1, s1, r0));
|
679 |
|
|
if (GET_CODE (r0) == CONST_INT)
|
680 |
|
|
r1 = GEN_INT (INTVAL (r0) & 15);
|
681 |
|
|
else
|
682 |
|
|
emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
|
683 |
|
|
emit_insn (gen_shlqby_ti (mask, mask, r1));
|
684 |
|
|
emit_insn (gen_selb (s0, s1, s0, mask));
|
685 |
|
|
}
|
686 |
|
|
}
|
687 |
|
|
|
688 |
|
|
}
|
689 |
|
|
else if (GET_CODE (src) == SUBREG)
|
690 |
|
|
{
|
691 |
|
|
rtx r = SUBREG_REG (src);
|
692 |
|
|
gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
|
693 |
|
|
s0 = gen_reg_rtx (TImode);
|
694 |
|
|
if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
|
695 |
|
|
emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r)));
|
696 |
|
|
else
|
697 |
|
|
emit_move_insn (s0, src);
|
698 |
|
|
}
|
699 |
|
|
else
|
700 |
|
|
{
|
701 |
|
|
gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
|
702 |
|
|
s0 = gen_reg_rtx (TImode);
|
703 |
|
|
emit_move_insn (s0, src);
|
704 |
|
|
}
|
705 |
|
|
|
706 |
|
|
/* Now s0 is TImode and contains the bits to extract at start. */
|
707 |
|
|
|
708 |
|
|
if (start)
|
709 |
|
|
emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
|
710 |
|
|
|
711 |
|
|
if (128 - width)
|
712 |
|
|
{
|
713 |
|
|
tree c = build_int_cst (NULL_TREE, 128 - width);
|
714 |
|
|
s0 = expand_shift (RSHIFT_EXPR, TImode, s0, c, s0, unsignedp);
|
715 |
|
|
}
|
716 |
|
|
|
717 |
|
|
emit_move_insn (dst, s0);
|
718 |
|
|
}
|
719 |
|
|
|
720 |
|
|
void
|
721 |
|
|
spu_expand_insv (rtx ops[])
|
722 |
|
|
{
|
723 |
|
|
HOST_WIDE_INT width = INTVAL (ops[1]);
|
724 |
|
|
HOST_WIDE_INT start = INTVAL (ops[2]);
|
725 |
|
|
HOST_WIDE_INT maskbits;
|
726 |
|
|
enum machine_mode dst_mode, src_mode;
|
727 |
|
|
rtx dst = ops[0], src = ops[3];
|
728 |
|
|
int dst_size, src_size;
|
729 |
|
|
rtx mask;
|
730 |
|
|
rtx shift_reg;
|
731 |
|
|
int shift;
|
732 |
|
|
|
733 |
|
|
|
734 |
|
|
if (GET_CODE (ops[0]) == MEM)
|
735 |
|
|
dst = gen_reg_rtx (TImode);
|
736 |
|
|
else
|
737 |
|
|
dst = adjust_operand (dst, &start);
|
738 |
|
|
dst_mode = GET_MODE (dst);
|
739 |
|
|
dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
|
740 |
|
|
|
741 |
|
|
if (CONSTANT_P (src))
|
742 |
|
|
{
|
743 |
|
|
enum machine_mode m =
|
744 |
|
|
(width <= 32 ? SImode : width <= 64 ? DImode : TImode);
|
745 |
|
|
src = force_reg (m, convert_to_mode (m, src, 0));
|
746 |
|
|
}
|
747 |
|
|
src = adjust_operand (src, 0);
|
748 |
|
|
src_mode = GET_MODE (src);
|
749 |
|
|
src_size = GET_MODE_BITSIZE (GET_MODE (src));
|
750 |
|
|
|
751 |
|
|
mask = gen_reg_rtx (dst_mode);
|
752 |
|
|
shift_reg = gen_reg_rtx (dst_mode);
|
753 |
|
|
shift = dst_size - start - width;
|
754 |
|
|
|
755 |
|
|
/* It's not safe to use subreg here because the compiler assumes
|
756 |
|
|
that the SUBREG_REG is right justified in the SUBREG. */
|
757 |
|
|
convert_move (shift_reg, src, 1);
|
758 |
|
|
|
759 |
|
|
if (shift > 0)
|
760 |
|
|
{
|
761 |
|
|
switch (dst_mode)
|
762 |
|
|
{
|
763 |
|
|
case SImode:
|
764 |
|
|
emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
|
765 |
|
|
break;
|
766 |
|
|
case DImode:
|
767 |
|
|
emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
|
768 |
|
|
break;
|
769 |
|
|
case TImode:
|
770 |
|
|
emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
|
771 |
|
|
break;
|
772 |
|
|
default:
|
773 |
|
|
abort ();
|
774 |
|
|
}
|
775 |
|
|
}
|
776 |
|
|
else if (shift < 0)
|
777 |
|
|
abort ();
|
778 |
|
|
|
779 |
|
|
switch (dst_size)
|
780 |
|
|
{
|
781 |
|
|
case 32:
|
782 |
|
|
maskbits = (-1ll << (32 - width - start));
|
783 |
|
|
if (start)
|
784 |
|
|
maskbits += (1ll << (32 - start));
|
785 |
|
|
emit_move_insn (mask, GEN_INT (maskbits));
|
786 |
|
|
break;
|
787 |
|
|
case 64:
|
788 |
|
|
maskbits = (-1ll << (64 - width - start));
|
789 |
|
|
if (start)
|
790 |
|
|
maskbits += (1ll << (64 - start));
|
791 |
|
|
emit_move_insn (mask, GEN_INT (maskbits));
|
792 |
|
|
break;
|
793 |
|
|
case 128:
|
794 |
|
|
{
|
795 |
|
|
unsigned char arr[16];
|
796 |
|
|
int i = start / 8;
|
797 |
|
|
memset (arr, 0, sizeof (arr));
|
798 |
|
|
arr[i] = 0xff >> (start & 7);
|
799 |
|
|
for (i++; i <= (start + width - 1) / 8; i++)
|
800 |
|
|
arr[i] = 0xff;
|
801 |
|
|
arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
|
802 |
|
|
emit_move_insn (mask, array_to_constant (TImode, arr));
|
803 |
|
|
}
|
804 |
|
|
break;
|
805 |
|
|
default:
|
806 |
|
|
abort ();
|
807 |
|
|
}
|
808 |
|
|
if (GET_CODE (ops[0]) == MEM)
|
809 |
|
|
{
|
810 |
|
|
rtx low = gen_reg_rtx (SImode);
|
811 |
|
|
rtx rotl = gen_reg_rtx (SImode);
|
812 |
|
|
rtx mask0 = gen_reg_rtx (TImode);
|
813 |
|
|
rtx addr;
|
814 |
|
|
rtx addr0;
|
815 |
|
|
rtx addr1;
|
816 |
|
|
rtx mem;
|
817 |
|
|
|
818 |
|
|
addr = force_reg (Pmode, XEXP (ops[0], 0));
|
819 |
|
|
addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
|
820 |
|
|
emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
|
821 |
|
|
emit_insn (gen_negsi2 (rotl, low));
|
822 |
|
|
emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
|
823 |
|
|
emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
|
824 |
|
|
mem = change_address (ops[0], TImode, addr0);
|
825 |
|
|
set_mem_alias_set (mem, 0);
|
826 |
|
|
emit_move_insn (dst, mem);
|
827 |
|
|
emit_insn (gen_selb (dst, dst, shift_reg, mask0));
|
828 |
|
|
if (start + width > MEM_ALIGN (ops[0]))
|
829 |
|
|
{
|
830 |
|
|
rtx shl = gen_reg_rtx (SImode);
|
831 |
|
|
rtx mask1 = gen_reg_rtx (TImode);
|
832 |
|
|
rtx dst1 = gen_reg_rtx (TImode);
|
833 |
|
|
rtx mem1;
|
834 |
|
|
addr1 = plus_constant (addr, 16);
|
835 |
|
|
addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
|
836 |
|
|
emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
|
837 |
|
|
emit_insn (gen_shlqby_ti (mask1, mask, shl));
|
838 |
|
|
mem1 = change_address (ops[0], TImode, addr1);
|
839 |
|
|
set_mem_alias_set (mem1, 0);
|
840 |
|
|
emit_move_insn (dst1, mem1);
|
841 |
|
|
emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
|
842 |
|
|
emit_move_insn (mem1, dst1);
|
843 |
|
|
}
|
844 |
|
|
emit_move_insn (mem, dst);
|
845 |
|
|
}
|
846 |
|
|
else
|
847 |
|
|
emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
|
848 |
|
|
}
|
849 |
|
|
|
850 |
|
|
|
851 |
|
|
int
|
852 |
|
|
spu_expand_block_move (rtx ops[])
|
853 |
|
|
{
|
854 |
|
|
HOST_WIDE_INT bytes, align, offset;
|
855 |
|
|
rtx src, dst, sreg, dreg, target;
|
856 |
|
|
int i;
|
857 |
|
|
if (GET_CODE (ops[2]) != CONST_INT
|
858 |
|
|
|| GET_CODE (ops[3]) != CONST_INT
|
859 |
|
|
|| INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
|
860 |
|
|
return 0;
|
861 |
|
|
|
862 |
|
|
bytes = INTVAL (ops[2]);
|
863 |
|
|
align = INTVAL (ops[3]);
|
864 |
|
|
|
865 |
|
|
if (bytes <= 0)
|
866 |
|
|
return 1;
|
867 |
|
|
|
868 |
|
|
dst = ops[0];
|
869 |
|
|
src = ops[1];
|
870 |
|
|
|
871 |
|
|
if (align == 16)
|
872 |
|
|
{
|
873 |
|
|
for (offset = 0; offset + 16 <= bytes; offset += 16)
|
874 |
|
|
{
|
875 |
|
|
dst = adjust_address (ops[0], V16QImode, offset);
|
876 |
|
|
src = adjust_address (ops[1], V16QImode, offset);
|
877 |
|
|
emit_move_insn (dst, src);
|
878 |
|
|
}
|
879 |
|
|
if (offset < bytes)
|
880 |
|
|
{
|
881 |
|
|
rtx mask;
|
882 |
|
|
unsigned char arr[16] = { 0 };
|
883 |
|
|
for (i = 0; i < bytes - offset; i++)
|
884 |
|
|
arr[i] = 0xff;
|
885 |
|
|
dst = adjust_address (ops[0], V16QImode, offset);
|
886 |
|
|
src = adjust_address (ops[1], V16QImode, offset);
|
887 |
|
|
mask = gen_reg_rtx (V16QImode);
|
888 |
|
|
sreg = gen_reg_rtx (V16QImode);
|
889 |
|
|
dreg = gen_reg_rtx (V16QImode);
|
890 |
|
|
target = gen_reg_rtx (V16QImode);
|
891 |
|
|
emit_move_insn (mask, array_to_constant (V16QImode, arr));
|
892 |
|
|
emit_move_insn (dreg, dst);
|
893 |
|
|
emit_move_insn (sreg, src);
|
894 |
|
|
emit_insn (gen_selb (target, dreg, sreg, mask));
|
895 |
|
|
emit_move_insn (dst, target);
|
896 |
|
|
}
|
897 |
|
|
return 1;
|
898 |
|
|
}
|
899 |
|
|
return 0;
|
900 |
|
|
}
|
901 |
|
|
|
902 |
|
|
enum spu_comp_code
|
903 |
|
|
{ SPU_EQ, SPU_GT, SPU_GTU };
|
904 |
|
|
|
905 |
|
|
int spu_comp_icode[12][3] = {
|
906 |
|
|
{CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
|
907 |
|
|
{CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
|
908 |
|
|
{CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
|
909 |
|
|
{CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
|
910 |
|
|
{CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
|
911 |
|
|
{CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
|
912 |
|
|
{CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
|
913 |
|
|
{CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
|
914 |
|
|
{CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
|
915 |
|
|
{CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
|
916 |
|
|
{CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
|
917 |
|
|
{CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
|
918 |
|
|
};
|
919 |
|
|
|
920 |
|
|
/* Generate a compare for CODE. Return a brand-new rtx that represents
|
921 |
|
|
the result of the compare. GCC can figure this out too if we don't
|
922 |
|
|
provide all variations of compares, but GCC always wants to use
|
923 |
|
|
WORD_MODE, we can generate better code in most cases if we do it
|
924 |
|
|
ourselves. */
|
925 |
|
|
void
|
926 |
|
|
spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
|
927 |
|
|
{
|
928 |
|
|
int reverse_compare = 0;
|
929 |
|
|
int reverse_test = 0;
|
930 |
|
|
rtx compare_result, eq_result;
|
931 |
|
|
rtx comp_rtx, eq_rtx;
|
932 |
|
|
enum machine_mode comp_mode;
|
933 |
|
|
enum machine_mode op_mode;
|
934 |
|
|
enum spu_comp_code scode, eq_code;
|
935 |
|
|
enum insn_code ior_code;
|
936 |
|
|
enum rtx_code code = GET_CODE (cmp);
|
937 |
|
|
rtx op0 = XEXP (cmp, 0);
|
938 |
|
|
rtx op1 = XEXP (cmp, 1);
|
939 |
|
|
int index;
|
940 |
|
|
int eq_test = 0;
|
941 |
|
|
|
942 |
|
|
/* When op1 is a CONST_INT change (X >= C) to (X > C-1),
|
943 |
|
|
and so on, to keep the constant in operand 1. */
|
944 |
|
|
if (GET_CODE (op1) == CONST_INT)
|
945 |
|
|
{
|
946 |
|
|
HOST_WIDE_INT val = INTVAL (op1) - 1;
|
947 |
|
|
if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
|
948 |
|
|
switch (code)
|
949 |
|
|
{
|
950 |
|
|
case GE:
|
951 |
|
|
op1 = GEN_INT (val);
|
952 |
|
|
code = GT;
|
953 |
|
|
break;
|
954 |
|
|
case LT:
|
955 |
|
|
op1 = GEN_INT (val);
|
956 |
|
|
code = LE;
|
957 |
|
|
break;
|
958 |
|
|
case GEU:
|
959 |
|
|
op1 = GEN_INT (val);
|
960 |
|
|
code = GTU;
|
961 |
|
|
break;
|
962 |
|
|
case LTU:
|
963 |
|
|
op1 = GEN_INT (val);
|
964 |
|
|
code = LEU;
|
965 |
|
|
break;
|
966 |
|
|
default:
|
967 |
|
|
break;
|
968 |
|
|
}
|
969 |
|
|
}
|
970 |
|
|
|
971 |
|
|
comp_mode = SImode;
|
972 |
|
|
op_mode = GET_MODE (op0);
|
973 |
|
|
|
974 |
|
|
switch (code)
|
975 |
|
|
{
|
976 |
|
|
case GE:
|
977 |
|
|
scode = SPU_GT;
|
978 |
|
|
if (HONOR_NANS (op_mode))
|
979 |
|
|
{
|
980 |
|
|
reverse_compare = 0;
|
981 |
|
|
reverse_test = 0;
|
982 |
|
|
eq_test = 1;
|
983 |
|
|
eq_code = SPU_EQ;
|
984 |
|
|
}
|
985 |
|
|
else
|
986 |
|
|
{
|
987 |
|
|
reverse_compare = 1;
|
988 |
|
|
reverse_test = 1;
|
989 |
|
|
}
|
990 |
|
|
break;
|
991 |
|
|
case LE:
|
992 |
|
|
scode = SPU_GT;
|
993 |
|
|
if (HONOR_NANS (op_mode))
|
994 |
|
|
{
|
995 |
|
|
reverse_compare = 1;
|
996 |
|
|
reverse_test = 0;
|
997 |
|
|
eq_test = 1;
|
998 |
|
|
eq_code = SPU_EQ;
|
999 |
|
|
}
|
1000 |
|
|
else
|
1001 |
|
|
{
|
1002 |
|
|
reverse_compare = 0;
|
1003 |
|
|
reverse_test = 1;
|
1004 |
|
|
}
|
1005 |
|
|
break;
|
1006 |
|
|
case LT:
|
1007 |
|
|
reverse_compare = 1;
|
1008 |
|
|
reverse_test = 0;
|
1009 |
|
|
scode = SPU_GT;
|
1010 |
|
|
break;
|
1011 |
|
|
case GEU:
|
1012 |
|
|
reverse_compare = 1;
|
1013 |
|
|
reverse_test = 1;
|
1014 |
|
|
scode = SPU_GTU;
|
1015 |
|
|
break;
|
1016 |
|
|
case LEU:
|
1017 |
|
|
reverse_compare = 0;
|
1018 |
|
|
reverse_test = 1;
|
1019 |
|
|
scode = SPU_GTU;
|
1020 |
|
|
break;
|
1021 |
|
|
case LTU:
|
1022 |
|
|
reverse_compare = 1;
|
1023 |
|
|
reverse_test = 0;
|
1024 |
|
|
scode = SPU_GTU;
|
1025 |
|
|
break;
|
1026 |
|
|
case NE:
|
1027 |
|
|
reverse_compare = 0;
|
1028 |
|
|
reverse_test = 1;
|
1029 |
|
|
scode = SPU_EQ;
|
1030 |
|
|
break;
|
1031 |
|
|
|
1032 |
|
|
case EQ:
|
1033 |
|
|
scode = SPU_EQ;
|
1034 |
|
|
break;
|
1035 |
|
|
case GT:
|
1036 |
|
|
scode = SPU_GT;
|
1037 |
|
|
break;
|
1038 |
|
|
case GTU:
|
1039 |
|
|
scode = SPU_GTU;
|
1040 |
|
|
break;
|
1041 |
|
|
default:
|
1042 |
|
|
scode = SPU_EQ;
|
1043 |
|
|
break;
|
1044 |
|
|
}
|
1045 |
|
|
|
1046 |
|
|
switch (op_mode)
|
1047 |
|
|
{
|
1048 |
|
|
case QImode:
|
1049 |
|
|
index = 0;
|
1050 |
|
|
comp_mode = QImode;
|
1051 |
|
|
break;
|
1052 |
|
|
case HImode:
|
1053 |
|
|
index = 1;
|
1054 |
|
|
comp_mode = HImode;
|
1055 |
|
|
break;
|
1056 |
|
|
case SImode:
|
1057 |
|
|
index = 2;
|
1058 |
|
|
break;
|
1059 |
|
|
case DImode:
|
1060 |
|
|
index = 3;
|
1061 |
|
|
break;
|
1062 |
|
|
case TImode:
|
1063 |
|
|
index = 4;
|
1064 |
|
|
break;
|
1065 |
|
|
case SFmode:
|
1066 |
|
|
index = 5;
|
1067 |
|
|
break;
|
1068 |
|
|
case DFmode:
|
1069 |
|
|
index = 6;
|
1070 |
|
|
break;
|
1071 |
|
|
case V16QImode:
|
1072 |
|
|
index = 7;
|
1073 |
|
|
comp_mode = op_mode;
|
1074 |
|
|
break;
|
1075 |
|
|
case V8HImode:
|
1076 |
|
|
index = 8;
|
1077 |
|
|
comp_mode = op_mode;
|
1078 |
|
|
break;
|
1079 |
|
|
case V4SImode:
|
1080 |
|
|
index = 9;
|
1081 |
|
|
comp_mode = op_mode;
|
1082 |
|
|
break;
|
1083 |
|
|
case V4SFmode:
|
1084 |
|
|
index = 10;
|
1085 |
|
|
comp_mode = V4SImode;
|
1086 |
|
|
break;
|
1087 |
|
|
case V2DFmode:
|
1088 |
|
|
index = 11;
|
1089 |
|
|
comp_mode = V2DImode;
|
1090 |
|
|
break;
|
1091 |
|
|
case V2DImode:
|
1092 |
|
|
default:
|
1093 |
|
|
abort ();
|
1094 |
|
|
}
|
1095 |
|
|
|
1096 |
|
|
if (GET_MODE (op1) == DFmode
|
1097 |
|
|
&& (scode != SPU_GT && scode != SPU_EQ))
|
1098 |
|
|
abort ();
|
1099 |
|
|
|
1100 |
|
|
if (is_set == 0 && op1 == const0_rtx
|
1101 |
|
|
&& (GET_MODE (op0) == SImode
|
1102 |
|
|
|| GET_MODE (op0) == HImode) && scode == SPU_EQ)
|
1103 |
|
|
{
|
1104 |
|
|
/* Don't need to set a register with the result when we are
|
1105 |
|
|
comparing against zero and branching. */
|
1106 |
|
|
reverse_test = !reverse_test;
|
1107 |
|
|
compare_result = op0;
|
1108 |
|
|
}
|
1109 |
|
|
else
|
1110 |
|
|
{
|
1111 |
|
|
compare_result = gen_reg_rtx (comp_mode);
|
1112 |
|
|
|
1113 |
|
|
if (reverse_compare)
|
1114 |
|
|
{
|
1115 |
|
|
rtx t = op1;
|
1116 |
|
|
op1 = op0;
|
1117 |
|
|
op0 = t;
|
1118 |
|
|
}
|
1119 |
|
|
|
1120 |
|
|
if (spu_comp_icode[index][scode] == 0)
|
1121 |
|
|
abort ();
|
1122 |
|
|
|
1123 |
|
|
if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
|
1124 |
|
|
(op0, op_mode))
|
1125 |
|
|
op0 = force_reg (op_mode, op0);
|
1126 |
|
|
if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
|
1127 |
|
|
(op1, op_mode))
|
1128 |
|
|
op1 = force_reg (op_mode, op1);
|
1129 |
|
|
comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
|
1130 |
|
|
op0, op1);
|
1131 |
|
|
if (comp_rtx == 0)
|
1132 |
|
|
abort ();
|
1133 |
|
|
emit_insn (comp_rtx);
|
1134 |
|
|
|
1135 |
|
|
if (eq_test)
|
1136 |
|
|
{
|
1137 |
|
|
eq_result = gen_reg_rtx (comp_mode);
|
1138 |
|
|
eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
|
1139 |
|
|
op0, op1);
|
1140 |
|
|
if (eq_rtx == 0)
|
1141 |
|
|
abort ();
|
1142 |
|
|
emit_insn (eq_rtx);
|
1143 |
|
|
ior_code = ior_optab->handlers[(int)comp_mode].insn_code;
|
1144 |
|
|
gcc_assert (ior_code != CODE_FOR_nothing);
|
1145 |
|
|
emit_insn (GEN_FCN (ior_code)
|
1146 |
|
|
(compare_result, compare_result, eq_result));
|
1147 |
|
|
}
|
1148 |
|
|
}
|
1149 |
|
|
|
1150 |
|
|
if (is_set == 0)
|
1151 |
|
|
{
|
1152 |
|
|
rtx bcomp;
|
1153 |
|
|
rtx loc_ref;
|
1154 |
|
|
|
1155 |
|
|
/* We don't have branch on QI compare insns, so we convert the
|
1156 |
|
|
QI compare result to a HI result. */
|
1157 |
|
|
if (comp_mode == QImode)
|
1158 |
|
|
{
|
1159 |
|
|
rtx old_res = compare_result;
|
1160 |
|
|
compare_result = gen_reg_rtx (HImode);
|
1161 |
|
|
comp_mode = HImode;
|
1162 |
|
|
emit_insn (gen_extendqihi2 (compare_result, old_res));
|
1163 |
|
|
}
|
1164 |
|
|
|
1165 |
|
|
if (reverse_test)
|
1166 |
|
|
bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
|
1167 |
|
|
else
|
1168 |
|
|
bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
|
1169 |
|
|
|
1170 |
|
|
loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
|
1171 |
|
|
emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
|
1172 |
|
|
gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
|
1173 |
|
|
loc_ref, pc_rtx)));
|
1174 |
|
|
}
|
1175 |
|
|
else if (is_set == 2)
|
1176 |
|
|
{
|
1177 |
|
|
rtx target = operands[0];
|
1178 |
|
|
int compare_size = GET_MODE_BITSIZE (comp_mode);
|
1179 |
|
|
int target_size = GET_MODE_BITSIZE (GET_MODE (target));
|
1180 |
|
|
enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
|
1181 |
|
|
rtx select_mask;
|
1182 |
|
|
rtx op_t = operands[2];
|
1183 |
|
|
rtx op_f = operands[3];
|
1184 |
|
|
|
1185 |
|
|
/* The result of the comparison can be SI, HI or QI mode. Create a
|
1186 |
|
|
mask based on that result. */
|
1187 |
|
|
if (target_size > compare_size)
|
1188 |
|
|
{
|
1189 |
|
|
select_mask = gen_reg_rtx (mode);
|
1190 |
|
|
emit_insn (gen_extend_compare (select_mask, compare_result));
|
1191 |
|
|
}
|
1192 |
|
|
else if (target_size < compare_size)
|
1193 |
|
|
select_mask =
|
1194 |
|
|
gen_rtx_SUBREG (mode, compare_result,
|
1195 |
|
|
(compare_size - target_size) / BITS_PER_UNIT);
|
1196 |
|
|
else if (comp_mode != mode)
|
1197 |
|
|
select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
|
1198 |
|
|
else
|
1199 |
|
|
select_mask = compare_result;
|
1200 |
|
|
|
1201 |
|
|
if (GET_MODE (target) != GET_MODE (op_t)
|
1202 |
|
|
|| GET_MODE (target) != GET_MODE (op_f))
|
1203 |
|
|
abort ();
|
1204 |
|
|
|
1205 |
|
|
if (reverse_test)
|
1206 |
|
|
emit_insn (gen_selb (target, op_t, op_f, select_mask));
|
1207 |
|
|
else
|
1208 |
|
|
emit_insn (gen_selb (target, op_f, op_t, select_mask));
|
1209 |
|
|
}
|
1210 |
|
|
else
|
1211 |
|
|
{
|
1212 |
|
|
rtx target = operands[0];
|
1213 |
|
|
if (reverse_test)
|
1214 |
|
|
emit_insn (gen_rtx_SET (VOIDmode, compare_result,
|
1215 |
|
|
gen_rtx_NOT (comp_mode, compare_result)));
|
1216 |
|
|
if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
|
1217 |
|
|
emit_insn (gen_extendhisi2 (target, compare_result));
|
1218 |
|
|
else if (GET_MODE (target) == SImode
|
1219 |
|
|
&& GET_MODE (compare_result) == QImode)
|
1220 |
|
|
emit_insn (gen_extend_compare (target, compare_result));
|
1221 |
|
|
else
|
1222 |
|
|
emit_move_insn (target, compare_result);
|
1223 |
|
|
}
|
1224 |
|
|
}
|
1225 |
|
|
|
1226 |
|
|
HOST_WIDE_INT
|
1227 |
|
|
const_double_to_hwint (rtx x)
|
1228 |
|
|
{
|
1229 |
|
|
HOST_WIDE_INT val;
|
1230 |
|
|
REAL_VALUE_TYPE rv;
|
1231 |
|
|
if (GET_MODE (x) == SFmode)
|
1232 |
|
|
{
|
1233 |
|
|
REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
|
1234 |
|
|
REAL_VALUE_TO_TARGET_SINGLE (rv, val);
|
1235 |
|
|
}
|
1236 |
|
|
else if (GET_MODE (x) == DFmode)
|
1237 |
|
|
{
|
1238 |
|
|
long l[2];
|
1239 |
|
|
REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
|
1240 |
|
|
REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
|
1241 |
|
|
val = l[0];
|
1242 |
|
|
val = (val << 32) | (l[1] & 0xffffffff);
|
1243 |
|
|
}
|
1244 |
|
|
else
|
1245 |
|
|
abort ();
|
1246 |
|
|
return val;
|
1247 |
|
|
}
|
1248 |
|
|
|
1249 |
|
|
rtx
|
1250 |
|
|
hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
|
1251 |
|
|
{
|
1252 |
|
|
long tv[2];
|
1253 |
|
|
REAL_VALUE_TYPE rv;
|
1254 |
|
|
gcc_assert (mode == SFmode || mode == DFmode);
|
1255 |
|
|
|
1256 |
|
|
if (mode == SFmode)
|
1257 |
|
|
tv[0] = (v << 32) >> 32;
|
1258 |
|
|
else if (mode == DFmode)
|
1259 |
|
|
{
|
1260 |
|
|
tv[1] = (v << 32) >> 32;
|
1261 |
|
|
tv[0] = v >> 32;
|
1262 |
|
|
}
|
1263 |
|
|
real_from_target (&rv, tv, mode);
|
1264 |
|
|
return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
|
1265 |
|
|
}
|
1266 |
|
|
|
1267 |
|
|
void
|
1268 |
|
|
print_operand_address (FILE * file, register rtx addr)
|
1269 |
|
|
{
|
1270 |
|
|
rtx reg;
|
1271 |
|
|
rtx offset;
|
1272 |
|
|
|
1273 |
|
|
if (GET_CODE (addr) == AND
|
1274 |
|
|
&& GET_CODE (XEXP (addr, 1)) == CONST_INT
|
1275 |
|
|
&& INTVAL (XEXP (addr, 1)) == -16)
|
1276 |
|
|
addr = XEXP (addr, 0);
|
1277 |
|
|
|
1278 |
|
|
switch (GET_CODE (addr))
|
1279 |
|
|
{
|
1280 |
|
|
case REG:
|
1281 |
|
|
fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
|
1282 |
|
|
break;
|
1283 |
|
|
|
1284 |
|
|
case PLUS:
|
1285 |
|
|
reg = XEXP (addr, 0);
|
1286 |
|
|
offset = XEXP (addr, 1);
|
1287 |
|
|
if (GET_CODE (offset) == REG)
|
1288 |
|
|
{
|
1289 |
|
|
fprintf (file, "%s,%s", reg_names[REGNO (reg)],
|
1290 |
|
|
reg_names[REGNO (offset)]);
|
1291 |
|
|
}
|
1292 |
|
|
else if (GET_CODE (offset) == CONST_INT)
|
1293 |
|
|
{
|
1294 |
|
|
fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
|
1295 |
|
|
INTVAL (offset), reg_names[REGNO (reg)]);
|
1296 |
|
|
}
|
1297 |
|
|
else
|
1298 |
|
|
abort ();
|
1299 |
|
|
break;
|
1300 |
|
|
|
1301 |
|
|
case CONST:
|
1302 |
|
|
case LABEL_REF:
|
1303 |
|
|
case SYMBOL_REF:
|
1304 |
|
|
case CONST_INT:
|
1305 |
|
|
output_addr_const (file, addr);
|
1306 |
|
|
break;
|
1307 |
|
|
|
1308 |
|
|
default:
|
1309 |
|
|
debug_rtx (addr);
|
1310 |
|
|
abort ();
|
1311 |
|
|
}
|
1312 |
|
|
}
|
1313 |
|
|
|
1314 |
|
|
void
|
1315 |
|
|
print_operand (FILE * file, rtx x, int code)
|
1316 |
|
|
{
|
1317 |
|
|
enum machine_mode mode = GET_MODE (x);
|
1318 |
|
|
HOST_WIDE_INT val;
|
1319 |
|
|
unsigned char arr[16];
|
1320 |
|
|
int xcode = GET_CODE (x);
|
1321 |
|
|
int i, info;
|
1322 |
|
|
if (GET_MODE (x) == VOIDmode)
|
1323 |
|
|
switch (code)
|
1324 |
|
|
{
|
1325 |
|
|
case 'L': /* 128 bits, signed */
|
1326 |
|
|
case 'm': /* 128 bits, signed */
|
1327 |
|
|
case 'T': /* 128 bits, signed */
|
1328 |
|
|
case 't': /* 128 bits, signed */
|
1329 |
|
|
mode = TImode;
|
1330 |
|
|
break;
|
1331 |
|
|
case 'K': /* 64 bits, signed */
|
1332 |
|
|
case 'k': /* 64 bits, signed */
|
1333 |
|
|
case 'D': /* 64 bits, signed */
|
1334 |
|
|
case 'd': /* 64 bits, signed */
|
1335 |
|
|
mode = DImode;
|
1336 |
|
|
break;
|
1337 |
|
|
case 'J': /* 32 bits, signed */
|
1338 |
|
|
case 'j': /* 32 bits, signed */
|
1339 |
|
|
case 's': /* 32 bits, signed */
|
1340 |
|
|
case 'S': /* 32 bits, signed */
|
1341 |
|
|
mode = SImode;
|
1342 |
|
|
break;
|
1343 |
|
|
}
|
1344 |
|
|
switch (code)
|
1345 |
|
|
{
|
1346 |
|
|
|
1347 |
|
|
case 'j': /* 32 bits, signed */
|
1348 |
|
|
case 'k': /* 64 bits, signed */
|
1349 |
|
|
case 'm': /* 128 bits, signed */
|
1350 |
|
|
if (xcode == CONST_INT
|
1351 |
|
|
|| xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
|
1352 |
|
|
{
|
1353 |
|
|
gcc_assert (logical_immediate_p (x, mode));
|
1354 |
|
|
constant_to_array (mode, x, arr);
|
1355 |
|
|
val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
|
1356 |
|
|
val = trunc_int_for_mode (val, SImode);
|
1357 |
|
|
switch (which_logical_immediate (val))
|
1358 |
|
|
{
|
1359 |
|
|
case SPU_ORI:
|
1360 |
|
|
break;
|
1361 |
|
|
case SPU_ORHI:
|
1362 |
|
|
fprintf (file, "h");
|
1363 |
|
|
break;
|
1364 |
|
|
case SPU_ORBI:
|
1365 |
|
|
fprintf (file, "b");
|
1366 |
|
|
break;
|
1367 |
|
|
default:
|
1368 |
|
|
gcc_unreachable();
|
1369 |
|
|
}
|
1370 |
|
|
}
|
1371 |
|
|
else
|
1372 |
|
|
gcc_unreachable();
|
1373 |
|
|
return;
|
1374 |
|
|
|
1375 |
|
|
case 'J': /* 32 bits, signed */
|
1376 |
|
|
case 'K': /* 64 bits, signed */
|
1377 |
|
|
case 'L': /* 128 bits, signed */
|
1378 |
|
|
if (xcode == CONST_INT
|
1379 |
|
|
|| xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
|
1380 |
|
|
{
|
1381 |
|
|
gcc_assert (logical_immediate_p (x, mode)
|
1382 |
|
|
|| iohl_immediate_p (x, mode));
|
1383 |
|
|
constant_to_array (mode, x, arr);
|
1384 |
|
|
val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
|
1385 |
|
|
val = trunc_int_for_mode (val, SImode);
|
1386 |
|
|
switch (which_logical_immediate (val))
|
1387 |
|
|
{
|
1388 |
|
|
case SPU_ORI:
|
1389 |
|
|
case SPU_IOHL:
|
1390 |
|
|
break;
|
1391 |
|
|
case SPU_ORHI:
|
1392 |
|
|
val = trunc_int_for_mode (val, HImode);
|
1393 |
|
|
break;
|
1394 |
|
|
case SPU_ORBI:
|
1395 |
|
|
val = trunc_int_for_mode (val, QImode);
|
1396 |
|
|
break;
|
1397 |
|
|
default:
|
1398 |
|
|
gcc_unreachable();
|
1399 |
|
|
}
|
1400 |
|
|
fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
|
1401 |
|
|
}
|
1402 |
|
|
else
|
1403 |
|
|
gcc_unreachable();
|
1404 |
|
|
return;
|
1405 |
|
|
|
1406 |
|
|
case 't': /* 128 bits, signed */
|
1407 |
|
|
case 'd': /* 64 bits, signed */
|
1408 |
|
|
case 's': /* 32 bits, signed */
|
1409 |
|
|
if (CONSTANT_P (x))
|
1410 |
|
|
{
|
1411 |
|
|
enum immediate_class c = classify_immediate (x, mode);
|
1412 |
|
|
switch (c)
|
1413 |
|
|
{
|
1414 |
|
|
case IC_IL1:
|
1415 |
|
|
constant_to_array (mode, x, arr);
|
1416 |
|
|
val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
|
1417 |
|
|
val = trunc_int_for_mode (val, SImode);
|
1418 |
|
|
switch (which_immediate_load (val))
|
1419 |
|
|
{
|
1420 |
|
|
case SPU_IL:
|
1421 |
|
|
break;
|
1422 |
|
|
case SPU_ILA:
|
1423 |
|
|
fprintf (file, "a");
|
1424 |
|
|
break;
|
1425 |
|
|
case SPU_ILH:
|
1426 |
|
|
fprintf (file, "h");
|
1427 |
|
|
break;
|
1428 |
|
|
case SPU_ILHU:
|
1429 |
|
|
fprintf (file, "hu");
|
1430 |
|
|
break;
|
1431 |
|
|
default:
|
1432 |
|
|
gcc_unreachable ();
|
1433 |
|
|
}
|
1434 |
|
|
break;
|
1435 |
|
|
case IC_CPAT:
|
1436 |
|
|
constant_to_array (mode, x, arr);
|
1437 |
|
|
cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
|
1438 |
|
|
if (info == 1)
|
1439 |
|
|
fprintf (file, "b");
|
1440 |
|
|
else if (info == 2)
|
1441 |
|
|
fprintf (file, "h");
|
1442 |
|
|
else if (info == 4)
|
1443 |
|
|
fprintf (file, "w");
|
1444 |
|
|
else if (info == 8)
|
1445 |
|
|
fprintf (file, "d");
|
1446 |
|
|
break;
|
1447 |
|
|
case IC_IL1s:
|
1448 |
|
|
if (xcode == CONST_VECTOR)
|
1449 |
|
|
{
|
1450 |
|
|
x = CONST_VECTOR_ELT (x, 0);
|
1451 |
|
|
xcode = GET_CODE (x);
|
1452 |
|
|
}
|
1453 |
|
|
if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
|
1454 |
|
|
fprintf (file, "a");
|
1455 |
|
|
else if (xcode == HIGH)
|
1456 |
|
|
fprintf (file, "hu");
|
1457 |
|
|
break;
|
1458 |
|
|
case IC_FSMBI:
|
1459 |
|
|
case IC_FSMBI2:
|
1460 |
|
|
case IC_IL2:
|
1461 |
|
|
case IC_IL2s:
|
1462 |
|
|
case IC_POOL:
|
1463 |
|
|
abort ();
|
1464 |
|
|
}
|
1465 |
|
|
}
|
1466 |
|
|
else
|
1467 |
|
|
gcc_unreachable ();
|
1468 |
|
|
return;
|
1469 |
|
|
|
1470 |
|
|
case 'T': /* 128 bits, signed */
|
1471 |
|
|
case 'D': /* 64 bits, signed */
|
1472 |
|
|
case 'S': /* 32 bits, signed */
|
1473 |
|
|
if (CONSTANT_P (x))
|
1474 |
|
|
{
|
1475 |
|
|
enum immediate_class c = classify_immediate (x, mode);
|
1476 |
|
|
switch (c)
|
1477 |
|
|
{
|
1478 |
|
|
case IC_IL1:
|
1479 |
|
|
constant_to_array (mode, x, arr);
|
1480 |
|
|
val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
|
1481 |
|
|
val = trunc_int_for_mode (val, SImode);
|
1482 |
|
|
switch (which_immediate_load (val))
|
1483 |
|
|
{
|
1484 |
|
|
case SPU_IL:
|
1485 |
|
|
case SPU_ILA:
|
1486 |
|
|
break;
|
1487 |
|
|
case SPU_ILH:
|
1488 |
|
|
case SPU_ILHU:
|
1489 |
|
|
val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
|
1490 |
|
|
break;
|
1491 |
|
|
default:
|
1492 |
|
|
gcc_unreachable ();
|
1493 |
|
|
}
|
1494 |
|
|
fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
|
1495 |
|
|
break;
|
1496 |
|
|
case IC_FSMBI:
|
1497 |
|
|
constant_to_array (mode, x, arr);
|
1498 |
|
|
val = 0;
|
1499 |
|
|
for (i = 0; i < 16; i++)
|
1500 |
|
|
{
|
1501 |
|
|
val <<= 1;
|
1502 |
|
|
val |= arr[i] & 1;
|
1503 |
|
|
}
|
1504 |
|
|
print_operand (file, GEN_INT (val), 0);
|
1505 |
|
|
break;
|
1506 |
|
|
case IC_CPAT:
|
1507 |
|
|
constant_to_array (mode, x, arr);
|
1508 |
|
|
cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
|
1509 |
|
|
fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
|
1510 |
|
|
break;
|
1511 |
|
|
case IC_IL1s:
|
1512 |
|
|
if (xcode == HIGH)
|
1513 |
|
|
x = XEXP (x, 0);
|
1514 |
|
|
if (GET_CODE (x) == CONST_VECTOR)
|
1515 |
|
|
x = CONST_VECTOR_ELT (x, 0);
|
1516 |
|
|
output_addr_const (file, x);
|
1517 |
|
|
if (xcode == HIGH)
|
1518 |
|
|
fprintf (file, "@h");
|
1519 |
|
|
break;
|
1520 |
|
|
case IC_IL2:
|
1521 |
|
|
case IC_IL2s:
|
1522 |
|
|
case IC_FSMBI2:
|
1523 |
|
|
case IC_POOL:
|
1524 |
|
|
abort ();
|
1525 |
|
|
}
|
1526 |
|
|
}
|
1527 |
|
|
else
|
1528 |
|
|
gcc_unreachable ();
|
1529 |
|
|
return;
|
1530 |
|
|
|
1531 |
|
|
case 'C':
|
1532 |
|
|
if (xcode == CONST_INT)
|
1533 |
|
|
{
|
1534 |
|
|
/* Only 4 least significant bits are relevant for generate
|
1535 |
|
|
control word instructions. */
|
1536 |
|
|
fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
|
1537 |
|
|
return;
|
1538 |
|
|
}
|
1539 |
|
|
break;
|
1540 |
|
|
|
1541 |
|
|
case 'M': /* print code for c*d */
|
1542 |
|
|
if (GET_CODE (x) == CONST_INT)
|
1543 |
|
|
switch (INTVAL (x))
|
1544 |
|
|
{
|
1545 |
|
|
case 1:
|
1546 |
|
|
fprintf (file, "b");
|
1547 |
|
|
break;
|
1548 |
|
|
case 2:
|
1549 |
|
|
fprintf (file, "h");
|
1550 |
|
|
break;
|
1551 |
|
|
case 4:
|
1552 |
|
|
fprintf (file, "w");
|
1553 |
|
|
break;
|
1554 |
|
|
case 8:
|
1555 |
|
|
fprintf (file, "d");
|
1556 |
|
|
break;
|
1557 |
|
|
default:
|
1558 |
|
|
gcc_unreachable();
|
1559 |
|
|
}
|
1560 |
|
|
else
|
1561 |
|
|
gcc_unreachable();
|
1562 |
|
|
return;
|
1563 |
|
|
|
1564 |
|
|
case 'N': /* Negate the operand */
|
1565 |
|
|
if (xcode == CONST_INT)
|
1566 |
|
|
fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
|
1567 |
|
|
else if (xcode == CONST_VECTOR)
|
1568 |
|
|
fprintf (file, HOST_WIDE_INT_PRINT_DEC,
|
1569 |
|
|
-INTVAL (CONST_VECTOR_ELT (x, 0)));
|
1570 |
|
|
return;
|
1571 |
|
|
|
1572 |
|
|
case 'I': /* enable/disable interrupts */
|
1573 |
|
|
if (xcode == CONST_INT)
|
1574 |
|
|
fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
|
1575 |
|
|
return;
|
1576 |
|
|
|
1577 |
|
|
case 'b': /* branch modifiers */
|
1578 |
|
|
if (xcode == REG)
|
1579 |
|
|
fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
|
1580 |
|
|
else if (COMPARISON_P (x))
|
1581 |
|
|
fprintf (file, "%s", xcode == NE ? "n" : "");
|
1582 |
|
|
return;
|
1583 |
|
|
|
1584 |
|
|
case 'i': /* indirect call */
|
1585 |
|
|
if (xcode == MEM)
|
1586 |
|
|
{
|
1587 |
|
|
if (GET_CODE (XEXP (x, 0)) == REG)
|
1588 |
|
|
/* Used in indirect function calls. */
|
1589 |
|
|
fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
|
1590 |
|
|
else
|
1591 |
|
|
output_address (XEXP (x, 0));
|
1592 |
|
|
}
|
1593 |
|
|
return;
|
1594 |
|
|
|
1595 |
|
|
case 'p': /* load/store */
|
1596 |
|
|
if (xcode == MEM)
|
1597 |
|
|
{
|
1598 |
|
|
x = XEXP (x, 0);
|
1599 |
|
|
xcode = GET_CODE (x);
|
1600 |
|
|
}
|
1601 |
|
|
if (xcode == AND)
|
1602 |
|
|
{
|
1603 |
|
|
x = XEXP (x, 0);
|
1604 |
|
|
xcode = GET_CODE (x);
|
1605 |
|
|
}
|
1606 |
|
|
if (xcode == REG)
|
1607 |
|
|
fprintf (file, "d");
|
1608 |
|
|
else if (xcode == CONST_INT)
|
1609 |
|
|
fprintf (file, "a");
|
1610 |
|
|
else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
|
1611 |
|
|
fprintf (file, "r");
|
1612 |
|
|
else if (xcode == PLUS || xcode == LO_SUM)
|
1613 |
|
|
{
|
1614 |
|
|
if (GET_CODE (XEXP (x, 1)) == REG)
|
1615 |
|
|
fprintf (file, "x");
|
1616 |
|
|
else
|
1617 |
|
|
fprintf (file, "d");
|
1618 |
|
|
}
|
1619 |
|
|
return;
|
1620 |
|
|
|
1621 |
|
|
case 'e':
|
1622 |
|
|
val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
|
1623 |
|
|
val &= 0x7;
|
1624 |
|
|
output_addr_const (file, GEN_INT (val));
|
1625 |
|
|
return;
|
1626 |
|
|
|
1627 |
|
|
case 'f':
|
1628 |
|
|
val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
|
1629 |
|
|
val &= 0x1f;
|
1630 |
|
|
output_addr_const (file, GEN_INT (val));
|
1631 |
|
|
return;
|
1632 |
|
|
|
1633 |
|
|
case 'g':
|
1634 |
|
|
val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
|
1635 |
|
|
val &= 0x3f;
|
1636 |
|
|
output_addr_const (file, GEN_INT (val));
|
1637 |
|
|
return;
|
1638 |
|
|
|
1639 |
|
|
case 'h':
|
1640 |
|
|
val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
|
1641 |
|
|
val = (val >> 3) & 0x1f;
|
1642 |
|
|
output_addr_const (file, GEN_INT (val));
|
1643 |
|
|
return;
|
1644 |
|
|
|
1645 |
|
|
case 'E':
|
1646 |
|
|
val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
|
1647 |
|
|
val = -val;
|
1648 |
|
|
val &= 0x7;
|
1649 |
|
|
output_addr_const (file, GEN_INT (val));
|
1650 |
|
|
return;
|
1651 |
|
|
|
1652 |
|
|
case 'F':
|
1653 |
|
|
val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
|
1654 |
|
|
val = -val;
|
1655 |
|
|
val &= 0x1f;
|
1656 |
|
|
output_addr_const (file, GEN_INT (val));
|
1657 |
|
|
return;
|
1658 |
|
|
|
1659 |
|
|
case 'G':
|
1660 |
|
|
val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
|
1661 |
|
|
val = -val;
|
1662 |
|
|
val &= 0x3f;
|
1663 |
|
|
output_addr_const (file, GEN_INT (val));
|
1664 |
|
|
return;
|
1665 |
|
|
|
1666 |
|
|
case 'H':
|
1667 |
|
|
val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
|
1668 |
|
|
val = -(val & -8ll);
|
1669 |
|
|
val = (val >> 3) & 0x1f;
|
1670 |
|
|
output_addr_const (file, GEN_INT (val));
|
1671 |
|
|
return;
|
1672 |
|
|
|
1673 |
|
|
case 'v':
|
1674 |
|
|
case 'w':
|
1675 |
|
|
constant_to_array (mode, x, arr);
|
1676 |
|
|
val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
|
1677 |
|
|
output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
|
1678 |
|
|
return;
|
1679 |
|
|
|
1680 |
|
|
case 0:
|
1681 |
|
|
if (xcode == REG)
|
1682 |
|
|
fprintf (file, "%s", reg_names[REGNO (x)]);
|
1683 |
|
|
else if (xcode == MEM)
|
1684 |
|
|
output_address (XEXP (x, 0));
|
1685 |
|
|
else if (xcode == CONST_VECTOR)
|
1686 |
|
|
print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
|
1687 |
|
|
else
|
1688 |
|
|
output_addr_const (file, x);
|
1689 |
|
|
return;
|
1690 |
|
|
|
1691 |
|
|
/* unused letters
|
1692 |
|
|
o qr u yz
|
1693 |
|
|
AB OPQR UVWXYZ */
|
1694 |
|
|
default:
|
1695 |
|
|
output_operand_lossage ("invalid %%xn code");
|
1696 |
|
|
}
|
1697 |
|
|
gcc_unreachable ();
|
1698 |
|
|
}
|
1699 |
|
|
|
1700 |
|
|
extern char call_used_regs[];
|
1701 |
|
|
|
1702 |
|
|
/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
|
1703 |
|
|
caller saved register. For leaf functions it is more efficient to
|
1704 |
|
|
use a volatile register because we won't need to save and restore the
|
1705 |
|
|
pic register. This routine is only valid after register allocation
|
1706 |
|
|
is completed, so we can pick an unused register. */
|
1707 |
|
|
static rtx
|
1708 |
|
|
get_pic_reg (void)
|
1709 |
|
|
{
|
1710 |
|
|
rtx pic_reg = pic_offset_table_rtx;
|
1711 |
|
|
if (!reload_completed && !reload_in_progress)
|
1712 |
|
|
abort ();
|
1713 |
|
|
if (current_function_is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
|
1714 |
|
|
pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
|
1715 |
|
|
return pic_reg;
|
1716 |
|
|
}
|
1717 |
|
|
|
1718 |
|
|
/* Split constant addresses to handle cases that are too large.
|
1719 |
|
|
Add in the pic register when in PIC mode.
|
1720 |
|
|
Split immediates that require more than 1 instruction. */
|
1721 |
|
|
int
|
1722 |
|
|
spu_split_immediate (rtx * ops)
|
1723 |
|
|
{
|
1724 |
|
|
enum machine_mode mode = GET_MODE (ops[0]);
|
1725 |
|
|
enum immediate_class c = classify_immediate (ops[1], mode);
|
1726 |
|
|
|
1727 |
|
|
switch (c)
|
1728 |
|
|
{
|
1729 |
|
|
case IC_IL2:
|
1730 |
|
|
{
|
1731 |
|
|
unsigned char arrhi[16];
|
1732 |
|
|
unsigned char arrlo[16];
|
1733 |
|
|
rtx to, temp, hi, lo;
|
1734 |
|
|
int i;
|
1735 |
|
|
enum machine_mode imode = mode;
|
1736 |
|
|
/* We need to do reals as ints because the constant used in the
|
1737 |
|
|
IOR might not be a legitimate real constant. */
|
1738 |
|
|
imode = int_mode_for_mode (mode);
|
1739 |
|
|
constant_to_array (mode, ops[1], arrhi);
|
1740 |
|
|
if (imode != mode)
|
1741 |
|
|
to = simplify_gen_subreg (imode, ops[0], mode, 0);
|
1742 |
|
|
else
|
1743 |
|
|
to = ops[0];
|
1744 |
|
|
temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
|
1745 |
|
|
for (i = 0; i < 16; i += 4)
|
1746 |
|
|
{
|
1747 |
|
|
arrlo[i + 2] = arrhi[i + 2];
|
1748 |
|
|
arrlo[i + 3] = arrhi[i + 3];
|
1749 |
|
|
arrlo[i + 0] = arrlo[i + 1] = 0;
|
1750 |
|
|
arrhi[i + 2] = arrhi[i + 3] = 0;
|
1751 |
|
|
}
|
1752 |
|
|
hi = array_to_constant (imode, arrhi);
|
1753 |
|
|
lo = array_to_constant (imode, arrlo);
|
1754 |
|
|
emit_move_insn (temp, hi);
|
1755 |
|
|
emit_insn (gen_rtx_SET
|
1756 |
|
|
(VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
|
1757 |
|
|
return 1;
|
1758 |
|
|
}
|
1759 |
|
|
case IC_FSMBI2:
|
1760 |
|
|
{
|
1761 |
|
|
unsigned char arr_fsmbi[16];
|
1762 |
|
|
unsigned char arr_andbi[16];
|
1763 |
|
|
rtx to, reg_fsmbi, reg_and;
|
1764 |
|
|
int i;
|
1765 |
|
|
enum machine_mode imode = mode;
|
1766 |
|
|
/* We need to do reals as ints because the constant used in the
|
1767 |
|
|
* AND might not be a legitimate real constant. */
|
1768 |
|
|
imode = int_mode_for_mode (mode);
|
1769 |
|
|
constant_to_array (mode, ops[1], arr_fsmbi);
|
1770 |
|
|
if (imode != mode)
|
1771 |
|
|
to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
|
1772 |
|
|
else
|
1773 |
|
|
to = ops[0];
|
1774 |
|
|
for (i = 0; i < 16; i++)
|
1775 |
|
|
if (arr_fsmbi[i] != 0)
|
1776 |
|
|
{
|
1777 |
|
|
arr_andbi[0] = arr_fsmbi[i];
|
1778 |
|
|
arr_fsmbi[i] = 0xff;
|
1779 |
|
|
}
|
1780 |
|
|
for (i = 1; i < 16; i++)
|
1781 |
|
|
arr_andbi[i] = arr_andbi[0];
|
1782 |
|
|
reg_fsmbi = array_to_constant (imode, arr_fsmbi);
|
1783 |
|
|
reg_and = array_to_constant (imode, arr_andbi);
|
1784 |
|
|
emit_move_insn (to, reg_fsmbi);
|
1785 |
|
|
emit_insn (gen_rtx_SET
|
1786 |
|
|
(VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
|
1787 |
|
|
return 1;
|
1788 |
|
|
}
|
1789 |
|
|
case IC_POOL:
|
1790 |
|
|
if (reload_in_progress || reload_completed)
|
1791 |
|
|
{
|
1792 |
|
|
rtx mem = force_const_mem (mode, ops[1]);
|
1793 |
|
|
if (TARGET_LARGE_MEM)
|
1794 |
|
|
{
|
1795 |
|
|
rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
|
1796 |
|
|
emit_move_insn (addr, XEXP (mem, 0));
|
1797 |
|
|
mem = replace_equiv_address (mem, addr);
|
1798 |
|
|
}
|
1799 |
|
|
emit_move_insn (ops[0], mem);
|
1800 |
|
|
return 1;
|
1801 |
|
|
}
|
1802 |
|
|
break;
|
1803 |
|
|
case IC_IL1s:
|
1804 |
|
|
case IC_IL2s:
|
1805 |
|
|
if (reload_completed && GET_CODE (ops[1]) != HIGH)
|
1806 |
|
|
{
|
1807 |
|
|
if (c == IC_IL2s)
|
1808 |
|
|
{
|
1809 |
|
|
emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
|
1810 |
|
|
emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
|
1811 |
|
|
}
|
1812 |
|
|
else if (flag_pic)
|
1813 |
|
|
emit_insn (gen_pic (ops[0], ops[1]));
|
1814 |
|
|
if (flag_pic)
|
1815 |
|
|
{
|
1816 |
|
|
rtx pic_reg = get_pic_reg ();
|
1817 |
|
|
emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
|
1818 |
|
|
crtl->uses_pic_offset_table = 1;
|
1819 |
|
|
}
|
1820 |
|
|
return flag_pic || c == IC_IL2s;
|
1821 |
|
|
}
|
1822 |
|
|
break;
|
1823 |
|
|
case IC_IL1:
|
1824 |
|
|
case IC_FSMBI:
|
1825 |
|
|
case IC_CPAT:
|
1826 |
|
|
break;
|
1827 |
|
|
}
|
1828 |
|
|
return 0;
|
1829 |
|
|
}
|
1830 |
|
|
|
1831 |
|
|
/* SAVING is TRUE when we are generating the actual load and store
|
1832 |
|
|
instructions for REGNO. When determining the size of the stack
|
1833 |
|
|
needed for saving register we must allocate enough space for the
|
1834 |
|
|
worst case, because we don't always have the information early enough
|
1835 |
|
|
to not allocate it. But we can at least eliminate the actual loads
|
1836 |
|
|
and stores during the prologue/epilogue. */
|
1837 |
|
|
static int
|
1838 |
|
|
need_to_save_reg (int regno, int saving)
|
1839 |
|
|
{
|
1840 |
|
|
if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
|
1841 |
|
|
return 1;
|
1842 |
|
|
if (flag_pic
|
1843 |
|
|
&& regno == PIC_OFFSET_TABLE_REGNUM
|
1844 |
|
|
&& (!saving || crtl->uses_pic_offset_table)
|
1845 |
|
|
&& (!saving
|
1846 |
|
|
|| !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
|
1847 |
|
|
return 1;
|
1848 |
|
|
return 0;
|
1849 |
|
|
}
|
1850 |
|
|
|
1851 |
|
|
/* This function is only correct starting with local register
|
1852 |
|
|
allocation */
|
1853 |
|
|
int
|
1854 |
|
|
spu_saved_regs_size (void)
|
1855 |
|
|
{
|
1856 |
|
|
int reg_save_size = 0;
|
1857 |
|
|
int regno;
|
1858 |
|
|
|
1859 |
|
|
for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
|
1860 |
|
|
if (need_to_save_reg (regno, 0))
|
1861 |
|
|
reg_save_size += 0x10;
|
1862 |
|
|
return reg_save_size;
|
1863 |
|
|
}
|
1864 |
|
|
|
1865 |
|
|
static rtx
|
1866 |
|
|
frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
|
1867 |
|
|
{
|
1868 |
|
|
rtx reg = gen_rtx_REG (V4SImode, regno);
|
1869 |
|
|
rtx mem =
|
1870 |
|
|
gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
|
1871 |
|
|
return emit_insn (gen_movv4si (mem, reg));
|
1872 |
|
|
}
|
1873 |
|
|
|
1874 |
|
|
static rtx
|
1875 |
|
|
frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
|
1876 |
|
|
{
|
1877 |
|
|
rtx reg = gen_rtx_REG (V4SImode, regno);
|
1878 |
|
|
rtx mem =
|
1879 |
|
|
gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
|
1880 |
|
|
return emit_insn (gen_movv4si (reg, mem));
|
1881 |
|
|
}
|
1882 |
|
|
|
1883 |
|
|
/* This happens after reload, so we need to expand it. */
|
1884 |
|
|
static rtx
|
1885 |
|
|
frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
|
1886 |
|
|
{
|
1887 |
|
|
rtx insn;
|
1888 |
|
|
if (satisfies_constraint_K (GEN_INT (imm)))
|
1889 |
|
|
{
|
1890 |
|
|
insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
|
1891 |
|
|
}
|
1892 |
|
|
else
|
1893 |
|
|
{
|
1894 |
|
|
emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
|
1895 |
|
|
insn = emit_insn (gen_addsi3 (dst, src, scratch));
|
1896 |
|
|
if (REGNO (src) == REGNO (scratch))
|
1897 |
|
|
abort ();
|
1898 |
|
|
}
|
1899 |
|
|
return insn;
|
1900 |
|
|
}
|
1901 |
|
|
|
1902 |
|
|
/* Return nonzero if this function is known to have a null epilogue. */
|
1903 |
|
|
|
1904 |
|
|
int
|
1905 |
|
|
direct_return (void)
|
1906 |
|
|
{
|
1907 |
|
|
if (reload_completed)
|
1908 |
|
|
{
|
1909 |
|
|
if (cfun->static_chain_decl == 0
|
1910 |
|
|
&& (spu_saved_regs_size ()
|
1911 |
|
|
+ get_frame_size ()
|
1912 |
|
|
+ crtl->outgoing_args_size
|
1913 |
|
|
+ crtl->args.pretend_args_size == 0)
|
1914 |
|
|
&& current_function_is_leaf)
|
1915 |
|
|
return 1;
|
1916 |
|
|
}
|
1917 |
|
|
return 0;
|
1918 |
|
|
}
|
1919 |
|
|
|
1920 |
|
|
/*
|
1921 |
|
|
The stack frame looks like this:
|
1922 |
|
|
+-------------+
|
1923 |
|
|
| incoming |
|
1924 |
|
|
| args |
|
1925 |
|
|
AP -> +-------------+
|
1926 |
|
|
| $lr save |
|
1927 |
|
|
+-------------+
|
1928 |
|
|
prev SP | back chain |
|
1929 |
|
|
+-------------+
|
1930 |
|
|
| var args |
|
1931 |
|
|
| reg save | crtl->args.pretend_args_size bytes
|
1932 |
|
|
+-------------+
|
1933 |
|
|
| ... |
|
1934 |
|
|
| saved regs | spu_saved_regs_size() bytes
|
1935 |
|
|
FP -> +-------------+
|
1936 |
|
|
| ... |
|
1937 |
|
|
| vars | get_frame_size() bytes
|
1938 |
|
|
HFP -> +-------------+
|
1939 |
|
|
| ... |
|
1940 |
|
|
| outgoing |
|
1941 |
|
|
| args | crtl->outgoing_args_size bytes
|
1942 |
|
|
+-------------+
|
1943 |
|
|
| $lr of next |
|
1944 |
|
|
| frame |
|
1945 |
|
|
+-------------+
|
1946 |
|
|
| back chain |
|
1947 |
|
|
SP -> +-------------+
|
1948 |
|
|
|
1949 |
|
|
*/
|
1950 |
|
|
void
|
1951 |
|
|
spu_expand_prologue (void)
|
1952 |
|
|
{
|
1953 |
|
|
HOST_WIDE_INT size = get_frame_size (), offset, regno;
|
1954 |
|
|
HOST_WIDE_INT total_size;
|
1955 |
|
|
HOST_WIDE_INT saved_regs_size;
|
1956 |
|
|
rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
|
1957 |
|
|
rtx scratch_reg_0, scratch_reg_1;
|
1958 |
|
|
rtx insn, real;
|
1959 |
|
|
|
1960 |
|
|
if (flag_pic && optimize == 0)
|
1961 |
|
|
crtl->uses_pic_offset_table = 1;
|
1962 |
|
|
|
1963 |
|
|
if (spu_naked_function_p (current_function_decl))
|
1964 |
|
|
return;
|
1965 |
|
|
|
1966 |
|
|
scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
|
1967 |
|
|
scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
|
1968 |
|
|
|
1969 |
|
|
saved_regs_size = spu_saved_regs_size ();
|
1970 |
|
|
total_size = size + saved_regs_size
|
1971 |
|
|
+ crtl->outgoing_args_size
|
1972 |
|
|
+ crtl->args.pretend_args_size;
|
1973 |
|
|
|
1974 |
|
|
if (!current_function_is_leaf
|
1975 |
|
|
|| cfun->calls_alloca || total_size > 0)
|
1976 |
|
|
total_size += STACK_POINTER_OFFSET;
|
1977 |
|
|
|
1978 |
|
|
/* Save this first because code after this might use the link
|
1979 |
|
|
register as a scratch register. */
|
1980 |
|
|
if (!current_function_is_leaf)
|
1981 |
|
|
{
|
1982 |
|
|
insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
|
1983 |
|
|
RTX_FRAME_RELATED_P (insn) = 1;
|
1984 |
|
|
}
|
1985 |
|
|
|
1986 |
|
|
if (total_size > 0)
|
1987 |
|
|
{
|
1988 |
|
|
offset = -crtl->args.pretend_args_size;
|
1989 |
|
|
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
|
1990 |
|
|
if (need_to_save_reg (regno, 1))
|
1991 |
|
|
{
|
1992 |
|
|
offset -= 16;
|
1993 |
|
|
insn = frame_emit_store (regno, sp_reg, offset);
|
1994 |
|
|
RTX_FRAME_RELATED_P (insn) = 1;
|
1995 |
|
|
}
|
1996 |
|
|
}
|
1997 |
|
|
|
1998 |
|
|
if (flag_pic && crtl->uses_pic_offset_table)
|
1999 |
|
|
{
|
2000 |
|
|
rtx pic_reg = get_pic_reg ();
|
2001 |
|
|
insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
|
2002 |
|
|
insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
|
2003 |
|
|
}
|
2004 |
|
|
|
2005 |
|
|
if (total_size > 0)
|
2006 |
|
|
{
|
2007 |
|
|
if (flag_stack_check)
|
2008 |
|
|
{
|
2009 |
|
|
/* We compare against total_size-1 because
|
2010 |
|
|
($sp >= total_size) <=> ($sp > total_size-1) */
|
2011 |
|
|
rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
|
2012 |
|
|
rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
|
2013 |
|
|
rtx size_v4si = spu_const (V4SImode, total_size - 1);
|
2014 |
|
|
if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
|
2015 |
|
|
{
|
2016 |
|
|
emit_move_insn (scratch_v4si, size_v4si);
|
2017 |
|
|
size_v4si = scratch_v4si;
|
2018 |
|
|
}
|
2019 |
|
|
emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
|
2020 |
|
|
emit_insn (gen_vec_extractv4si
|
2021 |
|
|
(scratch_reg_0, scratch_v4si, GEN_INT (1)));
|
2022 |
|
|
emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
|
2023 |
|
|
}
|
2024 |
|
|
|
2025 |
|
|
/* Adjust the stack pointer, and make sure scratch_reg_0 contains
|
2026 |
|
|
the value of the previous $sp because we save it as the back
|
2027 |
|
|
chain. */
|
2028 |
|
|
if (total_size <= 2000)
|
2029 |
|
|
{
|
2030 |
|
|
/* In this case we save the back chain first. */
|
2031 |
|
|
insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
|
2032 |
|
|
insn =
|
2033 |
|
|
frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
|
2034 |
|
|
}
|
2035 |
|
|
else
|
2036 |
|
|
{
|
2037 |
|
|
insn = emit_move_insn (scratch_reg_0, sp_reg);
|
2038 |
|
|
insn =
|
2039 |
|
|
frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
|
2040 |
|
|
}
|
2041 |
|
|
RTX_FRAME_RELATED_P (insn) = 1;
|
2042 |
|
|
real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
|
2043 |
|
|
add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
|
2044 |
|
|
|
2045 |
|
|
if (total_size > 2000)
|
2046 |
|
|
{
|
2047 |
|
|
/* Save the back chain ptr */
|
2048 |
|
|
insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
|
2049 |
|
|
}
|
2050 |
|
|
|
2051 |
|
|
if (frame_pointer_needed)
|
2052 |
|
|
{
|
2053 |
|
|
rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
|
2054 |
|
|
HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
|
2055 |
|
|
+ crtl->outgoing_args_size;
|
2056 |
|
|
/* Set the new frame_pointer */
|
2057 |
|
|
insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
|
2058 |
|
|
RTX_FRAME_RELATED_P (insn) = 1;
|
2059 |
|
|
real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
|
2060 |
|
|
add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
|
2061 |
|
|
REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
|
2062 |
|
|
}
|
2063 |
|
|
}
|
2064 |
|
|
|
2065 |
|
|
}
|
2066 |
|
|
|
2067 |
|
|
void
|
2068 |
|
|
spu_expand_epilogue (bool sibcall_p)
|
2069 |
|
|
{
|
2070 |
|
|
int size = get_frame_size (), offset, regno;
|
2071 |
|
|
HOST_WIDE_INT saved_regs_size, total_size;
|
2072 |
|
|
rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
|
2073 |
|
|
rtx jump, scratch_reg_0;
|
2074 |
|
|
|
2075 |
|
|
if (spu_naked_function_p (current_function_decl))
|
2076 |
|
|
return;
|
2077 |
|
|
|
2078 |
|
|
scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
|
2079 |
|
|
|
2080 |
|
|
saved_regs_size = spu_saved_regs_size ();
|
2081 |
|
|
total_size = size + saved_regs_size
|
2082 |
|
|
+ crtl->outgoing_args_size
|
2083 |
|
|
+ crtl->args.pretend_args_size;
|
2084 |
|
|
|
2085 |
|
|
if (!current_function_is_leaf
|
2086 |
|
|
|| cfun->calls_alloca || total_size > 0)
|
2087 |
|
|
total_size += STACK_POINTER_OFFSET;
|
2088 |
|
|
|
2089 |
|
|
if (total_size > 0)
|
2090 |
|
|
{
|
2091 |
|
|
if (cfun->calls_alloca)
|
2092 |
|
|
frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
|
2093 |
|
|
else
|
2094 |
|
|
frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
|
2095 |
|
|
|
2096 |
|
|
|
2097 |
|
|
if (saved_regs_size > 0)
|
2098 |
|
|
{
|
2099 |
|
|
offset = -crtl->args.pretend_args_size;
|
2100 |
|
|
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
|
2101 |
|
|
if (need_to_save_reg (regno, 1))
|
2102 |
|
|
{
|
2103 |
|
|
offset -= 0x10;
|
2104 |
|
|
frame_emit_load (regno, sp_reg, offset);
|
2105 |
|
|
}
|
2106 |
|
|
}
|
2107 |
|
|
}
|
2108 |
|
|
|
2109 |
|
|
if (!current_function_is_leaf)
|
2110 |
|
|
frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
|
2111 |
|
|
|
2112 |
|
|
if (!sibcall_p)
|
2113 |
|
|
{
|
2114 |
|
|
emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
|
2115 |
|
|
jump = emit_jump_insn (gen__return ());
|
2116 |
|
|
emit_barrier_after (jump);
|
2117 |
|
|
}
|
2118 |
|
|
|
2119 |
|
|
}
|
2120 |
|
|
|
2121 |
|
|
rtx
|
2122 |
|
|
spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
|
2123 |
|
|
{
|
2124 |
|
|
if (count != 0)
|
2125 |
|
|
return 0;
|
2126 |
|
|
/* This is inefficient because it ends up copying to a save-register
|
2127 |
|
|
which then gets saved even though $lr has already been saved. But
|
2128 |
|
|
it does generate better code for leaf functions and we don't need
|
2129 |
|
|
to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
|
2130 |
|
|
used for __builtin_return_address anyway, so maybe we don't care if
|
2131 |
|
|
it's inefficient. */
|
2132 |
|
|
return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
|
2133 |
|
|
}
|
2134 |
|
|
|
2135 |
|
|
|
2136 |
|
|
/* Given VAL, generate a constant appropriate for MODE.
|
2137 |
|
|
If MODE is a vector mode, every element will be VAL.
|
2138 |
|
|
For TImode, VAL will be zero extended to 128 bits. */
|
2139 |
|
|
rtx
|
2140 |
|
|
spu_const (enum machine_mode mode, HOST_WIDE_INT val)
|
2141 |
|
|
{
|
2142 |
|
|
rtx inner;
|
2143 |
|
|
rtvec v;
|
2144 |
|
|
int units, i;
|
2145 |
|
|
|
2146 |
|
|
gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
|
2147 |
|
|
|| GET_MODE_CLASS (mode) == MODE_FLOAT
|
2148 |
|
|
|| GET_MODE_CLASS (mode) == MODE_VECTOR_INT
|
2149 |
|
|
|| GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
|
2150 |
|
|
|
2151 |
|
|
if (GET_MODE_CLASS (mode) == MODE_INT)
|
2152 |
|
|
return immed_double_const (val, 0, mode);
|
2153 |
|
|
|
2154 |
|
|
/* val is the bit representation of the float */
|
2155 |
|
|
if (GET_MODE_CLASS (mode) == MODE_FLOAT)
|
2156 |
|
|
return hwint_to_const_double (mode, val);
|
2157 |
|
|
|
2158 |
|
|
if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
|
2159 |
|
|
inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
|
2160 |
|
|
else
|
2161 |
|
|
inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
|
2162 |
|
|
|
2163 |
|
|
units = GET_MODE_NUNITS (mode);
|
2164 |
|
|
|
2165 |
|
|
v = rtvec_alloc (units);
|
2166 |
|
|
|
2167 |
|
|
for (i = 0; i < units; ++i)
|
2168 |
|
|
RTVEC_ELT (v, i) = inner;
|
2169 |
|
|
|
2170 |
|
|
return gen_rtx_CONST_VECTOR (mode, v);
|
2171 |
|
|
}
|
2172 |
|
|
|
2173 |
|
|
/* Create a MODE vector constant from 4 ints. */
|
2174 |
|
|
rtx
|
2175 |
|
|
spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
|
2176 |
|
|
{
|
2177 |
|
|
unsigned char arr[16];
|
2178 |
|
|
arr[0] = (a >> 24) & 0xff;
|
2179 |
|
|
arr[1] = (a >> 16) & 0xff;
|
2180 |
|
|
arr[2] = (a >> 8) & 0xff;
|
2181 |
|
|
arr[3] = (a >> 0) & 0xff;
|
2182 |
|
|
arr[4] = (b >> 24) & 0xff;
|
2183 |
|
|
arr[5] = (b >> 16) & 0xff;
|
2184 |
|
|
arr[6] = (b >> 8) & 0xff;
|
2185 |
|
|
arr[7] = (b >> 0) & 0xff;
|
2186 |
|
|
arr[8] = (c >> 24) & 0xff;
|
2187 |
|
|
arr[9] = (c >> 16) & 0xff;
|
2188 |
|
|
arr[10] = (c >> 8) & 0xff;
|
2189 |
|
|
arr[11] = (c >> 0) & 0xff;
|
2190 |
|
|
arr[12] = (d >> 24) & 0xff;
|
2191 |
|
|
arr[13] = (d >> 16) & 0xff;
|
2192 |
|
|
arr[14] = (d >> 8) & 0xff;
|
2193 |
|
|
arr[15] = (d >> 0) & 0xff;
|
2194 |
|
|
return array_to_constant(mode, arr);
|
2195 |
|
|
}
|
2196 |
|
|
|
2197 |
|
|
/* branch hint stuff */
|
2198 |
|
|
|
2199 |
|
|
/* An array of these is used to propagate hints to predecessor blocks.
   NOTE(review): presumably the array is indexed by basic block index;
   confirm where it is allocated.  */
struct spu_bb_info
{
  rtx prop_jump; /* Jump propagated into this entry from another block.  */
  int bb_index;  /* Index of the original block, i.e. the one PROP_JUMP
		    came from.  */
};
/* The array itself.  */
static struct spu_bb_info *spu_bb_info;
|
2206 |
|
|
|
2207 |
|
|
/* Nonzero when INSN is a call, or a divmodsi4 or udivmodsi4 insn.
   NOTE(review): judging by the name, branch hints are not moved past
   such insns; confirm at the use sites.  */
#define STOP_HINT_P(INSN) \
		(GET_CODE(INSN) == CALL_INSN \
		 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
		 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)

/* 1 when RTX is a hinted branch or its target.  We keep track of
   what has been hinted so the safe-hint code can test it easily.
   Stored in the `unchanging' flag, and only valid on CODE_LABEL,
   JUMP_INSN and CALL_INSN rtxes.  */
#define HINTED_P(RTX)						\
  (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)

/* 1 when RTX is an insn that must be scheduled on an even boundary.
   Stored in the `in_struct' flag, and only valid on JUMP_INSN and
   CALL_INSN rtxes.  */
#define SCHED_ON_EVEN_P(RTX)						\
  (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
|
2220 |
|
|
|
2221 |
|
|
/* Emit a nop for INSN such that the two will dual issue. This assumes
|
2222 |
|
|
INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
|
2223 |
|
|
We check for TImode to handle a MULTI1 insn which has dual issued its
|
2224 |
|
|
first instruction. get_pipe returns -1 for MULTI0, inline asm, or
|
2225 |
|
|
ADDR_VEC insns. */
|
2226 |
|
|
static void
|
2227 |
|
|
emit_nop_for_insn (rtx insn)
|
2228 |
|
|
{
|
2229 |
|
|
int p;
|
2230 |
|
|
rtx new_insn;
|
2231 |
|
|
p = get_pipe (insn);
|
2232 |
|
|
if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
|
2233 |
|
|
new_insn = emit_insn_after (gen_lnop (), insn);
|
2234 |
|
|
else if (p == 1 && GET_MODE (insn) == TImode)
|
2235 |
|
|
{
|
2236 |
|
|
new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
|
2237 |
|
|
PUT_MODE (new_insn, TImode);
|
2238 |
|
|
PUT_MODE (insn, VOIDmode);
|
2239 |
|
|
}
|
2240 |
|
|
else
|
2241 |
|
|
new_insn = emit_insn_after (gen_lnop (), insn);
|
2242 |
|
|
recog_memoized (new_insn);
|
2243 |
|
|
}
|
2244 |
|
|
|
2245 |
|
|
/* Insert nops in basic blocks to meet dual issue alignment
|
2246 |
|
|
requirements. Also make sure hbrp and hint instructions are at least
|
2247 |
|
|
one cycle apart, possibly inserting a nop. */
|
2248 |
|
|
static void
|
2249 |
|
|
pad_bb(void)
|
2250 |
|
|
{
|
2251 |
|
|
rtx insn, next_insn, prev_insn, hbr_insn = 0;
|
2252 |
|
|
int length;
|
2253 |
|
|
int addr;
|
2254 |
|
|
|
2255 |
|
|
/* This sets up INSN_ADDRESSES. */
|
2256 |
|
|
shorten_branches (get_insns ());
|
2257 |
|
|
|
2258 |
|
|
/* Keep track of length added by nops. */
|
2259 |
|
|
length = 0;
|
2260 |
|
|
|
2261 |
|
|
prev_insn = 0;
|
2262 |
|
|
insn = get_insns ();
|
2263 |
|
|
if (!active_insn_p (insn))
|
2264 |
|
|
insn = next_active_insn (insn);
|
2265 |
|
|
for (; insn; insn = next_insn)
|
2266 |
|
|
{
|
2267 |
|
|
next_insn = next_active_insn (insn);
|
2268 |
|
|
if (INSN_CODE (insn) == CODE_FOR_iprefetch
|
2269 |
|
|
|| INSN_CODE (insn) == CODE_FOR_hbr)
|
2270 |
|
|
{
|
2271 |
|
|
if (hbr_insn)
|
2272 |
|
|
{
|
2273 |
|
|
int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
|
2274 |
|
|
int a1 = INSN_ADDRESSES (INSN_UID (insn));
|
2275 |
|
|
if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
|
2276 |
|
|
|| (a1 - a0 == 4))
|
2277 |
|
|
{
|
2278 |
|
|
prev_insn = emit_insn_before (gen_lnop (), insn);
|
2279 |
|
|
PUT_MODE (prev_insn, GET_MODE (insn));
|
2280 |
|
|
PUT_MODE (insn, TImode);
|
2281 |
|
|
length += 4;
|
2282 |
|
|
}
|
2283 |
|
|
}
|
2284 |
|
|
hbr_insn = insn;
|
2285 |
|
|
}
|
2286 |
|
|
if (INSN_CODE (insn) == CODE_FOR_blockage)
|
2287 |
|
|
{
|
2288 |
|
|
if (GET_MODE (insn) == TImode)
|
2289 |
|
|
PUT_MODE (next_insn, TImode);
|
2290 |
|
|
insn = next_insn;
|
2291 |
|
|
next_insn = next_active_insn (insn);
|
2292 |
|
|
}
|
2293 |
|
|
addr = INSN_ADDRESSES (INSN_UID (insn));
|
2294 |
|
|
if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
|
2295 |
|
|
{
|
2296 |
|
|
if (((addr + length) & 7) != 0)
|
2297 |
|
|
{
|
2298 |
|
|
emit_nop_for_insn (prev_insn);
|
2299 |
|
|
length += 4;
|
2300 |
|
|
}
|
2301 |
|
|
}
|
2302 |
|
|
else if (GET_MODE (insn) == TImode
|
2303 |
|
|
&& ((next_insn && GET_MODE (next_insn) != TImode)
|
2304 |
|
|
|| get_attr_type (insn) == TYPE_MULTI0)
|
2305 |
|
|
&& ((addr + length) & 7) != 0)
|
2306 |
|
|
{
|
2307 |
|
|
/* prev_insn will always be set because the first insn is
|
2308 |
|
|
always 8-byte aligned. */
|
2309 |
|
|
emit_nop_for_insn (prev_insn);
|
2310 |
|
|
length += 4;
|
2311 |
|
|
}
|
2312 |
|
|
prev_insn = insn;
|
2313 |
|
|
}
|
2314 |
|
|
}
|
2315 |
|
|
|
2316 |
|
|
|
2317 |
|
|
/* Routines for branch hints. */
|
2318 |
|
|
|
2319 |
|
|
static void
|
2320 |
|
|
spu_emit_branch_hint (rtx before, rtx branch, rtx target,
|
2321 |
|
|
int distance, sbitmap blocks)
|
2322 |
|
|
{
|
2323 |
|
|
rtx branch_label = 0;
|
2324 |
|
|
rtx hint;
|
2325 |
|
|
rtx insn;
|
2326 |
|
|
rtx table;
|
2327 |
|
|
|
2328 |
|
|
if (before == 0 || branch == 0 || target == 0)
|
2329 |
|
|
return;
|
2330 |
|
|
|
2331 |
|
|
/* While scheduling we require hints to be no further than 600, so
|
2332 |
|
|
we need to enforce that here too */
|
2333 |
|
|
if (distance > 600)
|
2334 |
|
|
return;
|
2335 |
|
|
|
2336 |
|
|
/* If we have a Basic block note, emit it after the basic block note. */
|
2337 |
378 |
julius |
if (NOTE_INSN_BASIC_BLOCK_P (before))
|
2338 |
282 |
jeremybenn |
before = NEXT_INSN (before);
|
2339 |
|
|
|
2340 |
|
|
branch_label = gen_label_rtx ();
|
2341 |
|
|
LABEL_NUSES (branch_label)++;
|
2342 |
|
|
LABEL_PRESERVE_P (branch_label) = 1;
|
2343 |
|
|
insn = emit_label_before (branch_label, branch);
|
2344 |
|
|
branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
|
2345 |
|
|
SET_BIT (blocks, BLOCK_FOR_INSN (branch)->index);
|
2346 |
|
|
|
2347 |
|
|
hint = emit_insn_before (gen_hbr (branch_label, target), before);
|
2348 |
|
|
recog_memoized (hint);
|
2349 |
|
|
HINTED_P (branch) = 1;
|
2350 |
|
|
|
2351 |
|
|
if (GET_CODE (target) == LABEL_REF)
|
2352 |
|
|
HINTED_P (XEXP (target, 0)) = 1;
|
2353 |
|
|
else if (tablejump_p (branch, 0, &table))
|
2354 |
|
|
{
|
2355 |
|
|
rtvec vec;
|
2356 |
|
|
int j;
|
2357 |
|
|
if (GET_CODE (PATTERN (table)) == ADDR_VEC)
|
2358 |
|
|
vec = XVEC (PATTERN (table), 0);
|
2359 |
|
|
else
|
2360 |
|
|
vec = XVEC (PATTERN (table), 1);
|
2361 |
|
|
for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
|
2362 |
|
|
HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
|
2363 |
|
|
}
|
2364 |
|
|
|
2365 |
|
|
if (distance >= 588)
|
2366 |
|
|
{
|
2367 |
|
|
/* Make sure the hint isn't scheduled any earlier than this point,
|
2368 |
|
|
which could make it too far for the branch offest to fit */
|
2369 |
|
|
recog_memoized (emit_insn_before (gen_blockage (), hint));
|
2370 |
|
|
}
|
2371 |
|
|
else if (distance <= 8 * 4)
|
2372 |
|
|
{
|
2373 |
|
|
/* To guarantee at least 8 insns between the hint and branch we
|
2374 |
|
|
insert nops. */
|
2375 |
|
|
int d;
|
2376 |
|
|
for (d = distance; d < 8 * 4; d += 4)
|
2377 |
|
|
{
|
2378 |
|
|
insn =
|
2379 |
|
|
emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
|
2380 |
|
|
recog_memoized (insn);
|
2381 |
|
|
}
|
2382 |
|
|
|
2383 |
|
|
/* Make sure any nops inserted aren't scheduled before the hint. */
|
2384 |
|
|
recog_memoized (emit_insn_after (gen_blockage (), hint));
|
2385 |
|
|
|
2386 |
|
|
/* Make sure any nops inserted aren't scheduled after the call. */
|
2387 |
|
|
if (CALL_P (branch) && distance < 8 * 4)
|
2388 |
|
|
recog_memoized (emit_insn_before (gen_blockage (), branch));
|
2389 |
|
|
}
|
2390 |
|
|
}
|
2391 |
|
|
|
2392 |
|
|
/* Returns 0 if we don't want a hint for this branch. Otherwise return
|
2393 |
|
|
the rtx for the branch target. */
|
2394 |
|
|
static rtx
|
2395 |
|
|
get_branch_target (rtx branch)
|
2396 |
|
|
{
|
2397 |
|
|
if (GET_CODE (branch) == JUMP_INSN)
|
2398 |
|
|
{
|
2399 |
|
|
rtx set, src;
|
2400 |
|
|
|
2401 |
|
|
/* Return statements */
|
2402 |
|
|
if (GET_CODE (PATTERN (branch)) == RETURN)
|
2403 |
|
|
return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
|
2404 |
|
|
|
2405 |
|
|
/* jump table */
|
2406 |
|
|
if (GET_CODE (PATTERN (branch)) == ADDR_VEC
|
2407 |
|
|
|| GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
|
2408 |
|
|
return 0;
|
2409 |
|
|
|
2410 |
|
|
/* ASM GOTOs. */
|
2411 |
|
|
if (extract_asm_operands (PATTERN (branch)) != NULL)
|
2412 |
|
|
return NULL;
|
2413 |
|
|
|
2414 |
|
|
set = single_set (branch);
|
2415 |
|
|
src = SET_SRC (set);
|
2416 |
|
|
if (GET_CODE (SET_DEST (set)) != PC)
|
2417 |
|
|
abort ();
|
2418 |
|
|
|
2419 |
|
|
if (GET_CODE (src) == IF_THEN_ELSE)
|
2420 |
|
|
{
|
2421 |
|
|
rtx lab = 0;
|
2422 |
|
|
rtx note = find_reg_note (branch, REG_BR_PROB, 0);
|
2423 |
|
|
if (note)
|
2424 |
|
|
{
|
2425 |
|
|
/* If the more probable case is not a fall through, then
|
2426 |
|
|
try a branch hint. */
|
2427 |
|
|
HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
|
2428 |
|
|
if (prob > (REG_BR_PROB_BASE * 6 / 10)
|
2429 |
|
|
&& GET_CODE (XEXP (src, 1)) != PC)
|
2430 |
|
|
lab = XEXP (src, 1);
|
2431 |
|
|
else if (prob < (REG_BR_PROB_BASE * 4 / 10)
|
2432 |
|
|
&& GET_CODE (XEXP (src, 2)) != PC)
|
2433 |
|
|
lab = XEXP (src, 2);
|
2434 |
|
|
}
|
2435 |
|
|
if (lab)
|
2436 |
|
|
{
|
2437 |
|
|
if (GET_CODE (lab) == RETURN)
|
2438 |
|
|
return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
|
2439 |
|
|
return lab;
|
2440 |
|
|
}
|
2441 |
|
|
return 0;
|
2442 |
|
|
}
|
2443 |
|
|
|
2444 |
|
|
return src;
|
2445 |
|
|
}
|
2446 |
|
|
else if (GET_CODE (branch) == CALL_INSN)
|
2447 |
|
|
{
|
2448 |
|
|
rtx call;
|
2449 |
|
|
/* All of our call patterns are in a PARALLEL and the CALL is
|
2450 |
|
|
the first pattern in the PARALLEL. */
|
2451 |
|
|
if (GET_CODE (PATTERN (branch)) != PARALLEL)
|
2452 |
|
|
abort ();
|
2453 |
|
|
call = XVECEXP (PATTERN (branch), 0, 0);
|
2454 |
|
|
if (GET_CODE (call) == SET)
|
2455 |
|
|
call = SET_SRC (call);
|
2456 |
|
|
if (GET_CODE (call) != CALL)
|
2457 |
|
|
abort ();
|
2458 |
|
|
return XEXP (XEXP (call, 0), 0);
|
2459 |
|
|
}
|
2460 |
|
|
return 0;
|
2461 |
|
|
}
|
2462 |
|
|
|
2463 |
|
|
/* The special $hbr register is used to prevent the insn scheduler from
|
2464 |
|
|
moving hbr insns across instructions which invalidate them. It
|
2465 |
|
|
should only be used in a clobber, and this function searches for
|
2466 |
|
|
insns which clobber it. */
|
2467 |
|
|
static bool
|
2468 |
|
|
insn_clobbers_hbr (rtx insn)
|
2469 |
|
|
{
|
2470 |
|
|
if (INSN_P (insn)
|
2471 |
|
|
&& GET_CODE (PATTERN (insn)) == PARALLEL)
|
2472 |
|
|
{
|
2473 |
|
|
rtx parallel = PATTERN (insn);
|
2474 |
|
|
rtx clobber;
|
2475 |
|
|
int j;
|
2476 |
|
|
for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
|
2477 |
|
|
{
|
2478 |
|
|
clobber = XVECEXP (parallel, 0, j);
|
2479 |
|
|
if (GET_CODE (clobber) == CLOBBER
|
2480 |
|
|
&& GET_CODE (XEXP (clobber, 0)) == REG
|
2481 |
|
|
&& REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
|
2482 |
|
|
return 1;
|
2483 |
|
|
}
|
2484 |
|
|
}
|
2485 |
|
|
return 0;
|
2486 |
|
|
}
|
2487 |
|
|
|
2488 |
|
|
/* Search up to 32 insns starting at FIRST:
|
2489 |
|
|
- at any kind of hinted branch, just return
|
2490 |
|
|
- at any unconditional branch in the first 15 insns, just return
|
2491 |
|
|
- at a call or indirect branch, after the first 15 insns, force it to
|
2492 |
|
|
an even address and return
|
2493 |
|
|
- at any unconditional branch, after the first 15 insns, force it to
|
2494 |
|
|
an even address.
|
2495 |
|
|
At then end of the search, insert an hbrp within 4 insns of FIRST,
|
2496 |
|
|
and an hbrp within 16 instructions of FIRST.
|
2497 |
|
|
*/
|
2498 |
|
|
static void
|
2499 |
|
|
insert_hbrp_for_ilb_runout (rtx first)
|
2500 |
|
|
{
|
2501 |
|
|
rtx insn, before_4 = 0, before_16 = 0;
|
2502 |
|
|
int addr = 0, length, first_addr = -1;
|
2503 |
|
|
int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
|
2504 |
|
|
int insert_lnop_after = 0;
|
2505 |
|
|
for (insn = first; insn; insn = NEXT_INSN (insn))
|
2506 |
|
|
if (INSN_P (insn))
|
2507 |
|
|
{
|
2508 |
|
|
if (first_addr == -1)
|
2509 |
|
|
first_addr = INSN_ADDRESSES (INSN_UID (insn));
|
2510 |
|
|
addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
|
2511 |
|
|
length = get_attr_length (insn);
|
2512 |
|
|
|
2513 |
|
|
if (before_4 == 0 && addr + length >= 4 * 4)
|
2514 |
|
|
before_4 = insn;
|
2515 |
|
|
/* We test for 14 instructions because the first hbrp will add
|
2516 |
|
|
up to 2 instructions. */
|
2517 |
|
|
if (before_16 == 0 && addr + length >= 14 * 4)
|
2518 |
|
|
before_16 = insn;
|
2519 |
|
|
|
2520 |
|
|
if (INSN_CODE (insn) == CODE_FOR_hbr)
|
2521 |
|
|
{
|
2522 |
|
|
/* Make sure an hbrp is at least 2 cycles away from a hint.
|
2523 |
|
|
Insert an lnop after the hbrp when necessary. */
|
2524 |
|
|
if (before_4 == 0 && addr > 0)
|
2525 |
|
|
{
|
2526 |
|
|
before_4 = insn;
|
2527 |
|
|
insert_lnop_after |= 1;
|
2528 |
|
|
}
|
2529 |
|
|
else if (before_4 && addr <= 4 * 4)
|
2530 |
|
|
insert_lnop_after |= 1;
|
2531 |
|
|
if (before_16 == 0 && addr > 10 * 4)
|
2532 |
|
|
{
|
2533 |
|
|
before_16 = insn;
|
2534 |
|
|
insert_lnop_after |= 2;
|
2535 |
|
|
}
|
2536 |
|
|
else if (before_16 && addr <= 14 * 4)
|
2537 |
|
|
insert_lnop_after |= 2;
|
2538 |
|
|
}
|
2539 |
|
|
|
2540 |
|
|
if (INSN_CODE (insn) == CODE_FOR_iprefetch)
|
2541 |
|
|
{
|
2542 |
|
|
if (addr < hbrp_addr0)
|
2543 |
|
|
hbrp_addr0 = addr;
|
2544 |
|
|
else if (addr < hbrp_addr1)
|
2545 |
|
|
hbrp_addr1 = addr;
|
2546 |
|
|
}
|
2547 |
|
|
|
2548 |
|
|
if (CALL_P (insn) || JUMP_P (insn))
|
2549 |
|
|
{
|
2550 |
|
|
if (HINTED_P (insn))
|
2551 |
|
|
return;
|
2552 |
|
|
|
2553 |
|
|
/* Any branch after the first 15 insns should be on an even
|
2554 |
|
|
address to avoid a special case branch. There might be
|
2555 |
|
|
some nops and/or hbrps inserted, so we test after 10
|
2556 |
|
|
insns. */
|
2557 |
|
|
if (addr > 10 * 4)
|
2558 |
|
|
SCHED_ON_EVEN_P (insn) = 1;
|
2559 |
|
|
}
|
2560 |
|
|
|
2561 |
|
|
if (CALL_P (insn) || tablejump_p (insn, 0, 0))
|
2562 |
|
|
return;
|
2563 |
|
|
|
2564 |
|
|
|
2565 |
|
|
if (addr + length >= 32 * 4)
|
2566 |
|
|
{
|
2567 |
|
|
gcc_assert (before_4 && before_16);
|
2568 |
|
|
if (hbrp_addr0 > 4 * 4)
|
2569 |
|
|
{
|
2570 |
|
|
insn =
|
2571 |
|
|
emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
|
2572 |
|
|
recog_memoized (insn);
|
2573 |
|
|
INSN_ADDRESSES_NEW (insn,
|
2574 |
|
|
INSN_ADDRESSES (INSN_UID (before_4)));
|
2575 |
|
|
PUT_MODE (insn, GET_MODE (before_4));
|
2576 |
|
|
PUT_MODE (before_4, TImode);
|
2577 |
|
|
if (insert_lnop_after & 1)
|
2578 |
|
|
{
|
2579 |
|
|
insn = emit_insn_before (gen_lnop (), before_4);
|
2580 |
|
|
recog_memoized (insn);
|
2581 |
|
|
INSN_ADDRESSES_NEW (insn,
|
2582 |
|
|
INSN_ADDRESSES (INSN_UID (before_4)));
|
2583 |
|
|
PUT_MODE (insn, TImode);
|
2584 |
|
|
}
|
2585 |
|
|
}
|
2586 |
|
|
if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
|
2587 |
|
|
&& hbrp_addr1 > 16 * 4)
|
2588 |
|
|
{
|
2589 |
|
|
insn =
|
2590 |
|
|
emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
|
2591 |
|
|
recog_memoized (insn);
|
2592 |
|
|
INSN_ADDRESSES_NEW (insn,
|
2593 |
|
|
INSN_ADDRESSES (INSN_UID (before_16)));
|
2594 |
|
|
PUT_MODE (insn, GET_MODE (before_16));
|
2595 |
|
|
PUT_MODE (before_16, TImode);
|
2596 |
|
|
if (insert_lnop_after & 2)
|
2597 |
|
|
{
|
2598 |
|
|
insn = emit_insn_before (gen_lnop (), before_16);
|
2599 |
|
|
recog_memoized (insn);
|
2600 |
|
|
INSN_ADDRESSES_NEW (insn,
|
2601 |
|
|
INSN_ADDRESSES (INSN_UID
|
2602 |
|
|
(before_16)));
|
2603 |
|
|
PUT_MODE (insn, TImode);
|
2604 |
|
|
}
|
2605 |
|
|
}
|
2606 |
|
|
return;
|
2607 |
|
|
}
|
2608 |
|
|
}
|
2609 |
|
|
else if (BARRIER_P (insn))
|
2610 |
|
|
return;
|
2611 |
|
|
|
2612 |
|
|
}
|
2613 |
|
|
|
2614 |
|
|
/* The SPU might hang when it executes 48 inline instructions after a
|
2615 |
|
|
hinted branch jumps to its hinted target. The beginning of a
|
2616 |
|
|
function and the return from a call might have been hinted, and must
|
2617 |
|
|
be handled as well. To prevent a hang we insert 2 hbrps. The first
|
2618 |
|
|
should be within 6 insns of the branch target. The second should be
|
2619 |
|
|
within 22 insns of the branch target. When determining if hbrps are
|
2620 |
|
|
necessary, we look for only 32 inline instructions, because up to to
|
2621 |
|
|
12 nops and 4 hbrps could be inserted. Similarily, when inserting
|
2622 |
|
|
new hbrps, we insert them within 4 and 16 insns of the target. */
|
2623 |
|
|
static void
|
2624 |
|
|
insert_hbrp (void)
|
2625 |
|
|
{
|
2626 |
|
|
rtx insn;
|
2627 |
|
|
if (TARGET_SAFE_HINTS)
|
2628 |
|
|
{
|
2629 |
|
|
shorten_branches (get_insns ());
|
2630 |
|
|
/* Insert hbrp at beginning of function */
|
2631 |
|
|
insn = next_active_insn (get_insns ());
|
2632 |
|
|
if (insn)
|
2633 |
|
|
insert_hbrp_for_ilb_runout (insn);
|
2634 |
|
|
/* Insert hbrp after hinted targets. */
|
2635 |
|
|
for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
|
2636 |
|
|
if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
|
2637 |
|
|
insert_hbrp_for_ilb_runout (next_active_insn (insn));
|
2638 |
|
|
}
|
2639 |
|
|
}
|
2640 |
|
|
|
2641 |
|
|
static int in_spu_reorg;
|
2642 |
|
|
|
2643 |
|
|
/* Insert branch hints. There are no branch optimizations after this
|
2644 |
|
|
pass, so it's safe to set our branch hints now. */
|
2645 |
|
|
static void
|
2646 |
|
|
spu_machine_dependent_reorg (void)
|
2647 |
|
|
{
|
2648 |
|
|
sbitmap blocks;
|
2649 |
|
|
basic_block bb;
|
2650 |
|
|
rtx branch, insn;
|
2651 |
|
|
rtx branch_target = 0;
|
2652 |
|
|
int branch_addr = 0, insn_addr, required_dist = 0;
|
2653 |
|
|
int i;
|
2654 |
|
|
unsigned int j;
|
2655 |
|
|
|
2656 |
|
|
if (!TARGET_BRANCH_HINTS || optimize == 0)
|
2657 |
|
|
{
|
2658 |
|
|
/* We still do it for unoptimized code because an external
|
2659 |
|
|
function might have hinted a call or return. */
|
2660 |
|
|
insert_hbrp ();
|
2661 |
|
|
pad_bb ();
|
2662 |
|
|
return;
|
2663 |
|
|
}
|
2664 |
|
|
|
2665 |
|
|
blocks = sbitmap_alloc (last_basic_block);
|
2666 |
|
|
sbitmap_zero (blocks);
|
2667 |
|
|
|
2668 |
|
|
in_spu_reorg = 1;
|
2669 |
|
|
compute_bb_for_insn ();
|
2670 |
|
|
|
2671 |
|
|
compact_blocks ();
|
2672 |
|
|
|
2673 |
|
|
spu_bb_info =
|
2674 |
|
|
(struct spu_bb_info *) xcalloc (n_basic_blocks,
|
2675 |
|
|
sizeof (struct spu_bb_info));
|
2676 |
|
|
|
2677 |
|
|
/* We need exact insn addresses and lengths. */
|
2678 |
|
|
shorten_branches (get_insns ());
|
2679 |
|
|
|
2680 |
|
|
for (i = n_basic_blocks - 1; i >= 0; i--)
|
2681 |
|
|
{
|
2682 |
|
|
bb = BASIC_BLOCK (i);
|
2683 |
|
|
branch = 0;
|
2684 |
|
|
if (spu_bb_info[i].prop_jump)
|
2685 |
|
|
{
|
2686 |
|
|
branch = spu_bb_info[i].prop_jump;
|
2687 |
|
|
branch_target = get_branch_target (branch);
|
2688 |
|
|
branch_addr = INSN_ADDRESSES (INSN_UID (branch));
|
2689 |
|
|
required_dist = spu_hint_dist;
|
2690 |
|
|
}
|
2691 |
|
|
/* Search from end of a block to beginning. In this loop, find
|
2692 |
|
|
jumps which need a branch and emit them only when:
|
2693 |
|
|
- it's an indirect branch and we're at the insn which sets
|
2694 |
|
|
the register
|
2695 |
|
|
- we're at an insn that will invalidate the hint. e.g., a
|
2696 |
|
|
call, another hint insn, inline asm that clobbers $hbr, and
|
2697 |
|
|
some inlined operations (divmodsi4). Don't consider jumps
|
2698 |
|
|
because they are only at the end of a block and are
|
2699 |
|
|
considered when we are deciding whether to propagate
|
2700 |
|
|
- we're getting too far away from the branch. The hbr insns
|
2701 |
|
|
only have a signed 10 bit offset
|
2702 |
|
|
We go back as far as possible so the branch will be considered
|
2703 |
|
|
for propagation when we get to the beginning of the block. */
|
2704 |
|
|
for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
|
2705 |
|
|
{
|
2706 |
|
|
if (INSN_P (insn))
|
2707 |
|
|
{
|
2708 |
|
|
insn_addr = INSN_ADDRESSES (INSN_UID (insn));
|
2709 |
|
|
if (branch
|
2710 |
|
|
&& ((GET_CODE (branch_target) == REG
|
2711 |
|
|
&& set_of (branch_target, insn) != NULL_RTX)
|
2712 |
|
|
|| insn_clobbers_hbr (insn)
|
2713 |
|
|
|| branch_addr - insn_addr > 600))
|
2714 |
|
|
{
|
2715 |
|
|
rtx next = NEXT_INSN (insn);
|
2716 |
|
|
int next_addr = INSN_ADDRESSES (INSN_UID (next));
|
2717 |
|
|
if (insn != BB_END (bb)
|
2718 |
|
|
&& branch_addr - next_addr >= required_dist)
|
2719 |
|
|
{
|
2720 |
|
|
if (dump_file)
|
2721 |
|
|
fprintf (dump_file,
|
2722 |
|
|
"hint for %i in block %i before %i\n",
|
2723 |
|
|
INSN_UID (branch), bb->index,
|
2724 |
|
|
INSN_UID (next));
|
2725 |
|
|
spu_emit_branch_hint (next, branch, branch_target,
|
2726 |
|
|
branch_addr - next_addr, blocks);
|
2727 |
|
|
}
|
2728 |
|
|
branch = 0;
|
2729 |
|
|
}
|
2730 |
|
|
|
2731 |
|
|
/* JUMP_P will only be true at the end of a block. When
|
2732 |
|
|
branch is already set it means we've previously decided
|
2733 |
|
|
to propagate a hint for that branch into this block. */
|
2734 |
|
|
if (CALL_P (insn) || (JUMP_P (insn) && !branch))
|
2735 |
|
|
{
|
2736 |
|
|
branch = 0;
|
2737 |
|
|
if ((branch_target = get_branch_target (insn)))
|
2738 |
|
|
{
|
2739 |
|
|
branch = insn;
|
2740 |
|
|
branch_addr = insn_addr;
|
2741 |
|
|
required_dist = spu_hint_dist;
|
2742 |
|
|
}
|
2743 |
|
|
}
|
2744 |
|
|
}
|
2745 |
|
|
if (insn == BB_HEAD (bb))
|
2746 |
|
|
break;
|
2747 |
|
|
}
|
2748 |
|
|
|
2749 |
|
|
if (branch)
|
2750 |
|
|
{
|
2751 |
|
|
/* If we haven't emitted a hint for this branch yet, it might
|
2752 |
|
|
be profitable to emit it in one of the predecessor blocks,
|
2753 |
|
|
especially for loops. */
|
2754 |
|
|
rtx bbend;
|
2755 |
|
|
basic_block prev = 0, prop = 0, prev2 = 0;
|
2756 |
|
|
int loop_exit = 0, simple_loop = 0;
|
2757 |
|
|
int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
|
2758 |
|
|
|
2759 |
|
|
for (j = 0; j < EDGE_COUNT (bb->preds); j++)
|
2760 |
|
|
if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
|
2761 |
|
|
prev = EDGE_PRED (bb, j)->src;
|
2762 |
|
|
else
|
2763 |
|
|
prev2 = EDGE_PRED (bb, j)->src;
|
2764 |
|
|
|
2765 |
|
|
for (j = 0; j < EDGE_COUNT (bb->succs); j++)
|
2766 |
|
|
if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
|
2767 |
|
|
loop_exit = 1;
|
2768 |
|
|
else if (EDGE_SUCC (bb, j)->dest == bb)
|
2769 |
|
|
simple_loop = 1;
|
2770 |
|
|
|
2771 |
|
|
/* If this branch is a loop exit then propagate to previous
|
2772 |
|
|
fallthru block. This catches the cases when it is a simple
|
2773 |
|
|
loop or when there is an initial branch into the loop. */
|
2774 |
|
|
if (prev && (loop_exit || simple_loop)
|
2775 |
|
|
&& prev->loop_depth <= bb->loop_depth)
|
2776 |
|
|
prop = prev;
|
2777 |
|
|
|
2778 |
|
|
/* If there is only one adjacent predecessor. Don't propagate
|
2779 |
|
|
outside this loop. This loop_depth test isn't perfect, but
|
2780 |
|
|
I'm not sure the loop_father member is valid at this point. */
|
2781 |
|
|
else if (prev && single_pred_p (bb)
|
2782 |
|
|
&& prev->loop_depth == bb->loop_depth)
|
2783 |
|
|
prop = prev;
|
2784 |
|
|
|
2785 |
|
|
/* If this is the JOIN block of a simple IF-THEN then
|
2786 |
|
|
propogate the hint to the HEADER block. */
|
2787 |
|
|
else if (prev && prev2
|
2788 |
|
|
&& EDGE_COUNT (bb->preds) == 2
|
2789 |
|
|
&& EDGE_COUNT (prev->preds) == 1
|
2790 |
|
|
&& EDGE_PRED (prev, 0)->src == prev2
|
2791 |
|
|
&& prev2->loop_depth == bb->loop_depth
|
2792 |
|
|
&& GET_CODE (branch_target) != REG)
|
2793 |
|
|
prop = prev;
|
2794 |
|
|
|
2795 |
|
|
/* Don't propagate when:
|
2796 |
|
|
- this is a simple loop and the hint would be too far
|
2797 |
|
|
- this is not a simple loop and there are 16 insns in
|
2798 |
|
|
this block already
|
2799 |
|
|
- the predecessor block ends in a branch that will be
|
2800 |
|
|
hinted
|
2801 |
|
|
- the predecessor block ends in an insn that invalidates
|
2802 |
|
|
the hint */
|
2803 |
|
|
if (prop
|
2804 |
|
|
&& prop->index >= 0
|
2805 |
|
|
&& (bbend = BB_END (prop))
|
2806 |
|
|
&& branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
|
2807 |
|
|
(simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
|
2808 |
|
|
&& (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
|
2809 |
|
|
{
|
2810 |
|
|
if (dump_file)
|
2811 |
|
|
fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
|
2812 |
|
|
"for %i (loop_exit %i simple_loop %i dist %i)\n",
|
2813 |
|
|
bb->index, prop->index, bb->loop_depth,
|
2814 |
|
|
INSN_UID (branch), loop_exit, simple_loop,
|
2815 |
|
|
branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
|
2816 |
|
|
|
2817 |
|
|
spu_bb_info[prop->index].prop_jump = branch;
|
2818 |
|
|
spu_bb_info[prop->index].bb_index = i;
|
2819 |
|
|
}
|
2820 |
|
|
else if (branch_addr - next_addr >= required_dist)
|
2821 |
|
|
{
|
2822 |
|
|
if (dump_file)
|
2823 |
|
|
fprintf (dump_file, "hint for %i in block %i before %i\n",
|
2824 |
|
|
INSN_UID (branch), bb->index,
|
2825 |
|
|
INSN_UID (NEXT_INSN (insn)));
|
2826 |
|
|
spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
|
2827 |
|
|
branch_addr - next_addr, blocks);
|
2828 |
|
|
}
|
2829 |
|
|
branch = 0;
|
2830 |
|
|
}
|
2831 |
|
|
}
|
2832 |
|
|
free (spu_bb_info);
|
2833 |
|
|
|
2834 |
|
|
if (!sbitmap_empty_p (blocks))
|
2835 |
|
|
find_many_sub_basic_blocks (blocks);
|
2836 |
|
|
|
2837 |
|
|
/* We have to schedule to make sure alignment is ok. */
|
2838 |
|
|
FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
|
2839 |
|
|
|
2840 |
|
|
/* The hints need to be scheduled, so call it again. */
|
2841 |
|
|
schedule_insns ();
|
2842 |
|
|
|
2843 |
|
|
insert_hbrp ();
|
2844 |
|
|
|
2845 |
|
|
pad_bb ();
|
2846 |
|
|
|
2847 |
|
|
for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
|
2848 |
|
|
if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
|
2849 |
|
|
{
|
2850 |
|
|
/* Adjust the LABEL_REF in a hint when we have inserted a nop
|
2851 |
|
|
between its branch label and the branch . We don't move the
|
2852 |
|
|
label because GCC expects it at the beginning of the block. */
|
2853 |
|
|
rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
|
2854 |
|
|
rtx label_ref = XVECEXP (unspec, 0, 0);
|
2855 |
|
|
rtx label = XEXP (label_ref, 0);
|
2856 |
|
|
rtx branch;
|
2857 |
|
|
int offset = 0;
|
2858 |
|
|
for (branch = NEXT_INSN (label);
|
2859 |
|
|
!JUMP_P (branch) && !CALL_P (branch);
|
2860 |
|
|
branch = NEXT_INSN (branch))
|
2861 |
|
|
if (NONJUMP_INSN_P (branch))
|
2862 |
|
|
offset += get_attr_length (branch);
|
2863 |
|
|
if (offset > 0)
|
2864 |
|
|
XVECEXP (unspec, 0, 0) = plus_constant (label_ref, offset);
|
2865 |
|
|
}
|
2866 |
|
|
|
2867 |
|
|
if (spu_flag_var_tracking)
|
2868 |
|
|
{
|
2869 |
|
|
df_analyze ();
|
2870 |
|
|
timevar_push (TV_VAR_TRACKING);
|
2871 |
|
|
variable_tracking_main ();
|
2872 |
|
|
timevar_pop (TV_VAR_TRACKING);
|
2873 |
|
|
df_finish_pass (false);
|
2874 |
|
|
}
|
2875 |
|
|
|
2876 |
|
|
free_bb_for_insn ();
|
2877 |
|
|
|
2878 |
|
|
in_spu_reorg = 0;
|
2879 |
|
|
}
|
2880 |
|
|
|
2881 |
|
|
|
2882 |
|
|
/* Insn scheduling routines, primarily for dual issue.  */

/* Return the issue rate reported to the scheduler, which is always 2.  */
static int
spu_sched_issue_rate (void)
{
  const int insns_per_cycle = 2;

  return insns_per_cycle;
}
|
2888 |
|
|
|
2889 |
|
|
static int
|
2890 |
|
|
uses_ls_unit(rtx insn)
|
2891 |
|
|
{
|
2892 |
|
|
rtx set = single_set (insn);
|
2893 |
|
|
if (set != 0
|
2894 |
|
|
&& (GET_CODE (SET_DEST (set)) == MEM
|
2895 |
|
|
|| GET_CODE (SET_SRC (set)) == MEM))
|
2896 |
|
|
return 1;
|
2897 |
|
|
return 0;
|
2898 |
|
|
}
|
2899 |
|
|
|
2900 |
|
|
static int
|
2901 |
|
|
get_pipe (rtx insn)
|
2902 |
|
|
{
|
2903 |
|
|
enum attr_type t;
|
2904 |
|
|
/* Handle inline asm */
|
2905 |
|
|
if (INSN_CODE (insn) == -1)
|
2906 |
|
|
return -1;
|
2907 |
|
|
t = get_attr_type (insn);
|
2908 |
|
|
switch (t)
|
2909 |
|
|
{
|
2910 |
|
|
case TYPE_CONVERT:
|
2911 |
|
|
return -2;
|
2912 |
|
|
case TYPE_MULTI0:
|
2913 |
|
|
return -1;
|
2914 |
|
|
|
2915 |
|
|
case TYPE_FX2:
|
2916 |
|
|
case TYPE_FX3:
|
2917 |
|
|
case TYPE_SPR:
|
2918 |
|
|
case TYPE_NOP:
|
2919 |
|
|
case TYPE_FXB:
|
2920 |
|
|
case TYPE_FPD:
|
2921 |
|
|
case TYPE_FP6:
|
2922 |
|
|
case TYPE_FP7:
|
2923 |
|
|
return 0;
|
2924 |
|
|
|
2925 |
|
|
case TYPE_LNOP:
|
2926 |
|
|
case TYPE_SHUF:
|
2927 |
|
|
case TYPE_LOAD:
|
2928 |
|
|
case TYPE_STORE:
|
2929 |
|
|
case TYPE_BR:
|
2930 |
|
|
case TYPE_MULTI1:
|
2931 |
|
|
case TYPE_HBR:
|
2932 |
|
|
case TYPE_IPREFETCH:
|
2933 |
|
|
return 1;
|
2934 |
|
|
default:
|
2935 |
|
|
abort ();
|
2936 |
|
|
}
|
2937 |
|
|
}
|
2938 |
|
|
|
2939 |
|
|
|
2940 |
|
|
/* haifa-sched.c has a static variable that keeps track of the current
   cycle.  It is passed to spu_sched_reorder, and we record it here for
   use by spu_sched_variable_issue.  It won't be accurate if the
   scheduler updates its clock_var between the two calls.  */
static int clock_var;

/* This is used to keep track of insn alignment.  Set to 0 at the
   beginning of each block and increased by the "length" attr of each
   insn scheduled.  */
static int spu_sched_length;

/* Record when we've issued pipe0 and pipe1 insns so we can reorder the
   ready list appropriately in spu_sched_reorder().  */
static int pipe0_clock, pipe1_clock;

/* Values of the clock and of the insn priority remembered from the
   previous insn handled by spu_sched_variable_issue.  */
static int prev_clock_var, prev_priority;

/* The SPU needs to load the next ilb sometime during the execution of
   the previous ilb.  There is a potential conflict if every cycle has a
   load or store.  To avoid the conflict we make sure the load/store
   unit is free for at least one cycle during the execution of insns in
   the previous ilb.  */
static int spu_ls_first, prev_ls_clock;
|
2967 |
|
|
|
2968 |
|
|
static void
|
2969 |
|
|
spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
|
2970 |
|
|
int max_ready ATTRIBUTE_UNUSED)
|
2971 |
|
|
{
|
2972 |
|
|
spu_sched_length = 0;
|
2973 |
|
|
}
|
2974 |
|
|
|
2975 |
|
|
static void
|
2976 |
|
|
spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
|
2977 |
|
|
int max_ready ATTRIBUTE_UNUSED)
|
2978 |
|
|
{
|
2979 |
|
|
if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
|
2980 |
|
|
{
|
2981 |
|
|
/* When any block might be at least 8-byte aligned, assume they
|
2982 |
|
|
will all be at least 8-byte aligned to make sure dual issue
|
2983 |
|
|
works out correctly. */
|
2984 |
|
|
spu_sched_length = 0;
|
2985 |
|
|
}
|
2986 |
|
|
spu_ls_first = INT_MAX;
|
2987 |
|
|
clock_var = -1;
|
2988 |
|
|
prev_ls_clock = -1;
|
2989 |
|
|
pipe0_clock = -1;
|
2990 |
|
|
pipe1_clock = -1;
|
2991 |
|
|
prev_clock_var = -1;
|
2992 |
|
|
prev_priority = -1;
|
2993 |
|
|
}
|
2994 |
|
|
|
2995 |
|
|
static int
|
2996 |
|
|
spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
|
2997 |
|
|
int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
|
2998 |
|
|
{
|
2999 |
|
|
int len;
|
3000 |
|
|
int p;
|
3001 |
|
|
if (GET_CODE (PATTERN (insn)) == USE
|
3002 |
|
|
|| GET_CODE (PATTERN (insn)) == CLOBBER
|
3003 |
|
|
|| (len = get_attr_length (insn)) == 0)
|
3004 |
|
|
return more;
|
3005 |
|
|
|
3006 |
|
|
spu_sched_length += len;
|
3007 |
|
|
|
3008 |
|
|
/* Reset on inline asm */
|
3009 |
|
|
if (INSN_CODE (insn) == -1)
|
3010 |
|
|
{
|
3011 |
|
|
spu_ls_first = INT_MAX;
|
3012 |
|
|
pipe0_clock = -1;
|
3013 |
|
|
pipe1_clock = -1;
|
3014 |
|
|
return 0;
|
3015 |
|
|
}
|
3016 |
|
|
p = get_pipe (insn);
|
3017 |
|
|
if (p == 0)
|
3018 |
|
|
pipe0_clock = clock_var;
|
3019 |
|
|
else
|
3020 |
|
|
pipe1_clock = clock_var;
|
3021 |
|
|
|
3022 |
|
|
if (in_spu_reorg)
|
3023 |
|
|
{
|
3024 |
|
|
if (clock_var - prev_ls_clock > 1
|
3025 |
|
|
|| INSN_CODE (insn) == CODE_FOR_iprefetch)
|
3026 |
|
|
spu_ls_first = INT_MAX;
|
3027 |
|
|
if (uses_ls_unit (insn))
|
3028 |
|
|
{
|
3029 |
|
|
if (spu_ls_first == INT_MAX)
|
3030 |
|
|
spu_ls_first = spu_sched_length;
|
3031 |
|
|
prev_ls_clock = clock_var;
|
3032 |
|
|
}
|
3033 |
|
|
|
3034 |
|
|
/* The scheduler hasn't inserted the nop, but we will later on.
|
3035 |
|
|
Include those nops in spu_sched_length. */
|
3036 |
|
|
if (prev_clock_var == clock_var && (spu_sched_length & 7))
|
3037 |
|
|
spu_sched_length += 4;
|
3038 |
|
|
prev_clock_var = clock_var;
|
3039 |
|
|
|
3040 |
|
|
/* more is -1 when called from spu_sched_reorder for new insns
|
3041 |
|
|
that don't have INSN_PRIORITY */
|
3042 |
|
|
if (more >= 0)
|
3043 |
|
|
prev_priority = INSN_PRIORITY (insn);
|
3044 |
|
|
}
|
3045 |
|
|
|
3046 |
|
|
/* Always try issueing more insns. spu_sched_reorder will decide
|
3047 |
|
|
when the cycle should be advanced. */
|
3048 |
|
|
return 1;
|
3049 |
|
|
}
|
3050 |
|
|
|
3051 |
|
|
/* This function is called for both TARGET_SCHED_REORDER and
|
3052 |
|
|
TARGET_SCHED_REORDER2. */
|
3053 |
|
|
static int
|
3054 |
|
|
spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
|
3055 |
|
|
rtx *ready, int *nreadyp, int clock)
|
3056 |
|
|
{
|
3057 |
|
|
int i, nready = *nreadyp;
|
3058 |
|
|
int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
|
3059 |
|
|
rtx insn;
|
3060 |
|
|
|
3061 |
|
|
clock_var = clock;
|
3062 |
|
|
|
3063 |
|
|
if (nready <= 0 || pipe1_clock >= clock)
|
3064 |
|
|
return 0;
|
3065 |
|
|
|
3066 |
|
|
/* Find any rtl insns that don't generate assembly insns and schedule
|
3067 |
|
|
them first. */
|
3068 |
|
|
for (i = nready - 1; i >= 0; i--)
|
3069 |
|
|
{
|
3070 |
|
|
insn = ready[i];
|
3071 |
|
|
if (INSN_CODE (insn) == -1
|
3072 |
|
|
|| INSN_CODE (insn) == CODE_FOR_blockage
|
3073 |
|
|
|| (INSN_P (insn) && get_attr_length (insn) == 0))
|
3074 |
|
|
{
|
3075 |
|
|
ready[i] = ready[nready - 1];
|
3076 |
|
|
ready[nready - 1] = insn;
|
3077 |
|
|
return 1;
|
3078 |
|
|
}
|
3079 |
|
|
}
|
3080 |
|
|
|
3081 |
|
|
pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
|
3082 |
|
|
for (i = 0; i < nready; i++)
|
3083 |
|
|
if (INSN_CODE (ready[i]) != -1)
|
3084 |
|
|
{
|
3085 |
|
|
insn = ready[i];
|
3086 |
|
|
switch (get_attr_type (insn))
|
3087 |
|
|
{
|
3088 |
|
|
default:
|
3089 |
|
|
case TYPE_MULTI0:
|
3090 |
|
|
case TYPE_CONVERT:
|
3091 |
|
|
case TYPE_FX2:
|
3092 |
|
|
case TYPE_FX3:
|
3093 |
|
|
case TYPE_SPR:
|
3094 |
|
|
case TYPE_NOP:
|
3095 |
|
|
case TYPE_FXB:
|
3096 |
|
|
case TYPE_FPD:
|
3097 |
|
|
case TYPE_FP6:
|
3098 |
|
|
case TYPE_FP7:
|
3099 |
|
|
pipe_0 = i;
|
3100 |
|
|
break;
|
3101 |
|
|
case TYPE_LOAD:
|
3102 |
|
|
case TYPE_STORE:
|
3103 |
|
|
pipe_ls = i;
|
3104 |
|
|
case TYPE_LNOP:
|
3105 |
|
|
case TYPE_SHUF:
|
3106 |
|
|
case TYPE_BR:
|
3107 |
|
|
case TYPE_MULTI1:
|
3108 |
|
|
case TYPE_HBR:
|
3109 |
|
|
pipe_1 = i;
|
3110 |
|
|
break;
|
3111 |
|
|
case TYPE_IPREFETCH:
|
3112 |
|
|
pipe_hbrp = i;
|
3113 |
|
|
break;
|
3114 |
|
|
}
|
3115 |
|
|
}
|
3116 |
|
|
|
3117 |
|
|
/* In the first scheduling phase, schedule loads and stores together
|
3118 |
|
|
to increase the chance they will get merged during postreload CSE. */
|
3119 |
|
|
if (!reload_completed && pipe_ls >= 0)
|
3120 |
|
|
{
|
3121 |
|
|
insn = ready[pipe_ls];
|
3122 |
|
|
ready[pipe_ls] = ready[nready - 1];
|
3123 |
|
|
ready[nready - 1] = insn;
|
3124 |
|
|
return 1;
|
3125 |
|
|
}
|
3126 |
|
|
|
3127 |
|
|
/* If there is an hbrp ready, prefer it over other pipe 1 insns. */
|
3128 |
|
|
if (pipe_hbrp >= 0)
|
3129 |
|
|
pipe_1 = pipe_hbrp;
|
3130 |
|
|
|
3131 |
|
|
/* When we have loads/stores in every cycle of the last 15 insns and
|
3132 |
|
|
we are about to schedule another load/store, emit an hbrp insn
|
3133 |
|
|
instead. */
|
3134 |
|
|
if (in_spu_reorg
|
3135 |
|
|
&& spu_sched_length - spu_ls_first >= 4 * 15
|
3136 |
|
|
&& !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
|
3137 |
|
|
{
|
3138 |
|
|
insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
|
3139 |
|
|
recog_memoized (insn);
|
3140 |
|
|
if (pipe0_clock < clock)
|
3141 |
|
|
PUT_MODE (insn, TImode);
|
3142 |
|
|
spu_sched_variable_issue (file, verbose, insn, -1);
|
3143 |
|
|
return 0;
|
3144 |
|
|
}
|
3145 |
|
|
|
3146 |
|
|
/* In general, we want to emit nops to increase dual issue, but dual
|
3147 |
|
|
issue isn't faster when one of the insns could be scheduled later
|
3148 |
|
|
without effecting the critical path. We look at INSN_PRIORITY to
|
3149 |
|
|
make a good guess, but it isn't perfect so -mdual-nops=n can be
|
3150 |
|
|
used to effect it. */
|
3151 |
|
|
if (in_spu_reorg && spu_dual_nops < 10)
|
3152 |
|
|
{
|
3153 |
|
|
/* When we are at an even address and we are not issueing nops to
|
3154 |
|
|
improve scheduling then we need to advance the cycle. */
|
3155 |
|
|
if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
|
3156 |
|
|
&& (spu_dual_nops == 0
|
3157 |
|
|
|| (pipe_1 != -1
|
3158 |
|
|
&& prev_priority >
|
3159 |
|
|
INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
|
3160 |
|
|
return 0;
|
3161 |
|
|
|
3162 |
|
|
/* When at an odd address, schedule the highest priority insn
|
3163 |
|
|
without considering pipeline. */
|
3164 |
|
|
if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
|
3165 |
|
|
&& (spu_dual_nops == 0
|
3166 |
|
|
|| (prev_priority >
|
3167 |
|
|
INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
|
3168 |
|
|
return 1;
|
3169 |
|
|
}
|
3170 |
|
|
|
3171 |
|
|
|
3172 |
|
|
/* We haven't issued a pipe0 insn yet this cycle, if there is a
|
3173 |
|
|
pipe0 insn in the ready list, schedule it. */
|
3174 |
|
|
if (pipe0_clock < clock && pipe_0 >= 0)
|
3175 |
|
|
schedule_i = pipe_0;
|
3176 |
|
|
|
3177 |
|
|
/* Either we've scheduled a pipe0 insn already or there is no pipe0
|
3178 |
|
|
insn to schedule. Put a pipe1 insn at the front of the ready list. */
|
3179 |
|
|
else
|
3180 |
|
|
schedule_i = pipe_1;
|
3181 |
|
|
|
3182 |
|
|
if (schedule_i > -1)
|
3183 |
|
|
{
|
3184 |
|
|
insn = ready[schedule_i];
|
3185 |
|
|
ready[schedule_i] = ready[nready - 1];
|
3186 |
|
|
ready[nready - 1] = insn;
|
3187 |
|
|
return 1;
|
3188 |
|
|
}
|
3189 |
|
|
return 0;
|
3190 |
|
|
}
|
3191 |
|
|
|
3192 |
|
|
/* INSN is dependent on DEP_INSN. */
|
3193 |
|
|
static int
|
3194 |
|
|
spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
|
3195 |
|
|
{
|
3196 |
|
|
rtx set;
|
3197 |
|
|
|
3198 |
|
|
/* The blockage pattern is used to prevent instructions from being
|
3199 |
|
|
moved across it and has no cost. */
|
3200 |
|
|
if (INSN_CODE (insn) == CODE_FOR_blockage
|
3201 |
|
|
|| INSN_CODE (dep_insn) == CODE_FOR_blockage)
|
3202 |
|
|
return 0;
|
3203 |
|
|
|
3204 |
|
|
if ((INSN_P (insn) && get_attr_length (insn) == 0)
|
3205 |
|
|
|| (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
|
3206 |
|
|
return 0;
|
3207 |
|
|
|
3208 |
|
|
/* Make sure hbrps are spread out. */
|
3209 |
|
|
if (INSN_CODE (insn) == CODE_FOR_iprefetch
|
3210 |
|
|
&& INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
|
3211 |
|
|
return 8;
|
3212 |
|
|
|
3213 |
|
|
/* Make sure hints and hbrps are 2 cycles apart. */
|
3214 |
|
|
if ((INSN_CODE (insn) == CODE_FOR_iprefetch
|
3215 |
|
|
|| INSN_CODE (insn) == CODE_FOR_hbr)
|
3216 |
|
|
&& (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
|
3217 |
|
|
|| INSN_CODE (dep_insn) == CODE_FOR_hbr))
|
3218 |
|
|
return 2;
|
3219 |
|
|
|
3220 |
|
|
/* An hbrp has no real dependency on other insns. */
|
3221 |
|
|
if (INSN_CODE (insn) == CODE_FOR_iprefetch
|
3222 |
|
|
|| INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
|
3223 |
|
|
return 0;
|
3224 |
|
|
|
3225 |
|
|
/* Assuming that it is unlikely an argument register will be used in
|
3226 |
|
|
the first cycle of the called function, we reduce the cost for
|
3227 |
|
|
slightly better scheduling of dep_insn. When not hinted, the
|
3228 |
|
|
mispredicted branch would hide the cost as well. */
|
3229 |
|
|
if (CALL_P (insn))
|
3230 |
|
|
{
|
3231 |
|
|
rtx target = get_branch_target (insn);
|
3232 |
|
|
if (GET_CODE (target) != REG || !set_of (target, insn))
|
3233 |
|
|
return cost - 2;
|
3234 |
|
|
return cost;
|
3235 |
|
|
}
|
3236 |
|
|
|
3237 |
|
|
/* And when returning from a function, let's assume the return values
|
3238 |
|
|
are completed sooner too. */
|
3239 |
|
|
if (CALL_P (dep_insn))
|
3240 |
|
|
return cost - 2;
|
3241 |
|
|
|
3242 |
|
|
/* Make sure an instruction that loads from the back chain is schedule
|
3243 |
|
|
away from the return instruction so a hint is more likely to get
|
3244 |
|
|
issued. */
|
3245 |
|
|
if (INSN_CODE (insn) == CODE_FOR__return
|
3246 |
|
|
&& (set = single_set (dep_insn))
|
3247 |
|
|
&& GET_CODE (SET_DEST (set)) == REG
|
3248 |
|
|
&& REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
|
3249 |
|
|
return 20;
|
3250 |
|
|
|
3251 |
|
|
/* The dfa scheduler sets cost to 0 for all anti-dependencies and the
|
3252 |
|
|
scheduler makes every insn in a block anti-dependent on the final
|
3253 |
|
|
jump_insn. We adjust here so higher cost insns will get scheduled
|
3254 |
|
|
earlier. */
|
3255 |
|
|
if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
|
3256 |
|
|
return insn_cost (dep_insn) - 3;
|
3257 |
|
|
|
3258 |
|
|
return cost;
|
3259 |
|
|
}
|
3260 |
|
|
|
3261 |
|
|
/* Create a CONST_DOUBLE from a string. */
|
3262 |
|
|
struct rtx_def *
|
3263 |
|
|
spu_float_const (const char *string, enum machine_mode mode)
|
3264 |
|
|
{
|
3265 |
|
|
REAL_VALUE_TYPE value;
|
3266 |
|
|
value = REAL_VALUE_ATOF (string, mode);
|
3267 |
|
|
return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
|
3268 |
|
|
}
|
3269 |
|
|
|
3270 |
|
|
int
|
3271 |
|
|
spu_constant_address_p (rtx x)
|
3272 |
|
|
{
|
3273 |
|
|
return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
|
3274 |
|
|
|| GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
|
3275 |
|
|
|| GET_CODE (x) == HIGH);
|
3276 |
|
|
}
|
3277 |
|
|
|
3278 |
|
|
static enum spu_immediate
|
3279 |
|
|
which_immediate_load (HOST_WIDE_INT val)
|
3280 |
|
|
{
|
3281 |
|
|
gcc_assert (val == trunc_int_for_mode (val, SImode));
|
3282 |
|
|
|
3283 |
|
|
if (val >= -0x8000 && val <= 0x7fff)
|
3284 |
|
|
return SPU_IL;
|
3285 |
|
|
if (val >= 0 && val <= 0x3ffff)
|
3286 |
|
|
return SPU_ILA;
|
3287 |
|
|
if ((val & 0xffff) == ((val >> 16) & 0xffff))
|
3288 |
|
|
return SPU_ILH;
|
3289 |
|
|
if ((val & 0xffff) == 0)
|
3290 |
|
|
return SPU_ILHU;
|
3291 |
|
|
|
3292 |
|
|
return SPU_NONE;
|
3293 |
|
|
}
|
3294 |
|
|
|
3295 |
|
|
/* Return true when OP can be loaded by one of the il instructions, or
|
3296 |
|
|
when flow2 is not completed and OP can be loaded using ilhu and iohl. */
|
3297 |
|
|
int
|
3298 |
|
|
immediate_load_p (rtx op, enum machine_mode mode)
|
3299 |
|
|
{
|
3300 |
|
|
if (CONSTANT_P (op))
|
3301 |
|
|
{
|
3302 |
|
|
enum immediate_class c = classify_immediate (op, mode);
|
3303 |
|
|
return c == IC_IL1 || c == IC_IL1s
|
3304 |
|
|
|| (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
|
3305 |
|
|
}
|
3306 |
|
|
return 0;
|
3307 |
|
|
}
|
3308 |
|
|
|
3309 |
|
|
/* Return true if the first SIZE bytes of ARR is a constant that can be
   generated with cbd, chd, cwd or cdd.  When non-NULL, PRUN and PSTART
   represent the size and offset of the instruction to use.

   ARR must hold 16 bytes.  A byte at index I matches the background
   when it equals I+16.  Exactly one naturally aligned run is allowed
   among the background bytes: the byte 3, the bytes 2,3, the bytes
   0..3 or the bytes 0..7.  When SIZE is less than 16 and no run was
   found, a run of 1 starting at offset 15 is reported.  */
static int
cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
{
  int cpat, run, i, start;
  cpat = 1;
  run = 0;
  start = -1;
  for (i = 0; i < size && cpat; i++)
    if (arr[i] != i+16)
      {
        if (!run)
          {
            start = i;
            if (arr[i] == 3)
              run = 1;
            /* Check the index before looking at the following byte so
               that nothing past the 16-byte array is read.  */
            else if (arr[i] == 2 && i+1 < 16 && arr[i+1] == 3)
              run = 2;
            else if (arr[i] == 0)
              {
                /* Bounds test first, for the same reason.  */
                while (i+run < 16 && arr[i+run] == run)
                  run++;
                if (run != 4 && run != 8)
                  cpat = 0;
              }
            else
              cpat = 0;
            /* The run must be aligned to its own size.  */
            if ((i & (run-1)) != 0)
              cpat = 0;
            /* Step to the last byte of the run; the loop increment
               then moves on to the byte right after it, so that byte
               is validated too instead of being skipped.  */
            i += run - 1;
          }
        else
          /* A second mismatch after the run has been found.  */
          cpat = 0;
      }
  if (cpat && (run || size < 16))
    {
      if (run == 0)
        run = 1;
      if (prun)
        *prun = run;
      if (pstart)
        *pstart = start == -1 ? 16-run : start;
      return 1;
    }
  return 0;
}
|
3357 |
|
|
|
3358 |
|
|
/* OP is a CONSTANT_P. Determine what instructions can be used to load
|
3359 |
|
|
it into a register. MODE is only valid when OP is a CONST_INT. */
|
3360 |
|
|
static enum immediate_class
|
3361 |
|
|
classify_immediate (rtx op, enum machine_mode mode)
|
3362 |
|
|
{
|
3363 |
|
|
HOST_WIDE_INT val;
|
3364 |
|
|
unsigned char arr[16];
|
3365 |
|
|
int i, j, repeated, fsmbi, repeat;
|
3366 |
|
|
|
3367 |
|
|
gcc_assert (CONSTANT_P (op));
|
3368 |
|
|
|
3369 |
|
|
if (GET_MODE (op) != VOIDmode)
|
3370 |
|
|
mode = GET_MODE (op);
|
3371 |
|
|
|
3372 |
|
|
/* A V4SI const_vector with all identical symbols is ok. */
|
3373 |
|
|
if (!flag_pic
|
3374 |
|
|
&& mode == V4SImode
|
3375 |
|
|
&& GET_CODE (op) == CONST_VECTOR
|
3376 |
|
|
&& GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
|
3377 |
|
|
&& GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
|
3378 |
|
|
&& CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
|
3379 |
|
|
&& CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
|
3380 |
|
|
&& CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
|
3381 |
|
|
op = CONST_VECTOR_ELT (op, 0);
|
3382 |
|
|
|
3383 |
|
|
switch (GET_CODE (op))
|
3384 |
|
|
{
|
3385 |
|
|
case SYMBOL_REF:
|
3386 |
|
|
case LABEL_REF:
|
3387 |
|
|
return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
|
3388 |
|
|
|
3389 |
|
|
case CONST:
|
3390 |
|
|
/* We can never know if the resulting address fits in 18 bits and can be
|
3391 |
|
|
loaded with ila. For now, assume the address will not overflow if
|
3392 |
|
|
the displacement is "small" (fits 'K' constraint). */
|
3393 |
|
|
if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
|
3394 |
|
|
{
|
3395 |
|
|
rtx sym = XEXP (XEXP (op, 0), 0);
|
3396 |
|
|
rtx cst = XEXP (XEXP (op, 0), 1);
|
3397 |
|
|
|
3398 |
|
|
if (GET_CODE (sym) == SYMBOL_REF
|
3399 |
|
|
&& GET_CODE (cst) == CONST_INT
|
3400 |
|
|
&& satisfies_constraint_K (cst))
|
3401 |
|
|
return IC_IL1s;
|
3402 |
|
|
}
|
3403 |
|
|
return IC_IL2s;
|
3404 |
|
|
|
3405 |
|
|
case HIGH:
|
3406 |
|
|
return IC_IL1s;
|
3407 |
|
|
|
3408 |
|
|
case CONST_VECTOR:
|
3409 |
|
|
for (i = 0; i < GET_MODE_NUNITS (mode); i++)
|
3410 |
|
|
if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
|
3411 |
|
|
&& GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
|
3412 |
|
|
return IC_POOL;
|
3413 |
|
|
/* Fall through. */
|
3414 |
|
|
|
3415 |
|
|
case CONST_INT:
|
3416 |
|
|
case CONST_DOUBLE:
|
3417 |
|
|
constant_to_array (mode, op, arr);
|
3418 |
|
|
|
3419 |
|
|
/* Check that each 4-byte slot is identical. */
|
3420 |
|
|
repeated = 1;
|
3421 |
|
|
for (i = 4; i < 16; i += 4)
|
3422 |
|
|
for (j = 0; j < 4; j++)
|
3423 |
|
|
if (arr[j] != arr[i + j])
|
3424 |
|
|
repeated = 0;
|
3425 |
|
|
|
3426 |
|
|
if (repeated)
|
3427 |
|
|
{
|
3428 |
|
|
val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
|
3429 |
|
|
val = trunc_int_for_mode (val, SImode);
|
3430 |
|
|
|
3431 |
|
|
if (which_immediate_load (val) != SPU_NONE)
|
3432 |
|
|
return IC_IL1;
|
3433 |
|
|
}
|
3434 |
|
|
|
3435 |
|
|
/* Any mode of 2 bytes or smaller can be loaded with an il
|
3436 |
|
|
instruction. */
|
3437 |
|
|
gcc_assert (GET_MODE_SIZE (mode) > 2);
|
3438 |
|
|
|
3439 |
|
|
fsmbi = 1;
|
3440 |
|
|
repeat = 0;
|
3441 |
|
|
for (i = 0; i < 16 && fsmbi; i++)
|
3442 |
|
|
if (arr[i] != 0 && repeat == 0)
|
3443 |
|
|
repeat = arr[i];
|
3444 |
|
|
else if (arr[i] != 0 && arr[i] != repeat)
|
3445 |
|
|
fsmbi = 0;
|
3446 |
|
|
if (fsmbi)
|
3447 |
|
|
return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
|
3448 |
|
|
|
3449 |
|
|
if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
|
3450 |
|
|
return IC_CPAT;
|
3451 |
|
|
|
3452 |
|
|
if (repeated)
|
3453 |
|
|
return IC_IL2;
|
3454 |
|
|
|
3455 |
|
|
return IC_POOL;
|
3456 |
|
|
default:
|
3457 |
|
|
break;
|
3458 |
|
|
}
|
3459 |
|
|
gcc_unreachable ();
|
3460 |
|
|
}
|
3461 |
|
|
|
3462 |
|
|
static enum spu_immediate
|
3463 |
|
|
which_logical_immediate (HOST_WIDE_INT val)
|
3464 |
|
|
{
|
3465 |
|
|
gcc_assert (val == trunc_int_for_mode (val, SImode));
|
3466 |
|
|
|
3467 |
|
|
if (val >= -0x200 && val <= 0x1ff)
|
3468 |
|
|
return SPU_ORI;
|
3469 |
|
|
if (val >= 0 && val <= 0xffff)
|
3470 |
|
|
return SPU_IOHL;
|
3471 |
|
|
if ((val & 0xffff) == ((val >> 16) & 0xffff))
|
3472 |
|
|
{
|
3473 |
|
|
val = trunc_int_for_mode (val, HImode);
|
3474 |
|
|
if (val >= -0x200 && val <= 0x1ff)
|
3475 |
|
|
return SPU_ORHI;
|
3476 |
|
|
if ((val & 0xff) == ((val >> 8) & 0xff))
|
3477 |
|
|
{
|
3478 |
|
|
val = trunc_int_for_mode (val, QImode);
|
3479 |
|
|
if (val >= -0x200 && val <= 0x1ff)
|
3480 |
|
|
return SPU_ORBI;
|
3481 |
|
|
}
|
3482 |
|
|
}
|
3483 |
|
|
return SPU_NONE;
|
3484 |
|
|
}
|
3485 |
|
|
|
3486 |
|
|
/* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
|
3487 |
|
|
CONST_DOUBLEs. */
|
3488 |
|
|
static int
|
3489 |
|
|
const_vector_immediate_p (rtx x)
|
3490 |
|
|
{
|
3491 |
|
|
int i;
|
3492 |
|
|
gcc_assert (GET_CODE (x) == CONST_VECTOR);
|
3493 |
|
|
for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
|
3494 |
|
|
if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
|
3495 |
|
|
&& GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
|
3496 |
|
|
return 0;
|
3497 |
|
|
return 1;
|
3498 |
|
|
}
|
3499 |
|
|
|
3500 |
|
|
int
|
3501 |
|
|
logical_immediate_p (rtx op, enum machine_mode mode)
|
3502 |
|
|
{
|
3503 |
|
|
HOST_WIDE_INT val;
|
3504 |
|
|
unsigned char arr[16];
|
3505 |
|
|
int i, j;
|
3506 |
|
|
|
3507 |
|
|
gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
|
3508 |
|
|
|| GET_CODE (op) == CONST_VECTOR);
|
3509 |
|
|
|
3510 |
|
|
if (GET_CODE (op) == CONST_VECTOR
|
3511 |
|
|
&& !const_vector_immediate_p (op))
|
3512 |
|
|
return 0;
|
3513 |
|
|
|
3514 |
|
|
if (GET_MODE (op) != VOIDmode)
|
3515 |
|
|
mode = GET_MODE (op);
|
3516 |
|
|
|
3517 |
|
|
constant_to_array (mode, op, arr);
|
3518 |
|
|
|
3519 |
|
|
/* Check that bytes are repeated. */
|
3520 |
|
|
for (i = 4; i < 16; i += 4)
|
3521 |
|
|
for (j = 0; j < 4; j++)
|
3522 |
|
|
if (arr[j] != arr[i + j])
|
3523 |
|
|
return 0;
|
3524 |
|
|
|
3525 |
|
|
val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
|
3526 |
|
|
val = trunc_int_for_mode (val, SImode);
|
3527 |
|
|
|
3528 |
|
|
i = which_logical_immediate (val);
|
3529 |
|
|
return i != SPU_NONE && i != SPU_IOHL;
|
3530 |
|
|
}
|
3531 |
|
|
|
3532 |
|
|
int
|
3533 |
|
|
iohl_immediate_p (rtx op, enum machine_mode mode)
|
3534 |
|
|
{
|
3535 |
|
|
HOST_WIDE_INT val;
|
3536 |
|
|
unsigned char arr[16];
|
3537 |
|
|
int i, j;
|
3538 |
|
|
|
3539 |
|
|
gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
|
3540 |
|
|
|| GET_CODE (op) == CONST_VECTOR);
|
3541 |
|
|
|
3542 |
|
|
if (GET_CODE (op) == CONST_VECTOR
|
3543 |
|
|
&& !const_vector_immediate_p (op))
|
3544 |
|
|
return 0;
|
3545 |
|
|
|
3546 |
|
|
if (GET_MODE (op) != VOIDmode)
|
3547 |
|
|
mode = GET_MODE (op);
|
3548 |
|
|
|
3549 |
|
|
constant_to_array (mode, op, arr);
|
3550 |
|
|
|
3551 |
|
|
/* Check that bytes are repeated. */
|
3552 |
|
|
for (i = 4; i < 16; i += 4)
|
3553 |
|
|
for (j = 0; j < 4; j++)
|
3554 |
|
|
if (arr[j] != arr[i + j])
|
3555 |
|
|
return 0;
|
3556 |
|
|
|
3557 |
|
|
val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
|
3558 |
|
|
val = trunc_int_for_mode (val, SImode);
|
3559 |
|
|
|
3560 |
|
|
return val >= 0 && val <= 0xffff;
|
3561 |
|
|
}
|
3562 |
|
|
|
3563 |
|
|
int
|
3564 |
|
|
arith_immediate_p (rtx op, enum machine_mode mode,
|
3565 |
|
|
HOST_WIDE_INT low, HOST_WIDE_INT high)
|
3566 |
|
|
{
|
3567 |
|
|
HOST_WIDE_INT val;
|
3568 |
|
|
unsigned char arr[16];
|
3569 |
|
|
int bytes, i, j;
|
3570 |
|
|
|
3571 |
|
|
gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
|
3572 |
|
|
|| GET_CODE (op) == CONST_VECTOR);
|
3573 |
|
|
|
3574 |
|
|
if (GET_CODE (op) == CONST_VECTOR
|
3575 |
|
|
&& !const_vector_immediate_p (op))
|
3576 |
|
|
return 0;
|
3577 |
|
|
|
3578 |
|
|
if (GET_MODE (op) != VOIDmode)
|
3579 |
|
|
mode = GET_MODE (op);
|
3580 |
|
|
|
3581 |
|
|
constant_to_array (mode, op, arr);
|
3582 |
|
|
|
3583 |
|
|
if (VECTOR_MODE_P (mode))
|
3584 |
|
|
mode = GET_MODE_INNER (mode);
|
3585 |
|
|
|
3586 |
|
|
bytes = GET_MODE_SIZE (mode);
|
3587 |
|
|
mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
|
3588 |
|
|
|
3589 |
|
|
/* Check that bytes are repeated. */
|
3590 |
|
|
for (i = bytes; i < 16; i += bytes)
|
3591 |
|
|
for (j = 0; j < bytes; j++)
|
3592 |
|
|
if (arr[j] != arr[i + j])
|
3593 |
|
|
return 0;
|
3594 |
|
|
|
3595 |
|
|
val = arr[0];
|
3596 |
|
|
for (j = 1; j < bytes; j++)
|
3597 |
|
|
val = (val << 8) | arr[j];
|
3598 |
|
|
|
3599 |
|
|
val = trunc_int_for_mode (val, mode);
|
3600 |
|
|
|
3601 |
|
|
return val >= low && val <= high;
|
3602 |
|
|
}
|
3603 |
|
|
|
3604 |
|
|
/* TRUE when op is an immediate and an exact power of 2, and given that
|
3605 |
|
|
OP is 2^scale, scale >= LOW && scale <= HIGH. When OP is a vector,
|
3606 |
|
|
all entries must be the same. */
|
3607 |
|
|
bool
|
3608 |
|
|
exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high)
|
3609 |
|
|
{
|
3610 |
|
|
enum machine_mode int_mode;
|
3611 |
|
|
HOST_WIDE_INT val;
|
3612 |
|
|
unsigned char arr[16];
|
3613 |
|
|
int bytes, i, j;
|
3614 |
|
|
|
3615 |
|
|
gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
|
3616 |
|
|
|| GET_CODE (op) == CONST_VECTOR);
|
3617 |
|
|
|
3618 |
|
|
if (GET_CODE (op) == CONST_VECTOR
|
3619 |
|
|
&& !const_vector_immediate_p (op))
|
3620 |
|
|
return 0;
|
3621 |
|
|
|
3622 |
|
|
if (GET_MODE (op) != VOIDmode)
|
3623 |
|
|
mode = GET_MODE (op);
|
3624 |
|
|
|
3625 |
|
|
constant_to_array (mode, op, arr);
|
3626 |
|
|
|
3627 |
|
|
if (VECTOR_MODE_P (mode))
|
3628 |
|
|
mode = GET_MODE_INNER (mode);
|
3629 |
|
|
|
3630 |
|
|
bytes = GET_MODE_SIZE (mode);
|
3631 |
|
|
int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
|
3632 |
|
|
|
3633 |
|
|
/* Check that bytes are repeated. */
|
3634 |
|
|
for (i = bytes; i < 16; i += bytes)
|
3635 |
|
|
for (j = 0; j < bytes; j++)
|
3636 |
|
|
if (arr[j] != arr[i + j])
|
3637 |
|
|
return 0;
|
3638 |
|
|
|
3639 |
|
|
val = arr[0];
|
3640 |
|
|
for (j = 1; j < bytes; j++)
|
3641 |
|
|
val = (val << 8) | arr[j];
|
3642 |
|
|
|
3643 |
|
|
val = trunc_int_for_mode (val, int_mode);
|
3644 |
|
|
|
3645 |
|
|
/* Currently, we only handle SFmode */
|
3646 |
|
|
gcc_assert (mode == SFmode);
|
3647 |
|
|
if (mode == SFmode)
|
3648 |
|
|
{
|
3649 |
|
|
int exp = (val >> 23) - 127;
|
3650 |
|
|
return val > 0 && (val & 0x007fffff) == 0
|
3651 |
|
|
&& exp >= low && exp <= high;
|
3652 |
|
|
}
|
3653 |
|
|
return FALSE;
|
3654 |
|
|
}
|
3655 |
|
|
|
3656 |
|
|
/* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
|
3657 |
|
|
|
3658 |
|
|
static int
|
3659 |
|
|
ea_symbol_ref (rtx *px, void *data ATTRIBUTE_UNUSED)
|
3660 |
|
|
{
|
3661 |
|
|
rtx x = *px;
|
3662 |
|
|
tree decl;
|
3663 |
|
|
|
3664 |
|
|
if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
|
3665 |
|
|
{
|
3666 |
|
|
rtx plus = XEXP (x, 0);
|
3667 |
|
|
rtx op0 = XEXP (plus, 0);
|
3668 |
|
|
rtx op1 = XEXP (plus, 1);
|
3669 |
|
|
if (GET_CODE (op1) == CONST_INT)
|
3670 |
|
|
x = op0;
|
3671 |
|
|
}
|
3672 |
|
|
|
3673 |
|
|
return (GET_CODE (x) == SYMBOL_REF
|
3674 |
|
|
&& (decl = SYMBOL_REF_DECL (x)) != 0
|
3675 |
|
|
&& TREE_CODE (decl) == VAR_DECL
|
3676 |
|
|
&& TYPE_ADDR_SPACE (TREE_TYPE (decl)));
|
3677 |
|
|
}
|
3678 |
|
|
|
3679 |
|
|
/* We accept:
|
3680 |
|
|
- any 32-bit constant (SImode, SFmode)
|
3681 |
|
|
- any constant that can be generated with fsmbi (any mode)
|
3682 |
|
|
- a 64-bit constant where the high and low bits are identical
|
3683 |
|
|
(DImode, DFmode)
|
3684 |
|
|
- a 128-bit constant where the four 32-bit words match. */
|
3685 |
|
|
int
|
3686 |
|
|
spu_legitimate_constant_p (rtx x)
|
3687 |
|
|
{
|
3688 |
|
|
if (GET_CODE (x) == HIGH)
|
3689 |
|
|
x = XEXP (x, 0);
|
3690 |
|
|
|
3691 |
|
|
/* Reject any __ea qualified reference. These can't appear in
|
3692 |
|
|
instructions but must be forced to the constant pool. */
|
3693 |
|
|
if (for_each_rtx (&x, ea_symbol_ref, 0))
|
3694 |
|
|
return 0;
|
3695 |
|
|
|
3696 |
|
|
/* V4SI with all identical symbols is valid. */
|
3697 |
|
|
if (!flag_pic
|
3698 |
|
|
&& GET_MODE (x) == V4SImode
|
3699 |
|
|
&& (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
|
3700 |
|
|
|| GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
|
3701 |
|
|
|| GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
|
3702 |
|
|
return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
|
3703 |
|
|
&& CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
|
3704 |
|
|
&& CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
|
3705 |
|
|
|
3706 |
|
|
if (GET_CODE (x) == CONST_VECTOR
|
3707 |
|
|
&& !const_vector_immediate_p (x))
|
3708 |
|
|
return 0;
|
3709 |
|
|
return 1;
|
3710 |
|
|
}
|
3711 |
|
|
|
3712 |
|
|
/* Valid address are:
|
3713 |
|
|
- symbol_ref, label_ref, const
|
3714 |
|
|
- reg
|
3715 |
|
|
- reg + const_int, where const_int is 16 byte aligned
|
3716 |
|
|
- reg + reg, alignment doesn't matter
|
3717 |
|
|
The alignment matters in the reg+const case because lqd and stqd
|
3718 |
|
|
ignore the 4 least significant bits of the const. We only care about
|
3719 |
|
|
16 byte modes because the expand phase will change all smaller MEM
|
3720 |
|
|
references to TImode. */
|
3721 |
|
|
static bool
|
3722 |
|
|
spu_legitimate_address_p (enum machine_mode mode,
|
3723 |
|
|
rtx x, bool reg_ok_strict)
|
3724 |
|
|
{
|
3725 |
|
|
int aligned = GET_MODE_SIZE (mode) >= 16;
|
3726 |
|
|
if (aligned
|
3727 |
|
|
&& GET_CODE (x) == AND
|
3728 |
|
|
&& GET_CODE (XEXP (x, 1)) == CONST_INT
|
3729 |
|
|
&& INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
|
3730 |
|
|
x = XEXP (x, 0);
|
3731 |
|
|
switch (GET_CODE (x))
|
3732 |
|
|
{
|
3733 |
|
|
case LABEL_REF:
|
3734 |
|
|
return !TARGET_LARGE_MEM;
|
3735 |
|
|
|
3736 |
|
|
case SYMBOL_REF:
|
3737 |
|
|
case CONST:
|
3738 |
|
|
/* Keep __ea references until reload so that spu_expand_mov can see them
|
3739 |
|
|
in MEMs. */
|
3740 |
|
|
if (ea_symbol_ref (&x, 0))
|
3741 |
|
|
return !reload_in_progress && !reload_completed;
|
3742 |
|
|
return !TARGET_LARGE_MEM;
|
3743 |
|
|
|
3744 |
|
|
case CONST_INT:
|
3745 |
|
|
return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
|
3746 |
|
|
|
3747 |
|
|
case SUBREG:
|
3748 |
|
|
x = XEXP (x, 0);
|
3749 |
|
|
if (REG_P (x))
|
3750 |
|
|
return 0;
|
3751 |
|
|
|
3752 |
|
|
case REG:
|
3753 |
|
|
return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
|
3754 |
|
|
|
3755 |
|
|
case PLUS:
|
3756 |
|
|
case LO_SUM:
|
3757 |
|
|
{
|
3758 |
|
|
rtx op0 = XEXP (x, 0);
|
3759 |
|
|
rtx op1 = XEXP (x, 1);
|
3760 |
|
|
if (GET_CODE (op0) == SUBREG)
|
3761 |
|
|
op0 = XEXP (op0, 0);
|
3762 |
|
|
if (GET_CODE (op1) == SUBREG)
|
3763 |
|
|
op1 = XEXP (op1, 0);
|
3764 |
|
|
if (GET_CODE (op0) == REG
|
3765 |
|
|
&& INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
|
3766 |
|
|
&& GET_CODE (op1) == CONST_INT
|
3767 |
|
|
&& INTVAL (op1) >= -0x2000
|
3768 |
|
|
&& INTVAL (op1) <= 0x1fff
|
3769 |
|
|
&& (!aligned || (INTVAL (op1) & 15) == 0))
|
3770 |
|
|
return TRUE;
|
3771 |
|
|
if (GET_CODE (op0) == REG
|
3772 |
|
|
&& INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
|
3773 |
|
|
&& GET_CODE (op1) == REG
|
3774 |
|
|
&& INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
|
3775 |
|
|
return TRUE;
|
3776 |
|
|
}
|
3777 |
|
|
break;
|
3778 |
|
|
|
3779 |
|
|
default:
|
3780 |
|
|
break;
|
3781 |
|
|
}
|
3782 |
|
|
return FALSE;
|
3783 |
|
|
}
|
3784 |
|
|
|
3785 |
|
|
/* Like spu_legitimate_address_p, except with named addresses. */
|
3786 |
|
|
static bool
|
3787 |
|
|
spu_addr_space_legitimate_address_p (enum machine_mode mode, rtx x,
|
3788 |
|
|
bool reg_ok_strict, addr_space_t as)
|
3789 |
|
|
{
|
3790 |
|
|
if (as == ADDR_SPACE_EA)
|
3791 |
|
|
return (REG_P (x) && (GET_MODE (x) == EAmode));
|
3792 |
|
|
|
3793 |
|
|
else if (as != ADDR_SPACE_GENERIC)
|
3794 |
|
|
gcc_unreachable ();
|
3795 |
|
|
|
3796 |
|
|
return spu_legitimate_address_p (mode, x, reg_ok_strict);
|
3797 |
|
|
}
|
3798 |
|
|
|
3799 |
|
|
/* When the address is reg + const_int, force the const_int into a
|
3800 |
|
|
register. */
|
3801 |
|
|
rtx
|
3802 |
|
|
spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
|
3803 |
|
|
enum machine_mode mode ATTRIBUTE_UNUSED)
|
3804 |
|
|
{
|
3805 |
|
|
rtx op0, op1;
|
3806 |
|
|
/* Make sure both operands are registers. */
|
3807 |
|
|
if (GET_CODE (x) == PLUS)
|
3808 |
|
|
{
|
3809 |
|
|
op0 = XEXP (x, 0);
|
3810 |
|
|
op1 = XEXP (x, 1);
|
3811 |
|
|
if (ALIGNED_SYMBOL_REF_P (op0))
|
3812 |
|
|
{
|
3813 |
|
|
op0 = force_reg (Pmode, op0);
|
3814 |
|
|
mark_reg_pointer (op0, 128);
|
3815 |
|
|
}
|
3816 |
|
|
else if (GET_CODE (op0) != REG)
|
3817 |
|
|
op0 = force_reg (Pmode, op0);
|
3818 |
|
|
if (ALIGNED_SYMBOL_REF_P (op1))
|
3819 |
|
|
{
|
3820 |
|
|
op1 = force_reg (Pmode, op1);
|
3821 |
|
|
mark_reg_pointer (op1, 128);
|
3822 |
|
|
}
|
3823 |
|
|
else if (GET_CODE (op1) != REG)
|
3824 |
|
|
op1 = force_reg (Pmode, op1);
|
3825 |
|
|
x = gen_rtx_PLUS (Pmode, op0, op1);
|
3826 |
|
|
}
|
3827 |
|
|
return x;
|
3828 |
|
|
}
|
3829 |
|
|
|
3830 |
|
|
/* Like spu_legitimate_address, except with named address support. */
|
3831 |
|
|
static rtx
|
3832 |
|
|
spu_addr_space_legitimize_address (rtx x, rtx oldx, enum machine_mode mode,
|
3833 |
|
|
addr_space_t as)
|
3834 |
|
|
{
|
3835 |
|
|
if (as != ADDR_SPACE_GENERIC)
|
3836 |
|
|
return x;
|
3837 |
|
|
|
3838 |
|
|
return spu_legitimize_address (x, oldx, mode);
|
3839 |
|
|
}
|
3840 |
|
|
|
3841 |
|
|
/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
|
3842 |
|
|
struct attribute_spec.handler. */
|
3843 |
|
|
static tree
|
3844 |
|
|
spu_handle_fndecl_attribute (tree * node,
|
3845 |
|
|
tree name,
|
3846 |
|
|
tree args ATTRIBUTE_UNUSED,
|
3847 |
|
|
int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
|
3848 |
|
|
{
|
3849 |
|
|
if (TREE_CODE (*node) != FUNCTION_DECL)
|
3850 |
|
|
{
|
3851 |
|
|
warning (0, "%qE attribute only applies to functions",
|
3852 |
|
|
name);
|
3853 |
|
|
*no_add_attrs = true;
|
3854 |
|
|
}
|
3855 |
|
|
|
3856 |
|
|
return NULL_TREE;
|
3857 |
|
|
}
|
3858 |
|
|
|
3859 |
|
|
/* Handle the "vector" attribute. */
|
3860 |
|
|
static tree
|
3861 |
|
|
spu_handle_vector_attribute (tree * node, tree name,
|
3862 |
|
|
tree args ATTRIBUTE_UNUSED,
|
3863 |
|
|
int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
|
3864 |
|
|
{
|
3865 |
|
|
tree type = *node, result = NULL_TREE;
|
3866 |
|
|
enum machine_mode mode;
|
3867 |
|
|
int unsigned_p;
|
3868 |
|
|
|
3869 |
|
|
while (POINTER_TYPE_P (type)
|
3870 |
|
|
|| TREE_CODE (type) == FUNCTION_TYPE
|
3871 |
|
|
|| TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
|
3872 |
|
|
type = TREE_TYPE (type);
|
3873 |
|
|
|
3874 |
|
|
mode = TYPE_MODE (type);
|
3875 |
|
|
|
3876 |
|
|
unsigned_p = TYPE_UNSIGNED (type);
|
3877 |
|
|
switch (mode)
|
3878 |
|
|
{
|
3879 |
|
|
case DImode:
|
3880 |
|
|
result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
|
3881 |
|
|
break;
|
3882 |
|
|
case SImode:
|
3883 |
|
|
result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
|
3884 |
|
|
break;
|
3885 |
|
|
case HImode:
|
3886 |
|
|
result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
|
3887 |
|
|
break;
|
3888 |
|
|
case QImode:
|
3889 |
|
|
result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
|
3890 |
|
|
break;
|
3891 |
|
|
case SFmode:
|
3892 |
|
|
result = V4SF_type_node;
|
3893 |
|
|
break;
|
3894 |
|
|
case DFmode:
|
3895 |
|
|
result = V2DF_type_node;
|
3896 |
|
|
break;
|
3897 |
|
|
default:
|
3898 |
|
|
break;
|
3899 |
|
|
}
|
3900 |
|
|
|
3901 |
|
|
/* Propagate qualifiers attached to the element type
|
3902 |
|
|
onto the vector type. */
|
3903 |
|
|
if (result && result != type && TYPE_QUALS (type))
|
3904 |
|
|
result = build_qualified_type (result, TYPE_QUALS (type));
|
3905 |
|
|
|
3906 |
|
|
*no_add_attrs = true; /* No need to hang on to the attribute. */
|
3907 |
|
|
|
3908 |
|
|
if (!result)
|
3909 |
|
|
warning (0, "%qE attribute ignored", name);
|
3910 |
|
|
else
|
3911 |
|
|
*node = lang_hooks.types.reconstruct_complex_type (*node, result);
|
3912 |
|
|
|
3913 |
|
|
return NULL_TREE;
|
3914 |
|
|
}
|
3915 |
|
|
|
3916 |
|
|
/* Return nonzero if FUNC is a naked function. */
|
3917 |
|
|
static int
|
3918 |
|
|
spu_naked_function_p (tree func)
|
3919 |
|
|
{
|
3920 |
|
|
tree a;
|
3921 |
|
|
|
3922 |
|
|
if (TREE_CODE (func) != FUNCTION_DECL)
|
3923 |
|
|
abort ();
|
3924 |
|
|
|
3925 |
|
|
a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
|
3926 |
|
|
return a != NULL_TREE;
|
3927 |
|
|
}
|
3928 |
|
|
|
3929 |
|
|
int
|
3930 |
|
|
spu_initial_elimination_offset (int from, int to)
|
3931 |
|
|
{
|
3932 |
|
|
int saved_regs_size = spu_saved_regs_size ();
|
3933 |
|
|
int sp_offset = 0;
|
3934 |
|
|
if (!current_function_is_leaf || crtl->outgoing_args_size
|
3935 |
|
|
|| get_frame_size () || saved_regs_size)
|
3936 |
|
|
sp_offset = STACK_POINTER_OFFSET;
|
3937 |
|
|
if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
|
3938 |
|
|
return get_frame_size () + crtl->outgoing_args_size + sp_offset;
|
3939 |
|
|
else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
|
3940 |
|
|
return get_frame_size ();
|
3941 |
|
|
else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
|
3942 |
|
|
return sp_offset + crtl->outgoing_args_size
|
3943 |
|
|
+ get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
|
3944 |
|
|
else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
|
3945 |
|
|
return get_frame_size () + saved_regs_size + sp_offset;
|
3946 |
|
|
else
|
3947 |
|
|
gcc_unreachable ();
|
3948 |
|
|
}
|
3949 |
|
|
|
3950 |
|
|
rtx
|
3951 |
|
|
spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
|
3952 |
|
|
{
|
3953 |
|
|
enum machine_mode mode = TYPE_MODE (type);
|
3954 |
|
|
int byte_size = ((mode == BLKmode)
|
3955 |
|
|
? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
|
3956 |
|
|
|
3957 |
|
|
/* Make sure small structs are left justified in a register. */
|
3958 |
|
|
if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
|
3959 |
|
|
&& byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
|
3960 |
|
|
{
|
3961 |
|
|
enum machine_mode smode;
|
3962 |
|
|
rtvec v;
|
3963 |
|
|
int i;
|
3964 |
|
|
int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
|
3965 |
|
|
int n = byte_size / UNITS_PER_WORD;
|
3966 |
|
|
v = rtvec_alloc (nregs);
|
3967 |
|
|
for (i = 0; i < n; i++)
|
3968 |
|
|
{
|
3969 |
|
|
RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
|
3970 |
|
|
gen_rtx_REG (TImode,
|
3971 |
|
|
FIRST_RETURN_REGNUM
|
3972 |
|
|
+ i),
|
3973 |
|
|
GEN_INT (UNITS_PER_WORD * i));
|
3974 |
|
|
byte_size -= UNITS_PER_WORD;
|
3975 |
|
|
}
|
3976 |
|
|
|
3977 |
|
|
if (n < nregs)
|
3978 |
|
|
{
|
3979 |
|
|
if (byte_size < 4)
|
3980 |
|
|
byte_size = 4;
|
3981 |
|
|
smode =
|
3982 |
|
|
smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
|
3983 |
|
|
RTVEC_ELT (v, n) =
|
3984 |
|
|
gen_rtx_EXPR_LIST (VOIDmode,
|
3985 |
|
|
gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
|
3986 |
|
|
GEN_INT (UNITS_PER_WORD * n));
|
3987 |
|
|
}
|
3988 |
|
|
return gen_rtx_PARALLEL (mode, v);
|
3989 |
|
|
}
|
3990 |
|
|
return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
|
3991 |
|
|
}
|
3992 |
|
|
|
3993 |
|
|
rtx
|
3994 |
|
|
spu_function_arg (CUMULATIVE_ARGS cum,
|
3995 |
|
|
enum machine_mode mode,
|
3996 |
|
|
tree type, int named ATTRIBUTE_UNUSED)
|
3997 |
|
|
{
|
3998 |
|
|
int byte_size;
|
3999 |
|
|
|
4000 |
|
|
if (cum >= MAX_REGISTER_ARGS)
|
4001 |
|
|
return 0;
|
4002 |
|
|
|
4003 |
|
|
byte_size = ((mode == BLKmode)
|
4004 |
|
|
? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
|
4005 |
|
|
|
4006 |
|
|
/* The ABI does not allow parameters to be passed partially in
|
4007 |
|
|
reg and partially in stack. */
|
4008 |
|
|
if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
|
4009 |
|
|
return 0;
|
4010 |
|
|
|
4011 |
|
|
/* Make sure small structs are left justified in a register. */
|
4012 |
|
|
if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
|
4013 |
|
|
&& byte_size < UNITS_PER_WORD && byte_size > 0)
|
4014 |
|
|
{
|
4015 |
|
|
enum machine_mode smode;
|
4016 |
|
|
rtx gr_reg;
|
4017 |
|
|
if (byte_size < 4)
|
4018 |
|
|
byte_size = 4;
|
4019 |
|
|
smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
|
4020 |
|
|
gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
|
4021 |
|
|
gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
|
4022 |
|
|
const0_rtx);
|
4023 |
|
|
return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
|
4024 |
|
|
}
|
4025 |
|
|
else
|
4026 |
|
|
return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
|
4027 |
|
|
}
|
4028 |
|
|
|
4029 |
|
|
/* Variable sized types are passed by reference. */
|
4030 |
|
|
static bool
|
4031 |
|
|
spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
|
4032 |
|
|
enum machine_mode mode ATTRIBUTE_UNUSED,
|
4033 |
|
|
const_tree type, bool named ATTRIBUTE_UNUSED)
|
4034 |
|
|
{
|
4035 |
|
|
return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
|
4036 |
|
|
}
|
4037 |
|
|
|
4038 |
|
|
|
4039 |
|
|
/* Var args. */
|
4040 |
|
|
|
4041 |
|
|
/* Create and return the va_list datatype.
|
4042 |
|
|
|
4043 |
|
|
On SPU, va_list is an array type equivalent to
|
4044 |
|
|
|
4045 |
|
|
typedef struct __va_list_tag
|
4046 |
|
|
{
|
4047 |
|
|
void *__args __attribute__((__aligned(16)));
|
4048 |
|
|
void *__skip __attribute__((__aligned(16)));
|
4049 |
|
|
|
4050 |
|
|
} va_list[1];
|
4051 |
|
|
|
4052 |
|
|
where __args points to the arg that will be returned by the next
|
4053 |
|
|
va_arg(), and __skip points to the previous stack frame such that
|
4054 |
|
|
when __args == __skip we should advance __args by 32 bytes. */
|
4055 |
|
|
static tree
|
4056 |
|
|
spu_build_builtin_va_list (void)
|
4057 |
|
|
{
|
4058 |
|
|
tree f_args, f_skip, record, type_decl;
|
4059 |
|
|
bool owp;
|
4060 |
|
|
|
4061 |
|
|
record = (*lang_hooks.types.make_type) (RECORD_TYPE);
|
4062 |
|
|
|
4063 |
|
|
type_decl =
|
4064 |
|
|
build_decl (BUILTINS_LOCATION,
|
4065 |
|
|
TYPE_DECL, get_identifier ("__va_list_tag"), record);
|
4066 |
|
|
|
4067 |
|
|
f_args = build_decl (BUILTINS_LOCATION,
|
4068 |
|
|
FIELD_DECL, get_identifier ("__args"), ptr_type_node);
|
4069 |
|
|
f_skip = build_decl (BUILTINS_LOCATION,
|
4070 |
|
|
FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
|
4071 |
|
|
|
4072 |
|
|
DECL_FIELD_CONTEXT (f_args) = record;
|
4073 |
|
|
DECL_ALIGN (f_args) = 128;
|
4074 |
|
|
DECL_USER_ALIGN (f_args) = 1;
|
4075 |
|
|
|
4076 |
|
|
DECL_FIELD_CONTEXT (f_skip) = record;
|
4077 |
|
|
DECL_ALIGN (f_skip) = 128;
|
4078 |
|
|
DECL_USER_ALIGN (f_skip) = 1;
|
4079 |
|
|
|
4080 |
|
|
TREE_CHAIN (record) = type_decl;
|
4081 |
|
|
TYPE_NAME (record) = type_decl;
|
4082 |
|
|
TYPE_FIELDS (record) = f_args;
|
4083 |
|
|
TREE_CHAIN (f_args) = f_skip;
|
4084 |
|
|
|
4085 |
|
|
/* We know this is being padded and we want it too. It is an internal
|
4086 |
|
|
type so hide the warnings from the user. */
|
4087 |
|
|
owp = warn_padded;
|
4088 |
|
|
warn_padded = false;
|
4089 |
|
|
|
4090 |
|
|
layout_type (record);
|
4091 |
|
|
|
4092 |
|
|
warn_padded = owp;
|
4093 |
|
|
|
4094 |
|
|
/* The correct type is an array type of one element. */
|
4095 |
|
|
return build_array_type (record, build_index_type (size_zero_node));
|
4096 |
|
|
}
|
4097 |
|
|
|
4098 |
|
|
/* Implement va_start by filling the va_list structure VALIST.
|
4099 |
|
|
NEXTARG points to the first anonymous stack argument.
|
4100 |
|
|
|
4101 |
|
|
The following global variables are used to initialize
|
4102 |
|
|
the va_list structure:
|
4103 |
|
|
|
4104 |
|
|
crtl->args.info;
|
4105 |
|
|
the CUMULATIVE_ARGS for this function
|
4106 |
|
|
|
4107 |
|
|
crtl->args.arg_offset_rtx:
|
4108 |
|
|
holds the offset of the first anonymous stack argument
|
4109 |
|
|
(relative to the virtual arg pointer). */
|
4110 |
|
|
|
4111 |
|
|
static void
|
4112 |
|
|
spu_va_start (tree valist, rtx nextarg)
|
4113 |
|
|
{
|
4114 |
|
|
tree f_args, f_skip;
|
4115 |
|
|
tree args, skip, t;
|
4116 |
|
|
|
4117 |
|
|
f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
|
4118 |
|
|
f_skip = TREE_CHAIN (f_args);
|
4119 |
|
|
|
4120 |
|
|
valist = build_va_arg_indirect_ref (valist);
|
4121 |
|
|
args =
|
4122 |
|
|
build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
|
4123 |
|
|
skip =
|
4124 |
|
|
build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
|
4125 |
|
|
|
4126 |
|
|
/* Find the __args area. */
|
4127 |
|
|
t = make_tree (TREE_TYPE (args), nextarg);
|
4128 |
|
|
if (crtl->args.pretend_args_size > 0)
|
4129 |
|
|
t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
|
4130 |
|
|
size_int (-STACK_POINTER_OFFSET));
|
4131 |
|
|
t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
|
4132 |
|
|
TREE_SIDE_EFFECTS (t) = 1;
|
4133 |
|
|
expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
|
4134 |
|
|
|
4135 |
|
|
/* Find the __skip area. */
|
4136 |
|
|
t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
|
4137 |
|
|
t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
|
4138 |
|
|
size_int (crtl->args.pretend_args_size
|
4139 |
|
|
- STACK_POINTER_OFFSET));
|
4140 |
|
|
t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
|
4141 |
|
|
TREE_SIDE_EFFECTS (t) = 1;
|
4142 |
|
|
expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
|
4143 |
|
|
}
|
4144 |
|
|
|
4145 |
|
|
/* Gimplify va_arg by updating the va_list structure
|
4146 |
|
|
VALIST as required to retrieve an argument of type
|
4147 |
|
|
TYPE, and returning that argument.
|
4148 |
|
|
|
4149 |
|
|
ret = va_arg(VALIST, TYPE);
|
4150 |
|
|
|
4151 |
|
|
generates code equivalent to:
|
4152 |
|
|
|
4153 |
|
|
paddedsize = (sizeof(TYPE) + 15) & -16;
|
4154 |
|
|
if (VALIST.__args + paddedsize > VALIST.__skip
|
4155 |
|
|
&& VALIST.__args <= VALIST.__skip)
|
4156 |
|
|
addr = VALIST.__skip + 32;
|
4157 |
|
|
else
|
4158 |
|
|
addr = VALIST.__args;
|
4159 |
|
|
VALIST.__args = addr + paddedsize;
|
4160 |
|
|
ret = *(TYPE *)addr;
|
4161 |
|
|
*/
|
4162 |
|
|
static tree
|
4163 |
|
|
spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
|
4164 |
|
|
gimple_seq * post_p ATTRIBUTE_UNUSED)
|
4165 |
|
|
{
|
4166 |
|
|
tree f_args, f_skip;
|
4167 |
|
|
tree args, skip;
|
4168 |
|
|
HOST_WIDE_INT size, rsize;
|
4169 |
|
|
tree paddedsize, addr, tmp;
|
4170 |
|
|
bool pass_by_reference_p;
|
4171 |
|
|
|
4172 |
|
|
f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
|
4173 |
|
|
f_skip = TREE_CHAIN (f_args);
|
4174 |
|
|
|
4175 |
|
|
valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
|
4176 |
|
|
args =
|
4177 |
|
|
build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
|
4178 |
|
|
skip =
|
4179 |
|
|
build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
|
4180 |
|
|
|
4181 |
|
|
addr = create_tmp_var (ptr_type_node, "va_arg");
|
4182 |
|
|
|
4183 |
|
|
/* if an object is dynamically sized, a pointer to it is passed
|
4184 |
|
|
instead of the object itself. */
|
4185 |
|
|
pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
|
4186 |
|
|
false);
|
4187 |
|
|
if (pass_by_reference_p)
|
4188 |
|
|
type = build_pointer_type (type);
|
4189 |
|
|
size = int_size_in_bytes (type);
|
4190 |
|
|
rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
|
4191 |
|
|
|
4192 |
|
|
/* build conditional expression to calculate addr. The expression
|
4193 |
|
|
will be gimplified later. */
|
4194 |
|
|
paddedsize = size_int (rsize);
|
4195 |
|
|
tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (args), paddedsize);
|
4196 |
|
|
tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
|
4197 |
|
|
build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
|
4198 |
|
|
build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
|
4199 |
|
|
unshare_expr (skip)));
|
4200 |
|
|
|
4201 |
|
|
tmp = build3 (COND_EXPR, ptr_type_node, tmp,
|
4202 |
|
|
build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (skip),
|
4203 |
|
|
size_int (32)), unshare_expr (args));
|
4204 |
|
|
|
4205 |
|
|
gimplify_assign (addr, tmp, pre_p);
|
4206 |
|
|
|
4207 |
|
|
/* update VALIST.__args */
|
4208 |
|
|
tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
|
4209 |
|
|
gimplify_assign (unshare_expr (args), tmp, pre_p);
|
4210 |
|
|
|
4211 |
|
|
addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
|
4212 |
|
|
addr);
|
4213 |
|
|
|
4214 |
|
|
if (pass_by_reference_p)
|
4215 |
|
|
addr = build_va_arg_indirect_ref (addr);
|
4216 |
|
|
|
4217 |
|
|
return build_va_arg_indirect_ref (addr);
|
4218 |
|
|
}
|
4219 |
|
|
|
4220 |
|
|
/* Save parameter registers starting with the register that corresponds
|
4221 |
|
|
to the first unnamed parameters. If the first unnamed parameter is
|
4222 |
|
|
in the stack then save no registers. Set pretend_args_size to the
|
4223 |
|
|
amount of space needed to save the registers. */
|
4224 |
|
|
void
|
4225 |
|
|
spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
|
4226 |
|
|
tree type, int *pretend_size, int no_rtl)
|
4227 |
|
|
{
|
4228 |
|
|
if (!no_rtl)
|
4229 |
|
|
{
|
4230 |
|
|
rtx tmp;
|
4231 |
|
|
int regno;
|
4232 |
|
|
int offset;
|
4233 |
|
|
int ncum = *cum;
|
4234 |
|
|
|
4235 |
|
|
/* cum currently points to the last named argument, we want to
|
4236 |
|
|
start at the next argument. */
|
4237 |
|
|
FUNCTION_ARG_ADVANCE (ncum, mode, type, 1);
|
4238 |
|
|
|
4239 |
|
|
offset = -STACK_POINTER_OFFSET;
|
4240 |
|
|
for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
|
4241 |
|
|
{
|
4242 |
|
|
tmp = gen_frame_mem (V4SImode,
|
4243 |
|
|
plus_constant (virtual_incoming_args_rtx,
|
4244 |
|
|
offset));
|
4245 |
|
|
emit_move_insn (tmp,
|
4246 |
|
|
gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
|
4247 |
|
|
offset += 16;
|
4248 |
|
|
}
|
4249 |
|
|
*pretend_size = offset + STACK_POINTER_OFFSET;
|
4250 |
|
|
}
|
4251 |
|
|
}
|
4252 |
|
|
|
4253 |
|
|
void
|
4254 |
|
|
spu_conditional_register_usage (void)
|
4255 |
|
|
{
|
4256 |
|
|
if (flag_pic)
|
4257 |
|
|
{
|
4258 |
|
|
fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
|
4259 |
|
|
call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
|
4260 |
|
|
}
|
4261 |
|
|
}
|
4262 |
|
|
|
4263 |
|
|
/* This is called any time we inspect the alignment of a register for
|
4264 |
|
|
addresses. */
|
4265 |
|
|
static int
|
4266 |
|
|
reg_aligned_for_addr (rtx x)
|
4267 |
|
|
{
|
4268 |
|
|
int regno =
|
4269 |
|
|
REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
|
4270 |
|
|
return REGNO_POINTER_ALIGN (regno) >= 128;
|
4271 |
|
|
}
|
4272 |
|
|
|
4273 |
|
|
/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
|
4274 |
|
|
into its SYMBOL_REF_FLAGS. */
|
4275 |
|
|
static void
|
4276 |
|
|
spu_encode_section_info (tree decl, rtx rtl, int first)
|
4277 |
|
|
{
|
4278 |
|
|
default_encode_section_info (decl, rtl, first);
|
4279 |
|
|
|
4280 |
|
|
/* If a variable has a forced alignment to < 16 bytes, mark it with
|
4281 |
|
|
SYMBOL_FLAG_ALIGN1. */
|
4282 |
|
|
if (TREE_CODE (decl) == VAR_DECL
|
4283 |
|
|
&& DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
|
4284 |
|
|
SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
|
4285 |
|
|
}
|
4286 |
|
|
|
4287 |
|
|
/* Return TRUE if we are certain the mem refers to a complete object
|
4288 |
|
|
which is both 16-byte aligned and padded to a 16-byte boundary. This
|
4289 |
|
|
would make it safe to store with a single instruction.
|
4290 |
|
|
We guarantee the alignment and padding for static objects by aligning
|
4291 |
|
|
all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
|
4292 |
|
|
FIXME: We currently cannot guarantee this for objects on the stack
|
4293 |
|
|
because assign_parm_setup_stack calls assign_stack_local with the
|
4294 |
|
|
alignment of the parameter mode and in that case the alignment never
|
4295 |
|
|
gets adjusted by LOCAL_ALIGNMENT. */
|
4296 |
|
|
static int
|
4297 |
|
|
store_with_one_insn_p (rtx mem)
|
4298 |
|
|
{
|
4299 |
|
|
enum machine_mode mode = GET_MODE (mem);
|
4300 |
|
|
rtx addr = XEXP (mem, 0);
|
4301 |
|
|
if (mode == BLKmode)
|
4302 |
|
|
return 0;
|
4303 |
|
|
if (GET_MODE_SIZE (mode) >= 16)
|
4304 |
|
|
return 1;
|
4305 |
|
|
/* Only static objects. */
|
4306 |
|
|
if (GET_CODE (addr) == SYMBOL_REF)
|
4307 |
|
|
{
|
4308 |
|
|
/* We use the associated declaration to make sure the access is
|
4309 |
|
|
referring to the whole object.
|
4310 |
|
|
We check both MEM_EXPR and and SYMBOL_REF_DECL. I'm not sure
|
4311 |
|
|
if it is necessary. Will there be cases where one exists, and
|
4312 |
|
|
the other does not? Will there be cases where both exist, but
|
4313 |
|
|
have different types? */
|
4314 |
|
|
tree decl = MEM_EXPR (mem);
|
4315 |
|
|
if (decl
|
4316 |
|
|
&& TREE_CODE (decl) == VAR_DECL
|
4317 |
|
|
&& GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
|
4318 |
|
|
return 1;
|
4319 |
|
|
decl = SYMBOL_REF_DECL (addr);
|
4320 |
|
|
if (decl
|
4321 |
|
|
&& TREE_CODE (decl) == VAR_DECL
|
4322 |
|
|
&& GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
|
4323 |
|
|
return 1;
|
4324 |
|
|
}
|
4325 |
|
|
return 0;
|
4326 |
|
|
}
|
4327 |
|
|
|
4328 |
|
|
/* Return 1 when the address is not valid for a simple load and store as
|
4329 |
|
|
required by the '_mov*' patterns. We could make this less strict
|
4330 |
|
|
for loads, but we prefer mem's to look the same so they are more
|
4331 |
|
|
likely to be merged. */
|
4332 |
|
|
static int
|
4333 |
|
|
address_needs_split (rtx mem)
|
4334 |
|
|
{
|
4335 |
|
|
if (GET_MODE_SIZE (GET_MODE (mem)) < 16
|
4336 |
|
|
&& (GET_MODE_SIZE (GET_MODE (mem)) < 4
|
4337 |
|
|
|| !(store_with_one_insn_p (mem)
|
4338 |
|
|
|| mem_is_padded_component_ref (mem))))
|
4339 |
|
|
return 1;
|
4340 |
|
|
|
4341 |
|
|
return 0;
|
4342 |
|
|
}
|
4343 |
|
|
|
4344 |
|
|
static GTY(()) rtx cache_fetch; /* __cache_fetch function */
|
4345 |
|
|
static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
|
4346 |
|
|
static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
|
4347 |
|
|
|
4348 |
|
|
/* MEM is known to be an __ea qualified memory access. Emit a call to
|
4349 |
|
|
fetch the ppu memory to local store, and return its address in local
|
4350 |
|
|
store. */
|
4351 |
|
|
|
4352 |
|
|
static void
|
4353 |
|
|
ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
|
4354 |
|
|
{
|
4355 |
|
|
if (is_store)
|
4356 |
|
|
{
|
4357 |
|
|
rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
|
4358 |
|
|
if (!cache_fetch_dirty)
|
4359 |
|
|
cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
|
4360 |
|
|
emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
|
4361 |
|
|
2, ea_addr, EAmode, ndirty, SImode);
|
4362 |
|
|
}
|
4363 |
|
|
else
|
4364 |
|
|
{
|
4365 |
|
|
if (!cache_fetch)
|
4366 |
|
|
cache_fetch = init_one_libfunc ("__cache_fetch");
|
4367 |
|
|
emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
|
4368 |
|
|
1, ea_addr, EAmode);
|
4369 |
|
|
}
|
4370 |
|
|
}
|
4371 |
|
|
|
4372 |
|
|
/* Like ea_load_store, but do the cache tag comparison and, for stores,
|
4373 |
|
|
dirty bit marking, inline.
|
4374 |
|
|
|
4375 |
|
|
The cache control data structure is an array of
|
4376 |
|
|
|
4377 |
|
|
struct __cache_tag_array
|
4378 |
|
|
{
|
4379 |
|
|
unsigned int tag_lo[4];
|
4380 |
|
|
unsigned int tag_hi[4];
|
4381 |
|
|
void *data_pointer[4];
|
4382 |
|
|
int reserved[4];
|
4383 |
|
|
vector unsigned short dirty_bits[4];
|
4384 |
|
|
} */
|
4385 |
|
|
|
4386 |
|
|
static void
|
4387 |
|
|
ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
|
4388 |
|
|
{
|
4389 |
|
|
rtx ea_addr_si;
|
4390 |
|
|
HOST_WIDE_INT v;
|
4391 |
|
|
rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
|
4392 |
|
|
rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
|
4393 |
|
|
rtx index_mask = gen_reg_rtx (SImode);
|
4394 |
|
|
rtx tag_arr = gen_reg_rtx (Pmode);
|
4395 |
|
|
rtx splat_mask = gen_reg_rtx (TImode);
|
4396 |
|
|
rtx splat = gen_reg_rtx (V4SImode);
|
4397 |
|
|
rtx splat_hi = NULL_RTX;
|
4398 |
|
|
rtx tag_index = gen_reg_rtx (Pmode);
|
4399 |
|
|
rtx block_off = gen_reg_rtx (SImode);
|
4400 |
|
|
rtx tag_addr = gen_reg_rtx (Pmode);
|
4401 |
|
|
rtx tag = gen_reg_rtx (V4SImode);
|
4402 |
|
|
rtx cache_tag = gen_reg_rtx (V4SImode);
|
4403 |
|
|
rtx cache_tag_hi = NULL_RTX;
|
4404 |
|
|
rtx cache_ptrs = gen_reg_rtx (TImode);
|
4405 |
|
|
rtx cache_ptrs_si = gen_reg_rtx (SImode);
|
4406 |
|
|
rtx tag_equal = gen_reg_rtx (V4SImode);
|
4407 |
|
|
rtx tag_equal_hi = NULL_RTX;
|
4408 |
|
|
rtx tag_eq_pack = gen_reg_rtx (V4SImode);
|
4409 |
|
|
rtx tag_eq_pack_si = gen_reg_rtx (SImode);
|
4410 |
|
|
rtx eq_index = gen_reg_rtx (SImode);
|
4411 |
|
|
rtx bcomp, hit_label, hit_ref, cont_label, insn;
|
4412 |
|
|
|
4413 |
|
|
if (spu_ea_model != 32)
|
4414 |
|
|
{
|
4415 |
|
|
splat_hi = gen_reg_rtx (V4SImode);
|
4416 |
|
|
cache_tag_hi = gen_reg_rtx (V4SImode);
|
4417 |
|
|
tag_equal_hi = gen_reg_rtx (V4SImode);
|
4418 |
|
|
}
|
4419 |
|
|
|
4420 |
|
|
emit_move_insn (index_mask, plus_constant (tag_size_sym, -128));
|
4421 |
|
|
emit_move_insn (tag_arr, tag_arr_sym);
|
4422 |
|
|
v = 0x0001020300010203LL;
|
4423 |
|
|
emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
|
4424 |
|
|
ea_addr_si = ea_addr;
|
4425 |
|
|
if (spu_ea_model != 32)
|
4426 |
|
|
ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
|
4427 |
|
|
|
4428 |
|
|
/* tag_index = ea_addr & (tag_array_size - 128) */
|
4429 |
|
|
emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
|
4430 |
|
|
|
4431 |
|
|
/* splat ea_addr to all 4 slots. */
|
4432 |
|
|
emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
|
4433 |
|
|
/* Similarly for high 32 bits of ea_addr. */
|
4434 |
|
|
if (spu_ea_model != 32)
|
4435 |
|
|
emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
|
4436 |
|
|
|
4437 |
|
|
/* block_off = ea_addr & 127 */
|
4438 |
|
|
emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
|
4439 |
|
|
|
4440 |
|
|
/* tag_addr = tag_arr + tag_index */
|
4441 |
|
|
emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
|
4442 |
|
|
|
4443 |
|
|
/* Read cache tags. */
|
4444 |
|
|
emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
|
4445 |
|
|
if (spu_ea_model != 32)
|
4446 |
|
|
emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
|
4447 |
|
|
plus_constant (tag_addr, 16)));
|
4448 |
|
|
|
4449 |
|
|
/* tag = ea_addr & -128 */
|
4450 |
|
|
emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
|
4451 |
|
|
|
4452 |
|
|
/* Read all four cache data pointers. */
|
4453 |
|
|
emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
|
4454 |
|
|
plus_constant (tag_addr, 32)));
|
4455 |
|
|
|
4456 |
|
|
/* Compare tags. */
|
4457 |
|
|
emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
|
4458 |
|
|
if (spu_ea_model != 32)
|
4459 |
|
|
{
|
4460 |
|
|
emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
|
4461 |
|
|
emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
|
4462 |
|
|
}
|
4463 |
|
|
|
4464 |
|
|
/* At most one of the tags compare equal, so tag_equal has one
|
4465 |
|
|
32-bit slot set to all 1's, with the other slots all zero.
|
4466 |
|
|
gbb picks off low bit from each byte in the 128-bit registers,
|
4467 |
|
|
so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
|
4468 |
|
|
we have a hit. */
|
4469 |
|
|
emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
|
4470 |
|
|
emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
|
4471 |
|
|
|
4472 |
|
|
/* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
|
4473 |
|
|
emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
|
4474 |
|
|
|
4475 |
|
|
/* Allowing us to rotate the corresponding cache data pointer to slot0.
|
4476 |
|
|
(rotating eq_index mod 16 bytes). */
|
4477 |
|
|
emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
|
4478 |
|
|
emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
|
4479 |
|
|
|
4480 |
|
|
/* Add block offset to form final data address. */
|
4481 |
|
|
emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
|
4482 |
|
|
|
4483 |
|
|
/* Check that we did hit. */
|
4484 |
|
|
hit_label = gen_label_rtx ();
|
4485 |
|
|
hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
|
4486 |
|
|
bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
|
4487 |
|
|
insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
|
4488 |
|
|
gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
|
4489 |
|
|
hit_ref, pc_rtx)));
|
4490 |
|
|
/* Say that this branch is very likely to happen. */
|
4491 |
|
|
v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
|
4492 |
|
|
REG_NOTES (insn)
|
4493 |
|
|
= gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (v), REG_NOTES (insn));
|
4494 |
|
|
|
4495 |
|
|
ea_load_store (mem, is_store, ea_addr, data_addr);
|
4496 |
|
|
cont_label = gen_label_rtx ();
|
4497 |
|
|
emit_jump_insn (gen_jump (cont_label));
|
4498 |
|
|
emit_barrier ();
|
4499 |
|
|
|
4500 |
|
|
emit_label (hit_label);
|
4501 |
|
|
|
4502 |
|
|
if (is_store)
|
4503 |
|
|
{
|
4504 |
|
|
HOST_WIDE_INT v_hi;
|
4505 |
|
|
rtx dirty_bits = gen_reg_rtx (TImode);
|
4506 |
|
|
rtx dirty_off = gen_reg_rtx (SImode);
|
4507 |
|
|
rtx dirty_128 = gen_reg_rtx (TImode);
|
4508 |
|
|
rtx neg_block_off = gen_reg_rtx (SImode);
|
4509 |
|
|
|
4510 |
|
|
/* Set up mask with one dirty bit per byte of the mem we are
|
4511 |
|
|
writing, starting from top bit. */
|
4512 |
|
|
v_hi = v = -1;
|
4513 |
|
|
v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
|
4514 |
|
|
if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
|
4515 |
|
|
{
|
4516 |
|
|
v_hi = v;
|
4517 |
|
|
v = 0;
|
4518 |
|
|
}
|
4519 |
|
|
emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
|
4520 |
|
|
|
4521 |
|
|
/* Form index into cache dirty_bits. eq_index is one of
|
4522 |
|
|
0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
|
4523 |
|
|
0x40, 0x50, 0x60 or 0x70 which just happens to be the
|
4524 |
|
|
offset to each of the four dirty_bits elements. */
|
4525 |
|
|
emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
|
4526 |
|
|
|
4527 |
|
|
emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
|
4528 |
|
|
|
4529 |
|
|
/* Rotate bit mask to proper bit. */
|
4530 |
|
|
emit_insn (gen_negsi2 (neg_block_off, block_off));
|
4531 |
|
|
emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
|
4532 |
|
|
emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
|
4533 |
|
|
|
4534 |
|
|
/* Or in the new dirty bits. */
|
4535 |
|
|
emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
|
4536 |
|
|
|
4537 |
|
|
/* Store. */
|
4538 |
|
|
emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
|
4539 |
|
|
}
|
4540 |
|
|
|
4541 |
|
|
emit_label (cont_label);
|
4542 |
|
|
}
|
4543 |
|
|
|
4544 |
|
|
static rtx
|
4545 |
|
|
expand_ea_mem (rtx mem, bool is_store)
|
4546 |
|
|
{
|
4547 |
|
|
rtx ea_addr;
|
4548 |
|
|
rtx data_addr = gen_reg_rtx (Pmode);
|
4549 |
|
|
rtx new_mem;
|
4550 |
|
|
|
4551 |
|
|
ea_addr = force_reg (EAmode, XEXP (mem, 0));
|
4552 |
|
|
if (optimize_size || optimize == 0)
|
4553 |
|
|
ea_load_store (mem, is_store, ea_addr, data_addr);
|
4554 |
|
|
else
|
4555 |
|
|
ea_load_store_inline (mem, is_store, ea_addr, data_addr);
|
4556 |
|
|
|
4557 |
|
|
if (ea_alias_set == -1)
|
4558 |
|
|
ea_alias_set = new_alias_set ();
|
4559 |
|
|
|
4560 |
|
|
/* We generate a new MEM RTX to refer to the copy of the data
|
4561 |
|
|
in the cache. We do not copy memory attributes (except the
|
4562 |
|
|
alignment) from the original MEM, as they may no longer apply
|
4563 |
|
|
to the cache copy. */
|
4564 |
|
|
new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
|
4565 |
|
|
set_mem_alias_set (new_mem, ea_alias_set);
|
4566 |
|
|
set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
|
4567 |
|
|
|
4568 |
|
|
return new_mem;
|
4569 |
|
|
}
|
4570 |
|
|
|
4571 |
|
|
int
|
4572 |
|
|
spu_expand_mov (rtx * ops, enum machine_mode mode)
|
4573 |
|
|
{
|
4574 |
|
|
if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
|
4575 |
|
|
abort ();
|
4576 |
|
|
|
4577 |
|
|
if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
|
4578 |
|
|
{
|
4579 |
|
|
rtx from = SUBREG_REG (ops[1]);
|
4580 |
|
|
enum machine_mode imode = int_mode_for_mode (GET_MODE (from));
|
4581 |
|
|
|
4582 |
|
|
gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
|
4583 |
|
|
&& GET_MODE_CLASS (imode) == MODE_INT
|
4584 |
|
|
&& subreg_lowpart_p (ops[1]));
|
4585 |
|
|
|
4586 |
|
|
if (GET_MODE_SIZE (imode) < 4)
|
4587 |
|
|
imode = SImode;
|
4588 |
|
|
if (imode != GET_MODE (from))
|
4589 |
|
|
from = gen_rtx_SUBREG (imode, from, 0);
|
4590 |
|
|
|
4591 |
|
|
if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
|
4592 |
|
|
{
|
4593 |
|
|
enum insn_code icode = convert_optab_handler (trunc_optab, mode, imode)->insn_code;
|
4594 |
|
|
emit_insn (GEN_FCN (icode) (ops[0], from));
|
4595 |
|
|
}
|
4596 |
|
|
else
|
4597 |
|
|
emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
|
4598 |
|
|
return 1;
|
4599 |
|
|
}
|
4600 |
|
|
|
4601 |
|
|
/* At least one of the operands needs to be a register. */
|
4602 |
|
|
if ((reload_in_progress | reload_completed) == 0
|
4603 |
|
|
&& !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
|
4604 |
|
|
{
|
4605 |
|
|
rtx temp = force_reg (mode, ops[1]);
|
4606 |
|
|
emit_move_insn (ops[0], temp);
|
4607 |
|
|
return 1;
|
4608 |
|
|
}
|
4609 |
|
|
if (reload_in_progress || reload_completed)
|
4610 |
|
|
{
|
4611 |
|
|
if (CONSTANT_P (ops[1]))
|
4612 |
|
|
return spu_split_immediate (ops);
|
4613 |
|
|
return 0;
|
4614 |
|
|
}
|
4615 |
|
|
|
4616 |
|
|
/* Catch the SImode immediates greater than 0x7fffffff, and sign
|
4617 |
|
|
extend them. */
|
4618 |
|
|
if (GET_CODE (ops[1]) == CONST_INT)
|
4619 |
|
|
{
|
4620 |
|
|
HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
|
4621 |
|
|
if (val != INTVAL (ops[1]))
|
4622 |
|
|
{
|
4623 |
|
|
emit_move_insn (ops[0], GEN_INT (val));
|
4624 |
|
|
return 1;
|
4625 |
|
|
}
|
4626 |
|
|
}
|
4627 |
|
|
if (MEM_P (ops[0]))
|
4628 |
|
|
{
|
4629 |
|
|
if (MEM_ADDR_SPACE (ops[0]))
|
4630 |
|
|
ops[0] = expand_ea_mem (ops[0], true);
|
4631 |
|
|
return spu_split_store (ops);
|
4632 |
|
|
}
|
4633 |
|
|
if (MEM_P (ops[1]))
|
4634 |
|
|
{
|
4635 |
|
|
if (MEM_ADDR_SPACE (ops[1]))
|
4636 |
|
|
ops[1] = expand_ea_mem (ops[1], false);
|
4637 |
|
|
return spu_split_load (ops);
|
4638 |
|
|
}
|
4639 |
|
|
|
4640 |
|
|
return 0;
|
4641 |
|
|
}
|
4642 |
|
|
|
4643 |
|
|
static void
|
4644 |
|
|
spu_convert_move (rtx dst, rtx src)
|
4645 |
|
|
{
|
4646 |
|
|
enum machine_mode mode = GET_MODE (dst);
|
4647 |
|
|
enum machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
|
4648 |
|
|
rtx reg;
|
4649 |
|
|
gcc_assert (GET_MODE (src) == TImode);
|
4650 |
|
|
reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
|
4651 |
|
|
emit_insn (gen_rtx_SET (VOIDmode, reg,
|
4652 |
|
|
gen_rtx_TRUNCATE (int_mode,
|
4653 |
|
|
gen_rtx_LSHIFTRT (TImode, src,
|
4654 |
|
|
GEN_INT (int_mode == DImode ? 64 : 96)))));
|
4655 |
|
|
if (int_mode != mode)
|
4656 |
|
|
{
|
4657 |
|
|
reg = simplify_gen_subreg (mode, reg, int_mode, 0);
|
4658 |
|
|
emit_move_insn (dst, reg);
|
4659 |
|
|
}
|
4660 |
|
|
}
|
4661 |
|
|
|
4662 |
|
|
/* Load TImode values into DST0 and DST1 (when it is non-NULL) using
|
4663 |
|
|
the address from SRC and SRC+16. Return a REG or CONST_INT that
|
4664 |
|
|
specifies how many bytes to rotate the loaded registers, plus any
|
4665 |
|
|
extra from EXTRA_ROTQBY. The address and rotate amounts are
|
4666 |
|
|
normalized to improve merging of loads and rotate computations. */
|
4667 |
|
|
static rtx
|
4668 |
|
|
spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
|
4669 |
|
|
{
|
4670 |
|
|
rtx addr = XEXP (src, 0);
|
4671 |
|
|
rtx p0, p1, rot, addr0, addr1;
|
4672 |
|
|
int rot_amt;
|
4673 |
|
|
|
4674 |
|
|
rot = 0;
|
4675 |
|
|
rot_amt = 0;
|
4676 |
|
|
|
4677 |
|
|
if (MEM_ALIGN (src) >= 128)
|
4678 |
|
|
/* Address is already aligned; simply perform a TImode load. */ ;
|
4679 |
|
|
else if (GET_CODE (addr) == PLUS)
|
4680 |
|
|
{
|
4681 |
|
|
/* 8 cases:
|
4682 |
|
|
aligned reg + aligned reg => lqx
|
4683 |
|
|
aligned reg + unaligned reg => lqx, rotqby
|
4684 |
|
|
aligned reg + aligned const => lqd
|
4685 |
|
|
aligned reg + unaligned const => lqd, rotqbyi
|
4686 |
|
|
unaligned reg + aligned reg => lqx, rotqby
|
4687 |
|
|
unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
|
4688 |
|
|
unaligned reg + aligned const => lqd, rotqby
|
4689 |
|
|
unaligned reg + unaligned const -> not allowed by legitimate address
|
4690 |
|
|
*/
|
4691 |
|
|
p0 = XEXP (addr, 0);
|
4692 |
|
|
p1 = XEXP (addr, 1);
|
4693 |
|
|
if (!reg_aligned_for_addr (p0))
|
4694 |
|
|
{
|
4695 |
|
|
if (REG_P (p1) && !reg_aligned_for_addr (p1))
|
4696 |
|
|
{
|
4697 |
|
|
rot = gen_reg_rtx (SImode);
|
4698 |
|
|
emit_insn (gen_addsi3 (rot, p0, p1));
|
4699 |
|
|
}
|
4700 |
|
|
else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
|
4701 |
|
|
{
|
4702 |
|
|
if (INTVAL (p1) > 0
|
4703 |
|
|
&& REG_POINTER (p0)
|
4704 |
|
|
&& INTVAL (p1) * BITS_PER_UNIT
|
4705 |
|
|
< REGNO_POINTER_ALIGN (REGNO (p0)))
|
4706 |
|
|
{
|
4707 |
|
|
rot = gen_reg_rtx (SImode);
|
4708 |
|
|
emit_insn (gen_addsi3 (rot, p0, p1));
|
4709 |
|
|
addr = p0;
|
4710 |
|
|
}
|
4711 |
|
|
else
|
4712 |
|
|
{
|
4713 |
|
|
rtx x = gen_reg_rtx (SImode);
|
4714 |
|
|
emit_move_insn (x, p1);
|
4715 |
|
|
if (!spu_arith_operand (p1, SImode))
|
4716 |
|
|
p1 = x;
|
4717 |
|
|
rot = gen_reg_rtx (SImode);
|
4718 |
|
|
emit_insn (gen_addsi3 (rot, p0, p1));
|
4719 |
|
|
addr = gen_rtx_PLUS (Pmode, p0, x);
|
4720 |
|
|
}
|
4721 |
|
|
}
|
4722 |
|
|
else
|
4723 |
|
|
rot = p0;
|
4724 |
|
|
}
|
4725 |
|
|
else
|
4726 |
|
|
{
|
4727 |
|
|
if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
|
4728 |
|
|
{
|
4729 |
|
|
rot_amt = INTVAL (p1) & 15;
|
4730 |
|
|
if (INTVAL (p1) & -16)
|
4731 |
|
|
{
|
4732 |
|
|
p1 = GEN_INT (INTVAL (p1) & -16);
|
4733 |
|
|
addr = gen_rtx_PLUS (SImode, p0, p1);
|
4734 |
|
|
}
|
4735 |
|
|
else
|
4736 |
|
|
addr = p0;
|
4737 |
|
|
}
|
4738 |
|
|
else if (REG_P (p1) && !reg_aligned_for_addr (p1))
|
4739 |
|
|
rot = p1;
|
4740 |
|
|
}
|
4741 |
|
|
}
|
4742 |
|
|
else if (REG_P (addr))
|
4743 |
|
|
{
|
4744 |
|
|
if (!reg_aligned_for_addr (addr))
|
4745 |
|
|
rot = addr;
|
4746 |
|
|
}
|
4747 |
|
|
else if (GET_CODE (addr) == CONST)
|
4748 |
|
|
{
|
4749 |
|
|
if (GET_CODE (XEXP (addr, 0)) == PLUS
|
4750 |
|
|
&& ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
|
4751 |
|
|
&& GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
|
4752 |
|
|
{
|
4753 |
|
|
rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
|
4754 |
|
|
if (rot_amt & -16)
|
4755 |
|
|
addr = gen_rtx_CONST (Pmode,
|
4756 |
|
|
gen_rtx_PLUS (Pmode,
|
4757 |
|
|
XEXP (XEXP (addr, 0), 0),
|
4758 |
|
|
GEN_INT (rot_amt & -16)));
|
4759 |
|
|
else
|
4760 |
|
|
addr = XEXP (XEXP (addr, 0), 0);
|
4761 |
|
|
}
|
4762 |
|
|
else
|
4763 |
|
|
{
|
4764 |
|
|
rot = gen_reg_rtx (Pmode);
|
4765 |
|
|
emit_move_insn (rot, addr);
|
4766 |
|
|
}
|
4767 |
|
|
}
|
4768 |
|
|
else if (GET_CODE (addr) == CONST_INT)
|
4769 |
|
|
{
|
4770 |
|
|
rot_amt = INTVAL (addr);
|
4771 |
|
|
addr = GEN_INT (rot_amt & -16);
|
4772 |
|
|
}
|
4773 |
|
|
else if (!ALIGNED_SYMBOL_REF_P (addr))
|
4774 |
|
|
{
|
4775 |
|
|
rot = gen_reg_rtx (Pmode);
|
4776 |
|
|
emit_move_insn (rot, addr);
|
4777 |
|
|
}
|
4778 |
|
|
|
4779 |
|
|
rot_amt += extra_rotby;
|
4780 |
|
|
|
4781 |
|
|
rot_amt &= 15;
|
4782 |
|
|
|
4783 |
|
|
if (rot && rot_amt)
|
4784 |
|
|
{
|
4785 |
|
|
rtx x = gen_reg_rtx (SImode);
|
4786 |
|
|
emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
|
4787 |
|
|
rot = x;
|
4788 |
|
|
rot_amt = 0;
|
4789 |
|
|
}
|
4790 |
|
|
if (!rot && rot_amt)
|
4791 |
|
|
rot = GEN_INT (rot_amt);
|
4792 |
|
|
|
4793 |
|
|
addr0 = copy_rtx (addr);
|
4794 |
|
|
addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
|
4795 |
|
|
emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
|
4796 |
|
|
|
4797 |
|
|
if (dst1)
|
4798 |
|
|
{
|
4799 |
|
|
addr1 = plus_constant (copy_rtx (addr), 16);
|
4800 |
|
|
addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
|
4801 |
|
|
emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
|
4802 |
|
|
}
|
4803 |
|
|
|
4804 |
|
|
return rot;
|
4805 |
|
|
}
|
4806 |
|
|
|
4807 |
|
|
int
|
4808 |
|
|
spu_split_load (rtx * ops)
|
4809 |
|
|
{
|
4810 |
|
|
enum machine_mode mode = GET_MODE (ops[0]);
|
4811 |
|
|
rtx addr, load, rot;
|
4812 |
|
|
int rot_amt;
|
4813 |
|
|
|
4814 |
|
|
if (GET_MODE_SIZE (mode) >= 16)
|
4815 |
|
|
return 0;
|
4816 |
|
|
|
4817 |
|
|
addr = XEXP (ops[1], 0);
|
4818 |
|
|
gcc_assert (GET_CODE (addr) != AND);
|
4819 |
|
|
|
4820 |
|
|
if (!address_needs_split (ops[1]))
|
4821 |
|
|
{
|
4822 |
|
|
ops[1] = change_address (ops[1], TImode, addr);
|
4823 |
|
|
load = gen_reg_rtx (TImode);
|
4824 |
|
|
emit_insn (gen__movti (load, ops[1]));
|
4825 |
|
|
spu_convert_move (ops[0], load);
|
4826 |
|
|
return 1;
|
4827 |
|
|
}
|
4828 |
|
|
|
4829 |
|
|
rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
|
4830 |
|
|
|
4831 |
|
|
load = gen_reg_rtx (TImode);
|
4832 |
|
|
rot = spu_expand_load (load, 0, ops[1], rot_amt);
|
4833 |
|
|
|
4834 |
|
|
if (rot)
|
4835 |
|
|
emit_insn (gen_rotqby_ti (load, load, rot));
|
4836 |
|
|
|
4837 |
|
|
spu_convert_move (ops[0], load);
|
4838 |
|
|
return 1;
|
4839 |
|
|
}
|
4840 |
|
|
|
4841 |
|
|
int
|
4842 |
|
|
spu_split_store (rtx * ops)
|
4843 |
|
|
{
|
4844 |
|
|
enum machine_mode mode = GET_MODE (ops[0]);
|
4845 |
|
|
rtx reg;
|
4846 |
|
|
rtx addr, p0, p1, p1_lo, smem;
|
4847 |
|
|
int aform;
|
4848 |
|
|
int scalar;
|
4849 |
|
|
|
4850 |
|
|
if (GET_MODE_SIZE (mode) >= 16)
|
4851 |
|
|
return 0;
|
4852 |
|
|
|
4853 |
|
|
addr = XEXP (ops[0], 0);
|
4854 |
|
|
gcc_assert (GET_CODE (addr) != AND);
|
4855 |
|
|
|
4856 |
|
|
if (!address_needs_split (ops[0]))
|
4857 |
|
|
{
|
4858 |
|
|
reg = gen_reg_rtx (TImode);
|
4859 |
|
|
emit_insn (gen_spu_convert (reg, ops[1]));
|
4860 |
|
|
ops[0] = change_address (ops[0], TImode, addr);
|
4861 |
|
|
emit_move_insn (ops[0], reg);
|
4862 |
|
|
return 1;
|
4863 |
|
|
}
|
4864 |
|
|
|
4865 |
|
|
if (GET_CODE (addr) == PLUS)
|
4866 |
|
|
{
|
4867 |
|
|
/* 8 cases:
|
4868 |
|
|
aligned reg + aligned reg => lqx, c?x, shuf, stqx
|
4869 |
|
|
aligned reg + unaligned reg => lqx, c?x, shuf, stqx
|
4870 |
|
|
aligned reg + aligned const => lqd, c?d, shuf, stqx
|
4871 |
|
|
aligned reg + unaligned const => lqd, c?d, shuf, stqx
|
4872 |
|
|
unaligned reg + aligned reg => lqx, c?x, shuf, stqx
|
4873 |
|
|
unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
|
4874 |
|
|
unaligned reg + aligned const => lqd, c?d, shuf, stqx
|
4875 |
|
|
unaligned reg + unaligned const -> lqx, c?d, shuf, stqx
|
4876 |
|
|
*/
|
4877 |
|
|
aform = 0;
|
4878 |
|
|
p0 = XEXP (addr, 0);
|
4879 |
|
|
p1 = p1_lo = XEXP (addr, 1);
|
4880 |
|
|
if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
|
4881 |
|
|
{
|
4882 |
|
|
p1_lo = GEN_INT (INTVAL (p1) & 15);
|
4883 |
|
|
if (reg_aligned_for_addr (p0))
|
4884 |
|
|
{
|
4885 |
|
|
p1 = GEN_INT (INTVAL (p1) & -16);
|
4886 |
|
|
if (p1 == const0_rtx)
|
4887 |
|
|
addr = p0;
|
4888 |
|
|
else
|
4889 |
|
|
addr = gen_rtx_PLUS (SImode, p0, p1);
|
4890 |
|
|
}
|
4891 |
|
|
else
|
4892 |
|
|
{
|
4893 |
|
|
rtx x = gen_reg_rtx (SImode);
|
4894 |
|
|
emit_move_insn (x, p1);
|
4895 |
|
|
addr = gen_rtx_PLUS (SImode, p0, x);
|
4896 |
|
|
}
|
4897 |
|
|
}
|
4898 |
|
|
}
|
4899 |
|
|
else if (REG_P (addr))
|
4900 |
|
|
{
|
4901 |
|
|
aform = 0;
|
4902 |
|
|
p0 = addr;
|
4903 |
|
|
p1 = p1_lo = const0_rtx;
|
4904 |
|
|
}
|
4905 |
|
|
else
|
4906 |
|
|
{
|
4907 |
|
|
aform = 1;
|
4908 |
|
|
p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
|
4909 |
|
|
p1 = 0; /* aform doesn't use p1 */
|
4910 |
|
|
p1_lo = addr;
|
4911 |
|
|
if (ALIGNED_SYMBOL_REF_P (addr))
|
4912 |
|
|
p1_lo = const0_rtx;
|
4913 |
|
|
else if (GET_CODE (addr) == CONST
|
4914 |
|
|
&& GET_CODE (XEXP (addr, 0)) == PLUS
|
4915 |
|
|
&& ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
|
4916 |
|
|
&& GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
|
4917 |
|
|
{
|
4918 |
|
|
HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
|
4919 |
|
|
if ((v & -16) != 0)
|
4920 |
|
|
addr = gen_rtx_CONST (Pmode,
|
4921 |
|
|
gen_rtx_PLUS (Pmode,
|
4922 |
|
|
XEXP (XEXP (addr, 0), 0),
|
4923 |
|
|
GEN_INT (v & -16)));
|
4924 |
|
|
else
|
4925 |
|
|
addr = XEXP (XEXP (addr, 0), 0);
|
4926 |
|
|
p1_lo = GEN_INT (v & 15);
|
4927 |
|
|
}
|
4928 |
|
|
else if (GET_CODE (addr) == CONST_INT)
|
4929 |
|
|
{
|
4930 |
|
|
p1_lo = GEN_INT (INTVAL (addr) & 15);
|
4931 |
|
|
addr = GEN_INT (INTVAL (addr) & -16);
|
4932 |
|
|
}
|
4933 |
|
|
else
|
4934 |
|
|
{
|
4935 |
|
|
p1_lo = gen_reg_rtx (SImode);
|
4936 |
|
|
emit_move_insn (p1_lo, addr);
|
4937 |
|
|
}
|
4938 |
|
|
}
|
4939 |
|
|
|
4940 |
|
|
reg = gen_reg_rtx (TImode);
|
4941 |
|
|
|
4942 |
|
|
scalar = store_with_one_insn_p (ops[0]);
|
4943 |
|
|
if (!scalar)
|
4944 |
|
|
{
|
4945 |
|
|
/* We could copy the flags from the ops[0] MEM to mem here,
|
4946 |
|
|
We don't because we want this load to be optimized away if
|
4947 |
|
|
possible, and copying the flags will prevent that in certain
|
4948 |
|
|
cases, e.g. consider the volatile flag. */
|
4949 |
|
|
|
4950 |
|
|
rtx pat = gen_reg_rtx (TImode);
|
4951 |
|
|
rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
|
4952 |
|
|
set_mem_alias_set (lmem, 0);
|
4953 |
|
|
emit_insn (gen_movti (reg, lmem));
|
4954 |
|
|
|
4955 |
|
|
if (!p0 || reg_aligned_for_addr (p0))
|
4956 |
|
|
p0 = stack_pointer_rtx;
|
4957 |
|
|
if (!p1_lo)
|
4958 |
|
|
p1_lo = const0_rtx;
|
4959 |
|
|
|
4960 |
|
|
emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
|
4961 |
|
|
emit_insn (gen_shufb (reg, ops[1], reg, pat));
|
4962 |
|
|
}
|
4963 |
|
|
else
|
4964 |
|
|
{
|
4965 |
|
|
if (GET_CODE (ops[1]) == REG)
|
4966 |
|
|
emit_insn (gen_spu_convert (reg, ops[1]));
|
4967 |
|
|
else if (GET_CODE (ops[1]) == SUBREG)
|
4968 |
|
|
emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
|
4969 |
|
|
else
|
4970 |
|
|
abort ();
|
4971 |
|
|
}
|
4972 |
|
|
|
4973 |
|
|
if (GET_MODE_SIZE (mode) < 4 && scalar)
|
4974 |
|
|
emit_insn (gen_ashlti3
|
4975 |
|
|
(reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
|
4976 |
|
|
|
4977 |
|
|
smem = change_address (ops[0], TImode, copy_rtx (addr));
|
4978 |
|
|
/* We can't use the previous alias set because the memory has changed
|
4979 |
|
|
size and can potentially overlap objects of other types. */
|
4980 |
|
|
set_mem_alias_set (smem, 0);
|
4981 |
|
|
|
4982 |
|
|
emit_insn (gen_movti (smem, reg));
|
4983 |
|
|
return 1;
|
4984 |
|
|
}
|
4985 |
|
|
|
4986 |
|
|
/* Return TRUE if X is MEM which is a struct member reference
|
4987 |
|
|
and the member can safely be loaded and stored with a single
|
4988 |
|
|
instruction because it is padded. */
|
4989 |
|
|
static int
|
4990 |
|
|
mem_is_padded_component_ref (rtx x)
|
4991 |
|
|
{
|
4992 |
|
|
tree t = MEM_EXPR (x);
|
4993 |
|
|
tree r;
|
4994 |
|
|
if (!t || TREE_CODE (t) != COMPONENT_REF)
|
4995 |
|
|
return 0;
|
4996 |
|
|
t = TREE_OPERAND (t, 1);
|
4997 |
|
|
if (!t || TREE_CODE (t) != FIELD_DECL
|
4998 |
|
|
|| DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
|
4999 |
|
|
return 0;
|
5000 |
|
|
/* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
|
5001 |
|
|
r = DECL_FIELD_CONTEXT (t);
|
5002 |
|
|
if (!r || TREE_CODE (r) != RECORD_TYPE)
|
5003 |
|
|
return 0;
|
5004 |
|
|
/* Make sure they are the same mode */
|
5005 |
|
|
if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
|
5006 |
|
|
return 0;
|
5007 |
|
|
/* If there are no following fields then the field alignment assures
|
5008 |
|
|
the structure is padded to the alignment which means this field is
|
5009 |
|
|
padded too. */
|
5010 |
|
|
if (TREE_CHAIN (t) == 0)
|
5011 |
|
|
return 1;
|
5012 |
|
|
/* If the following field is also aligned then this field will be
|
5013 |
|
|
padded. */
|
5014 |
|
|
t = TREE_CHAIN (t);
|
5015 |
|
|
if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
|
5016 |
|
|
return 1;
|
5017 |
|
|
return 0;
|
5018 |
|
|
}
|
5019 |
|
|
|
5020 |
|
|
/* Parse the -mfixed-range= option string. */
|
5021 |
|
|
static void
|
5022 |
|
|
fix_range (const char *const_str)
|
5023 |
|
|
{
|
5024 |
|
|
int i, first, last;
|
5025 |
|
|
char *str, *dash, *comma;
|
5026 |
|
|
|
5027 |
|
|
/* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
|
5028 |
|
|
REG2 are either register names or register numbers. The effect
|
5029 |
|
|
of this option is to mark the registers in the range from REG1 to
|
5030 |
|
|
REG2 as ``fixed'' so they won't be used by the compiler. */
|
5031 |
|
|
|
5032 |
|
|
i = strlen (const_str);
|
5033 |
|
|
str = (char *) alloca (i + 1);
|
5034 |
|
|
memcpy (str, const_str, i + 1);
|
5035 |
|
|
|
5036 |
|
|
while (1)
|
5037 |
|
|
{
|
5038 |
|
|
dash = strchr (str, '-');
|
5039 |
|
|
if (!dash)
|
5040 |
|
|
{
|
5041 |
|
|
warning (0, "value of -mfixed-range must have form REG1-REG2");
|
5042 |
|
|
return;
|
5043 |
|
|
}
|
5044 |
|
|
*dash = '\0';
|
5045 |
|
|
comma = strchr (dash + 1, ',');
|
5046 |
|
|
if (comma)
|
5047 |
|
|
*comma = '\0';
|
5048 |
|
|
|
5049 |
|
|
first = decode_reg_name (str);
|
5050 |
|
|
if (first < 0)
|
5051 |
|
|
{
|
5052 |
|
|
warning (0, "unknown register name: %s", str);
|
5053 |
|
|
return;
|
5054 |
|
|
}
|
5055 |
|
|
|
5056 |
|
|
last = decode_reg_name (dash + 1);
|
5057 |
|
|
if (last < 0)
|
5058 |
|
|
{
|
5059 |
|
|
warning (0, "unknown register name: %s", dash + 1);
|
5060 |
|
|
return;
|
5061 |
|
|
}
|
5062 |
|
|
|
5063 |
|
|
*dash = '-';
|
5064 |
|
|
|
5065 |
|
|
if (first > last)
|
5066 |
|
|
{
|
5067 |
|
|
warning (0, "%s-%s is an empty range", str, dash + 1);
|
5068 |
|
|
return;
|
5069 |
|
|
}
|
5070 |
|
|
|
5071 |
|
|
for (i = first; i <= last; ++i)
|
5072 |
|
|
fixed_regs[i] = call_used_regs[i] = 1;
|
5073 |
|
|
|
5074 |
|
|
if (!comma)
|
5075 |
|
|
break;
|
5076 |
|
|
|
5077 |
|
|
*comma = ',';
|
5078 |
|
|
str = comma + 1;
|
5079 |
|
|
}
|
5080 |
|
|
}
|
5081 |
|
|
|
5082 |
|
|
/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
|
5083 |
|
|
can be generated using the fsmbi instruction. */
|
5084 |
|
|
int
|
5085 |
|
|
fsmbi_const_p (rtx x)
|
5086 |
|
|
{
|
5087 |
|
|
if (CONSTANT_P (x))
|
5088 |
|
|
{
|
5089 |
|
|
/* We can always choose TImode for CONST_INT because the high bits
|
5090 |
|
|
of an SImode will always be all 1s, i.e., valid for fsmbi. */
|
5091 |
|
|
enum immediate_class c = classify_immediate (x, TImode);
|
5092 |
|
|
return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
|
5093 |
|
|
}
|
5094 |
|
|
return 0;
|
5095 |
|
|
}
|
5096 |
|
|
|
5097 |
|
|
/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
|
5098 |
|
|
can be generated using the cbd, chd, cwd or cdd instruction. */
|
5099 |
|
|
int
|
5100 |
|
|
cpat_const_p (rtx x, enum machine_mode mode)
|
5101 |
|
|
{
|
5102 |
|
|
if (CONSTANT_P (x))
|
5103 |
|
|
{
|
5104 |
|
|
enum immediate_class c = classify_immediate (x, mode);
|
5105 |
|
|
return c == IC_CPAT;
|
5106 |
|
|
}
|
5107 |
|
|
return 0;
|
5108 |
|
|
}
|
5109 |
|
|
|
5110 |
|
|
rtx
|
5111 |
|
|
gen_cpat_const (rtx * ops)
|
5112 |
|
|
{
|
5113 |
|
|
unsigned char dst[16];
|
5114 |
|
|
int i, offset, shift, isize;
|
5115 |
|
|
if (GET_CODE (ops[3]) != CONST_INT
|
5116 |
|
|
|| GET_CODE (ops[2]) != CONST_INT
|
5117 |
|
|
|| (GET_CODE (ops[1]) != CONST_INT
|
5118 |
|
|
&& GET_CODE (ops[1]) != REG))
|
5119 |
|
|
return 0;
|
5120 |
|
|
if (GET_CODE (ops[1]) == REG
|
5121 |
|
|
&& (!REG_POINTER (ops[1])
|
5122 |
|
|
|| REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
|
5123 |
|
|
return 0;
|
5124 |
|
|
|
5125 |
|
|
for (i = 0; i < 16; i++)
|
5126 |
|
|
dst[i] = i + 16;
|
5127 |
|
|
isize = INTVAL (ops[3]);
|
5128 |
|
|
if (isize == 1)
|
5129 |
|
|
shift = 3;
|
5130 |
|
|
else if (isize == 2)
|
5131 |
|
|
shift = 2;
|
5132 |
|
|
else
|
5133 |
|
|
shift = 0;
|
5134 |
|
|
offset = (INTVAL (ops[2]) +
|
5135 |
|
|
(GET_CODE (ops[1]) ==
|
5136 |
|
|
CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
|
5137 |
|
|
for (i = 0; i < isize; i++)
|
5138 |
|
|
dst[offset + i] = i + shift;
|
5139 |
|
|
return array_to_constant (TImode, dst);
|
5140 |
|
|
}
|
5141 |
|
|
|
5142 |
|
|
/* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
|
5143 |
|
|
array. Use MODE for CONST_INT's. When the constant's mode is smaller
|
5144 |
|
|
than 16 bytes, the value is repeated across the rest of the array. */
|
5145 |
|
|
void
|
5146 |
|
|
constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
|
5147 |
|
|
{
|
5148 |
|
|
HOST_WIDE_INT val;
|
5149 |
|
|
int i, j, first;
|
5150 |
|
|
|
5151 |
|
|
memset (arr, 0, 16);
|
5152 |
|
|
mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
|
5153 |
|
|
if (GET_CODE (x) == CONST_INT
|
5154 |
|
|
|| (GET_CODE (x) == CONST_DOUBLE
|
5155 |
|
|
&& (mode == SFmode || mode == DFmode)))
|
5156 |
|
|
{
|
5157 |
|
|
gcc_assert (mode != VOIDmode && mode != BLKmode);
|
5158 |
|
|
|
5159 |
|
|
if (GET_CODE (x) == CONST_DOUBLE)
|
5160 |
|
|
val = const_double_to_hwint (x);
|
5161 |
|
|
else
|
5162 |
|
|
val = INTVAL (x);
|
5163 |
|
|
first = GET_MODE_SIZE (mode) - 1;
|
5164 |
|
|
for (i = first; i >= 0; i--)
|
5165 |
|
|
{
|
5166 |
|
|
arr[i] = val & 0xff;
|
5167 |
|
|
val >>= 8;
|
5168 |
|
|
}
|
5169 |
|
|
/* Splat the constant across the whole array. */
|
5170 |
|
|
for (j = 0, i = first + 1; i < 16; i++)
|
5171 |
|
|
{
|
5172 |
|
|
arr[i] = arr[j];
|
5173 |
|
|
j = (j == first) ? 0 : j + 1;
|
5174 |
|
|
}
|
5175 |
|
|
}
|
5176 |
|
|
else if (GET_CODE (x) == CONST_DOUBLE)
|
5177 |
|
|
{
|
5178 |
|
|
val = CONST_DOUBLE_LOW (x);
|
5179 |
|
|
for (i = 15; i >= 8; i--)
|
5180 |
|
|
{
|
5181 |
|
|
arr[i] = val & 0xff;
|
5182 |
|
|
val >>= 8;
|
5183 |
|
|
}
|
5184 |
|
|
val = CONST_DOUBLE_HIGH (x);
|
5185 |
|
|
for (i = 7; i >= 0; i--)
|
5186 |
|
|
{
|
5187 |
|
|
arr[i] = val & 0xff;
|
5188 |
|
|
val >>= 8;
|
5189 |
|
|
}
|
5190 |
|
|
}
|
5191 |
|
|
else if (GET_CODE (x) == CONST_VECTOR)
|
5192 |
|
|
{
|
5193 |
|
|
int units;
|
5194 |
|
|
rtx elt;
|
5195 |
|
|
mode = GET_MODE_INNER (mode);
|
5196 |
|
|
units = CONST_VECTOR_NUNITS (x);
|
5197 |
|
|
for (i = 0; i < units; i++)
|
5198 |
|
|
{
|
5199 |
|
|
elt = CONST_VECTOR_ELT (x, i);
|
5200 |
|
|
if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
|
5201 |
|
|
{
|
5202 |
|
|
if (GET_CODE (elt) == CONST_DOUBLE)
|
5203 |
|
|
val = const_double_to_hwint (elt);
|
5204 |
|
|
else
|
5205 |
|
|
val = INTVAL (elt);
|
5206 |
|
|
first = GET_MODE_SIZE (mode) - 1;
|
5207 |
|
|
if (first + i * GET_MODE_SIZE (mode) > 16)
|
5208 |
|
|
abort ();
|
5209 |
|
|
for (j = first; j >= 0; j--)
|
5210 |
|
|
{
|
5211 |
|
|
arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
|
5212 |
|
|
val >>= 8;
|
5213 |
|
|
}
|
5214 |
|
|
}
|
5215 |
|
|
}
|
5216 |
|
|
}
|
5217 |
|
|
else
|
5218 |
|
|
gcc_unreachable();
|
5219 |
|
|
}
|
5220 |
|
|
|
5221 |
|
|
/* Convert a 16 byte array to a constant of mode MODE. When MODE is
|
5222 |
|
|
smaller than 16 bytes, use the bytes that would represent that value
|
5223 |
|
|
in a register, e.g., for QImode return the value of arr[3]. */
|
5224 |
|
|
rtx
|
5225 |
|
|
array_to_constant (enum machine_mode mode, const unsigned char arr[16])
|
5226 |
|
|
{
|
5227 |
|
|
enum machine_mode inner_mode;
|
5228 |
|
|
rtvec v;
|
5229 |
|
|
int units, size, i, j, k;
|
5230 |
|
|
HOST_WIDE_INT val;
|
5231 |
|
|
|
5232 |
|
|
if (GET_MODE_CLASS (mode) == MODE_INT
|
5233 |
|
|
&& GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
|
5234 |
|
|
{
|
5235 |
|
|
j = GET_MODE_SIZE (mode);
|
5236 |
|
|
i = j < 4 ? 4 - j : 0;
|
5237 |
|
|
for (val = 0; i < j; i++)
|
5238 |
|
|
val = (val << 8) | arr[i];
|
5239 |
|
|
val = trunc_int_for_mode (val, mode);
|
5240 |
|
|
return GEN_INT (val);
|
5241 |
|
|
}
|
5242 |
|
|
|
5243 |
|
|
if (mode == TImode)
|
5244 |
|
|
{
|
5245 |
|
|
HOST_WIDE_INT high;
|
5246 |
|
|
for (i = high = 0; i < 8; i++)
|
5247 |
|
|
high = (high << 8) | arr[i];
|
5248 |
|
|
for (i = 8, val = 0; i < 16; i++)
|
5249 |
|
|
val = (val << 8) | arr[i];
|
5250 |
|
|
return immed_double_const (val, high, TImode);
|
5251 |
|
|
}
|
5252 |
|
|
if (mode == SFmode)
|
5253 |
|
|
{
|
5254 |
|
|
val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
|
5255 |
|
|
val = trunc_int_for_mode (val, SImode);
|
5256 |
|
|
return hwint_to_const_double (SFmode, val);
|
5257 |
|
|
}
|
5258 |
|
|
if (mode == DFmode)
|
5259 |
|
|
{
|
5260 |
|
|
for (i = 0, val = 0; i < 8; i++)
|
5261 |
|
|
val = (val << 8) | arr[i];
|
5262 |
|
|
return hwint_to_const_double (DFmode, val);
|
5263 |
|
|
}
|
5264 |
|
|
|
5265 |
|
|
if (!VECTOR_MODE_P (mode))
|
5266 |
|
|
abort ();
|
5267 |
|
|
|
5268 |
|
|
units = GET_MODE_NUNITS (mode);
|
5269 |
|
|
size = GET_MODE_UNIT_SIZE (mode);
|
5270 |
|
|
inner_mode = GET_MODE_INNER (mode);
|
5271 |
|
|
v = rtvec_alloc (units);
|
5272 |
|
|
|
5273 |
|
|
for (k = i = 0; i < units; ++i)
|
5274 |
|
|
{
|
5275 |
|
|
val = 0;
|
5276 |
|
|
for (j = 0; j < size; j++, k++)
|
5277 |
|
|
val = (val << 8) | arr[k];
|
5278 |
|
|
|
5279 |
|
|
if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
|
5280 |
|
|
RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
|
5281 |
|
|
else
|
5282 |
|
|
RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
|
5283 |
|
|
}
|
5284 |
|
|
if (k > 16)
|
5285 |
|
|
abort ();
|
5286 |
|
|
|
5287 |
|
|
return gen_rtx_CONST_VECTOR (mode, v);
|
5288 |
|
|
}
|
5289 |
|
|
|
5290 |
|
|
static void
|
5291 |
|
|
reloc_diagnostic (rtx x)
|
5292 |
|
|
{
|
5293 |
|
|
tree decl = 0;
|
5294 |
|
|
if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
|
5295 |
|
|
return;
|
5296 |
|
|
|
5297 |
|
|
if (GET_CODE (x) == SYMBOL_REF)
|
5298 |
|
|
decl = SYMBOL_REF_DECL (x);
|
5299 |
|
|
else if (GET_CODE (x) == CONST
|
5300 |
|
|
&& GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
|
5301 |
|
|
decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
|
5302 |
|
|
|
5303 |
|
|
/* SYMBOL_REF_DECL is not necessarily a DECL. */
|
5304 |
|
|
if (decl && !DECL_P (decl))
|
5305 |
|
|
decl = 0;
|
5306 |
|
|
|
5307 |
|
|
/* The decl could be a string constant. */
|
5308 |
|
|
if (decl && DECL_P (decl))
|
5309 |
|
|
{
|
5310 |
|
|
location_t loc;
|
5311 |
|
|
/* We use last_assemble_variable_decl to get line information. It's
|
5312 |
|
|
not always going to be right and might not even be close, but will
|
5313 |
|
|
be right for the more common cases. */
|
5314 |
|
|
if (!last_assemble_variable_decl || in_section == ctors_section)
|
5315 |
|
|
loc = DECL_SOURCE_LOCATION (decl);
|
5316 |
|
|
else
|
5317 |
|
|
loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
|
5318 |
|
|
|
5319 |
|
|
if (TARGET_WARN_RELOC)
|
5320 |
|
|
warning_at (loc, 0,
|
5321 |
|
|
"creating run-time relocation for %qD", decl);
|
5322 |
|
|
else
|
5323 |
|
|
error_at (loc,
|
5324 |
|
|
"creating run-time relocation for %qD", decl);
|
5325 |
|
|
}
|
5326 |
|
|
else
|
5327 |
|
|
{
|
5328 |
|
|
if (TARGET_WARN_RELOC)
|
5329 |
|
|
warning_at (input_location, 0, "creating run-time relocation");
|
5330 |
|
|
else
|
5331 |
|
|
error_at (input_location, "creating run-time relocation");
|
5332 |
|
|
}
|
5333 |
|
|
}
|
5334 |
|
|
|
5335 |
|
|
/* Hook into assemble_integer so we can generate an error for run-time
|
5336 |
|
|
relocations. The SPU ABI disallows them. */
|
5337 |
|
|
static bool
|
5338 |
|
|
spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
|
5339 |
|
|
{
|
5340 |
|
|
/* By default run-time relocations aren't supported, but we allow them
|
5341 |
|
|
in case users support it in their own run-time loader. And we provide
|
5342 |
|
|
a warning for those users that don't. */
|
5343 |
|
|
if ((GET_CODE (x) == SYMBOL_REF)
|
5344 |
|
|
|| GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
|
5345 |
|
|
reloc_diagnostic (x);
|
5346 |
|
|
|
5347 |
|
|
return default_assemble_integer (x, size, aligned_p);
|
5348 |
|
|
}
|
5349 |
|
|
|
5350 |
|
|
/* Write a ".global" directive for NAME to FILE: a tab, ".global", a
   tab, the name as printed by assemble_name, and a newline.  */
static void
spu_asm_globalize_label (FILE * file, const char *name)
{
  fputs ("\t.global\t", file);
  assemble_name (file, name);
  putc ('\n', file);
}
|
5357 |
|
|
|
5358 |
|
|
static bool
|
5359 |
|
|
spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
|
5360 |
|
|
bool speed ATTRIBUTE_UNUSED)
|
5361 |
|
|
{
|
5362 |
|
|
enum machine_mode mode = GET_MODE (x);
|
5363 |
|
|
int cost = COSTS_N_INSNS (2);
|
5364 |
|
|
|
5365 |
|
|
/* Folding to a CONST_VECTOR will use extra space but there might
|
5366 |
|
|
be only a small savings in cycles. We'd like to use a CONST_VECTOR
|
5367 |
|
|
only if it allows us to fold away multiple insns. Changing the cost
|
5368 |
|
|
of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
|
5369 |
|
|
because this cost will only be compared against a single insn.
|
5370 |
|
|
if (code == CONST_VECTOR)
|
5371 |
|
|
return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
|
5372 |
|
|
*/
|
5373 |
|
|
|
5374 |
|
|
/* Use defaults for float operations. Not accurate but good enough. */
|
5375 |
|
|
if (mode == DFmode)
|
5376 |
|
|
{
|
5377 |
|
|
*total = COSTS_N_INSNS (13);
|
5378 |
|
|
return true;
|
5379 |
|
|
}
|
5380 |
|
|
if (mode == SFmode)
|
5381 |
|
|
{
|
5382 |
|
|
*total = COSTS_N_INSNS (6);
|
5383 |
|
|
return true;
|
5384 |
|
|
}
|
5385 |
|
|
switch (code)
|
5386 |
|
|
{
|
5387 |
|
|
case CONST_INT:
|
5388 |
|
|
if (satisfies_constraint_K (x))
|
5389 |
|
|
*total = 0;
|
5390 |
|
|
else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
|
5391 |
|
|
*total = COSTS_N_INSNS (1);
|
5392 |
|
|
else
|
5393 |
|
|
*total = COSTS_N_INSNS (3);
|
5394 |
|
|
return true;
|
5395 |
|
|
|
5396 |
|
|
case CONST:
|
5397 |
|
|
*total = COSTS_N_INSNS (3);
|
5398 |
|
|
return true;
|
5399 |
|
|
|
5400 |
|
|
case LABEL_REF:
|
5401 |
|
|
case SYMBOL_REF:
|
5402 |
|
|
*total = COSTS_N_INSNS (0);
|
5403 |
|
|
return true;
|
5404 |
|
|
|
5405 |
|
|
case CONST_DOUBLE:
|
5406 |
|
|
*total = COSTS_N_INSNS (5);
|
5407 |
|
|
return true;
|
5408 |
|
|
|
5409 |
|
|
case FLOAT_EXTEND:
|
5410 |
|
|
case FLOAT_TRUNCATE:
|
5411 |
|
|
case FLOAT:
|
5412 |
|
|
case UNSIGNED_FLOAT:
|
5413 |
|
|
case FIX:
|
5414 |
|
|
case UNSIGNED_FIX:
|
5415 |
|
|
*total = COSTS_N_INSNS (7);
|
5416 |
|
|
return true;
|
5417 |
|
|
|
5418 |
|
|
case PLUS:
|
5419 |
|
|
if (mode == TImode)
|
5420 |
|
|
{
|
5421 |
|
|
*total = COSTS_N_INSNS (9);
|
5422 |
|
|
return true;
|
5423 |
|
|
}
|
5424 |
|
|
break;
|
5425 |
|
|
|
5426 |
|
|
case MULT:
|
5427 |
|
|
cost =
|
5428 |
|
|
GET_CODE (XEXP (x, 0)) ==
|
5429 |
|
|
REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
|
5430 |
|
|
if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
|
5431 |
|
|
{
|
5432 |
|
|
if (GET_CODE (XEXP (x, 1)) == CONST_INT)
|
5433 |
|
|
{
|
5434 |
|
|
HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
|
5435 |
|
|
cost = COSTS_N_INSNS (14);
|
5436 |
|
|
if ((val & 0xffff) == 0)
|
5437 |
|
|
cost = COSTS_N_INSNS (9);
|
5438 |
|
|
else if (val > 0 && val < 0x10000)
|
5439 |
|
|
cost = COSTS_N_INSNS (11);
|
5440 |
|
|
}
|
5441 |
|
|
}
|
5442 |
|
|
*total = cost;
|
5443 |
|
|
return true;
|
5444 |
|
|
case DIV:
|
5445 |
|
|
case UDIV:
|
5446 |
|
|
case MOD:
|
5447 |
|
|
case UMOD:
|
5448 |
|
|
*total = COSTS_N_INSNS (20);
|
5449 |
|
|
return true;
|
5450 |
|
|
case ROTATE:
|
5451 |
|
|
case ROTATERT:
|
5452 |
|
|
case ASHIFT:
|
5453 |
|
|
case ASHIFTRT:
|
5454 |
|
|
case LSHIFTRT:
|
5455 |
|
|
*total = COSTS_N_INSNS (4);
|
5456 |
|
|
return true;
|
5457 |
|
|
case UNSPEC:
|
5458 |
|
|
if (XINT (x, 1) == UNSPEC_CONVERT)
|
5459 |
|
|
*total = COSTS_N_INSNS (0);
|
5460 |
|
|
else
|
5461 |
|
|
*total = COSTS_N_INSNS (4);
|
5462 |
|
|
return true;
|
5463 |
|
|
}
|
5464 |
|
|
/* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
|
5465 |
|
|
if (GET_MODE_CLASS (mode) == MODE_INT
|
5466 |
|
|
&& GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
|
5467 |
|
|
cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
|
5468 |
|
|
* (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
|
5469 |
|
|
*total = cost;
|
5470 |
|
|
return true;
|
5471 |
|
|
}
|
5472 |
|
|
|
5473 |
|
|
static enum machine_mode
|
5474 |
|
|
spu_unwind_word_mode (void)
|
5475 |
|
|
{
|
5476 |
|
|
return SImode;
|
5477 |
|
|
}
|
5478 |
|
|
|
5479 |
|
|
/* Decide whether we can make a sibling call to a function. DECL is the
|
5480 |
|
|
declaration of the function being targeted by the call and EXP is the
|
5481 |
|
|
CALL_EXPR representing the call. */
|
5482 |
|
|
static bool
|
5483 |
|
|
spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
|
5484 |
|
|
{
|
5485 |
|
|
return decl && !TARGET_LARGE_MEM;
|
5486 |
|
|
}
|
5487 |
|
|
|
5488 |
|
|
/* We need to correctly update the back chain pointer and the Available
|
5489 |
|
|
Stack Size (which is in the second slot of the sp register.) */
|
5490 |
|
|
void
|
5491 |
|
|
spu_allocate_stack (rtx op0, rtx op1)
|
5492 |
|
|
{
|
5493 |
|
|
HOST_WIDE_INT v;
|
5494 |
|
|
rtx chain = gen_reg_rtx (V4SImode);
|
5495 |
|
|
rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
|
5496 |
|
|
rtx sp = gen_reg_rtx (V4SImode);
|
5497 |
|
|
rtx splatted = gen_reg_rtx (V4SImode);
|
5498 |
|
|
rtx pat = gen_reg_rtx (TImode);
|
5499 |
|
|
|
5500 |
|
|
/* copy the back chain so we can save it back again. */
|
5501 |
|
|
emit_move_insn (chain, stack_bot);
|
5502 |
|
|
|
5503 |
|
|
op1 = force_reg (SImode, op1);
|
5504 |
|
|
|
5505 |
|
|
v = 0x1020300010203ll;
|
5506 |
|
|
emit_move_insn (pat, immed_double_const (v, v, TImode));
|
5507 |
|
|
emit_insn (gen_shufb (splatted, op1, op1, pat));
|
5508 |
|
|
|
5509 |
|
|
emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
|
5510 |
|
|
emit_insn (gen_subv4si3 (sp, sp, splatted));
|
5511 |
|
|
|
5512 |
|
|
if (flag_stack_check)
|
5513 |
|
|
{
|
5514 |
|
|
rtx avail = gen_reg_rtx(SImode);
|
5515 |
|
|
rtx result = gen_reg_rtx(SImode);
|
5516 |
|
|
emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
|
5517 |
|
|
emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
|
5518 |
|
|
emit_insn (gen_spu_heq (result, GEN_INT(0) ));
|
5519 |
|
|
}
|
5520 |
|
|
|
5521 |
|
|
emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
|
5522 |
|
|
|
5523 |
|
|
emit_move_insn (stack_bot, chain);
|
5524 |
|
|
|
5525 |
|
|
emit_move_insn (op0, virtual_stack_dynamic_rtx);
|
5526 |
|
|
}
|
5527 |
|
|
|
5528 |
|
|
void
|
5529 |
|
|
spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
|
5530 |
|
|
{
|
5531 |
|
|
static unsigned char arr[16] =
|
5532 |
|
|
{ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
|
5533 |
|
|
rtx temp = gen_reg_rtx (SImode);
|
5534 |
|
|
rtx temp2 = gen_reg_rtx (SImode);
|
5535 |
|
|
rtx temp3 = gen_reg_rtx (V4SImode);
|
5536 |
|
|
rtx temp4 = gen_reg_rtx (V4SImode);
|
5537 |
|
|
rtx pat = gen_reg_rtx (TImode);
|
5538 |
|
|
rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
|
5539 |
|
|
|
5540 |
|
|
/* Restore the backchain from the first word, sp from the second. */
|
5541 |
|
|
emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
|
5542 |
|
|
emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
|
5543 |
|
|
|
5544 |
|
|
emit_move_insn (pat, array_to_constant (TImode, arr));
|
5545 |
|
|
|
5546 |
|
|
/* Compute Available Stack Size for sp */
|
5547 |
|
|
emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
|
5548 |
|
|
emit_insn (gen_shufb (temp3, temp, temp, pat));
|
5549 |
|
|
|
5550 |
|
|
/* Compute Available Stack Size for back chain */
|
5551 |
|
|
emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
|
5552 |
|
|
emit_insn (gen_shufb (temp4, temp2, temp2, pat));
|
5553 |
|
|
emit_insn (gen_addv4si3 (temp4, sp, temp4));
|
5554 |
|
|
|
5555 |
|
|
emit_insn (gen_addv4si3 (sp, sp, temp3));
|
5556 |
|
|
emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
|
5557 |
|
|
}
|
5558 |
|
|
|
5559 |
|
|
static void
|
5560 |
|
|
spu_init_libfuncs (void)
|
5561 |
|
|
{
|
5562 |
|
|
set_optab_libfunc (smul_optab, DImode, "__muldi3");
|
5563 |
|
|
set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
|
5564 |
|
|
set_optab_libfunc (smod_optab, DImode, "__moddi3");
|
5565 |
|
|
set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
|
5566 |
|
|
set_optab_libfunc (umod_optab, DImode, "__umoddi3");
|
5567 |
|
|
set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
|
5568 |
|
|
set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
|
5569 |
|
|
set_optab_libfunc (clz_optab, DImode, "__clzdi2");
|
5570 |
|
|
set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
|
5571 |
|
|
set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
|
5572 |
|
|
set_optab_libfunc (parity_optab, DImode, "__paritydi2");
|
5573 |
|
|
|
5574 |
|
|
set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
|
5575 |
|
|
set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
|
5576 |
|
|
|
5577 |
|
|
set_optab_libfunc (smul_optab, TImode, "__multi3");
|
5578 |
|
|
set_optab_libfunc (sdiv_optab, TImode, "__divti3");
|
5579 |
|
|
set_optab_libfunc (smod_optab, TImode, "__modti3");
|
5580 |
|
|
set_optab_libfunc (udiv_optab, TImode, "__udivti3");
|
5581 |
|
|
set_optab_libfunc (umod_optab, TImode, "__umodti3");
|
5582 |
|
|
set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
|
5583 |
|
|
}
|
5584 |
|
|
|
5585 |
|
|
/* Make a subreg, stripping any existing subreg. We could possibly just
|
5586 |
|
|
call simplify_subreg, but in this case we know what we want. */
|
5587 |
|
|
rtx
|
5588 |
|
|
spu_gen_subreg (enum machine_mode mode, rtx x)
|
5589 |
|
|
{
|
5590 |
|
|
if (GET_CODE (x) == SUBREG)
|
5591 |
|
|
x = SUBREG_REG (x);
|
5592 |
|
|
if (GET_MODE (x) == mode)
|
5593 |
|
|
return x;
|
5594 |
|
|
return gen_rtx_SUBREG (mode, x, 0);
|
5595 |
|
|
}
|
5596 |
|
|
|
5597 |
|
|
static bool
|
5598 |
|
|
spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
|
5599 |
|
|
{
|
5600 |
|
|
return (TYPE_MODE (type) == BLKmode
|
5601 |
|
|
&& ((type) == 0
|
5602 |
|
|
|| TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
|
5603 |
|
|
|| int_size_in_bytes (type) >
|
5604 |
|
|
(MAX_REGISTER_RETURN * UNITS_PER_WORD)));
|
5605 |
|
|
}
|
5606 |
|
|
|
5607 |
|
|
/* Create the built-in types and functions.  */

/* One enumerator per DEF_BUILTIN entry of spu-builtins.def, in file
   order, followed by NUM_SPU_BUILTINS, which therefore equals the number
   of entries.  */
enum spu_function_code
{
#define DEF_BUILTIN(FCODE, ICODE, NAME, TYPE, PARAMS) FCODE,
#include "spu-builtins.def"
#undef DEF_BUILTIN
  NUM_SPU_BUILTINS
};

extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];

/* Descriptor table expanded from the same .def file, so the entry at
   index I belongs to the enumerator with value I.  The last field of
   each entry starts out as NULL_TREE.  */
struct spu_builtin_description spu_builtins[] = {
#define DEF_BUILTIN(FCODE, ICODE, NAME, TYPE, PARAMS) \
  {FCODE, ICODE, NAME, TYPE, PARAMS, NULL_TREE},
#include "spu-builtins.def"
#undef DEF_BUILTIN
};
|
5625 |
|
|
|
5626 |
|
|
/* Returns the rs6000 builtin decl for CODE. */
|
5627 |
|
|
|
5628 |
|
|
static tree
|
5629 |
|
|
spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
|
5630 |
|
|
{
|
5631 |
|
|
if (code >= NUM_SPU_BUILTINS)
|
5632 |
|
|
return error_mark_node;
|
5633 |
|
|
|
5634 |
|
|
return spu_builtins[code].fndecl;
|
5635 |
|
|
}
|
5636 |
|
|
|
5637 |
|
|
|
5638 |
|
|
static void
|
5639 |
|
|
spu_init_builtins (void)
|
5640 |
|
|
{
|
5641 |
|
|
struct spu_builtin_description *d;
|
5642 |
|
|
unsigned int i;
|
5643 |
|
|
|
5644 |
|
|
V16QI_type_node = build_vector_type (intQI_type_node, 16);
|
5645 |
|
|
V8HI_type_node = build_vector_type (intHI_type_node, 8);
|
5646 |
|
|
V4SI_type_node = build_vector_type (intSI_type_node, 4);
|
5647 |
|
|
V2DI_type_node = build_vector_type (intDI_type_node, 2);
|
5648 |
|
|
V4SF_type_node = build_vector_type (float_type_node, 4);
|
5649 |
|
|
V2DF_type_node = build_vector_type (double_type_node, 2);
|
5650 |
|
|
|
5651 |
|
|
unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
|
5652 |
|
|
unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
|
5653 |
|
|
unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
|
5654 |
|
|
unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
|
5655 |
|
|
|
5656 |
|
|
spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
|
5657 |
|
|
|
5658 |
|
|
spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
|
5659 |
|
|
spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
|
5660 |
|
|
spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
|
5661 |
|
|
spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
|
5662 |
|
|
spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
|
5663 |
|
|
spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
|
5664 |
|
|
spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
|
5665 |
|
|
spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
|
5666 |
|
|
spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
|
5667 |
|
|
spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
|
5668 |
|
|
spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
|
5669 |
|
|
spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
|
5670 |
|
|
|
5671 |
|
|
spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
|
5672 |
|
|
spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
|
5673 |
|
|
spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
|
5674 |
|
|
spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
|
5675 |
|
|
spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
|
5676 |
|
|
spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
|
5677 |
|
|
spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
|
5678 |
|
|
spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
|
5679 |
|
|
|
5680 |
|
|
spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
|
5681 |
|
|
spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
|
5682 |
|
|
|
5683 |
|
|
spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
|
5684 |
|
|
|
5685 |
|
|
spu_builtin_types[SPU_BTI_PTR] =
|
5686 |
|
|
build_pointer_type (build_qualified_type
|
5687 |
|
|
(void_type_node,
|
5688 |
|
|
TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
|
5689 |
|
|
|
5690 |
|
|
/* For each builtin we build a new prototype. The tree code will make
|
5691 |
|
|
sure nodes are shared. */
|
5692 |
|
|
for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
|
5693 |
|
|
{
|
5694 |
|
|
tree p;
|
5695 |
|
|
char name[64]; /* build_function will make a copy. */
|
5696 |
|
|
int parm;
|
5697 |
|
|
|
5698 |
|
|
if (d->name == 0)
|
5699 |
|
|
continue;
|
5700 |
|
|
|
5701 |
|
|
/* Find last parm. */
|
5702 |
|
|
for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
|
5703 |
|
|
;
|
5704 |
|
|
|
5705 |
|
|
p = void_list_node;
|
5706 |
|
|
while (parm > 1)
|
5707 |
|
|
p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
|
5708 |
|
|
|
5709 |
|
|
p = build_function_type (spu_builtin_types[d->parm[0]], p);
|
5710 |
|
|
|
5711 |
|
|
sprintf (name, "__builtin_%s", d->name);
|
5712 |
|
|
d->fndecl =
|
5713 |
|
|
add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
|
5714 |
|
|
NULL, NULL_TREE);
|
5715 |
|
|
if (d->fcode == SPU_MASK_FOR_LOAD)
|
5716 |
|
|
TREE_READONLY (d->fndecl) = 1;
|
5717 |
|
|
|
5718 |
|
|
/* These builtins don't throw. */
|
5719 |
|
|
TREE_NOTHROW (d->fndecl) = 1;
|
5720 |
|
|
}
|
5721 |
|
|
}
|
5722 |
|
|
|
5723 |
|
|
void
|
5724 |
|
|
spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
|
5725 |
|
|
{
|
5726 |
|
|
static unsigned char arr[16] =
|
5727 |
|
|
{ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
|
5728 |
|
|
|
5729 |
|
|
rtx temp = gen_reg_rtx (Pmode);
|
5730 |
|
|
rtx temp2 = gen_reg_rtx (V4SImode);
|
5731 |
|
|
rtx temp3 = gen_reg_rtx (V4SImode);
|
5732 |
|
|
rtx pat = gen_reg_rtx (TImode);
|
5733 |
|
|
rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
|
5734 |
|
|
|
5735 |
|
|
emit_move_insn (pat, array_to_constant (TImode, arr));
|
5736 |
|
|
|
5737 |
|
|
/* Restore the sp. */
|
5738 |
|
|
emit_move_insn (temp, op1);
|
5739 |
|
|
emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
|
5740 |
|
|
|
5741 |
|
|
/* Compute available stack size for sp. */
|
5742 |
|
|
emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
|
5743 |
|
|
emit_insn (gen_shufb (temp3, temp, temp, pat));
|
5744 |
|
|
|
5745 |
|
|
emit_insn (gen_addv4si3 (sp, sp, temp3));
|
5746 |
|
|
emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
|
5747 |
|
|
}
|
5748 |
|
|
|
5749 |
|
|
int
|
5750 |
|
|
spu_safe_dma (HOST_WIDE_INT channel)
|
5751 |
|
|
{
|
5752 |
|
|
return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
|
5753 |
|
|
}
|
5754 |
|
|
|
5755 |
|
|
void
|
5756 |
|
|
spu_builtin_splats (rtx ops[])
|
5757 |
|
|
{
|
5758 |
|
|
enum machine_mode mode = GET_MODE (ops[0]);
|
5759 |
|
|
if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
|
5760 |
|
|
{
|
5761 |
|
|
unsigned char arr[16];
|
5762 |
|
|
constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
|
5763 |
|
|
emit_move_insn (ops[0], array_to_constant (mode, arr));
|
5764 |
|
|
}
|
5765 |
|
|
else
|
5766 |
|
|
{
|
5767 |
|
|
rtx reg = gen_reg_rtx (TImode);
|
5768 |
|
|
rtx shuf;
|
5769 |
|
|
if (GET_CODE (ops[1]) != REG
|
5770 |
|
|
&& GET_CODE (ops[1]) != SUBREG)
|
5771 |
|
|
ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
|
5772 |
|
|
switch (mode)
|
5773 |
|
|
{
|
5774 |
|
|
case V2DImode:
|
5775 |
|
|
case V2DFmode:
|
5776 |
|
|
shuf =
|
5777 |
|
|
immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
|
5778 |
|
|
TImode);
|
5779 |
|
|
break;
|
5780 |
|
|
case V4SImode:
|
5781 |
|
|
case V4SFmode:
|
5782 |
|
|
shuf =
|
5783 |
|
|
immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
|
5784 |
|
|
TImode);
|
5785 |
|
|
break;
|
5786 |
|
|
case V8HImode:
|
5787 |
|
|
shuf =
|
5788 |
|
|
immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
|
5789 |
|
|
TImode);
|
5790 |
|
|
break;
|
5791 |
|
|
case V16QImode:
|
5792 |
|
|
shuf =
|
5793 |
|
|
immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
|
5794 |
|
|
TImode);
|
5795 |
|
|
break;
|
5796 |
|
|
default:
|
5797 |
|
|
abort ();
|
5798 |
|
|
}
|
5799 |
|
|
emit_move_insn (reg, shuf);
|
5800 |
|
|
emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
|
5801 |
|
|
}
|
5802 |
|
|
}
|
5803 |
|
|
|
5804 |
|
|
void
|
5805 |
|
|
spu_builtin_extract (rtx ops[])
|
5806 |
|
|
{
|
5807 |
|
|
enum machine_mode mode;
|
5808 |
|
|
rtx rot, from, tmp;
|
5809 |
|
|
|
5810 |
|
|
mode = GET_MODE (ops[1]);
|
5811 |
|
|
|
5812 |
|
|
if (GET_CODE (ops[2]) == CONST_INT)
|
5813 |
|
|
{
|
5814 |
|
|
switch (mode)
|
5815 |
|
|
{
|
5816 |
|
|
case V16QImode:
|
5817 |
|
|
emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
|
5818 |
|
|
break;
|
5819 |
|
|
case V8HImode:
|
5820 |
|
|
emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
|
5821 |
|
|
break;
|
5822 |
|
|
case V4SFmode:
|
5823 |
|
|
emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
|
5824 |
|
|
break;
|
5825 |
|
|
case V4SImode:
|
5826 |
|
|
emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
|
5827 |
|
|
break;
|
5828 |
|
|
case V2DImode:
|
5829 |
|
|
emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
|
5830 |
|
|
break;
|
5831 |
|
|
case V2DFmode:
|
5832 |
|
|
emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
|
5833 |
|
|
break;
|
5834 |
|
|
default:
|
5835 |
|
|
abort ();
|
5836 |
|
|
}
|
5837 |
|
|
return;
|
5838 |
|
|
}
|
5839 |
|
|
|
5840 |
|
|
from = spu_gen_subreg (TImode, ops[1]);
|
5841 |
|
|
rot = gen_reg_rtx (TImode);
|
5842 |
|
|
tmp = gen_reg_rtx (SImode);
|
5843 |
|
|
|
5844 |
|
|
switch (mode)
|
5845 |
|
|
{
|
5846 |
|
|
case V16QImode:
|
5847 |
|
|
emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
|
5848 |
|
|
break;
|
5849 |
|
|
case V8HImode:
|
5850 |
|
|
emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
|
5851 |
|
|
emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
|
5852 |
|
|
break;
|
5853 |
|
|
case V4SFmode:
|
5854 |
|
|
case V4SImode:
|
5855 |
|
|
emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
|
5856 |
|
|
break;
|
5857 |
|
|
case V2DImode:
|
5858 |
|
|
case V2DFmode:
|
5859 |
|
|
emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
|
5860 |
|
|
break;
|
5861 |
|
|
default:
|
5862 |
|
|
abort ();
|
5863 |
|
|
}
|
5864 |
|
|
emit_insn (gen_rotqby_ti (rot, from, tmp));
|
5865 |
|
|
|
5866 |
|
|
emit_insn (gen_spu_convert (ops[0], rot));
|
5867 |
|
|
}
|
5868 |
|
|
|
5869 |
|
|
void
|
5870 |
|
|
spu_builtin_insert (rtx ops[])
|
5871 |
|
|
{
|
5872 |
|
|
enum machine_mode mode = GET_MODE (ops[0]);
|
5873 |
|
|
enum machine_mode imode = GET_MODE_INNER (mode);
|
5874 |
|
|
rtx mask = gen_reg_rtx (TImode);
|
5875 |
|
|
rtx offset;
|
5876 |
|
|
|
5877 |
|
|
if (GET_CODE (ops[3]) == CONST_INT)
|
5878 |
|
|
offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
|
5879 |
|
|
else
|
5880 |
|
|
{
|
5881 |
|
|
offset = gen_reg_rtx (SImode);
|
5882 |
|
|
emit_insn (gen_mulsi3
|
5883 |
|
|
(offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
|
5884 |
|
|
}
|
5885 |
|
|
emit_insn (gen_cpat
|
5886 |
|
|
(mask, stack_pointer_rtx, offset,
|
5887 |
|
|
GEN_INT (GET_MODE_SIZE (imode))));
|
5888 |
|
|
emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
|
5889 |
|
|
}
|
5890 |
|
|
|
5891 |
|
|
void
|
5892 |
|
|
spu_builtin_promote (rtx ops[])
|
5893 |
|
|
{
|
5894 |
|
|
enum machine_mode mode, imode;
|
5895 |
|
|
rtx rot, from, offset;
|
5896 |
|
|
HOST_WIDE_INT pos;
|
5897 |
|
|
|
5898 |
|
|
mode = GET_MODE (ops[0]);
|
5899 |
|
|
imode = GET_MODE_INNER (mode);
|
5900 |
|
|
|
5901 |
|
|
from = gen_reg_rtx (TImode);
|
5902 |
|
|
rot = spu_gen_subreg (TImode, ops[0]);
|
5903 |
|
|
|
5904 |
|
|
emit_insn (gen_spu_convert (from, ops[1]));
|
5905 |
|
|
|
5906 |
|
|
if (GET_CODE (ops[2]) == CONST_INT)
|
5907 |
|
|
{
|
5908 |
|
|
pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
|
5909 |
|
|
if (GET_MODE_SIZE (imode) < 4)
|
5910 |
|
|
pos += 4 - GET_MODE_SIZE (imode);
|
5911 |
|
|
offset = GEN_INT (pos & 15);
|
5912 |
|
|
}
|
5913 |
|
|
else
|
5914 |
|
|
{
|
5915 |
|
|
offset = gen_reg_rtx (SImode);
|
5916 |
|
|
switch (mode)
|
5917 |
|
|
{
|
5918 |
|
|
case V16QImode:
|
5919 |
|
|
emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
|
5920 |
|
|
break;
|
5921 |
|
|
case V8HImode:
|
5922 |
|
|
emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
|
5923 |
|
|
emit_insn (gen_addsi3 (offset, offset, offset));
|
5924 |
|
|
break;
|
5925 |
|
|
case V4SFmode:
|
5926 |
|
|
case V4SImode:
|
5927 |
|
|
emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
|
5928 |
|
|
emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
|
5929 |
|
|
break;
|
5930 |
|
|
case V2DImode:
|
5931 |
|
|
case V2DFmode:
|
5932 |
|
|
emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
|
5933 |
|
|
break;
|
5934 |
|
|
default:
|
5935 |
|
|
abort ();
|
5936 |
|
|
}
|
5937 |
|
|
}
|
5938 |
|
|
emit_insn (gen_rotqby_ti (rot, from, offset));
|
5939 |
|
|
}
|
5940 |
|
|
|
5941 |
|
|
static void
|
5942 |
|
|
spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
|
5943 |
|
|
{
|
5944 |
|
|
rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
|
5945 |
|
|
rtx shuf = gen_reg_rtx (V4SImode);
|
5946 |
|
|
rtx insn = gen_reg_rtx (V4SImode);
|
5947 |
|
|
rtx shufc;
|
5948 |
|
|
rtx insnc;
|
5949 |
|
|
rtx mem;
|
5950 |
|
|
|
5951 |
|
|
fnaddr = force_reg (SImode, fnaddr);
|
5952 |
|
|
cxt = force_reg (SImode, cxt);
|
5953 |
|
|
|
5954 |
|
|
if (TARGET_LARGE_MEM)
|
5955 |
|
|
{
|
5956 |
|
|
rtx rotl = gen_reg_rtx (V4SImode);
|
5957 |
|
|
rtx mask = gen_reg_rtx (V4SImode);
|
5958 |
|
|
rtx bi = gen_reg_rtx (SImode);
|
5959 |
|
|
static unsigned char const shufa[16] = {
|
5960 |
|
|
2, 3, 0, 1, 18, 19, 16, 17,
|
5961 |
|
|
0, 1, 2, 3, 16, 17, 18, 19
|
5962 |
|
|
};
|
5963 |
|
|
static unsigned char const insna[16] = {
|
5964 |
|
|
0x41, 0, 0, 79,
|
5965 |
|
|
0x41, 0, 0, STATIC_CHAIN_REGNUM,
|
5966 |
|
|
0x60, 0x80, 0, 79,
|
5967 |
|
|
0x60, 0x80, 0, STATIC_CHAIN_REGNUM
|
5968 |
|
|
};
|
5969 |
|
|
|
5970 |
|
|
shufc = force_reg (TImode, array_to_constant (TImode, shufa));
|
5971 |
|
|
insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
|
5972 |
|
|
|
5973 |
|
|
emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
|
5974 |
|
|
emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
|
5975 |
|
|
emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
|
5976 |
|
|
emit_insn (gen_selb (insn, insnc, rotl, mask));
|
5977 |
|
|
|
5978 |
|
|
mem = adjust_address (m_tramp, V4SImode, 0);
|
5979 |
|
|
emit_move_insn (mem, insn);
|
5980 |
|
|
|
5981 |
|
|
emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
|
5982 |
|
|
mem = adjust_address (m_tramp, Pmode, 16);
|
5983 |
|
|
emit_move_insn (mem, bi);
|
5984 |
|
|
}
|
5985 |
|
|
else
|
5986 |
|
|
{
|
5987 |
|
|
rtx scxt = gen_reg_rtx (SImode);
|
5988 |
|
|
rtx sfnaddr = gen_reg_rtx (SImode);
|
5989 |
|
|
static unsigned char const insna[16] = {
|
5990 |
|
|
0x42, 0, 0, STATIC_CHAIN_REGNUM,
|
5991 |
|
|
0x30, 0, 0, 0,
|
5992 |
|
|
0, 0, 0, 0,
|
5993 |
|
|
0, 0, 0, 0
|
5994 |
|
|
};
|
5995 |
|
|
|
5996 |
|
|
shufc = gen_reg_rtx (TImode);
|
5997 |
|
|
insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
|
5998 |
|
|
|
5999 |
|
|
/* By or'ing all of cxt with the ila opcode we are assuming cxt
|
6000 |
|
|
fits 18 bits and the last 4 are zeros. This will be true if
|
6001 |
|
|
the stack pointer is initialized to 0x3fff0 at program start,
|
6002 |
|
|
otherwise the ila instruction will be garbage. */
|
6003 |
|
|
|
6004 |
|
|
emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
|
6005 |
|
|
emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
|
6006 |
|
|
emit_insn (gen_cpat
|
6007 |
|
|
(shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
|
6008 |
|
|
emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
|
6009 |
|
|
emit_insn (gen_iorv4si3 (insn, insnc, shuf));
|
6010 |
|
|
|
6011 |
|
|
mem = adjust_address (m_tramp, V4SImode, 0);
|
6012 |
|
|
emit_move_insn (mem, insn);
|
6013 |
|
|
}
|
6014 |
|
|
emit_insn (gen_sync ());
|
6015 |
|
|
}
|
6016 |
|
|
|
6017 |
|
|
void
|
6018 |
|
|
spu_expand_sign_extend (rtx ops[])
|
6019 |
|
|
{
|
6020 |
|
|
unsigned char arr[16];
|
6021 |
|
|
rtx pat = gen_reg_rtx (TImode);
|
6022 |
|
|
rtx sign, c;
|
6023 |
|
|
int i, last;
|
6024 |
|
|
last = GET_MODE (ops[0]) == DImode ? 7 : 15;
|
6025 |
|
|
if (GET_MODE (ops[1]) == QImode)
|
6026 |
|
|
{
|
6027 |
|
|
sign = gen_reg_rtx (HImode);
|
6028 |
|
|
emit_insn (gen_extendqihi2 (sign, ops[1]));
|
6029 |
|
|
for (i = 0; i < 16; i++)
|
6030 |
|
|
arr[i] = 0x12;
|
6031 |
|
|
arr[last] = 0x13;
|
6032 |
|
|
}
|
6033 |
|
|
else
|
6034 |
|
|
{
|
6035 |
|
|
for (i = 0; i < 16; i++)
|
6036 |
|
|
arr[i] = 0x10;
|
6037 |
|
|
switch (GET_MODE (ops[1]))
|
6038 |
|
|
{
|
6039 |
|
|
case HImode:
|
6040 |
|
|
sign = gen_reg_rtx (SImode);
|
6041 |
|
|
emit_insn (gen_extendhisi2 (sign, ops[1]));
|
6042 |
|
|
arr[last] = 0x03;
|
6043 |
|
|
arr[last - 1] = 0x02;
|
6044 |
|
|
break;
|
6045 |
|
|
case SImode:
|
6046 |
|
|
sign = gen_reg_rtx (SImode);
|
6047 |
|
|
emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
|
6048 |
|
|
for (i = 0; i < 4; i++)
|
6049 |
|
|
arr[last - i] = 3 - i;
|
6050 |
|
|
break;
|
6051 |
|
|
case DImode:
|
6052 |
|
|
sign = gen_reg_rtx (SImode);
|
6053 |
|
|
c = gen_reg_rtx (SImode);
|
6054 |
|
|
emit_insn (gen_spu_convert (c, ops[1]));
|
6055 |
|
|
emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
|
6056 |
|
|
for (i = 0; i < 8; i++)
|
6057 |
|
|
arr[last - i] = 7 - i;
|
6058 |
|
|
break;
|
6059 |
|
|
default:
|
6060 |
|
|
abort ();
|
6061 |
|
|
}
|
6062 |
|
|
}
|
6063 |
|
|
emit_move_insn (pat, array_to_constant (TImode, arr));
|
6064 |
|
|
emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
|
6065 |
|
|
}
|
6066 |
|
|
|
6067 |
|
|
/* expand vector initialization. If there are any constant parts,
|
6068 |
|
|
load constant parts first. Then load any non-constant parts. */
|
6069 |
|
|
void
|
6070 |
|
|
spu_expand_vector_init (rtx target, rtx vals)
|
6071 |
|
|
{
|
6072 |
|
|
enum machine_mode mode = GET_MODE (target);
|
6073 |
|
|
int n_elts = GET_MODE_NUNITS (mode);
|
6074 |
|
|
int n_var = 0;
|
6075 |
|
|
bool all_same = true;
|
6076 |
|
|
rtx first, x = NULL_RTX, first_constant = NULL_RTX;
|
6077 |
|
|
int i;
|
6078 |
|
|
|
6079 |
|
|
first = XVECEXP (vals, 0, 0);
|
6080 |
|
|
for (i = 0; i < n_elts; ++i)
|
6081 |
|
|
{
|
6082 |
|
|
x = XVECEXP (vals, 0, i);
|
6083 |
|
|
if (!(CONST_INT_P (x)
|
6084 |
|
|
|| GET_CODE (x) == CONST_DOUBLE
|
6085 |
|
|
|| GET_CODE (x) == CONST_FIXED))
|
6086 |
|
|
++n_var;
|
6087 |
|
|
else
|
6088 |
|
|
{
|
6089 |
|
|
if (first_constant == NULL_RTX)
|
6090 |
|
|
first_constant = x;
|
6091 |
|
|
}
|
6092 |
|
|
if (i > 0 && !rtx_equal_p (x, first))
|
6093 |
|
|
all_same = false;
|
6094 |
|
|
}
|
6095 |
|
|
|
6096 |
|
|
/* if all elements are the same, use splats to repeat elements */
|
6097 |
|
|
if (all_same)
|
6098 |
|
|
{
|
6099 |
|
|
if (!CONSTANT_P (first)
|
6100 |
|
|
&& !register_operand (first, GET_MODE (x)))
|
6101 |
|
|
first = force_reg (GET_MODE (first), first);
|
6102 |
|
|
emit_insn (gen_spu_splats (target, first));
|
6103 |
|
|
return;
|
6104 |
|
|
}
|
6105 |
|
|
|
6106 |
|
|
/* load constant parts */
|
6107 |
|
|
if (n_var != n_elts)
|
6108 |
|
|
{
|
6109 |
|
|
if (n_var == 0)
|
6110 |
|
|
{
|
6111 |
|
|
emit_move_insn (target,
|
6112 |
|
|
gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
|
6113 |
|
|
}
|
6114 |
|
|
else
|
6115 |
|
|
{
|
6116 |
|
|
rtx constant_parts_rtx = copy_rtx (vals);
|
6117 |
|
|
|
6118 |
|
|
gcc_assert (first_constant != NULL_RTX);
|
6119 |
|
|
/* fill empty slots with the first constant, this increases
|
6120 |
|
|
our chance of using splats in the recursive call below. */
|
6121 |
|
|
for (i = 0; i < n_elts; ++i)
|
6122 |
|
|
{
|
6123 |
|
|
x = XVECEXP (constant_parts_rtx, 0, i);
|
6124 |
|
|
if (!(CONST_INT_P (x)
|
6125 |
|
|
|| GET_CODE (x) == CONST_DOUBLE
|
6126 |
|
|
|| GET_CODE (x) == CONST_FIXED))
|
6127 |
|
|
XVECEXP (constant_parts_rtx, 0, i) = first_constant;
|
6128 |
|
|
}
|
6129 |
|
|
|
6130 |
|
|
spu_expand_vector_init (target, constant_parts_rtx);
|
6131 |
|
|
}
|
6132 |
|
|
}
|
6133 |
|
|
|
6134 |
|
|
/* load variable parts */
|
6135 |
|
|
if (n_var != 0)
|
6136 |
|
|
{
|
6137 |
|
|
rtx insert_operands[4];
|
6138 |
|
|
|
6139 |
|
|
insert_operands[0] = target;
|
6140 |
|
|
insert_operands[2] = target;
|
6141 |
|
|
for (i = 0; i < n_elts; ++i)
|
6142 |
|
|
{
|
6143 |
|
|
x = XVECEXP (vals, 0, i);
|
6144 |
|
|
if (!(CONST_INT_P (x)
|
6145 |
|
|
|| GET_CODE (x) == CONST_DOUBLE
|
6146 |
|
|
|| GET_CODE (x) == CONST_FIXED))
|
6147 |
|
|
{
|
6148 |
|
|
if (!register_operand (x, GET_MODE (x)))
|
6149 |
|
|
x = force_reg (GET_MODE (x), x);
|
6150 |
|
|
insert_operands[1] = x;
|
6151 |
|
|
insert_operands[3] = GEN_INT (i);
|
6152 |
|
|
spu_builtin_insert (insert_operands);
|
6153 |
|
|
}
|
6154 |
|
|
}
|
6155 |
|
|
}
|
6156 |
|
|
}
|
6157 |
|
|
|
6158 |
|
|
/* Return insn index for the vector compare instruction for given CODE,
|
6159 |
|
|
and DEST_MODE, OP_MODE. Return -1 if valid insn is not available. */
|
6160 |
|
|
|
6161 |
|
|
static int
|
6162 |
|
|
get_vec_cmp_insn (enum rtx_code code,
|
6163 |
|
|
enum machine_mode dest_mode,
|
6164 |
|
|
enum machine_mode op_mode)
|
6165 |
|
|
|
6166 |
|
|
{
|
6167 |
|
|
switch (code)
|
6168 |
|
|
{
|
6169 |
|
|
case EQ:
|
6170 |
|
|
if (dest_mode == V16QImode && op_mode == V16QImode)
|
6171 |
|
|
return CODE_FOR_ceq_v16qi;
|
6172 |
|
|
if (dest_mode == V8HImode && op_mode == V8HImode)
|
6173 |
|
|
return CODE_FOR_ceq_v8hi;
|
6174 |
|
|
if (dest_mode == V4SImode && op_mode == V4SImode)
|
6175 |
|
|
return CODE_FOR_ceq_v4si;
|
6176 |
|
|
if (dest_mode == V4SImode && op_mode == V4SFmode)
|
6177 |
|
|
return CODE_FOR_ceq_v4sf;
|
6178 |
|
|
if (dest_mode == V2DImode && op_mode == V2DFmode)
|
6179 |
|
|
return CODE_FOR_ceq_v2df;
|
6180 |
|
|
break;
|
6181 |
|
|
case GT:
|
6182 |
|
|
if (dest_mode == V16QImode && op_mode == V16QImode)
|
6183 |
|
|
return CODE_FOR_cgt_v16qi;
|
6184 |
|
|
if (dest_mode == V8HImode && op_mode == V8HImode)
|
6185 |
|
|
return CODE_FOR_cgt_v8hi;
|
6186 |
|
|
if (dest_mode == V4SImode && op_mode == V4SImode)
|
6187 |
|
|
return CODE_FOR_cgt_v4si;
|
6188 |
|
|
if (dest_mode == V4SImode && op_mode == V4SFmode)
|
6189 |
|
|
return CODE_FOR_cgt_v4sf;
|
6190 |
|
|
if (dest_mode == V2DImode && op_mode == V2DFmode)
|
6191 |
|
|
return CODE_FOR_cgt_v2df;
|
6192 |
|
|
break;
|
6193 |
|
|
case GTU:
|
6194 |
|
|
if (dest_mode == V16QImode && op_mode == V16QImode)
|
6195 |
|
|
return CODE_FOR_clgt_v16qi;
|
6196 |
|
|
if (dest_mode == V8HImode && op_mode == V8HImode)
|
6197 |
|
|
return CODE_FOR_clgt_v8hi;
|
6198 |
|
|
if (dest_mode == V4SImode && op_mode == V4SImode)
|
6199 |
|
|
return CODE_FOR_clgt_v4si;
|
6200 |
|
|
break;
|
6201 |
|
|
default:
|
6202 |
|
|
break;
|
6203 |
|
|
}
|
6204 |
|
|
return -1;
|
6205 |
|
|
}
|
6206 |
|
|
|
6207 |
|
|
/* Emit vector compare for operands OP0 and OP1 using code RCODE.
|
6208 |
|
|
DMODE is expected destination mode. This is a recursive function. */
|
6209 |
|
|
|
6210 |
|
|
static rtx
|
6211 |
|
|
spu_emit_vector_compare (enum rtx_code rcode,
|
6212 |
|
|
rtx op0, rtx op1,
|
6213 |
|
|
enum machine_mode dmode)
|
6214 |
|
|
{
|
6215 |
|
|
int vec_cmp_insn;
|
6216 |
|
|
rtx mask;
|
6217 |
|
|
enum machine_mode dest_mode;
|
6218 |
|
|
enum machine_mode op_mode = GET_MODE (op1);
|
6219 |
|
|
|
6220 |
|
|
gcc_assert (GET_MODE (op0) == GET_MODE (op1));
|
6221 |
|
|
|
6222 |
|
|
/* Floating point vector compare instructions uses destination V4SImode.
|
6223 |
|
|
Double floating point vector compare instructions uses destination V2DImode.
|
6224 |
|
|
Move destination to appropriate mode later. */
|
6225 |
|
|
if (dmode == V4SFmode)
|
6226 |
|
|
dest_mode = V4SImode;
|
6227 |
|
|
else if (dmode == V2DFmode)
|
6228 |
|
|
dest_mode = V2DImode;
|
6229 |
|
|
else
|
6230 |
|
|
dest_mode = dmode;
|
6231 |
|
|
|
6232 |
|
|
mask = gen_reg_rtx (dest_mode);
|
6233 |
|
|
vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
|
6234 |
|
|
|
6235 |
|
|
if (vec_cmp_insn == -1)
|
6236 |
|
|
{
|
6237 |
|
|
bool swap_operands = false;
|
6238 |
|
|
bool try_again = false;
|
6239 |
|
|
switch (rcode)
|
6240 |
|
|
{
|
6241 |
|
|
case LT:
|
6242 |
|
|
rcode = GT;
|
6243 |
|
|
swap_operands = true;
|
6244 |
|
|
try_again = true;
|
6245 |
|
|
break;
|
6246 |
|
|
case LTU:
|
6247 |
|
|
rcode = GTU;
|
6248 |
|
|
swap_operands = true;
|
6249 |
|
|
try_again = true;
|
6250 |
|
|
break;
|
6251 |
|
|
case NE:
|
6252 |
|
|
/* Treat A != B as ~(A==B). */
|
6253 |
|
|
{
|
6254 |
|
|
enum insn_code nor_code;
|
6255 |
|
|
rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
|
6256 |
|
|
nor_code = optab_handler (one_cmpl_optab, (int)dest_mode)->insn_code;
|
6257 |
|
|
gcc_assert (nor_code != CODE_FOR_nothing);
|
6258 |
|
|
emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
|
6259 |
|
|
if (dmode != dest_mode)
|
6260 |
|
|
{
|
6261 |
|
|
rtx temp = gen_reg_rtx (dest_mode);
|
6262 |
|
|
convert_move (temp, mask, 0);
|
6263 |
|
|
return temp;
|
6264 |
|
|
}
|
6265 |
|
|
return mask;
|
6266 |
|
|
}
|
6267 |
|
|
break;
|
6268 |
|
|
case GE:
|
6269 |
|
|
case GEU:
|
6270 |
|
|
case LE:
|
6271 |
|
|
case LEU:
|
6272 |
|
|
/* Try GT/GTU/LT/LTU OR EQ */
|
6273 |
|
|
{
|
6274 |
|
|
rtx c_rtx, eq_rtx;
|
6275 |
|
|
enum insn_code ior_code;
|
6276 |
|
|
enum rtx_code new_code;
|
6277 |
|
|
|
6278 |
|
|
switch (rcode)
|
6279 |
|
|
{
|
6280 |
|
|
case GE: new_code = GT; break;
|
6281 |
|
|
case GEU: new_code = GTU; break;
|
6282 |
|
|
case LE: new_code = LT; break;
|
6283 |
|
|
case LEU: new_code = LTU; break;
|
6284 |
|
|
default:
|
6285 |
|
|
gcc_unreachable ();
|
6286 |
|
|
}
|
6287 |
|
|
|
6288 |
|
|
c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
|
6289 |
|
|
eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
|
6290 |
|
|
|
6291 |
|
|
ior_code = optab_handler (ior_optab, (int)dest_mode)->insn_code;
|
6292 |
|
|
gcc_assert (ior_code != CODE_FOR_nothing);
|
6293 |
|
|
emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
|
6294 |
|
|
if (dmode != dest_mode)
|
6295 |
|
|
{
|
6296 |
|
|
rtx temp = gen_reg_rtx (dest_mode);
|
6297 |
|
|
convert_move (temp, mask, 0);
|
6298 |
|
|
return temp;
|
6299 |
|
|
}
|
6300 |
|
|
return mask;
|
6301 |
|
|
}
|
6302 |
|
|
break;
|
6303 |
|
|
default:
|
6304 |
|
|
gcc_unreachable ();
|
6305 |
|
|
}
|
6306 |
|
|
|
6307 |
|
|
/* You only get two chances. */
|
6308 |
|
|
if (try_again)
|
6309 |
|
|
vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
|
6310 |
|
|
|
6311 |
|
|
gcc_assert (vec_cmp_insn != -1);
|
6312 |
|
|
|
6313 |
|
|
if (swap_operands)
|
6314 |
|
|
{
|
6315 |
|
|
rtx tmp;
|
6316 |
|
|
tmp = op0;
|
6317 |
|
|
op0 = op1;
|
6318 |
|
|
op1 = tmp;
|
6319 |
|
|
}
|
6320 |
|
|
}
|
6321 |
|
|
|
6322 |
|
|
emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
|
6323 |
|
|
if (dmode != dest_mode)
|
6324 |
|
|
{
|
6325 |
|
|
rtx temp = gen_reg_rtx (dest_mode);
|
6326 |
|
|
convert_move (temp, mask, 0);
|
6327 |
|
|
return temp;
|
6328 |
|
|
}
|
6329 |
|
|
return mask;
|
6330 |
|
|
}
|
6331 |
|
|
|
6332 |
|
|
|
6333 |
|
|
/* Emit vector conditional expression.
|
6334 |
|
|
DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
|
6335 |
|
|
CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
|
6336 |
|
|
|
6337 |
|
|
int
|
6338 |
|
|
spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
|
6339 |
|
|
rtx cond, rtx cc_op0, rtx cc_op1)
|
6340 |
|
|
{
|
6341 |
|
|
enum machine_mode dest_mode = GET_MODE (dest);
|
6342 |
|
|
enum rtx_code rcode = GET_CODE (cond);
|
6343 |
|
|
rtx mask;
|
6344 |
|
|
|
6345 |
|
|
/* Get the vector mask for the given relational operations. */
|
6346 |
|
|
mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
|
6347 |
|
|
|
6348 |
|
|
emit_insn(gen_selb (dest, op2, op1, mask));
|
6349 |
|
|
|
6350 |
|
|
return 1;
|
6351 |
|
|
}
|
6352 |
|
|
|
6353 |
|
|
static rtx
|
6354 |
|
|
spu_force_reg (enum machine_mode mode, rtx op)
|
6355 |
|
|
{
|
6356 |
|
|
rtx x, r;
|
6357 |
|
|
if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
|
6358 |
|
|
{
|
6359 |
|
|
if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
|
6360 |
|
|
|| GET_MODE (op) == BLKmode)
|
6361 |
|
|
return force_reg (mode, convert_to_mode (mode, op, 0));
|
6362 |
|
|
abort ();
|
6363 |
|
|
}
|
6364 |
|
|
|
6365 |
|
|
r = force_reg (GET_MODE (op), op);
|
6366 |
|
|
if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
|
6367 |
|
|
{
|
6368 |
|
|
x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
|
6369 |
|
|
if (x)
|
6370 |
|
|
return x;
|
6371 |
|
|
}
|
6372 |
|
|
|
6373 |
|
|
x = gen_reg_rtx (mode);
|
6374 |
|
|
emit_insn (gen_spu_convert (x, r));
|
6375 |
|
|
return x;
|
6376 |
|
|
}
|
6377 |
|
|
|
6378 |
|
|
static void
|
6379 |
|
|
spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
|
6380 |
|
|
{
|
6381 |
|
|
HOST_WIDE_INT v = 0;
|
6382 |
|
|
int lsbits;
|
6383 |
|
|
/* Check the range of immediate operands. */
|
6384 |
|
|
if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
|
6385 |
|
|
{
|
6386 |
|
|
int range = p - SPU_BTI_7;
|
6387 |
|
|
|
6388 |
|
|
if (!CONSTANT_P (op))
|
6389 |
|
|
error ("%s expects an integer literal in the range [%d, %d].",
|
6390 |
|
|
d->name,
|
6391 |
|
|
spu_builtin_range[range].low, spu_builtin_range[range].high);
|
6392 |
|
|
|
6393 |
|
|
if (GET_CODE (op) == CONST
|
6394 |
|
|
&& (GET_CODE (XEXP (op, 0)) == PLUS
|
6395 |
|
|
|| GET_CODE (XEXP (op, 0)) == MINUS))
|
6396 |
|
|
{
|
6397 |
|
|
v = INTVAL (XEXP (XEXP (op, 0), 1));
|
6398 |
|
|
op = XEXP (XEXP (op, 0), 0);
|
6399 |
|
|
}
|
6400 |
|
|
else if (GET_CODE (op) == CONST_INT)
|
6401 |
|
|
v = INTVAL (op);
|
6402 |
|
|
else if (GET_CODE (op) == CONST_VECTOR
|
6403 |
|
|
&& GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
|
6404 |
|
|
v = INTVAL (CONST_VECTOR_ELT (op, 0));
|
6405 |
|
|
|
6406 |
|
|
/* The default for v is 0 which is valid in every range. */
|
6407 |
|
|
if (v < spu_builtin_range[range].low
|
6408 |
|
|
|| v > spu_builtin_range[range].high)
|
6409 |
|
|
error ("%s expects an integer literal in the range [%d, %d]. ("
|
6410 |
|
|
HOST_WIDE_INT_PRINT_DEC ")",
|
6411 |
|
|
d->name,
|
6412 |
|
|
spu_builtin_range[range].low, spu_builtin_range[range].high,
|
6413 |
|
|
v);
|
6414 |
|
|
|
6415 |
|
|
switch (p)
|
6416 |
|
|
{
|
6417 |
|
|
case SPU_BTI_S10_4:
|
6418 |
|
|
lsbits = 4;
|
6419 |
|
|
break;
|
6420 |
|
|
case SPU_BTI_U16_2:
|
6421 |
|
|
/* This is only used in lqa, and stqa. Even though the insns
|
6422 |
|
|
encode 16 bits of the address (all but the 2 least
|
6423 |
|
|
significant), only 14 bits are used because it is masked to
|
6424 |
|
|
be 16 byte aligned. */
|
6425 |
|
|
lsbits = 4;
|
6426 |
|
|
break;
|
6427 |
|
|
case SPU_BTI_S16_2:
|
6428 |
|
|
/* This is used for lqr and stqr. */
|
6429 |
|
|
lsbits = 2;
|
6430 |
|
|
break;
|
6431 |
|
|
default:
|
6432 |
|
|
lsbits = 0;
|
6433 |
|
|
}
|
6434 |
|
|
|
6435 |
|
|
if (GET_CODE (op) == LABEL_REF
|
6436 |
|
|
|| (GET_CODE (op) == SYMBOL_REF
|
6437 |
|
|
&& SYMBOL_REF_FUNCTION_P (op))
|
6438 |
|
|
|| (v & ((1 << lsbits) - 1)) != 0)
|
6439 |
|
|
warning (0, "%d least significant bits of %s are ignored.", lsbits,
|
6440 |
|
|
d->name);
|
6441 |
|
|
}
|
6442 |
|
|
}
|
6443 |
|
|
|
6444 |
|
|
|
6445 |
|
|
static int
|
6446 |
|
|
expand_builtin_args (struct spu_builtin_description *d, tree exp,
|
6447 |
|
|
rtx target, rtx ops[])
|
6448 |
|
|
{
|
6449 |
|
|
enum insn_code icode = (enum insn_code) d->icode;
|
6450 |
|
|
int i = 0, a;
|
6451 |
|
|
|
6452 |
|
|
/* Expand the arguments into rtl. */
|
6453 |
|
|
|
6454 |
|
|
if (d->parm[0] != SPU_BTI_VOID)
|
6455 |
|
|
ops[i++] = target;
|
6456 |
|
|
|
6457 |
|
|
for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
|
6458 |
|
|
{
|
6459 |
|
|
tree arg = CALL_EXPR_ARG (exp, a);
|
6460 |
|
|
if (arg == 0)
|
6461 |
|
|
abort ();
|
6462 |
|
|
ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
|
6463 |
|
|
}
|
6464 |
|
|
|
6465 |
|
|
/* The insn pattern may have additional operands (SCRATCH).
|
6466 |
|
|
Return the number of actual non-SCRATCH operands. */
|
6467 |
|
|
gcc_assert (i <= insn_data[icode].n_operands);
|
6468 |
|
|
return i;
|
6469 |
|
|
}
|
6470 |
|
|
|
6471 |
|
|
static rtx
|
6472 |
|
|
spu_expand_builtin_1 (struct spu_builtin_description *d,
|
6473 |
|
|
tree exp, rtx target)
|
6474 |
|
|
{
|
6475 |
|
|
rtx pat;
|
6476 |
|
|
rtx ops[8];
|
6477 |
|
|
enum insn_code icode = (enum insn_code) d->icode;
|
6478 |
|
|
enum machine_mode mode, tmode;
|
6479 |
|
|
int i, p;
|
6480 |
|
|
int n_operands;
|
6481 |
|
|
tree return_type;
|
6482 |
|
|
|
6483 |
|
|
/* Set up ops[] with values from arglist. */
|
6484 |
|
|
n_operands = expand_builtin_args (d, exp, target, ops);
|
6485 |
|
|
|
6486 |
|
|
/* Handle the target operand which must be operand 0. */
|
6487 |
|
|
i = 0;
|
6488 |
|
|
if (d->parm[0] != SPU_BTI_VOID)
|
6489 |
|
|
{
|
6490 |
|
|
|
6491 |
|
|
/* We prefer the mode specified for the match_operand otherwise
|
6492 |
|
|
use the mode from the builtin function prototype. */
|
6493 |
|
|
tmode = insn_data[d->icode].operand[0].mode;
|
6494 |
|
|
if (tmode == VOIDmode)
|
6495 |
|
|
tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
|
6496 |
|
|
|
6497 |
|
|
/* Try to use target because not using it can lead to extra copies
|
6498 |
|
|
and when we are using all of the registers extra copies leads
|
6499 |
|
|
to extra spills. */
|
6500 |
|
|
if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
|
6501 |
|
|
ops[0] = target;
|
6502 |
|
|
else
|
6503 |
|
|
target = ops[0] = gen_reg_rtx (tmode);
|
6504 |
|
|
|
6505 |
|
|
if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
|
6506 |
|
|
abort ();
|
6507 |
|
|
|
6508 |
|
|
i++;
|
6509 |
|
|
}
|
6510 |
|
|
|
6511 |
|
|
if (d->fcode == SPU_MASK_FOR_LOAD)
|
6512 |
|
|
{
|
6513 |
|
|
enum machine_mode mode = insn_data[icode].operand[1].mode;
|
6514 |
|
|
tree arg;
|
6515 |
|
|
rtx addr, op, pat;
|
6516 |
|
|
|
6517 |
|
|
/* get addr */
|
6518 |
|
|
arg = CALL_EXPR_ARG (exp, 0);
|
6519 |
|
|
gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE);
|
6520 |
|
|
op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
|
6521 |
|
|
addr = memory_address (mode, op);
|
6522 |
|
|
|
6523 |
|
|
/* negate addr */
|
6524 |
|
|
op = gen_reg_rtx (GET_MODE (addr));
|
6525 |
|
|
emit_insn (gen_rtx_SET (VOIDmode, op,
|
6526 |
|
|
gen_rtx_NEG (GET_MODE (addr), addr)));
|
6527 |
|
|
op = gen_rtx_MEM (mode, op);
|
6528 |
|
|
|
6529 |
|
|
pat = GEN_FCN (icode) (target, op);
|
6530 |
|
|
if (!pat)
|
6531 |
|
|
return 0;
|
6532 |
|
|
emit_insn (pat);
|
6533 |
|
|
return target;
|
6534 |
|
|
}
|
6535 |
|
|
|
6536 |
|
|
/* Ignore align_hint, but still expand it's args in case they have
|
6537 |
|
|
side effects. */
|
6538 |
|
|
if (icode == CODE_FOR_spu_align_hint)
|
6539 |
|
|
return 0;
|
6540 |
|
|
|
6541 |
|
|
/* Handle the rest of the operands. */
|
6542 |
|
|
for (p = 1; i < n_operands; i++, p++)
|
6543 |
|
|
{
|
6544 |
|
|
if (insn_data[d->icode].operand[i].mode != VOIDmode)
|
6545 |
|
|
mode = insn_data[d->icode].operand[i].mode;
|
6546 |
|
|
else
|
6547 |
|
|
mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
|
6548 |
|
|
|
6549 |
|
|
/* mode can be VOIDmode here for labels */
|
6550 |
|
|
|
6551 |
|
|
/* For specific intrinsics with an immediate operand, e.g.,
|
6552 |
|
|
si_ai(), we sometimes need to convert the scalar argument to a
|
6553 |
|
|
vector argument by splatting the scalar. */
|
6554 |
|
|
if (VECTOR_MODE_P (mode)
|
6555 |
|
|
&& (GET_CODE (ops[i]) == CONST_INT
|
6556 |
|
|
|| GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
|
6557 |
|
|
|| GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
|
6558 |
|
|
{
|
6559 |
|
|
if (GET_CODE (ops[i]) == CONST_INT)
|
6560 |
|
|
ops[i] = spu_const (mode, INTVAL (ops[i]));
|
6561 |
|
|
else
|
6562 |
|
|
{
|
6563 |
|
|
rtx reg = gen_reg_rtx (mode);
|
6564 |
|
|
enum machine_mode imode = GET_MODE_INNER (mode);
|
6565 |
|
|
if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
|
6566 |
|
|
ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
|
6567 |
|
|
if (imode != GET_MODE (ops[i]))
|
6568 |
|
|
ops[i] = convert_to_mode (imode, ops[i],
|
6569 |
|
|
TYPE_UNSIGNED (spu_builtin_types
|
6570 |
|
|
[d->parm[i]]));
|
6571 |
|
|
emit_insn (gen_spu_splats (reg, ops[i]));
|
6572 |
|
|
ops[i] = reg;
|
6573 |
|
|
}
|
6574 |
|
|
}
|
6575 |
|
|
|
6576 |
|
|
spu_check_builtin_parm (d, ops[i], d->parm[p]);
|
6577 |
|
|
|
6578 |
|
|
if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
|
6579 |
|
|
ops[i] = spu_force_reg (mode, ops[i]);
|
6580 |
|
|
}
|
6581 |
|
|
|
6582 |
|
|
switch (n_operands)
|
6583 |
|
|
{
|
6584 |
|
|
case 0:
|
6585 |
|
|
pat = GEN_FCN (icode) (0);
|
6586 |
|
|
break;
|
6587 |
|
|
case 1:
|
6588 |
|
|
pat = GEN_FCN (icode) (ops[0]);
|
6589 |
|
|
break;
|
6590 |
|
|
case 2:
|
6591 |
|
|
pat = GEN_FCN (icode) (ops[0], ops[1]);
|
6592 |
|
|
break;
|
6593 |
|
|
case 3:
|
6594 |
|
|
pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
|
6595 |
|
|
break;
|
6596 |
|
|
case 4:
|
6597 |
|
|
pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
|
6598 |
|
|
break;
|
6599 |
|
|
case 5:
|
6600 |
|
|
pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
|
6601 |
|
|
break;
|
6602 |
|
|
case 6:
|
6603 |
|
|
pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
|
6604 |
|
|
break;
|
6605 |
|
|
default:
|
6606 |
|
|
abort ();
|
6607 |
|
|
}
|
6608 |
|
|
|
6609 |
|
|
if (!pat)
|
6610 |
|
|
abort ();
|
6611 |
|
|
|
6612 |
|
|
if (d->type == B_CALL || d->type == B_BISLED)
|
6613 |
|
|
emit_call_insn (pat);
|
6614 |
|
|
else if (d->type == B_JUMP)
|
6615 |
|
|
{
|
6616 |
|
|
emit_jump_insn (pat);
|
6617 |
|
|
emit_barrier ();
|
6618 |
|
|
}
|
6619 |
|
|
else
|
6620 |
|
|
emit_insn (pat);
|
6621 |
|
|
|
6622 |
|
|
return_type = spu_builtin_types[d->parm[0]];
|
6623 |
|
|
if (d->parm[0] != SPU_BTI_VOID
|
6624 |
|
|
&& GET_MODE (target) != TYPE_MODE (return_type))
|
6625 |
|
|
{
|
6626 |
|
|
/* target is the return value. It should always be the mode of
|
6627 |
|
|
the builtin function prototype. */
|
6628 |
|
|
target = spu_force_reg (TYPE_MODE (return_type), target);
|
6629 |
|
|
}
|
6630 |
|
|
|
6631 |
|
|
return target;
|
6632 |
|
|
}
|
6633 |
|
|
|
6634 |
|
|
rtx
|
6635 |
|
|
spu_expand_builtin (tree exp,
|
6636 |
|
|
rtx target,
|
6637 |
|
|
rtx subtarget ATTRIBUTE_UNUSED,
|
6638 |
|
|
enum machine_mode mode ATTRIBUTE_UNUSED,
|
6639 |
|
|
int ignore ATTRIBUTE_UNUSED)
|
6640 |
|
|
{
|
6641 |
|
|
tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
|
6642 |
|
|
unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS;
|
6643 |
|
|
struct spu_builtin_description *d;
|
6644 |
|
|
|
6645 |
|
|
if (fcode < NUM_SPU_BUILTINS)
|
6646 |
|
|
{
|
6647 |
|
|
d = &spu_builtins[fcode];
|
6648 |
|
|
|
6649 |
|
|
return spu_expand_builtin_1 (d, exp, target);
|
6650 |
|
|
}
|
6651 |
|
|
abort ();
|
6652 |
|
|
}
|
6653 |
|
|
|
6654 |
|
|
/* Implement targetm.vectorize.builtin_mul_widen_even. */
|
6655 |
|
|
static tree
|
6656 |
|
|
spu_builtin_mul_widen_even (tree type)
|
6657 |
|
|
{
|
6658 |
|
|
switch (TYPE_MODE (type))
|
6659 |
|
|
{
|
6660 |
|
|
case V8HImode:
|
6661 |
|
|
if (TYPE_UNSIGNED (type))
|
6662 |
|
|
return spu_builtins[SPU_MULE_0].fndecl;
|
6663 |
|
|
else
|
6664 |
|
|
return spu_builtins[SPU_MULE_1].fndecl;
|
6665 |
|
|
break;
|
6666 |
|
|
default:
|
6667 |
|
|
return NULL_TREE;
|
6668 |
|
|
}
|
6669 |
|
|
}
|
6670 |
|
|
|
6671 |
|
|
/* Implement targetm.vectorize.builtin_mul_widen_odd. */
|
6672 |
|
|
static tree
|
6673 |
|
|
spu_builtin_mul_widen_odd (tree type)
|
6674 |
|
|
{
|
6675 |
|
|
switch (TYPE_MODE (type))
|
6676 |
|
|
{
|
6677 |
|
|
case V8HImode:
|
6678 |
|
|
if (TYPE_UNSIGNED (type))
|
6679 |
|
|
return spu_builtins[SPU_MULO_1].fndecl;
|
6680 |
|
|
else
|
6681 |
|
|
return spu_builtins[SPU_MULO_0].fndecl;
|
6682 |
|
|
break;
|
6683 |
|
|
default:
|
6684 |
|
|
return NULL_TREE;
|
6685 |
|
|
}
|
6686 |
|
|
}
|
6687 |
|
|
|
6688 |
|
|
/* Implement targetm.vectorize.builtin_mask_for_load. */
|
6689 |
|
|
static tree
|
6690 |
|
|
spu_builtin_mask_for_load (void)
|
6691 |
|
|
{
|
6692 |
|
|
struct spu_builtin_description *d = &spu_builtins[SPU_MASK_FOR_LOAD];
|
6693 |
|
|
gcc_assert (d);
|
6694 |
|
|
return d->fndecl;
|
6695 |
|
|
}
|
6696 |
|
|
|
6697 |
|
|
/* Implement targetm.vectorize.builtin_vectorization_cost.  Returns -19
   when RUNTIME_TEST is true and 0 otherwise.  */
static int
spu_builtin_vectorization_cost (bool runtime_test)
{
  /* If the branch of the runtime test is taken - i.e. - the vectorized
     version is skipped - this incurs a misprediction cost (because the
     vectorized version is expected to be the fall-through).  So we
     subtract the latency of a mispredicted branch from the costs that
     are incurred when the vectorized version is executed.  */
  return runtime_test ? -19 : 0;
}
|
6711 |
|
|
|
6712 |
|
|
/* Return true iff, data reference of TYPE can reach vector alignment (16)
|
6713 |
|
|
after applying N number of iterations. This routine does not determine
|
6714 |
|
|
how may iterations are required to reach desired alignment. */
|
6715 |
|
|
|
6716 |
|
|
static bool
|
6717 |
|
|
spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
|
6718 |
|
|
{
|
6719 |
|
|
if (is_packed)
|
6720 |
|
|
return false;
|
6721 |
|
|
|
6722 |
|
|
/* All other types are naturally aligned. */
|
6723 |
|
|
return true;
|
6724 |
|
|
}
|
6725 |
|
|
|
6726 |
|
|
/* Implement targetm.vectorize.builtin_vec_perm. */
|
6727 |
|
|
tree
|
6728 |
|
|
spu_builtin_vec_perm (tree type, tree *mask_element_type)
|
6729 |
|
|
{
|
6730 |
|
|
struct spu_builtin_description *d;
|
6731 |
|
|
|
6732 |
|
|
*mask_element_type = unsigned_char_type_node;
|
6733 |
|
|
|
6734 |
|
|
switch (TYPE_MODE (type))
|
6735 |
|
|
{
|
6736 |
|
|
case V16QImode:
|
6737 |
|
|
if (TYPE_UNSIGNED (type))
|
6738 |
|
|
d = &spu_builtins[SPU_SHUFFLE_0];
|
6739 |
|
|
else
|
6740 |
|
|
d = &spu_builtins[SPU_SHUFFLE_1];
|
6741 |
|
|
break;
|
6742 |
|
|
|
6743 |
|
|
case V8HImode:
|
6744 |
|
|
if (TYPE_UNSIGNED (type))
|
6745 |
|
|
d = &spu_builtins[SPU_SHUFFLE_2];
|
6746 |
|
|
else
|
6747 |
|
|
d = &spu_builtins[SPU_SHUFFLE_3];
|
6748 |
|
|
break;
|
6749 |
|
|
|
6750 |
|
|
case V4SImode:
|
6751 |
|
|
if (TYPE_UNSIGNED (type))
|
6752 |
|
|
d = &spu_builtins[SPU_SHUFFLE_4];
|
6753 |
|
|
else
|
6754 |
|
|
d = &spu_builtins[SPU_SHUFFLE_5];
|
6755 |
|
|
break;
|
6756 |
|
|
|
6757 |
|
|
case V2DImode:
|
6758 |
|
|
if (TYPE_UNSIGNED (type))
|
6759 |
|
|
d = &spu_builtins[SPU_SHUFFLE_6];
|
6760 |
|
|
else
|
6761 |
|
|
d = &spu_builtins[SPU_SHUFFLE_7];
|
6762 |
|
|
break;
|
6763 |
|
|
|
6764 |
|
|
case V4SFmode:
|
6765 |
|
|
d = &spu_builtins[SPU_SHUFFLE_8];
|
6766 |
|
|
break;
|
6767 |
|
|
|
6768 |
|
|
case V2DFmode:
|
6769 |
|
|
d = &spu_builtins[SPU_SHUFFLE_9];
|
6770 |
|
|
break;
|
6771 |
|
|
|
6772 |
|
|
default:
|
6773 |
|
|
return NULL_TREE;
|
6774 |
|
|
}
|
6775 |
|
|
|
6776 |
|
|
gcc_assert (d);
|
6777 |
|
|
return d->fndecl;
|
6778 |
|
|
}
|
6779 |
|
|
|
6780 |
|
|
/* Return the appropriate mode for a named address pointer. */
|
6781 |
|
|
static enum machine_mode
|
6782 |
|
|
spu_addr_space_pointer_mode (addr_space_t addrspace)
|
6783 |
|
|
{
|
6784 |
|
|
switch (addrspace)
|
6785 |
|
|
{
|
6786 |
|
|
case ADDR_SPACE_GENERIC:
|
6787 |
|
|
return ptr_mode;
|
6788 |
|
|
case ADDR_SPACE_EA:
|
6789 |
|
|
return EAmode;
|
6790 |
|
|
default:
|
6791 |
|
|
gcc_unreachable ();
|
6792 |
|
|
}
|
6793 |
|
|
}
|
6794 |
|
|
|
6795 |
|
|
/* Return the appropriate mode for a named address address. */
|
6796 |
|
|
static enum machine_mode
|
6797 |
|
|
spu_addr_space_address_mode (addr_space_t addrspace)
|
6798 |
|
|
{
|
6799 |
|
|
switch (addrspace)
|
6800 |
|
|
{
|
6801 |
|
|
case ADDR_SPACE_GENERIC:
|
6802 |
|
|
return Pmode;
|
6803 |
|
|
case ADDR_SPACE_EA:
|
6804 |
|
|
return EAmode;
|
6805 |
|
|
default:
|
6806 |
|
|
gcc_unreachable ();
|
6807 |
|
|
}
|
6808 |
|
|
}
|
6809 |
|
|
|
6810 |
|
|
/* Determine if one named address space is a subset of another. */
|
6811 |
|
|
|
6812 |
|
|
static bool
|
6813 |
|
|
spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
|
6814 |
|
|
{
|
6815 |
|
|
gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
|
6816 |
|
|
gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
|
6817 |
|
|
|
6818 |
|
|
if (subset == superset)
|
6819 |
|
|
return true;
|
6820 |
|
|
|
6821 |
|
|
/* If we have -mno-address-space-conversion, treat __ea and generic as not
|
6822 |
|
|
being subsets but instead as disjoint address spaces. */
|
6823 |
|
|
else if (!TARGET_ADDRESS_SPACE_CONVERSION)
|
6824 |
|
|
return false;
|
6825 |
|
|
|
6826 |
|
|
else
|
6827 |
|
|
return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
|
6828 |
|
|
}
|
6829 |
|
|
|
6830 |
|
|
/* Convert from one address space to another. */
|
6831 |
|
|
static rtx
|
6832 |
|
|
spu_addr_space_convert (rtx op, tree from_type, tree to_type)
|
6833 |
|
|
{
|
6834 |
|
|
addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
|
6835 |
|
|
addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
|
6836 |
|
|
|
6837 |
|
|
gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
|
6838 |
|
|
gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
|
6839 |
|
|
|
6840 |
|
|
if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
|
6841 |
|
|
{
|
6842 |
|
|
rtx result, ls;
|
6843 |
|
|
|
6844 |
|
|
ls = gen_const_mem (DImode,
|
6845 |
|
|
gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
|
6846 |
|
|
set_mem_align (ls, 128);
|
6847 |
|
|
|
6848 |
|
|
result = gen_reg_rtx (Pmode);
|
6849 |
|
|
ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
|
6850 |
|
|
op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
|
6851 |
|
|
ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
|
6852 |
|
|
ls, const0_rtx, Pmode, 1);
|
6853 |
|
|
|
6854 |
|
|
emit_insn (gen_subsi3 (result, op, ls));
|
6855 |
|
|
|
6856 |
|
|
return result;
|
6857 |
|
|
}
|
6858 |
|
|
|
6859 |
|
|
else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
|
6860 |
|
|
{
|
6861 |
|
|
rtx result, ls;
|
6862 |
|
|
|
6863 |
|
|
ls = gen_const_mem (DImode,
|
6864 |
|
|
gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
|
6865 |
|
|
set_mem_align (ls, 128);
|
6866 |
|
|
|
6867 |
|
|
result = gen_reg_rtx (EAmode);
|
6868 |
|
|
ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
|
6869 |
|
|
op = force_reg (Pmode, op);
|
6870 |
|
|
ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
|
6871 |
|
|
ls, const0_rtx, EAmode, 1);
|
6872 |
|
|
op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
|
6873 |
|
|
|
6874 |
|
|
if (EAmode == SImode)
|
6875 |
|
|
emit_insn (gen_addsi3 (result, op, ls));
|
6876 |
|
|
else
|
6877 |
|
|
emit_insn (gen_adddi3 (result, op, ls));
|
6878 |
|
|
|
6879 |
|
|
return result;
|
6880 |
|
|
}
|
6881 |
|
|
|
6882 |
|
|
else
|
6883 |
|
|
gcc_unreachable ();
|
6884 |
|
|
}
|
6885 |
|
|
|
6886 |
|
|
|
6887 |
|
|
/* Count the total number of instructions in each pipe and return the
|
6888 |
|
|
maximum, which is used as the Minimum Iteration Interval (MII)
|
6889 |
|
|
in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1.
|
6890 |
|
|
-2 are instructions that can go in pipe0 or pipe1. */
|
6891 |
|
|
static int
|
6892 |
|
|
spu_sms_res_mii (struct ddg *g)
|
6893 |
|
|
{
|
6894 |
|
|
int i;
|
6895 |
|
|
unsigned t[4] = {0, 0, 0, 0};
|
6896 |
|
|
|
6897 |
|
|
for (i = 0; i < g->num_nodes; i++)
|
6898 |
|
|
{
|
6899 |
|
|
rtx insn = g->nodes[i].insn;
|
6900 |
|
|
int p = get_pipe (insn) + 2;
|
6901 |
|
|
|
6902 |
|
|
assert (p >= 0);
|
6903 |
|
|
assert (p < 4);
|
6904 |
|
|
|
6905 |
|
|
t[p]++;
|
6906 |
|
|
if (dump_file && INSN_P (insn))
|
6907 |
|
|
fprintf (dump_file, "i%d %s %d %d\n",
|
6908 |
|
|
INSN_UID (insn),
|
6909 |
|
|
insn_data[INSN_CODE(insn)].name,
|
6910 |
|
|
p, t[p]);
|
6911 |
|
|
}
|
6912 |
|
|
if (dump_file)
|
6913 |
|
|
fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
|
6914 |
|
|
|
6915 |
|
|
return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
|
6916 |
|
|
}
|
6917 |
|
|
|
6918 |
|
|
|
6919 |
|
|
void
|
6920 |
|
|
spu_init_expanders (void)
|
6921 |
|
|
{
|
6922 |
|
|
if (cfun)
|
6923 |
|
|
{
|
6924 |
|
|
rtx r0, r1;
|
6925 |
|
|
/* HARD_FRAME_REGISTER is only 128 bit aligned when
|
6926 |
|
|
frame_pointer_needed is true. We don't know that until we're
|
6927 |
|
|
expanding the prologue. */
|
6928 |
|
|
REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
|
6929 |
|
|
|
6930 |
|
|
/* A number of passes use LAST_VIRTUAL_REGISTER+1 and
|
6931 |
|
|
LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
|
6932 |
|
|
to be treated as aligned, so generate them here. */
|
6933 |
|
|
r0 = gen_reg_rtx (SImode);
|
6934 |
|
|
r1 = gen_reg_rtx (SImode);
|
6935 |
|
|
mark_reg_pointer (r0, 128);
|
6936 |
|
|
mark_reg_pointer (r1, 128);
|
6937 |
|
|
gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
|
6938 |
|
|
&& REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
|
6939 |
|
|
}
|
6940 |
|
|
}
|
6941 |
|
|
|
6942 |
|
|
static enum machine_mode
|
6943 |
|
|
spu_libgcc_cmp_return_mode (void)
|
6944 |
|
|
{
|
6945 |
|
|
|
6946 |
|
|
/* For SPU word mode is TI mode so it is better to use SImode
|
6947 |
|
|
for compare returns. */
|
6948 |
|
|
return SImode;
|
6949 |
|
|
}
|
6950 |
|
|
|
6951 |
|
|
static enum machine_mode
|
6952 |
|
|
spu_libgcc_shift_count_mode (void)
|
6953 |
|
|
{
|
6954 |
|
|
/* For SPU word mode is TI mode so it is better to use SImode
|
6955 |
|
|
for shift counts. */
|
6956 |
|
|
return SImode;
|
6957 |
|
|
}
|
6958 |
|
|
|
6959 |
|
|
/* An early place to adjust some flags after GCC has finished processing
|
6960 |
|
|
* them. */
|
6961 |
|
|
static void
|
6962 |
|
|
asm_file_start (void)
|
6963 |
|
|
{
|
6964 |
|
|
/* Variable tracking should be run after all optimizations which
|
6965 |
|
|
change order of insns. It also needs a valid CFG. */
|
6966 |
|
|
spu_flag_var_tracking = flag_var_tracking;
|
6967 |
|
|
flag_var_tracking = 0;
|
6968 |
|
|
|
6969 |
|
|
default_file_start ();
|
6970 |
|
|
}
|
6971 |
|
|
|
6972 |
|
|
/* Implement targetm.section_type_flags. */
|
6973 |
|
|
static unsigned int
|
6974 |
|
|
spu_section_type_flags (tree decl, const char *name, int reloc)
|
6975 |
|
|
{
|
6976 |
|
|
/* .toe needs to have type @nobits. */
|
6977 |
|
|
if (strcmp (name, ".toe") == 0)
|
6978 |
|
|
return SECTION_BSS;
|
6979 |
|
|
/* Don't load _ea into the current address space. */
|
6980 |
|
|
if (strcmp (name, "._ea") == 0)
|
6981 |
|
|
return SECTION_WRITE | SECTION_DEBUG;
|
6982 |
|
|
return default_section_type_flags (decl, name, reloc);
|
6983 |
|
|
}
|
6984 |
|
|
|
6985 |
|
|
/* Implement targetm.select_section. */
|
6986 |
|
|
static section *
|
6987 |
|
|
spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
|
6988 |
|
|
{
|
6989 |
|
|
/* Variables and constants defined in the __ea address space
|
6990 |
|
|
go into a special section named "._ea". */
|
6991 |
|
|
if (TREE_TYPE (decl) != error_mark_node
|
6992 |
|
|
&& TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
|
6993 |
|
|
{
|
6994 |
|
|
/* We might get called with string constants, but get_named_section
|
6995 |
|
|
doesn't like them as they are not DECLs. Also, we need to set
|
6996 |
|
|
flags in that case. */
|
6997 |
|
|
if (!DECL_P (decl))
|
6998 |
|
|
return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
|
6999 |
|
|
|
7000 |
|
|
return get_named_section (decl, "._ea", reloc);
|
7001 |
|
|
}
|
7002 |
|
|
|
7003 |
|
|
return default_elf_select_section (decl, reloc, align);
|
7004 |
|
|
}
|
7005 |
|
|
|
7006 |
|
|
/* Implement targetm.unique_section. */
|
7007 |
|
|
static void
|
7008 |
|
|
spu_unique_section (tree decl, int reloc)
|
7009 |
|
|
{
|
7010 |
|
|
/* We don't support unique section names in the __ea address
|
7011 |
|
|
space for now. */
|
7012 |
|
|
if (TREE_TYPE (decl) != error_mark_node
|
7013 |
|
|
&& TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
|
7014 |
|
|
return;
|
7015 |
|
|
|
7016 |
|
|
default_unique_section (decl, reloc);
|
7017 |
|
|
}
|
7018 |
|
|
|
7019 |
|
|
/* Generate a constant or register which contains 2^SCALE. We assume
|
7020 |
|
|
the result is valid for MODE. Currently, MODE must be V4SFmode and
|
7021 |
|
|
SCALE must be SImode. */
|
7022 |
|
|
rtx
|
7023 |
|
|
spu_gen_exp2 (enum machine_mode mode, rtx scale)
|
7024 |
|
|
{
|
7025 |
|
|
gcc_assert (mode == V4SFmode);
|
7026 |
|
|
gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
|
7027 |
|
|
if (GET_CODE (scale) != CONST_INT)
|
7028 |
|
|
{
|
7029 |
|
|
/* unsigned int exp = (127 + scale) << 23;
|
7030 |
|
|
__vector float m = (__vector float) spu_splats (exp); */
|
7031 |
|
|
rtx reg = force_reg (SImode, scale);
|
7032 |
|
|
rtx exp = gen_reg_rtx (SImode);
|
7033 |
|
|
rtx mul = gen_reg_rtx (mode);
|
7034 |
|
|
emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
|
7035 |
|
|
emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
|
7036 |
|
|
emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
|
7037 |
|
|
return mul;
|
7038 |
|
|
}
|
7039 |
|
|
else
|
7040 |
|
|
{
|
7041 |
|
|
HOST_WIDE_INT exp = 127 + INTVAL (scale);
|
7042 |
|
|
unsigned char arr[16];
|
7043 |
|
|
arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
|
7044 |
|
|
arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
|
7045 |
|
|
arr[2] = arr[6] = arr[10] = arr[14] = 0;
|
7046 |
|
|
arr[3] = arr[7] = arr[11] = arr[15] = 0;
|
7047 |
|
|
return array_to_constant (mode, arr);
|
7048 |
|
|
}
|
7049 |
|
|
}
|
7050 |
|
|
|
7051 |
|
|
/* After reload, just change the convert into a move instruction
|
7052 |
|
|
or a dead instruction. */
|
7053 |
|
|
void
|
7054 |
|
|
spu_split_convert (rtx ops[])
|
7055 |
|
|
{
|
7056 |
|
|
if (REGNO (ops[0]) == REGNO (ops[1]))
|
7057 |
|
|
emit_note (NOTE_INSN_DELETED);
|
7058 |
|
|
else
|
7059 |
|
|
{
|
7060 |
|
|
/* Use TImode always as this might help hard reg copyprop. */
|
7061 |
|
|
rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
|
7062 |
|
|
rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
|
7063 |
|
|
emit_insn (gen_move_insn (op0, op1));
|
7064 |
|
|
}
|
7065 |
|
|
}
|
7066 |
|
|
|
7067 |
|
|
/* Write the profiling call sequence to FILE: a comment line followed by
   a branch-and-set-link to _mcount using register $75.  LABELNO is not
   used.  */
void
spu_function_profiler (FILE * file, int labelno)
{
  fputs ("# profile\n", file);
  fputs ("brsl $75, _mcount\n", file);
}
|
7073 |
|
|
|
7074 |
|
|
#include "gt-spu.h"
|