OpenCores
URL https://opencores.org/ocsvn/openrisc_2011-10-31/openrisc_2011-10-31/trunk

Subversion Repositories openrisc_2011-10-31

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /openrisc/tags/gnu-src/gcc-4.5.1/gcc-4.5.1-or32-1.0rc2/gcc/config/spu
    from Rev 378 to Rev 384

Rev 378 → Rev 384

/spu.c
0,0 → 1,7074
/* Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
 
This file is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.
 
This file is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
 
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
 
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "obstack.h"
#include "tree.h"
#include "expr.h"
#include "optabs.h"
#include "except.h"
#include "function.h"
#include "output.h"
#include "basic-block.h"
#include "integrate.h"
#include "toplev.h"
#include "ggc.h"
#include "hashtab.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "reload.h"
#include "cfglayout.h"
#include "sched-int.h"
#include "params.h"
#include "assert.h"
#include "machmode.h"
#include "gimple.h"
#include "tm-constrs.h"
#include "ddg.h"
#include "sbitmap.h"
#include "timevar.h"
#include "df.h"
 
/* Builtin types, data and prototypes. */
 
enum spu_builtin_type_index
{
SPU_BTI_END_OF_PARAMS,
 
/* We create new type nodes for these. */
SPU_BTI_V16QI,
SPU_BTI_V8HI,
SPU_BTI_V4SI,
SPU_BTI_V2DI,
SPU_BTI_V4SF,
SPU_BTI_V2DF,
SPU_BTI_UV16QI,
SPU_BTI_UV8HI,
SPU_BTI_UV4SI,
SPU_BTI_UV2DI,
 
/* A 16-byte type. (Implemented with V16QI_type_node) */
SPU_BTI_QUADWORD,
 
/* These all correspond to intSI_type_node */
SPU_BTI_7,
SPU_BTI_S7,
SPU_BTI_U7,
SPU_BTI_S10,
SPU_BTI_S10_4,
SPU_BTI_U14,
SPU_BTI_16,
SPU_BTI_S16,
SPU_BTI_S16_2,
SPU_BTI_U16,
SPU_BTI_U16_2,
SPU_BTI_U18,
 
/* These correspond to the standard types */
SPU_BTI_INTQI,
SPU_BTI_INTHI,
SPU_BTI_INTSI,
SPU_BTI_INTDI,
 
SPU_BTI_UINTQI,
SPU_BTI_UINTHI,
SPU_BTI_UINTSI,
SPU_BTI_UINTDI,
 
SPU_BTI_FLOAT,
SPU_BTI_DOUBLE,
 
SPU_BTI_VOID,
SPU_BTI_PTR,
 
SPU_BTI_MAX
};
 
#define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
#define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
#define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
#define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
#define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
#define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
#define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
#define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
#define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
#define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
 
static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
 
struct spu_builtin_range
{
int low, high;
};
 
static struct spu_builtin_range spu_builtin_range[] = {
{-0x40ll, 0x7fll}, /* SPU_BTI_7 */
{-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
{0ll, 0x7fll}, /* SPU_BTI_U7 */
{-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
{-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
{0ll, 0x3fffll}, /* SPU_BTI_U14 */
{-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
{-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
{-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
{0ll, 0xffffll}, /* SPU_BTI_U16 */
{0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
{0ll, 0x3ffffll}, /* SPU_BTI_U18 */
};
 
/* Target specific attribute specifications. */
char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
 
/* Prototypes and external defs. */
static void spu_init_builtins (void);
static tree spu_builtin_decl (unsigned, bool);
static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode);
static unsigned char spu_vector_mode_supported_p (enum machine_mode mode);
static bool spu_legitimate_address_p (enum machine_mode, rtx, bool);
static bool spu_addr_space_legitimate_address_p (enum machine_mode, rtx,
bool, addr_space_t);
static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
static rtx get_pic_reg (void);
static int need_to_save_reg (int regno, int saving);
static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
rtx scratch);
static void emit_nop_for_insn (rtx insn);
static bool insn_clobbers_hbr (rtx insn);
static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
int distance, sbitmap blocks);
static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
enum machine_mode dmode);
static rtx get_branch_target (rtx branch);
static void spu_machine_dependent_reorg (void);
static int spu_sched_issue_rate (void);
static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
int can_issue_more);
static int get_pipe (rtx insn);
static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
static void spu_sched_init_global (FILE *, int, int);
static void spu_sched_init (FILE *, int, int);
static int spu_sched_reorder (FILE *, int, rtx *, int *, int);
static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
int flags,
unsigned char *no_add_attrs);
static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
int flags,
unsigned char *no_add_attrs);
static int spu_naked_function_p (tree func);
static unsigned char spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
const_tree type, unsigned char named);
static tree spu_build_builtin_va_list (void);
static void spu_va_start (tree, rtx);
static tree spu_gimplify_va_arg_expr (tree valist, tree type,
gimple_seq * pre_p, gimple_seq * post_p);
static int store_with_one_insn_p (rtx mem);
static int mem_is_padded_component_ref (rtx x);
static int reg_aligned_for_addr (rtx x);
static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
static void spu_asm_globalize_label (FILE * file, const char *name);
static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
int *total, bool speed);
static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp);
static void spu_init_libfuncs (void);
static bool spu_return_in_memory (const_tree type, const_tree fntype);
static void fix_range (const char *);
static void spu_encode_section_info (tree, rtx, int);
static rtx spu_legitimize_address (rtx, rtx, enum machine_mode);
static rtx spu_addr_space_legitimize_address (rtx, rtx, enum machine_mode,
addr_space_t);
static tree spu_builtin_mul_widen_even (tree);
static tree spu_builtin_mul_widen_odd (tree);
static tree spu_builtin_mask_for_load (void);
static int spu_builtin_vectorization_cost (bool);
static bool spu_vector_alignment_reachable (const_tree, bool);
static tree spu_builtin_vec_perm (tree, tree *);
static enum machine_mode spu_addr_space_pointer_mode (addr_space_t);
static enum machine_mode spu_addr_space_address_mode (addr_space_t);
static bool spu_addr_space_subset_p (addr_space_t, addr_space_t);
static rtx spu_addr_space_convert (rtx, tree, tree);
static int spu_sms_res_mii (struct ddg *g);
static void asm_file_start (void);
static unsigned int spu_section_type_flags (tree, const char *, int);
static section *spu_select_section (tree, int, unsigned HOST_WIDE_INT);
static void spu_unique_section (tree, int);
static rtx spu_expand_load (rtx, rtx, rtx, int);
static void spu_trampoline_init (rtx, tree, rtx);
 
extern const char *reg_names[];
 
/* Which instruction set architecture to use. */
int spu_arch;
/* Which cpu are we tuning for. */
int spu_tune;
 
/* The hardware requires 8 insns between a hint and the branch it
affects. This variable describes how many rtl instructions the
compiler needs to see before inserting a hint, and then the compiler
will insert enough nops to make it at least 8 insns. The default is
for the compiler to allow up to 2 nops to be emitted. The nops are
inserted in pairs, so we round down. */
int spu_hint_dist = (8*4) - (2*4);
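 
/* A minimal illustrative sketch (hypothetical helper, not from the GCC
   sources): the distance is measured in bytes, at 4 bytes per SPU
   instruction, so the default above is 8*4 - 2*4 = 24 bytes, i.e. the
   hint must be seen at least 6 instructions before the branch.
   spu_override_options below recomputes it the same way from
   spu_max_nops. */
static int hint_dist_for_max_nops (int max_nops)
{
  int dist = (8 * 4) - (max_nops * 4);   /* mirrors the assignment above */
  return dist < 0 ? 0 : dist;            /* clamped as in spu_override_options */
}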
 
/* Determines whether we run variable tracking in machine dependent
reorganization. */
static int spu_flag_var_tracking;
 
enum spu_immediate {
SPU_NONE,
SPU_IL,
SPU_ILA,
SPU_ILH,
SPU_ILHU,
SPU_ORI,
SPU_ORHI,
SPU_ORBI,
SPU_IOHL
};
enum immediate_class
{
IC_POOL, /* constant pool */
IC_IL1, /* one il* instruction */
IC_IL2, /* both ilhu and iohl instructions */
IC_IL1s, /* one il* instruction */
IC_IL2s, /* both ilhu and iohl instructions */
IC_FSMBI, /* the fsmbi instruction */
IC_CPAT, /* one of the c*d instructions */
IC_FSMBI2 /* fsmbi plus 1 other instruction */
};
 
static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
static enum immediate_class classify_immediate (rtx op,
enum machine_mode mode);
 
static enum machine_mode spu_unwind_word_mode (void);
 
static enum machine_mode
spu_libgcc_cmp_return_mode (void);
 
static enum machine_mode
spu_libgcc_shift_count_mode (void);
 
/* Pointer mode for __ea references. */
#define EAmode (spu_ea_model != 32 ? DImode : SImode)
 
/* Table of machine attributes. */
static const struct attribute_spec spu_attribute_table[] =
{
/* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
{ "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute },
{ "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute },
{ NULL, 0, 0, false, false, false, NULL }
};
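 
/* A hedged usage sketch for the target attributes registered above; the
   spellings come from the table, the declaration itself is only an
   example.  A "naked" function gets no prologue or epilogue code, since
   spu_expand_prologue and spu_expand_epilogue check spu_naked_function_p
   and return early. */
void bare_entry_point (void) __attribute__ ((naked));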
/* TARGET overrides. */
 
#undef TARGET_ADDR_SPACE_POINTER_MODE
#define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
 
#undef TARGET_ADDR_SPACE_ADDRESS_MODE
#define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
 
#undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
#define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
spu_addr_space_legitimate_address_p
 
#undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
#define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
 
#undef TARGET_ADDR_SPACE_SUBSET_P
#define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
 
#undef TARGET_ADDR_SPACE_CONVERT
#define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
 
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS spu_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL spu_builtin_decl
 
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN spu_expand_builtin
 
#undef TARGET_UNWIND_WORD_MODE
#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
 
#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
 
/* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
and .quad for the debugger. When it is known that the assembler is fixed,
these can be removed. */
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
 
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
 
/* The .8byte directive doesn't seem to work well for a 32-bit
architecture. */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP NULL
 
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS spu_rtx_costs
 
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
 
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
 
#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
 
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT spu_sched_init
 
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
 
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER spu_sched_reorder
 
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 spu_sched_reorder
 
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
 
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE spu_attribute_table
 
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER spu_assemble_integer
 
#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
 
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
 
#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
 
#undef TARGET_ASM_GLOBALIZE_LABEL
#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
 
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
 
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
 
#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
 
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
 
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
 
#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
 
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
 
#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
 
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS spu_init_libfuncs
 
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY spu_return_in_memory
 
#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
 
#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
 
#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
 
#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
 
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
 
#undef TARGET_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
 
#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
#define TARGET_VECTORIZE_BUILTIN_VEC_PERM spu_builtin_vec_perm
 
#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
 
#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
 
#undef TARGET_SCHED_SMS_RES_MII
#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
 
#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START asm_file_start
 
#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
 
#undef TARGET_ASM_SELECT_SECTION
#define TARGET_ASM_SELECT_SECTION spu_select_section
 
#undef TARGET_ASM_UNIQUE_SECTION
#define TARGET_ASM_UNIQUE_SECTION spu_unique_section
 
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
 
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT spu_trampoline_init
 
struct gcc_target targetm = TARGET_INITIALIZER;
 
void
spu_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
{
/* Override some of the default param values. With so many registers
larger values are better for these params. */
MAX_PENDING_LIST_LENGTH = 128;
 
/* With so many registers this is better on by default. */
flag_rename_registers = 1;
}
 
/* Sometimes certain combinations of command options do not make sense
on a particular target machine. You can define a macro
OVERRIDE_OPTIONS to take account of this. This macro, if defined, is
executed once just after all the command options have been parsed. */
void
spu_override_options (void)
{
/* Small loops will be unpeeled at -O3. For SPU it is more important
to keep code small by default. */
if (!flag_unroll_loops && !flag_peel_loops
&& !PARAM_SET_P (PARAM_MAX_COMPLETELY_PEEL_TIMES))
PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES) = 1;
 
flag_omit_frame_pointer = 1;
 
/* Functions must be 8 byte aligned so we correctly handle dual issue */
if (align_functions < 8)
align_functions = 8;
 
spu_hint_dist = 8*4 - spu_max_nops*4;
if (spu_hint_dist < 0)
spu_hint_dist = 0;
 
if (spu_fixed_range_string)
fix_range (spu_fixed_range_string);
 
/* Determine processor architectural level. */
if (spu_arch_string)
{
if (strcmp (&spu_arch_string[0], "cell") == 0)
spu_arch = PROCESSOR_CELL;
else if (strcmp (&spu_arch_string[0], "celledp") == 0)
spu_arch = PROCESSOR_CELLEDP;
else
error ("Unknown architecture '%s'", &spu_arch_string[0]);
}
 
/* Determine processor to tune for. */
if (spu_tune_string)
{
if (strcmp (&spu_tune_string[0], "cell") == 0)
spu_tune = PROCESSOR_CELL;
else if (strcmp (&spu_tune_string[0], "celledp") == 0)
spu_tune = PROCESSOR_CELLEDP;
else
error ("Unknown architecture '%s'", &spu_tune_string[0]);
}
 
/* Change defaults according to the processor architecture. */
if (spu_arch == PROCESSOR_CELLEDP)
{
/* If no command line option has been otherwise specified, change
the default to -mno-safe-hints on celledp -- only the original
Cell/B.E. processors require this workaround. */
if (!(target_flags_explicit & MASK_SAFE_HINTS))
target_flags &= ~MASK_SAFE_HINTS;
}
 
REAL_MODE_FORMAT (SFmode) = &spu_single_format;
}
/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
struct attribute_spec.handler. */
 
/* True if MODE is valid for the target. By "valid", we mean able to
be manipulated in non-trivial ways. In particular, this means all
the arithmetic is supported. */
static bool
spu_scalar_mode_supported_p (enum machine_mode mode)
{
switch (mode)
{
case QImode:
case HImode:
case SImode:
case SFmode:
case DImode:
case TImode:
case DFmode:
return true;
 
default:
return false;
}
}
 
/* Similarly for vector modes. "Supported" here is less strict. At
least some operations are supported; need to check optabs or builtins
for further details. */
static bool
spu_vector_mode_supported_p (enum machine_mode mode)
{
switch (mode)
{
case V16QImode:
case V8HImode:
case V4SImode:
case V2DImode:
case V4SFmode:
case V2DFmode:
return true;
 
default:
return false;
}
}
 
/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
least significant bytes of the outer mode. This function returns
TRUE for the SUBREGs where this is correct. */
int
valid_subreg (rtx op)
{
enum machine_mode om = GET_MODE (op);
enum machine_mode im = GET_MODE (SUBREG_REG (op));
return om != VOIDmode && im != VOIDmode
&& (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
|| (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
|| (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
}
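 
/* A minimal sketch of the size rule valid_subreg enforces (hypothetical
   helper, sizes in bytes as returned by GET_MODE_SIZE): both modes equal
   in size, both at most a word (4 bytes), or both at least a quadword
   (16 bytes). */
static int subreg_sizes_ok (int inner_bytes, int outer_bytes)
{
  return inner_bytes == outer_bytes
         || (inner_bytes <= 4 && outer_bytes <= 4)
         || (inner_bytes >= 16 && outer_bytes >= 16);
}
/* e.g. subreg_sizes_ok (4, 4) and subreg_sizes_ok (16, 16) hold, while an
   SImode value viewed through a DImode subreg (4, 8) does not. */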
 
/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
and adjust the start offset. */
static rtx
adjust_operand (rtx op, HOST_WIDE_INT * start)
{
enum machine_mode mode;
int op_size;
/* Strip any paradoxical SUBREG. */
if (GET_CODE (op) == SUBREG
&& (GET_MODE_BITSIZE (GET_MODE (op))
> GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
{
if (start)
*start -=
GET_MODE_BITSIZE (GET_MODE (op)) -
GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
op = SUBREG_REG (op);
}
/* If it is smaller than SI, assure a SUBREG */
op_size = GET_MODE_BITSIZE (GET_MODE (op));
if (op_size < 32)
{
if (start)
*start += 32 - op_size;
op_size = 32;
}
/* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
mode = mode_for_size (op_size, MODE_INT, 0);
if (mode != GET_MODE (op))
op = gen_rtx_SUBREG (mode, op, 0);
return op;
}
 
void
spu_expand_extv (rtx ops[], int unsignedp)
{
rtx dst = ops[0], src = ops[1];
HOST_WIDE_INT width = INTVAL (ops[2]);
HOST_WIDE_INT start = INTVAL (ops[3]);
HOST_WIDE_INT align_mask;
rtx s0, s1, mask, r0;
 
gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
 
if (MEM_P (src))
{
/* First, determine if we need 1 TImode load or 2. We need only 1
if the bits being extracted do not cross the alignment boundary
as determined by the MEM and its address. */
 
align_mask = -MEM_ALIGN (src);
if ((start & align_mask) == ((start + width - 1) & align_mask))
{
/* Alignment is sufficient for 1 load. */
s0 = gen_reg_rtx (TImode);
r0 = spu_expand_load (s0, 0, src, start / 8);
start &= 7;
if (r0)
emit_insn (gen_rotqby_ti (s0, s0, r0));
}
else
{
/* Need 2 loads. */
s0 = gen_reg_rtx (TImode);
s1 = gen_reg_rtx (TImode);
r0 = spu_expand_load (s0, s1, src, start / 8);
start &= 7;
 
gcc_assert (start + width <= 128);
if (r0)
{
rtx r1 = gen_reg_rtx (SImode);
mask = gen_reg_rtx (TImode);
emit_move_insn (mask, GEN_INT (-1));
emit_insn (gen_rotqby_ti (s0, s0, r0));
emit_insn (gen_rotqby_ti (s1, s1, r0));
if (GET_CODE (r0) == CONST_INT)
r1 = GEN_INT (INTVAL (r0) & 15);
else
emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
emit_insn (gen_shlqby_ti (mask, mask, r1));
emit_insn (gen_selb (s0, s1, s0, mask));
}
}
 
}
else if (GET_CODE (src) == SUBREG)
{
rtx r = SUBREG_REG (src);
gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
s0 = gen_reg_rtx (TImode);
if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r)));
else
emit_move_insn (s0, src);
}
else
{
gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
s0 = gen_reg_rtx (TImode);
emit_move_insn (s0, src);
}
 
/* Now s0 is TImode and contains the bits to extract at start. */
 
if (start)
emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
 
if (128 - width)
{
tree c = build_int_cst (NULL_TREE, 128 - width);
s0 = expand_shift (RSHIFT_EXPR, TImode, s0, c, s0, unsignedp);
}
 
emit_move_insn (dst, s0);
}
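 
/* Sketch of the one-load-or-two decision used for the MEM case above
   (illustrative only, not part of the original source): the extraction
   needs a single TImode load when the first and last extracted bit fall
   inside the same aligned block. */
static int extv_one_load_suffices (int start_bit, int width, int mem_align_bits)
{
  int align_mask = -mem_align_bits;     /* same mask as "-MEM_ALIGN (src)" */
  return (start_bit & align_mask) == ((start_bit + width - 1) & align_mask);
}
/* With 128-bit alignment, 32 bits at bit 64 need one load, while 32 bits
   at bit 112 straddle the boundary and need two. */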
 
void
spu_expand_insv (rtx ops[])
{
HOST_WIDE_INT width = INTVAL (ops[1]);
HOST_WIDE_INT start = INTVAL (ops[2]);
HOST_WIDE_INT maskbits;
enum machine_mode dst_mode, src_mode;
rtx dst = ops[0], src = ops[3];
int dst_size, src_size;
rtx mask;
rtx shift_reg;
int shift;
 
 
if (GET_CODE (ops[0]) == MEM)
dst = gen_reg_rtx (TImode);
else
dst = adjust_operand (dst, &start);
dst_mode = GET_MODE (dst);
dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
 
if (CONSTANT_P (src))
{
enum machine_mode m =
(width <= 32 ? SImode : width <= 64 ? DImode : TImode);
src = force_reg (m, convert_to_mode (m, src, 0));
}
src = adjust_operand (src, 0);
src_mode = GET_MODE (src);
src_size = GET_MODE_BITSIZE (GET_MODE (src));
 
mask = gen_reg_rtx (dst_mode);
shift_reg = gen_reg_rtx (dst_mode);
shift = dst_size - start - width;
 
/* It's not safe to use subreg here because the compiler assumes
that the SUBREG_REG is right justified in the SUBREG. */
convert_move (shift_reg, src, 1);
 
if (shift > 0)
{
switch (dst_mode)
{
case SImode:
emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
break;
case DImode:
emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
break;
case TImode:
emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
break;
default:
abort ();
}
}
else if (shift < 0)
abort ();
 
switch (dst_size)
{
case 32:
maskbits = (-1ll << (32 - width - start));
if (start)
maskbits += (1ll << (32 - start));
emit_move_insn (mask, GEN_INT (maskbits));
break;
case 64:
maskbits = (-1ll << (64 - width - start));
if (start)
maskbits += (1ll << (64 - start));
emit_move_insn (mask, GEN_INT (maskbits));
break;
case 128:
{
unsigned char arr[16];
int i = start / 8;
memset (arr, 0, sizeof (arr));
arr[i] = 0xff >> (start & 7);
for (i++; i <= (start + width - 1) / 8; i++)
arr[i] = 0xff;
arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
emit_move_insn (mask, array_to_constant (TImode, arr));
}
break;
default:
abort ();
}
if (GET_CODE (ops[0]) == MEM)
{
rtx low = gen_reg_rtx (SImode);
rtx rotl = gen_reg_rtx (SImode);
rtx mask0 = gen_reg_rtx (TImode);
rtx addr;
rtx addr0;
rtx addr1;
rtx mem;
 
addr = force_reg (Pmode, XEXP (ops[0], 0));
addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
emit_insn (gen_negsi2 (rotl, low));
emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
mem = change_address (ops[0], TImode, addr0);
set_mem_alias_set (mem, 0);
emit_move_insn (dst, mem);
emit_insn (gen_selb (dst, dst, shift_reg, mask0));
if (start + width > MEM_ALIGN (ops[0]))
{
rtx shl = gen_reg_rtx (SImode);
rtx mask1 = gen_reg_rtx (TImode);
rtx dst1 = gen_reg_rtx (TImode);
rtx mem1;
addr1 = plus_constant (addr, 16);
addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
emit_insn (gen_shlqby_ti (mask1, mask, shl));
mem1 = change_address (ops[0], TImode, addr1);
set_mem_alias_set (mem1, 0);
emit_move_insn (dst1, mem1);
emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
emit_move_insn (mem1, dst1);
}
emit_move_insn (mem, dst);
}
else
emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
}
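 
/* Worked instance of the 32-bit mask computation in spu_expand_insv above
   (hypothetical helper, not from the GCC sources): the mask has WIDTH set
   bits starting START bits from the most significant end. */
static unsigned int insv_mask_si (int start, int width)
{
  long long maskbits = -1ll << (32 - width - start);
  if (start)
    maskbits += 1ll << (32 - start);
  return (unsigned int) maskbits;   /* e.g. start 4, width 8 -> 0x0ff00000 */
}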
 
 
int
spu_expand_block_move (rtx ops[])
{
HOST_WIDE_INT bytes, align, offset;
rtx src, dst, sreg, dreg, target;
int i;
if (GET_CODE (ops[2]) != CONST_INT
|| GET_CODE (ops[3]) != CONST_INT
|| INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
return 0;
 
bytes = INTVAL (ops[2]);
align = INTVAL (ops[3]);
 
if (bytes <= 0)
return 1;
 
dst = ops[0];
src = ops[1];
 
if (align == 16)
{
for (offset = 0; offset + 16 <= bytes; offset += 16)
{
dst = adjust_address (ops[0], V16QImode, offset);
src = adjust_address (ops[1], V16QImode, offset);
emit_move_insn (dst, src);
}
if (offset < bytes)
{
rtx mask;
unsigned char arr[16] = { 0 };
for (i = 0; i < bytes - offset; i++)
arr[i] = 0xff;
dst = adjust_address (ops[0], V16QImode, offset);
src = adjust_address (ops[1], V16QImode, offset);
mask = gen_reg_rtx (V16QImode);
sreg = gen_reg_rtx (V16QImode);
dreg = gen_reg_rtx (V16QImode);
target = gen_reg_rtx (V16QImode);
emit_move_insn (mask, array_to_constant (V16QImode, arr));
emit_move_insn (dreg, dst);
emit_move_insn (sreg, src);
emit_insn (gen_selb (target, dreg, sreg, mask));
emit_move_insn (dst, target);
}
return 1;
}
return 0;
}
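 
/* Sketch of the tail mask built for the final partial chunk above
   (illustrative only): the first REMAINDER bytes of the selb mask are
   0xff, selecting bytes from the source, and the rest stay 0 so the
   corresponding destination bytes are kept. */
static void block_move_tail_mask (int remainder, unsigned char mask[16])
{
  int i;
  for (i = 0; i < 16; i++)
    mask[i] = (i < remainder) ? 0xff : 0x00;
}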
 
enum spu_comp_code
{ SPU_EQ, SPU_GT, SPU_GTU };
 
int spu_comp_icode[12][3] = {
{CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
{CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
{CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
{CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
{CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
{CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
{CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
{CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
{CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
{CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
{CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
{CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
};
 
/* Generate a compare for CODE. Return a brand-new rtx that represents
the result of the compare. GCC can figure this out too if we don't
provide all variations of compares, but since GCC always wants to use
WORD_MODE, we can generate better code in most cases if we do it
ourselves. */
void
spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
{
int reverse_compare = 0;
int reverse_test = 0;
rtx compare_result, eq_result;
rtx comp_rtx, eq_rtx;
enum machine_mode comp_mode;
enum machine_mode op_mode;
enum spu_comp_code scode, eq_code;
enum insn_code ior_code;
enum rtx_code code = GET_CODE (cmp);
rtx op0 = XEXP (cmp, 0);
rtx op1 = XEXP (cmp, 1);
int index;
int eq_test = 0;
 
/* When op1 is a CONST_INT change (X >= C) to (X > C-1),
and so on, to keep the constant in operand 1. */
if (GET_CODE (op1) == CONST_INT)
{
HOST_WIDE_INT val = INTVAL (op1) - 1;
if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
switch (code)
{
case GE:
op1 = GEN_INT (val);
code = GT;
break;
case LT:
op1 = GEN_INT (val);
code = LE;
break;
case GEU:
op1 = GEN_INT (val);
code = GTU;
break;
case LTU:
op1 = GEN_INT (val);
code = LEU;
break;
default:
break;
}
}
 
comp_mode = SImode;
op_mode = GET_MODE (op0);
 
switch (code)
{
case GE:
scode = SPU_GT;
if (HONOR_NANS (op_mode))
{
reverse_compare = 0;
reverse_test = 0;
eq_test = 1;
eq_code = SPU_EQ;
}
else
{
reverse_compare = 1;
reverse_test = 1;
}
break;
case LE:
scode = SPU_GT;
if (HONOR_NANS (op_mode))
{
reverse_compare = 1;
reverse_test = 0;
eq_test = 1;
eq_code = SPU_EQ;
}
else
{
reverse_compare = 0;
reverse_test = 1;
}
break;
case LT:
reverse_compare = 1;
reverse_test = 0;
scode = SPU_GT;
break;
case GEU:
reverse_compare = 1;
reverse_test = 1;
scode = SPU_GTU;
break;
case LEU:
reverse_compare = 0;
reverse_test = 1;
scode = SPU_GTU;
break;
case LTU:
reverse_compare = 1;
reverse_test = 0;
scode = SPU_GTU;
break;
case NE:
reverse_compare = 0;
reverse_test = 1;
scode = SPU_EQ;
break;
 
case EQ:
scode = SPU_EQ;
break;
case GT:
scode = SPU_GT;
break;
case GTU:
scode = SPU_GTU;
break;
default:
scode = SPU_EQ;
break;
}
 
switch (op_mode)
{
case QImode:
index = 0;
comp_mode = QImode;
break;
case HImode:
index = 1;
comp_mode = HImode;
break;
case SImode:
index = 2;
break;
case DImode:
index = 3;
break;
case TImode:
index = 4;
break;
case SFmode:
index = 5;
break;
case DFmode:
index = 6;
break;
case V16QImode:
index = 7;
comp_mode = op_mode;
break;
case V8HImode:
index = 8;
comp_mode = op_mode;
break;
case V4SImode:
index = 9;
comp_mode = op_mode;
break;
case V4SFmode:
index = 10;
comp_mode = V4SImode;
break;
case V2DFmode:
index = 11;
comp_mode = V2DImode;
break;
case V2DImode:
default:
abort ();
}
 
if (GET_MODE (op1) == DFmode
&& (scode != SPU_GT && scode != SPU_EQ))
abort ();
 
if (is_set == 0 && op1 == const0_rtx
&& (GET_MODE (op0) == SImode
|| GET_MODE (op0) == HImode) && scode == SPU_EQ)
{
/* Don't need to set a register with the result when we are
comparing against zero and branching. */
reverse_test = !reverse_test;
compare_result = op0;
}
else
{
compare_result = gen_reg_rtx (comp_mode);
 
if (reverse_compare)
{
rtx t = op1;
op1 = op0;
op0 = t;
}
 
if (spu_comp_icode[index][scode] == 0)
abort ();
 
if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
(op0, op_mode))
op0 = force_reg (op_mode, op0);
if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
(op1, op_mode))
op1 = force_reg (op_mode, op1);
comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
op0, op1);
if (comp_rtx == 0)
abort ();
emit_insn (comp_rtx);
 
if (eq_test)
{
eq_result = gen_reg_rtx (comp_mode);
eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
op0, op1);
if (eq_rtx == 0)
abort ();
emit_insn (eq_rtx);
ior_code = ior_optab->handlers[(int)comp_mode].insn_code;
gcc_assert (ior_code != CODE_FOR_nothing);
emit_insn (GEN_FCN (ior_code)
(compare_result, compare_result, eq_result));
}
}
 
if (is_set == 0)
{
rtx bcomp;
rtx loc_ref;
 
/* We don't have branch on QI compare insns, so we convert the
QI compare result to a HI result. */
if (comp_mode == QImode)
{
rtx old_res = compare_result;
compare_result = gen_reg_rtx (HImode);
comp_mode = HImode;
emit_insn (gen_extendqihi2 (compare_result, old_res));
}
 
if (reverse_test)
bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
else
bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
 
loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
loc_ref, pc_rtx)));
}
else if (is_set == 2)
{
rtx target = operands[0];
int compare_size = GET_MODE_BITSIZE (comp_mode);
int target_size = GET_MODE_BITSIZE (GET_MODE (target));
enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
rtx select_mask;
rtx op_t = operands[2];
rtx op_f = operands[3];
 
/* The result of the comparison can be SI, HI or QI mode. Create a
mask based on that result. */
if (target_size > compare_size)
{
select_mask = gen_reg_rtx (mode);
emit_insn (gen_extend_compare (select_mask, compare_result));
}
else if (target_size < compare_size)
select_mask =
gen_rtx_SUBREG (mode, compare_result,
(compare_size - target_size) / BITS_PER_UNIT);
else if (comp_mode != mode)
select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
else
select_mask = compare_result;
 
if (GET_MODE (target) != GET_MODE (op_t)
|| GET_MODE (target) != GET_MODE (op_f))
abort ();
 
if (reverse_test)
emit_insn (gen_selb (target, op_t, op_f, select_mask));
else
emit_insn (gen_selb (target, op_f, op_t, select_mask));
}
else
{
rtx target = operands[0];
if (reverse_test)
emit_insn (gen_rtx_SET (VOIDmode, compare_result,
gen_rtx_NOT (comp_mode, compare_result)));
if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
emit_insn (gen_extendhisi2 (target, compare_result));
else if (GET_MODE (target) == SImode
&& GET_MODE (compare_result) == QImode)
emit_insn (gen_extend_compare (target, compare_result));
else
emit_move_insn (target, compare_result);
}
}
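 
/* Quick check of the constant canonicalization done at the top of
   spu_emit_branch_or_set (sketch only): with op1 == C it rewrites
   X >= C as X > C-1, X < C as X <= C-1, and the unsigned forms likewise,
   which is valid as long as C-1 still fits the mode (the code verifies
   this with trunc_int_for_mode). */
static int ge_matches_gt_minus_one (long long x, long long c)
{
  return (x >= c) == (x > c - 1);   /* holds whenever c - 1 does not wrap */
}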
 
HOST_WIDE_INT
const_double_to_hwint (rtx x)
{
HOST_WIDE_INT val;
REAL_VALUE_TYPE rv;
if (GET_MODE (x) == SFmode)
{
REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
REAL_VALUE_TO_TARGET_SINGLE (rv, val);
}
else if (GET_MODE (x) == DFmode)
{
long l[2];
REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
val = l[0];
val = (val << 32) | (l[1] & 0xffffffff);
}
else
abort ();
return val;
}
 
rtx
hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
{
long tv[2];
REAL_VALUE_TYPE rv;
gcc_assert (mode == SFmode || mode == DFmode);
 
if (mode == SFmode)
tv[0] = (v << 32) >> 32;
else if (mode == DFmode)
{
tv[1] = (v << 32) >> 32;
tv[0] = v >> 32;
}
real_from_target (&rv, tv, mode);
return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
}
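 
/* Illustrative round trip for the two helpers above (not from the GCC
   sources), assuming the IEEE double layout: the 64-bit image is split
   into a high and a low 32-bit half, matching
   "val = (val << 32) | (l[1] & 0xffffffff)" in const_double_to_hwint. */
static void split_double_image (unsigned long long image, long halves[2])
{
  halves[0] = (long) (image >> 32);             /* high word, l[0] */
  halves[1] = (long) (image & 0xffffffffull);   /* low word, l[1] */
}
/* e.g. 1.0 has the image 0x3ff0000000000000, giving 0x3ff00000 and 0. */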
 
void
print_operand_address (FILE * file, register rtx addr)
{
rtx reg;
rtx offset;
 
if (GET_CODE (addr) == AND
&& GET_CODE (XEXP (addr, 1)) == CONST_INT
&& INTVAL (XEXP (addr, 1)) == -16)
addr = XEXP (addr, 0);
 
switch (GET_CODE (addr))
{
case REG:
fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
break;
 
case PLUS:
reg = XEXP (addr, 0);
offset = XEXP (addr, 1);
if (GET_CODE (offset) == REG)
{
fprintf (file, "%s,%s", reg_names[REGNO (reg)],
reg_names[REGNO (offset)]);
}
else if (GET_CODE (offset) == CONST_INT)
{
fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
INTVAL (offset), reg_names[REGNO (reg)]);
}
else
abort ();
break;
 
case CONST:
case LABEL_REF:
case SYMBOL_REF:
case CONST_INT:
output_addr_const (file, addr);
break;
 
default:
debug_rtx (addr);
abort ();
}
}
 
void
print_operand (FILE * file, rtx x, int code)
{
enum machine_mode mode = GET_MODE (x);
HOST_WIDE_INT val;
unsigned char arr[16];
int xcode = GET_CODE (x);
int i, info;
if (GET_MODE (x) == VOIDmode)
switch (code)
{
case 'L': /* 128 bits, signed */
case 'm': /* 128 bits, signed */
case 'T': /* 128 bits, signed */
case 't': /* 128 bits, signed */
mode = TImode;
break;
case 'K': /* 64 bits, signed */
case 'k': /* 64 bits, signed */
case 'D': /* 64 bits, signed */
case 'd': /* 64 bits, signed */
mode = DImode;
break;
case 'J': /* 32 bits, signed */
case 'j': /* 32 bits, signed */
case 's': /* 32 bits, signed */
case 'S': /* 32 bits, signed */
mode = SImode;
break;
}
switch (code)
{
 
case 'j': /* 32 bits, signed */
case 'k': /* 64 bits, signed */
case 'm': /* 128 bits, signed */
if (xcode == CONST_INT
|| xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
{
gcc_assert (logical_immediate_p (x, mode));
constant_to_array (mode, x, arr);
val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
val = trunc_int_for_mode (val, SImode);
switch (which_logical_immediate (val))
{
case SPU_ORI:
break;
case SPU_ORHI:
fprintf (file, "h");
break;
case SPU_ORBI:
fprintf (file, "b");
break;
default:
gcc_unreachable();
}
}
else
gcc_unreachable();
return;
 
case 'J': /* 32 bits, signed */
case 'K': /* 64 bits, signed */
case 'L': /* 128 bits, signed */
if (xcode == CONST_INT
|| xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
{
gcc_assert (logical_immediate_p (x, mode)
|| iohl_immediate_p (x, mode));
constant_to_array (mode, x, arr);
val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
val = trunc_int_for_mode (val, SImode);
switch (which_logical_immediate (val))
{
case SPU_ORI:
case SPU_IOHL:
break;
case SPU_ORHI:
val = trunc_int_for_mode (val, HImode);
break;
case SPU_ORBI:
val = trunc_int_for_mode (val, QImode);
break;
default:
gcc_unreachable();
}
fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
}
else
gcc_unreachable();
return;
 
case 't': /* 128 bits, signed */
case 'd': /* 64 bits, signed */
case 's': /* 32 bits, signed */
if (CONSTANT_P (x))
{
enum immediate_class c = classify_immediate (x, mode);
switch (c)
{
case IC_IL1:
constant_to_array (mode, x, arr);
val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
val = trunc_int_for_mode (val, SImode);
switch (which_immediate_load (val))
{
case SPU_IL:
break;
case SPU_ILA:
fprintf (file, "a");
break;
case SPU_ILH:
fprintf (file, "h");
break;
case SPU_ILHU:
fprintf (file, "hu");
break;
default:
gcc_unreachable ();
}
break;
case IC_CPAT:
constant_to_array (mode, x, arr);
cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
if (info == 1)
fprintf (file, "b");
else if (info == 2)
fprintf (file, "h");
else if (info == 4)
fprintf (file, "w");
else if (info == 8)
fprintf (file, "d");
break;
case IC_IL1s:
if (xcode == CONST_VECTOR)
{
x = CONST_VECTOR_ELT (x, 0);
xcode = GET_CODE (x);
}
if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
fprintf (file, "a");
else if (xcode == HIGH)
fprintf (file, "hu");
break;
case IC_FSMBI:
case IC_FSMBI2:
case IC_IL2:
case IC_IL2s:
case IC_POOL:
abort ();
}
}
else
gcc_unreachable ();
return;
 
case 'T': /* 128 bits, signed */
case 'D': /* 64 bits, signed */
case 'S': /* 32 bits, signed */
if (CONSTANT_P (x))
{
enum immediate_class c = classify_immediate (x, mode);
switch (c)
{
case IC_IL1:
constant_to_array (mode, x, arr);
val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
val = trunc_int_for_mode (val, SImode);
switch (which_immediate_load (val))
{
case SPU_IL:
case SPU_ILA:
break;
case SPU_ILH:
case SPU_ILHU:
val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
break;
default:
gcc_unreachable ();
}
fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
break;
case IC_FSMBI:
constant_to_array (mode, x, arr);
val = 0;
for (i = 0; i < 16; i++)
{
val <<= 1;
val |= arr[i] & 1;
}
print_operand (file, GEN_INT (val), 0);
break;
case IC_CPAT:
constant_to_array (mode, x, arr);
cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
break;
case IC_IL1s:
if (xcode == HIGH)
x = XEXP (x, 0);
if (GET_CODE (x) == CONST_VECTOR)
x = CONST_VECTOR_ELT (x, 0);
output_addr_const (file, x);
if (xcode == HIGH)
fprintf (file, "@h");
break;
case IC_IL2:
case IC_IL2s:
case IC_FSMBI2:
case IC_POOL:
abort ();
}
}
else
gcc_unreachable ();
return;
 
case 'C':
if (xcode == CONST_INT)
{
/* Only the 4 least significant bits are relevant for the generate
control word instructions. */
fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
return;
}
break;
 
case 'M': /* print code for c*d */
if (GET_CODE (x) == CONST_INT)
switch (INTVAL (x))
{
case 1:
fprintf (file, "b");
break;
case 2:
fprintf (file, "h");
break;
case 4:
fprintf (file, "w");
break;
case 8:
fprintf (file, "d");
break;
default:
gcc_unreachable();
}
else
gcc_unreachable();
return;
 
case 'N': /* Negate the operand */
if (xcode == CONST_INT)
fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
else if (xcode == CONST_VECTOR)
fprintf (file, HOST_WIDE_INT_PRINT_DEC,
-INTVAL (CONST_VECTOR_ELT (x, 0)));
return;
 
case 'I': /* enable/disable interrupts */
if (xcode == CONST_INT)
fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
return;
 
case 'b': /* branch modifiers */
if (xcode == REG)
fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
else if (COMPARISON_P (x))
fprintf (file, "%s", xcode == NE ? "n" : "");
return;
 
case 'i': /* indirect call */
if (xcode == MEM)
{
if (GET_CODE (XEXP (x, 0)) == REG)
/* Used in indirect function calls. */
fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
else
output_address (XEXP (x, 0));
}
return;
 
case 'p': /* load/store */
if (xcode == MEM)
{
x = XEXP (x, 0);
xcode = GET_CODE (x);
}
if (xcode == AND)
{
x = XEXP (x, 0);
xcode = GET_CODE (x);
}
if (xcode == REG)
fprintf (file, "d");
else if (xcode == CONST_INT)
fprintf (file, "a");
else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
fprintf (file, "r");
else if (xcode == PLUS || xcode == LO_SUM)
{
if (GET_CODE (XEXP (x, 1)) == REG)
fprintf (file, "x");
else
fprintf (file, "d");
}
return;
 
case 'e':
val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
val &= 0x7;
output_addr_const (file, GEN_INT (val));
return;
 
case 'f':
val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
val &= 0x1f;
output_addr_const (file, GEN_INT (val));
return;
 
case 'g':
val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
val &= 0x3f;
output_addr_const (file, GEN_INT (val));
return;
 
case 'h':
val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
val = (val >> 3) & 0x1f;
output_addr_const (file, GEN_INT (val));
return;
 
case 'E':
val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
val = -val;
val &= 0x7;
output_addr_const (file, GEN_INT (val));
return;
 
case 'F':
val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
val = -val;
val &= 0x1f;
output_addr_const (file, GEN_INT (val));
return;
 
case 'G':
val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
val = -val;
val &= 0x3f;
output_addr_const (file, GEN_INT (val));
return;
 
case 'H':
val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
val = -(val & -8ll);
val = (val >> 3) & 0x1f;
output_addr_const (file, GEN_INT (val));
return;
 
case 'v':
case 'w':
constant_to_array (mode, x, arr);
val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
return;
 
case 0:
if (xcode == REG)
fprintf (file, "%s", reg_names[REGNO (x)]);
else if (xcode == MEM)
output_address (XEXP (x, 0));
else if (xcode == CONST_VECTOR)
print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
else
output_addr_const (file, x);
return;
 
/* unused letters
o qr u yz
AB OPQR UVWXYZ */
default:
output_operand_lossage ("invalid %%xn code");
}
gcc_unreachable ();
}
 
extern char call_used_regs[];
 
/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
caller saved register. For leaf functions it is more efficient to
use a volatile register because we won't need to save and restore the
pic register. This routine is only valid after register allocation
is completed, so we can pick an unused register. */
static rtx
get_pic_reg (void)
{
rtx pic_reg = pic_offset_table_rtx;
if (!reload_completed && !reload_in_progress)
abort ();
if (current_function_is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
return pic_reg;
}
 
/* Split constant addresses to handle cases that are too large.
Add in the pic register when in PIC mode.
Split immediates that require more than 1 instruction. */
int
spu_split_immediate (rtx * ops)
{
enum machine_mode mode = GET_MODE (ops[0]);
enum immediate_class c = classify_immediate (ops[1], mode);
 
switch (c)
{
case IC_IL2:
{
unsigned char arrhi[16];
unsigned char arrlo[16];
rtx to, temp, hi, lo;
int i;
enum machine_mode imode = mode;
/* We need to do reals as ints because the constant used in the
IOR might not be a legitimate real constant. */
imode = int_mode_for_mode (mode);
constant_to_array (mode, ops[1], arrhi);
if (imode != mode)
to = simplify_gen_subreg (imode, ops[0], mode, 0);
else
to = ops[0];
temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
for (i = 0; i < 16; i += 4)
{
arrlo[i + 2] = arrhi[i + 2];
arrlo[i + 3] = arrhi[i + 3];
arrlo[i + 0] = arrlo[i + 1] = 0;
arrhi[i + 2] = arrhi[i + 3] = 0;
}
hi = array_to_constant (imode, arrhi);
lo = array_to_constant (imode, arrlo);
emit_move_insn (temp, hi);
emit_insn (gen_rtx_SET
(VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
return 1;
}
case IC_FSMBI2:
{
unsigned char arr_fsmbi[16];
unsigned char arr_andbi[16];
rtx to, reg_fsmbi, reg_and;
int i;
enum machine_mode imode = mode;
/* We need to do reals as ints because the constant used in the
* AND might not be a legitimate real constant. */
imode = int_mode_for_mode (mode);
constant_to_array (mode, ops[1], arr_fsmbi);
if (imode != mode)
to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
else
to = ops[0];
for (i = 0; i < 16; i++)
if (arr_fsmbi[i] != 0)
{
arr_andbi[0] = arr_fsmbi[i];
arr_fsmbi[i] = 0xff;
}
for (i = 1; i < 16; i++)
arr_andbi[i] = arr_andbi[0];
reg_fsmbi = array_to_constant (imode, arr_fsmbi);
reg_and = array_to_constant (imode, arr_andbi);
emit_move_insn (to, reg_fsmbi);
emit_insn (gen_rtx_SET
(VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
return 1;
}
case IC_POOL:
if (reload_in_progress || reload_completed)
{
rtx mem = force_const_mem (mode, ops[1]);
if (TARGET_LARGE_MEM)
{
rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
emit_move_insn (addr, XEXP (mem, 0));
mem = replace_equiv_address (mem, addr);
}
emit_move_insn (ops[0], mem);
return 1;
}
break;
case IC_IL1s:
case IC_IL2s:
if (reload_completed && GET_CODE (ops[1]) != HIGH)
{
if (c == IC_IL2s)
{
emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
}
else if (flag_pic)
emit_insn (gen_pic (ops[0], ops[1]));
if (flag_pic)
{
rtx pic_reg = get_pic_reg ();
emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
crtl->uses_pic_offset_table = 1;
}
return flag_pic || c == IC_IL2s;
}
break;
case IC_IL1:
case IC_FSMBI:
case IC_CPAT:
break;
}
return 0;
}
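 
/* Sketch of the per-word split performed in the IC_IL2 case above
   (hypothetical helper): the loop keeps the upper halfword of each 32-bit
   word in the "hi" constant, loaded with ilhu, and the lower halfword in
   the "lo" constant, merged in with iohl via the IOR. */
static void split_il2_word (unsigned int word, unsigned int *hi, unsigned int *lo)
{
  *hi = word & 0xffff0000u;   /* goes out via ilhu */
  *lo = word & 0x0000ffffu;   /* goes out via iohl (the IOR above) */
}
/* e.g. 0x12345678 becomes 0x12340000 (ilhu) and 0x00005678 (iohl). */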
 
/* SAVING is TRUE when we are generating the actual load and store
instructions for REGNO. When determining the size of the stack
needed for saving registers we must allocate enough space for the
worst case, because we don't always have the information early enough
to avoid allocating it. But we can at least eliminate the actual loads
and stores during the prologue/epilogue. */
static int
need_to_save_reg (int regno, int saving)
{
if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
return 1;
if (flag_pic
&& regno == PIC_OFFSET_TABLE_REGNUM
&& (!saving || crtl->uses_pic_offset_table)
&& (!saving
|| !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
return 1;
return 0;
}
 
/* This function is only correct starting with local register
allocation */
int
spu_saved_regs_size (void)
{
int reg_save_size = 0;
int regno;
 
for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
if (need_to_save_reg (regno, 0))
reg_save_size += 0x10;
return reg_save_size;
}
 
static rtx
frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
{
rtx reg = gen_rtx_REG (V4SImode, regno);
rtx mem =
gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
return emit_insn (gen_movv4si (mem, reg));
}
 
static rtx
frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
{
rtx reg = gen_rtx_REG (V4SImode, regno);
rtx mem =
gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
return emit_insn (gen_movv4si (reg, mem));
}
 
/* This happens after reload, so we need to expand it. */
static rtx
frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
{
rtx insn;
if (satisfies_constraint_K (GEN_INT (imm)))
{
insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
}
else
{
emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
insn = emit_insn (gen_addsi3 (dst, src, scratch));
if (REGNO (src) == REGNO (scratch))
abort ();
}
return insn;
}
 
/* Return nonzero if this function is known to have a null epilogue. */
 
int
direct_return (void)
{
if (reload_completed)
{
if (cfun->static_chain_decl == 0
&& (spu_saved_regs_size ()
+ get_frame_size ()
+ crtl->outgoing_args_size
+ crtl->args.pretend_args_size == 0)
&& current_function_is_leaf)
return 1;
}
return 0;
}
 
/*
The stack frame looks like this:
         +-------------+
         |  incoming   |
         |    args     |
   AP -> +-------------+
         |  $lr save   |
         +-------------+
 prev SP |  back chain |
         +-------------+
         |  var args   |
         |  reg save   | crtl->args.pretend_args_size bytes
         +-------------+
         |    ...      |
         | saved regs  | spu_saved_regs_size() bytes
   FP -> +-------------+
         |    ...      |
         |    vars     | get_frame_size() bytes
  HFP -> +-------------+
         |    ...      |
         |  outgoing   |
         |    args     | crtl->outgoing_args_size bytes
         +-------------+
         | $lr of next |
         |    frame    |
         +-------------+
         |  back chain |
   SP -> +-------------+
 
*/
void
spu_expand_prologue (void)
{
HOST_WIDE_INT size = get_frame_size (), offset, regno;
HOST_WIDE_INT total_size;
HOST_WIDE_INT saved_regs_size;
rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
rtx scratch_reg_0, scratch_reg_1;
rtx insn, real;
 
if (flag_pic && optimize == 0)
crtl->uses_pic_offset_table = 1;
 
if (spu_naked_function_p (current_function_decl))
return;
 
scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
 
saved_regs_size = spu_saved_regs_size ();
total_size = size + saved_regs_size
+ crtl->outgoing_args_size
+ crtl->args.pretend_args_size;
 
if (!current_function_is_leaf
|| cfun->calls_alloca || total_size > 0)
total_size += STACK_POINTER_OFFSET;
 
/* Save this first because code after this might use the link
register as a scratch register. */
if (!current_function_is_leaf)
{
insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
RTX_FRAME_RELATED_P (insn) = 1;
}
 
if (total_size > 0)
{
offset = -crtl->args.pretend_args_size;
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
if (need_to_save_reg (regno, 1))
{
offset -= 16;
insn = frame_emit_store (regno, sp_reg, offset);
RTX_FRAME_RELATED_P (insn) = 1;
}
}
 
if (flag_pic && crtl->uses_pic_offset_table)
{
rtx pic_reg = get_pic_reg ();
insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
}
 
if (total_size > 0)
{
if (flag_stack_check)
{
/* We compare against total_size-1 because
($sp >= total_size) <=> ($sp > total_size-1) */
rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
rtx size_v4si = spu_const (V4SImode, total_size - 1);
if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
{
emit_move_insn (scratch_v4si, size_v4si);
size_v4si = scratch_v4si;
}
emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
emit_insn (gen_vec_extractv4si
(scratch_reg_0, scratch_v4si, GEN_INT (1)));
emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
}
 
/* Adjust the stack pointer, and make sure scratch_reg_0 contains
the value of the previous $sp because we save it as the back
chain. */
if (total_size <= 2000)
{
/* In this case we save the back chain first. */
insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
insn =
frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
}
else
{
insn = emit_move_insn (scratch_reg_0, sp_reg);
insn =
frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
}
RTX_FRAME_RELATED_P (insn) = 1;
real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
 
if (total_size > 2000)
{
/* Save the back chain ptr */
insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
}
 
if (frame_pointer_needed)
{
rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
+ crtl->outgoing_args_size;
/* Set the new frame_pointer */
insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
RTX_FRAME_RELATED_P (insn) = 1;
real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
}
}
 
}
 
void
spu_expand_epilogue (bool sibcall_p)
{
int size = get_frame_size (), offset, regno;
HOST_WIDE_INT saved_regs_size, total_size;
rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
rtx jump, scratch_reg_0;
 
if (spu_naked_function_p (current_function_decl))
return;
 
scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
 
saved_regs_size = spu_saved_regs_size ();
total_size = size + saved_regs_size
+ crtl->outgoing_args_size
+ crtl->args.pretend_args_size;
 
if (!current_function_is_leaf
|| cfun->calls_alloca || total_size > 0)
total_size += STACK_POINTER_OFFSET;
 
if (total_size > 0)
{
if (cfun->calls_alloca)
frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
else
frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
 
 
if (saved_regs_size > 0)
{
offset = -crtl->args.pretend_args_size;
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
if (need_to_save_reg (regno, 1))
{
offset -= 0x10;
frame_emit_load (regno, sp_reg, offset);
}
}
}
 
if (!current_function_is_leaf)
frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
 
if (!sibcall_p)
{
emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
jump = emit_jump_insn (gen__return ());
emit_barrier_after (jump);
}
 
}
 
rtx
spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
if (count != 0)
return 0;
/* This is inefficient because it ends up copying to a save-register
which then gets saved even though $lr has already been saved. But
it does generate better code for leaf functions and we don't need
to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
used for __builtin_return_address anyway, so maybe we don't care if
it's inefficient. */
return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
}
 
/* Given VAL, generate a constant appropriate for MODE.
If MODE is a vector mode, every element will be VAL.
For TImode, VAL will be zero extended to 128 bits. */
rtx
spu_const (enum machine_mode mode, HOST_WIDE_INT val)
{
rtx inner;
rtvec v;
int units, i;
 
gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
|| GET_MODE_CLASS (mode) == MODE_FLOAT
|| GET_MODE_CLASS (mode) == MODE_VECTOR_INT
|| GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
 
if (GET_MODE_CLASS (mode) == MODE_INT)
return immed_double_const (val, 0, mode);
 
/* val is the bit representation of the float */
if (GET_MODE_CLASS (mode) == MODE_FLOAT)
return hwint_to_const_double (mode, val);
 
if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
else
inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
 
units = GET_MODE_NUNITS (mode);
 
v = rtvec_alloc (units);
 
for (i = 0; i < units; ++i)
RTVEC_ELT (v, i) = inner;
 
return gen_rtx_CONST_VECTOR (mode, v);
}
 
/* Create a MODE vector constant from 4 ints. */
rtx
spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
{
unsigned char arr[16];
arr[0] = (a >> 24) & 0xff;
arr[1] = (a >> 16) & 0xff;
arr[2] = (a >> 8) & 0xff;
arr[3] = (a >> 0) & 0xff;
arr[4] = (b >> 24) & 0xff;
arr[5] = (b >> 16) & 0xff;
arr[6] = (b >> 8) & 0xff;
arr[7] = (b >> 0) & 0xff;
arr[8] = (c >> 24) & 0xff;
arr[9] = (c >> 16) & 0xff;
arr[10] = (c >> 8) & 0xff;
arr[11] = (c >> 0) & 0xff;
arr[12] = (d >> 24) & 0xff;
arr[13] = (d >> 16) & 0xff;
arr[14] = (d >> 8) & 0xff;
arr[15] = (d >> 0) & 0xff;
return array_to_constant(mode, arr);
}
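 
/* The packing above is byte-for-byte big-endian (the SPU is a big-endian
   target); a compact restatement as a sketch, not part of the original
   source: */
static void pack_word_be (int v, unsigned char out[4])
{
  out[0] = (v >> 24) & 0xff;   /* most significant byte first */
  out[1] = (v >> 16) & 0xff;
  out[2] = (v >> 8) & 0xff;
  out[3] = (v >> 0) & 0xff;
}
/* so spu_const_from_ints (V4SImode, a, b, c, d) lays out a, b, c, d as
   vector elements 0 through 3. */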
/* branch hint stuff */
 
/* An array of these is used to propagate hints to predecessor blocks. */
struct spu_bb_info
{
rtx prop_jump; /* propagated from another block */
int bb_index; /* the original block. */
};
static struct spu_bb_info *spu_bb_info;
 
#define STOP_HINT_P(INSN) \
(GET_CODE(INSN) == CALL_INSN \
|| INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
|| INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
 
/* 1 when RTX is a hinted branch or its target. We keep track of
what has been hinted so the safe-hint code can test it easily. */
#define HINTED_P(RTX) \
(RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
 
/* 1 when RTX is an insn that must be scheduled on an even boundary. */
#define SCHED_ON_EVEN_P(RTX) \
(RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
 
/* Emit a nop for INSN such that the two will dual issue. This assumes
INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
We check for TImode to handle a MULTI1 insn which has dual issued its
first instruction. get_pipe returns -1 for MULTI0, inline asm, or
ADDR_VEC insns. */
static void
emit_nop_for_insn (rtx insn)
{
int p;
rtx new_insn;
p = get_pipe (insn);
if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
new_insn = emit_insn_after (gen_lnop (), insn);
else if (p == 1 && GET_MODE (insn) == TImode)
{
new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
PUT_MODE (new_insn, TImode);
PUT_MODE (insn, VOIDmode);
}
else
new_insn = emit_insn_after (gen_lnop (), insn);
recog_memoized (new_insn);
}
 
/* Insert nops in basic blocks to meet dual issue alignment
requirements. Also make sure hbrp and hint instructions are at least
one cycle apart, possibly inserting a nop. */
static void
pad_bb(void)
{
rtx insn, next_insn, prev_insn, hbr_insn = 0;
int length;
int addr;
 
/* This sets up INSN_ADDRESSES. */
shorten_branches (get_insns ());
 
/* Keep track of length added by nops. */
length = 0;
 
prev_insn = 0;
insn = get_insns ();
if (!active_insn_p (insn))
insn = next_active_insn (insn);
for (; insn; insn = next_insn)
{
next_insn = next_active_insn (insn);
if (INSN_CODE (insn) == CODE_FOR_iprefetch
|| INSN_CODE (insn) == CODE_FOR_hbr)
{
if (hbr_insn)
{
int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
int a1 = INSN_ADDRESSES (INSN_UID (insn));
if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
|| (a1 - a0 == 4))
{
prev_insn = emit_insn_before (gen_lnop (), insn);
PUT_MODE (prev_insn, GET_MODE (insn));
PUT_MODE (insn, TImode);
length += 4;
}
}
hbr_insn = insn;
}
if (INSN_CODE (insn) == CODE_FOR_blockage)
{
if (GET_MODE (insn) == TImode)
PUT_MODE (next_insn, TImode);
insn = next_insn;
next_insn = next_active_insn (insn);
}
addr = INSN_ADDRESSES (INSN_UID (insn));
if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
{
if (((addr + length) & 7) != 0)
{
emit_nop_for_insn (prev_insn);
length += 4;
}
}
else if (GET_MODE (insn) == TImode
&& ((next_insn && GET_MODE (next_insn) != TImode)
|| get_attr_type (insn) == TYPE_MULTI0)
&& ((addr + length) & 7) != 0)
{
/* prev_insn will always be set because the first insn is
always 8-byte aligned. */
emit_nop_for_insn (prev_insn);
length += 4;
}
prev_insn = insn;
}
}
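
/* Worked example: suppose a call marked SCHED_ON_EVEN_P ends up at an
   effective address (addr + length) of 0x14.  0x14 & 7 is nonzero, so a
   4-byte nop/lnop is emitted after the previous insn, pushing the call
   out to 0x18 and restoring the 8-byte alignment SCHED_ON_EVEN_P
   requires.  */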
 
/* Routines for branch hints. */
 
static void
spu_emit_branch_hint (rtx before, rtx branch, rtx target,
int distance, sbitmap blocks)
{
rtx branch_label = 0;
rtx hint;
rtx insn;
rtx table;
 
if (before == 0 || branch == 0 || target == 0)
return;
 
/* While scheduling we require hints to be no further than 600 bytes
away, so we need to enforce that here too. */
if (distance > 600)
return;
 
/* If BEFORE is a basic block note, emit the hint after the note. */
if (NOTE_INSN_BASIC_BLOCK_P (before))
before = NEXT_INSN (before);
 
branch_label = gen_label_rtx ();
LABEL_NUSES (branch_label)++;
LABEL_PRESERVE_P (branch_label) = 1;
insn = emit_label_before (branch_label, branch);
branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
SET_BIT (blocks, BLOCK_FOR_INSN (branch)->index);
 
hint = emit_insn_before (gen_hbr (branch_label, target), before);
recog_memoized (hint);
HINTED_P (branch) = 1;
 
if (GET_CODE (target) == LABEL_REF)
HINTED_P (XEXP (target, 0)) = 1;
else if (tablejump_p (branch, 0, &table))
{
rtvec vec;
int j;
if (GET_CODE (PATTERN (table)) == ADDR_VEC)
vec = XVEC (PATTERN (table), 0);
else
vec = XVEC (PATTERN (table), 1);
for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
}
 
if (distance >= 588)
{
/* Make sure the hint isn't scheduled any earlier than this point,
which could make it too far for the branch offset to fit. */
recog_memoized (emit_insn_before (gen_blockage (), hint));
}
else if (distance <= 8 * 4)
{
/* To guarantee at least 8 insns between the hint and branch we
insert nops. */
int d;
for (d = distance; d < 8 * 4; d += 4)
{
insn =
emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
recog_memoized (insn);
}
 
/* Make sure any nops inserted aren't scheduled before the hint. */
recog_memoized (emit_insn_after (gen_blockage (), hint));
 
/* Make sure any nops inserted aren't scheduled after the call. */
if (CALL_P (branch) && distance < 8 * 4)
recog_memoized (emit_insn_before (gen_blockage (), branch));
}
}
 
/* Returns 0 if we don't want a hint for this branch. Otherwise return
the rtx for the branch target. */
static rtx
get_branch_target (rtx branch)
{
if (GET_CODE (branch) == JUMP_INSN)
{
rtx set, src;
 
/* Return statements */
if (GET_CODE (PATTERN (branch)) == RETURN)
return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
 
/* jump table */
if (GET_CODE (PATTERN (branch)) == ADDR_VEC
|| GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
return 0;
 
/* ASM GOTOs. */
if (extract_asm_operands (PATTERN (branch)) != NULL)
return NULL;
 
set = single_set (branch);
src = SET_SRC (set);
if (GET_CODE (SET_DEST (set)) != PC)
abort ();
 
if (GET_CODE (src) == IF_THEN_ELSE)
{
rtx lab = 0;
rtx note = find_reg_note (branch, REG_BR_PROB, 0);
if (note)
{
/* If the more probable case is not a fall through, then
try a branch hint. */
HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
if (prob > (REG_BR_PROB_BASE * 6 / 10)
&& GET_CODE (XEXP (src, 1)) != PC)
lab = XEXP (src, 1);
else if (prob < (REG_BR_PROB_BASE * 4 / 10)
&& GET_CODE (XEXP (src, 2)) != PC)
lab = XEXP (src, 2);
}
if (lab)
{
if (GET_CODE (lab) == RETURN)
return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
return lab;
}
return 0;
}
 
return src;
}
else if (GET_CODE (branch) == CALL_INSN)
{
rtx call;
/* All of our call patterns are in a PARALLEL and the CALL is
the first pattern in the PARALLEL. */
if (GET_CODE (PATTERN (branch)) != PARALLEL)
abort ();
call = XVECEXP (PATTERN (branch), 0, 0);
if (GET_CODE (call) == SET)
call = SET_SRC (call);
if (GET_CODE (call) != CALL)
abort ();
return XEXP (XEXP (call, 0), 0);
}
return 0;
}
 
/* The special $hbr register is used to prevent the insn scheduler from
moving hbr insns across instructions which invalidate them. It
should only be used in a clobber, and this function searches for
insns which clobber it. */
static bool
insn_clobbers_hbr (rtx insn)
{
if (INSN_P (insn)
&& GET_CODE (PATTERN (insn)) == PARALLEL)
{
rtx parallel = PATTERN (insn);
rtx clobber;
int j;
for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
{
clobber = XVECEXP (parallel, 0, j);
if (GET_CODE (clobber) == CLOBBER
&& GET_CODE (XEXP (clobber, 0)) == REG
&& REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
return 1;
}
}
return 0;
}
 
/* Search up to 32 insns starting at FIRST:
- at any kind of hinted branch, just return
- at any unconditional branch in the first 15 insns, just return
- at a call or indirect branch, after the first 15 insns, force it to
an even address and return
- at any unconditional branch, after the first 15 insns, force it to
an even address.
At the end of the search, insert an hbrp within 4 insns of FIRST,
and an hbrp within 16 instructions of FIRST.
*/
static void
insert_hbrp_for_ilb_runout (rtx first)
{
rtx insn, before_4 = 0, before_16 = 0;
int addr = 0, length, first_addr = -1;
int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
int insert_lnop_after = 0;
for (insn = first; insn; insn = NEXT_INSN (insn))
if (INSN_P (insn))
{
if (first_addr == -1)
first_addr = INSN_ADDRESSES (INSN_UID (insn));
addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
length = get_attr_length (insn);
 
if (before_4 == 0 && addr + length >= 4 * 4)
before_4 = insn;
/* We test for 14 instructions because the first hbrp will add
up to 2 instructions. */
if (before_16 == 0 && addr + length >= 14 * 4)
before_16 = insn;
 
if (INSN_CODE (insn) == CODE_FOR_hbr)
{
/* Make sure an hbrp is at least 2 cycles away from a hint.
Insert an lnop after the hbrp when necessary. */
if (before_4 == 0 && addr > 0)
{
before_4 = insn;
insert_lnop_after |= 1;
}
else if (before_4 && addr <= 4 * 4)
insert_lnop_after |= 1;
if (before_16 == 0 && addr > 10 * 4)
{
before_16 = insn;
insert_lnop_after |= 2;
}
else if (before_16 && addr <= 14 * 4)
insert_lnop_after |= 2;
}
 
if (INSN_CODE (insn) == CODE_FOR_iprefetch)
{
if (addr < hbrp_addr0)
hbrp_addr0 = addr;
else if (addr < hbrp_addr1)
hbrp_addr1 = addr;
}
 
if (CALL_P (insn) || JUMP_P (insn))
{
if (HINTED_P (insn))
return;
 
/* Any branch after the first 15 insns should be on an even
address to avoid a special case branch. There might be
some nops and/or hbrps inserted, so we test after 10
insns. */
if (addr > 10 * 4)
SCHED_ON_EVEN_P (insn) = 1;
}
 
if (CALL_P (insn) || tablejump_p (insn, 0, 0))
return;
 
 
if (addr + length >= 32 * 4)
{
gcc_assert (before_4 && before_16);
if (hbrp_addr0 > 4 * 4)
{
insn =
emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
recog_memoized (insn);
INSN_ADDRESSES_NEW (insn,
INSN_ADDRESSES (INSN_UID (before_4)));
PUT_MODE (insn, GET_MODE (before_4));
PUT_MODE (before_4, TImode);
if (insert_lnop_after & 1)
{
insn = emit_insn_before (gen_lnop (), before_4);
recog_memoized (insn);
INSN_ADDRESSES_NEW (insn,
INSN_ADDRESSES (INSN_UID (before_4)));
PUT_MODE (insn, TImode);
}
}
if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
&& hbrp_addr1 > 16 * 4)
{
insn =
emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
recog_memoized (insn);
INSN_ADDRESSES_NEW (insn,
INSN_ADDRESSES (INSN_UID (before_16)));
PUT_MODE (insn, GET_MODE (before_16));
PUT_MODE (before_16, TImode);
if (insert_lnop_after & 2)
{
insn = emit_insn_before (gen_lnop (), before_16);
recog_memoized (insn);
INSN_ADDRESSES_NEW (insn,
INSN_ADDRESSES (INSN_UID
(before_16)));
PUT_MODE (insn, TImode);
}
}
return;
}
}
else if (BARRIER_P (insn))
return;
 
}
 
/* The SPU might hang when it executes 48 inline instructions after a
hinted branch jumps to its hinted target. The beginning of a
function and the return from a call might have been hinted, and must
be handled as well. To prevent a hang we insert 2 hbrps. The first
should be within 6 insns of the branch target. The second should be
within 22 insns of the branch target. When determining if hbrps are
necessary, we scan only 32 inline instructions, because up to
12 nops and 4 hbrps could be inserted. Similarly, when inserting
new hbrps, we insert them within 4 and 16 insns of the target. */
static void
insert_hbrp (void)
{
rtx insn;
if (TARGET_SAFE_HINTS)
{
shorten_branches (get_insns ());
/* Insert hbrp at beginning of function */
insn = next_active_insn (get_insns ());
if (insn)
insert_hbrp_for_ilb_runout (insn);
/* Insert hbrp after hinted targets. */
for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
insert_hbrp_for_ilb_runout (next_active_insn (insn));
}
}
 
static int in_spu_reorg;
 
/* Insert branch hints. There are no branch optimizations after this
pass, so it's safe to set our branch hints now. */
static void
spu_machine_dependent_reorg (void)
{
sbitmap blocks;
basic_block bb;
rtx branch, insn;
rtx branch_target = 0;
int branch_addr = 0, insn_addr, required_dist = 0;
int i;
unsigned int j;
 
if (!TARGET_BRANCH_HINTS || optimize == 0)
{
/* We still do it for unoptimized code because an external
function might have hinted a call or return. */
insert_hbrp ();
pad_bb ();
return;
}
 
blocks = sbitmap_alloc (last_basic_block);
sbitmap_zero (blocks);
 
in_spu_reorg = 1;
compute_bb_for_insn ();
 
compact_blocks ();
 
spu_bb_info =
(struct spu_bb_info *) xcalloc (n_basic_blocks,
sizeof (struct spu_bb_info));
 
/* We need exact insn addresses and lengths. */
shorten_branches (get_insns ());
 
for (i = n_basic_blocks - 1; i >= 0; i--)
{
bb = BASIC_BLOCK (i);
branch = 0;
if (spu_bb_info[i].prop_jump)
{
branch = spu_bb_info[i].prop_jump;
branch_target = get_branch_target (branch);
branch_addr = INSN_ADDRESSES (INSN_UID (branch));
required_dist = spu_hint_dist;
}
/* Search from the end of a block to the beginning. In this loop, find
jumps which need a branch hint and emit the hint only when:
- it's an indirect branch and we're at the insn which sets
the register
- we're at an insn that will invalidate the hint. e.g., a
call, another hint insn, inline asm that clobbers $hbr, and
some inlined operations (divmodsi4). Don't consider jumps
because they are only at the end of a block and are
considered when we are deciding whether to propagate
- we're getting too far away from the branch. The hbr insns
only have a signed 10 bit offset
We go back as far as possible so the branch will be considered
for propagation when we get to the beginning of the block. */
for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
{
if (INSN_P (insn))
{
insn_addr = INSN_ADDRESSES (INSN_UID (insn));
if (branch
&& ((GET_CODE (branch_target) == REG
&& set_of (branch_target, insn) != NULL_RTX)
|| insn_clobbers_hbr (insn)
|| branch_addr - insn_addr > 600))
{
rtx next = NEXT_INSN (insn);
int next_addr = INSN_ADDRESSES (INSN_UID (next));
if (insn != BB_END (bb)
&& branch_addr - next_addr >= required_dist)
{
if (dump_file)
fprintf (dump_file,
"hint for %i in block %i before %i\n",
INSN_UID (branch), bb->index,
INSN_UID (next));
spu_emit_branch_hint (next, branch, branch_target,
branch_addr - next_addr, blocks);
}
branch = 0;
}
 
/* JUMP_P will only be true at the end of a block. When
branch is already set it means we've previously decided
to propagate a hint for that branch into this block. */
if (CALL_P (insn) || (JUMP_P (insn) && !branch))
{
branch = 0;
if ((branch_target = get_branch_target (insn)))
{
branch = insn;
branch_addr = insn_addr;
required_dist = spu_hint_dist;
}
}
}
if (insn == BB_HEAD (bb))
break;
}
 
if (branch)
{
/* If we haven't emitted a hint for this branch yet, it might
be profitable to emit it in one of the predecessor blocks,
especially for loops. */
rtx bbend;
basic_block prev = 0, prop = 0, prev2 = 0;
int loop_exit = 0, simple_loop = 0;
int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
 
for (j = 0; j < EDGE_COUNT (bb->preds); j++)
if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
prev = EDGE_PRED (bb, j)->src;
else
prev2 = EDGE_PRED (bb, j)->src;
 
for (j = 0; j < EDGE_COUNT (bb->succs); j++)
if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
loop_exit = 1;
else if (EDGE_SUCC (bb, j)->dest == bb)
simple_loop = 1;
 
/* If this branch is a loop exit then propagate to previous
fallthru block. This catches the cases when it is a simple
loop or when there is an initial branch into the loop. */
if (prev && (loop_exit || simple_loop)
&& prev->loop_depth <= bb->loop_depth)
prop = prev;
 
/* If there is only one adjacent predecessor, don't propagate
outside this loop. This loop_depth test isn't perfect, but
I'm not sure the loop_father member is valid at this point. */
else if (prev && single_pred_p (bb)
&& prev->loop_depth == bb->loop_depth)
prop = prev;
 
/* If this is the JOIN block of a simple IF-THEN, then
propagate the hint to the HEADER block. */
else if (prev && prev2
&& EDGE_COUNT (bb->preds) == 2
&& EDGE_COUNT (prev->preds) == 1
&& EDGE_PRED (prev, 0)->src == prev2
&& prev2->loop_depth == bb->loop_depth
&& GET_CODE (branch_target) != REG)
prop = prev;
 
/* Don't propagate when:
- this is a simple loop and the hint would be too far
- this is not a simple loop and there are 16 insns in
this block already
- the predecessor block ends in a branch that will be
hinted
- the predecessor block ends in an insn that invalidates
the hint */
if (prop
&& prop->index >= 0
&& (bbend = BB_END (prop))
&& branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
(simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
&& (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
{
if (dump_file)
fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
"for %i (loop_exit %i simple_loop %i dist %i)\n",
bb->index, prop->index, bb->loop_depth,
INSN_UID (branch), loop_exit, simple_loop,
branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
 
spu_bb_info[prop->index].prop_jump = branch;
spu_bb_info[prop->index].bb_index = i;
}
else if (branch_addr - next_addr >= required_dist)
{
if (dump_file)
fprintf (dump_file, "hint for %i in block %i before %i\n",
INSN_UID (branch), bb->index,
INSN_UID (NEXT_INSN (insn)));
spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
branch_addr - next_addr, blocks);
}
branch = 0;
}
}
free (spu_bb_info);
 
if (!sbitmap_empty_p (blocks))
find_many_sub_basic_blocks (blocks);
 
/* We have to schedule to make sure alignment is ok. */
FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
 
/* The hints need to be scheduled, so call it again. */
schedule_insns ();
 
insert_hbrp ();
 
pad_bb ();
 
for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
{
/* Adjust the LABEL_REF in a hint when we have inserted a nop
between its branch label and the branch. We don't move the
label because GCC expects it at the beginning of the block. */
rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
rtx label_ref = XVECEXP (unspec, 0, 0);
rtx label = XEXP (label_ref, 0);
rtx branch;
int offset = 0;
for (branch = NEXT_INSN (label);
!JUMP_P (branch) && !CALL_P (branch);
branch = NEXT_INSN (branch))
if (NONJUMP_INSN_P (branch))
offset += get_attr_length (branch);
if (offset > 0)
XVECEXP (unspec, 0, 0) = plus_constant (label_ref, offset);
}
 
if (spu_flag_var_tracking)
{
df_analyze ();
timevar_push (TV_VAR_TRACKING);
variable_tracking_main ();
timevar_pop (TV_VAR_TRACKING);
df_finish_pass (false);
}
 
free_bb_for_insn ();
 
in_spu_reorg = 0;
}
 
/* Insn scheduling routines, primarily for dual issue. */
static int
spu_sched_issue_rate (void)
{
return 2;
}
 
static int
uses_ls_unit(rtx insn)
{
rtx set = single_set (insn);
if (set != 0
&& (GET_CODE (SET_DEST (set)) == MEM
|| GET_CODE (SET_SRC (set)) == MEM))
return 1;
return 0;
}
 
static int
get_pipe (rtx insn)
{
enum attr_type t;
/* Handle inline asm */
if (INSN_CODE (insn) == -1)
return -1;
t = get_attr_type (insn);
switch (t)
{
case TYPE_CONVERT:
return -2;
case TYPE_MULTI0:
return -1;
 
case TYPE_FX2:
case TYPE_FX3:
case TYPE_SPR:
case TYPE_NOP:
case TYPE_FXB:
case TYPE_FPD:
case TYPE_FP6:
case TYPE_FP7:
return 0;
 
case TYPE_LNOP:
case TYPE_SHUF:
case TYPE_LOAD:
case TYPE_STORE:
case TYPE_BR:
case TYPE_MULTI1:
case TYPE_HBR:
case TYPE_IPREFETCH:
return 1;
default:
abort ();
}
}
 
 
/* haifa-sched.c has a static variable that keeps track of the current
cycle. It is passed to spu_sched_reorder, and we record it here for
use by spu_sched_variable_issue. It won't be accurate if the
scheduler updates its clock_var between the two calls. */
static int clock_var;
 
/* This is used to keep track of insn alignment. Set to 0 at the
beginning of each block and increased by the "length" attr of each
insn scheduled. */
static int spu_sched_length;
 
/* Record when we've issued pipe0 and pipe1 insns so we can reorder the
ready list appropriately in spu_sched_reorder(). */
static int pipe0_clock;
static int pipe1_clock;
 
static int prev_clock_var;
 
static int prev_priority;
 
/* The SPU needs to load the next ilb sometime during the execution of
the previous ilb. There is a potential conflict if every cycle has a
load or store. To avoid the conflict we make sure the load/store
unit is free for at least one cycle during the execution of insns in
the previous ilb. */
static int spu_ls_first;
static int prev_ls_clock;
 
static void
spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
int max_ready ATTRIBUTE_UNUSED)
{
spu_sched_length = 0;
}
 
static void
spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
int max_ready ATTRIBUTE_UNUSED)
{
if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
{
/* When any block might be at least 8-byte aligned, assume they
will all be at least 8-byte aligned to make sure dual issue
works out correctly. */
spu_sched_length = 0;
}
spu_ls_first = INT_MAX;
clock_var = -1;
prev_ls_clock = -1;
pipe0_clock = -1;
pipe1_clock = -1;
prev_clock_var = -1;
prev_priority = -1;
}
 
static int
spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
{
int len;
int p;
if (GET_CODE (PATTERN (insn)) == USE
|| GET_CODE (PATTERN (insn)) == CLOBBER
|| (len = get_attr_length (insn)) == 0)
return more;
 
spu_sched_length += len;
 
/* Reset on inline asm */
if (INSN_CODE (insn) == -1)
{
spu_ls_first = INT_MAX;
pipe0_clock = -1;
pipe1_clock = -1;
return 0;
}
p = get_pipe (insn);
if (p == 0)
pipe0_clock = clock_var;
else
pipe1_clock = clock_var;
 
if (in_spu_reorg)
{
if (clock_var - prev_ls_clock > 1
|| INSN_CODE (insn) == CODE_FOR_iprefetch)
spu_ls_first = INT_MAX;
if (uses_ls_unit (insn))
{
if (spu_ls_first == INT_MAX)
spu_ls_first = spu_sched_length;
prev_ls_clock = clock_var;
}
 
/* The scheduler hasn't inserted the nop, but we will later on.
Include those nops in spu_sched_length. */
if (prev_clock_var == clock_var && (spu_sched_length & 7))
spu_sched_length += 4;
prev_clock_var = clock_var;
 
/* more is -1 when called from spu_sched_reorder for new insns
that don't have INSN_PRIORITY */
if (more >= 0)
prev_priority = INSN_PRIORITY (insn);
}
 
/* Always try issuing more insns. spu_sched_reorder will decide
when the cycle should be advanced. */
return 1;
}
 
/* This function is called for both TARGET_SCHED_REORDER and
TARGET_SCHED_REORDER2. */
static int
spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
rtx *ready, int *nreadyp, int clock)
{
int i, nready = *nreadyp;
int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
rtx insn;
 
clock_var = clock;
 
if (nready <= 0 || pipe1_clock >= clock)
return 0;
 
/* Find any rtl insns that don't generate assembly insns and schedule
them first. */
for (i = nready - 1; i >= 0; i--)
{
insn = ready[i];
if (INSN_CODE (insn) == -1
|| INSN_CODE (insn) == CODE_FOR_blockage
|| (INSN_P (insn) && get_attr_length (insn) == 0))
{
ready[i] = ready[nready - 1];
ready[nready - 1] = insn;
return 1;
}
}
 
pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
for (i = 0; i < nready; i++)
if (INSN_CODE (ready[i]) != -1)
{
insn = ready[i];
switch (get_attr_type (insn))
{
default:
case TYPE_MULTI0:
case TYPE_CONVERT:
case TYPE_FX2:
case TYPE_FX3:
case TYPE_SPR:
case TYPE_NOP:
case TYPE_FXB:
case TYPE_FPD:
case TYPE_FP6:
case TYPE_FP7:
pipe_0 = i;
break;
case TYPE_LOAD:
case TYPE_STORE:
pipe_ls = i;
case TYPE_LNOP:
case TYPE_SHUF:
case TYPE_BR:
case TYPE_MULTI1:
case TYPE_HBR:
pipe_1 = i;
break;
case TYPE_IPREFETCH:
pipe_hbrp = i;
break;
}
}
 
/* In the first scheduling phase, schedule loads and stores together
to increase the chance they will get merged during postreload CSE. */
if (!reload_completed && pipe_ls >= 0)
{
insn = ready[pipe_ls];
ready[pipe_ls] = ready[nready - 1];
ready[nready - 1] = insn;
return 1;
}
 
/* If there is an hbrp ready, prefer it over other pipe 1 insns. */
if (pipe_hbrp >= 0)
pipe_1 = pipe_hbrp;
 
/* When we have loads/stores in every cycle of the last 15 insns and
we are about to schedule another load/store, emit an hbrp insn
instead. */
if (in_spu_reorg
&& spu_sched_length - spu_ls_first >= 4 * 15
&& !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
{
insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
recog_memoized (insn);
if (pipe0_clock < clock)
PUT_MODE (insn, TImode);
spu_sched_variable_issue (file, verbose, insn, -1);
return 0;
}
 
/* In general, we want to emit nops to increase dual issue, but dual
issue isn't faster when one of the insns could be scheduled later
without affecting the critical path. We look at INSN_PRIORITY to
make a good guess, but it isn't perfect, so -mdual-nops=n can be
used to affect it. */
if (in_spu_reorg && spu_dual_nops < 10)
{
/* When we are at an even address and we are not issuing nops to
improve scheduling, then we need to advance the cycle.
if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
&& (spu_dual_nops == 0
|| (pipe_1 != -1
&& prev_priority >
INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
return 0;
 
/* When at an odd address, schedule the highest priority insn
without considering pipeline. */
if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
&& (spu_dual_nops == 0
|| (prev_priority >
INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
return 1;
}
 
 
/* We haven't issued a pipe0 insn yet this cycle; if there is a
pipe0 insn in the ready list, schedule it. */
if (pipe0_clock < clock && pipe_0 >= 0)
schedule_i = pipe_0;
 
/* Either we've scheduled a pipe0 insn already or there is no pipe0
insn to schedule. Put a pipe1 insn at the front of the ready list. */
else
schedule_i = pipe_1;
 
if (schedule_i > -1)
{
insn = ready[schedule_i];
ready[schedule_i] = ready[nready - 1];
ready[nready - 1] = insn;
return 1;
}
return 0;
}
 
/* INSN is dependent on DEP_INSN. */
static int
spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
rtx set;
 
/* The blockage pattern is used to prevent instructions from being
moved across it and has no cost. */
if (INSN_CODE (insn) == CODE_FOR_blockage
|| INSN_CODE (dep_insn) == CODE_FOR_blockage)
return 0;
 
if ((INSN_P (insn) && get_attr_length (insn) == 0)
|| (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
return 0;
 
/* Make sure hbrps are spread out. */
if (INSN_CODE (insn) == CODE_FOR_iprefetch
&& INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
return 8;
 
/* Make sure hints and hbrps are 2 cycles apart. */
if ((INSN_CODE (insn) == CODE_FOR_iprefetch
|| INSN_CODE (insn) == CODE_FOR_hbr)
&& (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
|| INSN_CODE (dep_insn) == CODE_FOR_hbr))
return 2;
 
/* An hbrp has no real dependency on other insns. */
if (INSN_CODE (insn) == CODE_FOR_iprefetch
|| INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
return 0;
 
/* Assuming that it is unlikely an argument register will be used in
the first cycle of the called function, we reduce the cost for
slightly better scheduling of dep_insn. When not hinted, the
mispredicted branch would hide the cost as well. */
if (CALL_P (insn))
{
rtx target = get_branch_target (insn);
if (GET_CODE (target) != REG || !set_of (target, insn))
return cost - 2;
return cost;
}
 
/* And when returning from a function, let's assume the return values
are completed sooner too. */
if (CALL_P (dep_insn))
return cost - 2;
 
/* Make sure an instruction that loads from the back chain is scheduled
away from the return instruction so a hint is more likely to get
issued. */
if (INSN_CODE (insn) == CODE_FOR__return
&& (set = single_set (dep_insn))
&& GET_CODE (SET_DEST (set)) == REG
&& REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
return 20;
 
/* The dfa scheduler sets cost to 0 for all anti-dependencies and the
scheduler makes every insn in a block anti-dependent on the final
jump_insn. We adjust here so higher cost insns will get scheduled
earlier. */
if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
return insn_cost (dep_insn) - 3;
 
return cost;
}
/* Create a CONST_DOUBLE from a string. */
struct rtx_def *
spu_float_const (const char *string, enum machine_mode mode)
{
REAL_VALUE_TYPE value;
value = REAL_VALUE_ATOF (string, mode);
return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
}
 
int
spu_constant_address_p (rtx x)
{
return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
|| GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
|| GET_CODE (x) == HIGH);
}
 
static enum spu_immediate
which_immediate_load (HOST_WIDE_INT val)
{
gcc_assert (val == trunc_int_for_mode (val, SImode));
 
if (val >= -0x8000 && val <= 0x7fff)
return SPU_IL;
if (val >= 0 && val <= 0x3ffff)
return SPU_ILA;
if ((val & 0xffff) == ((val >> 16) & 0xffff))
return SPU_ILH;
if ((val & 0xffff) == 0)
return SPU_ILHU;
 
return SPU_NONE;
}
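
/* Examples:
     val = -42         -> SPU_IL   (fits the signed 16-bit il field)
     val = 0x12345     -> SPU_ILA  (unsigned 18-bit)
     val = 0x00730073  -> SPU_ILH  (same halfword repeated)
     val = 0x12340000  -> SPU_ILHU (low halfword is zero)
     val = 0x12345678  -> SPU_NONE (needs two instructions).  */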
 
/* Return true when OP can be loaded by one of the il instructions, or
when flow2 is not completed and OP can be loaded using ilhu and iohl. */
int
immediate_load_p (rtx op, enum machine_mode mode)
{
if (CONSTANT_P (op))
{
enum immediate_class c = classify_immediate (op, mode);
return c == IC_IL1 || c == IC_IL1s
|| (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
}
return 0;
}
 
/* Return true if the first SIZE bytes of ARR are a constant that can be
generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
represent the size and offset of the instruction to use. */
static int
cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
{
int cpat, run, i, start;
cpat = 1;
run = 0;
start = -1;
for (i = 0; i < size && cpat; i++)
if (arr[i] != i+16)
{
if (!run)
{
start = i;
if (arr[i] == 3)
run = 1;
else if (arr[i] == 2 && arr[i+1] == 3)
run = 2;
else if (arr[i] == 0)
{
while (arr[i+run] == run && i+run < 16)
run++;
if (run != 4 && run != 8)
cpat = 0;
}
else
cpat = 0;
if ((i & (run-1)) != 0)
cpat = 0;
i += run;
}
else
cpat = 0;
}
if (cpat && (run || size < 16))
{
if (run == 0)
run = 1;
if (prun)
*prun = run;
if (pstart)
*pstart = start == -1 ? 16-run : start;
return 1;
}
return 0;
}
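
/* Example: the 16-byte pattern
     10 11 12 13 00 01 02 03 18 19 1a 1b 1c 1d 1e 1f
   matches the identity pattern (i + 16) everywhere except bytes 4-7,
   which hold the run 0,1,2,3.  cpat_info accepts it with *prun == 4 and
   *pstart == 4, i.e. a cwd-style pattern for inserting a word at
   offset 4.  */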
 
/* OP is a CONSTANT_P. Determine what instructions can be used to load
it into a register. MODE is only valid when OP is a CONST_INT. */
static enum immediate_class
classify_immediate (rtx op, enum machine_mode mode)
{
HOST_WIDE_INT val;
unsigned char arr[16];
int i, j, repeated, fsmbi, repeat;
 
gcc_assert (CONSTANT_P (op));
 
if (GET_MODE (op) != VOIDmode)
mode = GET_MODE (op);
 
/* A V4SI const_vector with all identical symbols is ok. */
if (!flag_pic
&& mode == V4SImode
&& GET_CODE (op) == CONST_VECTOR
&& GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
&& GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
&& CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
&& CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
&& CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
op = CONST_VECTOR_ELT (op, 0);
 
switch (GET_CODE (op))
{
case SYMBOL_REF:
case LABEL_REF:
return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
 
case CONST:
/* We can never know if the resulting address fits in 18 bits and can be
loaded with ila. For now, assume the address will not overflow if
the displacement is "small" (fits 'K' constraint). */
if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
{
rtx sym = XEXP (XEXP (op, 0), 0);
rtx cst = XEXP (XEXP (op, 0), 1);
 
if (GET_CODE (sym) == SYMBOL_REF
&& GET_CODE (cst) == CONST_INT
&& satisfies_constraint_K (cst))
return IC_IL1s;
}
return IC_IL2s;
 
case HIGH:
return IC_IL1s;
 
case CONST_VECTOR:
for (i = 0; i < GET_MODE_NUNITS (mode); i++)
if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
&& GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
return IC_POOL;
/* Fall through. */
 
case CONST_INT:
case CONST_DOUBLE:
constant_to_array (mode, op, arr);
 
/* Check that each 4-byte slot is identical. */
repeated = 1;
for (i = 4; i < 16; i += 4)
for (j = 0; j < 4; j++)
if (arr[j] != arr[i + j])
repeated = 0;
 
if (repeated)
{
val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
val = trunc_int_for_mode (val, SImode);
 
if (which_immediate_load (val) != SPU_NONE)
return IC_IL1;
}
 
/* Any mode of 2 bytes or smaller can be loaded with an il
instruction. */
gcc_assert (GET_MODE_SIZE (mode) > 2);
 
fsmbi = 1;
repeat = 0;
for (i = 0; i < 16 && fsmbi; i++)
if (arr[i] != 0 && repeat == 0)
repeat = arr[i];
else if (arr[i] != 0 && arr[i] != repeat)
fsmbi = 0;
if (fsmbi)
return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
 
if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
return IC_CPAT;
 
if (repeated)
return IC_IL2;
 
return IC_POOL;
default:
break;
}
gcc_unreachable ();
}
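
/* Summary of the classification above for CONST_INT, CONST_DOUBLE and
   all-numeric CONST_VECTORs, in the order tested:
     - every 4-byte slot identical, loadable by a single il form  -> IC_IL1
     - nonzero bytes all equal to 0xff                            -> IC_FSMBI
     - nonzero bytes all equal to some other single value         -> IC_FSMBI2
     - a cbd/chd/cwd/cdd control pattern (see cpat_info)          -> IC_CPAT
     - every 4-byte slot identical, needing ilhu + iohl           -> IC_IL2
     - anything else                                              -> IC_POOL  */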
 
static enum spu_immediate
which_logical_immediate (HOST_WIDE_INT val)
{
gcc_assert (val == trunc_int_for_mode (val, SImode));
 
if (val >= -0x200 && val <= 0x1ff)
return SPU_ORI;
if (val >= 0 && val <= 0xffff)
return SPU_IOHL;
if ((val & 0xffff) == ((val >> 16) & 0xffff))
{
val = trunc_int_for_mode (val, HImode);
if (val >= -0x200 && val <= 0x1ff)
return SPU_ORHI;
if ((val & 0xff) == ((val >> 8) & 0xff))
{
val = trunc_int_for_mode (val, QImode);
if (val >= -0x200 && val <= 0x1ff)
return SPU_ORBI;
}
}
return SPU_NONE;
}
 
/* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
CONST_DOUBLEs. */
static int
const_vector_immediate_p (rtx x)
{
int i;
gcc_assert (GET_CODE (x) == CONST_VECTOR);
for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
&& GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
return 0;
return 1;
}
 
int
logical_immediate_p (rtx op, enum machine_mode mode)
{
HOST_WIDE_INT val;
unsigned char arr[16];
int i, j;
 
gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
|| GET_CODE (op) == CONST_VECTOR);
 
if (GET_CODE (op) == CONST_VECTOR
&& !const_vector_immediate_p (op))
return 0;
 
if (GET_MODE (op) != VOIDmode)
mode = GET_MODE (op);
 
constant_to_array (mode, op, arr);
 
/* Check that bytes are repeated. */
for (i = 4; i < 16; i += 4)
for (j = 0; j < 4; j++)
if (arr[j] != arr[i + j])
return 0;
 
val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
val = trunc_int_for_mode (val, SImode);
 
i = which_logical_immediate (val);
return i != SPU_NONE && i != SPU_IOHL;
}
 
int
iohl_immediate_p (rtx op, enum machine_mode mode)
{
HOST_WIDE_INT val;
unsigned char arr[16];
int i, j;
 
gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
|| GET_CODE (op) == CONST_VECTOR);
 
if (GET_CODE (op) == CONST_VECTOR
&& !const_vector_immediate_p (op))
return 0;
 
if (GET_MODE (op) != VOIDmode)
mode = GET_MODE (op);
 
constant_to_array (mode, op, arr);
 
/* Check that bytes are repeated. */
for (i = 4; i < 16; i += 4)
for (j = 0; j < 4; j++)
if (arr[j] != arr[i + j])
return 0;
 
val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
val = trunc_int_for_mode (val, SImode);
 
return val >= 0 && val <= 0xffff;
}
 
int
arith_immediate_p (rtx op, enum machine_mode mode,
HOST_WIDE_INT low, HOST_WIDE_INT high)
{
HOST_WIDE_INT val;
unsigned char arr[16];
int bytes, i, j;
 
gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
|| GET_CODE (op) == CONST_VECTOR);
 
if (GET_CODE (op) == CONST_VECTOR
&& !const_vector_immediate_p (op))
return 0;
 
if (GET_MODE (op) != VOIDmode)
mode = GET_MODE (op);
 
constant_to_array (mode, op, arr);
 
if (VECTOR_MODE_P (mode))
mode = GET_MODE_INNER (mode);
 
bytes = GET_MODE_SIZE (mode);
mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
 
/* Check that bytes are repeated. */
for (i = bytes; i < 16; i += bytes)
for (j = 0; j < bytes; j++)
if (arr[j] != arr[i + j])
return 0;
 
val = arr[0];
for (j = 1; j < bytes; j++)
val = (val << 8) | arr[j];
 
val = trunc_int_for_mode (val, mode);
 
return val >= low && val <= high;
}
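
/* A minimal standalone sketch of the test above (also used by
   exp2_immediate_p below): the 16-byte image must repeat with a period
   equal to the element size, and the sign-extended element value must
   fall in [LOW, HIGH].  Plain C for illustration only; the helper is
   not part of the compiler.

     static int
     repeated_element_in_range (const unsigned char arr[16], int bytes,
                                long long low, long long high)
     {
       int i, j;
       unsigned long long uval;
       long long val;

       for (i = bytes; i < 16; i += bytes)
         for (j = 0; j < bytes; j++)
           if (arr[j] != arr[i + j])
             return 0;

       uval = 0;
       for (j = 0; j < bytes; j++)
         uval = (uval << 8) | arr[j];
       if (bytes < 8 && (uval & (1ull << (8 * bytes - 1))))
         uval -= 1ull << (8 * bytes);
       val = (long long) uval;
       return val >= low && val <= high;
     }  */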
 
/* Return TRUE when OP is an immediate and an exact power of 2, i.e.
OP == 2^scale with scale >= LOW && scale <= HIGH. When OP is a vector,
all elements must be the same. */
bool
exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high)
{
enum machine_mode int_mode;
HOST_WIDE_INT val;
unsigned char arr[16];
int bytes, i, j;
 
gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
|| GET_CODE (op) == CONST_VECTOR);
 
if (GET_CODE (op) == CONST_VECTOR
&& !const_vector_immediate_p (op))
return 0;
 
if (GET_MODE (op) != VOIDmode)
mode = GET_MODE (op);
 
constant_to_array (mode, op, arr);
 
if (VECTOR_MODE_P (mode))
mode = GET_MODE_INNER (mode);
 
bytes = GET_MODE_SIZE (mode);
int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
 
/* Check that bytes are repeated. */
for (i = bytes; i < 16; i += bytes)
for (j = 0; j < bytes; j++)
if (arr[j] != arr[i + j])
return 0;
 
val = arr[0];
for (j = 1; j < bytes; j++)
val = (val << 8) | arr[j];
 
val = trunc_int_for_mode (val, int_mode);
 
/* Currently, we only handle SFmode */
gcc_assert (mode == SFmode);
if (mode == SFmode)
{
int exp = (val >> 23) - 127;
return val > 0 && (val & 0x007fffff) == 0
&& exp >= low && exp <= high;
}
return FALSE;
}
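
/* Example: 2.0f has the single-precision bit image 0x40000000, so
   (val >> 23) - 127 == 1 and the mantissa bits are zero;
   exp2_immediate_p accepts it whenever LOW <= 1 <= HIGH.  0.75f
   (0x3f400000) has nonzero mantissa bits and is rejected.  */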
 
/* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
 
static int
ea_symbol_ref (rtx *px, void *data ATTRIBUTE_UNUSED)
{
rtx x = *px;
tree decl;
 
if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
{
rtx plus = XEXP (x, 0);
rtx op0 = XEXP (plus, 0);
rtx op1 = XEXP (plus, 1);
if (GET_CODE (op1) == CONST_INT)
x = op0;
}
 
return (GET_CODE (x) == SYMBOL_REF
&& (decl = SYMBOL_REF_DECL (x)) != 0
&& TREE_CODE (decl) == VAR_DECL
&& TYPE_ADDR_SPACE (TREE_TYPE (decl)));
}
 
/* We accept:
- any 32-bit constant (SImode, SFmode)
- any constant that can be generated with fsmbi (any mode)
- a 64-bit constant where the high and low bits are identical
(DImode, DFmode)
- a 128-bit constant where the four 32-bit words match. */
int
spu_legitimate_constant_p (rtx x)
{
if (GET_CODE (x) == HIGH)
x = XEXP (x, 0);
 
/* Reject any __ea qualified reference. These can't appear in
instructions but must be forced to the constant pool. */
if (for_each_rtx (&x, ea_symbol_ref, 0))
return 0;
 
/* V4SI with all identical symbols is valid. */
if (!flag_pic
&& GET_MODE (x) == V4SImode
&& (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
|| GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
|| GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
&& CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
&& CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
 
if (GET_CODE (x) == CONST_VECTOR
&& !const_vector_immediate_p (x))
return 0;
return 1;
}
 
/* Valid addresses are:
- symbol_ref, label_ref, const
- reg
- reg + const_int, where const_int is 16 byte aligned
- reg + reg, alignment doesn't matter
The alignment matters in the reg+const case because lqd and stqd
ignore the 4 least significant bits of the const. We only care about
16 byte modes because the expand phase will change all smaller MEM
references to TImode. */
static bool
spu_legitimate_address_p (enum machine_mode mode,
rtx x, bool reg_ok_strict)
{
int aligned = GET_MODE_SIZE (mode) >= 16;
if (aligned
&& GET_CODE (x) == AND
&& GET_CODE (XEXP (x, 1)) == CONST_INT
&& INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
x = XEXP (x, 0);
switch (GET_CODE (x))
{
case LABEL_REF:
return !TARGET_LARGE_MEM;
 
case SYMBOL_REF:
case CONST:
/* Keep __ea references until reload so that spu_expand_mov can see them
in MEMs. */
if (ea_symbol_ref (&x, 0))
return !reload_in_progress && !reload_completed;
return !TARGET_LARGE_MEM;
 
case CONST_INT:
return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
 
case SUBREG:
x = XEXP (x, 0);
if (REG_P (x))
return 0;
 
case REG:
return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
 
case PLUS:
case LO_SUM:
{
rtx op0 = XEXP (x, 0);
rtx op1 = XEXP (x, 1);
if (GET_CODE (op0) == SUBREG)
op0 = XEXP (op0, 0);
if (GET_CODE (op1) == SUBREG)
op1 = XEXP (op1, 0);
if (GET_CODE (op0) == REG
&& INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
&& GET_CODE (op1) == CONST_INT
&& INTVAL (op1) >= -0x2000
&& INTVAL (op1) <= 0x1fff
&& (!aligned || (INTVAL (op1) & 15) == 0))
return TRUE;
if (GET_CODE (op0) == REG
&& INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
&& GET_CODE (op1) == REG
&& INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
return TRUE;
}
break;
 
default:
break;
}
return FALSE;
}
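
/* Examples, for a 16-byte (aligned) access:
     (reg)                        accepted
     (plus (reg) (const_int 32))  accepted, offset is 16-byte aligned
     (plus (reg) (const_int 4))   rejected, lqd/stqd would ignore the 4
     (plus (reg) (reg))           accepted
     (symbol_ref)                 accepted for a non-__ea symbol unless
                                  TARGET_LARGE_MEM.  */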
 
/* Like spu_legitimate_address_p, except with named addresses. */
static bool
spu_addr_space_legitimate_address_p (enum machine_mode mode, rtx x,
bool reg_ok_strict, addr_space_t as)
{
if (as == ADDR_SPACE_EA)
return (REG_P (x) && (GET_MODE (x) == EAmode));
 
else if (as != ADDR_SPACE_GENERIC)
gcc_unreachable ();
 
return spu_legitimate_address_p (mode, x, reg_ok_strict);
}
 
/* When the address is reg + const_int, force the const_int into a
register. */
rtx
spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
enum machine_mode mode ATTRIBUTE_UNUSED)
{
rtx op0, op1;
/* Make sure both operands are registers. */
if (GET_CODE (x) == PLUS)
{
op0 = XEXP (x, 0);
op1 = XEXP (x, 1);
if (ALIGNED_SYMBOL_REF_P (op0))
{
op0 = force_reg (Pmode, op0);
mark_reg_pointer (op0, 128);
}
else if (GET_CODE (op0) != REG)
op0 = force_reg (Pmode, op0);
if (ALIGNED_SYMBOL_REF_P (op1))
{
op1 = force_reg (Pmode, op1);
mark_reg_pointer (op1, 128);
}
else if (GET_CODE (op1) != REG)
op1 = force_reg (Pmode, op1);
x = gen_rtx_PLUS (Pmode, op0, op1);
}
return x;
}
 
/* Like spu_legitimate_address, except with named address support. */
static rtx
spu_addr_space_legitimize_address (rtx x, rtx oldx, enum machine_mode mode,
addr_space_t as)
{
if (as != ADDR_SPACE_GENERIC)
return x;
 
return spu_legitimize_address (x, oldx, mode);
}
 
/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
struct attribute_spec.handler. */
static tree
spu_handle_fndecl_attribute (tree * node,
tree name,
tree args ATTRIBUTE_UNUSED,
int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
{
if (TREE_CODE (*node) != FUNCTION_DECL)
{
warning (0, "%qE attribute only applies to functions",
name);
*no_add_attrs = true;
}
 
return NULL_TREE;
}
 
/* Handle the "vector" attribute. */
static tree
spu_handle_vector_attribute (tree * node, tree name,
tree args ATTRIBUTE_UNUSED,
int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
{
tree type = *node, result = NULL_TREE;
enum machine_mode mode;
int unsigned_p;
 
while (POINTER_TYPE_P (type)
|| TREE_CODE (type) == FUNCTION_TYPE
|| TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
type = TREE_TYPE (type);
 
mode = TYPE_MODE (type);
 
unsigned_p = TYPE_UNSIGNED (type);
switch (mode)
{
case DImode:
result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
break;
case SImode:
result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
break;
case HImode:
result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
break;
case QImode:
result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
break;
case SFmode:
result = V4SF_type_node;
break;
case DFmode:
result = V2DF_type_node;
break;
default:
break;
}
 
/* Propagate qualifiers attached to the element type
onto the vector type. */
if (result && result != type && TYPE_QUALS (type))
result = build_qualified_type (result, TYPE_QUALS (type));
 
*no_add_attrs = true; /* No need to hang on to the attribute. */
 
if (!result)
warning (0, "%qE attribute ignored", name);
else
*node = lang_hooks.types.reconstruct_complex_type (*node, result);
 
return NULL_TREE;
}
 
/* Return nonzero if FUNC is a naked function. */
static int
spu_naked_function_p (tree func)
{
tree a;
 
if (TREE_CODE (func) != FUNCTION_DECL)
abort ();
 
a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
return a != NULL_TREE;
}
 
int
spu_initial_elimination_offset (int from, int to)
{
int saved_regs_size = spu_saved_regs_size ();
int sp_offset = 0;
if (!current_function_is_leaf || crtl->outgoing_args_size
|| get_frame_size () || saved_regs_size)
sp_offset = STACK_POINTER_OFFSET;
if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
return get_frame_size () + crtl->outgoing_args_size + sp_offset;
else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
return get_frame_size ();
else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
return sp_offset + crtl->outgoing_args_size
+ get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
return get_frame_size () + saved_regs_size + sp_offset;
else
gcc_unreachable ();
}
 
rtx
spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
{
enum machine_mode mode = TYPE_MODE (type);
int byte_size = ((mode == BLKmode)
? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
 
/* Make sure small structs are left justified in a register. */
if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
&& byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
{
enum machine_mode smode;
rtvec v;
int i;
int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
int n = byte_size / UNITS_PER_WORD;
v = rtvec_alloc (nregs);
for (i = 0; i < n; i++)
{
RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
gen_rtx_REG (TImode,
FIRST_RETURN_REGNUM
+ i),
GEN_INT (UNITS_PER_WORD * i));
byte_size -= UNITS_PER_WORD;
}
 
if (n < nregs)
{
if (byte_size < 4)
byte_size = 4;
smode =
smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
RTVEC_ELT (v, n) =
gen_rtx_EXPR_LIST (VOIDmode,
gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
GEN_INT (UNITS_PER_WORD * n));
}
return gen_rtx_PARALLEL (mode, v);
}
return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
}
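
/* Example, assuming 16-byte registers (UNITS_PER_WORD == 16): a 20-byte
   struct is returned as a PARALLEL with a TImode piece in
   FIRST_RETURN_REGNUM at byte offset 0 and an SImode piece in
   FIRST_RETURN_REGNUM + 1 at byte offset 16, keeping the aggregate left
   justified.  */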
 
rtx
spu_function_arg (CUMULATIVE_ARGS cum,
enum machine_mode mode,
tree type, int named ATTRIBUTE_UNUSED)
{
int byte_size;
 
if (cum >= MAX_REGISTER_ARGS)
return 0;
 
byte_size = ((mode == BLKmode)
? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
 
/* The ABI does not allow parameters to be passed partially in a
register and partially on the stack. */
if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
return 0;
 
/* Make sure small structs are left justified in a register. */
if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
&& byte_size < UNITS_PER_WORD && byte_size > 0)
{
enum machine_mode smode;
rtx gr_reg;
if (byte_size < 4)
byte_size = 4;
smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
const0_rtx);
return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
}
else
return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
}
 
/* Variable sized types are passed by reference. */
static bool
spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
enum machine_mode mode ATTRIBUTE_UNUSED,
const_tree type, bool named ATTRIBUTE_UNUSED)
{
return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
}
 
/* Var args. */
 
/* Create and return the va_list datatype.
 
On SPU, va_list is an array type equivalent to
 
typedef struct __va_list_tag
{
void *__args __attribute__((__aligned(16)));
void *__skip __attribute__((__aligned(16)));
} va_list[1];
 
where __args points to the arg that will be returned by the next
va_arg(), and __skip points to the previous stack frame such that
when __args == __skip we should advance __args by 32 bytes. */
static tree
spu_build_builtin_va_list (void)
{
tree f_args, f_skip, record, type_decl;
bool owp;
 
record = (*lang_hooks.types.make_type) (RECORD_TYPE);
 
type_decl =
build_decl (BUILTINS_LOCATION,
TYPE_DECL, get_identifier ("__va_list_tag"), record);
 
f_args = build_decl (BUILTINS_LOCATION,
FIELD_DECL, get_identifier ("__args"), ptr_type_node);
f_skip = build_decl (BUILTINS_LOCATION,
FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
 
DECL_FIELD_CONTEXT (f_args) = record;
DECL_ALIGN (f_args) = 128;
DECL_USER_ALIGN (f_args) = 1;
 
DECL_FIELD_CONTEXT (f_skip) = record;
DECL_ALIGN (f_skip) = 128;
DECL_USER_ALIGN (f_skip) = 1;
 
TREE_CHAIN (record) = type_decl;
TYPE_NAME (record) = type_decl;
TYPE_FIELDS (record) = f_args;
TREE_CHAIN (f_args) = f_skip;
 
/* We know this is being padded and we want it that way. It is an
internal type so hide the warnings from the user. */
owp = warn_padded;
warn_padded = false;
 
layout_type (record);
 
warn_padded = owp;
 
/* The correct type is an array type of one element. */
return build_array_type (record, build_index_type (size_zero_node));
}
 
/* Implement va_start by filling the va_list structure VALIST.
NEXTARG points to the first anonymous stack argument.
 
The following global variables are used to initialize
the va_list structure:
 
crtl->args.info;
the CUMULATIVE_ARGS for this function
 
crtl->args.arg_offset_rtx:
holds the offset of the first anonymous stack argument
(relative to the virtual arg pointer). */
 
static void
spu_va_start (tree valist, rtx nextarg)
{
tree f_args, f_skip;
tree args, skip, t;
 
f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
f_skip = TREE_CHAIN (f_args);
 
valist = build_va_arg_indirect_ref (valist);
args =
build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
skip =
build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
 
/* Find the __args area. */
t = make_tree (TREE_TYPE (args), nextarg);
if (crtl->args.pretend_args_size > 0)
t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
size_int (-STACK_POINTER_OFFSET));
t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
TREE_SIDE_EFFECTS (t) = 1;
expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
 
/* Find the __skip area. */
t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
size_int (crtl->args.pretend_args_size
- STACK_POINTER_OFFSET));
t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
TREE_SIDE_EFFECTS (t) = 1;
expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
 
/* Gimplify va_arg by updating the va_list structure
VALIST as required to retrieve an argument of type
TYPE, and returning that argument.
ret = va_arg(VALIST, TYPE);
 
generates code equivalent to:
paddedsize = (sizeof(TYPE) + 15) & -16;
if (VALIST.__args + paddedsize > VALIST.__skip
&& VALIST.__args <= VALIST.__skip)
addr = VALIST.__skip + 32;
else
addr = VALIST.__args;
VALIST.__args = addr + paddedsize;
ret = *(TYPE *)addr;
*/
static tree
spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
gimple_seq * post_p ATTRIBUTE_UNUSED)
{
tree f_args, f_skip;
tree args, skip;
HOST_WIDE_INT size, rsize;
tree paddedsize, addr, tmp;
bool pass_by_reference_p;
 
f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
f_skip = TREE_CHAIN (f_args);
 
valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
args =
build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
skip =
build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
 
addr = create_tmp_var (ptr_type_node, "va_arg");
 
/* if an object is dynamically sized, a pointer to it is passed
instead of the object itself. */
pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
false);
if (pass_by_reference_p)
type = build_pointer_type (type);
size = int_size_in_bytes (type);
rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
 
/* build conditional expression to calculate addr. The expression
will be gimplified later. */
paddedsize = size_int (rsize);
tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (args), paddedsize);
tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
unshare_expr (skip)));
 
tmp = build3 (COND_EXPR, ptr_type_node, tmp,
build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (skip),
size_int (32)), unshare_expr (args));
 
gimplify_assign (addr, tmp, pre_p);
 
/* update VALIST.__args */
tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
gimplify_assign (unshare_expr (args), tmp, pre_p);
 
addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
addr);
 
if (pass_by_reference_p)
addr = build_va_arg_indirect_ref (addr);
 
return build_va_arg_indirect_ref (addr);
}
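
/* Worked example: for an int argument paddedsize is 16 ((4 + 15) & -16),
   so each scalar va_arg advances __args by one 16-byte slot; a 20-byte
   struct advances it by 32.  The 32-byte jump past __skip is taken only
   when the padded argument would reach or cross __skip.  */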
 
/* Save parameter registers starting with the register that corresponds
to the first unnamed parameter. If the first unnamed parameter is
in the stack then save no registers. Set pretend_args_size to the
amount of space needed to save the registers. */
void
spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
tree type, int *pretend_size, int no_rtl)
{
if (!no_rtl)
{
rtx tmp;
int regno;
int offset;
int ncum = *cum;
 
/* CUM currently points to the last named argument; we want to
start at the next argument. */
FUNCTION_ARG_ADVANCE (ncum, mode, type, 1);
 
offset = -STACK_POINTER_OFFSET;
for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
{
tmp = gen_frame_mem (V4SImode,
plus_constant (virtual_incoming_args_rtx,
offset));
emit_move_insn (tmp,
gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
offset += 16;
}
*pretend_size = offset + STACK_POINTER_OFFSET;
}
}
void
spu_conditional_register_usage (void)
{
if (flag_pic)
{
fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
}
}
 
/* This is called any time we inspect the alignment of a register for
addresses. */
static int
reg_aligned_for_addr (rtx x)
{
int regno =
REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
return REGNO_POINTER_ALIGN (regno) >= 128;
}
 
/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
into its SYMBOL_REF_FLAGS. */
static void
spu_encode_section_info (tree decl, rtx rtl, int first)
{
default_encode_section_info (decl, rtl, first);
 
/* If a variable has a forced alignment to < 16 bytes, mark it with
SYMBOL_FLAG_ALIGN1. */
if (TREE_CODE (decl) == VAR_DECL
&& DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
}
 
/* Return TRUE if we are certain the mem refers to a complete object
which is both 16-byte aligned and padded to a 16-byte boundary. This
would make it safe to store with a single instruction.
We guarantee the alignment and padding for static objects by aligning
all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
FIXME: We currently cannot guarantee this for objects on the stack
because assign_parm_setup_stack calls assign_stack_local with the
alignment of the parameter mode and in that case the alignment never
gets adjusted by LOCAL_ALIGNMENT. */
static int
store_with_one_insn_p (rtx mem)
{
enum machine_mode mode = GET_MODE (mem);
rtx addr = XEXP (mem, 0);
if (mode == BLKmode)
return 0;
if (GET_MODE_SIZE (mode) >= 16)
return 1;
/* Only static objects. */
if (GET_CODE (addr) == SYMBOL_REF)
{
/* We use the associated declaration to make sure the access is
referring to the whole object.
We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
if it is necessary. Will there be cases where one exists, and
the other does not? Will there be cases where both exist, but
have different types? */
tree decl = MEM_EXPR (mem);
if (decl
&& TREE_CODE (decl) == VAR_DECL
&& GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
return 1;
decl = SYMBOL_REF_DECL (addr);
if (decl
&& TREE_CODE (decl) == VAR_DECL
&& GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
return 1;
}
return 0;
}
 
/* Return 1 when the address is not valid for a simple load and store as
required by the '_mov*' patterns. We could make this less strict
for loads, but we prefer MEMs to look the same so they are more
likely to be merged. */
static int
address_needs_split (rtx mem)
{
if (GET_MODE_SIZE (GET_MODE (mem)) < 16
&& (GET_MODE_SIZE (GET_MODE (mem)) < 4
|| !(store_with_one_insn_p (mem)
|| mem_is_padded_component_ref (mem))))
return 1;
 
return 0;
}
 
static GTY(()) rtx cache_fetch; /* __cache_fetch function */
static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
 
/* MEM is known to be an __ea qualified memory access. Emit a call to
fetch the ppu memory to local store, and return its address in local
store. */
 
static void
ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
{
if (is_store)
{
rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
if (!cache_fetch_dirty)
cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
2, ea_addr, EAmode, ndirty, SImode);
}
else
{
if (!cache_fetch)
cache_fetch = init_one_libfunc ("__cache_fetch");
emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
1, ea_addr, EAmode);
}
}
 
/* Like ea_load_store, but do the cache tag comparison and, for stores,
dirty bit marking, inline.
 
The cache control data structure is an array of
 
struct __cache_tag_array
{
unsigned int tag_lo[4];
unsigned int tag_hi[4];
void *data_pointer[4];
int reserved[4];
vector unsigned short dirty_bits[4];
} */
 
static void
ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
{
rtx ea_addr_si;
HOST_WIDE_INT v;
rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
rtx index_mask = gen_reg_rtx (SImode);
rtx tag_arr = gen_reg_rtx (Pmode);
rtx splat_mask = gen_reg_rtx (TImode);
rtx splat = gen_reg_rtx (V4SImode);
rtx splat_hi = NULL_RTX;
rtx tag_index = gen_reg_rtx (Pmode);
rtx block_off = gen_reg_rtx (SImode);
rtx tag_addr = gen_reg_rtx (Pmode);
rtx tag = gen_reg_rtx (V4SImode);
rtx cache_tag = gen_reg_rtx (V4SImode);
rtx cache_tag_hi = NULL_RTX;
rtx cache_ptrs = gen_reg_rtx (TImode);
rtx cache_ptrs_si = gen_reg_rtx (SImode);
rtx tag_equal = gen_reg_rtx (V4SImode);
rtx tag_equal_hi = NULL_RTX;
rtx tag_eq_pack = gen_reg_rtx (V4SImode);
rtx tag_eq_pack_si = gen_reg_rtx (SImode);
rtx eq_index = gen_reg_rtx (SImode);
rtx bcomp, hit_label, hit_ref, cont_label, insn;
 
if (spu_ea_model != 32)
{
splat_hi = gen_reg_rtx (V4SImode);
cache_tag_hi = gen_reg_rtx (V4SImode);
tag_equal_hi = gen_reg_rtx (V4SImode);
}
 
emit_move_insn (index_mask, plus_constant (tag_size_sym, -128));
emit_move_insn (tag_arr, tag_arr_sym);
v = 0x0001020300010203LL;
emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
ea_addr_si = ea_addr;
if (spu_ea_model != 32)
ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
 
/* tag_index = ea_addr & (tag_array_size - 128) */
emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
 
/* splat ea_addr to all 4 slots. */
emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
/* Similarly for high 32 bits of ea_addr. */
if (spu_ea_model != 32)
emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
 
/* block_off = ea_addr & 127 */
emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
 
/* tag_addr = tag_arr + tag_index */
emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
 
/* Read cache tags. */
emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
if (spu_ea_model != 32)
emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
plus_constant (tag_addr, 16)));
 
/* tag = ea_addr & -128 */
emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
 
/* Read all four cache data pointers. */
emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
plus_constant (tag_addr, 32)));
 
/* Compare tags. */
emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
if (spu_ea_model != 32)
{
emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
}
 
/* At most one of the tags compare equal, so tag_equal has one
32-bit slot set to all 1's, with the other slots all zero.
gbb picks off low bit from each byte in the 128-bit registers,
so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
we have a hit. */
emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
 
/* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
 
/* This lets us rotate the corresponding cache data pointer into slot 0
(rotqby rotates by eq_index mod 16 bytes). */
emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
 
/* Add block offset to form final data address. */
emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
 
/* Check that we did hit. */
hit_label = gen_label_rtx ();
hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
hit_ref, pc_rtx)));
/* Say that this branch is very likely to happen. */
v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
REG_NOTES (insn)
= gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (v), REG_NOTES (insn));
 
ea_load_store (mem, is_store, ea_addr, data_addr);
cont_label = gen_label_rtx ();
emit_jump_insn (gen_jump (cont_label));
emit_barrier ();
 
emit_label (hit_label);
 
if (is_store)
{
HOST_WIDE_INT v_hi;
rtx dirty_bits = gen_reg_rtx (TImode);
rtx dirty_off = gen_reg_rtx (SImode);
rtx dirty_128 = gen_reg_rtx (TImode);
rtx neg_block_off = gen_reg_rtx (SImode);
 
/* Set up mask with one dirty bit per byte of the mem we are
writing, starting from top bit. */
v_hi = v = -1;
v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
{
v_hi = v;
v = 0;
}
emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
 
/* Form index into cache dirty_bits. eq_index is one of
0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
0x40, 0x50, 0x60 or 0x70 which just happens to be the
offset to each of the four dirty_bits elements. */
emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
 
emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
 
/* Rotate bit mask to proper bit. */
emit_insn (gen_negsi2 (neg_block_off, block_off));
emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
 
/* Or in the new dirty bits. */
emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
 
/* Store. */
emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
}
 
emit_label (cont_label);
}
 
static rtx
expand_ea_mem (rtx mem, bool is_store)
{
rtx ea_addr;
rtx data_addr = gen_reg_rtx (Pmode);
rtx new_mem;
 
ea_addr = force_reg (EAmode, XEXP (mem, 0));
if (optimize_size || optimize == 0)
ea_load_store (mem, is_store, ea_addr, data_addr);
else
ea_load_store_inline (mem, is_store, ea_addr, data_addr);
 
if (ea_alias_set == -1)
ea_alias_set = new_alias_set ();
 
/* We generate a new MEM RTX to refer to the copy of the data
in the cache. We do not copy memory attributes (except the
alignment) from the original MEM, as they may no longer apply
to the cache copy. */
new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
set_mem_alias_set (new_mem, ea_alias_set);
set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
 
return new_mem;
}
 
int
spu_expand_mov (rtx * ops, enum machine_mode mode)
{
if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
abort ();
 
if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
{
rtx from = SUBREG_REG (ops[1]);
enum machine_mode imode = int_mode_for_mode (GET_MODE (from));
 
gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
&& GET_MODE_CLASS (imode) == MODE_INT
&& subreg_lowpart_p (ops[1]));
 
if (GET_MODE_SIZE (imode) < 4)
imode = SImode;
if (imode != GET_MODE (from))
from = gen_rtx_SUBREG (imode, from, 0);
 
if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
{
enum insn_code icode = convert_optab_handler (trunc_optab, mode, imode)->insn_code;
emit_insn (GEN_FCN (icode) (ops[0], from));
}
else
emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
return 1;
}
 
/* At least one of the operands needs to be a register. */
if ((reload_in_progress | reload_completed) == 0
&& !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
{
rtx temp = force_reg (mode, ops[1]);
emit_move_insn (ops[0], temp);
return 1;
}
if (reload_in_progress || reload_completed)
{
if (CONSTANT_P (ops[1]))
return spu_split_immediate (ops);
return 0;
}
 
/* Catch the SImode immediates greater than 0x7fffffff, and sign
extend them. */
if (GET_CODE (ops[1]) == CONST_INT)
{
HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
if (val != INTVAL (ops[1]))
{
emit_move_insn (ops[0], GEN_INT (val));
return 1;
}
}
if (MEM_P (ops[0]))
{
if (MEM_ADDR_SPACE (ops[0]))
ops[0] = expand_ea_mem (ops[0], true);
return spu_split_store (ops);
}
if (MEM_P (ops[1]))
{
if (MEM_ADDR_SPACE (ops[1]))
ops[1] = expand_ea_mem (ops[1], false);
return spu_split_load (ops);
}
 
return 0;
}
 
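/* DST is a register of at most eight bytes and SRC is a TImode register
holding a quadword. Shift the quadword right so the bytes belonging to
DST's value land in the low bits of an integer temporary of DST's size,
then move that into DST (via a subreg when DST's mode is not itself an
integer mode). */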
static void
spu_convert_move (rtx dst, rtx src)
{
enum machine_mode mode = GET_MODE (dst);
enum machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
rtx reg;
gcc_assert (GET_MODE (src) == TImode);
reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
emit_insn (gen_rtx_SET (VOIDmode, reg,
gen_rtx_TRUNCATE (int_mode,
gen_rtx_LSHIFTRT (TImode, src,
GEN_INT (int_mode == DImode ? 64 : 96)))));
if (int_mode != mode)
{
reg = simplify_gen_subreg (mode, reg, int_mode, 0);
emit_move_insn (dst, reg);
}
}
 
/* Load TImode values into DST0 and DST1 (when it is non-NULL) using
the address from SRC and SRC+16. Return a REG or CONST_INT that
specifies how many bytes to rotate the loaded registers, plus any
extra from EXTRA_ROTQBY. The address and rotate amounts are
normalized to improve merging of loads and rotate computations. */
static rtx
spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
{
rtx addr = XEXP (src, 0);
rtx p0, p1, rot, addr0, addr1;
int rot_amt;
 
rot = 0;
rot_amt = 0;
 
if (MEM_ALIGN (src) >= 128)
/* Address is already aligned; simply perform a TImode load. */ ;
else if (GET_CODE (addr) == PLUS)
{
/* 8 cases:
aligned reg + aligned reg => lqx
aligned reg + unaligned reg => lqx, rotqby
aligned reg + aligned const => lqd
aligned reg + unaligned const => lqd, rotqbyi
unaligned reg + aligned reg => lqx, rotqby
unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
unaligned reg + aligned const => lqd, rotqby
unaligned reg + unaligned const -> not allowed by legitimate address
*/
p0 = XEXP (addr, 0);
p1 = XEXP (addr, 1);
if (!reg_aligned_for_addr (p0))
{
if (REG_P (p1) && !reg_aligned_for_addr (p1))
{
rot = gen_reg_rtx (SImode);
emit_insn (gen_addsi3 (rot, p0, p1));
}
else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
{
if (INTVAL (p1) > 0
&& REG_POINTER (p0)
&& INTVAL (p1) * BITS_PER_UNIT
< REGNO_POINTER_ALIGN (REGNO (p0)))
{
rot = gen_reg_rtx (SImode);
emit_insn (gen_addsi3 (rot, p0, p1));
addr = p0;
}
else
{
rtx x = gen_reg_rtx (SImode);
emit_move_insn (x, p1);
if (!spu_arith_operand (p1, SImode))
p1 = x;
rot = gen_reg_rtx (SImode);
emit_insn (gen_addsi3 (rot, p0, p1));
addr = gen_rtx_PLUS (Pmode, p0, x);
}
}
else
rot = p0;
}
else
{
if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
{
rot_amt = INTVAL (p1) & 15;
if (INTVAL (p1) & -16)
{
p1 = GEN_INT (INTVAL (p1) & -16);
addr = gen_rtx_PLUS (SImode, p0, p1);
}
else
addr = p0;
}
else if (REG_P (p1) && !reg_aligned_for_addr (p1))
rot = p1;
}
}
else if (REG_P (addr))
{
if (!reg_aligned_for_addr (addr))
rot = addr;
}
else if (GET_CODE (addr) == CONST)
{
if (GET_CODE (XEXP (addr, 0)) == PLUS
&& ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
&& GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
{
rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
if (rot_amt & -16)
addr = gen_rtx_CONST (Pmode,
gen_rtx_PLUS (Pmode,
XEXP (XEXP (addr, 0), 0),
GEN_INT (rot_amt & -16)));
else
addr = XEXP (XEXP (addr, 0), 0);
}
else
{
rot = gen_reg_rtx (Pmode);
emit_move_insn (rot, addr);
}
}
else if (GET_CODE (addr) == CONST_INT)
{
rot_amt = INTVAL (addr);
addr = GEN_INT (rot_amt & -16);
}
else if (!ALIGNED_SYMBOL_REF_P (addr))
{
rot = gen_reg_rtx (Pmode);
emit_move_insn (rot, addr);
}
 
rot_amt += extra_rotby;
 
rot_amt &= 15;
 
if (rot && rot_amt)
{
rtx x = gen_reg_rtx (SImode);
emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
rot = x;
rot_amt = 0;
}
if (!rot && rot_amt)
rot = GEN_INT (rot_amt);
 
addr0 = copy_rtx (addr);
addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
 
if (dst1)
{
addr1 = plus_constant (copy_rtx (addr), 16);
addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
}
 
return rot;
}
 
int
spu_split_load (rtx * ops)
{
enum machine_mode mode = GET_MODE (ops[0]);
rtx addr, load, rot;
int rot_amt;
 
if (GET_MODE_SIZE (mode) >= 16)
return 0;
 
addr = XEXP (ops[1], 0);
gcc_assert (GET_CODE (addr) != AND);
 
if (!address_needs_split (ops[1]))
{
ops[1] = change_address (ops[1], TImode, addr);
load = gen_reg_rtx (TImode);
emit_insn (gen__movti (load, ops[1]));
spu_convert_move (ops[0], load);
return 1;
}
 
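/* Scalars narrower than a word are kept right-justified within the
preferred word, so request GET_MODE_SIZE (mode) - 4 extra bytes of
rotation to line the loaded bytes up with that position. */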
rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
 
load = gen_reg_rtx (TImode);
rot = spu_expand_load (load, 0, ops[1], rot_amt);
 
if (rot)
emit_insn (gen_rotqby_ti (load, load, rot));
 
spu_convert_move (ops[0], load);
return 1;
}
 
int
spu_split_store (rtx * ops)
{
enum machine_mode mode = GET_MODE (ops[0]);
rtx reg;
rtx addr, p0, p1, p1_lo, smem;
int aform;
int scalar;
 
if (GET_MODE_SIZE (mode) >= 16)
return 0;
 
addr = XEXP (ops[0], 0);
gcc_assert (GET_CODE (addr) != AND);
 
if (!address_needs_split (ops[0]))
{
reg = gen_reg_rtx (TImode);
emit_insn (gen_spu_convert (reg, ops[1]));
ops[0] = change_address (ops[0], TImode, addr);
emit_move_insn (ops[0], reg);
return 1;
}
 
if (GET_CODE (addr) == PLUS)
{
/* 8 cases:
aligned reg + aligned reg => lqx, c?x, shuf, stqx
aligned reg + unaligned reg => lqx, c?x, shuf, stqx
aligned reg + aligned const => lqd, c?d, shuf, stqx
aligned reg + unaligned const => lqd, c?d, shuf, stqx
unaligned reg + aligned reg => lqx, c?x, shuf, stqx
unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
unaligned reg + aligned const => lqd, c?d, shuf, stqx
unaligned reg + unaligned const => lqx, c?d, shuf, stqx
*/
aform = 0;
p0 = XEXP (addr, 0);
p1 = p1_lo = XEXP (addr, 1);
if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
{
p1_lo = GEN_INT (INTVAL (p1) & 15);
if (reg_aligned_for_addr (p0))
{
p1 = GEN_INT (INTVAL (p1) & -16);
if (p1 == const0_rtx)
addr = p0;
else
addr = gen_rtx_PLUS (SImode, p0, p1);
}
else
{
rtx x = gen_reg_rtx (SImode);
emit_move_insn (x, p1);
addr = gen_rtx_PLUS (SImode, p0, x);
}
}
}
else if (REG_P (addr))
{
aform = 0;
p0 = addr;
p1 = p1_lo = const0_rtx;
}
else
{
aform = 1;
p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
p1 = 0; /* aform doesn't use p1 */
p1_lo = addr;
if (ALIGNED_SYMBOL_REF_P (addr))
p1_lo = const0_rtx;
else if (GET_CODE (addr) == CONST
&& GET_CODE (XEXP (addr, 0)) == PLUS
&& ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
&& GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
{
HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
if ((v & -16) != 0)
addr = gen_rtx_CONST (Pmode,
gen_rtx_PLUS (Pmode,
XEXP (XEXP (addr, 0), 0),
GEN_INT (v & -16)));
else
addr = XEXP (XEXP (addr, 0), 0);
p1_lo = GEN_INT (v & 15);
}
else if (GET_CODE (addr) == CONST_INT)
{
p1_lo = GEN_INT (INTVAL (addr) & 15);
addr = GEN_INT (INTVAL (addr) & -16);
}
else
{
p1_lo = gen_reg_rtx (SImode);
emit_move_insn (p1_lo, addr);
}
}
 
reg = gen_reg_rtx (TImode);
 
scalar = store_with_one_insn_p (ops[0]);
if (!scalar)
{
/* We could copy the flags from the ops[0] MEM to lmem here, but we
don't, because we want this load to be optimized away if possible,
and copying the flags would prevent that in certain cases, e.g.
consider the volatile flag. */
 
rtx pat = gen_reg_rtx (TImode);
rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
set_mem_alias_set (lmem, 0);
emit_insn (gen_movti (reg, lmem));
 
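/* Only the low four bits of p0 + p1_lo matter to cpat. When p0 is
absent or known to be 16-byte aligned its contribution is zero, so the
(quadword aligned) stack pointer is used in its place. */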
if (!p0 || reg_aligned_for_addr (p0))
p0 = stack_pointer_rtx;
if (!p1_lo)
p1_lo = const0_rtx;
 
emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
emit_insn (gen_shufb (reg, ops[1], reg, pat));
}
else
{
if (GET_CODE (ops[1]) == REG)
emit_insn (gen_spu_convert (reg, ops[1]));
else if (GET_CODE (ops[1]) == SUBREG)
emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
else
abort ();
}
 
if (GET_MODE_SIZE (mode) < 4 && scalar)
emit_insn (gen_ashlti3
(reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
 
smem = change_address (ops[0], TImode, copy_rtx (addr));
/* We can't use the previous alias set because the memory has changed
size and can potentially overlap objects of other types. */
set_mem_alias_set (smem, 0);
 
emit_insn (gen_movti (smem, reg));
return 1;
}
 
/* Return TRUE if X is MEM which is a struct member reference
and the member can safely be loaded and stored with a single
instruction because it is padded. */
static int
mem_is_padded_component_ref (rtx x)
{
tree t = MEM_EXPR (x);
tree r;
if (!t || TREE_CODE (t) != COMPONENT_REF)
return 0;
t = TREE_OPERAND (t, 1);
if (!t || TREE_CODE (t) != FIELD_DECL
|| DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
return 0;
/* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
r = DECL_FIELD_CONTEXT (t);
if (!r || TREE_CODE (r) != RECORD_TYPE)
return 0;
/* Make sure they are the same mode */
if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
return 0;
/* If there are no following fields then the field alignment assures
the structure is padded to the alignment which means this field is
padded too. */
if (TREE_CHAIN (t) == 0)
return 1;
/* If the following field is also aligned then this field will be
padded. */
t = TREE_CHAIN (t);
if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
return 1;
return 0;
}
 
/* Parse the -mfixed-range= option string. */
static void
fix_range (const char *const_str)
{
int i, first, last;
char *str, *dash, *comma;
/* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
REG2 are either register names or register numbers. The effect
of this option is to mark the registers in the range from REG1 to
REG2 as ``fixed'' so they won't be used by the compiler. */
i = strlen (const_str);
str = (char *) alloca (i + 1);
memcpy (str, const_str, i + 1);
while (1)
{
dash = strchr (str, '-');
if (!dash)
{
warning (0, "value of -mfixed-range must have form REG1-REG2");
return;
}
*dash = '\0';
comma = strchr (dash + 1, ',');
if (comma)
*comma = '\0';
first = decode_reg_name (str);
if (first < 0)
{
warning (0, "unknown register name: %s", str);
return;
}
last = decode_reg_name (dash + 1);
if (last < 0)
{
warning (0, "unknown register name: %s", dash + 1);
return;
}
*dash = '-';
if (first > last)
{
warning (0, "%s-%s is an empty range", str, dash + 1);
return;
}
for (i = first; i <= last; ++i)
fixed_regs[i] = call_used_regs[i] = 1;
 
if (!comma)
break;
 
*comma = ',';
str = comma + 1;
}
}
 
/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
can be generated using the fsmbi instruction. */
int
fsmbi_const_p (rtx x)
{
if (CONSTANT_P (x))
{
/* We can always choose TImode for CONST_INT because the high bits
of an SImode will always be all 1s, i.e., valid for fsmbi. */
enum immediate_class c = classify_immediate (x, TImode);
return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
}
return 0;
}
 
/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
can be generated using the cbd, chd, cwd or cdd instruction. */
int
cpat_const_p (rtx x, enum machine_mode mode)
{
if (CONSTANT_P (x))
{
enum immediate_class c = classify_immediate (x, mode);
return c == IC_CPAT;
}
return 0;
}
 
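/* Compute, at compile time, the shuffle-pattern constant that a cpat
with these operands would produce. ops[1] is the base (a CONST_INT or
a register known to be 16-byte aligned), ops[2] the constant offset and
ops[3] the element size. Return NULL if the pattern cannot be
determined at compile time. */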
rtx
gen_cpat_const (rtx * ops)
{
unsigned char dst[16];
int i, offset, shift, isize;
if (GET_CODE (ops[3]) != CONST_INT
|| GET_CODE (ops[2]) != CONST_INT
|| (GET_CODE (ops[1]) != CONST_INT
&& GET_CODE (ops[1]) != REG))
return 0;
if (GET_CODE (ops[1]) == REG
&& (!REG_POINTER (ops[1])
|| REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
return 0;
 
for (i = 0; i < 16; i++)
dst[i] = i + 16;
isize = INTVAL (ops[3]);
if (isize == 1)
shift = 3;
else if (isize == 2)
shift = 2;
else
shift = 0;
offset = (INTVAL (ops[2]) +
(GET_CODE (ops[1]) ==
CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
for (i = 0; i < isize; i++)
dst[offset + i] = i + shift;
return array_to_constant (TImode, dst);
}
 
/* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
array. Use MODE for CONST_INT's. When the constant's mode is smaller
than 16 bytes, the value is repeated across the rest of the array. */
void
constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
{
HOST_WIDE_INT val;
int i, j, first;
 
memset (arr, 0, 16);
mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
if (GET_CODE (x) == CONST_INT
|| (GET_CODE (x) == CONST_DOUBLE
&& (mode == SFmode || mode == DFmode)))
{
gcc_assert (mode != VOIDmode && mode != BLKmode);
 
if (GET_CODE (x) == CONST_DOUBLE)
val = const_double_to_hwint (x);
else
val = INTVAL (x);
first = GET_MODE_SIZE (mode) - 1;
for (i = first; i >= 0; i--)
{
arr[i] = val & 0xff;
val >>= 8;
}
/* Splat the constant across the whole array. */
for (j = 0, i = first + 1; i < 16; i++)
{
arr[i] = arr[j];
j = (j == first) ? 0 : j + 1;
}
}
else if (GET_CODE (x) == CONST_DOUBLE)
{
val = CONST_DOUBLE_LOW (x);
for (i = 15; i >= 8; i--)
{
arr[i] = val & 0xff;
val >>= 8;
}
val = CONST_DOUBLE_HIGH (x);
for (i = 7; i >= 0; i--)
{
arr[i] = val & 0xff;
val >>= 8;
}
}
else if (GET_CODE (x) == CONST_VECTOR)
{
int units;
rtx elt;
mode = GET_MODE_INNER (mode);
units = CONST_VECTOR_NUNITS (x);
for (i = 0; i < units; i++)
{
elt = CONST_VECTOR_ELT (x, i);
if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
{
if (GET_CODE (elt) == CONST_DOUBLE)
val = const_double_to_hwint (elt);
else
val = INTVAL (elt);
first = GET_MODE_SIZE (mode) - 1;
if (first + i * GET_MODE_SIZE (mode) > 16)
abort ();
for (j = first; j >= 0; j--)
{
arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
val >>= 8;
}
}
}
}
else
gcc_unreachable();
}
 
/* Convert a 16 byte array to a constant of mode MODE. When MODE is
smaller than 16 bytes, use the bytes that would represent that value
in a register, e.g., for QImode return the value of arr[3]. */
rtx
array_to_constant (enum machine_mode mode, const unsigned char arr[16])
{
enum machine_mode inner_mode;
rtvec v;
int units, size, i, j, k;
HOST_WIDE_INT val;
 
if (GET_MODE_CLASS (mode) == MODE_INT
&& GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
{
j = GET_MODE_SIZE (mode);
i = j < 4 ? 4 - j : 0;
for (val = 0; i < j; i++)
val = (val << 8) | arr[i];
val = trunc_int_for_mode (val, mode);
return GEN_INT (val);
}
 
if (mode == TImode)
{
HOST_WIDE_INT high;
for (i = high = 0; i < 8; i++)
high = (high << 8) | arr[i];
for (i = 8, val = 0; i < 16; i++)
val = (val << 8) | arr[i];
return immed_double_const (val, high, TImode);
}
if (mode == SFmode)
{
val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
val = trunc_int_for_mode (val, SImode);
return hwint_to_const_double (SFmode, val);
}
if (mode == DFmode)
{
for (i = 0, val = 0; i < 8; i++)
val = (val << 8) | arr[i];
return hwint_to_const_double (DFmode, val);
}
 
if (!VECTOR_MODE_P (mode))
abort ();
 
units = GET_MODE_NUNITS (mode);
size = GET_MODE_UNIT_SIZE (mode);
inner_mode = GET_MODE_INNER (mode);
v = rtvec_alloc (units);
 
for (k = i = 0; i < units; ++i)
{
val = 0;
for (j = 0; j < size; j++, k++)
val = (val << 8) | arr[k];
 
if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
else
RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
}
if (k > 16)
abort ();
 
return gen_rtx_CONST_VECTOR (mode, v);
}
 
static void
reloc_diagnostic (rtx x)
{
tree decl = 0;
if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
return;
 
if (GET_CODE (x) == SYMBOL_REF)
decl = SYMBOL_REF_DECL (x);
else if (GET_CODE (x) == CONST
&& GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
 
/* SYMBOL_REF_DECL is not necessarily a DECL. */
if (decl && !DECL_P (decl))
decl = 0;
 
/* The decl could be a string constant. */
if (decl && DECL_P (decl))
{
location_t loc;
/* We use last_assemble_variable_decl to get line information. It's
not always going to be right and might not even be close, but will
be right for the more common cases. */
if (!last_assemble_variable_decl || in_section == ctors_section)
loc = DECL_SOURCE_LOCATION (decl);
else
loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
 
if (TARGET_WARN_RELOC)
warning_at (loc, 0,
"creating run-time relocation for %qD", decl);
else
error_at (loc,
"creating run-time relocation for %qD", decl);
}
else
{
if (TARGET_WARN_RELOC)
warning_at (input_location, 0, "creating run-time relocation");
else
error_at (input_location, "creating run-time relocation");
}
}
 
/* Hook into assemble_integer so we can generate an error for run-time
relocations. The SPU ABI disallows them. */
static bool
spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
/* By default run-time relocations aren't supported, but we allow them
in case users support them in their own run-time loader. We provide
a warning for those users that don't. */
if ((GET_CODE (x) == SYMBOL_REF)
|| GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
reloc_diagnostic (x);
 
return default_assemble_integer (x, size, aligned_p);
}
 
static void
spu_asm_globalize_label (FILE * file, const char *name)
{
fputs ("\t.global\t", file);
assemble_name (file, name);
fputs ("\n", file);
}
 
static bool
spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
bool speed ATTRIBUTE_UNUSED)
{
enum machine_mode mode = GET_MODE (x);
int cost = COSTS_N_INSNS (2);
 
/* Folding to a CONST_VECTOR will use extra space but there might
be only a small savings in cycles. We'd like to use a CONST_VECTOR
only if it allows us to fold away multiple insns. Changing the cost
of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
because this cost will only be compared against a single insn.
if (code == CONST_VECTOR)
return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
*/
 
/* Use defaults for float operations. Not accurate but good enough. */
if (mode == DFmode)
{
*total = COSTS_N_INSNS (13);
return true;
}
if (mode == SFmode)
{
*total = COSTS_N_INSNS (6);
return true;
}
switch (code)
{
case CONST_INT:
if (satisfies_constraint_K (x))
*total = 0;
else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
*total = COSTS_N_INSNS (1);
else
*total = COSTS_N_INSNS (3);
return true;
 
case CONST:
*total = COSTS_N_INSNS (3);
return true;
 
case LABEL_REF:
case SYMBOL_REF:
*total = COSTS_N_INSNS (0);
return true;
 
case CONST_DOUBLE:
*total = COSTS_N_INSNS (5);
return true;
 
case FLOAT_EXTEND:
case FLOAT_TRUNCATE:
case FLOAT:
case UNSIGNED_FLOAT:
case FIX:
case UNSIGNED_FIX:
*total = COSTS_N_INSNS (7);
return true;
 
case PLUS:
if (mode == TImode)
{
*total = COSTS_N_INSNS (9);
return true;
}
break;
 
case MULT:
cost =
GET_CODE (XEXP (x, 0)) ==
REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
{
if (GET_CODE (XEXP (x, 1)) == CONST_INT)
{
HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
cost = COSTS_N_INSNS (14);
if ((val & 0xffff) == 0)
cost = COSTS_N_INSNS (9);
else if (val > 0 && val < 0x10000)
cost = COSTS_N_INSNS (11);
}
}
*total = cost;
return true;
case DIV:
case UDIV:
case MOD:
case UMOD:
*total = COSTS_N_INSNS (20);
return true;
case ROTATE:
case ROTATERT:
case ASHIFT:
case ASHIFTRT:
case LSHIFTRT:
*total = COSTS_N_INSNS (4);
return true;
case UNSPEC:
if (XINT (x, 1) == UNSPEC_CONVERT)
*total = COSTS_N_INSNS (0);
else
*total = COSTS_N_INSNS (4);
return true;
}
/* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
if (GET_MODE_CLASS (mode) == MODE_INT
&& GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
* (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
*total = cost;
return true;
}
 
static enum machine_mode
spu_unwind_word_mode (void)
{
return SImode;
}
 
/* Decide whether we can make a sibling call to a function. DECL is the
declaration of the function being targeted by the call and EXP is the
CALL_EXPR representing the call. */
static bool
spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
return decl && !TARGET_LARGE_MEM;
}
 
/* We need to correctly update the back chain pointer and the Available
Stack Size (which is in the second slot of the sp register). */
void
spu_allocate_stack (rtx op0, rtx op1)
{
HOST_WIDE_INT v;
rtx chain = gen_reg_rtx (V4SImode);
rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
rtx sp = gen_reg_rtx (V4SImode);
rtx splatted = gen_reg_rtx (V4SImode);
rtx pat = gen_reg_rtx (TImode);
 
/* copy the back chain so we can save it back again. */
emit_move_insn (chain, stack_bot);
 
op1 = force_reg (SImode, op1);
 
v = 0x1020300010203ll;
emit_move_insn (pat, immed_double_const (v, v, TImode));
emit_insn (gen_shufb (splatted, op1, op1, pat));
 
emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
emit_insn (gen_subv4si3 (sp, sp, splatted));
 
if (flag_stack_check)
{
rtx avail = gen_reg_rtx(SImode);
rtx result = gen_reg_rtx(SImode);
emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
emit_insn (gen_spu_heq (result, GEN_INT(0) ));
}
 
emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
 
emit_move_insn (stack_bot, chain);
 
emit_move_insn (op0, virtual_stack_dynamic_rtx);
}
 
void
spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
{
static unsigned char arr[16] =
{ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
rtx temp = gen_reg_rtx (SImode);
rtx temp2 = gen_reg_rtx (SImode);
rtx temp3 = gen_reg_rtx (V4SImode);
rtx temp4 = gen_reg_rtx (V4SImode);
rtx pat = gen_reg_rtx (TImode);
rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
 
/* Restore the backchain from the first word, sp from the second. */
emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
 
emit_move_insn (pat, array_to_constant (TImode, arr));
 
/* Compute Available Stack Size for sp */
emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
emit_insn (gen_shufb (temp3, temp, temp, pat));
 
/* Compute Available Stack Size for back chain */
emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
emit_insn (gen_shufb (temp4, temp2, temp2, pat));
emit_insn (gen_addv4si3 (temp4, sp, temp4));
 
emit_insn (gen_addv4si3 (sp, sp, temp3));
emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
}
 
static void
spu_init_libfuncs (void)
{
set_optab_libfunc (smul_optab, DImode, "__muldi3");
set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
set_optab_libfunc (smod_optab, DImode, "__moddi3");
set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
set_optab_libfunc (umod_optab, DImode, "__umoddi3");
set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
set_optab_libfunc (clz_optab, DImode, "__clzdi2");
set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
set_optab_libfunc (parity_optab, DImode, "__paritydi2");
 
set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
 
set_optab_libfunc (smul_optab, TImode, "__multi3");
set_optab_libfunc (sdiv_optab, TImode, "__divti3");
set_optab_libfunc (smod_optab, TImode, "__modti3");
set_optab_libfunc (udiv_optab, TImode, "__udivti3");
set_optab_libfunc (umod_optab, TImode, "__umodti3");
set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
}
 
/* Make a subreg, stripping any existing subreg. We could possibly just
call simplify_subreg, but in this case we know what we want. */
rtx
spu_gen_subreg (enum machine_mode mode, rtx x)
{
if (GET_CODE (x) == SUBREG)
x = SUBREG_REG (x);
if (GET_MODE (x) == mode)
return x;
return gen_rtx_SUBREG (mode, x, 0);
}
 
static bool
spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
return (TYPE_MODE (type) == BLKmode
&& ((type) == 0
|| TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
|| int_size_in_bytes (type) >
(MAX_REGISTER_RETURN * UNITS_PER_WORD)));
}
/* Create the built-in types and functions */
 
enum spu_function_code
{
#define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
#include "spu-builtins.def"
#undef DEF_BUILTIN
NUM_SPU_BUILTINS
};
 
extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
 
struct spu_builtin_description spu_builtins[] = {
#define DEF_BUILTIN(fcode, icode, name, type, params) \
{fcode, icode, name, type, params, NULL_TREE},
#include "spu-builtins.def"
#undef DEF_BUILTIN
};
 
/* Returns the SPU builtin decl for CODE. */
 
static tree
spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
if (code >= NUM_SPU_BUILTINS)
return error_mark_node;
return spu_builtins[code].fndecl;
}
 
 
static void
spu_init_builtins (void)
{
struct spu_builtin_description *d;
unsigned int i;
 
V16QI_type_node = build_vector_type (intQI_type_node, 16);
V8HI_type_node = build_vector_type (intHI_type_node, 8);
V4SI_type_node = build_vector_type (intSI_type_node, 4);
V2DI_type_node = build_vector_type (intDI_type_node, 2);
V4SF_type_node = build_vector_type (float_type_node, 4);
V2DF_type_node = build_vector_type (double_type_node, 2);
 
unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
 
spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
 
spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
 
spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
 
spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
 
spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
 
spu_builtin_types[SPU_BTI_PTR] =
build_pointer_type (build_qualified_type
(void_type_node,
TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
 
/* For each builtin we build a new prototype. The tree code will make
sure nodes are shared. */
for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
{
tree p;
char name[64]; /* add_builtin_function will make a copy. */
int parm;
 
if (d->name == 0)
continue;
 
/* Find last parm. */
for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
;
 
p = void_list_node;
while (parm > 1)
p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
 
p = build_function_type (spu_builtin_types[d->parm[0]], p);
 
sprintf (name, "__builtin_%s", d->name);
d->fndecl =
add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
NULL, NULL_TREE);
if (d->fcode == SPU_MASK_FOR_LOAD)
TREE_READONLY (d->fndecl) = 1;
 
/* These builtins don't throw. */
TREE_NOTHROW (d->fndecl) = 1;
}
}
 
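/* Restore the stack pointer to the value saved in OP1. The difference
from the current stack pointer is splatted and added to every slot of
$sp so the Available Stack Size stays consistent, and the quadword at
the old stack bottom (the back chain) is copied to the new bottom. */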
void
spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
{
static unsigned char arr[16] =
{ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
 
rtx temp = gen_reg_rtx (Pmode);
rtx temp2 = gen_reg_rtx (V4SImode);
rtx temp3 = gen_reg_rtx (V4SImode);
rtx pat = gen_reg_rtx (TImode);
rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
 
emit_move_insn (pat, array_to_constant (TImode, arr));
 
/* Restore the sp. */
emit_move_insn (temp, op1);
emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
 
/* Compute available stack size for sp. */
emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
emit_insn (gen_shufb (temp3, temp, temp, pat));
 
emit_insn (gen_addv4si3 (sp, sp, temp3));
emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
}
 
int
spu_safe_dma (HOST_WIDE_INT channel)
{
return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
}
 
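/* Expand spu_splats: replicate the scalar ops[1] into every element of
the vector ops[0]. Constant arguments are folded directly into a
vector constant; otherwise a shufb with a mode-dependent replication
pattern is emitted. */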
void
spu_builtin_splats (rtx ops[])
{
enum machine_mode mode = GET_MODE (ops[0]);
if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
{
unsigned char arr[16];
constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
emit_move_insn (ops[0], array_to_constant (mode, arr));
}
else
{
rtx reg = gen_reg_rtx (TImode);
rtx shuf;
if (GET_CODE (ops[1]) != REG
&& GET_CODE (ops[1]) != SUBREG)
ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
switch (mode)
{
case V2DImode:
case V2DFmode:
shuf =
immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
TImode);
break;
case V4SImode:
case V4SFmode:
shuf =
immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
TImode);
break;
case V8HImode:
shuf =
immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
TImode);
break;
case V16QImode:
shuf =
immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
TImode);
break;
default:
abort ();
}
emit_move_insn (reg, shuf);
emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
}
}
 
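/* Expand spu_extract: place element ops[2] of vector ops[1] into the
scalar ops[0]. Constant indexes use the vec_extract patterns; a
variable index is turned into a byte rotate that brings the element
into the preferred slot. */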
void
spu_builtin_extract (rtx ops[])
{
enum machine_mode mode;
rtx rot, from, tmp;
 
mode = GET_MODE (ops[1]);
 
if (GET_CODE (ops[2]) == CONST_INT)
{
switch (mode)
{
case V16QImode:
emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
break;
case V8HImode:
emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
break;
case V4SFmode:
emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
break;
case V4SImode:
emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
break;
case V2DImode:
emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
break;
case V2DFmode:
emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
break;
default:
abort ();
}
return;
}
 
from = spu_gen_subreg (TImode, ops[1]);
rot = gen_reg_rtx (TImode);
tmp = gen_reg_rtx (SImode);
 
switch (mode)
{
case V16QImode:
emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
break;
case V8HImode:
emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
break;
case V4SFmode:
case V4SImode:
emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
break;
case V2DImode:
case V2DFmode:
emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
break;
default:
abort ();
}
emit_insn (gen_rotqby_ti (rot, from, tmp));
 
emit_insn (gen_spu_convert (ops[0], rot));
}
 
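/* Expand spu_insert: insert the scalar ops[1] into element ops[3] of
vector ops[2], leaving the result in ops[0]. A cpat-generated mask
selects which bytes of the shufb result come from the new value. */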
void
spu_builtin_insert (rtx ops[])
{
enum machine_mode mode = GET_MODE (ops[0]);
enum machine_mode imode = GET_MODE_INNER (mode);
rtx mask = gen_reg_rtx (TImode);
rtx offset;
 
if (GET_CODE (ops[3]) == CONST_INT)
offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
else
{
offset = gen_reg_rtx (SImode);
emit_insn (gen_mulsi3
(offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
}
emit_insn (gen_cpat
(mask, stack_pointer_rtx, offset,
GEN_INT (GET_MODE_SIZE (imode))));
emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
}
 
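/* Expand spu_promote: copy the scalar ops[1] into element ops[2] of
the vector ops[0] by rotating a quadword copy of the scalar so the
value lands in the requested slot; the other elements are undefined. */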
void
spu_builtin_promote (rtx ops[])
{
enum machine_mode mode, imode;
rtx rot, from, offset;
HOST_WIDE_INT pos;
 
mode = GET_MODE (ops[0]);
imode = GET_MODE_INNER (mode);
 
from = gen_reg_rtx (TImode);
rot = spu_gen_subreg (TImode, ops[0]);
 
emit_insn (gen_spu_convert (from, ops[1]));
 
if (GET_CODE (ops[2]) == CONST_INT)
{
pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
if (GET_MODE_SIZE (imode) < 4)
pos += 4 - GET_MODE_SIZE (imode);
offset = GEN_INT (pos & 15);
}
else
{
offset = gen_reg_rtx (SImode);
switch (mode)
{
case V16QImode:
emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
break;
case V8HImode:
emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
emit_insn (gen_addsi3 (offset, offset, offset));
break;
case V4SFmode:
case V4SImode:
emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
break;
case V2DImode:
case V2DFmode:
emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
break;
default:
abort ();
}
}
emit_insn (gen_rotqby_ti (rot, from, offset));
}
 
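/* Implement TARGET_TRAMPOLINE_INIT. Emit the instructions that build a
trampoline in M_TRAMP which loads the static chain register with CXT
and jumps to FNDECL's address. */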
static void
spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
{
rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
rtx shuf = gen_reg_rtx (V4SImode);
rtx insn = gen_reg_rtx (V4SImode);
rtx shufc;
rtx insnc;
rtx mem;
 
fnaddr = force_reg (SImode, fnaddr);
cxt = force_reg (SImode, cxt);
 
if (TARGET_LARGE_MEM)
{
rtx rotl = gen_reg_rtx (V4SImode);
rtx mask = gen_reg_rtx (V4SImode);
rtx bi = gen_reg_rtx (SImode);
static unsigned char const shufa[16] = {
2, 3, 0, 1, 18, 19, 16, 17,
0, 1, 2, 3, 16, 17, 18, 19
};
static unsigned char const insna[16] = {
0x41, 0, 0, 79,
0x41, 0, 0, STATIC_CHAIN_REGNUM,
0x60, 0x80, 0, 79,
0x60, 0x80, 0, STATIC_CHAIN_REGNUM
};
 
shufc = force_reg (TImode, array_to_constant (TImode, shufa));
insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
 
emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
emit_insn (gen_selb (insn, insnc, rotl, mask));
 
mem = adjust_address (m_tramp, V4SImode, 0);
emit_move_insn (mem, insn);
 
emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
mem = adjust_address (m_tramp, Pmode, 16);
emit_move_insn (mem, bi);
}
else
{
rtx scxt = gen_reg_rtx (SImode);
rtx sfnaddr = gen_reg_rtx (SImode);
static unsigned char const insna[16] = {
0x42, 0, 0, STATIC_CHAIN_REGNUM,
0x30, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0
};
 
shufc = gen_reg_rtx (TImode);
insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
 
/* By or'ing all of cxt with the ila opcode we are assuming cxt
fits 18 bits and the last 4 are zeros. This will be true if
the stack pointer is initialized to 0x3fff0 at program start;
otherwise the ila instruction will be garbage. */
 
emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
emit_insn (gen_cpat
(shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
emit_insn (gen_iorv4si3 (insn, insnc, shuf));
 
mem = adjust_address (m_tramp, V4SImode, 0);
emit_move_insn (mem, insn);
}
emit_insn (gen_sync ());
}
 
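/* Sign extend ops[1] into the wider ops[0] (DImode or TImode) by
computing the sign word and using shufb to place the source bytes at
the low end of the result with the sign bytes replicated above them. */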
void
spu_expand_sign_extend (rtx ops[])
{
unsigned char arr[16];
rtx pat = gen_reg_rtx (TImode);
rtx sign, c;
int i, last;
last = GET_MODE (ops[0]) == DImode ? 7 : 15;
if (GET_MODE (ops[1]) == QImode)
{
sign = gen_reg_rtx (HImode);
emit_insn (gen_extendqihi2 (sign, ops[1]));
for (i = 0; i < 16; i++)
arr[i] = 0x12;
arr[last] = 0x13;
}
else
{
for (i = 0; i < 16; i++)
arr[i] = 0x10;
switch (GET_MODE (ops[1]))
{
case HImode:
sign = gen_reg_rtx (SImode);
emit_insn (gen_extendhisi2 (sign, ops[1]));
arr[last] = 0x03;
arr[last - 1] = 0x02;
break;
case SImode:
sign = gen_reg_rtx (SImode);
emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
for (i = 0; i < 4; i++)
arr[last - i] = 3 - i;
break;
case DImode:
sign = gen_reg_rtx (SImode);
c = gen_reg_rtx (SImode);
emit_insn (gen_spu_convert (c, ops[1]));
emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
for (i = 0; i < 8; i++)
arr[last - i] = 7 - i;
break;
default:
abort ();
}
}
emit_move_insn (pat, array_to_constant (TImode, arr));
emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
}
 
/* Expand vector initialization. If there are any constant parts,
load the constant parts first, then load any non-constant parts. */
void
spu_expand_vector_init (rtx target, rtx vals)
{
enum machine_mode mode = GET_MODE (target);
int n_elts = GET_MODE_NUNITS (mode);
int n_var = 0;
bool all_same = true;
rtx first, x = NULL_RTX, first_constant = NULL_RTX;
int i;
 
first = XVECEXP (vals, 0, 0);
for (i = 0; i < n_elts; ++i)
{
x = XVECEXP (vals, 0, i);
if (!(CONST_INT_P (x)
|| GET_CODE (x) == CONST_DOUBLE
|| GET_CODE (x) == CONST_FIXED))
++n_var;
else
{
if (first_constant == NULL_RTX)
first_constant = x;
}
if (i > 0 && !rtx_equal_p (x, first))
all_same = false;
}
 
/* If all elements are the same, use splats to replicate the first element. */
if (all_same)
{
if (!CONSTANT_P (first)
&& !register_operand (first, GET_MODE (x)))
first = force_reg (GET_MODE (first), first);
emit_insn (gen_spu_splats (target, first));
return;
}
 
/* Load the constant parts. */
if (n_var != n_elts)
{
if (n_var == 0)
{
emit_move_insn (target,
gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
}
else
{
rtx constant_parts_rtx = copy_rtx (vals);
 
gcc_assert (first_constant != NULL_RTX);
/* Fill empty slots with the first constant; this increases
our chance of using splats in the recursive call below. */
for (i = 0; i < n_elts; ++i)
{
x = XVECEXP (constant_parts_rtx, 0, i);
if (!(CONST_INT_P (x)
|| GET_CODE (x) == CONST_DOUBLE
|| GET_CODE (x) == CONST_FIXED))
XVECEXP (constant_parts_rtx, 0, i) = first_constant;
}
 
spu_expand_vector_init (target, constant_parts_rtx);
}
}
 
/* Load the variable parts. */
if (n_var != 0)
{
rtx insert_operands[4];
 
insert_operands[0] = target;
insert_operands[2] = target;
for (i = 0; i < n_elts; ++i)
{
x = XVECEXP (vals, 0, i);
if (!(CONST_INT_P (x)
|| GET_CODE (x) == CONST_DOUBLE
|| GET_CODE (x) == CONST_FIXED))
{
if (!register_operand (x, GET_MODE (x)))
x = force_reg (GET_MODE (x), x);
insert_operands[1] = x;
insert_operands[3] = GEN_INT (i);
spu_builtin_insert (insert_operands);
}
}
}
}
 
/* Return the insn index for the vector compare instruction for the given
CODE, DEST_MODE and OP_MODE. Return -1 if no valid insn is available. */
 
static int
get_vec_cmp_insn (enum rtx_code code,
enum machine_mode dest_mode,
enum machine_mode op_mode)
 
{
switch (code)
{
case EQ:
if (dest_mode == V16QImode && op_mode == V16QImode)
return CODE_FOR_ceq_v16qi;
if (dest_mode == V8HImode && op_mode == V8HImode)
return CODE_FOR_ceq_v8hi;
if (dest_mode == V4SImode && op_mode == V4SImode)
return CODE_FOR_ceq_v4si;
if (dest_mode == V4SImode && op_mode == V4SFmode)
return CODE_FOR_ceq_v4sf;
if (dest_mode == V2DImode && op_mode == V2DFmode)
return CODE_FOR_ceq_v2df;
break;
case GT:
if (dest_mode == V16QImode && op_mode == V16QImode)
return CODE_FOR_cgt_v16qi;
if (dest_mode == V8HImode && op_mode == V8HImode)
return CODE_FOR_cgt_v8hi;
if (dest_mode == V4SImode && op_mode == V4SImode)
return CODE_FOR_cgt_v4si;
if (dest_mode == V4SImode && op_mode == V4SFmode)
return CODE_FOR_cgt_v4sf;
if (dest_mode == V2DImode && op_mode == V2DFmode)
return CODE_FOR_cgt_v2df;
break;
case GTU:
if (dest_mode == V16QImode && op_mode == V16QImode)
return CODE_FOR_clgt_v16qi;
if (dest_mode == V8HImode && op_mode == V8HImode)
return CODE_FOR_clgt_v8hi;
if (dest_mode == V4SImode && op_mode == V4SImode)
return CODE_FOR_clgt_v4si;
break;
default:
break;
}
return -1;
}
 
/* Emit a vector compare for operands OP0 and OP1 using code RCODE.
DMODE is the expected destination mode. This is a recursive function. */
 
static rtx
spu_emit_vector_compare (enum rtx_code rcode,
rtx op0, rtx op1,
enum machine_mode dmode)
{
int vec_cmp_insn;
rtx mask;
enum machine_mode dest_mode;
enum machine_mode op_mode = GET_MODE (op1);
 
gcc_assert (GET_MODE (op0) == GET_MODE (op1));
 
/* Single-precision floating point vector compares use a V4SImode
destination, and double-precision ones use V2DImode. The result is
moved to the appropriate mode later. */
if (dmode == V4SFmode)
dest_mode = V4SImode;
else if (dmode == V2DFmode)
dest_mode = V2DImode;
else
dest_mode = dmode;
 
mask = gen_reg_rtx (dest_mode);
vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
 
if (vec_cmp_insn == -1)
{
bool swap_operands = false;
bool try_again = false;
switch (rcode)
{
case LT:
rcode = GT;
swap_operands = true;
try_again = true;
break;
case LTU:
rcode = GTU;
swap_operands = true;
try_again = true;
break;
case NE:
/* Treat A != B as ~(A==B). */
{
enum insn_code nor_code;
rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
nor_code = optab_handler (one_cmpl_optab, (int)dest_mode)->insn_code;
gcc_assert (nor_code != CODE_FOR_nothing);
emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
if (dmode != dest_mode)
{
rtx temp = gen_reg_rtx (dest_mode);
convert_move (temp, mask, 0);
return temp;
}
return mask;
}
break;
case GE:
case GEU:
case LE:
case LEU:
/* Try GT/GTU/LT/LTU OR EQ */
{
rtx c_rtx, eq_rtx;
enum insn_code ior_code;
enum rtx_code new_code;
 
switch (rcode)
{
case GE: new_code = GT; break;
case GEU: new_code = GTU; break;
case LE: new_code = LT; break;
case LEU: new_code = LTU; break;
default:
gcc_unreachable ();
}
 
c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
 
ior_code = optab_handler (ior_optab, (int)dest_mode)->insn_code;
gcc_assert (ior_code != CODE_FOR_nothing);
emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
if (dmode != dest_mode)
{
rtx temp = gen_reg_rtx (dest_mode);
convert_move (temp, mask, 0);
return temp;
}
return mask;
}
break;
default:
gcc_unreachable ();
}
 
/* You only get two chances. */
if (try_again)
vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
 
gcc_assert (vec_cmp_insn != -1);
 
if (swap_operands)
{
rtx tmp;
tmp = op0;
op0 = op1;
op1 = tmp;
}
}
 
emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
if (dmode != dest_mode)
{
rtx temp = gen_reg_rtx (dest_mode);
convert_move (temp, mask, 0);
return temp;
}
return mask;
}
 
 
/* Emit vector conditional expression.
DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
 
int
spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
rtx cond, rtx cc_op0, rtx cc_op1)
{
enum machine_mode dest_mode = GET_MODE (dest);
enum rtx_code rcode = GET_CODE (cond);
rtx mask;
/* Get the vector mask for the given relational operations. */
mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
 
emit_insn(gen_selb (dest, op2, op1, mask));
 
return 1;
}
 
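/* Like force_reg, but also convert OP to MODE when the two differ:
VOIDmode/BLKmode constants go through convert_to_mode, same-size mode
changes use a subreg, and anything else goes through spu_convert. */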
static rtx
spu_force_reg (enum machine_mode mode, rtx op)
{
rtx x, r;
if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
{
if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
|| GET_MODE (op) == BLKmode)
return force_reg (mode, convert_to_mode (mode, op, 0));
abort ();
}
 
r = force_reg (GET_MODE (op), op);
if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
{
x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
if (x)
return x;
}
 
x = gen_reg_rtx (mode);
emit_insn (gen_spu_convert (x, r));
return x;
}
 
static void
spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
{
HOST_WIDE_INT v = 0;
int lsbits;
/* Check the range of immediate operands. */
if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
{
int range = p - SPU_BTI_7;
 
if (!CONSTANT_P (op))
error ("%s expects an integer literal in the range [%d, %d].",
d->name,
spu_builtin_range[range].low, spu_builtin_range[range].high);
 
if (GET_CODE (op) == CONST
&& (GET_CODE (XEXP (op, 0)) == PLUS
|| GET_CODE (XEXP (op, 0)) == MINUS))
{
v = INTVAL (XEXP (XEXP (op, 0), 1));
op = XEXP (XEXP (op, 0), 0);
}
else if (GET_CODE (op) == CONST_INT)
v = INTVAL (op);
else if (GET_CODE (op) == CONST_VECTOR
&& GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
v = INTVAL (CONST_VECTOR_ELT (op, 0));
 
/* The default for v is 0 which is valid in every range. */
if (v < spu_builtin_range[range].low
|| v > spu_builtin_range[range].high)
error ("%s expects an integer literal in the range [%d, %d]. ("
HOST_WIDE_INT_PRINT_DEC ")",
d->name,
spu_builtin_range[range].low, spu_builtin_range[range].high,
v);
 
switch (p)
{
case SPU_BTI_S10_4:
lsbits = 4;
break;
case SPU_BTI_U16_2:
/* This is only used in lqa and stqa. Even though the insns
encode 16 bits of the address (all but the 2 least
significant), only 14 bits are used because it is masked to
be 16 byte aligned. */
lsbits = 4;
break;
case SPU_BTI_S16_2:
/* This is used for lqr and stqr. */
lsbits = 2;
break;
default:
lsbits = 0;
}
 
if (GET_CODE (op) == LABEL_REF
|| (GET_CODE (op) == SYMBOL_REF
&& SYMBOL_REF_FUNCTION_P (op))
|| (v & ((1 << lsbits) - 1)) != 0)
warning (0, "%d least significant bits of %s are ignored.", lsbits,
d->name);
}
}
 
 
static int
expand_builtin_args (struct spu_builtin_description *d, tree exp,
rtx target, rtx ops[])
{
enum insn_code icode = (enum insn_code) d->icode;
int i = 0, a;
 
/* Expand the arguments into rtl. */
 
if (d->parm[0] != SPU_BTI_VOID)
ops[i++] = target;
 
for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
{
tree arg = CALL_EXPR_ARG (exp, a);
if (arg == 0)
abort ();
ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
}
 
/* The insn pattern may have additional operands (SCRATCH).
Return the number of actual non-SCRATCH operands. */
gcc_assert (i <= insn_data[icode].n_operands);
return i;
}
 
static rtx
spu_expand_builtin_1 (struct spu_builtin_description *d,
tree exp, rtx target)
{
rtx pat;
rtx ops[8];
enum insn_code icode = (enum insn_code) d->icode;
enum machine_mode mode, tmode;
int i, p;
int n_operands;
tree return_type;
 
/* Set up ops[] with values from arglist. */
n_operands = expand_builtin_args (d, exp, target, ops);
 
/* Handle the target operand which must be operand 0. */
i = 0;
if (d->parm[0] != SPU_BTI_VOID)
{
 
/* We prefer the mode specified for the match_operand; otherwise
use the mode from the builtin function prototype. */
tmode = insn_data[d->icode].operand[0].mode;
if (tmode == VOIDmode)
tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
 
/* Try to use TARGET, because not using it can lead to extra copies,
and when all of the registers are in use those extra copies lead
to extra spills. */
if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
ops[0] = target;
else
target = ops[0] = gen_reg_rtx (tmode);
 
if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
abort ();
 
i++;
}
 
if (d->fcode == SPU_MASK_FOR_LOAD)
{
enum machine_mode mode = insn_data[icode].operand[1].mode;
tree arg;
rtx addr, op, pat;
 
/* get addr */
arg = CALL_EXPR_ARG (exp, 0);
gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE);
op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
addr = memory_address (mode, op);
 
/* negate addr */
op = gen_reg_rtx (GET_MODE (addr));
emit_insn (gen_rtx_SET (VOIDmode, op,
gen_rtx_NEG (GET_MODE (addr), addr)));
op = gen_rtx_MEM (mode, op);
 
pat = GEN_FCN (icode) (target, op);
if (!pat)
return 0;
emit_insn (pat);
return target;
}
 
/* Ignore align_hint, but still expand its args in case they have
side effects. */
if (icode == CODE_FOR_spu_align_hint)
return 0;
 
/* Handle the rest of the operands. */
for (p = 1; i < n_operands; i++, p++)
{
if (insn_data[d->icode].operand[i].mode != VOIDmode)
mode = insn_data[d->icode].operand[i].mode;
else
mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
 
/* mode can be VOIDmode here for labels */
 
/* For specific intrinsics with an immediate operand, e.g.,
si_ai(), we sometimes need to convert the scalar argument to a
vector argument by splatting the scalar. */
if (VECTOR_MODE_P (mode)
&& (GET_CODE (ops[i]) == CONST_INT
|| GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
|| GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
{
if (GET_CODE (ops[i]) == CONST_INT)
ops[i] = spu_const (mode, INTVAL (ops[i]));
else
{
rtx reg = gen_reg_rtx (mode);
enum machine_mode imode = GET_MODE_INNER (mode);
if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
if (imode != GET_MODE (ops[i]))
ops[i] = convert_to_mode (imode, ops[i],
TYPE_UNSIGNED (spu_builtin_types
[d->parm[i]]));
emit_insn (gen_spu_splats (reg, ops[i]));
ops[i] = reg;
}
}
 
spu_check_builtin_parm (d, ops[i], d->parm[p]);
 
if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
ops[i] = spu_force_reg (mode, ops[i]);
}
 
switch (n_operands)
{
case 0:
pat = GEN_FCN (icode) (0);
break;
case 1:
pat = GEN_FCN (icode) (ops[0]);
break;
case 2:
pat = GEN_FCN (icode) (ops[0], ops[1]);
break;
case 3:
pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
break;
case 4:
pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
break;
case 5:
pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
break;
case 6:
pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
break;
default:
abort ();
}
 
if (!pat)
abort ();
 
if (d->type == B_CALL || d->type == B_BISLED)
emit_call_insn (pat);
else if (d->type == B_JUMP)
{
emit_jump_insn (pat);
emit_barrier ();
}
else
emit_insn (pat);
 
return_type = spu_builtin_types[d->parm[0]];
if (d->parm[0] != SPU_BTI_VOID
&& GET_MODE (target) != TYPE_MODE (return_type))
{
/* TARGET is the return value. It should always have the mode of
the builtin function prototype. */
target = spu_force_reg (TYPE_MODE (return_type), target);
}
 
return target;
}
 
rtx
spu_expand_builtin (tree exp,
rtx target,
rtx subtarget ATTRIBUTE_UNUSED,
enum machine_mode mode ATTRIBUTE_UNUSED,
int ignore ATTRIBUTE_UNUSED)
{
tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS;
struct spu_builtin_description *d;
 
if (fcode < NUM_SPU_BUILTINS)
{
d = &spu_builtins[fcode];
 
return spu_expand_builtin_1 (d, exp, target);
}
abort ();
}
 
/* Implement targetm.vectorize.builtin_mul_widen_even. */
static tree
spu_builtin_mul_widen_even (tree type)
{
switch (TYPE_MODE (type))
{
case V8HImode:
if (TYPE_UNSIGNED (type))
return spu_builtins[SPU_MULE_0].fndecl;
else
return spu_builtins[SPU_MULE_1].fndecl;
break;
default:
return NULL_TREE;
}
}
 
/* Implement targetm.vectorize.builtin_mul_widen_odd. */
static tree
spu_builtin_mul_widen_odd (tree type)
{
switch (TYPE_MODE (type))
{
case V8HImode:
if (TYPE_UNSIGNED (type))
return spu_builtins[SPU_MULO_1].fndecl;
else
return spu_builtins[SPU_MULO_0].fndecl;
break;
default:
return NULL_TREE;
}
}
 
/* Implement targetm.vectorize.builtin_mask_for_load. */
static tree
spu_builtin_mask_for_load (void)
{
struct spu_builtin_description *d = &spu_builtins[SPU_MASK_FOR_LOAD];
gcc_assert (d);
return d->fndecl;
}
 
/* Implement targetm.vectorize.builtin_vectorization_cost. */
static int
spu_builtin_vectorization_cost (bool runtime_test)
{
/* If the branch of the runtime test is taken - i.e. - the vectorized
version is skipped - this incurs a misprediction cost (because the
vectorized version is expected to be the fall-through). So we subtract
the latency of a mispredicted branch from the costs that are incurred
when the vectorized version is executed. */
if (runtime_test)
return -19;
else
return 0;
}
 
/* Return true iff a data reference of TYPE can reach vector alignment (16)
after applying N iterations. This routine does not determine how many
iterations are required to reach the desired alignment. */
 
static bool
spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
{
if (is_packed)
return false;
 
/* All other types are naturally aligned. */
return true;
}
 
/* Implement targetm.vectorize.builtin_vec_perm. */
tree
spu_builtin_vec_perm (tree type, tree *mask_element_type)
{
struct spu_builtin_description *d;
 
*mask_element_type = unsigned_char_type_node;
 
switch (TYPE_MODE (type))
{
case V16QImode:
if (TYPE_UNSIGNED (type))
d = &spu_builtins[SPU_SHUFFLE_0];
else
d = &spu_builtins[SPU_SHUFFLE_1];
break;
 
case V8HImode:
if (TYPE_UNSIGNED (type))
d = &spu_builtins[SPU_SHUFFLE_2];
else
d = &spu_builtins[SPU_SHUFFLE_3];
break;
 
case V4SImode:
if (TYPE_UNSIGNED (type))
d = &spu_builtins[SPU_SHUFFLE_4];
else
d = &spu_builtins[SPU_SHUFFLE_5];
break;
 
case V2DImode:
if (TYPE_UNSIGNED (type))
d = &spu_builtins[SPU_SHUFFLE_6];
else
d = &spu_builtins[SPU_SHUFFLE_7];
break;
 
case V4SFmode:
d = &spu_builtins[SPU_SHUFFLE_8];
break;
 
case V2DFmode:
d = &spu_builtins[SPU_SHUFFLE_9];
break;
 
default:
return NULL_TREE;
}
 
gcc_assert (d);
return d->fndecl;
}
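 
/* Illustrative example (assuming the usual shufb selector semantics):
   the mask handed back to the vectorizer is a vector of 16 unsigned
   chars in which byte values 0x00-0x0f select bytes from the first
   operand and 0x10-0x1f select bytes from the second, so a mask of
 
     { 0x00,0x01,0x02,0x03, 0x10,0x11,0x12,0x13,
       0x04,0x05,0x06,0x07, 0x14,0x15,0x16,0x17 }
 
   interleaves the first two words of each V4SI operand.  */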
 
/* Return the appropriate mode for a named address pointer. */
static enum machine_mode
spu_addr_space_pointer_mode (addr_space_t addrspace)
{
switch (addrspace)
{
case ADDR_SPACE_GENERIC:
return ptr_mode;
case ADDR_SPACE_EA:
return EAmode;
default:
gcc_unreachable ();
}
}
 
/* Return the appropriate mode for a named address address. */
static enum machine_mode
spu_addr_space_address_mode (addr_space_t addrspace)
{
switch (addrspace)
{
case ADDR_SPACE_GENERIC:
return Pmode;
case ADDR_SPACE_EA:
return EAmode;
default:
gcc_unreachable ();
}
}
 
/* Determine if one named address space is a subset of another. */
 
static bool
spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
{
gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
 
if (subset == superset)
return true;
 
/* If we have -mno-address-space-conversion, treat __ea and generic as not
being subsets but instead as disjoint address spaces. */
else if (!TARGET_ADDRESS_SPACE_CONVERSION)
return false;
 
else
return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
}
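 
/* Illustrative consequence (a sketch of the generic named-address-space
   rules as they apply here): since generic is a subset of __ea by
   default, a generic pointer converts implicitly to __ea, while the
   reverse direction needs an explicit cast:
 
     int x;
     __ea int *p = &x;      allowed, generic -> __ea
     int *q = (int *) p;    explicit cast required
 
   With -mno-address-space-conversion the two spaces are treated as
   disjoint and no automatic conversion is performed.  */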
 
/* Convert from one address space to another. */
static rtx
spu_addr_space_convert (rtx op, tree from_type, tree to_type)
{
addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
 
gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
 
if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
{
rtx result, ls;
 
ls = gen_const_mem (DImode,
gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
set_mem_align (ls, 128);
 
result = gen_reg_rtx (Pmode);
ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
ls, const0_rtx, Pmode, 1);
 
emit_insn (gen_subsi3 (result, op, ls));
 
return result;
}
 
else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
{
rtx result, ls;
 
ls = gen_const_mem (DImode,
gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
set_mem_align (ls, 128);
 
result = gen_reg_rtx (EAmode);
ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
op = force_reg (Pmode, op);
ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
ls, const0_rtx, EAmode, 1);
op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
 
if (EAmode == SImode)
emit_insn (gen_addsi3 (result, op, ls));
else
emit_insn (gen_adddi3 (result, op, ls));
 
return result;
}
 
else
gcc_unreachable ();
}
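 
/* Worked example (sketch): __ea_local_store holds the effective address
   at which this SPU's local store is mapped.  If that is 0x10000000 and
   an __ea pointer holds 0x10000400, converting it to a generic
   (local-store) pointer yields 0x10000400 - 0x10000000 = 0x400;
   converting a generic pointer 0x400 back to __ea yields 0x10000400.
   The conditional moves above make a NULL pointer map to NULL in both
   directions.  */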
 
 
/* Count the total number of instructions in each pipe and return the
maximum, which is used as the Minimum Iteration Interval (MII)
in the modulo scheduler.  get_pipe () returns -2, -1, 0, or 1;
-2 means the instruction can go in either pipe0 or pipe1. */
static int
spu_sms_res_mii (struct ddg *g)
{
int i;
unsigned t[4] = {0, 0, 0, 0};
 
for (i = 0; i < g->num_nodes; i++)
{
rtx insn = g->nodes[i].insn;
int p = get_pipe (insn) + 2;
 
assert (p >= 0);
assert (p < 4);
 
t[p]++;
if (dump_file && INSN_P (insn))
fprintf (dump_file, "i%d %s %d %d\n",
INSN_UID (insn),
insn_data[INSN_CODE(insn)].name,
p, t[p]);
}
if (dump_file)
fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
 
return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
}
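 
/* Worked example: with counts t = {2, 0, 3, 1}, i.e. 2 either-pipe
   instructions, 3 pipe0 instructions and 1 pipe1 instruction, the MII
   is MAX ((2 + 3 + 1 + 1) / 2, MAX (3, 1)) = MAX (3, 3) = 3.  */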
 
 
void
spu_init_expanders (void)
{
if (cfun)
{
rtx r0, r1;
/* The hard frame pointer (HARD_FRAME_POINTER_REGNUM) is only 128 bit
aligned when frame_pointer_needed is true.  We don't know that until
we're expanding the prologue. */
REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
 
/* A number of passes use LAST_VIRTUAL_REGISTER+1 and
LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
to be treated as aligned, so generate them here. */
r0 = gen_reg_rtx (SImode);
r1 = gen_reg_rtx (SImode);
mark_reg_pointer (r0, 128);
mark_reg_pointer (r1, 128);
gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
&& REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
}
}
 
static enum machine_mode
spu_libgcc_cmp_return_mode (void)
{
 
/* For the SPU, word_mode is TImode, so it is better to use SImode
for compare returns. */
return SImode;
}
 
static enum machine_mode
spu_libgcc_shift_count_mode (void)
{
/* For the SPU, word_mode is TImode, so it is better to use SImode
for shift counts. */
return SImode;
}
 
/* An early place to adjust some flags after GCC has finished processing
them. */
static void
asm_file_start (void)
{
/* Variable tracking should be run after all optimizations which
change order of insns. It also needs a valid CFG. */
spu_flag_var_tracking = flag_var_tracking;
flag_var_tracking = 0;
 
default_file_start ();
}
 
/* Implement targetm.section_type_flags. */
static unsigned int
spu_section_type_flags (tree decl, const char *name, int reloc)
{
/* .toe needs to have type @nobits. */
if (strcmp (name, ".toe") == 0)
return SECTION_BSS;
/* Don't load _ea into the current address space. */
if (strcmp (name, "._ea") == 0)
return SECTION_WRITE | SECTION_DEBUG;
return default_section_type_flags (decl, name, reloc);
}
 
/* Implement targetm.select_section. */
static section *
spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
{
/* Variables and constants defined in the __ea address space
go into a special section named "._ea". */
if (TREE_TYPE (decl) != error_mark_node
&& TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
{
/* We might get called with string constants, but get_named_section
doesn't like them as they are not DECLs. Also, we need to set
flags in that case. */
if (!DECL_P (decl))
return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
 
return get_named_section (decl, "._ea", reloc);
}
 
return default_elf_select_section (decl, reloc, align);
}
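 
/* Illustrative example: a definition such as
 
     __ea int counter;
 
   is placed in the "._ea" section by the hook above, and
   spu_section_type_flags marks that section so it is not loaded into
   the SPU local store.  */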
 
/* Implement targetm.unique_section. */
static void
spu_unique_section (tree decl, int reloc)
{
/* We don't support unique section names in the __ea address
space for now. */
if (TREE_TYPE (decl) != error_mark_node
&& TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
return;
 
default_unique_section (decl, reloc);
}
 
/* Generate a constant or register which contains 2^SCALE.  We assume
the result is valid for MODE.  Currently, MODE must be V4SFmode and
SCALE must be SImode or a CONST_INT. */
rtx
spu_gen_exp2 (enum machine_mode mode, rtx scale)
{
gcc_assert (mode == V4SFmode);
gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
if (GET_CODE (scale) != CONST_INT)
{
/* unsigned int exp = (127 + scale) << 23;
__vector float m = (__vector float) spu_splats (exp); */
rtx reg = force_reg (SImode, scale);
rtx exp = gen_reg_rtx (SImode);
rtx mul = gen_reg_rtx (mode);
emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
return mul;
}
else
{
HOST_WIDE_INT exp = 127 + INTVAL (scale);
unsigned char arr[16];
arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
arr[2] = arr[6] = arr[10] = arr[14] = 0;
arr[3] = arr[7] = arr[11] = arr[15] = 0;
return array_to_constant (mode, arr);
}
}
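 
/* Worked example for the constant case: with SCALE = 3, exp = 127 + 3
   = 130 = 0x82, so each 32-bit word of the V4SF constant is built from
   the bytes {0x41, 0x00, 0x00, 0x00} (0x82 >> 1 = 0x41, and the low
   byte of 0x82 << 7 is 0x00), i.e. the IEEE-754 pattern 0x41000000 =
   8.0f = 2^3 in every element.  */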
 
/* After reload, just change the convert into a move instruction
or a dead instruction. */
void
spu_split_convert (rtx ops[])
{
if (REGNO (ops[0]) == REGNO (ops[1]))
emit_note (NOTE_INSN_DELETED);
else
{
/* Use TImode always as this might help hard reg copyprop. */
rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
emit_insn (gen_move_insn (op0, op1));
}
}
 
void
spu_function_profiler (FILE * file, int labelno)
{
fprintf (file, "# profile\n");
fprintf (file, "brsl $75, _mcount\n");
}
 
#include "gt-spu.h"
spu.c Property changes : Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +Id \ No newline at end of property Index: spu_cache.h =================================================================== --- spu_cache.h (nonexistent) +++ spu_cache.h (revision 384) @@ -0,0 +1,39 @@ +/* Copyright (C) 2008, 2009 Free Software Foundation, Inc. + + This file is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your option) + any later version. + + This file is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifndef _SPU_CACHE_H +#define _SPU_CACHE_H + +void *__cache_fetch_dirty (__ea void *ea, int n_bytes_dirty); +void *__cache_fetch (__ea void *ea); +void __cache_evict (__ea void *ea); +void __cache_flush (void); +void __cache_touch (__ea void *ea); + +#define cache_fetch_dirty(_ea, _n_bytes_dirty) \ + __cache_fetch_dirty(_ea, _n_bytes_dirty) + +#define cache_fetch(_ea) __cache_fetch(_ea) +#define cache_touch(_ea) __cache_touch(_ea) +#define cache_evict(_ea) __cache_evict(_ea) +#define cache_flush() __cache_flush() + +#endif
spu_cache.h Property changes : Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +Id \ No newline at end of property Index: spu-protos.h =================================================================== --- spu-protos.h (nonexistent) +++ spu-protos.h (revision 384) @@ -0,0 +1,97 @@ +/* Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc. + + This file is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your option) + any later version. + + This file is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#ifndef _SPU_PROTOS_ +#define _SPU_PROTOS_ + +#include "rtl.h" + +extern void spu_cpu_cpp_builtins (struct cpp_reader * pfile); +extern void builtin_define_std (const char *); +extern void spu_optimization_options (int level, int size); +extern void spu_override_options (void); +extern void spu_c_common_override_options (void); +extern int valid_subreg (rtx op); +extern void spu_expand_extv (rtx * ops, int unsignedp); +extern void spu_expand_insv (rtx * ops); +extern int spu_expand_block_move (rtx * ops); +extern void spu_emit_branch_or_set (int is_set, rtx cmp, rtx * operands); +extern int spu_emit_vector_cond_expr (rtx, rtx, rtx, rtx, rtx, rtx); +extern HOST_WIDE_INT const_double_to_hwint (rtx x); +extern rtx hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v); +extern void print_operand_address (FILE * file, register rtx addr); +extern void print_operand (FILE * file, rtx x, int code); +extern int spu_split_immediate (rtx * ops); +extern int spu_saved_regs_size (void); +extern int direct_return (void); +extern void spu_expand_prologue (void); +extern void spu_expand_epilogue (unsigned char sibcall_p); +extern rtx spu_return_addr (int count, rtx frame); +extern rtx spu_const (enum machine_mode mode, HOST_WIDE_INT val); +extern rtx spu_const_from_ints (enum machine_mode mode, + int a, int b, int c, int d); +extern struct rtx_def *spu_float_const (const char *string, + enum machine_mode mode); +extern int immediate_load_p (rtx op, enum machine_mode mode); +extern int logical_immediate_p (rtx op, enum machine_mode mode); +extern int iohl_immediate_p (rtx op, enum machine_mode mode); +extern int arith_immediate_p (rtx op, enum machine_mode mode, + HOST_WIDE_INT low, HOST_WIDE_INT high); +extern bool exp2_immediate_p (rtx op, enum machine_mode mode, int low, + int high); +extern int spu_constant_address_p (rtx x); +extern int spu_legitimate_constant_p (rtx x); +extern int spu_initial_elimination_offset (int from, int to); +extern rtx spu_function_value (const_tree type, const_tree func); +extern rtx spu_function_arg (int cum, enum machine_mode mode, tree type, + int named); +extern void spu_setup_incoming_varargs (int *cum, enum machine_mode mode, + tree type, int *pretend_size, + int no_rtl); +extern void spu_conditional_register_usage (void); +extern int spu_expand_mov (rtx * ops, enum machine_mode mode); +extern int spu_split_load (rtx * ops); +extern int spu_split_store (rtx * ops); +extern int fsmbi_const_p (rtx x); +extern int cpat_const_p (rtx x, enum 
machine_mode mode); +extern rtx gen_cpat_const (rtx * ops); +extern void constant_to_array (enum machine_mode mode, rtx x, + unsigned char *arr); +extern rtx array_to_constant (enum machine_mode mode, const unsigned char *arr); +extern rtx spu_gen_exp2 (enum machine_mode mode, rtx x); +extern void spu_allocate_stack (rtx op0, rtx op1); +extern void spu_restore_stack_nonlocal (rtx op0, rtx op1); +extern void spu_restore_stack_block (rtx op0, rtx op1); +extern rtx spu_gen_subreg (enum machine_mode mode, rtx x); +extern int spu_safe_dma(HOST_WIDE_INT channel); +extern void spu_builtin_splats (rtx ops[]); +extern void spu_builtin_extract (rtx ops[]); +extern void spu_builtin_insert (rtx ops[]); +extern void spu_builtin_promote (rtx ops[]); +extern void spu_expand_sign_extend (rtx ops[]); +extern void spu_expand_vector_init (rtx target, rtx vals); +extern void spu_init_expanders (void); +extern void spu_split_convert (rtx *); +extern void spu_function_profiler (FILE *, int); + +/* spu-c.c */ +extern tree spu_resolve_overloaded_builtin (location_t, tree fndecl, + void *fnargs); +extern rtx spu_expand_builtin (tree exp, rtx target, rtx subtarget, + enum machine_mode mode, int ignore); +extern rtx spu_expand_builtin (tree, rtx, rtx, enum machine_mode, int); + +#endif
spu-protos.h Property changes : Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +Id \ No newline at end of property Index: predicates.md =================================================================== --- predicates.md (nonexistent) +++ predicates.md (revision 384) @@ -0,0 +1,122 @@ +;; Predicate definitions for CELL SPU +;; Copyright (C) 2006, 2007 Free Software Foundation, Inc. +;; +;; This file is free software; you can redistribute it and/or modify it under +;; the terms of the GNU General Public License as published by the Free +;; Software Foundation; either version 3 of the License, or (at your option) +;; any later version. + +;; This file is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +;; for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Return 1 if operand is constant zero of its mode +(define_predicate "const_zero_operand" + (and (match_code "const_int,const,const_double,const_vector") + (match_test "op == CONST0_RTX (mode)"))) + +(define_predicate "const_one_operand" + (and (match_code "const_int,const,const_double,const_vector") + (match_test "op == CONST1_RTX (mode)"))) + +(define_predicate "spu_reg_operand" + (and (match_operand 0 "register_operand") + (ior (not (match_code "subreg")) + (match_test "valid_subreg (op)")))) + +(define_predicate "spu_nonimm_operand" + (and (match_operand 0 "nonimmediate_operand") + (ior (not (match_code "subreg")) + (match_test "valid_subreg (op)")))) + +(define_predicate "spu_nonmem_operand" + (and (match_operand 0 "nonmemory_operand") + (ior (not (match_code "subreg")) + (match_test "valid_subreg (op)")))) + +(define_predicate "spu_mov_operand" + (ior (match_operand 0 "memory_operand") + (match_operand 0 "spu_nonmem_operand"))) + +(define_predicate "spu_dest_operand" + (ior (match_operand 0 "memory_operand") + (match_operand 0 "spu_reg_operand"))) + +(define_predicate "call_operand" + (and (match_code "mem") + (match_test "(!TARGET_LARGE_MEM && satisfies_constraint_S (op)) + || (satisfies_constraint_R (op) + && REGNO (XEXP (op, 0)) != FRAME_POINTER_REGNUM + && REGNO (XEXP (op, 0)) != ARG_POINTER_REGNUM + && (REGNO (XEXP (op, 0)) < FIRST_PSEUDO_REGISTER + || REGNO (XEXP (op, 0)) > LAST_VIRTUAL_REGISTER))"))) + +(define_predicate "vec_imm_operand" + (and (match_code "const_int,const_double,const_vector") + (match_test "spu_legitimate_constant_p (op)"))) + +(define_predicate "spu_arith_operand" + (match_code "reg,subreg,const_int,const_vector") + { + if (spu_reg_operand (op, mode)) + return 1; + if (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_VECTOR) + return arith_immediate_p (op, mode, -0x200, 0x1ff); + return 0; + }) + +(define_predicate "spu_logical_operand" + (match_code "reg,subreg,const_int,const_double,const_vector") + { + if (spu_reg_operand (op, mode)) + return 1; + if (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE + || GET_CODE (op) == CONST_VECTOR) + return logical_immediate_p (op, mode); + return 0; + }) + +(define_predicate "spu_ior_operand" + (match_code "reg,subreg,const_int,const_double,const_vector") + { + if (spu_reg_operand (op, mode)) + return 1; + if (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE + || GET_CODE (op) == CONST_VECTOR) + return logical_immediate_p 
(op, mode) + || iohl_immediate_p (op, mode); + return 0; + }) + +(define_predicate "imm_K_operand" + (and (match_code "const_int") + (match_test "arith_immediate_p (op, mode, -0x200, 0x1ff)"))) + +;; Return 1 if OP is a comparison operation that is valid for a branch insn. +;; We only check the opcode against the mode of the register value here. +(define_predicate "branch_comparison_operator" + (and (match_code "eq,ne") + (ior (match_test "GET_MODE (XEXP (op, 0)) == HImode") + (match_test "GET_MODE (XEXP (op, 0)) == SImode")))) + +(define_predicate "spu_inv_exp2_operand" + (and (match_code "const_double,const_vector") + (and (match_operand 0 "immediate_operand") + (match_test "exp2_immediate_p (op, mode, -126, 0)")))) + +(define_predicate "spu_exp2_operand" + (and (match_code "const_double,const_vector") + (and (match_operand 0 "immediate_operand") + (match_test "exp2_immediate_p (op, mode, 0, 127)")))) + +(define_predicate "shiftrt_operator" + (match_code "lshiftrt,ashiftrt")) + +(define_predicate "extend_operator" + (match_code "sign_extend,zero_extend")) + Index: divmodti4.c =================================================================== --- divmodti4.c (nonexistent) +++ divmodti4.c (revision 384) @@ -0,0 +1,166 @@ +/* Copyright (C) 2008, 2009 Free Software Foundation, Inc. + + This file is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your option) + any later version. + + This file is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . 
*/ + +#include + +typedef unsigned int UTItype __attribute__ ((mode (TI))); +typedef int TItype __attribute__ ((mode (TI))); +TItype __divti3 (TItype u, TItype v); +TItype __modti3 (TItype u, TItype v); +UTItype __udivti3 (UTItype u, UTItype v); +UTItype __umodti3 (UTItype u, UTItype v); +UTItype __udivmodti4 (UTItype u, UTItype v, UTItype *w); + +inline static unsigned int +count_leading_zeros (UTItype x) +{ + qword c = si_clz (*(qword *) & x); + qword cmp0 = si_cgti (c, 31); + qword cmp1 = si_and (cmp0, si_shlqbyi (cmp0, 4)); + qword cmp2 = si_and (cmp1, si_shlqbyi (cmp0, 8)); + qword s = si_a (c, si_and (cmp0, si_shlqbyi (c, 4))); + s = si_a (s, si_and (cmp1, si_shlqbyi (c, 8))); + s = si_a (s, si_and (cmp2, si_shlqbyi (c, 12))); + return si_to_uint (s); +} + +/* Based on implementation of udivmodsi4, which is essentially + * an optimized version of gcc/config/udivmodsi4.c + clz %7,%2 + clz %4,%1 + il %5,1 + fsmbi %0,0 + sf %7,%4,%7 + ori %3,%1,0 + shl %5,%5,%7 + shl %4,%2,%7 +1: or %8,%0,%5 + rotmi %5,%5,-1 + clgt %6,%4,%3 + sf %7,%4,%3 + rotmi %4,%4,-1 + selb %0,%8,%0,%6 + selb %3,%7,%3,%6 +3: brnz %5,1b + */ + +UTItype +__udivmodti4 (UTItype num, UTItype den, UTItype * rp) +{ + qword shift = + si_from_uint (count_leading_zeros (den) - count_leading_zeros (num)); + qword n0 = *(qword *) & num; + qword d0 = *(qword *) & den; + qword bit = si_andi (si_fsmbi (1), 1); + qword r0 = si_il (0); + qword m1 = si_fsmbi (0x000f); + qword mask, r1, n1; + + d0 = si_shlqbybi (si_shlqbi (d0, shift), shift); + bit = si_shlqbybi (si_shlqbi (bit, shift), shift); + + do + { + r1 = si_or (r0, bit); + + // n1 = n0 - d0 in TImode + n1 = si_bg (d0, n0); + n1 = si_shlqbyi (n1, 4); + n1 = si_sf (m1, n1); + n1 = si_bgx (d0, n0, n1); + n1 = si_shlqbyi (n1, 4); + n1 = si_sf (m1, n1); + n1 = si_bgx (d0, n0, n1); + n1 = si_shlqbyi (n1, 4); + n1 = si_sf (m1, n1); + n1 = si_sfx (d0, n0, n1); + + mask = si_fsm (si_cgti (n1, -1)); + r0 = si_selb (r0, r1, mask); + n0 = si_selb (n0, n1, mask); + bit = si_rotqmbii (bit, -1); + d0 = si_rotqmbii (d0, -1); + } + while (si_to_uint (si_orx (bit))); + if (rp) + *rp = *(UTItype *) & n0; + return *(UTItype *) & r0; +} + +UTItype +__udivti3 (UTItype n, UTItype d) +{ + return __udivmodti4 (n, d, (UTItype *)0); +} + +UTItype +__umodti3 (UTItype n, UTItype d) +{ + UTItype w; + __udivmodti4 (n, d, &w); + return w; +} + +TItype +__divti3 (TItype n, TItype d) +{ + int c = 0; + TItype w; + + if (n < 0) + { + c = ~c; + n = -n; + } + if (d < 0) + { + c = ~c; + d = -d; + } + + w = __udivmodti4 (n, d, (UTItype *)0); + if (c) + w = -w; + return w; +} + +TItype +__modti3 (TItype n, TItype d) +{ + int c = 0; + TItype w; + + if (n < 0) + { + c = ~c; + n = -n; + } + if (d < 0) + { + c = ~c; + d = -d; + } + + __udivmodti4 (n, d, (UTItype *) &w); + if (c) + w = -w; + return w; +}
divmodti4.c Property changes : Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +Id \ No newline at end of property Index: mfc_tag_table.c =================================================================== --- mfc_tag_table.c (nonexistent) +++ mfc_tag_table.c (revision 384) @@ -0,0 +1,39 @@ +/* Copyright (C) 2007, 2009 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +/* The free tag table used by the MFC tag manager, with tag0 + reserved for the overlay manager. */ +__vector unsigned int +__mfc_tag_table = (__vector unsigned int) { 0x7FFFFFFF, -1, -1, -1 }; + +/* Arrange to release tag0 if overlays are not present. */ +static void __mfc_tag_init (void) __attribute__ ((constructor)); + +static void +__mfc_tag_init (void) +{ + extern void _ovly_table __attribute__ ((weak)); + + if (&_ovly_table == 0) + __mfc_tag_table = (__vector unsigned int) { -1, -1, -1, -1 }; +}
mfc_tag_table.c Property changes : Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +Id \ No newline at end of property Index: cachemgr.c =================================================================== --- cachemgr.c (nonexistent) +++ cachemgr.c (revision 384) @@ -0,0 +1,438 @@ +/* Copyright (C) 2008, 2009 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#include +#include +#include +#include + +extern unsigned long long __ea_local_store; +extern char __cache_tag_array_size; + +#define LINE_SIZE 128 +#define TAG_MASK (LINE_SIZE - 1) + +#define WAYS 4 +#define SET_MASK ((int) &__cache_tag_array_size - LINE_SIZE) + +#define CACHE_LINES ((int) &__cache_tag_array_size / \ + sizeof (struct __cache_tag_array) * WAYS) + +struct __cache_tag_array +{ + unsigned int tag_lo[WAYS]; + unsigned int tag_hi[WAYS]; + void *base[WAYS]; + int reserved[WAYS]; + vector unsigned short dirty_bits[WAYS]; +}; + +extern struct __cache_tag_array __cache_tag_array[]; +extern char __cache[]; + +/* In order to make the code seem a little cleaner, and to avoid having + 64/32 bit ifdefs all over the place, we use macros. */ + +#ifdef __EA64__ +typedef unsigned long long addr; + +#define CHECK_TAG(_entry, _way, _tag) \ + ((_entry)->tag_lo[(_way)] == ((_tag) & 0xFFFFFFFF) \ + && (_entry)->tag_hi[(_way)] == ((_tag) >> 32)) + +#define GET_TAG(_entry, _way) \ + ((unsigned long long)(_entry)->tag_hi[(_way)] << 32 \ + | (unsigned long long)(_entry)->tag_lo[(_way)]) + +#define SET_TAG(_entry, _way, _tag) \ + (_entry)->tag_lo[(_way)] = (_tag) & 0xFFFFFFFF; \ + (_entry)->tag_hi[(_way)] = (_tag) >> 32 + +#else /*__EA32__*/ +typedef unsigned long addr; + +#define CHECK_TAG(_entry, _way, _tag) \ + ((_entry)->tag_lo[(_way)] == (_tag)) + +#define GET_TAG(_entry, _way) \ + ((_entry)->tag_lo[(_way)]) + +#define SET_TAG(_entry, _way, _tag) \ + (_entry)->tag_lo[(_way)] = (_tag) + +#endif + +/* In GET_ENTRY, we cast away the high 32 bits, + as the tag is only in the low 32. 
*/ + +#define GET_ENTRY(_addr) \ + ((struct __cache_tag_array *) \ + si_to_uint (si_a (si_and (si_from_uint ((unsigned int) (addr) (_addr)), \ + si_from_uint (SET_MASK)), \ + si_from_uint ((unsigned int) __cache_tag_array)))) + +#define GET_CACHE_LINE(_addr, _way) \ + ((void *) (__cache + ((_addr) & SET_MASK) * WAYS) + ((_way) * LINE_SIZE)); + +#define CHECK_DIRTY(_vec) (si_to_uint (si_orx ((qword) (_vec)))) +#define SET_EMPTY(_entry, _way) ((_entry)->tag_lo[(_way)] = 1) +#define CHECK_EMPTY(_entry, _way) ((_entry)->tag_lo[(_way)] == 1) + +#define LS_FLAG 0x80000000 +#define SET_IS_LS(_entry, _way) ((_entry)->reserved[(_way)] |= LS_FLAG) +#define CHECK_IS_LS(_entry, _way) ((_entry)->reserved[(_way)] & LS_FLAG) +#define GET_LRU(_entry, _way) ((_entry)->reserved[(_way)] & ~LS_FLAG) + +static int dma_tag = 32; + +static void +__cache_evict_entry (struct __cache_tag_array *entry, int way) +{ + addr tag = GET_TAG (entry, way); + + if (CHECK_DIRTY (entry->dirty_bits[way]) && !CHECK_IS_LS (entry, way)) + { +#ifdef NONATOMIC + /* Non-atomic writes. */ + unsigned int oldmask, mach_stat; + char *line = ((void *) 0); + + /* Enter critical section. */ + mach_stat = spu_readch (SPU_RdMachStat); + spu_idisable (); + + /* Issue DMA request. */ + line = GET_CACHE_LINE (entry->tag_lo[way], way); + mfc_put (line, tag, LINE_SIZE, dma_tag, 0, 0); + + /* Wait for DMA completion. */ + oldmask = mfc_read_tag_mask (); + mfc_write_tag_mask (1 << dma_tag); + mfc_read_tag_status_all (); + mfc_write_tag_mask (oldmask); + + /* Leave critical section. */ + if (__builtin_expect (mach_stat & 1, 0)) + spu_ienable (); +#else + /* Allocate a buffer large enough that we know it has 128 bytes + that are 128 byte aligned (for DMA). */ + + char buffer[LINE_SIZE + 127]; + qword *buf_ptr = (qword *) (((unsigned int) (buffer) + 127) & ~127); + qword *line = GET_CACHE_LINE (entry->tag_lo[way], way); + qword bits; + unsigned int mach_stat; + + /* Enter critical section. */ + mach_stat = spu_readch (SPU_RdMachStat); + spu_idisable (); + + do + { + /* We atomically read the current memory into a buffer + modify the dirty bytes in the buffer, and write it + back. If writeback fails, loop and try again. */ + + mfc_getllar (buf_ptr, tag, 0, 0); + mfc_read_atomic_status (); + + /* The method we're using to write 16 dirty bytes into + the buffer at a time uses fsmb which in turn uses + the least significant 16 bits of word 0, so we + load the bits and rotate so that the first bit of + the bitmap is in the first bit that fsmb will use. */ + + bits = (qword) entry->dirty_bits[way]; + bits = si_rotqbyi (bits, -2); + + /* Si_fsmb creates the mask of dirty bytes. + Use selb to nab the appropriate bits. */ + buf_ptr[0] = si_selb (buf_ptr[0], line[0], si_fsmb (bits)); + + /* Rotate to next 16 byte section of cache. 
*/ + bits = si_rotqbyi (bits, 2); + + buf_ptr[1] = si_selb (buf_ptr[1], line[1], si_fsmb (bits)); + bits = si_rotqbyi (bits, 2); + buf_ptr[2] = si_selb (buf_ptr[2], line[2], si_fsmb (bits)); + bits = si_rotqbyi (bits, 2); + buf_ptr[3] = si_selb (buf_ptr[3], line[3], si_fsmb (bits)); + bits = si_rotqbyi (bits, 2); + buf_ptr[4] = si_selb (buf_ptr[4], line[4], si_fsmb (bits)); + bits = si_rotqbyi (bits, 2); + buf_ptr[5] = si_selb (buf_ptr[5], line[5], si_fsmb (bits)); + bits = si_rotqbyi (bits, 2); + buf_ptr[6] = si_selb (buf_ptr[6], line[6], si_fsmb (bits)); + bits = si_rotqbyi (bits, 2); + buf_ptr[7] = si_selb (buf_ptr[7], line[7], si_fsmb (bits)); + bits = si_rotqbyi (bits, 2); + + mfc_putllc (buf_ptr, tag, 0, 0); + } + while (mfc_read_atomic_status ()); + + /* Leave critical section. */ + if (__builtin_expect (mach_stat & 1, 0)) + spu_ienable (); +#endif + } + + /* In any case, marking the lo tag with 1 which denotes empty. */ + SET_EMPTY (entry, way); + entry->dirty_bits[way] = (vector unsigned short) si_from_uint (0); +} + +void +__cache_evict (__ea void *ea) +{ + addr tag = (addr) ea & ~TAG_MASK; + struct __cache_tag_array *entry = GET_ENTRY (ea); + int i = 0; + + /* Cycles through all the possible ways an address could be at + and evicts the way if found. */ + + for (i = 0; i < WAYS; i++) + if (CHECK_TAG (entry, i, tag)) + __cache_evict_entry (entry, i); +} + +static void * +__cache_fill (int way, addr tag) +{ + unsigned int oldmask, mach_stat; + char *line = ((void *) 0); + + /* Reserve our DMA tag. */ + if (dma_tag == 32) + dma_tag = mfc_tag_reserve (); + + /* Enter critical section. */ + mach_stat = spu_readch (SPU_RdMachStat); + spu_idisable (); + + /* Issue DMA request. */ + line = GET_CACHE_LINE (tag, way); + mfc_get (line, tag, LINE_SIZE, dma_tag, 0, 0); + + /* Wait for DMA completion. */ + oldmask = mfc_read_tag_mask (); + mfc_write_tag_mask (1 << dma_tag); + mfc_read_tag_status_all (); + mfc_write_tag_mask (oldmask); + + /* Leave critical section. */ + if (__builtin_expect (mach_stat & 1, 0)) + spu_ienable (); + + return (void *) line; +} + +static void +__cache_miss (__ea void *ea, struct __cache_tag_array *entry, int way) +{ + + addr tag = (addr) ea & ~TAG_MASK; + unsigned int lru = 0; + int i = 0; + int idx = 0; + + /* If way > 4, then there are no empty slots, so we must evict + the least recently used entry. */ + if (way >= 4) + { + for (i = 0; i < WAYS; i++) + { + if (GET_LRU (entry, i) > lru) + { + lru = GET_LRU (entry, i); + idx = i; + } + } + __cache_evict_entry (entry, idx); + way = idx; + } + + /* Set the empty entry's tag and fill it's cache line. */ + + SET_TAG (entry, way, tag); + entry->reserved[way] = 0; + + /* Check if the address is just an effective address within the + SPU's local store. */ + + /* Because the LS is not 256k aligned, we can't do a nice and mask + here to compare, so we must check the whole range. */ + + if ((addr) ea >= (addr) __ea_local_store + && (addr) ea < (addr) (__ea_local_store + 0x40000)) + { + SET_IS_LS (entry, way); + entry->base[way] = + (void *) ((unsigned int) ((addr) ea - + (addr) __ea_local_store) & ~0x7f); + } + else + { + entry->base[way] = __cache_fill (way, tag); + } +} + +void * +__cache_fetch_dirty (__ea void *ea, int n_bytes_dirty) +{ +#ifdef __EA64__ + unsigned int tag_hi; + qword etag_hi; +#endif + unsigned int tag_lo; + struct __cache_tag_array *entry; + + qword etag_lo; + qword equal; + qword bit_mask; + qword way; + + /* This first chunk, we merely fill the pointer and tag. 
*/ + + entry = GET_ENTRY (ea); + +#ifndef __EA64__ + tag_lo = + si_to_uint (si_andc + (si_shufb + (si_from_uint ((addr) ea), si_from_uint (0), + si_from_uint (0x00010203)), si_from_uint (TAG_MASK))); +#else + tag_lo = + si_to_uint (si_andc + (si_shufb + (si_from_ullong ((addr) ea), si_from_uint (0), + si_from_uint (0x04050607)), si_from_uint (TAG_MASK))); + + tag_hi = + si_to_uint (si_shufb + (si_from_ullong ((addr) ea), si_from_uint (0), + si_from_uint (0x00010203))); +#endif + + /* Increment LRU in reserved bytes. */ + si_stqd (si_ai (si_lqd (si_from_ptr (entry), 48), 1), + si_from_ptr (entry), 48); + +missreturn: + /* Check if the entry's lo_tag is equal to the address' lo_tag. */ + etag_lo = si_lqd (si_from_ptr (entry), 0); + equal = si_ceq (etag_lo, si_from_uint (tag_lo)); +#ifdef __EA64__ + /* And the high tag too. */ + etag_hi = si_lqd (si_from_ptr (entry), 16); + equal = si_and (equal, (si_ceq (etag_hi, si_from_uint (tag_hi)))); +#endif + + if ((si_to_uint (si_orx (equal)) == 0)) + goto misshandler; + + if (n_bytes_dirty) + { + /* way = 0x40,0x50,0x60,0x70 for each way, which is also the + offset of the appropriate dirty bits. */ + way = si_shli (si_clz (si_gbb (equal)), 2); + + /* To create the bit_mask, we set it to all 1s (uint -1), then we + shift it over (128 - n_bytes_dirty) times. */ + + bit_mask = si_from_uint (-1); + + bit_mask = + si_shlqby (bit_mask, si_from_uint ((LINE_SIZE - n_bytes_dirty) / 8)); + + bit_mask = + si_shlqbi (bit_mask, si_from_uint ((LINE_SIZE - n_bytes_dirty) % 8)); + + /* Rotate it around to the correct offset. */ + bit_mask = + si_rotqby (bit_mask, + si_from_uint (-1 * ((addr) ea & TAG_MASK) / 8)); + + bit_mask = + si_rotqbi (bit_mask, + si_from_uint (-1 * ((addr) ea & TAG_MASK) % 8)); + + /* Update the dirty bits. */ + si_stqx (si_or (si_lqx (si_from_ptr (entry), way), bit_mask), + si_from_ptr (entry), way); + }; + + /* We've definitely found the right entry, set LRU (reserved) to 0 + maintaining the LS flag (MSB). */ + + si_stqd (si_andc + (si_lqd (si_from_ptr (entry), 48), + si_and (equal, si_from_uint (~(LS_FLAG)))), + si_from_ptr (entry), 48); + + return (void *) + si_to_uint (si_a + (si_orx + (si_and (si_lqd (si_from_ptr (entry), 32), equal)), + si_from_uint (((unsigned int) (addr) ea) & TAG_MASK))); + +misshandler: + equal = si_ceqi (etag_lo, 1); + __cache_miss (ea, entry, (si_to_uint (si_clz (si_gbb (equal))) - 16) >> 2); + goto missreturn; +} + +void * +__cache_fetch (__ea void *ea) +{ + return __cache_fetch_dirty (ea, 0); +} + +void +__cache_touch (__ea void *ea __attribute__ ((unused))) +{ + /* NO-OP for now. */ +} + +void __cache_flush (void) __attribute__ ((destructor)); +void +__cache_flush (void) +{ + struct __cache_tag_array *entry = __cache_tag_array; + unsigned int i; + int j; + + /* Cycle through each cache entry and evict all used ways. */ + + for (i = 0; i < CACHE_LINES / WAYS; i++) + { + for (j = 0; j < WAYS; j++) + if (!CHECK_EMPTY (entry, j)) + __cache_evict_entry (entry, j); + + entry++; + } +}
cachemgr.c Property changes : Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +Id \ No newline at end of property Index: spu-builtins.def =================================================================== --- spu-builtins.def (nonexistent) +++ spu-builtins.def (revision 384) @@ -0,0 +1,781 @@ +/* Definitions of builtin functions for the Synergistic Processing Unit (SPU). */ +/* Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc. + + This file is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your option) + any later version. + + This file is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + + +/* The first argument to these macros is the return type of the builtin, + * the rest are arguments of the builtin. */ +#define _A1(a) {a, SPU_BTI_END_OF_PARAMS} +#define _A2(a,b) {a, b, SPU_BTI_END_OF_PARAMS} +#define _A3(a,b,c) {a, b, c, SPU_BTI_END_OF_PARAMS} +#define _A4(a,b,c,d) {a, b, c, d, SPU_BTI_END_OF_PARAMS} + +/* definitions to support si intrinsic functions: (These and other builtin + * definitions must precede definitions of the overloaded generic intrinsics */ + +DEF_BUILTIN (SI_LQD, CODE_FOR_spu_lqd, "si_lqd", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10_4)) +DEF_BUILTIN (SI_LQX, CODE_FOR_spu_lqx, "si_lqx", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_LQA, CODE_FOR_spu_lqa, "si_lqa", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_U16_2)) +DEF_BUILTIN (SI_LQR, CODE_FOR_spu_lqr, "si_lqr", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_S16_2)) +DEF_BUILTIN (SI_STQD, CODE_FOR_spu_stqd, "si_stqd", B_INSN, _A4(SPU_BTI_VOID, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10_4)) +DEF_BUILTIN (SI_STQX, CODE_FOR_spu_stqx, "si_stqx", B_INSN, _A4(SPU_BTI_VOID, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_STQA, CODE_FOR_spu_stqa, "si_stqa", B_INSN, _A3(SPU_BTI_VOID, SPU_BTI_QUADWORD, SPU_BTI_U16_2)) +DEF_BUILTIN (SI_STQR, CODE_FOR_spu_stqr, "si_stqr", B_INSN, _A3(SPU_BTI_VOID, SPU_BTI_QUADWORD, SPU_BTI_S16_2)) +DEF_BUILTIN (SI_CBD, CODE_FOR_spu_cbx, "si_cbd", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S7)) +DEF_BUILTIN (SI_CBX, CODE_FOR_spu_cbx, "si_cbx", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_CHD, CODE_FOR_spu_chx, "si_chd", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S7)) +DEF_BUILTIN (SI_CHX, CODE_FOR_spu_chx, "si_chx", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_CWD, CODE_FOR_spu_cwx, "si_cwd", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S7)) +DEF_BUILTIN (SI_CWX, CODE_FOR_spu_cwx, "si_cwx", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_CDD, CODE_FOR_spu_cdx, "si_cdd", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S7)) +DEF_BUILTIN (SI_CDX, CODE_FOR_spu_cdx, "si_cdx", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_ILH, CODE_FOR_movv8hi, "si_ilh", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_16)) +DEF_BUILTIN 
(SI_ILHU, CODE_FOR_spu_ilhu, "si_ilhu", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_16)) +DEF_BUILTIN (SI_IL, CODE_FOR_movv4si, "si_il", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_S16)) +DEF_BUILTIN (SI_ILA, CODE_FOR_movv4si, "si_ila", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_U18)) +DEF_BUILTIN (SI_IOHL, CODE_FOR_iorv4si3, "si_iohl", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_U16)) +DEF_BUILTIN (SI_FSMBI, CODE_FOR_spu_fsmb, "si_fsmbi", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_16)) +DEF_BUILTIN (SI_AH, CODE_FOR_addv8hi3, "si_ah", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_AHI, CODE_FOR_addv8hi3, "si_ahi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10)) +DEF_BUILTIN (SI_A, CODE_FOR_addv4si3, "si_a", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_AI, CODE_FOR_addv4si3, "si_ai", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10)) +DEF_BUILTIN (SI_ADDX, CODE_FOR_addx_v4si, "si_addx", B_INSN, _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_CG, CODE_FOR_cg_v4si, "si_cg", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_CGX, CODE_FOR_cgx_v4si, "si_cgx", B_INSN, _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_SFH, CODE_FOR_spu_sfh, "si_sfh", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_SFHI, CODE_FOR_spu_sfh, "si_sfhi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10)) +DEF_BUILTIN (SI_SF, CODE_FOR_spu_sf, "si_sf", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_SFI, CODE_FOR_spu_sf, "si_sfi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10)) +DEF_BUILTIN (SI_SFX, CODE_FOR_spu_sfx, "si_sfx", B_INSN, _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_BG, CODE_FOR_spu_bg, "si_bg", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_BGX, CODE_FOR_spu_bgx, "si_bgx", B_INSN, _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_MPY, CODE_FOR_spu_mpy, "si_mpy", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_MPYU, CODE_FOR_spu_mpyu, "si_mpyu", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_MPYI, CODE_FOR_spu_mpy, "si_mpyi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10)) +DEF_BUILTIN (SI_MPYUI, CODE_FOR_spu_mpyu, "si_mpyui", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10)) +DEF_BUILTIN (SI_MPYA, CODE_FOR_spu_mpya, "si_mpya", B_INSN, _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_MPYH, CODE_FOR_spu_mpyh, "si_mpyh", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_MPYS, CODE_FOR_spu_mpys, "si_mpys", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_MPYHH, CODE_FOR_spu_mpyhh, "si_mpyhh", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_MPYHHU, CODE_FOR_spu_mpyhhu, "si_mpyhhu", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_MPYHHA, CODE_FOR_spu_mpyhha, "si_mpyhha", B_INSN, _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_MPYHHAU, CODE_FOR_spu_mpyhhau, "si_mpyhhau", B_INSN, _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) 
+DEF_BUILTIN (SI_CLZ, CODE_FOR_clzv4si2, "si_clz", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_CNTB, CODE_FOR_cntb_v16qi, "si_cntb", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_FSMB, CODE_FOR_spu_fsmb, "si_fsmb", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_FSMH, CODE_FOR_spu_fsmh, "si_fsmh", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_FSM, CODE_FOR_spu_fsm, "si_fsm", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_GBB, CODE_FOR_spu_gbb, "si_gbb", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_GBH, CODE_FOR_spu_gbh, "si_gbh", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_GB, CODE_FOR_spu_gb, "si_gb", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_AVGB, CODE_FOR_spu_avgb, "si_avgb", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_ABSDB, CODE_FOR_spu_absdb, "si_absdb", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_SUMB, CODE_FOR_spu_sumb, "si_sumb", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_XSBH, CODE_FOR_spu_xsbh, "si_xsbh", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_XSHW, CODE_FOR_spu_xshw, "si_xshw", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_XSWD, CODE_FOR_spu_xswd, "si_xswd", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_AND, CODE_FOR_andv16qi3, "si_and", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_ANDC, CODE_FOR_andc_v16qi, "si_andc", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_ANDBI, CODE_FOR_andv16qi3, "si_andbi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10)) +DEF_BUILTIN (SI_ANDHI, CODE_FOR_andv8hi3, "si_andhi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10)) +DEF_BUILTIN (SI_ANDI, CODE_FOR_andv4si3, "si_andi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10)) +DEF_BUILTIN (SI_OR, CODE_FOR_iorv16qi3, "si_or", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_ORC, CODE_FOR_orc_v16qi, "si_orc", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_ORBI, CODE_FOR_iorv16qi3, "si_orbi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10)) +DEF_BUILTIN (SI_ORHI, CODE_FOR_iorv8hi3, "si_orhi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10)) +DEF_BUILTIN (SI_ORI, CODE_FOR_iorv4si3, "si_ori", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10)) +DEF_BUILTIN (SI_ORX, CODE_FOR_spu_orx, "si_orx", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_XOR, CODE_FOR_xorv16qi3, "si_xor", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_XORBI, CODE_FOR_xorv16qi3, "si_xorbi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10)) +DEF_BUILTIN (SI_XORHI, CODE_FOR_xorv8hi3, "si_xorhi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10)) +DEF_BUILTIN (SI_XORI, CODE_FOR_xorv4si3, "si_xori", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10)) +DEF_BUILTIN (SI_NAND, CODE_FOR_nand_v16qi, "si_nand", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_NOR, CODE_FOR_nor_v16qi, "si_nor", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_EQV, CODE_FOR_eqv_v16qi, "si_eqv", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, 
SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_SELB, CODE_FOR_selb, "si_selb", B_INSN, _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_SHUFB, CODE_FOR_shufb, "si_shufb", B_INSN, _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_SHLH, CODE_FOR_vashlv8hi3, "si_shlh", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_SHLHI, CODE_FOR_vashlv8hi3, "si_shlhi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_7)) +DEF_BUILTIN (SI_SHL, CODE_FOR_vashlv4si3, "si_shl", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_SHLI, CODE_FOR_vashlv4si3, "si_shli", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_7)) +DEF_BUILTIN (SI_SHLQBI, CODE_FOR_shlqbi_ti, "si_shlqbi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_SHLQBII, CODE_FOR_shlqbi_ti, "si_shlqbii", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_7)) +DEF_BUILTIN (SI_SHLQBY, CODE_FOR_shlqby_ti, "si_shlqby", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_SHLQBYI, CODE_FOR_shlqby_ti, "si_shlqbyi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_7)) +DEF_BUILTIN (SI_SHLQBYBI, CODE_FOR_shlqbybi_ti, "si_shlqbybi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_ROTH, CODE_FOR_vrotlv8hi3, "si_roth", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_ROTHI, CODE_FOR_vrotlv8hi3, "si_rothi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_7)) +DEF_BUILTIN (SI_ROT, CODE_FOR_vrotlv4si3, "si_rot", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_ROTI, CODE_FOR_vrotlv4si3, "si_roti", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_7)) +DEF_BUILTIN (SI_ROTQBY, CODE_FOR_rotqby_ti, "si_rotqby", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_ROTQBYI, CODE_FOR_rotqby_ti, "si_rotqbyi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_7)) +DEF_BUILTIN (SI_ROTQBYBI, CODE_FOR_rotqbybi_ti, "si_rotqbybi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_ROTQBI, CODE_FOR_rotqbi_ti, "si_rotqbi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_ROTQBII, CODE_FOR_rotqbi_ti, "si_rotqbii", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_7)) +DEF_BUILTIN (SI_ROTHM, CODE_FOR_rotm_v8hi, "si_rothm", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_ROTHMI, CODE_FOR_rotm_v8hi, "si_rothmi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_7)) +DEF_BUILTIN (SI_ROTM, CODE_FOR_rotm_v4si, "si_rotm", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_ROTMI, CODE_FOR_rotm_v4si, "si_rotmi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_7)) +DEF_BUILTIN (SI_ROTQMBY, CODE_FOR_rotqmby_ti, "si_rotqmby", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_ROTQMBYI, CODE_FOR_rotqmby_ti, "si_rotqmbyi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_7)) +DEF_BUILTIN (SI_ROTQMBI, CODE_FOR_rotqmbi_ti, "si_rotqmbi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_ROTQMBII, CODE_FOR_rotqmbi_ti, "si_rotqmbii", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_7)) +DEF_BUILTIN (SI_ROTQMBYBI, CODE_FOR_rotqmbybi_ti, "si_rotqmbybi", B_INSN, 
_A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_ROTMAH, CODE_FOR_rotma_v8hi, "si_rotmah", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_ROTMAHI, CODE_FOR_rotma_v8hi, "si_rotmahi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_7)) +DEF_BUILTIN (SI_ROTMA, CODE_FOR_rotma_v4si, "si_rotma", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_ROTMAI, CODE_FOR_rotma_v4si, "si_rotmai", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_7)) +DEF_BUILTIN (SI_HEQ, CODE_FOR_spu_heq, "si_heq", B_INSN, _A3(SPU_BTI_VOID, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_HEQI, CODE_FOR_spu_heq, "si_heqi", B_INSN, _A3(SPU_BTI_VOID, SPU_BTI_QUADWORD, SPU_BTI_S10)) +DEF_BUILTIN (SI_HGT, CODE_FOR_spu_hgt, "si_hgt", B_INSN, _A3(SPU_BTI_VOID, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_HGTI, CODE_FOR_spu_hgt, "si_hgti", B_INSN, _A3(SPU_BTI_VOID, SPU_BTI_QUADWORD, SPU_BTI_S10)) +DEF_BUILTIN (SI_HLGT, CODE_FOR_spu_hlgt, "si_hlgt", B_INSN, _A3(SPU_BTI_VOID, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_HLGTI, CODE_FOR_spu_hlgt, "si_hlgti", B_INSN, _A3(SPU_BTI_VOID, SPU_BTI_QUADWORD, SPU_BTI_S10)) +DEF_BUILTIN (SI_CEQB, CODE_FOR_ceq_v16qi, "si_ceqb", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_CEQBI, CODE_FOR_ceq_v16qi, "si_ceqbi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10)) +DEF_BUILTIN (SI_CEQH, CODE_FOR_ceq_v8hi, "si_ceqh", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_CEQHI, CODE_FOR_ceq_v8hi, "si_ceqhi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10)) +DEF_BUILTIN (SI_CEQ, CODE_FOR_ceq_v4si, "si_ceq", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_CEQI, CODE_FOR_ceq_v4si, "si_ceqi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10)) +DEF_BUILTIN (SI_CGTB, CODE_FOR_cgt_v16qi, "si_cgtb", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_CGTBI, CODE_FOR_cgt_v16qi, "si_cgtbi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10)) +DEF_BUILTIN (SI_CGTH, CODE_FOR_cgt_v8hi, "si_cgth", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_CGTHI, CODE_FOR_cgt_v8hi, "si_cgthi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10)) +DEF_BUILTIN (SI_CGT, CODE_FOR_cgt_v4si, "si_cgt", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_CGTI, CODE_FOR_cgt_v4si, "si_cgti", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10)) +DEF_BUILTIN (SI_CLGTB, CODE_FOR_clgt_v16qi, "si_clgtb", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_CLGTBI, CODE_FOR_clgt_v16qi, "si_clgtbi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10)) +DEF_BUILTIN (SI_CLGTH, CODE_FOR_clgt_v8hi, "si_clgth", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_CLGTHI, CODE_FOR_clgt_v8hi, "si_clgthi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10)) +DEF_BUILTIN (SI_CLGT, CODE_FOR_clgt_v4si, "si_clgt", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_CLGTI, CODE_FOR_clgt_v4si, "si_clgti", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10)) +DEF_BUILTIN (SI_BISLED, CODE_FOR_spu_bisled, "si_bisled", B_BISLED, _A3(SPU_BTI_VOID, SPU_BTI_QUADWORD, SPU_BTI_PTR)) +DEF_BUILTIN (SI_BISLEDD, CODE_FOR_spu_bisledd, "si_bisledd", 
B_BISLED, _A3(SPU_BTI_VOID, SPU_BTI_QUADWORD, SPU_BTI_PTR)) +DEF_BUILTIN (SI_BISLEDE, CODE_FOR_spu_bislede, "si_bislede", B_BISLED, _A3(SPU_BTI_VOID, SPU_BTI_QUADWORD, SPU_BTI_PTR)) +DEF_BUILTIN (SI_FA, CODE_FOR_addv4sf3, "si_fa", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_DFA, CODE_FOR_addv2df3, "si_dfa", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_FS, CODE_FOR_subv4sf3, "si_fs", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_DFS, CODE_FOR_subv2df3, "si_dfs", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_FM, CODE_FOR_mulv4sf3, "si_fm", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_DFM, CODE_FOR_mulv2df3, "si_dfm", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_FMA, CODE_FOR_fma_v4sf, "si_fma", B_INSN, _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_DFMA, CODE_FOR_fma_v2df, "si_dfma", B_INSN, _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_DFNMA, CODE_FOR_fnma_v2df, "si_dfnma", B_INSN, _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_FNMS, CODE_FOR_fnms_v4sf, "si_fnms", B_INSN, _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_DFNMS, CODE_FOR_fnms_v2df, "si_dfnms", B_INSN, _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_FMS, CODE_FOR_fms_v4sf, "si_fms", B_INSN, _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_DFMS, CODE_FOR_fms_v2df, "si_dfms", B_INSN, _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_FREST, CODE_FOR_frest_v4sf, "si_frest", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_FRSQEST, CODE_FOR_frsqest_v4sf, "si_frsqest", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_FI, CODE_FOR_fi_v4sf, "si_fi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_CSFLT, CODE_FOR_spu_csflt, "si_csflt", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_U7)) +DEF_BUILTIN (SI_CFLTS, CODE_FOR_spu_cflts, "si_cflts", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_U7)) +DEF_BUILTIN (SI_CUFLT, CODE_FOR_spu_cuflt, "si_cuflt", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_U7)) +DEF_BUILTIN (SI_CFLTU, CODE_FOR_spu_cfltu, "si_cfltu", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_U7)) +DEF_BUILTIN (SI_FRDS, CODE_FOR_spu_frds, "si_frds", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_FESD, CODE_FOR_spu_fesd, "si_fesd", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_FCEQ, CODE_FOR_ceq_v4sf, "si_fceq", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_DFCEQ, CODE_FOR_ceq_v2df, "si_dfceq", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_FCMEQ, CODE_FOR_cmeq_v4sf, "si_fcmeq", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_DFCMEQ, CODE_FOR_cmeq_v2df, "si_dfcmeq", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_FCGT, CODE_FOR_cgt_v4sf, "si_fcgt", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_DFCGT, CODE_FOR_cgt_v2df, "si_dfcgt", B_INSN, _A3(SPU_BTI_QUADWORD, 
SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_FCMGT, CODE_FOR_cmgt_v4sf, "si_fcmgt", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_DFCMGT, CODE_FOR_cmgt_v2df, "si_dfcmgt", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_DFTSV, CODE_FOR_dftsv, "si_dftsv", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_U7)) +DEF_BUILTIN (SI_STOP, CODE_FOR_spu_stop, "si_stop", B_INSN, _A2(SPU_BTI_VOID, SPU_BTI_U14)) +DEF_BUILTIN (SI_STOPD, CODE_FOR_spu_stopd, "si_stopd", B_INSN, _A4(SPU_BTI_VOID, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_LNOP, CODE_FOR_lnop, "si_lnop", B_INSN, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SI_NOP, CODE_FOR_nop, "si_nop", B_INSN, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SI_SYNC, CODE_FOR_sync, "si_sync", B_INSN, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SI_SYNCC, CODE_FOR_syncc, "si_syncc", B_INSN, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SI_DSYNC, CODE_FOR_dsync, "si_dsync", B_INSN, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SI_MFSPR, CODE_FOR_spu_mfspr, "si_mfspr", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_U7)) +DEF_BUILTIN (SI_MTSPR, CODE_FOR_spu_mtspr, "si_mtspr", B_INSN, _A3(SPU_BTI_VOID, SPU_BTI_U7, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_FSCRRD, CODE_FOR_spu_fscrrd, "si_fscrrd", B_INSN, _A1(SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_FSCRWR, CODE_FOR_spu_fscrwr, "si_fscrwr", B_INSN, _A2(SPU_BTI_VOID, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_RDCH, CODE_FOR_spu_rdch, "si_rdch", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_U7)) +DEF_BUILTIN (SI_RCHCNT, CODE_FOR_spu_rchcnt, "si_rchcnt", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_U7)) +DEF_BUILTIN (SI_WRCH, CODE_FOR_spu_wrch, "si_wrch", B_INSN, _A3(SPU_BTI_VOID, SPU_BTI_U7, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_TO_CHAR, CODE_FOR_spu_convert, "si_to_char", B_INSN, _A2(SPU_BTI_INTQI, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_TO_UCHAR, CODE_FOR_spu_convert, "si_to_uchar", B_INSN, _A2(SPU_BTI_UINTQI, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_TO_SHORT, CODE_FOR_spu_convert, "si_to_short", B_INSN, _A2(SPU_BTI_INTHI, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_TO_USHORT, CODE_FOR_spu_convert, "si_to_ushort", B_INSN, _A2(SPU_BTI_UINTHI, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_TO_INT, CODE_FOR_spu_convert, "si_to_int", B_INSN, _A2(SPU_BTI_INTSI, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_TO_UINT, CODE_FOR_spu_convert, "si_to_uint", B_INSN, _A2(SPU_BTI_UINTSI, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_TO_LONG, CODE_FOR_spu_convert, "si_to_long", B_INSN, _A2(SPU_BTI_INTDI, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_TO_ULONG, CODE_FOR_spu_convert, "si_to_ulong", B_INSN, _A2(SPU_BTI_UINTDI, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_TO_FLOAT, CODE_FOR_spu_convert, "si_to_float", B_INSN, _A2(SPU_BTI_FLOAT, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_TO_DOUBLE, CODE_FOR_spu_convert, "si_to_double", B_INSN, _A2(SPU_BTI_DOUBLE, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_TO_PTR, CODE_FOR_spu_convert, "si_to_ptr", B_INSN, _A2(SPU_BTI_PTR, SPU_BTI_QUADWORD)) +DEF_BUILTIN (SI_FROM_CHAR, CODE_FOR_spu_convert, "si_from_char", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_INTQI)) +DEF_BUILTIN (SI_FROM_UCHAR, CODE_FOR_spu_convert, "si_from_uchar", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_UINTQI)) +DEF_BUILTIN (SI_FROM_SHORT, CODE_FOR_spu_convert, "si_from_short", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_INTHI)) +DEF_BUILTIN (SI_FROM_USHORT, CODE_FOR_spu_convert, "si_from_ushort", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_UINTHI)) +DEF_BUILTIN (SI_FROM_INT, CODE_FOR_spu_convert, "si_from_int", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_INTSI)) +DEF_BUILTIN (SI_FROM_UINT, CODE_FOR_spu_convert, 
"si_from_uint", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_UINTSI)) +DEF_BUILTIN (SI_FROM_LONG, CODE_FOR_spu_convert, "si_from_long", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_INTDI)) +DEF_BUILTIN (SI_FROM_ULONG, CODE_FOR_spu_convert, "si_from_ulong", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_UINTDI)) +DEF_BUILTIN (SI_FROM_FLOAT, CODE_FOR_spu_convert, "si_from_float", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_FLOAT)) +DEF_BUILTIN (SI_FROM_DOUBLE, CODE_FOR_spu_convert, "si_from_double", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_DOUBLE)) +DEF_BUILTIN (SI_FROM_PTR, CODE_FOR_spu_convert, "si_from_ptr", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_PTR)) + +/* definitions to support generic builtin functions: */ + +DEF_BUILTIN (SPU_CONVTS, CODE_FOR_spu_cflts, "spu_convts", B_INSN, _A3(SPU_BTI_V4SI, SPU_BTI_V4SF, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_CONVTU, CODE_FOR_spu_cfltu, "spu_convtu", B_INSN, _A3(SPU_BTI_UV4SI, SPU_BTI_V4SF, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_ROUNDTF, CODE_FOR_spu_frds, "spu_roundtf", B_INSN, _A2(SPU_BTI_V4SF, SPU_BTI_V2DF)) +DEF_BUILTIN (SPU_MULH, CODE_FOR_spu_mpyh, "spu_mulh", B_INSN, _A3(SPU_BTI_V4SI, SPU_BTI_V8HI, SPU_BTI_V8HI)) +DEF_BUILTIN (SPU_MULSR, CODE_FOR_spu_mpys, "spu_mulsr", B_INSN, _A3(SPU_BTI_V4SI, SPU_BTI_V8HI, SPU_BTI_V8HI)) +DEF_BUILTIN (SPU_FREST, CODE_FOR_frest_v4sf, "spu_frest", B_INSN, _A2(SPU_BTI_V4SF, SPU_BTI_V4SF)) +DEF_BUILTIN (SPU_FRSQEST, CODE_FOR_frsqest_v4sf, "spu_frsqest", B_INSN, _A2(SPU_BTI_V4SF, SPU_BTI_V4SF)) +DEF_BUILTIN (SPU_NMADD, CODE_FOR_fnma_v2df, "spu_nmadd", B_INSN, _A4(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF)) +DEF_BUILTIN (SPU_ABSD, CODE_FOR_spu_absdb, "spu_absd", B_INSN, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UV16QI)) +DEF_BUILTIN (SPU_AVG, CODE_FOR_spu_avgb, "spu_avg", B_INSN, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UV16QI)) +DEF_BUILTIN (SPU_SUMB, CODE_FOR_spu_sumb, "spu_sumb", B_INSN, _A3(SPU_BTI_UV8HI, SPU_BTI_UV16QI, SPU_BTI_UV16QI)) +DEF_BUILTIN (SPU_BISLED, CODE_FOR_spu_bisled, "spu_bisled", B_BISLED, _A3(SPU_BTI_VOID, SPU_BTI_PTR, SPU_BTI_PTR)) +DEF_BUILTIN (SPU_BISLED_D, CODE_FOR_spu_bisledd, "spu_bisled_d", B_BISLED, _A3(SPU_BTI_VOID, SPU_BTI_PTR, SPU_BTI_PTR)) +DEF_BUILTIN (SPU_BISLED_E, CODE_FOR_spu_bislede, "spu_bisled_e", B_BISLED, _A3(SPU_BTI_VOID, SPU_BTI_PTR, SPU_BTI_PTR)) +DEF_BUILTIN (SPU_IDISABLE, CODE_FOR_spu_idisable, "spu_idisable", B_INSN, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_IENABLE, CODE_FOR_spu_ienable, "spu_ienable", B_INSN, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_MASK_FOR_LOAD, CODE_FOR_spu_lvsr, "spu_lvsr", B_INSN, _A2(SPU_BTI_V16QI, SPU_BTI_PTR)) +DEF_BUILTIN (SPU_TESTSV, CODE_FOR_dftsv, "spu_testsv", B_INSN, _A3(SPU_BTI_UV2DI, SPU_BTI_V2DF, SPU_BTI_U7)) + +/* definitions to support overloaded generic builtin functions: */ + +DEF_BUILTIN (SPU_CONVTF, CODE_FOR_nothing, "spu_convtf", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_CONVTF_0, CODE_FOR_spu_cuflt, "spu_convtf_0", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_UV4SI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_CONVTF_1, CODE_FOR_spu_csflt, "spu_convtf_1", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_EXTEND, CODE_FOR_nothing, "spu_extend", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_EXTEND_0, CODE_FOR_spu_xsbh, "spu_extend_0", B_INTERNAL, _A2(SPU_BTI_V8HI, SPU_BTI_V16QI)) +DEF_BUILTIN (SPU_EXTEND_1, CODE_FOR_spu_xshw, "spu_extend_1", B_INTERNAL, _A2(SPU_BTI_V4SI, SPU_BTI_V8HI)) +DEF_BUILTIN (SPU_EXTEND_2, CODE_FOR_spu_xswd, "spu_extend_2", B_INTERNAL, _A2(SPU_BTI_V2DI, SPU_BTI_V4SI)) +DEF_BUILTIN (SPU_EXTEND_3, 
CODE_FOR_spu_fesd, "spu_extend_3", B_INTERNAL, _A2(SPU_BTI_V2DF, SPU_BTI_V4SF)) +DEF_BUILTIN (SPU_ADD, CODE_FOR_nothing, "spu_add", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_ADD_0, CODE_FOR_addv4si3, "spu_add_0", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI)) +DEF_BUILTIN (SPU_ADD_1, CODE_FOR_addv4si3, "spu_add_1", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI)) +DEF_BUILTIN (SPU_ADD_2, CODE_FOR_addv8hi3, "spu_add_2", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UV8HI)) +DEF_BUILTIN (SPU_ADD_3, CODE_FOR_addv8hi3, "spu_add_3", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_V8HI)) +DEF_BUILTIN (SPU_ADD_4, CODE_FOR_addv4sf3, "spu_add_4", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF)) +DEF_BUILTIN (SPU_ADD_5, CODE_FOR_addv2df3, "spu_add_5", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF)) +DEF_BUILTIN (SPU_ADD_6, CODE_FOR_addv8hi3, "spu_add_6", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UINTHI)) +DEF_BUILTIN (SPU_ADD_7, CODE_FOR_addv8hi3, "spu_add_7", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_INTHI)) +DEF_BUILTIN (SPU_ADD_8, CODE_FOR_addv4si3, "spu_add_8", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_ADD_9, CODE_FOR_addv4si3, "spu_add_9", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_ADDX, CODE_FOR_nothing, "spu_addx", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_ADDX_0, CODE_FOR_addx_v4si, "spu_addx_0", B_INTERNAL, _A4(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI)) +DEF_BUILTIN (SPU_ADDX_1, CODE_FOR_addx_v4si, "spu_addx_1", B_INTERNAL, _A4(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI)) +DEF_BUILTIN (SPU_GENC, CODE_FOR_nothing, "spu_genc", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_GENC_0, CODE_FOR_cg_v4si, "spu_genc_0", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI)) +DEF_BUILTIN (SPU_GENC_1, CODE_FOR_cg_v4si, "spu_genc_1", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI)) +DEF_BUILTIN (SPU_GENCX, CODE_FOR_nothing, "spu_gencx", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_GENCX_0, CODE_FOR_cgx_v4si, "spu_gencx_0", B_INTERNAL, _A4(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI)) +DEF_BUILTIN (SPU_GENCX_1, CODE_FOR_cgx_v4si, "spu_gencx_1", B_INTERNAL, _A4(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI)) +DEF_BUILTIN (SPU_MADD, CODE_FOR_nothing, "spu_madd", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_MADD_0, CODE_FOR_spu_mpya, "spu_madd_0", B_INTERNAL, _A4(SPU_BTI_V4SI, SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_V4SI)) +DEF_BUILTIN (SPU_MADD_1, CODE_FOR_fma_v4sf, "spu_madd_1", B_INTERNAL, _A4(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF)) +DEF_BUILTIN (SPU_MADD_2, CODE_FOR_fma_v2df, "spu_madd_2", B_INTERNAL, _A4(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF)) +DEF_BUILTIN (SPU_MSUB, CODE_FOR_nothing, "spu_msub", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_MSUB_0, CODE_FOR_fms_v4sf, "spu_msub_0", B_INTERNAL, _A4(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF)) +DEF_BUILTIN (SPU_MSUB_1, CODE_FOR_fms_v2df, "spu_msub_1", B_INTERNAL, _A4(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF)) +DEF_BUILTIN (SPU_MHHADD, CODE_FOR_nothing, "spu_mhhadd", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_MHHADD_0, CODE_FOR_spu_mpyhhau, "spu_mhhadd_0", B_INTERNAL, _A4(SPU_BTI_UV4SI, SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UV4SI)) +DEF_BUILTIN (SPU_MHHADD_1, CODE_FOR_spu_mpyhha, "spu_mhhadd_1", B_INTERNAL, _A4(SPU_BTI_V4SI, SPU_BTI_V8HI, 
SPU_BTI_V8HI, SPU_BTI_V4SI)) +DEF_BUILTIN (SPU_MULE, CODE_FOR_nothing, "spu_mule", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_MULE_0, CODE_FOR_spu_mpyhhu, "spu_mule_0", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV8HI, SPU_BTI_UV8HI)) +DEF_BUILTIN (SPU_MULE_1, CODE_FOR_spu_mpyhh, "spu_mule_1", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V8HI, SPU_BTI_V8HI)) +DEF_BUILTIN (SPU_MUL, CODE_FOR_nothing, "spu_mul", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_MUL_0, CODE_FOR_mulv4sf3, "spu_mul_0", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF)) +DEF_BUILTIN (SPU_MUL_1, CODE_FOR_mulv2df3, "spu_mul_1", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF)) +DEF_BUILTIN (SPU_MULO, CODE_FOR_nothing, "spu_mulo", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_MULO_0, CODE_FOR_spu_mpy, "spu_mulo_0", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V8HI, SPU_BTI_V8HI)) +DEF_BUILTIN (SPU_MULO_1, CODE_FOR_spu_mpyu, "spu_mulo_1", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV8HI, SPU_BTI_UV8HI)) +DEF_BUILTIN (SPU_MULO_2, CODE_FOR_spu_mpy, "spu_mulo_2", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V8HI, SPU_BTI_INTHI)) +DEF_BUILTIN (SPU_MULO_3, CODE_FOR_spu_mpyu, "spu_mulo_3", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV8HI, SPU_BTI_UINTHI)) +DEF_BUILTIN (SPU_NMSUB, CODE_FOR_nothing, "spu_nmsub", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_NMSUB_0, CODE_FOR_fnms_v4sf, "spu_nmsub_0", B_INTERNAL, _A4(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF)) +DEF_BUILTIN (SPU_NMSUB_1, CODE_FOR_fnms_v2df, "spu_nmsub_1", B_INTERNAL, _A4(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF)) +DEF_BUILTIN (SPU_SUB, CODE_FOR_nothing, "spu_sub", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_SUB_0, CODE_FOR_subv8hi3, "spu_sub_0", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UV8HI)) +DEF_BUILTIN (SPU_SUB_1, CODE_FOR_subv8hi3, "spu_sub_1", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_V8HI)) +DEF_BUILTIN (SPU_SUB_2, CODE_FOR_subv4si3, "spu_sub_2", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI)) +DEF_BUILTIN (SPU_SUB_3, CODE_FOR_subv4si3, "spu_sub_3", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI)) +DEF_BUILTIN (SPU_SUB_4, CODE_FOR_subv4sf3, "spu_sub_4", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF)) +DEF_BUILTIN (SPU_SUB_5, CODE_FOR_subv2df3, "spu_sub_5", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF)) +DEF_BUILTIN (SPU_SUB_6, CODE_FOR_subv8hi3, "spu_sub_6", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UINTHI, SPU_BTI_UV8HI)) +DEF_BUILTIN (SPU_SUB_7, CODE_FOR_subv8hi3, "spu_sub_7", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_INTHI, SPU_BTI_V8HI)) +DEF_BUILTIN (SPU_SUB_8, CODE_FOR_subv4si3, "spu_sub_8", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UINTSI, SPU_BTI_UV4SI)) +DEF_BUILTIN (SPU_SUB_9, CODE_FOR_subv4si3, "spu_sub_9", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_INTSI, SPU_BTI_V4SI)) +DEF_BUILTIN (SPU_SUBX, CODE_FOR_nothing, "spu_subx", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_SUBX_0, CODE_FOR_sfx_v4si, "spu_subx_0", B_INTERNAL, _A4(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI)) +DEF_BUILTIN (SPU_SUBX_1, CODE_FOR_sfx_v4si, "spu_subx_1", B_INTERNAL, _A4(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI)) +DEF_BUILTIN (SPU_GENB, CODE_FOR_nothing, "spu_genb", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_GENB_0, CODE_FOR_bg_v4si, "spu_genb_0", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI)) +DEF_BUILTIN (SPU_GENB_1, CODE_FOR_bg_v4si, "spu_genb_1", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI)) +DEF_BUILTIN (SPU_GENBX, 
CODE_FOR_nothing, "spu_genbx", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_GENBX_0, CODE_FOR_bgx_v4si, "spu_genbx_0", B_INTERNAL, _A4(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI)) +DEF_BUILTIN (SPU_GENBX_1, CODE_FOR_bgx_v4si, "spu_genbx_1", B_INTERNAL, _A4(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI)) +DEF_BUILTIN (SPU_CMPEQ, CODE_FOR_nothing, "spu_cmpeq", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_CMPEQ_0, CODE_FOR_ceq_v16qi, "spu_cmpeq_0", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UV16QI)) +DEF_BUILTIN (SPU_CMPEQ_1, CODE_FOR_ceq_v16qi, "spu_cmpeq_1", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_V16QI, SPU_BTI_V16QI)) +DEF_BUILTIN (SPU_CMPEQ_2, CODE_FOR_ceq_v8hi, "spu_cmpeq_2", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UV8HI)) +DEF_BUILTIN (SPU_CMPEQ_3, CODE_FOR_ceq_v8hi, "spu_cmpeq_3", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_V8HI, SPU_BTI_V8HI)) +DEF_BUILTIN (SPU_CMPEQ_4, CODE_FOR_ceq_v4si, "spu_cmpeq_4", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI)) +DEF_BUILTIN (SPU_CMPEQ_5, CODE_FOR_ceq_v4si, "spu_cmpeq_5", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_V4SI, SPU_BTI_V4SI)) +DEF_BUILTIN (SPU_CMPEQ_6, CODE_FOR_ceq_v4sf, "spu_cmpeq_6", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_V4SF, SPU_BTI_V4SF)) +DEF_BUILTIN (SPU_CMPEQ_7, CODE_FOR_ceq_v16qi, "spu_cmpeq_7", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UINTQI)) +DEF_BUILTIN (SPU_CMPEQ_8, CODE_FOR_ceq_v16qi, "spu_cmpeq_8", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_V16QI, SPU_BTI_INTQI)) +DEF_BUILTIN (SPU_CMPEQ_9, CODE_FOR_ceq_v8hi, "spu_cmpeq_9", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UINTHI)) +DEF_BUILTIN (SPU_CMPEQ_10, CODE_FOR_ceq_v8hi, "spu_cmpeq_10", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_V8HI, SPU_BTI_INTHI)) +DEF_BUILTIN (SPU_CMPEQ_11, CODE_FOR_ceq_v4si, "spu_cmpeq_11", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_CMPEQ_12, CODE_FOR_ceq_v4si, "spu_cmpeq_12", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_V4SI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_CMPEQ_13, CODE_FOR_ceq_v2df, "spu_cmpeq_13", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_V2DF, SPU_BTI_V2DF)) +DEF_BUILTIN (SPU_CMPABSEQ, CODE_FOR_nothing, "spu_cmpabseq", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_CMPABSEQ_0, CODE_FOR_cmeq_v4sf, "spu_cmpabseq_0", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_V4SF, SPU_BTI_V4SF)) +DEF_BUILTIN (SPU_CMPABSEQ_1, CODE_FOR_cmeq_v2df, "spu_cmpabseq_1", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_V2DF, SPU_BTI_V2DF)) +DEF_BUILTIN (SPU_CMPGT, CODE_FOR_nothing, "spu_cmpgt", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_CMPGT_0, CODE_FOR_clgt_v16qi, "spu_cmpgt_0", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UV16QI)) +DEF_BUILTIN (SPU_CMPGT_1, CODE_FOR_cgt_v16qi, "spu_cmpgt_1", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_V16QI, SPU_BTI_V16QI)) +DEF_BUILTIN (SPU_CMPGT_2, CODE_FOR_clgt_v8hi, "spu_cmpgt_2", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UV8HI)) +DEF_BUILTIN (SPU_CMPGT_3, CODE_FOR_cgt_v8hi, "spu_cmpgt_3", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_V8HI, SPU_BTI_V8HI)) +DEF_BUILTIN (SPU_CMPGT_4, CODE_FOR_clgt_v4si, "spu_cmpgt_4", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI)) +DEF_BUILTIN (SPU_CMPGT_5, CODE_FOR_cgt_v4si, "spu_cmpgt_5", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_V4SI, SPU_BTI_V4SI)) +DEF_BUILTIN (SPU_CMPGT_6, CODE_FOR_cgt_v4sf, "spu_cmpgt_6", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_V4SF, SPU_BTI_V4SF)) +DEF_BUILTIN (SPU_CMPGT_7, CODE_FOR_clgt_v16qi, "spu_cmpgt_7", B_INTERNAL, _A3(SPU_BTI_UV16QI, 
SPU_BTI_UV16QI, SPU_BTI_UINTQI)) +DEF_BUILTIN (SPU_CMPGT_8, CODE_FOR_cgt_v16qi, "spu_cmpgt_8", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_V16QI, SPU_BTI_INTQI)) +DEF_BUILTIN (SPU_CMPGT_9, CODE_FOR_clgt_v8hi, "spu_cmpgt_9", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UINTHI)) +DEF_BUILTIN (SPU_CMPGT_10, CODE_FOR_cgt_v8hi, "spu_cmpgt_10", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_V8HI, SPU_BTI_INTHI)) +DEF_BUILTIN (SPU_CMPGT_11, CODE_FOR_cgt_v4si, "spu_cmpgt_11", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_V4SI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_CMPGT_12, CODE_FOR_clgt_v4si, "spu_cmpgt_12", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_CMPGT_13, CODE_FOR_cgt_v2df, "spu_cmpgt_13", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_V2DF, SPU_BTI_V2DF)) +DEF_BUILTIN (SPU_CMPABSGT, CODE_FOR_nothing, "spu_cmpabsgt", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_CMPABSGT_0, CODE_FOR_cmgt_v4sf, "spu_cmpabsgt_0", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_V4SF, SPU_BTI_V4SF)) +DEF_BUILTIN (SPU_CMPABSGT_1, CODE_FOR_cmgt_v2df, "spu_cmpabsgt_1", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_V2DF, SPU_BTI_V2DF)) +DEF_BUILTIN (SPU_HCMPEQ, CODE_FOR_nothing, "spu_hcmpeq", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_HCMPEQ_0, CODE_FOR_spu_heq, "spu_hcmpeq_0", B_INTERNAL, _A3(SPU_BTI_VOID, SPU_BTI_INTSI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_HCMPEQ_1, CODE_FOR_spu_heq, "spu_hcmpeq_1", B_INTERNAL, _A3(SPU_BTI_VOID, SPU_BTI_UINTSI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_HCMPGT, CODE_FOR_nothing, "spu_hcmpgt", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_HCMPGT_0, CODE_FOR_spu_hgt, "spu_hcmpgt_0", B_INTERNAL, _A3(SPU_BTI_VOID, SPU_BTI_INTSI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_HCMPGT_1, CODE_FOR_spu_hlgt, "spu_hcmpgt_1", B_INTERNAL, _A3(SPU_BTI_VOID, SPU_BTI_UINTSI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_CNTB, CODE_FOR_nothing, "spu_cntb", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_CNTB_0, CODE_FOR_cntb_v16qi, "spu_cntb_0", B_INTERNAL, _A2(SPU_BTI_UV16QI, SPU_BTI_V16QI)) +DEF_BUILTIN (SPU_CNTB_1, CODE_FOR_cntb_v16qi, "spu_cntb_1", B_INTERNAL, _A2(SPU_BTI_UV16QI, SPU_BTI_UV16QI)) +DEF_BUILTIN (SPU_CNTLZ, CODE_FOR_nothing, "spu_cntlz", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_CNTLZ_0, CODE_FOR_clzv4si2, "spu_cntlz_0", B_INTERNAL, _A2(SPU_BTI_UV4SI, SPU_BTI_V4SI)) +DEF_BUILTIN (SPU_CNTLZ_1, CODE_FOR_clzv4si2, "spu_cntlz_1", B_INTERNAL, _A2(SPU_BTI_UV4SI, SPU_BTI_UV4SI)) +DEF_BUILTIN (SPU_CNTLZ_2, CODE_FOR_clzv4si2, "spu_cntlz_2", B_INTERNAL, _A2(SPU_BTI_UV4SI, SPU_BTI_V4SF)) +DEF_BUILTIN (SPU_GATHER, CODE_FOR_nothing, "spu_gather", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_GATHER_0, CODE_FOR_spu_gb, "spu_gather_0", B_INTERNAL, _A2(SPU_BTI_UV4SI, SPU_BTI_V4SI)) +DEF_BUILTIN (SPU_GATHER_1, CODE_FOR_spu_gb, "spu_gather_1", B_INTERNAL, _A2(SPU_BTI_UV4SI, SPU_BTI_UV4SI)) +DEF_BUILTIN (SPU_GATHER_2, CODE_FOR_spu_gbh, "spu_gather_2", B_INTERNAL, _A2(SPU_BTI_UV4SI, SPU_BTI_V8HI)) +DEF_BUILTIN (SPU_GATHER_3, CODE_FOR_spu_gbh, "spu_gather_3", B_INTERNAL, _A2(SPU_BTI_UV4SI, SPU_BTI_UV8HI)) +DEF_BUILTIN (SPU_GATHER_4, CODE_FOR_spu_gbb, "spu_gather_4", B_INTERNAL, _A2(SPU_BTI_UV4SI, SPU_BTI_V16QI)) +DEF_BUILTIN (SPU_GATHER_5, CODE_FOR_spu_gbb, "spu_gather_5", B_INTERNAL, _A2(SPU_BTI_UV4SI, SPU_BTI_UV16QI)) +DEF_BUILTIN (SPU_GATHER_6, CODE_FOR_spu_gb, "spu_gather_6", B_INTERNAL, _A2(SPU_BTI_UV4SI, SPU_BTI_V4SF)) +DEF_BUILTIN (SPU_MASKB, CODE_FOR_nothing, "spu_maskb", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_MASKB_0, CODE_FOR_spu_fsmb, "spu_maskb_0", B_INTERNAL, _A2(SPU_BTI_UV16QI, 
SPU_BTI_UINTHI)) +DEF_BUILTIN (SPU_MASKB_1, CODE_FOR_spu_fsmb, "spu_maskb_1", B_INTERNAL, _A2(SPU_BTI_UV16QI, SPU_BTI_INTHI)) +DEF_BUILTIN (SPU_MASKB_2, CODE_FOR_spu_fsmb, "spu_maskb_2", B_INTERNAL, _A2(SPU_BTI_UV16QI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_MASKB_3, CODE_FOR_spu_fsmb, "spu_maskb_3", B_INTERNAL, _A2(SPU_BTI_UV16QI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_MASKH, CODE_FOR_nothing, "spu_maskh", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_MASKH_0, CODE_FOR_spu_fsmh, "spu_maskh_0", B_INTERNAL, _A2(SPU_BTI_UV8HI, SPU_BTI_UINTQI)) +DEF_BUILTIN (SPU_MASKH_1, CODE_FOR_spu_fsmh, "spu_maskh_1", B_INTERNAL, _A2(SPU_BTI_UV8HI, SPU_BTI_INTQI)) +DEF_BUILTIN (SPU_MASKH_2, CODE_FOR_spu_fsmh, "spu_maskh_2", B_INTERNAL, _A2(SPU_BTI_UV8HI, SPU_BTI_UINTHI)) +DEF_BUILTIN (SPU_MASKH_3, CODE_FOR_spu_fsmh, "spu_maskh_3", B_INTERNAL, _A2(SPU_BTI_UV8HI, SPU_BTI_INTHI)) +DEF_BUILTIN (SPU_MASKH_4, CODE_FOR_spu_fsmh, "spu_maskh_4", B_INTERNAL, _A2(SPU_BTI_UV8HI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_MASKH_5, CODE_FOR_spu_fsmh, "spu_maskh_5", B_INTERNAL, _A2(SPU_BTI_UV8HI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_MASKW, CODE_FOR_nothing, "spu_maskw", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_MASKW_0, CODE_FOR_spu_fsm, "spu_maskw_0", B_INTERNAL, _A2(SPU_BTI_UV4SI, SPU_BTI_UINTQI)) +DEF_BUILTIN (SPU_MASKW_1, CODE_FOR_spu_fsm, "spu_maskw_1", B_INTERNAL, _A2(SPU_BTI_UV4SI, SPU_BTI_INTQI)) +DEF_BUILTIN (SPU_MASKW_2, CODE_FOR_spu_fsm, "spu_maskw_2", B_INTERNAL, _A2(SPU_BTI_UV4SI, SPU_BTI_UINTHI)) +DEF_BUILTIN (SPU_MASKW_3, CODE_FOR_spu_fsm, "spu_maskw_3", B_INTERNAL, _A2(SPU_BTI_UV4SI, SPU_BTI_INTHI)) +DEF_BUILTIN (SPU_MASKW_4, CODE_FOR_spu_fsm, "spu_maskw_4", B_INTERNAL, _A2(SPU_BTI_UV4SI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_MASKW_5, CODE_FOR_spu_fsm, "spu_maskw_5", B_INTERNAL, _A2(SPU_BTI_UV4SI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_SEL, CODE_FOR_nothing, "spu_sel", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_SEL_0, CODE_FOR_selb, "spu_sel_0", B_INTERNAL, _A4(SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_UV2DI)) +DEF_BUILTIN (SPU_SEL_1, CODE_FOR_selb, "spu_sel_1", B_INTERNAL, _A4(SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_UV2DI)) +DEF_BUILTIN (SPU_SEL_2, CODE_FOR_selb, "spu_sel_2", B_INTERNAL, _A4(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_UV4SI)) +DEF_BUILTIN (SPU_SEL_3, CODE_FOR_selb, "spu_sel_3", B_INTERNAL, _A4(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI)) +DEF_BUILTIN (SPU_SEL_4, CODE_FOR_selb, "spu_sel_4", B_INTERNAL, _A4(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_UV8HI)) +DEF_BUILTIN (SPU_SEL_5, CODE_FOR_selb, "spu_sel_5", B_INTERNAL, _A4(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UV8HI)) +DEF_BUILTIN (SPU_SEL_6, CODE_FOR_selb, "spu_sel_6", B_INTERNAL, _A4(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_UV16QI)) +DEF_BUILTIN (SPU_SEL_7, CODE_FOR_selb, "spu_sel_7", B_INTERNAL, _A4(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UV16QI)) +DEF_BUILTIN (SPU_SEL_8, CODE_FOR_selb, "spu_sel_8", B_INTERNAL, _A4(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_UV4SI)) +DEF_BUILTIN (SPU_SEL_9, CODE_FOR_selb, "spu_sel_9", B_INTERNAL, _A4(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_UV2DI)) +DEF_BUILTIN (SPU_SHUFFLE, CODE_FOR_nothing, "spu_shuffle", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_SHUFFLE_0, CODE_FOR_shufb, "spu_shuffle_0", B_INTERNAL, _A4(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UV16QI)) +DEF_BUILTIN (SPU_SHUFFLE_1, CODE_FOR_shufb, "spu_shuffle_1", B_INTERNAL, _A4(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_V16QI, 
SPU_BTI_UV16QI)) +DEF_BUILTIN (SPU_SHUFFLE_2, CODE_FOR_shufb, "spu_shuffle_2", B_INTERNAL, _A4(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UV16QI)) +DEF_BUILTIN (SPU_SHUFFLE_3, CODE_FOR_shufb, "spu_shuffle_3", B_INTERNAL, _A4(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_UV16QI)) +DEF_BUILTIN (SPU_SHUFFLE_4, CODE_FOR_shufb, "spu_shuffle_4", B_INTERNAL, _A4(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV16QI)) +DEF_BUILTIN (SPU_SHUFFLE_5, CODE_FOR_shufb, "spu_shuffle_5", B_INTERNAL, _A4(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_UV16QI)) +DEF_BUILTIN (SPU_SHUFFLE_6, CODE_FOR_shufb, "spu_shuffle_6", B_INTERNAL, _A4(SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_UV16QI)) +DEF_BUILTIN (SPU_SHUFFLE_7, CODE_FOR_shufb, "spu_shuffle_7", B_INTERNAL, _A4(SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_UV16QI)) +DEF_BUILTIN (SPU_SHUFFLE_8, CODE_FOR_shufb, "spu_shuffle_8", B_INTERNAL, _A4(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_UV16QI)) +DEF_BUILTIN (SPU_SHUFFLE_9, CODE_FOR_shufb, "spu_shuffle_9", B_INTERNAL, _A4(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_UV16QI)) +DEF_BUILTIN (SPU_AND, CODE_FOR_nothing, "spu_and", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_AND_0, CODE_FOR_andv16qi3, "spu_and_0", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UV16QI)) +DEF_BUILTIN (SPU_AND_1, CODE_FOR_andv16qi3, "spu_and_1", B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_V16QI)) +DEF_BUILTIN (SPU_AND_2, CODE_FOR_andv8hi3, "spu_and_2", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UV8HI)) +DEF_BUILTIN (SPU_AND_3, CODE_FOR_andv8hi3, "spu_and_3", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_V8HI)) +DEF_BUILTIN (SPU_AND_4, CODE_FOR_andv4si3, "spu_and_4", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI)) +DEF_BUILTIN (SPU_AND_5, CODE_FOR_andv4si3, "spu_and_5", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI)) +DEF_BUILTIN (SPU_AND_6, CODE_FOR_andv2di3, "spu_and_6", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_UV2DI)) +DEF_BUILTIN (SPU_AND_7, CODE_FOR_andv2di3, "spu_and_7", B_INTERNAL, _A3(SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_V2DI)) +DEF_BUILTIN (SPU_AND_8, CODE_FOR_andv4si3, "spu_and_8", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF)) +DEF_BUILTIN (SPU_AND_9, CODE_FOR_andv2di3, "spu_and_9", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF)) +DEF_BUILTIN (SPU_AND_10, CODE_FOR_andv16qi3, "spu_and_10", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UINTQI)) +DEF_BUILTIN (SPU_AND_11, CODE_FOR_andv16qi3, "spu_and_11", B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_INTQI)) +DEF_BUILTIN (SPU_AND_12, CODE_FOR_andv8hi3, "spu_and_12", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UINTHI)) +DEF_BUILTIN (SPU_AND_13, CODE_FOR_andv8hi3, "spu_and_13", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_INTHI)) +DEF_BUILTIN (SPU_AND_14, CODE_FOR_andv4si3, "spu_and_14", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_AND_15, CODE_FOR_andv4si3, "spu_and_15", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_ANDC, CODE_FOR_nothing, "spu_andc", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_ANDC_0, CODE_FOR_andc_v2di, "spu_andc_0", B_INTERNAL, _A3(SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_V2DI)) +DEF_BUILTIN (SPU_ANDC_1, CODE_FOR_andc_v2di, "spu_andc_1", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_UV2DI)) +DEF_BUILTIN (SPU_ANDC_2, CODE_FOR_andc_v4si, "spu_andc_2", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, 
SPU_BTI_V4SI)) +DEF_BUILTIN (SPU_ANDC_3, CODE_FOR_andc_v4si, "spu_andc_3", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI)) +DEF_BUILTIN (SPU_ANDC_4, CODE_FOR_andc_v8hi, "spu_andc_4", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_V8HI)) +DEF_BUILTIN (SPU_ANDC_5, CODE_FOR_andc_v8hi, "spu_andc_5", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UV8HI)) +DEF_BUILTIN (SPU_ANDC_6, CODE_FOR_andc_v16qi, "spu_andc_6", B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_V16QI)) +DEF_BUILTIN (SPU_ANDC_7, CODE_FOR_andc_v16qi, "spu_andc_7", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UV16QI)) +DEF_BUILTIN (SPU_ANDC_8, CODE_FOR_andc_v4si, "spu_andc_8", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF)) +DEF_BUILTIN (SPU_ANDC_9, CODE_FOR_andc_v2di, "spu_andc_9", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF)) +DEF_BUILTIN (SPU_EQV, CODE_FOR_nothing, "spu_eqv", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_EQV_0, CODE_FOR_eqv_v2di, "spu_eqv_0", B_INTERNAL, _A3(SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_V2DI)) +DEF_BUILTIN (SPU_EQV_1, CODE_FOR_eqv_v2di, "spu_eqv_1", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_UV2DI)) +DEF_BUILTIN (SPU_EQV_2, CODE_FOR_eqv_v4si, "spu_eqv_2", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI)) +DEF_BUILTIN (SPU_EQV_3, CODE_FOR_eqv_v4si, "spu_eqv_3", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI)) +DEF_BUILTIN (SPU_EQV_4, CODE_FOR_eqv_v8hi, "spu_eqv_4", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_V8HI)) +DEF_BUILTIN (SPU_EQV_5, CODE_FOR_eqv_v8hi, "spu_eqv_5", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UV8HI)) +DEF_BUILTIN (SPU_EQV_6, CODE_FOR_eqv_v16qi, "spu_eqv_6", B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_V16QI)) +DEF_BUILTIN (SPU_EQV_7, CODE_FOR_eqv_v16qi, "spu_eqv_7", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UV16QI)) +DEF_BUILTIN (SPU_EQV_8, CODE_FOR_eqv_v4si, "spu_eqv_8", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF)) +DEF_BUILTIN (SPU_EQV_9, CODE_FOR_eqv_v2di, "spu_eqv_9", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF)) +DEF_BUILTIN (SPU_NAND, CODE_FOR_nothing, "spu_nand", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_NAND_0, CODE_FOR_nand_v2di, "spu_nand_0", B_INTERNAL, _A3(SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_V2DI)) +DEF_BUILTIN (SPU_NAND_1, CODE_FOR_nand_v2di, "spu_nand_1", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_UV2DI)) +DEF_BUILTIN (SPU_NAND_2, CODE_FOR_nand_v4si, "spu_nand_2", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI)) +DEF_BUILTIN (SPU_NAND_3, CODE_FOR_nand_v4si, "spu_nand_3", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI)) +DEF_BUILTIN (SPU_NAND_4, CODE_FOR_nand_v8hi, "spu_nand_4", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_V8HI)) +DEF_BUILTIN (SPU_NAND_5, CODE_FOR_nand_v8hi, "spu_nand_5", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UV8HI)) +DEF_BUILTIN (SPU_NAND_6, CODE_FOR_nand_v16qi, "spu_nand_6", B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_V16QI)) +DEF_BUILTIN (SPU_NAND_7, CODE_FOR_nand_v16qi, "spu_nand_7", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UV16QI)) +DEF_BUILTIN (SPU_NAND_8, CODE_FOR_nand_v4si, "spu_nand_8", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF)) +DEF_BUILTIN (SPU_NAND_9, CODE_FOR_nand_v2di, "spu_nand_9", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF)) +DEF_BUILTIN (SPU_NOR, CODE_FOR_nothing, "spu_nor", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_NOR_0, CODE_FOR_nor_v2di, 
"spu_nor_0", B_INTERNAL, _A3(SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_V2DI)) +DEF_BUILTIN (SPU_NOR_1, CODE_FOR_nor_v2di, "spu_nor_1", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_UV2DI)) +DEF_BUILTIN (SPU_NOR_2, CODE_FOR_nor_v4si, "spu_nor_2", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI)) +DEF_BUILTIN (SPU_NOR_3, CODE_FOR_nor_v4si, "spu_nor_3", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI)) +DEF_BUILTIN (SPU_NOR_4, CODE_FOR_nor_v8hi, "spu_nor_4", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_V8HI)) +DEF_BUILTIN (SPU_NOR_5, CODE_FOR_nor_v8hi, "spu_nor_5", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UV8HI)) +DEF_BUILTIN (SPU_NOR_6, CODE_FOR_nor_v16qi, "spu_nor_6", B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_V16QI)) +DEF_BUILTIN (SPU_NOR_7, CODE_FOR_nor_v16qi, "spu_nor_7", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UV16QI)) +DEF_BUILTIN (SPU_NOR_8, CODE_FOR_nor_v4si, "spu_nor_8", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF)) +DEF_BUILTIN (SPU_NOR_9, CODE_FOR_nor_v2di, "spu_nor_9", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF)) +DEF_BUILTIN (SPU_OR, CODE_FOR_nothing, "spu_or", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_OR_0, CODE_FOR_iorv16qi3, "spu_or_0", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UV16QI)) +DEF_BUILTIN (SPU_OR_1, CODE_FOR_iorv16qi3, "spu_or_1", B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_V16QI)) +DEF_BUILTIN (SPU_OR_2, CODE_FOR_iorv8hi3, "spu_or_2", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UV8HI)) +DEF_BUILTIN (SPU_OR_3, CODE_FOR_iorv8hi3, "spu_or_3", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_V8HI)) +DEF_BUILTIN (SPU_OR_4, CODE_FOR_iorv4si3, "spu_or_4", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI)) +DEF_BUILTIN (SPU_OR_5, CODE_FOR_iorv4si3, "spu_or_5", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI)) +DEF_BUILTIN (SPU_OR_6, CODE_FOR_iorv2di3, "spu_or_6", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_UV2DI)) +DEF_BUILTIN (SPU_OR_7, CODE_FOR_iorv2di3, "spu_or_7", B_INTERNAL, _A3(SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_V2DI)) +DEF_BUILTIN (SPU_OR_8, CODE_FOR_iorv4si3, "spu_or_8", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF)) +DEF_BUILTIN (SPU_OR_9, CODE_FOR_iorv2di3, "spu_or_9", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF)) +DEF_BUILTIN (SPU_OR_10, CODE_FOR_iorv16qi3, "spu_or_10", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UINTQI)) +DEF_BUILTIN (SPU_OR_11, CODE_FOR_iorv16qi3, "spu_or_11", B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_INTQI)) +DEF_BUILTIN (SPU_OR_12, CODE_FOR_iorv8hi3, "spu_or_12", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UINTHI)) +DEF_BUILTIN (SPU_OR_13, CODE_FOR_iorv8hi3, "spu_or_13", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_INTHI)) +DEF_BUILTIN (SPU_OR_14, CODE_FOR_iorv4si3, "spu_or_14", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_OR_15, CODE_FOR_iorv4si3, "spu_or_15", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_ORC, CODE_FOR_nothing, "spu_orc", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_ORC_0, CODE_FOR_orc_v2di, "spu_orc_0", B_INTERNAL, _A3(SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_V2DI)) +DEF_BUILTIN (SPU_ORC_1, CODE_FOR_orc_v2di, "spu_orc_1", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_UV2DI)) +DEF_BUILTIN (SPU_ORC_2, CODE_FOR_orc_v4si, "spu_orc_2", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI)) +DEF_BUILTIN (SPU_ORC_3, 
CODE_FOR_orc_v4si, "spu_orc_3", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI)) +DEF_BUILTIN (SPU_ORC_4, CODE_FOR_orc_v8hi, "spu_orc_4", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_V8HI)) +DEF_BUILTIN (SPU_ORC_5, CODE_FOR_orc_v8hi, "spu_orc_5", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UV8HI)) +DEF_BUILTIN (SPU_ORC_6, CODE_FOR_orc_v16qi, "spu_orc_6", B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_V16QI)) +DEF_BUILTIN (SPU_ORC_7, CODE_FOR_orc_v16qi, "spu_orc_7", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UV16QI)) +DEF_BUILTIN (SPU_ORC_8, CODE_FOR_orc_v4si, "spu_orc_8", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF)) +DEF_BUILTIN (SPU_ORC_9, CODE_FOR_orc_v2di, "spu_orc_9", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF)) +DEF_BUILTIN (SPU_ORX, CODE_FOR_nothing, "spu_orx", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_ORX_0, CODE_FOR_spu_orx, "spu_orx_0", B_INTERNAL, _A2(SPU_BTI_V4SI, SPU_BTI_V4SI)) +DEF_BUILTIN (SPU_ORX_1, CODE_FOR_spu_orx, "spu_orx_1", B_INTERNAL, _A2(SPU_BTI_UV4SI, SPU_BTI_UV4SI)) +DEF_BUILTIN (SPU_XOR, CODE_FOR_nothing, "spu_xor", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_XOR_0, CODE_FOR_xorv16qi3, "spu_xor_0", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UV16QI)) +DEF_BUILTIN (SPU_XOR_1, CODE_FOR_xorv16qi3, "spu_xor_1", B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_V16QI)) +DEF_BUILTIN (SPU_XOR_2, CODE_FOR_xorv8hi3, "spu_xor_2", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UV8HI)) +DEF_BUILTIN (SPU_XOR_3, CODE_FOR_xorv8hi3, "spu_xor_3", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_V8HI)) +DEF_BUILTIN (SPU_XOR_4, CODE_FOR_xorv4si3, "spu_xor_4", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI)) +DEF_BUILTIN (SPU_XOR_5, CODE_FOR_xorv4si3, "spu_xor_5", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI)) +DEF_BUILTIN (SPU_XOR_6, CODE_FOR_xorv2di3, "spu_xor_6", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_UV2DI)) +DEF_BUILTIN (SPU_XOR_7, CODE_FOR_xorv2di3, "spu_xor_7", B_INTERNAL, _A3(SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_V2DI)) +DEF_BUILTIN (SPU_XOR_8, CODE_FOR_xorv4si3, "spu_xor_8", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF)) +DEF_BUILTIN (SPU_XOR_9, CODE_FOR_xorv2di3, "spu_xor_9", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF)) +DEF_BUILTIN (SPU_XOR_10, CODE_FOR_xorv16qi3, "spu_xor_10", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UINTQI)) +DEF_BUILTIN (SPU_XOR_11, CODE_FOR_xorv16qi3, "spu_xor_11", B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_INTQI)) +DEF_BUILTIN (SPU_XOR_12, CODE_FOR_xorv8hi3, "spu_xor_12", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UINTHI)) +DEF_BUILTIN (SPU_XOR_13, CODE_FOR_xorv8hi3, "spu_xor_13", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_INTHI)) +DEF_BUILTIN (SPU_XOR_14, CODE_FOR_xorv4si3, "spu_xor_14", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_XOR_15, CODE_FOR_xorv4si3, "spu_xor_15", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RL, CODE_FOR_nothing, "spu_rl", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_RL_0, CODE_FOR_vrotlv8hi3, "spu_rl_0", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_V8HI)) +DEF_BUILTIN (SPU_RL_1, CODE_FOR_vrotlv8hi3, "spu_rl_1", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_V8HI)) +DEF_BUILTIN (SPU_RL_2, CODE_FOR_vrotlv4si3, "spu_rl_2", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_V4SI)) +DEF_BUILTIN (SPU_RL_3, 
CODE_FOR_vrotlv4si3, "spu_rl_3", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI)) +DEF_BUILTIN (SPU_RL_4, CODE_FOR_vrotlv8hi3, "spu_rl_4", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_INTHI)) +DEF_BUILTIN (SPU_RL_5, CODE_FOR_vrotlv8hi3, "spu_rl_5", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_INTHI)) +DEF_BUILTIN (SPU_RL_6, CODE_FOR_vrotlv4si3, "spu_rl_6", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RL_7, CODE_FOR_vrotlv4si3, "spu_rl_7", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLQW, CODE_FOR_nothing, "spu_rlqw", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_RLQW_0, CODE_FOR_rotqbi_ti, "spu_rlqw_0", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLQW_1, CODE_FOR_rotqbi_ti, "spu_rlqw_1", B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLQW_2, CODE_FOR_rotqbi_ti, "spu_rlqw_2", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLQW_3, CODE_FOR_rotqbi_ti, "spu_rlqw_3", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLQW_4, CODE_FOR_rotqbi_ti, "spu_rlqw_4", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLQW_5, CODE_FOR_rotqbi_ti, "spu_rlqw_5", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLQW_6, CODE_FOR_rotqbi_ti, "spu_rlqw_6", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLQW_7, CODE_FOR_rotqbi_ti, "spu_rlqw_7", B_INTERNAL, _A3(SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLQW_8, CODE_FOR_rotqbi_ti, "spu_rlqw_8", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLQW_9, CODE_FOR_rotqbi_ti, "spu_rlqw_9", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLQWBYTE, CODE_FOR_nothing, "spu_rlqwbyte", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_RLQWBYTE_0, CODE_FOR_rotqby_ti, "spu_rlqwbyte_0", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLQWBYTE_1, CODE_FOR_rotqby_ti, "spu_rlqwbyte_1", B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLQWBYTE_2, CODE_FOR_rotqby_ti, "spu_rlqwbyte_2", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLQWBYTE_3, CODE_FOR_rotqby_ti, "spu_rlqwbyte_3", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLQWBYTE_4, CODE_FOR_rotqby_ti, "spu_rlqwbyte_4", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLQWBYTE_5, CODE_FOR_rotqby_ti, "spu_rlqwbyte_5", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLQWBYTE_6, CODE_FOR_rotqby_ti, "spu_rlqwbyte_6", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLQWBYTE_7, CODE_FOR_rotqby_ti, "spu_rlqwbyte_7", B_INTERNAL, _A3(SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLQWBYTE_8, CODE_FOR_rotqby_ti, "spu_rlqwbyte_8", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLQWBYTE_9, CODE_FOR_rotqby_ti, "spu_rlqwbyte_9", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLQWBYTEBC, CODE_FOR_nothing, "spu_rlqwbytebc", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_RLQWBYTEBC_0, CODE_FOR_rotqbybi_ti, "spu_rlqwbytebc_0", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLQWBYTEBC_1, CODE_FOR_rotqbybi_ti, "spu_rlqwbytebc_1", 
B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLQWBYTEBC_2, CODE_FOR_rotqbybi_ti, "spu_rlqwbytebc_2", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLQWBYTEBC_3, CODE_FOR_rotqbybi_ti, "spu_rlqwbytebc_3", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLQWBYTEBC_4, CODE_FOR_rotqbybi_ti, "spu_rlqwbytebc_4", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLQWBYTEBC_5, CODE_FOR_rotqbybi_ti, "spu_rlqwbytebc_5", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLQWBYTEBC_6, CODE_FOR_rotqbybi_ti, "spu_rlqwbytebc_6", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLQWBYTEBC_7, CODE_FOR_rotqbybi_ti, "spu_rlqwbytebc_7", B_INTERNAL, _A3(SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLQWBYTEBC_8, CODE_FOR_rotqbybi_ti, "spu_rlqwbytebc_8", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLQWBYTEBC_9, CODE_FOR_rotqbybi_ti, "spu_rlqwbytebc_9", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLMASK, CODE_FOR_nothing, "spu_rlmask", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_RLMASK_0, CODE_FOR_rotm_v8hi, "spu_rlmask_0", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_V8HI)) +DEF_BUILTIN (SPU_RLMASK_1, CODE_FOR_rotm_v8hi, "spu_rlmask_1", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_V8HI)) +DEF_BUILTIN (SPU_RLMASK_2, CODE_FOR_rotm_v4si, "spu_rlmask_2", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_V4SI)) +DEF_BUILTIN (SPU_RLMASK_3, CODE_FOR_rotm_v4si, "spu_rlmask_3", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI)) +DEF_BUILTIN (SPU_RLMASK_4, CODE_FOR_rotm_v8hi, "spu_rlmask_4", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLMASK_5, CODE_FOR_rotm_v8hi, "spu_rlmask_5", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLMASK_6, CODE_FOR_rotm_v4si, "spu_rlmask_6", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLMASK_7, CODE_FOR_rotm_v4si, "spu_rlmask_7", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLMASKA, CODE_FOR_nothing, "spu_rlmaska", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_RLMASKA_0, CODE_FOR_rotma_v8hi, "spu_rlmaska_0", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_V8HI)) +DEF_BUILTIN (SPU_RLMASKA_1, CODE_FOR_rotma_v8hi, "spu_rlmaska_1", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_V8HI)) +DEF_BUILTIN (SPU_RLMASKA_2, CODE_FOR_rotma_v4si, "spu_rlmaska_2", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_V4SI)) +DEF_BUILTIN (SPU_RLMASKA_3, CODE_FOR_rotma_v4si, "spu_rlmaska_3", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI)) +DEF_BUILTIN (SPU_RLMASKA_4, CODE_FOR_rotma_v8hi, "spu_rlmaska_4", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLMASKA_5, CODE_FOR_rotma_v8hi, "spu_rlmaska_5", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLMASKA_6, CODE_FOR_rotma_v4si, "spu_rlmaska_6", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLMASKA_7, CODE_FOR_rotma_v4si, "spu_rlmaska_7", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLMASKQW, CODE_FOR_nothing, "spu_rlmaskqw", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_RLMASKQW_0, CODE_FOR_rotqmbi_ti, "spu_rlmaskqw_0", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, 
SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLMASKQW_1, CODE_FOR_rotqmbi_ti, "spu_rlmaskqw_1", B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLMASKQW_2, CODE_FOR_rotqmbi_ti, "spu_rlmaskqw_2", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLMASKQW_3, CODE_FOR_rotqmbi_ti, "spu_rlmaskqw_3", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLMASKQW_4, CODE_FOR_rotqmbi_ti, "spu_rlmaskqw_4", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLMASKQW_5, CODE_FOR_rotqmbi_ti, "spu_rlmaskqw_5", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLMASKQW_6, CODE_FOR_rotqmbi_ti, "spu_rlmaskqw_6", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLMASKQW_7, CODE_FOR_rotqmbi_ti, "spu_rlmaskqw_7", B_INTERNAL, _A3(SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLMASKQW_8, CODE_FOR_rotqmbi_ti, "spu_rlmaskqw_8", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLMASKQW_9, CODE_FOR_rotqmbi_ti, "spu_rlmaskqw_9", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLMASKQWBYTE, CODE_FOR_nothing, "spu_rlmaskqwbyte", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_RLMASKQWBYTE_0, CODE_FOR_rotqmby_ti, "spu_rlmaskqwbyte_0", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLMASKQWBYTE_1, CODE_FOR_rotqmby_ti, "spu_rlmaskqwbyte_1", B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLMASKQWBYTE_2, CODE_FOR_rotqmby_ti, "spu_rlmaskqwbyte_2", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLMASKQWBYTE_3, CODE_FOR_rotqmby_ti, "spu_rlmaskqwbyte_3", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLMASKQWBYTE_4, CODE_FOR_rotqmby_ti, "spu_rlmaskqwbyte_4", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLMASKQWBYTE_5, CODE_FOR_rotqmby_ti, "spu_rlmaskqwbyte_5", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLMASKQWBYTE_6, CODE_FOR_rotqmby_ti, "spu_rlmaskqwbyte_6", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLMASKQWBYTE_7, CODE_FOR_rotqmby_ti, "spu_rlmaskqwbyte_7", B_INTERNAL, _A3(SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLMASKQWBYTE_8, CODE_FOR_rotqmby_ti, "spu_rlmaskqwbyte_8", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLMASKQWBYTE_9, CODE_FOR_rotqmby_ti, "spu_rlmaskqwbyte_9", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLMASKQWBYTEBC, CODE_FOR_nothing, "spu_rlmaskqwbytebc", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_RLMASKQWBYTEBC_0, CODE_FOR_rotqmbybi_ti, "spu_rlmaskqwbytebc_0", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLMASKQWBYTEBC_1, CODE_FOR_rotqmbybi_ti, "spu_rlmaskqwbytebc_1", B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLMASKQWBYTEBC_2, CODE_FOR_rotqmbybi_ti, "spu_rlmaskqwbytebc_2", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLMASKQWBYTEBC_3, CODE_FOR_rotqmbybi_ti, "spu_rlmaskqwbytebc_3", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLMASKQWBYTEBC_4, CODE_FOR_rotqmbybi_ti, "spu_rlmaskqwbytebc_4", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLMASKQWBYTEBC_5, 
CODE_FOR_rotqmbybi_ti, "spu_rlmaskqwbytebc_5", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLMASKQWBYTEBC_6, CODE_FOR_rotqmbybi_ti, "spu_rlmaskqwbytebc_6", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLMASKQWBYTEBC_7, CODE_FOR_rotqmbybi_ti, "spu_rlmaskqwbytebc_7", B_INTERNAL, _A3(SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLMASKQWBYTEBC_8, CODE_FOR_rotqmbybi_ti, "spu_rlmaskqwbytebc_8", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_RLMASKQWBYTEBC_9, CODE_FOR_rotqmbybi_ti, "spu_rlmaskqwbytebc_9", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_SL, CODE_FOR_nothing, "spu_sl", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_SL_0, CODE_FOR_vashlv8hi3, "spu_sl_0", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UV8HI)) +DEF_BUILTIN (SPU_SL_1, CODE_FOR_vashlv8hi3, "spu_sl_1", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_UV8HI)) +DEF_BUILTIN (SPU_SL_2, CODE_FOR_vashlv4si3, "spu_sl_2", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI)) +DEF_BUILTIN (SPU_SL_3, CODE_FOR_vashlv4si3, "spu_sl_3", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_UV4SI)) +DEF_BUILTIN (SPU_SL_4, CODE_FOR_vashlv8hi3, "spu_sl_4", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SL_5, CODE_FOR_vashlv8hi3, "spu_sl_5", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SL_6, CODE_FOR_vashlv4si3, "spu_sl_6", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SL_7, CODE_FOR_vashlv4si3, "spu_sl_7", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SLQW, CODE_FOR_nothing, "spu_slqw", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_SLQW_0, CODE_FOR_shlqbi_ti, "spu_slqw_0", B_INTERNAL, _A3(SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SLQW_1, CODE_FOR_shlqbi_ti, "spu_slqw_1", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SLQW_2, CODE_FOR_shlqbi_ti, "spu_slqw_2", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SLQW_3, CODE_FOR_shlqbi_ti, "spu_slqw_3", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SLQW_4, CODE_FOR_shlqbi_ti, "spu_slqw_4", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SLQW_5, CODE_FOR_shlqbi_ti, "spu_slqw_5", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SLQW_6, CODE_FOR_shlqbi_ti, "spu_slqw_6", B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SLQW_7, CODE_FOR_shlqbi_ti, "spu_slqw_7", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SLQW_8, CODE_FOR_shlqbi_ti, "spu_slqw_8", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SLQW_9, CODE_FOR_shlqbi_ti, "spu_slqw_9", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SLQWBYTE, CODE_FOR_nothing, "spu_slqwbyte", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_SLQWBYTE_0, CODE_FOR_shlqby_ti, "spu_slqwbyte_0", B_INTERNAL, _A3(SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SLQWBYTE_1, CODE_FOR_shlqby_ti, "spu_slqwbyte_1", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SLQWBYTE_2, CODE_FOR_shlqby_ti, "spu_slqwbyte_2", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SLQWBYTE_3, CODE_FOR_shlqby_ti, 
"spu_slqwbyte_3", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SLQWBYTE_4, CODE_FOR_shlqby_ti, "spu_slqwbyte_4", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SLQWBYTE_5, CODE_FOR_shlqby_ti, "spu_slqwbyte_5", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SLQWBYTE_6, CODE_FOR_shlqby_ti, "spu_slqwbyte_6", B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SLQWBYTE_7, CODE_FOR_shlqby_ti, "spu_slqwbyte_7", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SLQWBYTE_8, CODE_FOR_shlqby_ti, "spu_slqwbyte_8", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SLQWBYTE_9, CODE_FOR_shlqby_ti, "spu_slqwbyte_9", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SLQWBYTEBC, CODE_FOR_nothing, "spu_slqwbytebc", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_SLQWBYTEBC_0, CODE_FOR_shlqbybi_ti, "spu_slqwbytebc_0", B_INTERNAL, _A3(SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SLQWBYTEBC_1, CODE_FOR_shlqbybi_ti, "spu_slqwbytebc_1", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SLQWBYTEBC_2, CODE_FOR_shlqbybi_ti, "spu_slqwbytebc_2", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SLQWBYTEBC_3, CODE_FOR_shlqbybi_ti, "spu_slqwbytebc_3", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SLQWBYTEBC_4, CODE_FOR_shlqbybi_ti, "spu_slqwbytebc_4", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SLQWBYTEBC_5, CODE_FOR_shlqbybi_ti, "spu_slqwbytebc_5", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SLQWBYTEBC_6, CODE_FOR_shlqbybi_ti, "spu_slqwbytebc_6", B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SLQWBYTEBC_7, CODE_FOR_shlqbybi_ti, "spu_slqwbytebc_7", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SLQWBYTEBC_8, CODE_FOR_shlqbybi_ti, "spu_slqwbytebc_8", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SLQWBYTEBC_9, CODE_FOR_shlqbybi_ti, "spu_slqwbytebc_9", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SR, CODE_FOR_nothing, "spu_sr", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_SR_0, CODE_FOR_vlshrv8hi3, "spu_sr_0", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UV8HI)) +DEF_BUILTIN (SPU_SR_1, CODE_FOR_vlshrv8hi3, "spu_sr_1", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_UV8HI)) +DEF_BUILTIN (SPU_SR_2, CODE_FOR_vlshrv4si3, "spu_sr_2", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI)) +DEF_BUILTIN (SPU_SR_3, CODE_FOR_vlshrv4si3, "spu_sr_3", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_UV4SI)) +DEF_BUILTIN (SPU_SR_4, CODE_FOR_vlshrv8hi3, "spu_sr_4", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SR_5, CODE_FOR_vlshrv8hi3, "spu_sr_5", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SR_6, CODE_FOR_vlshrv4si3, "spu_sr_6", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SR_7, CODE_FOR_vlshrv4si3, "spu_sr_7", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SRA, CODE_FOR_nothing, "spu_sra", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_SRA_0, CODE_FOR_vashrv8hi3, "spu_sra_0", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UV8HI)) 
+DEF_BUILTIN (SPU_SRA_1, CODE_FOR_vashrv8hi3, "spu_sra_1", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_UV8HI)) +DEF_BUILTIN (SPU_SRA_2, CODE_FOR_vashrv4si3, "spu_sra_2", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI)) +DEF_BUILTIN (SPU_SRA_3, CODE_FOR_vashrv4si3, "spu_sra_3", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_UV4SI)) +DEF_BUILTIN (SPU_SRA_4, CODE_FOR_vashrv8hi3, "spu_sra_4", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SRA_5, CODE_FOR_vashrv8hi3, "spu_sra_5", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SRA_6, CODE_FOR_vashrv4si3, "spu_sra_6", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SRA_7, CODE_FOR_vashrv4si3, "spu_sra_7", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SRQW, CODE_FOR_nothing, "spu_srqw", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_SRQW_0, CODE_FOR_shrqbi_ti, "spu_srqw_0", B_INTERNAL, _A3(SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SRQW_1, CODE_FOR_shrqbi_ti, "spu_srqw_1", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SRQW_2, CODE_FOR_shrqbi_ti, "spu_srqw_2", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SRQW_3, CODE_FOR_shrqbi_ti, "spu_srqw_3", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SRQW_4, CODE_FOR_shrqbi_ti, "spu_srqw_4", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SRQW_5, CODE_FOR_shrqbi_ti, "spu_srqw_5", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SRQW_6, CODE_FOR_shrqbi_ti, "spu_srqw_6", B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SRQW_7, CODE_FOR_shrqbi_ti, "spu_srqw_7", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SRQW_8, CODE_FOR_shrqbi_ti, "spu_srqw_8", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SRQW_9, CODE_FOR_shrqbi_ti, "spu_srqw_9", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SRQWBYTE, CODE_FOR_nothing, "spu_srqwbyte", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_SRQWBYTE_0, CODE_FOR_shrqby_ti, "spu_srqwbyte_0", B_INTERNAL, _A3(SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SRQWBYTE_1, CODE_FOR_shrqby_ti, "spu_srqwbyte_1", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SRQWBYTE_2, CODE_FOR_shrqby_ti, "spu_srqwbyte_2", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SRQWBYTE_3, CODE_FOR_shrqby_ti, "spu_srqwbyte_3", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SRQWBYTE_4, CODE_FOR_shrqby_ti, "spu_srqwbyte_4", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SRQWBYTE_5, CODE_FOR_shrqby_ti, "spu_srqwbyte_5", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SRQWBYTE_6, CODE_FOR_shrqby_ti, "spu_srqwbyte_6", B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SRQWBYTE_7, CODE_FOR_shrqby_ti, "spu_srqwbyte_7", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SRQWBYTE_8, CODE_FOR_shrqby_ti, "spu_srqwbyte_8", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SRQWBYTE_9, CODE_FOR_shrqby_ti, "spu_srqwbyte_9", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_UINTSI)) 
+DEF_BUILTIN (SPU_SRQWBYTEBC, CODE_FOR_nothing, "spu_srqwbytebc", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_SRQWBYTEBC_0, CODE_FOR_shrqbybi_ti, "spu_srqwbytebc_0", B_INTERNAL, _A3(SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SRQWBYTEBC_1, CODE_FOR_shrqbybi_ti, "spu_srqwbytebc_1", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SRQWBYTEBC_2, CODE_FOR_shrqbybi_ti, "spu_srqwbytebc_2", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SRQWBYTEBC_3, CODE_FOR_shrqbybi_ti, "spu_srqwbytebc_3", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SRQWBYTEBC_4, CODE_FOR_shrqbybi_ti, "spu_srqwbytebc_4", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SRQWBYTEBC_5, CODE_FOR_shrqbybi_ti, "spu_srqwbytebc_5", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SRQWBYTEBC_6, CODE_FOR_shrqbybi_ti, "spu_srqwbytebc_6", B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SRQWBYTEBC_7, CODE_FOR_shrqbybi_ti, "spu_srqwbytebc_7", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SRQWBYTEBC_8, CODE_FOR_shrqbybi_ti, "spu_srqwbytebc_8", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SRQWBYTEBC_9, CODE_FOR_shrqbybi_ti, "spu_srqwbytebc_9", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_UINTSI)) + +DEF_BUILTIN (SPU_SPLATS, CODE_FOR_nothing, "spu_splats", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_SPLATS_0, CODE_FOR_spu_splats, "spu_splats_0", B_INTERNAL, _A2(SPU_BTI_UV16QI, SPU_BTI_UINTQI)) +DEF_BUILTIN (SPU_SPLATS_1, CODE_FOR_spu_splats, "spu_splats_1", B_INTERNAL, _A2(SPU_BTI_V16QI, SPU_BTI_INTQI)) +DEF_BUILTIN (SPU_SPLATS_2, CODE_FOR_spu_splats, "spu_splats_2", B_INTERNAL, _A2(SPU_BTI_UV8HI, SPU_BTI_UINTHI)) +DEF_BUILTIN (SPU_SPLATS_3, CODE_FOR_spu_splats, "spu_splats_3", B_INTERNAL, _A2(SPU_BTI_V8HI, SPU_BTI_INTHI)) +DEF_BUILTIN (SPU_SPLATS_4, CODE_FOR_spu_splats, "spu_splats_4", B_INTERNAL, _A2(SPU_BTI_UV4SI, SPU_BTI_UINTSI)) +DEF_BUILTIN (SPU_SPLATS_5, CODE_FOR_spu_splats, "spu_splats_5", B_INTERNAL, _A2(SPU_BTI_V4SI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_SPLATS_6, CODE_FOR_spu_splats, "spu_splats_6", B_INTERNAL, _A2(SPU_BTI_UV2DI, SPU_BTI_UINTDI)) +DEF_BUILTIN (SPU_SPLATS_7, CODE_FOR_spu_splats, "spu_splats_7", B_INTERNAL, _A2(SPU_BTI_V2DI, SPU_BTI_INTDI)) +DEF_BUILTIN (SPU_SPLATS_8, CODE_FOR_spu_splats, "spu_splats_8", B_INTERNAL, _A2(SPU_BTI_V4SF, SPU_BTI_FLOAT)) +DEF_BUILTIN (SPU_SPLATS_9, CODE_FOR_spu_splats, "spu_splats_9", B_INTERNAL, _A2(SPU_BTI_V2DF, SPU_BTI_DOUBLE)) +DEF_BUILTIN (SPU_EXTRACT, CODE_FOR_nothing, "spu_extract", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_EXTRACT_0, CODE_FOR_spu_extract, "spu_extract_0", B_INTERNAL, _A3(SPU_BTI_UINTQI, SPU_BTI_UV16QI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_EXTRACT_1, CODE_FOR_spu_extract, "spu_extract_1", B_INTERNAL, _A3(SPU_BTI_INTQI, SPU_BTI_V16QI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_EXTRACT_2, CODE_FOR_spu_extract, "spu_extract_2", B_INTERNAL, _A3(SPU_BTI_UINTHI, SPU_BTI_UV8HI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_EXTRACT_3, CODE_FOR_spu_extract, "spu_extract_3", B_INTERNAL, _A3(SPU_BTI_INTHI, SPU_BTI_V8HI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_EXTRACT_4, CODE_FOR_spu_extract, "spu_extract_4", B_INTERNAL, _A3(SPU_BTI_UINTSI, SPU_BTI_UV4SI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_EXTRACT_5, CODE_FOR_spu_extract, "spu_extract_5", B_INTERNAL, _A3(SPU_BTI_INTSI, SPU_BTI_V4SI, SPU_BTI_INTSI)) 
+DEF_BUILTIN (SPU_EXTRACT_6, CODE_FOR_spu_extract, "spu_extract_6", B_INTERNAL, _A3(SPU_BTI_UINTDI, SPU_BTI_UV2DI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_EXTRACT_7, CODE_FOR_spu_extract, "spu_extract_7", B_INTERNAL, _A3(SPU_BTI_INTDI, SPU_BTI_V2DI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_EXTRACT_8, CODE_FOR_spu_extract, "spu_extract_8", B_INTERNAL, _A3(SPU_BTI_FLOAT, SPU_BTI_V4SF, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_EXTRACT_9, CODE_FOR_spu_extract, "spu_extract_9", B_INTERNAL, _A3(SPU_BTI_DOUBLE, SPU_BTI_V2DF, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_INSERT, CODE_FOR_nothing, "spu_insert", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_INSERT_0, CODE_FOR_spu_insert, "spu_insert_0", B_INTERNAL, _A4(SPU_BTI_UV16QI, SPU_BTI_UINTQI, SPU_BTI_UV16QI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_INSERT_1, CODE_FOR_spu_insert, "spu_insert_1", B_INTERNAL, _A4(SPU_BTI_V16QI, SPU_BTI_INTQI, SPU_BTI_V16QI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_INSERT_2, CODE_FOR_spu_insert, "spu_insert_2", B_INTERNAL, _A4(SPU_BTI_UV8HI, SPU_BTI_UINTHI, SPU_BTI_UV8HI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_INSERT_3, CODE_FOR_spu_insert, "spu_insert_3", B_INTERNAL, _A4(SPU_BTI_V8HI, SPU_BTI_INTHI, SPU_BTI_V8HI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_INSERT_4, CODE_FOR_spu_insert, "spu_insert_4", B_INTERNAL, _A4(SPU_BTI_UV4SI, SPU_BTI_UINTSI, SPU_BTI_UV4SI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_INSERT_5, CODE_FOR_spu_insert, "spu_insert_5", B_INTERNAL, _A4(SPU_BTI_V4SI, SPU_BTI_INTSI, SPU_BTI_V4SI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_INSERT_6, CODE_FOR_spu_insert, "spu_insert_6", B_INTERNAL, _A4(SPU_BTI_UV2DI, SPU_BTI_UINTDI, SPU_BTI_UV2DI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_INSERT_7, CODE_FOR_spu_insert, "spu_insert_7", B_INTERNAL, _A4(SPU_BTI_V2DI, SPU_BTI_INTDI, SPU_BTI_V2DI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_INSERT_8, CODE_FOR_spu_insert, "spu_insert_8", B_INTERNAL, _A4(SPU_BTI_V4SF, SPU_BTI_FLOAT, SPU_BTI_V4SF, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_INSERT_9, CODE_FOR_spu_insert, "spu_insert_9", B_INTERNAL, _A4(SPU_BTI_V2DF, SPU_BTI_DOUBLE, SPU_BTI_V2DF, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_PROMOTE, CODE_FOR_nothing, "spu_promote", B_OVERLOAD, _A1(SPU_BTI_VOID)) +DEF_BUILTIN (SPU_PROMOTE_0, CODE_FOR_spu_promote, "spu_promote_0", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UINTQI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_PROMOTE_1, CODE_FOR_spu_promote, "spu_promote_1", B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_INTQI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_PROMOTE_2, CODE_FOR_spu_promote, "spu_promote_2", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UINTHI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_PROMOTE_3, CODE_FOR_spu_promote, "spu_promote_3", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_INTHI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_PROMOTE_4, CODE_FOR_spu_promote, "spu_promote_4", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UINTSI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_PROMOTE_5, CODE_FOR_spu_promote, "spu_promote_5", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_INTSI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_PROMOTE_6, CODE_FOR_spu_promote, "spu_promote_6", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_UINTDI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_PROMOTE_7, CODE_FOR_spu_promote, "spu_promote_7", B_INTERNAL, _A3(SPU_BTI_V2DI, SPU_BTI_INTDI, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_PROMOTE_8, CODE_FOR_spu_promote, "spu_promote_8", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_FLOAT, SPU_BTI_INTSI)) +DEF_BUILTIN (SPU_PROMOTE_9, CODE_FOR_spu_promote, "spu_promote_9", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_DOUBLE, SPU_BTI_INTSI)) + +/* We need something that is not B_INTERNAL as a sentinel. 
*/ + +/* These are for the convenience of implementing fma() in the standard + libraries. */ +DEF_BUILTIN (SCALAR_FMA, CODE_FOR_fma_sf, "fmas", B_INSN, _A4(SPU_BTI_FLOAT, SPU_BTI_FLOAT, SPU_BTI_FLOAT, SPU_BTI_FLOAT)) +DEF_BUILTIN (SCALAR_DFMA, CODE_FOR_fma_df, "dfmas", B_INSN, _A4(SPU_BTI_DOUBLE, SPU_BTI_DOUBLE, SPU_BTI_DOUBLE, SPU_BTI_DOUBLE)) + +DEF_BUILTIN (SPU_ALIGN_HINT, CODE_FOR_spu_align_hint,"spu_align_hint", B_INSN, _A4(SPU_BTI_VOID, SPU_BTI_PTR, SPU_BTI_7, SPU_BTI_7)) +#undef _A1 +#undef _A2 +#undef _A3 +#undef _A4 Index: spu-builtins.md =================================================================== --- spu-builtins.md (nonexistent) +++ spu-builtins.md (revision 384) @@ -0,0 +1,929 @@ +;; Copyright (C) 2006, 2007 Free Software Foundation, Inc. + +;; This file is free software; you can redistribute it and/or modify it under +;; the terms of the GNU General Public License as published by the Free +;; Software Foundation; either version 3 of the License, or (at your option) +;; any later version. + +;; This file is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +;; for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + + +;; This includes expands for all the intrinsics. +;; spu_expand_builtin looks at the mode of match_operand. + + +;; load/store + +(define_expand "spu_lqd" + [(set (match_operand:TI 0 "spu_reg_operand" "") + (mem:TI (and:SI (plus:SI (match_operand:SI 1 "spu_reg_operand" "") + (match_operand:SI 2 "spu_nonmem_operand" "")) + (const_int -16))))] + "" + { + if (GET_CODE (operands[2]) == CONST_INT + && (INTVAL (operands[2]) & 15) != 0) + operands[2] = GEN_INT (INTVAL (operands[2]) & -16); + if (GET_CODE (operands[2]) != CONST_INT) + { + rtx op2 = operands[2]; + operands[2] = force_reg (Pmode, operands[2]); + if (!ALIGNED_SYMBOL_REF_P (op2)) + emit_insn (gen_andsi3 (operands[2], operands[2], GEN_INT (-16))); + } + }) + +(define_expand "spu_lqx" + [(set (match_operand:TI 0 "spu_reg_operand" "") + (mem:TI (and:SI (plus:SI (match_operand:SI 1 "spu_reg_operand" "") + (match_operand:SI 2 "spu_reg_operand" "")) + (const_int -16))))] + "" + "") + +(define_expand "spu_lqa" + [(set (match_operand:TI 0 "spu_reg_operand" "") + (mem:TI (and:SI (match_operand:SI 1 "immediate_operand" "") + (const_int -16))))] + "" + { + if (GET_CODE (operands[1]) == CONST_INT + && (INTVAL (operands[1]) & 15) != 0) + operands[1] = GEN_INT (INTVAL (operands[1]) & -16); + }) + +(define_expand "spu_lqr" + [(set (match_operand:TI 0 "spu_reg_operand" "") + (mem:TI (and:SI (match_operand:SI 1 "address_operand" "") + (const_int -16))))] + "" + "") + +(define_expand "spu_stqd" + [(set (mem:TI (and:SI (plus:SI (match_operand:SI 1 "spu_reg_operand" "") + (match_operand:SI 2 "spu_nonmem_operand" "")) + (const_int -16))) + (match_operand:TI 0 "spu_reg_operand" "r,r"))] + "" + { + if (GET_CODE (operands[2]) == CONST_INT + && (INTVAL (operands[2]) & 15) != 0) + operands[2] = GEN_INT (INTVAL (operands[2]) & -16); + if (GET_CODE (operands[2]) != CONST_INT) + { + rtx op2 = operands[2]; + operands[2] = force_reg (Pmode, operands[2]); + if (!ALIGNED_SYMBOL_REF_P (op2)) + emit_insn (gen_andsi3 (operands[2], operands[2], GEN_INT (-16))); + } + }) + +(define_expand "spu_stqx" + [(set (mem:TI (and:SI (plus:SI (match_operand:SI 1 "spu_reg_operand" "") + 
(match_operand:SI 2 "spu_reg_operand" "")) + (const_int -16))) + (match_operand:TI 0 "spu_reg_operand" "r"))] + "" + "") + +(define_expand "spu_stqa" + [(set (mem:TI (and:SI (match_operand:SI 1 "immediate_operand" "") + (const_int -16))) + (match_operand:TI 0 "spu_reg_operand" "r"))] + "" + { + if (GET_CODE (operands[1]) == CONST_INT + && (INTVAL (operands[1]) & 15) != 0) + operands[1] = GEN_INT (INTVAL (operands[1]) & -16); + }) + +(define_expand "spu_stqr" + [(set (mem:TI (and:SI (match_operand:SI 1 "address_operand" "") + (const_int -16))) + (match_operand:TI 0 "spu_reg_operand" ""))] + "" + "") + + +;; generate control word + +(define_expand "spu_cbx" + [(set (match_operand:TI 0 "spu_reg_operand" "") + (unspec:TI [(match_operand:SI 1 "spu_reg_operand" "") + (match_operand:SI 2 "spu_nonmem_operand" "") + (const_int 1)] UNSPEC_CPAT))] + "" + "") + +(define_expand "spu_chx" + [(set (match_operand:TI 0 "spu_reg_operand" "") + (unspec:TI [(match_operand:SI 1 "spu_reg_operand" "") + (match_operand:SI 2 "spu_nonmem_operand" "") + (const_int 2)] UNSPEC_CPAT))] + "" + "") + +(define_expand "spu_cwx" + [(set (match_operand:TI 0 "spu_reg_operand" "") + (unspec:TI [(match_operand:SI 1 "spu_reg_operand" "") + (match_operand:SI 2 "spu_nonmem_operand" "") + (const_int 4)] UNSPEC_CPAT))] + "" + "") + +(define_expand "spu_cdx" + [(set (match_operand:TI 0 "spu_reg_operand" "") + (unspec:TI [(match_operand:SI 1 "spu_reg_operand" "") + (match_operand:SI 2 "spu_nonmem_operand" "") + (const_int 8)] UNSPEC_CPAT))] + "" + "") + + + +;; Constant formation + +(define_expand "spu_ilhu" + [(set (match_operand:V4SI 0 "spu_reg_operand" "") + (const_vector:V4SI [(match_operand:SI 1 "immediate_operand" "")]))] + "" + "{ emit_insn(gen_movv4si(operands[0], spu_const(V4SImode, (INTVAL(operands[1]) << 16)))); + DONE; + }") + + +;; integer subtract +(define_expand "spu_sfh" + [(set (match_operand:V8HI 0 "spu_reg_operand" "") + (minus:V8HI (match_operand:V8HI 2 "spu_nonmem_operand" "") + (match_operand:V8HI 1 "spu_reg_operand" "")))] + "" + "") + +(define_expand "spu_sf" + [(set (match_operand:V4SI 0 "spu_reg_operand" "") + (minus:V4SI (match_operand:V4SI 2 "spu_nonmem_operand" "") + (match_operand:V4SI 1 "spu_reg_operand" "")))] + "" + "") + +(define_expand "spu_sfx" + [(set (match_operand:V4SI 0 "spu_reg_operand" "") + (unspec:V4SI [(match_operand:V4SI 2 "spu_reg_operand" "") + (match_operand:V4SI 1 "spu_reg_operand" "") + (match_operand:V4SI 3 "spu_reg_operand" "")] UNSPEC_SFX))] + "" + "") + +(define_expand "spu_bg" + [(set (match_operand:V4SI 0 "spu_reg_operand" "") + (unspec:V4SI [(match_operand:V4SI 2 "spu_reg_operand" "") + (match_operand:V4SI 1 "spu_reg_operand" "")] UNSPEC_BG))] + "" + "") + +(define_expand "spu_bgx" + [(set (match_operand:V4SI 0 "spu_reg_operand" "") + (unspec:V4SI [(match_operand:V4SI 2 "spu_reg_operand" "") + (match_operand:V4SI 1 "spu_reg_operand" "") + (match_operand:V4SI 3 "spu_reg_operand" "")] UNSPEC_BGX))] + "" + "") + +;; integer multiply +(define_insn "spu_mpy" + [(set (match_operand:V4SI 0 "spu_reg_operand" "=r,r") + (mult:V4SI + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "spu_reg_operand" "r,r") + (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7)]))) + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 2 "spu_arith_operand" "r,B") + (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7)])))))] + "" + "@ + mpy\t%0,%1,%2 + mpyi\t%0,%1,%2" + [(set_attr "type" "fp7")]) + +(define_insn "spu_mpyu" + [(set (match_operand:V4SI 0 
"spu_reg_operand" "=r,r") + (mult:V4SI + (zero_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "spu_reg_operand" "r,r") + (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7)]))) + (zero_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 2 "spu_arith_operand" "r,B") + (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7)])))))] + "" + "@ + mpyu\t%0,%1,%2 + mpyui\t%0,%1,%2" + [(set_attr "type" "fp7")]) + +(define_insn "spu_mpya" + [(set (match_operand:V4SI 0 "spu_reg_operand" "=r") + (plus:V4SI + (mult:V4SI + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "spu_reg_operand" "r") + (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7)]))) + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 2 "spu_reg_operand" "r") + (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7)])))) + (match_operand:V4SI 3 "spu_reg_operand" "r")))] + "" + "mpya\t%0,%1,%2,%3" + [(set_attr "type" "fp7")]) + +(define_insn "spu_mpyh" + [(set (match_operand:V4SI 0 "spu_reg_operand" "=r") + (ashift:V4SI + (mult:V4SI + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "spu_reg_operand" "r") + (parallel [(const_int 0)(const_int 2)(const_int 4)(const_int 6)]))) + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 2 "spu_reg_operand" "r") + (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7)])))) + (const_vector:V4SI [(const_int 16)(const_int 16)(const_int 16)(const_int 16)])))] + "" + "mpyh\t%0,%1,%2" + [(set_attr "type" "fp7")]) + +(define_insn "spu_mpys" + [(set (match_operand:V4SI 0 "spu_reg_operand" "=r") + (ashiftrt:V4SI + (mult:V4SI + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "spu_reg_operand" "r") + (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7)]))) + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 2 "spu_reg_operand" "r") + (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7)])))) + (const_vector:V4SI [(const_int 16)(const_int 16)(const_int 16)(const_int 16)])))] + "" + "mpys\t%0,%1,%2" + [(set_attr "type" "fp7")]) + +(define_insn "spu_mpyhhu" + [(set (match_operand:V4SI 0 "spu_reg_operand" "=r") + (mult:V4SI + (zero_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "spu_reg_operand" "r") + (parallel [(const_int 0)(const_int 2)(const_int 4)(const_int 6)]))) + (zero_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 2 "spu_reg_operand" "r") + (parallel [(const_int 0)(const_int 2)(const_int 4)(const_int 6)])))))] + "" + "mpyhhu\t%0,%1,%2" + [(set_attr "type" "fp7")]) + +(define_insn "spu_mpyhh" + [(set (match_operand:V4SI 0 "spu_reg_operand" "=r") + (mult:V4SI + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "spu_reg_operand" "r") + (parallel [(const_int 0)(const_int 2)(const_int 4)(const_int 6)]))) + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 2 "spu_reg_operand" "r") + (parallel [(const_int 0)(const_int 2)(const_int 4)(const_int 6)])))))] + "" + "mpyhh\t%0,%1,%2" + [(set_attr "type" "fp7")]) + +(define_insn "spu_mpyhhau" + [(set (match_operand:V4SI 0 "spu_reg_operand" "=r") + (plus:V4SI + (mult:V4SI + (zero_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "spu_reg_operand" "r") + (parallel [(const_int 0)(const_int 2)(const_int 4)(const_int 6)]))) + (zero_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 2 "spu_reg_operand" "r") + (parallel [(const_int 0)(const_int 2)(const_int 4)(const_int 6)])))) + (match_operand:V4SI 3 "spu_reg_operand" "0")))] + "" + "mpyhhau\t%0,%1,%2" + [(set_attr "type" "fp7")]) + 
+(define_insn "spu_mpyhha" + [(set (match_operand:V4SI 0 "spu_reg_operand" "=r") + (plus:V4SI + (mult:V4SI + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "spu_reg_operand" "r") + (parallel [(const_int 0)(const_int 2)(const_int 4)(const_int 6)]))) + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 2 "spu_reg_operand" "r") + (parallel [(const_int 0)(const_int 2)(const_int 4)(const_int 6)])))) + (match_operand:V4SI 3 "spu_reg_operand" "0")))] + "" + "mpyhha\t%0,%1,%2" + [(set_attr "type" "fp7")]) + +;; form select mask +(define_insn "spu_fsmb" + [(set (match_operand:V16QI 0 "spu_reg_operand" "=r,r") + (unspec:V16QI [(match_operand:SI 1 "spu_nonmem_operand" "r,MN")] UNSPEC_FSMB))] + "" + "@ + fsmb\t%0,%1 + fsmbi\t%0,%1" + [(set_attr "type" "shuf")]) + +(define_insn "spu_fsmh" + [(set (match_operand:V8HI 0 "spu_reg_operand" "=r") + (unspec:V8HI [(match_operand:SI 1 "spu_reg_operand" "r")] UNSPEC_FSMH))] + "" + "fsmh\t%0,%1" + [(set_attr "type" "shuf")]) + +(define_insn "spu_fsm" + [(set (match_operand:V4SI 0 "spu_reg_operand" "=r") + (unspec:V4SI [(match_operand:SI 1 "spu_reg_operand" "r")] UNSPEC_FSM))] + "" + "fsm\t%0,%1" + [(set_attr "type" "shuf")]) + + +;; gather bits +(define_insn "spu_gbb" + [(set (match_operand:V4SI 0 "spu_reg_operand" "=r") + (unspec:V4SI [(match_operand:V16QI 1 "spu_reg_operand" "r")] UNSPEC_GBB))] + "" + "gbb\t%0,%1" + [(set_attr "type" "shuf")]) + +(define_insn "spu_gbh" + [(set (match_operand:V4SI 0 "spu_reg_operand" "=r") + (unspec:V4SI [(match_operand:V8HI 1 "spu_reg_operand" "r")] UNSPEC_GBH))] + "" + "gbh\t%0,%1" + [(set_attr "type" "shuf")]) + +(define_insn "spu_gb" + [(set (match_operand:V4SI 0 "spu_reg_operand" "=r") + (unspec:V4SI [(match_operand:V4SI 1 "spu_reg_operand" "r")] UNSPEC_GB))] + "" + "gb\t%0,%1" + [(set_attr "type" "shuf")]) + +;; misc byte operations +(define_insn "spu_avgb" + [(set (match_operand:V16QI 0 "spu_reg_operand" "=r") + (unspec:V16QI [(match_operand:V16QI 1 "spu_reg_operand" "r") + (match_operand:V16QI 2 "spu_reg_operand" "r")] UNSPEC_AVGB))] + "" + "avgb\t%0,%1,%2" + [(set_attr "type" "fxb")]) + +(define_insn "spu_absdb" + [(set (match_operand:V16QI 0 "spu_reg_operand" "=r") + (unspec:V16QI [(match_operand:V16QI 1 "spu_reg_operand" "r") + (match_operand:V16QI 2 "spu_reg_operand" "r")] UNSPEC_ABSDB))] + "" + "absdb\t%0,%1,%2" + [(set_attr "type" "fxb")]) + +(define_insn "spu_sumb" + [(set (match_operand:V8HI 0 "spu_reg_operand" "=r") + (unspec:V8HI [(match_operand:V16QI 1 "spu_reg_operand" "r") + (match_operand:V16QI 2 "spu_reg_operand" "r")] UNSPEC_SUMB))] + "" + "sumb\t%0,%1,%2" + [(set_attr "type" "fxb")]) + +;; sign extend +(define_insn "spu_xsbh" + [(set (match_operand:V8HI 0 "spu_reg_operand" "=r") + (sign_extend:V8HI + (vec_select:V8QI + (match_operand:V16QI 1 "spu_reg_operand" "r") + (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7) + (const_int 9)(const_int 11)(const_int 13)(const_int 15)]))))] + "" + "xsbh\t%0,%1") + +(define_insn "spu_xshw" + [(set (match_operand:V4SI 0 "spu_reg_operand" "=r") + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "spu_reg_operand" "r") + (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7)]))))] + "" + "xshw\t%0,%1") + +(define_insn "spu_xswd" + [(set (match_operand:V2DI 0 "spu_reg_operand" "=r") + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 1 "spu_reg_operand" "r") + (parallel [(const_int 1)(const_int 3)]))))] + "" + "xswd\t%0,%1") + +;; or across + +(define_insn "spu_orx" + [(set (match_operand:V4SI 0 
"spu_reg_operand" "=r") + (unspec:V4SI [(match_operand:V4SI 1 "spu_reg_operand" "r")] UNSPEC_ORX))] + "" + "orx\t%0,%1") + + +;; compare & halt +(define_insn "spu_heq" + [(unspec_volatile [(match_operand:SI 0 "spu_reg_operand" "r,r") + (match_operand:SI 1 "spu_nonmem_operand" "r,K")] UNSPEC_HEQ)] + "" + "@ + heq\t%0,%1 + heqi\t%0,%1") + +(define_insn "spu_hgt" + [(unspec_volatile [(match_operand:SI 0 "spu_reg_operand" "r,r") + (match_operand:SI 1 "spu_nonmem_operand" "r,K")] UNSPEC_HGT)] + "" + "@ + hgt\t%0,%1 + hgti\t%0,%1") + +(define_insn "spu_hlgt" + [(unspec_volatile [(match_operand:SI 0 "spu_reg_operand" "r,r") + (match_operand:SI 1 "spu_nonmem_operand" "r,K")] UNSPEC_HLGT)] + "" + "@ + hlgt\t%0,%1 + hlgti\t%0,%1") + +;; branches + +;; The description below hides the fact that bisled conditionally +;; executes the call depending on the value in channel 0. This was +;; done so that the description would conform to the format of a call +;; insn. Otherwise (if this were not part of call insn), the link +;; register, $lr, would not be saved/restored in the prologue/epilogue. + +(define_insn "spu_bisled" + [(parallel + [(call (mem:QI (match_operand:SI 0 "spu_reg_operand" "r")) + (const_int 0)) + (clobber (reg:SI 0)) + (clobber (reg:SI 130)) + (use (match_operand:SI 1 "address_operand" "")) + (use (const_int 0))])] + "" + "bisled\t$lr,%0" + [(set_attr "type" "br")]) + +(define_insn "spu_bisledd" + [(parallel + [(call (mem:QI (match_operand:SI 0 "spu_reg_operand" "r")) + (const_int 0)) + (clobber (reg:SI 0)) + (clobber (reg:SI 130)) + (use (match_operand:SI 1 "address_operand" "")) + (use (const_int 1))])] + "" + "bisledd\t$lr,%0" + [(set_attr "type" "br")]) + +(define_insn "spu_bislede" + [(parallel + [(call (mem:QI (match_operand:SI 0 "spu_reg_operand" "r")) + (const_int 0)) + (clobber (reg:SI 0)) + (clobber (reg:SI 130)) + (use (match_operand:SI 1 "address_operand" "")) + (use (const_int 2))])] + "" + "bislede\t$lr,%0" + [(set_attr "type" "br")]) + +;; float convert +(define_expand "spu_csflt" + [(set (match_operand:V4SF 0 "spu_reg_operand") + (unspec:V4SF [(match_operand:V4SI 1 "spu_reg_operand") + (match_operand:SI 2 "spu_nonmem_operand")] 0 ))] + "" +{ + if (GET_CODE (operands[2]) == CONST_INT + && (INTVAL (operands[2]) < 0 || INTVAL (operands[2]) > 127)) + { + error ("spu_convtf expects an integer literal in the range [0, 127]."); + operands[2] = force_reg (SImode, operands[2]); + } + if (GET_CODE (operands[2]) != CONST_INT) + { + rtx exp2; + rtx cnv = gen_reg_rtx (V4SFmode); + rtx scale = gen_reg_rtx (SImode); + rtx op2 = force_reg (SImode, operands[2]); + rtx m1 = spu_gen_exp2 (V4SFmode, GEN_INT (-1)); + emit_insn (gen_subsi3 (scale, const1_rtx, op2)); + exp2 = spu_gen_exp2 (V4SFmode, scale); + emit_insn (gen_floatv4siv4sf2_mul (cnv, operands[1], m1)); + emit_insn (gen_mulv4sf3 (operands[0], cnv, exp2)); + } + else + { + rtx exp2 = spu_gen_exp2 (V4SFmode, operands[2]); + emit_insn (gen_floatv4siv4sf2_div (operands[0], operands[1], exp2)); + } + DONE; +}) + +(define_expand "spu_cflts" + [(set (match_operand:V4SI 0 "spu_reg_operand") + (unspec:V4SI [(match_operand:V4SF 1 "spu_reg_operand") + (match_operand:SI 2 "spu_nonmem_operand")] 0 ))] + "" +{ + rtx exp2; + if (GET_CODE (operands[2]) == CONST_INT + && (INTVAL (operands[2]) < 0 || INTVAL (operands[2]) > 127)) + { + error ("spu_convts expects an integer literal in the range [0, 127]."); + operands[2] = force_reg (SImode, operands[2]); + } + exp2 = spu_gen_exp2 (V4SFmode, operands[2]); + if (GET_CODE (operands[2]) != CONST_INT) + { 
+ rtx mul = gen_reg_rtx (V4SFmode); + emit_insn (gen_mulv4sf3 (mul, operands[1], exp2)); + emit_insn (gen_fix_truncv4sfv4si2 (operands[0], mul)); + } + else + emit_insn (gen_fix_truncv4sfv4si2_mul (operands[0], operands[1], exp2)); + DONE; +}) + +(define_expand "spu_cuflt" + [(set (match_operand:V4SF 0 "spu_reg_operand" "=r") + (unspec:V4SF [(match_operand:V4SI 1 "spu_reg_operand") + (match_operand:SI 2 "spu_nonmem_operand")] 0 ))] + "" +{ + if (GET_CODE (operands[2]) == CONST_INT + && (INTVAL (operands[2]) < 0 || INTVAL (operands[2]) > 127)) + { + error ("spu_convtf expects an integer literal in the range [0, 127]."); + operands[2] = force_reg (SImode, operands[2]); + } + if (GET_CODE (operands[2]) != CONST_INT) + { + rtx exp2; + rtx cnv = gen_reg_rtx (V4SFmode); + rtx scale = gen_reg_rtx (SImode); + rtx op2 = force_reg (SImode, operands[2]); + rtx m1 = spu_gen_exp2 (V4SFmode, GEN_INT (-1)); + emit_insn (gen_subsi3 (scale, const1_rtx, op2)); + exp2 = spu_gen_exp2 (V4SFmode, scale); + emit_insn (gen_floatunsv4siv4sf2_mul (cnv, operands[1], m1)); + emit_insn (gen_mulv4sf3 (operands[0], cnv, exp2)); + } + else + { + rtx exp2 = spu_gen_exp2 (V4SFmode, operands[2]); + emit_insn (gen_floatunsv4siv4sf2_div (operands[0], operands[1], exp2)); + } + DONE; +}) + +(define_expand "spu_cfltu" + [(set (match_operand:V4SI 0 "spu_reg_operand") + (unspec:V4SI [(match_operand:V4SF 1 "spu_reg_operand") + (match_operand:SI 2 "spu_nonmem_operand")] 0 ))] + "" +{ + rtx exp2; + if (GET_CODE (operands[2]) == CONST_INT + && (INTVAL (operands[2]) < 0 || INTVAL (operands[2]) > 127)) + { + error ("spu_convtu expects an integer literal in the range [0, 127]."); + operands[2] = force_reg (SImode, operands[2]); + } + exp2 = spu_gen_exp2 (V4SFmode, operands[2]); + if (GET_CODE (operands[2]) != CONST_INT) + { + rtx mul = gen_reg_rtx (V4SFmode); + emit_insn (gen_mulv4sf3 (mul, operands[1], exp2)); + emit_insn (gen_fixuns_truncv4sfv4si2 (operands[0], mul)); + } + else + emit_insn (gen_fixuns_truncv4sfv4si2_mul (operands[0], operands[1], exp2)); + DONE; +}) + +(define_expand "spu_frds" + [(set (match_operand:V4SF 0 "spu_reg_operand" "") + (vec_select:V4SF + (vec_concat:V4SF + (float_truncate:V2SF (match_operand:V2DF 1 "spu_reg_operand" "")) + (match_dup:V2SF 2)) + (parallel [(const_int 0)(const_int 2)(const_int 1)(const_int 3)])))] + "" + "operands[2] = spu_const(V2SFmode, 0);") + +(define_insn "_frds" + [(set (match_operand:V4SF 0 "spu_reg_operand" "=r") + (vec_select:V4SF + (vec_concat:V4SF + (float_truncate:V2SF (match_operand:V2DF 1 "spu_reg_operand" "r")) + (match_operand:V2SF 2 "vec_imm_operand" "i")) + (parallel [(const_int 0)(const_int 2)(const_int 1)(const_int 3)])))] + "" + "frds\t%0,%1" + [(set_attr "type" "fpd")]) + +(define_insn "spu_fesd" + [(set (match_operand:V2DF 0 "spu_reg_operand" "=r") + (float_extend:V2DF + (vec_select:V2SF + (match_operand:V4SF 1 "spu_reg_operand" "r") + (parallel [(const_int 0)(const_int 2)]))))] + "" + "fesd\t%0,%1" + [(set_attr "type" "fpd")]) + +;; control +(define_insn "spu_stop" + [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "M")] UNSPEC_STOP)] + "" + "stop\t%0" + [(set_attr "type" "br")]) + +(define_insn "spu_stopd" + [(unspec_volatile [(match_operand:SI 0 "spu_reg_operand" "r") + (match_operand:SI 1 "spu_reg_operand" "r") + (match_operand:SI 2 "spu_reg_operand" "r")] UNSPEC_STOPD)] + "" + "stopd\t%0,%1,%2" + [(set_attr "type" "br")]) + +;; interrupt disable/enable +(define_expand "spu_idisable" + [(parallel + [(unspec_volatile [(const_int 0)] UNSPEC_SET_INTR) + 
(clobber (match_dup:SI 0)) + (clobber (mem:BLK (scratch)))])] + "" + "operands[0] = gen_reg_rtx (SImode);") + +(define_expand "spu_ienable" + [(parallel + [(unspec_volatile [(const_int 1)] UNSPEC_SET_INTR) + (clobber (match_dup:SI 0)) + (clobber (mem:BLK (scratch)))])] + "" + "operands[0] = gen_reg_rtx (SImode);") + +(define_insn "set_intr" + [(unspec_volatile [(match_operand 1 "const_int_operand" "i")] UNSPEC_SET_INTR) + (clobber (match_operand:SI 0 "spu_reg_operand" "=&r")) + (clobber (mem:BLK (scratch)))] + "! flag_pic" + "ila\t%0,.+8\;bi%I1\t%0" + [(set_attr "length" "8") + (set_attr "type" "multi0")]) + +(define_insn "set_intr_pic" + [(unspec_volatile [(match_operand 1 "const_int_operand" "i")] UNSPEC_SET_INTR) + (clobber (match_operand:SI 0 "spu_reg_operand" "=&r")) + (clobber (mem:BLK (scratch)))] + "flag_pic" + "brsl\t%0,.+4\;ai\t%0,%0,8\;bi%I1\t%0" + [(set_attr "length" "12") + (set_attr "type" "multi1")]) + +(define_insn "set_intr_cc" + [(cond_exec (match_operator 1 "branch_comparison_operator" + [(match_operand 2 "spu_reg_operand" "r") + (const_int 0)]) + (parallel [(unspec_volatile [(match_operand:SI 3 "const_int_operand" "i")] UNSPEC_SET_INTR) + (clobber (match_operand:SI 0 "spu_reg_operand" "=&r")) + (clobber (mem:BLK (scratch)))]))] + "! flag_pic" + "ila\t%0,.+8\;bi%b2%b1z%I3\t%2,%0" + [(set_attr "length" "8") + (set_attr "type" "multi0")]) + +(define_insn "set_intr_cc_pic" + [(cond_exec (match_operator 1 "branch_comparison_operator" + [(match_operand 2 "spu_reg_operand" "r") + (const_int 0)]) + (parallel [(unspec_volatile [(match_operand:SI 3 "const_int_operand" "i")] UNSPEC_SET_INTR) + (clobber (match_operand:SI 0 "spu_reg_operand" "=&r")) + (clobber (mem:BLK (scratch)))]))] + "flag_pic" + "brsl\t%0,.+4\;ai\t%0,%0,8\;bi%b2%b1z%I3\t%2,%0" + [(set_attr "length" "12") + (set_attr "type" "multi1")]) + +(define_insn "set_intr_return" + [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "i")] UNSPEC_SET_INTR) + (return)] + "" + "bi%I0\t$lr" + [(set_attr "type" "br")]) + +(define_peephole2 + [(parallel + [(unspec_volatile [(match_operand:SI 0 "const_int_operand")] UNSPEC_SET_INTR) + (clobber (match_operand:SI 1 "spu_reg_operand")) + (clobber (mem:BLK (scratch)))]) + (use (reg:SI 0)) + (return)] + "" + [(use (reg:SI 0)) + (parallel + [(unspec_volatile [(match_dup:SI 0)] UNSPEC_SET_INTR) + (return)])] + "") + +;; special purpose registers +(define_insn "spu_fscrrd" + [(set (match_operand:V4SI 0 "spu_reg_operand" "=r") + (unspec_volatile:V4SI [(const_int 6)] UNSPEC_FSCRRD))] + "" + "fscrrd\t%0" + [(set_attr "type" "spr")]) + +(define_insn "spu_fscrwr" + [(unspec_volatile [(match_operand:V4SI 0 "spu_reg_operand" "r")] UNSPEC_FSCRWR)] + "" + "fscrwr\t$0,%0" + [(set_attr "type" "spr")]) + +(define_insn "spu_mfspr" + [(set (match_operand:SI 0 "spu_reg_operand" "=r") + (unspec_volatile:SI [(match_operand:SI 1 "immediate_operand" "J")] UNSPEC_MFSPR))] + "" + "mfspr\t%0,$sp%1" + [(set_attr "type" "spr")]) + +(define_insn "spu_mtspr" + [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "J") + (match_operand:SI 1 "spu_reg_operand" "r")] UNSPEC_MTSPR)] + "" + "mtspr\t$sp%0,%1" + [(set_attr "type" "spr")]) + +;; channels +(define_expand "spu_rdch" + [(set (match_operand:V4SI 0 "spu_reg_operand" "") + (unspec_volatile:V4SI [(match_operand:SI 1 "immediate_operand" "")] UNSPEC_RDCH))] + "" + "{ + if (spu_safe_dma (INTVAL (operands[1]))) + { + emit_insn (gen_spu_rdch_clobber (operands[0], operands[1])); + DONE; + } + }") + +(define_expand "spu_rchcnt" + [(set (match_operand:SI 0 
"spu_reg_operand" "") + (unspec_volatile:SI [(match_operand:SI 1 "immediate_operand" "")] UNSPEC_RCHCNT))] + "" + "{ + if (spu_safe_dma (INTVAL (operands[1]))) + { + emit_insn (gen_spu_rchcnt_clobber (operands[0], operands[1])); + DONE; + } + }") + +(define_expand "spu_wrch" + [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "") + (match_operand:V4SI 1 "spu_reg_operand" "")] UNSPEC_WRCH)] + "" + "{ + if (spu_safe_dma (INTVAL (operands[0]))) + { + emit_insn (gen_spu_wrch_clobber (operands[0], operands[1])); + DONE; + } + }") + +(define_insn "spu_rdch_noclobber" + [(set (match_operand:V4SI 0 "spu_reg_operand" "=r") + (unspec_volatile:V4SI [(match_operand:SI 1 "immediate_operand" "J")] UNSPEC_RDCH))] + "" + "rdch\t%0,$ch%1" + [(set_attr "type" "spr")]) + +(define_insn "spu_rchcnt_noclobber" + [(set (match_operand:SI 0 "spu_reg_operand" "=r") + (unspec_volatile:SI [(match_operand:SI 1 "immediate_operand" "J")] UNSPEC_RCHCNT))] + "" + "rchcnt\t%0,$ch%1" + [(set_attr "type" "spr")]) + +(define_insn "spu_wrch_noclobber" + [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "J") + (match_operand:V4SI 1 "spu_reg_operand" "r")] UNSPEC_WRCH)] + "" + "wrch\t$ch%0,%1" + [(set_attr "type" "spr")]) + +(define_insn "spu_rdch_clobber" + [(set (match_operand:V4SI 0 "spu_reg_operand" "=r") + (unspec_volatile:V4SI [(match_operand:SI 1 "immediate_operand" "J")] UNSPEC_RDCH)) + (clobber (mem:BLK (scratch)))] + "" + "rdch\t%0,$ch%1" + [(set_attr "type" "spr")]) + +(define_insn "spu_rchcnt_clobber" + [(set (match_operand:SI 0 "spu_reg_operand" "=r") + (unspec_volatile:SI [(match_operand:SI 1 "immediate_operand" "J")] UNSPEC_RCHCNT)) + (clobber (mem:BLK (scratch)))] + "" + "rchcnt\t%0,$ch%1" + [(set_attr "type" "spr")]) + +(define_insn "spu_wrch_clobber" + [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "J") + (match_operand:V4SI 1 "spu_reg_operand" "r")] UNSPEC_WRCH) + (clobber (mem:BLK (scratch)))] + "" + "wrch\t$ch%0,%1" + [(set_attr "type" "spr")]) + +(define_expand "spu_splats" + [(set (match_operand 0 "spu_reg_operand" "") + (vec_duplicate (match_operand 1 "spu_nonmem_operand" "")))] + "" + { + spu_builtin_splats(operands); + DONE; + }) + +(define_expand "spu_extract" + [(set (match_operand 0 "spu_reg_operand" "") + (unspec [(match_operand 1 "spu_reg_operand" "") + (match_operand 2 "spu_nonmem_operand" "")] 0))] + "" + { + spu_builtin_extract (operands); + DONE; + }) + +(define_expand "spu_insert" + [(set (match_operand 0 "spu_reg_operand" "") + (unspec [(match_operand 1 "spu_reg_operand" "") + (match_operand 2 "spu_reg_operand" "") + (match_operand:SI 3 "spu_nonmem_operand" "")] 0))] + "" + { + spu_builtin_insert(operands); + DONE; + }) + +(define_expand "spu_promote" + [(set (match_operand 0 "spu_reg_operand" "") + (unspec [(match_operand 1 "spu_reg_operand" "") + (match_operand:SI 2 "immediate_operand" "")] 0))] + "" + { + spu_builtin_promote(operands); + DONE; + }) + +;; Currently doing nothing with this but expanding its args. +(define_expand "spu_align_hint" + [(unspec [(match_operand:SI 0 "address_operand" "") + (match_operand:SI 1 "immediate_operand" "") + (match_operand:SI 2 "immediate_operand" "")] 0)] + "" + { + DONE; + }) + Index: spu_intrinsics.h =================================================================== --- spu_intrinsics.h (nonexistent) +++ spu_intrinsics.h (revision 384) @@ -0,0 +1,83 @@ +/* Definitions of Synergistic Processing Unit (SPU). */ +/* Copyright (C) 2006, 2009 Free Software Foundation, Inc. 
+ + This file is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your option) + any later version. + + This file is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifndef _SPU_INTRINSICS_H +#define _SPU_INTRINSICS_H + +#define vec_uchar16 __vector unsigned char +#define vec_char16 __vector signed char +#define vec_ushort8 __vector unsigned short +#define vec_short8 __vector signed short +#define vec_uint4 __vector unsigned int +#define vec_int4 __vector signed int +#define vec_ullong2 __vector unsigned long long +#define vec_llong2 __vector signed long long +#define vec_float4 __vector float +#define vec_double2 __vector double + +/* SPU Channel Defines + */ +#define SPU_RdEventStat 0 +#define SPU_WrEventMask 1 +#define SPU_WrEventAck 2 +#define SPU_RdSigNotify1 3 +#define SPU_RdSigNotify2 4 +#define SPU_WrDec 7 +#define SPU_RdDec 8 +#define SPU_RdEventMask 11 +#define SPU_RdMachStat 13 +#define SPU_WrSRR0 14 +#define SPU_RdSRR0 15 +#define SPU_WrOutMbox 28 +#define SPU_RdInMbox 29 +#define SPU_WrOutIntrMbox 30 + +/* MFC Channel Defines. + */ +#define MFC_WrMSSyncReq 9 +#define MFC_RdTagMask 12 +#define MFC_LSA 16 +#define MFC_EAH 17 +#define MFC_EAL 18 +#define MFC_Size 19 +#define MFC_TagID 20 +#define MFC_Cmd 21 +#define MFC_WrTagMask 22 +#define MFC_WrTagUpdate 23 +#define MFC_RdTagStat 24 +#define MFC_RdListStallStat 25 +#define MFC_WrListStallAck 26 +#define MFC_RdAtomicStat 27 + +/* Bit flag mnemonics for test special value. + */ +#define SPU_SV_NEG_DENORM 0x01 /* negative denormalized number */ +#define SPU_SV_POS_DENORM 0x02 /* positive denormalized number */ +#define SPU_SV_NEG_ZERO 0x04 /* negative zero */ +#define SPU_SV_POS_ZERO 0x08 /* positive zero */ +#define SPU_SV_NEG_INFINITY 0x10 /* negative infinity */ +#define SPU_SV_POS_INFINITY 0x20 /* positive infinity */ +#define SPU_SV_NAN 0x40 /* not a number */ + +#include + +#endif /* _SPU_INTRINSICS_H */
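
The vec_* spellings and channel numbers defined above are what user code is expected to write. A minimal sketch of both, assuming the generic intrinsics spu_mul and spu_splats and the channel intrinsic spu_readch from spu_internals.h (pulled in at the end of this header):

    #include <spu_intrinsics.h>

    /* Scale every element of a float vector by the same scalar;
       spu_splats replicates s into all four slots.  */
    vec_float4 scale_all (vec_float4 v, float s)
    {
      return spu_mul (v, spu_splats (s));
    }

    /* Read the SPU decrementer through its channel number (SPU_RdDec == 8).  */
    unsigned int read_decrementer (void)
    {
      return spu_readch (SPU_RdDec);
    }

The SPU_* and MFC_* channel defines exist so that spu_readch/spu_writech and the spu_mfcio.h macros further down can be written without magic channel numbers.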
spu_intrinsics.h Property changes : Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +Id \ No newline at end of property Index: cache.S =================================================================== --- cache.S (nonexistent) +++ cache.S (revision 384) @@ -0,0 +1,43 @@ +/* Copyright (C) 2008, 2009 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + + .data + .p2align 7 + .global __cache +__cache: + .rept __CACHE_SIZE__ * 8 + .fill 128 + .endr + + .p2align 7 + .global __cache_tag_array +__cache_tag_array: + .rept __CACHE_SIZE__ * 2 + .long 1, 1, 1, 1 + .fill 128-16 + .endr +__end_cache_tag_array: + + .globl __cache_tag_array_size + .set __cache_tag_array_size, __end_cache_tag_array-__cache_tag_array +
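
In cache.S, __CACHE_SIZE__ is the -mcache-size value in kilobytes (see spu.opt below), so the two arrays work out as in the sketch here; the concrete -mcache-size=64 figure is only an assumed example:

    /* Layout implied by cache.S, assuming -mcache-size=64 (__CACHE_SIZE__ == 64).  */
    enum
    {
      LINE_BYTES      = 128,                    /* one ".fill 128" per cache line     */
      NUM_LINES       = 64 * 8,                 /* ".rept __CACHE_SIZE__ * 8" -> 512  */
      DATA_BYTES      = NUM_LINES * LINE_BYTES, /* 65536, i.e. exactly 64 KB of data  */
      NUM_SETS        = 64 * 2,                 /* ".rept __CACHE_SIZE__ * 2" -> 128  */
      WAYS_PER_SET    = NUM_LINES / NUM_SETS,   /* 4 ways, matching the four ".long 1"
                                                   tag slots in each 128-byte set     */
      TAG_ARRAY_BYTES = NUM_SETS * LINE_BYTES   /* 16384, exported as
                                                   __cache_tag_array_size             */
    };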
cache.S Property changes : Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +Id \ No newline at end of property Index: spu.opt =================================================================== --- spu.opt (nonexistent) +++ spu.opt (revision 384) @@ -0,0 +1,105 @@ +; Options for the SPU port of the compiler +; Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc. + +; This file is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3 of the License, or (at your option) +; any later version. + +; This file is distributed in the hope that it will be useful, but WITHOUT +; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +mwarn-reloc +Target Report Mask(WARN_RELOC) +Emit warnings when run-time relocations are generated + +merror-reloc +Target Report Mask(ERROR_RELOC) +Emit errors when run-time relocations are generated + +mbranch-cost= +Target RejectNegative Joined UInteger Var(spu_branch_cost) Init(20) +Specify cost of branches (Default 20) + +msafe-dma +Target Report RejectNegative Mask(SAFE_DMA) +Make sure loads and stores are not moved past DMA instructions + +munsafe-dma +Target Report RejectNegative InverseMask(SAFE_DMA) +volatile must be specified on any memory that is effected by DMA + +mdual-nops +Target Report Var(spu_dual_nops,10) Init(10) +Insert nops when it might improve performance by allowing dual issue (default) + +mdual-nops= +Target RejectNegative Joined UInteger Var(spu_dual_nops) +Insert nops when it might improve performance by allowing dual issue (default) + +mstdmain +Target Report Mask(STD_MAIN) +Use standard main function as entry for startup + +mbranch-hints +Target Report Mask(BRANCH_HINTS) +Generate branch hints for branches + +mhint-max-nops= +Target RejectNegative Joined UInteger Var(spu_max_nops) Init(2) +Maximum number of nops to insert for a hint (Default 2) + +mhint-max-distance= +Target RejectNegative Joined Var(spu_max_distance_str) +Approximate maximum number of instructions to allow between a hint and its branch [125] + +msmall-mem +Target Report RejectNegative InverseMask(LARGE_MEM) +Generate code for 18 bit addressing + +mlarge-mem +Target Report RejectNegative Mask(LARGE_MEM) +Generate code for 32 bit addressing + +mfixed-range= +Target RejectNegative Joined Var(spu_fixed_range_string) +Specify range of registers to make fixed + +msafe-hints +Target Report Mask(SAFE_HINTS) +Insert hbrp instructions after hinted branch targets to avoid the SPU hang issue + +march= +Target RejectNegative Joined Var(spu_arch_string) +Generate code for given CPU + +mtune= +Target RejectNegative Joined Var(spu_tune_string) +Schedule code for given CPU + +mea32 +Target Report RejectNegative Var(spu_ea_model,32) Init(32) +Access variables in 32-bit PPU objects (default) + +mea64 +Target Report RejectNegative Var(spu_ea_model,64) VarExists +Access variables in 64-bit PPU objects + +maddress-space-conversion +Target Report Mask(ADDRESS_SPACE_CONVERSION) +Allow conversions between __ea and generic pointers (default) + +mcache-size= +Target Report RejectNegative Joined UInteger +Size (in KB) of software data cache + +matomic-updates +Target Report 
+Atomically write back software data cache lines (default) + Index: spu-c.c =================================================================== --- spu-c.c (nonexistent) +++ spu-c.c (revision 384) @@ -0,0 +1,240 @@ +/* Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc. + + This file is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your option) + any later version. + + This file is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "cpplib.h" +#include "tree.h" +#include "c-tree.h" +#include "c-pragma.h" +#include "function.h" +#include "rtl.h" +#include "expr.h" +#include "tm_p.h" +#include "langhooks.h" +#include "insn-config.h" +#include "insn-codes.h" +#include "recog.h" +#include "optabs.h" + + +/* Keep the vector keywords handy for fast comparisons. */ +static GTY(()) tree __vector_keyword; +static GTY(()) tree vector_keyword; + +static cpp_hashnode * +spu_categorize_keyword (const cpp_token *tok) +{ + if (tok->type == CPP_NAME) + { + cpp_hashnode *ident = tok->val.node.node; + + if (ident == C_CPP_HASHNODE (vector_keyword) + || ident == C_CPP_HASHNODE (__vector_keyword)) + return C_CPP_HASHNODE (__vector_keyword); + else + return ident; + } + return 0; +} + +/* Called to decide whether a conditional macro should be expanded. + Since we have exactly one such macro (i.e, 'vector'), we do not + need to examine the 'tok' parameter. */ + +static cpp_hashnode * +spu_macro_to_expand (cpp_reader *pfile, const cpp_token *tok) +{ + cpp_hashnode *expand_this = tok->val.node.node; + cpp_hashnode *ident; + + ident = spu_categorize_keyword (tok); + if (ident == C_CPP_HASHNODE (__vector_keyword)) + { + tok = cpp_peek_token (pfile, 0); + ident = spu_categorize_keyword (tok); + + if (ident) + { + enum rid rid_code = (enum rid)(ident->rid_code); + if (ident->type == NT_MACRO) + { + (void) cpp_get_token (pfile); + tok = cpp_peek_token (pfile, 0); + ident = spu_categorize_keyword (tok); + if (ident) + rid_code = (enum rid)(ident->rid_code); + } + + if (rid_code == RID_UNSIGNED || rid_code == RID_LONG + || rid_code == RID_SHORT || rid_code == RID_SIGNED + || rid_code == RID_INT || rid_code == RID_CHAR + || rid_code == RID_FLOAT || rid_code == RID_DOUBLE) + expand_this = C_CPP_HASHNODE (__vector_keyword); + } + } + return expand_this; +} + +/* target hook for resolve_overloaded_builtin(). Returns a function call + RTX if we can resolve the overloaded builtin */ +tree +spu_resolve_overloaded_builtin (location_t loc, tree fndecl, void *passed_args) +{ +#define SCALAR_TYPE_P(t) (INTEGRAL_TYPE_P (t) \ + || SCALAR_FLOAT_TYPE_P (t) \ + || POINTER_TYPE_P (t)) + VEC(tree,gc) *fnargs = (VEC(tree,gc) *) passed_args; + unsigned int nargs = VEC_length (tree, fnargs); + int new_fcode, fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS; + struct spu_builtin_description *desc; + tree match = NULL_TREE; + + /* The vector types are not available if the backend is not initialized. 
*/ + gcc_assert (!flag_preprocess_only); + + desc = &spu_builtins[fcode]; + if (desc->type != B_OVERLOAD) + return NULL_TREE; + + /* Compare the signature of each internal builtin function with the + function arguments until a match is found. */ + + for (new_fcode = fcode + 1; spu_builtins[new_fcode].type == B_INTERNAL; + new_fcode++) + { + tree decl = spu_builtins[new_fcode].fndecl; + tree params = TYPE_ARG_TYPES (TREE_TYPE (decl)); + tree param; + bool all_scalar; + unsigned int p; + + /* Check whether all parameters are scalar. */ + all_scalar = true; + for (param = params; param != void_list_node; param = TREE_CHAIN (param)) + if (!SCALAR_TYPE_P (TREE_VALUE (param))) + all_scalar = false; + + for (param = params, p = 0; + param != void_list_node; + param = TREE_CHAIN (param), p++) + { + tree var, arg_type, param_type = TREE_VALUE (param); + + if (p >= nargs) + { + error ("insufficient arguments to overloaded function %s", + desc->name); + return error_mark_node; + } + + var = VEC_index (tree, fnargs, p); + + if (TREE_CODE (var) == NON_LVALUE_EXPR) + var = TREE_OPERAND (var, 0); + + if (TREE_CODE (var) == ERROR_MARK) + return NULL_TREE; /* Let somebody else deal with the problem. */ + + arg_type = TREE_TYPE (var); + + /* The intrinsics spec does not specify precisely how to + resolve generic intrinsics. We require an exact match + for vector types and let C do it's usual parameter type + checking/promotions for scalar arguments, except for the + first argument of intrinsics which don't have a vector + parameter. */ + if ((!SCALAR_TYPE_P (param_type) + || !SCALAR_TYPE_P (arg_type) + || (all_scalar && p == 0)) + && !lang_hooks.types_compatible_p (param_type, arg_type)) + break; + } + if (param == void_list_node) + { + if (p != nargs) + { + error ("too many arguments to overloaded function %s", + desc->name); + return error_mark_node; + } + + match = decl; + break; + } + } + + if (match == NULL_TREE) + { + error ("parameter list does not match a valid signature for %s()", + desc->name); + return error_mark_node; + } + + return build_function_call_vec (loc, match, fnargs, NULL); +#undef SCALAR_TYPE_P +} + + +void +spu_cpu_cpp_builtins (struct cpp_reader *pfile) +{ + builtin_define_std ("__SPU__"); + cpp_assert (pfile, "cpu=spu"); + cpp_assert (pfile, "machine=spu"); + if (spu_arch == PROCESSOR_CELLEDP) + builtin_define_std ("__SPU_EDP__"); + builtin_define_std ("__vector=__attribute__((__spu_vector__))"); + switch (spu_ea_model) + { + case 32: + builtin_define_std ("__EA32__"); + break; + case 64: + builtin_define_std ("__EA64__"); + break; + default: + gcc_unreachable (); + } + + if (!flag_iso) + { + /* Define this when supporting context-sensitive keywords. */ + cpp_define (pfile, "__VECTOR_KEYWORD_SUPPORTED__"); + cpp_define (pfile, "vector=vector"); + + /* Initialize vector keywords. */ + __vector_keyword = get_identifier ("__vector"); + C_CPP_HASHNODE (__vector_keyword)->flags |= NODE_CONDITIONAL; + vector_keyword = get_identifier ("vector"); + C_CPP_HASHNODE (vector_keyword)->flags |= NODE_CONDITIONAL; + + /* Enable context-sensitive macros. */ + cpp_get_callbacks (pfile)->macro_to_expand = spu_macro_to_expand; + } +} + +void +spu_c_common_override_options (void) +{ + if (!TARGET_STD_MAIN) + { + /* Don't give warnings about the main() function. */ + warn_main = 0; + } +}
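
Two effects of this file as seen from user code: the context-sensitive vector keyword registered in spu_cpu_cpp_builtins, and the signature matching done by spu_resolve_overloaded_builtin. A minimal sketch, assuming the overloaded generic intrinsic spu_add from spu_internals.h:

    #include <spu_intrinsics.h>

    /* Outside strict ISO mode (!flag_iso above), "vector" expands to __vector
       only when followed by a type keyword, so both declarations name the same
       128-bit type while "vector" remains usable as an ordinary identifier
       elsewhere.  */
    vector unsigned int a;
    __vector unsigned int b;

    vec_uint4 sum (vec_uint4 x, vec_uint4 y)
    {
      /* spu_add is a B_OVERLOAD entry; the resolver walks the B_INTERNAL
         variants that follow it and picks the one whose parameter types
         match, here the unsigned V4SI form.  */
      return spu_add (x, y);
    }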
spu-c.c Property changes : Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +Id \ No newline at end of property Index: spu_mfcio.h =================================================================== --- spu_mfcio.h (nonexistent) +++ spu_mfcio.h (revision 384) @@ -0,0 +1,342 @@ +/* Copyright (C) 2006, 2008, 2009 Free Software Foundation, Inc. + + This file is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your option) + any later version. + + This file is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifndef __SPU_MFCIO_H__ +#define __SPU_MFCIO_H__ 1 + +#include +#ifdef __IN_LIBGCC2 +typedef unsigned long long uint64_t; +#else +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + + +/****************************************************************/ +/* DMA list element structure*/ +/****************************************************************/ + +#ifdef __GNUC__ +__extension__ +#endif +typedef struct mfc_list_element { + uint64_t notify : 1; /** Stall-and-notify bit */ + uint64_t reserved : 16; + uint64_t size : 15; /** Transfer size */ + uint64_t eal : 32; /** Lower word of effective address */ +} mfc_list_element_t; + +/****************************************************************/ +/* DMA max/min size definitions. */ +/****************************************************************/ + +#define MFC_MIN_DMA_SIZE_SHIFT 4 /* 16 bytes */ +#define MFC_MAX_DMA_SIZE_SHIFT 14 /* 16384 bytes */ + +#define MFC_MIN_DMA_SIZE (1 << MFC_MIN_DMA_SIZE_SHIFT) +#define MFC_MAX_DMA_SIZE (1 << MFC_MAX_DMA_SIZE_SHIFT) + +#define MFC_MIN_DMA_SIZE_MASK (MFC_MIN_DMA_SIZE - 1) +#define MFC_MAX_DMA_SIZE_MASK (MFC_MAX_DMA_SIZE - 1) + +#define MFC_MIN_DMA_LIST_ELEMENTS 1 +#define MFC_MAX_DMA_LIST_ELEMENTS 2048 + +#define MFC_MIN_DMA_LIST_SIZE (MFC_MIN_DMA_LIST_ELEMENTS << 3) /* 8 bytes */ +#define MFC_MAX_DMA_LIST_SIZE (MFC_MAX_DMA_LIST_ELEMENTS << 3) /* 16K bytes */ + +/****************************************************************/ +/* MFC DMA command modifiers to identify classes of operations. */ +/****************************************************************/ + +/* Note: These commands modifier may be used in conjunction with the base + command types (i.e. MFC_PUT_CMD, MFC_GET_CMD, and MFC_SNDSIG_CMD) + to construct the various command permutations. 
*/ + +#define MFC_BARRIER_ENABLE 0x0001 +#define MFC_FENCE_ENABLE 0x0002 +#define MFC_LIST_ENABLE 0x0004 +#define MFC_RESULT_ENABLE 0x0010 + +/****************************************************************/ +/* MFC DMA Put Commands */ +/****************************************************************/ + +#define MFC_PUT_CMD 0x0020 +#define MFC_PUTB_CMD (MFC_PUT_CMD | MFC_BARRIER_ENABLE) +#define MFC_PUTF_CMD (MFC_PUT_CMD | MFC_FENCE_ENABLE) +#define MFC_PUTL_CMD (MFC_PUT_CMD | MFC_LIST_ENABLE) +#define MFC_PUTLB_CMD (MFC_PUTL_CMD | MFC_BARRIER_ENABLE) +#define MFC_PUTLF_CMD (MFC_PUTL_CMD | MFC_FENCE_ENABLE) + +#define MFC_PUTR_CMD (MFC_PUT_CMD | MFC_RESULT_ENABLE) +#define MFC_PUTRB_CMD (MFC_PUTR_CMD | MFC_BARRIER_ENABLE) +#define MFC_PUTRF_CMD (MFC_PUTR_CMD | MFC_FENCE_ENABLE) +#define MFC_PUTRL_CMD (MFC_PUTR_CMD | MFC_LIST_ENABLE) +#define MFC_PUTRLB_CMD (MFC_PUTRL_CMD | MFC_BARRIER_ENABLE) +#define MFC_PUTRLF_CMD (MFC_PUTRL_CMD | MFC_FENCE_ENABLE) + +/****************************************************************/ +/* MFC DMA Get Commands */ +/****************************************************************/ + +#define MFC_GET_CMD 0x0040 +#define MFC_GETB_CMD (MFC_GET_CMD | MFC_BARRIER_ENABLE) +#define MFC_GETF_CMD (MFC_GET_CMD | MFC_FENCE_ENABLE) +#define MFC_GETL_CMD (MFC_GET_CMD | MFC_LIST_ENABLE) +#define MFC_GETLB_CMD (MFC_GETL_CMD | MFC_BARRIER_ENABLE) +#define MFC_GETLF_CMD (MFC_GETL_CMD | MFC_FENCE_ENABLE) + +/****************************************************************/ +/* MFC Synchronization Commands */ +/****************************************************************/ + +#define MFC_SNDSIG_CMD 0x00A0 +#define MFC_SNDSIGB_CMD (MFC_SNDSIG_CMD | MFC_BARRIER_ENABLE) +#define MFC_SNDSIGF_CMD (MFC_SNDSIG_CMD | MFC_FENCE_ENABLE) +#define MFC_BARRIER_CMD 0x00C0 +#define MFC_EIEIO_CMD 0x00C8 +#define MFC_SYNC_CMD 0x00CC + +/****************************************************************/ +/* MFC Atomic Commands */ +/****************************************************************/ + +#define MFC_GETLLAR_CMD 0x00D0 +#define MFC_PUTLLC_CMD 0x00B4 +#define MFC_PUTLLUC_CMD 0x00B0 +#define MFC_PUTQLLUC_CMD 0x00B8 + +/****************************************************************/ +/* MFC SL1 Storage Control Commands */ +/****************************************************************/ + +#define MFC_SDCRT_CMD 0x0080 +#define MFC_SDCRTST_CMD 0x0081 +#define MFC_SDCRZ_CMD 0x0089 +#define MFC_SDCRST_CMD 0x008D +#define MFC_SDCRF_CMD 0x008F + +/****************************************************************/ +/* Channel Defines */ +/****************************************************************/ + +/* Events Defines for channels + * 0 (SPU_RdEventStat), + * 1 (SPU_WrEventMask), and + * 2 (SPU_WrEventAck). 
+ */ +#define MFC_TAG_STATUS_UPDATE_EVENT 0x00000001 +#define MFC_LIST_STALL_NOTIFY_EVENT 0x00000002 +#define MFC_COMMAND_QUEUE_AVAILABLE_EVENT 0x00000008 +#define MFC_IN_MBOX_AVAILABLE_EVENT 0x00000010 +#define MFC_DECREMENTER_EVENT 0x00000020 +#define MFC_OUT_INTR_MBOX_AVAILABLE_EVENT 0x00000040 +#define MFC_OUT_MBOX_AVAILABLE_EVENT 0x00000080 +#define MFC_SIGNAL_NOTIFY_2_EVENT 0x00000100 +#define MFC_SIGNAL_NOTIFY_1_EVENT 0x00000200 +#define MFC_LLR_LOST_EVENT 0x00000400 +#define MFC_PRIV_ATTN_EVENT 0x00000800 +#define MFC_MULTI_SRC_SYNC_EVENT 0x00001000 + +/* Tag Status Update defines for channel 23 (MFC_WrTagUpdate) */ +#define MFC_TAG_UPDATE_IMMEDIATE 0x0 +#define MFC_TAG_UPDATE_ANY 0x1 +#define MFC_TAG_UPDATE_ALL 0x2 + +/* Atomic Command Status defines for channel 27 (MFC_RdAtomicStat) */ +#define MFC_PUTLLC_STATUS 0x00000001 +#define MFC_PUTLLUC_STATUS 0x00000002 +#define MFC_GETLLAR_STATUS 0x00000004 + + +/****************************************************************/ +/* Definitions for constructing a 32-bit command word */ +/* including the transfer and replacement class id and the */ +/* command opcode. */ +/****************************************************************/ +#define MFC_CMD_WORD(_tid, _rid, _cmd) (((_tid)<<24)|((_rid)<<16)|(_cmd)) + + +/* Addressing Utilities */ +#define mfc_ea2h(ea) (unsigned int)((unsigned long long)(ea)>>32) +#define mfc_ea2l(ea) (unsigned int)(ea) +#define mfc_hl2ea(h,l) si_to_ullong(si_selb(si_from_uint(h),\ + si_rotqbyi(si_from_uint(l), -4),\ + si_fsmbi(0x0f0f))) +#define mfc_ceil128(v) (((v) + 127) & ~127) + +/* MFC DMA */ +#define mfc_put( ls,ea,size,tag,tid,rid) spu_mfcdma64(ls,mfc_ea2h(ea),mfc_ea2l(ea),size,tag,MFC_CMD_WORD(tid,rid,MFC_PUT_CMD)) +#define mfc_putf( ls,ea,size,tag,tid,rid) spu_mfcdma64(ls,mfc_ea2h(ea),mfc_ea2l(ea),size,tag,MFC_CMD_WORD(tid,rid,MFC_PUTF_CMD)) +#define mfc_putb( ls,ea,size,tag,tid,rid) spu_mfcdma64(ls,mfc_ea2h(ea),mfc_ea2l(ea),size,tag,MFC_CMD_WORD(tid,rid,MFC_PUTB_CMD)) +#define mfc_get( ls,ea,size,tag,tid,rid) spu_mfcdma64(ls,mfc_ea2h(ea),mfc_ea2l(ea),size,tag,MFC_CMD_WORD(tid,rid,MFC_GET_CMD)) +#define mfc_getf( ls,ea,size,tag,tid,rid) spu_mfcdma64(ls,mfc_ea2h(ea),mfc_ea2l(ea),size,tag,MFC_CMD_WORD(tid,rid,MFC_GETF_CMD)) +#define mfc_getb( ls,ea,size,tag,tid,rid) spu_mfcdma64(ls,mfc_ea2h(ea),mfc_ea2l(ea),size,tag,MFC_CMD_WORD(tid,rid,MFC_GETB_CMD)) + +/* MFC list DMA */ +#define mfc_putl( ls,ea,lsa,size,tag,tid,rid) spu_mfcdma64(ls,mfc_ea2h(ea),(unsigned int)(lsa),size,tag,MFC_CMD_WORD(tid,rid,MFC_PUTL_CMD)) +#define mfc_putlf( ls,ea,lsa,size,tag,tid,rid) spu_mfcdma64(ls,mfc_ea2h(ea),(unsigned int)(lsa),size,tag,MFC_CMD_WORD(tid,rid,MFC_PUTLF_CMD)) +#define mfc_putlb( ls,ea,lsa,size,tag,tid,rid) spu_mfcdma64(ls,mfc_ea2h(ea),(unsigned int)(lsa),size,tag,MFC_CMD_WORD(tid,rid,MFC_PUTLB_CMD)) +#define mfc_getl( ls,ea,lsa,size,tag,tid,rid) spu_mfcdma64(ls,mfc_ea2h(ea),(unsigned int)(lsa),size,tag,MFC_CMD_WORD(tid,rid,MFC_GETL_CMD)) +#define mfc_getlf( ls,ea,lsa,size,tag,tid,rid) spu_mfcdma64(ls,mfc_ea2h(ea),(unsigned int)(lsa),size,tag,MFC_CMD_WORD(tid,rid,MFC_GETLF_CMD)) +#define mfc_getlb( ls,ea,lsa,size,tag,tid,rid) spu_mfcdma64(ls,mfc_ea2h(ea),(unsigned int)(lsa),size,tag,MFC_CMD_WORD(tid,rid,MFC_GETLB_CMD)) + +/* MFC Atomic Update DMA */ +#define mfc_getllar( ls,ea,tid,rid) spu_mfcdma64(ls,mfc_ea2h(ea),mfc_ea2l(ea),128, 0,MFC_CMD_WORD(tid,rid,MFC_GETLLAR_CMD)) +#define mfc_putllc( ls,ea,tid,rid) spu_mfcdma64(ls,mfc_ea2h(ea),mfc_ea2l(ea),128, 0,MFC_CMD_WORD(tid,rid,MFC_PUTLLC_CMD)) +#define 
mfc_putlluc( ls,ea,tid,rid) spu_mfcdma64(ls,mfc_ea2h(ea),mfc_ea2l(ea),128, 0,MFC_CMD_WORD(tid,rid,MFC_PUTLLUC_CMD)) +#define mfc_putqlluc(ls,ea,tag,tid,rid) spu_mfcdma64(ls,mfc_ea2h(ea),mfc_ea2l(ea),128,tag,MFC_CMD_WORD(tid,rid,MFC_PUTQLLUC_CMD)) + +/* MFC Synchronization Commands */ +#define mfc_sndsig( ls,ea,tag,tid,rid) spu_mfcdma64(ls,mfc_ea2h(ea),mfc_ea2l(ea),4,tag,MFC_CMD_WORD(tid,rid,MFC_SNDSIG_CMD)) +#define mfc_sndsigb(ls,ea,tag,tid,rid) spu_mfcdma64(ls,mfc_ea2h(ea),mfc_ea2l(ea),4,tag,MFC_CMD_WORD(tid,rid,MFC_SNDSIGB_CMD)) +#define mfc_sndsigf(ls,ea,tag,tid,rid) spu_mfcdma64(ls,mfc_ea2h(ea),mfc_ea2l(ea),4,tag,MFC_CMD_WORD(tid,rid,MFC_SNDSIGF_CMD)) +#define mfc_barrier(tag) spu_mfcdma32(0,0,0,tag,MFC_BARRIER_CMD) +#define mfc_eieio(tag,tid,rid) spu_mfcdma32(0,0,0,tag,MFC_CMD_WORD(tid,rid,MFC_EIEIO_CMD)) +#define mfc_sync(tag) spu_mfcdma32(0,0,0,tag,MFC_SYNC_CMD) + +/* MFC SL1 Storage Control Commands */ +#define mfc_sdcrt( ea,size,tag,tid,rid) spu_mfcdma64(0,mfc_ea2h(ea),mfc_ea2l(ea),size,tag,MFC_CMD_WORD(tid,rid,MFC_SDCRT_CMD)) +#define mfc_sdcrtst(ea,size,tag,tid,rid) spu_mfcdma64(0,mfc_ea2h(ea),mfc_ea2l(ea),size,tag,MFC_CMD_WORD(tid,rid,MFC_SDCRTST_CMD)) +#define mfc_sdcrz( ea,size,tag,tid,rid) spu_mfcdma64(0,mfc_ea2h(ea),mfc_ea2l(ea),size,tag,MFC_CMD_WORD(tid,rid,MFC_SDCRZ_CMD)) +#define mfc_sdcrst( ea,size,tag,tid,rid) spu_mfcdma64(0,mfc_ea2h(ea),mfc_ea2l(ea),size,tag,MFC_CMD_WORD(tid,rid,MFC_SDCRST_CMD)) +#define mfc_sdcrf( ea,size,tag,tid,rid) spu_mfcdma64(0,mfc_ea2h(ea),mfc_ea2l(ea),size,tag,MFC_CMD_WORD(tid,rid,MFC_SDCRF_CMD)) + +/* DMA Queue */ +#define mfc_stat_cmd_queue() spu_readchcnt(MFC_Cmd) + +/* MFC Tag-Status */ +#define mfc_write_tag_mask(mask) spu_writech(MFC_WrTagMask,mask) +#define mfc_read_tag_mask() spu_readch(MFC_RdTagMask) + +#define mfc_write_tag_update(ts) spu_writech(MFC_WrTagUpdate,ts) +#define mfc_write_tag_update_immediate() mfc_write_tag_update(MFC_TAG_UPDATE_IMMEDIATE) +#define mfc_write_tag_update_any() mfc_write_tag_update(MFC_TAG_UPDATE_ANY) +#define mfc_write_tag_update_all() mfc_write_tag_update(MFC_TAG_UPDATE_ALL) +#define mfc_stat_tag_update() spu_readchcnt(MFC_WrTagUpdate) + +#define mfc_read_tag_status() spu_readch(MFC_RdTagStat) +#define mfc_read_tag_status_immediate() (mfc_write_tag_update_immediate(), mfc_read_tag_status()) +#define mfc_read_tag_status_any() (mfc_write_tag_update_any(), mfc_read_tag_status()) +#define mfc_read_tag_status_all() (mfc_write_tag_update_all(), mfc_read_tag_status()) +#define mfc_stat_tag_status() spu_readchcnt(MFC_RdTagStat) + +/* MFC List Stall-and-Notify Tag */ +#define mfc_read_list_stall_status() spu_readch(MFC_RdListStallStat) +#define mfc_stat_list_stall_status() spu_readchcnt(MFC_RdListStallStat) +#define mfc_write_list_stall_ack(tag) spu_writech(MFC_WrListStallAck,tag) + +/* Atomic DMA */ +#define mfc_read_atomic_status() spu_readch(MFC_RdAtomicStat) +#define mfc_stat_atomic_status() spu_readchcnt(MFC_RdAtomicStat) + +/* MFC Multi-source Synchronization */ +#define mfc_write_multi_src_sync_request() spu_writech(MFC_WrMSSyncReq,0) +#define mfc_stat_multi_src_sync_request() spu_readchcnt(MFC_WrMSSyncReq) + +/* SPU Signal */ +#define spu_read_signal1() spu_readch(SPU_RdSigNotify1) +#define spu_stat_signal1() spu_readchcnt(SPU_RdSigNotify1) +#define spu_read_signal2() spu_readch(SPU_RdSigNotify2) +#define spu_stat_signal2() spu_readchcnt(SPU_RdSigNotify2) + +/* SPU/PPE Mailbox */ +#define spu_read_in_mbox() spu_readch(SPU_RdInMbox) +#define spu_stat_in_mbox() spu_readchcnt(SPU_RdInMbox) +#define 
spu_write_out_mbox(a) spu_writech(SPU_WrOutMbox,a) +#define spu_stat_out_mbox() spu_readchcnt(SPU_WrOutMbox) +#define spu_write_out_intr_mbox(a) spu_writech(SPU_WrOutIntrMbox,a) +#define spu_stat_out_intr_mbox() spu_readchcnt(SPU_WrOutIntrMbox) + +/* SPU Decrementer */ +#define spu_read_decrementer() spu_readch(SPU_RdDec) +#define spu_write_decrementer(cnt) spu_writech(SPU_WrDec,(cnt)) + +/* SPU Event */ +#define spu_read_event_status() spu_readch(SPU_RdEventStat) +#define spu_stat_event_status() spu_readchcnt(SPU_RdEventStat) +#define spu_write_event_mask(mask) spu_writech(SPU_WrEventMask,(mask)) +#define spu_write_event_ack(ack) spu_writech(SPU_WrEventAck,(ack)) +#define spu_read_event_mask() spu_readch(SPU_RdEventMask) + +/* SPU State Management */ +#define spu_read_machine_status() spu_readch(SPU_RdMachStat) +#define spu_write_srr0(srr0) spu_writech(SPU_WrSRR0,srr0) +#define spu_read_srr0() spu_readch(SPU_RdSRR0) + +/* Interrupt-Safe Critical Sections */ + +static __inline__ unsigned int mfc_begin_critical_section (void) + __attribute__ ((__always_inline__)); + +static __inline__ unsigned int +mfc_begin_critical_section (void) +{ +#ifdef SPU_MFCIO_INTERRUPT_SAFE + unsigned int __status = spu_read_machine_status (); + spu_idisable (); + return __status; +#else + return 0; +#endif +} + +static __inline__ void mfc_end_critical_section (unsigned int) + __attribute__ ((__always_inline__)); + +static __inline__ void +mfc_end_critical_section (unsigned int __status __attribute__ ((__unused__))) +{ +#ifdef SPU_MFCIO_INTERRUPT_SAFE + if (__status & 1) + spu_ienable (); +#endif +} + +/* MFC Tag Manager */ + +#define MFC_TAG_INVALID 0xFFFFFFFF +#define MFC_TAG_VALID 0x00000000 + +#define mfc_tag_reserve() \ + __mfc_tag_reserve() +#define mfc_tag_release(tag) \ + __mfc_tag_release((tag)) +#define mfc_multi_tag_reserve(nr_tags) \ + __mfc_multi_tag_reserve((nr_tags)) +#define mfc_multi_tag_release(tag, nr_tags) \ + __mfc_multi_tag_release((tag),(nr_tags)) + +extern unsigned int __mfc_tag_reserve (void); +extern unsigned int __mfc_tag_release (unsigned int); +extern unsigned int __mfc_multi_tag_reserve (unsigned int); +extern unsigned int __mfc_multi_tag_release (unsigned int, unsigned int); + +#ifdef __cplusplus +} +#endif + +#endif /* __SPU_MFCIO_H__ */
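A minimal usage sketch of the DMA and tag-manager macros defined in spu_mfcio.h above, assuming a 128-byte aligned local-store buffer; the names fetch_block and buf are illustrative and not part of the header:

#include <spu_mfcio.h>

/* 128-byte aligned local-store buffer; DMA addresses and sizes must satisfy
   the usual MFC alignment rules.  */
static volatile unsigned char buf[128] __attribute__ ((aligned (128)));

void fetch_block (unsigned long long ea)
{
  /* Reserve a tag group; __mfc_tag_reserve returns MFC_TAG_INVALID on failure.  */
  unsigned int tag = mfc_tag_reserve ();
  if (tag == MFC_TAG_INVALID)
    return;

  /* Queue a DMA get: local-store address, effective address, size, tag,
     transfer class id 0, replacement class id 0.  */
  mfc_get (buf, ea, sizeof (buf), tag, 0, 0);

  /* Block until every command in this tag group has completed.  */
  mfc_write_tag_mask (1 << tag);
  mfc_read_tag_status_all ();

  mfc_tag_release (tag);
}

As the macro definitions above show, mfc_read_tag_status_all expands to a tag-update write followed by a channel read, so the call does not return until the MFC reports completion for all commands selected by the current tag mask.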
spu_mfcio.h Property changes : Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +Id \ No newline at end of property Index: vmx2spu.h =================================================================== --- vmx2spu.h (nonexistent) +++ vmx2spu.h (revision 384) @@ -0,0 +1,3985 @@ +/* Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc. + + This file is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your option) + any later version. + + This file is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _VMX2SPU_H_ +#define _VMX2SPU_H_ 1 + +#ifdef __cplusplus + +#ifdef __SPU__ + +#include <spu_intrinsics.h> +#include <vec_types.h> + +/* This file maps generic VMX intrinsics and predicates to the SPU using + * overloaded C++ functions. + */ + +/************************************************************************ + * INTRINSICS + ************************************************************************/ + +/* vec_abs (vector absolute value) + * ======= + */ +static inline vec_char16 vec_abs(vec_char16 a) +{ + vec_char16 minus_a; + + minus_a = (vec_char16)(spu_add((vec_ushort8)(spu_and(spu_xor(a, 0xFF), 0x7F)), 0x101)); + return (spu_sel(minus_a, a, spu_cmpgt(a, -1))); +} + +static inline vec_short8 vec_abs(vec_short8 a) +{ + return (spu_sel(spu_sub(0, a), a, spu_cmpgt(a, -1))); +} + +static inline vec_int4 vec_abs(vec_int4 a) +{ + return (spu_sel(spu_sub(0, a), a, spu_cmpgt(a, -1))); +} + +static inline vec_float4 vec_abs(vec_float4 a) +{ + return ((vec_float4)(spu_rlmask(spu_sl((vec_uint4)(a), 1), -1))); +} + +/* vec_abss (vector absolute value saturate) + * ======== + */ +static inline vec_char16 vec_abss(vec_char16 a) +{ + vec_char16 minus_a; + + minus_a = (vec_char16)spu_add((vec_short8)(spu_xor(a, -1)), + (vec_short8)(spu_and(spu_cmpgt((vec_uchar16)(a), 0x80), 1))); + return (spu_sel(minus_a, a, spu_cmpgt(a, -1))); +} + +static inline vec_short8 vec_abss(vec_short8 a) +{ + vec_short8 minus_a; + + minus_a = spu_add(spu_sub(0, a), (vec_short8)(spu_cmpeq(a, ((vec_short8){0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000})))); + return (spu_sel(minus_a, a, spu_cmpgt(a, -1))); +} + +static inline vec_int4 vec_abss(vec_int4 a) +{ + vec_int4 minus_a; + + minus_a = spu_add(spu_sub(0, a), (vec_int4)(spu_cmpeq(a, ((vec_int4){0x80000000,0x80000000,0x80000000,0x80000000})))); + return (spu_sel(minus_a, a, spu_cmpgt(a, -1))); +} + + +/* vec_add (vector add) + * ======= + */ +static inline vec_uchar16 vec_add(vec_uchar16 a, vec_uchar16 b) +{ + return ((vec_uchar16)(spu_sel(spu_add((vec_ushort8)(a), (vec_ushort8)(b)), + spu_add(spu_and((vec_ushort8)(a), 0xFF00), spu_and((vec_ushort8)(b), 0xFF00)), + spu_splats((unsigned short)(0xFF00))))); +} + +static inline vec_char16 vec_add(vec_char16 a, vec_char16 b) +{ + return
((vec_char16)vec_add((vec_uchar16)(a), (vec_uchar16)(b))); +} + +static inline vec_char16 vec_add(vec_bchar16 a, vec_char16 b) +{ + return ((vec_char16)vec_add((vec_uchar16)(a), (vec_uchar16)(b))); +} + +static inline vec_char16 vec_add(vec_char16 a, vec_bchar16 b) +{ + return ((vec_char16)vec_add((vec_uchar16)(a), (vec_uchar16)(b))); +} + +static inline vec_ushort8 vec_add(vec_ushort8 a, vec_ushort8 b) +{ + return (spu_add(a, b)); +} + +static inline vec_short8 vec_add(vec_short8 a, vec_short8 b) +{ + return (spu_add(a, b)); +} + +static inline vec_short8 vec_add(vec_bshort8 a, vec_short8 b) +{ + return (spu_add((vec_short8)(a), b)); +} + +static inline vec_short8 vec_add(vec_short8 a, vec_bshort8 b) +{ + return (spu_add(a, (vec_short8)(b))); +} + +static inline vec_uint4 vec_add(vec_uint4 a, vec_uint4 b) +{ + return (spu_add(a, b)); +} + +static inline vec_int4 vec_add(vec_int4 a, vec_int4 b) +{ + return (spu_add(a, b)); +} + +static inline vec_int4 vec_add(vec_bint4 a, vec_int4 b) +{ + return (spu_add((vec_int4)(a), b)); +} + +static inline vec_int4 vec_add(vec_int4 a, vec_bint4 b) +{ + return (spu_add(a, (vec_int4)(b))); +} + +static inline vec_float4 vec_add(vec_float4 a, vec_float4 b) +{ + return (spu_add(a, b)); +} + +/* vec_addc (vector add carryout unsigned word) + * ======== + */ +#define vec_addc(_a, _b) spu_genc(_a, _b) + +/* vec_adds (vector add saturated) + * ======== + */ +static inline vec_uchar16 vec_adds(vec_uchar16 a, vec_uchar16 b) +{ + vec_uchar16 s1, s2, s, d; + + s1 = (vec_uchar16)(spu_add(spu_rlmask((vec_ushort8)(a), -8), spu_rlmask((vec_ushort8)(b), -8))); + s2 = (vec_uchar16)(spu_add(spu_and((vec_ushort8)(a), 0xFF), spu_and((vec_ushort8)(b), 0xFF))); + s = spu_shuffle(s1, s2, ((vec_uchar16){0, 16, 2, 18, 4, 20, 6, 22, + 8, 24, 10, 26, 12, 28, 14, 30})); + d = spu_shuffle(s1, s2, ((vec_uchar16){1, 17, 3, 19, 5, 21, 7, 23, + 9, 25, 11, 27, 13, 29, 15, 31})); + return (spu_or(d, spu_cmpeq(s, 1))); +} + +static inline vec_char16 vec_adds(vec_char16 a, vec_char16 b) +{ + vec_uchar16 s1, s2, s, d; + + s1 = (vec_uchar16)(spu_add(spu_rlmask((vec_ushort8)(a), -8), spu_rlmask((vec_ushort8)(b), -8))); + s2 = (vec_uchar16)(spu_add(spu_and((vec_ushort8)(a), 0xFF), spu_and((vec_ushort8)(b), 0xFF))); + s = spu_shuffle(s1, s2, ((vec_uchar16){1, 17, 3, 19, 5, 21, 7, 23, + 9, 25, 11, 27, 13, 29, 15, 31})); + d = spu_sel(s, spu_splats((unsigned char)0x7F), spu_cmpgt(spu_and(s, (vec_uchar16)(spu_nor(a, b))), 0x7F)); + d = spu_sel(d, spu_splats((unsigned char)0x80), spu_cmpgt(spu_nor(s, (vec_uchar16)(spu_nand(a, b))), 0x7F)); + return ((vec_char16)(d)); +} + +static inline vec_char16 vec_adds(vec_bchar16 a, vec_char16 b) +{ + return (vec_adds((vec_char16)(a), b)); +} + +static inline vec_char16 vec_adds(vec_char16 a, vec_bchar16 b) +{ + return (vec_adds(a, (vec_char16)(b))); +} + +static inline vec_ushort8 vec_adds(vec_ushort8 a, vec_ushort8 b) +{ + vec_ushort8 s, d; + + s = spu_add(a, b); + d = spu_or(s, spu_rlmaska(spu_sel(spu_xor(s, -1), a, spu_eqv(a, b)), -15)); + return (d); +} + +static inline vec_short8 vec_adds(vec_short8 a, vec_short8 b) +{ + vec_short8 s, d; + + s = spu_add(a, b); + d = spu_sel(s, spu_splats((signed short)0x7FFF), (vec_ushort8)(spu_rlmaska(spu_and(s, spu_nor(a, b)), -15))); + d = spu_sel(d, spu_splats((signed short)0x8000), (vec_ushort8)(spu_rlmaska(spu_nor(s, spu_nand(a, b)), -15))); + return (d); +} + +static inline vec_short8 vec_adds(vec_bshort8 a, vec_short8 b) +{ + return (vec_adds((vec_short8)(a), b)); +} + +static inline vec_short8 
vec_adds(vec_short8 a, vec_bshort8 b) +{ + return (vec_adds(a, (vec_short8)(b))); +} + +static inline vec_uint4 vec_adds(vec_uint4 a, vec_uint4 b) +{ + return (spu_or(spu_add(a, b), spu_rlmaska(spu_sl(spu_genc(a, b), 31), -31))); +} + +static inline vec_int4 vec_adds(vec_int4 a, vec_int4 b) +{ + vec_int4 s, d; + + s = spu_add(a, b); + d = spu_sel(s, spu_splats((signed int)0x7FFFFFFF), (vec_uint4)spu_rlmaska(spu_and(s, spu_nor(a, b)), -31)); + d = spu_sel(d, spu_splats((signed int)0x80000000), (vec_uint4)spu_rlmaska(spu_nor(s, spu_nand(a, b)), -31)); + return (d); +} + +static inline vec_int4 vec_adds(vec_bint4 a, vec_int4 b) +{ + return (vec_adds((vec_int4)(a), b)); +} + +static inline vec_int4 vec_adds(vec_int4 a, vec_bint4 b) +{ + return (vec_adds(a, (vec_int4)(b))); +} + +/* vec_and (vector logical and) + * ======= + */ +static inline vec_uchar16 vec_and(vec_uchar16 a, vec_uchar16 b) +{ + return (spu_and(a, b)); +} + +static inline vec_char16 vec_and(vec_char16 a, vec_char16 b) +{ + return (spu_and(a, b)); +} + +static inline vec_char16 vec_and(vec_bchar16 a, vec_char16 b) +{ + return (spu_and((vec_char16)(a), b)); +} + +static inline vec_char16 vec_and(vec_char16 a, vec_bchar16 b) +{ + return (spu_and(a, (vec_char16)(b))); +} + +static inline vec_ushort8 vec_and(vec_ushort8 a, vec_ushort8 b) +{ + return (spu_and(a, b)); +} + +static inline vec_short8 vec_and(vec_short8 a, vec_short8 b) +{ + return (spu_and(a, b)); +} + +static inline vec_short8 vec_and(vec_bshort8 a, vec_short8 b) +{ + return (spu_and((vec_short8)(a), b)); +} + +static inline vec_short8 vec_and(vec_short8 a, vec_bshort8 b) +{ + return (spu_and(a, (vec_short8)(b))); +} + +static inline vec_uint4 vec_and(vec_uint4 a, vec_uint4 b) +{ + return (spu_and(a, b)); +} + +static inline vec_int4 vec_and(vec_int4 a, vec_int4 b) +{ + return (spu_and(a, b)); +} + +static inline vec_int4 vec_and(vec_bint4 a, vec_int4 b) +{ + return (spu_and((vec_int4)(a), b)); +} + +static inline vec_int4 vec_and(vec_int4 a, vec_bint4 b) +{ + return (spu_and(a, (vec_int4)(b))); +} + +static inline vec_float4 vec_and(vec_float4 a, vec_float4 b) +{ + return (spu_and(a, b)); +} + +static inline vec_float4 vec_and(vec_bint4 a, vec_float4 b) +{ + return (spu_and((vec_float4)(a),b)); +} + +static inline vec_float4 vec_and(vec_float4 a, vec_bint4 b) +{ + return (spu_and(a, (vec_float4)(b))); +} + + +/* vec_andc (vector logical and with complement) + * ======== + */ +static inline vec_uchar16 vec_andc(vec_uchar16 a, vec_uchar16 b) +{ + return (spu_andc(a, b)); +} + +static inline vec_char16 vec_andc(vec_char16 a, vec_char16 b) +{ + return (spu_andc(a, b)); +} + +static inline vec_char16 vec_andc(vec_bchar16 a, vec_char16 b) +{ + return (spu_andc((vec_char16)(a), b)); +} + +static inline vec_char16 vec_andc(vec_char16 a, vec_bchar16 b) +{ + return (spu_andc(a, (vec_char16)(b))); +} + +static inline vec_ushort8 vec_andc(vec_ushort8 a, vec_ushort8 b) +{ + return (spu_andc(a, b)); +} + +static inline vec_short8 vec_andc(vec_short8 a, vec_short8 b) +{ + return (spu_andc(a, b)); +} + +static inline vec_short8 vec_andc(vec_bshort8 a, vec_short8 b) +{ + return (spu_andc((vec_short8)(a), b)); +} + +static inline vec_short8 vec_andc(vec_short8 a, vec_bshort8 b) +{ + return (spu_andc(a, (vec_short8)(b))); +} + +static inline vec_uint4 vec_andc(vec_uint4 a, vec_uint4 b) +{ + return (spu_andc(a, b)); +} + +static inline vec_int4 vec_andc(vec_int4 a, vec_int4 b) +{ + return (spu_andc(a, b)); +} + +static inline vec_int4 vec_andc(vec_bint4 a, vec_int4 b) +{ + return 
(spu_andc((vec_int4)(a), b)); +} + +static inline vec_int4 vec_andc(vec_int4 a, vec_bint4 b) +{ + return (spu_andc(a, (vec_int4)(b))); +} + +static inline vec_float4 vec_andc(vec_float4 a, vec_float4 b) +{ + return (spu_andc(a,b)); +} + +static inline vec_float4 vec_andc(vec_bint4 a, vec_float4 b) +{ + return (spu_andc((vec_float4)(a),b)); +} + +static inline vec_float4 vec_andc(vec_float4 a, vec_bint4 b) +{ + return (spu_andc(a, (vec_float4)(b))); +} + +/* vec_avg (vector average) + * ======= + */ +static inline vec_uchar16 vec_avg(vec_uchar16 a, vec_uchar16 b) +{ + return (spu_avg(a, b)); +} + +static inline vec_char16 vec_avg(vec_char16 a, vec_char16 b) +{ + return ((vec_char16)(spu_xor(spu_avg((vec_uchar16)(a), (vec_uchar16)(b)), + (vec_uchar16)(spu_and(spu_xor(a,b), 0x80))))); +} + +static inline vec_ushort8 vec_avg(vec_ushort8 a, vec_ushort8 b) +{ + return (spu_add(spu_add(spu_rlmask(a, -1), spu_rlmask(b, -1)), + spu_and(spu_or(a, b), 1))); +} + +static inline vec_short8 vec_avg(vec_short8 a, vec_short8 b) +{ + return (spu_add(spu_add(spu_rlmaska(a, -1), spu_rlmaska(b, -1)), + spu_and(spu_or(a, b), 1))); +} + +static inline vec_uint4 vec_avg(vec_uint4 a, vec_uint4 b) +{ + return (spu_add(spu_add(spu_rlmask(a, -1), spu_rlmask(b, -1)), + spu_and(spu_or(a, b), 1))); +} + +static inline vec_int4 vec_avg(vec_int4 a, vec_int4 b) +{ + return (spu_add(spu_add(spu_rlmaska(a, -1), spu_rlmaska(b, -1)), + spu_and(spu_or(a, b), 1))); +} + + +/* vec_ceil (vector ceiling) + * ======== + */ +static inline vec_float4 vec_ceil(vec_float4 a) +{ + vec_int4 exp; + vec_uint4 mask; + + a = spu_add(a, (vec_float4)(spu_and(spu_xor(spu_rlmaska((vec_int4)a, -31), -1), spu_splats((signed int)0x3F7FFFFF)))); + exp = spu_sub(127, (vec_int4)(spu_and(spu_rlmask((vec_uint4)(a), -23), 0xFF))); + mask = spu_rlmask(spu_splats((unsigned int)0x7FFFFF), exp); + mask = spu_sel(spu_splats((unsigned int)0), mask, spu_cmpgt(exp, -31)); + mask = spu_or(mask, spu_xor((vec_uint4)(spu_rlmaska(spu_add(exp, -1), -31)), -1)); + + return ((vec_float4)(spu_andc((vec_uint4)(a), mask))); +} + + +/* vec_cmpb (vector compare bounds floating-point) + * ======== + */ +static inline vec_int4 vec_cmpb(vec_float4 a, vec_float4 b) +{ + vec_int4 b0 = (vec_int4)spu_splats(0x80000000); + vec_int4 b1 = (vec_int4)spu_splats(0x40000000); + + return (spu_or(spu_and((vec_int4)spu_cmpgt(a, b), b0), + spu_and((vec_int4)spu_cmpgt(spu_xor(b, (vec_float4)(b0)), a), b1))); +} + +/* vec_cmpeq (vector compare equal) + * ========= + */ +#define vec_cmpeq(_a, _b) spu_cmpeq(_a, _b) + + +/* vec_cmpge (vector compare greater than or equal) + * ========= + */ +static inline vec_bint4 vec_cmpge(vec_float4 a, vec_float4 b) +{ + return (spu_xor(spu_cmpgt(b, a), -1)); +} + + +/* vec_cmpgt (vector compare greater than) + * ========= + */ +#define vec_cmpgt(_a, _b) spu_cmpgt(_a, _b) + + +/* vec_cmple (vector compare less than or equal) + * ========= + */ +static inline vec_bint4 vec_cmple(vec_float4 a, vec_float4 b) +{ + return (spu_xor(spu_cmpgt(a, b), -1)); +} + + +/* vec_cmplt (vector compare less than) + * ========= + */ +#define vec_cmplt(_a, _b) spu_cmpgt(_b, _a) + + +/* vec_ctf (vector convert from fixed-point word) + * ======= + */ +#define vec_ctf(_a, _b) spu_convtf(_a, _b) + + +/* vec_cts (vector convert to signed fixed-point word saturate) + * ======= + */ +#define vec_cts(_a, _b) spu_convts(_a, _b) + + +/* vec_ctu (vector convert to unsigned fixed-point word saturate) + * ======= + */ +#define vec_ctu(_a, _b) spu_convtu(_a, _b) + + +/* vec_dss (vector data 
stream stop) + * ======= + */ +#define vec_dss(_a) + + +/* vec_dssall (vector data stream stop all) + * ========== + */ +#define vec_dssall() + + +/* vec_dst (vector data stream touch) + * ======= + */ +#define vec_dst(_a, _b, _c) + + +/* vec_dstst (vector data stream touch for store) + * ========= + */ +#define vec_dstst(_a, _b, _c) + + +/* vec_dststt (vector data stream touch for store transient) + * ========== + */ +#define vec_dststt(_a, _b, _c) + + +/* vec_dstt (vector data stream touch transient) + * ======== + */ +#define vec_dstt(_a, _b, _c) + + +/* vec_expte (vector is 2 raised tp the exponent estimate floating-point) + * ========= + */ +static inline vec_float4 vec_expte(vec_float4 a) +{ + vec_float4 bias, frac, exp; + vec_int4 ia; + + bias = (vec_float4)(spu_andc(spu_splats((signed int)0x3F7FFFFF), spu_rlmaska((vec_int4)(a), -31))); + ia = spu_convts(spu_add(a, bias), 0); + frac = spu_sub(spu_convtf(ia, 0), a); + exp = (vec_float4)(spu_sl(spu_add(ia, 127), 23)); + + return (spu_mul(spu_madd(spu_madd(spu_splats(0.17157287f), frac, spu_splats(-0.67157287f)), + frac, spu_splats(1.0f)), exp)); +} + + +/* vec_floor (vector floor) + * ========= + */ +static inline vec_float4 vec_floor(vec_float4 a) +{ + vec_int4 exp; + vec_uint4 mask; + + a = spu_sub(a, (vec_float4)(spu_and(spu_rlmaska((vec_int4)a, -31), spu_splats((signed int)0x3F7FFFFF)))); + exp = spu_sub(127, (vec_int4)(spu_and(spu_rlmask((vec_uint4)(a), -23), 0xFF))); + mask = spu_rlmask(spu_splats((unsigned int)0x7FFFFF), exp); + mask = spu_sel(spu_splats((unsigned int)0), mask, spu_cmpgt(exp, -31)); + mask = spu_or(mask, spu_xor((vec_uint4)(spu_rlmaska(spu_add(exp, -1), -31)), -1)); + + return ((vec_float4)(spu_andc((vec_uint4)(a), mask))); +} + + +/* vec_ld (vector load indexed) + * ====== + */ +static inline vec_uchar16 vec_ld(int a, unsigned char *b) +{ + return (*((vec_uchar16 *)(b+a))); +} + +static inline vec_uchar16 vec_ld(int a, vec_uchar16 *b) +{ + return (*((vec_uchar16 *)((unsigned char *)(b)+a))); +} + +static inline vec_char16 vec_ld(int a, signed char *b) +{ + return (*((vec_char16 *)(b+a))); +} + +static inline vec_char16 vec_ld(int a, vec_char16 *b) +{ + return (*((vec_char16 *)((signed char *)(b)+a))); +} + +static inline vec_ushort8 vec_ld(int a, unsigned short *b) +{ + return (*((vec_ushort8 *)((unsigned char *)(b)+a))); +} + +static inline vec_ushort8 vec_ld(int a, vec_ushort8 *b) +{ + return (*((vec_ushort8 *)((unsigned char *)(b)+a))); +} + +static inline vec_short8 vec_ld(int a, signed short *b) +{ + return (*((vec_short8 *)((unsigned char *)(b)+a))); +} + +static inline vec_short8 vec_ld(int a, vec_short8 *b) +{ + return (*((vec_short8 *)((signed char *)(b)+a))); +} + +static inline vec_uint4 vec_ld(int a, unsigned int *b) +{ + return (*((vec_uint4 *)((unsigned char *)(b)+a))); +} + +static inline vec_uint4 vec_ld(int a, vec_uint4 *b) +{ + return (*((vec_uint4 *)((unsigned char *)(b)+a))); +} + +static inline vec_int4 vec_ld(int a, signed int *b) +{ + return (*((vec_int4 *)((unsigned char *)(b)+a))); +} + +static inline vec_int4 vec_ld(int a, vec_int4 *b) +{ + return (*((vec_int4 *)((signed char *)(b)+a))); +} + +static inline vec_float4 vec_ld(int a, float *b) +{ + return (*((vec_float4 *)((unsigned char *)(b)+a))); +} + +static inline vec_float4 vec_ld(int a, vec_float4 *b) +{ + return (*((vec_float4 *)((unsigned char *)(b)+a))); +} + +/* vec_lde (vector load element indexed) + * ======= + */ +static inline vec_uchar16 vec_lde(int a, unsigned char *b) +{ + return (*((vec_uchar16 *)(b+a))); +} + +static 
inline vec_char16 vec_lde(int a, signed char *b) +{ + return (*((vec_char16 *)(b+a))); +} + +static inline vec_ushort8 vec_lde(int a, unsigned short *b) +{ + return (*((vec_ushort8 *)((unsigned char *)(b)+a))); +} + +static inline vec_short8 vec_lde(int a, signed short *b) +{ + return (*((vec_short8 *)((unsigned char *)(b)+a))); +} + + +static inline vec_uint4 vec_lde(int a, unsigned int *b) +{ + return (*((vec_uint4 *)((unsigned char *)(b)+a))); +} + +static inline vec_int4 vec_lde(int a, signed int *b) +{ + return (*((vec_int4 *)((unsigned char *)(b)+a))); +} + + +static inline vec_float4 vec_lde(int a, float *b) +{ + return (*((vec_float4 *)((unsigned char *)(b)+a))); +} + +/* vec_ldl (vector load indexed LRU) + * ======= + */ +#define vec_ldl(_a, _b) vec_ld(_a, _b) + + +/* vec_loge (vector log2 estimate floating-point) + * ======== + */ +static inline vec_float4 vec_loge(vec_float4 a) +{ + vec_int4 exp; + vec_float4 frac; + + exp = spu_add((vec_int4)(spu_and(spu_rlmask((vec_uint4)(a), -23), 0xFF)), -127); + frac = (vec_float4)(spu_sub((vec_int4)(a), spu_sl(exp, 23))); + + return (spu_madd(spu_madd(spu_splats(-0.33985f), frac, spu_splats(2.01955f)), + frac, spu_sub(spu_convtf(exp, 0), spu_splats(1.6797f)))); +} + + +/* vec_lvsl (vector load for shift left) + * ======== + */ +static inline vec_uchar16 vec_lvsl(int a, unsigned char *b) +{ + return ((vec_uchar16)spu_add((vec_ushort8)(spu_splats((unsigned char)((a + (int)(b)) & 0xF))), + ((vec_ushort8){0x0001, 0x0203, 0x0405, 0x0607, + 0x0809, 0x0A0B, 0x0C0D, 0x0E0F}))); +} + +static inline vec_uchar16 vec_lvsl(int a, signed char *b) +{ + return (vec_lvsl(a, (unsigned char *)b)); +} + +static inline vec_uchar16 vec_lvsl(int a, unsigned short *b) +{ + return (vec_lvsl(a, (unsigned char *)b)); +} + +static inline vec_uchar16 vec_lvsl(int a, short *b) +{ + return (vec_lvsl(a, (unsigned char *)b)); +} + +static inline vec_uchar16 vec_lvsl(int a, unsigned int *b) +{ + return (vec_lvsl(a, (unsigned char *)b)); +} + +static inline vec_uchar16 vec_lvsl(int a, int *b) +{ + return (vec_lvsl(a, (unsigned char *)b)); +} + +static inline vec_uchar16 vec_lvsl(int a, float *b) +{ + return (vec_lvsl(a, (unsigned char *)b)); +} + + +/* vec_lvsr (vector load for shift right) + * ======== + */ +static inline vec_uchar16 vec_lvsr(int a, unsigned char *b) +{ + return ((vec_uchar16)(spu_sub(((vec_ushort8){0x1011, 0x1213, 0x1415, 0x1617, + 0x1819, 0x1A1B, 0x1C1D, 0x1E1F}), + (vec_ushort8)(spu_splats((unsigned char)((a + (int)(b)) & 0xF)))))); +} + +static inline vec_uchar16 vec_lvsr(int a, signed char *b) +{ + return (vec_lvsr(a, (unsigned char *)b)); +} + +static inline vec_uchar16 vec_lvsr(int a, unsigned short *b) +{ + return (vec_lvsr(a, (unsigned char *)b)); +} + +static inline vec_uchar16 vec_lvsr(int a, short *b) +{ + return (vec_lvsr(a, (unsigned char *)b)); +} + +static inline vec_uchar16 vec_lvsr(int a, unsigned int *b) +{ + return (vec_lvsr(a, (unsigned char *)b)); +} + +static inline vec_uchar16 vec_lvsr(int a, int *b) +{ + return (vec_lvsr(a, (unsigned char *)b)); +} + +static inline vec_uchar16 vec_lvsr(int a, float *b) +{ + return (vec_lvsr(a, (unsigned char *)b)); +} + +/* vec_madd (vector multiply add) + * ======== + */ +#define vec_madd(_a, _b, _c) spu_madd(_a, _b, _c) + + + +/* vec_madds (vector multiply add saturate) + * ========= + */ +static inline vec_short8 vec_madds(vec_short8 a, vec_short8 b, vec_short8 c) +{ + return (vec_adds(c, spu_sel((vec_short8)(spu_sl(spu_mule(a, b), 1)), + (vec_short8)(spu_rlmask(spu_mulo(a, b), -15)), + 
((vec_ushort8){0, 0xFFFF, 0, 0xFFFF, 0, 0xFFFF, 0, 0xFFFF})))); +} + +/* vec_max (vector maximum) + * ======= + */ +static inline vec_uchar16 vec_max(vec_uchar16 a, vec_uchar16 b) +{ + return (spu_sel(b, a, spu_cmpgt(a, b))); +} + +static inline vec_char16 vec_max(vec_char16 a, vec_char16 b) +{ + return (spu_sel(b, a, spu_cmpgt(a, b))); +} + +static inline vec_char16 vec_max(vec_bchar16 a, vec_char16 b) +{ + return (spu_sel(b, (vec_char16)(a), spu_cmpgt((vec_char16)(a), b))); +} + +static inline vec_char16 vec_max(vec_char16 a, vec_bchar16 b) +{ + return (spu_sel((vec_char16)(b), a, spu_cmpgt(a, (vec_char16)(b)))); +} + +static inline vec_ushort8 vec_max(vec_ushort8 a, vec_ushort8 b) +{ + return (spu_sel(b, a, spu_cmpgt(a, b))); +} + +static inline vec_short8 vec_max(vec_short8 a, vec_short8 b) +{ + return (spu_sel(b, a, spu_cmpgt(a, b))); +} + +static inline vec_short8 vec_max(vec_bshort8 a, vec_short8 b) +{ + return (spu_sel(b, (vec_short8)(a), spu_cmpgt((vec_short8)(a), b))); +} + +static inline vec_short8 vec_max(vec_short8 a, vec_bshort8 b) +{ + return (spu_sel((vec_short8)(b), a, spu_cmpgt(a, (vec_short8)(b)))); +} + +static inline vec_uint4 vec_max(vec_uint4 a, vec_uint4 b) +{ + return (spu_sel(b, a, spu_cmpgt(a, b))); +} + +static inline vec_int4 vec_max(vec_int4 a, vec_int4 b) +{ + return (spu_sel(b, a, spu_cmpgt(a, b))); +} + +static inline vec_int4 vec_max(vec_bint4 a, vec_int4 b) +{ + return (spu_sel(b, (vec_int4)(a), spu_cmpgt((vec_int4)(a), b))); +} + +static inline vec_int4 vec_max(vec_int4 a, vec_bint4 b) +{ + return (spu_sel((vec_int4)(b), a, spu_cmpgt(a, (vec_int4)(b)))); +} + +static inline vec_float4 vec_max(vec_float4 a, vec_float4 b) +{ + return (spu_sel(b, a, spu_cmpgt(a, b))); +} + + +/* vec_mergeh (vector merge high) + * ========== + */ +static inline vec_uchar16 vec_mergeh(vec_uchar16 a, vec_uchar16 b) +{ + return (spu_shuffle(a, b, ((vec_uchar16){0, 16, 1, 17, 2, 18, 3, 19, + 4, 20, 5, 21, 6, 22, 7, 23}))); +} + +static inline vec_char16 vec_mergeh(vec_char16 a, vec_char16 b) +{ + return (spu_shuffle(a, b, ((vec_uchar16){0, 16, 1, 17, 2, 18, 3, 19, + 4, 20, 5, 21, 6, 22, 7, 23}))); +} + +static inline vec_ushort8 vec_mergeh(vec_ushort8 a, vec_ushort8 b) +{ + return (spu_shuffle(a, b, ((vec_uchar16){0, 1, 16, 17, 2, 3, 18, 19, + 4, 5, 20, 21, 6, 7, 22, 23}))); +} + +static inline vec_short8 vec_mergeh(vec_short8 a, vec_short8 b) +{ + return (spu_shuffle(a, b, ((vec_uchar16){0, 1, 16, 17, 2, 3, 18, 19, + 4, 5, 20, 21, 6, 7, 22, 23}))); +} + +static inline vec_uint4 vec_mergeh(vec_uint4 a, vec_uint4 b) +{ + return (spu_shuffle(a, b, ((vec_uchar16){0, 1, 2, 3, 16, 17, 18, 19, + 4, 5, 6, 7, 20, 21, 22, 23}))); +} + +static inline vec_int4 vec_mergeh(vec_int4 a, vec_int4 b) +{ + return (spu_shuffle(a, b, ((vec_uchar16){0, 1, 2, 3, 16, 17, 18, 19, + 4, 5, 6, 7, 20, 21, 22, 23}))); +} + +static inline vec_float4 vec_mergeh(vec_float4 a, vec_float4 b) +{ + return (spu_shuffle(a, b, ((vec_uchar16){0, 1, 2, 3, 16, 17, 18, 19, + 4, 5, 6, 7, 20, 21, 22, 23}))); +} + +/* vec_mergel (vector merge low) + * ========== + */ +static inline vec_uchar16 vec_mergel(vec_uchar16 a, vec_uchar16 b) +{ + return (spu_shuffle(a, b, ((vec_uchar16){ 8, 24, 9, 25, 10, 26, 11, 27, + 12, 28, 13, 29, 14, 30, 15, 31}))); +} + +static inline vec_char16 vec_mergel(vec_char16 a, vec_char16 b) +{ + return (spu_shuffle(a, b, ((vec_uchar16){ 8, 24, 9, 25, 10, 26, 11, 27, + 12, 28, 13, 29, 14, 30, 15, 31}))); +} + +static inline vec_ushort8 vec_mergel(vec_ushort8 a, vec_ushort8 b) +{ + return 
(spu_shuffle(a, b, ((vec_uchar16){ 8, 9, 24, 25, 10, 11, 26, 27, + 12, 13, 28, 29, 14, 15, 30, 31}))); +} + +static inline vec_short8 vec_mergel(vec_short8 a, vec_short8 b) +{ + return (spu_shuffle(a, b, ((vec_uchar16){ 8, 9, 24, 25, 10, 11, 26, 27, + 12, 13, 28, 29, 14, 15, 30, 31}))); +} + +static inline vec_uint4 vec_mergel(vec_uint4 a, vec_uint4 b) +{ + return (spu_shuffle(a, b, ((vec_uchar16){ 8, 9, 10, 11, 24, 25, 26, 27, + 12, 13, 14, 15, 28, 29, 30, 31}))); +} + +static inline vec_int4 vec_mergel(vec_int4 a, vec_int4 b) +{ + return (spu_shuffle(a, b, ((vec_uchar16){ 8, 9, 10, 11, 24, 25, 26, 27, + 12, 13, 14, 15, 28, 29, 30, 31}))); +} + +static inline vec_float4 vec_mergel(vec_float4 a, vec_float4 b) +{ + return (spu_shuffle(a, b, ((vec_uchar16){ 8, 9, 10, 11, 24, 25, 26, 27, + 12, 13, 14, 15, 28, 29, 30, 31}))); +} + +/* vec_mfvscr (vector move from vector status and control register) + * ========== + */ +static inline vec_ushort8 vec_mfvscr() +{ + return ((vec_ushort8)spu_splats(0)); /* not supported */ +} + + +/* vec_min (vector minimum) + * ======= + */ +static inline vec_uchar16 vec_min(vec_uchar16 a, vec_uchar16 b) +{ + return (spu_sel(a, b, spu_cmpgt(a, b))); +} + +static inline vec_char16 vec_min(vec_char16 a, vec_char16 b) +{ + return (spu_sel(a, b, spu_cmpgt(a, b))); +} + +static inline vec_char16 vec_min(vec_bchar16 a, vec_char16 b) +{ + return (spu_sel((vec_char16)(a), b, spu_cmpgt((vec_char16)(a), b))); +} + +static inline vec_char16 vec_min(vec_char16 a, vec_bchar16 b) +{ + return (spu_sel(a, (vec_char16)(b), spu_cmpgt(a, (vec_char16)(b)))); +} + +static inline vec_ushort8 vec_min(vec_ushort8 a, vec_ushort8 b) +{ + return (spu_sel(a, b, spu_cmpgt(a, b))); +} + +static inline vec_short8 vec_min(vec_short8 a, vec_short8 b) +{ + return (spu_sel(a, b, spu_cmpgt(a, b))); +} + +static inline vec_short8 vec_min(vec_bshort8 a, vec_short8 b) +{ + return (spu_sel((vec_short8)(a), b, spu_cmpgt((vec_short8)(a), b))); +} + +static inline vec_short8 vec_min(vec_short8 a, vec_bshort8 b) +{ + return (spu_sel(a, (vec_short8)(b), spu_cmpgt(a, (vec_short8)(b)))); +} + +static inline vec_uint4 vec_min(vec_uint4 a, vec_uint4 b) +{ + return (spu_sel(a, b, spu_cmpgt(a, b))); +} + +static inline vec_int4 vec_min(vec_int4 a, vec_int4 b) +{ + return (spu_sel(a, b, spu_cmpgt(a, b))); +} + +static inline vec_int4 vec_min(vec_bint4 a, vec_int4 b) +{ + return (spu_sel((vec_int4)(a), b, spu_cmpgt((vec_int4)(a), b))); +} + +static inline vec_int4 vec_min(vec_int4 a, vec_bint4 b) +{ + return (spu_sel(a, (vec_int4)(b), spu_cmpgt(a, (vec_int4)(b)))); +} + +static inline vec_float4 vec_min(vec_float4 a, vec_float4 b) +{ + return (spu_sel(a, b, spu_cmpgt(a, b))); +} + +/* vec_mladd (vector multiply low and add unsigned half word) + * ========= + */ +static inline vec_short8 vec_mladd(vec_short8 a, vec_short8 b, vec_short8 c) +{ + return ((vec_short8)(spu_shuffle(spu_madd((vec_short8)(spu_rl((vec_uint4)(a), -16)), + (vec_short8)(spu_rl((vec_uint4)(b), -16)), + (vec_int4)(spu_rl((vec_uint4)(c), -16))), + spu_madd(a, b, spu_extend(c)), + ((vec_uchar16){ 2, 3, 18, 19, 6, 7, 22, 23, + 10, 11, 26, 27, 14, 15, 30, 31})))); +} + + +static inline vec_ushort8 vec_mladd(vec_ushort8 a, vec_ushort8 b, vec_ushort8 c) +{ + return ((vec_ushort8)(vec_mladd((vec_short8)(a), (vec_short8)(b), (vec_short8)(c)))); +} + +static inline vec_short8 vec_mladd(vec_ushort8 a, vec_short8 b, vec_short8 c) +{ + return (vec_mladd((vec_short8)(a), b, c)); +} + +static inline vec_short8 vec_mladd(vec_short8 a, vec_ushort8 b, vec_ushort8 
c) +{ + return (vec_mladd(a, (vec_short8)(b), (vec_short8)(c))); +} + + +/* vec_mradds (vector multiply round and add saturate) + * ========== + */ +static inline vec_short8 vec_mradds(vec_short8 a, vec_short8 b, vec_short8 c) +{ + vec_int4 round = (vec_int4)spu_splats(0x4000); + vec_short8 hi, lo; + + hi = (vec_short8)(spu_sl(spu_add(spu_mule(a, b), round), 1)); + lo = (vec_short8)(spu_rlmask(spu_add(spu_mulo(a, b), round), -15)); + + return (vec_adds(spu_sel(hi, lo, ((vec_ushort8){0, 0xFFFF, 0, 0xFFFF, 0, 0xFFFF, 0, 0xFFFF})), c)); +} + + +/* vec_msum (vector multiply sum) + * ======== + */ +static inline vec_uint4 vec_msum(vec_uchar16 a, vec_uchar16 b, vec_uint4 c) +{ + vec_ushort8 a1, a2, b1, b2; + vec_uint4 p1, p2; + + a1 = spu_and((vec_ushort8)(a), 0xFF); + a2 = spu_rlmask((vec_ushort8)(a), -8); + b1 = spu_and((vec_ushort8)(b), 0xFF); + b2 = spu_rlmask((vec_ushort8)(b), -8); + + p1 = spu_add(spu_mulo(a1, b1), spu_mulo(spu_rlqwbyte(a1, -2), spu_rlqwbyte(b1, -2))); + p2 = spu_add(spu_mulo(a2, b2), spu_mulo(spu_rlqwbyte(a2, -2), spu_rlqwbyte(b2, -2))); + return (spu_add(p2, spu_add(p1, c))); +} + +static inline vec_int4 vec_msum(vec_char16 a, vec_uchar16 b, vec_int4 c) +{ + vec_short8 a1, a2, b1, b2; + vec_int4 p1, p2; + + a1 = (vec_short8)(spu_extend(a)); + a2 = spu_rlmaska((vec_short8)(a), -8); + b1 = (vec_short8)(spu_and((vec_ushort8)(b), 0xFF)); + b2 = (vec_short8)spu_rlmask((vec_ushort8)(b), -8); + + p1 = spu_add(spu_mulo(a1, b1), spu_mulo(spu_rlqwbyte(a1, -2), spu_rlqwbyte(b1, -2))); + p2 = spu_add(spu_mulo(a2, b2), spu_mulo(spu_rlqwbyte(a2, -2), spu_rlqwbyte(b2, -2))); + return (spu_add(p2, spu_add(p1, c))); +} + +static inline vec_uint4 vec_msum(vec_ushort8 a, vec_ushort8 b, vec_uint4 c) +{ + return (spu_add(spu_add(spu_mulo(a, b), spu_mulo(spu_rlqwbyte(a, -2), spu_rlqwbyte(b, -2))), c)); +} + +static inline vec_int4 vec_msum(vec_short8 a, vec_short8 b, vec_int4 c) +{ + return (spu_add(spu_add(spu_mulo(a, b), spu_mulo(spu_rlqwbyte(a, -2), spu_rlqwbyte(b, -2))), c)); +} + + +/* vec_msums (vector multiply sum saturate) + * ======== + */ +static inline vec_uint4 vec_msums(vec_ushort8 a, vec_ushort8 b, vec_uint4 c) +{ + vec_uint4 p1, p2; + + p1 = spu_mulo(a, b); + p2 = spu_mulo(spu_rlqwbyte(a, -2), spu_rlqwbyte(b, -2)); + + return (vec_adds(p2, vec_adds(p1, c))); +} + +static inline vec_int4 vec_msums(vec_short8 a, vec_short8 b, vec_int4 c) +{ + return (vec_adds(spu_add(spu_mulo(a, b), spu_mulo(spu_rlqwbyte(a, -2), spu_rlqwbyte(b, -2))), c)); +} + +/* vec_mtvscr (vector move to vector status and control register) + * ========== + */ +#define vec_mtvscr(_a) /* not supported */ + + +/* vec_mule (vector multiply even) + * ======== + */ +static inline vec_ushort8 vec_mule(vec_uchar16 a, vec_uchar16 b) +{ + vec_ushort8 hi, lo; + + hi = (vec_ushort8)spu_mulo((vec_ushort8)(spu_rlmask((vec_uint4)(a), -24)), + (vec_ushort8)(spu_rlmask((vec_uint4)(b), -24))); + lo = (vec_ushort8)spu_mulo((vec_ushort8)(spu_rlmask((vec_short8)(a), -8)), + (vec_ushort8)(spu_rlmask((vec_short8)(b), -8))); + + return (spu_shuffle(hi, lo, ((vec_uchar16){ 2, 3, 18, 19, 6, 7, 22, 23, + 10, 11, 26, 27, 14, 15, 30, 31}))); +} + +static inline vec_short8 vec_mule(vec_char16 a, vec_char16 b) +{ + vec_short8 hi, lo; + + hi = (vec_short8)spu_mulo((vec_short8)(spu_rlmaska((vec_uint4)(a), -24)), + (vec_short8)(spu_rlmaska((vec_uint4)(b), -24))); + lo = (vec_short8)spu_mulo((vec_short8)(spu_rlmaska((vec_short8)(a), -8)), + (vec_short8)(spu_rlmaska((vec_short8)(b), -8))); + + return (spu_shuffle(hi, lo, ((vec_uchar16){ 2, 3, 
18, 19, 6, 7, 22, 23, + 10, 11, 26, 27, 14, 15, 30, 31}))); +} + +static inline vec_uint4 vec_mule(vec_ushort8 a, vec_ushort8 b) +{ + return (spu_mulo((vec_ushort8)spu_rlmask((vec_uint4)(a), -16), + (vec_ushort8)spu_rlmask((vec_uint4)(b), -16))); +} + + +static inline vec_int4 vec_mule(vec_short8 a, vec_short8 b) +{ + return (spu_mulo((vec_short8)spu_rlmaska((vec_int4)(a), -16), + (vec_short8)spu_rlmaska((vec_int4)(b), -16))); +} + + +/* vec_mulo (vector multiply odd) + * ======== + */ +static inline vec_ushort8 vec_mulo(vec_uchar16 a, vec_uchar16 b) +{ + vec_ushort8 hi, lo; + + hi = (vec_ushort8)spu_mulo((vec_ushort8)(spu_and(spu_rlmask((vec_uint4)(a), -16), 0xFF)), + (vec_ushort8)(spu_and(spu_rlmask((vec_uint4)(b), -16), 0xFF))); + lo = (vec_ushort8)spu_mulo(spu_and((vec_ushort8)(a), 0xFF), spu_and((vec_ushort8)(b), 0xFF)); + + return (spu_shuffle(hi, lo, ((vec_uchar16){ 2, 3, 18, 19, 6, 7, 22, 23, + 10, 11, 26, 27, 14, 15, 30, 31}))); +} + +static inline vec_short8 vec_mulo(vec_char16 a, vec_char16 b) +{ + vec_short8 aa, bb, hi, lo; + + aa = spu_extend(a); + bb = spu_extend(b); + + hi = (vec_short8)spu_mulo((vec_short8)(spu_rlmaska((vec_uint4)(aa), -16)), + (vec_short8)(spu_rlmaska((vec_uint4)(bb), -16))); + lo = (vec_short8)spu_mulo(aa, bb); + return (spu_shuffle(hi, lo, ((vec_uchar16){ 2, 3, 18, 19, 6, 7, 22, 23, + 10, 11, 26, 27, 14, 15, 30, 31}))); +} + +static inline vec_uint4 vec_mulo(vec_ushort8 a, vec_ushort8 b) +{ + return (spu_mulo(a, b)); +} + + +static inline vec_int4 vec_mulo(vec_short8 a, vec_short8 b) +{ + return (spu_mulo(a, b)); +} + + +/* vec_nmsub (vector negative multiply subtract) + * ========= + */ +#define vec_nmsub(_a, _b, _c) spu_nmsub(_a, _b, _c) + + +/* vec_nor (vector logical nor) + * ======= + */ +#define vec_nor(_a, _b) spu_nor(_a, _b) + + +/* vec_or (vector logical or) + * ====== + */ +static inline vec_uchar16 vec_or(vec_uchar16 a, vec_uchar16 b) +{ + return (spu_or(a, b)); +} + +static inline vec_char16 vec_or(vec_char16 a, vec_char16 b) +{ + return (spu_or(a, b)); +} + +static inline vec_char16 vec_or(vec_bchar16 a, vec_char16 b) +{ + return (spu_or((vec_char16)(a), b)); +} + +static inline vec_char16 vec_or(vec_char16 a, vec_bchar16 b) +{ + return (spu_or(a, (vec_char16)(b))); +} + +static inline vec_ushort8 vec_or(vec_ushort8 a, vec_ushort8 b) +{ + return (spu_or(a, b)); +} + +static inline vec_short8 vec_or(vec_short8 a, vec_short8 b) +{ + return (spu_or(a, b)); +} + +static inline vec_short8 vec_or(vec_bshort8 a, vec_short8 b) +{ + return (spu_or((vec_short8)(a), b)); +} + +static inline vec_short8 vec_or(vec_short8 a, vec_bshort8 b) +{ + return (spu_or(a, (vec_short8)(b))); +} + +static inline vec_uint4 vec_or(vec_uint4 a, vec_uint4 b) +{ + return (spu_or(a, b)); +} + +static inline vec_int4 vec_or(vec_int4 a, vec_int4 b) +{ + return (spu_or(a, b)); +} + +static inline vec_int4 vec_or(vec_bint4 a, vec_int4 b) +{ + return (spu_or((vec_int4)(a), b)); +} + +static inline vec_int4 vec_or(vec_int4 a, vec_bint4 b) +{ + return (spu_or(a, (vec_int4)(b))); +} + +static inline vec_float4 vec_or(vec_float4 a, vec_float4 b) +{ + return (spu_or(a, b)); +} + +static inline vec_float4 vec_or(vec_bint4 a, vec_float4 b) +{ + return (spu_or((vec_float4)(a),b)); +} + +static inline vec_float4 vec_or(vec_float4 a, vec_bint4 b) +{ + return (spu_or(a, (vec_float4)(b))); +} + + +/* vec_pack (vector pack) + * ======== + */ +static inline vec_uchar16 vec_pack(vec_ushort8 a, vec_ushort8 b) +{ + return ((vec_uchar16)spu_shuffle(a, b, ((vec_uchar16){ 1, 3, 5, 7, 9, 11, 13, 
15, + 17, 19, 21, 23, 25, 27, 29, 31}))); +} + +static inline vec_char16 vec_pack(vec_short8 a, vec_short8 b) +{ + return ((vec_char16)spu_shuffle(a, b, ((vec_uchar16){ 1, 3, 5, 7, 9, 11, 13, 15, + 17, 19, 21, 23, 25, 27, 29, 31}))); +} + +static inline vec_ushort8 vec_pack(vec_uint4 a, vec_uint4 b) +{ + return ((vec_ushort8)spu_shuffle(a, b, ((vec_uchar16){ 2, 3, 6, 7, 10, 11, 14, 15, + 18, 19, 22, 23, 26, 27, 30, 31}))); +} + +static inline vec_short8 vec_pack(vec_int4 a, vec_int4 b) +{ + return ((vec_short8)spu_shuffle(a, b, ((vec_uchar16){ 2, 3, 6, 7, 10, 11, 14, 15, + 18, 19, 22, 23, 26, 27, 30, 31}))); +} + + +/* vec_packpx (vector pack pixel) + * ========== + */ +static inline vec_pixel8 vec_packpx(vec_uint4 a, vec_uint4 b) +{ + vec_uint4 x03FF = (vec_uint4)(spu_splats((unsigned short)0x03FF)); + vec_uint4 x001F = (vec_uint4)(spu_splats((unsigned short)0x001F)); + + return ((vec_pixel8)(spu_shuffle(spu_sel(spu_sel(spu_sl(a, 7), spu_sl(a, 10), x03FF), + spu_sl(a, 13), x001F), + spu_sel(spu_sel(spu_sl(b, 7), spu_sl(b, 10), x03FF), + spu_sl(b, 13), x001F), + ((vec_uchar16){ 0, 1, 4, 5, 8, 9, 12, 13, + 16, 17, 20, 21, 24, 25, 28, 29})))); +} + + +/* vec_packs (vector pack saturate) + * ========= + */ +static inline vec_uchar16 vec_packs(vec_ushort8 a, vec_ushort8 b) +{ + vec_ushort8 max = spu_splats((unsigned short)0x00FF); + + return ((vec_uchar16)(spu_shuffle(spu_sel(a, max, spu_cmpgt(a, 255)), + spu_sel(b, max, spu_cmpgt(b, 255)), + ((vec_uchar16){ 1, 3, 5, 7, 9, 11, 13, 15, + 17, 19, 21, 23, 25, 27, 29, 31})))); +} + +static inline vec_char16 vec_packs(vec_short8 a, vec_short8 b) +{ + vec_short8 max = spu_splats((signed short)0x007F); + vec_short8 min = spu_splats((signed short)0xFF80); + + return ((vec_char16)(spu_shuffle(spu_sel(min, spu_sel(a, max, spu_cmpgt(a, 127)), spu_cmpgt(a, -128)), + spu_sel(min, spu_sel(b, max, spu_cmpgt(b, 127)), spu_cmpgt(b, -128)), + ((vec_uchar16){ 1, 3, 5, 7, 9, 11, 13, 15, + 17, 19, 21, 23, 25, 27, 29, 31})))); +} + +static inline vec_ushort8 vec_packs(vec_uint4 a, vec_uint4 b) +{ + vec_uint4 max = spu_splats((unsigned int)0x0000FFFF); + + return ((vec_ushort8)(spu_shuffle(spu_sel(a, max, spu_cmpgt(a, max)), + spu_sel(b, max, spu_cmpgt(b, max)), + ((vec_uchar16){ 2, 3, 6, 7, 10, 11, 14, 15, + 18, 19, 22, 23, 26, 27, 30, 31})))); +} + +static inline vec_short8 vec_packs(vec_int4 a, vec_int4 b) +{ + vec_int4 max = spu_splats((signed int)0x00007FFF); + vec_int4 min = spu_splats((signed int)0xFFFF8000); + + return ((vec_short8)(spu_shuffle(spu_sel(min, spu_sel(a, max, spu_cmpgt(a, max)), spu_cmpgt(a, min)), + spu_sel(min, spu_sel(b, max, spu_cmpgt(b, max)), spu_cmpgt(b, min)), + ((vec_uchar16){ 2, 3, 6, 7, 10, 11, 14, 15, + 18, 19, 22, 23, 26, 27, 30, 31})))); +} + + +/* vec_packsu (vector pack saturate unsigned) + * ========== + */ +static inline vec_uchar16 vec_packsu(vec_ushort8 a, vec_ushort8 b) +{ + return ((vec_uchar16)spu_shuffle(spu_or(a, (vec_ushort8)(spu_cmpgt(a, 255))), + spu_or(b, (vec_ushort8)(spu_cmpgt(b, 255))), + ((vec_uchar16){ 1, 3, 5, 7, 9, 11, 13, 15, + 17, 19, 21, 23, 25, 27, 29, 31}))); +} + +static inline vec_uchar16 vec_packsu(vec_short8 a, vec_short8 b) +{ + vec_short8 max = spu_splats((signed short)0x00FF); + vec_short8 min = spu_splats((signed short)0x0000); + + return ((vec_uchar16)(spu_shuffle(spu_sel(min, spu_sel(a, max, spu_cmpgt(a, 255)), spu_cmpgt(a, 0)), + spu_sel(min, spu_sel(b, max, spu_cmpgt(b, 255)), spu_cmpgt(b, 0)), + ((vec_uchar16){ 1, 3, 5, 7, 9, 11, 13, 15, + 17, 19, 21, 23, 25, 27, 29, 31})))); + + return 
(vec_packsu((vec_ushort8)(a), (vec_ushort8)(b))); +} + +static inline vec_ushort8 vec_packsu(vec_uint4 a, vec_uint4 b) +{ + vec_uint4 max = spu_splats((unsigned int)0xFFFF); + + return ((vec_ushort8)spu_shuffle(spu_or(a, (vec_uint4)(spu_cmpgt(a, max))), + spu_or(b, (vec_uint4)(spu_cmpgt(b, max))), + ((vec_uchar16){ 2, 3, 6, 7, 10, 11, 14, 15, + 18, 19, 22, 23, 26, 27, 30, 31}))); +} + +static inline vec_ushort8 vec_packsu(vec_int4 a, vec_int4 b) +{ + vec_int4 max = spu_splats((signed int)0x0000FFFF); + vec_int4 min = spu_splats((signed int)0x00000000); + + return ((vec_ushort8)(spu_shuffle(spu_sel(min, spu_sel(a, max, spu_cmpgt(a, max)), spu_cmpgt(a, min)), + spu_sel(min, spu_sel(b, max, spu_cmpgt(b, max)), spu_cmpgt(b, min)), + ((vec_uchar16){ 2, 3, 6, 7, 10, 11, 14, 15, + 18, 19, 22, 23, 26, 27, 30, 31})))); +} + + +/* vec_perm (vector permute) + * ======== + */ +static inline vec_uchar16 vec_perm(vec_uchar16 a, vec_uchar16 b, vec_uchar16 c) +{ + return (spu_shuffle(a, b, spu_and(c, 0x1F))); +} + +static inline vec_char16 vec_perm(vec_char16 a, vec_char16 b, vec_uchar16 c) +{ + return ((vec_char16)(vec_perm((vec_uchar16)(a), (vec_uchar16)(b), c))); +} + +static inline vec_ushort8 vec_perm(vec_ushort8 a, vec_ushort8 b, vec_uchar16 c) +{ + return ((vec_ushort8)(vec_perm((vec_uchar16)(a), (vec_uchar16)(b), c))); +} + +static inline vec_short8 vec_perm(vec_short8 a, vec_short8 b, vec_uchar16 c) +{ + return ((vec_short8)(vec_perm((vec_uchar16)(a), (vec_uchar16)(b), c))); +} + +static inline vec_uint4 vec_perm(vec_uint4 a, vec_uint4 b, vec_uchar16 c) +{ + return ((vec_uint4)(vec_perm((vec_uchar16)(a), (vec_uchar16)(b), c))); +} + +static inline vec_int4 vec_perm(vec_int4 a, vec_int4 b, vec_uchar16 c) +{ + return ((vec_int4)(vec_perm((vec_uchar16)(a), (vec_uchar16)(b), c))); +} + +static inline vec_float4 vec_perm(vec_float4 a, vec_float4 b, vec_uchar16 c) +{ + return ((vec_float4)(vec_perm((vec_uchar16)(a), (vec_uchar16)(b), c))); +} + + +/* vec_re (vector reciprocal estimate) + * ====== + */ +#define vec_re(_a) spu_re(_a) + + +/* vec_rl (vector rotate left) + * ====== + */ +static inline vec_uchar16 vec_rl(vec_uchar16 a, vec_uchar16 b) +{ + vec_ushort8 r1, r2; + + r1 = spu_rl(spu_and((vec_ushort8)(a), 0xFF), (vec_short8)spu_and((vec_ushort8)(b), 7)); + r2 = spu_rl(spu_and((vec_ushort8)(a), -256), (vec_short8)spu_and(spu_rlmask((vec_ushort8)(b), -8), 7)); + return ((vec_uchar16)(spu_sel(spu_or(r2, spu_sl(r2, 8)), spu_or(r1, spu_rlmask(r1, -8)), spu_splats((unsigned short)0xFF)))); +} + +static inline vec_char16 vec_rl(vec_char16 a, vec_uchar16 b) +{ + return ((vec_char16)(vec_rl((vec_uchar16)(a), b))); +} + +static inline vec_ushort8 vec_rl(vec_ushort8 a, vec_ushort8 b) +{ + return (spu_rl(a, (vec_short8)(b))); +} + +static inline vec_short8 vec_rl(vec_short8 a, vec_ushort8 b) +{ + return (spu_rl(a, (vec_short8)(b))); +} + +static inline vec_uint4 vec_rl(vec_uint4 a, vec_uint4 b) +{ + return (spu_rl(a, (vec_int4)(b))); +} + +static inline vec_int4 vec_rl(vec_int4 a, vec_uint4 b) +{ + return (spu_rl(a, (vec_int4)(b))); +} + + +/* vec_round (vector round) + * ========= + */ +static inline vec_float4 vec_round(vec_float4 a) +{ + vec_float4 s_half, s_one, d; + vec_uint4 odd; + vec_uint4 msb = spu_splats((unsigned int)0x80000000); + vec_float4 half = spu_splats(0.5f); + vec_int4 exp; + vec_uint4 mask; + + s_half = (vec_float4)(spu_sel((vec_uint4)(half), (vec_uint4)(a), msb)); + a = spu_add(a, s_half); + s_one = spu_add(s_half, s_half); + exp = spu_sub(127, 
(vec_int4)(spu_and(spu_rlmask((vec_uint4)(a), -23), 0xFF))); + mask = spu_rlmask(spu_splats((unsigned int)0x7FFFFF), exp); + mask = spu_sel(spu_splats((unsigned int)0), mask, spu_cmpgt(exp, -31)); + mask = spu_or(mask, spu_xor((vec_uint4)(spu_rlmaska(spu_add(exp, -1), -31)), -1)); + + odd = spu_and((vec_uint4)(spu_convts(a, 0)), 1); + s_one = spu_andc(s_one, (vec_float4)spu_cmpeq(mask, 0)); + s_one = spu_and(s_one, spu_and((vec_float4)spu_cmpeq(spu_and((vec_uint4)(a), mask), 0), + (vec_float4)spu_cmpeq(odd, 1))); + d = spu_andc(a, (vec_float4)(mask)); + d = spu_sub(d, s_one); + return (d); +} + +/* vec_rsqrte (vector reciprocal square root estimate) + * ========== + */ +#define vec_rsqrte(_a) spu_rsqrte(_a) + + +/* vec_sel (vector select) + * ======= + */ +#define vec_sel(_a, _b, _c) spu_sel(_a, _b, _c) + + +/* vec_sl (vector shift left) + * ====== + */ +static inline vec_uchar16 vec_sl(vec_uchar16 a, vec_uchar16 b) +{ + vec_ushort8 hi, lo; + + lo = spu_and(spu_sl((vec_ushort8)(a), spu_and((vec_ushort8)(b), 7)), 0xFF); + hi = spu_sl(spu_and((vec_ushort8)(a), -256), spu_and(spu_rlmask((vec_ushort8)(b), -8), 7)); + + return ((vec_uchar16)(spu_or(hi, lo))); +} + +static inline vec_char16 vec_sl(vec_char16 a, vec_uchar16 b) +{ + return ((vec_char16)(vec_sl((vec_uchar16)(a), b))); +} + +static inline vec_ushort8 vec_sl(vec_ushort8 a, vec_ushort8 b) +{ + return (spu_sl(a, spu_and(b, 15))); +} + +static inline vec_short8 vec_sl(vec_short8 a, vec_ushort8 b) +{ + return (spu_sl(a, spu_and((vec_ushort8)(b), 15))); +} + +static inline vec_uint4 vec_sl(vec_uint4 a, vec_uint4 b) +{ + return (spu_sl(a, spu_and(b, 31))); +} + +static inline vec_int4 vec_sl(vec_int4 a, vec_uint4 b) +{ + return (spu_sl(a, spu_and(b, 31))); +} + + +/* vec_sld (vector shift left double) + * ======= + */ +#define vec_sld(_a, _b, _c) spu_shuffle(_a, _b, ((vec_uchar16){ 0+(_c), 1+(_c), 2+(_c), 3+(_c), \ + 4+(_c), 5+(_c), 6+(_c), 7+(_c), \ + 8+(_c), 9+(_c), 10+(_c), 11+(_c), \ + 12+(_c), 13+(_c), 14+(_c), 15+(_c)})) + + +/* vec_sll (vector shift left long) + * ======= + */ +#define vec_sll(_a, _b) spu_slqw(_a, spu_extract((vec_uint4)(_b), 0)) + + +/* vec_slo (vector shift left by octet) + * ======= + */ +#define vec_slo(_a, _b) spu_slqwbytebc(_a, spu_extract((vec_uint4)(_b), 3) & 0x7F) + + +/* vec_splat (vector splat) + * ========= + */ +#define vec_splat(_a, _b) spu_splats(spu_extract(_a, _b)) + + +/* vec_splat_s8 (vector splat signed byte) + * ============ + */ +#define vec_splat_s8(_a) spu_splats((signed char)(_a)) + + +/* vec_splat_s16 (vector splat signed half-word) + * ============= + */ +#define vec_splat_s16(_a) spu_splats((signed short)(_a)) + + +/* vec_splat_s32 (vector splat signed word) + * ============= + */ +#define vec_splat_s32(_a) spu_splats((signed int)(_a)) + + +/* vec_splat_u8 (vector splat unsigned byte) + * ============ + */ +#define vec_splat_u8(_a) spu_splats((unsigned char)(_a)) + + +/* vec_splat_u16 (vector splat unsigned half-word) + * ============= + */ +#define vec_splat_u16(_a) spu_splats((unsigned short)(_a)) + + +/* vec_splat_u32 (vector splat unsigned word) + * ============= + */ +#define vec_splat_u32(_a) spu_splats((unsigned int)(_a)) + + +/* vec_sr (vector shift right) + * ====== + */ +static inline vec_uchar16 vec_sr(vec_uchar16 a, vec_uchar16 b) +{ + vec_ushort8 hi, lo; + + lo = spu_rlmask(spu_and((vec_ushort8)(a), 0xFF), spu_sub(0, (vec_short8)(spu_and((vec_ushort8)(b), 7)))); + hi = spu_and(spu_rlmask((vec_ushort8)(a), spu_sub(0, (vec_short8)(spu_and(spu_rlmask((vec_ushort8)(b), -8), 
7)))), -256); + + return ((vec_uchar16)(spu_or(hi, lo))); +} + +static inline vec_char16 vec_sr(vec_char16 a, vec_uchar16 b) +{ + return ((vec_char16)(vec_sr((vec_uchar16)(a), b))); +} + +static inline vec_ushort8 vec_sr(vec_ushort8 a, vec_ushort8 b) +{ + return (spu_rlmask(a, spu_sub(0, (vec_short8)(spu_and(b, 15))))); +} + +static inline vec_short8 vec_sr(vec_short8 a, vec_ushort8 b) +{ + return ((vec_short8)(vec_sr((vec_ushort8)(a), b))); +} + +static inline vec_uint4 vec_sr(vec_uint4 a, vec_uint4 b) +{ + return (spu_rlmask(a, spu_sub(0, (vec_int4)(spu_and(b, 31))))); +} + +static inline vec_int4 vec_sr(vec_int4 a, vec_uint4 b) +{ + return ((vec_int4)(vec_sr((vec_uint4)(a), b))); +} + + +/* vec_sra (vector shift right algebraic) + * ======= + */ +static inline vec_char16 vec_sra(vec_char16 a, vec_uchar16 b) +{ + vec_short8 hi, lo; + + lo = spu_and(spu_rlmaska(spu_extend(a), spu_sub(0, (vec_short8)(spu_and((vec_ushort8)(b), 7)))), 0xFF); + hi = spu_and(spu_rlmaska((vec_short8)(a), spu_sub(0, (vec_short8)(spu_and(spu_rlmask((vec_ushort8)(b), -8), 7)))), -256); + + return ((vec_char16)(spu_or(hi, lo))); +} + +static inline vec_uchar16 vec_sra(vec_uchar16 a, vec_uchar16 b) +{ + return ((vec_uchar16)(vec_sra((vec_char16)(a), b))); +} + +static inline vec_short8 vec_sra(vec_short8 a, vec_ushort8 b) +{ + return (spu_rlmaska(a, spu_sub(0, (vec_short8)(spu_and(b, 15))))); +} + +static inline vec_ushort8 vec_sra(vec_ushort8 a, vec_ushort8 b) +{ + return ((vec_ushort8)(vec_sra((vec_short8)(a), b))); +} + +static inline vec_int4 vec_sra(vec_int4 a, vec_uint4 b) +{ + return (spu_rlmaska(a, spu_sub(0, (vec_int4)(spu_and(b, 31))))); +} + +static inline vec_uint4 vec_sra(vec_uint4 a, vec_uint4 b) +{ + return ((vec_uint4)(vec_sra((vec_int4)(a), b))); +} + + +/* vec_srl (vector shift right long) + * ======= + */ +#define vec_srl(_a, _b) spu_rlmaskqw(_a, 0-spu_extract((vec_int4)(_b), 3)) + + +/* vec_sro (vector shift right by octet) + * ======= + */ +#define vec_sro(_a, _b) spu_rlmaskqwbyte(_a, 0 - ((spu_extract((vec_int4)(_b), 3) >> 3) & 0xF)) + +/* vec_st (vector store indexed) + * ====== + */ +static inline void vec_st(vec_uchar16 a, int b, unsigned char *c) +{ + *((vec_uchar16 *)(c+b)) = a; +} + +static inline void vec_st(vec_uchar16 a, int b, vec_uchar16 *c) +{ + *((vec_uchar16 *)((unsigned char *)(c)+b)) = a; +} + +static inline void vec_st(vec_char16 a, int b, signed char *c) +{ + *((vec_char16 *)(c+b)) = a; +} + +static inline void vec_st(vec_char16 a, int b, vec_char16 *c) +{ + *((vec_char16 *)((signed char *)(c)+b)) = a; +} + +static inline void vec_st(vec_bchar16 a, int b, signed char *c) +{ + *((vec_bchar16 *)((signed char *)(c)+b)) = a; +} + +static inline void vec_st(vec_ushort8 a, int b, unsigned short *c) +{ + *((vec_ushort8 *)((unsigned char *)(c)+b)) = a; +} + +static inline void vec_st(vec_ushort8 a, int b, vec_ushort8 *c) +{ + *((vec_ushort8 *)((unsigned char *)(c)+b)) = a; +} + +static inline void vec_st(vec_short8 a, int b, signed short *c) +{ + *((vec_short8 *)((unsigned char *)(c)+b)) = a; +} + +static inline void vec_st(vec_short8 a, int b, vec_short8 *c) +{ + *((vec_short8 *)((signed char *)(c)+b)) = a; +} + +static inline void vec_st(vec_bshort8 a, int b, signed short *c) +{ + *((vec_bshort8 *)((signed char *)(c)+b)) = a; +} + +static inline void vec_st(vec_uint4 a, int b, unsigned int *c) +{ + *((vec_uint4 *)((unsigned char *)(c)+b)) = a; +} + +static inline void vec_st(vec_uint4 a, int b, vec_uint4 *c) +{ + *((vec_uint4 *)((unsigned char *)(c)+b)) = a; +} + +static inline void 
vec_st(vec_int4 a, int b, signed int *c) +{ + *((vec_int4 *)((unsigned char *)(c)+b)) = a; +} + +static inline void vec_st(vec_int4 a, int b, vec_int4 *c) +{ + *((vec_int4 *)((signed char *)(c)+b)) = a; +} + +static inline void vec_st(vec_bint4 a, int b, signed int *c) +{ + *((vec_bint4 *)((signed char *)(c)+b)) = a; +} + +static inline void vec_st(vec_float4 a, int b, float *c) +{ + *((vec_float4 *)((unsigned char *)(c)+b)) = a; +} + +static inline void vec_st(vec_float4 a, int b, vec_float4 *c) +{ + *((vec_float4 *)((unsigned char *)(c)+b)) = a; +} + + +/* vec_ste (vector store element indexed) + * ======= + */ +static inline void vec_ste(vec_uchar16 a, int b, unsigned char *c) +{ + unsigned char *ptr; + + ptr = c + b; + *ptr = spu_extract(a, (int)(ptr) & 15); +} + +static inline void vec_ste(vec_char16 a, int b, signed char *c) +{ + vec_ste((vec_uchar16)(a), b, (unsigned char *)(c)); +} + +static inline void vec_ste(vec_bchar16 a, int b, signed char *c) +{ + vec_ste((vec_uchar16)(a), b, (unsigned char *)(c)); +} + +static inline void vec_ste(vec_ushort8 a, int b, unsigned short *c) +{ + unsigned short *ptr; + + ptr = (unsigned short *)(((unsigned int)(c) + b) & ~1); + *ptr = spu_extract(a, ((int)(ptr) >> 1) & 7); +} + +static inline void vec_ste(vec_short8 a, int b, signed short *c) +{ + vec_ste((vec_ushort8)(a), b, (unsigned short *)(c)); +} + +static inline void vec_ste(vec_bshort8 a, int b, signed short *c) +{ + vec_ste((vec_ushort8)(a), b, (unsigned short *)(c)); +} + +static inline void vec_ste(vec_uint4 a, int b, unsigned int *c) +{ + unsigned int *ptr; + + ptr = (unsigned int *)(((unsigned int)(c) + b) & ~3); + *ptr = spu_extract(a, ((int)(ptr) >> 2) & 3); +} + +static inline void vec_ste(vec_int4 a, int b, signed int *c) +{ + vec_ste((vec_uint4)(a), b, (unsigned int *)(c)); +} + +static inline void vec_ste(vec_bint4 a, int b, signed int *c) +{ + vec_ste((vec_uint4)(a), b, (unsigned int *)(c)); +} + +static inline void vec_ste(vec_float4 a, int b, float *c) +{ + vec_ste((vec_uint4)(a), b, (unsigned int *)(c)); +} + + +/* vec_stl (vector store indexed LRU) + * ======= + */ +#define vec_stl(_a, _b, _c) vec_st(_a, _b, _c) + + +/* vec_sub (vector subtract) + * ======= + */ +static inline vec_uchar16 vec_sub(vec_uchar16 a, vec_uchar16 b) +{ + return ((vec_uchar16)(spu_sel(spu_sub((vec_ushort8)(a), (vec_ushort8)(b)), + spu_sub(spu_and((vec_ushort8)(a), -256), spu_and((vec_ushort8)(b), -256)), + spu_splats((unsigned short)0xFF00)))); +} + +static inline vec_char16 vec_sub(vec_char16 a, vec_char16 b) +{ + return ((vec_char16)(vec_sub((vec_uchar16)(a), (vec_uchar16)(b)))); +} + +static inline vec_char16 vec_sub(vec_bchar16 a, vec_char16 b) +{ + return ((vec_char16)(vec_sub((vec_uchar16)(a), (vec_uchar16)(b)))); +} + +static inline vec_char16 vec_sub(vec_char16 a, vec_bchar16 b) +{ + return ((vec_char16)(vec_sub((vec_uchar16)(a), (vec_uchar16)(b)))); +} + +static inline vec_ushort8 vec_sub(vec_ushort8 a, vec_ushort8 b) +{ + return (spu_sub(a, b)); +} + +static inline vec_short8 vec_sub(vec_short8 a, vec_short8 b) +{ + return (spu_sub(a, b)); +} + +static inline vec_short8 vec_sub(vec_bshort8 a, vec_short8 b) +{ + return (spu_sub((vec_short8)(a), b)); +} + +static inline vec_short8 vec_sub(vec_short8 a, vec_bshort8 b) +{ + return (spu_sub(a, (vec_short8)(b))); +} + +static inline vec_uint4 vec_sub(vec_uint4 a, vec_uint4 b) +{ + return (spu_sub(a, b)); +} + +static inline vec_int4 vec_sub(vec_int4 a, vec_int4 b) +{ + return (spu_sub(a, b)); +} + +static inline vec_int4 vec_sub(vec_bint4 a, 
vec_int4 b) +{ + return (spu_sub((vec_int4)(a), b)); +} + +static inline vec_int4 vec_sub(vec_int4 a, vec_bint4 b) +{ + return (spu_sub(a, (vec_int4)(b))); +} + +static inline vec_float4 vec_sub(vec_float4 a, vec_float4 b) +{ + return (spu_sub(a, b)); +} + + +/* vec_subc (vector subtract carryout) + * ======== + */ +#define vec_subc(_a, _b) spu_genb(_a, _b) + + +/* vec_subs (vector subtract saturate) + * ======== + */ +static inline vec_uchar16 vec_subs(vec_uchar16 a, vec_uchar16 b) +{ + vec_ushort8 s1, s2; + vec_uchar16 s, d; + + s1 = spu_sub(spu_rlmask((vec_ushort8)(a), -8), spu_rlmask((vec_ushort8)(b), -8)); + s2 = spu_sub(spu_and((vec_ushort8)(a), 0xFF), spu_and((vec_ushort8)(b), 0xFF)); + s = (vec_uchar16)(spu_shuffle(s1, s2, ((vec_uchar16){0, 16, 2, 18, 4, 20, 6, 22, + 8, 24, 10, 26, 12, 28, 14, 30}))); + d = (vec_uchar16)(spu_shuffle(s1, s2, ((vec_uchar16){1, 17, 3, 19, 5, 21, 7, 23, + 9, 25, 11, 27, 13, 29, 15, 31}))); + return (spu_andc(d, s)); +} + +static inline vec_char16 vec_subs(vec_char16 a, vec_char16 b) +{ + vec_ushort8 s1, s2; + vec_uchar16 s, d; + + s1 = spu_sub(spu_rlmask((vec_ushort8)(a), -8), spu_rlmask((vec_ushort8)(b), -8)); + s2 = spu_sub(spu_and((vec_ushort8)(a), 0xFF), spu_and((vec_ushort8)(b), 0xFF)); + s = (vec_uchar16)(spu_shuffle(s1, s2, ((vec_uchar16){1, 17, 3, 19, 5, 21, 7, 23, + 9, 25, 11, 27, 13, 29, 15, 31}))); + d = spu_sel(s, spu_splats((unsigned char)0x7F), spu_cmpgt(spu_nor((vec_uchar16)(a), spu_nand(s, (vec_uchar16)(b))), 0x7F)); + d = spu_sel(d, spu_splats((unsigned char)0x80), spu_cmpgt(spu_and((vec_uchar16)(a), spu_nor(s, (vec_uchar16)(b))), 0x7F)); + + return ((vec_char16)(d)); +} + +static inline vec_char16 vec_subs(vec_bchar16 a, vec_char16 b) +{ + return (vec_subs((vec_char16)(a), b)); +} + +static inline vec_char16 vec_subs(vec_char16 a, vec_bchar16 b) +{ + return (vec_subs(a, (vec_char16)(b))); +} + +static inline vec_ushort8 vec_subs(vec_ushort8 a, vec_ushort8 b) +{ + return (spu_andc(spu_sub(a, b), spu_cmpgt(b, a))); +} + +static inline vec_short8 vec_subs(vec_short8 a, vec_short8 b) +{ + vec_short8 s; + vec_short8 d; + + s = spu_sub(a, b); + d = spu_sel(s, spu_splats((signed short)0x7FFF), (vec_ushort8)(spu_rlmaska(spu_nor(a, spu_nand(s, b)), -15))); + d = spu_sel(d, spu_splats((signed short)0x8000), (vec_ushort8)(spu_rlmaska(spu_and(a, spu_nor(s, b)), -15))); + + return (d); +} + +static inline vec_short8 vec_subs(vec_bshort8 a, vec_short8 b) +{ + return ((vec_short8)(vec_subs((vec_short8)(a), b))); +} + +static inline vec_short8 vec_subs(vec_short8 a, vec_bshort8 b) +{ + return ((vec_short8)(vec_subs(a, (vec_short8)(b)))); +} + +static inline vec_uint4 vec_subs(vec_uint4 a, vec_uint4 b) +{ + return (spu_andc(spu_sub(a, b), spu_cmpgt(b, a))); +} + +static inline vec_int4 vec_subs(vec_int4 a, vec_int4 b) +{ + vec_int4 s; + vec_int4 d; + + s = spu_sub(a, b); + d = spu_sel(s, spu_splats((signed int)0x7FFFFFFF), (vec_uint4)(spu_rlmaska(spu_nor(a, spu_nand(s, b)), -31))); + d = spu_sel(d, spu_splats((signed int)0x80000000), (vec_uint4)(spu_rlmaska(spu_and(a, spu_nor(s, b)), -31))); + + return (d); +} + +static inline vec_int4 vec_subs(vec_bint4 a, vec_int4 b) +{ + return ((vec_int4)(vec_subs((vec_int4)(a), b))); +} + +static inline vec_int4 vec_subs(vec_int4 a, vec_bint4 b) +{ + return ((vec_int4)(vec_subs(a, (vec_int4)(b)))); +} + + +/* vec_sum4s (vector sum across partial (1/4) saturated) + * ========= + */ +static inline vec_uint4 vec_sum4s(vec_uchar16 a, vec_uint4 b) +{ + vec_uint4 a01_23, a0123; + + a01_23 = 
(vec_uint4)(spu_add(spu_rlmask((vec_ushort8)(a), -8), + spu_and((vec_ushort8)(a), 0xFF))); + a0123 = spu_add(spu_rlmask(a01_23, -16), spu_and(a01_23, 0x1FF)); + return (vec_adds(a0123, b)); +} + +static inline vec_int4 vec_sum4s(vec_char16 a, vec_int4 b) +{ + vec_int4 a01_23, a0123; + + a01_23 = (vec_int4)(spu_add(spu_rlmaska((vec_short8)(a), -8), + spu_extend(a))); + a0123 = spu_add(spu_rlmaska(a01_23, -16), spu_extend((vec_short8)(a01_23))); + return (vec_adds(a0123, b)); +} + +static inline vec_int4 vec_sum4s(vec_short8 a, vec_int4 b) +{ + vec_int4 a0123; + + a0123 = spu_add(spu_rlmaska((vec_int4)(a), -16), spu_extend(a)); + return (vec_adds(a0123, b)); +} + + +/* vec_sum2s (vector sum across partial (1/2) saturated) + * ========= + */ +static inline vec_int4 vec_sum2s(vec_int4 a, vec_int4 b) +{ + vec_int4 c, d; + vec_int4 sign1, sign2, sign3; + vec_int4 carry, sum_l, sum_h, sat, sat_val; + + sign1 = spu_rlmaska(a, -31); + sign2 = spu_rlmaska(b, -31); + + c = spu_rlqwbyte(a, -4); + sign3 = spu_rlqwbyte(sign1, -4); + + carry = spu_genc(a, b); + sum_l = spu_add(a, b); + sum_h = spu_addx(sign1, sign2, carry); + + carry = spu_genc(sum_l, c); + sum_l = spu_add(sum_l, c); + sum_h = spu_addx(sum_h, sign3, carry); + + sign1 = spu_rlmaska(sum_l, -31); + sign2 = spu_rlmaska(sum_h, -31); + + sat_val = spu_xor(sign2, spu_splats((signed int)0x7FFFFFFF)); + + sat = spu_orc(spu_xor(sign1, sign2), (vec_int4)spu_cmpeq(sum_h, sign2)); + + d = spu_and(spu_sel(sum_l, sat_val, (vec_uint4)(sat)), (vec_int4){0, -1, 0, -1}); + + return (d); +} + + +/* vec_sums (vector sum saturated) + * ======== + */ +static inline vec_int4 vec_sums(vec_int4 a, vec_int4 b) +{ + vec_int4 a0, a1, a2, c0, c1, c2, d; + vec_int4 sign_a, sign_b, sign_l, sign_h; + vec_int4 sum_l, sum_h, sat, sat_val; + + sign_a = spu_rlmaska(a, -31); + sign_b = spu_rlmaska(b, -31); + + a0 = spu_rlqwbyte(a, -12); + a1 = spu_rlqwbyte(a, -8); + a2 = spu_rlqwbyte(a, -4); + + sum_l = spu_add(a, b); + sum_h = spu_addx(sign_a, sign_b, spu_genc(a, b)); + + c2 = spu_genc(sum_l, a2); + sum_l = spu_add(sum_l, a2); + sum_h = spu_addx(sum_h, spu_rlqwbyte(sign_a, -4), c2); + + c1 = spu_genc(sum_l, a1); + sum_l = spu_add(sum_l, a1); + sum_h = spu_addx(sum_h, spu_rlqwbyte(sign_a, -8), c1); + + c0 = spu_genc(sum_l, a0); + sum_l = spu_add(sum_l, a0); + sum_h = spu_addx(sum_h, spu_rlqwbyte(sign_a, -12), c0); + + sign_l = spu_rlmaska(sum_l, -31); + sign_h = spu_rlmaska(sum_h, -31); + + sat_val = spu_xor(sign_h, spu_splats((signed int)0x7FFFFFFF)); + + sat = spu_orc(spu_xor(sign_l, sign_h), (vec_int4)spu_cmpeq(sum_h, sign_h)); + + d = spu_and(spu_sel(sum_l, sat_val, (vec_uint4)(sat)), ((vec_int4){0, 0, 0, -1})); + + return (d); +} + + +/* vec_trunc (vector truncate) + * ========= + */ +static inline vec_float4 vec_trunc(vec_float4 a) +{ + vec_int4 exp; + vec_uint4 mask; + + exp = spu_sub(127, (vec_int4)(spu_and(spu_rlmask((vec_uint4)(a), -23), 0xFF))); + mask = spu_rlmask(spu_splats((unsigned int)0x7FFFFF), exp); + mask = spu_sel(spu_splats((unsigned int)0), mask, spu_cmpgt(exp, -31)); + mask = spu_or(mask, spu_xor((vec_uint4)(spu_rlmaska(spu_add(exp, -1), -31)), -1)); + return (spu_andc(a, (vec_float4)(mask))); +} + +/* vec_unpackh (vector unpack high element) + * =========== + */ +static inline vec_short8 vec_unpackh(vec_char16 a) +{ + return (spu_extend(spu_shuffle(a, a, ((vec_uchar16){0, 0, 1, 1, 2, 2, 3, 3, + 4, 4, 5, 5, 6, 6, 7, 7})))); +} + +static inline vec_bshort8 vec_unpackh(vec_bchar16 a) +{ + return ((vec_bshort8)(vec_unpackh((vec_char16)(a)))); +} + +static 
inline vec_int4 vec_unpackh(vec_short8 a) +{ + return (spu_extend(spu_shuffle(a, a, ((vec_uchar16){0, 0, 0, 1, 0, 0, 2, 3, + 0, 0, 4, 5, 0, 0, 6, 7})))); +} + +#ifdef SUPPORT_UNPACK_PIXEL +/* Due to type conflicts, unpacking of pixel types and boolean shorts + * can not simultaneously be supported. By default, the boolean short is + * supported. + */ +static inline vec_uint4 vec_unpackh(vec_pixel8 a) +{ + vec_ushort8 p1, p2; + + p1 = spu_shuffle((vec_ushort8)(spu_rlmaska((vec_short8)(a.p), -7)), + spu_and((vec_ushort8)(a.p), 0x1F), + ((vec_uchar16){ 0, 128, 128, 17, 2, 128, 128, 19, + 4, 128, 128, 21, 6, 128, 128, 23})); + p2 = spu_shuffle(spu_and(spu_rlmask((vec_ushort8)(a.p), -5), 0x1F), + spu_and(spu_rlmask((vec_ushort8)(a.p), -10), 0x1F), + ((vec_uchar16){ 128, 17, 1, 128, 128, 19, 3, 128, + 128, 21, 5, 128, 128, 23, 7, 128})); + return ((vec_uint4)(spu_or(p1, p2))); +} + +#else + +static inline vec_bint4 vec_unpackh(vec_bshort8 a) +{ + return ((vec_bint4)(vec_unpackh((vec_short8)(a)))); +} +#endif + + + + + +/* vec_unpackl (vector unpack low element) + * =========== + */ +static inline vec_short8 vec_unpackl(vec_char16 a) +{ + return (spu_extend(spu_shuffle(a, a, ((vec_uchar16){8, 8, 9, 9, 10, 10, 11, 11, + 12, 12, 13, 13, 14, 14, 15, 15})))); +} + +static inline vec_bshort8 vec_unpackl(vec_bchar16 a) +{ + return ((vec_bshort8)(vec_unpackl((vec_char16)(a)))); +} + + +static inline vec_int4 vec_unpackl(vec_short8 a) +{ + return (spu_extend(spu_shuffle(a, a, ((vec_uchar16){0, 0, 8, 9, 0, 0, 10, 11, + 0, 0,12,13, 0, 0, 14, 15})))); +} + + +#ifdef SUPPORT_UNPACK_PIXEL +/* Due to type conflicts, unpacking of pixel types and boolean shorts + * can not simultaneously be supported. By default, the boolean short is + * supported. + */ +static inline vec_uint4 vec_unpackl(vec_pixel8 a) +{ + vec_ushort8 p1, p2; + + p1 = spu_shuffle((vec_ushort8)(spu_rlmaska((vec_short8)(a), -7)), + spu_and((vec_ushort8)(a), 0x1F), + ((vec_uchar16){ 8, 128, 128, 25, 10, 128, 128, 27, + 12, 128, 128, 29, 14, 128, 128, 31})); + p2 = spu_shuffle(spu_and(spu_rlmask((vec_ushort8)(a), -5), 0x1F), + spu_and(spu_rlmask((vec_ushort8)(a), -10), 0x1F), + ((vec_uchar16){ 128, 25, 9, 128, 128, 27, 11, 128, + 128, 29, 13, 128, 128, 31, 15, 128})); + return ((vec_uint4)(spu_or(p1, p2))); +} + +#else + +static inline vec_bint4 vec_unpackl(vec_bshort8 a) +{ + return ((vec_bint4)(vec_unpackl((vec_short8)(a)))); + +} +#endif + + + +/* vec_xor (vector logical xor) + * ====== + */ +static inline vec_uchar16 vec_xor(vec_uchar16 a, vec_uchar16 b) +{ + return (spu_xor(a, b)); +} + +static inline vec_char16 vec_xor(vec_char16 a, vec_char16 b) +{ + return (spu_xor(a, b)); +} + +static inline vec_char16 vec_xor(vec_bchar16 a, vec_char16 b) +{ + return (spu_xor((vec_char16)(a), b)); +} + +static inline vec_char16 vec_xor(vec_char16 a, vec_bchar16 b) +{ + return (spu_xor(a, (vec_char16)(b))); +} + +static inline vec_ushort8 vec_xor(vec_ushort8 a, vec_ushort8 b) +{ + return (spu_xor(a, b)); +} + +static inline vec_short8 vec_xor(vec_short8 a, vec_short8 b) +{ + return (spu_xor(a, b)); +} + +static inline vec_short8 vec_xor(vec_bshort8 a, vec_short8 b) +{ + return (spu_xor((vec_short8)(a), b)); +} + +static inline vec_short8 vec_xor(vec_short8 a, vec_bshort8 b) +{ + return (spu_xor(a, (vec_short8)(b))); +} + +static inline vec_uint4 vec_xor(vec_uint4 a, vec_uint4 b) +{ + return (spu_xor(a, b)); +} + +static inline vec_int4 vec_xor(vec_int4 a, vec_int4 b) +{ + return (spu_xor(a, b)); +} + +static inline vec_int4 vec_xor(vec_bint4 a, vec_int4 b) 
+{ + return (spu_xor((vec_int4)(a), b)); +} + +static inline vec_int4 vec_xor(vec_int4 a, vec_bint4 b) +{ + return (spu_xor(a, (vec_int4)(b))); +} + +static inline vec_float4 vec_xor(vec_float4 a, vec_float4 b) +{ + return (spu_xor(a, b)); +} + +static inline vec_float4 vec_xor(vec_bint4 a, vec_float4 b) +{ + return (spu_xor((vec_float4)(a),b)); +} + +static inline vec_float4 vec_xor(vec_float4 a, vec_bint4 b) +{ + return (spu_xor(a, (vec_float4)(b))); +} + +/************************************************************************ + * PREDICATES + ************************************************************************/ + +/* vec_all_eq (all elements equal) + * ========== + */ +static inline int vec_all_eq(vec_uchar16 a, vec_uchar16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) == 0xFFFF)); +} + +static inline int vec_all_eq(vec_char16 a, vec_char16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) == 0xFFFF)); +} + +static inline int vec_all_eq(vec_bchar16 a, vec_char16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpeq((vec_char16)(a), b)), 0) == 0xFFFF)); +} + +static inline int vec_all_eq(vec_char16 a, vec_bchar16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpeq(a, (vec_char16)(b))), 0) == 0xFFFF)); +} + +static inline int vec_all_eq(vec_ushort8 a, vec_ushort8 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) == 0xFF)); +} + +static inline int vec_all_eq(vec_short8 a, vec_short8 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) == 0xFF)); +} + +static inline int vec_all_eq(vec_bshort8 a, vec_short8 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpeq((vec_short8)(a), b)), 0) == 0xFF)); +} + +static inline int vec_all_eq(vec_short8 a, vec_bshort8 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpeq(a, (vec_short8)(b))), 0) == 0xFF)); +} + +static inline int vec_all_eq(vec_uint4 a, vec_uint4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) == 0xF)); +} + +static inline int vec_all_eq(vec_int4 a, vec_int4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) == 0xF)); +} + +static inline int vec_all_eq(vec_bint4 a, vec_int4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpeq((vec_int4)(a), b)), 0) == 0xF)); +} + +static inline int vec_all_eq(vec_int4 a, vec_bint4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpeq(a, (vec_int4)(b))), 0) == 0xF)); +} + +static inline int vec_all_eq(vec_float4 a, vec_float4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) == 0xF)); +} + + +/* vec_all_ge (all elements greater than or equal) + * ========== + */ +static inline int vec_all_ge(vec_uchar16 a, vec_uchar16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0)); +} + +static inline int vec_all_ge(vec_char16 a, vec_char16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0)); +} + +static inline int vec_all_ge(vec_bchar16 a, vec_char16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(b, (vec_char16)(a))), 0) == 0)); +} + +static inline int vec_all_ge(vec_char16 a, vec_bchar16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_char16)(b), a)), 0) == 0)); +} + +static inline int vec_all_ge(vec_ushort8 a, vec_ushort8 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0)); +} + +static inline int vec_all_ge(vec_short8 a, vec_short8 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0)); +} + +static inline int vec_all_ge(vec_bshort8 a, vec_short8 b) +{ + return 
((int)(spu_extract(spu_gather(spu_cmpgt(b, (vec_short8)(a))), 0) == 0)); +} + +static inline int vec_all_ge(vec_short8 a, vec_bshort8 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_short8)(b), a)), 0) == 0)); +} + +static inline int vec_all_ge(vec_uint4 a, vec_uint4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0)); +} + +static inline int vec_all_ge(vec_int4 a, vec_int4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0)); +} + +static inline int vec_all_ge(vec_bint4 a, vec_int4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(b, (vec_int4)(a))), 0) == 0)); +} + +static inline int vec_all_ge(vec_int4 a, vec_bint4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_int4)(b), a)), 0) == 0)); +} + +static inline int vec_all_ge(vec_float4 a, vec_float4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0)); +} + + +/* vec_all_gt (all elements greater than) + * ========== + */ +static inline int vec_all_gt(vec_uchar16 a, vec_uchar16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0xFFFF)); +} + +static inline int vec_all_gt(vec_char16 a, vec_char16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0xFFFF)); +} + +static inline int vec_all_gt(vec_bchar16 a, vec_char16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_char16)(a), b)), 0) == 0xFFFF)); +} + +static inline int vec_all_gt(vec_char16 a, vec_bchar16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(a, (vec_char16)(b))), 0) == 0xFFFF)); +} + +static inline int vec_all_gt(vec_ushort8 a, vec_ushort8 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0xFF)); +} + +static inline int vec_all_gt(vec_short8 a, vec_short8 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0xFF)); +} + +static inline int vec_all_gt(vec_bshort8 a, vec_short8 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_short8)(a), b)), 0) == 0xFF)); +} + +static inline int vec_all_gt(vec_short8 a, vec_bshort8 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(a, (vec_short8)(b))), 0) == 0xFF)); +} + +static inline int vec_all_gt(vec_uint4 a, vec_uint4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0xF)); +} + +static inline int vec_all_gt(vec_int4 a, vec_int4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0xF)); +} + +static inline int vec_all_gt(vec_bint4 a, vec_int4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_int4)(a), b)), 0) == 0xF)); +} + +static inline int vec_all_gt(vec_int4 a, vec_bint4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(a, (vec_int4)(b))), 0) == 0xF)); +} + +static inline int vec_all_gt(vec_float4 a, vec_float4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0xF)); +} + + +/* vec_all_in (all elements in bounds) + * ========== + */ +static inline int vec_all_in(vec_float4 a, vec_float4 b) +{ + return (spu_extract(spu_gather(spu_nor(spu_cmpabsgt(a, b), (vec_uint4)(spu_rlmaska((vec_int4)(b), -31)))), 0) == 0xF); +} + + +/* vec_all_le (all elements less than or equal) + * ========== + */ +static inline int vec_all_le(vec_uchar16 a, vec_uchar16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0)); +} + +static inline int vec_all_le(vec_char16 a, vec_char16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0)); +} + +static inline int vec_all_le(vec_bchar16 a, vec_char16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_char16)(a), 
b)), 0) == 0)); +} + +static inline int vec_all_le(vec_char16 a, vec_bchar16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(a, (vec_char16)(b))), 0) == 0)); +} + +static inline int vec_all_le(vec_ushort8 a, vec_ushort8 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0)); +} + +static inline int vec_all_le(vec_short8 a, vec_short8 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0)); +} + +static inline int vec_all_le(vec_bshort8 a, vec_short8 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_short8)(a), b)), 0) == 0)); +} + +static inline int vec_all_le(vec_short8 a, vec_bshort8 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(a, (vec_short8)(b))), 0) == 0)); +} + +static inline int vec_all_le(vec_uint4 a, vec_uint4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0)); +} + +static inline int vec_all_le(vec_int4 a, vec_int4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0)); +} + +static inline int vec_all_le(vec_bint4 a, vec_int4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_int4)(a), b)), 0) == 0)); +} + +static inline int vec_all_le(vec_int4 a, vec_bint4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(a, (vec_int4)(b))), 0) == 0)); +} + +static inline int vec_all_le(vec_float4 a, vec_float4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0)); +} + + +/* vec_all_lt (all elements less than) + * ========== + */ +static inline int vec_all_lt(vec_uchar16 a, vec_uchar16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0xFFFF)); +} + +static inline int vec_all_lt(vec_char16 a, vec_char16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0xFFFF)); +} + +static inline int vec_all_lt(vec_bchar16 a, vec_char16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(b, (vec_char16)(a))), 0) == 0xFFFF)); +} + +static inline int vec_all_lt(vec_char16 a, vec_bchar16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_char16)(b), a)), 0) == 0xFFFF)); +} + +static inline int vec_all_lt(vec_ushort8 a, vec_ushort8 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0xFF)); +} + +static inline int vec_all_lt(vec_short8 a, vec_short8 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0xFF)); +} + +static inline int vec_all_lt(vec_bshort8 a, vec_short8 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(b, (vec_short8)(a))), 0) == 0xFF)); +} + +static inline int vec_all_lt(vec_short8 a, vec_bshort8 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_short8)(b), a)), 0) == 0xFF)); +} + +static inline int vec_all_lt(vec_uint4 a, vec_uint4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0xF)); +} + +static inline int vec_all_lt(vec_int4 a, vec_int4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0xF)); +} + +static inline int vec_all_lt(vec_bint4 a, vec_int4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(b, (vec_int4)(a))), 0) == 0xF)); +} + +static inline int vec_all_lt(vec_int4 a, vec_bint4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_int4)(b), a)), 0) == 0xF)); +} + +static inline int vec_all_lt(vec_float4 a, vec_float4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0xF)); +} + + +/* vec_all_nan (all elements not a number) + * =========== + */ +static inline int vec_all_nan(vec_float4 a) +{ + vec_uint4 exp, man; + vec_uint4 exp_mask = spu_splats((unsigned int)0x7F800000); + + exp = 
spu_and((vec_uint4)(a), exp_mask); + man = spu_and((vec_uint4)(a), spu_splats((unsigned int)0x007FFFFF)); + return ((int)(spu_extract(spu_gather(spu_andc(spu_cmpeq(exp, exp_mask), + spu_cmpeq(man, 0))), 0) == 0xF)); +} + +#define vec_all_nan(_a) (0) + + +/* vec_all_ne (all elements not equal) + * ========== + */ +static inline int vec_all_ne(vec_uchar16 a, vec_uchar16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) == 0)); +} + +static inline int vec_all_ne(vec_char16 a, vec_char16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) == 0)); +} + +static inline int vec_all_ne(vec_bchar16 a, vec_char16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpeq((vec_char16)(a), b)), 0) == 0)); +} + +static inline int vec_all_ne(vec_char16 a, vec_bchar16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpeq(a, (vec_char16)(b))), 0) == 0)); +} + +static inline int vec_all_ne(vec_ushort8 a, vec_ushort8 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) == 0)); +} + +static inline int vec_all_ne(vec_short8 a, vec_short8 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) == 0)); +} + +static inline int vec_all_ne(vec_bshort8 a, vec_short8 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpeq((vec_short8)(a), b)), 0) == 0)); +} + +static inline int vec_all_ne(vec_short8 a, vec_bshort8 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpeq(a, (vec_short8)(b))), 0) == 0)); +} + +static inline int vec_all_ne(vec_uint4 a, vec_uint4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) == 0)); +} + +static inline int vec_all_ne(vec_int4 a, vec_int4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) == 0)); +} + +static inline int vec_all_ne(vec_bint4 a, vec_int4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpeq((vec_int4)(a), b)), 0) == 0)); +} + +static inline int vec_all_ne(vec_int4 a, vec_bint4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpeq(a, (vec_int4)(b))), 0) == 0)); +} + +static inline int vec_all_ne(vec_float4 a, vec_float4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) == 0)); +} + + +/* vec_all_nge (all elements not greater than or equal) + * =========== + */ +static inline int vec_all_nge(vec_float4 a, vec_float4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0xF)); +} + + +/* vec_all_ngt (all elements not greater than) + * =========== + */ +static inline int vec_all_ngt(vec_float4 a, vec_float4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0)); +} + + +/* vec_all_nle (all elements not less than or equal) + * =========== + */ +static inline int vec_all_nle(vec_float4 a, vec_float4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0xF)); +} + + +/* vec_all_nlt (all elements not less than) + * =========== + */ +static inline int vec_all_nlt(vec_float4 a, vec_float4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0)); +} + + +/* vec_all_numeric (all elements numeric) + * =========== + */ +static inline int vec_all_numeric(vec_float4 a) +{ + vec_uint4 exp; + + exp = spu_and(spu_rlmask((vec_uint4)(a), -23), 0xFF); + return ((int)(spu_extract(spu_gather(spu_cmpeq(exp, 255)), 0) == 0)); +} + + + +/* vec_any_eq (any elements equal) + * ========== + */ +static inline int vec_any_eq(vec_uchar16 a, vec_uchar16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) != 0)); +} + +static inline int vec_any_eq(vec_char16 a, vec_char16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpeq(a, 
b)), 0) != 0)); +} + +static inline int vec_any_eq(vec_bchar16 a, vec_char16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpeq((vec_char16)(a), b)), 0) != 0)); +} + +static inline int vec_any_eq(vec_char16 a, vec_bchar16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpeq(a, (vec_char16)(b))), 0) != 0)); +} + +static inline int vec_any_eq(vec_ushort8 a, vec_ushort8 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) != 0)); +} + +static inline int vec_any_eq(vec_short8 a, vec_short8 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) != 0)); +} + +static inline int vec_any_eq(vec_bshort8 a, vec_short8 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpeq((vec_short8)(a), b)), 0) != 0)); +} + +static inline int vec_any_eq(vec_short8 a, vec_bshort8 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpeq(a, (vec_short8)(b))), 0) != 0)); +} + +static inline int vec_any_eq(vec_uint4 a, vec_uint4 b) +{ + return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpeq(a, b), -31)), 0))); +} + +static inline int vec_any_eq(vec_int4 a, vec_int4 b) +{ + return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpeq(a, b), -31)), 0))); +} + +static inline int vec_any_eq(vec_bint4 a, vec_int4 b) +{ + return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpeq((vec_int4)(a), b), -31)), 0))); +} + +static inline int vec_any_eq(vec_int4 a, vec_bint4 b) +{ + return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpeq(a, (vec_int4)(b)), -31)), 0))); +} + +static inline int vec_any_eq(vec_float4 a, vec_float4 b) +{ + return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpeq(a, b), -31)), 0))); +} + +/* vec_any_ge (any elements greater than or equal) + * ========== + */ +static inline int vec_any_ge(vec_uchar16 a, vec_uchar16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) != 0xFFFF)); +} + +static inline int vec_any_ge(vec_char16 a, vec_char16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) != 0xFFFF)); +} + +static inline int vec_any_ge(vec_bchar16 a, vec_char16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(b, (vec_char16)(a))), 0) != 0xFFFF)); +} + +static inline int vec_any_ge(vec_char16 a, vec_bchar16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_char16)(b), a)), 0) != 0xFFFF)); +} + +static inline int vec_any_ge(vec_ushort8 a, vec_ushort8 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) != 0xFF)); +} + +static inline int vec_any_ge(vec_short8 a, vec_short8 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) != 0xFF)); +} + +static inline int vec_any_ge(vec_bshort8 a, vec_short8 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(b, (vec_short8)(a))), 0) != 0xFF)); +} + +static inline int vec_any_ge(vec_short8 a, vec_bshort8 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_short8)(b), a)), 0) != 0xFF)); +} + +static inline int vec_any_ge(vec_uint4 a, vec_uint4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) != 0xF)); +} + +static inline int vec_any_ge(vec_int4 a, vec_int4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) != 0xF)); +} + +static inline int vec_any_ge(vec_bint4 a, vec_int4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(b, (vec_int4)(a))), 0) != 0xF)); +} + +static inline int vec_any_ge(vec_int4 a, vec_bint4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_int4)(b), a)), 0) != 0xF)); +} + +static inline int vec_any_ge(vec_float4 a, vec_float4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) != 0xF)); +} + + +/* 
vec_any_gt (any elements greater than) + * ========== + */ +static inline int vec_any_gt(vec_uchar16 a, vec_uchar16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) != 0)); +} + +static inline int vec_any_gt(vec_char16 a, vec_char16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) != 0)); +} + +static inline int vec_any_gt(vec_bchar16 a, vec_char16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_char16)(a), b)), 0) != 0)); +} + +static inline int vec_any_gt(vec_char16 a, vec_bchar16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(a, (vec_char16)(b))), 0) != 0)); +} + +static inline int vec_any_gt(vec_ushort8 a, vec_ushort8 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) != 0)); +} + +static inline int vec_any_gt(vec_short8 a, vec_short8 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) != 0)); +} + +static inline int vec_any_gt(vec_bshort8 a, vec_short8 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_short8)(a), b)), 0) != 0)); +} + +static inline int vec_any_gt(vec_short8 a, vec_bshort8 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(a, (vec_short8)(b))), 0) != 0)); +} + + +static inline int vec_any_gt(vec_uint4 a, vec_uint4 b) +{ + return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpgt(a, b), -31)), 0))); +} + +static inline int vec_any_gt(vec_int4 a, vec_int4 b) +{ + return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpgt(a, b), -31)), 0))); +} + +static inline int vec_any_gt(vec_bint4 a, vec_int4 b) +{ + return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpgt((vec_int4)(a), b), -31)), 0))); +} + +static inline int vec_any_gt(vec_int4 a, vec_bint4 b) +{ + return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpgt(a, (vec_int4)(b)), -31)), 0))); +} + +static inline int vec_any_gt(vec_float4 a, vec_float4 b) +{ + return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpgt(a, b), -31)), 0))); +} + +/* vec_any_le (any elements less than or equal) + * ========== + */ +static inline int vec_any_le(vec_uchar16 a, vec_uchar16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) != 0xFFFF)); +} + +static inline int vec_any_le(vec_char16 a, vec_char16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) != 0xFFFF)); +} + +static inline int vec_any_le(vec_bchar16 a, vec_char16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_char16)(a), b)), 0) != 0xFFFF)); +} + +static inline int vec_any_le(vec_char16 a, vec_bchar16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(a, (vec_char16)(b))), 0) != 0xFFFF)); +} + +static inline int vec_any_le(vec_ushort8 a, vec_ushort8 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) != 0xFF)); +} + +static inline int vec_any_le(vec_short8 a, vec_short8 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) != 0xFF)); +} + +static inline int vec_any_le(vec_bshort8 a, vec_short8 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_short8)(a), b)), 0) != 0xFF)); +} + +static inline int vec_any_le(vec_short8 a, vec_bshort8 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(a, (vec_short8)(b))), 0) != 0xFF)); +} + +static inline int vec_any_le(vec_uint4 a, vec_uint4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) != 0xF)); +} + +static inline int vec_any_le(vec_int4 a, vec_int4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) != 0xF)); +} + +static inline int vec_any_le(vec_bint4 a, vec_int4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_int4)(a), b)), 
0) != 0xF)); +} + +static inline int vec_any_le(vec_int4 a, vec_bint4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(a, (vec_int4)(b))), 0) != 0xF)); +} + +static inline int vec_any_le(vec_float4 a, vec_float4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) != 0xF)); +} + + +/* vec_any_lt (any elements less than) + * ========== + */ +static inline int vec_any_lt(vec_uchar16 a, vec_uchar16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) != 0)); +} + +static inline int vec_any_lt(vec_char16 a, vec_char16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) != 0)); +} + +static inline int vec_any_lt(vec_bchar16 a, vec_char16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(b, (vec_char16)(a))), 0) != 0)); +} + +static inline int vec_any_lt(vec_char16 a, vec_bchar16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_char16)(b), a)), 0) != 0)); +} + +static inline int vec_any_lt(vec_ushort8 a, vec_ushort8 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) != 0)); +} + +static inline int vec_any_lt(vec_short8 a, vec_short8 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) != 0)); +} + +static inline int vec_any_lt(vec_bshort8 a, vec_short8 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(b, (vec_short8)(a))), 0) != 0)); +} + +static inline int vec_any_lt(vec_short8 a, vec_bshort8 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_short8)(b), a)), 0) != 0)); +} + +static inline int vec_any_lt(vec_uint4 a, vec_uint4 b) +{ + return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpgt(b, a), -31)), 0))); +} + +static inline int vec_any_lt(vec_int4 a, vec_int4 b) +{ + return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpgt(b, a), -31)), 0))); +} + +static inline int vec_any_lt(vec_bint4 a, vec_int4 b) +{ + return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpgt(b, (vec_int4)(a)), -31)), 0))); +} + +static inline int vec_any_lt(vec_int4 a, vec_bint4 b) +{ + return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpgt((vec_int4)(b), a), -31)), 0))); +} + +static inline int vec_any_lt(vec_float4 a, vec_float4 b) +{ + return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpgt(b, a), -31)), 0))); +} + +/* vec_any_nan (any elements not a number) + * =========== + */ +static inline int vec_any_nan(vec_float4 a) +{ + vec_uint4 exp, man; + vec_uint4 exp_mask = spu_splats((unsigned int)0x7F800000); + + exp = spu_and((vec_uint4)(a), exp_mask); + man = spu_and((vec_uint4)(a), spu_splats((unsigned int)0x007FFFFF)); + return ((int)(spu_extract(spu_gather(spu_andc(spu_cmpeq(exp, exp_mask), + spu_cmpeq(man, 0))), 0) != 0)); +} + + +/* vec_any_ne (any elements not equal) + * ========== + */ +static inline int vec_any_ne(vec_uchar16 a, vec_uchar16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) != 0xFFFF)); +} + +static inline int vec_any_ne(vec_char16 a, vec_char16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) != 0xFFFF)); +} + +static inline int vec_any_ne(vec_bchar16 a, vec_char16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpeq((vec_char16)(a), b)), 0) != 0xFFFF)); +} + +static inline int vec_any_ne(vec_char16 a, vec_bchar16 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpeq(a, (vec_char16)(b))), 0) != 0xFFFF)); +} + +static inline int vec_any_ne(vec_ushort8 a, vec_ushort8 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) != 0xFF)); +} + +static inline int vec_any_ne(vec_short8 a, vec_short8 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpeq(a, 
b)), 0) != 0xFF)); +} + +static inline int vec_any_ne(vec_bshort8 a, vec_short8 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpeq((vec_short8)(a), b)), 0) != 0xFF)); +} + +static inline int vec_any_ne(vec_short8 a, vec_bshort8 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpeq(a, (vec_short8)(b))), 0) != 0xFF)); +} + +static inline int vec_any_ne(vec_uint4 a, vec_uint4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) != 0xF)); +} + +static inline int vec_any_ne(vec_int4 a, vec_int4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) != 0xF)); +} + +static inline int vec_any_ne(vec_bint4 a, vec_int4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpeq((vec_int4)(a), b)), 0) != 0xF)); +} + +static inline int vec_any_ne(vec_int4 a, vec_bint4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpeq(a, (vec_int4)(b))), 0) != 0xF)); +} + +static inline int vec_any_ne(vec_float4 a, vec_float4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) != 0xF)); +} + + +/* vec_any_nge (any elements not greater than or equal) + * =========== + */ +static inline int vec_any_nge(vec_float4 a, vec_float4 b) +{ + return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpgt(b, a), -31)), 0))); +} + +/* vec_any_ngt (any elements not greater than) + * =========== + */ +static inline int vec_any_ngt(vec_float4 a, vec_float4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) != 0xF)); +} + + +/* vec_any_nle (any elements not less than or equal) + * =========== + */ +static inline int vec_any_nle(vec_float4 a, vec_float4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) != 0)); +} + + +/* vec_any_nlt (any elements not less than) + * =========== + */ +static inline int vec_any_nlt(vec_float4 a, vec_float4 b) +{ + return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) != 0xF)); +} + + +/* vec_any_numeric (any elements numeric) + * =============== + */ +static inline int vec_any_numeric(vec_float4 a) +{ + vec_uint4 exp; + + exp = spu_and(spu_rlmask((vec_uint4)(a), -23), 0xFF); + return ((int)(spu_extract(spu_gather(spu_cmpeq(exp, 255)), 0) != 0xF)); +} + + +/* vec_any_out (any elements out of bounds) + * =========== + */ +static inline int vec_any_out(vec_float4 a, vec_float4 b) +{ + return (spu_extract(spu_gather(spu_nor(spu_cmpabsgt(a, b), (vec_uint4)(spu_rlmaska((vec_int4)(b), -31)))), 0) != 0xF); +} + + +/* CBE Language Extension Intrinsics + */ + +/* vec_extract (extract element from vector) + * =========== + */ +#define vec_extract(_a, _element) spu_extract(_a, _element) + + +/* vec_insert (insert scalar into specified vector element) + * ========== + */ +#define vec_insert(_a, _b, _element) spu_insert(_a, _b, _element) + +/* vec_lvlx (load vector left indexed) + * ======== + */ +static inline vec_uchar16 vec_lvlx(int a, unsigned char *b) +{ + vec_uchar16 *p = (vec_uchar16 *)((unsigned char *)(b) + a); + return(spu_slqwbyte(*p, (unsigned int)p & 0xF)); +} + +static inline vec_uchar16 vec_lvlx(int a, vec_uchar16 *b) +{ + vec_uchar16 *p = (vec_uchar16 *)((unsigned char *)(b) + a); + return(spu_slqwbyte(*p, (unsigned int)p & 0xF)); +} + +static inline vec_char16 vec_lvlx(int a, signed char *b) +{ + vec_char16 *p = (vec_char16 *)((unsigned char *)(b) + a); + return(spu_slqwbyte(*p, (unsigned int)p & 0xF)); +} + +static inline vec_char16 vec_lvlx(int a, vec_char16 *b) +{ + vec_char16 *p = (vec_char16 *)((unsigned char *)(b) + a); + return(spu_slqwbyte(*p, (unsigned int)p & 0xF)); +} + +static inline vec_ushort8 vec_lvlx(int a, unsigned 
short *b) +{ + vec_ushort8 *p = (vec_ushort8 *)((unsigned char *)(b) + a); + return(spu_slqwbyte(*p, (unsigned int)p & 0xF)); +} + +static inline vec_ushort8 vec_lvlx(int a, vec_ushort8 *b) +{ + vec_ushort8 *p = (vec_ushort8 *)((unsigned char *)(b) + a); + return(spu_slqwbyte(*p, (unsigned int)p & 0xF)); +} + +static inline vec_short8 vec_lvlx(int a, signed short *b) +{ + vec_short8 *p = (vec_short8 *)((unsigned char *)(b) + a); + return(spu_slqwbyte(*p, (unsigned int)p & 0xF)); +} + +static inline vec_short8 vec_lvlx(int a, vec_short8 *b) +{ + vec_short8 *p = (vec_short8 *)((unsigned char *)(b) + a); + return(spu_slqwbyte(*p, (unsigned int)p & 0xF)); +} + +static inline vec_uint4 vec_lvlx(int a, unsigned int *b) +{ + vec_uint4 *p = (vec_uint4 *)((unsigned char *)(b) + a); + return(spu_slqwbyte(*p, (unsigned int)p & 0xF)); +} + +static inline vec_uint4 vec_lvlx(int a, vec_uint4 *b) +{ + vec_uint4 *p = (vec_uint4 *)((unsigned char *)(b) + a); + return(spu_slqwbyte(*p, (unsigned int)p & 0xF)); +} + +static inline vec_int4 vec_lvlx(int a, signed int *b) +{ + vec_int4 *p = (vec_int4 *)((unsigned char *)(b) + a); + return(spu_slqwbyte(*p, (unsigned int)p & 0xF)); +} + +static inline vec_int4 vec_lvlx(int a, vec_int4 *b) +{ + vec_int4 *p = (vec_int4 *)((unsigned char *)(b) + a); + return(spu_slqwbyte(*p, (unsigned int)p & 0xF)); +} + +static inline vec_float4 vec_lvlx(int a, float *b) +{ + vec_float4 *p = (vec_float4 *)((unsigned char *)(b) + a); + return(spu_slqwbyte(*p, (unsigned int)p & 0xF)); +} + +static inline vec_float4 vec_lvlx(int a, vec_float4 *b) +{ + vec_float4 *p = (vec_float4 *)((unsigned char *)(b) + a); + return(spu_slqwbyte(*p, (unsigned int)p & 0xF)); +} + + +/* vec_lvlxl (load vector left indexed last) + * ========= + */ +#define vec_lvlxl(_a, _b) vec_lvlx(_a, _b) + + +/* vec_lvrx (load vector right indexed) + * ======== + */ +static inline vec_uchar16 vec_lvrx(int a, unsigned char *b) +{ + vec_uchar16 *p = (vec_uchar16 *)((unsigned char *)(b) + a); + return(spu_rlmaskqwbyte(*p, ((int)p & 0xF)-16)); +} + +static inline vec_uchar16 vec_lvrx(int a, vec_uchar16 *b) +{ + vec_uchar16 *p = (vec_uchar16 *)((unsigned char *)(b) + a); + return(spu_rlmaskqwbyte(*p, ((int)p & 0xF)-16)); +} + +static inline vec_char16 vec_lvrx(int a, signed char *b) +{ + vec_char16 *p = (vec_char16 *)((unsigned char *)(b) + a); + return(spu_rlmaskqwbyte(*p, ((int)p & 0xF)-16)); +} + +static inline vec_char16 vec_lvrx(int a, vec_char16 *b) +{ + vec_char16 *p = (vec_char16 *)((unsigned char *)(b) + a); + return(spu_rlmaskqwbyte(*p, ((int)p & 0xF)-16)); +} + +static inline vec_ushort8 vec_lvrx(int a, unsigned short *b) +{ + vec_ushort8 *p = (vec_ushort8 *)((unsigned char *)(b) + a); + return(spu_rlmaskqwbyte(*p, ((int)p & 0xF)-16)); +} + +static inline vec_ushort8 vec_lvrx(int a, vec_ushort8 *b) +{ + vec_ushort8 *p = (vec_ushort8 *)((unsigned char *)(b) + a); + return(spu_rlmaskqwbyte(*p, ((int)p & 0xF)-16)); +} + +static inline vec_short8 vec_lvrx(int a, signed short *b) +{ + vec_short8 *p = (vec_short8 *)((unsigned char *)(b) + a); + return(spu_rlmaskqwbyte(*p, ((int)p & 0xF)-16)); +} + +static inline vec_short8 vec_lvrx(int a, vec_short8 *b) +{ + vec_short8 *p = (vec_short8 *)((unsigned char *)(b) + a); + return(spu_rlmaskqwbyte(*p, ((int)p & 0xF)-16)); +} + +static inline vec_uint4 vec_lvrx(int a, unsigned int *b) +{ + vec_uint4 *p = (vec_uint4 *)((unsigned char *)(b) + a); + return(spu_rlmaskqwbyte(*p, ((int)p & 0xF)-16)); +} + +static inline vec_uint4 vec_lvrx(int a, vec_uint4 *b) +{ + vec_uint4 *p = 
(vec_uint4 *)((unsigned char *)(b) + a); + return(spu_rlmaskqwbyte(*p, ((int)p & 0xF)-16)); +} + +static inline vec_int4 vec_lvrx(int a, signed int *b) +{ + vec_int4 *p = (vec_int4 *)((unsigned char *)(b) + a); + return(spu_rlmaskqwbyte(*p, ((int)p & 0xF)-16)); +} + +static inline vec_int4 vec_lvrx(int a, vec_int4 *b) +{ + vec_int4 *p = (vec_int4 *)((unsigned char *)(b) + a); + return(spu_rlmaskqwbyte(*p, ((int)p & 0xF)-16)); +} + +static inline vec_float4 vec_lvrx(int a, float *b) +{ + vec_float4 *p = (vec_float4 *)((unsigned char *)(b) + a); + return(spu_rlmaskqwbyte(*p, ((int)p & 0xF)-16)); +} + +static inline vec_float4 vec_lvrx(int a, vec_float4 *b) +{ + vec_float4 *p = (vec_float4 *)((unsigned char *)(b) + a); + return(spu_rlmaskqwbyte(*p, ((int)p & 0xF)-16)); +} + + + +/* vec_lvrxl (load vector right indexed last) + * ========= + */ +#define vec_lvrxl(_a, _b) vec_lvrx(_a, _b) + + +/* vec_promote (promote scalar to a vector) + * =========== + */ +#define vec_promote(_a, _element) spu_promote(_a, _element) + + +/* vec_splats (splat scalar to a vector) + * ========== + */ +#define vec_splats(_a) spu_splats(_a) + + +/* vec_stvlx (store vector left indexed) + * ========= + */ +static inline void vec_stvlx(vec_uchar16 a, int b, unsigned char *c) +{ + int shift; + vec_uchar16 *p = (vec_uchar16 *)((unsigned char *)(c) + b); + + shift = -((int)p & 0xF); + *p = spu_sel(*p, + spu_rlmaskqwbyte(a, shift), + spu_rlmaskqwbyte(spu_splats((unsigned char)0xFF), shift)); +} + +static inline void vec_stvlx(vec_uchar16 a, int b, vec_uchar16 *c) +{ + int shift; + vec_uchar16 *p = (vec_uchar16 *)((unsigned char *)(c) + b); + + shift = -((int)p & 0xF); + *p = spu_sel(*p, + spu_rlmaskqwbyte(a, shift), + spu_rlmaskqwbyte(spu_splats((unsigned char)0xFF), shift)); +} + +static inline void vec_stvlx(vec_char16 a, int b, signed char *c) +{ + int shift; + vec_char16 *p = (vec_char16 *)((unsigned char *)(c) + b); + + shift = -((int)p & 0xF); + *p = spu_sel(*p, + spu_rlmaskqwbyte(a, shift), + spu_rlmaskqwbyte(spu_splats((unsigned char)0xFF), shift)); +} + +static inline void vec_stvlx(vec_char16 a, int b, vec_char16 *c) +{ + int shift; + vec_char16 *p = (vec_char16 *)((unsigned char *)(c) + b); + + shift = -((int)p & 0xF); + *p = spu_sel(*p, + spu_rlmaskqwbyte(a, shift), + spu_rlmaskqwbyte(spu_splats((unsigned char)0xFF), shift)); +} + +static inline void vec_stvlx(vec_ushort8 a, int b, unsigned short *c) +{ + int shift; + vec_ushort8 *p = (vec_ushort8 *)((unsigned char *)(c) + b); + + shift = -((int)p & 0xF); + *p = spu_sel(*p, + spu_rlmaskqwbyte(a, shift), + spu_rlmaskqwbyte(spu_splats((unsigned short)0xFFFF), shift)); +} + +static inline void vec_stvlx(vec_ushort8 a, int b, vec_ushort8 *c) +{ + int shift; + vec_ushort8 *p = (vec_ushort8 *)((unsigned char *)(c) + b); + + shift = -((int)p & 0xF); + *p = spu_sel(*p, + spu_rlmaskqwbyte(a, shift), + spu_rlmaskqwbyte(spu_splats((unsigned short)0xFFFF), shift)); +} + +static inline void vec_stvlx(vec_short8 a, int b, signed short *c) +{ + int shift; + vec_short8 *p = (vec_short8 *)((unsigned char *)(c) + b); + + shift = -((int)p & 0xF); + *p = spu_sel(*p, + spu_rlmaskqwbyte(a, shift), + spu_rlmaskqwbyte(spu_splats((unsigned short)0xFFFF), shift)); +} + +static inline void vec_stvlx(vec_short8 a, int b, vec_short8 *c) +{ + int shift; + vec_short8 *p = (vec_short8 *)((unsigned char *)(c) + b); + + shift = -((int)p & 0xF); + *p = spu_sel(*p, + spu_rlmaskqwbyte(a, shift), + spu_rlmaskqwbyte(spu_splats((unsigned short)0xFFFF), shift)); +} + +static inline void 
vec_stvlx(vec_uint4 a, int b, unsigned int *c) +{ + int shift; + vec_uint4 *p = (vec_uint4 *)((unsigned char *)(c) + b); + + shift = -((int)p & 0xF); + *p = spu_sel(*p, + spu_rlmaskqwbyte(a, shift), + spu_rlmaskqwbyte(spu_splats((unsigned int)0xFFFFFFFF), shift)); +} + +static inline void vec_stvlx(vec_uint4 a, int b, vec_uint4 *c) +{ + int shift; + vec_uint4 *p = (vec_uint4 *)((unsigned char *)(c) + b); + + shift = -((int)p & 0xF); + *p = spu_sel(*p, + spu_rlmaskqwbyte(a, shift), + spu_rlmaskqwbyte(spu_splats((unsigned int)0xFFFFFFFF), shift)); +} + +static inline void vec_stvlx(vec_int4 a, int b, signed int *c) +{ + int shift; + vec_int4 *p = (vec_int4 *)((unsigned char *)(c) + b); + + shift = -((int)p & 0xF); + *p = spu_sel(*p, + spu_rlmaskqwbyte(a, shift), + spu_rlmaskqwbyte(spu_splats((unsigned int)0xFFFFFFFF), shift)); +} + +static inline void vec_stvlx(vec_int4 a, int b, vec_int4 *c) +{ + int shift; + vec_int4 *p = (vec_int4 *)((unsigned char *)(c) + b); + + shift = -((int)p & 0xF); + *p = spu_sel(*p, + spu_rlmaskqwbyte(a, shift), + spu_rlmaskqwbyte(spu_splats((unsigned int)0xFFFFFFFF), shift)); +} + +static inline void vec_stvlx(vec_float4 a, int b, float *c) +{ + int shift; + vec_float4 *p = (vec_float4 *)((unsigned char *)(c) + b); + + shift = -((int)p & 0xF); + *p = spu_sel(*p, + spu_rlmaskqwbyte(a, shift), + spu_rlmaskqwbyte(spu_splats((unsigned int)0xFFFFFFFF), shift)); +} + +static inline void vec_stvlx(vec_float4 a, int b, vec_float4 *c) +{ + int shift; + vec_float4 *p = (vec_float4 *)((unsigned char *)(c) + b); + + shift = -((int)p & 0xF); + *p = spu_sel(*p, + spu_rlmaskqwbyte(a, shift), + spu_rlmaskqwbyte(spu_splats((unsigned int)0xFFFFFFFF), shift)); +} + +/* vec_stvlxl (store vector left indexed last) + * ========== + */ +#define vec_stvlxl(_a, _b, _c) vec_stvlx(_a, _b, _c) + + +/* vec_stvrx (store vector right indexed) + * ========= + */ +static inline void vec_stvrx(vec_uchar16 a, int b, unsigned char *c) +{ + int shift; + vec_uchar16 *p = (vec_uchar16 *)((unsigned char *)(c) + b); + + shift = 16-((int)p & 0xF); + *p = spu_sel(*p, + spu_slqwbyte(a, shift), + spu_slqwbyte(spu_splats((unsigned char)0xFF), shift)); +} + +static inline void vec_stvrx(vec_uchar16 a, int b, vec_uchar16 *c) +{ + int shift; + vec_uchar16 *p = (vec_uchar16 *)((unsigned char *)(c) + b); + + shift = 16-((int)p & 0xF); + *p = spu_sel(*p, + spu_slqwbyte(a, shift), + spu_slqwbyte(spu_splats((unsigned char)0xFF), shift)); +} + +static inline void vec_stvrx(vec_char16 a, int b, signed char *c) +{ + int shift; + vec_char16 *p = (vec_char16 *)((unsigned char *)(c) + b); + + shift = 16-((int)p & 0xF); + *p = spu_sel(*p, + spu_slqwbyte(a, shift), + spu_slqwbyte(spu_splats((unsigned char)0xFF), shift)); +} + +static inline void vec_stvrx(vec_char16 a, int b, vec_char16 *c) +{ + int shift; + vec_char16 *p = (vec_char16 *)((unsigned char *)(c) + b); + + shift = 16-((int)p & 0xF); + *p = spu_sel(*p, + spu_slqwbyte(a, shift), + spu_slqwbyte(spu_splats((unsigned char)0xFF), shift)); +} + +static inline void vec_stvrx(vec_ushort8 a, int b, unsigned short *c) +{ + int shift; + vec_ushort8 *p = (vec_ushort8 *)((unsigned char *)(c) + b); + + shift = 16-((int)p & 0xF); + *p = spu_sel(*p, + spu_slqwbyte(a, shift), + spu_slqwbyte(spu_splats((unsigned short)0xFFFF), shift)); +} + +static inline void vec_stvrx(vec_ushort8 a, int b, vec_ushort8 *c) +{ + int shift; + vec_ushort8 *p = (vec_ushort8 *)((unsigned char *)(c) + b); + + shift = 16-((int)p & 0xF); + *p = spu_sel(*p, + spu_slqwbyte(a, shift), + 
spu_slqwbyte(spu_splats((unsigned short)0xFFFF), shift)); +} + +static inline void vec_stvrx(vec_short8 a, int b, signed short *c) +{ + int shift; + vec_short8 *p = (vec_short8 *)((unsigned char *)(c) + b); + + shift = 16-((int)p & 0xF); + *p = spu_sel(*p, + spu_slqwbyte(a, shift), + spu_slqwbyte(spu_splats((unsigned short)0xFFFF), shift)); +} + +static inline void vec_stvrx(vec_short8 a, int b, vec_short8 *c) +{ + int shift; + vec_short8 *p = (vec_short8 *)((unsigned char *)(c) + b); + + shift = 16-((int)p & 0xF); + *p = spu_sel(*p, + spu_slqwbyte(a, shift), + spu_slqwbyte(spu_splats((unsigned short)0xFFFF), shift)); +} + +static inline void vec_stvrx(vec_uint4 a, int b, unsigned int *c) +{ + int shift; + vec_uint4 *p = (vec_uint4 *)((unsigned char *)(c) + b); + + shift = 16-((int)p & 0xF); + *p = spu_sel(*p, + spu_slqwbyte(a, shift), + spu_slqwbyte(spu_splats((unsigned int)0xFFFFFFFF), shift)); +} + +static inline void vec_stvrx(vec_uint4 a, int b, vec_uint4 *c) +{ + int shift; + vec_uint4 *p = (vec_uint4 *)((unsigned char *)(c) + b); + + shift = 16-((int)p & 0xF); + *p = spu_sel(*p, + spu_slqwbyte(a, shift), + spu_slqwbyte(spu_splats((unsigned int)0xFFFFFFFF), shift)); +} + +static inline void vec_stvrx(vec_int4 a, int b, signed int *c) +{ + int shift; + vec_int4 *p = (vec_int4 *)((unsigned char *)(c) + b); + + shift = 16-((int)p & 0xF); + *p = spu_sel(*p, + spu_slqwbyte(a, shift), + spu_slqwbyte(spu_splats((unsigned int)0xFFFFFFFF), shift)); +} + +static inline void vec_stvrx(vec_int4 a, int b, vec_int4 *c) +{ + int shift; + vec_int4 *p = (vec_int4 *)((unsigned char *)(c) + b); + + shift = 16-((int)p & 0xF); + *p = spu_sel(*p, + spu_slqwbyte(a, shift), + spu_slqwbyte(spu_splats((unsigned int)0xFFFFFFFF), shift)); +} + +static inline void vec_stvrx(vec_float4 a, int b, float *c) +{ + int shift; + vec_float4 *p = (vec_float4 *)((unsigned char *)(c) + b); + + shift = 16-((int)p & 0xF); + *p = spu_sel(*p, + spu_slqwbyte(a, shift), + spu_slqwbyte(spu_splats((unsigned int)0xFFFFFFFF), shift)); +} + +static inline void vec_stvrx(vec_float4 a, int b, vec_float4 *c) +{ + int shift; + vec_float4 *p = (vec_float4 *)((unsigned char *)(c) + b); + + shift = 16-((int)p & 0xF); + *p = spu_sel(*p, + spu_slqwbyte(a, shift), + spu_slqwbyte(spu_splats((unsigned int)0xFFFFFFFF), shift)); +} + +/* vec_stvrxl (store vector right indexed last) + * ========== + */ +#define vec_stvrxl(_a, _b, _c) vec_stvrx(_a, _b, _c) + + +#endif /* __SPU__ */ +#endif /* __cplusplus */ +#endif /* !_VMX2SPU_H_ */
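A minimal usage sketch (illustrative only, not part of this revision) shows how the vec_* compatibility layer above is meant to be exercised from SPU C++ code. It assumes the header is installed on an spu-g++ include path as <vmx2spu.h> and uses only overloads and macros defined in this file (vec_splats, vec_subs, vec_all_eq):

/* Illustrative sketch, not part of the patch.  Assumes spu-g++ (the
   header is C++-only, per the __cplusplus guard above) and that
   vmx2spu.h is on the include path.  */
#include <vmx2spu.h>

int clamped_diff_is_zero (void)
{
  /* vec_splats maps onto spu_splats, as defined above.  */
  vec_uchar16 a = vec_splats ((unsigned char) 10);
  vec_uchar16 b = vec_splats ((unsigned char) 200);

  /* Unsigned saturating subtract: 10 - 200 clamps each element to 0.  */
  vec_uchar16 d = vec_subs (a, b);

  /* vec_all_eq gathers the 16 per-element compares and checks for the
     all-ones mask 0xFFFF, so this returns nonzero here.  */
  return vec_all_eq (d, vec_splats ((unsigned char) 0));
}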
vmx2spu.h

Property changes:
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+Id
\ No newline at end of property

Index: mfc_tag_reserve.c
===================================================================
--- mfc_tag_reserve.c	(nonexistent)
+++ mfc_tag_reserve.c	(revision 384)
@@ -0,0 +1,51 @@
+/* Copyright (C) 2007, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#include <spu_mfcio.h>
+extern vector unsigned int __mfc_tag_table;
+
+/* Reserves a DMA tag for exclusive use.  This routine returns an available
+   tag id in the range 0 to 31 and marks the tag as reserved.  If no tags
+   are available, MFC_DMA_TAG_INVALID is returned indicating that all tags
+   are already reserved.  */
+
+unsigned int
+__mfc_tag_reserve (void)
+{
+  vector unsigned int mask = (vector unsigned int)
+    { 0x80000000, 0x80000000, 0x80000000, 0x80000000 };
+  vector unsigned int count_zeros, is_valid;
+  vector signed int count_neg;
+
+  count_zeros = spu_cntlz (__mfc_tag_table);
+  count_neg = spu_sub (0, (vector signed int) count_zeros);
+
+  mask = spu_rlmask (mask, (vector signed int) count_neg);
+  __mfc_tag_table = spu_andc (__mfc_tag_table, mask);
+
+  is_valid = spu_cmpeq (count_zeros, 32);
+  count_zeros = spu_sel (count_zeros, is_valid, is_valid);
+
+  return spu_extract (count_zeros, 0);
+}
+
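A short usage sketch (illustrative only, not part of this revision) shows how the tag returned by __mfc_tag_reserve is typically consumed on the SPU side. The mfc_get, mfc_write_tag_mask and mfc_read_tag_status_all wrappers plus MFC_DMA_TAG_INVALID are assumed to come from spu_mfcio.h, and the __mfc_tag_release prototype is assumed to match the companion mfc_tag_release.c added elsewhere in this revision:

/* Illustrative sketch, not part of the patch.  The spu_mfcio.h DMA
   wrappers and MFC_DMA_TAG_INVALID are assumed; the __mfc_tag_release
   prototype is assumed to match mfc_tag_release.c.  */
#include <spu_mfcio.h>

extern unsigned int __mfc_tag_reserve (void);
extern unsigned int __mfc_tag_release (unsigned int);

/* DMA SIZE bytes from effective address EA into local-store buffer LS.  */
int fetch_buffer (volatile void *ls, unsigned long long ea, unsigned int size)
{
  unsigned int tag = __mfc_tag_reserve ();

  if (tag == MFC_DMA_TAG_INVALID)       /* all 32 tags already reserved */
    return -1;

  mfc_get (ls, ea, size, tag, 0, 0);    /* start the transfer           */
  mfc_write_tag_mask (1 << tag);        /* select this tag group        */
  mfc_read_tag_status_all ();           /* wait for it to complete      */

  __mfc_tag_release (tag);
  return 0;
}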
mfc_tag_reserve.c Property changes : Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +Id \ No newline at end of property Index: t-spu-elf =================================================================== --- t-spu-elf (nonexistent) +++ t-spu-elf (revision 384) @@ -0,0 +1,120 @@ +# Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc. +# +# This file is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 3 of the License, or (at your option) +# any later version. +# +# This file is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +# Define system directory to match STANDARD_INCLUDE_DIR in spu-elf.h, +# allowing combined SPU/PPU sysroot builds. +NATIVE_SYSTEM_HEADER_DIR = /include + +# Suppress building libgcc1.a +LIBGCC1 = +CROSS_LIBGCC1 = + +TARGET_LIBGCC2_CFLAGS = -fPIC -mwarn-reloc -D__IN_LIBGCC2 + +# We exclude those because the libgcc2.c default versions do not support +# the SPU single-precision format (round towards zero). We provide our +# own versions below. +LIB2FUNCS_EXCLUDE = _floatdisf _floatundisf + +# We provide our own version of __divdf3 that performs better and has +# better support for non-default rounding modes. +DPBIT_FUNCS := $(filter-out _div_df, $(DPBIT_FUNCS)) + +LIB2FUNCS_STATIC_EXTRA = $(srcdir)/config/spu/float_unssidf.c \ + $(srcdir)/config/spu/float_unsdidf.c \ + $(srcdir)/config/spu/float_unsdisf.c \ + $(srcdir)/config/spu/float_disf.c \ + $(srcdir)/config/spu/mfc_tag_table.c \ + $(srcdir)/config/spu/mfc_tag_reserve.c \ + $(srcdir)/config/spu/mfc_tag_release.c \ + $(srcdir)/config/spu/mfc_multi_tag_reserve.c \ + $(srcdir)/config/spu/mfc_multi_tag_release.c \ + $(srcdir)/config/spu/multi3.c \ + $(srcdir)/config/spu/divmodti4.c \ + $(srcdir)/config/spu/divv2df3.c + +LIB2ADDEH = $(srcdir)/unwind-dw2.c $(srcdir)/unwind-dw2-fde.c \ + $(srcdir)/unwind-sjlj.c $(srcdir)/unwind-c.c + +# We want fine grained libraries, so use the new code to build the +# floating point emulation libraries. +FPBIT = fp-bit.c +DPBIT = dp-bit.c + +dp-bit.c: $(srcdir)/config/fp-bit.c $(srcdir)/config/spu/t-spu-elf + echo '#undef US_SOFTWARE_GOFAST' > dp-bit.c + cat $(srcdir)/config/fp-bit.c >> dp-bit.c + +fp-bit.c: $(srcdir)/config/fp-bit.c $(srcdir)/config/spu/t-spu-elf + echo '#define FLOAT' > fp-bit.c + echo '#undef US_SOFTWARE_GOFAST' >> fp-bit.c + cat $(srcdir)/config/fp-bit.c >> fp-bit.c + +# Don't let CTOR_LIST end up in sdata section. +CRTSTUFF_T_CFLAGS = + +# Multi-lib support. +MULTILIB_OPTIONS=mea64 + +# Neither gcc or newlib seem to have a standard way to generate multiple +# crt*.o files. So we don't use the standard crt0.o name anymore. + +EXTRA_MULTILIB_PARTS = crtbegin.o crtend.o libgcc_cachemgr.a libgcc_cachemgr_nonatomic.a \ + libgcc_cache8k.a libgcc_cache16k.a libgcc_cache32k.a libgcc_cache64k.a libgcc_cache128k.a + +$(T)cachemgr.o: $(srcdir)/config/spu/cachemgr.c + $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) $(MULTILIB_CFLAGS) -c $< -o $@ + +# Specialised rule to add a -D flag. 
+$(T)cachemgr_nonatomic.o: $(srcdir)/config/spu/cachemgr.c + $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) $(MULTILIB_CFLAGS) -DNONATOMIC -c $< -o $@ + +$(T)libgcc_%.a: $(T)%.o + $(AR_FOR_TARGET) -rcs $@ $< + +$(T)cache8k.o: $(srcdir)/config/spu/cache.S + $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -D__CACHE_SIZE__=8 -o $@ -c $< + +$(T)cache16k.o: $(srcdir)/config/spu/cache.S + $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -D__CACHE_SIZE__=16 -o $@ -c $< + +$(T)cache32k.o: $(srcdir)/config/spu/cache.S + $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -D__CACHE_SIZE__=32 -o $@ -c $< + +$(T)cache64k.o: $(srcdir)/config/spu/cache.S + $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -D__CACHE_SIZE__=64 -o $@ -c $< + +$(T)cache128k.o: $(srcdir)/config/spu/cache.S + $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -D__CACHE_SIZE__=128 -o $@ -c $< + +LIBGCC = stmp-multilib +INSTALL_LIBGCC = install-multilib + +spu.o: $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ + $(RTL_H) $(REGS_H) hard-reg-set.h \ + real.h insn-config.h conditions.h insn-attr.h flags.h $(RECOG_H) \ + $(OBSTACK_H) $(TREE_H) $(EXPR_H) $(OPTABS_H) except.h function.h \ + output.h $(BASIC_BLOCK_H) $(INTEGRATE_H) toplev.h $(GGC_H) $(HASHTAB_H) \ + $(TM_P_H) $(TARGET_H) $(TARGET_DEF_H) langhooks.h reload.h cfglayout.h \ + $(srcdir)/config/spu/spu-protos.h \ + $(srcdir)/config/spu/spu-builtins.def + +spu-c.o: $(srcdir)/config/spu/spu-c.c \ + $(srcdir)/config/spu/spu-protos.h \ + $(CONFIG_H) $(SYSTEM_H) $(TREE_H) $(CPPLIB_H) \ + $(TM_P_H) c-pragma.h coretypes.h $(TM_H) insn-codes.h + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/spu/spu-c.c Index: spu_internals.h =================================================================== --- spu_internals.h (nonexistent) +++ spu_internals.h (revision 384) @@ -0,0 +1,421 @@ +/* Definitions of Synergistic Processing Unit (SPU). */ +/* Copyright (C) 2006, 2009 Free Software Foundation, Inc. + + This file is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your option) + any later version. + + This file is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifndef _SPU_INTERNALS_H +#define _SPU_INTERNALS_H + +/* For a typical GCC implementation, the vector keyword is defined here + * as a macro. If this macro conflicts with user code the user needs to + * undefine it. An extended GCC implementation may implement this + * keyword differently, such that it never conflicts, and will define + * the macro __VECTOR_KEYWORD_SUPPORTED__. */ +#ifndef __VECTOR_KEYWORD_SUPPORTED__ +#define vector __vector +#endif + + +/* The spu specific instruction macros, si_*(), correspond 1-1 with + * SPU instructions in the ISA. The arguments are the same with the + * following exceptions: + * - any instruction which both reads and writes rt will have an + * extra parameter in the macro. 
+ * - instructions which append zero to the immediate field assume + * the value given in a macro already has the zeroes appended. + * - integer/float convert functions expect a value from 0 to 127, + * i.e., the bias is added by the compiler. + * + * Parameters named 'imm' accept an integer literal. + * Parameters named 'r[abcdt]' accept a qword argument. + * Parameters named 'scalar' accept a scalar argument. + */ + +#define qword __vector signed char + +#define si_lqd(ra,imm) __builtin_si_lqd(ra,imm) +#define si_lqx(ra,rb) __builtin_si_lqx(ra,rb) +#define si_lqa(imm) __builtin_si_lqa(imm) +#define si_lqr(imm) __builtin_si_lqr(imm) +#define si_stqd(rt,ra,imm) __builtin_si_stqd(rt,ra,imm) +#define si_stqx(rt,ra,rb) __builtin_si_stqx(rt,ra,rb) +#define si_stqa(rt,imm) __builtin_si_stqa(rt,imm) +#define si_stqr(rt,imm) __builtin_si_stqr(rt,imm) +#define si_cbd(ra,imm) __builtin_si_cbd(ra,imm) +#define si_cbx(ra,rb) __builtin_si_cbx(ra,rb) +#define si_chd(ra,imm) __builtin_si_chd(ra,imm) +#define si_chx(ra,rb) __builtin_si_chx(ra,rb) +#define si_cwd(ra,imm) __builtin_si_cwd(ra,imm) +#define si_cwx(ra,rb) __builtin_si_cwx(ra,rb) +#define si_cdd(ra,imm) __builtin_si_cdd(ra,imm) +#define si_cdx(ra,rb) __builtin_si_cdx(ra,rb) +#define si_ilh(imm) __builtin_si_ilh(imm) +#define si_ilhu(imm) __builtin_si_ilhu(imm) +#define si_il(imm) __builtin_si_il(imm) +#define si_ila(imm) __builtin_si_ila(imm) +#define si_iohl(ra,imm) __builtin_si_iohl(ra,imm) +#define si_fsmbi(imm) __builtin_si_fsmbi(imm) +#define si_ah(ra,rb) __builtin_si_ah(ra,rb) +#define si_ahi(ra,imm) __builtin_si_ahi(ra,imm) +#define si_a(ra,rb) __builtin_si_a(ra,rb) +#define si_ai(ra,imm) __builtin_si_ai(ra,imm) +#define si_addx(ra,rb,rt) __builtin_si_addx(ra,rb,rt) +#define si_cg(ra,rb) __builtin_si_cg(ra,rb) +#define si_cgx(ra,rb,rt) __builtin_si_cgx(ra,rb,rt) +#define si_sfh(ra,rb) __builtin_si_sfh(ra,rb) +#define si_sfhi(imm,ra) __builtin_si_sfhi(imm,ra) +#define si_sf(ra,rb) __builtin_si_sf(ra,rb) +#define si_sfi(ra,imm) __builtin_si_sfi(ra,imm) +#define si_sfx(ra,rb,rt) __builtin_si_sfx(ra,rb,rt) +#define si_bg(ra,rb) __builtin_si_bg(ra,rb) +#define si_bgx(ra,rb,rt) __builtin_si_bgx(ra,rb,rt) +#define si_mpy(ra,rb) __builtin_si_mpy(ra,rb) +#define si_mpyu(ra,rb) __builtin_si_mpyu(ra,rb) +#define si_mpyi(ra,imm) __builtin_si_mpyi(ra,imm) +#define si_mpyui(ra,imm) __builtin_si_mpyui(ra,imm) +#define si_mpya(ra,rb,rc) __builtin_si_mpya(ra,rb,rc) +#define si_mpyh(ra,rb) __builtin_si_mpyh(ra,rb) +#define si_mpys(ra,rb) __builtin_si_mpys(ra,rb) +#define si_mpyhh(ra,rb) __builtin_si_mpyhh(ra,rb) +#define si_mpyhhu(ra,rb) __builtin_si_mpyhhu(ra,rb) +#define si_mpyhha(ra,rb,rc) __builtin_si_mpyhha(ra,rb,rc) +#define si_mpyhhau(ra,rb,rc) __builtin_si_mpyhhau(ra,rb,rc) +#define si_clz(ra) __builtin_si_clz(ra) +#define si_cntb(ra) __builtin_si_cntb(ra) +#define si_fsmb(ra) __builtin_si_fsmb(ra) +#define si_fsmh(ra) __builtin_si_fsmh(ra) +#define si_fsm(ra) __builtin_si_fsm(ra) +#define si_gbb(ra) __builtin_si_gbb(ra) +#define si_gbh(ra) __builtin_si_gbh(ra) +#define si_gb(ra) __builtin_si_gb(ra) +#define si_avgb(ra,rb) __builtin_si_avgb(ra,rb) +#define si_absdb(ra,rb) __builtin_si_absdb(ra,rb) +#define si_sumb(ra,rb) __builtin_si_sumb(ra,rb) +#define si_xsbh(ra) __builtin_si_xsbh(ra) +#define si_xshw(ra) __builtin_si_xshw(ra) +#define si_xswd(ra) __builtin_si_xswd(ra) +#define si_and(ra,rb) __builtin_si_and(ra,rb) +#define si_andc(ra,rb) __builtin_si_andc(ra,rb) +#define si_andbi(ra,imm) __builtin_si_andbi(ra,imm) +#define si_andhi(ra,imm) 
__builtin_si_andhi(ra,imm) +#define si_andi(ra,imm) __builtin_si_andi(ra,imm) +#define si_or(ra,rb) __builtin_si_or(ra,rb) +#define si_orc(ra,rb) __builtin_si_orc(ra,rb) +#define si_orbi(ra,imm) __builtin_si_orbi(ra,imm) +#define si_orhi(ra,imm) __builtin_si_orhi(ra,imm) +#define si_ori(ra,imm) __builtin_si_ori(ra,imm) +#define si_orx(ra) __builtin_si_orx(ra) +#define si_xor(ra,rb) __builtin_si_xor(ra,rb) +#define si_xorbi(ra,imm) __builtin_si_xorbi(ra,imm) +#define si_xorhi(ra,imm) __builtin_si_xorhi(ra,imm) +#define si_xori(ra,imm) __builtin_si_xori(ra,imm) +#define si_nand(ra,rb) __builtin_si_nand(ra,rb) +#define si_nor(ra,rb) __builtin_si_nor(ra,rb) +#define si_eqv(ra,rb) __builtin_si_eqv(ra,rb) +#define si_selb(ra,rb,rc) __builtin_si_selb(ra,rb,rc) +#define si_shufb(ra,rb,rc) __builtin_si_shufb(ra,rb,rc) +#define si_shlh(ra,rb) __builtin_si_shlh(ra,rb) +#define si_shlhi(ra,imm) __builtin_si_shlhi(ra,imm) +#define si_shl(ra,rb) __builtin_si_shl(ra,rb) +#define si_shli(ra,imm) __builtin_si_shli(ra,imm) +#define si_shlqbi(ra,rb) __builtin_si_shlqbi(ra,rb) +#define si_shlqbii(ra,imm) __builtin_si_shlqbii(ra,imm) +#define si_shlqby(ra,rb) __builtin_si_shlqby(ra,rb) +#define si_shlqbyi(ra,imm) __builtin_si_shlqbyi(ra,imm) +#define si_shlqbybi(ra,rb) __builtin_si_shlqbybi(ra,rb) +#define si_roth(ra,rb) __builtin_si_roth(ra,rb) +#define si_rothi(ra,imm) __builtin_si_rothi(ra,imm) +#define si_rot(ra,rb) __builtin_si_rot(ra,rb) +#define si_roti(ra,imm) __builtin_si_roti(ra,imm) +#define si_rotqby(ra,rb) __builtin_si_rotqby(ra,rb) +#define si_rotqbyi(ra,imm) __builtin_si_rotqbyi(ra,imm) +#define si_rotqbybi(ra,rb) __builtin_si_rotqbybi(ra,rb) +#define si_rotqbi(ra,rb) __builtin_si_rotqbi(ra,rb) +#define si_rotqbii(ra,imm) __builtin_si_rotqbii(ra,imm) +#define si_rothm(ra,rb) __builtin_si_rothm(ra,rb) +#define si_rothmi(ra,imm) __builtin_si_rothmi(ra,imm) +#define si_rotm(ra,rb) __builtin_si_rotm(ra,rb) +#define si_rotmi(ra,imm) __builtin_si_rotmi(ra,imm) +#define si_rotqmby(ra,rb) __builtin_si_rotqmby(ra,rb) +#define si_rotqmbyi(ra,imm) __builtin_si_rotqmbyi(ra,imm) +#define si_rotqmbi(ra,rb) __builtin_si_rotqmbi(ra,rb) +#define si_rotqmbii(ra,imm) __builtin_si_rotqmbii(ra,imm) +#define si_rotqmbybi(ra,rb) __builtin_si_rotqmbybi(ra,rb) +#define si_rotmah(ra,rb) __builtin_si_rotmah(ra,rb) +#define si_rotmahi(ra,imm) __builtin_si_rotmahi(ra,imm) +#define si_rotma(ra,rb) __builtin_si_rotma(ra,rb) +#define si_rotmai(ra,imm) __builtin_si_rotmai(ra,imm) +#define si_heq(ra,rb) __builtin_si_heq(ra,rb) +#define si_heqi(ra,imm) __builtin_si_heqi(ra,imm) +#define si_hgt(ra,rb) __builtin_si_hgt(ra,rb) +#define si_hgti(ra,imm) __builtin_si_hgti(ra,imm) +#define si_hlgt(ra,rb) __builtin_si_hlgt(ra,rb) +#define si_hlgti(ra,imm) __builtin_si_hlgti(ra,imm) +#define si_ceqb(ra,rb) __builtin_si_ceqb(ra,rb) +#define si_ceqbi(ra,imm) __builtin_si_ceqbi(ra,imm) +#define si_ceqh(ra,rb) __builtin_si_ceqh(ra,rb) +#define si_ceqhi(ra,imm) __builtin_si_ceqhi(ra,imm) +#define si_ceq(ra,rb) __builtin_si_ceq(ra,rb) +#define si_ceqi(ra,imm) __builtin_si_ceqi(ra,imm) +#define si_cgtb(ra,rb) __builtin_si_cgtb(ra,rb) +#define si_cgtbi(ra,imm) __builtin_si_cgtbi(ra,imm) +#define si_cgth(ra,rb) __builtin_si_cgth(ra,rb) +#define si_cgthi(ra,imm) __builtin_si_cgthi(ra,imm) +#define si_cgt(ra,rb) __builtin_si_cgt(ra,rb) +#define si_cgti(ra,imm) __builtin_si_cgti(ra,imm) +#define si_clgtb(ra,rb) __builtin_si_clgtb(ra,rb) +#define si_clgtbi(ra,imm) __builtin_si_clgtbi(ra,imm) +#define si_clgth(ra,rb) __builtin_si_clgth(ra,rb) +#define 
si_clgthi(ra,imm) __builtin_si_clgthi(ra,imm) +#define si_clgt(ra,rb) __builtin_si_clgt(ra,rb) +#define si_clgti(ra,imm) __builtin_si_clgti(ra,imm) +#define si_bisled(ra) __builtin_si_bisled(ra,0) +#define si_bisledd(ra) __builtin_si_bisledd(ra,0) +#define si_bislede(ra) __builtin_si_bislede(ra,0) +#define si_fa(ra,rb) __builtin_si_fa(ra,rb) +#define si_dfa(ra,rb) __builtin_si_dfa(ra,rb) +#define si_fs(ra,rb) __builtin_si_fs(ra,rb) +#define si_dfs(ra,rb) __builtin_si_dfs(ra,rb) +#define si_fm(ra,rb) __builtin_si_fm(ra,rb) +#define si_dfm(ra,rb) __builtin_si_dfm(ra,rb) +#define si_fma(ra,rb,rc) __builtin_si_fma(ra,rb,rc) +#define si_dfma(ra,rb,rc) __builtin_si_dfma(ra,rb,rc) +#define si_dfnma(ra,rb,rc) __builtin_si_dfnma(ra,rb,rc) +#define si_fnms(ra,rb,rc) __builtin_si_fnms(ra,rb,rc) +#define si_dfnms(ra,rb,rc) __builtin_si_dfnms(ra,rb,rc) +#define si_fms(ra,rb,rc) __builtin_si_fms(ra,rb,rc) +#define si_dfms(ra,rb,rc) __builtin_si_dfms(ra,rb,rc) +#define si_frest(ra) __builtin_si_frest(ra) +#define si_frsqest(ra) __builtin_si_frsqest(ra) +#define si_fi(ra,rb) __builtin_si_fi(ra,rb) +#define si_csflt(ra,imm) __builtin_si_csflt(ra,imm) +#define si_cflts(ra,imm) __builtin_si_cflts(ra,imm) +#define si_cuflt(ra,imm) __builtin_si_cuflt(ra,imm) +#define si_cfltu(ra,imm) __builtin_si_cfltu(ra,imm) +#define si_frds(ra) __builtin_si_frds(ra) +#define si_fesd(ra) __builtin_si_fesd(ra) +#define si_fceq(ra,rb) __builtin_si_fceq(ra,rb) +#define si_fcmeq(ra,rb) __builtin_si_fcmeq(ra,rb) +#define si_fcgt(ra,rb) __builtin_si_fcgt(ra,rb) +#define si_fcmgt(ra,rb) __builtin_si_fcmgt(ra,rb) +#define si_stop(imm) __builtin_si_stop(imm) +#define si_stopd(ra,rb,rc) __builtin_si_stopd(ra,rb,rc) +#define si_lnop() __builtin_si_lnop() +#define si_nop() __builtin_si_nop() +#define si_sync() __builtin_si_sync() +#define si_syncc() __builtin_si_syncc() +#define si_dsync() __builtin_si_dsync() +#define si_mfspr(imm) __builtin_si_mfspr(imm) +#define si_mtspr(imm,ra) __builtin_si_mtspr(imm,ra) +#define si_fscrrd() __builtin_si_fscrrd() +#define si_fscrwr(ra) __builtin_si_fscrwr(ra) +#define si_rdch(imm) __builtin_si_rdch(imm) +#define si_rchcnt(imm) __builtin_si_rchcnt(imm) +#define si_wrch(imm,ra) __builtin_si_wrch(imm,ra) + +/* celledp only instructions */ +#ifdef __SPU_EDP__ +#define si_dfceq(ra,rb) __builtin_si_dfceq(ra,rb) +#define si_dfcmeq(ra,rb) __builtin_si_dfcmeq(ra,rb) +#define si_dfcgt(ra,rb) __builtin_si_dfcgt(ra,rb) +#define si_dfcmgt(ra,rb) __builtin_si_dfcmgt(ra,rb) +#define si_dftsv(ra,imm) __builtin_si_dftsv(ra,imm) +#endif /* __SPU_EDP__ */ + +#define si_from_char(scalar) __builtin_si_from_char(scalar) +#define si_from_uchar(scalar) __builtin_si_from_uchar(scalar) +#define si_from_short(scalar) __builtin_si_from_short(scalar) +#define si_from_ushort(scalar) __builtin_si_from_ushort(scalar) +#define si_from_int(scalar) __builtin_si_from_int(scalar) +#define si_from_uint(scalar) __builtin_si_from_uint(scalar) +#define si_from_llong(scalar) __builtin_si_from_long(scalar) +#define si_from_ullong(scalar) __builtin_si_from_ulong(scalar) +#define si_from_float(scalar) __builtin_si_from_float(scalar) +#define si_from_double(scalar) __builtin_si_from_double(scalar) +#define si_from_ptr(scalar) __builtin_si_from_ptr(scalar) + +#define si_to_char(ra) __builtin_si_to_char(ra) +#define si_to_uchar(ra) __builtin_si_to_uchar(ra) +#define si_to_short(ra) __builtin_si_to_short(ra) +#define si_to_ushort(ra) __builtin_si_to_ushort(ra) +#define si_to_int(ra) __builtin_si_to_int(ra) +#define si_to_uint(ra) 
__builtin_si_to_uint(ra) +#define si_to_llong(ra) __builtin_si_to_long(ra) +#define si_to_ullong(ra) __builtin_si_to_ulong(ra) +#define si_to_float(ra) __builtin_si_to_float(ra) +#define si_to_double(ra) __builtin_si_to_double(ra) +#define si_to_ptr(ra) __builtin_si_to_ptr(ra) + +#define __align_hint(ptr,base,offset) __builtin_spu_align_hint(ptr,base,offset) + +/* generic spu_* intrinsics */ + +#define spu_splats(scalar) __builtin_spu_splats(scalar) +#define spu_convtf(ra,imm) __builtin_spu_convtf(ra,imm) +#define spu_convts(ra,imm) __builtin_spu_convts(ra,imm) +#define spu_convtu(ra,imm) __builtin_spu_convtu(ra,imm) +#define spu_extend(ra) __builtin_spu_extend(ra) +#define spu_roundtf(ra) __builtin_spu_roundtf(ra) +#define spu_add(ra,rb) __builtin_spu_add(ra,rb) +#define spu_addx(ra,rb,rt) __builtin_spu_addx(ra,rb,rt) +#define spu_genc(ra,rb) __builtin_spu_genc(ra,rb) +#define spu_gencx(ra,rb,rt) __builtin_spu_gencx(ra,rb,rt) +#define spu_madd(ra,rb,rc) __builtin_spu_madd(ra,rb,rc) +#define spu_nmadd(ra,rb,rc) __builtin_spu_nmadd(ra,rb,rc) +#define spu_mhhadd(ra,rb,rc) __builtin_spu_mhhadd(ra,rb,rc) +#define spu_msub(ra,rb,rc) __builtin_spu_msub(ra,rb,rc) +#define spu_mul(ra,rb) __builtin_spu_mul(ra,rb) +#define spu_mulh(ra,rb) __builtin_spu_mulh(ra,rb) +#define spu_mule(ra,rb) __builtin_spu_mule(ra,rb) +#define spu_mulo(ra,rb) __builtin_spu_mulo(ra,rb) +#define spu_mulsr(ra,rb) __builtin_spu_mulsr(ra,rb) +#define spu_nmsub(ra,rb,rc) __builtin_spu_nmsub(ra,rb,rc) +#define spu_sub(ra,rb) __builtin_spu_sub(ra,rb) +#define spu_subx(ra,rb,rt) __builtin_spu_subx(ra,rb,rt) +#define spu_genb(ra,rb) __builtin_spu_genb(ra,rb) +#define spu_genbx(ra,rb,rt) __builtin_spu_genbx(ra,rb,rt) +#define spu_absd(ra,rb) __builtin_spu_absd(ra,rb) +#define spu_avg(ra,rb) __builtin_spu_avg(ra,rb) +#define spu_sumb(ra,rb) __builtin_spu_sumb(ra,rb) +#define spu_bisled(ra) __builtin_spu_bisled(ra, 0) +#define spu_bisled_d(ra) __builtin_spu_bisled_d(ra, 0) +#define spu_bisled_e(ra) __builtin_spu_bisled_e(ra, 0) +#define spu_cmpabseq(ra,rb) __builtin_spu_cmpabseq(ra,rb) +#define spu_cmpabsgt(ra,rb) __builtin_spu_cmpabsgt(ra,rb) +#define spu_cmpeq(ra,rb) __builtin_spu_cmpeq(ra,rb) +#define spu_cmpgt(ra,rb) __builtin_spu_cmpgt(ra,rb) +#define spu_testsv(ra,imm) __builtin_spu_testsv(ra,imm) +#define spu_hcmpeq(ra,rb) __builtin_spu_hcmpeq(ra,rb) +#define spu_hcmpgt(ra,rb) __builtin_spu_hcmpgt(ra,rb) +#define spu_cntb(ra) __builtin_spu_cntb(ra) +#define spu_cntlz(ra) __builtin_spu_cntlz(ra) +#define spu_gather(ra) __builtin_spu_gather(ra) +#define spu_maskb(ra) __builtin_spu_maskb(ra) +#define spu_maskh(ra) __builtin_spu_maskh(ra) +#define spu_maskw(ra) __builtin_spu_maskw(ra) +#define spu_sel(ra,rb,rc) __builtin_spu_sel(ra,rb,rc) +#define spu_shuffle(ra,rb,rc) __builtin_spu_shuffle(ra,rb,rc) +#define spu_and(ra,rb) __builtin_spu_and(ra,rb) +#define spu_andc(ra,rb) __builtin_spu_andc(ra,rb) +#define spu_eqv(ra,rb) __builtin_spu_eqv(ra,rb) +#define spu_nand(ra,rb) __builtin_spu_nand(ra,rb) +#define spu_nor(ra,rb) __builtin_spu_nor(ra,rb) +#define spu_or(ra,rb) __builtin_spu_or(ra,rb) +#define spu_orc(ra,rb) __builtin_spu_orc(ra,rb) +#define spu_orx(ra) __builtin_spu_orx(ra) +#define spu_xor(ra,rb) __builtin_spu_xor(ra,rb) +#define spu_rl(ra,rb) __builtin_spu_rl(ra,rb) +#define spu_rlqw(ra,count) __builtin_spu_rlqw(ra,count) +#define spu_rlqwbyte(ra,count) __builtin_spu_rlqwbyte(ra,count) +#define spu_rlqwbytebc(ra,count) __builtin_spu_rlqwbytebc(ra,count) +#define spu_rlmask(ra,rb) __builtin_spu_rlmask(ra,rb) +#define 
spu_rlmaska(ra,rb) __builtin_spu_rlmaska(ra,rb) +#define spu_rlmaskqw(ra,rb) __builtin_spu_rlmaskqw(ra,rb) +#define spu_rlmaskqwbyte(ra,rb) __builtin_spu_rlmaskqwbyte(ra,rb) +#define spu_rlmaskqwbytebc(ra,rb) __builtin_spu_rlmaskqwbytebc(ra,rb) +#define spu_sl(ra,rb) __builtin_spu_sl(ra,rb) +#define spu_slqw(ra,rb) __builtin_spu_slqw(ra,rb) +#define spu_slqwbyte(ra,rb) __builtin_spu_slqwbyte(ra,rb) +#define spu_slqwbytebc(ra,rb) __builtin_spu_slqwbytebc(ra,rb) +#define spu_sr(ra,rb) __builtin_spu_sr(ra,rb) +#define spu_sra(ra,rb) __builtin_spu_sra(ra,rb) +#define spu_srqw(ra,rb) __builtin_spu_srqw(ra,rb) +#define spu_srqwbyte(ra,rb) __builtin_spu_srqwbyte(ra,rb) +#define spu_srqwbytebc(ra,rb) __builtin_spu_srqwbytebc(ra,rb) +#define spu_extract(ra,pos) __builtin_spu_extract(ra,pos) +#define spu_insert(scalar,ra,pos) __builtin_spu_insert(scalar,ra,pos) +#define spu_promote(scalar,pos) __builtin_spu_promote(scalar,pos) + +#ifdef __cplusplus +extern "C" { +#endif + +/* The type checking for some of these won't be accurate but they need + * to be defines because of the immediate values. */ +#define spu_idisable() __builtin_spu_idisable() +#define spu_ienable() __builtin_spu_ienable() +#define spu_mfspr(imm) si_to_uint(si_mfspr((imm))) +#define spu_mtspr(imm, ra) si_mtspr((imm),si_from_uint (ra)) +#define spu_mffpscr() ((vec_uint4)si_fscrrd()) +#define spu_mtfpscr(a) si_fscrwr((qword)a) +#define spu_dsync() si_dsync() +#define spu_stop(imm) si_stop(imm) +#define spu_sync() si_sync() +#define spu_sync_c() si_syncc() +#define spu_readch(imm) si_to_uint(si_rdch((imm))) +#define spu_readchqw(imm) ((vec_uint4)si_rdch((imm))) +#define spu_readchcnt(imm) si_to_uint(si_rchcnt((imm))) +#define spu_writech(imm, ra) si_wrch((imm), si_from_uint(ra)) +#define spu_writechqw(imm, ra) si_wrch((imm), (qword)(ra)) + +/* The following functions are static and always_inline to make sure + * they don't show up in object files which they aren't used in. 
*/ + +static __inline__ vec_float4 spu_re (vec_float4 ra) __attribute__((__always_inline__)); +static __inline__ vec_float4 spu_rsqrte (vec_float4 ra) __attribute__((__always_inline__)); + +static __inline__ vec_float4 +spu_re (vec_float4 ra) +{ + return (vec_float4) si_fi ((qword) (ra), si_frest ((qword) (ra))); +} +static __inline__ vec_float4 +spu_rsqrte (vec_float4 ra) +{ + return (vec_float4) si_fi ((qword) (ra), si_frsqest ((qword) (ra))); +} + +/* composite intrinsics */ +static __inline__ void spu_mfcdma32(volatile void *ls, unsigned int ea, unsigned int size, unsigned int tagid, unsigned int cmd) __attribute__((__always_inline__)); +static __inline__ void spu_mfcdma64(volatile void *ls, unsigned int eahi, unsigned int ealow, unsigned int size, unsigned int tagid, unsigned int cmd) __attribute__((__always_inline__)); +static __inline__ unsigned int spu_mfcstat(unsigned int type) __attribute__((__always_inline__)); + +static __inline__ void +spu_mfcdma32(volatile void *ls, unsigned int ea, unsigned int size, unsigned int tagid, unsigned int cmd) +{ + si_wrch(MFC_LSA,si_from_ptr(ls)); + si_wrch(MFC_EAL,si_from_uint(ea)); + si_wrch(MFC_Size,si_from_uint(size)); + si_wrch(MFC_TagID,si_from_uint(tagid)); + si_wrch(MFC_Cmd,si_from_uint(cmd)); +} +static __inline__ void +spu_mfcdma64(volatile void *ls, unsigned int eahi, unsigned int ealow, unsigned int size, unsigned int tagid, unsigned int cmd) +{ + si_wrch(MFC_LSA,si_from_ptr(ls)); + si_wrch(MFC_EAH,si_from_uint(eahi)); + si_wrch(MFC_EAL,si_from_uint(ealow)); + si_wrch(MFC_Size,si_from_uint(size)); + si_wrch(MFC_TagID,si_from_uint(tagid)); + si_wrch(MFC_Cmd,si_from_uint(cmd)); +} +static __inline__ unsigned int +spu_mfcstat(unsigned int type) +{ + si_wrch(MFC_WrTagUpdate,si_from_uint(type)); + return si_to_uint(si_rdch(MFC_RdTagStat)); +} +#ifdef __cplusplus + +} +#endif /* __cplusplus */ + +#endif /* SPUINTRIN_H */ +
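The composite spu_mfcdma32/spu_mfcstat intrinsics defined above are enough to drive a complete transfer on their own. A short sketch, assuming the MFC command and channel constants (MFC_GET_CMD, MFC_WrTagMask, MFC_TAG_UPDATE_ALL) from spu_mfcio.h, which are not defined in this header:

#include <spu_intrinsics.h>
#include <spu_mfcio.h>

void
get_and_wait (volatile void *ls, unsigned int ea, unsigned int size, unsigned int tag)
{
  spu_mfcdma32 (ls, ea, size, tag, MFC_GET_CMD);  /* enqueue the get command */
  spu_writech (MFC_WrTagMask, 1u << tag);         /* select this tag group */
  spu_mfcstat (MFC_TAG_UPDATE_ALL);               /* wait until the group completes */
}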
spu_internals.h Property changes : Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +Id \ No newline at end of property Index: mfc_tag_release.c =================================================================== --- mfc_tag_release.c (nonexistent) +++ mfc_tag_release.c (revision 384) @@ -0,0 +1,59 @@ +/* Copyright (C) 2007, 2009 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include <spu_mfcio.h> +extern vector unsigned int __mfc_tag_table; + +/* Release the specified DMA tag from exclusive use. Once released, the + tag is available for future reservation. Upon successful release, + MFC_DMA_TAG_VALID is returned. If the specified tag is not in the + range 0 to 31, or had not been reserved, no action is taken and + MFC_DMA_TAG_INVALID is returned. */ + +unsigned int +__mfc_tag_release (unsigned int tag) +{ + vector unsigned int is_invalid; + vector unsigned int mask = (vector unsigned int) + { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; + vector signed int zero = (vector signed int) { 0, 0, 0, 0 }; + + vector signed int has_been_reserved; + + /* Check if the tag is out of range. */ + is_invalid = spu_cmpgt (spu_promote (tag, 0), 31); + + /* Check whether the tag has been reserved, set to all 1 if has not + been reserved, 0 otherwise. */ + has_been_reserved = (vector signed int) spu_rl (__mfc_tag_table, tag); + has_been_reserved = (vector signed int) spu_cmpgt (zero, has_been_reserved); + + /* Set invalid. */ + is_invalid = spu_or ((vector unsigned int) has_been_reserved, is_invalid); + + mask = spu_rlmask (mask, (int)(-tag)); + __mfc_tag_table = spu_or (__mfc_tag_table, mask); + + return spu_extract(is_invalid, 0); +} +
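For reference, the bitmap convention shared by __mfc_tag_reserve and __mfc_tag_release is easy to state in scalar terms: bit (31 - tag) of the first word of __mfc_tag_table is 1 while the tag is free and 0 while it is reserved. A plain-C model of the two routines, for explanation only (the names below are illustrative, not part of libgcc):

static unsigned int tag_table_model = 0xFFFFFFFFu;  /* all 32 tags free */

unsigned int
model_tag_reserve (void)
{
  unsigned int tag;

  if (tag_table_model == 0)
    return 0xFFFFFFFFu;                        /* no free tag: MFC_DMA_TAG_INVALID */
  tag = __builtin_clz (tag_table_model);       /* first free tag, counting from the MSB */
  tag_table_model &= ~(0x80000000u >> tag);    /* mark it reserved */
  return tag;
}

unsigned int
model_tag_release (unsigned int tag)
{
  if (tag > 31 || (tag_table_model & (0x80000000u >> tag)) != 0)
    return 0xFFFFFFFFu;                        /* out of range, or was never reserved */
  tag_table_model |= 0x80000000u >> tag;       /* mark it free again */
  return 0;                                    /* the success value the routine above returns */
}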
mfc_tag_release.c Property changes : Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +Id \ No newline at end of property Index: float_unssidf.c =================================================================== --- float_unssidf.c (nonexistent) +++ float_unssidf.c (revision 384) @@ -0,0 +1,45 @@ +/* Copyright (C) 2006, 2008, 2009 Free Software Foundation, Inc. + + This file is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your option) + any later version. + + This file is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <spu_intrinsics.h> +const unsigned char __sidf_pat[16] __attribute__ ((__aligned__ (16))) = { + 0x02, 0x03, 0x10, 0x11, + 0x12, 0x13, 0x80, 0x80, + 0x06, 0x07, 0x14, 0x15, + 0x16, 0x17, 0x80, 0x80 +}; + +/* double __float_unssidf (unsigned int SI) */ +qword __float_unssidf (qword SI); +qword +__float_unssidf (qword SI) +{ + qword t0, t1, t2, t3, t4, t5, t6, t7; + t0 = si_clz (SI); + t1 = si_il (1054); + t2 = si_shl (SI, t0); + t3 = si_ceqi (t0, 32); + t4 = si_sf (t0, t1); + t5 = si_a (t2, t2); + t6 = si_andc (t4, t3); + t7 = si_shufb (t6, t5, *(const qword *) __sidf_pat); + return si_shlqbii (t7, 4); +}
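The constant 1054 and the shuffle pattern above fall out of the IEEE double layout: for a nonzero 32-bit input x with n = clz (x), the value is 1.f * 2^(31 - n), so the biased exponent is 1023 + 31 - n = 1054 - n, and the 31 bits below the leading 1 become the top of the 52-bit fraction. A scalar reference model, for explanation only (model_float_unssidf is an illustrative name, not part of libgcc):

double
model_float_unssidf (unsigned int x)
{
  unsigned long long frac, bits;
  double d;
  int n;

  if (x == 0)
    return 0.0;
  n = __builtin_clz (x);
  /* Normalize, drop the implicit leading 1, and place the remaining
     31 bits at the top of the 52-bit fraction.  */
  frac = (((unsigned long long) x << n) & 0x7FFFFFFFull) << 21;
  bits = ((unsigned long long) (1054 - n) << 52) | frac;
  __builtin_memcpy (&d, &bits, sizeof (d));
  return d;
}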
float_unssidf.c Property changes : Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +Id \ No newline at end of property Index: float_unsdisf.c =================================================================== --- float_unsdisf.c (nonexistent) +++ float_unsdisf.c (revision 384) @@ -0,0 +1,31 @@ +/* Copyright (C) 2008, 2009 Free Software Foundation, Inc. + + This file is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your option) + any later version. + + This file is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +/* Prototype. */ +float __floatundisf (unsigned long long x); + +float __floatundisf (unsigned long long x) +{ + /* The SPU back-end now generates inline code for this conversion. + This file is solely used to provide the __floatundisf function + for objects generated with prior versions of GCC. */ + return x; +}
float_unsdisf.c Property changes : Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +Id \ No newline at end of property Index: mfc_multi_tag_reserve.c =================================================================== --- mfc_multi_tag_reserve.c (nonexistent) +++ mfc_multi_tag_reserve.c (revision 384) @@ -0,0 +1,84 @@ +/* Copyright (C) 2007, 2009 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include <spu_mfcio.h> +extern vector unsigned int __mfc_tag_table; + +/* Reserve a sequential group of tags for exclusive use. The number of + tags to be reserved is specified by the <number_of_tags> parameter. + This routine returns the first tag ID for a sequential list of + available tags and marks them as reserved. The reserved group + of tags is in the range starting from the returned tag through + the returned tag + <number_of_tags>-1. + + If the number of tags requested exceeds the number of available + sequential tags, then MFC_DMA_TAG_INVALID is returned indicating + that the request could not be serviced. */ + +unsigned int +__mfc_multi_tag_reserve (unsigned int number_of_tags) +{ + vector unsigned int table_copy; + vector unsigned int one = (vector unsigned int) + { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF }; + vector unsigned int count_busy, is_valid; + vector unsigned int count_total; + vector unsigned int count_avail = (vector unsigned int) { 0, 0, 0, 0 }; + vector unsigned int index = (vector unsigned int) { 0, 0, 0, 0 }; + + table_copy = __mfc_tag_table; + + + /* count_busy: number of consecutive busy tags + count_avail: number of consecutive free tags + table_copy: temporary copy of the tag table + count_total: sum of count_busy and count_avail + index: index of the current working tag */ + do + { + table_copy = spu_sl (table_copy, count_avail); + + count_busy = spu_cntlz (table_copy); + table_copy = spu_sl (table_copy, count_busy); + count_avail = spu_cntlz (spu_xor(table_copy, -1)); + count_total = spu_add (count_busy, count_avail); + index = spu_add (index, count_total); + } + while (spu_extract (count_avail, 0) < number_of_tags + && spu_extract (table_copy, 0) != 0); + + index = spu_sub (index, count_avail); + + /* is_valid is set to 0xFFFFFFFF if table_copy == 0, 0 otherwise. */ + is_valid = spu_cmpeq (table_copy, 0); + index = spu_sel (index, is_valid, is_valid); + + /* Now I need to actually mark the tags as used. 
*/ + table_copy = spu_sl (one, number_of_tags); + table_copy = spu_rl (table_copy, -number_of_tags - spu_extract (index, 0)); + table_copy = spu_sel (table_copy, __mfc_tag_table, table_copy); + __mfc_tag_table = spu_sel (table_copy, __mfc_tag_table, is_valid); + + return spu_extract (index, 0); +} +
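A usage sketch for the multi-tag interface, assuming the spu_mfcio.h wrapper names mfc_multi_tag_reserve and mfc_multi_tag_release (provided by the system headers, not by this patch): reserve four consecutive tags, start one transfer on each, wait on the whole group, then release the block.

#include <spu_mfcio.h>

int
start_four_transfers (void *ls[4], unsigned long long ea[4], unsigned int size)
{
  unsigned int base = mfc_multi_tag_reserve (4);
  int i;

  if (base == MFC_DMA_TAG_INVALID)
    return -1;                          /* no run of 4 consecutive free tags */

  for (i = 0; i < 4; i++)
    mfc_get (ls[i], ea[i], size, base + i, 0, 0);

  mfc_write_tag_mask (0xFu << base);    /* tags base .. base+3 */
  mfc_read_tag_status_all ();
  mfc_multi_tag_release (base, 4);
  return 0;
}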
mfc_multi_tag_reserve.c Property changes : Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +Id \ No newline at end of property Index: vec_types.h =================================================================== --- vec_types.h (nonexistent) +++ vec_types.h (revision 384) @@ -0,0 +1,36 @@ +/* Copyright (C) 2006, 2009 Free Software Foundation, Inc. + + This file is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your option) + any later version. + + This file is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _VEC_TYPES_H_ +#define _VEC_TYPES_H_ 1 + +#include <spu_intrinsics.h> + +/* Define additional PowerPC SIMD/Vector Multi-media eXtension + * single keyword vector data types for use in mapping VMX code + * to the SPU. + */ +#define vec_bchar16 __vector unsigned char +#define vec_bshort8 __vector unsigned short +#define vec_pixel8 __vector unsigned short +#define vec_bint4 __vector unsigned int + +#endif /* _VEC_TYPES_H_ */
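A small illustration of how the names above are meant to be used when porting VMX code (vec_float4 and vec_uint4 are assumed to come from spu_intrinsics.h, which this header includes):

#include <vec_types.h>

/* Per-lane maximum, keeping the comparison result in the VMX-style
   boolean vector type defined above.  */
vec_float4
max4 (vec_float4 a, vec_float4 b)
{
  vec_bint4 gt = (vec_bint4) spu_cmpgt (a, b);  /* all-ones where a > b */
  return spu_sel (b, a, (vec_uint4) gt);        /* take a where the mask is set */
}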
vec_types.h Property changes : Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +Id \ No newline at end of property Index: mfc_multi_tag_release.c =================================================================== --- mfc_multi_tag_release.c (nonexistent) +++ mfc_multi_tag_release.c (revision 384) @@ -0,0 +1,72 @@ +/* Copyright (C) 2007, 2009 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include <spu_mfcio.h> +extern vector unsigned int __mfc_tag_table; + +/* Release a sequential group of tags from exclusive use. The sequential + group of tags is the range starting from <first_tag> through + <first_tag>+<number_of_tags>-1. Upon successful release, MFC_DMA_TAG_VALID + is returned and the tags become available for future reservation. + + If the specified tags were not previously reserved, no action is + taken and MFC_DMA_TAG_INVALID is returned. */ + +unsigned int +__mfc_multi_tag_release (unsigned int first_tag, unsigned int number_of_tags) +{ + vector unsigned int table_copy, tmp, tmp1; + vector unsigned int one = (vector unsigned int) + { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF }; + vector unsigned int is_invalid; + unsigned int last_tag; + vector unsigned int has_been_reserved; + + last_tag = first_tag + number_of_tags; + + table_copy = spu_sl (one, number_of_tags); + table_copy = spu_rl (table_copy, -last_tag); + table_copy = spu_xor (table_copy, -1); + + /* Make sure the tags are in range and valid. */ + tmp = spu_cmpgt (spu_promote(last_tag, 0), 32); + tmp1 = spu_cmpgt (spu_promote(number_of_tags, 0), 32); + is_invalid = spu_cmpgt (spu_promote(first_tag, 0), 31); + + /* All bits are set to 1 if invalid, 0 if valid. */ + is_invalid = spu_or (tmp, is_invalid); + is_invalid = spu_or (tmp1, is_invalid); + + /* check whether these tags have been reserved */ + tmp = spu_rlmask (one, (int)-number_of_tags); + tmp1 = spu_sl (__mfc_tag_table, first_tag); + has_been_reserved = spu_cmpgt(tmp1, tmp); + + is_invalid = spu_or (has_been_reserved, is_invalid); + + table_copy = spu_sel (__mfc_tag_table, table_copy, table_copy); + __mfc_tag_table = spu_sel (table_copy, __mfc_tag_table, is_invalid); + + return spu_extract (is_invalid, 0); +} +
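The mask arithmetic above reduces to a simple scalar statement under the same bitmap convention (bit 31 - t is 1 while tag t is free): releasing tags first_tag .. first_tag+number_of_tags-1 ORs a block of ones back into the table. A sketch that omits the validity checks (illustrative names only):

void
model_multi_tag_release (unsigned int *table, unsigned int first_tag,
                         unsigned int number_of_tags)
{
  /* number_of_tags ones at the most-significant end...  */
  unsigned int ones = (number_of_tags >= 32)
                      ? 0xFFFFFFFFu : ~(0xFFFFFFFFu >> number_of_tags);

  /* ...shifted down so they cover tags first_tag .. first_tag+number_of_tags-1.  */
  *table |= ones >> first_tag;
}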
mfc_multi_tag_release.c Property changes : Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +Id \ No newline at end of property Index: float_disf.c =================================================================== --- float_disf.c (nonexistent) +++ float_disf.c (revision 384) @@ -0,0 +1,31 @@ +/* Copyright (C) 2008, 2009 Free Software Foundation, Inc. + + This file is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your option) + any later version. + + This file is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +/* Prototype. */ +float __floatdisf (long long x); + +float __floatdisf (long long x) +{ + /* The SPU back-end now generates inline code for this conversion. + This file is solely used to provide the __floatdisf functions + for objects generated with prior versions of GCC. */ + return x; +}
float_disf.c Property changes : Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +Id \ No newline at end of property Index: spu.h =================================================================== --- spu.h (nonexistent) +++ spu.h (revision 384) @@ -0,0 +1,643 @@ +/* Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc. + + This file is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your option) + any later version. + + This file is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + + +/* Run-time Target */ +#define TARGET_CPU_CPP_BUILTINS() spu_cpu_cpp_builtins(pfile) + +#define TARGET_VERSION fprintf (stderr, " (spu %s)", __DATE__); + +#define OVERRIDE_OPTIONS spu_override_options() +#define C_COMMON_OVERRIDE_OPTIONS spu_c_common_override_options() + +#define OPTIMIZATION_OPTIONS(level,size) \ + spu_optimization_options(level,size) + +#define INIT_EXPANDERS spu_init_expanders() + +extern int target_flags; +extern const char *spu_fixed_range_string; + +/* Which processor to generate code or schedule for. */ +enum processor_type +{ + PROCESSOR_CELL, + PROCESSOR_CELLEDP +}; + +extern GTY(()) int spu_arch; +extern GTY(()) int spu_tune; + +/* Support for a compile-time default architecture and tuning. The rules are: + --with-arch is ignored if -march is specified. + --with-tune is ignored if -mtune is specified. */ +#define OPTION_DEFAULT_SPECS \ + {"arch", "%{!march=*:-march=%(VALUE)}" }, \ + {"tune", "%{!mtune=*:-mtune=%(VALUE)}" } + +/* Default target_flags if no switches specified. */ +#ifndef TARGET_DEFAULT +#define TARGET_DEFAULT (MASK_ERROR_RELOC | MASK_SAFE_DMA | MASK_BRANCH_HINTS \ + | MASK_SAFE_HINTS | MASK_ADDRESS_SPACE_CONVERSION) +#endif + + +/* Storage Layout */ + +#define BITS_BIG_ENDIAN 1 + +#define BYTES_BIG_ENDIAN 1 + +#define WORDS_BIG_ENDIAN 1 + +#define BITS_PER_UNIT 8 + +/* GCC uses word_mode in many places, assuming that it is the fastest + integer mode. That is not the case for SPU though. We can't use + 32 here because (of some reason I can't remember.) */ +#define BITS_PER_WORD 128 + +#define UNITS_PER_WORD (BITS_PER_WORD/BITS_PER_UNIT) + +/* We never actually change UNITS_PER_WORD, but defining this causes + libgcc to use some different sizes of types when compiling. */ +#define MIN_UNITS_PER_WORD 4 + +#define POINTER_SIZE 32 + +#define PARM_BOUNDARY 128 + +#define STACK_BOUNDARY 128 + +/* We want it 8-byte aligned so we can properly use dual-issue + instructions, which can only happen on an 8-byte aligned address. */ +#define FUNCTION_BOUNDARY 64 + +/* We would like to allow a larger alignment for data objects (for DMA) + but the aligned attribute is limited by BIGGEST_ALIGNMENT. We don't + define BIGGEST_ALIGNMENT as larger because it is used in other places + and would end up wasting space. (Is this still true?) */ +#define BIGGEST_ALIGNMENT 128 + +#define MINIMUM_ATOMIC_ALIGNMENT 128 + +/* Make all static objects 16-byte aligned. 
This allows us to assume + they are also padded to 16-bytes, which means we can use a single + load or store instruction to access them. Do the same for objects + on the stack. (Except a bug (?) allows some stack objects to be + unaligned.) */ +#define DATA_ALIGNMENT(TYPE,ALIGN) ((ALIGN) > 128 ? (ALIGN) : 128) +#define CONSTANT_ALIGNMENT(TYPE,ALIGN) ((ALIGN) > 128 ? (ALIGN) : 128) +#define LOCAL_ALIGNMENT(TYPE,ALIGN) ((ALIGN) > 128 ? (ALIGN) : 128) + +#define EMPTY_FIELD_BOUNDARY 32 + +#define STRICT_ALIGNMENT 1 + +/* symbol_ref's of functions are not aligned to 16 byte boundary. */ +#define ALIGNED_SYMBOL_REF_P(X) \ + (GET_CODE (X) == SYMBOL_REF \ + && (SYMBOL_REF_FLAGS (X) & SYMBOL_FLAG_ALIGN1) == 0 \ + && (! SYMBOL_REF_FUNCTION_P (X) \ + || align_functions >= 16)) + +#define PCC_BITFIELD_TYPE_MATTERS 1 + +#define MAX_FIXED_MODE_SIZE 128 + +#define STACK_SAVEAREA_MODE(save_level) \ + (save_level == SAVE_FUNCTION ? VOIDmode \ + : save_level == SAVE_NONLOCAL ? SImode \ + : Pmode) + +#define STACK_SIZE_MODE SImode + + +/* Type Layout */ + +#define INT_TYPE_SIZE 32 + +#define LONG_TYPE_SIZE 32 + +#define LONG_LONG_TYPE_SIZE 64 + +#define FLOAT_TYPE_SIZE 32 + +#define DOUBLE_TYPE_SIZE 64 + +#define LONG_DOUBLE_TYPE_SIZE 64 + +#define DEFAULT_SIGNED_CHAR 0 + +#define STDINT_LONG32 0 + + +/* Register Basics */ + +/* 128-130 are special registers that never appear in assembly code. */ +#define FIRST_PSEUDO_REGISTER 131 + +#define FIXED_REGISTERS { \ + 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 1, 1, 1 \ +} + +#define CALL_USED_REGISTERS { \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 1, 1, 1 \ +} + +#define CONDITIONAL_REGISTER_USAGE \ + spu_conditional_register_usage() + + +/* Values in Registers */ + +#define HARD_REGNO_NREGS(REGNO, MODE) \ + ((GET_MODE_BITSIZE(MODE)+MAX_FIXED_MODE_SIZE-1)/MAX_FIXED_MODE_SIZE) + +#define HARD_REGNO_MODE_OK(REGNO, MODE) 1 + +#define MODES_TIEABLE_P(MODE1, MODE2) \ + (GET_MODE_BITSIZE (MODE1) <= MAX_FIXED_MODE_SIZE \ + && GET_MODE_BITSIZE (MODE2) <= MAX_FIXED_MODE_SIZE) + + +/* Register Classes */ + +enum reg_class { + NO_REGS, + GENERAL_REGS, + ALL_REGS, + LIM_REG_CLASSES +}; + +/* SPU is simple, it really only has one class of registers. 
*/ +#define IRA_COVER_CLASSES { GENERAL_REGS, LIM_REG_CLASSES } + +#define N_REG_CLASSES (int) LIM_REG_CLASSES + +#define REG_CLASS_NAMES \ +{ "NO_REGS", \ + "GENERAL_REGS", \ + "ALL_REGS" \ +} + +#define REG_CLASS_CONTENTS { \ + {0, 0, 0, 0, 0}, /* no regs */ \ + {0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x3}, /* general regs */ \ + {0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x3}} /* all regs */ + +#define REGNO_REG_CLASS(REGNO) (GENERAL_REGS) + +#define BASE_REG_CLASS GENERAL_REGS + +#define INDEX_REG_CLASS GENERAL_REGS + +#define REGNO_OK_FOR_BASE_P(regno) \ + ((regno) < FIRST_PSEUDO_REGISTER || (regno > LAST_VIRTUAL_REGISTER && reg_renumber[regno] >= 0)) + +#define REGNO_OK_FOR_INDEX_P(regno) \ + ((regno) < FIRST_PSEUDO_REGISTER || (regno > LAST_VIRTUAL_REGISTER && reg_renumber[regno] >= 0)) + +#define INT_REG_OK_FOR_INDEX_P(X,STRICT) \ + ((!(STRICT) || REGNO_OK_FOR_INDEX_P (REGNO (X)))) +#define INT_REG_OK_FOR_BASE_P(X,STRICT) \ + ((!(STRICT) || REGNO_OK_FOR_BASE_P (REGNO (X)))) + +#define PREFERRED_RELOAD_CLASS(X,CLASS) (CLASS) + +#define CLASS_MAX_NREGS(CLASS, MODE) \ + ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +/* GCC assumes that modes are in the lowpart of a register, which is + only true for SPU. */ +#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \ + ((GET_MODE_SIZE (FROM) > 4 || GET_MODE_SIZE (TO) > 4) \ + && (GET_MODE_SIZE (FROM) < 16 || GET_MODE_SIZE (TO) < 16) \ + && GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO)) + +#define REGISTER_TARGET_PRAGMAS() do { \ +targetm.resolve_overloaded_builtin = spu_resolve_overloaded_builtin; \ +}while (0); + + +/* Frame Layout */ + +#define STACK_GROWS_DOWNWARD + +#define FRAME_GROWS_DOWNWARD 1 + +#define STARTING_FRAME_OFFSET (0) + +#define STACK_POINTER_OFFSET 32 + +#define FIRST_PARM_OFFSET(FNDECL) (0) + +#define DYNAMIC_CHAIN_ADDRESS(FP) plus_constant ((FP), -16) + +#define RETURN_ADDR_RTX(COUNT,FP) (spu_return_addr (COUNT, FP)) + +/* Should this be defined? Would it simplify our implementation. */ +/* #define RETURN_ADDR_IN_PREVIOUS_FRAME */ + +#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG(Pmode, LINK_REGISTER_REGNUM) + +#define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (LINK_REGISTER_REGNUM) + +#define ARG_POINTER_CFA_OFFSET(FNDECL) \ + (crtl->args.pretend_args_size - STACK_POINTER_OFFSET) + + +/* Stack Checking */ + +/* We store the Available Stack Size in the second slot of the stack + register. We emit stack checking code during the prologue. */ +#define STACK_CHECK_BUILTIN 1 + + +/* Frame Registers, and other registers */ + +#define STACK_POINTER_REGNUM 1 + +/* Will be eliminated. */ +#define FRAME_POINTER_REGNUM 128 + +/* This is not specified in any ABI, so could be set to anything. */ +#define HARD_FRAME_POINTER_REGNUM 127 + +/* Will be eliminated. */ +#define ARG_POINTER_REGNUM 129 + +#define STATIC_CHAIN_REGNUM 2 + +#define LINK_REGISTER_REGNUM 0 + +/* Used to keep track of instructions that have clobbered the hint + * buffer. Users can also specify it in inline asm. 
*/ +#define HBR_REGNUM 130 + +#define MAX_REGISTER_ARGS 72 +#define FIRST_ARG_REGNUM 3 +#define LAST_ARG_REGNUM (FIRST_ARG_REGNUM + MAX_REGISTER_ARGS - 1) + +#define MAX_REGISTER_RETURN 72 +#define FIRST_RETURN_REGNUM 3 +#define LAST_RETURN_REGNUM (FIRST_RETURN_REGNUM + MAX_REGISTER_RETURN - 1) + + +/* Elimination */ + +#define ELIMINABLE_REGS \ + {{ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + {ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \ + {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + {FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}} + +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + ((OFFSET) = spu_initial_elimination_offset((FROM),(TO))) + + +/* Stack Arguments */ + +#define ACCUMULATE_OUTGOING_ARGS 1 + +#define REG_PARM_STACK_SPACE(FNDECL) 0 + +#define OUTGOING_REG_PARM_STACK_SPACE(FNTYPE) 1 + +#define RETURN_POPS_ARGS(FUNDECL,FUNTYPE,SIZE) (0) + + +/* Register Arguments */ + +#define FUNCTION_ARG(CUM, MODE, TYPE, NAMED) \ + (spu_function_arg((CUM),(MODE),(TYPE),(NAMED))) + +#define CUMULATIVE_ARGS int + +#define INIT_CUMULATIVE_ARGS(CUM,FNTYPE,LIBNAME,FNDECL,N_NAMED_ARGS) \ + ((CUM) = 0) + +#define FUNCTION_ARG_ADVANCE(CUM, MODE, TYPE, NAMED) \ + ((CUM) += \ + (TYPE) && TREE_CODE (TYPE_SIZE (TYPE)) != INTEGER_CST ? 1 \ + : (MODE) == BLKmode ? ((int_size_in_bytes(TYPE)+15) / 16) \ + : (MODE) == VOIDmode ? 1 \ + : HARD_REGNO_NREGS(CUM,MODE)) + + +/* The SPU ABI wants 32/64-bit types at offset 0 in the quad-word on the + stack. 8/16-bit types should be at offsets 3/2 respectively. */ +#define FUNCTION_ARG_OFFSET(MODE, TYPE) \ +(((TYPE) && INTEGRAL_TYPE_P (TYPE) && GET_MODE_SIZE (MODE) < 4) \ + ? (4 - GET_MODE_SIZE (MODE)) \ + : 0) + +#define FUNCTION_ARG_PADDING(MODE,TYPE) upward + +#define PAD_VARARGS_DOWN 0 + +#define FUNCTION_ARG_REGNO_P(N) ((N) >= (FIRST_ARG_REGNUM) && (N) <= (LAST_ARG_REGNUM)) + +/* Scalar Return */ + +#define FUNCTION_VALUE(VALTYPE, FUNC) \ + (spu_function_value((VALTYPE),(FUNC))) + +#define LIBCALL_VALUE(MODE) gen_rtx_REG (MODE, FIRST_RETURN_REGNUM) + +#define FUNCTION_VALUE_REGNO_P(N) ((N) >= (FIRST_RETURN_REGNUM) && (N) <= (LAST_RETURN_REGNUM)) + + +/* Machine-specific symbol_ref flags. */ +#define SYMBOL_FLAG_ALIGN1 (SYMBOL_FLAG_MACH_DEP << 0) + +/* Aggregate Return */ + +#define DEFAULT_PCC_STRUCT_RETURN 0 + + +/* Function Entry */ + +#define EXIT_IGNORE_STACK 0 + +#define EPILOGUE_USES(REGNO) ((REGNO)==1 ? 1 : 0) + + +/* Profiling */ + +#define FUNCTION_PROFILER(FILE, LABELNO) \ + spu_function_profiler ((FILE), (LABELNO)); + +#define NO_PROFILE_COUNTERS 1 + +#define PROFILE_BEFORE_PROLOGUE 1 + + +/* Trampolines */ + +#define TRAMPOLINE_SIZE (TARGET_LARGE_MEM ? 
20 : 16) + +#define TRAMPOLINE_ALIGNMENT 128 + +/* Addressing Modes */ + +#define CONSTANT_ADDRESS_P(X) spu_constant_address_p(X) + +#define MAX_REGS_PER_ADDRESS 2 + +#define LEGITIMATE_CONSTANT_P(X) spu_legitimate_constant_p(X) + + +/* Costs */ + +#define BRANCH_COST(speed_p, predictable_p) spu_branch_cost + +#define SLOW_BYTE_ACCESS 0 + +#define MOVE_RATIO(speed) 32 + +#define NO_FUNCTION_CSE + + +/* Sections */ + +#define TEXT_SECTION_ASM_OP ".text" + +#define DATA_SECTION_ASM_OP ".data" + +#define JUMP_TABLES_IN_TEXT_SECTION 1 + + +/* PIC */ +#define PIC_OFFSET_TABLE_REGNUM 126 + + +/* File Framework */ + +#define ASM_APP_ON "" + +#define ASM_APP_OFF "" + +#define ASM_OUTPUT_SOURCE_FILENAME(STREAM, NAME) \ + do { fprintf (STREAM, "\t.file\t"); \ + output_quoted_string (STREAM, NAME); \ + fprintf (STREAM, "\n"); \ + } while (0) + + +/* Uninitialized Data */ +#define ASM_OUTPUT_COMMON(FILE, NAME, SIZE, ROUNDED) \ +( fputs (".comm ", (FILE)), \ + assemble_name ((FILE), (NAME)), \ + fprintf ((FILE), ",%d\n", (ROUNDED))) + +#define ASM_OUTPUT_LOCAL(FILE, NAME, SIZE, ROUNDED) \ +( fputs (".lcomm ", (FILE)), \ + assemble_name ((FILE), (NAME)), \ + fprintf ((FILE), ",%d\n", (ROUNDED))) + + +/* Label Output */ +#define ASM_OUTPUT_LABEL(FILE,NAME) \ + do { assemble_name (FILE, NAME); fputs (":\n", FILE); } while (0) + +#define ASM_OUTPUT_LABELREF(FILE, NAME) \ + asm_fprintf (FILE, "%U%s", default_strip_name_encoding (NAME)) + +#define ASM_OUTPUT_SYMBOL_REF(FILE, X) \ + do \ + { \ + tree decl; \ + assemble_name (FILE, XSTR ((X), 0)); \ + if ((decl = SYMBOL_REF_DECL ((X))) != 0 \ + && TREE_CODE (decl) == VAR_DECL \ + && TYPE_ADDR_SPACE (TREE_TYPE (decl))) \ + fputs ("@ppu", FILE); \ + } while (0) + + +/* Instruction Output */ +#define REGISTER_NAMES \ +{"$lr", "$sp", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$15", \ + "$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23", "$24", "$25", "$26", "$27", "$28", "$29", "$30", "$31", \ + "$32", "$33", "$34", "$35", "$36", "$37", "$38", "$39", "$40", "$41", "$42", "$43", "$44", "$45", "$46", "$47", \ + "$48", "$49", "$50", "$51", "$52", "$53", "$54", "$55", "$56", "$57", "$58", "$59", "$60", "$61", "$62", "$63", \ + "$64", "$65", "$66", "$67", "$68", "$69", "$70", "$71", "$72", "$73", "$74", "$75", "$76", "$77", "$78", "$79", \ + "$80", "$81", "$82", "$83", "$84", "$85", "$86", "$87", "$88", "$89", "$90", "$91", "$92", "$93", "$94", "$95", \ + "$96", "$97", "$98", "$99", "$100", "$101", "$102", "$103", "$104", "$105", "$106", "$107", "$108", "$109", "$110", "$111", \ + "$112", "$113", "$114", "$115", "$116", "$117", "$118", "$119", "$120", "$121", "$122", "$123", "$124", "$125", "$126", "$127", \ + "$vfp", "$vap", "hbr" \ +} + +#define PRINT_OPERAND(FILE, X, CODE) print_operand(FILE, X, CODE) + +#define PRINT_OPERAND_ADDRESS(FILE, ADDR) \ + print_operand_address (FILE, ADDR) + +#define LOCAL_LABEL_PREFIX "." + +#define USER_LABEL_PREFIX "" + + +/* Dispatch Tables */ + +#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \ + fprintf (FILE, "\t.word .L%d-.L%d\n", VALUE, REL) + +#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \ + fprintf (FILE, "\t.word .L%d\n", VALUE) + + +/* Alignment Output */ + +#define ASM_OUTPUT_ALIGN(FILE,LOG) \ + do { if (LOG!=0) fprintf (FILE, "\t.align\t%d\n", (LOG)); } while (0) + + +/* Model costs for the vectorizer. */ + +/* Cost of conditional branch. 
*/ +#ifndef TARG_COND_BRANCH_COST +#define TARG_COND_BRANCH_COST 6 +#endif + +/* Cost of any scalar operation, excluding load and store. */ +#ifndef TARG_SCALAR_STMT_COST +#define TARG_SCALAR_STMT_COST 1 +#endif + +/* Cost of scalar load. */ +#undef TARG_SCALAR_LOAD_COST +#define TARG_SCALAR_LOAD_COST 2 /* load + rotate */ + +/* Cost of scalar store. */ +#undef TARG_SCALAR_STORE_COST +#define TARG_SCALAR_STORE_COST 10 + +/* Cost of any vector operation, excluding load, store, + or vector to scalar operation. */ +#undef TARG_VEC_STMT_COST +#define TARG_VEC_STMT_COST 1 + +/* Cost of vector to scalar operation. */ +#undef TARG_VEC_TO_SCALAR_COST +#define TARG_VEC_TO_SCALAR_COST 1 + +/* Cost of scalar to vector operation. */ +#undef TARG_SCALAR_TO_VEC_COST +#define TARG_SCALAR_TO_VEC_COST 1 + +/* Cost of aligned vector load. */ +#undef TARG_VEC_LOAD_COST +#define TARG_VEC_LOAD_COST 1 + +/* Cost of misaligned vector load. */ +#undef TARG_VEC_UNALIGNED_LOAD_COST +#define TARG_VEC_UNALIGNED_LOAD_COST 2 + +/* Cost of vector store. */ +#undef TARG_VEC_STORE_COST +#define TARG_VEC_STORE_COST 1 + +/* Cost of vector permutation. */ +#ifndef TARG_VEC_PERMUTE_COST +#define TARG_VEC_PERMUTE_COST 1 +#endif + + +/* Misc */ + +#define CASE_VECTOR_MODE SImode + +#define MOVE_MAX 16 + +#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) ((INPREC) <= 32 && (OUTPREC) <= (INPREC)) + +#define STORE_FLAG_VALUE -1 + +#define Pmode SImode + +#define FUNCTION_MODE QImode + +#define NO_IMPLICIT_EXTERN_C 1 + +#define HANDLE_PRAGMA_PACK_PUSH_POP 1 + +/* Canonicalize a comparison from one we don't have to one we do have. */ +#define CANONICALIZE_COMPARISON(CODE,OP0,OP1) \ + do { \ + if (((CODE) == LE || (CODE) == LT || (CODE) == LEU || (CODE) == LTU)) \ + { \ + rtx tem = (OP0); \ + (OP0) = (OP1); \ + (OP1) = tem; \ + (CODE) = swap_condition (CODE); \ + } \ + } while (0) + + +/* Address spaces. */ +#define ADDR_SPACE_EA 1 + +/* Named address space keywords. */ +#define TARGET_ADDR_SPACE_KEYWORDS ADDR_SPACE_KEYWORD ("__ea", ADDR_SPACE_EA) + + +/* Builtins. */ + +enum spu_builtin_type +{ + B_INSN, + B_JUMP, + B_BISLED, + B_CALL, + B_HINT, + B_OVERLOAD, + B_INTERNAL +}; + +struct GTY(()) spu_builtin_description +{ + int fcode; + int icode; + const char *name; + enum spu_builtin_type type; + + /* The first element of parm is always the return type. The rest + are a zero terminated list of parameters. */ + int parm[5]; + + tree fndecl; +}; + +extern struct spu_builtin_description spu_builtins[]; +
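One user-visible consequence of the ADDR_SPACE_EA and TARGET_ADDR_SPACE_KEYWORDS definitions above is the __ea qualifier, which places an object in PPE effective-address space and routes accesses through the software cache selected by -mcache-size (the libgcc_cache*k libraries built in t-spu-elf and linked by the LIB_SPEC in spu-elf.h). A sketch of the source-level usage; shared_counter is an illustrative name:

/* Lives on the PPE side of the memory map; the SPU compiler emits
   software-cache accesses for it.  */
extern __ea int shared_counter;

int
bump_shared (void)
{
  return ++shared_counter;
}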
spu.h Property changes : Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +Id \ No newline at end of property Index: multi3.c =================================================================== --- multi3.c (nonexistent) +++ multi3.c (revision 384) @@ -0,0 +1,97 @@ +/* Copyright (C) 2008, 2009 Free Software Foundation, Inc. + + This file is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your option) + any later version. + + This file is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#include + +typedef int TItype __attribute__ ((mode (TI))); + +/* A straight forward vectorization and unrolling of + * short l[8], r[8]; + * TItype total = 0; + * for (i = 0; i < 8; i++) + * for (j = 0; j < 8; j++) + * total += (TItype)((l[7-i] * r[7-j]) << (16 * (i + j))); + */ +TItype +__multi3 (TItype l, TItype r) +{ + qword u = *(qword *) & l; + qword v = *(qword *) & r; + qword splat0 = si_shufb (v, v, si_ilh (0x0001)); + qword splat1 = si_shufb (v, v, si_ilh (0x0203)); + qword splat2 = si_shufb (v, v, si_ilh (0x0405)); + qword splat3 = si_shufb (v, v, si_ilh (0x0607)); + qword splat4 = si_shufb (v, v, si_ilh (0x0809)); + qword splat5 = si_shufb (v, v, si_ilh (0x0a0b)); + qword splat6 = si_shufb (v, v, si_ilh (0x0c0d)); + qword splat7 = si_shufb (v, v, si_ilh (0x0e0f)); + + qword part0l = si_shlqbyi (si_mpyu (u, splat0), 14); + qword part1h = si_shlqbyi (si_mpyhhu (u, splat1), 14); + qword part1l = si_shlqbyi (si_mpyu (u, splat1), 12); + qword part2h = si_shlqbyi (si_mpyhhu (u, splat2), 12); + qword part2l = si_shlqbyi (si_mpyu (u, splat2), 10); + qword part3h = si_shlqbyi (si_mpyhhu (u, splat3), 10); + qword part3l = si_shlqbyi (si_mpyu (u, splat3), 8); + qword part4h = si_shlqbyi (si_mpyhhu (u, splat4), 8); + qword part4l = si_shlqbyi (si_mpyu (u, splat4), 6); + qword part5h = si_shlqbyi (si_mpyhhu (u, splat5), 6); + qword part5l = si_shlqbyi (si_mpyu (u, splat5), 4); + qword part6h = si_shlqbyi (si_mpyhhu (u, splat6), 4); + qword part6l = si_shlqbyi (si_mpyu (u, splat6), 2); + qword part7h = si_shlqbyi (si_mpyhhu (u, splat7), 2); + qword part7l = si_mpyu (u, splat7); + + qword carry, total0, total1, total2, total3, total4; + qword total5, total6, total7, total8, total9, total10; + qword total; + + total0 = si_a (si_a (si_a (part0l, part1h), si_a (part1l, part2h)), part7l); + total1 = si_a (part2l, part3h); + total2 = si_a (part3l, part4h); + total3 = si_a (part4l, part5h); + total4 = si_a (part5l, part6h); + total5 = si_a (part6l, part7h); + total6 = si_a (total0, total1); + total7 = si_a (total2, total3); + total8 = si_a (total4, total5); + total9 = si_a (total6, total7); + total10 = si_a (total8, total9); + + carry = si_cg (part2l, part3h); + carry = si_a (carry, si_cg (part3l, part4h)); + carry = si_a (carry, si_cg 
(part4l, part5h)); + carry = si_a (carry, si_cg (part5l, part6h)); + carry = si_a (carry, si_cg (part6l, part7h)); + carry = si_a (carry, si_cg (total0, total1)); + carry = si_a (carry, si_cg (total2, total3)); + carry = si_a (carry, si_cg (total4, total5)); + carry = si_a (carry, si_cg (total6, total7)); + carry = si_a (carry, si_cg (total8, total9)); + carry = si_shlqbyi (carry, 4); + + total = si_cg (total10, carry); + total = si_shlqbyi (total, 4); + total = si_cgx (total10, carry, total); + total = si_shlqbyi (total, 4); + total = si_addx (total10, carry, total); + return *(TItype *) & total; +}
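The block comment at the top of __multi3 gives the scalar loop that the intrinsics above vectorize and unroll. For reference, a stand-alone C sketch of that same computation might look like the following; it assumes a host compiler with GCC's __int128 extension, and multi3_reference is only an illustrative name, not part of libgcc.

    #include <stdint.h>

    typedef unsigned __int128 u128;

    /* Scalar reference for a truncating 128 x 128 -> 128 bit multiply built
       from 16-bit limbs, mirroring the loop in the comment above (limb 0 is
       the most significant halfword, limb 7 the least significant).  */
    static u128
    multi3_reference (u128 l, u128 r)
    {
      uint16_t lw[8], rw[8];
      u128 total = 0;
      int i, j;

      for (i = 0; i < 8; i++)
        {
          lw[i] = (uint16_t) (l >> (16 * (7 - i)));
          rw[i] = (uint16_t) (r >> (16 * (7 - i)));
        }

      for (i = 0; i < 8; i++)
        for (j = 0; j < 8; j++)
          {
            int shift = 16 * (i + j);
            /* Partial products at weight 2^128 or above vanish in a
               truncating multiply, so skip them rather than shifting past
               the width of the type.  */
            if (shift < 128)
              total += (u128) ((uint32_t) lw[7 - i] * rw[7 - j]) << shift;
          }

      return total;
    }

The SPU version above computes the same 64 partial products with mpyu/mpyhhu, shifts them into place with shlqbyi, and folds the carries explicitly with cg/cgx/addx instead of relying on a wide accumulator.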
multi3.c Property changes : Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +Id \ No newline at end of property Index: spu-elf.h =================================================================== --- spu-elf.h (nonexistent) +++ spu-elf.h (revision 384) @@ -0,0 +1,84 @@ +/* Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc. + + This file is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your option) + any later version. + + This file is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#ifndef OBJECT_FORMAT_ELF + #error elf.h included before elfos.h +#endif + +#define BSS_SECTION_ASM_OP "\t.section .bss" + +#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \ + asm_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN) + + +/* The following macros define "native" directory locations; on the SPU, + these are used only when building the compiler with --with-sysroot. + This can be used to build a pair of PPU and SPU cross-compilers with + a common sysroot; the SPU compiler will search for its files in + ${sysroot}/include and ${sysroot}/lib. */ + +/* STANDARD_STARTFILE_PREFIX_1 is "/lib", which we keep. + STANDARD_STARTFILE_PREFIX_2 is "/usr/lib" -- we remove this. */ +#undef STANDARD_STARTFILE_PREFIX_2 +#define STANDARD_STARTFILE_PREFIX_2 "" + +/* Use "/include" instead of "/usr/include". */ +#undef STANDARD_INCLUDE_DIR +#define STANDARD_INCLUDE_DIR "/include" + +/* We do not provide any "/usr/local/include" directory on SPU. */ +#undef LOCAL_INCLUDE_DIR + +/* Provide a STARTFILE_SPEC appropriate for GNU/Linux. Here we add + the GNU/Linux magical crtbegin.o file (see crtstuff.c) which + provides part of the support for getting C++ file-scope static + object constructed before entering `main'. */ + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC "%{mstdmain: %{pg|p:gcrt2.o%s;:crt2.o%s}}\ + %{!mstdmain: %{pg|p:gcrt1.o%s;:crt1.o%s}}\ + crti.o%s crtbegin.o%s" + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC "crtend.o%s crtn.o%s" + +#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG + +#define DWARF2_DEBUGGING_INFO 1 +#define DWARF2_ASM_LINE_DEBUG_INFO 1 + +#define SET_ASM_OP "\t.set\t" + +#undef TARGET_ASM_NAMED_SECTION +#define TARGET_ASM_NAMED_SECTION default_elf_asm_named_section + +#define EH_FRAME_IN_DATA_SECTION 1 + +#define LINK_SPEC "%{mlarge-mem: --defsym __stack=0xfffffff0 }" + +#define LIB_SPEC "-( %{!shared:%{g*:-lg}} -lc -lgloss -) \ + %{mno-atomic-updates:-lgcc_cachemgr_nonatomic; :-lgcc_cachemgr} \ + %{mcache-size=128:-lgcc_cache128k; \ + mcache-size=64 :-lgcc_cache64k; \ + mcache-size=32 :-lgcc_cache32k; \ + mcache-size=16 :-lgcc_cache16k; \ + mcache-size=8 :-lgcc_cache8k; \ + :-lgcc_cache64k}" + +/* Turn off warnings in the assembler too. */ +#undef ASM_SPEC +#define ASM_SPEC "%{w:-W}" +
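STARTFILE_SPEC above is what makes -mstdmain select crt2.o (or gcrt2.o when profiling) instead of the default crt1.o. As far as user code is concerned, the visible difference is the entry-point convention: with -mstdmain an ordinary C99-style main can be used, as in the sketch below, while the default SPU startup expects the SPE-style entry point that receives the SPE id, argument pointer and environment pointer from the host side. That description of the default signature is an assumption here, since this header only chooses the startup objects.

    #include <stdio.h>

    /* Minimal program relying on the C99-style entry point provided by
       crt2.o, which -mstdmain selects via STARTFILE_SPEC above.  */
    int
    main (int argc, char **argv)
    {
      (void) argv;
      printf ("called with %d argument(s)\n", argc - 1);
      return 0;
    }

A plausible build line, assuming the cross tools are installed with the usual spu- prefix, would be: spu-gcc -mstdmain hello.c -o hello.elf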
spu-elf.h Property changes : Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +Id \ No newline at end of property Index: float_unsdidf.c =================================================================== --- float_unsdidf.c (nonexistent) +++ float_unsdidf.c (revision 384) @@ -0,0 +1,54 @@ +/* Copyright (C) 2006, 2008, 2009 Free Software Foundation, Inc. + + This file is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your option) + any later version. + + This file is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#include +const unsigned char __didf_scale[16] __attribute__ ((__aligned__ (16))) = { + 0x00, 0x00, 0x04, 0x3e, + 0x00, 0x00, 0x04, 0x1e, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00 +}; +const unsigned char __didf_pat[16] __attribute__ ((__aligned__ (16))) = { + 0x02, 0x03, 0x10, 0x11, + 0x12, 0x13, 0x80, 0x80, + 0x06, 0x07, 0x14, 0x15, + 0x16, 0x17, 0x80, 0x80 +}; + +/* double __float_unsdidf (unsigned long long int) + Construct two exact doubles representing the high and low parts (in + parallel), then add them. */ +qword __float_unsdidf (qword DI); +qword +__float_unsdidf (qword DI) +{ + qword t0, t1, t2, t3, t4, t5, t6, t7, t8; + t0 = si_clz (DI); + t1 = si_shl (DI, t0); + t2 = si_ceqi (t0, 32); + t3 = si_sf (t0, *(const qword *) __didf_scale); + t4 = si_a (t1, t1); + t5 = si_andc (t3, t2); + t6 = si_shufb (t5, t4, *(const qword *) __didf_pat); + t7 = si_shlqbii (t6, 4); + t8 = si_shlqbyi (t7, 8); + return si_dfa (t7, t8); +}
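The property __float_unsdidf relies on is that each 32-bit half of the 64-bit input converts to double exactly, so the only rounding happens in the final addition. A scalar C sketch of the same idea follows; float_unsdidf_reference is an illustrative name, and the SPU code builds the two exact doubles directly with shuffles and the biased exponents in __didf_scale rather than by multiplying.

    #include <stdint.h>

    /* Convert an unsigned 64-bit integer to double by splitting it into two
       32-bit halves.  Both halves, and the scaling of the high half by 2^32,
       are exact in double; the single rounding step is the final add.  */
    static double
    float_unsdidf_reference (uint64_t x)
    {
      double hi = (double) (uint32_t) (x >> 32);
      double lo = (double) (uint32_t) x;
      return hi * 4294967296.0 + lo;   /* 4294967296.0 == 2^32 */
    }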
float_unsdidf.c Property changes : Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +Id \ No newline at end of property Index: constraints.md =================================================================== --- constraints.md (nonexistent) +++ constraints.md (revision 384) @@ -0,0 +1,179 @@ +;; Constraint definitions for SPU +;; Copyright (C) 2006, 2007 Free Software Foundation, Inc. +;; +;; This file is free software; you can redistribute it and/or modify it under +;; the terms of the GNU General Public License as published by the Free +;; Software Foundation; either version 3 of the License, or (at your option) +;; any later version. + +;; This file is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +;; for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + + +;; ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz +;; GCC: ffffiiiiiiii x x x x xxxx xx +;; SPU: xxxx xxx xxxx xxxx x xxx xx x xxx xx +;; FREE: ffff i a a a a a aa aaa +;; x - used +;; a - available +;; i - available for integer immediates +;; f - available for floating point immediates + +;; For most immediate constraints we have 3 variations to deal with the +;; fact const_int has no mode. One variation treats const_int as 32 bit, +;; another treats it as 64 bit, and the third sign extends it to 128 bit. + +(define_constraint "A" + "An immediate which can be loaded with the il/ila/ilh/ilhu instructions. const_int is treated as a 32-bit value." + (ior (and (match_code "const_int,const_double,const_vector") + (match_test "immediate_load_p (op, SImode)")) + (match_code "symbol_ref,label_ref,high,const"))) + +(define_constraint "B" + "An immediate for arithmetic instructions (e.g., ai, ceqi). const_int is treated as a 32-bit value." + (and (match_code "const_int,const_double,const_vector") + (match_test "arith_immediate_p (op, SImode, -0x200, 0x1ff)"))) + +(define_constraint "C" + "An immediate for and/xor/or instructions. const_int is treated as a 32-bit value." + (and (match_code "const_int,const_double,const_vector") + (match_test "logical_immediate_p (op, SImode)"))) + +(define_constraint "D" + "An immediate for iohl instruction. const_int is treated as a 32-bit value." + (and (match_code "const_int,const_double,const_vector") + (match_test "iohl_immediate_p (op, SImode)"))) + +(define_constraint "U" + "An immediate which can be loaded with the il/ila/ilh/ilhu instructions. const_int is sign extended to 128 bit." + (and (match_code "const_int,const_double,const_vector") + (match_test "immediate_load_p (op, TImode)"))) + +(define_constraint "W" + "An immediate for shift and rotate instructions. const_int is treated as a 32-bit value." + (and (match_code "const_int,const_double,const_vector") + (match_test "arith_immediate_p (op, SImode, -0x80000000ll, 0x7fffffffll)"))) + +(define_constraint "Y" + "An immediate for and/xor/or instructions. const_int is sign extended as a 128 bit." + (and (match_code "const_int,const_double,const_vector") + (match_test "logical_immediate_p (op, TImode)"))) + +(define_constraint "Z" + "An immediate for iohl instruction. const_int is sign extended to 128 bit." 
+ (and (match_code "const_int,const_double,const_vector") + (match_test "iohl_immediate_p (op, TImode)"))) + +(define_constraint "a" + "An immediate which can be loaded with the il/ila/ilh/ilhu instructions. const_int is treated as a 64-bit value." + (and (match_code "const_int") + (match_test "immediate_load_p (op, DImode)"))) + +(define_constraint "c" + "An immediate for and/xor/or instructions. const_int is treated as a 64-bit value." + (and (match_code "const_int") + (match_test "logical_immediate_p (op, DImode)"))) + +(define_constraint "d" + "An immediate for iohl instruction. const_int is treated as a 64-bit value." + (and (match_code "const_int") + (match_test "iohl_immediate_p (op, DImode)"))) + +(define_constraint "f" + "An immediate which can be loaded with fsmbi." + (and (match_code "const_int,const_double,const_vector") + (match_test "fsmbi_const_p (op)"))) + +(define_constraint "j" + "An immediate which can be loaded with one of the cbd/chd/cwd/cdd instructions. const_int is treated as a 32-bit value." + (and (match_code "const_int,const_double,const_vector") + (match_test "cpat_const_p (op, SImode)"))) + +(define_constraint "k" + "An immediate which can be loaded with one of the cbd/chd/cwd/cdd instructions. const_int is treated as a 64-bit value." + (and (match_code "const_int,const_double,const_vector") + (match_test "cpat_const_p (op, DImode)"))) + +(define_constraint "l" + "An immediate which can be loaded with one of the cbd/chd/cwd/cdd instructions." + (and (match_code "const_double,const_vector") + (match_test "cpat_const_p (op, TImode)"))) + + +;; Integer constraints + +(define_constraint "I" + "A constant in the range [-64, 63] for shift/rotate instructions." + (and (match_code "const_int") + (match_test "ival >= -0x40 && ival <= 0x3f"))) + +(define_constraint "J" + "An unsigned 7-bit constant for conversion/nop/channel instructions." + (and (match_code "const_int") + (match_test "ival >= 0 && ival <= 0x7f"))) + +(define_constraint "K" + "A signed 10-bit constant for most arithmetic instructions." + (and (match_code "const_int") + (match_test "ival >= -0x200 && ival <= 0x1ff"))) + +(define_constraint "M" + "A signed 16-bit immediate for @code{stop}." + (and (match_code "const_int") + (match_test "ival >= -0x8000ll && ival <= 0x7fffll"))) + +(define_constraint "N" + "An unsigned 16-bit constant for @code{iohl} and @code{fsmbi}." + (and (match_code "const_int") + (match_test "ival >= 0 && ival <= 0xffff"))) + +(define_constraint "O" + "An unsigned 7-bit constant whose 3 least significant bits are 0." + (and (match_code "const_int") + (match_test "(ival & 7) == 0"))) + +(define_constraint "P" + "An unsigned 3-bit constant for 16-byte rotates and shifts" + (and (match_code "const_int") + (match_test "ival >= 0 && ival <= 7"))) + + +;; Memory constraints + +(define_memory_constraint "R" + "Call operand, reg, for indirect calls" + (and (match_code "mem") + (match_test "GET_CODE(XEXP(op, 0)) == REG"))) + +(define_memory_constraint "S" + "Call operand, symbol, for relative calls." + (and (match_code "mem") + (match_test "!TARGET_LARGE_MEM + && ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF + || GET_CODE (XEXP (op, 0)) == LABEL_REF))"))) + +(define_memory_constraint "T" + "Call operand, const_int, for absolute calls." + (and (match_code "mem") + (match_test "GET_CODE (XEXP (op, 0)) == CONST_INT + && INTVAL (XEXP (op, 0)) >= 0 + && INTVAL (XEXP (op, 0)) <= 0x3ffff"))) + + +;; Floating-point constant constraints. 
+ +(define_constraint "v" + "Floating point power of 2 with exponent in [0..127]" + (and (match_code "const_double,const_vector") + (match_test "exp2_immediate_p (op, VOIDmode, 0, 127)"))) + +(define_constraint "w" + "Floating point power of 2 with exponent in [-126..0]" + (and (match_code "const_double,const_vector") + (match_test "exp2_immediate_p (op, VOIDmode, -126, 0)"))) Index: spu.md =================================================================== --- spu.md (nonexistent) +++ spu.md (revision 384) @@ -0,0 +1,5328 @@ +;; Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc. + +;; This file is free software; you can redistribute it and/or modify it under +;; the terms of the GNU General Public License as published by the Free +;; Software Foundation; either version 3 of the License, or (at your option) +;; any later version. + +;; This file is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +;; for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;;- See file "rtl.def" for documentation on define_insn, match_*, et. al. + + +;; Define an insn type attribute. This is used in function unit delay +;; computations. +;; multi0 is a multiple insn rtl whose first insn is in pipe0 +;; multi1 is a multiple insn rtl whose first insn is in pipe1 +(define_attr "type" "fx2,shuf,fx3,load,store,br,spr,lnop,nop,fxb,fp6,fp7,fpd,iprefetch,multi0,multi1,hbr,convert" + (const_string "fx2")) + +;; Length (in bytes). +(define_attr "length" "" + (const_int 4)) + +(define_attr "tune" "cell,celledp" (const (symbol_ref "spu_tune"))) +;; Processor type -- this attribute must exactly match the processor_type +;; enumeration in spu.h. + +(define_attr "cpu" "spu" + (const (symbol_ref "spu_cpu_attr"))) + +; (define_function_unit NAME MULTIPLICITY SIMULTANEITY +; TEST READY-DELAY ISSUE-DELAY [CONFLICT-LIST]) + +(define_cpu_unit "pipe0,pipe1,fp,ls") + +(define_insn_reservation "NOP" 1 (eq_attr "type" "nop") + "pipe0") + +(define_insn_reservation "FX2" 2 (eq_attr "type" "fx2") + "pipe0, nothing") + +(define_insn_reservation "FX3" 4 (eq_attr "type" "fx3,fxb") + "pipe0, nothing*3") + +(define_insn_reservation "FP6" 6 (eq_attr "type" "fp6") + "pipe0 + fp, nothing*5") + +(define_insn_reservation "FP7" 7 (eq_attr "type" "fp7") + "pipe0, fp, nothing*5") + +;; The behavior of the double precision is that both pipes stall +;; for 6 cycles and the rest of the operation pipelines for +;; 7 cycles. The simplest way to model this is to simply ignore +;; the 6 cyle stall. 
+(define_insn_reservation "FPD" 7 + (and (eq_attr "tune" "cell") + (eq_attr "type" "fpd")) + "pipe0 + pipe1, fp, nothing*5") + +;; Tune for CELLEDP, 9 cycles, dual-issuable, fully pipelined +(define_insn_reservation "FPD_CELLEDP" 9 + (and (eq_attr "tune" "celledp") + (eq_attr "type" "fpd")) + "pipe0 + fp, nothing*8") + +(define_insn_reservation "LNOP" 1 (eq_attr "type" "lnop") + "pipe1") + +(define_insn_reservation "STORE" 1 (eq_attr "type" "store") + "pipe1 + ls") + +(define_insn_reservation "IPREFETCH" 1 (eq_attr "type" "iprefetch") + "pipe1 + ls") + +(define_insn_reservation "SHUF" 4 (eq_attr "type" "shuf,br,spr") + "pipe1, nothing*3") + +(define_insn_reservation "LOAD" 6 (eq_attr "type" "load") + "pipe1 + ls, nothing*5") + +(define_insn_reservation "HBR" 18 (eq_attr "type" "hbr") + "pipe1, nothing*15") + +(define_insn_reservation "MULTI0" 4 (eq_attr "type" "multi0") + "pipe0+pipe1, nothing*3") + +(define_insn_reservation "MULTI1" 4 (eq_attr "type" "multi1") + "pipe1, nothing*3") + +(define_insn_reservation "CONVERT" 0 (eq_attr "type" "convert") + "nothing") + +;; Force pipe0 to occur before pipe 1 in a cycle. +(absence_set "pipe0" "pipe1") + + +(define_constants [ + (UNSPEC_BLOCKAGE 0) + (UNSPEC_IPREFETCH 1) + (UNSPEC_FREST 2) + (UNSPEC_FRSQEST 3) + (UNSPEC_FI 4) + (UNSPEC_EXTEND_CMP 5) + (UNSPEC_CG 6) + (UNSPEC_CGX 7) + (UNSPEC_ADDX 8) + (UNSPEC_BG 9) + (UNSPEC_BGX 10) + (UNSPEC_SFX 11) + (UNSPEC_FSM 12) + (UNSPEC_HBR 13) + (UNSPEC_LNOP 14) + (UNSPEC_NOP 15) + (UNSPEC_CONVERT 16) + (UNSPEC_SELB 17) + (UNSPEC_SHUFB 18) + (UNSPEC_CPAT 19) + (UNSPEC_SYNC 20) + (UNSPEC_CNTB 21) + (UNSPEC_SUMB 22) + (UNSPEC_FSMB 23) + (UNSPEC_FSMH 24) + (UNSPEC_GBB 25) + (UNSPEC_GBH 26) + (UNSPEC_GB 27) + (UNSPEC_AVGB 28) + (UNSPEC_ABSDB 29) + (UNSPEC_ORX 30) + (UNSPEC_HEQ 31) + (UNSPEC_HGT 32) + (UNSPEC_HLGT 33) + (UNSPEC_STOP 38) + (UNSPEC_STOPD 39) + (UNSPEC_SET_INTR 40) + (UNSPEC_FSCRRD 42) + (UNSPEC_FSCRWR 43) + (UNSPEC_MFSPR 44) + (UNSPEC_MTSPR 45) + (UNSPEC_RDCH 46) + (UNSPEC_RCHCNT 47) + (UNSPEC_WRCH 48) + (UNSPEC_SPU_REALIGN_LOAD 49) + (UNSPEC_SPU_MASK_FOR_LOAD 50) + (UNSPEC_DFTSV 51) + (UNSPEC_FLOAT_EXTEND 52) + (UNSPEC_FLOAT_TRUNCATE 53) + (UNSPEC_SP_SET 54) + (UNSPEC_SP_TEST 55) +]) + +(include "predicates.md") +(include "constraints.md") + + +;; Mode iterators + +(define_mode_iterator ALL [QI V16QI + HI V8HI + SI V4SI + DI V2DI + TI + SF V4SF + DF V2DF]) + +; Everything except DI and TI which are handled separately because +; they need different constraints to correctly test VOIDmode constants +(define_mode_iterator MOV [QI V16QI + HI V8HI + SI V4SI + V2DI + SF V4SF + DF V2DF]) + +(define_mode_iterator QHSI [QI HI SI]) +(define_mode_iterator QHSDI [QI HI SI DI]) +(define_mode_iterator DTI [DI TI]) + +(define_mode_iterator VINT [QI V16QI + HI V8HI + SI V4SI + DI V2DI + TI]) + +(define_mode_iterator VQHSI [QI V16QI + HI V8HI + SI V4SI]) + +(define_mode_iterator VHSI [HI V8HI + SI V4SI]) + +(define_mode_iterator VSDF [SF V4SF + DF V2DF]) + +(define_mode_iterator VSI [SI V4SI]) +(define_mode_iterator VDI [DI V2DI]) +(define_mode_iterator VSF [SF V4SF]) +(define_mode_iterator VDF [DF V2DF]) + +(define_mode_iterator VCMP [V16QI + V8HI + V4SI + V4SF + V2DF]) + +(define_mode_iterator VCMPU [V16QI + V8HI + V4SI]) + +(define_mode_attr v [(V8HI "v") (V4SI "v") + (HI "") (SI "")]) + +(define_mode_attr bh [(QI "b") (V16QI "b") + (HI "h") (V8HI "h") + (SI "") (V4SI "")]) + +(define_mode_attr d [(SF "") (V4SF "") + (DF "d") (V2DF "d")]) +(define_mode_attr d6 [(SF "6") (V4SF "6") + (DF "d") (V2DF "d")]) + 
+(define_mode_attr f2i [(SF "si") (V4SF "v4si") + (DF "di") (V2DF "v2di")]) +(define_mode_attr F2I [(SF "SI") (V4SF "V4SI") + (DF "DI") (V2DF "V2DI")]) +(define_mode_attr i2f [(SI "sf") (V4SI "v4sf") + (DI "df") (V2DI "v2df")]) +(define_mode_attr I2F [(SI "SF") (V4SI "V4SF") + (DI "DF") (V2DI "V2DF")]) + +(define_mode_attr DF2I [(DF "SI") (V2DF "V2DI")]) + +(define_mode_attr umask [(HI "f") (V8HI "f") + (SI "g") (V4SI "g")]) +(define_mode_attr nmask [(HI "F") (V8HI "F") + (SI "G") (V4SI "G")]) + +;; Used for carry and borrow instructions. +(define_mode_iterator CBOP [SI DI V4SI V2DI]) + +;; Used in vec_set and vec_extract +(define_mode_iterator V [V2DI V4SI V8HI V16QI V2DF V4SF]) +(define_mode_attr inner [(V16QI "QI") + (V8HI "HI") + (V4SI "SI") + (V2DI "DI") + (V4SF "SF") + (V2DF "DF")]) +(define_mode_attr vmult [(V16QI "1") + (V8HI "2") + (V4SI "4") + (V2DI "8") + (V4SF "4") + (V2DF "8")]) +(define_mode_attr voff [(V16QI "13") + (V8HI "14") + (V4SI "0") + (V2DI "0") + (V4SF "0") + (V2DF "0")]) + + +;; mov + +(define_expand "mov" + [(set (match_operand:ALL 0 "spu_nonimm_operand" "=r,r,r,m") + (match_operand:ALL 1 "general_operand" "r,i,m,r"))] + "" + { + if (spu_expand_mov(operands, mode)) + DONE; + }) + +(define_split + [(set (match_operand 0 "spu_reg_operand") + (match_operand 1 "immediate_operand"))] + + "" + [(set (match_dup 0) + (high (match_dup 1))) + (set (match_dup 0) + (lo_sum (match_dup 0) + (match_dup 1)))] + { + if (spu_split_immediate (operands)) + DONE; + FAIL; + }) + +(define_insn "pic" + [(set (match_operand:SI 0 "spu_reg_operand" "=r") + (match_operand:SI 1 "immediate_operand" "s")) + (use (const_int 0))] + "flag_pic" + "ila\t%0,%%pic(%1)") + +;; Whenever a function generates the 'pic' pattern above we need to +;; load the pic_offset_table register. +;; GCC doesn't deal well with labels in the middle of a block so we +;; hardcode the offsets in the asm here. 
+(define_insn "load_pic_offset" + [(set (match_operand:SI 0 "spu_reg_operand" "=r") + (unspec:SI [(const_int 0)] 0)) + (set (match_operand:SI 1 "spu_reg_operand" "=r") + (unspec:SI [(const_int 0)] 0))] + "flag_pic" + "ila\t%1,.+8\;brsl\t%0,4" + [(set_attr "length" "8") + (set_attr "type" "multi0")]) + + +;; move internal + +(define_insn "_mov" + [(set (match_operand:MOV 0 "spu_dest_operand" "=r,r,r,r,r,m") + (match_operand:MOV 1 "spu_mov_operand" "r,A,f,j,m,r"))] + "register_operand(operands[0], mode) + || register_operand(operands[1], mode)" + "@ + ori\t%0,%1,0 + il%s1\t%0,%S1 + fsmbi\t%0,%S1 + c%s1d\t%0,%S1($sp) + lq%p1\t%0,%1 + stq%p0\t%1,%0" + [(set_attr "type" "fx2,fx2,shuf,shuf,load,store")]) + +(define_insn "low_" + [(set (match_operand:VSI 0 "spu_reg_operand" "=r") + (lo_sum:VSI (match_operand:VSI 1 "spu_reg_operand" "0") + (match_operand:VSI 2 "immediate_operand" "i")))] + "" + "iohl\t%0,%2@l") + +(define_insn "_movdi" + [(set (match_operand:DI 0 "spu_dest_operand" "=r,r,r,r,r,m") + (match_operand:DI 1 "spu_mov_operand" "r,a,f,k,m,r"))] + "register_operand(operands[0], DImode) + || register_operand(operands[1], DImode)" + "@ + ori\t%0,%1,0 + il%d1\t%0,%D1 + fsmbi\t%0,%D1 + c%d1d\t%0,%D1($sp) + lq%p1\t%0,%1 + stq%p0\t%1,%0" + [(set_attr "type" "fx2,fx2,shuf,shuf,load,store")]) + +(define_insn "_movti" + [(set (match_operand:TI 0 "spu_dest_operand" "=r,r,r,r,r,m") + (match_operand:TI 1 "spu_mov_operand" "r,U,f,l,m,r"))] + "register_operand(operands[0], TImode) + || register_operand(operands[1], TImode)" + "@ + ori\t%0,%1,0 + il%t1\t%0,%T1 + fsmbi\t%0,%T1 + c%t1d\t%0,%T1($sp) + lq%p1\t%0,%1 + stq%p0\t%1,%0" + [(set_attr "type" "fx2,fx2,shuf,shuf,load,store")]) + +(define_split + [(set (match_operand 0 "spu_reg_operand") + (match_operand 1 "memory_operand"))] + "GET_MODE_SIZE (GET_MODE (operands[0])) < 16 + && GET_MODE(operands[0]) == GET_MODE(operands[1]) + && !reload_in_progress && !reload_completed" + [(set (match_dup 0) + (match_dup 1))] + { if (spu_split_load(operands)) + DONE; + }) + +(define_split + [(set (match_operand 0 "memory_operand") + (match_operand 1 "spu_reg_operand"))] + "GET_MODE_SIZE (GET_MODE (operands[0])) < 16 + && GET_MODE(operands[0]) == GET_MODE(operands[1]) + && !reload_in_progress && !reload_completed" + [(set (match_dup 0) + (match_dup 1))] + { if (spu_split_store(operands)) + DONE; + }) +;; Operand 3 is the number of bytes. 
1:b 2:h 4:w 8:d + +(define_expand "cpat" + [(set (match_operand:TI 0 "spu_reg_operand" "=r,r") + (unspec:TI [(match_operand:SI 1 "spu_reg_operand" "r,r") + (match_operand:SI 2 "spu_nonmem_operand" "r,n") + (match_operand:SI 3 "immediate_operand" "i,i")] UNSPEC_CPAT))] + "" + { + rtx x = gen_cpat_const (operands); + if (x) + { + emit_move_insn (operands[0], x); + DONE; + } + }) + +(define_insn "_cpat" + [(set (match_operand:TI 0 "spu_reg_operand" "=r,r") + (unspec:TI [(match_operand:SI 1 "spu_reg_operand" "r,r") + (match_operand:SI 2 "spu_nonmem_operand" "r,n") + (match_operand:SI 3 "immediate_operand" "i,i")] UNSPEC_CPAT))] + "" + "@ + c%M3x\t%0,%1,%2 + c%M3d\t%0,%C2(%1)" + [(set_attr "type" "shuf")]) + +(define_split + [(set (match_operand:TI 0 "spu_reg_operand") + (unspec:TI [(match_operand:SI 1 "spu_nonmem_operand") + (match_operand:SI 2 "immediate_operand") + (match_operand:SI 3 "immediate_operand")] UNSPEC_CPAT))] + "" + [(set (match_dup:TI 0) + (match_dup:TI 4))] + { + operands[4] = gen_cpat_const (operands); + if (!operands[4]) + FAIL; + }) + +;; extend + +(define_insn "extendqihi2" + [(set (match_operand:HI 0 "spu_reg_operand" "=r") + (sign_extend:HI (match_operand:QI 1 "spu_reg_operand" "r")))] + "" + "xsbh\t%0,%1") + +(define_insn "extendhisi2" + [(set (match_operand:SI 0 "spu_reg_operand" "=r") + (sign_extend:SI (match_operand:HI 1 "spu_reg_operand" "r")))] + "" + "xshw\t%0,%1") + +(define_expand "extendsidi2" + [(set (match_dup:DI 2) + (zero_extend:DI (match_operand:SI 1 "spu_reg_operand" ""))) + (set (match_operand:DI 0 "spu_reg_operand" "") + (sign_extend:DI (vec_select:SI (match_dup:V2SI 3) + (parallel [(const_int 1)]))))] + "" + { + operands[2] = gen_reg_rtx (DImode); + operands[3] = spu_gen_subreg (V2SImode, operands[2]); + }) + +(define_insn "xswd" + [(set (match_operand:DI 0 "spu_reg_operand" "=r") + (sign_extend:DI + (vec_select:SI + (match_operand:V2SI 1 "spu_reg_operand" "r") + (parallel [(const_int 1) ]))))] + "" + "xswd\t%0,%1"); + +;; By splitting this late we don't allow much opportunity for sharing of +;; constants. That's ok because this should really be optimized away. 
+(define_insn_and_split "extendti2" + [(set (match_operand:TI 0 "register_operand" "") + (sign_extend:TI (match_operand:QHSDI 1 "register_operand" "")))] + "" + "#" + "" + [(set (match_dup:TI 0) + (sign_extend:TI (match_dup:QHSDI 1)))] + { + spu_expand_sign_extend(operands); + DONE; + }) + + +;; zero_extend + +(define_insn "zero_extendqihi2" + [(set (match_operand:HI 0 "spu_reg_operand" "=r") + (zero_extend:HI (match_operand:QI 1 "spu_reg_operand" "r")))] + "" + "andi\t%0,%1,0x00ff") + +(define_insn "zero_extendqisi2" + [(set (match_operand:SI 0 "spu_reg_operand" "=r") + (zero_extend:SI (match_operand:QI 1 "spu_reg_operand" "r")))] + "" + "andi\t%0,%1,0x00ff") + +(define_expand "zero_extendhisi2" + [(set (match_operand:SI 0 "spu_reg_operand" "=r") + (zero_extend:SI (match_operand:HI 1 "spu_reg_operand" "r"))) + (clobber (match_scratch:SI 2 "=&r"))] + "" + { + rtx mask = gen_reg_rtx (SImode); + rtx op1 = simplify_gen_subreg (SImode, operands[1], HImode, 0); + emit_move_insn (mask, GEN_INT (0xffff)); + emit_insn (gen_andsi3(operands[0], op1, mask)); + DONE; + }) + +(define_insn "zero_extendsidi2" + [(set (match_operand:DI 0 "spu_reg_operand" "=r") + (zero_extend:DI (match_operand:SI 1 "spu_reg_operand" "r")))] + "" + "rotqmbyi\t%0,%1,-4" + [(set_attr "type" "shuf")]) + +(define_insn "zero_extendqiti2" + [(set (match_operand:TI 0 "spu_reg_operand" "=r") + (zero_extend:TI (match_operand:QI 1 "spu_reg_operand" "r")))] + "" + "andi\t%0,%1,0x00ff\;rotqmbyi\t%0,%0,-12" + [(set_attr "type" "multi0") + (set_attr "length" "8")]) + +(define_insn "zero_extendhiti2" + [(set (match_operand:TI 0 "spu_reg_operand" "=r") + (zero_extend:TI (match_operand:HI 1 "spu_reg_operand" "r")))] + "" + "shli\t%0,%1,16\;rotqmbyi\t%0,%0,-14" + [(set_attr "type" "multi1") + (set_attr "length" "8")]) + +(define_insn "zero_extendsiti2" + [(set (match_operand:TI 0 "spu_reg_operand" "=r") + (zero_extend:TI (match_operand:SI 1 "spu_reg_operand" "r")))] + "" + "rotqmbyi\t%0,%1,-12" + [(set_attr "type" "shuf")]) + +(define_insn "zero_extendditi2" + [(set (match_operand:TI 0 "spu_reg_operand" "=r") + (zero_extend:TI (match_operand:DI 1 "spu_reg_operand" "r")))] + "" + "rotqmbyi\t%0,%1,-8" + [(set_attr "type" "shuf")]) + + +;; trunc + +(define_insn "truncdiqi2" + [(set (match_operand:QI 0 "spu_reg_operand" "=r") + (truncate:QI (match_operand:DI 1 "spu_reg_operand" "r")))] + "" + "shlqbyi\t%0,%1,4" + [(set_attr "type" "shuf")]) + +(define_insn "truncdihi2" + [(set (match_operand:HI 0 "spu_reg_operand" "=r") + (truncate:HI (match_operand:DI 1 "spu_reg_operand" "r")))] + "" + "shlqbyi\t%0,%1,4" + [(set_attr "type" "shuf")]) + +(define_insn "truncdisi2" + [(set (match_operand:SI 0 "spu_reg_operand" "=r") + (truncate:SI (match_operand:DI 1 "spu_reg_operand" "r")))] + "" + "shlqbyi\t%0,%1,4" + [(set_attr "type" "shuf")]) + +(define_insn "trunctiqi2" + [(set (match_operand:QI 0 "spu_reg_operand" "=r") + (truncate:QI (match_operand:TI 1 "spu_reg_operand" "r")))] + "" + "shlqbyi\t%0,%1,12" + [(set_attr "type" "shuf")]) + +(define_insn "trunctihi2" + [(set (match_operand:HI 0 "spu_reg_operand" "=r") + (truncate:HI (match_operand:TI 1 "spu_reg_operand" "r")))] + "" + "shlqbyi\t%0,%1,12" + [(set_attr "type" "shuf")]) + +(define_insn "trunctisi2" + [(set (match_operand:SI 0 "spu_reg_operand" "=r") + (truncate:SI (match_operand:TI 1 "spu_reg_operand" "r")))] + "" + "shlqbyi\t%0,%1,12" + [(set_attr "type" "shuf")]) + +(define_insn "trunctidi2" + [(set (match_operand:DI 0 "spu_reg_operand" "=r") + (truncate:DI (match_operand:TI 1 
"spu_reg_operand" "r")))] + "" + "shlqbyi\t%0,%1,8" + [(set_attr "type" "shuf")]) + + +;; float conversions + +(define_insn "float2" + [(set (match_operand: 0 "spu_reg_operand" "=r") + (float: (match_operand:VSI 1 "spu_reg_operand" "r")))] + "" + "csflt\t%0,%1,0" + [(set_attr "type" "fp7")]) + +(define_insn "fix_trunc2" + [(set (match_operand: 0 "spu_reg_operand" "=r") + (fix: (match_operand:VSF 1 "spu_reg_operand" "r")))] + "" + "cflts\t%0,%1,0" + [(set_attr "type" "fp7")]) + +(define_insn "floatuns2" + [(set (match_operand: 0 "spu_reg_operand" "=r") + (unsigned_float: (match_operand:VSI 1 "spu_reg_operand" "r")))] + "" + "cuflt\t%0,%1,0" + [(set_attr "type" "fp7")]) + +(define_insn "fixuns_trunc2" + [(set (match_operand: 0 "spu_reg_operand" "=r") + (unsigned_fix: (match_operand:VSF 1 "spu_reg_operand" "r")))] + "" + "cfltu\t%0,%1,0" + [(set_attr "type" "fp7")]) + +(define_insn "float2_mul" + [(set (match_operand: 0 "spu_reg_operand" "=r") + (mult: (float: (match_operand:VSI 1 "spu_reg_operand" "r")) + (match_operand: 2 "spu_inv_exp2_operand" "w")))] + "" + "csflt\t%0,%1,%w2" + [(set_attr "type" "fp7")]) + +(define_insn "float2_div" + [(set (match_operand: 0 "spu_reg_operand" "=r") + (div: (float: (match_operand:VSI 1 "spu_reg_operand" "r")) + (match_operand: 2 "spu_exp2_operand" "v")))] + "" + "csflt\t%0,%1,%v2" + [(set_attr "type" "fp7")]) + + +(define_insn "fix_trunc2_mul" + [(set (match_operand: 0 "spu_reg_operand" "=r") + (fix: (mult:VSF (match_operand:VSF 1 "spu_reg_operand" "r") + (match_operand:VSF 2 "spu_exp2_operand" "v"))))] + "" + "cflts\t%0,%1,%v2" + [(set_attr "type" "fp7")]) + +(define_insn "floatuns2_mul" + [(set (match_operand: 0 "spu_reg_operand" "=r") + (mult: (unsigned_float: (match_operand:VSI 1 "spu_reg_operand" "r")) + (match_operand: 2 "spu_inv_exp2_operand" "w")))] + "" + "cuflt\t%0,%1,%w2" + [(set_attr "type" "fp7")]) + +(define_insn "floatuns2_div" + [(set (match_operand: 0 "spu_reg_operand" "=r") + (div: (unsigned_float: (match_operand:VSI 1 "spu_reg_operand" "r")) + (match_operand: 2 "spu_exp2_operand" "v")))] + "" + "cuflt\t%0,%1,%v2" + [(set_attr "type" "fp7")]) + +(define_insn "fixuns_trunc2_mul" + [(set (match_operand: 0 "spu_reg_operand" "=r") + (unsigned_fix: (mult:VSF (match_operand:VSF 1 "spu_reg_operand" "r") + (match_operand:VSF 2 "spu_exp2_operand" "v"))))] + "" + "cfltu\t%0,%1,%v2" + [(set_attr "type" "fp7")]) + +(define_insn "extendsfdf2" + [(set (match_operand:DF 0 "spu_reg_operand" "=r") + (unspec:DF [(match_operand:SF 1 "spu_reg_operand" "r")] + UNSPEC_FLOAT_EXTEND))] + "" + "fesd\t%0,%1" + [(set_attr "type" "fpd")]) + +(define_insn "truncdfsf2" + [(set (match_operand:SF 0 "spu_reg_operand" "=r") + (unspec:SF [(match_operand:DF 1 "spu_reg_operand" "r")] + UNSPEC_FLOAT_TRUNCATE))] + "" + "frds\t%0,%1" + [(set_attr "type" "fpd")]) + +(define_expand "floatdisf2" + [(set (match_operand:SF 0 "register_operand" "") + (float:SF (match_operand:DI 1 "register_operand" "")))] + "" + { + rtx c0 = gen_reg_rtx (SImode); + rtx r0 = gen_reg_rtx (DImode); + rtx r1 = gen_reg_rtx (SFmode); + rtx r2 = gen_reg_rtx (SImode); + rtx setneg = gen_reg_rtx (SImode); + rtx isneg = gen_reg_rtx (SImode); + rtx neg = gen_reg_rtx (DImode); + rtx mask = gen_reg_rtx (DImode); + + emit_move_insn (c0, GEN_INT (-0x80000000ll)); + + emit_insn (gen_negdi2 (neg, operands[1])); + emit_insn (gen_cgt_di_m1 (isneg, operands[1])); + emit_insn (gen_extend_compare (mask, isneg)); + emit_insn (gen_selb (r0, neg, operands[1], mask)); + emit_insn (gen_andc_si (setneg, c0, isneg)); + + 
emit_insn (gen_floatunsdisf2 (r1, r0)); + + emit_insn (gen_iorsi3 (r2, gen_rtx_SUBREG (SImode, r1, 0), setneg)); + emit_move_insn (operands[0], gen_rtx_SUBREG (SFmode, r2, 0)); + DONE; + }) + +(define_insn_and_split "floatunsdisf2" + [(set (match_operand:SF 0 "register_operand" "=r") + (unsigned_float:SF (match_operand:DI 1 "register_operand" "r"))) + (clobber (match_scratch:SF 2 "=r")) + (clobber (match_scratch:SF 3 "=r")) + (clobber (match_scratch:SF 4 "=r"))] + "" + "#" + "reload_completed" + [(set (match_dup:SF 0) + (unsigned_float:SF (match_dup:DI 1)))] + { + rtx op1_v4si = gen_rtx_REG (V4SImode, REGNO (operands[1])); + rtx op2_v4sf = gen_rtx_REG (V4SFmode, REGNO (operands[2])); + rtx op2_ti = gen_rtx_REG (TImode, REGNO (operands[2])); + rtx op3_ti = gen_rtx_REG (TImode, REGNO (operands[3])); + + REAL_VALUE_TYPE scale; + real_2expN (&scale, 32, SFmode); + + emit_insn (gen_floatunsv4siv4sf2 (op2_v4sf, op1_v4si)); + emit_insn (gen_shlqby_ti (op3_ti, op2_ti, GEN_INT (4))); + + emit_move_insn (operands[4], + CONST_DOUBLE_FROM_REAL_VALUE (scale, SFmode)); + emit_insn (gen_fma_sf (operands[0], + operands[2], operands[4], operands[3])); + DONE; + }) + +;; Do (double)(operands[1]+0x80000000u)-(double)0x80000000 +(define_expand "floatsidf2" + [(set (match_operand:DF 0 "register_operand" "") + (float:DF (match_operand:SI 1 "register_operand" "")))] + "" + { + rtx c0 = gen_reg_rtx (SImode); + rtx c1 = gen_reg_rtx (DFmode); + rtx r0 = gen_reg_rtx (SImode); + rtx r1 = gen_reg_rtx (DFmode); + + emit_move_insn (c0, GEN_INT (-0x80000000ll)); + emit_move_insn (c1, spu_float_const ("2147483648", DFmode)); + emit_insn (gen_xorsi3 (r0, operands[1], c0)); + emit_insn (gen_floatunssidf2 (r1, r0)); + emit_insn (gen_subdf3 (operands[0], r1, c1)); + DONE; + }) + +(define_expand "floatunssidf2" + [(set (match_operand:DF 0 "register_operand" "=r") + (unsigned_float:DF (match_operand:SI 1 "register_operand" "r")))] + "" + "{ + rtx value, insns; + rtx c0 = spu_const_from_ints (V16QImode, 0x02031011, 0x12138080, + 0x06071415, 0x16178080); + rtx r0 = gen_reg_rtx (V16QImode); + + if (optimize_size) + { + start_sequence (); + value = + emit_library_call_value (convert_optab_libfunc (ufloat_optab, + DFmode, SImode), + NULL_RTX, LCT_NORMAL, DFmode, 1, operands[1], SImode); + insns = get_insns (); + end_sequence (); + emit_libcall_block (insns, operands[0], value, + gen_rtx_UNSIGNED_FLOAT (DFmode, operands[1])); + } + else + { + emit_move_insn (r0, c0); + emit_insn (gen_floatunssidf2_internal (operands[0], operands[1], r0)); + } + DONE; + }") + +(define_insn_and_split "floatunssidf2_internal" + [(set (match_operand:DF 0 "register_operand" "=r") + (unsigned_float:DF (match_operand:SI 1 "register_operand" "r"))) + (use (match_operand:V16QI 2 "register_operand" "r")) + (clobber (match_scratch:V4SI 3 "=&r")) + (clobber (match_scratch:V4SI 4 "=&r")) + (clobber (match_scratch:V4SI 5 "=&r")) + (clobber (match_scratch:V4SI 6 "=&r"))] + "" + "clz\t%3,%1\;il\t%6,1023+31\;shl\t%4,%1,%3\;ceqi\t%5,%3,32\;sf\t%6,%3,%6\;a\t%4,%4,%4\;andc\t%6,%6,%5\;shufb\t%6,%6,%4,%2\;shlqbii\t%0,%6,4" + "reload_completed" + [(set (match_dup:DF 0) + (unsigned_float:DF (match_dup:SI 1)))] + "{ + rtx *ops = operands; + rtx op1_v4si = gen_rtx_REG(V4SImode, REGNO(ops[1])); + rtx op0_ti = gen_rtx_REG (TImode, REGNO (ops[0])); + rtx op2_ti = gen_rtx_REG (TImode, REGNO (ops[2])); + rtx op6_ti = gen_rtx_REG (TImode, REGNO (ops[6])); + emit_insn (gen_clzv4si2 (ops[3],op1_v4si)); + emit_move_insn (ops[6], spu_const (V4SImode, 1023+31)); + emit_insn 
(gen_vashlv4si3 (ops[4],op1_v4si,ops[3])); + emit_insn (gen_ceq_v4si (ops[5],ops[3],spu_const (V4SImode, 32))); + emit_insn (gen_subv4si3 (ops[6],ops[6],ops[3])); + emit_insn (gen_addv4si3 (ops[4],ops[4],ops[4])); + emit_insn (gen_andc_v4si (ops[6],ops[6],ops[5])); + emit_insn (gen_shufb (ops[6],ops[6],ops[4],op2_ti)); + emit_insn (gen_shlqbi_ti (op0_ti,op6_ti,GEN_INT(4))); + DONE; + }" + [(set_attr "length" "32")]) + +(define_expand "floatdidf2" + [(set (match_operand:DF 0 "register_operand" "") + (float:DF (match_operand:DI 1 "register_operand" "")))] + "" + { + rtx c0 = gen_reg_rtx (DImode); + rtx r0 = gen_reg_rtx (DImode); + rtx r1 = gen_reg_rtx (DFmode); + rtx r2 = gen_reg_rtx (DImode); + rtx setneg = gen_reg_rtx (DImode); + rtx isneg = gen_reg_rtx (SImode); + rtx neg = gen_reg_rtx (DImode); + rtx mask = gen_reg_rtx (DImode); + + emit_move_insn (c0, GEN_INT (0x8000000000000000ull)); + + emit_insn (gen_negdi2 (neg, operands[1])); + emit_insn (gen_cgt_di_m1 (isneg, operands[1])); + emit_insn (gen_extend_compare (mask, isneg)); + emit_insn (gen_selb (r0, neg, operands[1], mask)); + emit_insn (gen_andc_di (setneg, c0, mask)); + + emit_insn (gen_floatunsdidf2 (r1, r0)); + + emit_insn (gen_iordi3 (r2, gen_rtx_SUBREG (DImode, r1, 0), setneg)); + emit_move_insn (operands[0], gen_rtx_SUBREG (DFmode, r2, 0)); + DONE; + }) + +(define_expand "floatunsdidf2" + [(set (match_operand:DF 0 "register_operand" "=r") + (unsigned_float:DF (match_operand:DI 1 "register_operand" "r")))] + "" + "{ + rtx value, insns; + rtx c0 = spu_const_from_ints (V16QImode, 0x02031011, 0x12138080, + 0x06071415, 0x16178080); + rtx c1 = spu_const_from_ints (V4SImode, 1023+63, 1023+31, 0, 0); + rtx r0 = gen_reg_rtx (V16QImode); + rtx r1 = gen_reg_rtx (V4SImode); + + if (optimize_size) + { + start_sequence (); + value = + emit_library_call_value (convert_optab_libfunc (ufloat_optab, + DFmode, DImode), + NULL_RTX, LCT_NORMAL, DFmode, 1, operands[1], DImode); + insns = get_insns (); + end_sequence (); + emit_libcall_block (insns, operands[0], value, + gen_rtx_UNSIGNED_FLOAT (DFmode, operands[1])); + } + else + { + emit_move_insn (r1, c1); + emit_move_insn (r0, c0); + emit_insn (gen_floatunsdidf2_internal (operands[0], operands[1], r0, r1)); + } + DONE; + }") + +(define_insn_and_split "floatunsdidf2_internal" + [(set (match_operand:DF 0 "register_operand" "=r") + (unsigned_float:DF (match_operand:DI 1 "register_operand" "r"))) + (use (match_operand:V16QI 2 "register_operand" "r")) + (use (match_operand:V4SI 3 "register_operand" "r")) + (clobber (match_scratch:V4SI 4 "=&r")) + (clobber (match_scratch:V4SI 5 "=&r")) + (clobber (match_scratch:V4SI 6 "=&r"))] + "" + "clz\t%4,%1\;shl\t%5,%1,%4\;ceqi\t%6,%4,32\;sf\t%4,%4,%3\;a\t%5,%5,%5\;andc\t%4,%4,%6\;shufb\t%4,%4,%5,%2\;shlqbii\t%4,%4,4\;shlqbyi\t%5,%4,8\;dfa\t%0,%4,%5" + "reload_completed" + [(set (match_operand:DF 0 "register_operand" "=r") + (unsigned_float:DF (match_operand:DI 1 "register_operand" "r")))] + "{ + rtx *ops = operands; + rtx op1_v4si = gen_rtx_REG (V4SImode, REGNO(ops[1])); + rtx op2_ti = gen_rtx_REG (TImode, REGNO(ops[2])); + rtx op4_ti = gen_rtx_REG (TImode, REGNO(ops[4])); + rtx op5_ti = gen_rtx_REG (TImode, REGNO(ops[5])); + rtx op4_df = gen_rtx_REG (DFmode, REGNO(ops[4])); + rtx op5_df = gen_rtx_REG (DFmode, REGNO(ops[5])); + emit_insn (gen_clzv4si2 (ops[4],op1_v4si)); + emit_insn (gen_vashlv4si3 (ops[5],op1_v4si,ops[4])); + emit_insn (gen_ceq_v4si (ops[6],ops[4],spu_const (V4SImode, 32))); + emit_insn (gen_subv4si3 (ops[4],ops[3],ops[4])); + emit_insn 
(gen_addv4si3 (ops[5],ops[5],ops[5])); + emit_insn (gen_andc_v4si (ops[4],ops[4],ops[6])); + emit_insn (gen_shufb (ops[4],ops[4],ops[5],op2_ti)); + emit_insn (gen_shlqbi_ti (op4_ti,op4_ti,GEN_INT(4))); + emit_insn (gen_shlqby_ti (op5_ti,op4_ti,GEN_INT(8))); + emit_insn (gen_adddf3 (ops[0],op4_df,op5_df)); + DONE; + }" + [(set_attr "length" "40")]) + + +;; add + +(define_expand "addv16qi3" + [(set (match_operand:V16QI 0 "spu_reg_operand" "=r") + (plus:V16QI (match_operand:V16QI 1 "spu_reg_operand" "r") + (match_operand:V16QI 2 "spu_reg_operand" "r")))] + "" + "{ + rtx res_short = simplify_gen_subreg (V8HImode, operands[0], V16QImode, 0); + rtx lhs_short = simplify_gen_subreg (V8HImode, operands[1], V16QImode, 0); + rtx rhs_short = simplify_gen_subreg (V8HImode, operands[2], V16QImode, 0); + rtx rhs_and = gen_reg_rtx (V8HImode); + rtx hi_char = gen_reg_rtx (V8HImode); + rtx lo_char = gen_reg_rtx (V8HImode); + rtx mask = gen_reg_rtx (V8HImode); + + emit_move_insn (mask, spu_const (V8HImode, 0x00ff)); + emit_insn (gen_andv8hi3 (rhs_and, rhs_short, spu_const (V8HImode, 0xff00))); + emit_insn (gen_addv8hi3 (hi_char, lhs_short, rhs_and)); + emit_insn (gen_addv8hi3 (lo_char, lhs_short, rhs_short)); + emit_insn (gen_selb (res_short, hi_char, lo_char, mask)); + DONE; + }") + +(define_insn "add3" + [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r") + (plus:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r") + (match_operand:VHSI 2 "spu_arith_operand" "r,B")))] + "" + "@ + a\t%0,%1,%2 + ai\t%0,%1,%2") + +(define_expand "add3" + [(set (match_dup:VDI 3) + (unspec:VDI [(match_operand:VDI 1 "spu_reg_operand" "") + (match_operand:VDI 2 "spu_reg_operand" "")] UNSPEC_CG)) + (set (match_dup:VDI 5) + (unspec:VDI [(match_dup 3) + (match_dup 3) + (match_dup:TI 4)] UNSPEC_SHUFB)) + (set (match_operand:VDI 0 "spu_reg_operand" "") + (unspec:VDI [(match_dup 1) + (match_dup 2) + (match_dup 5)] UNSPEC_ADDX))] + "" + { + unsigned char pat[16] = { + 0x04, 0x05, 0x06, 0x07, + 0x80, 0x80, 0x80, 0x80, + 0x0c, 0x0d, 0x0e, 0x0f, + 0x80, 0x80, 0x80, 0x80 + }; + operands[3] = gen_reg_rtx (mode); + operands[4] = gen_reg_rtx (TImode); + operands[5] = gen_reg_rtx (mode); + emit_move_insn (operands[4], array_to_constant (TImode, pat)); + }) + +(define_insn "cg_" + [(set (match_operand:CBOP 0 "spu_reg_operand" "=r") + (unspec:CBOP [(match_operand 1 "spu_reg_operand" "r") + (match_operand 2 "spu_reg_operand" "r")] UNSPEC_CG))] + "operands" + "cg\t%0,%1,%2") + +(define_insn "cgx_" + [(set (match_operand:CBOP 0 "spu_reg_operand" "=r") + (unspec:CBOP [(match_operand 1 "spu_reg_operand" "r") + (match_operand 2 "spu_reg_operand" "r") + (match_operand 3 "spu_reg_operand" "0")] UNSPEC_CGX))] + "operands" + "cgx\t%0,%1,%2") + +(define_insn "addx_" + [(set (match_operand:CBOP 0 "spu_reg_operand" "=r") + (unspec:CBOP [(match_operand 1 "spu_reg_operand" "r") + (match_operand 2 "spu_reg_operand" "r") + (match_operand 3 "spu_reg_operand" "0")] UNSPEC_ADDX))] + "operands" + "addx\t%0,%1,%2") + + +;; This is not the most efficient implementation of addti3. +;; We include this here because 1) the compiler needs it to be +;; defined as the word size is 128-bit and 2) sometimes gcc +;; substitutes an add for a constant left-shift. 2) is unlikely +;; because we also give addti3 a high cost. In case gcc does +;; generate TImode add, here is the code to do it. +;; operand 2 is a nonmemory because the compiler requires it. 
+(define_insn "addti3" + [(set (match_operand:TI 0 "spu_reg_operand" "=&r") + (plus:TI (match_operand:TI 1 "spu_reg_operand" "r") + (match_operand:TI 2 "spu_nonmem_operand" "r"))) + (clobber (match_scratch:TI 3 "=&r"))] + "" + "cg\t%3,%1,%2\n\\ + shlqbyi\t%3,%3,4\n\\ + cgx\t%3,%1,%2\n\\ + shlqbyi\t%3,%3,4\n\\ + cgx\t%3,%1,%2\n\\ + shlqbyi\t%0,%3,4\n\\ + addx\t%0,%1,%2" + [(set_attr "type" "multi0") + (set_attr "length" "28")]) + +(define_insn "add3" + [(set (match_operand:VSF 0 "spu_reg_operand" "=r") + (plus:VSF (match_operand:VSF 1 "spu_reg_operand" "r") + (match_operand:VSF 2 "spu_reg_operand" "r")))] + "" + "fa\t%0,%1,%2" + [(set_attr "type" "fp6")]) + +(define_insn "add3" + [(set (match_operand:VDF 0 "spu_reg_operand" "=r") + (plus:VDF (match_operand:VDF 1 "spu_reg_operand" "r") + (match_operand:VDF 2 "spu_reg_operand" "r")))] + "" + "dfa\t%0,%1,%2" + [(set_attr "type" "fpd")]) + + +;; sub + +(define_expand "subv16qi3" + [(set (match_operand:V16QI 0 "spu_reg_operand" "=r") + (minus:V16QI (match_operand:V16QI 1 "spu_reg_operand" "r") + (match_operand:V16QI 2 "spu_reg_operand" "r")))] + "" + "{ + rtx res_short = simplify_gen_subreg (V8HImode, operands[0], V16QImode, 0); + rtx lhs_short = simplify_gen_subreg (V8HImode, operands[1], V16QImode, 0); + rtx rhs_short = simplify_gen_subreg (V8HImode, operands[2], V16QImode, 0); + rtx rhs_and = gen_reg_rtx (V8HImode); + rtx hi_char = gen_reg_rtx (V8HImode); + rtx lo_char = gen_reg_rtx (V8HImode); + rtx mask = gen_reg_rtx (V8HImode); + + emit_move_insn (mask, spu_const (V8HImode, 0x00ff)); + emit_insn (gen_andv8hi3 (rhs_and, rhs_short, spu_const (V8HImode, 0xff00))); + emit_insn (gen_subv8hi3 (hi_char, lhs_short, rhs_and)); + emit_insn (gen_subv8hi3 (lo_char, lhs_short, rhs_short)); + emit_insn (gen_selb (res_short, hi_char, lo_char, mask)); + DONE; + }") + +(define_insn "sub3" + [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r") + (minus:VHSI (match_operand:VHSI 1 "spu_arith_operand" "r,B") + (match_operand:VHSI 2 "spu_reg_operand" "r,r")))] + "" + "@ + sf\t%0,%2,%1 + sfi\t%0,%2,%1") + +(define_expand "sub3" + [(set (match_dup:VDI 3) + (unspec:VDI [(match_operand:VDI 1 "spu_reg_operand" "") + (match_operand:VDI 2 "spu_reg_operand" "")] UNSPEC_BG)) + (set (match_dup:VDI 5) + (unspec:VDI [(match_dup 3) + (match_dup 3) + (match_dup:TI 4)] UNSPEC_SHUFB)) + (set (match_operand:VDI 0 "spu_reg_operand" "") + (unspec:VDI [(match_dup 1) + (match_dup 2) + (match_dup 5)] UNSPEC_SFX))] + "" + { + unsigned char pat[16] = { + 0x04, 0x05, 0x06, 0x07, + 0xc0, 0xc0, 0xc0, 0xc0, + 0x0c, 0x0d, 0x0e, 0x0f, + 0xc0, 0xc0, 0xc0, 0xc0 + }; + operands[3] = gen_reg_rtx (mode); + operands[4] = gen_reg_rtx (TImode); + operands[5] = gen_reg_rtx (mode); + emit_move_insn (operands[4], array_to_constant (TImode, pat)); + }) + +(define_insn "bg_" + [(set (match_operand:CBOP 0 "spu_reg_operand" "=r") + (unspec:CBOP [(match_operand 1 "spu_reg_operand" "r") + (match_operand 2 "spu_reg_operand" "r")] UNSPEC_BG))] + "operands" + "bg\t%0,%2,%1") + +(define_insn "bgx_" + [(set (match_operand:CBOP 0 "spu_reg_operand" "=r") + (unspec:CBOP [(match_operand 1 "spu_reg_operand" "r") + (match_operand 2 "spu_reg_operand" "r") + (match_operand 3 "spu_reg_operand" "0")] UNSPEC_BGX))] + "operands" + "bgx\t%0,%2,%1") + +(define_insn "sfx_" + [(set (match_operand:CBOP 0 "spu_reg_operand" "=r") + (unspec:CBOP [(match_operand 1 "spu_reg_operand" "r") + (match_operand 2 "spu_reg_operand" "r") + (match_operand 3 "spu_reg_operand" "0")] UNSPEC_SFX))] + "operands" + "sfx\t%0,%2,%1") + +(define_insn 
"subti3" + [(set (match_operand:TI 0 "spu_reg_operand" "=r") + (minus:TI (match_operand:TI 1 "spu_reg_operand" "r") + (match_operand:TI 2 "spu_reg_operand" "r"))) + (clobber (match_scratch:TI 3 "=&r")) + (clobber (match_scratch:TI 4 "=&r")) + (clobber (match_scratch:TI 5 "=&r")) + (clobber (match_scratch:TI 6 "=&r"))] + "" + "il\t%6,1\n\\ + bg\t%3,%2,%1\n\\ + xor\t%3,%3,%6\n\\ + sf\t%4,%2,%1\n\\ + shlqbyi\t%5,%3,4\n\\ + bg\t%3,%5,%4\n\\ + xor\t%3,%3,%6\n\\ + sf\t%4,%5,%4\n\\ + shlqbyi\t%5,%3,4\n\\ + bg\t%3,%5,%4\n\\ + xor\t%3,%3,%6\n\\ + sf\t%4,%5,%4\n\\ + shlqbyi\t%5,%3,4\n\\ + sf\t%0,%5,%4" + [(set_attr "type" "multi0") + (set_attr "length" "56")]) + +(define_insn "sub3" + [(set (match_operand:VSF 0 "spu_reg_operand" "=r") + (minus:VSF (match_operand:VSF 1 "spu_reg_operand" "r") + (match_operand:VSF 2 "spu_reg_operand" "r")))] + "" + "fs\t%0,%1,%2" + [(set_attr "type" "fp6")]) + +(define_insn "sub3" + [(set (match_operand:VDF 0 "spu_reg_operand" "=r") + (minus:VDF (match_operand:VDF 1 "spu_reg_operand" "r") + (match_operand:VDF 2 "spu_reg_operand" "r")))] + "" + "dfs\t%0,%1,%2" + [(set_attr "type" "fpd")]) + + +;; neg + +(define_expand "negv16qi2" + [(set (match_operand:V16QI 0 "spu_reg_operand" "=r") + (neg:V16QI (match_operand:V16QI 1 "spu_reg_operand" "r")))] + "" + "{ + rtx zero = gen_reg_rtx (V16QImode); + emit_move_insn (zero, CONST0_RTX (V16QImode)); + emit_insn (gen_subv16qi3 (operands[0], zero, operands[1])); + DONE; + }") + +(define_insn "neg2" + [(set (match_operand:VHSI 0 "spu_reg_operand" "=r") + (neg:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r")))] + "" + "sfi\t%0,%1,0") + +(define_expand "negdi2" + [(set (match_operand:DI 0 "spu_reg_operand" "") + (neg:DI (match_operand:DI 1 "spu_reg_operand" "")))] + "" + { + rtx zero = gen_reg_rtx(DImode); + emit_move_insn(zero, GEN_INT(0)); + emit_insn (gen_subdi3(operands[0], zero, operands[1])); + DONE; + }) + +(define_expand "negti2" + [(set (match_operand:TI 0 "spu_reg_operand" "") + (neg:TI (match_operand:TI 1 "spu_reg_operand" "")))] + "" + { + rtx zero = gen_reg_rtx(TImode); + emit_move_insn(zero, GEN_INT(0)); + emit_insn (gen_subti3(operands[0], zero, operands[1])); + DONE; + }) + +(define_expand "neg2" + [(parallel + [(set (match_operand:VSF 0 "spu_reg_operand" "") + (neg:VSF (match_operand:VSF 1 "spu_reg_operand" ""))) + (use (match_dup 2))])] + "" + "operands[2] = gen_reg_rtx (mode); + emit_move_insn (operands[2], spu_const (mode, -0x80000000ull));") + +(define_expand "neg2" + [(parallel + [(set (match_operand:VDF 0 "spu_reg_operand" "") + (neg:VDF (match_operand:VDF 1 "spu_reg_operand" ""))) + (use (match_dup 2))])] + "" + "operands[2] = gen_reg_rtx (mode); + emit_move_insn (operands[2], spu_const (mode, -0x8000000000000000ull));") + +(define_insn_and_split "_neg2" + [(set (match_operand:VSDF 0 "spu_reg_operand" "=r") + (neg:VSDF (match_operand:VSDF 1 "spu_reg_operand" "r"))) + (use (match_operand: 2 "spu_reg_operand" "r"))] + "" + "#" + "" + [(set (match_dup: 3) + (xor: (match_dup: 4) + (match_dup: 2)))] + { + operands[3] = spu_gen_subreg (mode, operands[0]); + operands[4] = spu_gen_subreg (mode, operands[1]); + }) + + +;; abs + +(define_expand "abs2" + [(parallel + [(set (match_operand:VSF 0 "spu_reg_operand" "") + (abs:VSF (match_operand:VSF 1 "spu_reg_operand" ""))) + (use (match_dup 2))])] + "" + "operands[2] = gen_reg_rtx (mode); + emit_move_insn (operands[2], spu_const (mode, 0x7fffffffull));") + +(define_expand "abs2" + [(parallel + [(set (match_operand:VDF 0 "spu_reg_operand" "") + (abs:VDF (match_operand:VDF 1 
"spu_reg_operand" ""))) + (use (match_dup 2))])] + "" + "operands[2] = gen_reg_rtx (mode); + emit_move_insn (operands[2], spu_const (mode, 0x7fffffffffffffffull));") + +(define_insn_and_split "_abs2" + [(set (match_operand:VSDF 0 "spu_reg_operand" "=r") + (abs:VSDF (match_operand:VSDF 1 "spu_reg_operand" "r"))) + (use (match_operand: 2 "spu_reg_operand" "r"))] + "" + "#" + "" + [(set (match_dup: 3) + (and: (match_dup: 4) + (match_dup: 2)))] + { + operands[3] = spu_gen_subreg (mode, operands[0]); + operands[4] = spu_gen_subreg (mode, operands[1]); + }) + + +;; mul + +(define_insn "mulhi3" + [(set (match_operand:HI 0 "spu_reg_operand" "=r,r") + (mult:HI (match_operand:HI 1 "spu_reg_operand" "r,r") + (match_operand:HI 2 "spu_arith_operand" "r,B")))] + "" + "@ + mpy\t%0,%1,%2 + mpyi\t%0,%1,%2" + [(set_attr "type" "fp7")]) + +(define_expand "mulv8hi3" + [(set (match_operand:V8HI 0 "spu_reg_operand" "") + (mult:V8HI (match_operand:V8HI 1 "spu_reg_operand" "") + (match_operand:V8HI 2 "spu_reg_operand" "")))] + "" + "{ + rtx result = simplify_gen_subreg (V4SImode, operands[0], V8HImode, 0); + rtx low = gen_reg_rtx (V4SImode); + rtx high = gen_reg_rtx (V4SImode); + rtx shift = gen_reg_rtx (V4SImode); + rtx mask = gen_reg_rtx (V4SImode); + + emit_move_insn (mask, spu_const (V4SImode, 0x0000ffff)); + emit_insn (gen_spu_mpyhh (high, operands[1], operands[2])); + emit_insn (gen_spu_mpy (low, operands[1], operands[2])); + emit_insn (gen_vashlv4si3 (shift, high, spu_const(V4SImode, 16))); + emit_insn (gen_selb (result, shift, low, mask)); + DONE; + }") + +(define_expand "mul3" + [(parallel + [(set (match_operand:VSI 0 "spu_reg_operand" "") + (mult:VSI (match_operand:VSI 1 "spu_reg_operand" "") + (match_operand:VSI 2 "spu_reg_operand" ""))) + (clobber (match_dup:VSI 3)) + (clobber (match_dup:VSI 4)) + (clobber (match_dup:VSI 5)) + (clobber (match_dup:VSI 6))])] + "" + { + operands[3] = gen_reg_rtx(mode); + operands[4] = gen_reg_rtx(mode); + operands[5] = gen_reg_rtx(mode); + operands[6] = gen_reg_rtx(mode); + }) + +(define_insn_and_split "_mulsi3" + [(set (match_operand:SI 0 "spu_reg_operand" "=r") + (mult:SI (match_operand:SI 1 "spu_reg_operand" "r") + (match_operand:SI 2 "spu_arith_operand" "rK"))) + (clobber (match_operand:SI 3 "spu_reg_operand" "=&r")) + (clobber (match_operand:SI 4 "spu_reg_operand" "=&r")) + (clobber (match_operand:SI 5 "spu_reg_operand" "=&r")) + (clobber (match_operand:SI 6 "spu_reg_operand" "=&r"))] + "" + "#" + "" + [(set (match_dup:SI 0) + (mult:SI (match_dup:SI 1) + (match_dup:SI 2)))] + { + HOST_WIDE_INT val = 0; + rtx a = operands[3]; + rtx b = operands[4]; + rtx c = operands[5]; + rtx d = operands[6]; + if (GET_CODE(operands[2]) == CONST_INT) + { + val = INTVAL(operands[2]); + emit_move_insn(d, operands[2]); + operands[2] = d; + } + if (val && (val & 0xffff) == 0) + { + emit_insn (gen_mpyh_si(operands[0], operands[2], operands[1])); + } + else if (val > 0 && val < 0x10000) + { + rtx cst = satisfies_constraint_K (GEN_INT (val)) ? 
GEN_INT(val) : d; + emit_insn (gen_mpyh_si(a, operands[1], operands[2])); + emit_insn (gen_mpyu_si(c, operands[1], cst)); + emit_insn (gen_addsi3(operands[0], a, c)); + } + else + { + emit_insn (gen_mpyh_si(a, operands[1], operands[2])); + emit_insn (gen_mpyh_si(b, operands[2], operands[1])); + emit_insn (gen_mpyu_si(c, operands[1], operands[2])); + emit_insn (gen_addsi3(d, a, b)); + emit_insn (gen_addsi3(operands[0], d, c)); + } + DONE; + }) + +(define_insn_and_split "_mulv4si3" + [(set (match_operand:V4SI 0 "spu_reg_operand" "=r") + (mult:V4SI (match_operand:V4SI 1 "spu_reg_operand" "r") + (match_operand:V4SI 2 "spu_reg_operand" "r"))) + (clobber (match_operand:V4SI 3 "spu_reg_operand" "=&r")) + (clobber (match_operand:V4SI 4 "spu_reg_operand" "=&r")) + (clobber (match_operand:V4SI 5 "spu_reg_operand" "=&r")) + (clobber (match_operand:V4SI 6 "spu_reg_operand" "=&r"))] + "" + "#" + "" + [(set (match_dup:V4SI 0) + (mult:V4SI (match_dup:V4SI 1) + (match_dup:V4SI 2)))] + { + rtx a = operands[3]; + rtx b = operands[4]; + rtx c = operands[5]; + rtx d = operands[6]; + rtx op1 = simplify_gen_subreg (V8HImode, operands[1], V4SImode, 0); + rtx op2 = simplify_gen_subreg (V8HImode, operands[2], V4SImode, 0); + emit_insn (gen_spu_mpyh(a, op1, op2)); + emit_insn (gen_spu_mpyh(b, op2, op1)); + emit_insn (gen_spu_mpyu(c, op1, op2)); + emit_insn (gen_addv4si3(d, a, b)); + emit_insn (gen_addv4si3(operands[0], d, c)); + DONE; + }) + +(define_insn "mulhisi3" + [(set (match_operand:SI 0 "spu_reg_operand" "=r") + (mult:SI (sign_extend:SI (match_operand:HI 1 "spu_reg_operand" "r")) + (sign_extend:SI (match_operand:HI 2 "spu_reg_operand" "r"))))] + "" + "mpy\t%0,%1,%2" + [(set_attr "type" "fp7")]) + +(define_insn "mulhisi3_imm" + [(set (match_operand:SI 0 "spu_reg_operand" "=r") + (mult:SI (sign_extend:SI (match_operand:HI 1 "spu_reg_operand" "r")) + (match_operand:SI 2 "imm_K_operand" "K")))] + "" + "mpyi\t%0,%1,%2" + [(set_attr "type" "fp7")]) + +(define_insn "umulhisi3" + [(set (match_operand:SI 0 "spu_reg_operand" "=r") + (mult:SI (zero_extend:SI (match_operand:HI 1 "spu_reg_operand" "r")) + (zero_extend:SI (match_operand:HI 2 "spu_reg_operand" "r"))))] + "" + "mpyu\t%0,%1,%2" + [(set_attr "type" "fp7")]) + +(define_insn "umulhisi3_imm" + [(set (match_operand:SI 0 "spu_reg_operand" "=r") + (mult:SI (zero_extend:SI (match_operand:HI 1 "spu_reg_operand" "r")) + (and:SI (match_operand:SI 2 "imm_K_operand" "K") (const_int 65535))))] + "" + "mpyui\t%0,%1,%2" + [(set_attr "type" "fp7")]) + +(define_insn "mpyu_si" + [(set (match_operand:SI 0 "spu_reg_operand" "=r,r") + (mult:SI (and:SI (match_operand:SI 1 "spu_reg_operand" "r,r") + (const_int 65535)) + (and:SI (match_operand:SI 2 "spu_arith_operand" "r,K") + (const_int 65535))))] + "" + "@ + mpyu\t%0,%1,%2 + mpyui\t%0,%1,%2" + [(set_attr "type" "fp7")]) + +;; This isn't always profitable to use. Consider r = a * b + c * d. +;; It's faster to do the multiplies in parallel then add them. If we +;; merge a multiply and add it prevents the multiplies from happening in +;; parallel. 
+(define_insn "mpya_si" + [(set (match_operand:SI 0 "spu_reg_operand" "=r") + (plus:SI (mult:SI (sign_extend:SI (match_operand:HI 1 "spu_reg_operand" "r")) + (sign_extend:SI (match_operand:HI 2 "spu_reg_operand" "r"))) + (match_operand:SI 3 "spu_reg_operand" "r")))] + "0" + "mpya\t%0,%1,%2,%3" + [(set_attr "type" "fp7")]) + +(define_insn "mpyh_si" + [(set (match_operand:SI 0 "spu_reg_operand" "=r") + (mult:SI (and:SI (match_operand:SI 1 "spu_reg_operand" "r") + (const_int -65536)) + (and:SI (match_operand:SI 2 "spu_reg_operand" "r") + (const_int 65535))))] + "" + "mpyh\t%0,%1,%2" + [(set_attr "type" "fp7")]) + +(define_insn "mpys_si" + [(set (match_operand:SI 0 "spu_reg_operand" "=r") + (ashiftrt:SI + (mult:SI (sign_extend:SI (match_operand:HI 1 "spu_reg_operand" "r")) + (sign_extend:SI (match_operand:HI 2 "spu_reg_operand" "r"))) + (const_int 16)))] + "" + "mpys\t%0,%1,%2" + [(set_attr "type" "fp7")]) + +(define_insn "mpyhh_si" + [(set (match_operand:SI 0 "spu_reg_operand" "=r") + (mult:SI (ashiftrt:SI (match_operand:SI 1 "spu_reg_operand" "r") + (const_int 16)) + (ashiftrt:SI (match_operand:SI 2 "spu_reg_operand" "r") + (const_int 16))))] + "" + "mpyhh\t%0,%1,%2" + [(set_attr "type" "fp7")]) + +(define_insn "mpyhhu_si" + [(set (match_operand:SI 0 "spu_reg_operand" "=r") + (mult:SI (lshiftrt:SI (match_operand:SI 1 "spu_reg_operand" "r") + (const_int 16)) + (lshiftrt:SI (match_operand:SI 2 "spu_reg_operand" "r") + (const_int 16))))] + "" + "mpyhhu\t%0,%1,%2" + [(set_attr "type" "fp7")]) + +(define_insn "mpyhha_si" + [(set (match_operand:SI 0 "spu_reg_operand" "=r") + (plus:SI (mult:SI (ashiftrt:SI (match_operand:SI 1 "spu_reg_operand" "r") + (const_int 16)) + (ashiftrt:SI (match_operand:SI 2 "spu_reg_operand" "r") + (const_int 16))) + (match_operand:SI 3 "spu_reg_operand" "0")))] + "0" + "mpyhha\t%0,%1,%2" + [(set_attr "type" "fp7")]) + +(define_insn "mul3" + [(set (match_operand:VSDF 0 "spu_reg_operand" "=r") + (mult:VSDF (match_operand:VSDF 1 "spu_reg_operand" "r") + (match_operand:VSDF 2 "spu_reg_operand" "r")))] + "" + "fm\t%0,%1,%2" + [(set_attr "type" "fp")]) + +(define_insn "fma_" + [(set (match_operand:VSF 0 "spu_reg_operand" "=r") + (plus:VSF (mult:VSF (match_operand:VSF 1 "spu_reg_operand" "r") + (match_operand:VSF 2 "spu_reg_operand" "r")) + (match_operand:VSF 3 "spu_reg_operand" "r")))] + "" + "fma\t%0,%1,%2,%3" + [(set_attr "type" "fp6")]) + +(define_insn "fnms_" + [(set (match_operand:VSF 0 "spu_reg_operand" "=r") + (minus:VSF (match_operand:VSF 3 "spu_reg_operand" "r") + (mult:VSF (match_operand:VSF 1 "spu_reg_operand" "r") + (match_operand:VSF 2 "spu_reg_operand" "r"))))] + "" + "fnms\t%0,%1,%2,%3" + [(set_attr "type" "fp6")]) + +(define_insn "fms_" + [(set (match_operand:VSF 0 "spu_reg_operand" "=r") + (minus:VSF (mult:VSF (match_operand:VSF 1 "spu_reg_operand" "r") + (match_operand:VSF 2 "spu_reg_operand" "r")) + (match_operand:VSF 3 "spu_reg_operand" "r")))] + "" + "fms\t%0,%1,%2,%3" + [(set_attr "type" "fp6")]) + +(define_insn "fma_" + [(set (match_operand:VDF 0 "spu_reg_operand" "=r") + (plus:VDF (mult:VDF (match_operand:VDF 1 "spu_reg_operand" "r") + (match_operand:VDF 2 "spu_reg_operand" "r")) + (match_operand:VDF 3 "spu_reg_operand" "0")))] + "" + "dfma\t%0,%1,%2" + [(set_attr "type" "fpd")]) + +(define_insn "fnma_" + [(set (match_operand:VDF 0 "spu_reg_operand" "=r") + (neg:VDF (plus:VDF (mult:VDF (match_operand:VDF 1 "spu_reg_operand" "r") + (match_operand:VDF 2 "spu_reg_operand" "r")) + (match_operand:VDF 3 "spu_reg_operand" "0"))))] + "" + "dfnma\t%0,%1,%2" + 
[(set_attr "type" "fpd")]) + +(define_insn "fnms_" + [(set (match_operand:VDF 0 "spu_reg_operand" "=r") + (minus:VDF (match_operand:VDF 3 "spu_reg_operand" "0") + (mult:VDF (match_operand:VDF 1 "spu_reg_operand" "r") + (match_operand:VDF 2 "spu_reg_operand" "r"))))] + "" + "dfnms\t%0,%1,%2" + [(set_attr "type" "fpd")]) + +(define_insn "fms_" + [(set (match_operand:VDF 0 "spu_reg_operand" "=r") + (minus:VDF (mult:VDF (match_operand:VDF 1 "spu_reg_operand" "r") + (match_operand:VDF 2 "spu_reg_operand" "r")) + (match_operand:VDF 3 "spu_reg_operand" "0")))] + "" + "dfms\t%0,%1,%2" + [(set_attr "type" "fpd")]) + + +;; mul highpart, used for divide by constant optimizations. + +(define_expand "smulsi3_highpart" + [(set (match_operand:SI 0 "register_operand" "") + (truncate:SI + (ashiftrt:DI + (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "")) + (sign_extend:DI (match_operand:SI 2 "register_operand" ""))) + (const_int 32))))] + "" + { + rtx t0 = gen_reg_rtx (SImode); + rtx t1 = gen_reg_rtx (SImode); + rtx t2 = gen_reg_rtx (SImode); + rtx t3 = gen_reg_rtx (SImode); + rtx t4 = gen_reg_rtx (SImode); + rtx t5 = gen_reg_rtx (SImode); + rtx t6 = gen_reg_rtx (SImode); + rtx t7 = gen_reg_rtx (SImode); + rtx t8 = gen_reg_rtx (SImode); + rtx t9 = gen_reg_rtx (SImode); + rtx t11 = gen_reg_rtx (SImode); + rtx t12 = gen_reg_rtx (SImode); + rtx t14 = gen_reg_rtx (SImode); + rtx t15 = gen_reg_rtx (HImode); + rtx t16 = gen_reg_rtx (HImode); + rtx t17 = gen_reg_rtx (HImode); + rtx t18 = gen_reg_rtx (HImode); + rtx t19 = gen_reg_rtx (SImode); + rtx t20 = gen_reg_rtx (SImode); + rtx t21 = gen_reg_rtx (SImode); + rtx op1_hi = gen_rtx_SUBREG (HImode, operands[1], 2); + rtx op2_hi = gen_rtx_SUBREG (HImode, operands[2], 2); + rtx t0_hi = gen_rtx_SUBREG (HImode, t0, 2); + rtx t1_hi = gen_rtx_SUBREG (HImode, t1, 2); + + rtx insn = emit_insn (gen_lshrsi3 (t0, operands[1], GEN_INT (16))); + emit_insn (gen_lshrsi3 (t1, operands[2], GEN_INT (16))); + emit_insn (gen_umulhisi3 (t2, op1_hi, op2_hi)); + emit_insn (gen_mpyh_si (t3, operands[1], operands[2])); + emit_insn (gen_mpyh_si (t4, operands[2], operands[1])); + emit_insn (gen_mpyhh_si (t5, operands[1], operands[2])); + emit_insn (gen_mpys_si (t6, t0_hi, op2_hi)); + emit_insn (gen_mpys_si (t7, t1_hi, op1_hi)); + + /* Gen carry bits (in t9 and t11). */ + emit_insn (gen_addsi3 (t8, t2, t3)); + emit_insn (gen_cg_si (t9, t2, t3)); + emit_insn (gen_cg_si (t11, t8, t4)); + + /* Gen high 32 bits in operand[0]. Correct for mpys. */ + emit_insn (gen_addx_si (t12, t5, t6, t9)); + emit_insn (gen_addx_si (t14, t12, t7, t11)); + + /* mpys treats both operands as signed when we really want it to treat + the first operand as signed and the second operand as unsigned. + The code below corrects for that difference. 
*/ + emit_insn (gen_cgt_hi (t15, op1_hi, GEN_INT (-1))); + emit_insn (gen_cgt_hi (t16, op2_hi, GEN_INT (-1))); + emit_insn (gen_andc_hi (t17, t1_hi, t15)); + emit_insn (gen_andc_hi (t18, t0_hi, t16)); + emit_insn (gen_extendhisi2 (t19, t17)); + emit_insn (gen_extendhisi2 (t20, t18)); + emit_insn (gen_addsi3 (t21, t19, t20)); + emit_insn (gen_addsi3 (operands[0], t14, t21)); + unshare_all_rtl_in_chain (insn); + DONE; + }) + +(define_expand "umulsi3_highpart" + [(set (match_operand:SI 0 "register_operand" "") + (truncate:SI + (ashiftrt:DI + (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "")) + (zero_extend:DI (match_operand:SI 2 "register_operand" ""))) + (const_int 32))))] + "" + + { + rtx t0 = gen_reg_rtx (SImode); + rtx t1 = gen_reg_rtx (SImode); + rtx t2 = gen_reg_rtx (SImode); + rtx t3 = gen_reg_rtx (SImode); + rtx t4 = gen_reg_rtx (SImode); + rtx t5 = gen_reg_rtx (SImode); + rtx t6 = gen_reg_rtx (SImode); + rtx t7 = gen_reg_rtx (SImode); + rtx t8 = gen_reg_rtx (SImode); + rtx t9 = gen_reg_rtx (SImode); + rtx t10 = gen_reg_rtx (SImode); + rtx t12 = gen_reg_rtx (SImode); + rtx t13 = gen_reg_rtx (SImode); + rtx t14 = gen_reg_rtx (SImode); + rtx op1_hi = gen_rtx_SUBREG (HImode, operands[1], 2); + rtx op2_hi = gen_rtx_SUBREG (HImode, operands[2], 2); + rtx t0_hi = gen_rtx_SUBREG (HImode, t0, 2); + + rtx insn = emit_insn (gen_rotlsi3 (t0, operands[2], GEN_INT (16))); + emit_insn (gen_umulhisi3 (t1, op1_hi, op2_hi)); + emit_insn (gen_umulhisi3 (t2, op1_hi, t0_hi)); + emit_insn (gen_mpyhhu_si (t3, operands[1], t0)); + emit_insn (gen_mpyhhu_si (t4, operands[1], operands[2])); + emit_insn (gen_ashlsi3 (t5, t2, GEN_INT (16))); + emit_insn (gen_ashlsi3 (t6, t3, GEN_INT (16))); + emit_insn (gen_lshrsi3 (t7, t2, GEN_INT (16))); + emit_insn (gen_lshrsi3 (t8, t3, GEN_INT (16))); + + /* Gen carry bits (in t10 and t12). */ + emit_insn (gen_addsi3 (t9, t1, t5)); + emit_insn (gen_cg_si (t10, t1, t5)); + emit_insn (gen_cg_si (t12, t9, t6)); + + /* Gen high 32 bits in operand[0]. */ + emit_insn (gen_addx_si (t13, t4, t7, t10)); + emit_insn (gen_addx_si (t14, t13, t8, t12)); + emit_insn (gen_movsi (operands[0], t14)); + unshare_all_rtl_in_chain (insn); + + DONE; + }) + +;; div + +;; Not necessarily the best implementation of divide but faster then +;; the default that gcc provides because this is inlined and it uses +;; clz. 
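The divmodsi4 and udivmodsi4 patterns that follow inline a shift-and-subtract division whose iteration count is reduced by aligning the divisor with the dividend using clz; the signed variant additionally negates negative operands up front and patches the signs of quotient and remainder at the end. A rough C equivalent of the unsigned loop (a sketch of the algorithm only, not the hand-scheduled assembly, which also uses a branch hint and traps division by zero with heqi):

  #include <stdint.h>

  /* Shift-and-subtract division, divisor pre-aligned via clz.  */
  static uint32_t udivmod_sketch (uint32_t num, uint32_t den, uint32_t *rem)
  {
    uint32_t quot = 0;

    if (den != 0 && num >= den)
      {
        /* Align the divisor's top set bit with the dividend's.  */
        int shift = __builtin_clz (den) - __builtin_clz (num);
        uint32_t bit = 1u << shift;    /* quotient bit being tried */
        den <<= shift;

        while (bit)
          {
            if (num >= den)            /* the clgt/sf/selb step */
              {
                num -= den;
                quot |= bit;
              }
            den >>= 1;
            bit >>= 1;
          }
      }
    *rem = num;
    return quot;
  }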
+(define_insn "divmodsi4" + [(set (match_operand:SI 0 "spu_reg_operand" "=&r") + (div:SI (match_operand:SI 1 "spu_reg_operand" "r") + (match_operand:SI 2 "spu_reg_operand" "r"))) + (set (match_operand:SI 3 "spu_reg_operand" "=&r") + (mod:SI (match_dup 1) + (match_dup 2))) + (clobber (match_scratch:SI 4 "=&r")) + (clobber (match_scratch:SI 5 "=&r")) + (clobber (match_scratch:SI 6 "=&r")) + (clobber (match_scratch:SI 7 "=&r")) + (clobber (match_scratch:SI 8 "=&r")) + (clobber (match_scratch:SI 9 "=&r")) + (clobber (match_scratch:SI 10 "=&r")) + (clobber (match_scratch:SI 11 "=&r")) + (clobber (match_scratch:SI 12 "=&r")) + (clobber (reg:SI 130))] + "" + "heqi %2,0\\n\\ + hbrr 3f,1f\\n\\ + sfi %8,%1,0\\n\\ + sfi %9,%2,0\\n\\ + cgti %10,%1,-1\\n\\ + cgti %11,%2,-1\\n\\ + selb %8,%8,%1,%10\\n\\ + selb %9,%9,%2,%11\\n\\ + clz %4,%8\\n\\ + clz %7,%9\\n\\ + il %5,1\\n\\ + fsmbi %0,0\\n\\ + sf %7,%4,%7\\n\\ + shlqbyi %3,%8,0\\n\\ + xor %11,%10,%11\\n\\ + shl %5,%5,%7\\n\\ + shl %4,%9,%7\\n\\ + lnop \\n\\ +1: or %12,%0,%5\\n\\ + rotqmbii %5,%5,-1\\n\\ + clgt %6,%4,%3\\n\\ + lnop \\n\\ + sf %7,%4,%3\\n\\ + rotqmbii %4,%4,-1\\n\\ + selb %0,%12,%0,%6\\n\\ + lnop \\n\\ + selb %3,%7,%3,%6\\n\\ +3: brnz %5,1b\\n\\ +2: sfi %8,%3,0\\n\\ + sfi %9,%0,0\\n\\ + selb %3,%8,%3,%10\\n\\ + selb %0,%0,%9,%11" + [(set_attr "type" "multi0") + (set_attr "length" "128")]) + +(define_insn "udivmodsi4" + [(set (match_operand:SI 0 "spu_reg_operand" "=&r") + (udiv:SI (match_operand:SI 1 "spu_reg_operand" "r") + (match_operand:SI 2 "spu_reg_operand" "r"))) + (set (match_operand:SI 3 "spu_reg_operand" "=&r") + (umod:SI (match_dup 1) + (match_dup 2))) + (clobber (match_scratch:SI 4 "=&r")) + (clobber (match_scratch:SI 5 "=&r")) + (clobber (match_scratch:SI 6 "=&r")) + (clobber (match_scratch:SI 7 "=&r")) + (clobber (match_scratch:SI 8 "=&r")) + (clobber (reg:SI 130))] + "" + "heqi %2,0\\n\\ + hbrr 3f,1f\\n\\ + clz %7,%2\\n\\ + clz %4,%1\\n\\ + il %5,1\\n\\ + fsmbi %0,0\\n\\ + sf %7,%4,%7\\n\\ + ori %3,%1,0\\n\\ + shl %5,%5,%7\\n\\ + shl %4,%2,%7\\n\\ +1: or %8,%0,%5\\n\\ + rotqmbii %5,%5,-1\\n\\ + clgt %6,%4,%3\\n\\ + lnop \\n\\ + sf %7,%4,%3\\n\\ + rotqmbii %4,%4,-1\\n\\ + selb %0,%8,%0,%6\\n\\ + lnop \\n\\ + selb %3,%7,%3,%6\\n\\ +3: brnz %5,1b\\n\\ +2:" + [(set_attr "type" "multi0") + (set_attr "length" "80")]) + +(define_expand "div3" + [(parallel + [(set (match_operand:VSF 0 "spu_reg_operand" "") + (div:VSF (match_operand:VSF 1 "spu_reg_operand" "") + (match_operand:VSF 2 "spu_reg_operand" ""))) + (clobber (match_scratch:VSF 3 "")) + (clobber (match_scratch:VSF 4 "")) + (clobber (match_scratch:VSF 5 ""))])] + "" + "") + +(define_insn_and_split "*div3_fast" + [(set (match_operand:VSF 0 "spu_reg_operand" "=r") + (div:VSF (match_operand:VSF 1 "spu_reg_operand" "r") + (match_operand:VSF 2 "spu_reg_operand" "r"))) + (clobber (match_scratch:VSF 3 "=&r")) + (clobber (match_scratch:VSF 4 "=&r")) + (clobber (scratch:VSF))] + "flag_unsafe_math_optimizations" + "#" + "reload_completed" + [(set (match_dup:VSF 0) + (div:VSF (match_dup:VSF 1) + (match_dup:VSF 2))) + (clobber (match_dup:VSF 3)) + (clobber (match_dup:VSF 4)) + (clobber (scratch:VSF))] + { + emit_insn (gen_frest_(operands[3], operands[2])); + emit_insn (gen_fi_(operands[3], operands[2], operands[3])); + emit_insn (gen_mul3(operands[4], operands[1], operands[3])); + emit_insn (gen_fnms_(operands[0], operands[4], operands[2], operands[1])); + emit_insn (gen_fma_(operands[0], operands[0], operands[3], operands[4])); + DONE; + }) + +(define_insn_and_split "*div3_adjusted" + 
[(set (match_operand:VSF 0 "spu_reg_operand" "=r") + (div:VSF (match_operand:VSF 1 "spu_reg_operand" "r") + (match_operand:VSF 2 "spu_reg_operand" "r"))) + (clobber (match_scratch:VSF 3 "=&r")) + (clobber (match_scratch:VSF 4 "=&r")) + (clobber (match_scratch:VSF 5 "=&r"))] + "!flag_unsafe_math_optimizations" + "#" + "reload_completed" + [(set (match_dup:VSF 0) + (div:VSF (match_dup:VSF 1) + (match_dup:VSF 2))) + (clobber (match_dup:VSF 3)) + (clobber (match_dup:VSF 4)) + (clobber (match_dup:VSF 5))] + { + emit_insn (gen_frest_ (operands[3], operands[2])); + emit_insn (gen_fi_ (operands[3], operands[2], operands[3])); + emit_insn (gen_mul3 (operands[4], operands[1], operands[3])); + emit_insn (gen_fnms_ (operands[5], operands[4], operands[2], operands[1])); + emit_insn (gen_fma_ (operands[3], operands[5], operands[3], operands[4])); + + /* Due to truncation error, the quotient result may be low by 1 ulp. + Conditionally add one if the estimate is too small in magnitude. */ + + emit_move_insn (gen_lowpart (mode, operands[4]), + spu_const (mode, 0x80000000ULL)); + emit_move_insn (gen_lowpart (mode, operands[5]), + spu_const (mode, 0x3f800000ULL)); + emit_insn (gen_selb (operands[5], operands[5], operands[1], operands[4])); + + emit_insn (gen_add3 (gen_lowpart (mode, operands[4]), + gen_lowpart (mode, operands[3]), + spu_const (mode, 1))); + emit_insn (gen_fnms_ (operands[0], operands[2], operands[4], operands[1])); + emit_insn (gen_mul3 (operands[0], operands[0], operands[5])); + emit_insn (gen_cgt_ (gen_lowpart (mode, operands[0]), + gen_lowpart (mode, operands[0]), + spu_const (mode, -1))); + emit_insn (gen_selb (operands[0], operands[3], operands[4], operands[0])); + DONE; + }) + + +;; sqrt + +(define_insn_and_split "sqrtsf2" + [(set (match_operand:SF 0 "spu_reg_operand" "=r") + (sqrt:SF (match_operand:SF 1 "spu_reg_operand" "r"))) + (clobber (match_scratch:SF 2 "=&r")) + (clobber (match_scratch:SF 3 "=&r")) + (clobber (match_scratch:SF 4 "=&r")) + (clobber (match_scratch:SF 5 "=&r"))] + "" + "#" + "reload_completed" + [(set (match_dup:SF 0) + (sqrt:SF (match_dup:SF 1))) + (clobber (match_dup:SF 2)) + (clobber (match_dup:SF 3)) + (clobber (match_dup:SF 4)) + (clobber (match_dup:SF 5))] + { + emit_move_insn (operands[3],spu_float_const(\"0.5\",SFmode)); + emit_move_insn (operands[4],spu_float_const(\"1.00000011920928955078125\",SFmode)); + emit_insn (gen_frsqest_sf(operands[2],operands[1])); + emit_insn (gen_fi_sf(operands[2],operands[1],operands[2])); + emit_insn (gen_mulsf3(operands[5],operands[2],operands[1])); + emit_insn (gen_mulsf3(operands[3],operands[5],operands[3])); + emit_insn (gen_fnms_sf(operands[4],operands[2],operands[5],operands[4])); + emit_insn (gen_fma_sf(operands[0],operands[4],operands[3],operands[5])); + DONE; + }) + +(define_insn "frest_" + [(set (match_operand:VSF 0 "spu_reg_operand" "=r") + (unspec:VSF [(match_operand:VSF 1 "spu_reg_operand" "r")] UNSPEC_FREST))] + "" + "frest\t%0,%1" + [(set_attr "type" "shuf")]) + +(define_insn "frsqest_" + [(set (match_operand:VSF 0 "spu_reg_operand" "=r") + (unspec:VSF [(match_operand:VSF 1 "spu_reg_operand" "r")] UNSPEC_FRSQEST))] + "" + "frsqest\t%0,%1" + [(set_attr "type" "shuf")]) + +(define_insn "fi_" + [(set (match_operand:VSF 0 "spu_reg_operand" "=r") + (unspec:VSF [(match_operand:VSF 1 "spu_reg_operand" "r") + (match_operand:VSF 2 "spu_reg_operand" "r")] UNSPEC_FI))] + "" + "fi\t%0,%1,%2" + [(set_attr "type" "fp7")]) + + +;; and + +(define_insn "and3" + [(set (match_operand:MOV 0 "spu_reg_operand" "=r,r") + (and:MOV 
(match_operand:MOV 1 "spu_reg_operand" "r,r") + (match_operand:MOV 2 "spu_logical_operand" "r,C")))] + "" + "@ + and\t%0,%1,%2 + and%j2i\t%0,%1,%J2") + +(define_insn "anddi3" + [(set (match_operand:DI 0 "spu_reg_operand" "=r,r") + (and:DI (match_operand:DI 1 "spu_reg_operand" "r,r") + (match_operand:DI 2 "spu_logical_operand" "r,c")))] + "" + "@ + and\t%0,%1,%2 + and%k2i\t%0,%1,%K2") + +(define_insn "andti3" + [(set (match_operand:TI 0 "spu_reg_operand" "=r,r") + (and:TI (match_operand:TI 1 "spu_reg_operand" "r,r") + (match_operand:TI 2 "spu_logical_operand" "r,Y")))] + "" + "@ + and\t%0,%1,%2 + and%m2i\t%0,%1,%L2") + +(define_insn "andc_" + [(set (match_operand:ALL 0 "spu_reg_operand" "=r") + (and:ALL (not:ALL (match_operand:ALL 2 "spu_reg_operand" "r")) + (match_operand:ALL 1 "spu_reg_operand" "r")))] + "" + "andc\t%0,%1,%2") + +(define_insn "nand_" + [(set (match_operand:ALL 0 "spu_reg_operand" "=r") + (not:ALL (and:ALL (match_operand:ALL 2 "spu_reg_operand" "r") + (match_operand:ALL 1 "spu_reg_operand" "r"))))] + "" + "nand\t%0,%1,%2") + + +;; ior + +(define_insn "ior3" + [(set (match_operand:MOV 0 "spu_reg_operand" "=r,r,r") + (ior:MOV (match_operand:MOV 1 "spu_reg_operand" "r,r,0") + (match_operand:MOV 2 "spu_ior_operand" "r,C,D")))] + "" + "@ + or\t%0,%1,%2 + or%j2i\t%0,%1,%J2 + iohl\t%0,%J2") + +(define_insn "iordi3" + [(set (match_operand:DI 0 "spu_reg_operand" "=r,r,r") + (ior:DI (match_operand:DI 1 "spu_reg_operand" "r,r,0") + (match_operand:DI 2 "spu_ior_operand" "r,c,d")))] + "" + "@ + or\t%0,%1,%2 + or%k2i\t%0,%1,%K2 + iohl\t%0,%K2") + +(define_insn "iorti3" + [(set (match_operand:TI 0 "spu_reg_operand" "=r,r,r") + (ior:TI (match_operand:TI 1 "spu_reg_operand" "r,r,0") + (match_operand:TI 2 "spu_ior_operand" "r,Y,Z")))] + "" + "@ + or\t%0,%1,%2 + or%m2i\t%0,%1,%L2 + iohl\t%0,%L2") + +(define_insn "orc_" + [(set (match_operand:ALL 0 "spu_reg_operand" "=r") + (ior:ALL (not:ALL (match_operand:ALL 2 "spu_reg_operand" "r")) + (match_operand:ALL 1 "spu_reg_operand" "r")))] + "" + "orc\t%0,%1,%2") + +(define_insn "nor_" + [(set (match_operand:ALL 0 "spu_reg_operand" "=r") + (not:ALL (ior:ALL (match_operand:ALL 1 "spu_reg_operand" "r") + (match_operand:ALL 2 "spu_reg_operand" "r"))))] + "" + "nor\t%0,%1,%2") + +;; xor + +(define_insn "xor3" + [(set (match_operand:MOV 0 "spu_reg_operand" "=r,r") + (xor:MOV (match_operand:MOV 1 "spu_reg_operand" "r,r") + (match_operand:MOV 2 "spu_logical_operand" "r,B")))] + "" + "@ + xor\t%0,%1,%2 + xor%j2i\t%0,%1,%J2") + +(define_insn "xordi3" + [(set (match_operand:DI 0 "spu_reg_operand" "=r,r") + (xor:DI (match_operand:DI 1 "spu_reg_operand" "r,r") + (match_operand:DI 2 "spu_logical_operand" "r,c")))] + "" + "@ + xor\t%0,%1,%2 + xor%k2i\t%0,%1,%K2") + +(define_insn "xorti3" + [(set (match_operand:TI 0 "spu_reg_operand" "=r,r") + (xor:TI (match_operand:TI 1 "spu_reg_operand" "r,r") + (match_operand:TI 2 "spu_logical_operand" "r,Y")))] + "" + "@ + xor\t%0,%1,%2 + xor%m2i\t%0,%1,%L2") + +(define_insn "eqv_" + [(set (match_operand:ALL 0 "spu_reg_operand" "=r") + (not:ALL (xor:ALL (match_operand:ALL 1 "spu_reg_operand" "r") + (match_operand:ALL 2 "spu_reg_operand" "r"))))] + "" + "eqv\t%0,%1,%2") + +;; one_cmpl + +(define_insn "one_cmpl2" + [(set (match_operand:ALL 0 "spu_reg_operand" "=r") + (not:ALL (match_operand:ALL 1 "spu_reg_operand" "r")))] + "" + "nor\t%0,%1,%1") + + +;; selb + +(define_expand "selb" + [(set (match_operand 0 "spu_reg_operand" "") + (unspec [(match_operand 1 "spu_reg_operand" "") + (match_operand 2 "spu_reg_operand" "") + 
(match_operand 3 "spu_reg_operand" "")] UNSPEC_SELB))] + "" + { + rtx s = gen__selb (operands[0], operands[1], operands[2], operands[3]); + PUT_MODE (SET_SRC (s), GET_MODE (operands[0])); + emit_insn (s); + DONE; + }) + +;; This could be defined as a combination of logical operations, but at +;; one time it caused a crash due to recursive expansion of rtl during CSE. +(define_insn "_selb" + [(set (match_operand 0 "spu_reg_operand" "=r") + (unspec [(match_operand 1 "spu_reg_operand" "r") + (match_operand 2 "spu_reg_operand" "r") + (match_operand 3 "spu_reg_operand" "r")] UNSPEC_SELB))] + "GET_MODE(operands[0]) == GET_MODE(operands[1]) + && GET_MODE(operands[1]) == GET_MODE(operands[2])" + "selb\t%0,%1,%2,%3") + + +;; Misc. byte/bit operations +;; clz/ctz/ffs/popcount/parity +;; cntb/sumb + +(define_insn "clz2" + [(set (match_operand:VSI 0 "spu_reg_operand" "=r") + (clz:VSI (match_operand:VSI 1 "spu_reg_operand" "r")))] + "" + "clz\t%0,%1") + +(define_expand "ctz2" + [(set (match_dup 2) + (neg:VSI (match_operand:VSI 1 "spu_reg_operand" ""))) + (set (match_dup 3) (and:VSI (match_dup 1) + (match_dup 2))) + (set (match_dup 4) (clz:VSI (match_dup 3))) + (set (match_operand:VSI 0 "spu_reg_operand" "") + (minus:VSI (match_dup 5) (match_dup 4)))] + "" + { + operands[2] = gen_reg_rtx (mode); + operands[3] = gen_reg_rtx (mode); + operands[4] = gen_reg_rtx (mode); + operands[5] = spu_const(mode, 31); + }) + +(define_expand "ffs2" + [(set (match_dup 2) + (neg:VSI (match_operand:VSI 1 "spu_reg_operand" ""))) + (set (match_dup 3) (and:VSI (match_dup 1) + (match_dup 2))) + (set (match_dup 4) (clz:VSI (match_dup 3))) + (set (match_operand:VSI 0 "spu_reg_operand" "") + (minus:VSI (match_dup 5) (match_dup 4)))] + "" + { + operands[2] = gen_reg_rtx (mode); + operands[3] = gen_reg_rtx (mode); + operands[4] = gen_reg_rtx (mode); + operands[5] = spu_const(mode, 32); + }) + +(define_expand "popcountsi2" + [(set (match_dup 2) + (unspec:SI [(match_operand:SI 1 "spu_reg_operand" "")] + UNSPEC_CNTB)) + (set (match_dup 3) + (unspec:HI [(match_dup 2)] UNSPEC_SUMB)) + (set (match_operand:SI 0 "spu_reg_operand" "") + (sign_extend:SI (match_dup 3)))] + "" + { + operands[2] = gen_reg_rtx (SImode); + operands[3] = gen_reg_rtx (HImode); + }) + +(define_expand "paritysi2" + [(set (match_operand:SI 0 "spu_reg_operand" "") + (parity:SI (match_operand:SI 1 "spu_reg_operand" "")))] + "" + { + operands[2] = gen_reg_rtx (SImode); + emit_insn (gen_popcountsi2(operands[2], operands[1])); + emit_insn (gen_andsi3(operands[0], operands[2], GEN_INT (1))); + DONE; + }) + +(define_insn "cntb_si" + [(set (match_operand:SI 0 "spu_reg_operand" "=r") + (unspec:SI [(match_operand:SI 1 "spu_reg_operand" "r")] + UNSPEC_CNTB))] + "" + "cntb\t%0,%1" + [(set_attr "type" "fxb")]) + +(define_insn "cntb_v16qi" + [(set (match_operand:V16QI 0 "spu_reg_operand" "=r") + (unspec:V16QI [(match_operand:V16QI 1 "spu_reg_operand" "r")] + UNSPEC_CNTB))] + "" + "cntb\t%0,%1" + [(set_attr "type" "fxb")]) + +(define_insn "sumb_si" + [(set (match_operand:HI 0 "spu_reg_operand" "=r") + (unspec:HI [(match_operand:SI 1 "spu_reg_operand" "r")] UNSPEC_SUMB))] + "" + "sumb\t%0,%1,%1" + [(set_attr "type" "fxb")]) + + +;; ashl, vashl + +(define_insn "ashl3" + [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r") + (ashift:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r") + (match_operand:VHSI 2 "spu_nonmem_operand" "r,W")))] + "" + "@ + shl\t%0,%1,%2 + shli\t%0,%1,%2" + [(set_attr "type" "fx3")]) + +(define_insn_and_split "ashldi3" + [(set (match_operand:DI 0 
"spu_reg_operand" "=r,r") + (ashift:DI (match_operand:DI 1 "spu_reg_operand" "r,r") + (match_operand:SI 2 "spu_nonmem_operand" "r,I"))) + (clobber (match_scratch:SI 3 "=&r,X"))] + "" + "#" + "reload_completed" + [(set (match_dup:DI 0) + (ashift:DI (match_dup:DI 1) + (match_dup:SI 2)))] + { + rtx op0 = gen_rtx_REG (TImode, REGNO (operands[0])); + rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1])); + rtx op2 = operands[2]; + rtx op3 = operands[3]; + + if (GET_CODE (operands[2]) == REG) + { + emit_insn (gen_addsi3 (op3, op2, GEN_INT (64))); + emit_insn (gen_rotlti3 (op0, op1, GEN_INT (64))); + emit_insn (gen_shlqbybi_ti (op0, op0, op3)); + emit_insn (gen_shlqbi_ti (op0, op0, op3)); + } + else + { + HOST_WIDE_INT val = INTVAL (operands[2]); + emit_insn (gen_rotlti3 (op0, op1, GEN_INT (64))); + emit_insn (gen_shlqby_ti (op0, op0, GEN_INT (val / 8 + 8))); + if (val % 8) + emit_insn (gen_shlqbi_ti (op0, op0, GEN_INT (val % 8))); + } + DONE; + }) + +(define_expand "ashlti3" + [(parallel [(set (match_operand:TI 0 "spu_reg_operand" "") + (ashift:TI (match_operand:TI 1 "spu_reg_operand" "") + (match_operand:SI 2 "spu_nonmem_operand" ""))) + (clobber (match_dup:TI 3))])] + "" + "if (GET_CODE (operands[2]) == CONST_INT) + { + emit_insn (gen_ashlti3_imm(operands[0], operands[1], operands[2])); + DONE; + } + operands[3] = gen_reg_rtx (TImode);") + +(define_insn_and_split "ashlti3_imm" + [(set (match_operand:TI 0 "spu_reg_operand" "=r,r") + (ashift:TI (match_operand:TI 1 "spu_reg_operand" "r,r") + (match_operand:SI 2 "immediate_operand" "O,P")))] + "" + "@ + shlqbyi\t%0,%1,%h2 + shlqbii\t%0,%1,%e2" + "!satisfies_constraint_O (operands[2]) && !satisfies_constraint_P (operands[2])" + [(set (match_dup:TI 0) + (ashift:TI (match_dup:TI 1) + (match_dup:SI 3))) + (set (match_dup:TI 0) + (ashift:TI (match_dup:TI 0) + (match_dup:SI 4)))] + { + HOST_WIDE_INT val = INTVAL(operands[2]); + operands[3] = GEN_INT (val&7); + operands[4] = GEN_INT (val&-8); + } + [(set_attr "type" "shuf,shuf")]) + +(define_insn_and_split "ashlti3_reg" + [(set (match_operand:TI 0 "spu_reg_operand" "=r") + (ashift:TI (match_operand:TI 1 "spu_reg_operand" "r") + (match_operand:SI 2 "spu_reg_operand" "r"))) + (clobber (match_operand:TI 3 "spu_reg_operand" "=&r"))] + "" + "#" + "" + [(set (match_dup:TI 3) + (ashift:TI (match_dup:TI 1) + (and:SI (match_dup:SI 2) + (const_int 7)))) + (set (match_dup:TI 0) + (ashift:TI (match_dup:TI 3) + (and:SI (match_dup:SI 2) + (const_int -8))))] + "") + +(define_insn "shlqbybi_ti" + [(set (match_operand:TI 0 "spu_reg_operand" "=r,r") + (ashift:TI (match_operand:TI 1 "spu_reg_operand" "r,r") + (and:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I") + (const_int -8))))] + "" + "@ + shlqbybi\t%0,%1,%2 + shlqbyi\t%0,%1,%h2" + [(set_attr "type" "shuf,shuf")]) + +(define_insn "shlqbi_ti" + [(set (match_operand:TI 0 "spu_reg_operand" "=r,r") + (ashift:TI (match_operand:TI 1 "spu_reg_operand" "r,r") + (and:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I") + (const_int 7))))] + "" + "@ + shlqbi\t%0,%1,%2 + shlqbii\t%0,%1,%e2" + [(set_attr "type" "shuf,shuf")]) + +(define_insn "shlqby_ti" + [(set (match_operand:TI 0 "spu_reg_operand" "=r,r") + (ashift:TI (match_operand:TI 1 "spu_reg_operand" "r,r") + (mult:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I") + (const_int 8))))] + "" + "@ + shlqby\t%0,%1,%2 + shlqbyi\t%0,%1,%f2" + [(set_attr "type" "shuf,shuf")]) + + +;; lshr, vlshr + +(define_insn_and_split "lshr3" + [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r") + (lshiftrt:VHSI (match_operand:VHSI 1 
"spu_reg_operand" "r,r") + (match_operand:VHSI 2 "spu_nonmem_operand" "r,W"))) + (clobber (match_scratch:VHSI 3 "=&r,X"))] + "" + "@ + # + rotmi\t%0,%1,-%2" + "reload_completed && GET_CODE (operands[2]) == REG" + [(set (match_dup:VHSI 3) + (neg:VHSI (match_dup:VHSI 2))) + (set (match_dup:VHSI 0) + (lshiftrt:VHSI (match_dup:VHSI 1) + (neg:VHSI (match_dup:VHSI 3))))] + "" + [(set_attr "type" "*,fx3")]) + +(define_insn "lshr3_imm" + [(set (match_operand:VHSI 0 "spu_reg_operand" "=r") + (lshiftrt:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r") + (match_operand:VHSI 2 "immediate_operand" "W")))] + "" + "rotmi\t%0,%1,-%2" + [(set_attr "type" "fx3")]) + +(define_insn "rotm_" + [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r") + (lshiftrt:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r") + (neg:VHSI (match_operand:VHSI 2 "spu_nonmem_operand" "r,W"))))] + "" + "@ + rotm\t%0,%1,%2 + rotmi\t%0,%1,-%2" + [(set_attr "type" "fx3")]) + +(define_insn_and_split "lshr3" + [(set (match_operand:DTI 0 "spu_reg_operand" "=r,r,r") + (lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r,r,r") + (match_operand:SI 2 "spu_nonmem_operand" "r,O,P")))] + "" + "@ + # + rotqmbyi\t%0,%1,-%h2 + rotqmbii\t%0,%1,-%e2" + "REG_P (operands[2]) || (!satisfies_constraint_O (operands[2]) && !satisfies_constraint_P (operands[2]))" + [(set (match_dup:DTI 3) + (lshiftrt:DTI (match_dup:DTI 1) + (match_dup:SI 4))) + (set (match_dup:DTI 0) + (lshiftrt:DTI (match_dup:DTI 3) + (match_dup:SI 5)))] + { + operands[3] = gen_reg_rtx (mode); + if (GET_CODE (operands[2]) == CONST_INT) + { + HOST_WIDE_INT val = INTVAL(operands[2]); + operands[4] = GEN_INT (val & 7); + operands[5] = GEN_INT (val & -8); + } + else + { + rtx t0 = gen_reg_rtx (SImode); + rtx t1 = gen_reg_rtx (SImode); + emit_insn (gen_subsi3(t0, GEN_INT(0), operands[2])); + emit_insn (gen_subsi3(t1, GEN_INT(7), operands[2])); + operands[4] = gen_rtx_AND (SImode, gen_rtx_NEG (SImode, t0), GEN_INT (7)); + operands[5] = gen_rtx_AND (SImode, gen_rtx_NEG (SImode, gen_rtx_AND (SImode, t1, GEN_INT (-8))), GEN_INT (-8)); + } + } + [(set_attr "type" "*,shuf,shuf")]) + +(define_expand "shrqbybi_" + [(set (match_operand:DTI 0 "spu_reg_operand" "=r,r") + (lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r,r") + (and:SI (neg:SI (and:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I") + (const_int -8))) + (const_int -8))))] + "" + { + if (GET_CODE (operands[2]) == CONST_INT) + operands[2] = GEN_INT (7 - INTVAL (operands[2])); + else + { + rtx t0 = gen_reg_rtx (SImode); + emit_insn (gen_subsi3 (t0, GEN_INT (7), operands[2])); + operands[2] = t0; + } + }) + +(define_insn "rotqmbybi_" + [(set (match_operand:DTI 0 "spu_reg_operand" "=r,r") + (lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r,r") + (and:SI (neg:SI (and:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I") + (const_int -8))) + (const_int -8))))] + "" + "@ + rotqmbybi\t%0,%1,%2 + rotqmbyi\t%0,%1,-%H2" + [(set_attr "type" "shuf")]) + +(define_insn_and_split "shrqbi_" + [(set (match_operand:DTI 0 "spu_reg_operand" "=r,r") + (lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r,r") + (and:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I") + (const_int 7)))) + (clobber (match_scratch:SI 3 "=&r,X"))] + "" + "#" + "reload_completed" + [(set (match_dup:DTI 0) + (lshiftrt:DTI (match_dup:DTI 1) + (and:SI (neg:SI (match_dup:SI 3)) (const_int 7))))] + { + if (GET_CODE (operands[2]) == CONST_INT) + operands[3] = GEN_INT (-INTVAL (operands[2])); + else + emit_insn (gen_subsi3 (operands[3], GEN_INT (0), operands[2])); + } + [(set_attr 
"type" "shuf")]) + +(define_insn "rotqmbi_" + [(set (match_operand:DTI 0 "spu_reg_operand" "=r,r") + (lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r,r") + (and:SI (neg:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")) + (const_int 7))))] + "" + "@ + rotqmbi\t%0,%1,%2 + rotqmbii\t%0,%1,-%E2" + [(set_attr "type" "shuf")]) + +(define_expand "shrqby_" + [(set (match_operand:DTI 0 "spu_reg_operand" "=r,r") + (lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r,r") + (mult:SI (neg:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")) + (const_int 8))))] + "" + { + if (GET_CODE (operands[2]) == CONST_INT) + operands[2] = GEN_INT (-INTVAL (operands[2])); + else + { + rtx t0 = gen_reg_rtx (SImode); + emit_insn (gen_subsi3 (t0, GEN_INT (0), operands[2])); + operands[2] = t0; + } + }) + +(define_insn "rotqmby_" + [(set (match_operand:DTI 0 "spu_reg_operand" "=r,r") + (lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r,r") + (mult:SI (neg:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")) + (const_int 8))))] + "" + "@ + rotqmby\t%0,%1,%2 + rotqmbyi\t%0,%1,-%F2" + [(set_attr "type" "shuf")]) + + +;; ashr, vashr + +(define_insn_and_split "ashr3" + [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r") + (ashiftrt:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r") + (match_operand:VHSI 2 "spu_nonmem_operand" "r,W"))) + (clobber (match_scratch:VHSI 3 "=&r,X"))] + "" + "@ + # + rotmai\t%0,%1,-%2" + "reload_completed && GET_CODE (operands[2]) == REG" + [(set (match_dup:VHSI 3) + (neg:VHSI (match_dup:VHSI 2))) + (set (match_dup:VHSI 0) + (ashiftrt:VHSI (match_dup:VHSI 1) + (neg:VHSI (match_dup:VHSI 3))))] + "" + [(set_attr "type" "*,fx3")]) + +(define_insn "ashr3_imm" + [(set (match_operand:VHSI 0 "spu_reg_operand" "=r") + (ashiftrt:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r") + (match_operand:VHSI 2 "immediate_operand" "W")))] + "" + "rotmai\t%0,%1,-%2" + [(set_attr "type" "fx3")]) + + +(define_insn "rotma_" + [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r") + (ashiftrt:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r") + (neg:VHSI (match_operand:VHSI 2 "spu_nonmem_operand" "r,W"))))] + "" + "@ + rotma\t%0,%1,%2 + rotmai\t%0,%1,-%2" + [(set_attr "type" "fx3")]) + +(define_insn_and_split "ashrdi3" + [(set (match_operand:DI 0 "spu_reg_operand" "=r,r") + (ashiftrt:DI (match_operand:DI 1 "spu_reg_operand" "r,r") + (match_operand:SI 2 "spu_nonmem_operand" "r,I"))) + (clobber (match_scratch:TI 3 "=&r,&r")) + (clobber (match_scratch:TI 4 "=&r,&r")) + (clobber (match_scratch:SI 5 "=&r,&r"))] + "" + "#" + "reload_completed" + [(set (match_dup:DI 0) + (ashiftrt:DI (match_dup:DI 1) + (match_dup:SI 2)))] + { + rtx op0 = gen_rtx_REG (TImode, REGNO (operands[0])); + rtx op0v = gen_rtx_REG (V4SImode, REGNO (op0)); + rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1])); + rtx op1s = gen_rtx_REG (SImode, REGNO (op1)); + rtx op2 = operands[2]; + rtx op3 = operands[3]; + rtx op4 = operands[4]; + rtx op5 = operands[5]; + + if (GET_CODE (op2) == CONST_INT && INTVAL (op2) >= 63) + { + rtx op0s = gen_rtx_REG (SImode, REGNO (op0)); + emit_insn (gen_ashrsi3 (op0s, op1s, GEN_INT (32))); + emit_insn (gen_spu_fsm (op0v, op0s)); + } + else if (GET_CODE (op2) == CONST_INT && INTVAL (op2) >= 32) + { + rtx op0d = gen_rtx_REG (V2DImode, REGNO (op0)); + HOST_WIDE_INT val = INTVAL (op2); + emit_insn (gen_lshrti3 (op0, op1, GEN_INT (32))); + emit_insn (gen_spu_xswd (op0d, op0v)); + if (val > 32) + emit_insn (gen_vashrv4si3 (op0v, op0v, spu_const (V4SImode, val - 32))); + } + else + { + rtx op3v = gen_rtx_REG 
(V4SImode, REGNO (op3)); + unsigned char arr[16] = { + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00 + }; + + emit_insn (gen_ashrsi3 (op5, op1s, GEN_INT (31))); + emit_move_insn (op4, array_to_constant (TImode, arr)); + emit_insn (gen_spu_fsm (op3v, op5)); + + if (GET_CODE (operands[2]) == REG) + { + emit_insn (gen_selb (op4, op3, op1, op4)); + emit_insn (gen_negsi2 (op5, op2)); + emit_insn (gen_rotqbybi_ti (op0, op4, op5)); + emit_insn (gen_rotqbi_ti (op0, op0, op5)); + } + else + { + HOST_WIDE_INT val = -INTVAL (op2); + emit_insn (gen_selb (op0, op3, op1, op4)); + if ((val - 7) / 8) + emit_insn (gen_rotqby_ti (op0, op0, GEN_INT ((val - 7) / 8))); + if (val % 8) + emit_insn (gen_rotqbi_ti (op0, op0, GEN_INT (val % 8))); + } + } + DONE; + }) + + +(define_insn_and_split "ashrti3" + [(set (match_operand:TI 0 "spu_reg_operand" "=r,r") + (ashiftrt:TI (match_operand:TI 1 "spu_reg_operand" "r,r") + (match_operand:SI 2 "spu_nonmem_operand" "r,i")))] + "" + "#" + "" + [(set (match_dup:TI 0) + (ashiftrt:TI (match_dup:TI 1) + (match_dup:SI 2)))] + { + rtx sign_shift = gen_reg_rtx (SImode); + rtx sign_mask = gen_reg_rtx (TImode); + rtx sign_mask_v4si = gen_rtx_SUBREG (V4SImode, sign_mask, 0); + rtx op1_v4si = spu_gen_subreg (V4SImode, operands[1]); + rtx t = gen_reg_rtx (TImode); + emit_insn (gen_subsi3 (sign_shift, GEN_INT (128), force_reg (SImode, operands[2]))); + emit_insn (gen_vashrv4si3 (sign_mask_v4si, op1_v4si, spu_const (V4SImode, 31))); + emit_insn (gen_fsm_ti (sign_mask, sign_mask)); + emit_insn (gen_ashlti3 (sign_mask, sign_mask, sign_shift)); + emit_insn (gen_lshrti3 (t, operands[1], operands[2])); + emit_insn (gen_iorti3 (operands[0], t, sign_mask)); + DONE; + }) + +;; fsm is used after rotam to replicate the sign across the whole register. 
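The ashrdi3 and ashrti3 splitters above synthesize an arithmetic right shift the SPU does not have for wide modes: the value is shifted logically, and the vacated high bits are filled from a sign mask built with vashr and the fsm_ti pattern below. A 64-bit C sketch of that decomposition (illustrative only; the real patterns work on 128-bit registers):

  #include <stdint.h>

  /* Arithmetic shift right as "logical shift right, then OR in a mask
     of sign bits over the n vacated high positions".  Assumes 0 < n < 64.  */
  static int64_t ashr_sketch (int64_t x, unsigned n)
  {
    uint64_t logical   = (uint64_t) x >> n;
    uint64_t sign_mask = (x < 0) ? (~0ull << (64 - n)) : 0;
    return (int64_t) (logical | sign_mask);
  }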
+(define_insn "fsm_ti" + [(set (match_operand:TI 0 "spu_reg_operand" "=r") + (unspec:TI [(match_operand:TI 1 "spu_reg_operand" "r")] UNSPEC_FSM))] + "" + "fsm\t%0,%1" + [(set_attr "type" "shuf")]) + + +;; vrotl, rotl + +(define_insn "rotl3" + [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r") + (rotate:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r") + (match_operand:VHSI 2 "spu_nonmem_operand" "r,W")))] + "" + "@ + rot\t%0,%1,%2 + roti\t%0,%1,%2" + [(set_attr "type" "fx3")]) + +(define_insn "rotlti3" + [(set (match_operand:TI 0 "spu_reg_operand" "=&r,r,r,r") + (rotate:TI (match_operand:TI 1 "spu_reg_operand" "r,r,r,r") + (match_operand:SI 2 "spu_nonmem_operand" "r,O,P,I")))] + "" + "@ + rotqbybi\t%0,%1,%2\;rotqbi\t%0,%0,%2 + rotqbyi\t%0,%1,%h2 + rotqbii\t%0,%1,%e2 + rotqbyi\t%0,%1,%h2\;rotqbii\t%0,%0,%e2" + [(set_attr "length" "8,4,4,8") + (set_attr "type" "multi1,shuf,shuf,multi1")]) + +(define_insn "rotqbybi_ti" + [(set (match_operand:TI 0 "spu_reg_operand" "=r,r") + (rotate:TI (match_operand:TI 1 "spu_reg_operand" "r,r") + (and:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I") + (const_int -8))))] + "" + "@ + rotqbybi\t%0,%1,%2 + rotqbyi\t%0,%1,%h2" + [(set_attr "type" "shuf,shuf")]) + +(define_insn "rotqby_ti" + [(set (match_operand:TI 0 "spu_reg_operand" "=r,r") + (rotate:TI (match_operand:TI 1 "spu_reg_operand" "r,r") + (mult:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I") + (const_int 8))))] + "" + "@ + rotqby\t%0,%1,%2 + rotqbyi\t%0,%1,%f2" + [(set_attr "type" "shuf,shuf")]) + +(define_insn "rotqbi_ti" + [(set (match_operand:TI 0 "spu_reg_operand" "=r,r") + (rotate:TI (match_operand:TI 1 "spu_reg_operand" "r,r") + (and:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I") + (const_int 7))))] + "" + "@ + rotqbi\t%0,%1,%2 + rotqbii\t%0,%1,%e2" + [(set_attr "type" "shuf,shuf")]) + + +;; struct extract/insert +;; We handle mem's because GCC will generate invalid SUBREG's +;; and inefficient code. + +(define_expand "extv" + [(set (match_operand:TI 0 "register_operand" "") + (sign_extract:TI (match_operand 1 "nonimmediate_operand" "") + (match_operand:SI 2 "const_int_operand" "") + (match_operand:SI 3 "const_int_operand" "")))] + "" + { + spu_expand_extv (operands, 0); + DONE; + }) + +(define_expand "extzv" + [(set (match_operand:TI 0 "register_operand" "") + (zero_extract:TI (match_operand 1 "nonimmediate_operand" "") + (match_operand:SI 2 "const_int_operand" "") + (match_operand:SI 3 "const_int_operand" "")))] + "" + { + spu_expand_extv (operands, 1); + DONE; + }) + +(define_expand "insv" + [(set (zero_extract (match_operand 0 "nonimmediate_operand" "") + (match_operand:SI 1 "const_int_operand" "") + (match_operand:SI 2 "const_int_operand" "")) + (match_operand 3 "nonmemory_operand" ""))] + "" + { spu_expand_insv(operands); DONE; }) + +;; Simplify a number of patterns that get generated by extv, extzv, +;; insv, and loads. 
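The trunc_shr_ti / shl_ext_ti patterns that follow can split to a zero-length register-view change because a scalar lives in the left-most (most significant) bytes of its 128-bit register: shifting the TImode view right by 96 bits and truncating to 32 re-reads exactly those bytes. A small illustration of the numeric identity (assumes a host compiler with __int128; this is not SPU code):

  #include <stdint.h>

  /* What (truncate:SI (lshiftrt:TI x (const_int 96))) computes:
     the most significant 32 bits of the 128-bit value.  */
  static uint32_t top_word_sketch (unsigned __int128 reg)
  {
    return (uint32_t) (reg >> 96);
  }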
+(define_insn_and_split "trunc_shr_ti" + [(set (match_operand:QHSI 0 "spu_reg_operand" "=r") + (truncate:QHSI (match_operator:TI 2 "shiftrt_operator" [(match_operand:TI 1 "spu_reg_operand" "0") + (const_int 96)])))] + "" + "#" + "reload_completed" + [(const_int 0)] + { + spu_split_convert (operands); + DONE; + } + [(set_attr "type" "convert") + (set_attr "length" "0")]) + +(define_insn_and_split "trunc_shr_tidi" + [(set (match_operand:DI 0 "spu_reg_operand" "=r") + (truncate:DI (match_operator:TI 2 "shiftrt_operator" [(match_operand:TI 1 "spu_reg_operand" "0") + (const_int 64)])))] + "" + "#" + "reload_completed" + [(const_int 0)] + { + spu_split_convert (operands); + DONE; + } + [(set_attr "type" "convert") + (set_attr "length" "0")]) + +(define_insn_and_split "shl_ext_ti" + [(set (match_operand:TI 0 "spu_reg_operand" "=r") + (ashift:TI (match_operator:TI 2 "extend_operator" [(match_operand:QHSI 1 "spu_reg_operand" "0")]) + (const_int 96)))] + "" + "#" + "reload_completed" + [(const_int 0)] + { + spu_split_convert (operands); + DONE; + } + [(set_attr "type" "convert") + (set_attr "length" "0")]) + +(define_insn_and_split "shl_ext_diti" + [(set (match_operand:TI 0 "spu_reg_operand" "=r") + (ashift:TI (match_operator:TI 2 "extend_operator" [(match_operand:DI 1 "spu_reg_operand" "0")]) + (const_int 64)))] + "" + "#" + "reload_completed" + [(const_int 0)] + { + spu_split_convert (operands); + DONE; + } + [(set_attr "type" "convert") + (set_attr "length" "0")]) + +(define_insn "sext_trunc_lshr_tiqisi" + [(set (match_operand:SI 0 "spu_reg_operand" "=r") + (sign_extend:SI (truncate:QI (match_operator:TI 2 "shiftrt_operator" [(match_operand:TI 1 "spu_reg_operand" "r") + (const_int 120)]))))] + "" + "rotmai\t%0,%1,-24" + [(set_attr "type" "fx3")]) + +(define_insn "zext_trunc_lshr_tiqisi" + [(set (match_operand:SI 0 "spu_reg_operand" "=r") + (zero_extend:SI (truncate:QI (match_operator:TI 2 "shiftrt_operator" [(match_operand:TI 1 "spu_reg_operand" "r") + (const_int 120)]))))] + "" + "rotmi\t%0,%1,-24" + [(set_attr "type" "fx3")]) + +(define_insn "sext_trunc_lshr_tihisi" + [(set (match_operand:SI 0 "spu_reg_operand" "=r") + (sign_extend:SI (truncate:HI (match_operator:TI 2 "shiftrt_operator" [(match_operand:TI 1 "spu_reg_operand" "r") + (const_int 112)]))))] + "" + "rotmai\t%0,%1,-16" + [(set_attr "type" "fx3")]) + +(define_insn "zext_trunc_lshr_tihisi" + [(set (match_operand:SI 0 "spu_reg_operand" "=r") + (zero_extend:SI (truncate:HI (match_operator:TI 2 "shiftrt_operator" [(match_operand:TI 1 "spu_reg_operand" "r") + (const_int 112)]))))] + "" + "rotmi\t%0,%1,-16" + [(set_attr "type" "fx3")]) + + +;; String/block move insn. +;; Argument 0 is the destination +;; Argument 1 is the source +;; Argument 2 is the length +;; Argument 3 is the alignment + +(define_expand "movstrsi" + [(parallel [(set (match_operand:BLK 0 "" "") + (match_operand:BLK 1 "" "")) + (use (match_operand:SI 2 "" "")) + (use (match_operand:SI 3 "" ""))])] + "" + " + { + if (spu_expand_block_move (operands)) + DONE; + else + FAIL; + }") + + +;; jump + +(define_insn "indirect_jump" + [(set (pc) (match_operand:SI 0 "spu_reg_operand" "r"))] + "" + "bi\t%0" + [(set_attr "type" "br")]) + +(define_insn "jump" + [(set (pc) + (label_ref (match_operand 0 "" "")))] + "" + "br\t%0" + [(set_attr "type" "br")]) + + +;; return + +;; This will be used for leaf functions, that don't save any regs and +;; don't have locals on stack, maybe... that is for functions that +;; don't change $sp and don't need to save $lr. 
+(define_expand "return" + [(return)] + "direct_return()" + "") + +;; used in spu_expand_epilogue to generate return from a function and +;; explicitly set use of $lr. + +(define_insn "_return" + [(return)] + "" + "bi\t$lr" + [(set_attr "type" "br")]) + + + +;; ceq + +(define_insn "ceq_" + [(set (match_operand:VQHSI 0 "spu_reg_operand" "=r,r") + (eq:VQHSI (match_operand:VQHSI 1 "spu_reg_operand" "r,r") + (match_operand:VQHSI 2 "spu_arith_operand" "r,B")))] + "" + "@ + ceq\t%0,%1,%2 + ceqi\t%0,%1,%2") + +(define_insn_and_split "ceq_di" + [(set (match_operand:SI 0 "spu_reg_operand" "=r") + (eq:SI (match_operand:DI 1 "spu_reg_operand" "r") + (match_operand:DI 2 "spu_reg_operand" "r")))] + "" + "#" + "reload_completed" + [(set (match_dup:SI 0) + (eq:SI (match_dup:DI 1) + (match_dup:DI 2)))] + { + rtx op0 = gen_rtx_REG (V4SImode, REGNO (operands[0])); + rtx op1 = gen_rtx_REG (V4SImode, REGNO (operands[1])); + rtx op2 = gen_rtx_REG (V4SImode, REGNO (operands[2])); + emit_insn (gen_ceq_v4si (op0, op1, op2)); + emit_insn (gen_spu_gb (op0, op0)); + emit_insn (gen_cgt_si (operands[0], operands[0], GEN_INT (11))); + DONE; + }) + + +;; We provide the TI compares for completeness and because some parts of +;; gcc/libgcc use them, even though user code might never see it. +(define_insn "ceq_ti" + [(set (match_operand:SI 0 "spu_reg_operand" "=r") + (eq:SI (match_operand:TI 1 "spu_reg_operand" "r") + (match_operand:TI 2 "spu_reg_operand" "r")))] + "" + "ceq\t%0,%1,%2\;gb\t%0,%0\;ceqi\t%0,%0,15" + [(set_attr "type" "multi0") + (set_attr "length" "12")]) + +(define_insn "ceq_" + [(set (match_operand: 0 "spu_reg_operand" "=r") + (eq: (match_operand:VSF 1 "spu_reg_operand" "r") + (match_operand:VSF 2 "spu_reg_operand" "r")))] + "" + "fceq\t%0,%1,%2") + +(define_insn "cmeq_" + [(set (match_operand: 0 "spu_reg_operand" "=r") + (eq: (abs:VSF (match_operand:VSF 1 "spu_reg_operand" "r")) + (abs:VSF (match_operand:VSF 2 "spu_reg_operand" "r"))))] + "" + "fcmeq\t%0,%1,%2") + +;; These implementations will ignore checking of NaN or INF if +;; compiled with option -ffinite-math-only. 
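On PROCESSOR_CELL there is no double-precision compare instruction, so the ceq_df / ceq_v2df expanders below emulate equality with integer operations on the bit patterns: the operands are equal when their bits match, or when both are (+/-)zero, provided the value is not a NaN (the NaN test is dropped under -ffinite-math-only). A scalar C sketch of that test (illustration only; the constants are spelled out here rather than built with spu_const_from_ints, and the vector code does the 64-bit tests as paired 32-bit compares):

  #include <stdint.h>
  #include <string.h>

  /* Double equality via integer bit tests, mirroring the Cell expander.  */
  static int df_eq_sketch (double a, double b)
  {
    uint64_t ua, ub;
    memcpy (&ua, &a, sizeof ua);
    memcpy (&ub, &b, sizeof ub);

    uint64_t a_abs = ua & 0x7fffffffffffffffull;   /* sign_mask step       */
    uint64_t b_abs = ub & 0x7fffffffffffffffull;
    int biteq  = (ua == ub);                       /* identical bit pattern */
    int iszero = ((a_abs | b_abs) == 0);           /* +0 equals -0          */
    int a_nan  = (a_abs > 0x7ff0000000000000ull);  /* above the Inf encoding */

    /* Testing one operand for NaN is enough: if only b is NaN the bit
       patterns already differ, so biteq is false.  */
    return (biteq || iszero) && !a_nan;
  }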
+(define_expand "ceq_df" + [(set (match_operand:SI 0 "spu_reg_operand" "=r") + (eq:SI (match_operand:DF 1 "spu_reg_operand" "r") + (match_operand:DF 2 "const_zero_operand" "i")))] + "" +{ + if (spu_arch == PROCESSOR_CELL) + { + rtx ra = gen_reg_rtx (V4SImode); + rtx rb = gen_reg_rtx (V4SImode); + rtx temp = gen_reg_rtx (TImode); + rtx temp_v4si = spu_gen_subreg (V4SImode, temp); + rtx temp2 = gen_reg_rtx (V4SImode); + rtx biteq = gen_reg_rtx (V4SImode); + rtx ahi_inf = gen_reg_rtx (V4SImode); + rtx a_nan = gen_reg_rtx (V4SImode); + rtx a_abs = gen_reg_rtx (V4SImode); + rtx b_abs = gen_reg_rtx (V4SImode); + rtx iszero = gen_reg_rtx (V4SImode); + rtx sign_mask = gen_reg_rtx (V4SImode); + rtx nan_mask = gen_reg_rtx (V4SImode); + rtx hihi_promote = gen_reg_rtx (TImode); + rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF, + 0x7FFFFFFF, 0xFFFFFFFF); + + emit_move_insn (sign_mask, pat); + pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0, + 0x7FF00000, 0x0); + emit_move_insn (nan_mask, pat); + pat = spu_const_from_ints (TImode, 0x00010203, 0x10111213, + 0x08090A0B, 0x18191A1B); + emit_move_insn (hihi_promote, pat); + + emit_insn (gen_spu_convert (ra, operands[1])); + emit_insn (gen_spu_convert (rb, operands[2])); + emit_insn (gen_ceq_v4si (biteq, ra, rb)); + emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, biteq), + GEN_INT (4 * 8))); + emit_insn (gen_andv4si3 (biteq, biteq, temp_v4si)); + + emit_insn (gen_andv4si3 (a_abs, ra, sign_mask)); + emit_insn (gen_andv4si3 (b_abs, rb, sign_mask)); + if (!flag_finite_math_only) + { + emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask)); + emit_insn (gen_ceq_v4si (ahi_inf, a_abs, nan_mask)); + emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan), + GEN_INT (4 * 8))); + emit_insn (gen_andv4si3 (temp2, temp_v4si, ahi_inf)); + emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2)); + } + emit_insn (gen_iorv4si3 (temp2, a_abs, b_abs)); + emit_insn (gen_ceq_v4si (iszero, temp2, CONST0_RTX (V4SImode))); + emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, iszero), + GEN_INT (4 * 8))); + emit_insn (gen_andv4si3 (iszero, iszero, temp_v4si)); + emit_insn (gen_iorv4si3 (temp2, biteq, iszero)); + if (!flag_finite_math_only) + { + emit_insn (gen_andc_v4si (temp2, temp2, a_nan)); + } + emit_insn (gen_shufb (operands[0], temp2, temp2, hihi_promote)); + DONE; + } +}) + +(define_insn "ceq__celledp" + [(set (match_operand: 0 "spu_reg_operand" "=r") + (eq: (match_operand:VDF 1 "spu_reg_operand" "r") + (match_operand:VDF 2 "spu_reg_operand" "r")))] + "spu_arch == PROCESSOR_CELLEDP" + "dfceq\t%0,%1,%2" + [(set_attr "type" "fpd")]) + +(define_insn "cmeq__celledp" + [(set (match_operand: 0 "spu_reg_operand" "=r") + (eq: (abs:VDF (match_operand:VDF 1 "spu_reg_operand" "r")) + (abs:VDF (match_operand:VDF 2 "spu_reg_operand" "r"))))] + "spu_arch == PROCESSOR_CELLEDP" + "dfcmeq\t%0,%1,%2" + [(set_attr "type" "fpd")]) + +(define_expand "ceq_v2df" + [(set (match_operand:V2DI 0 "spu_reg_operand" "=r") + (eq:V2DI (match_operand:V2DF 1 "spu_reg_operand" "r") + (match_operand:V2DF 2 "spu_reg_operand" "r")))] + "" +{ + if (spu_arch == PROCESSOR_CELL) + { + rtx ra = spu_gen_subreg (V4SImode, operands[1]); + rtx rb = spu_gen_subreg (V4SImode, operands[2]); + rtx temp = gen_reg_rtx (TImode); + rtx temp_v4si = spu_gen_subreg (V4SImode, temp); + rtx temp2 = gen_reg_rtx (V4SImode); + rtx biteq = gen_reg_rtx (V4SImode); + rtx ahi_inf = gen_reg_rtx (V4SImode); + rtx a_nan = gen_reg_rtx (V4SImode); + rtx a_abs = gen_reg_rtx (V4SImode); + rtx b_abs = gen_reg_rtx (V4SImode); 
+ rtx iszero = gen_reg_rtx (V4SImode); + rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF, + 0x7FFFFFFF, 0xFFFFFFFF); + rtx sign_mask = gen_reg_rtx (V4SImode); + rtx nan_mask = gen_reg_rtx (V4SImode); + rtx hihi_promote = gen_reg_rtx (TImode); + + emit_move_insn (sign_mask, pat); + pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0, + 0x7FF00000, 0x0); + emit_move_insn (nan_mask, pat); + pat = spu_const_from_ints (TImode, 0x00010203, 0x10111213, + 0x08090A0B, 0x18191A1B); + emit_move_insn (hihi_promote, pat); + + emit_insn (gen_ceq_v4si (biteq, ra, rb)); + emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, biteq), + GEN_INT (4 * 8))); + emit_insn (gen_andv4si3 (biteq, biteq, temp_v4si)); + emit_insn (gen_andv4si3 (a_abs, ra, sign_mask)); + emit_insn (gen_andv4si3 (b_abs, rb, sign_mask)); + emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask)); + emit_insn (gen_ceq_v4si (ahi_inf, a_abs, nan_mask)); + emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan), + GEN_INT (4 * 8))); + emit_insn (gen_andv4si3 (temp2, temp_v4si, ahi_inf)); + emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2)); + emit_insn (gen_iorv4si3 (temp2, a_abs, b_abs)); + emit_insn (gen_ceq_v4si (iszero, temp2, CONST0_RTX (V4SImode))); + emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, iszero), + GEN_INT (4 * 8))); + emit_insn (gen_andv4si3 (iszero, iszero, temp_v4si)); + emit_insn (gen_iorv4si3 (temp2, biteq, iszero)); + emit_insn (gen_andc_v4si (temp2, temp2, a_nan)); + emit_insn (gen_shufb (operands[0], temp2, temp2, hihi_promote)); + DONE; + } +}) + +(define_expand "cmeq_v2df" + [(set (match_operand:V2DI 0 "spu_reg_operand" "=r") + (eq:V2DI (abs:V2DF (match_operand:V2DF 1 "spu_reg_operand" "r")) + (abs:V2DF (match_operand:V2DF 2 "spu_reg_operand" "r"))))] + "" +{ + if (spu_arch == PROCESSOR_CELL) + { + rtx ra = spu_gen_subreg (V4SImode, operands[1]); + rtx rb = spu_gen_subreg (V4SImode, operands[2]); + rtx temp = gen_reg_rtx (TImode); + rtx temp_v4si = spu_gen_subreg (V4SImode, temp); + rtx temp2 = gen_reg_rtx (V4SImode); + rtx biteq = gen_reg_rtx (V4SImode); + rtx ahi_inf = gen_reg_rtx (V4SImode); + rtx a_nan = gen_reg_rtx (V4SImode); + rtx a_abs = gen_reg_rtx (V4SImode); + rtx b_abs = gen_reg_rtx (V4SImode); + + rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF, + 0x7FFFFFFF, 0xFFFFFFFF); + rtx sign_mask = gen_reg_rtx (V4SImode); + rtx nan_mask = gen_reg_rtx (V4SImode); + rtx hihi_promote = gen_reg_rtx (TImode); + + emit_move_insn (sign_mask, pat); + + pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0, + 0x7FF00000, 0x0); + emit_move_insn (nan_mask, pat); + pat = spu_const_from_ints (TImode, 0x00010203, 0x10111213, + 0x08090A0B, 0x18191A1B); + emit_move_insn (hihi_promote, pat); + + emit_insn (gen_andv4si3 (a_abs, ra, sign_mask)); + emit_insn (gen_andv4si3 (b_abs, rb, sign_mask)); + emit_insn (gen_ceq_v4si (biteq, a_abs, b_abs)); + emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, biteq), + GEN_INT (4 * 8))); + emit_insn (gen_andv4si3 (biteq, biteq, temp_v4si)); + emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask)); + emit_insn (gen_ceq_v4si (ahi_inf, a_abs, nan_mask)); + emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan), + GEN_INT (4 * 8))); + emit_insn (gen_andv4si3 (temp2, temp_v4si, ahi_inf)); + emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2)); + emit_insn (gen_andc_v4si (temp2, biteq, a_nan)); + emit_insn (gen_shufb (operands[0], temp2, temp2, hihi_promote)); + DONE; + } +}) + + +;; cgt + +(define_insn "cgt_" + [(set (match_operand:VQHSI 0 "spu_reg_operand" "=r,r") 
+ (gt:VQHSI (match_operand:VQHSI 1 "spu_reg_operand" "r,r") + (match_operand:VQHSI 2 "spu_arith_operand" "r,B")))] + "" + "@ + cgt\t%0,%1,%2 + cgti\t%0,%1,%2") + +(define_insn "cgt_di_m1" + [(set (match_operand:SI 0 "spu_reg_operand" "=r") + (gt:SI (match_operand:DI 1 "spu_reg_operand" "r") + (const_int -1)))] + "" + "cgti\t%0,%1,-1") + +(define_insn_and_split "cgt_di" + [(set (match_operand:SI 0 "spu_reg_operand" "=r") + (gt:SI (match_operand:DI 1 "spu_reg_operand" "r") + (match_operand:DI 2 "spu_reg_operand" "r"))) + (clobber (match_scratch:V4SI 3 "=&r")) + (clobber (match_scratch:V4SI 4 "=&r")) + (clobber (match_scratch:V4SI 5 "=&r"))] + "" + "#" + "reload_completed" + [(set (match_dup:SI 0) + (gt:SI (match_dup:DI 1) + (match_dup:DI 2)))] + { + rtx op0 = gen_rtx_REG (V4SImode, REGNO (operands[0])); + rtx op1 = gen_rtx_REG (V4SImode, REGNO (operands[1])); + rtx op2 = gen_rtx_REG (V4SImode, REGNO (operands[2])); + rtx op3 = operands[3]; + rtx op4 = operands[4]; + rtx op5 = operands[5]; + rtx op3d = gen_rtx_REG (V2DImode, REGNO (operands[3])); + emit_insn (gen_clgt_v4si (op3, op1, op2)); + emit_insn (gen_ceq_v4si (op4, op1, op2)); + emit_insn (gen_cgt_v4si (op5, op1, op2)); + emit_insn (gen_spu_xswd (op3d, op3)); + emit_insn (gen_selb (op0, op5, op3, op4)); + DONE; + }) + +(define_insn "cgt_ti" + [(set (match_operand:SI 0 "spu_reg_operand" "=r") + (gt:SI (match_operand:TI 1 "spu_reg_operand" "r") + (match_operand:TI 2 "spu_reg_operand" "r"))) + (clobber (match_scratch:V4SI 3 "=&r")) + (clobber (match_scratch:V4SI 4 "=&r")) + (clobber (match_scratch:V4SI 5 "=&r"))] + "" + "clgt\t%4,%1,%2\;\ +ceq\t%3,%1,%2\;\ +cgt\t%5,%1,%2\;\ +shlqbyi\t%0,%4,4\;\ +selb\t%0,%4,%0,%3\;\ +shlqbyi\t%0,%0,4\;\ +selb\t%0,%4,%0,%3\;\ +shlqbyi\t%0,%0,4\;\ +selb\t%0,%5,%0,%3" + [(set_attr "type" "multi0") + (set_attr "length" "36")]) + +(define_insn "cgt_" + [(set (match_operand: 0 "spu_reg_operand" "=r") + (gt: (match_operand:VSF 1 "spu_reg_operand" "r") + (match_operand:VSF 2 "spu_reg_operand" "r")))] + "" + "fcgt\t%0,%1,%2") + +(define_insn "cmgt_" + [(set (match_operand: 0 "spu_reg_operand" "=r") + (gt: (abs:VSF (match_operand:VSF 1 "spu_reg_operand" "r")) + (abs:VSF (match_operand:VSF 2 "spu_reg_operand" "r"))))] + "" + "fcmgt\t%0,%1,%2") + +(define_expand "cgt_df" + [(set (match_operand:SI 0 "spu_reg_operand" "=r") + (gt:SI (match_operand:DF 1 "spu_reg_operand" "r") + (match_operand:DF 2 "const_zero_operand" "i")))] + "" +{ + if (spu_arch == PROCESSOR_CELL) + { + rtx ra = gen_reg_rtx (V4SImode); + rtx rb = gen_reg_rtx (V4SImode); + rtx zero = gen_reg_rtx (V4SImode); + rtx temp = gen_reg_rtx (TImode); + rtx temp_v4si = spu_gen_subreg (V4SImode, temp); + rtx temp2 = gen_reg_rtx (V4SImode); + rtx hi_inf = gen_reg_rtx (V4SImode); + rtx a_nan = gen_reg_rtx (V4SImode); + rtx b_nan = gen_reg_rtx (V4SImode); + rtx a_abs = gen_reg_rtx (V4SImode); + rtx b_abs = gen_reg_rtx (V4SImode); + rtx asel = gen_reg_rtx (V4SImode); + rtx bsel = gen_reg_rtx (V4SImode); + rtx abor = gen_reg_rtx (V4SImode); + rtx bbor = gen_reg_rtx (V4SImode); + rtx gt_hi = gen_reg_rtx (V4SImode); + rtx gt_lo = gen_reg_rtx (V4SImode); + rtx sign_mask = gen_reg_rtx (V4SImode); + rtx nan_mask = gen_reg_rtx (V4SImode); + rtx hi_promote = gen_reg_rtx (TImode); + rtx borrow_shuffle = gen_reg_rtx (TImode); + + rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF, + 0x7FFFFFFF, 0xFFFFFFFF); + emit_move_insn (sign_mask, pat); + pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0, + 0x7FF00000, 0x0); + emit_move_insn (nan_mask, pat); + pat = 
spu_const_from_ints (TImode, 0x00010203, 0x00010203, + 0x08090A0B, 0x08090A0B); + emit_move_insn (hi_promote, pat); + pat = spu_const_from_ints (TImode, 0x04050607, 0xC0C0C0C0, + 0x0C0D0E0F, 0xC0C0C0C0); + emit_move_insn (borrow_shuffle, pat); + + emit_insn (gen_spu_convert (ra, operands[1])); + emit_insn (gen_spu_convert (rb, operands[2])); + emit_insn (gen_andv4si3 (a_abs, ra, sign_mask)); + emit_insn (gen_andv4si3 (b_abs, rb, sign_mask)); + + if (!flag_finite_math_only) + { + /* check if ra is NaN */ + emit_insn (gen_ceq_v4si (hi_inf, a_abs, nan_mask)); + emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask)); + emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan), + GEN_INT (4 * 8))); + emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf)); + emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2)); + emit_insn (gen_shufb (a_nan, a_nan, a_nan, hi_promote)); + + /* check if rb is NaN */ + emit_insn (gen_ceq_v4si (hi_inf, b_abs, nan_mask)); + emit_insn (gen_clgt_v4si (b_nan, b_abs, nan_mask)); + emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, b_nan), + GEN_INT (4 * 8))); + emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf)); + emit_insn (gen_iorv4si3 (b_nan, b_nan, temp2)); + emit_insn (gen_shufb (b_nan, b_nan, b_nan, hi_promote)); + + /* check if ra or rb is NaN */ + emit_insn (gen_iorv4si3 (a_nan, a_nan, b_nan)); + } + emit_move_insn (zero, CONST0_RTX (V4SImode)); + emit_insn (gen_vashrv4si3 (asel, ra, spu_const (V4SImode, 31))); + emit_insn (gen_shufb (asel, asel, asel, hi_promote)); + emit_insn (gen_bg_v4si (abor, zero, a_abs)); + emit_insn (gen_shufb (abor, abor, abor, borrow_shuffle)); + emit_insn (gen_sfx_v4si (abor, zero, a_abs, abor)); + emit_insn (gen_selb (abor, a_abs, abor, asel)); + + emit_insn (gen_vashrv4si3 (bsel, rb, spu_const (V4SImode, 31))); + emit_insn (gen_shufb (bsel, bsel, bsel, hi_promote)); + emit_insn (gen_bg_v4si (bbor, zero, b_abs)); + emit_insn (gen_shufb (bbor, bbor, bbor, borrow_shuffle)); + emit_insn (gen_sfx_v4si (bbor, zero, b_abs, bbor)); + emit_insn (gen_selb (bbor, b_abs, bbor, bsel)); + + emit_insn (gen_cgt_v4si (gt_hi, abor, bbor)); + emit_insn (gen_clgt_v4si (gt_lo, abor, bbor)); + emit_insn (gen_ceq_v4si (temp2, abor, bbor)); + emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, gt_lo), + GEN_INT (4 * 8))); + emit_insn (gen_andv4si3 (temp2, temp2, temp_v4si)); + emit_insn (gen_iorv4si3 (temp2, gt_hi, temp2)); + emit_insn (gen_shufb (temp2, temp2, temp2, hi_promote)); + if (!flag_finite_math_only) + { + /* correct for NaNs */ + emit_insn (gen_andc_v4si (temp2, temp2, a_nan)); + } + emit_insn (gen_spu_convert (operands[0], temp2)); + DONE; + } +}) + +(define_insn "cgt__celledp" + [(set (match_operand: 0 "spu_reg_operand" "=r") + (gt: (match_operand:VDF 1 "spu_reg_operand" "r") + (match_operand:VDF 2 "spu_reg_operand" "r")))] + "spu_arch == PROCESSOR_CELLEDP" + "dfcgt\t%0,%1,%2" + [(set_attr "type" "fpd")]) + +(define_insn "cmgt__celledp" + [(set (match_operand: 0 "spu_reg_operand" "=r") + (gt: (abs:VDF (match_operand:VDF 1 "spu_reg_operand" "r")) + (abs:VDF (match_operand:VDF 2 "spu_reg_operand" "r"))))] + "spu_arch == PROCESSOR_CELLEDP" + "dfcmgt\t%0,%1,%2" + [(set_attr "type" "fpd")]) + +(define_expand "cgt_v2df" + [(set (match_operand:V2DI 0 "spu_reg_operand" "=r") + (gt:V2DI (match_operand:V2DF 1 "spu_reg_operand" "r") + (match_operand:V2DF 2 "spu_reg_operand" "r")))] + "" +{ + if (spu_arch == PROCESSOR_CELL) + { + rtx ra = spu_gen_subreg (V4SImode, operands[1]); + rtx rb = spu_gen_subreg (V4SImode, operands[2]); + rtx zero = gen_reg_rtx 
(V4SImode); + rtx temp = gen_reg_rtx (TImode); + rtx temp_v4si = spu_gen_subreg (V4SImode, temp); + rtx temp2 = gen_reg_rtx (V4SImode); + rtx hi_inf = gen_reg_rtx (V4SImode); + rtx a_nan = gen_reg_rtx (V4SImode); + rtx b_nan = gen_reg_rtx (V4SImode); + rtx a_abs = gen_reg_rtx (V4SImode); + rtx b_abs = gen_reg_rtx (V4SImode); + rtx asel = gen_reg_rtx (V4SImode); + rtx bsel = gen_reg_rtx (V4SImode); + rtx abor = gen_reg_rtx (V4SImode); + rtx bbor = gen_reg_rtx (V4SImode); + rtx gt_hi = gen_reg_rtx (V4SImode); + rtx gt_lo = gen_reg_rtx (V4SImode); + rtx sign_mask = gen_reg_rtx (V4SImode); + rtx nan_mask = gen_reg_rtx (V4SImode); + rtx hi_promote = gen_reg_rtx (TImode); + rtx borrow_shuffle = gen_reg_rtx (TImode); + rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF, + 0x7FFFFFFF, 0xFFFFFFFF); + emit_move_insn (sign_mask, pat); + pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0, + 0x7FF00000, 0x0); + emit_move_insn (nan_mask, pat); + pat = spu_const_from_ints (TImode, 0x00010203, 0x00010203, + 0x08090A0B, 0x08090A0B); + emit_move_insn (hi_promote, pat); + pat = spu_const_from_ints (TImode, 0x04050607, 0xC0C0C0C0, + 0x0C0D0E0F, 0xC0C0C0C0); + emit_move_insn (borrow_shuffle, pat); + + emit_insn (gen_andv4si3 (a_abs, ra, sign_mask)); + emit_insn (gen_ceq_v4si (hi_inf, a_abs, nan_mask)); + emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask)); + emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan), + GEN_INT (4 * 8))); + emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf)); + emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2)); + emit_insn (gen_shufb (a_nan, a_nan, a_nan, hi_promote)); + emit_insn (gen_andv4si3 (b_abs, rb, sign_mask)); + emit_insn (gen_ceq_v4si (hi_inf, b_abs, nan_mask)); + emit_insn (gen_clgt_v4si (b_nan, b_abs, nan_mask)); + emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, b_nan), + GEN_INT (4 * 8))); + emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf)); + emit_insn (gen_iorv4si3 (b_nan, b_nan, temp2)); + emit_insn (gen_shufb (b_nan, b_nan, b_nan, hi_promote)); + emit_insn (gen_iorv4si3 (a_nan, a_nan, b_nan)); + emit_move_insn (zero, CONST0_RTX (V4SImode)); + emit_insn (gen_vashrv4si3 (asel, ra, spu_const (V4SImode, 31))); + emit_insn (gen_shufb (asel, asel, asel, hi_promote)); + emit_insn (gen_bg_v4si (abor, zero, a_abs)); + emit_insn (gen_shufb (abor, abor, abor, borrow_shuffle)); + emit_insn (gen_sfx_v4si (abor, zero, a_abs, abor)); + emit_insn (gen_selb (abor, a_abs, abor, asel)); + emit_insn (gen_vashrv4si3 (bsel, rb, spu_const (V4SImode, 31))); + emit_insn (gen_shufb (bsel, bsel, bsel, hi_promote)); + emit_insn (gen_bg_v4si (bbor, zero, b_abs)); + emit_insn (gen_shufb (bbor, bbor, bbor, borrow_shuffle)); + emit_insn (gen_sfx_v4si (bbor, zero, b_abs, bbor)); + emit_insn (gen_selb (bbor, b_abs, bbor, bsel)); + emit_insn (gen_cgt_v4si (gt_hi, abor, bbor)); + emit_insn (gen_clgt_v4si (gt_lo, abor, bbor)); + emit_insn (gen_ceq_v4si (temp2, abor, bbor)); + emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, gt_lo), + GEN_INT (4 * 8))); + emit_insn (gen_andv4si3 (temp2, temp2, temp_v4si)); + emit_insn (gen_iorv4si3 (temp2, gt_hi, temp2)); + + emit_insn (gen_shufb (temp2, temp2, temp2, hi_promote)); + emit_insn (gen_andc_v4si (temp2, temp2, a_nan)); + emit_move_insn (operands[0], spu_gen_subreg (V2DImode, temp2)); + DONE; + } +}) + +(define_expand "cmgt_v2df" + [(set (match_operand:V2DI 0 "spu_reg_operand" "=r") + (gt:V2DI (abs:V2DF (match_operand:V2DF 1 "spu_reg_operand" "r")) + (abs:V2DF (match_operand:V2DF 2 "spu_reg_operand" "r"))))] + "" +{ + if 
(spu_arch == PROCESSOR_CELL) + { + rtx ra = spu_gen_subreg (V4SImode, operands[1]); + rtx rb = spu_gen_subreg (V4SImode, operands[2]); + rtx temp = gen_reg_rtx (TImode); + rtx temp_v4si = spu_gen_subreg (V4SImode, temp); + rtx temp2 = gen_reg_rtx (V4SImode); + rtx hi_inf = gen_reg_rtx (V4SImode); + rtx a_nan = gen_reg_rtx (V4SImode); + rtx b_nan = gen_reg_rtx (V4SImode); + rtx a_abs = gen_reg_rtx (V4SImode); + rtx b_abs = gen_reg_rtx (V4SImode); + rtx gt_hi = gen_reg_rtx (V4SImode); + rtx gt_lo = gen_reg_rtx (V4SImode); + rtx sign_mask = gen_reg_rtx (V4SImode); + rtx nan_mask = gen_reg_rtx (V4SImode); + rtx hi_promote = gen_reg_rtx (TImode); + rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF, + 0x7FFFFFFF, 0xFFFFFFFF); + emit_move_insn (sign_mask, pat); + pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0, + 0x7FF00000, 0x0); + emit_move_insn (nan_mask, pat); + pat = spu_const_from_ints (TImode, 0x00010203, 0x00010203, + 0x08090A0B, 0x08090A0B); + emit_move_insn (hi_promote, pat); + + emit_insn (gen_andv4si3 (a_abs, ra, sign_mask)); + emit_insn (gen_ceq_v4si (hi_inf, a_abs, nan_mask)); + emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask)); + emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan), + GEN_INT (4 * 8))); + emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf)); + emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2)); + emit_insn (gen_shufb (a_nan, a_nan, a_nan, hi_promote)); + emit_insn (gen_andv4si3 (b_abs, rb, sign_mask)); + emit_insn (gen_ceq_v4si (hi_inf, b_abs, nan_mask)); + emit_insn (gen_clgt_v4si (b_nan, b_abs, nan_mask)); + emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, b_nan), + GEN_INT (4 * 8))); + emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf)); + emit_insn (gen_iorv4si3 (b_nan, b_nan, temp2)); + emit_insn (gen_shufb (b_nan, b_nan, b_nan, hi_promote)); + emit_insn (gen_iorv4si3 (a_nan, a_nan, b_nan)); + + emit_insn (gen_clgt_v4si (gt_hi, a_abs, b_abs)); + emit_insn (gen_clgt_v4si (gt_lo, a_abs, b_abs)); + emit_insn (gen_ceq_v4si (temp2, a_abs, b_abs)); + emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, gt_lo), + GEN_INT (4 * 8))); + emit_insn (gen_andv4si3 (temp2, temp2, temp_v4si)); + emit_insn (gen_iorv4si3 (temp2, gt_hi, temp2)); + emit_insn (gen_shufb (temp2, temp2, temp2, hi_promote)); + emit_insn (gen_andc_v4si (temp2, temp2, a_nan)); + emit_move_insn (operands[0], spu_gen_subreg (V2DImode, temp2)); + DONE; + } +}) + + +;; clgt + +(define_insn "clgt_" + [(set (match_operand:VQHSI 0 "spu_reg_operand" "=r,r") + (gtu:VQHSI (match_operand:VQHSI 1 "spu_reg_operand" "r,r") + (match_operand:VQHSI 2 "spu_arith_operand" "r,B")))] + "" + "@ + clgt\t%0,%1,%2 + clgti\t%0,%1,%2") + +(define_insn_and_split "clgt_di" + [(set (match_operand:SI 0 "spu_reg_operand" "=r") + (gtu:SI (match_operand:DI 1 "spu_reg_operand" "r") + (match_operand:DI 2 "spu_reg_operand" "r"))) + (clobber (match_scratch:V4SI 3 "=&r")) + (clobber (match_scratch:V4SI 4 "=&r")) + (clobber (match_scratch:V4SI 5 "=&r"))] + "" + "#" + "reload_completed" + [(set (match_dup:SI 0) + (gtu:SI (match_dup:DI 1) + (match_dup:DI 2)))] + { + rtx op0 = gen_rtx_REG (V4SImode, REGNO (operands[0])); + rtx op1 = gen_rtx_REG (V4SImode, REGNO (operands[1])); + rtx op2 = gen_rtx_REG (V4SImode, REGNO (operands[2])); + rtx op3 = operands[3]; + rtx op4 = operands[4]; + rtx op5 = operands[5]; + rtx op5d = gen_rtx_REG (V2DImode, REGNO (operands[5])); + emit_insn (gen_clgt_v4si (op3, op1, op2)); + emit_insn (gen_ceq_v4si (op4, op1, op2)); + emit_insn (gen_spu_xswd (op5d, op3)); + emit_insn (gen_selb 
(op0, op3, op5, op4)); + DONE; + }) + +(define_insn "clgt_ti" + [(set (match_operand:SI 0 "spu_reg_operand" "=r") + (gtu:SI (match_operand:TI 1 "spu_reg_operand" "r") + (match_operand:TI 2 "spu_reg_operand" "r"))) + (clobber (match_scratch:V4SI 3 "=&r")) + (clobber (match_scratch:V4SI 4 "=&r"))] + "" + "ceq\t%3,%1,%2\;\ +clgt\t%4,%1,%2\;\ +shlqbyi\t%0,%4,4\;\ +selb\t%0,%4,%0,%3\;\ +shlqbyi\t%0,%0,4\;\ +selb\t%0,%4,%0,%3\;\ +shlqbyi\t%0,%0,4\;\ +selb\t%0,%4,%0,%3" + [(set_attr "type" "multi0") + (set_attr "length" "32")]) + + +;; dftsv +(define_insn "dftsv_celledp" + [(set (match_operand:V2DI 0 "spu_reg_operand" "=r") + (unspec:V2DI [(match_operand:V2DF 1 "spu_reg_operand" "r") + (match_operand:SI 2 "const_int_operand" "i")] + UNSPEC_DFTSV))] + "spu_arch == PROCESSOR_CELLEDP" + "dftsv\t%0,%1,%2" + [(set_attr "type" "fpd")]) + +(define_expand "dftsv" + [(set (match_operand:V2DI 0 "spu_reg_operand" "=r") + (unspec:V2DI [(match_operand:V2DF 1 "spu_reg_operand" "r") + (match_operand:SI 2 "const_int_operand" "i")] + UNSPEC_DFTSV))] + "" +{ + if (spu_arch == PROCESSOR_CELL) + { + rtx result = gen_reg_rtx (V4SImode); + emit_move_insn (result, CONST0_RTX (V4SImode)); + + if (INTVAL (operands[2])) + { + rtx ra = spu_gen_subreg (V4SImode, operands[1]); + rtx abs = gen_reg_rtx (V4SImode); + rtx sign = gen_reg_rtx (V4SImode); + rtx temp = gen_reg_rtx (TImode); + rtx temp_v4si = spu_gen_subreg (V4SImode, temp); + rtx temp2 = gen_reg_rtx (V4SImode); + rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF, + 0x7FFFFFFF, 0xFFFFFFFF); + rtx sign_mask = gen_reg_rtx (V4SImode); + rtx hi_promote = gen_reg_rtx (TImode); + emit_move_insn (sign_mask, pat); + pat = spu_const_from_ints (TImode, 0x00010203, 0x00010203, + 0x08090A0B, 0x08090A0B); + emit_move_insn (hi_promote, pat); + + emit_insn (gen_vashrv4si3 (sign, ra, spu_const (V4SImode, 31))); + emit_insn (gen_shufb (sign, sign, sign, hi_promote)); + emit_insn (gen_andv4si3 (abs, ra, sign_mask)); + + /* NaN or +inf or -inf */ + if (INTVAL (operands[2]) & 0x70) + { + rtx nan_mask = gen_reg_rtx (V4SImode); + rtx isinf = gen_reg_rtx (V4SImode); + pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0, + 0x7FF00000, 0x0); + emit_move_insn (nan_mask, pat); + emit_insn (gen_ceq_v4si (isinf, abs, nan_mask)); + + /* NaN */ + if (INTVAL (operands[2]) & 0x40) + { + rtx isnan = gen_reg_rtx (V4SImode); + emit_insn (gen_clgt_v4si (isnan, abs, nan_mask)); + emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, isnan), + GEN_INT (4 * 8))); + emit_insn (gen_andv4si3 (temp2, temp_v4si, isinf)); + emit_insn (gen_iorv4si3 (isnan, isnan, temp2)); + emit_insn (gen_shufb (isnan, isnan, isnan, hi_promote)); + emit_insn (gen_iorv4si3 (result, result, isnan)); + } + /* +inf or -inf */ + if (INTVAL (operands[2]) & 0x30) + { + emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, isinf), + GEN_INT (4 * 8))); + emit_insn (gen_andv4si3 (isinf, isinf, temp_v4si)); + emit_insn (gen_shufb (isinf, isinf, isinf, hi_promote)); + + /* +inf */ + if (INTVAL (operands[2]) & 0x20) + { + emit_insn (gen_andc_v4si (temp2, isinf, sign)); + emit_insn (gen_iorv4si3 (result, result, temp2)); + } + /* -inf */ + if (INTVAL (operands[2]) & 0x10) + { + emit_insn (gen_andv4si3 (temp2, isinf, sign)); + emit_insn (gen_iorv4si3 (result, result, temp2)); + } + } + } + + /* 0 or denorm */ + if (INTVAL (operands[2]) & 0xF) + { + rtx iszero = gen_reg_rtx (V4SImode); + emit_insn (gen_ceq_v4si (iszero, abs, CONST0_RTX (V4SImode))); + emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, iszero), + GEN_INT (4 * 
8))); + emit_insn (gen_andv4si3 (iszero, iszero, temp_v4si)); + + /* denorm */ + if (INTVAL (operands[2]) & 0x3) + { + rtx isdenorm = gen_reg_rtx (V4SImode); + rtx denorm_mask = gen_reg_rtx (V4SImode); + emit_move_insn (denorm_mask, spu_const (V4SImode, 0xFFFFF)); + emit_insn (gen_clgt_v4si (isdenorm, abs, denorm_mask)); + emit_insn (gen_nor_v4si (isdenorm, isdenorm, iszero)); + emit_insn (gen_shufb (isdenorm, isdenorm, + isdenorm, hi_promote)); + /* +denorm */ + if (INTVAL (operands[2]) & 0x2) + { + emit_insn (gen_andc_v4si (temp2, isdenorm, sign)); + emit_insn (gen_iorv4si3 (result, result, temp2)); + } + /* -denorm */ + if (INTVAL (operands[2]) & 0x1) + { + emit_insn (gen_andv4si3 (temp2, isdenorm, sign)); + emit_insn (gen_iorv4si3 (result, result, temp2)); + } + } + + /* 0 */ + if (INTVAL (operands[2]) & 0xC) + { + emit_insn (gen_shufb (iszero, iszero, iszero, hi_promote)); + /* +0 */ + if (INTVAL (operands[2]) & 0x8) + { + emit_insn (gen_andc_v4si (temp2, iszero, sign)); + emit_insn (gen_iorv4si3 (result, result, temp2)); + } + /* -0 */ + if (INTVAL (operands[2]) & 0x4) + { + emit_insn (gen_andv4si3 (temp2, iszero, sign)); + emit_insn (gen_iorv4si3 (result, result, temp2)); + } + } + } + } + emit_move_insn (operands[0], spu_gen_subreg (V2DImode, result)); + DONE; + } +}) + + +;; branches + +(define_insn "" + [(set (pc) + (if_then_else (match_operator 1 "branch_comparison_operator" + [(match_operand 2 + "spu_reg_operand" "r") + (const_int 0)]) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "br%b2%b1z\t%2,%0" + [(set_attr "type" "br")]) + +(define_insn "" + [(set (pc) + (if_then_else (match_operator 0 "branch_comparison_operator" + [(match_operand 1 + "spu_reg_operand" "r") + (const_int 0)]) + (return) + (pc)))] + "direct_return ()" + "bi%b1%b0z\t%1,$lr" + [(set_attr "type" "br")]) + +(define_insn "" + [(set (pc) + (if_then_else (match_operator 1 "branch_comparison_operator" + [(match_operand 2 + "spu_reg_operand" "r") + (const_int 0)]) + (pc) + (label_ref (match_operand 0 "" ""))))] + "" + "br%b2%b1z\t%2,%0" + [(set_attr "type" "br")]) + +(define_insn "" + [(set (pc) + (if_then_else (match_operator 0 "branch_comparison_operator" + [(match_operand 1 + "spu_reg_operand" "r") + (const_int 0)]) + (pc) + (return)))] + "direct_return ()" + "bi%b1%b0z\t%1,$lr" + [(set_attr "type" "br")]) + + +;; vector conditional compare patterns +(define_expand "vcond" + [(set (match_operand:VCMP 0 "spu_reg_operand" "=r") + (if_then_else:VCMP + (match_operator 3 "comparison_operator" + [(match_operand:VCMP 4 "spu_reg_operand" "r") + (match_operand:VCMP 5 "spu_reg_operand" "r")]) + (match_operand:VCMP 1 "spu_reg_operand" "r") + (match_operand:VCMP 2 "spu_reg_operand" "r")))] + "" + { + if (spu_emit_vector_cond_expr (operands[0], operands[1], operands[2], + operands[3], operands[4], operands[5])) + DONE; + else + FAIL; + }) + +(define_expand "vcondu" + [(set (match_operand:VCMPU 0 "spu_reg_operand" "=r") + (if_then_else:VCMPU + (match_operator 3 "comparison_operator" + [(match_operand:VCMPU 4 "spu_reg_operand" "r") + (match_operand:VCMPU 5 "spu_reg_operand" "r")]) + (match_operand:VCMPU 1 "spu_reg_operand" "r") + (match_operand:VCMPU 2 "spu_reg_operand" "r")))] + "" + { + if (spu_emit_vector_cond_expr (operands[0], operands[1], operands[2], + operands[3], operands[4], operands[5])) + DONE; + else + FAIL; + }) + + +;; branch on condition + +(define_expand "cbranch4" + [(use (match_operator 0 "ordered_comparison_operator" + [(match_operand:VQHSI 1 "spu_reg_operand" "") + (match_operand:VQHSI 2 
"spu_nonmem_operand" "")])) + (use (match_operand 3 ""))] + "" + { spu_emit_branch_or_set (0, operands[0], operands); DONE; }) + +(define_expand "cbranch4" + [(use (match_operator 0 "ordered_comparison_operator" + [(match_operand:DTI 1 "spu_reg_operand" "") + (match_operand:DTI 2 "spu_reg_operand" "")])) + (use (match_operand 3 ""))] + "" + { spu_emit_branch_or_set (0, operands[0], operands); DONE; }) + +(define_expand "cbranch4" + [(use (match_operator 0 "ordered_comparison_operator" + [(match_operand:VSF 1 "spu_reg_operand" "") + (match_operand:VSF 2 "spu_reg_operand" "")])) + (use (match_operand 3 ""))] + "" + { spu_emit_branch_or_set (0, operands[0], operands); DONE; }) + +(define_expand "cbranchdf4" + [(use (match_operator 0 "ordered_comparison_operator" + [(match_operand:DF 1 "spu_reg_operand" "") + (match_operand:DF 2 "spu_reg_operand" "")])) + (use (match_operand 3 ""))] + "" + { spu_emit_branch_or_set (0, operands[0], operands); DONE; }) + + +;; set on condition + +(define_expand "cstore4" + [(use (match_operator 1 "ordered_comparison_operator" + [(match_operand:VQHSI 2 "spu_reg_operand" "") + (match_operand:VQHSI 3 "spu_nonmem_operand" "")])) + (clobber (match_operand:SI 0 "spu_reg_operand"))] + "" + { spu_emit_branch_or_set (1, operands[1], operands); DONE; }) + +(define_expand "cstore4" + [(use (match_operator 1 "ordered_comparison_operator" + [(match_operand:DTI 2 "spu_reg_operand" "") + (match_operand:DTI 3 "spu_reg_operand" "")])) + (clobber (match_operand:SI 0 "spu_reg_operand"))] + "" + { spu_emit_branch_or_set (1, operands[1], operands); DONE; }) + +(define_expand "cstore4" + [(use (match_operator 1 "ordered_comparison_operator" + [(match_operand:VSF 2 "spu_reg_operand" "") + (match_operand:VSF 3 "spu_reg_operand" "")])) + (clobber (match_operand:SI 0 "spu_reg_operand"))] + "" + { spu_emit_branch_or_set (1, operands[1], operands); DONE; }) + +(define_expand "cstoredf4" + [(use (match_operator 1 "ordered_comparison_operator" + [(match_operand:DF 2 "spu_reg_operand" "") + (match_operand:DF 3 "spu_reg_operand" "")])) + (clobber (match_operand:SI 0 "spu_reg_operand"))] + "" + { spu_emit_branch_or_set (1, operands[1], operands); DONE; }) + + +;; conditional move + +;; Define this first one so HAVE_conditional_move is defined. +(define_insn "movcc_dummy" + [(set (match_operand 0 "" "") + (if_then_else (match_operand 1 "" "") + (match_operand 2 "" "") + (match_operand 3 "" "")))] + "!operands[0]" + "") + +(define_expand "movcc" + [(set (match_operand:ALL 0 "spu_reg_operand" "") + (if_then_else:ALL (match_operand 1 "ordered_comparison_operator" "") + (match_operand:ALL 2 "spu_reg_operand" "") + (match_operand:ALL 3 "spu_reg_operand" "")))] + "" + { + spu_emit_branch_or_set(2, operands[1], operands); + DONE; + }) + +;; This pattern is used when the result of a compare is not large +;; enough to use in a selb when expanding conditional moves. 
+(define_expand "extend_compare" + [(set (match_operand 0 "spu_reg_operand" "=r") + (unspec [(match_operand 1 "spu_reg_operand" "r")] UNSPEC_EXTEND_CMP))] + "" + { + emit_insn (gen_rtx_SET (VOIDmode, operands[0], + gen_rtx_UNSPEC (GET_MODE (operands[0]), + gen_rtvec (1, operands[1]), + UNSPEC_EXTEND_CMP))); + DONE; + }) + +(define_insn "extend_compare" + [(set (match_operand:ALL 0 "spu_reg_operand" "=r") + (unspec:ALL [(match_operand 1 "spu_reg_operand" "r")] UNSPEC_EXTEND_CMP))] + "operands" + "fsm\t%0,%1" + [(set_attr "type" "shuf")]) + + +;; case + +;; operand 0 is index +;; operand 1 is the minimum bound +;; operand 2 is the maximum bound - minimum bound + 1 +;; operand 3 is CODE_LABEL for the table; +;; operand 4 is the CODE_LABEL to go to if index out of range. +(define_expand "casesi" + [(match_operand:SI 0 "spu_reg_operand" "") + (match_operand:SI 1 "immediate_operand" "") + (match_operand:SI 2 "immediate_operand" "") + (match_operand 3 "" "") + (match_operand 4 "" "")] + "" + { + rtx table = gen_reg_rtx (SImode); + rtx index = gen_reg_rtx (SImode); + rtx sindex = gen_reg_rtx (SImode); + rtx addr = gen_reg_rtx (Pmode); + + emit_move_insn (table, gen_rtx_LABEL_REF (SImode, operands[3])); + + emit_insn (gen_subsi3(index, operands[0], force_reg(SImode, operands[1]))); + emit_insn (gen_ashlsi3(sindex, index, GEN_INT (2))); + emit_move_insn (addr, gen_rtx_MEM (SImode, + gen_rtx_PLUS (SImode, table, sindex))); + if (flag_pic) + emit_insn (gen_addsi3 (addr, addr, table)); + + emit_cmp_and_jump_insns (index, operands[2], GTU, NULL_RTX, SImode, 1, operands[4]); + emit_jump_insn (gen_tablejump (addr, operands[3])); + DONE; + }) + +(define_insn "tablejump" + [(set (pc) (match_operand:SI 0 "spu_reg_operand" "r")) + (use (label_ref (match_operand 1 "" "")))] + "" + "bi\t%0" + [(set_attr "type" "br")]) + + +;; call + +;; Note that operand 1 is total size of args, in bytes, +;; and what the call insn wants is the number of words. +(define_expand "sibcall" + [(parallel + [(call (match_operand:QI 0 "call_operand" "") + (match_operand:QI 1 "" "")) + (use (reg:SI 0))])] + "" + { + if (! call_operand (operands[0], QImode)) + XEXP (operands[0], 0) = copy_to_mode_reg (Pmode, XEXP (operands[0], 0)); + }) + +(define_insn "_sibcall" + [(parallel + [(call (match_operand:QI 0 "call_operand" "R,S") + (match_operand:QI 1 "" "i,i")) + (use (reg:SI 0))])] + "SIBLING_CALL_P(insn)" + "@ + bi\t%i0 + br\t%0" + [(set_attr "type" "br,br")]) + +(define_expand "sibcall_value" + [(parallel + [(set (match_operand 0 "" "") + (call (match_operand:QI 1 "call_operand" "") + (match_operand:QI 2 "" ""))) + (use (reg:SI 0))])] + "" + { + if (! call_operand (operands[1], QImode)) + XEXP (operands[1], 0) = copy_to_mode_reg (Pmode, XEXP (operands[1], 0)); + }) + +(define_insn "_sibcall_value" + [(parallel + [(set (match_operand 0 "" "") + (call (match_operand:QI 1 "call_operand" "R,S") + (match_operand:QI 2 "" "i,i"))) + (use (reg:SI 0))])] + "SIBLING_CALL_P(insn)" + "@ + bi\t%i1 + br\t%1" + [(set_attr "type" "br,br")]) + +;; Note that operand 1 is total size of args, in bytes, +;; and what the call insn wants is the number of words. +(define_expand "call" + [(parallel + [(call (match_operand:QI 0 "call_operand" "") + (match_operand:QI 1 "" "")) + (clobber (reg:SI 0)) + (clobber (reg:SI 130))])] + "" + { + if (! 
call_operand (operands[0], QImode)) + XEXP (operands[0], 0) = copy_to_mode_reg (Pmode, XEXP (operands[0], 0)); + }) + +(define_insn "_call" + [(parallel + [(call (match_operand:QI 0 "call_operand" "R,S,T") + (match_operand:QI 1 "" "i,i,i")) + (clobber (reg:SI 0)) + (clobber (reg:SI 130))])] + "" + "@ + bisl\t$lr,%i0 + brsl\t$lr,%0 + brasl\t$lr,%0" + [(set_attr "type" "br")]) + +(define_expand "call_value" + [(parallel + [(set (match_operand 0 "" "") + (call (match_operand:QI 1 "call_operand" "") + (match_operand:QI 2 "" ""))) + (clobber (reg:SI 0)) + (clobber (reg:SI 130))])] + "" + { + if (! call_operand (operands[1], QImode)) + XEXP (operands[1], 0) = copy_to_mode_reg (Pmode, XEXP (operands[1], 0)); + }) + +(define_insn "_call_value" + [(parallel + [(set (match_operand 0 "" "") + (call (match_operand:QI 1 "call_operand" "R,S,T") + (match_operand:QI 2 "" "i,i,i"))) + (clobber (reg:SI 0)) + (clobber (reg:SI 130))])] + "" + "@ + bisl\t$lr,%i1 + brsl\t$lr,%1 + brasl\t$lr,%1" + [(set_attr "type" "br")]) + +(define_expand "untyped_call" + [(parallel [(call (match_operand 0 "" "") + (const_int 0)) + (match_operand 1 "" "") + (match_operand 2 "" "")])] + "" + { + int i; + rtx reg = gen_rtx_REG (TImode, 3); + + /* We need to use call_value so the return value registers don't get + * clobbered. */ + emit_call_insn (gen_call_value (reg, operands[0], const0_rtx)); + + for (i = 0; i < XVECLEN (operands[2], 0); i++) + { + rtx set = XVECEXP (operands[2], 0, i); + emit_move_insn (SET_DEST (set), SET_SRC (set)); + } + + /* The optimizer does not know that the call sets the function value + registers we stored in the result block. We avoid problems by + claiming that all hard registers are used and clobbered at this + point. */ + emit_insn (gen_blockage ()); + + DONE; + }) + + +;; Patterns used for splitting and combining. + + +;; Function prologue and epilogue. + +(define_expand "prologue" + [(const_int 1)] + "" + { spu_expand_prologue (); DONE; }) + +;; "blockage" is only emited in epilogue. This is what it took to +;; make "basic block reordering" work with the insns sequence +;; generated by the spu_expand_epilogue (taken from mips.md) + +(define_insn "blockage" + [(unspec_volatile [(const_int 0)] UNSPEC_BLOCKAGE)] + "" + "" + [(set_attr "type" "convert") + (set_attr "length" "0")]) + +(define_expand "epilogue" + [(const_int 2)] + "" + { spu_expand_epilogue (false); DONE; }) + +(define_expand "sibcall_epilogue" + [(const_int 2)] + "" + { spu_expand_epilogue (true); DONE; }) + + +;; stack manipulations + +;; An insn to allocate new stack space for dynamic use (e.g., alloca). +;; We move the back-chain and decrement the stack pointer. +(define_expand "allocate_stack" + [(set (match_operand 0 "spu_reg_operand" "") + (minus (reg 1) (match_operand 1 "spu_nonmem_operand" ""))) + (set (reg 1) + (minus (reg 1) (match_dup 1)))] + "" + "spu_allocate_stack (operands[0], operands[1]); DONE;") + +;; These patterns say how to save and restore the stack pointer. We need not +;; save the stack pointer at function level since we are careful to preserve +;; the backchain. +;; + +;; At block level the stack pointer is saved and restored, so that the +;; stack space allocated within a block is deallocated when leaving +;; block scope. By default, according to the SPU ABI, the stack +;; pointer and available stack size are saved in a register. Upon +;; restoration, the stack pointer is simply copied back, and the +;; current available stack size is calculated against the restored +;; stack pointer. 
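+;; (The back chain is the pointer to the caller's frame stored at the top of
+;; the current frame; the patterns above are careful to keep it valid, which
+;; is why no function-level save of the stack pointer is required.)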
+;; +;; For nonlocal gotos, we must save the stack pointer and its +;; backchain and restore both. Note that in the nonlocal case, the +;; save area is a memory location. + +(define_expand "save_stack_function" + [(match_operand 0 "general_operand" "") + (match_operand 1 "general_operand" "")] + "" + "DONE;") + +(define_expand "restore_stack_function" + [(match_operand 0 "general_operand" "") + (match_operand 1 "general_operand" "")] + "" + "DONE;") + +(define_expand "restore_stack_block" + [(match_operand 0 "spu_reg_operand" "") + (match_operand 1 "memory_operand" "")] + "" + " + { + spu_restore_stack_block (operands[0], operands[1]); + DONE; + }") + +(define_expand "save_stack_nonlocal" + [(match_operand 0 "memory_operand" "") + (match_operand 1 "spu_reg_operand" "")] + "" + " + { + rtx temp = gen_reg_rtx (Pmode); + + /* Copy the backchain to the first word, sp to the second. We need to + save the back chain because __builtin_apply appears to clobber it. */ + emit_move_insn (temp, gen_rtx_MEM (Pmode, operands[1])); + emit_move_insn (adjust_address_nv (operands[0], SImode, 0), temp); + emit_move_insn (adjust_address_nv (operands[0], SImode, 4), operands[1]); + DONE; + }") + +(define_expand "restore_stack_nonlocal" + [(match_operand 0 "spu_reg_operand" "") + (match_operand 1 "memory_operand" "")] + "" + " + { + spu_restore_stack_nonlocal(operands[0], operands[1]); + DONE; + }") + + +;; vector patterns + +;; Vector initialization +(define_expand "vec_init" + [(match_operand:V 0 "register_operand" "") + (match_operand 1 "" "")] + "" + { + spu_expand_vector_init (operands[0], operands[1]); + DONE; + }) + +(define_expand "vec_set" + [(use (match_operand:SI 2 "spu_nonmem_operand" "")) + (set (match_dup:TI 3) + (unspec:TI [(match_dup:SI 4) + (match_dup:SI 5) + (match_dup:SI 6)] UNSPEC_CPAT)) + (set (match_operand:V 0 "spu_reg_operand" "") + (unspec:V [(match_operand: 1 "spu_reg_operand" "") + (match_dup:V 0) + (match_dup:TI 3)] UNSPEC_SHUFB))] + "" + { + HOST_WIDE_INT size = GET_MODE_SIZE (mode); + rtx offset = GEN_INT (INTVAL (operands[2]) * size); + operands[3] = gen_reg_rtx (TImode); + operands[4] = stack_pointer_rtx; + operands[5] = offset; + operands[6] = GEN_INT (size); + }) + +(define_expand "vec_extract" + [(set (match_operand: 0 "spu_reg_operand" "=r") + (vec_select: (match_operand:V 1 "spu_reg_operand" "r") + (parallel [(match_operand 2 "const_int_operand" "i")])))] + "" + { + if ((INTVAL (operands[2]) * + ) % 16 == 0) + { + emit_insn (gen_spu_convert (operands[0], operands[1])); + DONE; + } + }) + +(define_insn "_vec_extract" + [(set (match_operand: 0 "spu_reg_operand" "=r") + (vec_select: (match_operand:V 1 "spu_reg_operand" "r") + (parallel [(match_operand 2 "const_int_operand" "i")])))] + "" + "rotqbyi\t%0,%1,(%2*+)%%16" + [(set_attr "type" "shuf")]) + +(define_insn "_vec_extractv8hi_ze" + [(set (match_operand:SI 0 "spu_reg_operand" "=r") + (zero_extend:SI (vec_select:HI (match_operand:V8HI 1 "spu_reg_operand" "r") + (parallel [(const_int 0)]))))] + "" + "rotqmbyi\t%0,%1,-2" + [(set_attr "type" "shuf")]) + + +;; misc + +(define_expand "shufb" + [(set (match_operand 0 "spu_reg_operand" "") + (unspec [(match_operand 1 "spu_reg_operand" "") + (match_operand 2 "spu_reg_operand" "") + (match_operand:TI 3 "spu_reg_operand" "")] UNSPEC_SHUFB))] + "" + { + rtx s = gen__shufb (operands[0], operands[1], operands[2], operands[3]); + PUT_MODE (SET_SRC (s), GET_MODE (operands[0])); + emit_insn (s); + DONE; + }) + +(define_insn "_shufb" + [(set (match_operand 0 "spu_reg_operand" "=r") + (unspec 
[(match_operand 1 "spu_reg_operand" "r") + (match_operand 2 "spu_reg_operand" "r") + (match_operand:TI 3 "spu_reg_operand" "r")] UNSPEC_SHUFB))] + "operands" + "shufb\t%0,%1,%2,%3" + [(set_attr "type" "shuf")]) + +(define_insn "nop" + [(unspec_volatile [(const_int 0)] UNSPEC_NOP)] + "" + "nop" + [(set_attr "type" "nop")]) + +(define_insn "nopn" + [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "K")] UNSPEC_NOP)] + "" + "nop\t%0" + [(set_attr "type" "nop")]) + +(define_insn "lnop" + [(unspec_volatile [(const_int 0)] UNSPEC_LNOP)] + "" + "lnop" + [(set_attr "type" "lnop")]) + +;; The operand is so we know why we generated this hbrp. +;; We clobber mem to make sure it isn't moved over any +;; loads, stores or calls while scheduling. +(define_insn "iprefetch" + [(unspec [(match_operand:SI 0 "const_int_operand" "n")] UNSPEC_IPREFETCH) + (clobber (mem:BLK (scratch)))] + "" + "hbrp\t# %0" + [(set_attr "type" "iprefetch")]) + +;; A non-volatile version so it gets scheduled +(define_insn "nopn_nv" + [(unspec [(match_operand:SI 0 "register_operand" "r")] UNSPEC_NOP)] + "" + "nop\t%0" + [(set_attr "type" "nop")]) + +(define_insn "hbr" + [(set (reg:SI 130) + (unspec:SI [(match_operand:SI 0 "immediate_operand" "i,i,i") + (match_operand:SI 1 "nonmemory_operand" "r,s,i")] UNSPEC_HBR)) + (unspec [(const_int 0)] UNSPEC_HBR)] + "" + "@ + hbr\t%0,%1 + hbrr\t%0,%1 + hbra\t%0,%1" + [(set_attr "type" "hbr")]) + +(define_insn "sync" + [(unspec_volatile [(const_int 0)] UNSPEC_SYNC) + (clobber (mem:BLK (scratch)))] + "" + "sync" + [(set_attr "type" "br")]) + +(define_insn "syncc" + [(unspec_volatile [(const_int 1)] UNSPEC_SYNC) + (clobber (mem:BLK (scratch)))] + "" + "syncc" + [(set_attr "type" "br")]) + +(define_insn "dsync" + [(unspec_volatile [(const_int 2)] UNSPEC_SYNC) + (clobber (mem:BLK (scratch)))] + "" + "dsync" + [(set_attr "type" "br")]) + + + + ;; Define the subtract-one-and-jump insns so loop.c + ;; knows what to generate. + (define_expand "doloop_end" + [(use (match_operand 0 "" "")) ; loop pseudo + (use (match_operand 1 "" "")) ; iterations; zero if unknown + (use (match_operand 2 "" "")) ; max iterations + (use (match_operand 3 "" "")) ; loop level + (use (match_operand 4 "" ""))] ; label + "" + " + { + /* Currently SMS relies on the do-loop pattern to recognize loops + where (1) the control part comprises of all insns defining and/or + using a certain 'count' register and (2) the loop count can be + adjusted by modifying this register prior to the loop. +. ??? The possible introduction of a new block to initialize the + new IV can potentially effects branch optimizations. */ + if (optimize > 0 && flag_modulo_sched) + { + rtx s0; + rtx bcomp; + rtx loc_ref; + + /* Only use this on innermost loops. 
*/ + if (INTVAL (operands[3]) > 1) + FAIL; + if (GET_MODE (operands[0]) != SImode) + FAIL; + + s0 = operands [0]; + emit_move_insn (s0, gen_rtx_PLUS (SImode, s0, GEN_INT (-1))); + bcomp = gen_rtx_NE(SImode, s0, const0_rtx); + loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands [4]); + emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, + gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp, + loc_ref, pc_rtx))); + + DONE; + }else + FAIL; + }") + +;; convert between any two modes, avoiding any GCC assumptions +(define_expand "spu_convert" + [(set (match_operand 0 "spu_reg_operand" "") + (unspec [(match_operand 1 "spu_reg_operand" "")] UNSPEC_CONVERT))] + "" + { + rtx c = gen__spu_convert (operands[0], operands[1]); + PUT_MODE (SET_SRC (c), GET_MODE (operands[0])); + emit_insn (c); + DONE; + }) + +(define_insn_and_split "_spu_convert" + [(set (match_operand 0 "spu_reg_operand" "=r") + (unspec [(match_operand 1 "spu_reg_operand" "0")] UNSPEC_CONVERT))] + "" + "#" + "reload_completed" + [(const_int 0)] + { + spu_split_convert (operands); + DONE; + } + [(set_attr "type" "convert") + (set_attr "length" "0")]) + + +;; +(include "spu-builtins.md") + + +(define_expand "smaxv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=r") + (smax:V4SF (match_operand:V4SF 1 "register_operand" "r") + (match_operand:V4SF 2 "register_operand" "r")))] + "" + " +{ + rtx mask = gen_reg_rtx (V4SImode); + + emit_insn (gen_cgt_v4sf (mask, operands[1], operands[2])); + emit_insn (gen_selb (operands[0], operands[2], operands[1], mask)); + DONE; +}") + +(define_expand "sminv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=r") + (smin:V4SF (match_operand:V4SF 1 "register_operand" "r") + (match_operand:V4SF 2 "register_operand" "r")))] + "" + " +{ + rtx mask = gen_reg_rtx (V4SImode); + + emit_insn (gen_cgt_v4sf (mask, operands[1], operands[2])); + emit_insn (gen_selb (operands[0], operands[1], operands[2], mask)); + DONE; +}") + +(define_expand "smaxv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=r") + (smax:V2DF (match_operand:V2DF 1 "register_operand" "r") + (match_operand:V2DF 2 "register_operand" "r")))] + "" + " +{ + rtx mask = gen_reg_rtx (V2DImode); + emit_insn (gen_cgt_v2df (mask, operands[1], operands[2])); + emit_insn (gen_selb (operands[0], operands[2], operands[1], + spu_gen_subreg (V4SImode, mask))); + DONE; +}") + +(define_expand "sminv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=r") + (smin:V2DF (match_operand:V2DF 1 "register_operand" "r") + (match_operand:V2DF 2 "register_operand" "r")))] + "" + " +{ + rtx mask = gen_reg_rtx (V2DImode); + emit_insn (gen_cgt_v2df (mask, operands[1], operands[2])); + emit_insn (gen_selb (operands[0], operands[1], operands[2], + spu_gen_subreg (V4SImode, mask))); + DONE; +}") + +(define_expand "vec_widen_umult_hi_v8hi" + [(set (match_operand:V4SI 0 "register_operand" "=r") + (mult:V4SI + (zero_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "register_operand" "r") + (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)]))) + (zero_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 2 "register_operand" "r") + (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)])))))] + "" + " +{ + rtx ve = gen_reg_rtx (V4SImode); + rtx vo = gen_reg_rtx (V4SImode); + rtx mask = gen_reg_rtx (TImode); + unsigned char arr[16] = { + 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, + 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17}; + + emit_move_insn (mask, array_to_constant (TImode, arr)); + emit_insn (gen_spu_mpyhhu (ve, operands[1], operands[2])); + 
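/* mpyhhu multiplies the upper (even-indexed) halfword of each word slot and
   mpyu the lower (odd-indexed) halfword; the shufb below interleaves the two
   sets of 32-bit products to form the widened elements 0-3.  */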
emit_insn (gen_spu_mpyu (vo, operands[1], operands[2])); + emit_insn (gen_shufb (operands[0], ve, vo, mask)); + DONE; +}") + +(define_expand "vec_widen_umult_lo_v8hi" + [(set (match_operand:V4SI 0 "register_operand" "=r") + (mult:V4SI + (zero_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "register_operand" "r") + (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)]))) + (zero_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 2 "register_operand" "r") + (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)])))))] + "" + " +{ + rtx ve = gen_reg_rtx (V4SImode); + rtx vo = gen_reg_rtx (V4SImode); + rtx mask = gen_reg_rtx (TImode); + unsigned char arr[16] = { + 0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B, + 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F}; + + emit_move_insn (mask, array_to_constant (TImode, arr)); + emit_insn (gen_spu_mpyhhu (ve, operands[1], operands[2])); + emit_insn (gen_spu_mpyu (vo, operands[1], operands[2])); + emit_insn (gen_shufb (operands[0], ve, vo, mask)); + DONE; +}") + +(define_expand "vec_widen_smult_hi_v8hi" + [(set (match_operand:V4SI 0 "register_operand" "=r") + (mult:V4SI + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "register_operand" "r") + (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)]))) + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 2 "register_operand" "r") + (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)])))))] + "" + " +{ + rtx ve = gen_reg_rtx (V4SImode); + rtx vo = gen_reg_rtx (V4SImode); + rtx mask = gen_reg_rtx (TImode); + unsigned char arr[16] = { + 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, + 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17}; + + emit_move_insn (mask, array_to_constant (TImode, arr)); + emit_insn (gen_spu_mpyhh (ve, operands[1], operands[2])); + emit_insn (gen_spu_mpy (vo, operands[1], operands[2])); + emit_insn (gen_shufb (operands[0], ve, vo, mask)); + DONE; +}") + +(define_expand "vec_widen_smult_lo_v8hi" + [(set (match_operand:V4SI 0 "register_operand" "=r") + (mult:V4SI + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "register_operand" "r") + (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)]))) + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 2 "register_operand" "r") + (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)])))))] + "" + " +{ + rtx ve = gen_reg_rtx (V4SImode); + rtx vo = gen_reg_rtx (V4SImode); + rtx mask = gen_reg_rtx (TImode); + unsigned char arr[16] = { + 0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B, + 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F}; + + emit_move_insn (mask, array_to_constant (TImode, arr)); + emit_insn (gen_spu_mpyhh (ve, operands[1], operands[2])); + emit_insn (gen_spu_mpy (vo, operands[1], operands[2])); + emit_insn (gen_shufb (operands[0], ve, vo, mask)); + DONE; +}") + +(define_expand "vec_realign_load_" + [(set (match_operand:ALL 0 "register_operand" "=r") + (unspec:ALL [(match_operand:ALL 1 "register_operand" "r") + (match_operand:ALL 2 "register_operand" "r") + (match_operand:TI 3 "register_operand" "r")] UNSPEC_SPU_REALIGN_LOAD))] + "" + " +{ + emit_insn (gen_shufb (operands[0], operands[1], operands[2], operands[3])); + DONE; +}") + +(define_expand "spu_lvsr" + [(set (match_operand:V16QI 0 "register_operand" "") + (unspec:V16QI [(match_operand 1 "memory_operand" "")] UNSPEC_SPU_MASK_FOR_LOAD))] + "" + " +{ + rtx addr; + rtx offset = gen_reg_rtx (V8HImode); + rtx addr_bits = gen_reg_rtx (SImode); + rtx addr_bits_vec = 
gen_reg_rtx (V8HImode); + rtx splatqi = gen_reg_rtx (TImode); + rtx result = gen_reg_rtx (V8HImode); + unsigned char arr[16] = { + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F}; + unsigned char arr2[16] = { + 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, + 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03}; + + emit_move_insn (offset, array_to_constant (V8HImode, arr)); + emit_move_insn (splatqi, array_to_constant (TImode, arr2)); + + gcc_assert (GET_CODE (operands[1]) == MEM); + addr = force_reg (Pmode, XEXP (operands[1], 0)); + emit_insn (gen_andsi3 (addr_bits, addr, GEN_INT (0xF))); + emit_insn (gen_shufb (addr_bits_vec, addr_bits, addr_bits, splatqi)); + + /* offset - (addr & 0xF) + It is safe to use a single sfh, because each byte of offset is > 15 and + each byte of addr is <= 15. */ + emit_insn (gen_subv8hi3 (result, offset, addr_bits_vec)); + + result = simplify_gen_subreg (V16QImode, result, V8HImode, 0); + emit_move_insn (operands[0], result); + + DONE; +}") + +(define_expand "vec_unpacku_hi_v8hi" + [(set (match_operand:V4SI 0 "spu_reg_operand" "=r") + (zero_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "spu_reg_operand" "r") + (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)]))))] + "" +{ + rtx mask = gen_reg_rtx (TImode); + unsigned char arr[16] = { + 0x80, 0x80, 0x00, 0x01, 0x80, 0x80, 0x02, 0x03, + 0x80, 0x80, 0x04, 0x05, 0x80, 0x80, 0x06, 0x07}; + + emit_move_insn (mask, array_to_constant (TImode, arr)); + emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask)); + + DONE; +}) + +(define_expand "vec_unpacku_lo_v8hi" + [(set (match_operand:V4SI 0 "spu_reg_operand" "=r") + (zero_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "spu_reg_operand" "r") + (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)]))))] +"" +{ + rtx mask = gen_reg_rtx (TImode); + unsigned char arr[16] = { + 0x80, 0x80, 0x08, 0x09, 0x80, 0x80, 0x0A, 0x0B, + 0x80, 0x80, 0x0C, 0x0D, 0x80, 0x80, 0x0E, 0x0F}; + + emit_move_insn (mask, array_to_constant (TImode, arr)); + emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask)); + + DONE; +}) + +(define_expand "vec_unpacks_hi_v8hi" + [(set (match_operand:V4SI 0 "spu_reg_operand" "=r") + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "spu_reg_operand" "r") + (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)]))))] + "" +{ + rtx tmp1 = gen_reg_rtx (V8HImode); + rtx tmp2 = gen_reg_rtx (V4SImode); + rtx mask = gen_reg_rtx (TImode); + unsigned char arr[16] = { + 0x80, 0x80, 0x00, 0x01, 0x80, 0x80, 0x02, 0x03, + 0x80, 0x80, 0x04, 0x05, 0x80, 0x80, 0x06, 0x07}; + + emit_move_insn (mask, array_to_constant (TImode, arr)); + emit_insn (gen_shufb (tmp1, operands[1], operands[1], mask)); + emit_insn (gen_spu_xshw (tmp2, tmp1)); + emit_move_insn (operands[0], tmp2); + + DONE; +}) + +(define_expand "vec_unpacks_lo_v8hi" + [(set (match_operand:V4SI 0 "spu_reg_operand" "=r") + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "spu_reg_operand" "r") + (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)]))))] +"" +{ + rtx tmp1 = gen_reg_rtx (V8HImode); + rtx tmp2 = gen_reg_rtx (V4SImode); + rtx mask = gen_reg_rtx (TImode); + unsigned char arr[16] = { + 0x80, 0x80, 0x08, 0x09, 0x80, 0x80, 0x0A, 0x0B, + 0x80, 0x80, 0x0C, 0x0D, 0x80, 0x80, 0x0E, 0x0F}; + + emit_move_insn (mask, array_to_constant (TImode, arr)); + emit_insn (gen_shufb (tmp1, operands[1], operands[1], mask)); + emit_insn (gen_spu_xshw (tmp2, tmp1)); + 
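/* xshw sign-extends the low halfword of each word slot, completing the
   widening of elements 4-7.  */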
emit_move_insn (operands[0], tmp2); + +DONE; +}) + +(define_expand "vec_unpacku_hi_v16qi" + [(set (match_operand:V8HI 0 "spu_reg_operand" "=r") + (zero_extend:V8HI + (vec_select:V8QI + (match_operand:V16QI 1 "spu_reg_operand" "r") + (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3) + (const_int 4)(const_int 5)(const_int 6)(const_int 7)]))))] + "" +{ + rtx mask = gen_reg_rtx (TImode); + unsigned char arr[16] = { + 0x80, 0x00, 0x80, 0x01, 0x80, 0x02, 0x80, 0x03, + 0x80, 0x04, 0x80, 0x05, 0x80, 0x06, 0x80, 0x07}; + + emit_move_insn (mask, array_to_constant (TImode, arr)); + emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask)); + + DONE; +}) + +(define_expand "vec_unpacku_lo_v16qi" + [(set (match_operand:V8HI 0 "spu_reg_operand" "=r") + (zero_extend:V8HI + (vec_select:V8QI + (match_operand:V16QI 1 "spu_reg_operand" "r") + (parallel [(const_int 8)(const_int 9)(const_int 10)(const_int 11) + (const_int 12)(const_int 13)(const_int 14)(const_int 15)]))))] +"" +{ + rtx mask = gen_reg_rtx (TImode); + unsigned char arr[16] = { + 0x80, 0x08, 0x80, 0x09, 0x80, 0x0A, 0x80, 0x0B, + 0x80, 0x0C, 0x80, 0x0D, 0x80, 0x0E, 0x80, 0x0F}; + + emit_move_insn (mask, array_to_constant (TImode, arr)); + emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask)); + + DONE; +}) + +(define_expand "vec_unpacks_hi_v16qi" + [(set (match_operand:V8HI 0 "spu_reg_operand" "=r") + (sign_extend:V8HI + (vec_select:V8QI + (match_operand:V16QI 1 "spu_reg_operand" "r") + (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3) + (const_int 4)(const_int 5)(const_int 6)(const_int 7)]))))] +"" +{ + rtx tmp1 = gen_reg_rtx (V16QImode); + rtx tmp2 = gen_reg_rtx (V8HImode); + rtx mask = gen_reg_rtx (TImode); + unsigned char arr[16] = { + 0x80, 0x00, 0x80, 0x01, 0x80, 0x02, 0x80, 0x03, + 0x80, 0x04, 0x80, 0x05, 0x80, 0x06, 0x80, 0x07}; + + emit_move_insn (mask, array_to_constant (TImode, arr)); + emit_insn (gen_shufb (tmp1, operands[1], operands[1], mask)); + emit_insn (gen_spu_xsbh (tmp2, tmp1)); + emit_move_insn (operands[0], tmp2); + + DONE; +}) + +(define_expand "vec_unpacks_lo_v16qi" + [(set (match_operand:V8HI 0 "spu_reg_operand" "=r") + (sign_extend:V8HI + (vec_select:V8QI + (match_operand:V16QI 1 "spu_reg_operand" "r") + (parallel [(const_int 8)(const_int 9)(const_int 10)(const_int 11) + (const_int 12)(const_int 13)(const_int 14)(const_int 15)]))))] +"" +{ + rtx tmp1 = gen_reg_rtx (V16QImode); + rtx tmp2 = gen_reg_rtx (V8HImode); + rtx mask = gen_reg_rtx (TImode); + unsigned char arr[16] = { + 0x80, 0x08, 0x80, 0x09, 0x80, 0x0A, 0x80, 0x0B, + 0x80, 0x0C, 0x80, 0x0D, 0x80, 0x0E, 0x80, 0x0F}; + + emit_move_insn (mask, array_to_constant (TImode, arr)); + emit_insn (gen_shufb (tmp1, operands[1], operands[1], mask)); + emit_insn (gen_spu_xsbh (tmp2, tmp1)); + emit_move_insn (operands[0], tmp2); + +DONE; +}) + + +(define_expand "vec_extract_evenv4si" + [(set (match_operand:V4SI 0 "spu_reg_operand" "=r") + (vec_concat:V4SI + (vec_select:V2SI + (match_operand:V4SI 1 "spu_reg_operand" "r") + (parallel [(const_int 0)(const_int 2)])) + (vec_select:V2SI + (match_operand:V4SI 2 "spu_reg_operand" "r") + (parallel [(const_int 0)(const_int 2)]))))] + + "" + " +{ + rtx mask = gen_reg_rtx (TImode); + unsigned char arr[16] = { + 0x00, 0x01, 0x02, 0x03, + 0x08, 0x09, 0x0A, 0x0B, + 0x10, 0x11, 0x12, 0x13, + 0x18, 0x19, 0x1A, 0x1B}; + + emit_move_insn (mask, array_to_constant (TImode, arr)); + emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask)); + DONE; +}") + + +(define_expand 
"vec_extract_evenv4sf" + [(set (match_operand:V4SF 0 "spu_reg_operand" "=r") + (vec_concat:V4SF + (vec_select:V2SF + (match_operand:V4SF 1 "spu_reg_operand" "r") + (parallel [(const_int 0)(const_int 2)])) + (vec_select:V2SF + (match_operand:V4SF 2 "spu_reg_operand" "r") + (parallel [(const_int 0)(const_int 2)]))))] + + "" + " +{ + rtx mask = gen_reg_rtx (TImode); + unsigned char arr[16] = { + 0x00, 0x01, 0x02, 0x03, + 0x08, 0x09, 0x0A, 0x0B, + 0x10, 0x11, 0x12, 0x13, + 0x18, 0x19, 0x1A, 0x1B}; + + emit_move_insn (mask, array_to_constant (TImode, arr)); + emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask)); + DONE; +}") + +(define_expand "vec_extract_evenv8hi" + [(set (match_operand:V8HI 0 "spu_reg_operand" "=r") + (vec_concat:V8HI + (vec_select:V4HI + (match_operand:V8HI 1 "spu_reg_operand" "r") + (parallel [(const_int 0)(const_int 2)(const_int 4)(const_int 6)])) + (vec_select:V4HI + (match_operand:V8HI 2 "spu_reg_operand" "r") + (parallel [(const_int 0)(const_int 2)(const_int 4)(const_int 6)]))))] + + "" + " +{ + rtx mask = gen_reg_rtx (TImode); + unsigned char arr[16] = { + 0x00, 0x01, 0x04, 0x05, + 0x08, 0x09, 0x0C, 0x0D, + 0x10, 0x11, 0x14, 0x15, + 0x18, 0x19, 0x1C, 0x1D}; + + emit_move_insn (mask, array_to_constant (TImode, arr)); + emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask)); + DONE; +}") + +(define_expand "vec_extract_evenv16qi" + [(set (match_operand:V16QI 0 "spu_reg_operand" "=r") + (vec_concat:V16QI + (vec_select:V8QI + (match_operand:V16QI 1 "spu_reg_operand" "r") + (parallel [(const_int 0)(const_int 2)(const_int 4)(const_int 6) + (const_int 8)(const_int 10)(const_int 12)(const_int 14)])) + (vec_select:V8QI + (match_operand:V16QI 2 "spu_reg_operand" "r") + (parallel [(const_int 0)(const_int 2)(const_int 4)(const_int 6) + (const_int 8)(const_int 10)(const_int 12)(const_int 14)]))))] + + "" + " +{ + rtx mask = gen_reg_rtx (TImode); + unsigned char arr[16] = { + 0x00, 0x02, 0x04, 0x06, + 0x08, 0x0A, 0x0C, 0x0E, + 0x10, 0x12, 0x14, 0x16, + 0x18, 0x1A, 0x1C, 0x1E}; + + emit_move_insn (mask, array_to_constant (TImode, arr)); + emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask)); + DONE; +}") + +(define_expand "vec_extract_oddv4si" + [(set (match_operand:V4SI 0 "spu_reg_operand" "=r") + (vec_concat:V4SI + (vec_select:V2SI + (match_operand:V4SI 1 "spu_reg_operand" "r") + (parallel [(const_int 1)(const_int 3)])) + (vec_select:V2SI + (match_operand:V4SI 2 "spu_reg_operand" "r") + (parallel [(const_int 1)(const_int 3)]))))] + + "" + " +{ + rtx mask = gen_reg_rtx (TImode); + unsigned char arr[16] = { + 0x04, 0x05, 0x06, 0x07, + 0x0C, 0x0D, 0x0E, 0x0F, + 0x14, 0x15, 0x16, 0x17, + 0x1C, 0x1D, 0x1E, 0x1F}; + + emit_move_insn (mask, array_to_constant (TImode, arr)); + emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask)); + DONE; +}") + +(define_expand "vec_extract_oddv4sf" + [(set (match_operand:V4SF 0 "spu_reg_operand" "=r") + (vec_concat:V4SF + (vec_select:V2SF + (match_operand:V4SF 1 "spu_reg_operand" "r") + (parallel [(const_int 1)(const_int 3)])) + (vec_select:V2SF + (match_operand:V4SF 2 "spu_reg_operand" "r") + (parallel [(const_int 1)(const_int 3)]))))] + + "" + " +{ + rtx mask = gen_reg_rtx (TImode); + unsigned char arr[16] = { + 0x04, 0x05, 0x06, 0x07, + 0x0C, 0x0D, 0x0E, 0x0F, + 0x14, 0x15, 0x16, 0x17, + 0x1C, 0x1D, 0x1E, 0x1F}; + + emit_move_insn (mask, array_to_constant (TImode, arr)); + emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask)); + DONE; +}") + +(define_expand "vec_extract_oddv8hi" + 
[(set (match_operand:V8HI 0 "spu_reg_operand" "=r") + (vec_concat:V8HI + (vec_select:V4HI + (match_operand:V8HI 1 "spu_reg_operand" "r") + (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7)])) + (vec_select:V4HI + (match_operand:V8HI 2 "spu_reg_operand" "r") + (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7)]))))] + + "" + " +{ + rtx mask = gen_reg_rtx (TImode); + unsigned char arr[16] = { + 0x02, 0x03, 0x06, 0x07, + 0x0A, 0x0B, 0x0E, 0x0F, + 0x12, 0x13, 0x16, 0x17, + 0x1A, 0x1B, 0x1E, 0x1F}; + + emit_move_insn (mask, array_to_constant (TImode, arr)); + emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask)); + DONE; +}") + +(define_expand "vec_extract_oddv16qi" + [(set (match_operand:V16QI 0 "spu_reg_operand" "=r") + (vec_concat:V16QI + (vec_select:V8QI + (match_operand:V16QI 1 "spu_reg_operand" "r") + (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7) + (const_int 9)(const_int 11)(const_int 13)(const_int 15)])) + (vec_select:V8QI + (match_operand:V16QI 2 "spu_reg_operand" "r") + (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7) + (const_int 9)(const_int 11)(const_int 13)(const_int 15)]))))] + + "" + " +{ + rtx mask = gen_reg_rtx (TImode); + unsigned char arr[16] = { + 0x01, 0x03, 0x05, 0x07, + 0x09, 0x0B, 0x0D, 0x0F, + 0x11, 0x13, 0x15, 0x17, + 0x19, 0x1B, 0x1D, 0x1F}; + + emit_move_insn (mask, array_to_constant (TImode, arr)); + emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask)); + DONE; +}") + +(define_expand "vec_interleave_highv4sf" + [(set (match_operand:V4SF 0 "spu_reg_operand" "=r") + (vec_select:V4SF + (vec_concat:V4SF + (vec_select:V2SF + (match_operand:V4SF 1 "spu_reg_operand" "r") + (parallel [(const_int 0)(const_int 1)])) + (vec_select:V2SF + (match_operand:V4SF 2 "spu_reg_operand" "r") + (parallel [(const_int 0)(const_int 1)]))) + (parallel [(const_int 0)(const_int 2)(const_int 1)(const_int 3)])))] + + "" + " +{ + rtx mask = gen_reg_rtx (TImode); + unsigned char arr[16] = { + 0x00, 0x01, 0x02, 0x03, + 0x10, 0x11, 0x12, 0x13, + 0x04, 0x05, 0x06, 0x07, + 0x14, 0x15, 0x16, 0x17}; + + emit_move_insn (mask, array_to_constant (TImode, arr)); + emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask)); + DONE; +}") + +(define_expand "vec_interleave_lowv4sf" + [(set (match_operand:V4SF 0 "spu_reg_operand" "=r") + (vec_select:V4SF + (vec_concat:V4SF + (vec_select:V2SF + (match_operand:V4SF 1 "spu_reg_operand" "r") + (parallel [(const_int 2)(const_int 3)])) + (vec_select:V2SF + (match_operand:V4SF 2 "spu_reg_operand" "r") + (parallel [(const_int 2)(const_int 3)]))) + (parallel [(const_int 0)(const_int 2)(const_int 1)(const_int 3)])))] + + "" + " +{ + rtx mask = gen_reg_rtx (TImode); + unsigned char arr[16] = { + 0x08, 0x09, 0x0A, 0x0B, + 0x18, 0x19, 0x1A, 0x1B, + 0x0C, 0x0D, 0x0E, 0x0F, + 0x1C, 0x1D, 0x1E, 0x1F}; + + emit_move_insn (mask, array_to_constant (TImode, arr)); + emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask)); + DONE; +}") + +(define_expand "vec_interleave_highv4si" + [(set (match_operand:V4SI 0 "spu_reg_operand" "=r") + (vec_select:V4SI + (vec_concat:V4SI + (vec_select:V2SI + (match_operand:V4SI 1 "spu_reg_operand" "r") + (parallel [(const_int 0)(const_int 1)])) + (vec_select:V2SI + (match_operand:V4SI 2 "spu_reg_operand" "r") + (parallel [(const_int 0)(const_int 1)]))) + (parallel [(const_int 0)(const_int 2)(const_int 1)(const_int 3)])))] + + "" + " +{ + rtx mask = gen_reg_rtx (TImode); + unsigned char arr[16] = { + 0x00, 0x01, 0x02, 0x03, + 0x10, 0x11, 0x12, 
0x13, + 0x04, 0x05, 0x06, 0x07, + 0x14, 0x15, 0x16, 0x17}; + + emit_move_insn (mask, array_to_constant (TImode, arr)); + emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask)); + DONE; +}") + +(define_expand "vec_interleave_lowv4si" + [(set (match_operand:V4SI 0 "spu_reg_operand" "=r") + (vec_select:V4SI + (vec_concat:V4SI + (vec_select:V2SI + (match_operand:V4SI 1 "spu_reg_operand" "r") + (parallel [(const_int 2)(const_int 3)])) + (vec_select:V2SI + (match_operand:V4SI 2 "spu_reg_operand" "r") + (parallel [(const_int 2)(const_int 3)]))) + (parallel [(const_int 0)(const_int 2)(const_int 1)(const_int 3)])))] + + "" + " +{ + rtx mask = gen_reg_rtx (TImode); + unsigned char arr[16] = { + 0x08, 0x09, 0x0A, 0x0B, + 0x18, 0x19, 0x1A, 0x1B, + 0x0C, 0x0D, 0x0E, 0x0F, + 0x1C, 0x1D, 0x1E, 0x1F}; + + emit_move_insn (mask, array_to_constant (TImode, arr)); + emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask)); + DONE; +}") + +(define_expand "vec_interleave_highv8hi" + [(set (match_operand:V8HI 0 "spu_reg_operand" "=r") + (vec_select:V8HI + (vec_concat:V8HI + (vec_select:V4HI + (match_operand:V8HI 1 "spu_reg_operand" "r") + (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)])) + (vec_select:V4HI + (match_operand:V8HI 2 "spu_reg_operand" "r") + (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)]))) + (parallel [(const_int 0)(const_int 4)(const_int 1)(const_int 5) + (const_int 2)(const_int 6)(const_int 3)(const_int 7)])))] + + "" + " +{ + rtx mask = gen_reg_rtx (TImode); + unsigned char arr[16] = { + 0x00, 0x01, 0x10, 0x11, + 0x02, 0x03, 0x12, 0x13, + 0x04, 0x05, 0x14, 0x15, + 0x06, 0x07, 0x16, 0x17}; + + emit_move_insn (mask, array_to_constant (TImode, arr)); + emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask)); + DONE; + }") + +(define_expand "vec_interleave_lowv8hi" + [(set (match_operand:V8HI 0 "spu_reg_operand" "=r") + (vec_select:V8HI + (vec_concat:V8HI + (vec_select:V4HI + (match_operand:V8HI 1 "spu_reg_operand" "r") + (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)])) + (vec_select:V4HI + (match_operand:V8HI 2 "spu_reg_operand" "r") + (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)]))) + (parallel [(const_int 0)(const_int 4)(const_int 1)(const_int 5) + (const_int 2)(const_int 6)(const_int 3)(const_int 7)])))] + + "" + " +{ + rtx mask = gen_reg_rtx (TImode); + unsigned char arr[16] = { + 0x08, 0x09, 0x18, 0x19, + 0x0A, 0x0B, 0x1A, 0x1B, + 0x0C, 0x0D, 0x1C, 0x1D, + 0x0E, 0x0F, 0x1E, 0x1F}; + + emit_move_insn (mask, array_to_constant (TImode, arr)); + emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask)); + DONE; +}") + +(define_expand "vec_interleave_highv16qi" + [(set (match_operand:V16QI 0 "spu_reg_operand" "=r") + (vec_select:V16QI + (vec_concat:V16QI + (vec_select:V8QI + (match_operand:V16QI 1 "spu_reg_operand" "r") + (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3) + (const_int 4)(const_int 5)(const_int 6)(const_int 7)])) + (vec_select:V8QI + (match_operand:V16QI 2 "spu_reg_operand" "r") + (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3) + (const_int 4)(const_int 5)(const_int 6)(const_int 7)]))) + (parallel [(const_int 0)(const_int 8)(const_int 1)(const_int 9) + (const_int 2)(const_int 10)(const_int 3)(const_int 11) + (const_int 4)(const_int 12)(const_int 5)(const_int 13) + (const_int 6)(const_int 14)(const_int 7)(const_int 15)])))] + + "" + " +{ + rtx mask = gen_reg_rtx (TImode); + unsigned char arr[16] = { + 0x00, 0x10, 0x01, 0x11, + 0x02, 0x12, 
0x03, 0x13, + 0x04, 0x14, 0x05, 0x15, + 0x06, 0x16, 0x07, 0x17}; + + emit_move_insn (mask, array_to_constant (TImode, arr)); + emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask)); + DONE; +}") + +(define_expand "vec_interleave_lowv16qi" + [(set (match_operand:V16QI 0 "spu_reg_operand" "=r") + (vec_select:V16QI + (vec_concat:V16QI + (vec_select:V8QI + (match_operand:V16QI 1 "spu_reg_operand" "r") + (parallel [(const_int 8)(const_int 9)(const_int 10)(const_int 11) + (const_int 12)(const_int 13)(const_int 14)(const_int 15)])) + (vec_select:V8QI + (match_operand:V16QI 2 "spu_reg_operand" "r") + (parallel [(const_int 8)(const_int 9)(const_int 10)(const_int 11) + (const_int 12)(const_int 13)(const_int 14)(const_int 15)]))) + (parallel [(const_int 0)(const_int 8)(const_int 1)(const_int 9) + (const_int 2)(const_int 10)(const_int 3)(const_int 11) + (const_int 4)(const_int 12)(const_int 5)(const_int 13) + (const_int 6)(const_int 14)(const_int 7)(const_int 15)])))] + + "" + " +{ + rtx mask = gen_reg_rtx (TImode); + unsigned char arr[16] = { + 0x08, 0x18, 0x09, 0x19, + 0x0A, 0x1A, 0x0B, 0x1B, + 0x0C, 0x1C, 0x0D, 0x1D, + 0x0E, 0x1E, 0x0F, 0x1F}; + + emit_move_insn (mask, array_to_constant (TImode, arr)); + emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask)); + DONE; +}") + +(define_expand "vec_pack_trunc_v8hi" + [(set (match_operand:V16QI 0 "spu_reg_operand" "=r") + (vec_concat:V16QI + (truncate:V8QI (match_operand:V8HI 1 "spu_reg_operand" "r")) + (truncate:V8QI (match_operand:V8HI 2 "spu_reg_operand" "r"))))] + "" + " +{ + rtx mask = gen_reg_rtx (TImode); + unsigned char arr[16] = { + 0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F, + 0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F}; + + emit_move_insn (mask, array_to_constant (TImode, arr)); + emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask)); + + DONE; +}") + +(define_expand "vec_pack_trunc_v4si" + [(set (match_operand:V8HI 0 "spu_reg_operand" "=r") + (vec_concat:V8HI + (truncate:V4HI (match_operand:V4SI 1 "spu_reg_operand" "r")) + (truncate:V4HI (match_operand:V4SI 2 "spu_reg_operand" "r"))))] + "" + " +{ + rtx mask = gen_reg_rtx (TImode); + unsigned char arr[16] = { + 0x02, 0x03, 0x06, 0x07, 0x0A, 0x0B, 0x0E, 0x0F, + 0x12, 0x13, 0x16, 0x17, 0x1A, 0x1B, 0x1E, 0x1F}; + + emit_move_insn (mask, array_to_constant (TImode, arr)); + emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask)); + + DONE; +}") + +(define_insn "stack_protect_set" + [(set (match_operand:SI 0 "memory_operand" "=m") + (unspec:SI [(match_operand:SI 1 "memory_operand" "m")] UNSPEC_SP_SET)) + (set (match_scratch:SI 2 "=&r") (const_int 0))] + "" + "lq%p1\t%2,%1\;stq%p0\t%2,%0\;xor\t%2,%2,%2" + [(set_attr "length" "12") + (set_attr "type" "multi1")] +) + +(define_expand "stack_protect_test" + [(match_operand 0 "memory_operand" "") + (match_operand 1 "memory_operand" "") + (match_operand 2 "" "")] + "" +{ + rtx compare_result; + rtx bcomp, loc_ref; + + compare_result = gen_reg_rtx (SImode); + + emit_insn (gen_stack_protect_test_si (compare_result, + operands[0], + operands[1])); + + bcomp = gen_rtx_NE (SImode, compare_result, const0_rtx); + + loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[2]); + + emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, + gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp, + loc_ref, pc_rtx))); + + DONE; +}) + +(define_insn "stack_protect_test_si" + [(set (match_operand:SI 0 "spu_reg_operand" "=&r") + (unspec:SI [(match_operand:SI 1 "memory_operand" "m") + (match_operand:SI 2 "memory_operand" "m")] + UNSPEC_SP_TEST)) 
+ (set (match_scratch:SI 3 "=&r") (const_int 0))] + "" + "lq%p1\t%0,%1\;lq%p2\t%3,%2\;ceq\t%0,%0,%3\;xor\t%3,%3,%3" + [(set_attr "length" "16") + (set_attr "type" "multi1")] +) + Index: divv2df3.c =================================================================== --- divv2df3.c (nonexistent) +++ divv2df3.c (revision 384) @@ -0,0 +1,195 @@ +/* Copyright (C) 2009 Free Software Foundation, Inc. + + This file is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your option) + any later version. + + This file is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#include + +vector double __divv2df3 (vector double a_in, vector double b_in); + +/* __divv2df3 divides the vector dividend a by the vector divisor b and + returns the resulting vector quotient. Maximum error about 0.5 ulp + over entire double range including denorms, compared to true result + in round-to-nearest rounding mode. Handles Inf or NaN operands and + results correctly. */ + +vector double +__divv2df3 (vector double a_in, vector double b_in) +{ + /* Variables */ + vec_int4 exp, exp_bias; + vec_uint4 no_underflow, overflow; + vec_float4 mant_bf, inv_bf; + vec_ullong2 exp_a, exp_b; + vec_ullong2 a_nan, a_zero, a_inf, a_denorm, a_denorm0; + vec_ullong2 b_nan, b_zero, b_inf, b_denorm, b_denorm0; + vec_ullong2 nan; + vec_uint4 a_exp, b_exp; + vec_ullong2 a_mant_0, b_mant_0; + vec_ullong2 a_exp_1s, b_exp_1s; + vec_ullong2 sign_exp_mask; + + vec_double2 a, b; + vec_double2 mant_a, mant_b, inv_b, q0, q1, q2, mult; + + /* Constants */ + vec_uint4 exp_mask_u32 = spu_splats((unsigned int)0x7FF00000); + vec_uchar16 splat_hi = (vec_uchar16){0,1,2,3, 0,1,2,3, 8, 9,10,11, 8,9,10,11}; + vec_uchar16 swap_32 = (vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11}; + vec_ullong2 exp_mask = spu_splats(0x7FF0000000000000ULL); + vec_ullong2 sign_mask = spu_splats(0x8000000000000000ULL); + vec_float4 onef = spu_splats(1.0f); + vec_double2 one = spu_splats(1.0); + vec_double2 exp_53 = (vec_double2)spu_splats(0x0350000000000000ULL); + + sign_exp_mask = spu_or(sign_mask, exp_mask); + + /* Extract the floating point components from each of the operands including + * exponent and mantissa. 
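+ * The 0x7FF00000 mask selects the 11 exponent bits from the high word of
+ * each doubleword, and the splat_hi shuffle then copies that high word into
+ * both word slots so the exponents can be compared with full-word operations.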
+   */
+  a_exp = (vec_uint4)spu_and((vec_uint4)a_in, exp_mask_u32);
+  a_exp = spu_shuffle(a_exp, a_exp, splat_hi);
+  b_exp = (vec_uint4)spu_and((vec_uint4)b_in, exp_mask_u32);
+  b_exp = spu_shuffle(b_exp, b_exp, splat_hi);
+
+  a_mant_0 = (vec_ullong2)spu_cmpeq((vec_uint4)spu_andc((vec_ullong2)a_in, sign_exp_mask), 0);
+  a_mant_0 = spu_and(a_mant_0, spu_shuffle(a_mant_0, a_mant_0, swap_32));
+
+  b_mant_0 = (vec_ullong2)spu_cmpeq((vec_uint4)spu_andc((vec_ullong2)b_in, sign_exp_mask), 0);
+  b_mant_0 = spu_and(b_mant_0, spu_shuffle(b_mant_0, b_mant_0, swap_32));
+
+  a_exp_1s = (vec_ullong2)spu_cmpeq(a_exp, exp_mask_u32);
+  b_exp_1s = (vec_ullong2)spu_cmpeq(b_exp, exp_mask_u32);
+
+  /* Identify all possible special values that must be accommodated including:
+   * +-denorm, +-0, +-infinity, and NaNs.
+   */
+  a_denorm0= (vec_ullong2)spu_cmpeq(a_exp, 0);
+  a_nan    = spu_andc(a_exp_1s, a_mant_0);
+  a_zero   = spu_and (a_denorm0, a_mant_0);
+  a_inf    = spu_and (a_exp_1s, a_mant_0);
+  a_denorm = spu_andc(a_denorm0, a_zero);
+
+  b_denorm0= (vec_ullong2)spu_cmpeq(b_exp, 0);
+  b_nan    = spu_andc(b_exp_1s, b_mant_0);
+  b_zero   = spu_and (b_denorm0, b_mant_0);
+  b_inf    = spu_and (b_exp_1s, b_mant_0);
+  b_denorm = spu_andc(b_denorm0, b_zero);
+
+  /* Scale denorm inputs into normalized numbers by conditionally scaling the
+   * input parameters.
+   */
+  a = spu_sub(spu_or(a_in, exp_53), spu_sel(exp_53, a_in, sign_mask));
+  a = spu_sel(a_in, a, a_denorm);
+
+  b = spu_sub(spu_or(b_in, exp_53), spu_sel(exp_53, b_in, sign_mask));
+  b = spu_sel(b_in, b, b_denorm);
+
+  /* Extract the divisor and dividend exponent and force parameters into the signed
+   * range [1.0,2.0) or (-2.0,-1.0].
+   */
+  exp_a = spu_and((vec_ullong2)a, exp_mask);
+  exp_b = spu_and((vec_ullong2)b, exp_mask);
+
+  mant_a = spu_sel(a, one, (vec_ullong2)exp_mask);
+  mant_b = spu_sel(b, one, (vec_ullong2)exp_mask);
+
+  /* Approximate the reciprocal of b by using
+   * the single precision reciprocal estimate followed by one
+   * single precision iteration of Newton-Raphson.
+   */
+  mant_bf = spu_roundtf(mant_b);
+  inv_bf = spu_re(mant_bf);
+  inv_bf = spu_madd(spu_nmsub(mant_bf, inv_bf, onef), inv_bf, inv_bf);
+
+  /* Perform 2 more Newton-Raphson iterations in double precision. The
+   * result (q1) is in the range (0.5, 2.0).
+   */
+  inv_b = spu_extend(inv_bf);
+  inv_b = spu_madd(spu_nmsub(mant_b, inv_b, one), inv_b, inv_b);
+  q0 = spu_mul(mant_a, inv_b);
+  q1 = spu_madd(spu_nmsub(mant_b, q0, mant_a), inv_b, q0);
+
+  /* Determine the exponent correction factor that must be applied
+   * to q1 by taking into account the exponent of the normalized inputs
+   * and the scale factors that were applied to normalize them.
+   */
+  exp = spu_rlmaska(spu_sub((vec_int4)exp_a, (vec_int4)exp_b), -20);
+  exp = spu_add(exp, (vec_int4)spu_add(spu_and((vec_int4)a_denorm, -0x34), spu_and((vec_int4)b_denorm, 0x34)));
+
+  /* Bias the quotient exponent depending on the sign of the exponent correction
+   * factor so that a single multiplier will ensure the entire double precision
+   * domain (including denorms) can be achieved.
+   *
+   *     exp        bias q1    adjust exp
+   *    =====       ========   ==========
+   *   positive      2^+65        -65
+   *   negative      2^-64        +64
+   */
+  exp_bias = spu_xor(spu_rlmaska(exp, -31), 64);
+  exp = spu_sub(exp, exp_bias);
+
+  q1 = spu_sel(q1, (vec_double2)spu_add((vec_int4)q1, spu_sl(exp_bias, 20)), exp_mask);
+
+  /* Compute a multiplier (mult) to be applied to the quotient (q1) to produce the
+   * expected result.
+   * On overflow, clamp the multiplier to the maximum non-infinite
+   * number in case the rounding mode is not round-to-nearest.
+   */
+  exp = spu_add(exp, 0x3FF);
+  no_underflow = spu_cmpgt(exp, 0);
+  overflow = spu_cmpgt(exp, 0x7FE);
+  exp = spu_and(spu_sl(exp, 20), (vec_int4)no_underflow);
+  exp = spu_and(exp, (vec_int4)exp_mask);
+
+  mult = spu_sel((vec_double2)exp, (vec_double2)(spu_add((vec_uint4)exp_mask, -1)), (vec_ullong2)overflow);
+
+  /* Handle special value conditions. These include:
+   *
+   * 1) IF either operand is a NaN OR both operands are 0 OR both operands are
+   *    INFINITY THEN a NaN results.
+   * 2) ELSE IF the dividend is an INFINITY OR the divisor is 0 THEN an INFINITY results.
+   * 3) ELSE IF the dividend is 0 OR the divisor is an INFINITY THEN a 0 results.
+   */
+  mult = spu_andc(mult, (vec_double2)spu_or(a_zero, b_inf));
+  mult = spu_sel(mult, (vec_double2)exp_mask, spu_or(a_inf, b_zero));
+
+  nan = spu_or(a_nan, b_nan);
+  nan = spu_or(nan, spu_and(a_zero, b_zero));
+  nan = spu_or(nan, spu_and(a_inf, b_inf));
+
+  mult = spu_or(mult, (vec_double2)nan);
+
+  /* Scale the final quotient */
+
+  q2 = spu_mul(q1, mult);
+
+  return (q2);
+}
+
+
+/* We use the same function for vector and scalar division. Provide the
+   scalar entry point as an alias. */
+double __divdf3 (double a, double b)
+  __attribute__ ((__alias__ ("__divv2df3")));
+
+/* Some toolchain builds used the __fast_divdf3 name for this helper function.
+   Provide this as another alternate entry point for compatibility. */
+double __fast_divdf3 (double a, double b)
+  __attribute__ ((__alias__ ("__divv2df3")));
+
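As a rough illustration of the algorithm described in the comments of __divv2df3 above, the following scalar sketch in plain C (not SPU intrinsics, and not part of this revision) walks through the same refinement steps: a low-precision reciprocal estimate of the divisor, one single-precision Newton-Raphson step, then double-precision steps applied to the reciprocal and to the quotient itself. The helper name nr_divide is hypothetical, 1.0f / b merely stands in for the estimate that spu_re would return, and the exponent bookkeeping and denorm/Inf/NaN handling of the real code are omitted.

#include <stdio.h>

/* Illustrative scalar model of the Newton-Raphson division refinement.
   The fused multiply-adds (spu_nmsub/spu_madd) are written out as
   ordinary arithmetic; no special-value handling is attempted.  */
static double
nr_divide (double a, double b)
{
  float bf = (float) b;
  float yf = 1.0f / bf;               /* stand-in for spu_re's estimate   */
  yf = yf + yf * (1.0f - bf * yf);    /* one single-precision NR step     */

  double y = (double) yf;
  y = y + y * (1.0 - b * y);          /* double-precision NR step on 1/b  */
  double q0 = a * y;                  /* raw quotient                     */
  double q1 = q0 + y * (a - b * q0);  /* refine the quotient itself       */
  return q1;
}

int
main (void)
{
  printf ("%.17g\n", nr_divide (1.0, 3.0));
  printf ("%.17g\n", nr_divide (22.0, 7.0));
  return 0;
}

Each refinement step roughly doubles the number of correct bits, which is why one single-precision and two double-precision corrections suffice to reach about 0.5 ulp in the vector routine.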
divv2df3.c
Property changes:
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+Id
\ No newline at end of property
Index: spu-modes.def
===================================================================
--- spu-modes.def	(nonexistent)
+++ spu-modes.def	(revision 384)
@@ -0,0 +1,29 @@
+/* Copyright (C) 2006, 2007 Free Software Foundation, Inc.
+
+   This file is free software; you can redistribute it and/or modify it under
+   the terms of the GNU General Public License as published by the Free
+   Software Foundation; either version 3 of the License, or (at your option)
+   any later version.
+
+   This file is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+   FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+   for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3. If not see
+   <http://www.gnu.org/licenses/>. */
+
+/* Vector modes. */
+VECTOR_MODES (INT, 2);        /*                 V2QI */
+VECTOR_MODES (INT, 4);        /*            V4QI V2HI */
+VECTOR_MODES (INT, 8);        /*       V8QI V4HI V2SI */
+VECTOR_MODES (INT, 16);       /* V16QI V8HI V4SI V2DI */
+
+
+VECTOR_MODES (FLOAT, 8);      /*            V4HF V2SF */
+VECTOR_MODES (FLOAT, 16);     /*       V8HF V4SF V2DF */
+
+/* cse_insn needs an INT_MODE larger than WORD_MODE, otherwise some
+   parts of it will go into an infinite loop. */
+INT_MODE (OI, 32);
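For context on what the VECTOR_MODES entries in spu-modes.def enable, the fragment below (illustrative only, not part of the patch; the typedef names are arbitrary) shows how the 16-byte modes surface as GNU C vector types. On a vector-capable target such as SPU, each of these values would typically occupy one 128-bit register, and element-wise operators would map onto the corresponding vector patterns in the machine description.

/* GNU C vector types whose sizes match the 16-byte modes created by
   VECTOR_MODES (INT, 16) and VECTOR_MODES (FLOAT, 16).  */
typedef signed char v16qi __attribute__ ((vector_size (16)));   /* V16QImode */
typedef short       v8hi  __attribute__ ((vector_size (16)));   /* V8HImode  */
typedef int         v4si  __attribute__ ((vector_size (16)));   /* V4SImode  */
typedef float       v4sf  __attribute__ ((vector_size (16)));   /* V4SFmode  */
typedef double      v2df  __attribute__ ((vector_size (16)));   /* V2DFmode  */

/* Element-wise addition; with a vector backend this compiles to a single
   vector add rather than a scalar loop.  */
v4si
add_v4si (v4si a, v4si b)
{
  return a + b;
}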
