URL
https://opencores.org/ocsvn/openrisc_me/openrisc_me/trunk
Subversion Repositories openrisc_me
Compare Revisions
- This comparison shows the changes necessary to convert path
/openrisc/trunk/gnu-src/gcc-4.2.2/gcc/config/xtensa
- from Rev 38 to Rev 154
- ↔ Reverse comparison
Rev 38 → Rev 154
/crti.asm
0,0 → 1,56
# Start .init and .fini sections. |
# Copyright (C) 2003 Free Software Foundation, Inc. |
# |
# This file is free software; you can redistribute it and/or modify it |
# under the terms of the GNU General Public License as published by |
# the Free Software Foundation; either version 2, or (at your option) |
# any later version. |
# |
# In addition to the permissions in the GNU General Public License, the |
# Free Software Foundation gives you unlimited permission to link the |
# compiled version of this file into combinations with other programs, |
# and to distribute those combinations without any restriction coming |
# from the use of this file. (The General Public License restrictions |
# do apply in other respects; for example, they cover modification of |
# the file, and distribution when not linked into a combined |
# executable.) |
# |
# GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
# WARRANTY; without even the implied warranty of MERCHANTABILITY or |
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
# for more details. |
# |
# You should have received a copy of the GNU General Public License |
# along with GCC; see the file COPYING. If not, write to the Free |
# Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA |
# 02110-1301, USA. |
|
# This file just makes a stack frame for the contents of the .fini and |
# .init sections. Users may put any desired instructions in those |
# sections. |
|
#include "xtensa-config.h" |
|
# Open the stack frame for _init.  The .init fragments of every linked |
# object are concatenated after this prologue; the matching return |
# sequence is not in this file (presumably supplied by crtn -- confirm). |
.section .init |
.globl _init |
.type _init,@function |
.align 4 |
_init: |
#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__ |
# Windowed ABI: ENTRY rotates the register window and allocates 64 bytes. |
entry sp, 64 |
#else |
# CALL0 ABI: no register windows, so reserve a 32-byte frame by hand |
# and save the return address (a0) at the base of the frame. |
addi sp, sp, -32 |
s32i a0, sp, 0 |
#endif |
|
# Open the stack frame for _fini, mirroring _init above.  The .fini |
# fragments of every linked object follow this prologue; the return |
# sequence is not in this file (presumably supplied by crtn -- confirm). |
.section .fini |
.globl _fini |
.type _fini,@function |
.align 4 |
_fini: |
#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__ |
# Windowed ABI: ENTRY rotates the register window and allocates 64 bytes. |
entry sp, 64 |
#else |
# CALL0 ABI: reserve a 32-byte frame and save the return address (a0). |
addi sp, sp, -32 |
s32i a0, sp, 0 |
#endif |
/predicates.md
0,0 → 1,156
;; Predicate definitions for Xtensa. |
;; Copyright (C) 2005, 2007 Free Software Foundation, Inc. |
;; |
;; This file is part of GCC. |
;; |
;; GCC is free software; you can redistribute it and/or modify |
;; it under the terms of the GNU General Public License as published by |
;; the Free Software Foundation; either version 3, or (at your option) |
;; any later version. |
;; |
;; GCC is distributed in the hope that it will be useful, |
;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
;; GNU General Public License for more details. |
;; |
;; You should have received a copy of the GNU General Public License |
;; along with GCC; see the file COPYING3. If not see |
;; <http://www.gnu.org/licenses/>. |
|
;; Register, or a const_int usable as an add immediate (signed 8-bit, |
;; or a multiple of 256 in the simm8x256 range). |
(define_predicate "add_operand" |
(ior (and (match_code "const_int") |
(match_test "xtensa_simm8 (INTVAL (op)) |
|| xtensa_simm8x256 (INTVAL (op))")) |
(match_operand 0 "register_operand"))) |
|
;; Register, or a const_int that fits in a signed 8-bit immediate. |
(define_predicate "arith_operand" |
(ior (and (match_code "const_int") |
(match_test "xtensa_simm8 (INTVAL (op))")) |
(match_operand 0 "register_operand"))) |
|
;; Non-immediate operand excluding the constant pool. |
(define_predicate "nonimmed_operand" |
(ior (and (match_operand 0 "memory_operand") |
(match_test "!constantpool_address_p (XEXP (op, 0))")) |
(match_operand 0 "register_operand"))) |
|
;; Memory operand excluding the constant pool. |
(define_predicate "mem_operand" |
(and (match_operand 0 "memory_operand") |
(match_test "!constantpool_address_p (XEXP (op, 0))"))) |
|
;; Register, or a const_int of the form 2^n - 1 (a low-order bit mask). |
(define_predicate "mask_operand" |
(ior (and (match_code "const_int") |
(match_test "xtensa_mask_immediate (INTVAL (op))")) |
(match_operand 0 "register_operand"))) |
|
;; const_int field size N such that (1 << N) - 1 is a valid mask immediate. |
(define_predicate "extui_fldsz_operand" |
(and (match_code "const_int") |
(match_test "xtensa_mask_immediate ((1 << INTVAL (op)) - 1)"))) |
|
;; Operand for sign-extension: any nonimmediate when the SEXT option is |
;; available, otherwise memory only. |
(define_predicate "sext_operand" |
(if_then_else (match_test "TARGET_SEXT") |
(match_operand 0 "nonimmed_operand") |
(match_operand 0 "mem_operand"))) |
|
;; const_int sign-extend field size, restricted to the range [8, 23]. |
(define_predicate "sext_fldsz_operand" |
(and (match_code "const_int") |
(match_test "INTVAL (op) >= 8 && INTVAL (op) <= 23"))) |
|
;; const_int naming the least-significant bit position, adjusted for |
;; bit-numbering endianness. |
(define_predicate "lsbitnum_operand" |
(and (match_code "const_int") |
(match_test "BITS_BIG_ENDIAN |
? (INTVAL (op) == BITS_PER_WORD - 1) |
: (INTVAL (op) == 0)"))) |
|
;; Register, or a const_int encodable as a signed branch immediate |
;; (b4const table, or zero). |
(define_predicate "branch_operand" |
(ior (and (match_code "const_int") |
(match_test "xtensa_b4const_or_zero (INTVAL (op))")) |
(match_operand 0 "register_operand"))) |
|
;; Register, or a const_int encodable as an unsigned branch immediate |
;; (b4constu table). |
(define_predicate "ubranch_operand" |
(ior (and (match_code "const_int") |
(match_test "xtensa_b4constu (INTVAL (op))")) |
(match_operand 0 "register_operand"))) |
|
;; Operand usable as a call target: a hard (or renumbered) register |
;; other than the arg pointer and the frame/virtual registers, or a |
;; constant address subject to the PIC same-section restrictions below. |
(define_predicate "call_insn_operand" |
(match_code "const_int,const,symbol_ref,reg") |
{ |
/* Allow indirect calls through a register, excluding the arg pointer |
and the frame-pointer..last-virtual register range.  */ |
if ((GET_CODE (op) == REG) |
&& (op != arg_pointer_rtx) |
&& ((REGNO (op) < FRAME_POINTER_REGNUM) |
|| (REGNO (op) > LAST_VIRTUAL_REGISTER))) |
return true; |
|
if (CONSTANT_ADDRESS_P (op)) |
{ |
/* Direct calls only allowed to static functions with PIC. */ |
if (flag_pic) |
{ |
tree callee, callee_sec, caller_sec; |
|
if (GET_CODE (op) != SYMBOL_REF |
|| !SYMBOL_REF_LOCAL_P (op) || SYMBOL_REF_EXTERNAL_P (op)) |
return false; |
|
/* Don't attempt a direct call if the callee is known to be in |
a different section, since there's a good chance it will be |
out of range. */ |
|
if (flag_function_sections |
|| DECL_ONE_ONLY (current_function_decl)) |
return false; |
caller_sec = DECL_SECTION_NAME (current_function_decl); |
callee = SYMBOL_REF_DECL (op); |
if (callee) |
{ |
if (DECL_ONE_ONLY (callee)) |
return false; |
callee_sec = DECL_SECTION_NAME (callee); |
/* Reject unless caller and callee name the same section |
(both default, or identical section-name strings).  */ |
if (((caller_sec == NULL_TREE) ^ (callee_sec == NULL_TREE)) |
|| (caller_sec != NULL_TREE |
&& strcmp (TREE_STRING_POINTER (caller_sec), |
TREE_STRING_POINTER (callee_sec)) != 0)) |
return false; |
} |
else if (caller_sec != NULL_TREE) |
return false; |
} |
return true; |
} |
|
return false; |
}) |
|
;; General move source/destination: register, memory, a const_int that |
;; fits the signed 12-bit movi immediate, or (with CONST16) any constant |
;; whose mode is a whole number of words. |
(define_predicate "move_operand" |
(ior |
(ior (match_operand 0 "register_operand") |
(match_operand 0 "memory_operand")) |
(ior (and (match_code "const_int") |
(match_test "GET_MODE_CLASS (mode) == MODE_INT |
&& xtensa_simm12b (INTVAL (op))")) |
(and (match_code "const_int,const_double,const,symbol_ref,label_ref") |
(match_test "TARGET_CONST16 && CONSTANT_P (op) |
&& GET_MODE_SIZE (mode) % UNITS_PER_WORD == 0"))))) |
|
;; Accept the floating point constant 1 in the appropriate mode. |
(define_predicate "const_float_1_operand" |
(match_code "const_double") |
{ |
REAL_VALUE_TYPE d; |
REAL_VALUE_FROM_CONST_DOUBLE (d, op); |
return REAL_VALUES_EQUAL (d, dconst1); |
}) |
|
;; const_int valid as a single-precision FP load/store offset. |
(define_predicate "fpmem_offset_operand" |
(and (match_code "const_int") |
(match_test "xtensa_mem_offset (INTVAL (op), SFmode)"))) |
|
;; Comparison operators directly supported by signed branches. |
(define_predicate "branch_operator" |
(match_code "eq,ne,lt,ge")) |
|
;; Comparison operators directly supported by unsigned branches. |
(define_predicate "ubranch_operator" |
(match_code "ltu,geu")) |
|
;; Equality operators (used for boolean/FP-condition branches). |
(define_predicate "boolean_operator" |
(match_code "eq,ne")) |
/xtensa.c
0,0 → 1,2690
/* Subroutines for insn-output.c for Tensilica's Xtensa architecture. |
Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007 |
Free Software Foundation, Inc. |
Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. |
|
This file is part of GCC. |
|
GCC is free software; you can redistribute it and/or modify it under |
the terms of the GNU General Public License as published by the Free |
Software Foundation; either version 3, or (at your option) any later |
version. |
|
GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
WARRANTY; without even the implied warranty of MERCHANTABILITY or |
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
for more details. |
|
You should have received a copy of the GNU General Public License |
along with GCC; see the file COPYING3. If not see |
<http://www.gnu.org/licenses/>. */ |
|
#include "config.h" |
#include "system.h" |
#include "coretypes.h" |
#include "tm.h" |
#include "rtl.h" |
#include "regs.h" |
#include "hard-reg-set.h" |
#include "basic-block.h" |
#include "real.h" |
#include "insn-config.h" |
#include "conditions.h" |
#include "insn-flags.h" |
#include "insn-attr.h" |
#include "insn-codes.h" |
#include "recog.h" |
#include "output.h" |
#include "tree.h" |
#include "expr.h" |
#include "flags.h" |
#include "reload.h" |
#include "tm_p.h" |
#include "function.h" |
#include "toplev.h" |
#include "optabs.h" |
#include "libfuncs.h" |
#include "ggc.h" |
#include "target.h" |
#include "target-def.h" |
#include "langhooks.h" |
#include "tree-gimple.h" |
|
|
/* Enumeration for all of the relational tests, so that we can build |
arrays indexed by the test type, and not worry about the order |
of EQ, NE, etc. */ |
|
enum internal_test |
{ |
ITEST_EQ, |
ITEST_NE, |
ITEST_GT, |
ITEST_GE, |
ITEST_LT, |
ITEST_LE, |
ITEST_GTU, |
ITEST_GEU, |
ITEST_LTU, |
ITEST_LEU, |
ITEST_MAX |
}; |
|
/* Cached operands, and operator to compare for use in set/branch on |
condition codes.  Filled in before xtensa_expand_conditional_branch / |
gen_conditional_move consume them (see uses later in this file). */ |
rtx branch_cmp[2]; |
|
/* what type of branch to use */ |
enum cmp_type branch_type; |
|
/* Array giving truth value on whether or not a given hard register |
can support a given mode. */ |
char xtensa_hard_regno_mode_ok[(int) MAX_MACHINE_MODE][FIRST_PSEUDO_REGISTER]; |
|
/* Current frame size calculated by compute_frame_size. */ |
unsigned xtensa_current_frame_size; |
|
/* Largest block move to handle in-line. */ |
#define LARGEST_MOVE_RATIO 15 |
|
/* Define the structure for the machine field in struct function. |
Per-function machine-specific state; the a7 fields support the |
incoming-argument-in-a7 copy logic (see xtensa_copy_incoming_a7). */ |
struct machine_function GTY(()) |
{ |
int accesses_prev_frame; |
bool need_a7_copy; |
bool vararg_a7; |
rtx vararg_a7_copy; |
rtx set_frame_ptr_insn; |
}; |
|
/* Vector, indexed by hard register number, which contains 1 for a |
register that is allowable in a candidate for leaf function |
treatment. */ |
|
const char xtensa_leaf_regs[FIRST_PSEUDO_REGISTER] = |
{ |
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
1, 1, 1, |
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
1 |
}; |
|
/* Map hard register number to register class */ |
const enum reg_class xtensa_regno_to_class[FIRST_PSEUDO_REGISTER] = |
{ |
RL_REGS, SP_REG, RL_REGS, RL_REGS, |
RL_REGS, RL_REGS, RL_REGS, GR_REGS, |
RL_REGS, RL_REGS, RL_REGS, RL_REGS, |
RL_REGS, RL_REGS, RL_REGS, RL_REGS, |
AR_REGS, AR_REGS, BR_REGS, |
FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
ACC_REG, |
}; |
|
/* Map register constraint character to register class. |
Statically all 256 entries are NO_REGS; entries for the constraint |
letters the port actually uses are presumably installed at runtime |
during option/target initialization -- TODO confirm at the writers. */ |
enum reg_class xtensa_char_to_class[256] = |
{ |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
NO_REGS, NO_REGS, NO_REGS, NO_REGS, |
}; |
|
/* Forward declarations for the static helpers and target hooks |
defined later in this file. */ |
static enum internal_test map_test_to_internal_test (enum rtx_code); |
static rtx gen_int_relational (enum rtx_code, rtx, rtx, int *); |
static rtx gen_float_relational (enum rtx_code, rtx, rtx); |
static rtx gen_conditional_move (rtx); |
static rtx fixup_subreg_mem (rtx); |
static struct machine_function * xtensa_init_machine_status (void); |
static bool xtensa_return_in_msb (tree); |
static void printx (FILE *, signed int); |
static void xtensa_function_epilogue (FILE *, HOST_WIDE_INT); |
static rtx xtensa_builtin_saveregs (void); |
static unsigned int xtensa_multibss_section_type_flags (tree, const char *, |
int) ATTRIBUTE_UNUSED; |
static section *xtensa_select_rtx_section (enum machine_mode, rtx, |
unsigned HOST_WIDE_INT); |
static bool xtensa_rtx_costs (rtx, int, int, int *); |
static tree xtensa_build_builtin_va_list (void); |
static bool xtensa_return_in_memory (tree, tree); |
static tree xtensa_gimplify_va_arg_expr (tree, tree, tree *, tree *); |
|
static const int reg_nonleaf_alloc_order[FIRST_PSEUDO_REGISTER] = |
REG_ALLOC_ORDER; |
|
|
/* This macro generates the assembly code for function exit, |
on machines that need it.  If FUNCTION_EPILOGUE is not defined |
then individual return instructions are generated for each |
return statement.  Args are same as for FUNCTION_PROLOGUE. */ |
|
#undef TARGET_ASM_FUNCTION_EPILOGUE |
#define TARGET_ASM_FUNCTION_EPILOGUE xtensa_function_epilogue |
|
/* These hooks specify assembly directives for creating certain kinds |
of integer object. */ |
|
#undef TARGET_ASM_ALIGNED_SI_OP |
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t" |
|
#undef TARGET_ASM_SELECT_RTX_SECTION |
#define TARGET_ASM_SELECT_RTX_SECTION xtensa_select_rtx_section |
|
#undef TARGET_DEFAULT_TARGET_FLAGS |
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_FUSED_MADD) |
|
#undef TARGET_RTX_COSTS |
#define TARGET_RTX_COSTS xtensa_rtx_costs |
#undef TARGET_ADDRESS_COST |
#define TARGET_ADDRESS_COST hook_int_rtx_0 |
|
#undef TARGET_BUILD_BUILTIN_VA_LIST |
#define TARGET_BUILD_BUILTIN_VA_LIST xtensa_build_builtin_va_list |
|
#undef TARGET_PROMOTE_FUNCTION_ARGS |
#define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_tree_true |
#undef TARGET_PROMOTE_FUNCTION_RETURN |
#define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true |
#undef TARGET_PROMOTE_PROTOTYPES |
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true |
|
#undef TARGET_RETURN_IN_MEMORY |
#define TARGET_RETURN_IN_MEMORY xtensa_return_in_memory |
#undef TARGET_SPLIT_COMPLEX_ARG |
#define TARGET_SPLIT_COMPLEX_ARG hook_bool_tree_true |
#undef TARGET_MUST_PASS_IN_STACK |
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size |
|
#undef TARGET_EXPAND_BUILTIN_SAVEREGS |
#define TARGET_EXPAND_BUILTIN_SAVEREGS xtensa_builtin_saveregs |
#undef TARGET_GIMPLIFY_VA_ARG_EXPR |
#define TARGET_GIMPLIFY_VA_ARG_EXPR xtensa_gimplify_va_arg_expr |
|
#undef TARGET_RETURN_IN_MSB |
#define TARGET_RETURN_IN_MSB xtensa_return_in_msb |
|
/* Instantiate the target hook vector with the overrides above. */ |
struct gcc_target targetm = TARGET_INITIALIZER; |
|
/* |
* Functions to test Xtensa immediate operand validity. |
*/ |
|
/* True if V fits in a signed 8-bit immediate: [-128, 127]. */ |
bool |
xtensa_simm8 (HOST_WIDE_INT v) |
{ |
return v >= -128 && v <= 127; |
} |
|
|
/* True if V is a multiple of 256 in [-32768, 32512], i.e. a signed |
8-bit value scaled by 256. */ |
bool |
xtensa_simm8x256 (HOST_WIDE_INT v) |
{ |
return (v & 255) == 0 && (v >= -32768 && v <= 32512); |
} |
|
|
/* True if V fits in a signed 12-bit immediate: [-2048, 2047]. */ |
bool |
xtensa_simm12b (HOST_WIDE_INT v) |
{ |
return v >= -2048 && v <= 2047; |
} |
|
|
/* True if V fits in an unsigned 8-bit immediate: [0, 255]. */ |
static bool |
xtensa_uimm8 (HOST_WIDE_INT v) |
{ |
return v >= 0 && v <= 255; |
} |
|
|
/* True if V is an even value in [0, 510] (unsigned 8-bit scaled by 2). */ |
static bool |
xtensa_uimm8x2 (HOST_WIDE_INT v) |
{ |
return (v & 1) == 0 && (v >= 0 && v <= 510); |
} |
|
|
/* True if V is a multiple of 4 in [0, 1020] (unsigned 8-bit scaled by 4). */ |
static bool |
xtensa_uimm8x4 (HOST_WIDE_INT v) |
{ |
return (v & 3) == 0 && (v >= 0 && v <= 1020); |
} |
|
|
/* True if V is one of the values encodable in the signed branch |
immediate table (b4const). */ |
static bool |
xtensa_b4const (HOST_WIDE_INT v) |
{ |
switch (v) |
{ |
case -1: |
case 1: |
case 2: |
case 3: |
case 4: |
case 5: |
case 6: |
case 7: |
case 8: |
case 10: |
case 12: |
case 16: |
case 32: |
case 64: |
case 128: |
case 256: |
return true; |
} |
return false; |
} |
|
|
/* As xtensa_b4const, but zero is also accepted. */ |
bool |
xtensa_b4const_or_zero (HOST_WIDE_INT v) |
{ |
if (v == 0) |
return true; |
return xtensa_b4const (v); |
} |
|
|
/* True if V is one of the values encodable in the unsigned branch |
immediate table (b4constu); note -1 and 1 are replaced here by |
32768 and 65536 relative to the signed table. */ |
bool |
xtensa_b4constu (HOST_WIDE_INT v) |
{ |
switch (v) |
{ |
case 32768: |
case 65536: |
case 2: |
case 3: |
case 4: |
case 5: |
case 6: |
case 7: |
case 8: |
case 10: |
case 12: |
case 16: |
case 32: |
case 64: |
case 128: |
case 256: |
return true; |
} |
return false; |
} |
|
|
/* True if V is a contiguous low-order bit mask of 1 to MAX_MASK_SIZE |
bits, i.e. V == 2^n - 1 with 1 <= n <= 16. */ |
bool |
xtensa_mask_immediate (HOST_WIDE_INT v) |
{ |
#define MAX_MASK_SIZE 16 |
int mask_size; |
|
for (mask_size = 1; mask_size <= MAX_MASK_SIZE; mask_size++) |
{ |
if ((v & 1) == 0) |
return false; |
v = v >> 1; |
if (v == 0) |
return true; |
} |
|
return false; |
} |
|
|
/* Dispatch the CONST_OK_FOR_LETTER constraint checks: maps each |
constraint letter to the corresponding immediate-range test above. */ |
bool |
xtensa_const_ok_for_letter_p (HOST_WIDE_INT v, int c) |
{ |
switch (c) |
{ |
case 'I': return xtensa_simm12b (v); |
case 'J': return xtensa_simm8 (v); |
case 'K': return (v == 0 || xtensa_b4const (v)); |
case 'L': return xtensa_b4constu (v); |
case 'M': return (v >= -32 && v <= 95); |
case 'N': return xtensa_simm8x256 (v); |
case 'O': return (v == -1 || (v >= 1 && v <= 15)); |
case 'P': return xtensa_mask_immediate (v); |
default: break; |
} |
return false; |
} |
|
|
/* This is just like the standard true_regnum() function except that it |
works even when reg_renumber is not initialized.  Returns the hard |
register number for X (REG or SUBREG of a hard reg), or -1. */ |
|
int |
xt_true_regnum (rtx x) |
{ |
if (GET_CODE (x) == REG) |
{ |
if (reg_renumber |
&& REGNO (x) >= FIRST_PSEUDO_REGISTER |
&& reg_renumber[REGNO (x)] >= 0) |
return reg_renumber[REGNO (x)]; |
return REGNO (x); |
} |
if (GET_CODE (x) == SUBREG) |
{ |
int base = xt_true_regnum (SUBREG_REG (x)); |
/* Adjust for the word offset of the subreg within a hard reg. */ |
if (base >= 0 && base < FIRST_PSEUDO_REGISTER) |
return base + subreg_regno_offset (REGNO (SUBREG_REG (x)), |
GET_MODE (SUBREG_REG (x)), |
SUBREG_BYTE (x), GET_MODE (x)); |
} |
return -1; |
} |
|
|
/* Validate a move between OPERANDS[0] (dest) and OPERANDS[1] (src) in |
MODE.  One side must be a register, the MAC16 accumulator never |
qualifies, and the stack pointer may only receive an SImode register |
(the MOVSP case). */ |
int |
xtensa_valid_move (enum machine_mode mode, rtx *operands) |
{ |
/* Either the destination or source must be a register, and the |
MAC16 accumulator doesn't count. */ |
|
if (register_operand (operands[0], mode)) |
{ |
int dst_regnum = xt_true_regnum (operands[0]); |
|
/* The stack pointer can only be assigned with a MOVSP opcode. */ |
if (dst_regnum == STACK_POINTER_REGNUM) |
return (mode == SImode |
&& register_operand (operands[1], mode) |
&& !ACC_REG_P (xt_true_regnum (operands[1]))); |
|
if (!ACC_REG_P (dst_regnum)) |
return true; |
} |
if (register_operand (operands[1], mode)) |
{ |
int src_regnum = xt_true_regnum (operands[1]); |
if (!ACC_REG_P (src_regnum)) |
return true; |
} |
return FALSE; |
} |
|
|
/* True if OP is a MEM whose address is a base register, or base plus a |
word-aligned constant offset in [0, 60]. */ |
int |
smalloffset_mem_p (rtx op) |
{ |
if (GET_CODE (op) == MEM) |
{ |
rtx addr = XEXP (op, 0); |
if (GET_CODE (addr) == REG) |
return REG_OK_FOR_BASE_P (addr); |
if (GET_CODE (addr) == PLUS) |
{ |
/* The constant may appear as either PLUS operand. */ |
rtx offset = XEXP (addr, 0); |
HOST_WIDE_INT val; |
if (GET_CODE (offset) != CONST_INT) |
offset = XEXP (addr, 1); |
if (GET_CODE (offset) != CONST_INT) |
return FALSE; |
|
val = INTVAL (offset); |
return (val & 3) == 0 && (val >= 0 && val <= 60); |
} |
} |
return FALSE; |
} |
|
|
/* True if ADDR refers to the constant pool: either a constant-pool |
SYMBOL_REF, or a CONST of (PLUS sym word-aligned-offset). */ |
int |
constantpool_address_p (rtx addr) |
{ |
rtx sym = addr; |
|
if (GET_CODE (addr) == CONST) |
{ |
rtx offset; |
|
/* Only handle (PLUS (SYM, OFFSET)) form. */ |
addr = XEXP (addr, 0); |
if (GET_CODE (addr) != PLUS) |
return FALSE; |
|
/* Make sure the address is word aligned. */ |
offset = XEXP (addr, 1); |
if ((GET_CODE (offset) != CONST_INT) |
|| ((INTVAL (offset) & 3) != 0)) |
return FALSE; |
|
sym = XEXP (addr, 0); |
} |
|
if ((GET_CODE (sym) == SYMBOL_REF) |
&& CONSTANT_POOL_ADDRESS_P (sym)) |
return TRUE; |
return FALSE; |
} |
|
|
/* True if OP (possibly wrapped in a SUBREG) is a MEM whose address |
points into the constant pool. */ |
int |
constantpool_mem_p (rtx op) |
{ |
if (GET_CODE (op) == SUBREG) |
op = SUBREG_REG (op); |
if (GET_CODE (op) == MEM) |
return constantpool_address_p (XEXP (op, 0)); |
return FALSE; |
} |
|
|
/* Sign-extend SRC into DST by shifting left then arithmetic right by |
(BITS_PER_WORD - width of SRC's mode) bits, via an SImode temp. */ |
void |
xtensa_extend_reg (rtx dst, rtx src) |
{ |
rtx temp = gen_reg_rtx (SImode); |
rtx shift = GEN_INT (BITS_PER_WORD - GET_MODE_BITSIZE (GET_MODE (src))); |
|
/* Generate paradoxical subregs as needed so that the modes match. */ |
src = simplify_gen_subreg (SImode, src, GET_MODE (src), 0); |
dst = simplify_gen_subreg (SImode, dst, GET_MODE (dst), 0); |
|
emit_insn (gen_ashlsi3 (temp, src, shift)); |
emit_insn (gen_ashrsi3 (dst, temp, shift)); |
} |
|
|
/* True if constant offset V is valid for a MODE-sized memory access: |
uimm8 for QImode, uimm8x2 for HImode, uimm8x4 (plus room for the |
second word) for DFmode, uimm8x4 otherwise; BLKmode allows for the |
worst-case inline block move. */ |
bool |
xtensa_mem_offset (unsigned v, enum machine_mode mode) |
{ |
switch (mode) |
{ |
case BLKmode: |
/* Handle the worst case for block moves.  See xtensa_expand_block_move |
where we emit an optimized block move operation if the block can be |
moved in < "move_ratio" pieces.  The worst case is when the block is |
aligned but has a size of (3 mod 4) (does this happen?) so that the |
last piece requires a byte load/store. */ |
return (xtensa_uimm8 (v) |
&& xtensa_uimm8 (v + MOVE_MAX * LARGEST_MOVE_RATIO)); |
|
case QImode: |
return xtensa_uimm8 (v); |
|
case HImode: |
return xtensa_uimm8x2 (v); |
|
case DFmode: |
return (xtensa_uimm8x4 (v) && xtensa_uimm8x4 (v + 4)); |
|
default: |
break; |
} |
|
return xtensa_uimm8x4 (v); |
} |
|
|
/* EXTRA_CONSTRAINT implementation for letters R/T/U; non-MEM operands |
are only accepted for pseudos during reload. */ |
bool |
xtensa_extra_constraint (rtx op, int c) |
{ |
/* Allow pseudo registers during reload. */ |
if (GET_CODE (op) != MEM) |
return (c >= 'R' && c <= 'U' |
&& reload_in_progress && GET_CODE (op) == REG |
&& REGNO (op) >= FIRST_PSEUDO_REGISTER); |
|
switch (c) |
{ |
case 'R': return smalloffset_mem_p (op); |
case 'T': return !TARGET_CONST16 && constantpool_mem_p (op); |
case 'U': return !constantpool_mem_p (op); |
default: break; |
} |
return false; |
} |
|
|
/* Make normal rtx_code into something we can index from an array. |
Returns ITEST_MAX for any rtx_code that is not a supported |
relational test. */ |
|
static enum internal_test |
map_test_to_internal_test (enum rtx_code test_code) |
{ |
enum internal_test test = ITEST_MAX; |
|
switch (test_code) |
{ |
default: break; |
case EQ: test = ITEST_EQ; break; |
case NE: test = ITEST_NE; break; |
case GT: test = ITEST_GT; break; |
case GE: test = ITEST_GE; break; |
case LT: test = ITEST_LT; break; |
case LE: test = ITEST_LE; break; |
case GTU: test = ITEST_GTU; break; |
case GEU: test = ITEST_GEU; break; |
case LTU: test = ITEST_LTU; break; |
case LEU: test = ITEST_LEU; break; |
} |
|
return test; |
} |
|
|
/* Generate the code to compare two integer values.  The return value is |
the comparison expression.  Only EQ/NE/GE/LT (and their unsigned |
forms) exist as machine tests, so GT/LE are mapped onto LT/GE by |
adding 1 to the constant or swapping the registers; *P_INVERT tells |
the caller whether the branch sense must then be reversed. */ |
|
static rtx |
gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ |
rtx cmp0, /* first operand to compare */ |
rtx cmp1, /* second operand to compare */ |
int *p_invert /* whether branch needs to reverse test */) |
{ |
struct cmp_info |
{ |
enum rtx_code test_code; /* test code to use in insn */ |
bool (*const_range_p) (HOST_WIDE_INT); /* range check function */ |
int const_add; /* constant to add (convert LE -> LT) */ |
int reverse_regs; /* reverse registers in test */ |
int invert_const; /* != 0 if invert value if cmp1 is constant */ |
int invert_reg; /* != 0 if invert value if cmp1 is register */ |
int unsignedp; /* != 0 for unsigned comparisons. */ |
}; |
|
static struct cmp_info info[ (int)ITEST_MAX ] = { |
|
{ EQ, xtensa_b4const_or_zero, 0, 0, 0, 0, 0 }, /* EQ */ |
{ NE, xtensa_b4const_or_zero, 0, 0, 0, 0, 0 }, /* NE */ |
|
{ LT, xtensa_b4const_or_zero, 1, 1, 1, 0, 0 }, /* GT */ |
{ GE, xtensa_b4const_or_zero, 0, 0, 0, 0, 0 }, /* GE */ |
{ LT, xtensa_b4const_or_zero, 0, 0, 0, 0, 0 }, /* LT */ |
{ GE, xtensa_b4const_or_zero, 1, 1, 1, 0, 0 }, /* LE */ |
|
{ LTU, xtensa_b4constu, 1, 1, 1, 0, 1 }, /* GTU */ |
{ GEU, xtensa_b4constu, 0, 0, 0, 0, 1 }, /* GEU */ |
{ LTU, xtensa_b4constu, 0, 0, 0, 0, 1 }, /* LTU */ |
{ GEU, xtensa_b4constu, 1, 1, 1, 0, 1 }, /* LEU */ |
}; |
|
enum internal_test test; |
enum machine_mode mode; |
struct cmp_info *p_info; |
|
test = map_test_to_internal_test (test_code); |
gcc_assert (test != ITEST_MAX); |
|
p_info = &info[ (int)test ]; |
|
mode = GET_MODE (cmp0); |
if (mode == VOIDmode) |
mode = GET_MODE (cmp1); |
|
/* Make sure we can handle any constants given to us. */ |
if (GET_CODE (cmp1) == CONST_INT) |
{ |
HOST_WIDE_INT value = INTVAL (cmp1); |
unsigned HOST_WIDE_INT uvalue = (unsigned HOST_WIDE_INT)value; |
|
/* if the immediate overflows or does not fit in the immediate field, |
spill it to a register */ |
|
/* NOTE(review): for the signed case this overflow probe depends on |
wrapping signed addition, which is undefined behavior in ISO C; |
it only works under wrapv-style semantics -- worth confirming. */ |
if ((p_info->unsignedp ? |
(uvalue + p_info->const_add > uvalue) : |
(value + p_info->const_add > value)) != (p_info->const_add > 0)) |
{ |
cmp1 = force_reg (mode, cmp1); |
} |
else if (!(p_info->const_range_p) (value + p_info->const_add)) |
{ |
cmp1 = force_reg (mode, cmp1); |
} |
} |
else if ((GET_CODE (cmp1) != REG) && (GET_CODE (cmp1) != SUBREG)) |
{ |
cmp1 = force_reg (mode, cmp1); |
} |
|
/* See if we need to invert the result. */ |
*p_invert = ((GET_CODE (cmp1) == CONST_INT) |
? p_info->invert_const |
: p_info->invert_reg); |
|
/* Comparison to constants, may involve adding 1 to change a LT into LE. |
Comparison between two registers, may involve switching operands. */ |
if (GET_CODE (cmp1) == CONST_INT) |
{ |
if (p_info->const_add != 0) |
cmp1 = GEN_INT (INTVAL (cmp1) + p_info->const_add); |
|
} |
else if (p_info->reverse_regs) |
{ |
rtx temp = cmp0; |
cmp0 = cmp1; |
cmp1 = temp; |
} |
|
return gen_rtx_fmt_ee (p_info->test_code, VOIDmode, cmp0, cmp1); |
} |
|
|
/* Generate the code to compare two float values.  The return value is |
the comparison expression.  Emits an FP compare that sets the FP |
condition register, then returns an EQ/NE test against zero on that |
register (EQ when the FP test's sense must be inverted). */ |
|
static rtx |
gen_float_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ |
rtx cmp0, /* first operand to compare */ |
rtx cmp1 /* second operand to compare */) |
{ |
rtx (*gen_fn) (rtx, rtx, rtx); |
rtx brtmp; |
int reverse_regs, invert; |
|
/* Only EQ/LE/LT compares exist; NE inverts EQ, and GT/GE swap the |
operands of LT/LE. */ |
switch (test_code) |
{ |
case EQ: reverse_regs = 0; invert = 0; gen_fn = gen_seq_sf; break; |
case NE: reverse_regs = 0; invert = 1; gen_fn = gen_seq_sf; break; |
case LE: reverse_regs = 0; invert = 0; gen_fn = gen_sle_sf; break; |
case GT: reverse_regs = 1; invert = 0; gen_fn = gen_slt_sf; break; |
case LT: reverse_regs = 0; invert = 0; gen_fn = gen_slt_sf; break; |
case GE: reverse_regs = 1; invert = 0; gen_fn = gen_sle_sf; break; |
default: |
fatal_insn ("bad test", gen_rtx_fmt_ee (test_code, VOIDmode, cmp0, cmp1)); |
reverse_regs = 0; invert = 0; gen_fn = 0; /* avoid compiler warnings */ |
} |
|
if (reverse_regs) |
{ |
rtx temp = cmp0; |
cmp0 = cmp1; |
cmp1 = temp; |
} |
|
brtmp = gen_rtx_REG (CCmode, FPCC_REGNUM); |
emit_insn (gen_fn (brtmp, cmp0, cmp1)); |
|
return gen_rtx_fmt_ee (invert ? EQ : NE, VOIDmode, brtmp, const0_rtx); |
} |
|
|
/* Expand a conditional branch to OPERANDS[0] using TEST_CODE and the |
operands previously cached in the globals branch_cmp[]/branch_type. |
DFmode compares are not supported and abort via fatal_insn. */ |
void |
xtensa_expand_conditional_branch (rtx *operands, enum rtx_code test_code) |
{ |
enum cmp_type type = branch_type; |
rtx cmp0 = branch_cmp[0]; |
rtx cmp1 = branch_cmp[1]; |
rtx cmp; |
int invert; |
rtx label1, label2; |
|
switch (type) |
{ |
case CMP_DF: |
default: |
fatal_insn ("bad test", gen_rtx_fmt_ee (test_code, VOIDmode, cmp0, cmp1)); |
|
case CMP_SI: |
invert = FALSE; |
cmp = gen_int_relational (test_code, cmp0, cmp1, &invert); |
break; |
|
case CMP_SF: |
if (!TARGET_HARD_FLOAT) |
fatal_insn ("bad test", gen_rtx_fmt_ee (test_code, VOIDmode, cmp0, cmp1)); |
invert = FALSE; |
cmp = gen_float_relational (test_code, cmp0, cmp1); |
break; |
} |
|
/* Generate the branch.  If the test must be inverted, branch on the |
fall-through arm instead by swapping the IF_THEN_ELSE labels. */ |
|
label1 = gen_rtx_LABEL_REF (VOIDmode, operands[0]); |
label2 = pc_rtx; |
|
if (invert) |
{ |
label2 = label1; |
label1 = pc_rtx; |
} |
|
emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, |
gen_rtx_IF_THEN_ELSE (VOIDmode, cmp, |
label1, |
label2))) ; |
} |
|
|
/* Build the comparison rtx for a conditional move from the operands |
cached in branch_cmp[]/branch_type.  Returns the comparison, or 0 if |
the combination cannot be handled (caller then falls back).  CMP is |
only used for its comparison code. */ |
static rtx |
gen_conditional_move (rtx cmp) |
{ |
enum rtx_code code = GET_CODE (cmp); |
rtx op0 = branch_cmp[0]; |
rtx op1 = branch_cmp[1]; |
|
if (branch_type == CMP_SI) |
{ |
/* Jump optimization calls get_condition() which canonicalizes |
comparisons like (GE x <const>) to (GT x <const-1>). |
Transform those comparisons back to GE, since that is the |
comparison supported in Xtensa.  We shouldn't have to |
transform <LE x const> comparisons, because neither |
xtensa_expand_conditional_branch() nor get_condition() will |
produce them. */ |
|
if ((code == GT) && (op1 == constm1_rtx)) |
{ |
code = GE; |
op1 = const0_rtx; |
} |
/* Placeholder rtx carrying only the code, used to classify it |
via the boolean_operator/branch_operator predicates. */ |
cmp = gen_rtx_fmt_ee (code, VOIDmode, cc0_rtx, const0_rtx); |
|
if (boolean_operator (cmp, VOIDmode)) |
{ |
/* Swap the operands to make const0 second. */ |
if (op0 == const0_rtx) |
{ |
op0 = op1; |
op1 = const0_rtx; |
} |
|
/* If not comparing against zero, emit a comparison (subtract). */ |
if (op1 != const0_rtx) |
{ |
op0 = expand_binop (SImode, sub_optab, op0, op1, |
0, 0, OPTAB_LIB_WIDEN); |
op1 = const0_rtx; |
} |
} |
else if (branch_operator (cmp, VOIDmode)) |
{ |
/* Swap the operands to make const0 second. */ |
if (op0 == const0_rtx) |
{ |
op0 = op1; |
op1 = const0_rtx; |
|
switch (code) |
{ |
case LT: code = GE; break; |
case GE: code = LT; break; |
default: gcc_unreachable (); |
} |
} |
|
if (op1 != const0_rtx) |
return 0; |
} |
else |
return 0; |
|
return gen_rtx_fmt_ee (code, VOIDmode, op0, op1); |
} |
|
if (TARGET_HARD_FLOAT && (branch_type == CMP_SF)) |
return gen_float_relational (code, op0, op1); |
|
return 0; |
} |
|
|
/* Expand a conditional move: OPERANDS[0] = OPERANDS[2] if OPERANDS[1] |
holds, else OPERANDS[3].  ISFLT selects the SFmode patterns.  Returns |
1 on success, 0 if gen_conditional_move could not build the test. */ |
int |
xtensa_expand_conditional_move (rtx *operands, int isflt) |
{ |
rtx cmp; |
rtx (*gen_fn) (rtx, rtx, rtx, rtx, rtx); |
|
if (!(cmp = gen_conditional_move (operands[1]))) |
return 0; |
|
if (isflt) |
gen_fn = (branch_type == CMP_SI |
? gen_movsfcc_internal0 |
: gen_movsfcc_internal1); |
else |
gen_fn = (branch_type == CMP_SI |
? gen_movsicc_internal0 |
: gen_movsicc_internal1); |
|
emit_insn (gen_fn (operands[0], XEXP (cmp, 0), |
operands[2], operands[3], cmp)); |
return 1; |
} |
|
|
/* Expand a set-on-condition: OPERANDS[0] = (OPERANDS[1] ? 1 : 0), |
implemented as a conditional move between constant-loaded temps. |
Returns 1 on success, 0 if the comparison cannot be built. */ |
int |
xtensa_expand_scc (rtx *operands) |
{ |
rtx dest = operands[0]; |
rtx cmp = operands[1]; |
rtx one_tmp, zero_tmp; |
rtx (*gen_fn) (rtx, rtx, rtx, rtx, rtx); |
|
if (!(cmp = gen_conditional_move (cmp))) |
return 0; |
|
one_tmp = gen_reg_rtx (SImode); |
zero_tmp = gen_reg_rtx (SImode); |
emit_insn (gen_movsi (one_tmp, const_true_rtx)); |
emit_insn (gen_movsi (zero_tmp, const0_rtx)); |
|
gen_fn = (branch_type == CMP_SI |
? gen_movsicc_internal0 |
: gen_movsicc_internal1); |
emit_insn (gen_fn (dest, XEXP (cmp, 0), one_tmp, zero_tmp, cmp)); |
return 1; |
} |
|
|
/* Split OP[1] into OP[2,3] and likewise for OP[0] into OP[0,1].  MODE is |
for the output, i.e., the input operands are twice as big as MODE. |
Sources may be REG, MEM, or constant; destinations only REG or MEM. */ |
|
void |
xtensa_split_operand_pair (rtx operands[4], enum machine_mode mode) |
{ |
switch (GET_CODE (operands[1])) |
{ |
case REG: |
operands[3] = gen_rtx_REG (mode, REGNO (operands[1]) + 1); |
operands[2] = gen_rtx_REG (mode, REGNO (operands[1])); |
break; |
|
case MEM: |
operands[3] = adjust_address (operands[1], mode, GET_MODE_SIZE (mode)); |
operands[2] = adjust_address (operands[1], mode, 0); |
break; |
|
case CONST_INT: |
case CONST_DOUBLE: |
split_double (operands[1], &operands[2], &operands[3]); |
break; |
|
default: |
gcc_unreachable (); |
} |
|
switch (GET_CODE (operands[0])) |
{ |
case REG: |
operands[1] = gen_rtx_REG (mode, REGNO (operands[0]) + 1); |
operands[0] = gen_rtx_REG (mode, REGNO (operands[0])); |
break; |
|
case MEM: |
operands[1] = adjust_address (operands[0], mode, GET_MODE_SIZE (mode)); |
operands[0] = adjust_address (operands[0], mode, 0); |
break; |
|
default: |
gcc_unreachable (); |
} |
} |
|
|
/* Emit insns to move operands[1] into operands[0]. |
Return 1 if we have written out everything that needs to be done to |
do the move. Otherwise, return 0 and the caller will emit the move |
normally. */ |
|
int
xtensa_emit_move_sequence (rtx *operands, enum machine_mode mode)
{
  /* A constant source that cannot be loaded with a single MOVI
     (i.e., not representable per xtensa_simm12b) must either come
     from the constant pool or, with CONST16, stay as an immediate.  */
  if (CONSTANT_P (operands[1])
      && (GET_CODE (operands[1]) != CONST_INT
	  || !xtensa_simm12b (INTVAL (operands[1]))))
    {
      if (!TARGET_CONST16)
	operands[1] = force_const_mem (SImode, operands[1]);

      /* PC-relative loads are always SImode, and CONST16 is only
	 supported in the movsi pattern, so add a SUBREG for any other
	 (smaller) mode.  */

      if (mode != SImode)
	{
	  if (register_operand (operands[0], mode))
	    {
	      /* Widen the destination to SImode and emit the move
		 here; the caller has nothing left to do.  */
	      operands[0] = simplify_gen_subreg (SImode, operands[0], mode, 0);
	      emit_move_insn (operands[0], operands[1]);
	      return 1;
	    }
	  else
	    {
	      /* Load the constant into an SImode register, then hand
		 back its low part in the requested mode.  */
	      operands[1] = force_reg (SImode, operands[1]);
	      operands[1] = gen_lowpart_SUBREG (mode, operands[1]);
	    }
	}
    }

  /* Outside of reload, legalize the operand combination by forcing
     the source into a register when needed.  */
  if (!(reload_in_progress | reload_completed)
      && !xtensa_valid_move (mode, operands))
    operands[1] = force_reg (mode, operands[1]);

  /* Copy an incoming argument out of a7 if necessary; see
     xtensa_copy_incoming_a7 for why.  */
  operands[1] = xtensa_copy_incoming_a7 (operands[1]);

  /* During reload we don't want to emit (subreg:X (mem:Y)) since that
     instruction won't be recognized after reload, so we remove the
     subreg and adjust mem accordingly.  */
  if (reload_in_progress)
    {
      operands[0] = fixup_subreg_mem (operands[0]);
      operands[1] = fixup_subreg_mem (operands[1]);
    }
  return 0;
}
|
|
static rtx |
fixup_subreg_mem (rtx x) |
{ |
if (GET_CODE (x) == SUBREG |
&& GET_CODE (SUBREG_REG (x)) == REG |
&& REGNO (SUBREG_REG (x)) >= FIRST_PSEUDO_REGISTER) |
{ |
rtx temp = |
gen_rtx_SUBREG (GET_MODE (x), |
reg_equiv_mem [REGNO (SUBREG_REG (x))], |
SUBREG_BYTE (x)); |
x = alter_subreg (&temp); |
} |
return x; |
} |
|
|
/* Check if an incoming argument in a7 is expected to be used soon and |
if OPND is a register or register pair that includes a7. If so, |
create a new pseudo and copy a7 into that pseudo at the very |
beginning of the function, followed by the special "set_frame_ptr" |
unspec_volatile insn. The return value is either the original |
operand, if it is not a7, or the new pseudo containing a copy of |
the incoming argument. This is necessary because the register |
allocator will ignore conflicts with a7 and may either assign some |
other pseudo to a7 or use a7 as the hard_frame_pointer, clobbering |
the incoming argument in a7. By copying the argument out of a7 as |
the very first thing, and then immediately following that with an |
unspec_volatile to keep the scheduler away, we should avoid any |
problems. Putting the set_frame_ptr insn at the beginning, with |
only the a7 copy before it, also makes it easier for the prologue |
expander to initialize the frame pointer after the a7 copy and to |
fix up the a7 copy to use the stack pointer instead of the frame |
pointer. */ |
|
rtx
xtensa_copy_incoming_a7 (rtx opnd)
{
  rtx entry_insns = 0;
  rtx reg, tmp;
  enum machine_mode mode;

  /* Nothing to do unless an a7 copy is still pending.  */
  if (!cfun->machine->need_a7_copy)
    return opnd;

  /* This function should never be called again once a7 has been copied.  */
  gcc_assert (!cfun->machine->set_frame_ptr_insn);

  mode = GET_MODE (opnd);

  /* The operand using a7 may come in a later instruction, so just return
     the original operand if it doesn't use a7.  */
  reg = opnd;
  if (GET_CODE (reg) == SUBREG)
    {
      gcc_assert (SUBREG_BYTE (reg) == 0);
      reg = SUBREG_REG (reg);
    }
  /* Skip anything that is not a hard register span covering a7.  */
  if (GET_CODE (reg) != REG
      || REGNO (reg) > A7_REG
      || REGNO (reg) + HARD_REGNO_NREGS (A7_REG, mode) <= A7_REG)
    return opnd;

  /* 1-word args will always be in a7; 2-word args in a6/a7.  */
  gcc_assert (REGNO (reg) + HARD_REGNO_NREGS (A7_REG, mode) - 1 == A7_REG);

  cfun->machine->need_a7_copy = false;

  /* Copy a7 to a new pseudo at the function entry.  Use gen_raw_REG to
     create the REG for a7 so that hard_frame_pointer_rtx is not used.  */

  start_sequence ();
  tmp = gen_reg_rtx (mode);

  switch (mode)
    {
    case DFmode:
    case DImode:
      /* Two-word value: low word comes from a6 (an ordinary REG is
	 fine there), high word from a7 via gen_raw_REG.  */
      emit_insn (gen_movsi_internal (gen_rtx_SUBREG (SImode, tmp, 0),
				     gen_rtx_REG (SImode, A7_REG - 1)));
      emit_insn (gen_movsi_internal (gen_rtx_SUBREG (SImode, tmp, 4),
				     gen_raw_REG (SImode, A7_REG)));
      break;
    case SFmode:
      emit_insn (gen_movsf_internal (tmp, gen_raw_REG (mode, A7_REG)));
      break;
    case SImode:
      emit_insn (gen_movsi_internal (tmp, gen_raw_REG (mode, A7_REG)));
      break;
    case HImode:
      emit_insn (gen_movhi_internal (tmp, gen_raw_REG (mode, A7_REG)));
      break;
    case QImode:
      emit_insn (gen_movqi_internal (tmp, gen_raw_REG (mode, A7_REG)));
      break;
    default:
      gcc_unreachable ();
    }

  /* Immediately follow the copy with set_frame_ptr (see the block
     comment above for why the ordering matters).  */
  cfun->machine->set_frame_ptr_insn = emit_insn (gen_set_frame_ptr ());
  entry_insns = get_insns ();
  end_sequence ();

  if (cfun->machine->vararg_a7)
    {
      /* This is called from within builtin_saveregs, which will insert the
	 saveregs code at the function entry, ahead of anything placed at
	 the function entry now.  Instead, save the sequence to be inserted
	 at the beginning of the saveregs code.  */
      cfun->machine->vararg_a7_copy = entry_insns;
    }
  else
    {
      /* Put entry_insns after the NOTE that starts the function.  If
	 this is inside a start_sequence, make the outer-level insn
	 chain current, so the code is placed at the start of the
	 function.  */
      push_topmost_sequence ();
      /* Do not use entry_of_function() here.  This is called from within
	 expand_function_start, when the CFG still holds GIMPLE.  */
      emit_insn_after (entry_insns, get_insns ());
      pop_topmost_sequence ();
    }

  return tmp;
}
|
|
/* Try to expand a block move operation to a sequence of RTL move |
instructions. If not optimizing, or if the block size is not a |
constant, or if the block is too large, the expansion fails and GCC |
falls back to calling memcpy(). |
|
operands[0] is the destination |
operands[1] is the source |
operands[2] is the length |
operands[3] is the alignment */ |
|
int
xtensa_expand_block_move (rtx *operands)
{
  /* Widest move mode usable for a given alignment (1, 2 or 4 bytes);
     index 3 is unused padding.  */
  static const enum machine_mode mode_from_align[] =
  {
    VOIDmode, QImode, HImode, VOIDmode, SImode,
  };

  rtx dst_mem = operands[0];
  rtx src_mem = operands[1];
  HOST_WIDE_INT bytes, align;
  int num_pieces, move_ratio;
  rtx temp[2];                  /* two-slot pipeline of load temporaries */
  enum machine_mode mode[2];
  int amount[2];
  bool active[2];               /* slot holds a loaded-but-unstored value */
  int phase = 0;
  int next;
  int offset_ld = 0;            /* running source offset */
  int offset_st = 0;            /* running destination offset */
  rtx x;

  /* If this is not a fixed size move, just call memcpy.  */
  if (!optimize || (GET_CODE (operands[2]) != CONST_INT))
    return 0;

  bytes = INTVAL (operands[2]);
  align = INTVAL (operands[3]);

  /* Anything to move?  */
  if (bytes <= 0)
    return 0;

  if (align > MOVE_MAX)
    align = MOVE_MAX;

  /* Decide whether to expand inline based on the optimization level.  */
  move_ratio = 4;
  if (optimize > 2)
    move_ratio = LARGEST_MOVE_RATIO;
  num_pieces = (bytes / align) + (bytes % align); /* Close enough anyway.  */
  if (num_pieces > move_ratio)
    return 0;

  /* Force both addresses into registers so adjust_address offsets work.  */
  x = XEXP (dst_mem, 0);
  if (!REG_P (x))
    {
      x = force_reg (Pmode, x);
      dst_mem = replace_equiv_address (dst_mem, x);
    }

  x = XEXP (src_mem, 0);
  if (!REG_P (x))
    {
      x = force_reg (Pmode, x);
      src_mem = replace_equiv_address (src_mem, x);
    }

  active[0] = active[1] = false;

  /* Software-pipelined copy: each iteration issues the next load into
     one slot while storing the previously loaded value from the other,
     so loads and stores are interleaved.  */
  do
    {
      next = phase;
      phase ^= 1;

      if (bytes > 0)
	{
	  int next_amount;

	  /* Largest piece (4/2/1 bytes) allowed by both the remaining
	     byte count and the alignment.  */
	  next_amount = (bytes >= 4 ? 4 : (bytes >= 2 ? 2 : 1));
	  next_amount = MIN (next_amount, align);

	  amount[next] = next_amount;
	  mode[next] = mode_from_align[next_amount];
	  temp[next] = gen_reg_rtx (mode[next]);

	  x = adjust_address (src_mem, mode[next], offset_ld);
	  emit_insn (gen_rtx_SET (VOIDmode, temp[next], x));

	  offset_ld += next_amount;
	  bytes -= next_amount;
	  active[next] = true;
	}

      if (active[phase])
	{
	  /* Store the value loaded on the previous iteration.  */
	  active[phase] = false;

	  x = adjust_address (dst_mem, mode[phase], offset_st);
	  emit_insn (gen_rtx_SET (VOIDmode, x, temp[phase]));

	  offset_st += amount[phase];
	}
    }
  while (active[next]);

  return 1;
}
|
|
void |
xtensa_expand_nonlocal_goto (rtx *operands) |
{ |
rtx goto_handler = operands[1]; |
rtx containing_fp = operands[3]; |
|
/* Generate a call to "__xtensa_nonlocal_goto" (in libgcc); the code |
is too big to generate in-line. */ |
|
if (GET_CODE (containing_fp) != REG) |
containing_fp = force_reg (Pmode, containing_fp); |
|
goto_handler = replace_rtx (copy_rtx (goto_handler), |
virtual_stack_vars_rtx, |
containing_fp); |
|
emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__xtensa_nonlocal_goto"), |
0, VOIDmode, 2, |
containing_fp, Pmode, |
goto_handler, Pmode); |
} |
|
|
static struct machine_function * |
xtensa_init_machine_status (void) |
{ |
return ggc_alloc_cleared (sizeof (struct machine_function)); |
} |
|
|
/* Called when the current function accesses an outer frame.  Forces a
   frame pointer and asks libgcc to flush register windows so outer
   frames are visible in memory (NOTE(review): the spill semantics are
   inferred from the libgcc helper's name — confirm against libgcc).  */

void
xtensa_setup_frame_addresses (void)
{
  /* Set flag to cause FRAME_POINTER_REQUIRED to be set.  */
  cfun->machine->accesses_prev_frame = 1;

  emit_library_call
    (gen_rtx_SYMBOL_REF (Pmode, "__xtensa_libgcc_window_spill"),
     0, VOIDmode, 0);
}
|
|
/* Emit the assembly for the end of a zero-cost loop. Normally we just emit |
a comment showing where the end of the loop is. However, if there is a |
label or a branch at the end of the loop then we need to place a nop |
there. If the loop ends with a label we need the nop so that branches |
targeting that label will target the nop (and thus remain in the loop), |
instead of targeting the instruction after the loop (and thus exiting |
the loop). If the loop ends with a branch, we need the nop in case the |
branch is targeting a location inside the loop. When the branch |
executes it will cause the loop count to be decremented even if it is |
taken (because it is the last instruction in the loop), so we need to |
nop after the branch to prevent the loop count from being decremented |
when the branch is taken. */ |
|
void |
xtensa_emit_loop_end (rtx insn, rtx *operands) |
{ |
char done = 0; |
|
for (insn = PREV_INSN (insn); insn && !done; insn = PREV_INSN (insn)) |
{ |
switch (GET_CODE (insn)) |
{ |
case NOTE: |
case BARRIER: |
break; |
|
case CODE_LABEL: |
output_asm_insn (TARGET_DENSITY ? "nop.n" : "nop", operands); |
done = 1; |
break; |
|
default: |
{ |
rtx body = PATTERN (insn); |
|
if (GET_CODE (body) == JUMP_INSN) |
{ |
output_asm_insn (TARGET_DENSITY ? "nop.n" : "nop", operands); |
done = 1; |
} |
else if ((GET_CODE (body) != USE) |
&& (GET_CODE (body) != CLOBBER)) |
done = 1; |
} |
break; |
} |
} |
|
output_asm_insn ("# loop end for %0", operands); |
} |
|
|
char * |
xtensa_emit_call (int callop, rtx *operands) |
{ |
static char result[64]; |
rtx tgt = operands[callop]; |
|
if (GET_CODE (tgt) == CONST_INT) |
sprintf (result, "call8\t0x%lx", INTVAL (tgt)); |
else if (register_operand (tgt, VOIDmode)) |
sprintf (result, "callx8\t%%%d", callop); |
else |
sprintf (result, "call8\t%%%d", callop); |
|
return result; |
} |
|
|
/* Return the debugger register number to use for 'regno'. */ |
|
int |
xtensa_dbx_register_number (int regno) |
{ |
int first = -1; |
|
if (GP_REG_P (regno)) |
{ |
regno -= GP_REG_FIRST; |
first = 0; |
} |
else if (BR_REG_P (regno)) |
{ |
regno -= BR_REG_FIRST; |
first = 16; |
} |
else if (FP_REG_P (regno)) |
{ |
regno -= FP_REG_FIRST; |
first = 48; |
} |
else if (ACC_REG_P (regno)) |
{ |
first = 0x200; /* Start of Xtensa special registers. */ |
regno = 16; /* ACCLO is special register 16. */ |
} |
|
/* When optimizing, we sometimes get asked about pseudo-registers |
that don't represent hard registers. Return 0 for these. */ |
if (first == -1) |
return 0; |
|
return first + regno; |
} |
|
|
/* Argument support functions. */ |
|
/* Initialize CUMULATIVE_ARGS for a function. */ |
|
void
init_cumulative_args (CUMULATIVE_ARGS *cum, int incoming)
{
  /* No argument words have been consumed yet.  */
  cum->arg_words = 0;
  /* Nonzero when scanning the current function's incoming arguments
     rather than the arguments of an outgoing call.  */
  cum->incoming = incoming;
}
|
|
/* Advance the argument to the next argument position. */ |
|
void |
function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type) |
{ |
int words, max; |
int *arg_words; |
|
arg_words = &cum->arg_words; |
max = MAX_ARGS_IN_REGISTERS; |
|
words = (((mode != BLKmode) |
? (int) GET_MODE_SIZE (mode) |
: int_size_in_bytes (type)) + UNITS_PER_WORD - 1) / UNITS_PER_WORD; |
|
if (*arg_words < max |
&& (targetm.calls.must_pass_in_stack (mode, type) |
|| *arg_words + words > max)) |
*arg_words = max; |
|
*arg_words += words; |
} |
|
|
/* Return an RTL expression containing the register for the given mode, |
or 0 if the argument is to be passed on the stack. INCOMING_P is nonzero |
if this is an incoming argument to the current function. */ |
|
rtx
function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
	      int incoming_p)
{
  int regbase, words, max;
  int *arg_words;
  int regno;

  arg_words = &cum->arg_words;
  /* Incoming and outgoing arguments use different register windows.  */
  regbase = (incoming_p ? GP_ARG_FIRST : GP_OUTGOING_ARG_FIRST);
  max = MAX_ARGS_IN_REGISTERS;

  /* Size of the argument in words, rounded up.  */
  words = (((mode != BLKmode)
	    ? (int) GET_MODE_SIZE (mode)
	    : int_size_in_bytes (type)) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Round the starting register up for over-word-aligned types,
     capping the alignment at STACK_BOUNDARY.  */
  if (type && (TYPE_ALIGN (type) > BITS_PER_WORD))
    {
      int align = MIN (TYPE_ALIGN (type), STACK_BOUNDARY) / BITS_PER_WORD;
      *arg_words = (*arg_words + align - 1) & -align;
    }

  /* Arguments that do not fit entirely in registers go on the stack.  */
  if (*arg_words + words > max)
    return (rtx)0;

  regno = regbase + *arg_words;

  /* An incoming argument occupying a7 must be copied out of a7 at
     function entry; see xtensa_copy_incoming_a7.  */
  if (cum->incoming && regno <= A7_REG && regno + words > A7_REG)
    cfun->machine->need_a7_copy = true;

  return gen_rtx_REG (mode, regno);
}
|
|
int |
function_arg_boundary (enum machine_mode mode, tree type) |
{ |
unsigned int alignment; |
|
alignment = type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode); |
if (alignment < PARM_BOUNDARY) |
alignment = PARM_BOUNDARY; |
if (alignment > STACK_BOUNDARY) |
alignment = STACK_BOUNDARY; |
return alignment; |
} |
|
|
static bool |
xtensa_return_in_msb (tree valtype) |
{ |
return (TARGET_BIG_ENDIAN |
&& AGGREGATE_TYPE_P (valtype) |
&& int_size_in_bytes (valtype) >= UNITS_PER_WORD); |
} |
|
|
/* Validate the option combination and initialize the backend's
   per-option global tables (constraint classes and the register/mode
   validity table).  */

void
override_options (void)
{
  int regno;
  enum machine_mode mode;

  if (!TARGET_BOOLEANS && TARGET_HARD_FLOAT)
    error ("boolean registers required for the floating-point option");

  /* Map constraint letters to register classes; options that are not
     enabled map their letters to NO_REGS.  */
  xtensa_char_to_class['q'] = SP_REG;
  xtensa_char_to_class['a'] = GR_REGS;
  xtensa_char_to_class['b'] = ((TARGET_BOOLEANS) ? BR_REGS : NO_REGS);
  xtensa_char_to_class['f'] = ((TARGET_HARD_FLOAT) ? FP_REGS : NO_REGS);
  xtensa_char_to_class['A'] = ((TARGET_MAC16) ? ACC_REG : NO_REGS);
  xtensa_char_to_class['B'] = ((TARGET_SEXT) ? GR_REGS : NO_REGS);
  xtensa_char_to_class['C'] = ((TARGET_MUL16) ? GR_REGS: NO_REGS);
  xtensa_char_to_class['D'] = ((TARGET_DENSITY) ? GR_REGS: NO_REGS);
  xtensa_char_to_class['d'] = ((TARGET_DENSITY) ? AR_REGS: NO_REGS);
  xtensa_char_to_class['W'] = ((TARGET_CONST16) ? GR_REGS: NO_REGS);

  /* Set up array giving whether a given register can hold a given mode.  */
  for (mode = VOIDmode;
       mode != MAX_MACHINE_MODE;
       mode = (enum machine_mode) ((int) mode + 1))
    {
      int size = GET_MODE_SIZE (mode);
      enum mode_class class = GET_MODE_CLASS (mode);

      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
	{
	  int temp;

	  if (ACC_REG_P (regno))
	    temp = (TARGET_MAC16
		    && (class == MODE_INT) && (size <= UNITS_PER_WORD));
	  else if (GP_REG_P (regno))
	    /* Multi-word values must start on an even register.  */
	    temp = ((regno & 1) == 0 || (size <= UNITS_PER_WORD));
	  else if (FP_REG_P (regno))
	    temp = (TARGET_HARD_FLOAT && (mode == SFmode));
	  else if (BR_REG_P (regno))
	    temp = (TARGET_BOOLEANS && (mode == CCmode));
	  else
	    temp = FALSE;

	  xtensa_hard_regno_mode_ok[(int) mode][regno] = temp;
	}
    }

  init_machine_status = xtensa_init_machine_status;

  /* Check PIC settings.  PIC is only supported when using L32R
     instructions, and some targets need to always use PIC.  */
  if (flag_pic && TARGET_CONST16)
    error ("-f%s is not supported with CONST16 instructions",
	   (flag_pic > 1 ? "PIC" : "pic"));
  else if (XTENSA_ALWAYS_PIC)
    {
      if (TARGET_CONST16)
	error ("PIC is required but not supported with CONST16 instructions");
      flag_pic = 1;
    }
  /* There's no need for -fPIC (as opposed to -fpic) on Xtensa.  */
  if (flag_pic > 1)
    flag_pic = 1;

  /* Hot/cold partitioning does not work on this architecture, because of
     constant pools (the load instruction cannot necessarily reach that far).
     Therefore disable it on this architecture.  */
  if (flag_reorder_blocks_and_partition)
    {
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }
}
|
|
/* A C compound statement to output to stdio stream STREAM the |
assembler syntax for an instruction operand X. X is an RTL |
expression. |
|
CODE is a value that can be used to specify one of several ways |
of printing the operand. It is used when identical operands |
must be printed differently depending on the context. CODE |
comes from the '%' specification that was used to request |
printing of the operand. If the specification was just '%DIGIT' |
then CODE is 0; if the specification was '%LTR DIGIT' then CODE |
is the ASCII code for LTR. |
|
If X is a register, this macro should print the register's name. |
The names can be found in an array 'reg_names' whose type is |
'char *[]'. 'reg_names' is initialized from 'REGISTER_NAMES'. |
|
When the machine description has a specification '%PUNCT' (a '%' |
followed by a punctuation character), this macro is called with |
a null pointer for X and the punctuation character for CODE. |
|
'a', 'c', 'l', and 'n' are reserved. |
|
The Xtensa specific codes are: |
|
'd' CONST_INT, print as signed decimal |
'x' CONST_INT, print as signed hexadecimal |
'K' CONST_INT, print number of bits in mask for EXTUI |
'R' CONST_INT, print (X & 0x1f) |
'L' CONST_INT, print ((32 - X) & 0x1f) |
'D' REG, print second register of double-word register operand |
'N' MEM, print address of next word following a memory operand |
'v' MEM, if memory reference is volatile, output a MEMW before it |
't' any constant, add "@h" suffix for top 16 bits |
'b' any constant, add "@l" suffix for bottom 16 bits |
*/ |
|
/* Print VAL to FILE: small magnitudes (|val| < 10) as plain decimal,
   everything else as hexadecimal with a leading minus for negatives.  */

static void
printx (FILE *file, signed int val)
{
  if (val > -0xa && val < 0xa)
    {
      fprintf (file, "%d", val);
      return;
    }

  if (val < 0)
    fprintf (file, "-0x%x", -val);
  else
    fprintf (file, "0x%x", val);
}
|
|
/* Output operand X to FILE according to format code LETTER; the
   supported letters are listed in the block comment above.  */

void
print_operand (FILE *file, rtx x, int letter)
{
  if (!x)
    error ("PRINT_OPERAND null pointer");

  switch (letter)
    {
    case 'D':
      /* Second register of a double-word register operand.  */
      if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
	fprintf (file, "%s", reg_names[xt_true_regnum (x) + 1]);
      else
	output_operand_lossage ("invalid %%D value");
      break;

    case 'v':
      if (GET_CODE (x) == MEM)
	{
	  /* For a volatile memory reference, emit a MEMW before the
	     load or store.  */
	  if (MEM_VOLATILE_P (x))
	    fprintf (file, "memw\n\t");
	}
      else
	output_operand_lossage ("invalid %%v value");
      break;

    case 'N':
      /* Address of the second word of a double-word memory operand.  */
      if (GET_CODE (x) == MEM
	  && (GET_MODE (x) == DFmode || GET_MODE (x) == DImode))
	{
	  x = adjust_address (x, GET_MODE (x) == DFmode ? SFmode : SImode, 4);
	  output_address (XEXP (x, 0));
	}
      else
	output_operand_lossage ("invalid %%N value");
      break;

    case 'K':
      /* Number of bits in a low-order contiguous mask, for EXTUI.  */
      if (GET_CODE (x) == CONST_INT)
	{
	  int num_bits = 0;
	  unsigned val = INTVAL (x);
	  while (val & 1)
	    {
	      num_bits += 1;
	      val = val >> 1;
	    }
	  /* The mask must be a solid run of 1 to 16 low-order bits.  */
	  if ((val != 0) || (num_bits == 0) || (num_bits > 16))
	    fatal_insn ("invalid mask", x);

	  fprintf (file, "%d", num_bits);
	}
      else
	output_operand_lossage ("invalid %%K value");
      break;

    case 'L':
      /* (32 - X) modulo 32, used for left/right shift-amount pairs.  */
      if (GET_CODE (x) == CONST_INT)
	fprintf (file, "%ld", (32 - INTVAL (x)) & 0x1f);
      else
	output_operand_lossage ("invalid %%L value");
      break;

    case 'R':
      /* X modulo 32.  */
      if (GET_CODE (x) == CONST_INT)
	fprintf (file, "%ld", INTVAL (x) & 0x1f);
      else
	output_operand_lossage ("invalid %%R value");
      break;

    case 'x':
      if (GET_CODE (x) == CONST_INT)
	printx (file, INTVAL (x));
      else
	output_operand_lossage ("invalid %%x value");
      break;

    case 'd':
      if (GET_CODE (x) == CONST_INT)
	fprintf (file, "%ld", INTVAL (x));
      else
	output_operand_lossage ("invalid %%d value");
      break;

    case 't':
    case 'b':
      /* High ("@h") or low ("@l") 16-bit half of a constant, for
	 CONST16 sequences.  */
      if (GET_CODE (x) == CONST_INT)
	{
	  printx (file, INTVAL (x));
	  fputs (letter == 't' ? "@h" : "@l", file);
	}
      else if (GET_CODE (x) == CONST_DOUBLE)
	{
	  REAL_VALUE_TYPE r;
	  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
	  if (GET_MODE (x) == SFmode)
	    {
	      long l;
	      REAL_VALUE_TO_TARGET_SINGLE (r, l);
	      fprintf (file, "0x%08lx@%c", l, letter == 't' ? 'h' : 'l');
	    }
	  else
	    output_operand_lossage ("invalid %%t/%%b value");
	}
      else if (GET_CODE (x) == CONST)
	{
	  /* X must be a symbolic constant on ELF.  Write an expression
	     suitable for 'const16' that sets the high or low 16 bits.  */
	  if (GET_CODE (XEXP (x, 0)) != PLUS
	      || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF
		  && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
	      || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
	    output_operand_lossage ("invalid %%t/%%b value");
	  print_operand (file, XEXP (XEXP (x, 0), 0), 0);
	  fputs (letter == 't' ? "@h" : "@l", file);
	  /* There must be a non-alphanumeric character between 'h' or 'l'
	     and the number.  The '-' is added by print_operand() already.  */
	  if (INTVAL (XEXP (XEXP (x, 0), 1)) >= 0)
	    fputs ("+", file);
	  print_operand (file, XEXP (XEXP (x, 0), 1), 0);
	}
      else
	{
	  output_addr_const (file, x);
	  fputs (letter == 't' ? "@h" : "@l", file);
	}
      break;

    default:
      /* No format code: print the operand in its natural form.  */
      if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
	fprintf (file, "%s", reg_names[xt_true_regnum (x)]);
      else if (GET_CODE (x) == MEM)
	output_address (XEXP (x, 0));
      else if (GET_CODE (x) == CONST_INT)
	fprintf (file, "%ld", INTVAL (x));
      else
	output_addr_const (file, x);
    }
}
|
|
/* A C compound statement to output to stdio stream STREAM the |
assembler syntax for an instruction operand that is a memory |
reference whose address is ADDR. ADDR is an RTL expression. */ |
|
void
print_operand_address (FILE *file, rtx addr)
{
  if (!addr)
    error ("PRINT_OPERAND_ADDRESS, null pointer");

  switch (GET_CODE (addr))
    {
    default:
      fatal_insn ("invalid address", addr);
      break;

    case REG:
      /* Bare register: print as "reg, 0" (register + zero offset).  */
      fprintf (file, "%s, 0", reg_names [REGNO (addr)]);
      break;

    case PLUS:
      {
	/* Register-plus-constant-offset form; the register may appear
	   on either side of the PLUS.  */
	rtx reg = (rtx)0;
	rtx offset = (rtx)0;
	rtx arg0 = XEXP (addr, 0);
	rtx arg1 = XEXP (addr, 1);

	if (GET_CODE (arg0) == REG)
	  {
	    reg = arg0;
	    offset = arg1;
	  }
	else if (GET_CODE (arg1) == REG)
	  {
	    reg = arg1;
	    offset = arg0;
	  }
	else
	  fatal_insn ("no register in address", addr);

	if (CONSTANT_P (offset))
	  {
	    fprintf (file, "%s, ", reg_names [REGNO (reg)]);
	    output_addr_const (file, offset);
	  }
	else
	  fatal_insn ("address offset not a constant", addr);
      }
      break;

    case LABEL_REF:
    case SYMBOL_REF:
    case CONST_INT:
    case CONST:
      /* Symbolic or absolute addresses print directly.  */
      output_addr_const (file, addr);
      break;
    }
}
|
|
/* Emit a ".literal" directive for constant X of mode MODE, labeled
   .LC<LABELNO>.  Floating-point values are emitted as raw hex words;
   integer values as one or two address constants.  */

void
xtensa_output_literal (FILE *file, rtx x, enum machine_mode mode, int labelno)
{
  long value_long[2];
  REAL_VALUE_TYPE r;
  int size;
  rtx first, second;

  fprintf (file, "\t.literal .LC%u, ", (unsigned) labelno);

  switch (GET_MODE_CLASS (mode))
    {
    case MODE_FLOAT:
      gcc_assert (GET_CODE (x) == CONST_DOUBLE);

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      switch (mode)
	{
	case SFmode:
	  REAL_VALUE_TO_TARGET_SINGLE (r, value_long[0]);
	  /* Mask to 32 bits on hosts where long is wider, so the
	     printed value has no sign-extension artifacts.  */
	  if (HOST_BITS_PER_LONG > 32)
	    value_long[0] &= 0xffffffff;
	  fprintf (file, "0x%08lx\n", value_long[0]);
	  break;

	case DFmode:
	  REAL_VALUE_TO_TARGET_DOUBLE (r, value_long);
	  if (HOST_BITS_PER_LONG > 32)
	    {
	      value_long[0] &= 0xffffffff;
	      value_long[1] &= 0xffffffff;
	    }
	  fprintf (file, "0x%08lx, 0x%08lx\n",
		   value_long[0], value_long[1]);
	  break;

	default:
	  gcc_unreachable ();
	}

      break;

    case MODE_INT:
    case MODE_PARTIAL_INT:
      size = GET_MODE_SIZE (mode);
      switch (size)
	{
	case 4:
	  output_addr_const (file, x);
	  fputs ("\n", file);
	  break;

	case 8:
	  /* Double-word constant: emit the two words separated by a
	     comma, in target word order per split_double.  */
	  split_double (x, &first, &second);
	  output_addr_const (file, first);
	  fputs (", ", file);
	  output_addr_const (file, second);
	  fputs ("\n", file);
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }
}
|
|
/* Return the bytes needed to compute the frame pointer from the current |
stack pointer. */ |
|
#define STACK_BYTES (STACK_BOUNDARY / BITS_PER_UNIT) |
#define XTENSA_STACK_ALIGN(LOC) (((LOC) + STACK_BYTES-1) & ~(STACK_BYTES-1)) |
|
/* Compute the total stack frame size for the current function given
   SIZE bytes of local variables; the result is also cached in the
   global xtensa_current_frame_size.  */

long
compute_frame_size (int size)
{
  /* Add space for the incoming static chain value.  */
  if (cfun->static_chain_decl != NULL)
    size += (1 * UNITS_PER_WORD);

  /* Locals + outgoing argument area + register-window save area,
     rounded up to the stack alignment.  */
  xtensa_current_frame_size =
    XTENSA_STACK_ALIGN (size
			+ current_function_outgoing_args_size
			+ (WINDOW_SIZE * UNITS_PER_WORD));
  return xtensa_current_frame_size;
}
|
|
int |
xtensa_frame_pointer_required (void) |
{ |
/* The code to expand builtin_frame_addr and builtin_return_addr |
currently uses the hard_frame_pointer instead of frame_pointer. |
This seems wrong but maybe it's necessary for other architectures. |
This function is derived from the i386 code. */ |
|
if (cfun->machine->accesses_prev_frame) |
return 1; |
|
return 0; |
} |
|
|
/* Expand the function prologue: emit the windowed ENTRY instruction
   (plus extra stack adjustment for large frames) and set up the frame
   pointer when one is needed.  */

void
xtensa_expand_prologue (void)
{
  HOST_WIDE_INT total_size;
  rtx size_rtx;

  total_size = compute_frame_size (get_frame_size ());
  size_rtx = GEN_INT (total_size);

  /* Frames below the 2^15-byte guard can be allocated directly by the
     ENTRY instruction; larger frames allocate MIN_FRAME_SIZE with
     ENTRY and subtract the remainder explicitly.  */
  if (total_size < (1 << (12+3)))
    emit_insn (gen_entry (size_rtx, size_rtx));
  else
    {
      /* Use a8 as a temporary since a0-a7 may be live.  */
      rtx tmp_reg = gen_rtx_REG (Pmode, A8_REG);
      emit_insn (gen_entry (size_rtx, GEN_INT (MIN_FRAME_SIZE)));
      emit_move_insn (tmp_reg, GEN_INT (total_size - MIN_FRAME_SIZE));
      emit_insn (gen_subsi3 (tmp_reg, stack_pointer_rtx, tmp_reg));
      emit_move_insn (stack_pointer_rtx, tmp_reg);
    }

  if (frame_pointer_needed)
    {
      if (cfun->machine->set_frame_ptr_insn)
	{
	  rtx first, insn;

	  push_topmost_sequence ();
	  first = get_insns ();
	  pop_topmost_sequence ();

	  /* For all instructions prior to set_frame_ptr_insn, replace
	     hard_frame_pointer references with stack_pointer.  */
	  for (insn = first;
	       insn != cfun->machine->set_frame_ptr_insn;
	       insn = NEXT_INSN (insn))
	    {
	      if (INSN_P (insn))
		PATTERN (insn) = replace_rtx (copy_rtx (PATTERN (insn)),
					      hard_frame_pointer_rtx,
					      stack_pointer_rtx);
	    }
	}
      else
	emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
    }
}
|
|
/* Clear variables at function end. */ |
|
void
xtensa_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
			  HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  /* Reset the cached frame size so stale state cannot leak into the
     next function's compilation.  */
  xtensa_current_frame_size = 0;
}
|
|
/* Return an rtx for the return address COUNT frames up.  COUNT == -1
   means the current function's own return register (a0); otherwise
   the address is loaded from the frame pointed to by FRAME.  */

rtx
xtensa_return_addr (int count, rtx frame)
{
  rtx result, retaddr;

  if (count == -1)
    retaddr = gen_rtx_REG (Pmode, A0_REG);
  else
    {
      /* The saved return address lives 4 words below the frame.  */
      rtx addr = plus_constant (frame, -4 * UNITS_PER_WORD);
      addr = memory_address (Pmode, addr);
      retaddr = gen_reg_rtx (Pmode);
      emit_move_insn (retaddr, gen_rtx_MEM (Pmode, addr));
    }

  /* The 2 most-significant bits of the return address on Xtensa hold
     the register window size.  To get the real return address, these
     bits must be replaced with the high bits from the current PC.  */

  result = gen_reg_rtx (Pmode);
  emit_insn (gen_fix_return_addr (result, retaddr));
  return result;
}
|
|
/* Create the va_list data type. |
|
This structure is set up by __builtin_saveregs. The __va_reg field |
points to a stack-allocated region holding the contents of the |
incoming argument registers. The __va_ndx field is an index |
initialized to the position of the first unnamed (variable) |
argument. This same index is also used to address the arguments |
passed in memory. Thus, the __va_stk field is initialized to point |
to the position of the first argument in memory offset to account |
for the arguments passed in registers and to account for the size |
of the argument registers not being 16-byte aligned. E.G., there |
are 6 argument registers of 4 bytes each, but we want the __va_ndx |
for the first stack argument to have the maximal alignment of 16 |
bytes, so we offset the __va_stk address by 32 bytes so that |
__va_stk[32] references the first argument on the stack. */ |
|
static tree
xtensa_build_builtin_va_list (void)
{
  tree f_stk, f_reg, f_ndx, record, type_decl;

  record = (*lang_hooks.types.make_type) (RECORD_TYPE);
  type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);

  /* The three fields described in the block comment above: stack-area
     pointer, saved-register-area pointer, and the shared index.  */
  f_stk = build_decl (FIELD_DECL, get_identifier ("__va_stk"),
		      ptr_type_node);
  f_reg = build_decl (FIELD_DECL, get_identifier ("__va_reg"),
		      ptr_type_node);
  f_ndx = build_decl (FIELD_DECL, get_identifier ("__va_ndx"),
		      integer_type_node);

  DECL_FIELD_CONTEXT (f_stk) = record;
  DECL_FIELD_CONTEXT (f_reg) = record;
  DECL_FIELD_CONTEXT (f_ndx) = record;

  /* Chain the fields in declaration order: __va_stk, __va_reg,
     __va_ndx.  Other code indexes the fields by this order.  */
  TREE_CHAIN (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_stk;
  TREE_CHAIN (f_stk) = f_reg;
  TREE_CHAIN (f_reg) = f_ndx;

  layout_type (record);
  return record;
}
|
|
/* Save the incoming argument registers on the stack. Returns the |
address of the saved registers. */ |
|
static rtx
xtensa_builtin_saveregs (void)
{
  rtx gp_regs, dest;
  int arg_words = current_function_args_info.arg_words;
  int gp_left = MAX_ARGS_IN_REGISTERS - arg_words;

  /* All argument registers already hold named arguments: nothing to
     save.  */
  if (gp_left <= 0)
    return const0_rtx;

  /* Allocate the general-purpose register space.  */
  gp_regs = assign_stack_local
    (BLKmode, MAX_ARGS_IN_REGISTERS * UNITS_PER_WORD, -1);
  set_mem_alias_set (gp_regs, get_varargs_alias_set ());

  /* Now store the incoming registers.  */
  dest = change_address (gp_regs, SImode,
			 plus_constant (XEXP (gp_regs, 0),
					arg_words * UNITS_PER_WORD));
  /* The stores below may touch a7, so arrange for the incoming a7
     value to be copied out first (see xtensa_copy_incoming_a7); the
     vararg_a7 flag makes that copy land ahead of the saveregs code.  */
  cfun->machine->need_a7_copy = true;
  cfun->machine->vararg_a7 = true;
  move_block_from_reg (GP_ARG_FIRST + arg_words, dest, gp_left);
  gcc_assert (cfun->machine->vararg_a7_copy != 0);
  emit_insn_before (cfun->machine->vararg_a7_copy, get_insns ());

  return XEXP (gp_regs, 0);
}
|
|
/* Implement `va_start' for varargs and stdarg. We look at the |
current function to fill in an initial va_list. */ |
|
void
xtensa_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
{
  tree f_stk, stk;
  tree f_reg, reg;
  tree f_ndx, ndx;
  tree t, u;
  int arg_words;

  arg_words = current_function_args_info.arg_words;

  /* Fields in declaration order: __va_stk, __va_reg, __va_ndx (see
     xtensa_build_builtin_va_list).  */
  f_stk = TYPE_FIELDS (va_list_type_node);
  f_reg = TREE_CHAIN (f_stk);
  f_ndx = TREE_CHAIN (f_reg);

  stk = build3 (COMPONENT_REF, TREE_TYPE (f_stk), valist, f_stk, NULL_TREE);
  reg = build3 (COMPONENT_REF, TREE_TYPE (f_reg), valist, f_reg, NULL_TREE);
  ndx = build3 (COMPONENT_REF, TREE_TYPE (f_ndx), valist, f_ndx, NULL_TREE);

  /* Call __builtin_saveregs; save the result in __va_reg */
  u = make_tree (ptr_type_node, expand_builtin_saveregs ());
  t = build2 (MODIFY_EXPR, ptr_type_node, reg, u);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Set the __va_stk member to ($arg_ptr - 32).  */
  u = make_tree (ptr_type_node, virtual_incoming_args_rtx);
  u = fold_build2 (PLUS_EXPR, ptr_type_node, u,
		   build_int_cst (NULL_TREE, -32));
  t = build2 (MODIFY_EXPR, ptr_type_node, stk, u);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Set the __va_ndx member.  If the first variable argument is on
     the stack, adjust __va_ndx by 2 words to account for the extra
     alignment offset for __va_stk.  */
  if (arg_words >= MAX_ARGS_IN_REGISTERS)
    arg_words += 2;
  u = build_int_cst (NULL_TREE, arg_words * UNITS_PER_WORD);
  t = build2 (MODIFY_EXPR, integer_type_node, ndx, u);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
|
|
/* Implement `va_arg'.  Emits GIMPLE into *PRE_P that fetches the next
   variadic argument of TYPE from the va_list VALIST and returns a tree
   for its value.  Arguments live either in the register-save area
   (__va_reg) or in the caller's stack overflow area (__va_stk); the
   byte index __va_ndx selects the slot within whichever area is
   chosen.  POST_P is unused.  */

static tree
xtensa_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p,
			     tree *post_p ATTRIBUTE_UNUSED)
{
  tree f_stk, stk;
  tree f_reg, reg;
  tree f_ndx, ndx;
  tree type_size, array, orig_ndx, addr, size, va_size, t;
  tree lab_false, lab_over, lab_false2;
  bool indirect;

  /* Types passed by reference are fetched as a pointer and then
     dereferenced at the end.  */
  indirect = pass_by_reference (NULL, TYPE_MODE (type), type, false);
  if (indirect)
    type = build_pointer_type (type);

  /* Handle complex values as separate real and imaginary parts.  */
  if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      tree real_part, imag_part;

      real_part = xtensa_gimplify_va_arg_expr (valist, TREE_TYPE (type),
					       pre_p, NULL);
      real_part = get_initialized_tmp_var (real_part, pre_p, NULL);

      imag_part = xtensa_gimplify_va_arg_expr (valist, TREE_TYPE (type),
					       pre_p, NULL);
      imag_part = get_initialized_tmp_var (imag_part, pre_p, NULL);

      return build2 (COMPLEX_EXPR, type, real_part, imag_part);
    }

  /* Pull out the three fields of the va_list record (same layout as in
     xtensa_va_start).  */
  f_stk = TYPE_FIELDS (va_list_type_node);
  f_reg = TREE_CHAIN (f_stk);
  f_ndx = TREE_CHAIN (f_reg);

  stk = build3 (COMPONENT_REF, TREE_TYPE (f_stk), valist, f_stk, NULL_TREE);
  reg = build3 (COMPONENT_REF, TREE_TYPE (f_reg), valist, f_reg, NULL_TREE);
  ndx = build3 (COMPONENT_REF, TREE_TYPE (f_ndx), valist, f_ndx, NULL_TREE);

  /* va_size = size of TYPE rounded up to a whole number of words.  */
  type_size = size_in_bytes (type);
  va_size = round_up (type_size, UNITS_PER_WORD);
  gimplify_expr (&va_size, pre_p, NULL, is_gimple_val, fb_rvalue);


  /* First align __va_ndx if necessary for this arg:

     orig_ndx = (AP).__va_ndx;
     if (__alignof__ (TYPE) > 4 )
       orig_ndx = ((orig_ndx + __alignof__ (TYPE) - 1)
			& -__alignof__ (TYPE)); */

  orig_ndx = get_initialized_tmp_var (ndx, pre_p, NULL);

  if (TYPE_ALIGN (type) > BITS_PER_WORD)
    {
      /* Alignment is capped at STACK_BOUNDARY; ALIGN is in bytes.  */
      int align = MIN (TYPE_ALIGN (type), STACK_BOUNDARY) / BITS_PER_UNIT;

      t = build2 (PLUS_EXPR, integer_type_node, orig_ndx,
		  build_int_cst (NULL_TREE, align - 1));
      t = build2 (BIT_AND_EXPR, integer_type_node, t,
		  build_int_cst (NULL_TREE, -align));
      t = build2 (MODIFY_EXPR, integer_type_node, orig_ndx, t);
      gimplify_and_add (t, pre_p);
    }


  /* Increment __va_ndx to point past the argument:

     (AP).__va_ndx = orig_ndx + __va_size (TYPE); */

  t = fold_convert (integer_type_node, va_size);
  t = build2 (PLUS_EXPR, integer_type_node, orig_ndx, t);
  t = build2 (MODIFY_EXPR, integer_type_node, ndx, t);
  gimplify_and_add (t, pre_p);


  /* Check if the argument is in registers:

     if ((AP).__va_ndx <= __MAX_ARGS_IN_REGISTERS * 4
         && !must_pass_in_stack (type))
        __array = (AP).__va_reg; */

  array = create_tmp_var (ptr_type_node, NULL);

  /* lab_over stays NULL when the register path is impossible, so the
     final label emission below can be skipped.  */
  lab_over = NULL;
  if (!targetm.calls.must_pass_in_stack (TYPE_MODE (type), type))
    {
      lab_false = create_artificial_label ();
      lab_over = create_artificial_label ();

      t = build_int_cst (NULL_TREE, MAX_ARGS_IN_REGISTERS * UNITS_PER_WORD);
      t = build2 (GT_EXPR, boolean_type_node, ndx, t);
      t = build3 (COND_EXPR, void_type_node, t,
		  build1 (GOTO_EXPR, void_type_node, lab_false),
		  NULL_TREE);
      gimplify_and_add (t, pre_p);

      t = build2 (MODIFY_EXPR, void_type_node, array, reg);
      gimplify_and_add (t, pre_p);

      t = build1 (GOTO_EXPR, void_type_node, lab_over);
      gimplify_and_add (t, pre_p);

      t = build1 (LABEL_EXPR, void_type_node, lab_false);
      gimplify_and_add (t, pre_p);
    }


  /* ...otherwise, the argument is on the stack (never split between
     registers and the stack -- change __va_ndx if necessary):

     else
       {
	 if (orig_ndx <= __MAX_ARGS_IN_REGISTERS * 4)
	     (AP).__va_ndx = 32 + __va_size (TYPE);
	 __array = (AP).__va_stk;
       } */

  lab_false2 = create_artificial_label ();

  t = build_int_cst (NULL_TREE, MAX_ARGS_IN_REGISTERS * UNITS_PER_WORD);
  t = build2 (GT_EXPR, boolean_type_node, orig_ndx, t);
  t = build3 (COND_EXPR, void_type_node, t,
	      build1 (GOTO_EXPR, void_type_node, lab_false2),
	      NULL_TREE);
  gimplify_and_add (t, pre_p);

  /* 32 skips the register-save area; matches the -32 bias applied to
     __va_stk in xtensa_va_start.  */
  t = size_binop (PLUS_EXPR, va_size, size_int (32));
  t = fold_convert (integer_type_node, t);
  t = build2 (MODIFY_EXPR, integer_type_node, ndx, t);
  gimplify_and_add (t, pre_p);

  t = build1 (LABEL_EXPR, void_type_node, lab_false2);
  gimplify_and_add (t, pre_p);

  t = build2 (MODIFY_EXPR, void_type_node, array, stk);
  gimplify_and_add (t, pre_p);

  if (lab_over)
    {
      t = build1 (LABEL_EXPR, void_type_node, lab_over);
      gimplify_and_add (t, pre_p);
    }


  /* Given the base array pointer (__array) and index to the subsequent
     argument (__va_ndx), find the address:

     __array + (AP).__va_ndx - (BYTES_BIG_ENDIAN && sizeof (TYPE) < 4
				? sizeof (TYPE)
				: __va_size (TYPE))

     The results are endian-dependent because values smaller than one word
     are aligned differently.  */


  if (BYTES_BIG_ENDIAN && TREE_CODE (type_size) == INTEGER_CST)
    {
      t = size_int (PARM_BOUNDARY / BITS_PER_UNIT);
      t = fold_build2 (GE_EXPR, boolean_type_node, type_size, t);
      t = fold_build3 (COND_EXPR, sizetype, t, va_size, type_size);
      size = t;
    }
  else
    size = va_size;

  /* addr = array + ndx - size.  */
  t = fold_convert (ptr_type_node, ndx);
  addr = build2 (PLUS_EXPR, ptr_type_node, array, t);
  t = fold_convert (ptr_type_node, size);
  addr = build2 (MINUS_EXPR, ptr_type_node, addr, t);

  addr = fold_convert (build_pointer_type (type), addr);
  if (indirect)
    addr = build_va_arg_indirect_ref (addr);
  return build_va_arg_indirect_ref (addr);
}
|
|
enum reg_class |
xtensa_preferred_reload_class (rtx x, enum reg_class class, int isoutput) |
{ |
if (!isoutput && CONSTANT_P (x) && GET_CODE (x) == CONST_DOUBLE) |
return NO_REGS; |
|
/* Don't use the stack pointer or hard frame pointer for reloads! |
The hard frame pointer would normally be OK except that it may |
briefly hold an incoming argument in the prologue, and reload |
won't know that it is live because the hard frame pointer is |
treated specially. */ |
|
if (class == AR_REGS || class == GR_REGS) |
return RL_REGS; |
|
return class; |
} |
|
|
enum reg_class |
xtensa_secondary_reload_class (enum reg_class class, |
enum machine_mode mode ATTRIBUTE_UNUSED, |
rtx x, int isoutput) |
{ |
int regno; |
|
if (GET_CODE (x) == SIGN_EXTEND) |
x = XEXP (x, 0); |
regno = xt_true_regnum (x); |
|
if (!isoutput) |
{ |
if (class == FP_REGS && constantpool_mem_p (x)) |
return RL_REGS; |
} |
|
if (ACC_REG_P (regno)) |
return ((class == GR_REGS || class == RL_REGS) ? NO_REGS : RL_REGS); |
if (class == ACC_REG) |
return (GP_REG_P (regno) ? NO_REGS : RL_REGS); |
|
return NO_REGS; |
} |
|
|
void |
order_regs_for_local_alloc (void) |
{ |
if (!leaf_function_p ()) |
{ |
memcpy (reg_alloc_order, reg_nonleaf_alloc_order, |
FIRST_PSEUDO_REGISTER * sizeof (int)); |
} |
else |
{ |
int i, num_arg_regs; |
int nxt = 0; |
|
/* Use the AR registers in increasing order (skipping a0 and a1) |
but save the incoming argument registers for a last resort. */ |
num_arg_regs = current_function_args_info.arg_words; |
if (num_arg_regs > MAX_ARGS_IN_REGISTERS) |
num_arg_regs = MAX_ARGS_IN_REGISTERS; |
for (i = GP_ARG_FIRST; i < 16 - num_arg_regs; i++) |
reg_alloc_order[nxt++] = i + num_arg_regs; |
for (i = 0; i < num_arg_regs; i++) |
reg_alloc_order[nxt++] = GP_ARG_FIRST + i; |
|
/* List the coprocessor registers in order. */ |
for (i = 0; i < BR_REG_NUM; i++) |
reg_alloc_order[nxt++] = BR_REG_FIRST + i; |
|
/* List the FP registers in order for now. */ |
for (i = 0; i < 16; i++) |
reg_alloc_order[nxt++] = FP_REG_FIRST + i; |
|
/* GCC requires that we list *all* the registers.... */ |
reg_alloc_order[nxt++] = 0; /* a0 = return address */ |
reg_alloc_order[nxt++] = 1; /* a1 = stack pointer */ |
reg_alloc_order[nxt++] = 16; /* pseudo frame pointer */ |
reg_alloc_order[nxt++] = 17; /* pseudo arg pointer */ |
|
reg_alloc_order[nxt++] = ACC_REG_FIRST; /* MAC16 accumulator */ |
} |
} |
|
|
/* Some Xtensa targets support multiple bss sections. If the section |
name ends with ".bss", add SECTION_BSS to the flags. */ |
|
static unsigned int |
xtensa_multibss_section_type_flags (tree decl, const char *name, int reloc) |
{ |
unsigned int flags = default_section_type_flags (decl, name, reloc); |
const char *suffix; |
|
suffix = strrchr (name, '.'); |
if (suffix && strcmp (suffix, ".bss") == 0) |
{ |
if (!decl || (TREE_CODE (decl) == VAR_DECL |
&& DECL_INITIAL (decl) == NULL_TREE)) |
flags |= SECTION_BSS; /* @nobits */ |
else |
warning (0, "only uninitialized variables can be placed in a " |
".bss section"); |
} |
|
return flags; |
} |
|
|
/* The literal pool stays with the function. */ |
|
static section * |
xtensa_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED, |
rtx x ATTRIBUTE_UNUSED, |
unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED) |
{ |
return function_section (current_function_decl); |
} |
|
|
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.

   NOTE(review): some branches use raw values (4..7) and others use
   COSTS_N_INSNS; in GCC, COSTS_N_INSNS (1) == 4, so the raw values
   express fractional-instruction differences on the same scale.  */

static bool
xtensa_rtx_costs (rtx x, int code, int outer_code, int *total)
{
  switch (code)
    {
    case CONST_INT:
      /* The cost of an integer constant depends on the context
	 (OUTER_CODE): many Xtensa instructions have immediate forms,
	 which make the constant free when it fits the encoding.  */
      switch (outer_code)
	{
	case SET:
	  if (xtensa_simm12b (INTVAL (x)))
	    {
	      /* Fits a MOVI immediate.  */
	      *total = 4;
	      return true;
	    }
	  break;
	case PLUS:
	  if (xtensa_simm8 (INTVAL (x))
	      || xtensa_simm8x256 (INTVAL (x)))
	    {
	      /* Fits an ADDI or ADDMI immediate.  */
	      *total = 0;
	      return true;
	    }
	  break;
	case AND:
	  if (xtensa_mask_immediate (INTVAL (x)))
	    {
	      /* Fits an EXTUI mask.  */
	      *total = 0;
	      return true;
	    }
	  break;
	case COMPARE:
	  if ((INTVAL (x) == 0) || xtensa_b4const (INTVAL (x)))
	    {
	      /* Usable by a branch-immediate instruction.  */
	      *total = 0;
	      return true;
	    }
	  break;
	case ASHIFT:
	case ASHIFTRT:
	case LSHIFTRT:
	case ROTATE:
	case ROTATERT:
	  /* No way to tell if X is the 2nd operand so be conservative.  */
	default: break;
	}
      /* Constant does not fit any immediate field: it must be loaded,
	 either via MOVI (simm12b), CONST16, or an L32R literal.  */
      if (xtensa_simm12b (INTVAL (x)))
	*total = 5;
      else if (TARGET_CONST16)
	*total = COSTS_N_INSNS (2);
      else
	*total = 6;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      /* Symbolic addresses: two CONST16s or one L32R literal load.  */
      if (TARGET_CONST16)
	*total = COSTS_N_INSNS (2);
      else
	*total = 5;
      return true;

    case CONST_DOUBLE:
      /* A double-word constant: four CONST16s or two literal loads.  */
      if (TARGET_CONST16)
	*total = COSTS_N_INSNS (4);
      else
	*total = 7;
      return true;

    case MEM:
      {
	/* One load/store per word, doubled if the address needs to be
	   reloaded into a register first.  */
	int num_words =
	  (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD) ?  2 : 1;

	if (memory_address_p (GET_MODE (x), XEXP ((x), 0)))
	  *total = COSTS_N_INSNS (num_words);
	else
	  *total = COSTS_N_INSNS (2*num_words);
	return true;
      }

    case FFS:
      /* Cheap with the NSA (normalization shift amount) option.  */
      *total = COSTS_N_INSNS (TARGET_NSA ? 5 : 50);
      return true;

    case NOT:
      *total = COSTS_N_INSNS ((GET_MODE (x) == DImode) ? 3 : 2);
      return true;

    case AND:
    case IOR:
    case XOR:
      if (GET_MODE (x) == DImode)
	*total = COSTS_N_INSNS (2);
      else
	*total = COSTS_N_INSNS (1);
      return true;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      /* DImode shifts go through a libcall; 50 discourages them.  */
      if (GET_MODE (x) == DImode)
	*total = COSTS_N_INSNS (50);
      else
	*total = COSTS_N_INSNS (1);
      return true;

    case ABS:
      {
	enum machine_mode xmode = GET_MODE (x);
	if (xmode == SFmode)
	  *total = COSTS_N_INSNS (TARGET_HARD_FLOAT ? 1 : 50);
	else if (xmode == DFmode)
	  *total = COSTS_N_INSNS (50);
	else
	  *total = COSTS_N_INSNS (4);
	return true;
      }

    case PLUS:
    case MINUS:
      {
	enum machine_mode xmode = GET_MODE (x);
	if (xmode == SFmode)
	  *total = COSTS_N_INSNS (TARGET_HARD_FLOAT ? 1 : 50);
	else if (xmode == DFmode || xmode == DImode)
	  *total = COSTS_N_INSNS (50);
	else
	  *total = COSTS_N_INSNS (1);
	return true;
      }

    case NEG:
      *total = COSTS_N_INSNS ((GET_MODE (x) == DImode) ? 4 : 2);
      return true;

    case MULT:
      {
	/* Cost depends on which multiply option is configured.  */
	enum machine_mode xmode = GET_MODE (x);
	if (xmode == SFmode)
	  *total = COSTS_N_INSNS (TARGET_HARD_FLOAT ? 4 : 50);
	else if (xmode == DFmode || xmode == DImode)
	  *total = COSTS_N_INSNS (50);
	else if (TARGET_MUL32)
	  *total = COSTS_N_INSNS (4);
	else if (TARGET_MAC16)
	  *total = COSTS_N_INSNS (16);
	else if (TARGET_MUL16)
	  *total = COSTS_N_INSNS (12);
	else
	  *total = COSTS_N_INSNS (50);
	return true;
      }

    case DIV:
    case MOD:
      {
	enum machine_mode xmode = GET_MODE (x);
	if (xmode == SFmode)
	  {
	    *total = COSTS_N_INSNS (TARGET_HARD_FLOAT_DIV ? 8 : 50);
	    return true;
	  }
	else if (xmode == DFmode)
	  {
	    *total = COSTS_N_INSNS (50);
	    return true;
	  }
      }
      /* Fall through.  */

    case UDIV:
    case UMOD:
      {
	enum machine_mode xmode = GET_MODE (x);
	if (xmode == DImode)
	  *total = COSTS_N_INSNS (50);
	else if (TARGET_DIV32)
	  *total = COSTS_N_INSNS (32);
	else
	  *total = COSTS_N_INSNS (50);
	return true;
      }

    case SQRT:
      if (GET_MODE (x) == SFmode)
	*total = COSTS_N_INSNS (TARGET_HARD_FLOAT_SQRT ? 8 : 50);
      else
	*total = COSTS_N_INSNS (50);
      return true;

    case SMIN:
    case UMIN:
    case SMAX:
    case UMAX:
      *total = COSTS_N_INSNS (TARGET_MINMAX ? 1 : 50);
      return true;

    case SIGN_EXTRACT:
    case SIGN_EXTEND:
      *total = COSTS_N_INSNS (TARGET_SEXT ? 1 : 2);
      return true;

    case ZERO_EXTRACT:
    case ZERO_EXTEND:
      *total = COSTS_N_INSNS (1);
      return true;

    default:
      /* Let the caller recurse into subexpressions.  */
      return false;
    }
}
|
/* Worker function for TARGET_RETURN_IN_MEMORY. */ |
|
static bool |
xtensa_return_in_memory (tree type, tree fntype ATTRIBUTE_UNUSED) |
{ |
return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type) |
> 4 * UNITS_PER_WORD); |
} |
|
#include "gt-xtensa.h" |
/crtn.asm
0,0 → 1,50
# End of .init and .fini sections. |
# Copyright (C) 2003 Free Software Foundation, Inc. |
# |
# This file is free software; you can redistribute it and/or modify it |
# under the terms of the GNU General Public License as published by |
# the Free Software Foundation; either version 2, or (at your option) |
# any later version. |
# |
# In addition to the permissions in the GNU General Public License, the |
# Free Software Foundation gives you unlimited permission to link the |
# compiled version of this file into combinations with other programs, |
# and to distribute those combinations without any restriction coming |
# from the use of this file. (The General Public License restrictions |
# do apply in other respects; for example, they cover modification of |
# the file, and distribution when not linked into a combine |
# executable.) |
# |
# GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
# WARRANTY; without even the implied warranty of MERCHANTABILITY or |
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
# for more details. |
# |
# You should have received a copy of the GNU General Public License |
# along with GCC; see the file COPYING. If not, write to the Free |
# Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA |
# 02110-1301, USA. |
|
# This file just makes sure that the .fini and .init sections do in |
# fact return. Users may put any desired instructions in those sections. |
# This file is the last thing linked into any executable. |
|
#include "xtensa-config.h" |
|
	# Close the .init body opened by crti.asm with a matching return.
	.section .init
#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
	# Windowed ABI: crti.asm used ENTRY, so return with window rotation.
	retw
#else
	# CALL0 ABI: restore the return address from the top of the
	# 32-byte frame that crti.asm allocated, pop it, and return.
	l32i a0, sp, 0
	addi sp, sp, 32
	ret
#endif
|
	# Close the .fini body the same way as .init above.
	.section .fini
#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
	# Windowed ABI: return with window rotation.
	retw
#else
	# CALL0 ABI: reload the saved return address and pop the
	# 32-byte frame set up by crti.asm.
	l32i a0, sp, 0
	addi sp, sp, 32
	ret
#endif
/linux.h
0,0 → 1,61
/* Xtensa Linux configuration. |
Derived from the configuration for GCC for Intel i386 running Linux. |
Copyright (C) 2001, 2002, 2003, 2006, 2007 Free Software Foundation, Inc. |
|
This file is part of GCC. |
|
GCC is free software; you can redistribute it and/or modify it under |
the terms of the GNU General Public License as published by the Free |
Software Foundation; either version 3, or (at your option) any later |
version. |
|
GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
WARRANTY; without even the implied warranty of MERCHANTABILITY or |
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
for more details. |
|
You should have received a copy of the GNU General Public License |
along with GCC; see the file COPYING3. If not see |
<http://www.gnu.org/licenses/>. */ |
|
#define TARGET_OS_CPP_BUILTINS() LINUX_TARGET_OS_CPP_BUILTINS()

/* -posix and -pthread select the corresponding feature-test macros.  */
#undef SUBTARGET_CPP_SPEC
#define SUBTARGET_CPP_SPEC "%{posix:-D_POSIX_SOURCE} %{pthread:-D_REENTRANT}"

#undef TARGET_VERSION
#define TARGET_VERSION fputs (" (Xtensa GNU/Linux with ELF)", stderr);

/* wchar_t is a 32-bit "long int" on this target.  */
#undef WCHAR_TYPE
#define WCHAR_TYPE "long int"

#undef WCHAR_TYPE_SIZE
#define WCHAR_TYPE_SIZE 32

/* Forward the Xtensa-specific -m options to the assembler.  */
#undef ASM_SPEC
#define ASM_SPEC \
 "%{v} \
  %{mtext-section-literals:--text-section-literals} \
  %{mno-text-section-literals:--no-text-section-literals} \
  %{mtarget-align:--target-align} \
  %{mno-target-align:--no-target-align} \
  %{mlongcalls:--longcalls} \
  %{mno-longcalls:--no-longcalls}"

#define GLIBC_DYNAMIC_LINKER "/lib/ld.so.1"

#undef LINK_SPEC
#define LINK_SPEC \
 "%{shared:-shared} \
  %{!shared: \
    %{!ibcs: \
      %{!static: \
        %{rdynamic:-export-dynamic} \
        %{!dynamic-linker:-dynamic-linker " LINUX_DYNAMIC_LINKER "}} \
      %{static:-static}}}"

/* Use "." so local labels do not clash with user symbols.  */
#undef LOCAL_LABEL_PREFIX
#define LOCAL_LABEL_PREFIX	"."

/* Always enable "-fpic" for Xtensa Linux.  */
#define XTENSA_ALWAYS_PIC 1
/xtensa.h
0,0 → 1,1223
/* Definitions of Tensilica's Xtensa target machine for GNU compiler. |
Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007 |
Free Software Foundation, Inc. |
Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. |
|
This file is part of GCC. |
|
GCC is free software; you can redistribute it and/or modify it under |
the terms of the GNU General Public License as published by the Free |
Software Foundation; either version 3, or (at your option) any later |
version. |
|
GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
WARRANTY; without even the implied warranty of MERCHANTABILITY or |
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
for more details. |
|
You should have received a copy of the GNU General Public License |
along with GCC; see the file COPYING3. If not see |
<http://www.gnu.org/licenses/>. */ |
|
/* Get Xtensa configuration settings */ |
#include "xtensa-config.h" |
|
/* Standard GCC variables that we reference.  */
extern int current_function_calls_alloca;
extern int optimize;

/* External variables defined in xtensa.c.  */

/* comparison type */
enum cmp_type {
  CMP_SI,				/* four byte integers */
  CMP_DI,				/* eight byte integers */
  CMP_SF,				/* single precision floats */
  CMP_DF,				/* double precision floats */
  CMP_MAX				/* max comparison type */
};

extern struct rtx_def * branch_cmp[2];	/* operands for compare */
extern enum cmp_type branch_type;	/* what type of branch to use */
extern unsigned xtensa_current_frame_size;

/* Macros used in the machine description to select various Xtensa
   configuration options.  Each maps directly onto an XCHAL_* setting
   from xtensa-config.h.  */
#define TARGET_BIG_ENDIAN	XCHAL_HAVE_BE
#define TARGET_DENSITY		XCHAL_HAVE_DENSITY
#define TARGET_MAC16		XCHAL_HAVE_MAC16
#define TARGET_MUL16		XCHAL_HAVE_MUL16
#define TARGET_MUL32		XCHAL_HAVE_MUL32
#define TARGET_DIV32		XCHAL_HAVE_DIV32
#define TARGET_NSA		XCHAL_HAVE_NSA
#define TARGET_MINMAX		XCHAL_HAVE_MINMAX
#define TARGET_SEXT		XCHAL_HAVE_SEXT
#define TARGET_BOOLEANS		XCHAL_HAVE_BOOLEANS
#define TARGET_HARD_FLOAT	XCHAL_HAVE_FP
#define TARGET_HARD_FLOAT_DIV	XCHAL_HAVE_FP_DIV
#define TARGET_HARD_FLOAT_RECIP	XCHAL_HAVE_FP_RECIP
#define TARGET_HARD_FLOAT_SQRT	XCHAL_HAVE_FP_SQRT
#define TARGET_HARD_FLOAT_RSQRT	XCHAL_HAVE_FP_RSQRT
#define TARGET_ABS		XCHAL_HAVE_ABS
#define TARGET_ADDX		XCHAL_HAVE_ADDX

/* Default to CONST16 only when the core has no L32R literal loads.  */
#define TARGET_DEFAULT (						\
  (XCHAL_HAVE_L32R	? 0 : MASK_CONST16))

#define OVERRIDE_OPTIONS override_options ()

/* Reordering blocks for Xtensa is not a good idea unless the compiler
   understands the range of conditional branches.  Currently all branch
   relaxation for Xtensa is handled in the assembler, so GCC cannot do a
   good job of reordering blocks.  Do not enable reordering unless it is
   explicitly requested.  */
#define OPTIMIZATION_OPTIONS(LEVEL, SIZE)				\
  do									\
    {									\
      flag_reorder_blocks = 0;						\
    }									\
  while (0)


/* Target CPU builtins.  */
#define TARGET_CPU_CPP_BUILTINS()					\
  do {									\
    builtin_assert ("cpu=xtensa");					\
    builtin_assert ("machine=xtensa");					\
    builtin_define ("__xtensa__");					\
    builtin_define ("__XTENSA__");					\
    builtin_define ("__XTENSA_WINDOWED_ABI__");				\
    builtin_define (TARGET_BIG_ENDIAN ? "__XTENSA_EB__" : "__XTENSA_EL__"); \
    if (!TARGET_HARD_FLOAT)						\
      builtin_define ("__XTENSA_SOFT_FLOAT__");				\
  } while (0)

#define CPP_SPEC " %(subtarget_cpp_spec) "

#ifndef SUBTARGET_CPP_SPEC
#define SUBTARGET_CPP_SPEC ""
#endif

#define EXTRA_SPECS							\
  { "subtarget_cpp_spec", SUBTARGET_CPP_SPEC },

/* libgcc2 word order follows the configured endianness.  */
#ifdef __XTENSA_EB__
#define LIBGCC2_WORDS_BIG_ENDIAN 1
#else
#define LIBGCC2_WORDS_BIG_ENDIAN 0
#endif

/* Show we can debug even without a frame pointer.  */
#define CAN_DEBUG_WITHOUT_FP
|
|
/* Target machine storage layout */

/* Define this if most significant bit is lowest numbered
   in instructions that operate on numbered bit-fields.  */
#define BITS_BIG_ENDIAN (TARGET_BIG_ENDIAN != 0)

/* Define this if most significant byte of a word is the lowest numbered.  */
#define BYTES_BIG_ENDIAN (TARGET_BIG_ENDIAN != 0)

/* Define this if most significant word of a multiword number is the lowest.  */
#define WORDS_BIG_ENDIAN (TARGET_BIG_ENDIAN != 0)

#define MAX_BITS_PER_WORD 32

/* Width of a word, in units (bytes).  */
#define UNITS_PER_WORD 4
#define MIN_UNITS_PER_WORD 4

/* Width of a floating point register.  */
#define UNITS_PER_FPREG 4

/* Size in bits of various types on the target machine.  */
#define INT_TYPE_SIZE 32
#define SHORT_TYPE_SIZE 16
#define LONG_TYPE_SIZE 32
#define LONG_LONG_TYPE_SIZE 64
#define FLOAT_TYPE_SIZE 32
#define DOUBLE_TYPE_SIZE 64
/* long double is the same as double (64 bits).  */
#define LONG_DOUBLE_TYPE_SIZE 64

/* Allocation boundary (in *bits*) for storing pointers in memory.  */
#define POINTER_BOUNDARY 32

/* Allocation boundary (in *bits*) for storing arguments in argument list.  */
#define PARM_BOUNDARY 32

/* Allocation boundary (in *bits*) for the code of a function.  */
#define FUNCTION_BOUNDARY 32

/* Alignment of field after 'int : 0' in a structure.  */
#define EMPTY_FIELD_BOUNDARY 32

/* Every structure's size must be a multiple of this.  */
#define STRUCTURE_SIZE_BOUNDARY 8

/* There is no point aligning anything to a rounder boundary than this.  */
#define BIGGEST_ALIGNMENT 128

/* Set this nonzero if move instructions will actually fail to work
   when given unaligned data.  */
#define STRICT_ALIGNMENT 1

/* Promote integer modes smaller than a word to SImode.  Set UNSIGNEDP
   for QImode, because there is no 8-bit load from memory with sign
   extension.  Otherwise, leave UNSIGNEDP alone, since Xtensa has 16-bit
   loads both with and without sign extension.  */
#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE)				\
  do {									\
    if (GET_MODE_CLASS (MODE) == MODE_INT				\
	&& GET_MODE_SIZE (MODE) < UNITS_PER_WORD)			\
      {									\
	if ((MODE) == QImode)						\
	  (UNSIGNEDP) = 1;						\
	(MODE) = SImode;						\
      }									\
  } while (0)

/* Imitate the way many other C compilers handle alignment of
   bitfields and the structures that contain them.  */
#define PCC_BITFIELD_TYPE_MATTERS 1

/* Disable the use of word-sized or smaller complex modes for structures,
   and for function arguments in particular, where they cause problems with
   register a7.  The xtensa_copy_incoming_a7 function assumes that there is
   a single reference to an argument in a7, but with small complex modes the
   real and imaginary components may be extracted separately, leading to two
   uses of the register, only one of which would be replaced.  */
#define MEMBER_TYPE_FORCES_BLK(FIELD, MODE)				\
  ((MODE) == CQImode || (MODE) == CHImode)

/* Align string constants and constructors to at least a word boundary.
   The typical use of this macro is to increase alignment for string
   constants to be word aligned so that 'strcpy' calls that copy
   constants can be done inline.  */
#define CONSTANT_ALIGNMENT(EXP, ALIGN)					\
  ((TREE_CODE (EXP) == STRING_CST || TREE_CODE (EXP) == CONSTRUCTOR)	\
   && (ALIGN) < BITS_PER_WORD						\
   ? BITS_PER_WORD							\
   : (ALIGN))

/* Align arrays, unions and records to at least a word boundary.
   One use of this macro is to increase alignment of medium-size
   data to make it all fit in fewer cache lines.  Another is to
   cause character arrays to be word-aligned so that 'strcpy' calls
   that copy constants to character arrays can be done inline.  */
#undef DATA_ALIGNMENT
#define DATA_ALIGNMENT(TYPE, ALIGN)					\
  ((((ALIGN) < BITS_PER_WORD)						\
    && (TREE_CODE (TYPE) == ARRAY_TYPE					\
	|| TREE_CODE (TYPE) == UNION_TYPE				\
	|| TREE_CODE (TYPE) == RECORD_TYPE)) ? BITS_PER_WORD : (ALIGN))

/* Operations between registers always perform the operation
   on the full register even if a narrower mode is specified.  */
#define WORD_REGISTER_OPERATIONS

/* Xtensa loads are zero-extended by default.  */
#define LOAD_EXTEND_OP(MODE) ZERO_EXTEND
|
/* Standard register usage.  */

/* Number of actual hardware registers.
   The hardware registers are assigned numbers for the compiler
   from 0 to just below FIRST_PSEUDO_REGISTER.
   All registers that the compiler knows about must be given numbers,
   even those that are not normally considered general registers.

   The fake frame pointer and argument pointer will never appear in
   the generated code, since they will always be eliminated and replaced
   by either the stack pointer or the hard frame pointer.

   0 - 15	AR[0] - AR[15]
   16		FRAME_POINTER (fake = initial sp)
   17		ARG_POINTER (fake = initial sp + framesize)
   18		BR[0] for floating-point CC
   19 - 34	FR[0] - FR[15]
   35		MAC16 accumulator */

#define FIRST_PSEUDO_REGISTER 36

/* Return the stabs register number to use for REGNO.  */
#define DBX_REGISTER_NUMBER(REGNO) xtensa_dbx_register_number (REGNO)

/* 1 for registers that have pervasive standard uses
   and are not available for the register allocator.
   Fixed: a0 (return address), a1 (stack pointer), and the two fake
   frame/arg pointer registers.  */
#define FIXED_REGISTERS							\
{									\
  1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,			\
  1, 1, 0,								\
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,			\
  0,									\
}

/* 1 for registers not available across function calls.
   These must include the FIXED_REGISTERS and also any
   registers that can be used without being saved.
   The latter must include the registers where values are returned
   and the register where structure-value addresses are passed.
   Aside from that, you can include as many other registers as you like.  */
#define CALL_USED_REGISTERS						\
{									\
  1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,			\
  1, 1, 1,								\
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,			\
  1,									\
}

/* For non-leaf procedures on Xtensa processors, the allocation order
   is as specified below by REG_ALLOC_ORDER.  For leaf procedures, we
   want to use the lowest numbered registers first to minimize
   register window overflows.  However, local-alloc is not smart
   enough to consider conflicts with incoming arguments.  If an
   incoming argument in a2 is live throughout the function and
   local-alloc decides to use a2, then the incoming argument must
   either be spilled or copied to another register.  To get around
   this, we define ORDER_REGS_FOR_LOCAL_ALLOC to redefine
   reg_alloc_order for leaf functions such that lowest numbered
   registers are used first with the exception that the incoming
   argument registers are not used until after other register choices
   have been exhausted.  */

#define REG_ALLOC_ORDER \
{  8,  9, 10, 11, 12, 13, 14, 15,  7,  6,  5,  4,  3,  2, \
  18, \
  19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, \
   0,  1, 16, 17, \
  35, \
}

#define ORDER_REGS_FOR_LOCAL_ALLOC order_regs_for_local_alloc ()

/* For Xtensa, the only point of this is to prevent GCC from otherwise
   giving preference to call-used registers.  To minimize window
   overflows for the AR registers, we want to give preference to the
   lower-numbered AR registers.  For other register files, which are
   not windowed, we still prefer call-used registers, if there are any.  */
extern const char xtensa_leaf_regs[FIRST_PSEUDO_REGISTER];
#define LEAF_REGISTERS xtensa_leaf_regs

/* For Xtensa, no remapping is necessary, but this macro must be
   defined if LEAF_REGISTERS is defined.  */
#define LEAF_REG_REMAP(REGNO) (REGNO)

/* This must be declared if LEAF_REGISTERS is set.  */
extern int leaf_function;

/* Internal macros to classify a register number.  */

/* 16 address registers + fake registers */
#define GP_REG_FIRST 0
#define GP_REG_LAST  17
#define GP_REG_NUM   (GP_REG_LAST - GP_REG_FIRST + 1)

/* Coprocessor registers */
#define BR_REG_FIRST 18
#define BR_REG_LAST  18
#define BR_REG_NUM   (BR_REG_LAST - BR_REG_FIRST + 1)

/* 16 floating-point registers */
#define FP_REG_FIRST 19
#define FP_REG_LAST  34
#define FP_REG_NUM   (FP_REG_LAST - FP_REG_FIRST + 1)

/* MAC16 accumulator */
#define ACC_REG_FIRST 35
#define ACC_REG_LAST 35
#define ACC_REG_NUM  (ACC_REG_LAST - ACC_REG_FIRST + 1)

/* Each predicate uses an unsigned subtraction so a single comparison
   covers both range bounds.  */
#define GP_REG_P(REGNO) ((unsigned) ((REGNO) - GP_REG_FIRST) < GP_REG_NUM)
#define BR_REG_P(REGNO) ((unsigned) ((REGNO) - BR_REG_FIRST) < BR_REG_NUM)
#define FP_REG_P(REGNO) ((unsigned) ((REGNO) - FP_REG_FIRST) < FP_REG_NUM)
#define ACC_REG_P(REGNO) ((unsigned) ((REGNO) - ACC_REG_FIRST) < ACC_REG_NUM)
|
/* Return number of consecutive hard regs needed starting at reg REGNO
   to hold something of mode MODE.  */
#define HARD_REGNO_NREGS(REGNO, MODE)					\
  (FP_REG_P (REGNO) ?							\
   ((GET_MODE_SIZE (MODE) + UNITS_PER_FPREG - 1) / UNITS_PER_FPREG) :	\
   ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD))

/* Value is 1 if hard register REGNO can hold a value of machine-mode
   MODE.  The table is computed in xtensa.c.  */
extern char xtensa_hard_regno_mode_ok[][FIRST_PSEUDO_REGISTER];

#define HARD_REGNO_MODE_OK(REGNO, MODE)					\
  xtensa_hard_regno_mode_ok[(int) (MODE)][(REGNO)]

/* Value is 1 if it is a good idea to tie two pseudo registers
   when one has mode MODE1 and one has mode MODE2.
   If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
   for any hard reg, then this must be 0 for correct output.
   Modes are tieable exactly when both are FP or both are non-FP.  */
#define MODES_TIEABLE_P(MODE1, MODE2)					\
  ((GET_MODE_CLASS (MODE1) == MODE_FLOAT ||				\
    GET_MODE_CLASS (MODE1) == MODE_COMPLEX_FLOAT)			\
   == (GET_MODE_CLASS (MODE2) == MODE_FLOAT ||				\
       GET_MODE_CLASS (MODE2) == MODE_COMPLEX_FLOAT))

/* Register to use for pushing function arguments.  */
#define STACK_POINTER_REGNUM (GP_REG_FIRST + 1)

/* Base register for access to local variables of the function.  */
#define HARD_FRAME_POINTER_REGNUM (GP_REG_FIRST + 7)

/* The register number of the frame pointer register, which is used to
   access automatic variables in the stack frame.  For Xtensa, this
   register never appears in the output.  It is always eliminated to
   either the stack pointer or the hard frame pointer.  */
#define FRAME_POINTER_REGNUM (GP_REG_FIRST + 16)

/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms
   may be accessed via the stack pointer) in functions that seem suitable.
   This is computed in 'reload', in reload1.c.  */
#define FRAME_POINTER_REQUIRED xtensa_frame_pointer_required ()

/* Base register for access to arguments of the function.  */
#define ARG_POINTER_REGNUM (GP_REG_FIRST + 17)

/* If the static chain is passed in memory, these macros provide rtx
   giving 'mem' expressions that denote where they are stored.
   'STATIC_CHAIN' and 'STATIC_CHAIN_INCOMING' give the locations as
   seen by the calling and called functions, respectively.  */

#define STATIC_CHAIN							\
  gen_rtx_MEM (Pmode, plus_constant (stack_pointer_rtx, -5 * UNITS_PER_WORD))

#define STATIC_CHAIN_INCOMING						\
  gen_rtx_MEM (Pmode, plus_constant (arg_pointer_rtx, -5 * UNITS_PER_WORD))

/* For now we don't try to use the full set of boolean registers.  Without
   software pipelining of FP operations, there's not much to gain and it's
   a real pain to get them reloaded.  */
#define FPCC_REGNUM (BR_REG_FIRST + 0)

/* It is as good or better to call a constant function address than to
   call an address kept in a register.  */
#define NO_FUNCTION_CSE 1

/* Xtensa processors have "register windows".  GCC does not currently
   take advantage of the possibility for variable-sized windows; instead,
   we use a fixed window size of 8.  */

#define INCOMING_REGNO(OUT)						\
  ((GP_REG_P (OUT) &&							\
    ((unsigned) ((OUT) - GP_REG_FIRST) >= WINDOW_SIZE)) ?		\
   (OUT) - WINDOW_SIZE : (OUT))

#define OUTGOING_REGNO(IN)						\
  ((GP_REG_P (IN) &&							\
    ((unsigned) ((IN) - GP_REG_FIRST) < WINDOW_SIZE)) ?			\
   (IN) + WINDOW_SIZE : (IN))
|
|
/* Define the classes of registers for register constraints in the
   machine description. */
enum reg_class
{
  NO_REGS,		/* no registers in set */
  BR_REGS,		/* coprocessor boolean registers */
  FP_REGS,		/* floating point registers */
  ACC_REG,		/* MAC16 accumulator */
  SP_REG,		/* sp register (aka a1) */
  RL_REGS,		/* preferred reload regs (not sp or fp) */
  GR_REGS,		/* integer registers except sp */
  AR_REGS,		/* all integer registers */
  ALL_REGS,		/* all registers */
  LIM_REG_CLASSES	/* max value + 1 */
};

#define N_REG_CLASSES (int) LIM_REG_CLASSES

#define GENERAL_REGS AR_REGS

/* An initializer containing the names of the register classes as C
   string constants. These names are used in writing some of the
   debugging dumps. Must stay in the same order as enum reg_class. */
#define REG_CLASS_NAMES							\
{									\
  "NO_REGS",								\
  "BR_REGS",								\
  "FP_REGS",								\
  "ACC_REG",								\
  "SP_REG",								\
  "RL_REGS",								\
  "GR_REGS",								\
  "AR_REGS",								\
  "ALL_REGS"								\
}

/* Contents of the register classes. The Nth integer specifies the
   contents of class N. The way the integer MASK is interpreted is
   that register R is in the class if 'MASK & (1 << R)' is 1.

   Register numbering follows REGISTER_NAMES: bits 0-15 = a0-a15
   (bit 1 = sp, bit 7 = a7/hard frame pointer), 16 = fp, 17 = argp,
   18 = b0, 19-34 = f0-f15, 35 = acc. So RL_REGS (0x0000ff7d) is
   a0-a15 minus sp and a7; GR_REGS (0x0000fffd) is a0-a15 minus sp. */
#define REG_CLASS_CONTENTS \
{ \
  { 0x00000000, 0x00000000 }, /* no registers */ \
  { 0x00040000, 0x00000000 }, /* coprocessor boolean registers */ \
  { 0xfff80000, 0x00000007 }, /* floating-point registers */ \
  { 0x00000000, 0x00000008 }, /* MAC16 accumulator */ \
  { 0x00000002, 0x00000000 }, /* stack pointer register */ \
  { 0x0000ff7d, 0x00000000 }, /* preferred reload registers */ \
  { 0x0000fffd, 0x00000000 }, /* general-purpose registers */ \
  { 0x0003ffff, 0x00000000 }, /* integer registers */ \
  { 0xffffffff, 0x0000000f } /* all registers */ \
}

/* A C expression whose value is a register class containing hard
   register REGNO. In general there is more than one such class;
   choose a class which is "minimal", meaning that no smaller class
   also contains the register. */
extern const enum reg_class xtensa_regno_to_class[FIRST_PSEUDO_REGISTER];

#define REGNO_REG_CLASS(REGNO) xtensa_regno_to_class[ (REGNO) ]

/* Use the Xtensa AR register file for base registers.
   No index registers. */
#define BASE_REG_CLASS AR_REGS
#define INDEX_REG_CLASS NO_REGS

/* SMALL_REGISTER_CLASSES is required for Xtensa, because all of the
   16 AR registers may be explicitly used in the RTL, as either
   incoming or outgoing arguments. */
#define SMALL_REGISTER_CLASSES 1
|
|
/* REGISTER AND CONSTANT CLASSES */

/* Get reg_class from a letter such as appears in the machine
   description.

   Available letters: a-f,h,j-l,q,t-z,A-D,W,Y-Z

   DEFINED REGISTER CLASSES:

   'a'  general-purpose registers except sp
   'q'  sp (aka a1)
   'D'	general-purpose registers (only if density option enabled)
   'd'  general-purpose registers, including sp (only if density enabled)
   'A'	MAC16 accumulator (only if MAC16 option enabled)
   'B'	general-purpose registers (only if sext instruction enabled)
   'C'  general-purpose registers (only if mul16 option enabled)
   'W'  general-purpose registers (only if const16 option enabled)
   'b'	coprocessor boolean registers
   'f'	floating-point registers
*/

/* The table is filled in elsewhere (xtensa.c, presumably) based on the
   enabled ISA options. */
extern enum reg_class xtensa_char_to_class[256];

#define REG_CLASS_FROM_LETTER(C) xtensa_char_to_class[ (int) (C) ]

/* The letters I, J, K, L, M, N, O, and P in a register constraint
   string can be used to stand for particular ranges of immediate
   operands. This macro defines what the ranges are. C is the
   letter, and VALUE is a constant value. Return 1 if VALUE is
   in the range specified by C.

   For Xtensa:

   I = 12-bit signed immediate for MOVI
   J = 8-bit signed immediate for ADDI
   K = 4-bit value in (b4const U {0})
   L = 4-bit value in b4constu
   M = 7-bit immediate value for MOVI.N
   N = 8-bit unsigned immediate shifted left by 8 bits for ADDMI
   O = 4-bit immediate for ADDI.N
   P = valid immediate mask value for EXTUI */

#define CONST_OK_FOR_LETTER_P  xtensa_const_ok_for_letter_p
/* No CONST_DOUBLE constants are valid as immediates. */
#define CONST_DOUBLE_OK_FOR_LETTER_P(VALUE, C) (0)


/* Other letters can be defined in a machine-dependent fashion to
   stand for particular classes of registers or other arbitrary
   operand types.

   R = memory that can be accessed with a 4-bit unsigned offset
   T = memory in a constant pool (addressable with a pc-relative load)
   U = memory *NOT* in a constant pool

   The offset range should not be checked here (except to distinguish
   denser versions of the instructions for which more general versions
   are available). Doing so leads to problems in reloading: an
   argptr-relative address may become invalid when the phony argptr is
   eliminated in favor of the stack pointer (the offset becomes too
   large to fit in the instruction's immediate field); a reload is
   generated to fix this but the RTL is not immediately updated; in
   the meantime, the constraints are checked and none match. The
   solution seems to be to simply skip the offset check here. The
   address will be checked anyway because of the code in
   GO_IF_LEGITIMATE_ADDRESS. */

#define EXTRA_CONSTRAINT  xtensa_extra_constraint

/* Third argument distinguishes input (0) from output (1) reloads. */
#define PREFERRED_RELOAD_CLASS(X, CLASS)				\
  xtensa_preferred_reload_class (X, CLASS, 0)

#define PREFERRED_OUTPUT_RELOAD_CLASS(X, CLASS)				\
  xtensa_preferred_reload_class (X, CLASS, 1)

#define SECONDARY_INPUT_RELOAD_CLASS(CLASS, MODE, X)			\
  xtensa_secondary_reload_class (CLASS, MODE, X, 0)

#define SECONDARY_OUTPUT_RELOAD_CLASS(CLASS, MODE, X)			\
  xtensa_secondary_reload_class (CLASS, MODE, X, 1)

/* Return the maximum number of consecutive registers
   needed to represent mode MODE in a register of class CLASS.
   CLASS_UNITS rounds the mode size up to a whole number of
   registers of the given size. */
#define CLASS_UNITS(mode, size)						\
  ((GET_MODE_SIZE (mode) + (size) - 1) / (size))

#define CLASS_MAX_NREGS(CLASS, MODE)					\
  (CLASS_UNITS (MODE, UNITS_PER_WORD))
|
|
/* Stack layout; function entry, exit and calling. */

#define STACK_GROWS_DOWNWARD

/* Offset within stack frame to start allocating local variables at.
   Locals go above the outgoing-argument area (see
   ACCUMULATE_OUTGOING_ARGS below). */
#define STARTING_FRAME_OFFSET						\
  current_function_outgoing_args_size

/* The ARG_POINTER and FRAME_POINTER are not real Xtensa registers, so
   they are eliminated to either the stack pointer or hard frame pointer. */
#define ELIMINABLE_REGS							\
 {{ ARG_POINTER_REGNUM,		STACK_POINTER_REGNUM},			\
  { ARG_POINTER_REGNUM,		HARD_FRAME_POINTER_REGNUM},		\
  { FRAME_POINTER_REGNUM,	STACK_POINTER_REGNUM},			\
  { FRAME_POINTER_REGNUM,	HARD_FRAME_POINTER_REGNUM}}

/* Every elimination in ELIMINABLE_REGS is always possible. */
#define CAN_ELIMINATE(FROM, TO) 1

/* Specify the initial difference between the specified pair of registers.
   compute_frame_size is called for its side effect of setting
   xtensa_current_frame_size (NOTE: presumably a global updated by that
   call -- confirm in xtensa.c).  The frame pointer coincides with the
   stack pointer (offset 0); the phony arg pointer sits a whole frame
   above it. */
#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET)			\
  do {									\
    compute_frame_size (get_frame_size ());				\
    switch (FROM)							\
      {									\
      case FRAME_POINTER_REGNUM:					\
        (OFFSET) = 0;							\
	break;								\
      case ARG_POINTER_REGNUM:						\
        (OFFSET) = xtensa_current_frame_size;				\
	break;								\
      default:								\
	gcc_unreachable ();						\
      }									\
  } while (0)

/* If defined, the maximum amount of space required for outgoing
   arguments will be computed and placed into the variable
   'current_function_outgoing_args_size'. No space will be pushed
   onto the stack for each call; instead, the function prologue
   should increase the stack frame size by this amount. */
#define ACCUMULATE_OUTGOING_ARGS 1

/* Offset from the argument pointer register to the first argument's
   address. On some machines it may depend on the data type of the
   function. If 'ARGS_GROW_DOWNWARD', this is the offset to the
   location above the first argument's address. */
#define FIRST_PARM_OFFSET(FNDECL) 0

/* Align stack frames on 128 bits for Xtensa. This is necessary for
   128-bit datatypes defined in TIE (e.g., for Vectra). */
#define STACK_BOUNDARY 128

/* Functions do not pop arguments off the stack. */
#define RETURN_POPS_ARGS(FUNDECL, FUNTYPE, SIZE) 0

/* Use a fixed register window size of 8. */
#define WINDOW_SIZE 8
|
/* Symbolic macros for the registers used to return integer, floating
   point, and values of coprocessor and user-defined modes.
   GP_RETURN is the caller's view (a2 shifted up by the window size,
   i.e. a10 as seen before the window rotates); GP_OUTGOING_RETURN is
   the callee's view (a2). */
#define GP_RETURN (GP_REG_FIRST + 2 + WINDOW_SIZE)
#define GP_OUTGOING_RETURN (GP_REG_FIRST + 2)

/* Symbolic macros for the first/last argument registers.
   Incoming arguments arrive in a2-a7; outgoing arguments are placed
   in the window-shifted equivalents a10-a15. */
#define GP_ARG_FIRST (GP_REG_FIRST + 2)
#define GP_ARG_LAST  (GP_REG_FIRST + 7)
#define GP_OUTGOING_ARG_FIRST (GP_REG_FIRST + 2 + WINDOW_SIZE)
#define GP_OUTGOING_ARG_LAST  (GP_REG_FIRST + 7 + WINDOW_SIZE)

/* Six registers (a2-a7) carry arguments. */
#define MAX_ARGS_IN_REGISTERS 6

/* Don't worry about compatibility with PCC. */
#define DEFAULT_PCC_STRUCT_RETURN 0

/* Define how to find the value returned by a library function
   assuming the value has mode MODE. Because we have defined
   TARGET_PROMOTE_FUNCTION_RETURN that returns true, we have to
   perform the same promotions as PROMOTE_MODE: sub-word integer
   values are widened to SImode. */
#define XTENSA_LIBCALL_VALUE(MODE, OUTGOINGP)				\
  gen_rtx_REG ((GET_MODE_CLASS (MODE) == MODE_INT			\
		&& GET_MODE_SIZE (MODE) < UNITS_PER_WORD)		\
	       ? SImode : (MODE),					\
	       OUTGOINGP ? GP_OUTGOING_RETURN : GP_RETURN)

#define LIBCALL_VALUE(MODE)						\
  XTENSA_LIBCALL_VALUE ((MODE), 0)

#define LIBCALL_OUTGOING_VALUE(MODE)					\
  XTENSA_LIBCALL_VALUE ((MODE), 1)

/* Define how to find the value returned by a function.
   VALTYPE is the data type of the value (as a tree).
   If the precise function being called is known, FUNC is its FUNCTION_DECL;
   otherwise, FUNC is 0. */
#define XTENSA_FUNCTION_VALUE(VALTYPE, FUNC, OUTGOINGP)			\
  gen_rtx_REG ((INTEGRAL_TYPE_P (VALTYPE)				\
		&& TYPE_PRECISION (VALTYPE) < BITS_PER_WORD)		\
	       ? SImode: TYPE_MODE (VALTYPE),				\
	       OUTGOINGP ? GP_OUTGOING_RETURN : GP_RETURN)

#define FUNCTION_VALUE(VALTYPE, FUNC)					\
  XTENSA_FUNCTION_VALUE (VALTYPE, FUNC, 0)

#define FUNCTION_OUTGOING_VALUE(VALTYPE, FUNC)				\
  XTENSA_FUNCTION_VALUE (VALTYPE, FUNC, 1)

/* A C expression that is nonzero if REGNO is the number of a hard
   register in which the values of called function may come back. A
   register whose use for returning values is limited to serving as
   the second of a pair (for a value of type 'double', say) need not
   be recognized by this macro. If the machine has register windows,
   so that the caller and the called function use different registers
   for the return value, this macro should recognize only the caller's
   register numbers. */
#define FUNCTION_VALUE_REGNO_P(N)					\
  ((N) == GP_RETURN)

/* A C expression that is nonzero if REGNO is the number of a hard
   register in which function arguments are sometimes passed. This
   does *not* include implicit arguments such as the static chain and
   the structure-value address. On many machines, no registers can be
   used for this purpose since all function arguments are pushed on
   the stack. */
#define FUNCTION_ARG_REGNO_P(N)						\
  ((N) >= GP_OUTGOING_ARG_FIRST && (N) <= GP_OUTGOING_ARG_LAST)
|
/* Record the number of argument words seen so far, along with a flag to
   indicate whether these are incoming arguments. (FUNCTION_INCOMING_ARG
   is used for both incoming and outgoing args, so a separate flag is
   needed.) */
typedef struct xtensa_args
{
  int arg_words;	/* # of words of arguments scanned so far */
  int incoming;		/* nonzero when scanning incoming arguments */
} CUMULATIVE_ARGS;

/* Second argument to init_cumulative_args selects incoming (1)
   vs. outgoing (0) argument scanning. */
#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \
  init_cumulative_args (&CUM, 0)

#define INIT_CUMULATIVE_INCOMING_ARGS(CUM, FNTYPE, LIBNAME)		\
  init_cumulative_args (&CUM, 1)

/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.) */
#define FUNCTION_ARG_ADVANCE(CUM, MODE, TYPE, NAMED)			\
  function_arg_advance (&CUM, MODE, TYPE)

#define FUNCTION_ARG(CUM, MODE, TYPE, NAMED)				\
  function_arg (&CUM, MODE, TYPE, FALSE)

#define FUNCTION_INCOMING_ARG(CUM, MODE, TYPE, NAMED)			\
  function_arg (&CUM, MODE, TYPE, TRUE)

#define FUNCTION_ARG_BOUNDARY function_arg_boundary
|
/* Profiling Xtensa code is typically done with the built-in profiling
   feature of Tensilica's instruction set simulator, which does not
   require any compiler support. Profiling code on a real (i.e.,
   non-simulated) Xtensa processor is currently only supported by
   GNU/Linux with glibc. The glibc version of _mcount doesn't require
   counter variables. The _mcount function needs the current PC and
   the current return address to identify an arc in the call graph.
   Pass the current return address as the first argument; the current
   PC is available as a0 in _mcount's register window. Both of these
   values contain window size information in the two most significant
   bits; we assume that _mcount will mask off those bits. The call to
   _mcount uses a window size of 8 to make sure that it doesn't clobber
   any incoming argument values. */

#define NO_PROFILE_COUNTERS	1

/* a10 is the first outgoing argument for a call8 (window rotation of 8);
   PIC calls go through the PLT via an indirect callx8. */
#define FUNCTION_PROFILER(FILE, LABELNO) \
  do {									\
    fprintf (FILE, "\t%s\ta10, a0\n", TARGET_DENSITY ? "mov.n" : "mov"); \
    if (flag_pic)							\
      {									\
	fprintf (FILE, "\tmovi\ta8, _mcount@PLT\n");			\
	fprintf (FILE, "\tcallx8\ta8\n");				\
      }									\
    else								\
      fprintf (FILE, "\tcall8\t_mcount\n");				\
  } while (0)

/* Stack pointer value doesn't matter at exit. */
#define EXIT_IGNORE_STACK 1
|
/* A C statement to output, on the stream FILE, assembler code for a
   block of data that contains the constant parts of a trampoline.
   This code should not include a label--the label is taken care of
   automatically.

   For Xtensa, the trampoline must perform an entry instruction with a
   minimal stack frame in order to get some free registers. Once the
   actual call target is known, the proper stack frame size is extracted
   from the entry instruction at the target and the current frame is
   adjusted to match. The trampoline then transfers control to the
   instruction following the entry at the target. Note: this assumes
   that the target begins with an entry instruction. */

/* minimum frame = reg save area (4 words) plus static chain (1 word)
   and the total number of words must be a multiple of 128 bits */
#define MIN_FRAME_SIZE (8 * UNITS_PER_WORD)

/* The .Lchainval/.Lfnaddr words must land at byte offsets 12 and 16 of
   the trampoline to match the stores in INITIALIZE_TRAMPOLINE below --
   hence the .align 4 and the "addi a0, a0, 3" adjustments around the
   3-byte call0 return address (NOTE(review): offsets depend on the
   instruction encodings; confirm against the assembler output). */
#define TRAMPOLINE_TEMPLATE(STREAM)					\
  do {									\
    fprintf (STREAM, "\t.begin no-transform\n");			\
    fprintf (STREAM, "\tentry\tsp, %d\n", MIN_FRAME_SIZE);		\
									\
    /* save the return address */					\
    fprintf (STREAM, "\tmov\ta10, a0\n");				\
									\
    /* Use a CALL0 instruction to skip past the constants and in the	\
       process get the PC into A0. This allows PC-relative access to	\
       the constants without relying on L32R, which may not always be	\
       available. */							\
									\
    fprintf (STREAM, "\tcall0\t.Lskipconsts\n");			\
    fprintf (STREAM, "\t.align\t4\n");					\
    fprintf (STREAM, ".Lchainval:%s0\n", integer_asm_op (4, TRUE));	\
    fprintf (STREAM, ".Lfnaddr:%s0\n", integer_asm_op (4, TRUE));	\
    fprintf (STREAM, ".Lskipconsts:\n");				\
									\
    /* store the static chain */					\
    fprintf (STREAM, "\taddi\ta0, a0, 3\n");				\
    fprintf (STREAM, "\tl32i\ta8, a0, 0\n");				\
    fprintf (STREAM, "\ts32i\ta8, sp, %d\n", MIN_FRAME_SIZE - 20);	\
									\
    /* set the proper stack pointer value */				\
    fprintf (STREAM, "\tl32i\ta8, a0, 4\n");				\
    fprintf (STREAM, "\tl32i\ta9, a8, 0\n");				\
    fprintf (STREAM, "\textui\ta9, a9, %d, 12\n",			\
	     TARGET_BIG_ENDIAN ? 8 : 12);				\
    fprintf (STREAM, "\tslli\ta9, a9, 3\n");				\
    fprintf (STREAM, "\taddi\ta9, a9, %d\n", -MIN_FRAME_SIZE);		\
    fprintf (STREAM, "\tsub\ta9, sp, a9\n");				\
    fprintf (STREAM, "\tmovsp\tsp, a9\n");				\
									\
    /* restore the return address */					\
    fprintf (STREAM, "\tmov\ta0, a10\n");				\
									\
    /* jump to the instruction following the entry */			\
    fprintf (STREAM, "\taddi\ta8, a8, 3\n");				\
    fprintf (STREAM, "\tjx\ta8\n");					\
    fprintf (STREAM, "\t.byte\t0\n");					\
    fprintf (STREAM, "\t.end no-transform\n");				\
  } while (0)

/* Size in bytes of the trampoline, as an integer. Make sure this is
   a multiple of TRAMPOLINE_ALIGNMENT to avoid -Wpadded warnings. */
#define TRAMPOLINE_SIZE 60

/* Alignment required for trampolines, in bits. */
#define TRAMPOLINE_ALIGNMENT (32)

/* A C statement to initialize the variable parts of a trampoline:
   patch the static chain and function address words, then flush the
   caches so the patched code is visible to instruction fetch. */
#define INITIALIZE_TRAMPOLINE(ADDR, FUNC, CHAIN)			\
  do {									\
    rtx addr = ADDR;							\
    emit_move_insn (gen_rtx_MEM (SImode, plus_constant (addr, 12)), CHAIN); \
    emit_move_insn (gen_rtx_MEM (SImode, plus_constant (addr, 16)), FUNC); \
    emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__xtensa_sync_caches"), \
		       0, VOIDmode, 1, addr, Pmode);			\
  } while (0)
|
/* Implement `va_start' for varargs and stdarg. */
#define EXPAND_BUILTIN_VA_START(valist, nextarg) \
  xtensa_va_start (valist, nextarg)

/* If defined, a C expression that produces the machine-specific code
   to setup the stack so that arbitrary frames can be accessed.

   On Xtensa, a stack back-trace must always begin from the stack pointer,
   so that the register overflow save area can be located. However, the
   stack-walking code in GCC always begins from the hard_frame_pointer
   register, not the stack pointer. The frame pointer is usually equal
   to the stack pointer, but the __builtin_return_address and
   __builtin_frame_address functions will not work if count > 0 and
   they are called from a routine that uses alloca. These functions
   are not guaranteed to work at all if count > 0 so maybe that is OK.

   A nicer solution would be to allow the architecture-specific files to
   specify whether to start from the stack pointer or frame pointer. That
   would also allow us to skip the machine->accesses_prev_frame stuff that
   we currently need to ensure that there is a frame pointer when these
   builtin functions are used. */

#define SETUP_FRAME_ADDRESSES  xtensa_setup_frame_addresses

/* A C expression whose value is RTL representing the address in a
   stack frame where the pointer to the caller's frame is stored.
   Assume that FRAMEADDR is an RTL expression for the address of the
   stack frame itself.

   For Xtensa, there is no easy way to get the frame pointer if it is
   not equivalent to the stack pointer. Moreover, the result of this
   macro is used for continuing to walk back up the stack, so it must
   return the stack pointer address. Thus, there is some inconsistency
   here in that __builtin_frame_address will return the frame pointer
   when count == 0 and the stack pointer when count > 0.

   The saved sp lives 3 words below the frame address in the register
   save area. */

#define DYNAMIC_CHAIN_ADDRESS(frame)					\
  gen_rtx_PLUS (Pmode, frame, GEN_INT (-3 * UNITS_PER_WORD))

/* Define this if the return address of a particular stack frame is
   accessed from the frame pointer of the previous stack frame. */
#define RETURN_ADDR_IN_PREVIOUS_FRAME

/* A C expression whose value is RTL representing the value of the
   return address for the frame COUNT steps up from the current
   frame, after the prologue. */
#define RETURN_ADDR_RTX  xtensa_return_addr
|
/* Addressing modes, and classification of registers for them. */

/* C expressions which are nonzero if register number NUM is suitable
   for use as a base or index register in operand addresses. It may
   be either a suitable hard register or a pseudo register that has
   been allocated such a hard register. The difference between an
   index register and a base register is that the index register may
   be scaled. Xtensa has no scaled-index addressing, so index
   registers are never OK. */

#define REGNO_OK_FOR_BASE_P(NUM) \
  (GP_REG_P (NUM) || GP_REG_P ((unsigned) reg_renumber[NUM]))

#define REGNO_OK_FOR_INDEX_P(NUM) 0

/* C expressions that are nonzero if X (assumed to be a `reg' RTX) is
   valid for use as a base or index register. For hard registers, it
   should always accept those which the hardware permits and reject
   the others. Whether the macro accepts or rejects pseudo registers
   must be controlled by `REG_OK_STRICT'. This usually requires two
   variant definitions, of which `REG_OK_STRICT' controls the one
   actually used. The difference between an index register and a base
   register is that the index register may be scaled. */

#ifdef REG_OK_STRICT

/* Strict variant: pseudos must already be assigned a GP hard reg. */
#define REG_OK_FOR_INDEX_P(X) 0
#define REG_OK_FOR_BASE_P(X) \
  REGNO_OK_FOR_BASE_P (REGNO (X))

#else /* !REG_OK_STRICT */

/* Non-strict variant: any pseudo is assumed reloadable into a GP reg. */
#define REG_OK_FOR_INDEX_P(X) 0
#define REG_OK_FOR_BASE_P(X) \
  ((REGNO (X) >= FIRST_PSEUDO_REGISTER) || (GP_REG_P (REGNO (X))))

#endif /* !REG_OK_STRICT */

/* Maximum number of registers that can appear in a valid memory address. */
#define MAX_REGS_PER_ADDRESS 1
|
/* Identify valid Xtensa addresses: constant-pool references (for
   word-sized or larger loads when L32R is usable, i.e. not const16),
   plain base registers, and base register + constant offset where the
   offset fits the load/store immediate field for MODE. */
#define GO_IF_LEGITIMATE_ADDRESS(MODE, ADDR, LABEL)			\
  do {									\
    rtx xinsn = (ADDR);							\
									\
    /* allow constant pool addresses */					\
    if ((MODE) != BLKmode && GET_MODE_SIZE (MODE) >= UNITS_PER_WORD	\
	&& !TARGET_CONST16 && constantpool_address_p (xinsn))		\
      goto LABEL;							\
									\
    while (GET_CODE (xinsn) == SUBREG)					\
      xinsn = SUBREG_REG (xinsn);					\
									\
    /* allow base registers */						\
    if (GET_CODE (xinsn) == REG && REG_OK_FOR_BASE_P (xinsn))		\
      goto LABEL;							\
									\
    /* check for "register + offset" addressing */			\
    if (GET_CODE (xinsn) == PLUS)					\
      {									\
	rtx xplus0 = XEXP (xinsn, 0);					\
	rtx xplus1 = XEXP (xinsn, 1);					\
	enum rtx_code code0;						\
	enum rtx_code code1;						\
									\
	while (GET_CODE (xplus0) == SUBREG)				\
	  xplus0 = SUBREG_REG (xplus0);					\
	code0 = GET_CODE (xplus0);					\
									\
	while (GET_CODE (xplus1) == SUBREG)				\
	  xplus1 = SUBREG_REG (xplus1);					\
	code1 = GET_CODE (xplus1);					\
									\
	/* swap operands if necessary so the register is first */	\
	if (code0 != REG && code1 == REG)				\
	  {								\
	    xplus0 = XEXP (xinsn, 1);					\
	    xplus1 = XEXP (xinsn, 0);					\
	    code0 = GET_CODE (xplus0);					\
	    code1 = GET_CODE (xplus1);					\
	  }								\
									\
	if (code0 == REG && REG_OK_FOR_BASE_P (xplus0)			\
	    && code1 == CONST_INT					\
	    && xtensa_mem_offset (INTVAL (xplus1), (MODE)))		\
	  {								\
	    goto LABEL;							\
	  }								\
      }									\
  } while (0)
|
/* A C expression that is 1 if the RTX X is a constant which is a
   valid address. This is defined to be the same as 'CONSTANT_P (X)',
   but rejecting CONST_DOUBLE. */
#define CONSTANT_ADDRESS_P(X)						\
  ((GET_CODE (X) == LABEL_REF || GET_CODE (X) == SYMBOL_REF		\
    || GET_CODE (X) == CONST_INT || GET_CODE (X) == HIGH		\
    || (GET_CODE (X) == CONST)))

/* Nonzero if the constant value X is a legitimate general operand.
   It is given that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
#define LEGITIMATE_CONSTANT_P(X) 1

/* A C expression that is nonzero if X is a legitimate immediate
   operand on the target machine when generating position independent
   code. Symbolic references must go through the GOT/PLT, so only
   local, non-external symbols (and no labels or CONSTs) qualify. */
#define LEGITIMATE_PIC_OPERAND_P(X)					\
  ((GET_CODE (X) != SYMBOL_REF						\
    || (SYMBOL_REF_LOCAL_P (X) && !SYMBOL_REF_EXTERNAL_P (X)))		\
   && GET_CODE (X) != LABEL_REF						\
   && GET_CODE (X) != CONST)

/* Tell GCC how to use ADDMI to generate addresses: split an
   out-of-range "reg + const" into reg + (const & ~0xff) via ADDMI,
   leaving a low byte (const & 0xff) that fits the load/store offset. */
#define LEGITIMIZE_ADDRESS(X, OLDX, MODE, WIN)				\
  do {									\
    rtx xinsn = (X);							\
    if (GET_CODE (xinsn) == PLUS)					\
      { 								\
	rtx plus0 = XEXP (xinsn, 0);					\
	rtx plus1 = XEXP (xinsn, 1);					\
									\
	if (GET_CODE (plus0) != REG && GET_CODE (plus1) == REG)		\
	  {								\
	    plus0 = XEXP (xinsn, 1);					\
	    plus1 = XEXP (xinsn, 0);					\
	  }								\
									\
	if (GET_CODE (plus0) == REG					\
	    && GET_CODE (plus1) == CONST_INT				\
	    && !xtensa_mem_offset (INTVAL (plus1), MODE)		\
	    && !xtensa_simm8 (INTVAL (plus1))				\
	    && xtensa_mem_offset (INTVAL (plus1) & 0xff, MODE)		\
	    && xtensa_simm8x256 (INTVAL (plus1) & ~0xff))		\
	  {								\
	    rtx temp = gen_reg_rtx (Pmode);				\
	    emit_insn (gen_rtx_SET (Pmode, temp,			\
				    gen_rtx_PLUS (Pmode, plus0,		\
					 GEN_INT (INTVAL (plus1) & ~0xff)))); \
	    (X) = gen_rtx_PLUS (Pmode, temp,				\
				GEN_INT (INTVAL (plus1) & 0xff));	\
	    goto WIN;							\
	  }								\
      }									\
  } while (0)
|
|
/* Treat constant-pool references as "mode dependent" since they can |
only be accessed with SImode loads. This works around a bug in the |
combiner where a constant pool reference is temporarily converted |
to an HImode load, which is then assumed to zero-extend based on |
our definition of LOAD_EXTEND_OP. This is wrong because the high |
bits of a 16-bit value in the constant pool are now sign-extended |
by default. */ |
|
#define GO_IF_MODE_DEPENDENT_ADDRESS(ADDR, LABEL) \ |
do { \ |
if (constantpool_address_p (ADDR)) \ |
goto LABEL; \ |
} while (0) |
|
/* Specify the machine mode that this machine uses |
for the index in the tablejump instruction. */ |
#define CASE_VECTOR_MODE (SImode) |
|
/* Define this as 1 if 'char' should by default be signed; else as 0. */ |
#define DEFAULT_SIGNED_CHAR 0 |
|
/* Max number of bytes we can move from memory to memory |
in one reasonably fast instruction. */ |
#define MOVE_MAX 4 |
#define MAX_MOVE_MAX 4 |
|
/* Prefer word-sized loads. */ |
#define SLOW_BYTE_ACCESS 1 |
|
/* Shift instructions ignore all but the low-order few bits. */ |
#define SHIFT_COUNT_TRUNCATED 1 |
|
/* Value is 1 if truncating an integer of INPREC bits to OUTPREC bits |
is done just by pretending it is already truncated. */ |
#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1 |
|
/* Specify the machine mode that pointers have. |
After generation of rtl, the compiler makes no further distinction |
between pointers and any other objects of this machine mode. */ |
#define Pmode SImode |
|
/* A function address in a call instruction is a word address (for |
indexing purposes) so give the MEM rtx a words's mode. */ |
#define FUNCTION_MODE SImode |
|
/* A C expression for the cost of moving data from a register in |
class FROM to one in class TO. The classes are expressed using |
the enumeration values such as 'GENERAL_REGS'. A value of 2 is |
the default; other values are interpreted relative to that. */ |
#define REGISTER_MOVE_COST(MODE, FROM, TO) \ |
(((FROM) == (TO) && (FROM) != BR_REGS && (TO) != BR_REGS) \ |
? 2 \ |
: (reg_class_subset_p ((FROM), AR_REGS) \ |
&& reg_class_subset_p ((TO), AR_REGS) \ |
? 2 \ |
: (reg_class_subset_p ((FROM), AR_REGS) \ |
&& (TO) == ACC_REG \ |
? 3 \ |
: ((FROM) == ACC_REG \ |
&& reg_class_subset_p ((TO), AR_REGS) \ |
? 3 \ |
: 10)))) |
|
#define MEMORY_MOVE_COST(MODE, CLASS, IN) 4 |
|
#define BRANCH_COST 3 |
|
/* How to refer to registers in assembler output.
   This sequence is indexed by compiler's hard-register-number (see above):
   0-15 = AR file (1 = sp), 16-17 = phony frame/arg pointers, 18 = boolean
   register b0, 19-34 = FP registers, 35 = MAC16 accumulator. */
#define REGISTER_NAMES							\
{									\
  "a0",   "sp",   "a2",   "a3",   "a4",   "a5",   "a6",   "a7",		\
  "a8",   "a9",   "a10",  "a11",  "a12",  "a13",  "a14",  "a15",	\
  "fp",   "argp", "b0",							\
  "f0",   "f1",   "f2",   "f3",   "f4",   "f5",   "f6",   "f7",		\
  "f8",   "f9",   "f10",  "f11",  "f12",  "f13",  "f14",  "f15", "acc"	\
}

/* If defined, a C initializer for an array of structures containing a
   name and a register number. This macro defines additional names
   for hard registers, thus allowing the 'asm' option in declarations
   to refer to registers using alternate names. ("a1" is the
   architectural name for the stack pointer.) */
#define ADDITIONAL_REGISTER_NAMES					\
{									\
  { "a1",	 1 + GP_REG_FIRST }					\
}

#define PRINT_OPERAND(FILE, X, CODE) print_operand (FILE, X, CODE)
#define PRINT_OPERAND_ADDRESS(FILE, ADDR) print_operand_address (FILE, ADDR)
|
/* Recognize machine-specific patterns that may appear within
   constants. Used for PIC-specific UNSPECs: UNSPEC_PLT prints as
   "symbol@PLT". The bare "break" exits the enclosing do-while. */
#define OUTPUT_ADDR_CONST_EXTRA(STREAM, X, FAIL)			\
  do {									\
    if (flag_pic && GET_CODE (X) == UNSPEC && XVECLEN ((X), 0) == 1)	\
      {									\
	switch (XINT ((X), 1))						\
	  {								\
	  case UNSPEC_PLT:						\
	    output_addr_const ((STREAM), XVECEXP ((X), 0, 0));		\
	    fputs ("@PLT", (STREAM));					\
	    break;							\
	  default:							\
	    goto FAIL;							\
	  }								\
	break;								\
      }									\
    else								\
      goto FAIL;							\
  } while (0)

/* Globalizing directive for a label. */
#define GLOBAL_ASM_OP "\t.global\t"

/* Declare an uninitialized external linkage data object. */
#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
  asm_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN)

/* This is how to output an element of a case-vector that is absolute. */
#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM, VALUE)				\
  fprintf (STREAM, "%s%sL%u\n", integer_asm_op (4, TRUE),		\
	   LOCAL_LABEL_PREFIX, VALUE)

/* This is how to output an element of a case-vector that is relative.
   This is used for pc-relative code. */
#define ASM_OUTPUT_ADDR_DIFF_ELT(STREAM, BODY, VALUE, REL)		\
  do {									\
    fprintf (STREAM, "%s%sL%u-%sL%u\n",	integer_asm_op (4, TRUE),	\
	     LOCAL_LABEL_PREFIX, (VALUE),				\
	     LOCAL_LABEL_PREFIX, (REL));				\
  } while (0)

/* This is how to output an assembler line that says to advance the
   location counter to a multiple of 2**LOG bytes. Note the Xtensa
   ".align" directive takes a byte count, not a power of two. */
#define ASM_OUTPUT_ALIGN(STREAM, LOG)					\
  do {									\
    if ((LOG) != 0)							\
      fprintf (STREAM, "\t.align\t%d\n", 1 << (LOG));			\
  } while (0)

/* Indicate that jump tables go in the text section. This is
   necessary when compiling PIC code. */
#define JUMP_TABLES_IN_TEXT_SECTION (flag_pic)
|
|
/* Define the strings to put out for each section in the object file. */
#define TEXT_SECTION_ASM_OP	"\t.text"
#define DATA_SECTION_ASM_OP	"\t.data"
#define BSS_SECTION_ASM_OP	"\t.section\t.bss"


/* Define output to appear before the constant pool: make sure literals
   are placed in the function's own section. */
#define ASM_OUTPUT_POOL_PROLOGUE(FILE, FUNNAME, FUNDECL, SIZE)          \
  do {									\
    if ((SIZE) > 0)							\
      {									\
	resolve_unique_section ((FUNDECL), 0, flag_function_sections);	\
	switch_to_section (function_section (FUNDECL));			\
	fprintf (FILE, "\t.literal_position\n");			\
      }									\
  } while (0)


/* A C statement (with or without semicolon) to output a constant in
   the constant pool, if it needs special treatment. */
#define ASM_OUTPUT_SPECIAL_POOL_ENTRY(FILE, X, MODE, ALIGN, LABELNO, JUMPTO) \
  do {									\
    xtensa_output_literal (FILE, X, MODE, LABELNO);			\
    goto JUMPTO;							\
  } while (0)

/* How to start an assembler comment. */
#define ASM_COMMENT_START "#"

/* Exception handling TODO!! */
#define DWARF_UNWIND_INFO 0

/* Xtensa constant pool breaks the devices in crtstuff.c to control
   section in where code resides. We have to write it as asm code. Use
   a MOVI and let the assembler relax it -- for the .init and .fini
   sections, the assembler knows to put the literal in the right
   place. */
#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \
    asm (SECTION_OP "\n\
	movi\ta8, " USER_LABEL_PREFIX #FUNC "\n\
	callx8\ta8\n" \
	TEXT_SECTION_ASM_OP);
/lib2funcs.S
0,0 → 1,190
/* Assembly functions for libgcc2. |
Copyright (C) 2001 Free Software Foundation, Inc. |
Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. |
|
This file is part of GCC. |
|
GCC is free software; you can redistribute it and/or modify it under |
the terms of the GNU General Public License as published by the Free |
Software Foundation; either version 2, or (at your option) any later |
version. |
|
In addition to the permissions in the GNU General Public License, the |
Free Software Foundation gives you unlimited permission to link the |
compiled version of this file into combinations with other programs, |
and to distribute those combinations without any restriction coming |
from the use of this file. (The General Public License restrictions |
do apply in other respects; for example, they cover modification of |
the file, and distribution when not linked into a combine |
executable.) |
|
GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
WARRANTY; without even the implied warranty of MERCHANTABILITY or |
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
for more details. |
|
You should have received a copy of the GNU General Public License |
along with GCC; see the file COPYING. If not, write to the Free |
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA |
02110-1301, USA. */ |
|
#include "xtensa-config.h" |
|
/* __xtensa_libgcc_window_spill: This function flushes out all but the
   current register window.  This is used to set up the stack so that
   arbitrary frames can be accessed.

   In:  nothing (no arguments)
   Out: nothing; side effect is that all caller register windows are
	spilled to their stack save areas.  */

	.align	4
	.global	__xtensa_libgcc_window_spill
	.type	__xtensa_libgcc_window_spill,@function
__xtensa_libgcc_window_spill:
	entry	sp, 32
	movi	a2, 0		# a2 = 0 selects the spill-windows syscall
	syscall
	retw
	.size	__xtensa_libgcc_window_spill,.-__xtensa_libgcc_window_spill
|
|
/* __xtensa_nonlocal_goto: This code does all the hard work of a
   nonlocal goto on Xtensa.  It is here in the library to avoid the
   code size bloat of generating it in-line.  There are two
   arguments:

	a2 = frame pointer for the procedure containing the label
	a3 = goto handler address

   This function never returns to its caller but instead goes directly
   to the address of the specified goto handler.  */

	.align	4
	.global	__xtensa_nonlocal_goto
	.type	__xtensa_nonlocal_goto,@function
__xtensa_nonlocal_goto:
	entry	sp, 32

	/* flush registers (a2 is the syscall argument, so temporarily
	   move the frame pointer out of the way in a5) */
	mov	a5, a2
	movi	a2, 0		# a2 = 0 selects the spill-windows syscall
	syscall
	mov	a2, a5		# restore the frame pointer argument

	/* Because the save area for a0-a3 is stored one frame below
	   the one identified by a2, the only way to restore those
	   registers is to unwind the stack.  If alloca() were never
	   called, we could just unwind until finding the sp value
	   matching a2.  However, a2 is a frame pointer, not a stack
	   pointer, and may not be encountered during the unwinding.
	   The solution is to unwind until going _past_ the value
	   given by a2.  This involves keeping three stack pointer
	   values during the unwinding:

		next = sp of frame N-1
		cur = sp of frame N
		prev = sp of frame N+1

	   When next > a2, the desired save area is stored relative
	   to prev.  At this point, cur will be the same as a2
	   except in the alloca() case.

	   Besides finding the values to be restored to a0-a3, we also
	   need to find the current window size for the target
	   function.  This can be extracted from the high bits of the
	   return address, initially in a0.  As the unwinding
	   proceeds, the window size is taken from the value of a0
	   saved _two_ frames below the current frame.  */

	addi	a5, sp, -16	# a5 = prev - save area
	l32i	a6, a5, 4
	addi	a6, a6, -16	# a6 = cur - save area
	mov	a8, a0		# a8 = return address (for window size)
	j	.Lfirstframe

.Lnextframe:
	l32i	a8, a5, 0	# next return address (for window size)
	mov	a5, a6		# advance prev
	addi	a6, a7, -16	# advance cur
.Lfirstframe:
	l32i	a7, a6, 4	# a7 = next
	bge	a2, a7, .Lnextframe

	/* At this point, prev (a5) points to the save area with the saved
	   values of a0-a3.  Copy those values into the save area at the
	   current sp so they will be reloaded when the return from this
	   function underflows.  We don't have to worry about exceptions
	   while updating the current save area, because the windows have
	   already been flushed.  */

	addi	a4, sp, -16	# a4 = save area of this function
	l32i	a6, a5, 0
	l32i	a7, a5, 4
	s32i	a6, a4, 0
	s32i	a7, a4, 4
	l32i	a6, a5, 8
	l32i	a7, a5, 12
	s32i	a6, a4, 8
	s32i	a7, a4, 12

	/* Set return address to goto handler.  Use the window size bits
	   from the return address two frames below the target.  */
	extui	a8, a8, 30, 2	# get window size from return addr.
	slli	a3, a3, 2	# get goto handler addr. << 2
	ssai	2
	src	a0, a8, a3	# combine them with a funnel shift

	retw
	.size	__xtensa_nonlocal_goto,.-__xtensa_nonlocal_goto
|
|
/* __xtensa_sync_caches: This function is called after writing a trampoline
   on the stack to force all the data writes to memory and invalidate the
   instruction cache.  a2 is the address of the new trampoline.

   After the trampoline data is written out, it must be flushed out of
   the data cache into memory.  We use DHWB in case we have a writeback
   cache.  At least one DHWB instruction is needed for each data cache
   line which may be touched by the trampoline.  An ISYNC instruction
   must follow the DHWBs.

   We have to flush the i-cache to make sure that the new values get used.
   At least one IHI instruction is needed for each i-cache line which may
   be touched by the trampoline.  An ISYNC instruction is also needed to
   make sure that the modified instructions are loaded into the instruction
   fetch buffer.  */

/* Byte size of the trampoline region to synchronize.  NOTE(review):
   presumably this must match the compiler's trampoline template size --
   confirm against TRAMPOLINE_SIZE in the target headers.  */
#define TRAMPOLINE_SIZE 60

	.text
	.align	4
	.global	__xtensa_sync_caches
	.type	__xtensa_sync_caches,@function
__xtensa_sync_caches:
	entry	sp, 32
#if XCHAL_DCACHE_SIZE > 0
	# Flush the trampoline from the data cache
	extui	a4, a2, 0, XCHAL_DCACHE_LINEWIDTH	# offset within first line
	addi	a4, a4, TRAMPOLINE_SIZE
	addi	a4, a4, (1 << XCHAL_DCACHE_LINEWIDTH) - 1	# round up to a full line
	srli	a4, a4, XCHAL_DCACHE_LINEWIDTH	# a4 = number of lines to flush
	mov	a3, a2
.Ldcache_loop:
	dhwb	a3, 0		# write back the d-cache line at a3
	addi	a3, a3, (1 << XCHAL_DCACHE_LINEWIDTH)
	addi	a4, a4, -1
	bnez	a4, .Ldcache_loop
	isync
#endif
#if XCHAL_ICACHE_SIZE > 0
	# Invalidate the corresponding lines in the instruction cache
	extui	a4, a2, 0, XCHAL_ICACHE_LINEWIDTH	# offset within first line
	addi	a4, a4, TRAMPOLINE_SIZE
	addi	a4, a4, (1 << XCHAL_ICACHE_LINEWIDTH) - 1	# round up to a full line
	srli	a4, a4, XCHAL_ICACHE_LINEWIDTH	# a4 = number of lines to invalidate
.Licache_loop:
	ihi	a2, 0		# invalidate the i-cache line at a2
	addi	a2, a2, (1 << XCHAL_ICACHE_LINEWIDTH)
	addi	a4, a4, -1
	bnez	a4, .Licache_loop
	isync
#endif
	retw
	.size	__xtensa_sync_caches,.-__xtensa_sync_caches
/elf.h
0,0 → 1,101
/* Xtensa/Elf configuration. |
Derived from the configuration for GCC for Intel i386 running Linux. |
Copyright (C) 2001,2003, 2007 Free Software Foundation, Inc. |
|
This file is part of GCC. |
|
GCC is free software; you can redistribute it and/or modify it under |
the terms of the GNU General Public License as published by the Free |
Software Foundation; either version 3, or (at your option) any later |
version. |
|
GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
WARRANTY; without even the implied warranty of MERCHANTABILITY or |
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
for more details. |
|
You should have received a copy of the GNU General Public License |
along with GCC; see the file COPYING3. If not see |
<http://www.gnu.org/licenses/>. */ |
|
/* Use the Xtensa-specific hook to compute section attribute flags.
   NOTE(review): the name suggests special handling for multiple
   .bss-style sections -- see xtensa_multibss_section_type_flags.  */
#define TARGET_SECTION_TYPE_FLAGS xtensa_multibss_section_type_flags

/* Don't assume anything about the header files.  */
#define NO_IMPLICIT_EXTERN_C

/* Assembler pseudo-comments bracketing inline-asm regions; the Xtensa
   assembler comment character is "#".  */
#undef ASM_APP_ON
#define ASM_APP_ON "#APP\n"

#undef ASM_APP_OFF
#define ASM_APP_OFF "#NO_APP\n"

#undef MD_EXEC_PREFIX
#undef MD_STARTFILE_PREFIX

#undef TARGET_VERSION
#define TARGET_VERSION fputs (" (Xtensa/ELF)", stderr);

/* This target uses a 16-bit wchar_t.  */
#undef WCHAR_TYPE
#define WCHAR_TYPE "short unsigned int"

#undef WCHAR_TYPE_SIZE
#define WCHAR_TYPE_SIZE 16

/* Pass the Xtensa-specific -m options through to the assembler.  */
#undef ASM_SPEC
#define ASM_SPEC \
 "%{v} \
  %{mtext-section-literals:--text-section-literals} \
  %{mno-text-section-literals:--no-text-section-literals} \
  %{mtarget-align:--target-align} \
  %{mno-target-align:--no-target-align} \
  %{mlongcalls:--longcalls} \
  %{mno-longcalls:--no-longcalls}"

/* Libraries for the simulator target.  NOTE(review): -lc appears
   twice, presumably to resolve cross-references between libc and
   libsim in a single-pass link -- confirm before reordering.  */
#undef LIB_SPEC
#define LIB_SPEC "-lc -lsim -lc -lhandlers-sim -lhal"

/* Startup objects: simulator crt1, crt0, .init prologue (crti),
   crtbegin, and the interrupt/exception vectors.  */
#undef STARTFILE_SPEC
#define STARTFILE_SPEC \
  "crt1-sim%O%s crt0%O%s crti%O%s crtbegin%O%s _vectors%O%s"

/* Termination objects: crtend and the .init/.fini epilogue (crtn).  */
#undef ENDFILE_SPEC
#define ENDFILE_SPEC "crtend%O%s crtn%O%s"

#undef LINK_SPEC
#define LINK_SPEC \
 "%{shared:-shared} \
  %{!shared: \
    %{!static: \
      %{rdynamic:-export-dynamic} \
      %{static:-static}}}"

/* Compiler-generated local labels start with "." so the assembler
   does not export them.  */
#undef LOCAL_LABEL_PREFIX
#define LOCAL_LABEL_PREFIX "."

/* Avoid dots for compatibility with VxWorks.  */
#undef NO_DOLLAR_IN_LABEL
#define NO_DOT_IN_LABEL

/* Do not force "-fpic" for this target.  */
#define XTENSA_ALWAYS_PIC 0

/* Search for headers in $tooldir/arch/include and for libraries and
   startfiles in $tooldir/arch/lib.  */
#define GCC_DRIVER_HOST_INITIALIZATION \
  do \
  { \
    char *tooldir, *archdir; \
    tooldir = concat (tooldir_base_prefix, spec_machine, \
		      dir_separator_str, NULL); \
    if (!IS_ABSOLUTE_PATH (tooldir)) \
      tooldir = concat (standard_exec_prefix, spec_machine, dir_separator_str, \
			spec_version, dir_separator_str, tooldir, NULL); \
    archdir = concat (tooldir, "arch", dir_separator_str, NULL); \
    add_prefix (&startfile_prefixes, \
		concat (archdir, "lib", dir_separator_str, NULL), \
		"GCC", PREFIX_PRIORITY_LAST, 0, 1); \
    add_prefix (&include_prefixes, archdir, \
		"GCC", PREFIX_PRIORITY_LAST, 0, 0); \
  } \
  while (0)
|
/ieee754-df.S
0,0 → 1,2365
/* IEEE-754 double-precision functions for Xtensa |
Copyright (C) 2006 Free Software Foundation, Inc. |
Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. |
|
This file is part of GCC. |
|
GCC is free software; you can redistribute it and/or modify it |
under the terms of the GNU General Public License as published by |
the Free Software Foundation; either version 2, or (at your option) |
any later version. |
|
In addition to the permissions in the GNU General Public License, |
the Free Software Foundation gives you unlimited permission to link |
the compiled version of this file into combinations with other |
programs, and to distribute those combinations without any |
restriction coming from the use of this file. (The General Public |
License restrictions do apply in other respects; for example, they |
cover modification of the file, and distribution when not linked |
into a combine executable.) |
|
GCC is distributed in the hope that it will be useful, but WITHOUT |
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public |
License for more details. |
|
You should have received a copy of the GNU General Public License |
along with GCC; see the file COPYING. If not, write to the Free |
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA |
02110-1301, USA. */ |
|
/* Register aliases for the two double-precision operands.  Each double
   occupies a register pair (a2/a3 and a4/a5); which register of a pair
   holds the high word (sign/exponent) depends on endianness.  xh/xl
   name the high/low words of the first operand and yh/yl those of the
   second; results are returned in xh/xl.  */
#ifdef __XTENSA_EB__
#define xh a2
#define xl a3
#define yh a4
#define yl a5
#else
#define xh a3
#define xl a2
#define yh a5
#define yl a4
#endif
|
/* Warning! The branch displacements for some Xtensa branch instructions |
are quite small, and this code has been carefully laid out to keep |
branch targets in range. If you change anything, be sure to check that |
the assembler is not relaxing anything to branch over a jump. */ |
|
#ifdef L_negdf2

/* double __negdf2 (double x)
   In:  xh/xl = x
   Out: xh/xl = -x  (sign bit toggled; works for NaN/Inf/zero too,
	since only the sign bit changes)  */
	.align	4
	.global	__negdf2
	.type	__negdf2, @function
__negdf2:
	leaf_entry sp, 16
	movi	a4, 0x80000000	# sign-bit mask
	xor	xh, xh, a4	# flip the sign of x
	leaf_return

#endif /* L_negdf2 */
|
#ifdef L_addsubdf3

/* Addition */
__adddf3_aux:

	/* Handle NaNs and Infinities.  (This code is placed before the
	   start of the function just to keep it in range of the limited
	   branch displacements.)  */

.Ladd_xnan_or_inf:
	/* If y is neither Infinity nor NaN, return x.  */
	bnall	yh, a6, 1f
	/* If x is a NaN, return it.  Otherwise, return y.  */
	slli	a7, xh, 12	# a7 = mantissa bits of xh (drop sign/exp)
	or	a7, a7, xl
	beqz	a7, .Ladd_ynan_or_inf
1:	leaf_return

.Ladd_ynan_or_inf:
	/* Return y.  */
	mov	xh, yh
	mov	xl, yl
	leaf_return

.Ladd_opposite_signs:
	/* Operand signs differ.  Do a subtraction.  */
	slli	a7, a6, 11	# a7 = 0x80000000 (sign-bit mask)
	xor	yh, yh, a7	# negate y
	j	.Lsub_same_sign

/* double __adddf3 (double x, double y)
   In:  xh/xl = x, yh/yl = y  (endian-dependent aliases for a2-a5
	defined at the top of this file)
   Out: xh/xl = x + y, rounded to nearest, ties to even.
   a6 holds the exponent-field mask 0x7ff00000 throughout.  */
	.align	4
	.global	__adddf3
	.type	__adddf3, @function
__adddf3:
	leaf_entry sp, 16
	movi	a6, 0x7ff00000

	/* Check if the two operands have the same sign.  */
	xor	a7, xh, yh
	bltz	a7, .Ladd_opposite_signs

.Ladd_same_sign:
	/* Check if either exponent == 0x7ff (i.e., NaN or Infinity).  */
	ball	xh, a6, .Ladd_xnan_or_inf
	ball	yh, a6, .Ladd_ynan_or_inf

	/* Compare the exponents.  The smaller operand will be shifted
	   right by the exponent difference and added to the larger
	   one.  */
	extui	a7, xh, 20, 12	# a7 = sign+exponent of x
	extui	a8, yh, 20, 12	# a8 = sign+exponent of y
	bltu	a7, a8, .Ladd_shiftx

.Ladd_shifty:
	/* Check if the smaller (or equal) exponent is zero.  */
	bnone	yh, a6, .Ladd_yexpzero

	/* Replace yh sign/exponent with 0x001 (the implicit "1.0").  */
	or	yh, yh, a6
	slli	yh, yh, 11
	srli	yh, yh, 11

.Ladd_yexpdiff:
	/* Compute the exponent difference.  Optimize for difference < 32.  */
	sub	a10, a7, a8
	bgeui	a10, 32, .Ladd_bigshifty

	/* Shift yh/yl right by the exponent difference.  Any bits that are
	   shifted out of yl are saved in a9 for rounding the result.  */
	ssr	a10
	movi	a9, 0
	src	a9, yl, a9
	src	yl, yh, yl
	srl	yh, yh

.Ladd_addy:
	/* Do the 64-bit addition.  */
	add	xl, xl, yl
	add	xh, xh, yh
	bgeu	xl, yl, 1f
	addi	xh, xh, 1	# carry from the low word
1:
	/* Check if the add overflowed into the exponent.  */
	extui	a10, xh, 20, 12
	beq	a10, a7, .Ladd_round
	mov	a8, a7
	j	.Ladd_carry

.Ladd_yexpzero:
	/* y is a subnormal value.  Replace its sign/exponent with zero,
	   i.e., no implicit "1.0", and increment the apparent exponent
	   because subnormals behave as if they had the minimum (nonzero)
	   exponent.  Test for the case when both exponents are zero.  */
	slli	yh, yh, 12
	srli	yh, yh, 12
	bnone	xh, a6, .Ladd_bothexpzero
	addi	a8, a8, 1
	j	.Ladd_yexpdiff

.Ladd_bothexpzero:
	/* Both exponents are zero.  Handle this as a special case.  There
	   is no need to shift or round, and the normal code for handling
	   a carry into the exponent field will not work because it
	   assumes there is an implicit "1.0" that needs to be added.  */
	add	xl, xl, yl
	add	xh, xh, yh
	bgeu	xl, yl, 1f
	addi	xh, xh, 1
1:	leaf_return

.Ladd_bigshifty:
	/* Exponent difference >= 64 -- just return the bigger value.  */
	bgeui	a10, 64, 1b

	/* Shift yh/yl right by the exponent difference.  Any bits that are
	   shifted out are saved in a9 for rounding the result.  */
	ssr	a10
	sll	a11, yl		/* lost bits shifted out of yl */
	src	a9, yh, yl
	srl	yl, yh
	movi	yh, 0
	beqz	a11, .Ladd_addy
	or	a9, a9, a10	/* any positive, nonzero value will work */
	j	.Ladd_addy

.Ladd_xexpzero:
	/* Same as "yexpzero" except skip handling the case when both
	   exponents are zero.  */
	slli	xh, xh, 12
	srli	xh, xh, 12
	addi	a7, a7, 1
	j	.Ladd_xexpdiff

.Ladd_shiftx:
	/* Same thing as the "shifty" code, but with x and y swapped.  Also,
	   because the exponent difference is always nonzero in this version,
	   the shift sequence can use SLL and skip loading a constant zero.  */
	bnone	xh, a6, .Ladd_xexpzero

	or	xh, xh, a6
	slli	xh, xh, 11
	srli	xh, xh, 11

.Ladd_xexpdiff:
	sub	a10, a8, a7
	bgeui	a10, 32, .Ladd_bigshiftx

	ssr	a10
	sll	a9, xl		# a9 = rounding bits shifted out of xl
	src	xl, xh, xl
	srl	xh, xh

.Ladd_addx:
	add	xl, xl, yl
	add	xh, xh, yh
	bgeu	xl, yl, 1f
	addi	xh, xh, 1	# carry from the low word
1:
	/* Check if the add overflowed into the exponent.  */
	extui	a10, xh, 20, 12
	bne	a10, a8, .Ladd_carry

.Ladd_round:
	/* Round up if the leftover fraction is >= 1/2 (msb of a9 set).  */
	bgez	a9, 1f
	addi	xl, xl, 1
	beqz	xl, .Ladd_roundcarry

	/* Check if the leftover fraction is exactly 1/2.  */
	slli	a9, a9, 1
	beqz	a9, .Ladd_exactlyhalf
1:	leaf_return

.Ladd_bigshiftx:
	/* Mostly the same thing as "bigshifty"....  */
	bgeui	a10, 64, .Ladd_returny

	ssr	a10
	sll	a11, xl
	src	a9, xh, xl
	srl	xl, xh
	movi	xh, 0
	beqz	a11, .Ladd_addx
	or	a9, a9, a10	/* any positive, nonzero value will work */
	j	.Ladd_addx

.Ladd_returny:
	mov	xh, yh
	mov	xl, yl
	leaf_return

.Ladd_carry:
	/* The addition has overflowed into the exponent field, so the
	   value needs to be renormalized.  The mantissa of the result
	   can be recovered by subtracting the original exponent and
	   adding 0x100000 (which is the explicit "1.0" for the
	   mantissa of the non-shifted operand -- the "1.0" for the
	   shifted operand was already added).  The mantissa can then
	   be shifted right by one bit.  The explicit "1.0" of the
	   shifted mantissa then needs to be replaced by the exponent,
	   incremented by one to account for the normalizing shift.
	   It is faster to combine these operations: do the shift first
	   and combine the additions and subtractions.  If x is the
	   original exponent, the result is:
	       shifted mantissa - (x << 19) + (1 << 19) + (x << 20)
	   or:
	       shifted mantissa + ((x + 1) << 19)
	   Note that the exponent is incremented here by leaving the
	   explicit "1.0" of the mantissa in the exponent field.  */

	/* Shift xh/xl right by one bit.  Save the lsb of xl.  */
	mov	a10, xl
	ssai	1
	src	xl, xh, xl
	srl	xh, xh

	/* See explanation above.  The original exponent is in a8.  */
	addi	a8, a8, 1
	slli	a8, a8, 19
	add	xh, xh, a8

	/* Return an Infinity if the exponent overflowed.  */
	ball	xh, a6, .Ladd_infinity

	/* Same thing as the "round" code except the msb of the leftover
	   fraction is bit 0 of a10, with the rest of the fraction in a9.  */
	bbci.l	a10, 0, 1f
	addi	xl, xl, 1
	beqz	xl, .Ladd_roundcarry
	beqz	a9, .Ladd_exactlyhalf
1:	leaf_return

.Ladd_infinity:
	/* Clear the mantissa.  */
	movi	xl, 0
	srli	xh, xh, 20
	slli	xh, xh, 20

	/* The sign bit may have been lost in a carry-out.  Put it back.  */
	slli	a8, a8, 1
	or	xh, xh, a8
	leaf_return

.Ladd_exactlyhalf:
	/* Round down to the nearest even value (clear the lsb of xl).  */
	srli	xl, xl, 1
	slli	xl, xl, 1
	leaf_return

.Ladd_roundcarry:
	/* xl is always zero when the rounding increment overflows, so
	   there's no need to round it to an even value.  */
	addi	xh, xh, 1
	/* Overflow to the exponent is OK.  */
	leaf_return
|
|
/* Subtraction */
__subdf3_aux:

	/* Handle NaNs and Infinities.  (This code is placed before the
	   start of the function just to keep it in range of the limited
	   branch displacements.)  */

.Lsub_xnan_or_inf:
	/* If y is neither Infinity nor NaN, return x.  */
	bnall	yh, a6, 1f
	/* Both x and y are either NaN or Inf, so the result is NaN.  */
	movi	a4, 0x80000	/* make it a quiet NaN */
	or	xh, xh, a4
1:	leaf_return

.Lsub_ynan_or_inf:
	/* Negate y and return it.  */
	slli	a7, a6, 11	# a7 = 0x80000000 (sign-bit mask)
	xor	xh, yh, a7
	mov	xl, yl
	leaf_return

.Lsub_opposite_signs:
	/* Operand signs differ.  Do an addition.  */
	slli	a7, a6, 11	# a7 = 0x80000000 (sign-bit mask)
	xor	yh, yh, a7	# negate y
	j	.Ladd_same_sign

/* double __subdf3 (double x, double y)
   In:  xh/xl = x, yh/yl = y  (endian-dependent aliases for a2-a5
	defined at the top of this file)
   Out: xh/xl = x - y, rounded to nearest, ties to even.
   a6 holds the exponent-field mask 0x7ff00000 throughout.  */
	.align	4
	.global	__subdf3
	.type	__subdf3, @function
__subdf3:
	leaf_entry sp, 16
	movi	a6, 0x7ff00000

	/* Check if the two operands have the same sign.  */
	xor	a7, xh, yh
	bltz	a7, .Lsub_opposite_signs

.Lsub_same_sign:
	/* Check if either exponent == 0x7ff (i.e., NaN or Infinity).  */
	ball	xh, a6, .Lsub_xnan_or_inf
	ball	yh, a6, .Lsub_ynan_or_inf

	/* Compare the operands.  In contrast to addition, the entire
	   value matters here.  */
	extui	a7, xh, 20, 11	# a7 = exponent of x
	extui	a8, yh, 20, 11	# a8 = exponent of y
	bltu	xh, yh, .Lsub_xsmaller
	beq	xh, yh, .Lsub_compare_low

.Lsub_ysmaller:
	/* Check if the smaller (or equal) exponent is zero.  */
	bnone	yh, a6, .Lsub_yexpzero

	/* Replace yh sign/exponent with 0x001 (the implicit "1.0").  */
	or	yh, yh, a6
	slli	yh, yh, 11
	srli	yh, yh, 11

.Lsub_yexpdiff:
	/* Compute the exponent difference.  Optimize for difference < 32.  */
	sub	a10, a7, a8
	bgeui	a10, 32, .Lsub_bigshifty

	/* Shift yh/yl right by the exponent difference.  Any bits that are
	   shifted out of yl are saved in a9 for rounding the result.  */
	ssr	a10
	movi	a9, 0
	src	a9, yl, a9
	src	yl, yh, yl
	srl	yh, yh

.Lsub_suby:
	/* Do the 64-bit subtraction.  */
	sub	xh, xh, yh
	bgeu	xl, yl, 1f
	addi	xh, xh, -1	# borrow from the low word
1:	sub	xl, xl, yl

	/* Subtract the leftover bits in a9 from zero and propagate any
	   borrow from xh/xl.  */
	neg	a9, a9
	beqz	a9, 1f
	addi	a5, xh, -1
	moveqz	xh, a5, xl	# decrement xh only if xl borrows (xl == 0)
	addi	xl, xl, -1
1:
	/* Check if the subtract underflowed into the exponent.  */
	extui	a10, xh, 20, 11
	beq	a10, a7, .Lsub_round
	j	.Lsub_borrow

.Lsub_compare_low:
	/* The high words are equal.  Compare the low words.  */
	bltu	xl, yl, .Lsub_xsmaller
	bltu	yl, xl, .Lsub_ysmaller
	/* The operands are equal.  Return 0.0.  */
	movi	xh, 0
	movi	xl, 0
1:	leaf_return

.Lsub_yexpzero:
	/* y is a subnormal value.  Replace its sign/exponent with zero,
	   i.e., no implicit "1.0".  Unless x is also a subnormal, increment
	   y's apparent exponent because subnormals behave as if they had
	   the minimum (nonzero) exponent.  */
	slli	yh, yh, 12
	srli	yh, yh, 12
	bnone	xh, a6, .Lsub_yexpdiff
	addi	a8, a8, 1
	j	.Lsub_yexpdiff

.Lsub_bigshifty:
	/* Exponent difference >= 64 -- just return the bigger value.  */
	bgeui	a10, 64, 1b

	/* Shift yh/yl right by the exponent difference.  Any bits that are
	   shifted out are saved in a9 for rounding the result.  */
	ssr	a10
	sll	a11, yl		/* lost bits shifted out of yl */
	src	a9, yh, yl
	srl	yl, yh
	movi	yh, 0
	beqz	a11, .Lsub_suby
	or	a9, a9, a10	/* any positive, nonzero value will work */
	j	.Lsub_suby

.Lsub_xsmaller:
	/* Same thing as the "ysmaller" code, but with x and y swapped and
	   with y negated.  */
	bnone	xh, a6, .Lsub_xexpzero

	or	xh, xh, a6
	slli	xh, xh, 11
	srli	xh, xh, 11

.Lsub_xexpdiff:
	sub	a10, a8, a7
	bgeui	a10, 32, .Lsub_bigshiftx

	ssr	a10
	movi	a9, 0
	src	a9, xl, a9
	src	xl, xh, xl
	srl	xh, xh

	/* Negate y.  */
	slli	a11, a6, 11	# a11 = 0x80000000 (sign-bit mask)
	xor	yh, yh, a11

.Lsub_subx:
	sub	xl, yl, xl
	sub	xh, yh, xh
	bgeu	yl, xl, 1f
	addi	xh, xh, -1	# borrow from the low word
1:
	/* Subtract the leftover bits in a9 from zero and propagate any
	   borrow from xh/xl.  */
	neg	a9, a9
	beqz	a9, 1f
	addi	a5, xh, -1
	moveqz	xh, a5, xl	# decrement xh only if xl borrows (xl == 0)
	addi	xl, xl, -1
1:
	/* Check if the subtract underflowed into the exponent.  */
	extui	a10, xh, 20, 11
	bne	a10, a8, .Lsub_borrow

.Lsub_round:
	/* Round up if the leftover fraction is >= 1/2 (msb of a9 set).  */
	bgez	a9, 1f
	addi	xl, xl, 1
	beqz	xl, .Lsub_roundcarry

	/* Check if the leftover fraction is exactly 1/2.  */
	slli	a9, a9, 1
	beqz	a9, .Lsub_exactlyhalf
1:	leaf_return

.Lsub_xexpzero:
	/* Same as "yexpzero".  */
	slli	xh, xh, 12
	srli	xh, xh, 12
	bnone	yh, a6, .Lsub_xexpdiff
	addi	a7, a7, 1
	j	.Lsub_xexpdiff

.Lsub_bigshiftx:
	/* Mostly the same thing as "bigshifty", but with the sign bit of the
	   shifted value set so that the subsequent subtraction flips the
	   sign of y.  */
	bgeui	a10, 64, .Lsub_returny

	ssr	a10
	sll	a11, xl
	src	a9, xh, xl
	srl	xl, xh
	slli	xh, a6, 11	/* set sign bit of xh */
	beqz	a11, .Lsub_subx
	or	a9, a9, a10	/* any positive, nonzero value will work */
	j	.Lsub_subx

.Lsub_returny:
	/* Negate and return y.  */
	slli	a7, a6, 11	# a7 = 0x80000000 (sign-bit mask)
	xor	xh, yh, a7
	mov	xl, yl
	leaf_return

.Lsub_borrow:
	/* The subtraction has underflowed into the exponent field, so the
	   value needs to be renormalized.  Shift the mantissa left as
	   needed to remove any leading zeros and adjust the exponent
	   accordingly.  If the exponent is not large enough to remove
	   all the leading zeros, the result will be a subnormal value.
	   (do_nsau is a leading-zero-count helper macro defined elsewhere
	   in the Xtensa libgcc support -- it counts into its first
	   operand using the last two operands as temporaries.)  */

	slli	a8, xh, 12	# a8 = mantissa bits of xh
	beqz	a8, .Lsub_xhzero
	do_nsau	a6, a8, a7, a11	# a6 = leading-zero count of a8
	srli	a8, a8, 12
	bge	a6, a10, .Lsub_subnormal
	addi	a6, a6, 1

.Lsub_shift_lt32:
	/* Shift the mantissa (a8/xl/a9) left by a6.  */
	ssl	a6
	src	a8, a8, xl
	src	xl, xl, a9
	sll	a9, a9

	/* Combine the shifted mantissa with the sign and exponent,
	   decrementing the exponent by a6.  (The exponent has already
	   been decremented by one due to the borrow from the subtraction,
	   but adding the mantissa will increment the exponent by one.)  */
	srli	xh, xh, 20
	sub	xh, xh, a6
	slli	xh, xh, 20
	add	xh, xh, a8
	j	.Lsub_round

.Lsub_exactlyhalf:
	/* Round down to the nearest even value (clear the lsb of xl).  */
	srli	xl, xl, 1
	slli	xl, xl, 1
	leaf_return

.Lsub_roundcarry:
	/* xl is always zero when the rounding increment overflows, so
	   there's no need to round it to an even value.  */
	addi	xh, xh, 1
	/* Overflow to the exponent is OK.  */
	leaf_return

.Lsub_xhzero:
	/* When normalizing the result, all the mantissa bits in the high
	   word are zero.  Shift by "20 + (leading zero count of xl) + 1".  */
	do_nsau	a6, xl, a7, a11
	addi	a6, a6, 21
	blt	a10, a6, .Lsub_subnormal

.Lsub_normalize_shift:
	bltui	a6, 32, .Lsub_shift_lt32

	ssl	a6
	src	a8, xl, a9
	sll	xl, a9
	movi	a9, 0

	srli	xh, xh, 20
	sub	xh, xh, a6
	slli	xh, xh, 20
	add	xh, xh, a8
	j	.Lsub_round

.Lsub_subnormal:
	/* The exponent is too small to shift away all the leading zeros.
	   Set a6 to the current exponent (which has already been
	   decremented by the borrow) so that the exponent of the result
	   will be zero.  Do not add 1 to a6 in this case, because: (1)
	   adding the mantissa will not increment the exponent, so there is
	   no need to subtract anything extra from the exponent to
	   compensate, and (2) the effective exponent of a subnormal is 1
	   not 0 so the shift amount must be 1 smaller than normal.  */
	mov	a6, a10
	j	.Lsub_normalize_shift

#endif /* L_addsubdf3 */
|
#ifdef L_muldf3 |
|
/* Multiplication */ |
__muldf3_aux: |
|
/* Handle unusual cases (zeros, subnormals, NaNs and Infinities). |
(This code is placed before the start of the function just to |
keep it in range of the limited branch displacements.) */ |
|
.Lmul_xexpzero: |
/* Clear the sign bit of x. */ |
slli xh, xh, 1 |
srli xh, xh, 1 |
|
/* If x is zero, return zero. */ |
or a10, xh, xl |
beqz a10, .Lmul_return_zero |
|
/* Normalize x. Adjust the exponent in a8. */ |
beqz xh, .Lmul_xh_zero |
do_nsau a10, xh, a11, a12 |
addi a10, a10, -11 |
ssl a10 |
src xh, xh, xl |
sll xl, xl |
movi a8, 1 |
sub a8, a8, a10 |
j .Lmul_xnormalized |
.Lmul_xh_zero: |
do_nsau a10, xl, a11, a12 |
addi a10, a10, -11 |
movi a8, -31 |
sub a8, a8, a10 |
ssl a10 |
bltz a10, .Lmul_xl_srl |
sll xh, xl |
movi xl, 0 |
j .Lmul_xnormalized |
.Lmul_xl_srl: |
srl xh, xl |
sll xl, xl |
j .Lmul_xnormalized |
|
.Lmul_yexpzero: |
/* Clear the sign bit of y. */ |
slli yh, yh, 1 |
srli yh, yh, 1 |
|
/* If y is zero, return zero. */ |
or a10, yh, yl |
beqz a10, .Lmul_return_zero |
|
/* Normalize y. Adjust the exponent in a9. */ |
beqz yh, .Lmul_yh_zero |
do_nsau a10, yh, a11, a12 |
addi a10, a10, -11 |
ssl a10 |
src yh, yh, yl |
sll yl, yl |
movi a9, 1 |
sub a9, a9, a10 |
j .Lmul_ynormalized |
.Lmul_yh_zero: |
do_nsau a10, yl, a11, a12 |
addi a10, a10, -11 |
movi a9, -31 |
sub a9, a9, a10 |
ssl a10 |
bltz a10, .Lmul_yl_srl |
sll yh, yl |
movi yl, 0 |
j .Lmul_ynormalized |
.Lmul_yl_srl: |
srl yh, yl |
sll yl, yl |
j .Lmul_ynormalized |
|
.Lmul_return_zero: |
/* Return zero with the appropriate sign bit. */ |
srli xh, a7, 31 |
slli xh, xh, 31 |
movi xl, 0 |
j .Lmul_done |
|
.Lmul_xnan_or_inf: |
/* If y is zero, return NaN. */ |
bnez yl, 1f |
slli a8, yh, 1 |
bnez a8, 1f |
movi a4, 0x80000 /* make it a quiet NaN */ |
or xh, xh, a4 |
j .Lmul_done |
1: |
/* If y is NaN, return y. */ |
bnall yh, a6, .Lmul_returnx |
slli a8, yh, 12 |
or a8, a8, yl |
beqz a8, .Lmul_returnx |
|
.Lmul_returny: |
mov xh, yh |
mov xl, yl |
|
.Lmul_returnx: |
/* Set the sign bit and return. */ |
extui a7, a7, 31, 1 |
slli xh, xh, 1 |
ssai 1 |
src xh, a7, xh |
j .Lmul_done |
|
.Lmul_ynan_or_inf: |
/* If x is zero, return NaN. */ |
bnez xl, .Lmul_returny |
slli a8, xh, 1 |
bnez a8, .Lmul_returny |
movi a7, 0x80000 /* make it a quiet NaN */ |
or xh, yh, a7 |
j .Lmul_done |
|
.align 4 |
.global __muldf3 |
.type __muldf3, @function |
__muldf3: |
leaf_entry sp, 32 |
#if __XTENSA_CALL0_ABI__ |
addi sp, sp, -32 |
s32i a12, sp, 16 |
s32i a13, sp, 20 |
s32i a14, sp, 24 |
s32i a15, sp, 28 |
#endif |
movi a6, 0x7ff00000 |
|
/* Get the sign of the result. */ |
xor a7, xh, yh |
|
/* Check for NaN and infinity. */ |
ball xh, a6, .Lmul_xnan_or_inf |
ball yh, a6, .Lmul_ynan_or_inf |
|
/* Extract the exponents. */ |
extui a8, xh, 20, 11 |
extui a9, yh, 20, 11 |
|
beqz a8, .Lmul_xexpzero |
.Lmul_xnormalized: |
beqz a9, .Lmul_yexpzero |
.Lmul_ynormalized: |
|
/* Add the exponents. */ |
add a8, a8, a9 |
|
/* Replace sign/exponent fields with explicit "1.0". */ |
movi a10, 0x1fffff |
or xh, xh, a6 |
and xh, xh, a10 |
or yh, yh, a6 |
and yh, yh, a10 |
|
/* Multiply 64x64 to 128 bits. The result ends up in xh/xl/a6. |
The least-significant word of the result is thrown away except |
that if it is nonzero, the lsb of a6 is set to 1. */ |
#if XCHAL_HAVE_MUL32_HIGH |
|
/* Compute a6 with any carry-outs in a10. */ |
movi a10, 0 |
mull a6, xl, yh |
mull a11, xh, yl |
add a6, a6, a11 |
bgeu a6, a11, 1f |
addi a10, a10, 1 |
1: |
muluh a11, xl, yl |
add a6, a6, a11 |
bgeu a6, a11, 1f |
addi a10, a10, 1 |
1: |
/* If the low word of the result is nonzero, set the lsb of a6. */ |
mull a11, xl, yl |
beqz a11, 1f |
movi a9, 1 |
or a6, a6, a9 |
1: |
/* Compute xl with any carry-outs in a9. */ |
movi a9, 0 |
mull a11, xh, yh |
add a10, a10, a11 |
bgeu a10, a11, 1f |
addi a9, a9, 1 |
1: |
muluh a11, xh, yl |
add a10, a10, a11 |
bgeu a10, a11, 1f |
addi a9, a9, 1 |
1: |
muluh xl, xl, yh |
add xl, xl, a10 |
bgeu xl, a10, 1f |
addi a9, a9, 1 |
1: |
/* Compute xh. */ |
muluh xh, xh, yh |
add xh, xh, a9 |
|
#else |
|
/* Break the inputs into 16-bit chunks and compute 16 32-bit partial |
products. These partial products are: |
|
0 xll * yll |
|
1 xll * ylh |
2 xlh * yll |
|
3 xll * yhl |
4 xlh * ylh |
5 xhl * yll |
|
6 xll * yhh |
7 xlh * yhl |
8 xhl * ylh |
9 xhh * yll |
|
10 xlh * yhh |
11 xhl * yhl |
12 xhh * ylh |
|
13 xhl * yhh |
14 xhh * yhl |
|
15 xhh * yhh |
|
where the input chunks are (hh, hl, lh, ll). If using the Mul16 |
or Mul32 multiplier options, these input chunks must be stored in |
separate registers. For Mac16, the UMUL.AA.* opcodes can specify |
that the inputs come from either half of the registers, so there |
is no need to shift them out ahead of time. If there is no |
multiply hardware, the 16-bit chunks can be extracted when setting |
up the arguments to the separate multiply function. */ |
|
/* Save a7 since it is needed to hold a temporary value. */ |
s32i a7, sp, 4 |
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16 |
/* Calling a separate multiply function will clobber a0 and requires |
use of a8 as a temporary, so save those values now. (The function |
uses a custom ABI so nothing else needs to be saved.) */ |
s32i a0, sp, 0 |
s32i a8, sp, 8 |
#endif |
|
#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32 |
|
#define xlh a12 |
#define ylh a13 |
#define xhh a14 |
#define yhh a15 |
|
/* Get the high halves of the inputs into registers. */ |
srli xlh, xl, 16 |
srli ylh, yl, 16 |
srli xhh, xh, 16 |
srli yhh, yh, 16 |
|
#define xll xl |
#define yll yl |
#define xhl xh |
#define yhl yh |
|
#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16 |
/* Clear the high halves of the inputs. This does not matter |
for MUL16 because the high bits are ignored. */ |
extui xl, xl, 0, 16 |
extui xh, xh, 0, 16 |
extui yl, yl, 0, 16 |
extui yh, yh, 0, 16 |
#endif |
#endif /* MUL16 || MUL32 */ |
|
|
#if XCHAL_HAVE_MUL16 |
|
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ |
mul16u dst, xreg ## xhalf, yreg ## yhalf |
|
#elif XCHAL_HAVE_MUL32 |
|
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ |
mull dst, xreg ## xhalf, yreg ## yhalf |
|
#elif XCHAL_HAVE_MAC16 |
|
/* The preprocessor insists on inserting a space when concatenating after |
a period in the definition of do_mul below. These macros are a workaround |
using underscores instead of periods when doing the concatenation. */ |
#define umul_aa_ll umul.aa.ll |
#define umul_aa_lh umul.aa.lh |
#define umul_aa_hl umul.aa.hl |
#define umul_aa_hh umul.aa.hh |
|
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ |
umul_aa_ ## xhalf ## yhalf xreg, yreg; \ |
rsr dst, ACCLO |
|
#else /* no multiply hardware */ |
|
#define set_arg_l(dst, src) \ |
extui dst, src, 0, 16 |
#define set_arg_h(dst, src) \ |
srli dst, src, 16 |
|
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ |
set_arg_ ## xhalf (a13, xreg); \ |
set_arg_ ## yhalf (a14, yreg); \ |
call0 .Lmul_mulsi3; \ |
mov dst, a12 |
#endif |
|
/* Add pp1 and pp2 into a10 with carry-out in a9. */ |
do_mul(a10, xl, l, yl, h) /* pp 1 */ |
do_mul(a11, xl, h, yl, l) /* pp 2 */ |
movi a9, 0 |
add a10, a10, a11 |
bgeu a10, a11, 1f |
addi a9, a9, 1 |
1: |
/* Initialize a6 with a9/a10 shifted into position. Note that |
this value can be safely incremented without any carry-outs. */ |
ssai 16 |
src a6, a9, a10 |
|
/* Compute the low word into a10. */ |
do_mul(a11, xl, l, yl, l) /* pp 0 */ |
sll a10, a10 |
add a10, a10, a11 |
bgeu a10, a11, 1f |
addi a6, a6, 1 |
1: |
/* Compute the contributions of pp0-5 to a6, with carry-outs in a9. |
This is good enough to determine the low half of a6, so that any |
nonzero bits from the low word of the result can be collapsed |
into a6, freeing up a register. */ |
movi a9, 0 |
do_mul(a11, xl, l, yh, l) /* pp 3 */ |
add a6, a6, a11 |
bgeu a6, a11, 1f |
addi a9, a9, 1 |
1: |
do_mul(a11, xl, h, yl, h) /* pp 4 */ |
add a6, a6, a11 |
bgeu a6, a11, 1f |
addi a9, a9, 1 |
1: |
do_mul(a11, xh, l, yl, l) /* pp 5 */ |
add a6, a6, a11 |
bgeu a6, a11, 1f |
addi a9, a9, 1 |
1: |
/* Collapse any nonzero bits from the low word into a6. */ |
beqz a10, 1f |
movi a11, 1 |
or a6, a6, a11 |
1: |
/* Add pp6-9 into a11 with carry-outs in a10. */ |
do_mul(a7, xl, l, yh, h) /* pp 6 */ |
do_mul(a11, xh, h, yl, l) /* pp 9 */ |
movi a10, 0 |
add a11, a11, a7 |
bgeu a11, a7, 1f |
addi a10, a10, 1 |
1: |
do_mul(a7, xl, h, yh, l) /* pp 7 */ |
add a11, a11, a7 |
bgeu a11, a7, 1f |
addi a10, a10, 1 |
1: |
do_mul(a7, xh, l, yl, h) /* pp 8 */ |
add a11, a11, a7 |
bgeu a11, a7, 1f |
addi a10, a10, 1 |
1: |
/* Shift a10/a11 into position, and add low half of a11 to a6. */ |
src a10, a10, a11 |
add a10, a10, a9 |
sll a11, a11 |
add a6, a6, a11 |
bgeu a6, a11, 1f |
addi a10, a10, 1 |
1: |
/* Add pp10-12 into xl with carry-outs in a9. */ |
movi a9, 0 |
do_mul(xl, xl, h, yh, h) /* pp 10 */ |
add xl, xl, a10 |
bgeu xl, a10, 1f |
addi a9, a9, 1 |
1: |
do_mul(a10, xh, l, yh, l) /* pp 11 */ |
add xl, xl, a10 |
bgeu xl, a10, 1f |
addi a9, a9, 1 |
1: |
do_mul(a10, xh, h, yl, h) /* pp 12 */ |
add xl, xl, a10 |
bgeu xl, a10, 1f |
addi a9, a9, 1 |
1: |
/* Add pp13-14 into a11 with carry-outs in a10. */ |
do_mul(a11, xh, l, yh, h) /* pp 13 */ |
do_mul(a7, xh, h, yh, l) /* pp 14 */ |
movi a10, 0 |
add a11, a11, a7 |
bgeu a11, a7, 1f |
addi a10, a10, 1 |
1: |
/* Shift a10/a11 into position, and add low half of a11 to a6. */ |
src a10, a10, a11 |
add a10, a10, a9 |
sll a11, a11 |
add xl, xl, a11 |
bgeu xl, a11, 1f |
addi a10, a10, 1 |
1: |
/* Compute xh. */ |
do_mul(xh, xh, h, yh, h) /* pp 15 */ |
add xh, xh, a10 |
|
/* Restore values saved on the stack during the multiplication. */ |
l32i a7, sp, 4 |
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16 |
l32i a0, sp, 0 |
l32i a8, sp, 8 |
#endif |
#endif |
|
/* Shift left by 12 bits, unless there was a carry-out from the |
multiply, in which case, shift by 11 bits and increment the |
exponent. Note: It is convenient to use the constant 0x3ff |
instead of 0x400 when removing the extra exponent bias (so that |
it is easy to construct 0x7fe for the overflow check). Reverse |
the logic here to decrement the exponent sum by one unless there |
was a carry-out. */ |
movi a4, 11 |
srli a5, xh, 21 - 12 |
bnez a5, 1f |
addi a4, a4, 1 |
addi a8, a8, -1 |
1: ssl a4 |
src xh, xh, xl |
src xl, xl, a6 |
sll a6, a6 |
|
/* Subtract the extra bias from the exponent sum (plus one to account |
for the explicit "1.0" of the mantissa that will be added to the |
exponent in the final result). */ |
movi a4, 0x3ff |
sub a8, a8, a4 |
|
/* Check for over/underflow. The value in a8 is one less than the |
final exponent, so values in the range 0..7fd are OK here. */ |
slli a4, a4, 1 /* 0x7fe */ |
bgeu a8, a4, .Lmul_overflow |
|
.Lmul_round: |
/* Round. */ |
bgez a6, .Lmul_rounded |
addi xl, xl, 1 |
beqz xl, .Lmul_roundcarry |
slli a6, a6, 1 |
beqz a6, .Lmul_exactlyhalf |
|
.Lmul_rounded: |
/* Add the exponent to the mantissa. */ |
slli a8, a8, 20 |
add xh, xh, a8 |
|
.Lmul_addsign: |
/* Add the sign bit. */ |
srli a7, a7, 31 |
slli a7, a7, 31 |
or xh, xh, a7 |
|
.Lmul_done: |
#if __XTENSA_CALL0_ABI__ |
l32i a12, sp, 16 |
l32i a13, sp, 20 |
l32i a14, sp, 24 |
l32i a15, sp, 28 |
addi sp, sp, 32 |
#endif |
leaf_return |
|
.Lmul_exactlyhalf: |
/* Round down to the nearest even value. */ |
srli xl, xl, 1 |
slli xl, xl, 1 |
j .Lmul_rounded |
|
.Lmul_roundcarry: |
/* xl is always zero when the rounding increment overflows, so |
there's no need to round it to an even value. */ |
addi xh, xh, 1 |
/* Overflow is OK -- it will be added to the exponent. */ |
j .Lmul_rounded |
|
.Lmul_overflow: |
bltz a8, .Lmul_underflow |
/* Return +/- Infinity. */ |
addi a8, a4, 1 /* 0x7ff */ |
slli xh, a8, 20 |
movi xl, 0 |
j .Lmul_addsign |
|
.Lmul_underflow: |
/* Create a subnormal value, where the exponent field contains zero, |
but the effective exponent is 1. The value of a8 is one less than |
the actual exponent, so just negate it to get the shift amount. */ |
neg a8, a8 |
mov a9, a6 |
ssr a8 |
bgeui a8, 32, .Lmul_bigshift |
|
/* Shift xh/xl right. Any bits that are shifted out of xl are saved |
in a6 (combined with the shifted-out bits currently in a6) for |
rounding the result. */ |
sll a6, xl |
src xl, xh, xl |
srl xh, xh |
j 1f |
|
.Lmul_bigshift: |
bgeui a8, 64, .Lmul_flush_to_zero |
sll a10, xl /* lost bits shifted out of xl */ |
src a6, xh, xl |
srl xl, xh |
movi xh, 0 |
or a9, a9, a10 |
|
/* Set the exponent to zero. */ |
1: movi a8, 0 |
|
/* Pack any nonzero bits shifted out into a6. */ |
beqz a9, .Lmul_round |
movi a9, 1 |
or a6, a6, a9 |
j .Lmul_round |
|
.Lmul_flush_to_zero: |
/* Return zero with the appropriate sign bit. */ |
srli xh, a7, 31 |
slli xh, xh, 31 |
movi xl, 0 |
j .Lmul_done |
|
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16

	/* For Xtensa processors with no multiply hardware, this simplified
	   version of _mulsi3 is used for multiplying 16-bit chunks of
	   the floating-point mantissas.  It uses a custom ABI: the inputs
	   are passed in a13 and a14, the result is returned in a12, and
	   a8 and a15 are clobbered.  */
	.align	4
.Lmul_mulsi3:
	movi	a12, 0
.Lmul_mult_loop:
	/* Classic shift-and-add multiply, consuming 4 bits of the
	   multiplier (a13) per iteration.  Each step conditionally adds
	   a14 << k (k = 0..3) into the accumulator a12, selected by bit k
	   of a13 via movnez.  do_addx2/4/8 are macros defined earlier in
	   this file (addx2/addx4/addx8 or shift+add equivalents).  */
	add	a15, a14, a12
	extui	a8, a13, 0, 1
	movnez	a12, a15, a8

	do_addx2 a15, a14, a12, a15
	extui	a8, a13, 1, 1
	movnez	a12, a15, a8

	do_addx4 a15, a14, a12, a15
	extui	a8, a13, 2, 1
	movnez	a12, a15, a8

	do_addx8 a15, a14, a12, a15
	extui	a8, a13, 3, 1
	movnez	a12, a15, a8

	/* Advance: drop the 4 multiplier bits just used and pre-scale the
	   multiplicand; stop when no multiplier bits remain.  */
	srli	a13, a13, 4
	slli	a14, a14, 4
	bnez	a13, .Lmul_mult_loop
	ret
#endif /* !MUL16 && !MUL32 && !MAC16 */
#endif /* L_muldf3 */ |
|
#ifdef L_divdf3 |
|
/* Division */ |
/* Out-of-line helpers for __divdf3 (below).  Entered only by branches
   from __divdf3 and branches back into it; not a callable entry point.
   xh/xl and yh/yl are macros (defined earlier in this file) naming the
   register pairs that hold the two double operands.  a6 holds
   0x7ff00000 and a7 holds the XOR of the operand signs, both set by
   __divdf3 before branching here.  */
__divdf3_aux:

	/* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
	   (This code is placed before the start of the function just to
	   keep it in range of the limited branch displacements.)  */

.Ldiv_yexpzero:
	/* Clear the sign bit of y.  */
	slli	yh, yh, 1
	srli	yh, yh, 1

	/* Check for division by zero.  */
	or	a10, yh, yl
	beqz	a10, .Ldiv_yzero

	/* Normalize y.  Adjust the exponent in a9.  */
	beqz	yh, .Ldiv_yh_zero
	do_nsau	a10, yh, a11, a9	/* a10 = leading-zero count of yh */
	addi	a10, a10, -11
	ssl	a10
	src	yh, yh, yl
	sll	yl, yl
	movi	a9, 1
	sub	a9, a9, a10
	j	.Ldiv_ynormalized
.Ldiv_yh_zero:
	/* yh == 0: the first set bit is in yl, so the effective exponent
	   is at least 32 smaller; normalize from yl alone.  */
	do_nsau	a10, yl, a11, a9
	addi	a10, a10, -11
	movi	a9, -31
	sub	a9, a9, a10
	ssl	a10
	bltz	a10, .Ldiv_yl_srl
	sll	yh, yl
	movi	yl, 0
	j	.Ldiv_ynormalized
.Ldiv_yl_srl:
	/* Negative shift count: split yl across yh/yl instead.  */
	srl	yh, yl
	sll	yl, yl
	j	.Ldiv_ynormalized

.Ldiv_yzero:
	/* y is zero.  Return NaN if x is also zero; otherwise, infinity.  */
	slli	xh, xh, 1
	srli	xh, xh, 1
	or	xl, xl, xh		/* xl == 0 iff x is zero */
	srli	xh, a7, 31
	slli	xh, xh, 31
	or	xh, xh, a6		/* sign | 0x7ff00000 (infinity) */
	bnez	xl, 1f
	movi	a4, 0x80000	/* make it a quiet NaN */
	or	xh, xh, a4
1:	movi	xl, 0
	leaf_return

.Ldiv_xexpzero:
	/* Clear the sign bit of x.  */
	slli	xh, xh, 1
	srli	xh, xh, 1

	/* If x is zero, return zero.  */
	or	a10, xh, xl
	beqz	a10, .Ldiv_return_zero

	/* Normalize x.  Adjust the exponent in a8.  Same scheme as the
	   y normalization above.  */
	beqz	xh, .Ldiv_xh_zero
	do_nsau	a10, xh, a11, a8
	addi	a10, a10, -11
	ssl	a10
	src	xh, xh, xl
	sll	xl, xl
	movi	a8, 1
	sub	a8, a8, a10
	j	.Ldiv_xnormalized
.Ldiv_xh_zero:
	do_nsau	a10, xl, a11, a8
	addi	a10, a10, -11
	movi	a8, -31
	sub	a8, a8, a10
	ssl	a10
	bltz	a10, .Ldiv_xl_srl
	sll	xh, xl
	movi	xl, 0
	j	.Ldiv_xnormalized
.Ldiv_xl_srl:
	srl	xh, xl
	sll	xl, xl
	j	.Ldiv_xnormalized

.Ldiv_return_zero:
	/* Return zero with the appropriate sign bit.  */
	srli	xh, a7, 31
	slli	xh, xh, 31
	movi	xl, 0
	leaf_return

.Ldiv_xnan_or_inf:
	/* Set the sign bit of the result.  */
	srli	a7, yh, 31
	slli	a7, a7, 31
	xor	xh, xh, a7
	/* If y is NaN or Inf, return NaN (Inf/Inf and NaN/y are invalid).  */
	bnall	yh, a6, 1f
	movi	a4, 0x80000	/* make it a quiet NaN */
	or	xh, xh, a4
1:	leaf_return

.Ldiv_ynan_or_inf:
	/* If y is Infinity, return zero.  */
	slli	a8, yh, 12
	or	a8, a8, yl
	beqz	a8, .Ldiv_return_zero
	/* y is NaN; return it.  */
	mov	xh, yh
	mov	xl, yl
	leaf_return

.Ldiv_highequal1:
	/* Pre-division normalization in __divdf3 found xh == yh: compare
	   the low words to decide whether x still needs a one-bit shift.
	   Targets 2f/3f are in __divdf3 below.  */
	bltu	xl, yl, 2f
	j	3f
|
	/* double __divdf3 (double x, double y): IEEE double division,
	   round-to-nearest-even.  Operands and result live in the register
	   pairs named by the xh/xl and yh/yl macros (defined earlier in
	   this file).  Special cases are handled in __divdf3_aux above.  */
	.align	4
	.global	__divdf3
	.type	__divdf3, @function
__divdf3:
	leaf_entry sp, 16
	movi	a6, 0x7ff00000

	/* Get the sign of the result.  */
	xor	a7, xh, yh

	/* Check for NaN and infinity.  */
	ball	xh, a6, .Ldiv_xnan_or_inf
	ball	yh, a6, .Ldiv_ynan_or_inf

	/* Extract the exponents.  */
	extui	a8, xh, 20, 11
	extui	a9, yh, 20, 11

	beqz	a9, .Ldiv_yexpzero
.Ldiv_ynormalized:
	beqz	a8, .Ldiv_xexpzero
.Ldiv_xnormalized:

	/* Subtract the exponents.  */
	sub	a8, a8, a9

	/* Replace sign/exponent fields with explicit "1.0".  */
	movi	a10, 0x1fffff
	or	xh, xh, a6
	and	xh, xh, a10
	or	yh, yh, a6
	and	yh, yh, a10

	/* Set SAR for left shift by one.  */
	ssai	(32 - 1)

	/* The first digit of the mantissa division must be a one.
	   Shift x (and adjust the exponent) as needed to make this true.  */
	bltu	yh, xh, 3f
	beq	yh, xh, .Ldiv_highequal1
2:	src	xh, xh, xl
	sll	xl, xl
	addi	a8, a8, -1
3:
	/* Do the first subtraction and shift.  */
	sub	xh, xh, yh
	bgeu	xl, yl, 1f
	addi	xh, xh, -1	/* borrow from the high word */
1:	sub	xl, xl, yl
	src	xh, xh, xl
	sll	xl, xl

	/* Put the quotient into a10/a11.  */
	movi	a10, 0
	movi	a11, 1

	/* Divide one bit at a time for 52 bits.  */
	movi	a9, 52
#if XCHAL_HAVE_LOOPS
	loop	a9, .Ldiv_loopend
#endif
.Ldiv_loop:
	/* Shift the quotient << 1.  */
	src	a10, a10, a11
	sll	a11, a11

	/* Is this digit a 0 or 1?  */
	bltu	xh, yh, 3f
	beq	xh, yh, .Ldiv_highequal2

	/* Output a 1 and subtract.  */
2:	addi	a11, a11, 1
	sub	xh, xh, yh
	bgeu	xl, yl, 1f
	addi	xh, xh, -1
1:	sub	xl, xl, yl

	/* Shift the dividend << 1.  */
3:	src	xh, xh, xl
	sll	xl, xl

#if !XCHAL_HAVE_LOOPS
	addi	a9, a9, -1
	bnez	a9, .Ldiv_loop
#endif
.Ldiv_loopend:

	/* Add the exponent bias (less one to account for the explicit "1.0"
	   of the mantissa that will be added to the exponent in the final
	   result).  */
	movi	a9, 0x3fe
	add	a8, a8, a9

	/* Check for over/underflow.  The value in a8 is one less than the
	   final exponent, so values in the range 0..7fd are OK here.  */
	addmi	a9, a9, 0x400	/* 0x7fe */
	bgeu	a8, a9, .Ldiv_overflow

.Ldiv_round:
	/* Round.  The remainder (<< 1) is in xh/xl.  */
	bltu	xh, yh, .Ldiv_rounded
	beq	xh, yh, .Ldiv_highequal3
.Ldiv_roundup:
	addi	a11, a11, 1
	beqz	a11, .Ldiv_roundcarry

.Ldiv_rounded:
	mov	xl, a11
	/* Add the exponent to the mantissa.  */
	slli	a8, a8, 20
	add	xh, a10, a8

.Ldiv_addsign:
	/* Add the sign bit.  */
	srli	a7, a7, 31
	slli	a7, a7, 31
	or	xh, xh, a7
	leaf_return

.Ldiv_highequal2:
	/* High words equal inside the divide loop: the low words decide
	   whether this quotient digit is a 1 (2b) or a 0 (3b).  */
	bgeu	xl, yl, 2b
	j	3b

.Ldiv_highequal3:
	/* Remainder high words equal: compare low words for rounding.  */
	bltu	xl, yl, .Ldiv_rounded
	bne	xl, yl, .Ldiv_roundup

	/* Remainder is exactly half the divisor.  Round even.  */
	addi	a11, a11, 1
	beqz	a11, .Ldiv_roundcarry
	srli	a11, a11, 1
	slli	a11, a11, 1
	j	.Ldiv_rounded

.Ldiv_overflow:
	bltz	a8, .Ldiv_underflow
	/* Return +/- Infinity.  */
	addi	a8, a9, 1	/* 0x7ff */
	slli	xh, a8, 20
	movi	xl, 0
	j	.Ldiv_addsign

.Ldiv_underflow:
	/* Create a subnormal value, where the exponent field contains zero,
	   but the effective exponent is 1.  The value of a8 is one less than
	   the actual exponent, so just negate it to get the shift amount.  */
	neg	a8, a8
	ssr	a8
	bgeui	a8, 32, .Ldiv_bigshift

	/* Shift a10/a11 right.  Any bits that are shifted out of a11 are
	   saved in a6 for rounding the result.  */
	sll	a6, a11
	src	a11, a10, a11
	srl	a10, a10
	j	1f

.Ldiv_bigshift:
	bgeui	a8, 64, .Ldiv_flush_to_zero
	sll	a9, a11		/* lost bits shifted out of a11 */
	src	a6, a10, a11
	srl	a11, a10
	movi	a10, 0
	or	xl, xl, a9

	/* Set the exponent to zero.  */
1:	movi	a8, 0

	/* Pack any nonzero remainder (in xh/xl) into a6.  */
	or	xh, xh, xl
	beqz	xh, 1f
	movi	a9, 1
	or	a6, a6, a9

	/* Round a10/a11 based on the bits shifted out into a6.  */
1:	bgez	a6, .Ldiv_rounded
	addi	a11, a11, 1
	beqz	a11, .Ldiv_roundcarry
	slli	a6, a6, 1
	bnez	a6, .Ldiv_rounded
	srli	a11, a11, 1	/* exactly halfway: round to even */
	slli	a11, a11, 1
	j	.Ldiv_rounded

.Ldiv_roundcarry:
	/* a11 is always zero when the rounding increment overflows, so
	   there's no need to round it to an even value.  */
	addi	a10, a10, 1
	/* Overflow to the exponent field is OK.  */
	j	.Ldiv_rounded

.Ldiv_flush_to_zero:
	/* Return zero with the appropriate sign bit.  */
	srli	xh, a7, 31
	slli	xh, xh, 31
	movi	xl, 0
	leaf_return
|
#endif /* L_divdf3 */ |
|
#ifdef L_cmpdf2 |
|
/* Equal and Not Equal */ |
|
	/* int __eqdf2 (double x, double y) / __nedf2: returns 0 in a2 if
	   x == y, nonzero otherwise.  NaN compares unequal to everything,
	   including itself; +0 and -0 compare equal.  */
	.align	4
	.global	__eqdf2
	.global	__nedf2
	.set	__nedf2, __eqdf2
	.type	__eqdf2, @function
__eqdf2:
	leaf_entry sp, 16
	bne	xl, yl, 2f
	bne	xh, yh, 4f

	/* The values are equal but NaN != NaN.  Check the exponent.  */
	movi	a6, 0x7ff00000
	ball	xh, a6, 3f

	/* Equal.  */
	movi	a2, 0
	leaf_return

	/* Not equal.  */
2:	movi	a2, 1
	leaf_return

	/* Check if the mantissas are nonzero.  */
3:	slli	a7, xh, 12
	or	a7, a7, xl
	j	5f

	/* Check if x and y are zero with different signs.  */
4:	or	a7, xh, yh
	slli	a7, a7, 1	/* drop the sign bits */
	or	a7, a7, xl	/* xl == yl here */

	/* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
	   or x when exponent(x) = 0x7ff and x == y.  */
5:	movi	a2, 0
	movi	a3, 1
	movnez	a2, a3, a7
	leaf_return
|
|
/* Greater Than */ |
|
	/* int __gtdf2 (double x, double y): returns a positive value (1)
	   in a2 if x > y, otherwise 0.  Returns 0 if either operand is a
	   NaN.  The actual comparison is shared with __ledf2 (.Lle_cmp,
	   defined after __ledf2 below).  */
	.align	4
	.global	__gtdf2
	.type	__gtdf2, @function
__gtdf2:
	leaf_entry sp, 16
	movi	a6, 0x7ff00000
	ball	xh, a6, 2f	/* x has the max exponent: NaN or Inf */
1:	bnall	yh, a6, .Lle_cmp

	/* Check if y is a NaN.  */
	slli	a7, yh, 12
	or	a7, a7, yl
	beqz	a7, .Lle_cmp
	movi	a2, 0		/* unordered: x > y is false */
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, xh, 12
	or	a7, a7, xl
	beqz	a7, 1b		/* x is Inf, not NaN: keep comparing */
	movi	a2, 0
	leaf_return
|
|
/* Less Than or Equal */ |
|
	/* int __ledf2 (double x, double y): returns 0 in a2 if x <= y,
	   otherwise 1.  Returns 1 if either operand is a NaN.  The
	   .Lle_cmp comparison tail is also used by __gtdf2 above (the two
	   differ only in the value returned for NaN).  */
	.align	4
	.global	__ledf2
	.type	__ledf2, @function
__ledf2:
	leaf_entry sp, 16
	movi	a6, 0x7ff00000
	ball	xh, a6, 2f	/* x has the max exponent: NaN or Inf */
1:	bnall	yh, a6, .Lle_cmp

	/* Check if y is a NaN.  */
	slli	a7, yh, 12
	or	a7, a7, yl
	beqz	a7, .Lle_cmp
	movi	a2, 1		/* unordered: x <= y is false */
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, xh, 12
	or	a7, a7, xl
	beqz	a7, 1b		/* x is Inf, not NaN: keep comparing */
	movi	a2, 1
	leaf_return

.Lle_cmp:
	/* Both operands are ordered.  Result: a2 = 0 iff x <= y.  */
	/* Check if x and y have different signs.  */
	xor	a7, xh, yh
	bltz	a7, .Lle_diff_signs

	/* Check if x is negative.  */
	bltz	xh, .Lle_xneg

	/* Check if x <= y.  (Same-sign nonnegative doubles compare like
	   unsigned 64-bit integers.)  */
	bltu	xh, yh, 4f
	bne	xh, yh, 5f
	bltu	yl, xl, 5f
4:	movi	a2, 0
	leaf_return

.Lle_xneg:
	/* Both negative: the ordering is reversed, so check y <= x.  */
	bltu	yh, xh, 4b
	bne	yh, xh, 5f
	bgeu	xl, yl, 4b
5:	movi	a2, 1
	leaf_return

.Lle_diff_signs:
	bltz	xh, 4b		/* x negative, y positive: x <= y */

	/* Check if both x and y are zero (+0 <= -0 must hold).  */
	or	a7, xh, yh
	slli	a7, a7, 1
	or	a7, a7, xl
	or	a7, a7, yl
	movi	a2, 1
	movi	a3, 0
	moveqz	a2, a3, a7
	leaf_return
|
|
/* Greater Than or Equal */ |
|
	/* int __gedf2 (double x, double y): returns a2 >= 0 iff x >= y;
	   returns -1 if either operand is a NaN.  The actual comparison is
	   shared with __ltdf2 (.Llt_cmp, defined after __ltdf2 below).  */
	.align	4
	.global	__gedf2
	.type	__gedf2, @function
__gedf2:
	leaf_entry sp, 16
	movi	a6, 0x7ff00000
	ball	xh, a6, 2f	/* x has the max exponent: NaN or Inf */
1:	bnall	yh, a6, .Llt_cmp

	/* Check if y is a NaN.  */
	slli	a7, yh, 12
	or	a7, a7, yl
	beqz	a7, .Llt_cmp
	movi	a2, -1		/* unordered: x >= y is false */
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, xh, 12
	or	a7, a7, xl
	beqz	a7, 1b		/* x is Inf, not NaN: keep comparing */
	movi	a2, -1
	leaf_return
|
|
/* Less Than */ |
|
	/* int __ltdf2 (double x, double y): returns a negative value (-1)
	   in a2 if x < y, otherwise 0.  Returns 0 if either operand is a
	   NaN.  The .Llt_cmp comparison tail is also used by __gedf2 above
	   (the two differ only in the value returned for NaN).  */
	.align	4
	.global	__ltdf2
	.type	__ltdf2, @function
__ltdf2:
	leaf_entry sp, 16
	movi	a6, 0x7ff00000
	ball	xh, a6, 2f	/* x has the max exponent: NaN or Inf */
1:	bnall	yh, a6, .Llt_cmp

	/* Check if y is a NaN.  */
	slli	a7, yh, 12
	or	a7, a7, yl
	beqz	a7, .Llt_cmp
	movi	a2, 0		/* unordered: x < y is false */
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, xh, 12
	or	a7, a7, xl
	beqz	a7, 1b		/* x is Inf, not NaN: keep comparing */
	movi	a2, 0
	leaf_return

.Llt_cmp:
	/* Both operands are ordered.  Result: a2 = -1 iff x < y.  */
	/* Check if x and y have different signs.  */
	xor	a7, xh, yh
	bltz	a7, .Llt_diff_signs

	/* Check if x is negative.  */
	bltz	xh, .Llt_xneg

	/* Check if x < y.  (Same-sign nonnegative doubles compare like
	   unsigned 64-bit integers.)  */
	bltu	xh, yh, 4f
	bne	xh, yh, 5f
	bgeu	xl, yl, 5f
4:	movi	a2, -1
	leaf_return

.Llt_xneg:
	/* Both negative: the ordering is reversed, so check y < x.  */
	bltu	yh, xh, 4b
	bne	yh, xh, 5f
	bltu	yl, xl, 4b
5:	movi	a2, 0
	leaf_return

.Llt_diff_signs:
	bgez	xh, 5b		/* x positive, y negative: not x < y */

	/* Check if both x and y are nonzero (-0 < +0 must not hold).  */
	or	a7, xh, yh
	slli	a7, a7, 1
	or	a7, a7, xl
	or	a7, a7, yl
	movi	a2, 0
	movi	a3, -1
	movnez	a2, a3, a7
	leaf_return
|
|
/* Unordered */ |
|
	/* int __unorddf2 (double x, double y): returns 1 in a2 if either
	   operand is a NaN (max exponent with a nonzero mantissa),
	   otherwise 0.  */
	.align	4
	.global	__unorddf2
	.type	__unorddf2, @function
__unorddf2:
	leaf_entry sp, 16
	movi	a6, 0x7ff00000
	ball	xh, a6, 3f	/* x has the max exponent: NaN or Inf */
1:	ball	yh, a6, 4f	/* y has the max exponent: NaN or Inf */
2:	movi	a2, 0		/* both ordered */
	leaf_return

	/* Distinguish NaN from Infinity by the mantissa bits.  */
3:	slli	a7, xh, 12
	or	a7, a7, xl
	beqz	a7, 1b		/* x is Inf: still need to check y */
	movi	a2, 1
	leaf_return

4:	slli	a7, yh, 12
	or	a7, a7, yl
	beqz	a7, 2b		/* y is Inf: ordered */
	movi	a2, 1
	leaf_return
|
#endif /* L_cmpdf2 */ |
|
#ifdef L_fixdfsi |
|
	/* int __fixdfsi (double x): convert a double (in the xh/xl register
	   pair) to a signed 32-bit integer in a2, truncating toward zero.
	   NaN and out-of-range values saturate to +/- maxint (NaN maps to
	   +maxint).  Values with magnitude < 1 convert to zero.  */
	.align	4
	.global	__fixdfsi
	.type	__fixdfsi, @function
__fixdfsi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7ff00000
	ball	xh, a6, .Lfixdfsi_nan_or_inf

	/* Extract the exponent and check if 0 < (exp - 0x3fe) < 32.  */
	extui	a4, xh, 20, 11
	extui	a5, a6, 19, 10	/* 0x3fe */
	sub	a4, a4, a5
	bgei	a4, 32, .Lfixdfsi_maxint
	blti	a4, 1, .Lfixdfsi_zero

	/* Add explicit "1.0" and shift << 11.  */
	or	a7, xh, a6	/* a7 also keeps the sign bit for later */
	ssai	(32 - 11)
	src	a5, a7, xl

	/* Shift back to the right, based on the exponent.  */
	ssl	a4		/* shift by 32 - a4 */
	srl	a5, a5

	/* Negate the result if sign != 0.  */
	neg	a2, a5
	movgez	a2, a5, a7
	leaf_return

.Lfixdfsi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, xh, 12
	or	a4, a4, xl
	beqz	a4, .Lfixdfsi_maxint

	/* Translate NaN to +maxint.  */
	movi	xh, 0

.Lfixdfsi_maxint:
	slli	a4, a6, 11	/* 0x80000000 */
	addi	a5, a4, -1	/* 0x7fffffff */
	movgez	a4, a5, xh	/* pick by the sign of x */
	mov	a2, a4
	leaf_return

.Lfixdfsi_zero:
	movi	a2, 0
	leaf_return
|
#endif /* L_fixdfsi */ |
|
#ifdef L_fixdfdi |
|
	/* long long __fixdfdi (double x): convert a double to a signed
	   64-bit integer (result in the xh/xl pair), truncating toward
	   zero.  NaN and out-of-range values saturate to +/- maxint (NaN
	   maps to +maxint); magnitude < 1 converts to zero.  */
	.align	4
	.global	__fixdfdi
	.type	__fixdfdi, @function
__fixdfdi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7ff00000
	ball	xh, a6, .Lfixdfdi_nan_or_inf

	/* Extract the exponent and check if 0 < (exp - 0x3fe) < 64.  */
	extui	a4, xh, 20, 11
	extui	a5, a6, 19, 10	/* 0x3fe */
	sub	a4, a4, a5
	bgei	a4, 64, .Lfixdfdi_maxint
	blti	a4, 1, .Lfixdfdi_zero

	/* Add explicit "1.0" and shift << 11.  */
	or	a7, xh, a6	/* a7 also keeps the sign bit for later */
	ssai	(32 - 11)
	src	xh, a7, xl
	sll	xl, xl

	/* Shift back to the right, based on the exponent.  */
	ssl	a4		/* shift by 64 - a4 */
	bgei	a4, 32, .Lfixdfdi_smallshift
	srl	xl, xh
	movi	xh, 0

.Lfixdfdi_shifted:
	/* Negate the result if sign != 0 (two's complement of xh/xl).  */
	bgez	a7, 1f
	neg	xl, xl
	neg	xh, xh
	beqz	xl, 1f
	addi	xh, xh, -1
1:	leaf_return

.Lfixdfdi_smallshift:
	src	xl, xh, xl
	srl	xh, xh
	j	.Lfixdfdi_shifted

.Lfixdfdi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, xh, 12
	or	a4, a4, xl
	beqz	a4, .Lfixdfdi_maxint

	/* Translate NaN to +maxint.  */
	movi	xh, 0

.Lfixdfdi_maxint:
	slli	a7, a6, 11	/* 0x80000000 */
	bgez	xh, 1f
	mov	xh, a7		/* 0x8000000000000000 */
	movi	xl, 0
	leaf_return

1:	addi	xh, a7, -1	/* 0x7fffffff */
	movi	xl, -1		/* 0x7fffffffffffffff */
	leaf_return

.Lfixdfdi_zero:
	movi	xh, 0
	movi	xl, 0
	leaf_return
|
#endif /* L_fixdfdi */ |
|
#ifdef L_fixunsdfsi |
|
	/* unsigned __fixunsdfsi (double x): convert a double to an unsigned
	   32-bit integer in a2, truncating toward zero.  NaN maps to
	   0xffffffff; positive overflow saturates to 0xffffffff and
	   negative values to 0x80000000; values < 1 convert to zero.  */
	.align	4
	.global	__fixunsdfsi
	.type	__fixunsdfsi, @function
__fixunsdfsi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7ff00000
	ball	xh, a6, .Lfixunsdfsi_nan_or_inf

	/* Extract the exponent and check if 0 <= (exp - 0x3ff) < 32.  */
	extui	a4, xh, 20, 11
	extui	a5, a6, 20, 10	/* 0x3ff */
	sub	a4, a4, a5
	bgei	a4, 32, .Lfixunsdfsi_maxint
	bltz	a4, .Lfixunsdfsi_zero

	/* Add explicit "1.0" and shift << 11.  */
	or	a7, xh, a6	/* a7 also keeps the sign bit for later */
	ssai	(32 - 11)
	src	a5, a7, xl

	/* Shift back to the right, based on the exponent.  */
	addi	a4, a4, 1
	beqi	a4, 32, .Lfixunsdfsi_bigexp	/* shift of 0 is invalid */
	ssl	a4		/* shift by 32 - a4 */
	srl	a5, a5

	/* Negate the result if sign != 0.  */
	neg	a2, a5
	movgez	a2, a5, a7
	leaf_return

.Lfixunsdfsi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, xh, 12
	or	a4, a4, xl
	beqz	a4, .Lfixunsdfsi_maxint

	/* Translate NaN to 0xffffffff.  */
	movi	a2, -1
	leaf_return

.Lfixunsdfsi_maxint:
	slli	a4, a6, 11	/* 0x80000000 */
	movi	a5, -1		/* 0xffffffff */
	movgez	a4, a5, xh	/* pick by the sign of x */
	mov	a2, a4
	leaf_return

.Lfixunsdfsi_zero:
	movi	a2, 0
	leaf_return

.Lfixunsdfsi_bigexp:
	/* Handle unsigned maximum exponent case.  */
	bltz	xh, 1f
	mov	a2, a5		/* no shift needed */
	leaf_return

	/* Return 0x80000000 if negative.  */
1:	slli	a2, a6, 11
	leaf_return
|
#endif /* L_fixunsdfsi */ |
|
#ifdef L_fixunsdfdi |
|
	/* unsigned long long __fixunsdfdi (double x): convert a double to
	   an unsigned 64-bit integer (result in the xh/xl pair), truncating
	   toward zero.  NaN and positive overflow map to all-ones; negative
	   values map to 0x8000000000000000; values < 1 convert to zero.  */
	.align	4
	.global	__fixunsdfdi
	.type	__fixunsdfdi, @function
__fixunsdfdi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7ff00000
	ball	xh, a6, .Lfixunsdfdi_nan_or_inf

	/* Extract the exponent and check if 0 <= (exp - 0x3ff) < 64.  */
	extui	a4, xh, 20, 11
	extui	a5, a6, 20, 10	/* 0x3ff */
	sub	a4, a4, a5
	bgei	a4, 64, .Lfixunsdfdi_maxint
	bltz	a4, .Lfixunsdfdi_zero

	/* Add explicit "1.0" and shift << 11.  */
	or	a7, xh, a6	/* a7 also keeps the sign bit for later */
	ssai	(32 - 11)
	src	xh, a7, xl
	sll	xl, xl

	/* Shift back to the right, based on the exponent.  */
	addi	a4, a4, 1
	beqi	a4, 64, .Lfixunsdfdi_bigexp	/* shift of 0 is invalid */
	ssl	a4		/* shift by 64 - a4 */
	bgei	a4, 32, .Lfixunsdfdi_smallshift
	srl	xl, xh
	movi	xh, 0

.Lfixunsdfdi_shifted:
	/* Negate the result if sign != 0 (two's complement of xh/xl).  */
	bgez	a7, 1f
	neg	xl, xl
	neg	xh, xh
	beqz	xl, 1f
	addi	xh, xh, -1
1:	leaf_return

.Lfixunsdfdi_smallshift:
	src	xl, xh, xl
	srl	xh, xh
	j	.Lfixunsdfdi_shifted

.Lfixunsdfdi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, xh, 12
	or	a4, a4, xl
	beqz	a4, .Lfixunsdfdi_maxint

	/* Translate NaN to 0xffffffff....  */
1:	movi	xh, -1
	movi	xl, -1
	leaf_return

.Lfixunsdfdi_maxint:
	bgez	xh, 1b		/* +Inf / positive overflow: all ones */
2:	slli	xh, a6, 11	/* 0x80000000 */
	movi	xl, 0
	leaf_return

.Lfixunsdfdi_zero:
	movi	xh, 0
	movi	xl, 0
	leaf_return

.Lfixunsdfdi_bigexp:
	/* Handle unsigned maximum exponent case.  */
	bltz	a7, 2b		/* negative: return 0x80000000...  */
	leaf_return		/* no shift needed */
|
#endif /* L_fixunsdfdi */ |
|
#ifdef L_floatsidf |
|
	/* double __floatunsidf (unsigned x) and
	   double __floatsidf (int x): convert a 32-bit integer (in a2) to
	   a double in the xh/xl register pair.  The conversion is exact
	   (a 32-bit integer always fits in a 52-bit mantissa), so no
	   rounding is needed.  The two entry points share the
	   normalization code; a7 holds the sign bit (0 for unsigned).  */
	.align	4
	.global	__floatunsidf
	.type	__floatunsidf, @function
__floatunsidf:
	leaf_entry sp, 16
	beqz	a2, .Lfloatsidf_return_zero

	/* Set the sign to zero and jump to the floatsidf code.  */
	movi	a7, 0
	j	.Lfloatsidf_normalize

	.align	4
	.global	__floatsidf
	.type	__floatsidf, @function
__floatsidf:
	leaf_entry sp, 16

	/* Check for zero.  */
	beqz	a2, .Lfloatsidf_return_zero

	/* Save the sign.  */
	extui	a7, a2, 31, 1

	/* Get the absolute value.  */
#if XCHAL_HAVE_ABS
	abs	a2, a2
#else
	neg	a4, a2
	movltz	a2, a4, a2
#endif

.Lfloatsidf_normalize:
	/* Normalize with the first 1 bit in the msb.  */
	do_nsau	a4, a2, a5, a6	/* a4 = leading-zero count of a2 */
	ssl	a4
	sll	a5, a2

	/* Shift the mantissa into position.  */
	srli	xh, a5, 11
	slli	xl, a5, (32 - 11)

	/* Set the exponent.  */
	movi	a5, 0x41d	/* 0x3fe + 31 */
	sub	a5, a5, a4
	slli	a5, a5, 20
	add	xh, xh, a5

	/* Add the sign and return.  */
	slli	a7, a7, 31
	or	xh, xh, a7
	leaf_return

.Lfloatsidf_return_zero:
	/* a2 is already zero here; clear a3 so both halves of the
	   returned double are zero.  */
	movi	a3, 0
	leaf_return
|
#endif /* L_floatsidf */ |
|
#ifdef L_floatdidf |
|
	/* double __floatundidf (unsigned long long x) and
	   double __floatdidf (long long x): convert a 64-bit integer (in
	   the xh/xl register pair) to a double in the same pair, with
	   round-to-nearest-even on the bits shifted out of the 52-bit
	   mantissa.  The two entry points share the normalization code;
	   a7 holds the sign bit (0 for unsigned).  */
	.align	4
	.global	__floatundidf
	.type	__floatundidf, @function
__floatundidf:
	leaf_entry sp, 16

	/* Check for zero.  */
	or	a4, xh, xl
	beqz	a4, 2f

	/* Set the sign to zero and jump to the floatdidf code.  */
	movi	a7, 0
	j	.Lfloatdidf_normalize

	.align	4
	.global	__floatdidf
	.type	__floatdidf, @function
__floatdidf:
	leaf_entry sp, 16

	/* Check for zero.  */
	or	a4, xh, xl
	beqz	a4, 2f

	/* Save the sign.  */
	extui	a7, xh, 31, 1

	/* Get the absolute value (two's complement of xh/xl).  */
	bgez	xh, .Lfloatdidf_normalize
	neg	xl, xl
	neg	xh, xh
	beqz	xl, .Lfloatdidf_normalize
	addi	xh, xh, -1

.Lfloatdidf_normalize:
	/* Normalize with the first 1 bit in the msb of xh.  */
	beqz	xh, .Lfloatdidf_bigshift
	do_nsau	a4, xh, a5, a6	/* a4 = leading-zero count of xh */
	ssl	a4
	src	xh, xh, xl
	sll	xl, xl

.Lfloatdidf_shifted:
	/* Shift the mantissa into position, with rounding bits in a6.  */
	ssai	11
	sll	a6, xl
	src	xl, xh, xl
	srl	xh, xh

	/* Set the exponent.  */
	movi	a5, 0x43d	/* 0x3fe + 63 */
	sub	a5, a5, a4
	slli	a5, a5, 20
	add	xh, xh, a5

	/* Add the sign.  */
	slli	a7, a7, 31
	or	xh, xh, a7

	/* Round up if the leftover fraction is >= 1/2.  */
	bgez	a6, 2f
	addi	xl, xl, 1
	beqz	xl, .Lfloatdidf_roundcarry

	/* Check if the leftover fraction is exactly 1/2.  */
	slli	a6, a6, 1
	beqz	a6, .Lfloatdidf_exactlyhalf
2:	leaf_return

.Lfloatdidf_bigshift:
	/* xh is zero.  Normalize with first 1 bit of xl in the msb of xh.  */
	do_nsau	a4, xl, a5, a6
	ssl	a4
	sll	xh, xl
	movi	xl, 0
	addi	a4, a4, 32	/* account for the extra 32-bit shift */
	j	.Lfloatdidf_shifted

.Lfloatdidf_exactlyhalf:
	/* Round down to the nearest even value.  */
	srli	xl, xl, 1
	slli	xl, xl, 1
	leaf_return

.Lfloatdidf_roundcarry:
	/* xl is always zero when the rounding increment overflows, so
	   there's no need to round it to an even value.  */
	addi	xh, xh, 1
	/* Overflow to the exponent is OK.  */
	leaf_return
|
#endif /* L_floatdidf */ |
|
#ifdef L_truncdfsf2 |
|
	/* float __truncdfsf2 (double x): narrow a double (in the xh/xl
	   register pair) to a single-precision float in a2, with
	   round-to-nearest-even.  Handles overflow to Infinity, NaN
	   quieting, and gradual underflow to single subnormals.  */
	.align	4
	.global	__truncdfsf2
	.type	__truncdfsf2, @function
__truncdfsf2:
	leaf_entry sp, 16

	/* Adjust the exponent bias (double bias 0x3ff -> single bias 0x7f).  */
	movi	a4, (0x3ff - 0x7f) << 20
	sub	a5, xh, a4

	/* Check for underflow.  */
	xor	a6, xh, a5	/* sign flipped: subtraction wrapped */
	bltz	a6, .Ltrunc_underflow
	extui	a6, a5, 20, 11
	beqz	a6, .Ltrunc_underflow

	/* Check for overflow.  */
	movi	a4, 255
	bge	a6, a4, .Ltrunc_overflow

	/* Shift a5/xl << 3 into a5/a4 (29 extra mantissa bits end up in
	   a4 as the rounding fraction).  */
	ssai	(32 - 3)
	src	a5, a5, xl
	sll	a4, xl

.Ltrunc_addsign:
	/* Add the sign bit.  */
	extui	a6, xh, 31, 1
	slli	a6, a6, 31
	or	a2, a6, a5

	/* Round up if the leftover fraction is >= 1/2.  */
	bgez	a4, 1f
	addi	a2, a2, 1
	/* Overflow to the exponent is OK.  The answer will be correct.  */

	/* Check if the leftover fraction is exactly 1/2.  */
	slli	a4, a4, 1
	beqz	a4, .Ltrunc_exactlyhalf
1:	leaf_return

.Ltrunc_exactlyhalf:
	/* Round down to the nearest even value.  */
	srli	a2, a2, 1
	slli	a2, a2, 1
	leaf_return

.Ltrunc_overflow:
	/* Check if exponent == 0x7ff.  */
	movi	a4, 0x7ff00000
	bnall	xh, a4, 1f

	/* Check if mantissa is nonzero.  */
	slli	a5, xh, 12
	or	a5, a5, xl
	beqz	a5, 1f

	/* Shift a4 to set a bit in the mantissa, making a quiet NaN.  */
	srli	a4, a4, 1

1:	slli	a4, a4, 4	/* 0xff000000 or 0xff800000 */
	/* Add the sign bit.  */
	extui	a6, xh, 31, 1
	ssai	1
	src	a2, a6, a4
	leaf_return

.Ltrunc_underflow:
	/* Find shift count for a subnormal.  Flush to zero if >= 32.  */
	extui	a6, xh, 20, 11
	movi	a5, 0x3ff - 0x7f
	sub	a6, a5, a6
	addi	a6, a6, 1
	bgeui	a6, 32, 1f

	/* Replace the exponent with an explicit "1.0".  */
	slli	a5, a5, 13	/* 0x700000 */
	or	a5, a5, xh
	slli	a5, a5, 11
	srli	a5, a5, 11

	/* Shift the mantissa left by 3 bits (into a5/a4).  */
	ssai	(32 - 3)
	src	a5, a5, xl
	sll	a4, xl

	/* Shift right by a6.  Bits lost out of a4 (saved in a7) make the
	   rounding fraction sticky.  */
	ssr	a6
	sll	a7, a4
	src	a4, a5, a4
	srl	a5, a5
	beqz	a7, .Ltrunc_addsign
	or	a4, a4, a6	/* any positive, nonzero value will work */
	j	.Ltrunc_addsign

	/* Return +/- zero.  */
1:	extui	a2, xh, 31, 1
	slli	a2, a2, 31
	leaf_return
|
#endif /* L_truncdfsf2 */ |
|
#ifdef L_extendsfdf2 |
|
	/* double __extendsfdf2 (float x): widen a single-precision float
	   (in a2) to a double in the xh/xl register pair.  The conversion
	   is exact; single subnormals are normalized into normal doubles,
	   and NaNs are quieted.  */
	.align	4
	.global	__extendsfdf2
	.type	__extendsfdf2, @function
__extendsfdf2:
	leaf_entry sp, 16

	/* Save the sign bit and then shift it off.  */
	extui	a5, a2, 31, 1
	slli	a5, a5, 31
	slli	a4, a2, 1

	/* Extract and check the exponent.  */
	extui	a6, a2, 23, 8
	beqz	a6, .Lextend_expzero	/* zero or subnormal */
	addi	a6, a6, 1
	beqi	a6, 256, .Lextend_nan_or_inf	/* exponent was 255 */

	/* Shift >> 3 into a4/xl (23-bit mantissa -> 52-bit position).  */
	srli	a4, a4, 4
	slli	xl, a2, (32 - 3)

	/* Adjust the exponent bias (single bias 0x7f -> double bias 0x3ff).  */
	movi	a6, (0x3ff - 0x7f) << 20
	add	a4, a4, a6

	/* Add the sign bit.  */
	or	xh, a4, a5
	leaf_return

.Lextend_nan_or_inf:
	movi	a4, 0x7ff00000

	/* Check for NaN.  */
	slli	a7, a2, 9
	beqz	a7, 1f

	slli	a6, a6, 11	/* 0x80000 (a6 == 256 here): quiet bit */
	or	a4, a4, a6

	/* Add the sign and return.  */
1:	or	xh, a4, a5
	movi	xl, 0
	leaf_return

.Lextend_expzero:
	/* Input is +/-0 when a4 == 0: 1b then yields xh = sign, xl = 0.  */
	beqz	a4, 1b

	/* Subnormal input.  Normalize it to have 8 zero bits before the
	   first 1 bit.  */
	do_nsau	a7, a4, a2, a3	/* a7 = leading-zero count of a4 */
	addi	a7, a7, -8
	ssl	a7
	sll	a4, a4

	/* Shift >> 3 into a4/xl.  */
	slli	xl, a4, (32 - 3)
	srli	a4, a4, 3

	/* Set the exponent.  */
	movi	a6, 0x3fe - 0x7f
	sub	a6, a6, a7
	slli	a6, a6, 20
	add	a4, a4, a6

	/* Add the sign and return.  */
	or	xh, a4, a5
	leaf_return
|
#endif /* L_extendsfdf2 */ |
|
|
/t-linux
0,0 → 1,2365
EXTRA_MULTILIB_PARTS = crtbegin.o crtend.o crtbeginS.o crtendS.o crtbeginT.o |
/xtensa.md
0,0 → 1,2133
;; GCC machine description for Tensilica's Xtensa architecture. |
;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2007 |
;; Free Software Foundation, Inc.
;; Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. |
|
;; This file is part of GCC. |
|
;; GCC is free software; you can redistribute it and/or modify it |
;; under the terms of the GNU General Public License as published by |
;; the Free Software Foundation; either version 3, or (at your option) |
;; any later version. |
|
;; GCC is distributed in the hope that it will be useful, but WITHOUT |
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public |
;; License for more details. |
|
;; You should have received a copy of the GNU General Public License |
;; along with GCC; see the file COPYING3. If not see |
;; <http://www.gnu.org/licenses/>. |
|
|
;; Named hard registers and unspec/unspec_volatile codes used by the
;; patterns in this file.  (UNSPEC_* and UNSPECV_* are separate
;; numbering spaces, so the overlapping values do not conflict.)
(define_constants [
  (A0_REG		0)	; return-address register
  (A1_REG		1)	; stack pointer
  (A7_REG		7)	; last windowed-ABI argument register
  (A8_REG		8)

  (UNSPEC_NSAU		1)	; "normalization shift amount" (count leading zeros)
  (UNSPEC_NOP		2)
  (UNSPEC_PLT		3)
  (UNSPEC_RET_ADDR	4)
  (UNSPECV_SET_FP	1)
  (UNSPECV_ENTRY	2)
])
|
|
;; Attributes. |
|
;; Classification of every insn, used by the pipeline model below and
;; by length-based relaxation.
(define_attr "type"
  "unknown,jump,call,load,store,move,arith,multi,nop,farith,fmadd,fdiv,fsqrt,fconv,fload,fstore,mul16,mul32,div32,mac16,rsr,wsr"
  (const_string "unknown"))

;; Machine mode the insn primarily operates in (BL = boolean CC mode).
(define_attr "mode"
  "unknown,none,QI,HI,SI,DI,SF,DF,BL"
  (const_string "unknown"))

;; Instruction length in bytes; each pattern overrides the default.
(define_attr "length" "" (const_int 1))

;; Describe a user's asm statement.
(define_asm_attributes
  [(set_attr "type" "multi")])
|
|
;; Pipeline model. |
|
;; The Xtensa basically has simple 5-stage RISC pipeline. |
;; Most instructions complete in 1 cycle, and it is OK to assume that |
;; everything is fully pipelined. The exceptions have special insn |
;; reservations in the pipeline description below. The Xtensa can |
;; issue one instruction per cycle, so defining CPU units is unnecessary. |
|
;; Default: everything not listed below completes in a single cycle.
(define_insn_reservation "xtensa_any_insn" 1
		 (eq_attr "type" "!load,fload,rsr,mul16,mul32,fmadd,fconv")
		 "nothing")

;; Loads (integer and FP) have a 2-cycle load-use latency.
(define_insn_reservation "xtensa_memory" 2
		 (eq_attr "type" "load,fload")
		 "nothing")

;; Reads of special registers take 2 cycles.
(define_insn_reservation "xtensa_sreg" 2
		 (eq_attr "type" "rsr")
		 "nothing")

;; 16x16 multiply: 2-cycle latency.
(define_insn_reservation "xtensa_mul16" 2
		 (eq_attr "type" "mul16")
		 "nothing")

;; 32x32 multiply: 2-cycle latency.
(define_insn_reservation "xtensa_mul32" 2
		 (eq_attr "type" "mul32")
		 "nothing")

;; FP multiply-add unit: 4-cycle latency.
(define_insn_reservation "xtensa_fmadd" 4
		 (eq_attr "type" "fmadd")
		 "nothing")

;; FP<->int conversions: 2-cycle latency.
(define_insn_reservation "xtensa_fconv" 2
		 (eq_attr "type" "fconv")
		 "nothing")
|
;; Include predicate definitions |
|
(include "predicates.md") |
|
|
;; Addition. |
|
;; SI addition.  Alternatives cover the narrow (2-byte) add.n/addi.n
;; forms, the 3-byte add/addi, and addmi for multiples of 256
;; (constraint N) that addi's 8-bit signed range cannot reach.
(define_insn "addsi3"
  [(set (match_operand:SI 0 "register_operand" "=D,D,a,a,a")
	(plus:SI (match_operand:SI 1 "register_operand" "%d,d,r,r,r")
		 (match_operand:SI 2 "add_operand" "d,O,r,J,N")))]
  ""
  "@
   add.n\t%0, %1, %2
   addi.n\t%0, %1, %d2
   add\t%0, %1, %2
   addi\t%0, %1, %d2
   addmi\t%0, %1, %x2"
  [(set_attr "type"	"arith,arith,arith,arith,arith")
   (set_attr "mode"	"SI")
   (set_attr "length"	"2,2,3,3,3")])

;; addx2/addx4/addx8: add with the first operand pre-scaled by
;; 2/4/8 -- matched from (plus (mult ... const) ...) as generated
;; for address arithmetic.  Requires the ADDX option.
(define_insn "*addx2"
  [(set (match_operand:SI 0 "register_operand" "=a")
	(plus:SI (mult:SI (match_operand:SI 1 "register_operand" "r")
			  (const_int 2))
		 (match_operand:SI 2 "register_operand" "r")))]
  "TARGET_ADDX"
  "addx2\t%0, %1, %2"
  [(set_attr "type"	"arith")
   (set_attr "mode"	"SI")
   (set_attr "length"	"3")])

(define_insn "*addx4"
  [(set (match_operand:SI 0 "register_operand" "=a")
	(plus:SI (mult:SI (match_operand:SI 1 "register_operand" "r")
			  (const_int 4))
		 (match_operand:SI 2 "register_operand" "r")))]
  "TARGET_ADDX"
  "addx4\t%0, %1, %2"
  [(set_attr "type"	"arith")
   (set_attr "mode"	"SI")
   (set_attr "length"	"3")])

(define_insn "*addx8"
  [(set (match_operand:SI 0 "register_operand" "=a")
	(plus:SI (mult:SI (match_operand:SI 1 "register_operand" "r")
			  (const_int 8))
		 (match_operand:SI 2 "register_operand" "r")))]
  "TARGET_ADDX"
  "addx8\t%0, %1, %2"
  [(set_attr "type"	"arith")
   (set_attr "mode"	"SI")
   (set_attr "length"	"3")])

;; SF addition; executed on the fused multiply-add unit ("fmadd" timing).
(define_insn "addsf3"
  [(set (match_operand:SF 0 "register_operand" "=f")
	(plus:SF (match_operand:SF 1 "register_operand" "%f")
		 (match_operand:SF 2 "register_operand" "f")))]
  "TARGET_HARD_FLOAT"
  "add.s\t%0, %1, %2"
  [(set_attr "type"	"fmadd")
   (set_attr "mode"	"SF")
   (set_attr "length"	"3")])
|
|
;; Subtraction. |
|
;; SI subtraction (register-register only; reg - const is canonicalized
;; to addition of the negated constant and matched by addsi3).
(define_insn "subsi3"
  [(set (match_operand:SI 0 "register_operand" "=a")
	(minus:SI (match_operand:SI 1 "register_operand" "r")
		  (match_operand:SI 2 "register_operand" "r")))]
  ""
  "sub\t%0, %1, %2"
  [(set_attr "type"	"arith")
   (set_attr "mode"	"SI")
   (set_attr "length"	"3")])

;; subx2/subx4/subx8: subtract with the minuend pre-scaled by 2/4/8.
(define_insn "*subx2"
  [(set (match_operand:SI 0 "register_operand" "=a")
	(minus:SI (mult:SI (match_operand:SI 1 "register_operand" "r")
			   (const_int 2))
		  (match_operand:SI 2 "register_operand" "r")))]
  "TARGET_ADDX"
  "subx2\t%0, %1, %2"
  [(set_attr "type"	"arith")
   (set_attr "mode"	"SI")
   (set_attr "length"	"3")])

(define_insn "*subx4"
  [(set (match_operand:SI 0 "register_operand" "=a")
	(minus:SI (mult:SI (match_operand:SI 1 "register_operand" "r")
			   (const_int 4))
		  (match_operand:SI 2 "register_operand" "r")))]
  "TARGET_ADDX"
  "subx4\t%0, %1, %2"
  [(set_attr "type"	"arith")
   (set_attr "mode"	"SI")
   (set_attr "length"	"3")])

(define_insn "*subx8"
  [(set (match_operand:SI 0 "register_operand" "=a")
	(minus:SI (mult:SI (match_operand:SI 1 "register_operand" "r")
			   (const_int 8))
		  (match_operand:SI 2 "register_operand" "r")))]
  "TARGET_ADDX"
  "subx8\t%0, %1, %2"
  [(set_attr "type"	"arith")
   (set_attr "mode"	"SI")
   (set_attr "length"	"3")])

;; SF subtraction on the fused multiply-add unit.
(define_insn "subsf3"
  [(set (match_operand:SF 0 "register_operand" "=f")
	(minus:SF (match_operand:SF 1 "register_operand" "f")
		  (match_operand:SF 2 "register_operand" "f")))]
  "TARGET_HARD_FLOAT"
  "sub.s\t%0, %1, %2"
  [(set_attr "type"	"fmadd")
   (set_attr "mode"	"SF")
   (set_attr "length"	"3")])
|
|
;; Multiplication. |
|
;; 32x32 -> 32 multiply (MUL32 option).
(define_insn "mulsi3"
  [(set (match_operand:SI 0 "register_operand" "=a")
	(mult:SI (match_operand:SI 1 "register_operand" "%r")
		 (match_operand:SI 2 "register_operand" "r")))]
  "TARGET_MUL32"
  "mull\t%0, %1, %2"
  [(set_attr "type"	"mul32")
   (set_attr "mode"	"SI")
   (set_attr "length"	"3")])

;; Signed 16x16 -> 32 widening multiply.  Alternative 2 uses the MAC16
;; accumulator (class A); it writes ACC, so only %1/%2 appear in the
;; output template.
(define_insn "mulhisi3"
  [(set (match_operand:SI 0 "register_operand" "=C,A")
	(mult:SI (sign_extend:SI
		  (match_operand:HI 1 "register_operand" "%r,r"))
		 (sign_extend:SI
		  (match_operand:HI 2 "register_operand" "r,r"))))]
  "TARGET_MUL16 || TARGET_MAC16"
  "@
   mul16s\t%0, %1, %2
   mul.aa.ll\t%1, %2"
  [(set_attr "type"	"mul16,mac16")
   (set_attr "mode"	"SI")
   (set_attr "length"	"3,3")])

;; Unsigned 16x16 -> 32 widening multiply.
(define_insn "umulhisi3"
  [(set (match_operand:SI 0 "register_operand" "=C,A")
	(mult:SI (zero_extend:SI
		  (match_operand:HI 1 "register_operand" "%r,r"))
		 (zero_extend:SI
		  (match_operand:HI 2 "register_operand" "r,r"))))]
  "TARGET_MUL16 || TARGET_MAC16"
  "@
   mul16u\t%0, %1, %2
   umul.aa.ll\t%1, %2"
  [(set_attr "type"	"mul16,mac16")
   (set_attr "mode"	"SI")
   (set_attr "length"	"3,3")])

;; MAC16 multiply-accumulate: ACC += %1 * %2 (operand 3 ties the
;; accumulator input to the output with constraint "0").
(define_insn "muladdhisi"
  [(set (match_operand:SI 0 "register_operand" "=A")
	(plus:SI (mult:SI (sign_extend:SI
			   (match_operand:HI 1 "register_operand" "%r"))
			  (sign_extend:SI
			   (match_operand:HI 2 "register_operand" "r")))
		 (match_operand:SI 3 "register_operand" "0")))]
  "TARGET_MAC16"
  "mula.aa.ll\t%1, %2"
  [(set_attr "type"	"mac16")
   (set_attr "mode"	"SI")
   (set_attr "length"	"3")])

;; MAC16 multiply-subtract: ACC -= %2 * %3.
(define_insn "mulsubhisi"
  [(set (match_operand:SI 0 "register_operand" "=A")
	(minus:SI (match_operand:SI 1 "register_operand" "0")
		  (mult:SI (sign_extend:SI
			    (match_operand:HI 2 "register_operand" "%r"))
			   (sign_extend:SI
			    (match_operand:HI 3 "register_operand" "r")))))]
  "TARGET_MAC16"
  "muls.aa.ll\t%2, %3"
  [(set_attr "type"	"mac16")
   (set_attr "mode"	"SI")
   (set_attr "length"	"3")])

;; SF multiply.
(define_insn "mulsf3"
  [(set (match_operand:SF 0 "register_operand" "=f")
	(mult:SF (match_operand:SF 1 "register_operand" "%f")
		 (match_operand:SF 2 "register_operand" "f")))]
  "TARGET_HARD_FLOAT"
  "mul.s\t%0, %1, %2"
  [(set_attr "type"	"fmadd")
   (set_attr "mode"	"SF")
   (set_attr "length"	"3")])

;; Fused SF multiply-add (%0 = %1 * %2 + %0); only when -ffused-madd
;; semantics are allowed.
(define_insn "muladdsf3"
  [(set (match_operand:SF 0 "register_operand" "=f")
	(plus:SF (mult:SF (match_operand:SF 1 "register_operand" "%f")
			  (match_operand:SF 2 "register_operand" "f"))
		 (match_operand:SF 3 "register_operand" "0")))]
  "TARGET_HARD_FLOAT && TARGET_FUSED_MADD"
  "madd.s\t%0, %1, %2"
  [(set_attr "type"	"fmadd")
   (set_attr "mode"	"SF")
   (set_attr "length"	"3")])

;; Fused SF multiply-subtract (%0 = %0 - %2 * %3).
(define_insn "mulsubsf3"
  [(set (match_operand:SF 0 "register_operand" "=f")
	(minus:SF (match_operand:SF 1 "register_operand" "0")
		  (mult:SF (match_operand:SF 2 "register_operand" "%f")
			   (match_operand:SF 3 "register_operand" "f"))))]
  "TARGET_HARD_FLOAT && TARGET_FUSED_MADD"
  "msub.s\t%0, %2, %3"
  [(set_attr "type"	"fmadd")
   (set_attr "mode"	"SF")
   (set_attr "length"	"3")])
|
|
;; Division. |
|
;; Signed 32-bit divide (DIV32 option).
(define_insn "divsi3"
  [(set (match_operand:SI 0 "register_operand" "=a")
	(div:SI (match_operand:SI 1 "register_operand" "r")
		(match_operand:SI 2 "register_operand" "r")))]
  "TARGET_DIV32"
  "quos\t%0, %1, %2"
  [(set_attr "type"	"div32")
   (set_attr "mode"	"SI")
   (set_attr "length"	"3")])

;; Unsigned 32-bit divide.
(define_insn "udivsi3"
  [(set (match_operand:SI 0 "register_operand" "=a")
	(udiv:SI (match_operand:SI 1 "register_operand" "r")
		 (match_operand:SI 2 "register_operand" "r")))]
  "TARGET_DIV32"
  "quou\t%0, %1, %2"
  [(set_attr "type"	"div32")
   (set_attr "mode"	"SI")
   (set_attr "length"	"3")])

;; SF divide (hardware FP divider option).
(define_insn "divsf3"
  [(set (match_operand:SF 0 "register_operand" "=f")
	(div:SF (match_operand:SF 1 "register_operand" "f")
		(match_operand:SF 2 "register_operand" "f")))]
  "TARGET_HARD_FLOAT_DIV"
  "div.s\t%0, %1, %2"
  [(set_attr "type"	"fdiv")
   (set_attr "mode"	"SF")
   (set_attr "length"	"3")])

;; 1.0 / x via the approximate reciprocal instruction -- only under
;; -funsafe-math-optimizations because the result is not exact.
(define_insn "*recipsf2"
  [(set (match_operand:SF 0 "register_operand" "=f")
	(div:SF (match_operand:SF 1 "const_float_1_operand" "")
		(match_operand:SF 2 "register_operand" "f")))]
  "TARGET_HARD_FLOAT_RECIP && flag_unsafe_math_optimizations"
  "recip.s\t%0, %2"
  [(set_attr "type"	"fdiv")
   (set_attr "mode"	"SF")
   (set_attr "length"	"3")])
|
|
;; Remainders. |
|
;; Signed 32-bit remainder (DIV32 option).
(define_insn "modsi3"
  [(set (match_operand:SI 0 "register_operand" "=a")
	(mod:SI (match_operand:SI 1 "register_operand" "r")
		(match_operand:SI 2 "register_operand" "r")))]
  "TARGET_DIV32"
  "rems\t%0, %1, %2"
  [(set_attr "type"	"div32")
   (set_attr "mode"	"SI")
   (set_attr "length"	"3")])

;; Unsigned 32-bit remainder.
(define_insn "umodsi3"
  [(set (match_operand:SI 0 "register_operand" "=a")
	(umod:SI (match_operand:SI 1 "register_operand" "r")
		 (match_operand:SI 2 "register_operand" "r")))]
  "TARGET_DIV32"
  "remu\t%0, %1, %2"
  [(set_attr "type"	"div32")
   (set_attr "mode"	"SI")
   (set_attr "length"	"3")])
|
|
;; Square roots. |
|
;; SF square root (hardware sqrt option).
(define_insn "sqrtsf2"
  [(set (match_operand:SF 0 "register_operand" "=f")
	(sqrt:SF (match_operand:SF 1 "register_operand" "f")))]
  "TARGET_HARD_FLOAT_SQRT"
  "sqrt.s\t%0, %1"
  [(set_attr "type"	"fsqrt")
   (set_attr "mode"	"SF")
   (set_attr "length"	"3")])

;; 1.0 / sqrt(x) via the approximate rsqrt instruction -- unsafe-math
;; only, since the result is inexact.
(define_insn "*rsqrtsf2"
  [(set (match_operand:SF 0 "register_operand" "=f")
	(div:SF (match_operand:SF 1 "const_float_1_operand" "")
		(sqrt:SF (match_operand:SF 2 "register_operand" "f"))))]
  "TARGET_HARD_FLOAT_RSQRT && flag_unsafe_math_optimizations"
  "rsqrt.s\t%0, %2"
  [(set_attr "type"	"fsqrt")
   (set_attr "mode"	"SF")
   (set_attr "length"	"3")])
|
|
;; Absolute value. |
|
;; Integer absolute value (ABS option).
(define_insn "abssi2"
  [(set (match_operand:SI 0 "register_operand" "=a")
	(abs:SI (match_operand:SI 1 "register_operand" "r")))]
  "TARGET_ABS"
  "abs\t%0, %1"
  [(set_attr "type"	"arith")
   (set_attr "mode"	"SI")
   (set_attr "length"	"3")])

;; SF absolute value (sign-bit manipulation; plain FP arith timing).
(define_insn "abssf2"
  [(set (match_operand:SF 0 "register_operand" "=f")
	(abs:SF (match_operand:SF 1 "register_operand" "f")))]
  "TARGET_HARD_FLOAT"
  "abs.s\t%0, %1"
  [(set_attr "type"	"farith")
   (set_attr "mode"	"SF")
   (set_attr "length"	"3")])
|
|
;; Min and max. |
|
;; Signed/unsigned min and max (MINMAX option).
(define_insn "sminsi3"
  [(set (match_operand:SI 0 "register_operand" "=a")
	(smin:SI (match_operand:SI 1 "register_operand" "%r")
		 (match_operand:SI 2 "register_operand" "r")))]
  "TARGET_MINMAX"
  "min\t%0, %1, %2"
  [(set_attr "type"	"arith")
   (set_attr "mode"	"SI")
   (set_attr "length"	"3")])

(define_insn "uminsi3"
  [(set (match_operand:SI 0 "register_operand" "=a")
	(umin:SI (match_operand:SI 1 "register_operand" "%r")
		 (match_operand:SI 2 "register_operand" "r")))]
  "TARGET_MINMAX"
  "minu\t%0, %1, %2"
  [(set_attr "type"	"arith")
   (set_attr "mode"	"SI")
   (set_attr "length"	"3")])

(define_insn "smaxsi3"
  [(set (match_operand:SI 0 "register_operand" "=a")
	(smax:SI (match_operand:SI 1 "register_operand" "%r")
		 (match_operand:SI 2 "register_operand" "r")))]
  "TARGET_MINMAX"
  "max\t%0, %1, %2"
  [(set_attr "type"	"arith")
   (set_attr "mode"	"SI")
   (set_attr "length"	"3")])

(define_insn "umaxsi3"
  [(set (match_operand:SI 0 "register_operand" "=a")
	(umax:SI (match_operand:SI 1 "register_operand" "%r")
		 (match_operand:SI 2 "register_operand" "r")))]
  "TARGET_MINMAX"
  "maxu\t%0, %1, %2"
  [(set_attr "type"	"arith")
   (set_attr "mode"	"SI")
   (set_attr "length"	"3")])
|
|
;; Find first bit. |
|
;; ffs(x): synthesized from NSAU as 32 - nsau (x & -x).  (x & -x)
;; isolates the lowest set bit; nsau counts its leading zeros.
(define_expand "ffssi2"
  [(set (match_operand:SI 0 "register_operand" "")
	(ffs:SI (match_operand:SI 1 "register_operand" "")))]
  "TARGET_NSA"
{
  rtx temp = gen_reg_rtx (SImode);
  emit_insn (gen_negsi2 (temp, operands[1]));
  emit_insn (gen_andsi3 (temp, temp, operands[1]));
  emit_insn (gen_nsau (temp, temp));
  emit_insn (gen_negsi2 (temp, temp));
  emit_insn (gen_addsi3 (operands[0], temp, GEN_INT (32)));
  DONE;
})

;; There is no RTL operator corresponding to NSAU.
(define_insn "nsau"
  [(set (match_operand:SI 0 "register_operand" "=a")
	(unspec:SI [(match_operand:SI 1 "register_operand" "r")] UNSPEC_NSAU))]
  "TARGET_NSA"
  "nsau\t%0, %1"
  [(set_attr "type"	"arith")
   (set_attr "mode"	"SI")
   (set_attr "length"	"3")])
|
|
;; Negation and one's complement. |
|
;; Integer negation.
(define_insn "negsi2"
  [(set (match_operand:SI 0 "register_operand" "=a")
	(neg:SI (match_operand:SI 1 "register_operand" "r")))]
  ""
  "neg\t%0, %1"
  [(set_attr "type"	"arith")
   (set_attr "mode"	"SI")
   (set_attr "length"	"3")])

;; One's complement: no direct NOT instruction, so expand to
;; XOR with -1.
(define_expand "one_cmplsi2"
  [(set (match_operand:SI 0 "register_operand" "")
	(not:SI (match_operand:SI 1 "register_operand" "")))]
  ""
{
  rtx temp = gen_reg_rtx (SImode);
  emit_insn (gen_movsi (temp, constm1_rtx));
  emit_insn (gen_xorsi3 (operands[0], temp, operands[1]));
  DONE;
})

;; SF negation (sign-bit flip).
(define_insn "negsf2"
  [(set (match_operand:SF 0 "register_operand" "=f")
	(neg:SF (match_operand:SF 1 "register_operand" "f")))]
  "TARGET_HARD_FLOAT"
  "neg.s\t%0, %1"
  [(set_attr "type"	"farith")
   (set_attr "mode"	"SF")
   (set_attr "length"	"3")])
|
|
;; Logical instructions. |
|
;; AND: a low-bit mask (constraint P) becomes an EXTUI field extract;
;; otherwise a plain register AND.
(define_insn "andsi3"
  [(set (match_operand:SI 0 "register_operand" "=a,a")
	(and:SI (match_operand:SI 1 "register_operand" "%r,r")
		(match_operand:SI 2 "mask_operand" "P,r")))]
  ""
  "@
   extui\t%0, %1, 0, %K2
   and\t%0, %1, %2"
  [(set_attr "type"	"arith,arith")
   (set_attr "mode"	"SI")
   (set_attr "length"	"3,3")])

;; Inclusive OR.
(define_insn "iorsi3"
  [(set (match_operand:SI 0 "register_operand" "=a")
	(ior:SI (match_operand:SI 1 "register_operand" "%r")
		(match_operand:SI 2 "register_operand" "r")))]
  ""
  "or\t%0, %1, %2"
  [(set_attr "type"	"arith")
   (set_attr "mode"	"SI")
   (set_attr "length"	"3")])

;; Exclusive OR.
(define_insn "xorsi3"
  [(set (match_operand:SI 0 "register_operand" "=a")
	(xor:SI (match_operand:SI 1 "register_operand" "%r")
		(match_operand:SI 2 "register_operand" "r")))]
  ""
  "xor\t%0, %1, %2"
  [(set_attr "type"	"arith")
   (set_attr "mode"	"SI")
   (set_attr "length"	"3")])
|
|
;; Zero-extend instructions. |
|
;; Zero extension: register source uses EXTUI; memory source uses the
;; unsigned narrow load directly.
(define_insn "zero_extendhisi2"
  [(set (match_operand:SI 0 "register_operand" "=a,a")
	(zero_extend:SI (match_operand:HI 1 "nonimmed_operand" "r,U")))]
  ""
  "@
   extui\t%0, %1, 0, 16
   l16ui\t%0, %1"
  [(set_attr "type"	"arith,load")
   (set_attr "mode"	"SI")
   (set_attr "length"	"3,3")])

(define_insn "zero_extendqisi2"
  [(set (match_operand:SI 0 "register_operand" "=a,a")
	(zero_extend:SI (match_operand:QI 1 "nonimmed_operand" "r,U")))]
  ""
  "@
   extui\t%0, %1, 0, 8
   l8ui\t%0, %1"
  [(set_attr "type"	"arith,load")
   (set_attr "mode"	"SI")
   (set_attr "length"	"3,3")])
|
|
;; Sign-extend instructions. |
|
;; HI sign extension: use SEXT or a signed load when the operand
;; qualifies; otherwise fall back to the shift-left/shift-right
;; sequence emitted by xtensa_extend_reg.
(define_expand "extendhisi2"
  [(set (match_operand:SI 0 "register_operand" "")
	(sign_extend:SI (match_operand:HI 1 "register_operand" "")))]
  ""
{
  if (sext_operand (operands[1], HImode))
    emit_insn (gen_extendhisi2_internal (operands[0], operands[1]));
  else
    xtensa_extend_reg (operands[0], operands[1]);
  DONE;
})

(define_insn "extendhisi2_internal"
  [(set (match_operand:SI 0 "register_operand" "=B,a")
	(sign_extend:SI (match_operand:HI 1 "sext_operand" "r,U")))]
  ""
  "@
   sext\t%0, %1, 15
   l16si\t%0, %1"
  [(set_attr "type"	"arith,load")
   (set_attr "mode"	"SI")
   (set_attr "length"	"3,3")])

;; QI sign extension: SEXT if available, else the shift sequence.
;; (There is no signed 8-bit load on Xtensa.)
(define_expand "extendqisi2"
  [(set (match_operand:SI 0 "register_operand" "")
	(sign_extend:SI (match_operand:QI 1 "register_operand" "")))]
  ""
{
  if (TARGET_SEXT)
    emit_insn (gen_extendqisi2_internal (operands[0], operands[1]));
  else
    xtensa_extend_reg (operands[0], operands[1]);
  DONE;
})

(define_insn "extendqisi2_internal"
  [(set (match_operand:SI 0 "register_operand" "=B")
	(sign_extend:SI (match_operand:QI 1 "register_operand" "r")))]
  "TARGET_SEXT"
  "sext\t%0, %1, 7"
  [(set_attr "type"	"arith")
   (set_attr "mode"	"SI")
   (set_attr "length"	"3")])
|
|
;; Field extract instructions. |
|
;; Signed bit-field extract: only fields SEXT can handle directly
;; (supported width, field at the least-significant-bit position).
(define_expand "extv"
  [(set (match_operand:SI 0 "register_operand" "")
	(sign_extract:SI (match_operand:SI 1 "register_operand" "")
			 (match_operand:SI 2 "const_int_operand" "")
			 (match_operand:SI 3 "const_int_operand" "")))]
  "TARGET_SEXT"
{
  if (!sext_fldsz_operand (operands[2], SImode))
    FAIL;

  /* We could expand to a right shift followed by SEXT but that's
     no better than the standard left and right shift sequence.  */
  if (!lsbitnum_operand (operands[3], SImode))
    FAIL;

  emit_insn (gen_extv_internal (operands[0], operands[1],
				operands[2], operands[3]));
  DONE;
})

(define_insn "extv_internal"
  [(set (match_operand:SI 0 "register_operand" "=a")
	(sign_extract:SI (match_operand:SI 1 "register_operand" "r")
			 (match_operand:SI 2 "sext_fldsz_operand" "i")
			 (match_operand:SI 3 "lsbitnum_operand" "i")))]
  "TARGET_SEXT"
{
  /* SEXT takes the position of the sign bit, i.e. width - 1.  */
  int fldsz = INTVAL (operands[2]);
  operands[2] = GEN_INT (fldsz - 1);
  return "sext\t%0, %1, %2";
}
  [(set_attr "type"	"arith")
   (set_attr "mode"	"SI")
   (set_attr "length"	"3")])

;; Unsigned bit-field extract via EXTUI (field width must be one EXTUI
;; supports).
(define_expand "extzv"
  [(set (match_operand:SI 0 "register_operand" "")
	(zero_extract:SI (match_operand:SI 1 "register_operand" "")
			 (match_operand:SI 2 "const_int_operand" "")
			 (match_operand:SI 3 "const_int_operand" "")))]
  ""
{
  if (!extui_fldsz_operand (operands[2], SImode))
    FAIL;
  emit_insn (gen_extzv_internal (operands[0], operands[1],
				 operands[2], operands[3]));
  DONE;
})

(define_insn "extzv_internal"
  [(set (match_operand:SI 0 "register_operand" "=a")
	(zero_extract:SI (match_operand:SI 1 "register_operand" "r")
			 (match_operand:SI 2 "extui_fldsz_operand" "i")
			 (match_operand:SI 3 "const_int_operand" "i")))]
  ""
{
  /* Convert the RTL bit position (which is endian-dependent) into
     EXTUI's shift amount.  */
  int shift;
  if (BITS_BIG_ENDIAN)
    shift = (32 - (INTVAL (operands[2]) + INTVAL (operands[3]))) & 0x1f;
  else
    shift = INTVAL (operands[3]) & 0x1f;
  operands[3] = GEN_INT (shift);
  return "extui\t%0, %1, %3, %2";
}
  [(set_attr "type"	"arith")
   (set_attr "mode"	"SI")
   (set_attr "length"	"3")])
|
|
;; Conversions. |
|
;; SF -> signed SI conversion, truncating toward zero.
(define_insn "fix_truncsfsi2"
  [(set (match_operand:SI 0 "register_operand" "=a")
	(fix:SI (match_operand:SF 1 "register_operand" "f")))]
  "TARGET_HARD_FLOAT"
  "trunc.s\t%0, %1, 0"
  [(set_attr "type"	"fconv")
   (set_attr "mode"	"SF")
   (set_attr "length"	"3")])

;; SF -> unsigned SI conversion, truncating toward zero.
(define_insn "fixuns_truncsfsi2"
  [(set (match_operand:SI 0 "register_operand" "=a")
	(unsigned_fix:SI (match_operand:SF 1 "register_operand" "f")))]
  "TARGET_HARD_FLOAT"
  "utrunc.s\t%0, %1, 0"
  [(set_attr "type"	"fconv")
   (set_attr "mode"	"SF")
   (set_attr "length"	"3")])

;; Signed SI -> SF conversion.
(define_insn "floatsisf2"
  [(set (match_operand:SF 0 "register_operand" "=f")
	(float:SF (match_operand:SI 1 "register_operand" "a")))]
  "TARGET_HARD_FLOAT"
  "float.s\t%0, %1, 0"
  [(set_attr "type"	"fconv")
   (set_attr "mode"	"SF")
   (set_attr "length"	"3")])

;; Unsigned SI -> SF conversion.
(define_insn "floatunssisf2"
  [(set (match_operand:SF 0 "register_operand" "=f")
	(unsigned_float:SF (match_operand:SI 1 "register_operand" "a")))]
  "TARGET_HARD_FLOAT"
  "ufloat.s\t%0, %1, 0"
  [(set_attr "type"	"fconv")
   (set_attr "mode"	"SF")
   (set_attr "length"	"3")])
|
|
;; Data movement instructions. |
|
;; 64-bit Integer moves |
|
;; DI moves: legitimize the operands, then let movdi_internal split the
;; move into two SI moves after reload.
(define_expand "movdi"
  [(set (match_operand:DI 0 "nonimmed_operand" "")
	(match_operand:DI 1 "general_operand" ""))]
  ""
{
  /* Without CONST16, constants must come from the literal pool.  */
  if (CONSTANT_P (operands[1]) && !TARGET_CONST16)
    operands[1] = force_const_mem (DImode, operands[1]);

  if (!register_operand (operands[0], DImode)
      && !register_operand (operands[1], DImode))
    operands[1] = force_reg (DImode, operands[1]);

  operands[1] = xtensa_copy_incoming_a7 (operands[1]);
})

(define_insn_and_split "movdi_internal"
  [(set (match_operand:DI 0 "nonimmed_operand" "=a,W,a,a,U")
	(match_operand:DI 1 "move_operand" "r,i,T,U,r"))]
  "register_operand (operands[0], DImode)
   || register_operand (operands[1], DImode)"
  "#"
  "reload_completed"
  [(set (match_dup 0) (match_dup 2))
   (set (match_dup 1) (match_dup 3))]
{
  xtensa_split_operand_pair (operands, SImode);
  /* If the first half-move would clobber a register the second one
     reads, do the halves in the other order.  */
  if (reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      rtx tmp;
      tmp = operands[0], operands[0] = operands[1], operands[1] = tmp;
      tmp = operands[2], operands[2] = operands[3], operands[3] = tmp;
    }
})
|
;; 32-bit Integer moves |
|
;; SI moves.  The expander defers all legitimization to
;; xtensa_emit_move_sequence.
(define_expand "movsi"
  [(set (match_operand:SI 0 "nonimmed_operand" "")
	(match_operand:SI 1 "general_operand" ""))]
  ""
{
  if (xtensa_emit_move_sequence (operands, SImode))
    DONE;
})

;; Alternatives cover narrow (2-byte) moves/loads/stores, MOVSP for
;; writing the stack pointer, CONST16 pairs, literal-pool loads (T),
;; and MAC16 accumulator access via RSR/WSR ACCLO.  The %v0/%v1
;; prefixes emit a MEMW before volatile accesses.
(define_insn "movsi_internal"
  [(set (match_operand:SI 0 "nonimmed_operand" "=D,D,D,D,R,R,a,q,a,W,a,a,U,*a,*A")
	(match_operand:SI 1 "move_operand" "M,D,d,R,D,d,r,r,I,i,T,U,r,*A,*r"))]
  "xtensa_valid_move (SImode, operands)"
  "@
   movi.n\t%0, %x1
   mov.n\t%0, %1
   mov.n\t%0, %1
   %v1l32i.n\t%0, %1
   %v0s32i.n\t%1, %0
   %v0s32i.n\t%1, %0
   mov\t%0, %1
   movsp\t%0, %1
   movi\t%0, %x1
   const16\t%0, %t1\;const16\t%0, %b1
   %v1l32r\t%0, %1
   %v1l32i\t%0, %1
   %v0s32i\t%1, %0
   rsr\t%0, ACCLO
   wsr\t%1, ACCLO"
  [(set_attr "type"	"move,move,move,load,store,store,move,move,move,move,load,load,store,rsr,wsr")
   (set_attr "mode"	"SI")
   (set_attr "length"	"2,2,2,2,2,2,3,3,3,6,3,3,3,3,3")])
|
;; 16-bit Integer moves |
|
;; HI moves.
(define_expand "movhi"
  [(set (match_operand:HI 0 "nonimmed_operand" "")
	(match_operand:HI 1 "general_operand" ""))]
  ""
{
  if (xtensa_emit_move_sequence (operands, HImode))
    DONE;
})

(define_insn "movhi_internal"
  [(set (match_operand:HI 0 "nonimmed_operand" "=D,D,a,a,a,U,*a,*A")
	(match_operand:HI 1 "move_operand" "M,d,r,I,U,r,*A,*r"))]
  "xtensa_valid_move (HImode, operands)"
  "@
   movi.n\t%0, %x1
   mov.n\t%0, %1
   mov\t%0, %1
   movi\t%0, %x1
   %v1l16ui\t%0, %1
   %v0s16i\t%1, %0
   rsr\t%0, ACCLO
   wsr\t%1, ACCLO"
  [(set_attr "type"	"move,move,move,move,load,store,rsr,wsr")
   (set_attr "mode"	"HI")
   (set_attr "length"	"2,2,3,3,3,3,3,3")])
|
;; 8-bit Integer moves |
|
;; QI moves.
(define_expand "movqi"
  [(set (match_operand:QI 0 "nonimmed_operand" "")
	(match_operand:QI 1 "general_operand" ""))]
  ""
{
  if (xtensa_emit_move_sequence (operands, QImode))
    DONE;
})

(define_insn "movqi_internal"
  [(set (match_operand:QI 0 "nonimmed_operand" "=D,D,a,a,a,U,*a,*A")
	(match_operand:QI 1 "move_operand" "M,d,r,I,U,r,*A,*r"))]
  "xtensa_valid_move (QImode, operands)"
  "@
   movi.n\t%0, %x1
   mov.n\t%0, %1
   mov\t%0, %1
   movi\t%0, %x1
   %v1l8ui\t%0, %1
   %v0s8i\t%1, %0
   rsr\t%0, ACCLO
   wsr\t%1, ACCLO"
  [(set_attr "type"	"move,move,move,move,load,store,rsr,wsr")
   (set_attr "mode"	"QI")
   (set_attr "length"	"2,2,3,3,3,3,3,3")])
|
;; 32-bit floating point moves |
|
;; SF moves.  FP-register destinations must not be loaded directly
;; from constants or the constant pool (no FP load-immediate), so
;; those sources are forced through an integer register first.
(define_expand "movsf"
  [(set (match_operand:SF 0 "nonimmed_operand" "")
	(match_operand:SF 1 "general_operand" ""))]
  ""
{
  if (!TARGET_CONST16 && CONSTANT_P (operands[1]))
    operands[1] = force_const_mem (SFmode, operands[1]);

  if ((!register_operand (operands[0], SFmode)
       && !register_operand (operands[1], SFmode))
      || (FP_REG_P (xt_true_regnum (operands[0]))
	  && !(reload_in_progress | reload_completed)
	  && (constantpool_mem_p (operands[1])
	      || CONSTANT_P (operands[1]))))
    operands[1] = force_reg (SFmode, operands[1]);

  operands[1] = xtensa_copy_incoming_a7 (operands[1]);
})

;; WFR/RFR move between the integer and FP register files.
(define_insn "movsf_internal"
  [(set (match_operand:SF 0 "nonimmed_operand" "=f,f,U,D,D,R,a,f,a,W,a,a,U")
	(match_operand:SF 1 "move_operand" "f,U,f,d,R,d,r,r,f,iF,T,U,r"))]
  "((register_operand (operands[0], SFmode)
     || register_operand (operands[1], SFmode))
    && !(FP_REG_P (xt_true_regnum (operands[0]))
	 && (constantpool_mem_p (operands[1]) || CONSTANT_P (operands[1]))))"
  "@
   mov.s\t%0, %1
   %v1lsi\t%0, %1
   %v0ssi\t%1, %0
   mov.n\t%0, %1
   %v1l32i.n\t%0, %1
   %v0s32i.n\t%1, %0
   mov\t%0, %1
   wfr\t%0, %1
   rfr\t%0, %1
   const16\t%0, %t1\;const16\t%0, %b1
   %v1l32r\t%0, %1
   %v1l32i\t%0, %1
   %v0s32i\t%1, %0"
  [(set_attr "type"	"farith,fload,fstore,move,load,store,move,farith,farith,move,load,load,store")
   (set_attr "mode"	"SF")
   (set_attr "length"	"3,3,3,2,2,2,3,3,3,6,3,3,3")])

;; FP load with base-register auto-update (LSIU); the parallel's
;; second set records the base increment.  Volatile references still
;; need an explicit MEMW barrier.
(define_insn "*lsiu"
  [(set (match_operand:SF 0 "register_operand" "=f")
	(mem:SF (plus:SI (match_operand:SI 1 "register_operand" "+a")
			 (match_operand:SI 2 "fpmem_offset_operand" "i"))))
   (set (match_dup 1)
	(plus:SI (match_dup 1) (match_dup 2)))]
  "TARGET_HARD_FLOAT"
{
  if (volatile_refs_p (PATTERN (insn)))
    output_asm_insn ("memw", operands);
  return "lsiu\t%0, %1, %2";
}
  [(set_attr "type"	"fload")
   (set_attr "mode"	"SF")
   (set_attr "length"	"3")])

;; FP store with base-register auto-update (SSIU).
(define_insn "*ssiu"
  [(set (mem:SF (plus:SI (match_operand:SI 0 "register_operand" "+a")
			 (match_operand:SI 1 "fpmem_offset_operand" "i")))
	(match_operand:SF 2 "register_operand" "f"))
   (set (match_dup 0)
	(plus:SI (match_dup 0) (match_dup 1)))]
  "TARGET_HARD_FLOAT"
{
  if (volatile_refs_p (PATTERN (insn)))
    output_asm_insn ("memw", operands);
  return "ssiu\t%2, %0, %1";
}
  [(set_attr "type"	"fstore")
   (set_attr "mode"	"SF")
   (set_attr "length"	"3")])
|
;; 64-bit floating point moves |
|
;; DF moves: handled entirely in integer registers (no FP doubles),
;; split into two SI moves after reload exactly like movdi.
(define_expand "movdf"
  [(set (match_operand:DF 0 "nonimmed_operand" "")
	(match_operand:DF 1 "general_operand" ""))]
  ""
{
  if (CONSTANT_P (operands[1]) && !TARGET_CONST16)
    operands[1] = force_const_mem (DFmode, operands[1]);

  if (!register_operand (operands[0], DFmode)
      && !register_operand (operands[1], DFmode))
    operands[1] = force_reg (DFmode, operands[1]);

  operands[1] = xtensa_copy_incoming_a7 (operands[1]);
})

(define_insn_and_split "movdf_internal"
  [(set (match_operand:DF 0 "nonimmed_operand" "=a,W,a,a,U")
	(match_operand:DF 1 "move_operand" "r,iF,T,U,r"))]
  "register_operand (operands[0], DFmode)
   || register_operand (operands[1], DFmode)"
  "#"
  "reload_completed"
  [(set (match_dup 0) (match_dup 2))
   (set (match_dup 1) (match_dup 3))]
{
  xtensa_split_operand_pair (operands, SFmode);
  /* Swap the halves if the first would clobber input of the second.  */
  if (reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      rtx tmp;
      tmp = operands[0], operands[0] = operands[1], operands[1] = tmp;
      tmp = operands[2], operands[2] = operands[3], operands[3] = tmp;
    }
})
|
;; Block moves |
|
;; Block copy: operand 2 is the byte count, operand 3 the alignment.
;; Let xtensa_expand_block_move decide whether an inline sequence
;; beats a memcpy call; FAIL falls back to the library.
(define_expand "movmemsi"
  [(parallel [(set (match_operand:BLK 0 "" "")
		   (match_operand:BLK 1 "" ""))
	      (use (match_operand:SI 2 "arith_operand" ""))
	      (use (match_operand:SI 3 "const_int_operand" ""))])]
  ""
{
  if (!xtensa_expand_block_move (operands))
    FAIL;
  DONE;
})
|
|
;; Shift instructions. |
|
;; Left shift.  Variable shifts go through the SAR (shift-amount
;; register): SSL/SSR/SSAI set SAR, then SLL/SRA/SRL/SRC shift by it.
(define_expand "ashlsi3"
  [(set (match_operand:SI 0 "register_operand" "")
	(ashift:SI (match_operand:SI 1 "register_operand" "")
		   (match_operand:SI 2 "arith_operand" "")))]
  ""
{
  operands[1] = xtensa_copy_incoming_a7 (operands[1]);
})

(define_insn "ashlsi3_internal"
  [(set (match_operand:SI 0 "register_operand" "=a,a")
	(ashift:SI (match_operand:SI 1 "register_operand" "r,r")
		   (match_operand:SI 2 "arith_operand" "J,r")))]
  ""
  "@
   slli\t%0, %1, %R2
   ssl\t%2\;sll\t%0, %1"
  [(set_attr "type"	"arith,arith")
   (set_attr "mode"	"SI")
   (set_attr "length"	"3,6")])

;; Arithmetic right shift.
(define_insn "ashrsi3"
  [(set (match_operand:SI 0 "register_operand" "=a,a")
	(ashiftrt:SI (match_operand:SI 1 "register_operand" "r,r")
		     (match_operand:SI 2 "arith_operand" "J,r")))]
  ""
  "@
   srai\t%0, %1, %R2
   ssr\t%2\;sra\t%0, %1"
  [(set_attr "type"	"arith,arith")
   (set_attr "mode"	"SI")
   (set_attr "length"	"3,6")])

;; Logical right shift.  SRLI only encodes counts 0..15, so larger
;; constant shifts use EXTUI to extract the surviving high bits.
(define_insn "lshrsi3"
  [(set (match_operand:SI 0 "register_operand" "=a,a")
	(lshiftrt:SI (match_operand:SI 1 "register_operand" "r,r")
		     (match_operand:SI 2 "arith_operand" "J,r")))]
  ""
{
  if (which_alternative == 0)
    {
      if ((INTVAL (operands[2]) & 0x1f) < 16)
	return "srli\t%0, %1, %R2";
      else
	return "extui\t%0, %1, %R2, %L2";
    }
  return "ssr\t%2\;srl\t%0, %1";
}
  [(set_attr "type"	"arith,arith")
   (set_attr "mode"	"SI")
   (set_attr "length"	"3,6")])

;; Rotates: SRC with the same register for both halves performs a
;; funnel-shift rotate; SAR is set from the complement (%L2) or the
;; count (%R2) as the direction requires.
(define_insn "rotlsi3"
  [(set (match_operand:SI 0 "register_operand" "=a,a")
	(rotate:SI (match_operand:SI 1 "register_operand" "r,r")
		   (match_operand:SI 2 "arith_operand" "J,r")))]
  ""
  "@
   ssai\t%L2\;src\t%0, %1, %1
   ssl\t%2\;src\t%0, %1, %1"
  [(set_attr "type"	"multi,multi")
   (set_attr "mode"	"SI")
   (set_attr "length"	"6,6")])

(define_insn "rotrsi3"
  [(set (match_operand:SI 0 "register_operand" "=a,a")
	(rotatert:SI (match_operand:SI 1 "register_operand" "r,r")
		     (match_operand:SI 2 "arith_operand" "J,r")))]
  ""
  "@
   ssai\t%R2\;src\t%0, %1, %1
   ssr\t%2\;src\t%0, %1, %1"
  [(set_attr "type"	"multi,multi")
   (set_attr "mode"	"SI")
   (set_attr "length"	"6,6")])
|
|
;; Comparisons. |
|
;; Handle comparisons by stashing away the operands and then using that |
;; information in the subsequent conditional branch. |
|
;; cc0-style comparisons: the operands are saved in the global
;; branch_cmp[] / branch_type and consumed by the following
;; conditional-branch expander; no compare insn is emitted here.
(define_expand "cmpsi"
  [(set (cc0)
	(compare:CC (match_operand:SI 0 "register_operand" "")
		    (match_operand:SI 1 "nonmemory_operand" "")))]
  ""
{
  branch_cmp[0] = operands[0];
  branch_cmp[1] = operands[1];
  branch_type = CMP_SI;
  DONE;
})

;; Test against zero: recorded as a compare with const0_rtx.
(define_expand "tstsi"
  [(set (cc0)
	(match_operand:SI 0 "register_operand" ""))]
  ""
{
  branch_cmp[0] = operands[0];
  branch_cmp[1] = const0_rtx;
  branch_type = CMP_SI;
  DONE;
})

;; Floating-point comparison, stashed the same way with CMP_SF.
(define_expand "cmpsf"
  [(set (cc0)
	(compare:CC (match_operand:SF 0 "register_operand" "")
		    (match_operand:SF 1 "register_operand" "")))]
  "TARGET_HARD_FLOAT"
{
  branch_cmp[0] = operands[0];
  branch_cmp[1] = operands[1];
  branch_type = CMP_SF;
  DONE;
})
|
|
;; Conditional branches. |
|
;; Conditional-branch expanders, one per condition code.  Each simply
;; hands the stashed branch_cmp operands plus its rtx code to
;; xtensa_expand_conditional_branch, which emits the real branch.
(define_expand "beq"
  [(set (pc)
	(if_then_else (eq (cc0) (const_int 0))
		      (label_ref (match_operand 0 "" ""))
		      (pc)))]
  ""
{
  xtensa_expand_conditional_branch (operands, EQ);
  DONE;
})

(define_expand "bne"
  [(set (pc)
	(if_then_else (ne (cc0) (const_int 0))
		      (label_ref (match_operand 0 "" ""))
		      (pc)))]
  ""
{
  xtensa_expand_conditional_branch (operands, NE);
  DONE;
})

(define_expand "bgt"
  [(set (pc)
	(if_then_else (gt (cc0) (const_int 0))
		      (label_ref (match_operand 0 "" ""))
		      (pc)))]
  ""
{
  xtensa_expand_conditional_branch (operands, GT);
  DONE;
})

(define_expand "bge"
  [(set (pc)
	(if_then_else (ge (cc0) (const_int 0))
		      (label_ref (match_operand 0 "" ""))
		      (pc)))]
  ""
{
  xtensa_expand_conditional_branch (operands, GE);
  DONE;
})

(define_expand "blt"
  [(set (pc)
	(if_then_else (lt (cc0) (const_int 0))
		      (label_ref (match_operand 0 "" ""))
		      (pc)))]
  ""
{
  xtensa_expand_conditional_branch (operands, LT);
  DONE;
})

(define_expand "ble"
  [(set (pc)
	(if_then_else (le (cc0) (const_int 0))
		      (label_ref (match_operand 0 "" ""))
		      (pc)))]
  ""
{
  xtensa_expand_conditional_branch (operands, LE);
  DONE;
})

;; Unsigned variants.
(define_expand "bgtu"
  [(set (pc)
	(if_then_else (gtu (cc0) (const_int 0))
		      (label_ref (match_operand 0 "" ""))
		      (pc)))]
  ""
{
  xtensa_expand_conditional_branch (operands, GTU);
  DONE;
})

(define_expand "bgeu"
  [(set (pc)
	(if_then_else (geu (cc0) (const_int 0))
		      (label_ref (match_operand 0 "" ""))
		      (pc)))]
  ""
{
  xtensa_expand_conditional_branch (operands, GEU);
  DONE;
})

(define_expand "bltu"
  [(set (pc)
	(if_then_else (ltu (cc0) (const_int 0))
		      (label_ref (match_operand 0 "" ""))
		      (pc)))]
  ""
{
  xtensa_expand_conditional_branch (operands, LTU);
  DONE;
})

(define_expand "bleu"
  [(set (pc)
	(if_then_else (leu (cc0) (const_int 0))
		      (label_ref (match_operand 0 "" ""))
		      (pc)))]
  ""
{
  xtensa_expand_conditional_branch (operands, LEU);
  DONE;
})
|
;; Branch patterns for standard integer comparisons

;; Branch when the comparison is TRUE (take the label, else fall
;; through).  Alternative 0 compares against an immediate ("K" = valid
;; branch immediate); alternative 1 compares two registers.  Immediate
;; zero gets the short BxxZ forms (with density variants where the
;; Density option exists); other immediates use the BxxI forms.
(define_insn "*btrue"
  [(set (pc)
	(if_then_else (match_operator 3 "branch_operator"
			 [(match_operand:SI 0 "register_operand" "r,r")
			  (match_operand:SI 1 "branch_operand" "K,r")])
		      (label_ref (match_operand 2 "" ""))
		      (pc)))]
  ""
{
  /* Register-register comparison.  */
  if (which_alternative == 1)
    {
      switch (GET_CODE (operands[3]))
	{
	case EQ: return "beq\t%0, %1, %2";
	case NE: return "bne\t%0, %1, %2";
	case LT: return "blt\t%0, %1, %2";
	case GE: return "bge\t%0, %1, %2";
	default: gcc_unreachable ();
	}
    }
  /* Compare against zero: use the dedicated single-operand branches.  */
  else if (INTVAL (operands[1]) == 0)
    {
      switch (GET_CODE (operands[3]))
	{
	case EQ: return (TARGET_DENSITY
			 ? "beqz.n\t%0, %2"
			 : "beqz\t%0, %2");
	case NE: return (TARGET_DENSITY
			 ? "bnez.n\t%0, %2"
			 : "bnez\t%0, %2");
	case LT: return "bltz\t%0, %2";
	case GE: return "bgez\t%0, %2";
	default: gcc_unreachable ();
	}
    }
  /* Compare against a nonzero branch immediate (%d1 prints it).  */
  else
    {
      switch (GET_CODE (operands[3]))
	{
	case EQ: return "beqi\t%0, %d1, %2";
	case NE: return "bnei\t%0, %d1, %2";
	case LT: return "blti\t%0, %d1, %2";
	case GE: return "bgei\t%0, %d1, %2";
	default: gcc_unreachable ();
	}
    }
  gcc_unreachable ();
}
  [(set_attr "type" "jump,jump")
   (set_attr "mode" "none")
   (set_attr "length" "3,3")])
|
;; Branch when the comparison is FALSE (the if_then_else arms are
;; swapped relative to *btrue), so every emitted opcode is the inverse
;; of the corresponding one in *btrue.  Same three operand cases.
(define_insn "*bfalse"
  [(set (pc)
	(if_then_else (match_operator 3 "branch_operator"
			 [(match_operand:SI 0 "register_operand" "r,r")
			  (match_operand:SI 1 "branch_operand" "K,r")])
		      (pc)
		      (label_ref (match_operand 2 "" ""))))]
  ""
{
  /* Register-register comparison: emit the inverted branch.  */
  if (which_alternative == 1)
    {
      switch (GET_CODE (operands[3]))
	{
	case EQ: return "bne\t%0, %1, %2";
	case NE: return "beq\t%0, %1, %2";
	case LT: return "bge\t%0, %1, %2";
	case GE: return "blt\t%0, %1, %2";
	default: gcc_unreachable ();
	}
    }
  /* Compare against zero.  */
  else if (INTVAL (operands[1]) == 0)
    {
      switch (GET_CODE (operands[3]))
	{
	case EQ: return (TARGET_DENSITY
			 ? "bnez.n\t%0, %2"
			 : "bnez\t%0, %2");
	case NE: return (TARGET_DENSITY
			 ? "beqz.n\t%0, %2"
			 : "beqz\t%0, %2");
	case LT: return "bgez\t%0, %2";
	case GE: return "bltz\t%0, %2";
	default: gcc_unreachable ();
	}
    }
  /* Compare against a nonzero branch immediate.  */
  else
    {
      switch (GET_CODE (operands[3]))
	{
	case EQ: return "bnei\t%0, %d1, %2";
	case NE: return "beqi\t%0, %d1, %2";
	case LT: return "bgei\t%0, %d1, %2";
	case GE: return "blti\t%0, %d1, %2";
	default: gcc_unreachable ();
	}
    }
  gcc_unreachable ();
}
  [(set_attr "type" "jump,jump")
   (set_attr "mode" "none")
   (set_attr "length" "3,3")])
|
;; Unsigned branch-if-true.  Only LTU/GEU exist as Xtensa branch
;; instructions; "L" is the unsigned-branch immediate constraint.
(define_insn "*ubtrue"
  [(set (pc)
	(if_then_else (match_operator 3 "ubranch_operator"
			 [(match_operand:SI 0 "register_operand" "r,r")
			  (match_operand:SI 1 "ubranch_operand" "L,r")])
		      (label_ref (match_operand 2 "" ""))
		      (pc)))]
  ""
{
  /* Register-register form.  */
  if (which_alternative == 1)
    {
      switch (GET_CODE (operands[3]))
	{
	case LTU: return "bltu\t%0, %1, %2";
	case GEU: return "bgeu\t%0, %1, %2";
	default: gcc_unreachable ();
	}
    }
  /* Immediate form.  */
  else
    {
      switch (GET_CODE (operands[3]))
	{
	case LTU: return "bltui\t%0, %d1, %2";
	case GEU: return "bgeui\t%0, %d1, %2";
	default: gcc_unreachable ();
	}
    }
  gcc_unreachable ();
}
  [(set_attr "type" "jump,jump")
   (set_attr "mode" "none")
   (set_attr "length" "3,3")])
|
;; Unsigned branch-if-false: inverse opcodes of *ubtrue (LTU emits
;; BGEU/BGEUI, GEU emits BLTU/BLTUI).
(define_insn "*ubfalse"
  [(set (pc)
	(if_then_else (match_operator 3 "ubranch_operator"
			 [(match_operand:SI 0 "register_operand" "r,r")
			  (match_operand:SI 1 "ubranch_operand" "L,r")])
		      (pc)
		      (label_ref (match_operand 2 "" ""))))]
  ""
{
  /* Register-register form, inverted condition.  */
  if (which_alternative == 1)
    {
      switch (GET_CODE (operands[3]))
	{
	case LTU: return "bgeu\t%0, %1, %2";
	case GEU: return "bltu\t%0, %1, %2";
	default: gcc_unreachable ();
	}
    }
  /* Immediate form, inverted condition.  */
  else
    {
      switch (GET_CODE (operands[3]))
	{
	case LTU: return "bgeui\t%0, %d1, %2";
	case GEU: return "bltui\t%0, %d1, %2";
	default: gcc_unreachable ();
	}
    }
  gcc_unreachable ();
}
  [(set_attr "type" "jump,jump")
   (set_attr "mode" "none")
   (set_attr "length" "3,3")])
|
;; Branch patterns for bit testing

;; Branch on a single extracted bit being clear (EQ with 0) or set (NE).
;; Alternative 0 has a constant bit number: it is masked to 0..31 and
;; the immediate forms BBCI/BBSI are used.  Alternative 1 tests a bit
;; selected by a register via BBC/BBS.
(define_insn "*bittrue"
  [(set (pc)
	(if_then_else (match_operator 3 "boolean_operator"
			 [(zero_extract:SI
			     (match_operand:SI 0 "register_operand" "r,r")
			     (const_int 1)
			     (match_operand:SI 1 "arith_operand" "J,r"))
			  (const_int 0)])
		      (label_ref (match_operand 2 "" ""))
		      (pc)))]
  ""
{
  if (which_alternative == 0)
    {
      /* Reduce the bit number modulo 32 so it is a valid immediate.  */
      unsigned bitnum = INTVAL(operands[1]) & 0x1f;
      operands[1] = GEN_INT(bitnum);
      switch (GET_CODE (operands[3]))
	{
	case EQ: return "bbci\t%0, %d1, %2";
	case NE: return "bbsi\t%0, %d1, %2";
	default: gcc_unreachable ();
	}
    }
  else
    {
      switch (GET_CODE (operands[3]))
	{
	case EQ: return "bbc\t%0, %1, %2";
	case NE: return "bbs\t%0, %1, %2";
	default: gcc_unreachable ();
	}
    }
  gcc_unreachable ();
}
  [(set_attr "type" "jump")
   (set_attr "mode" "none")
   (set_attr "length" "3")])
|
;; Inverted form of *bittrue: the branch is taken when the bit test is
;; FALSE, so EQ emits the "bit set" opcodes and NE the "bit clear" ones.
(define_insn "*bitfalse"
  [(set (pc)
	(if_then_else (match_operator 3 "boolean_operator"
			 [(zero_extract:SI
			     (match_operand:SI 0 "register_operand" "r,r")
			     (const_int 1)
			     (match_operand:SI 1 "arith_operand" "J,r"))
			  (const_int 0)])
		      (pc)
		      (label_ref (match_operand 2 "" ""))))]
  ""
{
  if (which_alternative == 0)
    {
      /* Reduce the bit number modulo 32 so it is a valid immediate.  */
      unsigned bitnum = INTVAL (operands[1]) & 0x1f;
      operands[1] = GEN_INT (bitnum);
      switch (GET_CODE (operands[3]))
	{
	case EQ: return "bbsi\t%0, %d1, %2";
	case NE: return "bbci\t%0, %d1, %2";
	default: gcc_unreachable ();
	}
    }
  else
    {
      switch (GET_CODE (operands[3]))
	{
	case EQ: return "bbs\t%0, %1, %2";
	case NE: return "bbc\t%0, %1, %2";
	default: gcc_unreachable ();
	}
    }
  gcc_unreachable ();
}
  [(set_attr "type" "jump")
   (set_attr "mode" "none")
   (set_attr "length" "3")])
|
;; Branch on (reg & reg) compared with zero.  BNONE branches when no
;; masked bit is set; BANY when at least one is.
(define_insn "*masktrue"
  [(set (pc)
	(if_then_else (match_operator 3 "boolean_operator"
			 [(and:SI (match_operand:SI 0 "register_operand" "r")
				  (match_operand:SI 1 "register_operand" "r"))
			  (const_int 0)])
		      (label_ref (match_operand 2 "" ""))
		      (pc)))]
  ""
{
  switch (GET_CODE (operands[3]))
    {
    case EQ: return "bnone\t%0, %1, %2";
    case NE: return "bany\t%0, %1, %2";
    default: gcc_unreachable ();
    }
}
  [(set_attr "type" "jump")
   (set_attr "mode" "none")
   (set_attr "length" "3")])

;; Inverted form of *masktrue (branch when the mask test is FALSE).
(define_insn "*maskfalse"
  [(set (pc)
	(if_then_else (match_operator 3 "boolean_operator"
			 [(and:SI (match_operand:SI 0 "register_operand" "r")
				  (match_operand:SI 1 "register_operand" "r"))
			  (const_int 0)])
		      (pc)
		      (label_ref (match_operand 2 "" ""))))]
  ""
{
  switch (GET_CODE (operands[3]))
    {
    case EQ: return "bany\t%0, %1, %2";
    case NE: return "bnone\t%0, %1, %2";
    default: gcc_unreachable ();
    }
}
  [(set_attr "type" "jump")
   (set_attr "mode" "none")
   (set_attr "length" "3")])
|
|
;; Define the loop insns used by bct optimization to represent the |
;; start and end of a zero-overhead loop (in loop.c). This start |
;; template generates the loop insn; the end template doesn't generate |
;; any instructions since loop end is handled in hardware. |
|
(define_insn "zero_cost_loop_start" |
[(set (pc) |
(if_then_else (eq (match_operand:SI 0 "register_operand" "a") |
(const_int 0)) |
(label_ref (match_operand 1 "" "")) |
(pc))) |
(set (reg:SI 19) |
(plus:SI (match_dup 0) (const_int -1)))] |
"" |
"loopnez\t%0, %l1" |
[(set_attr "type" "jump") |
(set_attr "mode" "none") |
(set_attr "length" "3")]) |
|
(define_insn "zero_cost_loop_end" |
[(set (pc) |
(if_then_else (ne (reg:SI 19) (const_int 0)) |
(label_ref (match_operand 0 "" "")) |
(pc))) |
(set (reg:SI 19) |
(plus:SI (reg:SI 19) (const_int -1)))] |
"" |
{ |
xtensa_emit_loop_end (insn, operands); |
return ""; |
} |
[(set_attr "type" "jump") |
(set_attr "mode" "none") |
(set_attr "length" "0")]) |
|
|
;; Setting a register from a comparison.

;; Each s<cond> expander rebuilds the comparison RTX from the operands
;; stashed by cmpsi/tstsi (branch_cmp[0]/[1]) and hands it to
;; xtensa_expand_scc; if that helper cannot produce a sequence, the
;; named pattern FAILs and the compiler falls back to branches.

(define_expand "seq"
  [(set (match_operand:SI 0 "register_operand" "")
	(match_dup 1))]
  ""
{
  operands[1] = gen_rtx_EQ (SImode, branch_cmp[0], branch_cmp[1]);
  if (!xtensa_expand_scc (operands))
    FAIL;
  DONE;
})

(define_expand "sne"
  [(set (match_operand:SI 0 "register_operand" "")
	(match_dup 1))]
  ""
{
  operands[1] = gen_rtx_NE (SImode, branch_cmp[0], branch_cmp[1]);
  if (!xtensa_expand_scc (operands))
    FAIL;
  DONE;
})

(define_expand "sgt"
  [(set (match_operand:SI 0 "register_operand" "")
	(match_dup 1))]
  ""
{
  operands[1] = gen_rtx_GT (SImode, branch_cmp[0], branch_cmp[1]);
  if (!xtensa_expand_scc (operands))
    FAIL;
  DONE;
})

(define_expand "sge"
  [(set (match_operand:SI 0 "register_operand" "")
	(match_dup 1))]
  ""
{
  operands[1] = gen_rtx_GE (SImode, branch_cmp[0], branch_cmp[1]);
  if (!xtensa_expand_scc (operands))
    FAIL;
  DONE;
})

(define_expand "slt"
  [(set (match_operand:SI 0 "register_operand" "")
	(match_dup 1))]
  ""
{
  operands[1] = gen_rtx_LT (SImode, branch_cmp[0], branch_cmp[1]);
  if (!xtensa_expand_scc (operands))
    FAIL;
  DONE;
})

(define_expand "sle"
  [(set (match_operand:SI 0 "register_operand" "")
	(match_dup 1))]
  ""
{
  operands[1] = gen_rtx_LE (SImode, branch_cmp[0], branch_cmp[1]);
  if (!xtensa_expand_scc (operands))
    FAIL;
  DONE;
})
|
|
;; Conditional moves.

;; Both expanders defer to xtensa_expand_conditional_move (second
;; argument: 0 = SImode destination, 1 = SFmode) and FAIL when the
;; helper cannot handle the comparison, letting GCC fall back to a
;; branch sequence.

(define_expand "movsicc"
  [(set (match_operand:SI 0 "register_operand" "")
	(if_then_else:SI (match_operand 1 "comparison_operator" "")
			 (match_operand:SI 2 "register_operand" "")
			 (match_operand:SI 3 "register_operand" "")))]
  ""
{
  if (!xtensa_expand_conditional_move (operands, 0))
    FAIL;
  DONE;
})

(define_expand "movsfcc"
  [(set (match_operand:SF 0 "register_operand" "")
	(if_then_else:SF (match_operand 1 "comparison_operator" "")
			 (match_operand:SF 2 "register_operand" "")
			 (match_operand:SF 3 "register_operand" "")))]
  ""
{
  if (!xtensa_expand_conditional_move (operands, 1))
    FAIL;
  DONE;
})
|
;; SImode conditional move keyed on an integer register compared with
;; zero.  Alternative 0 moves %2 into %0 (which is tied to %3) when the
;; condition holds; alternative 1 is the mirrored case with %0 tied to
;; %2, so it moves %3 on the INVERSE condition.
(define_insn "movsicc_internal0"
  [(set (match_operand:SI 0 "register_operand" "=a,a")
	(if_then_else:SI (match_operator 4 "branch_operator"
			    [(match_operand:SI 1 "register_operand" "r,r")
			     (const_int 0)])
			 (match_operand:SI 2 "register_operand" "r,0")
			 (match_operand:SI 3 "register_operand" "0,r")))]
  ""
{
  if (which_alternative == 0)
    {
      switch (GET_CODE (operands[4]))
	{
	case EQ: return "moveqz\t%0, %2, %1";
	case NE: return "movnez\t%0, %2, %1";
	case LT: return "movltz\t%0, %2, %1";
	case GE: return "movgez\t%0, %2, %1";
	default: gcc_unreachable ();
	}
    }
  else
    {
      /* Destination tied to %2: conditionally overwrite with %3 using
	 the inverted condition.  */
      switch (GET_CODE (operands[4]))
	{
	case EQ: return "movnez\t%0, %3, %1";
	case NE: return "moveqz\t%0, %3, %1";
	case LT: return "movgez\t%0, %3, %1";
	case GE: return "movltz\t%0, %3, %1";
	default: gcc_unreachable ();
	}
    }
  gcc_unreachable ();
}
  [(set_attr "type" "move,move")
   (set_attr "mode" "SI")
   (set_attr "length" "3,3")])

;; SImode conditional move keyed on a boolean (CC) register, using the
;; MOVT/MOVF instructions of the Boolean option.  Same tied-operand
;; mirroring as movsicc_internal0.
(define_insn "movsicc_internal1"
  [(set (match_operand:SI 0 "register_operand" "=a,a")
	(if_then_else:SI (match_operator 4 "boolean_operator"
			    [(match_operand:CC 1 "register_operand" "b,b")
			     (const_int 0)])
			 (match_operand:SI 2 "register_operand" "r,0")
			 (match_operand:SI 3 "register_operand" "0,r")))]
  "TARGET_BOOLEANS"
{
  int isEq = (GET_CODE (operands[4]) == EQ);
  switch (which_alternative)
    {
    case 0:
      if (isEq) return "movf\t%0, %2, %1";
      return "movt\t%0, %2, %1";
    case 1:
      if (isEq) return "movt\t%0, %3, %1";
      return "movf\t%0, %3, %1";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "move,move")
   (set_attr "mode" "SI")
   (set_attr "length" "3,3")])
|
;; SFmode conditional move on an integer register compared with zero.
;; Alternatives 0/1 keep the SF value in address registers (integer
;; MOVxZ); alternatives 2/3 keep it in FP registers (MOVxZ.S).  As in
;; movsicc_internal0, the odd alternatives are tied the other way and
;; use the inverted condition.
(define_insn "movsfcc_internal0"
  [(set (match_operand:SF 0 "register_operand" "=a,a,f,f")
	(if_then_else:SF (match_operator 4 "branch_operator"
			    [(match_operand:SI 1 "register_operand" "r,r,r,r")
			     (const_int 0)])
			 (match_operand:SF 2 "register_operand" "r,0,f,0")
			 (match_operand:SF 3 "register_operand" "0,r,0,f")))]
  ""
{
  switch (which_alternative)
    {
    case 0:
      switch (GET_CODE (operands[4]))
	{
	case EQ: return "moveqz\t%0, %2, %1";
	case NE: return "movnez\t%0, %2, %1";
	case LT: return "movltz\t%0, %2, %1";
	case GE: return "movgez\t%0, %2, %1";
	default: gcc_unreachable ();
	}
      break;
    case 1:
      switch (GET_CODE (operands[4]))
	{
	case EQ: return "movnez\t%0, %3, %1";
	case NE: return "moveqz\t%0, %3, %1";
	case LT: return "movgez\t%0, %3, %1";
	case GE: return "movltz\t%0, %3, %1";
	default: gcc_unreachable ();
	}
      break;
    case 2:
      switch (GET_CODE (operands[4]))
	{
	case EQ: return "moveqz.s %0, %2, %1";
	case NE: return "movnez.s %0, %2, %1";
	case LT: return "movltz.s %0, %2, %1";
	case GE: return "movgez.s %0, %2, %1";
	default: gcc_unreachable ();
	}
      break;
    case 3:
      switch (GET_CODE (operands[4]))
	{
	case EQ: return "movnez.s %0, %3, %1";
	case NE: return "moveqz.s %0, %3, %1";
	case LT: return "movgez.s %0, %3, %1";
	case GE: return "movltz.s %0, %3, %1";
	default: gcc_unreachable ();
	}
      break;
    default:
      gcc_unreachable ();
    }
  gcc_unreachable ();
}
  [(set_attr "type" "move,move,move,move")
   (set_attr "mode" "SF")
   (set_attr "length" "3,3,3,3")])

;; SFmode conditional move on a boolean (CC) register: MOVT/MOVF for
;; address registers, MOVT.S/MOVF.S for FP registers.
(define_insn "movsfcc_internal1"
  [(set (match_operand:SF 0 "register_operand" "=a,a,f,f")
	(if_then_else:SF (match_operator 4 "boolean_operator"
			    [(match_operand:CC 1 "register_operand" "b,b,b,b")
			     (const_int 0)])
			 (match_operand:SF 2 "register_operand" "r,0,f,0")
			 (match_operand:SF 3 "register_operand" "0,r,0,f")))]
  "TARGET_BOOLEANS"
{
  int isEq = (GET_CODE (operands[4]) == EQ);
  switch (which_alternative)
    {
    case 0:
      if (isEq) return "movf\t%0, %2, %1";
      return "movt\t%0, %2, %1";
    case 1:
      if (isEq) return "movt\t%0, %3, %1";
      return "movf\t%0, %3, %1";
    case 2:
      if (isEq) return "movf.s\t%0, %2, %1";
      return "movt.s\t%0, %2, %1";
    case 3:
      if (isEq) return "movt.s\t%0, %3, %1";
      return "movf.s\t%0, %3, %1";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "move,move,move,move")
   (set_attr "mode" "SF")
   (set_attr "length" "3,3,3,3")])
|
|
;; Floating-point comparisons.

;; These set a boolean (CC) register from an ordered FP compare; the
;; "o" prefix on the opcodes means the result is false for NaNs.

(define_insn "seq_sf"
  [(set (match_operand:CC 0 "register_operand" "=b")
	(eq:CC (match_operand:SF 1 "register_operand" "f")
	       (match_operand:SF 2 "register_operand" "f")))]
  "TARGET_HARD_FLOAT"
  "oeq.s\t%0, %1, %2"
  [(set_attr "type" "farith")
   (set_attr "mode" "BL")
   (set_attr "length" "3")])

(define_insn "slt_sf"
  [(set (match_operand:CC 0 "register_operand" "=b")
	(lt:CC (match_operand:SF 1 "register_operand" "f")
	       (match_operand:SF 2 "register_operand" "f")))]
  "TARGET_HARD_FLOAT"
  "olt.s\t%0, %1, %2"
  [(set_attr "type" "farith")
   (set_attr "mode" "BL")
   (set_attr "length" "3")])

(define_insn "sle_sf"
  [(set (match_operand:CC 0 "register_operand" "=b")
	(le:CC (match_operand:SF 1 "register_operand" "f")
	       (match_operand:SF 2 "register_operand" "f")))]
  "TARGET_HARD_FLOAT"
  "ole.s\t%0, %1, %2"
  [(set_attr "type" "farith")
   (set_attr "mode" "BL")
   (set_attr "length" "3")])
|
|
;; Unconditional branches.

;; Direct unconditional jump to a label (PC-relative J instruction).
(define_insn "jump"
  [(set (pc)
	(label_ref (match_operand 0 "" "")))]
  ""
  "j\t%l0"
  [(set_attr "type" "jump")
   (set_attr "mode" "none")
   (set_attr "length" "3")])
|
;; Indirect jump through a register.  The target address must end up in
;; a single Pmode register before emitting the JX instruction.
(define_expand "indirect_jump"
  [(set (pc)
	(match_operand 0 "register_operand" ""))]
  ""
{
  rtx dest = operands[0];

  /* Force the target into a fresh Pmode register when it is not one
     already.  The copy must be assigned back to DEST so that it is the
     register actually passed to the jump insn below; previously the
     copy was stored only into operands[0] and then ignored, leaving
     the unconverted operand as the jump target.  */
  if (GET_CODE (dest) != REG || GET_MODE (dest) != Pmode)
    dest = copy_to_mode_reg (Pmode, dest);

  emit_jump_insn (gen_indirect_jump_internal (dest));
  DONE;
})
|
;; The actual register-indirect jump instruction (JX).
(define_insn "indirect_jump_internal"
  [(set (pc) (match_operand:SI 0 "register_operand" "r"))]
  ""
  "jx\t%0"
  [(set_attr "type" "jump")
   (set_attr "mode" "none")
   (set_attr "length" "3")])
|
|
;; Jump through a dispatch table.  For PIC the table entries are stored
;; relative to the table's start label, so the label address is added
;; to the loaded entry before jumping.
(define_expand "tablejump"
  [(use (match_operand:SI 0 "register_operand" ""))
   (use (label_ref (match_operand 1 "" "")))]
  ""
{
  rtx target = operands[0];
  if (flag_pic)
    {
      /* For PIC, the table entry is relative to the start of the table.  */
      rtx label = gen_reg_rtx (SImode);
      target = gen_reg_rtx (SImode);
      emit_move_insn (label, gen_rtx_LABEL_REF (SImode, operands[1]));
      emit_insn (gen_addsi3 (target, operands[0], label));
    }
  emit_jump_insn (gen_tablejump_internal (target, operands[1]));
  DONE;
})

;; The dispatch jump itself; the (use (label_ref ...)) keeps the jump
;; table alive for flow analysis.
(define_insn "tablejump_internal"
  [(set (pc)
	(match_operand:SI 0 "register_operand" "r"))
   (use (label_ref (match_operand 1 "" "")))]
  ""
  "jx\t%0"
  [(set_attr "type" "jump")
   (set_attr "mode" "none")
   (set_attr "length" "3")])
|
|
;; Function calls.

;; Wrap a symbol in an UNSPEC_PLT marker so PIC calls go through the
;; procedure linkage table.
(define_expand "sym_PLT"
  [(const (unspec [(match_operand:SI 0 "" "")] UNSPEC_PLT))]
  ""
  "")

;; Call with no return value.  For PIC, non-local/external symbols are
;; redirected through the PLT; anything that is not a valid call
;; operand is forced into a register.
(define_expand "call"
  [(call (match_operand 0 "memory_operand" "")
	 (match_operand 1 "" ""))]
  ""
{
  rtx addr = XEXP (operands[0], 0);
  if (flag_pic && GET_CODE (addr) == SYMBOL_REF
      && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr)))
    addr = gen_sym_PLT (addr);
  if (!call_insn_operand (addr, VOIDmode))
    XEXP (operands[0], 0) = copy_to_mode_reg (Pmode, addr);
})

;; The call instruction proper; xtensa_emit_call picks CALL/CALLX (and
;; window variants) based on the operand kind (constant vs. register).
(define_insn "call_internal"
  [(call (mem (match_operand:SI 0 "call_insn_operand" "n,i,r"))
	 (match_operand 1 "" "i,i,i"))]
  ""
{
  return xtensa_emit_call (0, operands);
}
  [(set_attr "type" "call")
   (set_attr "mode" "none")
   (set_attr "length" "3")])
|
;; Call returning a value; same PLT/register-forcing logic as "call".
(define_expand "call_value"
  [(set (match_operand 0 "register_operand" "")
	(call (match_operand 1 "memory_operand" "")
	      (match_operand 2 "" "")))]
  ""
{
  rtx addr = XEXP (operands[1], 0);
  if (flag_pic && GET_CODE (addr) == SYMBOL_REF
      && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr)))
    addr = gen_sym_PLT (addr);
  if (!call_insn_operand (addr, VOIDmode))
    XEXP (operands[1], 0) = copy_to_mode_reg (Pmode, addr);
})

;; Cannot combine constraints for operand 0 into "afvb":
;; reload.c:find_reloads seems to assume that grouped constraints somehow
;; specify related register classes, and when they don't the constraints
;; fail to match.  By not grouping the constraints, we get the correct
;; behavior.
(define_insn "call_value_internal"
  [(set (match_operand 0 "register_operand" "=af,af,af,v,v,v,b,b,b")
	(call (mem (match_operand:SI 1 "call_insn_operand"
			"n,i,r,n,i,r,n,i,r"))
	      (match_operand 2 "" "i,i,i,i,i,i,i,i,i")))]
  ""
{
  return xtensa_emit_call (1, operands);
}
  [(set_attr "type" "call")
   (set_attr "mode" "none")
   (set_attr "length" "3")])
|
;; Windowed-ABI function entry: allocates the register window/stack
;; frame.  %0 is the frame size for the .frame debug directive, %1 the
;; adjustment encoded in the ENTRY instruction.
(define_insn "entry"
  [(set (reg:SI A1_REG)
	(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "i")
			     (match_operand:SI 1 "const_int_operand" "i")]
			    UNSPECV_ENTRY))]
  ""
{
  /* Tell the assembler/debugger which register anchors the frame.  */
  if (frame_pointer_needed)
    output_asm_insn (".frame\ta7, %0", operands);
  else
    output_asm_insn (".frame\tsp, %0", operands);
  return "entry\tsp, %1";
}
  [(set_attr "type" "move")
   (set_attr "mode" "SI")
   (set_attr "length" "3")])

;; Windowed-ABI return (RETW, or the 2-byte RETW.N with Density).
(define_insn "return"
  [(return)
   (use (reg:SI A0_REG))]
  "reload_completed"
{
  return (TARGET_DENSITY ? "retw.n" : "retw");
}
  [(set_attr "type" "jump")
   (set_attr "mode" "none")
   (set_attr "length" "2")])
|
|
;; Miscellaneous instructions.

;; Prologue/epilogue generation is done entirely in C.
(define_expand "prologue"
  [(const_int 0)]
  ""
{
  xtensa_expand_prologue ();
  DONE;
})

(define_expand "epilogue"
  [(return)]
  ""
{
  emit_jump_insn (gen_return ());
  DONE;
})

;; No-op (2-byte NOP.N when the Density option is available).
(define_insn "nop"
  [(const_int 0)]
  ""
{
  return (TARGET_DENSITY ? "nop.n" : "nop");
}
  [(set_attr "type" "nop")
   (set_attr "mode" "none")
   (set_attr "length" "3")])

;; Non-local goto (e.g. longjmp-style exits from nested functions) is
;; handled in C; operands are the frame, label, stack, and handler slots
;; supplied by the middle end.
(define_expand "nonlocal_goto"
  [(match_operand:SI 0 "general_operand" "")
   (match_operand:SI 1 "general_operand" "")
   (match_operand:SI 2 "general_operand" "")
   (match_operand:SI 3 "" "")]
  ""
{
  xtensa_expand_nonlocal_goto (operands);
  DONE;
})
|
;; Setting up a frame pointer is tricky for Xtensa because GCC doesn't
;; know if a frame pointer is required until the reload pass, and
;; because there may be an incoming argument value in the hard frame
;; pointer register (a7).  If there is an incoming argument in that
;; register, the "set_frame_ptr" insn gets inserted immediately after
;; the insn that copies the incoming argument to a pseudo or to the
;; stack.  This serves several purposes here: (1) it keeps the
;; optimizer from copy-propagating or scheduling the use of a7 as an
;; incoming argument away from the beginning of the function; (2) we
;; can use a post-reload splitter to expand away the insn if a frame
;; pointer is not required, so that the post-reload scheduler can do
;; the right thing; and (3) it makes it easy for the prologue expander
;; to search for this insn to determine whether it should add a new insn
;; to set up the frame pointer.

;; Emits "mov a7, sp" only when a frame pointer turned out to be
;; needed; otherwise it outputs nothing (and is normally split away).
(define_insn "set_frame_ptr"
  [(set (reg:SI A7_REG) (unspec_volatile:SI [(const_int 0)] UNSPECV_SET_FP))]
  ""
{
  if (frame_pointer_needed)
    return "mov\ta7, sp";
  return "";
}
  [(set_attr "type" "move")
   (set_attr "mode" "SI")
   (set_attr "length" "3")])

;; Post-reload splitter to remove fp assignment when it's not needed.
(define_split
  [(set (reg:SI A7_REG) (unspec_volatile:SI [(const_int 0)] UNSPECV_SET_FP))]
  "reload_completed && !frame_pointer_needed"
  [(unspec [(const_int 0)] UNSPEC_NOP)]
  "")

;; The preceding splitter needs something to split the insn into;
;; things start breaking if the result is just a "use" so instead we
;; generate the following insn.
(define_insn "*unspec_nop"
  [(unspec [(const_int 0)] UNSPEC_NOP)]
  ""
  ""
  [(set_attr "type" "nop")
   (set_attr "mode" "none")
   (set_attr "length" "0")])
|
;; The fix_return_addr pattern sets the high 2 bits of an address in a
;; register to match the high bits of the current PC.
;; The sequence uses CALL0 to a local label purely to capture the PC in
;; a0 (saving/restoring the caller's a0 in scratch %2), then splices
;; the PC's top two bits onto %1 with a funnel shift; total 8
;; three-byte instructions (length 24).
(define_insn "fix_return_addr"
  [(set (match_operand:SI 0 "register_operand" "=a")
	(unspec:SI [(match_operand:SI 1 "register_operand" "r")]
		   UNSPEC_RET_ADDR))
   (clobber (match_scratch:SI 2 "=r"))
   (clobber (match_scratch:SI 3 "=r"))]
  ""
  "mov\t%2, a0\;call0\t0f\;.align\t4\;0:\;mov\t%3, a0\;mov\ta0, %2\;\
srli\t%3, %3, 30\;slli\t%0, %1, 2\;ssai\t2\;src\t%0, %3, %0"
  [(set_attr "type" "multi")
   (set_attr "mode" "SI")
   (set_attr "length" "24")])
|
|
;; Instructions for the Xtensa "boolean" option.

;; Branch on a boolean (CC) register: EQ with 0 means "branch if
;; false" (BF), NE means "branch if true" (BT).
(define_insn "*booltrue"
  [(set (pc)
	(if_then_else (match_operator 2 "boolean_operator"
			 [(match_operand:CC 0 "register_operand" "b")
			  (const_int 0)])
		      (label_ref (match_operand 1 "" ""))
		      (pc)))]
  "TARGET_BOOLEANS"
{
  if (GET_CODE (operands[2]) == EQ)
    return "bf\t%0, %1";
  else
    return "bt\t%0, %1";
}
  [(set_attr "type" "jump")
   (set_attr "mode" "none")
   (set_attr "length" "3")])

;; Inverted form: the label is on the FALSE arm, so the opcodes swap.
(define_insn "*boolfalse"
  [(set (pc)
	(if_then_else (match_operator 2 "boolean_operator"
			 [(match_operand:CC 0 "register_operand" "b")
			  (const_int 0)])
		      (pc)
		      (label_ref (match_operand 1 "" ""))))]
  "TARGET_BOOLEANS"
{
  if (GET_CODE (operands[2]) == EQ)
    return "bt\t%0, %1";
  else
    return "bf\t%0, %1";
}
  [(set_attr "type" "jump")
   (set_attr "mode" "none")
   (set_attr "length" "3")])
/xtensa.opt
0,0 → 1,39
; Options for the Tensilica Xtensa port of the compiler. |
|
; Copyright (C) 2005, 2007 Free Software Foundation, Inc. |
; |
; This file is part of GCC. |
; |
; GCC is free software; you can redistribute it and/or modify it under |
; the terms of the GNU General Public License as published by the Free |
; Software Foundation; either version 3, or (at your option) any later |
; version. |
; |
; GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
; WARRANTY; without even the implied warranty of MERCHANTABILITY or |
; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
; for more details. |
; |
; You should have received a copy of the GNU General Public License |
; along with GCC; see the file COPYING3. If not see |
; <http://www.gnu.org/licenses/>. |
|
; Each record below is: option name, option properties, help string.
; Options with a Mask(...) property get a MASK_/TARGET_ macro pair.

mconst16
Target Report Mask(CONST16)
Use CONST16 instruction to load constants

mfused-madd
Target Report Mask(FUSED_MADD)
Enable fused multiply/add and multiply/subtract FP instructions

mlongcalls
Target
Use indirect CALLXn instructions for large programs

mtarget-align
Target
Automatically align branch targets to reduce branch penalties

mtext-section-literals
Target
Intersperse literal pools with code in the text section
/t-xtensa
0,0 → 1,21
# Assembly libgcc support routines; each name in LIB1ASMFUNCS selects
# one #ifdef'd section of lib1funcs.asm to build.
LIB1ASMSRC = xtensa/lib1funcs.asm
LIB1ASMFUNCS = _mulsi3 _nsau _divsi3 _modsi3 _udivsi3 _umodsi3 \
	_negsf2 _addsubsf3 _mulsf3 _divsf3 _cmpsf2 _fixsfsi _fixsfdi \
	_fixunssfsi _fixunssfdi _floatsisf _floatunsisf \
	_floatdisf _floatundisf \
	_negdf2 _addsubdf3 _muldf3 _divdf3 _cmpdf2 _fixdfsi _fixdfdi \
	_fixunsdfsi _fixunsdfdi _floatsidf _floatunsidf \
	_floatdidf _floatundidf \
	_truncdfsf2 _extendsfdf2

LIB2FUNCS_EXTRA = $(srcdir)/config/xtensa/lib2funcs.S

# The crti/crtn startup stubs are preprocessed assembly, hence
# -x assembler-with-cpp.
$(T)crti.o: $(srcdir)/config/xtensa/crti.asm $(GCC_PASSES)
	$(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \
	-c -o $(T)crti.o -x assembler-with-cpp $(srcdir)/config/xtensa/crti.asm
$(T)crtn.o: $(srcdir)/config/xtensa/crtn.asm $(GCC_PASSES)
	$(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \
	-c -o $(T)crtn.o -x assembler-with-cpp $(srcdir)/config/xtensa/crtn.asm

# Garbage-collection roots for the Xtensa backend.
$(out_object_file): gt-xtensa.h
gt-xtensa.h : s-gtype ; @true
/lib1funcs.asm
0,0 → 1,484
/* Assembly functions for the Xtensa version of libgcc1. |
Copyright (C) 2001, 2002, 2003, 2005, 2006 Free Software Foundation, Inc. |
Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. |
|
This file is part of GCC. |
|
GCC is free software; you can redistribute it and/or modify it under |
the terms of the GNU General Public License as published by the Free |
Software Foundation; either version 2, or (at your option) any later |
version. |
|
In addition to the permissions in the GNU General Public License, the |
Free Software Foundation gives you unlimited permission to link the |
compiled version of this file into combinations with other programs, |
and to distribute those combinations without any restriction coming |
from the use of this file. (The General Public License restrictions |
do apply in other respects; for example, they cover modification of |
the file, and distribution when not linked into a combine |
executable.) |
|
GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
WARRANTY; without even the implied warranty of MERCHANTABILITY or |
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
for more details. |
|
You should have received a copy of the GNU General Public License |
along with GCC; see the file COPYING. If not, write to the Free |
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA |
02110-1301, USA. */ |
|
#include "xtensa-config.h" |
|
# Define macros for the ABS and ADDX* instructions to handle cases
# where they are not included in the Xtensa processor configuration.

# do_abs: dst = |src|.  Fallback negates into tmp, then keeps src when
# src >= 0 (movgez), so tmp may not alias src.
	.macro	do_abs dst, src, tmp
#if XCHAL_HAVE_ABS
	abs	\dst, \src
#else
	neg	\tmp, \src
	movgez	\tmp, \src, \src
	mov	\dst, \tmp
#endif
	.endm

# do_addx2: dst = (as << 1) + at; fallback via shift+add in tmp.
	.macro	do_addx2 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx2	\dst, \as, \at
#else
	slli	\tmp, \as, 1
	add	\dst, \tmp, \at
#endif
	.endm

# do_addx4: dst = (as << 2) + at.
	.macro	do_addx4 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx4	\dst, \as, \at
#else
	slli	\tmp, \as, 2
	add	\dst, \tmp, \at
#endif
	.endm

# do_addx8: dst = (as << 3) + at.
	.macro	do_addx8 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx8	\dst, \as, \at
#else
	slli	\tmp, \as, 3
	add	\dst, \tmp, \at
#endif
	.endm
|
# Define macros for leaf function entry and return, supporting either the
# standard register windowed ABI or the non-windowed call0 ABI.  These
# macros do not allocate any extra stack space, so they only work for
# leaf functions that do not need to spill anything to the stack.

# leaf_entry reg, size: windowed ABI emits ENTRY (window rotate +
# frame allocation); call0 ABI needs no entry code for a leaf.
	.macro	leaf_entry reg, size
#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
	entry	\reg, \size
#else
	/* do nothing */
#endif
	.endm

# leaf_return: RETW for the windowed ABI, plain RET for call0.
	.macro	leaf_return
#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
	retw
#else
	ret
#endif
	.endm
|
|
#ifdef L_mulsi3
	.align	4
	.global	__mulsi3
	.type	__mulsi3,@function
# 32x32 -> 32-bit multiply: a2 = a2 * a3.  Three implementations are
# selected by the processor configuration: MUL16 (16x16 partial
# products), MAC16 (accumulator), or a shift-and-add software loop.
__mulsi3:
	leaf_entry sp, 16

#if XCHAL_HAVE_MUL16
	# Fast path: if both operands fit in 16 bits, one MUL16U does it.
	or	a4, a2, a3
	srai	a4, a4, 16
	bnez	a4, .LMUL16
	mul16u	a2, a2, a3
	leaf_return
.LMUL16:
	# Full 32-bit product from three 16x16 partial products; the
	# high*high term only affects bits >= 32 and is dropped.
	srai	a4, a2, 16
	srai	a5, a3, 16
	mul16u	a7, a4, a3
	mul16u	a6, a5, a2
	mul16u	a4, a2, a3
	add	a7, a7, a6
	slli	a7, a7, 16
	add	a2, a7, a4

#elif XCHAL_HAVE_MAC16
	# Accumulate the two cross terms in ACC, read the low product
	# separately, and combine (low 32 bits only).
	mul.aa.hl a2, a3
	mula.aa.lh a2, a3
	rsr	a5, ACCLO
	umul.aa.ll a2, a3
	rsr	a4, ACCLO
	slli	a5, a5, 16
	add	a2, a4, a5

#else /* !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MAC16 */

	# Multiply one bit at a time, but unroll the loop 4x to better
	# exploit the addx instructions and avoid overhead.
	# Peel the first iteration to save a cycle on init.

	# Avoid negative numbers.
	xor	a5, a2, a3	# top bit is 1 iff one of the inputs is negative
	do_abs	a3, a3, a6
	do_abs	a2, a2, a6

	# Swap so the second argument is smaller.
	sub	a7, a2, a3
	mov	a4, a3
	movgez	a4, a2, a7	# a4 = max(a2, a3)
	movltz	a3, a2, a7	# a3 = min(a2, a3)

	# a2 accumulates the product; conditionally add a4 shifted by
	# 0..3 according to the low four bits of the multiplier a3.
	movi	a2, 0
	extui	a6, a3, 0, 1
	movnez	a2, a4, a6

	do_addx2 a7, a4, a2, a7
	extui	a6, a3, 1, 1
	movnez	a2, a7, a6

	do_addx4 a7, a4, a2, a7
	extui	a6, a3, 2, 1
	movnez	a2, a7, a6

	do_addx8 a7, a4, a2, a7
	extui	a6, a3, 3, 1
	movnez	a2, a7, a6

	bgeui	a3, 16, .Lmult_main_loop
	# Done: restore the sign recorded in a5 and return.
	neg	a3, a2
	movltz	a2, a3, a5
	leaf_return

	.align	4
.Lmult_main_loop:
	# Consume four multiplier bits per iteration: shift the
	# multiplier right and the multiplicand left by 4.
	srli	a3, a3, 4
	slli	a4, a4, 4

	add	a7, a4, a2
	extui	a6, a3, 0, 1
	movnez	a2, a7, a6

	do_addx2 a7, a4, a2, a7
	extui	a6, a3, 1, 1
	movnez	a2, a7, a6

	do_addx4 a7, a4, a2, a7
	extui	a6, a3, 2, 1
	movnez	a2, a7, a6

	do_addx8 a7, a4, a2, a7
	extui	a6, a3, 3, 1
	movnez	a2, a7, a6

	bgeui	a3, 16, .Lmult_main_loop

	# Restore the sign of the product (a5 < 0 iff signs differed).
	neg	a3, a2
	movltz	a2, a3, a5

#endif /* !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MAC16 */

	leaf_return
	.size	__mulsi3,.-__mulsi3

#endif /* L_mulsi3 */
|
|
# Define a macro for the NSAU (unsigned normalize shift amount)
# instruction, which computes the number of leading zero bits,
# to handle cases where it is not included in the Xtensa processor
# configuration.

# do_nsau cnt, val, tmp, a: cnt = count of leading zeros in val.
# The fallback narrows the search to the highest nonzero byte (two
# 16/8-bit probes), then finishes with a 256-entry lookup table
# (__nsau_data).  tmp and a are scratch registers.
	.macro	do_nsau cnt, val, tmp, a
#if XCHAL_HAVE_NSA
	nsau	\cnt, \val
#else
	mov	\a, \val
	movi	\cnt, 0
	extui	\tmp, \a, 16, 16	# probe the high halfword
	bnez	\tmp, 0f
	movi	\cnt, 16
	slli	\a, \a, 16
0:
	extui	\tmp, \a, 24, 8		# probe the top byte
	bnez	\tmp, 1f
	addi	\cnt, \cnt, 8
	slli	\a, \a, 8
1:
	# Look up the leading-zero count of the (now top) byte.
	movi	\tmp, __nsau_data
	extui	\a, \a, 24, 8
	add	\tmp, \tmp, \a
	l8ui	\tmp, \tmp, 0
	add	\cnt, \cnt, \tmp
#endif /* !XCHAL_HAVE_NSA */
	.endm
|
#ifdef L_nsau
	.section .rodata
	.align	4
	.global	__nsau_data
	.type	__nsau_data,@object
# Lookup table: entry i is the number of leading zero bits in the
# byte value i (8 for 0, down to 0 for 128..255).  Only needed when
# the NSA instruction is not configured.
__nsau_data:
#if !XCHAL_HAVE_NSA
	.byte	8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4
	.byte	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
	.byte	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
	.byte	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
#endif /* !XCHAL_HAVE_NSA */
	.size	__nsau_data,.-__nsau_data
	.hidden	__nsau_data
#endif /* L_nsau */
|
|
#ifdef L_udivsi3
	# unsigned __udivsi3 (unsigned dividend, unsigned divisor)
	# In:   a2 = dividend, a3 = divisor
	# Out:  a2 = quotient (0 when divisor == 0)
	# Shift-and-subtract division: align the divisor under the dividend
	# using the leading-zero counts, then generate one quotient bit per
	# loop iteration.
	.align	4
	.global	__udivsi3
	.type	__udivsi3,@function
__udivsi3:
	leaf_entry sp, 16
	bltui	a3, 2, .Lle_one	# check if the divisor <= 1

	mov	a6, a2		# keep dividend in a6
	do_nsau	a5, a6, a2, a7	# dividend_shift = nsau(dividend)
	do_nsau	a4, a3, a2, a7	# divisor_shift = nsau(divisor)
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	# count = divisor_shift - dividend_shift
	ssl	a4
	sll	a3, a3		# divisor <<= count
	movi	a2, 0		# quotient = 0

	# test-subtract-and-shift loop; one quotient bit on each iteration
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a6, a3, .Lzerobit
	sub	a6, a6, a3
	addi	a2, a2, 1
.Lzerobit:
	slli	a2, a2, 1
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

	bltu	a6, a3, .Lreturn
	addi	a2, a2, 1	# increment quotient if dividend >= divisor
.Lreturn:
	leaf_return

.Lle_one:
	beqz	a3, .Lerror	# divisor == 0: error case, return 0
	leaf_return		# divisor == 1: return the dividend unchanged

.Lspecial:
	# dividend has at least as many leading zeros as the divisor, so
	# the quotient can only be 0 or 1: return dividend >= divisor
	bltu	a6, a3, .Lreturn0
	movi	a2, 1
	leaf_return

.Lerror:
	# just return 0; could throw an exception

.Lreturn0:
	movi	a2, 0
	leaf_return
	.size	__udivsi3,.-__udivsi3

#endif /* L_udivsi3 */
|
|
#ifdef L_divsi3
	# int __divsi3 (int dividend, int divisor)
	# In:   a2 = dividend, a3 = divisor
	# Out:  a2 = quotient, truncated toward zero (0 when divisor == 0)
	# Performs an unsigned shift-and-subtract division on the absolute
	# values and fixes up the sign of the result at the end.
	.align	4
	.global	__divsi3
	.type	__divsi3,@function
__divsi3:
	leaf_entry sp, 16
	xor	a7, a2, a3	# sign = dividend ^ divisor
	do_abs	a6, a2, a4	# udividend = abs(dividend)
	do_abs	a3, a3, a4	# udivisor = abs(divisor)
	bltui	a3, 2, .Lle_one	# check if udivisor <= 1
	do_nsau	a5, a6, a2, a8	# udividend_shift = nsau(udividend)
	do_nsau	a4, a3, a2, a8	# udivisor_shift = nsau(udivisor)
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	# count = udivisor_shift - udividend_shift
	ssl	a4
	sll	a3, a3		# udivisor <<= count
	movi	a2, 0		# quotient = 0

	# test-subtract-and-shift loop; one quotient bit on each iteration
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a6, a3, .Lzerobit
	sub	a6, a6, a3
	addi	a2, a2, 1
.Lzerobit:
	slli	a2, a2, 1
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

	bltu	a6, a3, .Lreturn
	addi	a2, a2, 1	# increment quotient if udividend >= udivisor
.Lreturn:
	neg	a5, a2
	movltz	a2, a5, a7	# return (sign < 0) ? -quotient : quotient
	leaf_return

.Lle_one:
	beqz	a3, .Lerror
	neg	a2, a6		# if udivisor == 1, then return...
	movgez	a2, a6, a7	# (sign < 0) ? -udividend : udividend
	leaf_return

.Lspecial:
	# udividend has at least as many leading zeros as udivisor, so the
	# magnitude of the quotient can only be 0 or 1
	bltu	a6, a3, .Lreturn0 # if dividend < divisor, return 0
	movi	a2, 1
	movi	a4, -1
	movltz	a2, a4, a7	# else return (sign < 0) ? -1 : 1
	leaf_return

.Lerror:
	# just return 0; could throw an exception

.Lreturn0:
	movi	a2, 0
	leaf_return
	.size	__divsi3,.-__divsi3

#endif /* L_divsi3 */
|
|
#ifdef L_umodsi3
	# unsigned __umodsi3 (unsigned dividend, unsigned divisor)
	# In:   a2 = dividend, a3 = divisor
	# Out:  a2 = dividend % divisor (0 when divisor == 0)
	# Same shift-and-subtract scheme as __udivsi3, but only the
	# remainder (left in a2) is kept; no quotient bits are recorded.
	.align	4
	.global	__umodsi3
	.type	__umodsi3,@function
__umodsi3:
	leaf_entry sp, 16
	bltui	a3, 2, .Lle_one	# check if the divisor is <= 1

	do_nsau	a5, a2, a6, a7	# dividend_shift = nsau(dividend)
	do_nsau	a4, a3, a6, a7	# divisor_shift = nsau(divisor)
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	# count = divisor_shift - dividend_shift
	ssl	a4
	sll	a3, a3		# divisor <<= count

	# test-subtract-and-shift loop
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a2, a3, .Lzerobit
	sub	a2, a2, a3
.Lzerobit:
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

.Lspecial:
	bltu	a2, a3, .Lreturn
	sub	a2, a2, a3	# subtract once more if dividend >= divisor
.Lreturn:
	leaf_return

.Lle_one:
	# the divisor is either 0 or 1, so just return 0.
	# someday we may want to throw an exception if the divisor is 0.
	movi	a2, 0
	leaf_return
	.size	__umodsi3,.-__umodsi3

#endif /* L_umodsi3 */
|
|
#ifdef L_modsi3
	# int __modsi3 (int dividend, int divisor)
	# In:   a2 = dividend, a3 = divisor
	# Out:  a2 = dividend % divisor (0 when divisor == 0)
	# Computes the unsigned remainder of the absolute values, then
	# negates the result if the dividend was negative (C truncated
	# division: the remainder takes the sign of the dividend).
	.align	4
	.global	__modsi3
	.type	__modsi3,@function
__modsi3:
	leaf_entry sp, 16
	mov	a7, a2		# save original (signed) dividend
	do_abs	a2, a2, a4	# udividend = abs(dividend)
	do_abs	a3, a3, a4	# udivisor = abs(divisor)
	bltui	a3, 2, .Lle_one	# check if udivisor <= 1
	do_nsau	a5, a2, a6, a8	# udividend_shift = nsau(udividend)
	do_nsau	a4, a3, a6, a8	# udivisor_shift = nsau(udivisor)
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	# count = udivisor_shift - udividend_shift
	ssl	a4
	sll	a3, a3		# udivisor <<= count

	# test-subtract-and-shift loop
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a2, a3, .Lzerobit
	sub	a2, a2, a3
.Lzerobit:
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

.Lspecial:
	bltu	a2, a3, .Lreturn
	sub	a2, a2, a3	# subtract once more if udividend >= udivisor
.Lreturn:
	bgez	a7, .Lpositive
	neg	a2, a2		# if (dividend < 0), return -udividend
.Lpositive:
	leaf_return

.Lle_one:
	# udivisor is either 0 or 1, so just return 0.
	# someday we may want to throw an exception if udivisor is 0.
	movi	a2, 0
	leaf_return
	.size	__modsi3,.-__modsi3

#endif /* L_modsi3 */
|
#include "ieee754-df.S" |
#include "ieee754-sf.S" |
/t-elf
0,0 → 1,6
# Build CRT files and libgcc with the "longcalls" option so that every
# call site can reach a callee placed anywhere in the address space.
CRTSTUFF_T_CFLAGS += -mlongcalls
CRTSTUFF_T_CFLAGS_S += -mlongcalls
TARGET_LIBGCC2_CFLAGS += -mlongcalls

# Extra startup/termination object files built for each multilib variant.
EXTRA_MULTILIB_PARTS = crti.o crtn.o crtbegin.o crtend.o
/xtensa-protos.h
0,0 → 1,84
/* Prototypes of target machine for GNU compiler for Xtensa. |
Copyright 2001, 2002, 2003, 2004, 2005, 2007 |
Free Software Foundation, Inc. |
Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. |
|
This file is part of GCC. |
|
GCC is free software; you can redistribute it and/or modify it under |
the terms of the GNU General Public License as published by the Free |
Software Foundation; either version 3, or (at your option) any later |
version. |
|
GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
WARRANTY; without even the implied warranty of MERCHANTABILITY or |
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
for more details. |
|
You should have received a copy of the GNU General Public License |
along with GCC; see the file COPYING3. If not see |
<http://www.gnu.org/licenses/>. */ |
|
#ifndef __XTENSA_PROTOS_H__
#define __XTENSA_PROTOS_H__

/* Functions to test whether an immediate fits in a given field.  */
extern bool xtensa_simm8 (HOST_WIDE_INT);
extern bool xtensa_simm8x256 (HOST_WIDE_INT);
extern bool xtensa_simm12b (HOST_WIDE_INT);
extern bool xtensa_b4const_or_zero (HOST_WIDE_INT);
extern bool xtensa_b4constu (HOST_WIDE_INT);
extern bool xtensa_mask_immediate (HOST_WIDE_INT);
extern bool xtensa_const_ok_for_letter_p (HOST_WIDE_INT, int);
extern bool xtensa_mem_offset (unsigned, enum machine_mode);

/* Functions within xtensa.c that we reference.  */
#ifdef RTX_CODE
/* RTL-level helpers: operand predicates, move/branch expanders, and
   instruction-output routines.  Only visible when rtl.h has been
   included (RTX_CODE defined).  */
extern int xt_true_regnum (rtx);
extern int xtensa_valid_move (enum machine_mode, rtx *);
extern int smalloffset_mem_p (rtx);
extern int constantpool_address_p (rtx);
extern int constantpool_mem_p (rtx);
extern void xtensa_extend_reg (rtx, rtx);
extern bool xtensa_extra_constraint (rtx, int);
extern void xtensa_expand_conditional_branch (rtx *, enum rtx_code);
extern int xtensa_expand_conditional_move (rtx *, int);
extern int xtensa_expand_scc (rtx *);
extern int xtensa_expand_block_move (rtx *);
extern void xtensa_split_operand_pair (rtx *, enum machine_mode);
extern int xtensa_emit_move_sequence (rtx *, enum machine_mode);
extern rtx xtensa_copy_incoming_a7 (rtx);
extern void xtensa_expand_nonlocal_goto (rtx *);
extern void xtensa_emit_loop_end (rtx, rtx *);
extern char *xtensa_emit_call (int, rtx *);

#ifdef TREE_CODE
/* Helpers that need both trees and RTL (argument setup, va_start).  */
extern void init_cumulative_args (CUMULATIVE_ARGS *, int);
extern void xtensa_va_start (tree, rtx);
#endif /* TREE_CODE */

extern void print_operand (FILE *, rtx, int);
extern void print_operand_address (FILE *, rtx);
extern void xtensa_output_literal (FILE *, rtx, enum machine_mode, int);
extern rtx xtensa_return_addr (int, rtx);
extern enum reg_class xtensa_preferred_reload_class (rtx, enum reg_class, int);
extern enum reg_class xtensa_secondary_reload_class (enum reg_class,
						     enum machine_mode, rtx,
						     int);
#endif /* RTX_CODE */

#ifdef TREE_CODE
/* Argument-passing support that only needs tree types.  */
extern void function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode, tree);
extern struct rtx_def *function_arg (CUMULATIVE_ARGS *, enum machine_mode,
				     tree, int);
#endif /* TREE_CODE */

/* Target hooks and frame-layout helpers with no tree/RTL dependencies.  */
extern void xtensa_setup_frame_addresses (void);
extern int xtensa_dbx_register_number (int);
extern void override_options (void);
extern long compute_frame_size (int);
extern int xtensa_frame_pointer_required (void);
extern void xtensa_expand_prologue (void);
extern void order_regs_for_local_alloc (void);

#endif /* !__XTENSA_PROTOS_H__ */
/ieee754-sf.S
0,0 → 1,1734
/* IEEE-754 single-precision functions for Xtensa |
Copyright (C) 2006 Free Software Foundation, Inc. |
Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. |
|
This file is part of GCC. |
|
GCC is free software; you can redistribute it and/or modify it |
under the terms of the GNU General Public License as published by |
the Free Software Foundation; either version 2, or (at your option) |
any later version. |
|
In addition to the permissions in the GNU General Public License, |
the Free Software Foundation gives you unlimited permission to link |
the compiled version of this file into combinations with other |
programs, and to distribute those combinations without any |
restriction coming from the use of this file. (The General Public |
License restrictions do apply in other respects; for example, they |
cover modification of the file, and distribution when not linked |
into a combine executable.) |
|
GCC is distributed in the hope that it will be useful, but WITHOUT |
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public |
License for more details. |
|
You should have received a copy of the GNU General Public License |
along with GCC; see the file COPYING. If not, write to the Free |
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA |
02110-1301, USA. */ |
|
/* Register names for the high and low words of a 64-bit value held in
   a register pair, selected by target endianness.  (NOTE(review): the
   routines that use these names are outside this excerpt — presumably
   the conversions to/from 64-bit integers; confirm against the rest of
   the file.)  */
#ifdef __XTENSA_EB__
#define xh a2
#define xl a3
#define yh a4
#define yl a5
#else
#define xh a3
#define xl a2
#define yh a5
#define yl a4
#endif
|
/* Warning! The branch displacements for some Xtensa branch instructions |
are quite small, and this code has been carefully laid out to keep |
branch targets in range. If you change anything, be sure to check that |
the assembler is not relaxing anything to branch over a jump. */ |
|
#ifdef L_negsf2

	/* float __negsf2 (float x)
	   In:  a2 = x (IEEE single bit pattern).  Out: a2 = -x.
	   Just toggles the sign bit, so it also "negates" NaNs, Infinities
	   and zeros bitwise.  */
	.align	4
	.global	__negsf2
	.type	__negsf2, @function
__negsf2:
	leaf_entry sp, 16
	movi	a4, 0x80000000	/* sign-bit mask */
	xor	a2, a2, a4	/* flip the sign bit */
	leaf_return

#endif /* L_negsf2 */
|
#ifdef L_addsubsf3 |
|
/* Addition */ |
__addsf3_aux:

	/* Handle NaNs and Infinities.  (This code is placed before the
	   start of the function just to keep it in range of the limited
	   branch displacements.)  Throughout, a6 = 0x7f800000 (the
	   exponent mask, set up by __addsf3 before branching here).  */

.Ladd_xnan_or_inf:
	/* If y is neither Infinity nor NaN, return x.  */
	bnall	a3, a6, 1f
	/* If x is a NaN, return it.  Otherwise, return y.  */
	slli	a7, a2, 9	/* drop sign/exponent; nonzero => x is NaN */
	beqz	a7, .Ladd_ynan_or_inf
1:	leaf_return

.Ladd_ynan_or_inf:
	/* Return y.  */
	mov	a2, a3
	leaf_return

.Ladd_opposite_signs:
	/* Operand signs differ.  Do a subtraction.  */
	slli	a7, a6, 8	/* a7 = 0x80000000 (sign-bit mask) */
	xor	a3, a3, a7	/* negate y, then treat as x - (-y) */
	j	.Lsub_same_sign
|
	/* float __addsf3 (float x, float y)
	   In:  a2 = x, a3 = y (IEEE single bit patterns)
	   Out: a2 = x + y, rounded to nearest, ties to even.
	   Opposite-sign operands are routed to the subtraction code
	   (and vice versa) so the core paths only ever add or subtract
	   magnitudes.  */
	.align	4
	.global	__addsf3
	.type	__addsf3, @function
__addsf3:
	leaf_entry sp, 16
	movi	a6, 0x7f800000	/* exponent mask, live through the aux code */

	/* Check if the two operands have the same sign.  */
	xor	a7, a2, a3
	bltz	a7, .Ladd_opposite_signs

.Ladd_same_sign:
	/* Check if either exponent == 0x7f8 (i.e., NaN or Infinity).  */
	ball	a2, a6, .Ladd_xnan_or_inf
	ball	a3, a6, .Ladd_ynan_or_inf

	/* Compare the exponents.  The smaller operand will be shifted
	   right by the exponent difference and added to the larger
	   one.  */
	extui	a7, a2, 23, 9
	extui	a8, a3, 23, 9
	bltu	a7, a8, .Ladd_shiftx

.Ladd_shifty:
	/* Check if the smaller (or equal) exponent is zero.  */
	bnone	a3, a6, .Ladd_yexpzero

	/* Replace y sign/exponent with 0x008 (the implicit "1.0").  */
	or	a3, a3, a6
	slli	a3, a3, 8
	srli	a3, a3, 8

.Ladd_yexpdiff:
	/* Compute the exponent difference.  */
	sub	a10, a7, a8

	/* Exponent difference > 32 -- just return the bigger value.  */
	bgeui	a10, 32, 1f

	/* Shift y right by the exponent difference.  Any bits that are
	   shifted out of y are saved in a9 for rounding the result.  */
	ssr	a10
	movi	a9, 0
	src	a9, a3, a9
	srl	a3, a3

	/* Do the addition.  */
	add	a2, a2, a3

	/* Check if the add overflowed into the exponent.  */
	extui	a10, a2, 23, 9
	beq	a10, a7, .Ladd_round
	mov	a8, a7
	j	.Ladd_carry

.Ladd_yexpzero:
	/* y is a subnormal value.  Replace its sign/exponent with zero,
	   i.e., no implicit "1.0", and increment the apparent exponent
	   because subnormals behave as if they had the minimum (nonzero)
	   exponent.  Test for the case when both exponents are zero.  */
	slli	a3, a3, 9
	srli	a3, a3, 9
	bnone	a2, a6, .Ladd_bothexpzero
	addi	a8, a8, 1
	j	.Ladd_yexpdiff

.Ladd_bothexpzero:
	/* Both exponents are zero.  Handle this as a special case.  There
	   is no need to shift or round, and the normal code for handling
	   a carry into the exponent field will not work because it
	   assumes there is an implicit "1.0" that needs to be added.  */
	add	a2, a2, a3
1:	leaf_return

.Ladd_xexpzero:
	/* Same as "yexpzero" except skip handling the case when both
	   exponents are zero.  */
	slli	a2, a2, 9
	srli	a2, a2, 9
	addi	a7, a7, 1
	j	.Ladd_xexpdiff

.Ladd_shiftx:
	/* Same thing as the "shifty" code, but with x and y swapped.  Also,
	   because the exponent difference is always nonzero in this version,
	   the shift sequence can use SLL and skip loading a constant zero.  */
	bnone	a2, a6, .Ladd_xexpzero

	or	a2, a2, a6
	slli	a2, a2, 8
	srli	a2, a2, 8

.Ladd_xexpdiff:
	sub	a10, a8, a7
	bgeui	a10, 32, .Ladd_returny

	ssr	a10
	sll	a9, a2
	srl	a2, a2

	add	a2, a2, a3

	/* Check if the add overflowed into the exponent.  */
	extui	a10, a2, 23, 9
	bne	a10, a8, .Ladd_carry

.Ladd_round:
	/* Round up if the leftover fraction is >= 1/2.  */
	bgez	a9, 1f
	addi	a2, a2, 1

	/* Check if the leftover fraction is exactly 1/2.  */
	slli	a9, a9, 1
	beqz	a9, .Ladd_exactlyhalf
1:	leaf_return

.Ladd_returny:
	mov	a2, a3
	leaf_return

.Ladd_carry:
	/* The addition has overflowed into the exponent field, so the
	   value needs to be renormalized.  The mantissa of the result
	   can be recovered by subtracting the original exponent and
	   adding 0x800000 (which is the explicit "1.0" for the
	   mantissa of the non-shifted operand -- the "1.0" for the
	   shifted operand was already added).  The mantissa can then
	   be shifted right by one bit.  The explicit "1.0" of the
	   shifted mantissa then needs to be replaced by the exponent,
	   incremented by one to account for the normalizing shift.
	   It is faster to combine these operations: do the shift first
	   and combine the additions and subtractions.  If x is the
	   original exponent, the result is:
	       shifted mantissa - (x << 22) + (1 << 22) + (x << 23)
	   or:
	       shifted mantissa + ((x + 1) << 22)
	   Note that the exponent is incremented here by leaving the
	   explicit "1.0" of the mantissa in the exponent field.  */

	/* Shift x right by one bit.  Save the lsb.  */
	mov	a10, a2
	srli	a2, a2, 1

	/* See explanation above.  The original exponent is in a8.  */
	addi	a8, a8, 1
	slli	a8, a8, 22
	add	a2, a2, a8

	/* Return an Infinity if the exponent overflowed.  */
	ball	a2, a6, .Ladd_infinity

	/* Same thing as the "round" code except the msb of the leftover
	   fraction is bit 0 of a10, with the rest of the fraction in a9.  */
	bbci.l	a10, 0, 1f
	addi	a2, a2, 1
	beqz	a9, .Ladd_exactlyhalf
1:	leaf_return

.Ladd_infinity:
	/* Clear the mantissa.  */
	srli	a2, a2, 23
	slli	a2, a2, 23

	/* The sign bit may have been lost in a carry-out.  Put it back.  */
	slli	a8, a8, 1
	or	a2, a2, a8
	leaf_return

.Ladd_exactlyhalf:
	/* Round down to the nearest even value.  */
	srli	a2, a2, 1
	slli	a2, a2, 1
	leaf_return
|
|
/* Subtraction */ |
__subsf3_aux:

	/* Handle NaNs and Infinities.  (This code is placed before the
	   start of the function just to keep it in range of the limited
	   branch displacements.)  Throughout, a6 = 0x7f800000 (the
	   exponent mask, set up by __subsf3 before branching here).  */

.Lsub_xnan_or_inf:
	/* If y is neither Infinity nor NaN, return x.  */
	bnall	a3, a6, 1f
	/* Both x and y are either NaN or Inf, so the result is NaN.  */
	movi	a4, 0x400000	/* make it a quiet NaN */
	or	a2, a2, a4
1:	leaf_return

.Lsub_ynan_or_inf:
	/* Negate y and return it.  */
	slli	a7, a6, 8	/* a7 = 0x80000000 (sign-bit mask) */
	xor	a2, a3, a7
	leaf_return

.Lsub_opposite_signs:
	/* Operand signs differ.  Do an addition.  */
	slli	a7, a6, 8	/* a7 = 0x80000000 (sign-bit mask) */
	xor	a3, a3, a7	/* negate y, then treat as x + (-y) */
	j	.Ladd_same_sign
|
	/* float __subsf3 (float x, float y)
	   In:  a2 = x, a3 = y (IEEE single bit patterns)
	   Out: a2 = x - y, rounded to nearest, ties to even.
	   Opposite-sign operands are routed to the addition code, so the
	   core paths only ever subtract the smaller magnitude from the
	   larger.  */
	.align	4
	.global	__subsf3
	.type	__subsf3, @function
__subsf3:
	leaf_entry sp, 16
	movi	a6, 0x7f800000	/* exponent mask, live through the aux code */

	/* Check if the two operands have the same sign.  */
	xor	a7, a2, a3
	bltz	a7, .Lsub_opposite_signs

.Lsub_same_sign:
	/* Check if either exponent == 0x7f8 (i.e., NaN or Infinity).  */
	ball	a2, a6, .Lsub_xnan_or_inf
	ball	a3, a6, .Lsub_ynan_or_inf

	/* Compare the operands.  In contrast to addition, the entire
	   value matters here.  */
	extui	a7, a2, 23, 8
	extui	a8, a3, 23, 8
	bltu	a2, a3, .Lsub_xsmaller

.Lsub_ysmaller:
	/* Check if the smaller (or equal) exponent is zero.  */
	bnone	a3, a6, .Lsub_yexpzero

	/* Replace y sign/exponent with 0x008 (the implicit "1.0").  */
	or	a3, a3, a6
	slli	a3, a3, 8
	srli	a3, a3, 8

.Lsub_yexpdiff:
	/* Compute the exponent difference.  */
	sub	a10, a7, a8

	/* Exponent difference > 32 -- just return the bigger value.  */
	bgeui	a10, 32, 1f

	/* Shift y right by the exponent difference.  Any bits that are
	   shifted out of y are saved in a9 for rounding the result.  */
	ssr	a10
	movi	a9, 0
	src	a9, a3, a9
	srl	a3, a3

	sub	a2, a2, a3

	/* Subtract the leftover bits in a9 from zero and propagate any
	   borrow from a2.  */
	neg	a9, a9
	addi	a10, a2, -1
	movnez	a2, a10, a9

	/* Check if the subtract underflowed into the exponent.  */
	extui	a10, a2, 23, 8
	beq	a10, a7, .Lsub_round
	j	.Lsub_borrow

.Lsub_yexpzero:
	/* Return zero if the inputs are equal.  (For the non-subnormal
	   case, subtracting the "1.0" will cause a borrow from the exponent
	   and this case can be detected when handling the borrow.)  */
	beq	a2, a3, .Lsub_return_zero

	/* y is a subnormal value.  Replace its sign/exponent with zero,
	   i.e., no implicit "1.0".  Unless x is also a subnormal, increment
	   y's apparent exponent because subnormals behave as if they had
	   the minimum (nonzero) exponent.  */
	slli	a3, a3, 9
	srli	a3, a3, 9
	bnone	a2, a6, .Lsub_yexpdiff
	addi	a8, a8, 1
	j	.Lsub_yexpdiff

.Lsub_returny:
	/* Negate and return y.  */
	slli	a7, a6, 8
	xor	a2, a3, a7
1:	leaf_return

.Lsub_xsmaller:
	/* Same thing as the "ysmaller" code, but with x and y swapped and
	   with y negated.  */
	bnone	a2, a6, .Lsub_xexpzero

	or	a2, a2, a6
	slli	a2, a2, 8
	srli	a2, a2, 8

.Lsub_xexpdiff:
	sub	a10, a8, a7
	bgeui	a10, 32, .Lsub_returny

	ssr	a10
	movi	a9, 0
	src	a9, a2, a9
	srl	a2, a2

	/* Negate y.  */
	slli	a11, a6, 8
	xor	a3, a3, a11

	sub	a2, a3, a2

	neg	a9, a9
	addi	a10, a2, -1
	movnez	a2, a10, a9

	/* Check if the subtract underflowed into the exponent.  */
	extui	a10, a2, 23, 8
	bne	a10, a8, .Lsub_borrow

.Lsub_round:
	/* Round up if the leftover fraction is >= 1/2.  */
	bgez	a9, 1f
	addi	a2, a2, 1

	/* Check if the leftover fraction is exactly 1/2.  */
	slli	a9, a9, 1
	beqz	a9, .Lsub_exactlyhalf
1:	leaf_return

.Lsub_xexpzero:
	/* Same as "yexpzero".  */
	beq	a2, a3, .Lsub_return_zero
	slli	a2, a2, 9
	srli	a2, a2, 9
	bnone	a3, a6, .Lsub_xexpdiff
	addi	a7, a7, 1
	j	.Lsub_xexpdiff

.Lsub_return_zero:
	movi	a2, 0
	leaf_return

.Lsub_borrow:
	/* The subtraction has underflowed into the exponent field, so the
	   value needs to be renormalized.  Shift the mantissa left as
	   needed to remove any leading zeros and adjust the exponent
	   accordingly.  If the exponent is not large enough to remove
	   all the leading zeros, the result will be a subnormal value.  */

	slli	a8, a2, 9
	beqz	a8, .Lsub_xzero
	do_nsau	a6, a8, a7, a11
	srli	a8, a8, 9
	bge	a6, a10, .Lsub_subnormal
	addi	a6, a6, 1

.Lsub_normalize_shift:
	/* Shift the mantissa (a8/a9) left by a6.  */
	ssl	a6
	src	a8, a8, a9
	sll	a9, a9

	/* Combine the shifted mantissa with the sign and exponent,
	   decrementing the exponent by a6.  (The exponent has already
	   been decremented by one due to the borrow from the subtraction,
	   but adding the mantissa will increment the exponent by one.)  */
	srli	a2, a2, 23
	sub	a2, a2, a6
	slli	a2, a2, 23
	add	a2, a2, a8
	j	.Lsub_round

.Lsub_exactlyhalf:
	/* Round down to the nearest even value.  */
	srli	a2, a2, 1
	slli	a2, a2, 1
	leaf_return

.Lsub_xzero:
	/* If there was a borrow from the exponent, and the mantissa and
	   guard digits are all zero, then the inputs were equal and the
	   result should be zero.  */
	beqz	a9, .Lsub_return_zero

	/* Only the guard digit is nonzero.  Shift by min(24, a10).  */
	addi	a11, a10, -24
	movi	a6, 24
	movltz	a6, a10, a11
	j	.Lsub_normalize_shift

.Lsub_subnormal:
	/* The exponent is too small to shift away all the leading zeros.
	   Set a6 to the current exponent (which has already been
	   decremented by the borrow) so that the exponent of the result
	   will be zero.  Do not add 1 to a6 in this case, because: (1)
	   adding the mantissa will not increment the exponent, so there is
	   no need to subtract anything extra from the exponent to
	   compensate, and (2) the effective exponent of a subnormal is 1
	   not 0 so the shift amount must be 1 smaller than normal.  */
	mov	a6, a10
	j	.Lsub_normalize_shift
|
#endif /* L_addsubsf3 */ |
|
#ifdef L_mulsf3 |
|
/* Multiplication */ |
__mulsf3_aux:

	/* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
	   (This code is placed before the start of the function just to
	   keep it in range of the limited branch displacements.)
	   Throughout, a6 = 0x7f800000 (the exponent mask) and a7 holds
	   x ^ y, whose top bit is the sign of the result; both are set
	   up by __mulsf3 before branching here.  */

.Lmul_xexpzero:
	/* Clear the sign bit of x.  */
	slli	a2, a2, 1
	srli	a2, a2, 1

	/* If x is zero, return zero.  */
	beqz	a2, .Lmul_return_zero

	/* Normalize x.  Adjust the exponent in a8.  */
	do_nsau	a10, a2, a11, a12
	addi	a10, a10, -8	/* shift so the msb of the mantissa is bit 23 */
	ssl	a10
	sll	a2, a2
	movi	a8, 1		/* subnormals have an effective exponent of 1 */
	sub	a8, a8, a10
	j	.Lmul_xnormalized

.Lmul_yexpzero:
	/* Clear the sign bit of y.  */
	slli	a3, a3, 1
	srli	a3, a3, 1

	/* If y is zero, return zero.  */
	beqz	a3, .Lmul_return_zero

	/* Normalize y.  Adjust the exponent in a9.  */
	do_nsau	a10, a3, a11, a12
	addi	a10, a10, -8
	ssl	a10
	sll	a3, a3
	movi	a9, 1
	sub	a9, a9, a10
	j	.Lmul_ynormalized

.Lmul_return_zero:
	/* Return zero with the appropriate sign bit.  */
	srli	a2, a7, 31
	slli	a2, a2, 31
	j	.Lmul_done

.Lmul_xnan_or_inf:
	/* If y is zero, return NaN.  */
	slli	a8, a3, 1
	bnez	a8, 1f
	movi	a4, 0x400000	/* make it a quiet NaN */
	or	a2, a2, a4
	j	.Lmul_done
1:
	/* If y is NaN, return y.  */
	bnall	a3, a6, .Lmul_returnx
	slli	a8, a3, 9	/* nonzero mantissa => y is NaN */
	beqz	a8, .Lmul_returnx

.Lmul_returny:
	mov	a2, a3

.Lmul_returnx:
	/* Set the sign bit and return.  */
	extui	a7, a7, 31, 1
	slli	a2, a2, 1
	ssai	1
	src	a2, a7, a2	/* re-insert the result sign as the msb */
	j	.Lmul_done

.Lmul_ynan_or_inf:
	/* If x is zero, return NaN.  */
	slli	a8, a2, 1
	bnez	a8, .Lmul_returny
	movi	a7, 0x400000	/* make it a quiet NaN */
	or	a2, a3, a7
	j	.Lmul_done
|
	/* float __mulsf3 (float x, float y)
	   In:  a2 = x, a3 = y (IEEE single bit patterns)
	   Out: a2 = x * y, rounded to nearest, ties to even.
	   The mantissa product is computed 32x32->64 using whichever
	   multiply hardware the configuration provides (MUL32_HIGH,
	   MUL16, MUL32, MAC16) or, failing all of those, a software
	   helper (.Lmul_mulsi3 below).  */
	.align	4
	.global	__mulsf3
	.type	__mulsf3, @function
__mulsf3:
	leaf_entry sp, 32
#if __XTENSA_CALL0_ABI__
	/* CALL0 ABI: a12-a15 are callee-saved; the software-multiply
	   path clobbers them, so spill them here.  */
	addi	sp, sp, -32
	s32i	a12, sp, 16
	s32i	a13, sp, 20
	s32i	a14, sp, 24
	s32i	a15, sp, 28
#endif
	movi	a6, 0x7f800000	/* exponent mask, live through the aux code */

	/* Get the sign of the result.  */
	xor	a7, a2, a3

	/* Check for NaN and infinity.  */
	ball	a2, a6, .Lmul_xnan_or_inf
	ball	a3, a6, .Lmul_ynan_or_inf

	/* Extract the exponents.  */
	extui	a8, a2, 23, 8
	extui	a9, a3, 23, 8

	beqz	a8, .Lmul_xexpzero
.Lmul_xnormalized:
	beqz	a9, .Lmul_yexpzero
.Lmul_ynormalized:

	/* Add the exponents.  */
	add	a8, a8, a9

	/* Replace sign/exponent fields with explicit "1.0".  */
	movi	a10, 0xffffff
	or	a2, a2, a6
	and	a2, a2, a10
	or	a3, a3, a6
	and	a3, a3, a10

	/* Multiply 32x32 to 64 bits.  The result ends up in a2/a6.  */

#if XCHAL_HAVE_MUL32_HIGH

	mull	a6, a2, a3
	muluh	a2, a2, a3

#else

	/* Break the inputs into 16-bit chunks and compute 4 32-bit partial
	   products.  These partial products are:

		0 xl * yl

		1 xl * yh
		2 xh * yl

		3 xh * yh

	   If using the Mul16 or Mul32 multiplier options, these input
	   chunks must be stored in separate registers.  For Mac16, the
	   UMUL.AA.* opcodes can specify that the inputs come from either
	   half of the registers, so there is no need to shift them out
	   ahead of time.  If there is no multiply hardware, the 16-bit
	   chunks can be extracted when setting up the arguments to the
	   separate multiply function.  */

#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
	/* Calling a separate multiply function will clobber a0 and requires
	   use of a8 as a temporary, so save those values now.  (The function
	   uses a custom ABI so nothing else needs to be saved.)  */
	s32i	a0, sp, 0
	s32i	a8, sp, 4
#endif

#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32

#define a2h a4
#define a3h a5

	/* Get the high halves of the inputs into registers.  */
	srli	a2h, a2, 16
	srli	a3h, a3, 16

#define a2l a2
#define a3l a3

#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
	/* Clear the high halves of the inputs.  This does not matter
	   for MUL16 because the high bits are ignored.  */
	extui	a2, a2, 0, 16
	extui	a3, a3, 0, 16
#endif
#endif /* MUL16 || MUL32 */


#if XCHAL_HAVE_MUL16

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	mul16u	dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MUL32

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	mull	dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MAC16

/* The preprocessor insists on inserting a space when concatenating after
   a period in the definition of do_mul below.  These macros are a workaround
   using underscores instead of periods when doing the concatenation.  */
#define umul_aa_ll umul.aa.ll
#define umul_aa_lh umul.aa.lh
#define umul_aa_hl umul.aa.hl
#define umul_aa_hh umul.aa.hh

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	umul_aa_ ## xhalf ## yhalf	xreg, yreg; \
	rsr	dst, ACCLO

#else /* no multiply hardware */

#define set_arg_l(dst, src) \
	extui	dst, src, 0, 16
#define set_arg_h(dst, src) \
	srli	dst, src, 16

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	set_arg_ ## xhalf (a13, xreg); \
	set_arg_ ## yhalf (a14, yreg); \
	call0	.Lmul_mulsi3; \
	mov	dst, a12
#endif

	/* Add pp1 and pp2 into a6 with carry-out in a9.  */
	do_mul(a6, a2, l, a3, h)	/* pp 1 */
	do_mul(a11, a2, h, a3, l)	/* pp 2 */
	movi	a9, 0
	add	a6, a6, a11
	bgeu	a6, a11, 1f	/* unsigned overflow => carry */
	addi	a9, a9, 1
1:
	/* Shift the high half of a9/a6 into position in a9.  Note that
	   this value can be safely incremented without any carry-outs.  */
	ssai	16
	src	a9, a9, a6

	/* Compute the low word into a6.  */
	do_mul(a11, a2, l, a3, l)	/* pp 0 */
	sll	a6, a6
	add	a6, a6, a11
	bgeu	a6, a11, 1f
	addi	a9, a9, 1
1:
	/* Compute the high word into a2.  */
	do_mul(a2, a2, h, a3, h)	/* pp 3 */
	add	a2, a2, a9

#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
	/* Restore values saved on the stack during the multiplication.  */
	l32i	a0, sp, 0
	l32i	a8, sp, 4
#endif
#endif

	/* Shift left by 9 bits, unless there was a carry-out from the
	   multiply, in which case, shift by 8 bits and increment the
	   exponent.  */
	movi	a4, 9
	srli	a5, a2, 24 - 9
	beqz	a5, 1f
	addi	a4, a4, -1
	addi	a8, a8, 1
1:	ssl	a4
	src	a2, a2, a6
	sll	a6, a6

	/* Subtract the extra bias from the exponent sum (plus one to account
	   for the explicit "1.0" of the mantissa that will be added to the
	   exponent in the final result).  */
	movi	a4, 0x80
	sub	a8, a8, a4

	/* Check for over/underflow.  The value in a8 is one less than the
	   final exponent, so values in the range 0..fd are OK here.  */
	movi	a4, 0xfe
	bgeu	a8, a4, .Lmul_overflow

.Lmul_round:
	/* Round.  */
	bgez	a6, .Lmul_rounded
	addi	a2, a2, 1
	slli	a6, a6, 1
	beqz	a6, .Lmul_exactlyhalf

.Lmul_rounded:
	/* Add the exponent to the mantissa.  */
	slli	a8, a8, 23
	add	a2, a2, a8

.Lmul_addsign:
	/* Add the sign bit.  */
	srli	a7, a7, 31
	slli	a7, a7, 31
	or	a2, a2, a7

.Lmul_done:
#if __XTENSA_CALL0_ABI__
	l32i	a12, sp, 16
	l32i	a13, sp, 20
	l32i	a14, sp, 24
	l32i	a15, sp, 28
	addi	sp, sp, 32
#endif
	leaf_return

.Lmul_exactlyhalf:
	/* Round down to the nearest even value.  */
	srli	a2, a2, 1
	slli	a2, a2, 1
	j	.Lmul_rounded

.Lmul_overflow:
	bltz	a8, .Lmul_underflow
	/* Return +/- Infinity.  */
	movi	a8, 0xff
	slli	a2, a8, 23
	j	.Lmul_addsign

.Lmul_underflow:
	/* Create a subnormal value, where the exponent field contains zero,
	   but the effective exponent is 1.  The value of a8 is one less than
	   the actual exponent, so just negate it to get the shift amount.  */
	neg	a8, a8
	mov	a9, a6
	ssr	a8
	bgeui	a8, 32, .Lmul_flush_to_zero

	/* Shift a2 right.  Any bits that are shifted out of a2 are saved
	   in a6 (combined with the shifted-out bits currently in a6) for
	   rounding the result.  */
	sll	a6, a2
	srl	a2, a2

	/* Set the exponent to zero.  */
	movi	a8, 0

	/* Pack any nonzero bits shifted out into a6.  */
	beqz	a9, .Lmul_round
	movi	a9, 1
	or	a6, a6, a9	/* sticky bit: keeps the result from looking exact */
	j	.Lmul_round

.Lmul_flush_to_zero:
	/* Return zero with the appropriate sign bit.  */
	srli	a2, a7, 31
	slli	a2, a2, 31
	j	.Lmul_done
|
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16

	/* For Xtensa processors with no multiply hardware, this simplified
	   version of _mulsi3 is used for multiplying 16-bit chunks of
	   the floating-point mantissas.  It uses a custom ABI: the inputs
	   are passed in a13 and a14, the result is returned in a12, and
	   a8 and a15 are clobbered.  Processes the multiplier four bits
	   per iteration using conditional adds of 1x/2x/4x/8x the
	   multiplicand.  */
	.align	4
.Lmul_mulsi3:
	movi	a12, 0		/* a12 = running product */
.Lmul_mult_loop:
	add	a15, a14, a12	/* candidate: product + 1 * multiplicand */
	extui	a8, a13, 0, 1
	movnez	a12, a15, a8	/* accept if multiplier bit 0 is set */

	do_addx2 a15, a14, a12, a15	/* product + 2 * multiplicand */
	extui	a8, a13, 1, 1
	movnez	a12, a15, a8

	do_addx4 a15, a14, a12, a15	/* product + 4 * multiplicand */
	extui	a8, a13, 2, 1
	movnez	a12, a15, a8

	do_addx8 a15, a14, a12, a15	/* product + 8 * multiplicand */
	extui	a8, a13, 3, 1
	movnez	a12, a15, a8

	srli	a13, a13, 4	/* consume 4 multiplier bits */
	slli	a14, a14, 4	/* scale the multiplicand to match */
	bnez	a13, .Lmul_mult_loop
	ret
#endif /* !MUL16 && !MUL32 && !MAC16 */
#endif /* L_mulsf3 */ |
|
#ifdef L_divsf3 |
|
/* Division */ |
/* float __divsf3 (float x, float y)

   Single-precision division, round-to-nearest, ties-to-even.
   Arguments arrive in a2 (x) and a3 (y) as raw IEEE binary32 bit
   patterns; the result is returned in a2.

   Register roles in the main path:
     a2  = x, later the quotient remainder, finally the result
     a3  = y (mantissa with implicit 1.0 made explicit)
     a6  = 0x7f800000 (exponent field mask)
     a7  = x ^ y (sign of the result in bit 31)
     a8  = exponent of x, later the (biased - 1) result exponent
     a9  = exponent of y, later the bit-loop counter
     a10 = quotient mantissa being built

   leaf_entry/leaf_return are ABI-abstraction macros defined earlier
   in this file.  */

__divsf3_aux:

	/* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
	   (This code is placed before the start of the function just to
	   keep it in range of the limited branch displacements.)  */

.Ldiv_yexpzero:
	/* y has a zero exponent field: it is either zero or subnormal.
	   Clear the sign bit of y.  */
	slli	a3, a3, 1
	srli	a3, a3, 1

	/* Check for division by zero.  */
	beqz	a3, .Ldiv_yzero

	/* y is subnormal: normalize it.  Adjust the exponent in a9.
	   do_nsau (macro defined earlier) counts leading zeros of a3
	   into a10, possibly clobbering a4/a5.  */
	do_nsau	a10, a3, a4, a5
	addi	a10, a10, -8
	ssl	a10
	sll	a3, a3
	movi	a9, 1
	sub	a9, a9, a10
	j	.Ldiv_ynormalized

.Ldiv_yzero:
	/* y is zero.  Return NaN if x is also zero; otherwise, infinity
	   with the sign of x/y (taken from a7 = x ^ y).  */
	slli	a4, a2, 1
	srli	a4, a4, 1
	srli	a2, a7, 31
	slli	a2, a2, 31
	or	a2, a2, a6
	bnez	a4, 1f
	movi	a4, 0x400000	/* make it a quiet NaN */
	or	a2, a2, a4
1:	leaf_return

.Ldiv_xexpzero:
	/* x has a zero exponent field.  Clear the sign bit of x.  */
	slli	a2, a2, 1
	srli	a2, a2, 1

	/* If x is zero, return zero (y is known finite nonzero here).  */
	beqz	a2, .Ldiv_return_zero

	/* x is subnormal: normalize it.  Adjust the exponent in a8.  */
	do_nsau	a10, a2, a4, a5
	addi	a10, a10, -8
	ssl	a10
	sll	a2, a2
	movi	a8, 1
	sub	a8, a8, a10
	j	.Ldiv_xnormalized

.Ldiv_return_zero:
	/* Return zero with the appropriate sign bit (from a7).  */
	srli	a2, a7, 31
	slli	a2, a2, 31
	leaf_return

.Ldiv_xnan_or_inf:
	/* x is NaN or Inf.  Set the sign bit of the result by flipping
	   the sign of x when y is negative.  */
	srli	a7, a3, 31
	slli	a7, a7, 31
	xor	a2, a2, a7
	/* If y is also NaN or Inf (Inf/Inf case), return NaN.  */
	bnall	a3, a6, 1f
	movi	a4, 0x400000	/* make it a quiet NaN */
	or	a2, a2, a4
1:	leaf_return

.Ldiv_ynan_or_inf:
	/* If y is Infinity, return zero (finite / Inf).  */
	slli	a8, a3, 9
	beqz	a8, .Ldiv_return_zero
	/* y is NaN; return it.  */
	mov	a2, a3
	leaf_return

	.align	4
	.global	__divsf3
	.type	__divsf3, @function
__divsf3:
	leaf_entry sp, 16
	movi	a6, 0x7f800000

	/* Get the sign of the result.  */
	xor	a7, a2, a3

	/* Check for NaN and infinity (exponent field all ones).  */
	ball	a2, a6, .Ldiv_xnan_or_inf
	ball	a3, a6, .Ldiv_ynan_or_inf

	/* Extract the exponents.  */
	extui	a8, a2, 23, 8
	extui	a9, a3, 23, 8

	beqz	a9, .Ldiv_yexpzero
.Ldiv_ynormalized:
	beqz	a8, .Ldiv_xexpzero
.Ldiv_xnormalized:

	/* Subtract the exponents.  */
	sub	a8, a8, a9

	/* Replace sign/exponent fields with explicit "1.0", producing
	   24-bit mantissas in 0xffffff.  */
	movi	a10, 0xffffff
	or	a2, a2, a6
	and	a2, a2, a10
	or	a3, a3, a6
	and	a3, a3, a10

	/* The first digit of the mantissa division must be a one.
	   Shift x (and adjust the exponent) as needed to make this true.  */
	bltu	a3, a2, 1f
	slli	a2, a2, 1
	addi	a8, a8, -1
1:
	/* Do the first subtraction and shift.  */
	sub	a2, a2, a3
	slli	a2, a2, 1

	/* Put the quotient into a10 (leading 1 already produced).  */
	movi	a10, 1

	/* Divide one bit at a time for 23 bits, restoring-division
	   style: compare remainder against divisor, output a bit,
	   conditionally subtract, then shift the remainder.  */
	movi	a9, 23
#if XCHAL_HAVE_LOOPS
	loop	a9, .Ldiv_loopend
#endif
.Ldiv_loop:
	/* Shift the quotient << 1.  */
	slli	a10, a10, 1

	/* Is this digit a 0 or 1?  */
	bltu	a2, a3, 1f

	/* Output a 1 and subtract.  */
	addi	a10, a10, 1
	sub	a2, a2, a3

	/* Shift the dividend << 1.  */
1:	slli	a2, a2, 1

#if !XCHAL_HAVE_LOOPS
	addi	a9, a9, -1
	bnez	a9, .Ldiv_loop
#endif
.Ldiv_loopend:

	/* Add the exponent bias (less one to account for the explicit "1.0"
	   of the mantissa that will be added to the exponent in the final
	   result).  */
	addi	a8, a8, 0x7e

	/* Check for over/underflow.  The value in a8 is one less than the
	   final exponent, so values in the range 0..fd are OK here.  */
	movi	a4, 0xfe
	bgeu	a8, a4, .Ldiv_overflow

.Ldiv_round:
	/* Round.  The remainder (<< 1) is in a2; comparing it against
	   the divisor tests whether the discarded fraction is >= 1/2.  */
	bltu	a2, a3, .Ldiv_rounded
	addi	a10, a10, 1
	beq	a2, a3, .Ldiv_exactlyhalf

.Ldiv_rounded:
	/* Add the exponent to the mantissa (the explicit 1.0 bit of the
	   mantissa carries into the exponent field).  */
	slli	a8, a8, 23
	add	a2, a10, a8

.Ldiv_addsign:
	/* Add the sign bit.  */
	srli	a7, a7, 31
	slli	a7, a7, 31
	or	a2, a2, a7
	leaf_return

.Ldiv_overflow:
	bltz	a8, .Ldiv_underflow
	/* Return +/- Infinity.  */
	addi	a8, a4, 1	/* 0xff */
	slli	a2, a8, 23
	j	.Ldiv_addsign

.Ldiv_exactlyhalf:
	/* Remainder is exactly half the divisor.  Round even by clearing
	   the low mantissa bit.  */
	srli	a10, a10, 1
	slli	a10, a10, 1
	j	.Ldiv_rounded

.Ldiv_underflow:
	/* Create a subnormal value, where the exponent field contains zero,
	   but the effective exponent is 1.  The value of a8 is one less than
	   the actual exponent, so just negate it to get the shift amount.  */
	neg	a8, a8
	ssr	a8
	bgeui	a8, 32, .Ldiv_flush_to_zero

	/* Shift a10 right.  Any bits that are shifted out of a10 are
	   saved in a6 for rounding the result.  */
	sll	a6, a10
	srl	a10, a10

	/* Set the exponent to zero.  */
	movi	a8, 0

	/* Pack any nonzero remainder (in a2) into a6 as a sticky bit.  */
	beqz	a2, 1f
	movi	a9, 1
	or	a6, a6, a9

	/* Round a10 based on the bits shifted out into a6: round up when
	   the shifted-out fraction is >= 1/2, then round to even when it
	   was exactly 1/2.  */
1:	bgez	a6, .Ldiv_rounded
	addi	a10, a10, 1
	slli	a6, a6, 1
	bnez	a6, .Ldiv_rounded
	srli	a10, a10, 1
	slli	a10, a10, 1
	j	.Ldiv_rounded

.Ldiv_flush_to_zero:
	/* Return zero with the appropriate sign bit.  */
	srli	a2, a7, 31
	slli	a2, a2, 31
	leaf_return
|
#endif /* L_divsf3 */ |
|
#ifdef L_cmpsf2 |
|
/* Equal and Not Equal */ |
|
	/* int __eqsf2 (float x, float y)   (alias: __nesf2)
	   Returns 0 when x == y and nonzero (1) otherwise.  NaN compares
	   unequal to everything (including itself); +0 and -0 compare
	   equal.  Arguments arrive in a2/a3 as raw IEEE bit patterns.  */
	.align	4
	.global	__eqsf2
	.global	__nesf2
	.set	__nesf2, __eqsf2
	.type	__eqsf2, @function
__eqsf2:
	leaf_entry sp, 16
	bne	a2, a3, 4f

	/* The values are equal but NaN != NaN.  Check the exponent.  */
	movi	a6, 0x7f800000
	ball	a2, a6, 3f

	/* Equal.  */
	movi	a2, 0
	leaf_return

	/* Not equal.  (This numeric label is not branched to within this
	   function.)  */
2:	movi	a2, 1
	leaf_return

	/* Exponent is all ones: check if the mantissa is nonzero (NaN).  */
3:	slli	a7, a2, 9
	j	5f

	/* Bit patterns differ: still equal if x and y are zero with
	   different signs, i.e. (x | y) << 1 == 0.  */
4:	or	a7, a2, a3
	slli	a7, a7, 1

	/* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
	   of x when exponent(x) = 0xff and x == y.  */
5:	movi	a2, 0
	movi	a3, 1
	movnez	a2, a3, a7
	leaf_return
|
|
/* Greater Than */ |
|
	/* int __gtsf2 (float x, float y)
	   Returns a value greater than zero (1) when x > y; otherwise
	   zero.  When either operand is NaN the result is 0, so the
	   "greater than" predicate is false on unordered operands.  The
	   ordered comparison itself is shared with __lesf2 at .Lle_cmp.  */
	.align	4
	.global	__gtsf2
	.type	__gtsf2, @function
__gtsf2:
	leaf_entry sp, 16
	movi	a6, 0x7f800000
	ball	a2, a6, 2f
1:	bnall	a3, a6, .Lle_cmp

	/* y has an all-ones exponent: check if y is a NaN.  */
	slli	a7, a3, 9
	beqz	a7, .Lle_cmp
	movi	a2, 0
	leaf_return

	/* x has an all-ones exponent: check if x is a NaN.  */
2:	slli	a7, a2, 9
	beqz	a7, 1b
	movi	a2, 0
	leaf_return
|
|
/* Less Than or Equal */ |
|
	/* int __lesf2 (float x, float y)
	   Returns a value less than or equal to zero (0) when x <= y;
	   otherwise 1.  When either operand is NaN the result is 1, so
	   "less than or equal" is false on unordered operands.

	   The .Lle_cmp tail implements the ordered comparison (also used
	   by __gtsf2) by exploiting the fact that for IEEE values of the
	   same sign the integer ordering of the bit patterns matches the
	   float ordering, reversed when both are negative.  */
	.align	4
	.global	__lesf2
	.type	__lesf2, @function
__lesf2:
	leaf_entry sp, 16
	movi	a6, 0x7f800000
	ball	a2, a6, 2f
1:	bnall	a3, a6, .Lle_cmp

	/* y has an all-ones exponent: check if y is a NaN.  */
	slli	a7, a3, 9
	beqz	a7, .Lle_cmp
	movi	a2, 1
	leaf_return

	/* x has an all-ones exponent: check if x is a NaN.  */
2:	slli	a7, a2, 9
	beqz	a7, 1b
	movi	a2, 1
	leaf_return

.Lle_cmp:
	/* Check if x and y have different signs.  */
	xor	a7, a2, a3
	bltz	a7, .Lle_diff_signs

	/* Same sign: check if x is negative (then the bit-pattern
	   ordering is reversed).  */
	bltz	a2, .Lle_xneg

	/* Both nonnegative: x <= y iff the bit patterns compare so.  */
	bltu	a3, a2, 5f
4:	movi	a2, 0
	leaf_return

.Lle_xneg:
	/* Both negative: x <= y iff y's bit pattern <= x's.  */
	bgeu	a2, a3, 4b
5:	movi	a2, 1
	leaf_return

.Lle_diff_signs:
	/* Different signs: if x is negative then x <= y ... */
	bltz	a2, 4b

	/* ... unless both x and y are zero (+0 <= -0 holds).  */
	or	a7, a2, a3
	slli	a7, a7, 1
	movi	a2, 1
	movi	a3, 0
	moveqz	a2, a3, a7
	leaf_return
|
|
/* Greater Than or Equal */ |
|
	/* int __gesf2 (float x, float y)
	   Returns a value greater than or equal to zero (0) when x >= y;
	   otherwise -1.  When either operand is NaN the result is -1, so
	   "greater than or equal" is false on unordered operands.  The
	   ordered comparison is shared with __ltsf2 at .Llt_cmp.  */
	.align	4
	.global	__gesf2
	.type	__gesf2, @function
__gesf2:
	leaf_entry sp, 16
	movi	a6, 0x7f800000
	ball	a2, a6, 2f
1:	bnall	a3, a6, .Llt_cmp

	/* y has an all-ones exponent: check if y is a NaN.  */
	slli	a7, a3, 9
	beqz	a7, .Llt_cmp
	movi	a2, -1
	leaf_return

	/* x has an all-ones exponent: check if x is a NaN.  */
2:	slli	a7, a2, 9
	beqz	a7, 1b
	movi	a2, -1
	leaf_return
|
|
/* Less Than */ |
|
	/* int __ltsf2 (float x, float y)
	   Returns a value less than zero (-1) when x < y; otherwise 0.
	   When either operand is NaN the result is 0, so "less than" is
	   false on unordered operands.

	   The .Llt_cmp tail implements the ordered comparison (also used
	   by __gesf2) via integer comparison of the bit patterns, with
	   the ordering reversed when both operands are negative.  */
	.align	4
	.global	__ltsf2
	.type	__ltsf2, @function
__ltsf2:
	leaf_entry sp, 16
	movi	a6, 0x7f800000
	ball	a2, a6, 2f
1:	bnall	a3, a6, .Llt_cmp

	/* y has an all-ones exponent: check if y is a NaN.  */
	slli	a7, a3, 9
	beqz	a7, .Llt_cmp
	movi	a2, 0
	leaf_return

	/* x has an all-ones exponent: check if x is a NaN.  */
2:	slli	a7, a2, 9
	beqz	a7, 1b
	movi	a2, 0
	leaf_return

.Llt_cmp:
	/* Check if x and y have different signs.  */
	xor	a7, a2, a3
	bltz	a7, .Llt_diff_signs

	/* Same sign: check if x is negative (bit-pattern order reverses).  */
	bltz	a2, .Llt_xneg

	/* Both nonnegative: x < y iff the bit patterns compare so.  */
	bgeu	a2, a3, 5f
4:	movi	a2, -1
	leaf_return

.Llt_xneg:
	/* Both negative: x < y iff y's bit pattern < x's.  */
	bltu	a3, a2, 4b
5:	movi	a2, 0
	leaf_return

.Llt_diff_signs:
	/* Different signs: if x is nonnegative, x < y is false.  */
	bgez	a2, 5b

	/* x negative, y positive: x < y unless both are zero
	   (-0 < +0 is false).  */
	or	a7, a2, a3
	slli	a7, a7, 1
	movi	a2, 0
	movi	a3, -1
	movnez	a2, a3, a7
	leaf_return
|
|
/* Unordered */ |
|
	/* int __unordsf2 (float x, float y)
	   Returns 1 when x and y are unordered (either one is NaN),
	   otherwise 0.  A NaN is detected as an all-ones exponent field
	   with a nonzero mantissa (tested by shifting out sign+exponent).  */
	.align	4
	.global	__unordsf2
	.type	__unordsf2, @function
__unordsf2:
	leaf_entry sp, 16
	movi	a6, 0x7f800000
	ball	a2, a6, 3f
1:	ball	a3, a6, 4f
2:	movi	a2, 0
	leaf_return

	/* x has an all-ones exponent: NaN iff the mantissa is nonzero.  */
3:	slli	a7, a2, 9
	beqz	a7, 1b
	movi	a2, 1
	leaf_return

	/* y has an all-ones exponent: NaN iff the mantissa is nonzero.  */
4:	slli	a7, a3, 9
	beqz	a7, 2b
	movi	a2, 1
	leaf_return
|
#endif /* L_cmpsf2 */ |
|
#ifdef L_fixsfsi |
|
	/* int __fixsfsi (float x)
	   Convert x to a signed 32-bit integer, truncating toward zero.
	   Out-of-range values and +/-Inf saturate to INT_MAX/INT_MIN
	   (chosen by the sign); NaN converts to INT_MAX; |x| < 1 gives 0.
	   Argument and result in a2.  */
	.align	4
	.global	__fixsfsi
	.type	__fixsfsi, @function
__fixsfsi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7f800000
	ball	a2, a6, .Lfixsfsi_nan_or_inf

	/* Extract the exponent and check if 0 < (exp - 0x7e) < 32.  */
	extui	a4, a2, 23, 8
	addi	a4, a4, -0x7e
	bgei	a4, 32, .Lfixsfsi_maxint
	blti	a4, 1, .Lfixsfsi_zero

	/* Add explicit "1.0" and shift << 8, placing the 24-bit mantissa
	   at the top of a5.  (a7 keeps the sign of x in bit 31.)  */
	or	a7, a2, a6
	slli	a5, a7, 8

	/* Shift back to the right, based on the exponent.  */
	ssl	a4		/* shift by 32 - a4 */
	srl	a5, a5

	/* Negate the result if sign != 0.  */
	neg	a2, a5
	movgez	a2, a5, a7
	leaf_return

.Lfixsfsi_nan_or_inf:
	/* Handle Infinity and NaN (nonzero mantissa => NaN).  */
	slli	a4, a2, 9
	beqz	a4, .Lfixsfsi_maxint

	/* Translate NaN to +maxint by clearing the sign before the
	   saturation code below.  */
	movi	a2, 0

.Lfixsfsi_maxint:
	/* Saturate: INT_MAX when the (possibly cleared) sign is zero,
	   INT_MIN otherwise.  */
	slli	a4, a6, 8	/* 0x80000000 */
	addi	a5, a4, -1	/* 0x7fffffff */
	movgez	a4, a5, a2
	mov	a2, a4
	leaf_return

.Lfixsfsi_zero:
	movi	a2, 0
	leaf_return
|
#endif /* L_fixsfsi */ |
|
#ifdef L_fixsfdi |
|
	/* long long __fixsfdi (float x)
	   Convert x to a signed 64-bit integer, truncating toward zero.
	   Out-of-range values and +/-Inf saturate to LLONG_MAX/LLONG_MIN;
	   NaN converts to LLONG_MAX; |x| < 1 gives 0.  Argument in a2;
	   the result is returned in the xh/xl register pair (macros
	   defined earlier in this file, presumably selecting the high/low
	   words per the target endianness -- confirm against the header).  */
	.align	4
	.global	__fixsfdi
	.type	__fixsfdi, @function
__fixsfdi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7f800000
	ball	a2, a6, .Lfixsfdi_nan_or_inf

	/* Extract the exponent and check if 0 < (exp - 0x7e) < 64.  */
	extui	a4, a2, 23, 8
	addi	a4, a4, -0x7e
	bgei	a4, 64, .Lfixsfdi_maxint
	blti	a4, 1, .Lfixsfdi_zero

	/* Add explicit "1.0" and shift << 8, placing the 24-bit mantissa
	   at the top of xh.  (a7 keeps the sign of x in bit 31.)  */
	or	a7, a2, a6
	slli	xh, a7, 8

	/* Shift back to the right, based on the exponent.  */
	ssl	a4		/* shift by 64 - a4 */
	bgei	a4, 32, .Lfixsfdi_smallshift
	srl	xl, xh
	movi	xh, 0

.Lfixsfdi_shifted:
	/* Negate the 64-bit result if sign != 0 (two's-complement
	   negation across the register pair).  */
	bgez	a7, 1f
	neg	xl, xl
	neg	xh, xh
	beqz	xl, 1f
	addi	xh, xh, -1
1:	leaf_return

.Lfixsfdi_smallshift:
	/* Result needs more than 32 bits: split the mantissa across
	   xh/xl with a funnel shift.  */
	movi	xl, 0
	sll	xl, xh
	srl	xh, xh
	j	.Lfixsfdi_shifted

.Lfixsfdi_nan_or_inf:
	/* Handle Infinity and NaN (nonzero mantissa => NaN).  */
	slli	a4, a2, 9
	beqz	a4, .Lfixsfdi_maxint

	/* Translate NaN to +maxint by clearing the sign.  */
	movi	a2, 0

.Lfixsfdi_maxint:
	/* Saturate to LLONG_MIN (negative) or LLONG_MAX (otherwise).  */
	slli	a7, a6, 8	/* 0x80000000 */
	bgez	a2, 1f
	mov	xh, a7
	movi	xl, 0
	leaf_return

1:	addi	xh, a7, -1	/* 0x7fffffff */
	movi	xl, -1
	leaf_return

.Lfixsfdi_zero:
	movi	xh, 0
	movi	xl, 0
	leaf_return
|
#endif /* L_fixsfdi */ |
|
#ifdef L_fixunssfsi |
|
	/* unsigned int __fixunssfsi (float x)
	   Convert x to an unsigned 32-bit integer, truncating toward
	   zero.  x >= 2^32 and +Inf give 0xffffffff; large-magnitude
	   negative values and -Inf give 0x80000000; NaN gives 0xffffffff;
	   |x| < 1 gives 0.  In-range negative values are negated modulo
	   2^32.  Argument and result in a2.  */
	.align	4
	.global	__fixunssfsi
	.type	__fixunssfsi, @function
__fixunssfsi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7f800000
	ball	a2, a6, .Lfixunssfsi_nan_or_inf

	/* Extract the exponent and check if 0 <= (exp - 0x7f) < 32.  */
	extui	a4, a2, 23, 8
	addi	a4, a4, -0x7f
	bgei	a4, 32, .Lfixunssfsi_maxint
	bltz	a4, .Lfixunssfsi_zero

	/* Add explicit "1.0" and shift << 8, placing the 24-bit mantissa
	   at the top of a5.  (a7 keeps the sign of x in bit 31.)  */
	or	a7, a2, a6
	slli	a5, a7, 8

	/* Shift back to the right, based on the exponent.  A shift count
	   of 32 would be a no-op on the funnel shifter, so the exponent
	   value that needs no shift is handled separately.  */
	addi	a4, a4, 1
	beqi	a4, 32, .Lfixunssfsi_bigexp
	ssl	a4		/* shift by 32 - a4 */
	srl	a5, a5

	/* Negate the result if sign != 0.  */
	neg	a2, a5
	movgez	a2, a5, a7
	leaf_return

.Lfixunssfsi_nan_or_inf:
	/* Handle Infinity and NaN (nonzero mantissa => NaN).  */
	slli	a4, a2, 9
	beqz	a4, .Lfixunssfsi_maxint

	/* Translate NaN to 0xffffffff.  */
	movi	a2, -1
	leaf_return

.Lfixunssfsi_maxint:
	/* 0xffffffff for nonnegative overflow, 0x80000000 for negative.  */
	slli	a4, a6, 8	/* 0x80000000 */
	movi	a5, -1		/* 0xffffffff */
	movgez	a4, a5, a2
	mov	a2, a4
	leaf_return

.Lfixunssfsi_zero:
	movi	a2, 0
	leaf_return

.Lfixunssfsi_bigexp:
	/* Handle unsigned maximum exponent case (x in [2^31, 2^32)).  */
	bltz	a2, 1f
	mov	a2, a5		/* no shift needed */
	leaf_return

	/* Return 0x80000000 if negative.  */
1:	slli	a2, a6, 8
	leaf_return
|
#endif /* L_fixunssfsi */ |
|
#ifdef L_fixunssfdi |
|
	/* unsigned long long __fixunssfdi (float x)
	   Convert x to an unsigned 64-bit integer, truncating toward
	   zero.  x >= 2^64 and +Inf give all-ones; large-magnitude
	   negative values and -Inf give 0x8000000000000000; NaN gives
	   all-ones; |x| < 1 gives 0.  In-range negative values are
	   negated modulo 2^64.  Argument in a2; result in the xh/xl
	   register pair (macros defined earlier in this file).  */
	.align	4
	.global	__fixunssfdi
	.type	__fixunssfdi, @function
__fixunssfdi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7f800000
	ball	a2, a6, .Lfixunssfdi_nan_or_inf

	/* Extract the exponent and check if 0 <= (exp - 0x7f) < 64.  */
	extui	a4, a2, 23, 8
	addi	a4, a4, -0x7f
	bgei	a4, 64, .Lfixunssfdi_maxint
	bltz	a4, .Lfixunssfdi_zero

	/* Add explicit "1.0" and shift << 8, placing the 24-bit mantissa
	   at the top of xh.  (a7 keeps the sign of x in bit 31.)  */
	or	a7, a2, a6
	slli	xh, a7, 8

	/* Shift back to the right, based on the exponent.  The exponent
	   value that needs no shift at all is handled separately.  */
	addi	a4, a4, 1
	beqi	a4, 64, .Lfixunssfdi_bigexp
	ssl	a4		/* shift by 64 - a4 */
	bgei	a4, 32, .Lfixunssfdi_smallshift
	srl	xl, xh
	movi	xh, 0

.Lfixunssfdi_shifted:
	/* Negate the 64-bit result if sign != 0.  */
	bgez	a7, 1f
	neg	xl, xl
	neg	xh, xh
	beqz	xl, 1f
	addi	xh, xh, -1
1:	leaf_return

.Lfixunssfdi_smallshift:
	/* Result needs more than 32 bits: split the mantissa across
	   xh/xl with a funnel shift.  */
	movi	xl, 0
	src	xl, xh, xl
	srl	xh, xh
	j	.Lfixunssfdi_shifted

.Lfixunssfdi_nan_or_inf:
	/* Handle Infinity and NaN (nonzero mantissa => NaN).  */
	slli	a4, a2, 9
	beqz	a4, .Lfixunssfdi_maxint

	/* Translate NaN to 0xffffffff....  */
1:	movi	xh, -1
	movi	xl, -1
	leaf_return

.Lfixunssfdi_maxint:
	/* All-ones for nonnegative overflow, 0x80000000_00000000 for
	   negative.  */
	bgez	a2, 1b
2:	slli	xh, a6, 8	/* 0x80000000 */
	movi	xl, 0
	leaf_return

.Lfixunssfdi_zero:
	movi	xh, 0
	movi	xl, 0
	leaf_return

.Lfixunssfdi_bigexp:
	/* Handle unsigned maximum exponent case (x in [2^63, 2^64)):
	   xh already holds the mantissa in position.  */
	bltz	a7, 2b
	movi	xl, 0
	leaf_return		/* no shift needed */
|
#endif /* L_fixunssfdi */ |
|
#ifdef L_floatsisf |
|
	/* float __floatunsisf (unsigned int)
	   float __floatsisf (int)
	   Convert a 32-bit integer to single precision, rounding to
	   nearest with ties to even.  Argument and result in a2; a7
	   holds the result sign (0 for the unsigned entry point).  */
	.align	4
	.global	__floatunsisf
	.type	__floatunsisf, @function
__floatunsisf:
	leaf_entry sp, 16
	beqz	a2, .Lfloatsisf_return

	/* Set the sign to zero and jump to the floatsisf code.  */
	movi	a7, 0
	j	.Lfloatsisf_normalize

	.align	4
	.global	__floatsisf
	.type	__floatsisf, @function
__floatsisf:
	leaf_entry sp, 16

	/* Check for zero (returns +0.0 unchanged).  */
	beqz	a2, .Lfloatsisf_return

	/* Save the sign.  */
	extui	a7, a2, 31, 1

	/* Get the absolute value.  */
#if XCHAL_HAVE_ABS
	abs	a2, a2
#else
	neg	a4, a2
	movltz	a2, a4, a2
#endif

.Lfloatsisf_normalize:
	/* Normalize with the first 1 bit in the msb; a4 = leading-zero
	   count, used below to compute the exponent.  */
	do_nsau	a4, a2, a5, a6
	ssl	a4
	sll	a5, a2

	/* Shift the mantissa into position, with rounding bits in a6.  */
	srli	a2, a5, 8
	slli	a6, a5, (32 - 8)

	/* Set the exponent.  */
	movi	a5, 0x9d	/* 0x7e + 31 */
	sub	a5, a5, a4
	slli	a5, a5, 23
	add	a2, a2, a5

	/* Add the sign.  */
	slli	a7, a7, 31
	or	a2, a2, a7

	/* Round up if the leftover fraction is >= 1/2 (a6 bit 31 set).  */
	bgez	a6, .Lfloatsisf_return
	addi	a2, a2, 1	/* Overflow to the exponent is OK.  */

	/* Check if the leftover fraction is exactly 1/2.  */
	slli	a6, a6, 1
	beqz	a6, .Lfloatsisf_exactlyhalf

.Lfloatsisf_return:
	leaf_return

.Lfloatsisf_exactlyhalf:
	/* Round down to the nearest even value by clearing the low
	   mantissa bit.  */
	srli	a2, a2, 1
	slli	a2, a2, 1
	leaf_return
|
#endif /* L_floatsisf */ |
|
#ifdef L_floatdisf |
|
	/* float __floatundisf (unsigned long long)
	   float __floatdisf (long long)
	   Convert a 64-bit integer to single precision, rounding to
	   nearest with ties to even.  Argument in the xh/xl register
	   pair (macros defined earlier in this file); result in a2.
	   a7 holds the result sign (0 for the unsigned entry point);
	   a4 holds the leading-zero count used to form the exponent.  */
	.align	4
	.global	__floatundisf
	.type	__floatundisf, @function
__floatundisf:
	leaf_entry sp, 16

	/* Check for zero.  */
	or	a4, xh, xl
	beqz	a4, 2f

	/* Set the sign to zero and jump to the floatdisf code.  */
	movi	a7, 0
	j	.Lfloatdisf_normalize

	.align	4
	.global	__floatdisf
	.type	__floatdisf, @function
__floatdisf:
	leaf_entry sp, 16

	/* Check for zero.  */
	or	a4, xh, xl
	beqz	a4, 2f

	/* Save the sign.  */
	extui	a7, xh, 31, 1

	/* Get the absolute value (64-bit two's-complement negation
	   across the register pair).  */
	bgez	xh, .Lfloatdisf_normalize
	neg	xl, xl
	neg	xh, xh
	beqz	xl, .Lfloatdisf_normalize
	addi	xh, xh, -1

.Lfloatdisf_normalize:
	/* Normalize with the first 1 bit in the msb of xh.  */
	beqz	xh, .Lfloatdisf_bigshift
	do_nsau	a4, xh, a5, a6
	ssl	a4
	src	xh, xh, xl
	sll	xl, xl

.Lfloatdisf_shifted:
	/* Shift the mantissa into position, with rounding bits in a6.
	   a5 collects bits shifted out past a6; fold them into a6 as a
	   sticky bit so ties are detected correctly.  */
	ssai	8
	sll	a5, xl
	src	a6, xh, xl
	srl	xh, xh
	beqz	a5, 1f
	movi	a5, 1
	or	a6, a6, a5
1:
	/* Set the exponent.  */
	movi	a5, 0xbd	/* 0x7e + 63 */
	sub	a5, a5, a4
	slli	a5, a5, 23
	add	a2, xh, a5

	/* Add the sign.  */
	slli	a7, a7, 31
	or	a2, a2, a7

	/* Round up if the leftover fraction is >= 1/2 (a6 bit 31 set).  */
	bgez	a6, 2f
	addi	a2, a2, 1	/* Overflow to the exponent is OK.  */

	/* Check if the leftover fraction is exactly 1/2.  */
	slli	a6, a6, 1
	beqz	a6, .Lfloatdisf_exactlyhalf
2:	leaf_return

.Lfloatdisf_bigshift:
	/* xh is zero.  Normalize with first 1 bit of xl in the msb of xh.  */
	do_nsau	a4, xl, a5, a6
	ssl	a4
	sll	xh, xl
	movi	xl, 0
	addi	a4, a4, 32
	j	.Lfloatdisf_shifted

.Lfloatdisf_exactlyhalf:
	/* Round down to the nearest even value by clearing the low
	   mantissa bit.  */
	srli	a2, a2, 1
	slli	a2, a2, 1
	leaf_return
|
#endif /* L_floatdisf */ |