URL
https://opencores.org/ocsvn/openrisc_me/openrisc_me/trunk
Subversion Repositories openrisc_me
[/] [openrisc/] [trunk/] [gnu-src/] [gcc-4.5.1/] [gcc/] [config/] [xtensa/] [ieee754-sf.S] - Rev 282
Compare with Previous | Blame | View Log
/* IEEE-754 single-precision functions for Xtensa
   Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc.
   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* NOTE(review): the macros leaf_entry/leaf_return, do_nsau, and
   do_addx2/4/8 used below are defined in a header included by the
   enclosing build (not visible in this chunk) -- presumably
   lib1funcs.asm / xtensa-config.h; confirm before reusing this file
   standalone.

   Calling convention visible from the code: single-float arguments
   arrive in a2 (x) and a3 (y); single-float results are returned in
   a2.  64-bit integer values are passed/returned in an endian-
   dependent register pair, named via the xh/xl and yh/yl aliases
   below.  */

#ifdef __XTENSA_EB__
#define xh a2
#define xl a3
#define yh a4
#define yl a5
#else
#define xh a3
#define xl a2
#define yh a5
#define yl a4
#endif

/* Warning!  The branch displacements for some Xtensa branch instructions
   are quite small, and this code has been carefully laid out to keep
   branch targets in range.  If you change anything, be sure to check that
   the assembler is not relaxing anything to branch over a jump.  */

#ifdef L_negsf2

	/* Negation: flip the sign bit of the single-float in a2.  */
	.align	4
	.global	__negsf2
	.type	__negsf2, @function
__negsf2:
	leaf_entry sp, 16
	movi	a4, 0x80000000
	xor	a2, a2, a4
	leaf_return

#endif /* L_negsf2 */

#ifdef L_addsubsf3

/* Addition */
__addsf3_aux:

	/* Handle NaNs and Infinities.  (This code is placed before the
	   start of the function just to keep it in range of the limited
	   branch displacements.)  */

.Ladd_xnan_or_inf:
	/* If y is neither Infinity nor NaN, return x.  */
	bnall	a3, a6, 1f
	/* If x is a NaN, return it.  Otherwise, return y.  */
	slli	a7, a2, 9
	beqz	a7, .Ladd_ynan_or_inf
1:	leaf_return

.Ladd_ynan_or_inf:
	/* Return y.  */
	mov	a2, a3
	leaf_return

.Ladd_opposite_signs:
	/* Operand signs differ.  Do a subtraction.  */
	slli	a7, a6, 8
	xor	a3, a3, a7
	j	.Lsub_same_sign

	.align	4
	.global	__addsf3
	.type	__addsf3, @function
__addsf3:
	leaf_entry sp, 16
	movi	a6, 0x7f800000

	/* Check if the two operands have the same sign.  */
	xor	a7, a2, a3
	bltz	a7, .Ladd_opposite_signs

.Ladd_same_sign:
	/* Check if either exponent == 0x7f8 (i.e., NaN or Infinity).  */
	ball	a2, a6, .Ladd_xnan_or_inf
	ball	a3, a6, .Ladd_ynan_or_inf

	/* Compare the exponents.  The smaller operand will be shifted
	   right by the exponent difference and added to the larger
	   one.  */
	extui	a7, a2, 23, 9
	extui	a8, a3, 23, 9
	bltu	a7, a8, .Ladd_shiftx

.Ladd_shifty:
	/* Check if the smaller (or equal) exponent is zero.  */
	bnone	a3, a6, .Ladd_yexpzero

	/* Replace y sign/exponent with 0x008.  */
	or	a3, a3, a6
	slli	a3, a3, 8
	srli	a3, a3, 8

.Ladd_yexpdiff:
	/* Compute the exponent difference.  */
	sub	a10, a7, a8

	/* Exponent difference > 32 -- just return the bigger value.  */
	bgeui	a10, 32, 1f

	/* Shift y right by the exponent difference.  Any bits that are
	   shifted out of y are saved in a9 for rounding the result.  */
	ssr	a10
	movi	a9, 0
	src	a9, a3, a9
	srl	a3, a3

	/* Do the addition.  */
	add	a2, a2, a3

	/* Check if the add overflowed into the exponent.  */
	extui	a10, a2, 23, 9
	beq	a10, a7, .Ladd_round
	mov	a8, a7
	j	.Ladd_carry

.Ladd_yexpzero:
	/* y is a subnormal value.  Replace its sign/exponent with zero,
	   i.e., no implicit "1.0", and increment the apparent exponent
	   because subnormals behave as if they had the minimum (nonzero)
	   exponent.  Test for the case when both exponents are zero.  */
	slli	a3, a3, 9
	srli	a3, a3, 9
	bnone	a2, a6, .Ladd_bothexpzero
	addi	a8, a8, 1
	j	.Ladd_yexpdiff

.Ladd_bothexpzero:
	/* Both exponents are zero.  Handle this as a special case.  There
	   is no need to shift or round, and the normal code for handling
	   a carry into the exponent field will not work because it
	   assumes there is an implicit "1.0" that needs to be added.  */
	add	a2, a2, a3
1:	leaf_return

.Ladd_xexpzero:
	/* Same as "yexpzero" except skip handling the case when both
	   exponents are zero.  */
	slli	a2, a2, 9
	srli	a2, a2, 9
	addi	a7, a7, 1
	j	.Ladd_xexpdiff

.Ladd_shiftx:
	/* Same thing as the "shifty" code, but with x and y swapped.  Also,
	   because the exponent difference is always nonzero in this version,
	   the shift sequence can use SLL and skip loading a constant zero.  */
	bnone	a2, a6, .Ladd_xexpzero
	or	a2, a2, a6
	slli	a2, a2, 8
	srli	a2, a2, 8

.Ladd_xexpdiff:
	sub	a10, a8, a7
	bgeui	a10, 32, .Ladd_returny
	ssr	a10
	sll	a9, a2
	srl	a2, a2

	add	a2, a2, a3

	/* Check if the add overflowed into the exponent.  */
	extui	a10, a2, 23, 9
	bne	a10, a8, .Ladd_carry

.Ladd_round:
	/* Round up if the leftover fraction is >= 1/2.  */
	bgez	a9, 1f
	addi	a2, a2, 1

	/* Check if the leftover fraction is exactly 1/2.  */
	slli	a9, a9, 1
	beqz	a9, .Ladd_exactlyhalf
1:	leaf_return

.Ladd_returny:
	mov	a2, a3
	leaf_return

.Ladd_carry:
	/* The addition has overflowed into the exponent field, so the
	   value needs to be renormalized.  The mantissa of the result
	   can be recovered by subtracting the original exponent and
	   adding 0x800000 (which is the explicit "1.0" for the
	   mantissa of the non-shifted operand -- the "1.0" for the
	   shifted operand was already added).  The mantissa can then
	   be shifted right by one bit.  The explicit "1.0" of the
	   shifted mantissa then needs to be replaced by the exponent,
	   incremented by one to account for the normalizing shift.
	   It is faster to combine these operations: do the shift first
	   and combine the additions and subtractions.  If x is the
	   original exponent, the result is:
	       shifted mantissa - (x << 22) + (1 << 22) + (x << 23)
	   or:
	       shifted mantissa + ((x + 1) << 22)
	   Note that the exponent is incremented here by leaving the
	   explicit "1.0" of the mantissa in the exponent field.  */

	/* Shift x right by one bit.  Save the lsb.  */
	mov	a10, a2
	srli	a2, a2, 1

	/* See explanation above.  The original exponent is in a8.  */
	addi	a8, a8, 1
	slli	a8, a8, 22
	add	a2, a2, a8

	/* Return an Infinity if the exponent overflowed.  */
	ball	a2, a6, .Ladd_infinity

	/* Same thing as the "round" code except the msb of the leftover
	   fraction is bit 0 of a10, with the rest of the fraction in a9.  */
	bbci.l	a10, 0, 1f
	addi	a2, a2, 1
	beqz	a9, .Ladd_exactlyhalf
1:	leaf_return

.Ladd_infinity:
	/* Clear the mantissa.  */
	srli	a2, a2, 23
	slli	a2, a2, 23

	/* The sign bit may have been lost in a carry-out.  Put it back.  */
	slli	a8, a8, 1
	or	a2, a2, a8
	leaf_return

.Ladd_exactlyhalf:
	/* Round down to the nearest even value.  */
	srli	a2, a2, 1
	slli	a2, a2, 1
	leaf_return


/* Subtraction */
__subsf3_aux:

	/* Handle NaNs and Infinities.  (This code is placed before the
	   start of the function just to keep it in range of the limited
	   branch displacements.)  */

.Lsub_xnan_or_inf:
	/* If y is neither Infinity nor NaN, return x.  */
	bnall	a3, a6, 1f
	/* Both x and y are either NaN or Inf, so the result is NaN.  */
	movi	a4, 0x400000	/* make it a quiet NaN */
	or	a2, a2, a4
1:	leaf_return

.Lsub_ynan_or_inf:
	/* Negate y and return it.  */
	slli	a7, a6, 8
	xor	a2, a3, a7
	leaf_return

.Lsub_opposite_signs:
	/* Operand signs differ.  Do an addition.  */
	slli	a7, a6, 8
	xor	a3, a3, a7
	j	.Ladd_same_sign

	.align	4
	.global	__subsf3
	.type	__subsf3, @function
__subsf3:
	leaf_entry sp, 16
	movi	a6, 0x7f800000

	/* Check if the two operands have the same sign.  */
	xor	a7, a2, a3
	bltz	a7, .Lsub_opposite_signs

.Lsub_same_sign:
	/* Check if either exponent == 0x7f8 (i.e., NaN or Infinity).  */
	ball	a2, a6, .Lsub_xnan_or_inf
	ball	a3, a6, .Lsub_ynan_or_inf

	/* Compare the operands.  In contrast to addition, the entire
	   value matters here.  */
	extui	a7, a2, 23, 8
	extui	a8, a3, 23, 8
	bltu	a2, a3, .Lsub_xsmaller

.Lsub_ysmaller:
	/* Check if the smaller (or equal) exponent is zero.  */
	bnone	a3, a6, .Lsub_yexpzero

	/* Replace y sign/exponent with 0x008.  */
	or	a3, a3, a6
	slli	a3, a3, 8
	srli	a3, a3, 8

.Lsub_yexpdiff:
	/* Compute the exponent difference.  */
	sub	a10, a7, a8

	/* Exponent difference > 32 -- just return the bigger value.  */
	bgeui	a10, 32, 1f

	/* Shift y right by the exponent difference.  Any bits that are
	   shifted out of y are saved in a9 for rounding the result.  */
	ssr	a10
	movi	a9, 0
	src	a9, a3, a9
	srl	a3, a3

	sub	a2, a2, a3

	/* Subtract the leftover bits in a9 from zero and propagate any
	   borrow from a2.  */
	neg	a9, a9
	addi	a10, a2, -1
	movnez	a2, a10, a9

	/* Check if the subtract underflowed into the exponent.  */
	extui	a10, a2, 23, 8
	beq	a10, a7, .Lsub_round
	j	.Lsub_borrow

.Lsub_yexpzero:
	/* Return zero if the inputs are equal.  (For the non-subnormal
	   case, subtracting the "1.0" will cause a borrow from the exponent
	   and this case can be detected when handling the borrow.)  */
	beq	a2, a3, .Lsub_return_zero

	/* y is a subnormal value.  Replace its sign/exponent with zero,
	   i.e., no implicit "1.0".  Unless x is also a subnormal, increment
	   y's apparent exponent because subnormals behave as if they had
	   the minimum (nonzero) exponent.  */
	slli	a3, a3, 9
	srli	a3, a3, 9
	bnone	a2, a6, .Lsub_yexpdiff
	addi	a8, a8, 1
	j	.Lsub_yexpdiff

.Lsub_returny:
	/* Negate and return y.  */
	slli	a7, a6, 8
	xor	a2, a3, a7
1:	leaf_return

.Lsub_xsmaller:
	/* Same thing as the "ysmaller" code, but with x and y swapped and
	   with y negated.  */
	bnone	a2, a6, .Lsub_xexpzero
	or	a2, a2, a6
	slli	a2, a2, 8
	srli	a2, a2, 8

.Lsub_xexpdiff:
	sub	a10, a8, a7
	bgeui	a10, 32, .Lsub_returny
	ssr	a10
	movi	a9, 0
	src	a9, a2, a9
	srl	a2, a2

	/* Negate y.  */
	slli	a11, a6, 8
	xor	a3, a3, a11

	sub	a2, a3, a2

	neg	a9, a9
	addi	a10, a2, -1
	movnez	a2, a10, a9

	/* Check if the subtract underflowed into the exponent.  */
	extui	a10, a2, 23, 8
	bne	a10, a8, .Lsub_borrow

.Lsub_round:
	/* Round up if the leftover fraction is >= 1/2.  */
	bgez	a9, 1f
	addi	a2, a2, 1

	/* Check if the leftover fraction is exactly 1/2.  */
	slli	a9, a9, 1
	beqz	a9, .Lsub_exactlyhalf
1:	leaf_return

.Lsub_xexpzero:
	/* Same as "yexpzero".  */
	beq	a2, a3, .Lsub_return_zero
	slli	a2, a2, 9
	srli	a2, a2, 9
	bnone	a3, a6, .Lsub_xexpdiff
	addi	a7, a7, 1
	j	.Lsub_xexpdiff

.Lsub_return_zero:
	movi	a2, 0
	leaf_return

.Lsub_borrow:
	/* The subtraction has underflowed into the exponent field, so the
	   value needs to be renormalized.  Shift the mantissa left as
	   needed to remove any leading zeros and adjust the exponent
	   accordingly.  If the exponent is not large enough to remove
	   all the leading zeros, the result will be a subnormal value.  */

	slli	a8, a2, 9
	beqz	a8, .Lsub_xzero
	do_nsau	a6, a8, a7, a11
	srli	a8, a8, 9
	bge	a6, a10, .Lsub_subnormal
	addi	a6, a6, 1

.Lsub_normalize_shift:
	/* Shift the mantissa (a8/a9) left by a6.  */
	ssl	a6
	src	a8, a8, a9
	sll	a9, a9

	/* Combine the shifted mantissa with the sign and exponent,
	   decrementing the exponent by a6.  (The exponent has already
	   been decremented by one due to the borrow from the subtraction,
	   but adding the mantissa will increment the exponent by one.)  */
	srli	a2, a2, 23
	sub	a2, a2, a6
	slli	a2, a2, 23
	add	a2, a2, a8
	j	.Lsub_round

.Lsub_exactlyhalf:
	/* Round down to the nearest even value.  */
	srli	a2, a2, 1
	slli	a2, a2, 1
	leaf_return

.Lsub_xzero:
	/* If there was a borrow from the exponent, and the mantissa and
	   guard digits are all zero, then the inputs were equal and the
	   result should be zero.  */
	beqz	a9, .Lsub_return_zero

	/* Only the guard digit is nonzero.  Shift by min(24, a10).  */
	addi	a11, a10, -24
	movi	a6, 24
	movltz	a6, a10, a11
	j	.Lsub_normalize_shift

.Lsub_subnormal:
	/* The exponent is too small to shift away all the leading zeros.
	   Set a6 to the current exponent (which has already been
	   decremented by the borrow) so that the exponent of the result
	   will be zero.  Do not add 1 to a6 in this case, because: (1)
	   adding the mantissa will not increment the exponent, so there is
	   no need to subtract anything extra from the exponent to
	   compensate, and (2) the effective exponent of a subnormal is 1
	   not 0 so the shift amount must be 1 smaller than normal.  */
	mov	a6, a10
	j	.Lsub_normalize_shift

#endif /* L_addsubsf3 */

#ifdef L_mulsf3

	/* Multiplication */
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
#define XCHAL_NO_MUL 1
#endif

__mulsf3_aux:

	/* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
	   (This code is placed before the start of the function just to
	   keep it in range of the limited branch displacements.)  */

.Lmul_xexpzero:
	/* Clear the sign bit of x.  */
	slli	a2, a2, 1
	srli	a2, a2, 1

	/* If x is zero, return zero.  */
	beqz	a2, .Lmul_return_zero

	/* Normalize x.  Adjust the exponent in a8.  */
	do_nsau	a10, a2, a11, a12
	addi	a10, a10, -8
	ssl	a10
	sll	a2, a2
	movi	a8, 1
	sub	a8, a8, a10
	j	.Lmul_xnormalized

.Lmul_yexpzero:
	/* Clear the sign bit of y.  */
	slli	a3, a3, 1
	srli	a3, a3, 1

	/* If y is zero, return zero.  */
	beqz	a3, .Lmul_return_zero

	/* Normalize y.  Adjust the exponent in a9.  */
	do_nsau	a10, a3, a11, a12
	addi	a10, a10, -8
	ssl	a10
	sll	a3, a3
	movi	a9, 1
	sub	a9, a9, a10
	j	.Lmul_ynormalized

.Lmul_return_zero:
	/* Return zero with the appropriate sign bit.  */
	srli	a2, a7, 31
	slli	a2, a2, 31
	j	.Lmul_done

.Lmul_xnan_or_inf:
	/* If y is zero, return NaN.  */
	slli	a8, a3, 1
	bnez	a8, 1f
	movi	a4, 0x400000	/* make it a quiet NaN */
	or	a2, a2, a4
	j	.Lmul_done
1:
	/* If y is NaN, return y.  */
	bnall	a3, a6, .Lmul_returnx
	slli	a8, a3, 9
	beqz	a8, .Lmul_returnx

.Lmul_returny:
	mov	a2, a3

.Lmul_returnx:
	/* Set the sign bit and return.  */
	extui	a7, a7, 31, 1
	slli	a2, a2, 1
	ssai	1
	src	a2, a7, a2
	j	.Lmul_done

.Lmul_ynan_or_inf:
	/* If x is zero, return NaN.  */
	slli	a8, a2, 1
	bnez	a8, .Lmul_returny
	movi	a7, 0x400000	/* make it a quiet NaN */
	or	a2, a3, a7
	j	.Lmul_done

	.align	4
	.global	__mulsf3
	.type	__mulsf3, @function
__mulsf3:
#if __XTENSA_CALL0_ABI__
	leaf_entry sp, 32
	addi	sp, sp, -32
	s32i	a12, sp, 16
	s32i	a13, sp, 20
	s32i	a14, sp, 24
	s32i	a15, sp, 28
#elif XCHAL_NO_MUL
	/* This is not really a leaf function; allocate enough stack space
	   to allow CALL12s to a helper function.  */
	leaf_entry sp, 64
#else
	leaf_entry sp, 32
#endif
	movi	a6, 0x7f800000

	/* Get the sign of the result.  */
	xor	a7, a2, a3

	/* Check for NaN and infinity.  */
	ball	a2, a6, .Lmul_xnan_or_inf
	ball	a3, a6, .Lmul_ynan_or_inf

	/* Extract the exponents.  */
	extui	a8, a2, 23, 8
	extui	a9, a3, 23, 8

	beqz	a8, .Lmul_xexpzero
.Lmul_xnormalized:
	beqz	a9, .Lmul_yexpzero
.Lmul_ynormalized:

	/* Add the exponents.  */
	add	a8, a8, a9

	/* Replace sign/exponent fields with explicit "1.0".  */
	movi	a10, 0xffffff
	or	a2, a2, a6
	and	a2, a2, a10
	or	a3, a3, a6
	and	a3, a3, a10

	/* Multiply 32x32 to 64 bits.  The result ends up in a2/a6.  */

#if XCHAL_HAVE_MUL32_HIGH

	mull	a6, a2, a3
	muluh	a2, a2, a3

#else

	/* Break the inputs into 16-bit chunks and compute 4 32-bit partial
	   products.  These partial products are:

		0 xl * yl

		1 xl * yh
		2 xh * yl

		3 xh * yh

	   If using the Mul16 or Mul32 multiplier options, these input
	   chunks must be stored in separate registers.  For Mac16, the
	   UMUL.AA.* opcodes can specify that the inputs come from either
	   half of the registers, so there is no need to shift them out
	   ahead of time.  If there is no multiply hardware, the 16-bit
	   chunks can be extracted when setting up the arguments to the
	   separate multiply function.  */

#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
	/* Calling a separate multiply function will clobber a0 and requires
	   use of a8 as a temporary, so save those values now.  (The function
	   uses a custom ABI so nothing else needs to be saved.)  */
	s32i	a0, sp, 0
	s32i	a8, sp, 4
#endif

#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32

#define a2h a4
#define a3h a5

	/* Get the high halves of the inputs into registers.  */
	srli	a2h, a2, 16
	srli	a3h, a3, 16

#define a2l a2
#define a3l a3

#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
	/* Clear the high halves of the inputs.  This does not matter
	   for MUL16 because the high bits are ignored.  */
	extui	a2, a2, 0, 16
	extui	a3, a3, 0, 16
#endif
#endif /* MUL16 || MUL32 */


#if XCHAL_HAVE_MUL16

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	mul16u	dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MUL32

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	mull	dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MAC16

/* The preprocessor insists on inserting a space when concatenating after
   a period in the definition of do_mul below.  These macros are a workaround
   using underscores instead of periods when doing the concatenation.  */
#define umul_aa_ll umul.aa.ll
#define umul_aa_lh umul.aa.lh
#define umul_aa_hl umul.aa.hl
#define umul_aa_hh umul.aa.hh

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	umul_aa_ ## xhalf ## yhalf	xreg, yreg; \
	rsr	dst, ACCLO

#else /* no multiply hardware */

#define set_arg_l(dst, src) \
	extui	dst, src, 0, 16
#define set_arg_h(dst, src) \
	srli	dst, src, 16

#if __XTENSA_CALL0_ABI__
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	set_arg_ ## xhalf (a13, xreg); \
	set_arg_ ## yhalf (a14, yreg); \
	call0	.Lmul_mulsi3; \
	mov	dst, a12
#else
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	set_arg_ ## xhalf (a14, xreg); \
	set_arg_ ## yhalf (a15, yreg); \
	call12	.Lmul_mulsi3; \
	mov	dst, a14
#endif /* __XTENSA_CALL0_ABI__ */

#endif /* no multiply hardware */

	/* Add pp1 and pp2 into a6 with carry-out in a9.  */
	do_mul(a6, a2, l, a3, h)	/* pp 1 */
	do_mul(a11, a2, h, a3, l)	/* pp 2 */
	movi	a9, 0
	add	a6, a6, a11
	bgeu	a6, a11, 1f
	addi	a9, a9, 1
1:
	/* Shift the high half of a9/a6 into position in a9.  Note that
	   this value can be safely incremented without any carry-outs.  */
	ssai	16
	src	a9, a9, a6

	/* Compute the low word into a6.  */
	do_mul(a11, a2, l, a3, l)	/* pp 0 */
	sll	a6, a6
	add	a6, a6, a11
	bgeu	a6, a11, 1f
	addi	a9, a9, 1
1:
	/* Compute the high word into a2.  */
	do_mul(a2, a2, h, a3, h)	/* pp 3 */
	add	a2, a2, a9

#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
	/* Restore values saved on the stack during the multiplication.  */
	l32i	a0, sp, 0
	l32i	a8, sp, 4
#endif
#endif /* !XCHAL_HAVE_MUL32_HIGH */

	/* Shift left by 9 bits, unless there was a carry-out from the
	   multiply, in which case, shift by 8 bits and increment the
	   exponent.  */
	movi	a4, 9
	srli	a5, a2, 24 - 9
	beqz	a5, 1f
	addi	a4, a4, -1
	addi	a8, a8, 1
1:	ssl	a4
	src	a2, a2, a6
	sll	a6, a6

	/* Subtract the extra bias from the exponent sum (plus one to account
	   for the explicit "1.0" of the mantissa that will be added to the
	   exponent in the final result).  */
	movi	a4, 0x80
	sub	a8, a8, a4

	/* Check for over/underflow.  The value in a8 is one less than the
	   final exponent, so values in the range 0..fd are OK here.  */
	movi	a4, 0xfe
	bgeu	a8, a4, .Lmul_overflow

.Lmul_round:
	/* Round.  */
	bgez	a6, .Lmul_rounded
	addi	a2, a2, 1
	slli	a6, a6, 1
	beqz	a6, .Lmul_exactlyhalf

.Lmul_rounded:
	/* Add the exponent to the mantissa.  */
	slli	a8, a8, 23
	add	a2, a2, a8

.Lmul_addsign:
	/* Add the sign bit.  */
	srli	a7, a7, 31
	slli	a7, a7, 31
	or	a2, a2, a7

.Lmul_done:
#if __XTENSA_CALL0_ABI__
	l32i	a12, sp, 16
	l32i	a13, sp, 20
	l32i	a14, sp, 24
	l32i	a15, sp, 28
	addi	sp, sp, 32
#endif
	leaf_return

.Lmul_exactlyhalf:
	/* Round down to the nearest even value.  */
	srli	a2, a2, 1
	slli	a2, a2, 1
	j	.Lmul_rounded

.Lmul_overflow:
	bltz	a8, .Lmul_underflow
	/* Return +/- Infinity.  */
	movi	a8, 0xff
	slli	a2, a8, 23
	j	.Lmul_addsign

.Lmul_underflow:
	/* Create a subnormal value, where the exponent field contains zero,
	   but the effective exponent is 1.  The value of a8 is one less than
	   the actual exponent, so just negate it to get the shift amount.  */
	neg	a8, a8
	mov	a9, a6
	ssr	a8
	bgeui	a8, 32, .Lmul_flush_to_zero

	/* Shift a2 right.  Any bits that are shifted out of a2 are saved
	   in a6 (combined with the shifted-out bits currently in a6) for
	   rounding the result.  */
	sll	a6, a2
	srl	a2, a2

	/* Set the exponent to zero.  */
	movi	a8, 0

	/* Pack any nonzero bits shifted out into a6.  */
	beqz	a9, .Lmul_round
	movi	a9, 1
	or	a6, a6, a9
	j	.Lmul_round

.Lmul_flush_to_zero:
	/* Return zero with the appropriate sign bit.  */
	srli	a2, a7, 31
	slli	a2, a2, 31
	j	.Lmul_done

#if XCHAL_NO_MUL

	/* For Xtensa processors with no multiply hardware, this simplified
	   version of _mulsi3 is used for multiplying 16-bit chunks of
	   the floating-point mantissas.  When using CALL0, this function
	   uses a custom ABI: the inputs are passed in a13 and a14, the
	   result is returned in a12, and a8 and a15 are clobbered.  */
	.align	4
.Lmul_mulsi3:
	leaf_entry sp, 16
	.macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
	movi	\dst, 0
1:	add	\tmp1, \src2, \dst
	extui	\tmp2, \src1, 0, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx2 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 1, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx4 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 2, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx8 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 3, 1
	movnez	\dst, \tmp1, \tmp2

	srli	\src1, \src1, 4
	slli	\src2, \src2, 4
	bnez	\src1, 1b
	.endm
#if __XTENSA_CALL0_ABI__
	mul_mulsi3_body a12, a13, a14, a15, a8
#else
	/* The result will be written into a2, so save that argument in a4.  */
	mov	a4, a2
	mul_mulsi3_body a2, a4, a3, a5, a6
#endif
	leaf_return
#endif /* XCHAL_NO_MUL */

#endif /* L_mulsf3 */

#ifdef L_divsf3

	/* Division */
__divsf3_aux:

	/* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
	   (This code is placed before the start of the function just to
	   keep it in range of the limited branch displacements.)  */

.Ldiv_yexpzero:
	/* Clear the sign bit of y.  */
	slli	a3, a3, 1
	srli	a3, a3, 1

	/* Check for division by zero.  */
	beqz	a3, .Ldiv_yzero

	/* Normalize y.  Adjust the exponent in a9.  */
	do_nsau	a10, a3, a4, a5
	addi	a10, a10, -8
	ssl	a10
	sll	a3, a3
	movi	a9, 1
	sub	a9, a9, a10
	j	.Ldiv_ynormalized

.Ldiv_yzero:
	/* y is zero.  Return NaN if x is also zero; otherwise, infinity.  */
	slli	a4, a2, 1
	srli	a4, a4, 1
	srli	a2, a7, 31
	slli	a2, a2, 31
	or	a2, a2, a6
	bnez	a4, 1f
	movi	a4, 0x400000	/* make it a quiet NaN */
	or	a2, a2, a4
1:	leaf_return

.Ldiv_xexpzero:
	/* Clear the sign bit of x.  */
	slli	a2, a2, 1
	srli	a2, a2, 1

	/* If x is zero, return zero.  */
	beqz	a2, .Ldiv_return_zero

	/* Normalize x.  Adjust the exponent in a8.  */
	do_nsau	a10, a2, a4, a5
	addi	a10, a10, -8
	ssl	a10
	sll	a2, a2
	movi	a8, 1
	sub	a8, a8, a10
	j	.Ldiv_xnormalized

.Ldiv_return_zero:
	/* Return zero with the appropriate sign bit.  */
	srli	a2, a7, 31
	slli	a2, a2, 31
	leaf_return

.Ldiv_xnan_or_inf:
	/* Set the sign bit of the result.  */
	srli	a7, a3, 31
	slli	a7, a7, 31
	xor	a2, a2, a7
	/* If y is NaN or Inf, return NaN.  */
	bnall	a3, a6, 1f
	movi	a4, 0x400000	/* make it a quiet NaN */
	or	a2, a2, a4
1:	leaf_return

.Ldiv_ynan_or_inf:
	/* If y is Infinity, return zero.  */
	slli	a8, a3, 9
	beqz	a8, .Ldiv_return_zero
	/* y is NaN; return it.  */
	mov	a2, a3
	leaf_return

	.align	4
	.global	__divsf3
	.type	__divsf3, @function
__divsf3:
	leaf_entry sp, 16
	movi	a6, 0x7f800000

	/* Get the sign of the result.  */
	xor	a7, a2, a3

	/* Check for NaN and infinity.  */
	ball	a2, a6, .Ldiv_xnan_or_inf
	ball	a3, a6, .Ldiv_ynan_or_inf

	/* Extract the exponents.  */
	extui	a8, a2, 23, 8
	extui	a9, a3, 23, 8

	beqz	a9, .Ldiv_yexpzero
.Ldiv_ynormalized:
	beqz	a8, .Ldiv_xexpzero
.Ldiv_xnormalized:

	/* Subtract the exponents.  */
	sub	a8, a8, a9

	/* Replace sign/exponent fields with explicit "1.0".  */
	movi	a10, 0xffffff
	or	a2, a2, a6
	and	a2, a2, a10
	or	a3, a3, a6
	and	a3, a3, a10

	/* The first digit of the mantissa division must be a one.
	   Shift x (and adjust the exponent) as needed to make this true.  */
	bltu	a3, a2, 1f
	slli	a2, a2, 1
	addi	a8, a8, -1
1:
	/* Do the first subtraction and shift.  */
	sub	a2, a2, a3
	slli	a2, a2, 1

	/* Put the quotient into a10.  */
	movi	a10, 1

	/* Divide one bit at a time for 23 bits.  */
	movi	a9, 23
#if XCHAL_HAVE_LOOPS
	loop	a9, .Ldiv_loopend
#endif
.Ldiv_loop:
	/* Shift the quotient << 1.  */
	slli	a10, a10, 1

	/* Is this digit a 0 or 1?  */
	bltu	a2, a3, 1f

	/* Output a 1 and subtract.  */
	addi	a10, a10, 1
	sub	a2, a2, a3

	/* Shift the dividend << 1.  */
1:	slli	a2, a2, 1

#if !XCHAL_HAVE_LOOPS
	addi	a9, a9, -1
	bnez	a9, .Ldiv_loop
#endif
.Ldiv_loopend:

	/* Add the exponent bias (less one to account for the explicit "1.0"
	   of the mantissa that will be added to the exponent in the final
	   result).  */
	addi	a8, a8, 0x7e

	/* Check for over/underflow.  The value in a8 is one less than the
	   final exponent, so values in the range 0..fd are OK here.  */
	movi	a4, 0xfe
	bgeu	a8, a4, .Ldiv_overflow

.Ldiv_round:
	/* Round.  The remainder (<< 1) is in a2.  */
	bltu	a2, a3, .Ldiv_rounded
	addi	a10, a10, 1
	beq	a2, a3, .Ldiv_exactlyhalf

.Ldiv_rounded:
	/* Add the exponent to the mantissa.  */
	slli	a8, a8, 23
	add	a2, a10, a8

.Ldiv_addsign:
	/* Add the sign bit.  */
	srli	a7, a7, 31
	slli	a7, a7, 31
	or	a2, a2, a7
	leaf_return

.Ldiv_overflow:
	bltz	a8, .Ldiv_underflow
	/* Return +/- Infinity.  */
	addi	a8, a4, 1	/* 0xff */
	slli	a2, a8, 23
	j	.Ldiv_addsign

.Ldiv_exactlyhalf:
	/* Remainder is exactly half the divisor.  Round even.  */
	srli	a10, a10, 1
	slli	a10, a10, 1
	j	.Ldiv_rounded

.Ldiv_underflow:
	/* Create a subnormal value, where the exponent field contains zero,
	   but the effective exponent is 1.  The value of a8 is one less than
	   the actual exponent, so just negate it to get the shift amount.  */
	neg	a8, a8
	ssr	a8
	bgeui	a8, 32, .Ldiv_flush_to_zero

	/* Shift a10 right.  Any bits that are shifted out of a10 are
	   saved in a6 for rounding the result.  */
	sll	a6, a10
	srl	a10, a10

	/* Set the exponent to zero.  */
	movi	a8, 0

	/* Pack any nonzero remainder (in a2) into a6.  */
	beqz	a2, 1f
	movi	a9, 1
	or	a6, a6, a9

	/* Round a10 based on the bits shifted out into a6.  */
1:	bgez	a6, .Ldiv_rounded
	addi	a10, a10, 1
	slli	a6, a6, 1
	bnez	a6, .Ldiv_rounded
	srli	a10, a10, 1
	slli	a10, a10, 1
	j	.Ldiv_rounded

.Ldiv_flush_to_zero:
	/* Return zero with the appropriate sign bit.  */
	srli	a2, a7, 31
	slli	a2, a2, 31
	leaf_return

#endif /* L_divsf3 */

#ifdef L_cmpsf2

	/* Equal and Not Equal */

	.align	4
	.global	__eqsf2
	.global	__nesf2
	.set	__nesf2, __eqsf2
	.type	__eqsf2, @function
__eqsf2:
	leaf_entry sp, 16
	bne	a2, a3, 4f

	/* The values are equal but NaN != NaN.  Check the exponent.  */
	movi	a6, 0x7f800000
	ball	a2, a6, 3f

	/* Equal.  */
	movi	a2, 0
	leaf_return

	/* Not equal.  */
2:	movi	a2, 1
	leaf_return

	/* Check if the mantissas are nonzero.  */
3:	slli	a7, a2, 9
	j	5f

	/* Check if x and y are zero with different signs.  */
4:	or	a7, a2, a3
	slli	a7, a7, 1

	/* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
	   or x when exponent(x) = 0x7f8 and x == y.  */
5:	movi	a2, 0
	movi	a3, 1
	movnez	a2, a3, a7
	leaf_return

	/* Greater Than */

	.align	4
	.global	__gtsf2
	.type	__gtsf2, @function
__gtsf2:
	leaf_entry sp, 16
	movi	a6, 0x7f800000
	ball	a2, a6, 2f
1:	bnall	a3, a6, .Lle_cmp

	/* Check if y is a NaN.  */
	slli	a7, a3, 9
	beqz	a7, .Lle_cmp
	movi	a2, 0
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, a2, 9
	beqz	a7, 1b
	movi	a2, 0
	leaf_return

	/* Less Than or Equal */

	.align	4
	.global	__lesf2
	.type	__lesf2, @function
__lesf2:
	leaf_entry sp, 16
	movi	a6, 0x7f800000
	ball	a2, a6, 2f
1:	bnall	a3, a6, .Lle_cmp

	/* Check if y is a NaN.  */
	slli	a7, a3, 9
	beqz	a7, .Lle_cmp
	movi	a2, 1
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, a2, 9
	beqz	a7, 1b
	movi	a2, 1
	leaf_return

.Lle_cmp:
	/* Check if x and y have different signs.  */
	xor	a7, a2, a3
	bltz	a7, .Lle_diff_signs

	/* Check if x is negative.  */
	bltz	a2, .Lle_xneg

	/* Check if x <= y.  */
	bltu	a3, a2, 5f
4:	movi	a2, 0
	leaf_return

.Lle_xneg:
	/* Check if y <= x.  */
	bgeu	a2, a3, 4b
5:	movi	a2, 1
	leaf_return

.Lle_diff_signs:
	bltz	a2, 4b

	/* Check if both x and y are zero.  */
	or	a7, a2, a3
	slli	a7, a7, 1
	movi	a2, 1
	movi	a3, 0
	moveqz	a2, a3, a7
	leaf_return

	/* Greater Than or Equal */

	.align	4
	.global	__gesf2
	.type	__gesf2, @function
__gesf2:
	leaf_entry sp, 16
	movi	a6, 0x7f800000
	ball	a2, a6, 2f
1:	bnall	a3, a6, .Llt_cmp

	/* Check if y is a NaN.  */
	slli	a7, a3, 9
	beqz	a7, .Llt_cmp
	movi	a2, -1
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, a2, 9
	beqz	a7, 1b
	movi	a2, -1
	leaf_return

	/* Less Than */

	.align	4
	.global	__ltsf2
	.type	__ltsf2, @function
__ltsf2:
	leaf_entry sp, 16
	movi	a6, 0x7f800000
	ball	a2, a6, 2f
1:	bnall	a3, a6, .Llt_cmp

	/* Check if y is a NaN.  */
	slli	a7, a3, 9
	beqz	a7, .Llt_cmp
	movi	a2, 0
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, a2, 9
	beqz	a7, 1b
	movi	a2, 0
	leaf_return

.Llt_cmp:
	/* Check if x and y have different signs.  */
	xor	a7, a2, a3
	bltz	a7, .Llt_diff_signs

	/* Check if x is negative.  */
	bltz	a2, .Llt_xneg

	/* Check if x < y.  */
	bgeu	a2, a3, 5f
4:	movi	a2, -1
	leaf_return

.Llt_xneg:
	/* Check if y < x.  */
	bltu	a3, a2, 4b
5:	movi	a2, 0
	leaf_return

.Llt_diff_signs:
	bgez	a2, 5b

	/* Check if both x and y are nonzero.  */
	or	a7, a2, a3
	slli	a7, a7, 1
	movi	a2, 0
	movi	a3, -1
	movnez	a2, a3, a7
	leaf_return

	/* Unordered */

	.align	4
	.global	__unordsf2
	.type	__unordsf2, @function
__unordsf2:
	leaf_entry sp, 16
	movi	a6, 0x7f800000
	ball	a2, a6, 3f
1:	ball	a3, a6, 4f
2:	movi	a2, 0
	leaf_return

3:	slli	a7, a2, 9
	beqz	a7, 1b
	movi	a2, 1
	leaf_return

4:	slli	a7, a3, 9
	beqz	a7, 2b
	movi	a2, 1
	leaf_return

#endif /* L_cmpsf2 */

#ifdef L_fixsfsi

	/* Convert single-float (a2) to signed 32-bit integer (a2).  */
	.align	4
	.global	__fixsfsi
	.type	__fixsfsi, @function
__fixsfsi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7f800000
	ball	a2, a6, .Lfixsfsi_nan_or_inf

	/* Extract the exponent and check if 0 < (exp - 0x7e) < 32.  */
	extui	a4, a2, 23, 8
	addi	a4, a4, -0x7e
	bgei	a4, 32, .Lfixsfsi_maxint
	blti	a4, 1, .Lfixsfsi_zero

	/* Add explicit "1.0" and shift << 8.  */
	or	a7, a2, a6
	slli	a5, a7, 8

	/* Shift back to the right, based on the exponent.  */
	ssl	a4		/* shift by 32 - a4 */
	srl	a5, a5

	/* Negate the result if sign != 0.  */
	neg	a2, a5
	movgez	a2, a5, a7
	leaf_return

.Lfixsfsi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, a2, 9
	beqz	a4, .Lfixsfsi_maxint

	/* Translate NaN to +maxint.  */
	movi	a2, 0

.Lfixsfsi_maxint:
	slli	a4, a6, 8	/* 0x80000000 */
	addi	a5, a4, -1	/* 0x7fffffff */
	movgez	a4, a5, a2
	mov	a2, a4
	leaf_return

.Lfixsfsi_zero:
	movi	a2, 0
	leaf_return

#endif /* L_fixsfsi */

#ifdef L_fixsfdi

	/* Convert single-float (a2) to signed 64-bit integer (xh/xl).  */
	.align	4
	.global	__fixsfdi
	.type	__fixsfdi, @function
__fixsfdi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7f800000
	ball	a2, a6, .Lfixsfdi_nan_or_inf

	/* Extract the exponent and check if 0 < (exp - 0x7e) < 64.  */
	extui	a4, a2, 23, 8
	addi	a4, a4, -0x7e
	bgei	a4, 64, .Lfixsfdi_maxint
	blti	a4, 1, .Lfixsfdi_zero

	/* Add explicit "1.0" and shift << 8.  */
	or	a7, a2, a6
	slli	xh, a7, 8

	/* Shift back to the right, based on the exponent.  */
	ssl	a4		/* shift by 64 - a4 */
	bgei	a4, 32, .Lfixsfdi_smallshift
	srl	xl, xh
	movi	xh, 0

.Lfixsfdi_shifted:
	/* Negate the result if sign != 0.  */
	bgez	a7, 1f
	neg	xl, xl
	neg	xh, xh
	beqz	xl, 1f
	addi	xh, xh, -1
1:	leaf_return

.Lfixsfdi_smallshift:
	movi	xl, 0
	sll	xl, xh
	srl	xh, xh
	j	.Lfixsfdi_shifted

.Lfixsfdi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, a2, 9
	beqz	a4, .Lfixsfdi_maxint

	/* Translate NaN to +maxint.  */
	movi	a2, 0

.Lfixsfdi_maxint:
	slli	a7, a6, 8	/* 0x80000000 */
	bgez	a2, 1f
	mov	xh, a7
	movi	xl, 0
	leaf_return

1:	addi	xh, a7, -1	/* 0x7fffffff */
	movi	xl, -1
	leaf_return

.Lfixsfdi_zero:
	movi	xh, 0
	movi	xl, 0
	leaf_return

#endif /* L_fixsfdi */

#ifdef L_fixunssfsi

	/* Convert single-float (a2) to unsigned 32-bit integer (a2).  */
	.align	4
	.global	__fixunssfsi
	.type	__fixunssfsi, @function
__fixunssfsi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7f800000
	ball	a2, a6, .Lfixunssfsi_nan_or_inf

	/* Extract the exponent and check if 0 <= (exp - 0x7f) < 32.  */
	extui	a4, a2, 23, 8
	addi	a4, a4, -0x7f
	bgei	a4, 32, .Lfixunssfsi_maxint
	bltz	a4, .Lfixunssfsi_zero

	/* Add explicit "1.0" and shift << 8.  */
	or	a7, a2, a6
	slli	a5, a7, 8

	/* Shift back to the right, based on the exponent.  */
	addi	a4, a4, 1
	beqi	a4, 32, .Lfixunssfsi_bigexp
	ssl	a4		/* shift by 32 - a4 */
	srl	a5, a5

	/* Negate the result if sign != 0.  */
	neg	a2, a5
	movgez	a2, a5, a7
	leaf_return

.Lfixunssfsi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, a2, 9
	beqz	a4, .Lfixunssfsi_maxint

	/* Translate NaN to 0xffffffff.  */
	movi	a2, -1
	leaf_return

.Lfixunssfsi_maxint:
	slli	a4, a6, 8	/* 0x80000000 */
	movi	a5, -1		/* 0xffffffff */
	movgez	a4, a5, a2
	mov	a2, a4
	leaf_return

.Lfixunssfsi_zero:
	movi	a2, 0
	leaf_return

.Lfixunssfsi_bigexp:
	/* Handle unsigned maximum exponent case.  */
	bltz	a2, 1f
	mov	a2, a5		/* no shift needed */
	leaf_return

	/* Return 0x80000000 if negative.  */
1:	slli	a2, a6, 8
	leaf_return

#endif /* L_fixunssfsi */

#ifdef L_fixunssfdi

	/* Convert single-float (a2) to unsigned 64-bit integer (xh/xl).  */
	.align	4
	.global	__fixunssfdi
	.type	__fixunssfdi, @function
__fixunssfdi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7f800000
	ball	a2, a6, .Lfixunssfdi_nan_or_inf

	/* Extract the exponent and check if 0 <= (exp - 0x7f) < 64.  */
	extui	a4, a2, 23, 8
	addi	a4, a4, -0x7f
	bgei	a4, 64, .Lfixunssfdi_maxint
	bltz	a4, .Lfixunssfdi_zero

	/* Add explicit "1.0" and shift << 8.  */
	or	a7, a2, a6
	slli	xh, a7, 8

	/* Shift back to the right, based on the exponent.  */
	addi	a4, a4, 1
	beqi	a4, 64, .Lfixunssfdi_bigexp
	ssl	a4		/* shift by 64 - a4 */
	bgei	a4, 32, .Lfixunssfdi_smallshift
	srl	xl, xh
	movi	xh, 0

.Lfixunssfdi_shifted:
	/* Negate the result if sign != 0.  */
	bgez	a7, 1f
	neg	xl, xl
	neg	xh, xh
	beqz	xl, 1f
	addi	xh, xh, -1
1:	leaf_return

.Lfixunssfdi_smallshift:
	movi	xl, 0
	src	xl, xh, xl
	srl	xh, xh
	j	.Lfixunssfdi_shifted

.Lfixunssfdi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, a2, 9
	beqz	a4, .Lfixunssfdi_maxint

	/* Translate NaN to 0xffffffff....  */
1:	movi	xh, -1
	movi	xl, -1
	leaf_return

.Lfixunssfdi_maxint:
	bgez	a2, 1b
2:	slli	xh, a6, 8	/* 0x80000000 */
	movi	xl, 0
	leaf_return

.Lfixunssfdi_zero:
	movi	xh, 0
	movi	xl, 0
	leaf_return

.Lfixunssfdi_bigexp:
	/* Handle unsigned maximum exponent case.  */
	bltz	a7, 2b
	movi	xl, 0
	leaf_return		/* no shift needed */

#endif /* L_fixunssfdi */

#ifdef L_floatsisf

	/* Convert unsigned 32-bit integer (a2) to single-float (a2).  */
	.align	4
	.global	__floatunsisf
	.type	__floatunsisf, @function
__floatunsisf:
	leaf_entry sp, 16
	beqz	a2, .Lfloatsisf_return

	/* Set the sign to zero and jump to the floatsisf code.  */
	movi	a7, 0
	j	.Lfloatsisf_normalize

	/* Convert signed 32-bit integer (a2) to single-float (a2).  */
	.align	4
	.global	__floatsisf
	.type	__floatsisf, @function
__floatsisf:
	leaf_entry sp, 16

	/* Check for zero.  */
	beqz	a2, .Lfloatsisf_return

	/* Save the sign.  */
	extui	a7, a2, 31, 1

	/* Get the absolute value.  */
#if XCHAL_HAVE_ABS
	abs	a2, a2
#else
	neg	a4, a2
	movltz	a2, a4, a2
#endif

.Lfloatsisf_normalize:
	/* Normalize with the first 1 bit in the msb.  */
	do_nsau	a4, a2, a5, a6
	ssl	a4
	sll	a5, a2

	/* Shift the mantissa into position, with rounding bits in a6.  */
	srli	a2, a5, 8
	slli	a6, a5, (32 - 8)

	/* Set the exponent.  */
	movi	a5, 0x9d	/* 0x7e + 31 */
	sub	a5, a5, a4
	slli	a5, a5, 23
	add	a2, a2, a5

	/* Add the sign.  */
	slli	a7, a7, 31
	or	a2, a2, a7

	/* Round up if the leftover fraction is >= 1/2.  */
	bgez	a6, .Lfloatsisf_return
	addi	a2, a2, 1	/* Overflow to the exponent is OK.  */

	/* Check if the leftover fraction is exactly 1/2.  */
	slli	a6, a6, 1
	beqz	a6, .Lfloatsisf_exactlyhalf

.Lfloatsisf_return:
	leaf_return

.Lfloatsisf_exactlyhalf:
	/* Round down to the nearest even value.  */
	srli	a2, a2, 1
	slli	a2, a2, 1
	leaf_return

#endif /* L_floatsisf */

#ifdef L_floatdisf

	/* Convert unsigned 64-bit integer (xh/xl) to single-float (a2).  */
	.align	4
	.global	__floatundisf
	.type	__floatundisf, @function
__floatundisf:
	leaf_entry sp, 16

	/* Check for zero.  */
	or	a4, xh, xl
	beqz	a4, 2f

	/* Set the sign to zero and jump to the floatdisf code.  */
	movi	a7, 0
	j	.Lfloatdisf_normalize

	/* Convert signed 64-bit integer (xh/xl) to single-float (a2).  */
	.align	4
	.global	__floatdisf
	.type	__floatdisf, @function
__floatdisf:
	leaf_entry sp, 16

	/* Check for zero.  */
	or	a4, xh, xl
	beqz	a4, 2f

	/* Save the sign.  */
	extui	a7, xh, 31, 1

	/* Get the absolute value.  */
	bgez	xh, .Lfloatdisf_normalize
	neg	xl, xl
	neg	xh, xh
	beqz	xl, .Lfloatdisf_normalize
	addi	xh, xh, -1

.Lfloatdisf_normalize:
	/* Normalize with the first 1 bit in the msb of xh.  */
	beqz	xh, .Lfloatdisf_bigshift
	do_nsau	a4, xh, a5, a6
	ssl	a4
	src	xh, xh, xl
	sll	xl, xl

.Lfloatdisf_shifted:
	/* Shift the mantissa into position, with rounding bits in a6.  */
	ssai	8
	sll	a5, xl
	src	a6, xh, xl
	srl	xh, xh
	beqz	a5, 1f
	movi	a5, 1
	or	a6, a6, a5
1:
	/* Set the exponent.  */
	movi	a5, 0xbd	/* 0x7e + 63 */
	sub	a5, a5, a4
	slli	a5, a5, 23
	add	a2, xh, a5

	/* Add the sign.  */
	slli	a7, a7, 31
	or	a2, a2, a7

	/* Round up if the leftover fraction is >= 1/2.  */
	bgez	a6, 2f
	addi	a2, a2, 1	/* Overflow to the exponent is OK.  */

	/* Check if the leftover fraction is exactly 1/2.  */
	slli	a6, a6, 1
	beqz	a6, .Lfloatdisf_exactlyhalf
2:	leaf_return

.Lfloatdisf_bigshift:
	/* xh is zero.  Normalize with first 1 bit of xl in the msb of xh.  */
	do_nsau	a4, xl, a5, a6
	ssl	a4
	sll	xh, xl
	movi	xl, 0
	addi	a4, a4, 32
	j	.Lfloatdisf_shifted

.Lfloatdisf_exactlyhalf:
	/* Round down to the nearest even value.  */
	srli	a2, a2, 1
	slli	a2, a2, 1
	leaf_return

#endif /* L_floatdisf */
