/* Assembly functions for the Xtensa version of libgcc1.
   Copyright (C) 2001, 2002, 2003, 2005, 2006 Free Software Foundation, Inc.
   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combined
executable.)

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */

#include "xtensa-config.h"

# Define macros for the ABS and ADDX* instructions to handle cases
# where they are not included in the Xtensa processor configuration.

	.macro	do_abs dst, src, tmp
#if XCHAL_HAVE_ABS
	abs	\dst, \src
#else
	neg	\tmp, \src
	movgez	\tmp, \src, \src
	mov	\dst, \tmp
#endif
	.endm

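/* The fallback above computes an absolute value without branching:
   negate src, then conditionally overwrite the result with src itself
   when src is nonnegative (the movgez).  A minimal C sketch of the
   same idea, illustrative only (the function name is hypothetical):

	int do_abs_sketch (int src)
	{
	  int tmp = -src;
	  if (src >= 0)
	    tmp = src;
	  return tmp;
	}
*/
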
	.macro	do_addx2 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx2	\dst, \as, \at
#else
	slli	\tmp, \as, 1
	add	\dst, \tmp, \at
#endif
	.endm

	.macro	do_addx4 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx4	\dst, \as, \at
#else
	slli	\tmp, \as, 2
	add	\dst, \tmp, \at
#endif
	.endm

	.macro	do_addx8 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx8	\dst, \as, \at
#else
	slli	\tmp, \as, 3
	add	\dst, \tmp, \at
#endif
	.endm

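/* Each do_addxN macro computes dst = (as << log2 (N)) + at, matching
   the corresponding ADDXN instruction.  A C sketch of the fallback
   path, illustrative only (the function name is hypothetical):

	unsigned do_addx4_sketch (unsigned as, unsigned at)
	{
	  return (as << 2) + at;
	}
*/
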
# Define macros for leaf function entry and return, supporting either the
# standard register windowed ABI or the non-windowed call0 ABI.  These
# macros do not allocate any extra stack space, so they only work for
# leaf functions that do not need to spill anything to the stack.

	.macro	leaf_entry reg, size
#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
	entry	\reg, \size
#else
	/* do nothing */
#endif
	.endm

	.macro	leaf_return
#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
	retw
#else
	ret
#endif
	.endm


#ifdef L_mulsi3
	.align	4
	.global	__mulsi3
	.type	__mulsi3,@function
__mulsi3:
	leaf_entry sp, 16

#if XCHAL_HAVE_MUL16
	or	a4, a2, a3	# are any high bits set in either input?
	srai	a4, a4, 16
	bnez	a4, .LMUL16
	mul16u	a2, a2, a3	# both fit in 16 bits: a single multiply
	leaf_return
.LMUL16:
	srai	a4, a2, 16	# a4 = hi16 (a2)
	srai	a5, a3, 16	# a5 = hi16 (a3)
	mul16u	a7, a4, a3	# a7 = hi16 (a2) * lo16 (a3)
	mul16u	a6, a5, a2	# a6 = hi16 (a3) * lo16 (a2)
	mul16u	a4, a2, a3	# a4 = lo16 (a2) * lo16 (a3)
	add	a7, a7, a6	# combine the cross products...
	slli	a7, a7, 16	# ...into the upper half
	add	a2, a7, a4

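/* The sequence above assembles a 32-bit product from 16x16-bit
   multiplies.  A C sketch of the decomposition, illustrative only
   (the function name is hypothetical); the hi16 x hi16 partial
   product is omitted because it only affects bits 32 and above:

	unsigned mul32_sketch (unsigned a, unsigned b)
	{
	  unsigned al = a & 0xffff, ah = a >> 16;
	  unsigned bl = b & 0xffff, bh = b >> 16;
	  return ((ah * bl + bh * al) << 16) + al * bl;
	}
*/
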
#elif XCHAL_HAVE_MAC16
	mul.aa.hl a2, a3	# ACC = hi16 (a2) * lo16 (a3)
	mula.aa.lh a2, a3	# ACC += lo16 (a2) * hi16 (a3)
	rsr	a5, ACCLO	# a5 = sum of the two cross products
	umul.aa.ll a2, a3	# ACC = lo16 (a2) * lo16 (a3), unsigned
	rsr	a4, ACCLO
	slli	a5, a5, 16	# only the low half of the cross-product
	add	a2, a4, a5	#   sum contributes to a 32-bit result

#else /* !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MAC16 */

	# Multiply one bit at a time, but unroll the loop 4x to better
	# exploit the addx instructions and avoid overhead.
	# Peel the first iteration to save a cycle on init.

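/* Conceptually, the code below implements the following C sketch,
   illustrative only (the function name is hypothetical); the peeled
   first iteration and the addx-based 4-bits-per-step unrolling are
   optimizations of the same idea:

	unsigned mul32_sketch (unsigned a, unsigned b)
	{
	  unsigned result = 0;
	  while (b != 0)
	    {
	      if (b & 1)
		result += a;
	      a <<= 1;
	      b >>= 1;
	    }
	  return result;
	}
*/
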
	# Avoid negative numbers.
	xor	a5, a2, a3	# top bit is 1 iff exactly one input is negative
	do_abs	a3, a3, a6
	do_abs	a2, a2, a6

	# Swap so the second argument is smaller.
	sub	a7, a2, a3
	mov	a4, a3
	movgez	a4, a2, a7	# a4 = max (a2, a3)
	movltz	a3, a2, a7	# a3 = min (a2, a3)

	movi	a2, 0
	extui	a6, a3, 0, 1
	movnez	a2, a4, a6

	do_addx2 a7, a4, a2, a7
	extui	a6, a3, 1, 1
	movnez	a2, a7, a6

	do_addx4 a7, a4, a2, a7
	extui	a6, a3, 2, 1
	movnez	a2, a7, a6

	do_addx8 a7, a4, a2, a7
	extui	a6, a3, 3, 1
	movnez	a2, a7, a6

	bgeui	a3, 16, .Lmult_main_loop
	neg	a3, a2
	movltz	a2, a3, a5
	leaf_return

	.align	4
.Lmult_main_loop:
	srli	a3, a3, 4
	slli	a4, a4, 4

	add	a7, a4, a2
	extui	a6, a3, 0, 1
	movnez	a2, a7, a6

	do_addx2 a7, a4, a2, a7
	extui	a6, a3, 1, 1
	movnez	a2, a7, a6

	do_addx4 a7, a4, a2, a7
	extui	a6, a3, 2, 1
	movnez	a2, a7, a6

	do_addx8 a7, a4, a2, a7
	extui	a6, a3, 3, 1
	movnez	a2, a7, a6

	bgeui	a3, 16, .Lmult_main_loop

	neg	a3, a2
	movltz	a2, a3, a5

#endif /* !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MAC16 */

	leaf_return
	.size	__mulsi3,.-__mulsi3

#endif /* L_mulsi3 */


# Define a macro for the NSAU (unsigned normalize shift amount)
# instruction, which computes the number of leading zero bits,
# to handle cases where it is not included in the Xtensa processor
# configuration.

	.macro	do_nsau cnt, val, tmp, a
#if XCHAL_HAVE_NSA
	nsau	\cnt, \val
#else
	mov	\a, \val
	movi	\cnt, 0
	extui	\tmp, \a, 16, 16	# high halfword zero?
	bnez	\tmp, 0f
	movi	\cnt, 16
	slli	\a, \a, 16
0:
	extui	\tmp, \a, 24, 8		# high byte zero?
	bnez	\tmp, 1f
	addi	\cnt, \cnt, 8
	slli	\a, \a, 8
1:
	movi	\tmp, __nsau_data	# finish with a table lookup
	extui	\a, \a, 24, 8		#   indexed by the top byte
	add	\tmp, \tmp, \a
	l8ui	\tmp, \tmp, 0
	add	\cnt, \cnt, \tmp
#endif /* !XCHAL_HAVE_NSA */
	.endm

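/* A C sketch of the do_nsau fallback, illustrative only: narrow the
   value to its top nonzero byte in two steps, then finish with a
   256-entry table lookup, where __nsau_data[i] holds the number of
   leading zero bits in the byte value i, so nsau_sketch (0) == 32:

	unsigned nsau_sketch (unsigned val)
	{
	  unsigned cnt = 0;
	  if ((val >> 16) == 0) { cnt = 16; val <<= 16; }
	  if ((val >> 24) == 0) { cnt += 8; val <<= 8; }
	  return cnt + __nsau_data[val >> 24];
	}
*/
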
#ifdef L_nsau
	.section .rodata
	.align	4
	.global	__nsau_data
	.type	__nsau_data,@object
__nsau_data:
#if !XCHAL_HAVE_NSA
	.byte	8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4
	.byte	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
	.byte	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
	.byte	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
#endif /* !XCHAL_HAVE_NSA */
	.size	__nsau_data,.-__nsau_data
	.hidden	__nsau_data
#endif /* L_nsau */


#ifdef L_udivsi3
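/* __udivsi3 shifts the divisor left until its leading one bit lines
   up with the dividend's, then runs a restoring shift-subtract loop
   that retires one quotient bit per step.  A C sketch, illustrative
   only: nsau stands for the count-leading-zeros operation above,
   count is at least 1 on this path, and the divisor <= 1 and
   single-bit-quotient special cases are handled separately below:

	unsigned udiv_sketch (unsigned n, unsigned d)
	{
	  unsigned q = 0;
	  int count = nsau (d) - nsau (n);
	  d <<= count;
	  while (count-- > 0)
	    {
	      if (n >= d) { n -= d; q += 1; }
	      q <<= 1;
	      d >>= 1;
	    }
	  return (n >= d) ? q + 1 : q;
	}
*/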
	.align	4
	.global	__udivsi3
	.type	__udivsi3,@function
__udivsi3:
	leaf_entry sp, 16
	bltui	a3, 2, .Lle_one	# check if the divisor <= 1

	mov	a6, a2		# keep dividend in a6
	do_nsau	a5, a6, a2, a7	# dividend_shift = nsau (dividend)
	do_nsau	a4, a3, a2, a7	# divisor_shift = nsau (divisor)
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	# count = divisor_shift - dividend_shift
	ssl	a4
	sll	a3, a3		# divisor <<= count
	movi	a2, 0		# quotient = 0

	# test-subtract-and-shift loop; one quotient bit on each iteration
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a6, a3, .Lzerobit
	sub	a6, a6, a3
	addi	a2, a2, 1
.Lzerobit:
	slli	a2, a2, 1
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

	bltu	a6, a3, .Lreturn
	addi	a2, a2, 1	# increment quotient if dividend >= divisor
.Lreturn:
	leaf_return

.Lle_one:
	beqz	a3, .Lerror	# divisor == 0: error; divisor == 1:
	leaf_return		#   return the dividend unchanged

.Lspecial:
	# return dividend >= divisor
	bltu	a6, a3, .Lreturn0
	movi	a2, 1
	leaf_return

.Lerror:
	# just return 0; could throw an exception

.Lreturn0:
	movi	a2, 0
	leaf_return
	.size	__udivsi3,.-__udivsi3

#endif /* L_udivsi3 */


#ifdef L_divsi3
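/* __divsi3 reduces signed division to the unsigned algorithm of
   __udivsi3: divide the magnitudes, then negate the quotient when
   the operand signs differ.  A C sketch, illustrative only; udiv and
   abs_u are hypothetical helpers standing for the routines above:

	int div_sketch (int n, int d)
	{
	  int sign = n ^ d;
	  unsigned q = udiv (abs_u (n), abs_u (d));
	  return (sign < 0) ? -(int) q : (int) q;
	}
*/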
	.align	4
	.global	__divsi3
	.type	__divsi3,@function
__divsi3:
	leaf_entry sp, 16
	xor	a7, a2, a3	# sign = dividend ^ divisor
	do_abs	a6, a2, a4	# udividend = abs (dividend)
	do_abs	a3, a3, a4	# udivisor = abs (divisor)
	bltui	a3, 2, .Lle_one	# check if udivisor <= 1
	do_nsau	a5, a6, a2, a8	# udividend_shift = nsau (udividend)
	do_nsau	a4, a3, a2, a8	# udivisor_shift = nsau (udivisor)
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	# count = udivisor_shift - udividend_shift
	ssl	a4
	sll	a3, a3		# udivisor <<= count
	movi	a2, 0		# quotient = 0

	# test-subtract-and-shift loop; one quotient bit on each iteration
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a6, a3, .Lzerobit
	sub	a6, a6, a3
	addi	a2, a2, 1
.Lzerobit:
	slli	a2, a2, 1
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

	bltu	a6, a3, .Lreturn
	addi	a2, a2, 1	# increment quotient if udividend >= udivisor
.Lreturn:
	neg	a5, a2
	movltz	a2, a5, a7	# return (sign < 0) ? -quotient : quotient
	leaf_return

.Lle_one:
	beqz	a3, .Lerror
	neg	a2, a6		# if udivisor == 1, then return...
	movgez	a2, a6, a7	# (sign < 0) ? -udividend : udividend
	leaf_return

.Lspecial:
	bltu	a6, a3, .Lreturn0 # if udividend < udivisor, return 0
	movi	a2, 1
	movi	a4, -1
	movltz	a2, a4, a7	# else return (sign < 0) ? -1 : 1
	leaf_return

.Lerror:
	# just return 0; could throw an exception

.Lreturn0:
	movi	a2, 0
	leaf_return
	.size	__divsi3,.-__divsi3

#endif /* L_divsi3 */


#ifdef L_umodsi3
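/* __umodsi3 runs the same normalize-and-shift-subtract loop as
   __udivsi3 but keeps only the running remainder and records no
   quotient bits.  A C sketch, illustrative only; nsau is the
   count-leading-zeros operation above, and the divisor <= 1 case
   (which returns 0) is handled separately below:

	unsigned umod_sketch (unsigned n, unsigned d)
	{
	  int count = nsau (d) - nsau (n);
	  if (count > 0)
	    {
	      d <<= count;
	      while (count-- > 0)
		{
		  if (n >= d) n -= d;
		  d >>= 1;
		}
	    }
	  return (n >= d) ? n - d : n;
	}
*/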
	.align	4
	.global	__umodsi3
	.type	__umodsi3,@function
__umodsi3:
	leaf_entry sp, 16
	bltui	a3, 2, .Lle_one	# check if the divisor is <= 1

	do_nsau	a5, a2, a6, a7	# dividend_shift = nsau (dividend)
	do_nsau	a4, a3, a6, a7	# divisor_shift = nsau (divisor)
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	# count = divisor_shift - dividend_shift
	ssl	a4
	sll	a3, a3		# divisor <<= count

	# test-subtract-and-shift loop
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a2, a3, .Lzerobit
	sub	a2, a2, a3
.Lzerobit:
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

.Lspecial:
	bltu	a2, a3, .Lreturn
	sub	a2, a2, a3	# subtract once more if dividend >= divisor
.Lreturn:
	leaf_return

.Lle_one:
	# the divisor is either 0 or 1, so just return 0.
	# someday we may want to throw an exception if the divisor is 0.
	movi	a2, 0
	leaf_return
	.size	__umodsi3,.-__umodsi3

#endif /* L_umodsi3 */


#ifdef L_modsi3
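/* __modsi3 computes the remainder of the magnitudes with the
   unsigned algorithm, then gives the result the sign of the
   dividend, matching C remainder semantics.  A sketch, illustrative
   only; umod and abs_u are hypothetical helpers for the code above:

	int mod_sketch (int n, int d)
	{
	  unsigned r = umod (abs_u (n), abs_u (d));
	  return (n < 0) ? -(int) r : (int) r;
	}
*/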
	.align	4
	.global	__modsi3
	.type	__modsi3,@function
__modsi3:
	leaf_entry sp, 16
	mov	a7, a2		# save original (signed) dividend
	do_abs	a2, a2, a4	# udividend = abs (dividend)
	do_abs	a3, a3, a4	# udivisor = abs (divisor)
	bltui	a3, 2, .Lle_one	# check if udivisor <= 1
	do_nsau	a5, a2, a6, a8	# udividend_shift = nsau (udividend)
	do_nsau	a4, a3, a6, a8	# udivisor_shift = nsau (udivisor)
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	# count = udivisor_shift - udividend_shift
	ssl	a4
	sll	a3, a3		# udivisor <<= count

	# test-subtract-and-shift loop
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a2, a3, .Lzerobit
	sub	a2, a2, a3
.Lzerobit:
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

.Lspecial:
	bltu	a2, a3, .Lreturn
	sub	a2, a2, a3	# subtract once more if udividend >= udivisor
.Lreturn:
	bgez	a7, .Lpositive
	neg	a2, a2		# if (dividend < 0), negate the remainder
.Lpositive:
	leaf_return

.Lle_one:
	# udivisor is either 0 or 1, so just return 0.
	# someday we may want to throw an exception if udivisor is 0.
	movi	a2, 0
	leaf_return
	.size	__modsi3,.-__modsi3

#endif /* L_modsi3 */

#include "ieee754-df.S"
#include "ieee754-sf.S"