/* Copyright (C) 2006 Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combined
executable.)

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.  If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.  */

/* Moderately space-optimized libgcc routines for the Renesas SH /
   STMicroelectronics ST40 CPUs.
   Contributed by J"orn Rennecke joern.rennecke@st.com.  */

#include "lib1funcs.h"

#if !__SHMEDIA__
#ifdef L_udivsi3_i4i

/* Unsigned 32/32 division via the double-precision FPU divider.
   88 bytes; sh4-200 cycle counts:
   divisor  >= 2G: 11 cycles
   dividend <  2G: 48 cycles
   dividend >= 2G: divisor != 1: 54 cycles
   dividend >= 2G, divisor == 1: 22 cycles  */
#if defined (__SH_FPU_DOUBLE__) || defined (__SH4_SINGLE_ONLY__)
!! args in r4 and r5, result in r0, clobber r1

	.global	GLOBAL(udivsi3_i4i)
	FUNC(GLOBAL(udivsi3_i4i))
GLOBAL(udivsi3_i4i):
	mova	L1,r0			! r0 -> fpscr setting + 2^32 constant
	cmp/pz	r5			! divisor < 2^31 ?
	sts	fpscr,r1		! save caller's fpscr in r1
	lds.l	@r0+,fpscr		! switch to double-precision mode
	sts.l	fpul,@-r15
	bf	LOCAL(huge_divisor)	! divisor >= 2^31: trivial quotient
	mov.l	r1,@-r15
	lds	r4,fpul
	cmp/pz	r4			! dividend < 2^31 ?
#ifdef FMOVD_WORKS
	fmov.d	dr0,@-r15
	float	fpul,dr0		! dr0 = (double) (int) dividend
	fmov.d	dr2,@-r15
	bt	LOCAL(dividend_adjusted)
	mov	#1,r1
	fmov.d	@r0,dr2			! dr2 = 2^32, to undo the sign wrap
	cmp/eq	r1,r5
	bt	LOCAL(div_by_1)
	fadd	dr2,dr0			! dividend was negative as int: add 2^32
LOCAL(dividend_adjusted):
	lds	r5,fpul
	float	fpul,dr2		! dr2 = (double) divisor
	fdiv	dr2,dr0			! dr0 = dividend / divisor
LOCAL(div_by_1):
	fmov.d	@r15+,dr2
	ftrc	dr0,fpul		! truncate quotient back to integer
	fmov.d	@r15+,dr0
#else /* !FMOVD_WORKS */
	fmov.s	DR01,@-r15
	mov	#1,r1
	fmov.s	DR00,@-r15
	float	fpul,dr0		! dr0 = (double) (int) dividend
	fmov.s	DR21,@-r15
	bt/s	LOCAL(dividend_adjusted)
	fmov.s	DR20,@-r15		! delay slot
	cmp/eq	r1,r5
	bt	LOCAL(div_by_1)
	fmov.s	@r0+,DR20		! dr2 = 2^32, loaded as two singles
	fmov.s	@r0,DR21
	fadd	dr2,dr0			! dividend was negative as int: add 2^32
LOCAL(dividend_adjusted):
	lds	r5,fpul
	float	fpul,dr2		! dr2 = (double) divisor
	fdiv	dr2,dr0			! dr0 = dividend / divisor
LOCAL(div_by_1):
	fmov.s	@r15+,DR20
	fmov.s	@r15+,DR21
	ftrc	dr0,fpul		! truncate quotient back to integer
	fmov.s	@r15+,DR00
	fmov.s	@r15+,DR01
#endif /* !FMOVD_WORKS */
	lds.l	@r15+,fpscr		! restore caller's fpscr
	sts	fpul,r0			! r0 = quotient
	rts
	lds.l	@r15+,fpul		! delay slot: restore fpul

#ifdef FMOVD_WORKS
	.p2align 3	! make double below 8 byte aligned.
#endif
LOCAL(huge_divisor):
	! divisor >= 2^31: the quotient is 1 iff dividend >= divisor, else 0.
	lds	r1,fpscr		! restore caller's fpscr
	add	#4,r15			! drop the saved fpul slot
	cmp/hs	r5,r4
	rts
	movt	r0			! delay slot: r0 = (dividend >= divisor)

	.p2align 2
L1:
#ifndef FMOVD_WORKS
	.long 0x80000
#else
	.long 0x180000
#endif
	.double 4294967296

	ENDFUNC(GLOBAL(udivsi3_i4i))
#elif !defined (__sh1__) /* !__SH_FPU_DOUBLE__ */

#if 0
/* With 36 bytes, the following would probably be the most compact
   implementation, but with 139 cycles on an sh4-200, it is extremely slow.  */
GLOBAL(udivsi3_i4i):
	mov.l	r2,@-r15
	mov	#0,r1
	div0u
	mov	r1,r2
	mov.l	r3,@-r15
	mov	r1,r3
	sett
	mov	r4,r0
LOCAL(loop):
	rotcr	r2
	;
	bt/s	LOCAL(end)
	cmp/gt	r2,r3
	rotcl	r0
	bra	LOCAL(loop)
	div1	r5,r1
LOCAL(end):
	rotcl	r0
	mov.l	@r15+,r3
	rts
	mov.l	@r15+,r2
#endif /* 0 */

/* Integer division via repeated div1 steps.
   Size: 186 bytes jointly for udivsi3_i4i and sdivsi3_i4i
   sh4-200 run times:
   udiv small divisor: 55 cycles
   udiv large divisor: 52 cycles
   sdiv small divisor, positive result: 59 cycles
   sdiv large divisor, positive result: 56 cycles
   sdiv small divisor, negative result: 65 cycles (*)
   sdiv large divisor, negative result: 62 cycles (*)
   (*): r2 is restored in the rts delay slot and has a lingering latency
   of two more cycles.  */
	.balign 4
	.global	GLOBAL(udivsi3_i4i)
	FUNC(GLOBAL(udivsi3_i4i))
	FUNC(GLOBAL(sdivsi3_i4i))
GLOBAL(udivsi3_i4i):
	! The caller's return address is kept in r1 because the bsr calls
	! to the shared div1 sequences below clobber pr; we return via jmp @r1.
	sts	pr,r1
	mov.l	r4,@-r15
	extu.w	r5,r0
	cmp/eq	r5,r0			! T = divisor fits in 16 bits
	swap.w	r4,r0
	shlr16	r4
	bf/s	LOCAL(large_divisor)
	div0u				! delay slot: initialize division flags
	mov.l	r5,@-r15
	shll16	r5			! align 16-bit divisor for div1 steps
LOCAL(sdiv_small_divisor):
	div1	r5,r4
	bsr	LOCAL(div6)
	div1	r5,r4			! delay slot
	div1	r5,r4
	div1	r5,r4
	bsr	LOCAL(div6)
	div1	r5,r4			! delay slot
	xtrct	r4,r0
	xtrct	r0,r4
	bsr	LOCAL(div7)
	swap.w	r4,r4			! delay slot
	div1	r5,r4
	bsr	LOCAL(div7)
	div1	r5,r4			! delay slot
	xtrct	r4,r0
	mov.l	@r15+,r5
	swap.w	r0,r0
	mov.l	@r15+,r4
	jmp	@r1			! return to the address saved from pr
	rotcl	r0			! delay slot: shift in the last quotient bit
! Shared tails: div7 performs 7 div1 steps, div6 performs 6.
LOCAL(div7):
	div1	r5,r4
LOCAL(div6):
	div1 r5,r4; div1 r5,r4; div1 r5,r4
	div1 r5,r4; div1 r5,r4; rts; div1 r5,r4

! Three interleaved rotcl/div1 steps, used by the large-divisor path.
LOCAL(divx3):
	rotcl	r0
	div1	r5,r4
	rotcl	r0
	div1	r5,r4
	rotcl	r0
	rts
	div1	r5,r4			! delay slot

LOCAL(large_divisor):
	mov.l	r5,@-r15
LOCAL(sdiv_large_divisor):
	xor	r4,r0
	.rept 4
	rotcl	r0
	bsr	LOCAL(divx3)
	div1	r5,r4			! delay slot
	.endr
	mov.l	@r15+,r5
	mov.l	@r15+,r4
	jmp	@r1			! return to the address saved from pr
	rotcl	r0			! delay slot: shift in the last quotient bit
	ENDFUNC(GLOBAL(udivsi3_i4i))

	.global	GLOBAL(sdivsi3_i4i)
GLOBAL(sdivsi3_i4i):
	! Signed division: negate operands to make them positive, divide with
	! the unsigned code above, and negate the quotient again if exactly
	! one operand was negative.
	mov.l	r4,@-r15
	cmp/pz	r5			! T = divisor >= 0
	mov.l	r5,@-r15
	bt/s	LOCAL(pos_divisor)
	cmp/pz	r4			! delay slot: T = dividend >= 0
	neg	r5,r5
	extu.w	r5,r0
	bt/s	LOCAL(neg_result)
	cmp/eq	r5,r0			! delay slot: T = |divisor| fits 16 bits
	neg	r4,r4
LOCAL(pos_result):
	swap.w	r4,r0
	bra	LOCAL(sdiv_check_divisor)
	sts	pr,r1			! delay slot: plain return via r1
LOCAL(pos_divisor):
	extu.w	r5,r0
	bt/s	LOCAL(pos_result)
	cmp/eq	r5,r0			! delay slot: T = divisor fits 16 bits
	neg	r4,r4
LOCAL(neg_result):
	! Route the return through negate_result; the real return address
	! goes to r2, which is preserved in macl across the division.
	mova	LOCAL(negate_result),r0
	;
	mov	r0,r1
	swap.w	r4,r0
	lds	r2,macl			! stash caller's r2 in macl
	sts	pr,r2
LOCAL(sdiv_check_divisor):
	shlr16	r4
	bf/s	LOCAL(sdiv_large_divisor)
	div0u				! delay slot: initialize division flags
	bra	LOCAL(sdiv_small_divisor)
	shll16	r5			! delay slot
	.balign 4
LOCAL(negate_result):
	neg	r0,r0			! quotient sign fix-up
	jmp	@r2			! return to the caller
	sts	macl,r2			! delay slot: restore caller's r2
	ENDFUNC(GLOBAL(sdivsi3_i4i))
#endif /* !__SH_FPU_DOUBLE__ */
#endif /* L_udivsi3_i4i */
#ifdef L_sdivsi3_i4i
#if defined (__SH_FPU_DOUBLE__) || defined (__SH4_SINGLE_ONLY__)
/* Signed 32/32 division via the double-precision FPU divider.
   48 bytes, 45 cycles on sh4-200  */
!! args in r4 and r5, result in r0, clobber r1

	.global	GLOBAL(sdivsi3_i4i)
	FUNC(GLOBAL(sdivsi3_i4i))
GLOBAL(sdivsi3_i4i):
	sts.l	fpscr,@-r15		! save caller's fpscr
	sts	fpul,r1			! save caller's fpul in r1
	mova	L1,r0
	lds.l	@r0+,fpscr		! switch to double-precision mode
	lds	r4,fpul
#ifdef FMOVD_WORKS
	fmov.d	dr0,@-r15
	float	fpul,dr0		! dr0 = (double) dividend
	lds	r5,fpul
	fmov.d	dr2,@-r15
#else
	fmov.s	DR01,@-r15
	fmov.s	DR00,@-r15
	float	fpul,dr0		! dr0 = (double) dividend
	lds	r5,fpul
	fmov.s	DR21,@-r15
	fmov.s	DR20,@-r15
#endif
	float	fpul,dr2		! dr2 = (double) divisor
	fdiv	dr2,dr0			! dr0 = dividend / divisor
#ifdef FMOVD_WORKS
	fmov.d	@r15+,dr2
#else
	fmov.s	@r15+,DR20
	fmov.s	@r15+,DR21
#endif
	ftrc	dr0,fpul		! truncate quotient toward zero
#ifdef FMOVD_WORKS
	fmov.d	@r15+,dr0
#else
	fmov.s	@r15+,DR00
	fmov.s	@r15+,DR01
#endif
	lds.l	@r15+,fpscr		! restore caller's fpscr
	sts	fpul,r0			! r0 = quotient
	rts
	lds	r1,fpul			! delay slot: restore fpul

	.p2align 2
L1:
#ifndef FMOVD_WORKS
	.long 0x80000
#else
	.long 0x180000
#endif

	ENDFUNC(GLOBAL(sdivsi3_i4i))
#endif /* __SH_FPU_DOUBLE__ */
#endif /* L_sdivsi3_i4i */
#endif /* !__SHMEDIA__ */