OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libgcc/] [config/] [arm/] [ieee754-df.S] - Rev 737

Go to most recent revision | Compare with Previous | Blame | View Log

/* ieee754-df.S double-precision floating point support for ARM

   Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009  Free Software Foundation, Inc.
   Contributed by Nicolas Pitre (nico@cam.org)

   This file is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 3, or (at your option) any
   later version.

   This file is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/*
 * Notes: 
 * 
 * The goal of this code is to be as fast as possible.  This is
 * not meant to be easy to understand for the casual reader.
 * For slightly simpler code please see the single precision version
 * of this file.
 * 
 * Only the default rounding mode is intended for best performances.
 * Exceptions aren't supported yet, but that can be added quite easily
 * if necessary without impacting performances.
 */


@ For FPA, float words are always big-endian.
@ For VFP, floats words follow the memory system mode.
#if defined(__VFP_FP__) && !defined(__ARMEB__)
#define xl r0
#define xh r1
#define yl r2
#define yh r3
#else
#define xh r0
#define xl r1
#define yh r2
#define yl r3
#endif


#ifdef L_arm_negdf2

ARM_FUNC_START negdf2
ARM_FUNC_ALIAS aeabi_dneg negdf2

        @ flip sign bit
        eor     xh, xh, #0x80000000
        RET

        FUNC_END aeabi_dneg
        FUNC_END negdf2

#endif

#ifdef L_arm_addsubdf3

ARM_FUNC_START aeabi_drsub

        eor     xh, xh, #0x80000000     @ flip sign bit of first arg
        b       1f      

ARM_FUNC_START subdf3
ARM_FUNC_ALIAS aeabi_dsub subdf3

        eor     yh, yh, #0x80000000     @ flip sign bit of second arg
#if defined(__INTERWORKING_STUBS__)
        b       1f                      @ Skip Thumb-code prologue
#endif

ARM_FUNC_START adddf3
ARM_FUNC_ALIAS aeabi_dadd adddf3

1:      do_push {r4, r5, lr}

        @ Look for zeroes, equal values, INF, or NAN.
        shift1  lsl, r4, xh, #1
        shift1  lsl, r5, yh, #1
        teq     r4, r5
        do_it   eq
        teqeq   xl, yl
        do_it   ne, ttt
        COND(orr,s,ne)  ip, r4, xl
        COND(orr,s,ne)  ip, r5, yl
        COND(mvn,s,ne)  ip, r4, asr #21
        COND(mvn,s,ne)  ip, r5, asr #21
        beq     LSYM(Lad_s)

        @ Compute exponent difference.  Make largest exponent in r4,
        @ corresponding arg in xh-xl, and positive exponent difference in r5.
        shift1  lsr, r4, r4, #21
        rsbs    r5, r4, r5, lsr #21
        do_it   lt
        rsblt   r5, r5, #0
        ble     1f
        add     r4, r4, r5
        eor     yl, xl, yl
        eor     yh, xh, yh
        eor     xl, yl, xl
        eor     xh, yh, xh
        eor     yl, xl, yl
        eor     yh, xh, yh
1:
        @ If exponent difference is too large, return largest argument
        @ already in xh-xl.  We need up to 54 bit to handle proper rounding
        @ of 0x1p54 - 1.1.
        cmp     r5, #54
        do_it   hi
        RETLDM  "r4, r5" hi

        @ Convert mantissa to signed integer.
        tst     xh, #0x80000000
        mov     xh, xh, lsl #12
        mov     ip, #0x00100000
        orr     xh, ip, xh, lsr #12
        beq     1f
#if defined(__thumb2__)
        negs    xl, xl
        sbc     xh, xh, xh, lsl #1
#else
        rsbs    xl, xl, #0
        rsc     xh, xh, #0
#endif
1:
        tst     yh, #0x80000000
        mov     yh, yh, lsl #12
        orr     yh, ip, yh, lsr #12
        beq     1f
#if defined(__thumb2__)
        negs    yl, yl
        sbc     yh, yh, yh, lsl #1
#else
        rsbs    yl, yl, #0
        rsc     yh, yh, #0
#endif
1:
        @ If exponent == difference, one or both args were denormalized.
        @ Since this is not common case, rescale them off line.
        teq     r4, r5
        beq     LSYM(Lad_d)
LSYM(Lad_x):

        @ Compensate for the exponent overlapping the mantissa MSB added later
        sub     r4, r4, #1

        @ Shift yh-yl right per r5, add to xh-xl, keep leftover bits into ip.
        rsbs    lr, r5, #32
        blt     1f
        shift1  lsl, ip, yl, lr
        shiftop adds xl xl yl lsr r5 yl
        adc     xh, xh, #0
        shiftop adds xl xl yh lsl lr yl
        shiftop adcs xh xh yh asr r5 yh
        b       2f
1:      sub     r5, r5, #32
        add     lr, lr, #32
        cmp     yl, #1
        shift1  lsl,ip, yh, lr
        do_it   cs
        orrcs   ip, ip, #2              @ 2 not 1, to allow lsr #1 later
        shiftop adds xl xl yh asr r5 yh
        adcs    xh, xh, yh, asr #31
2:
        @ We now have a result in xh-xl-ip.
        @ Keep absolute value in xh-xl-ip, sign in r5 (the n bit was set above)
        and     r5, xh, #0x80000000
        bpl     LSYM(Lad_p)
#if defined(__thumb2__)
        mov     lr, #0
        negs    ip, ip
        sbcs    xl, lr, xl
        sbc     xh, lr, xh
#else
        rsbs    ip, ip, #0
        rscs    xl, xl, #0
        rsc     xh, xh, #0
#endif

        @ Determine how to normalize the result.
LSYM(Lad_p):
        cmp     xh, #0x00100000
        bcc     LSYM(Lad_a)
        cmp     xh, #0x00200000
        bcc     LSYM(Lad_e)

        @ Result needs to be shifted right.
        movs    xh, xh, lsr #1
        movs    xl, xl, rrx
        mov     ip, ip, rrx
        add     r4, r4, #1

        @ Make sure we did not bust our exponent.
        mov     r2, r4, lsl #21
        cmn     r2, #(2 << 21)
        bcs     LSYM(Lad_o)

        @ Our result is now properly aligned into xh-xl, remaining bits in ip.
        @ Round with MSB of ip. If halfway between two numbers, round towards
        @ LSB of xl = 0.
        @ Pack final result together.
LSYM(Lad_e):
        cmp     ip, #0x80000000
        do_it   eq
        COND(mov,s,eq)  ip, xl, lsr #1
        adcs    xl, xl, #0
        adc     xh, xh, r4, lsl #20
        orr     xh, xh, r5
        RETLDM  "r4, r5"

        @ Result must be shifted left and exponent adjusted.
LSYM(Lad_a):
        movs    ip, ip, lsl #1
        adcs    xl, xl, xl
        adc     xh, xh, xh
        tst     xh, #0x00100000
        sub     r4, r4, #1
        bne     LSYM(Lad_e)

        @ No rounding necessary since ip will always be 0 at this point.
LSYM(Lad_l):

#if __ARM_ARCH__ < 5

        teq     xh, #0
        movne   r3, #20
        moveq   r3, #52
        moveq   xh, xl
        moveq   xl, #0
        mov     r2, xh
        cmp     r2, #(1 << 16)
        movhs   r2, r2, lsr #16
        subhs   r3, r3, #16
        cmp     r2, #(1 << 8)
        movhs   r2, r2, lsr #8
        subhs   r3, r3, #8
        cmp     r2, #(1 << 4)
        movhs   r2, r2, lsr #4
        subhs   r3, r3, #4
        cmp     r2, #(1 << 2)
        subhs   r3, r3, #2
        sublo   r3, r3, r2, lsr #1
        sub     r3, r3, r2, lsr #3

#else

        teq     xh, #0
        do_it   eq, t
        moveq   xh, xl
        moveq   xl, #0
        clz     r3, xh
        do_it   eq
        addeq   r3, r3, #32
        sub     r3, r3, #11

#endif

        @ determine how to shift the value.
        subs    r2, r3, #32
        bge     2f
        adds    r2, r2, #12
        ble     1f

        @ shift value left 21 to 31 bits, or actually right 11 to 1 bits
        @ since a register switch happened above.
        add     ip, r2, #20
        rsb     r2, r2, #12
        shift1  lsl, xl, xh, ip
        shift1  lsr, xh, xh, r2
        b       3f

        @ actually shift value left 1 to 20 bits, which might also represent
        @ 32 to 52 bits if counting the register switch that happened earlier.
1:      add     r2, r2, #20
2:      do_it   le
        rsble   ip, r2, #32
        shift1  lsl, xh, xh, r2
#if defined(__thumb2__)
        lsr     ip, xl, ip
        itt     le
        orrle   xh, xh, ip
        lslle   xl, xl, r2
#else
        orrle   xh, xh, xl, lsr ip
        movle   xl, xl, lsl r2
#endif

        @ adjust exponent accordingly.
3:      subs    r4, r4, r3
        do_it   ge, tt
        addge   xh, xh, r4, lsl #20
        orrge   xh, xh, r5
        RETLDM  "r4, r5" ge

        @ Exponent too small, denormalize result.
        @ Find out proper shift value.
        mvn     r4, r4
        subs    r4, r4, #31
        bge     2f
        adds    r4, r4, #12
        bgt     1f

        @ shift result right of 1 to 20 bits, sign is in r5.
        add     r4, r4, #20
        rsb     r2, r4, #32
        shift1  lsr, xl, xl, r4
        shiftop orr xl xl xh lsl r2 yh
        shiftop orr xh r5 xh lsr r4 yh
        RETLDM  "r4, r5"

        @ shift result right of 21 to 31 bits, or left 11 to 1 bits after
        @ a register switch from xh to xl.
1:      rsb     r4, r4, #12
        rsb     r2, r4, #32
        shift1  lsr, xl, xl, r2
        shiftop orr xl xl xh lsl r4 yh
        mov     xh, r5
        RETLDM  "r4, r5"

        @ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch
        @ from xh to xl.
2:      shift1  lsr, xl, xh, r4
        mov     xh, r5
        RETLDM  "r4, r5"

        @ Adjust exponents for denormalized arguments.
        @ Note that r4 must not remain equal to 0.
LSYM(Lad_d):
        teq     r4, #0
        eor     yh, yh, #0x00100000
        do_it   eq, te
        eoreq   xh, xh, #0x00100000
        addeq   r4, r4, #1
        subne   r5, r5, #1
        b       LSYM(Lad_x)


LSYM(Lad_s):
        mvns    ip, r4, asr #21
        do_it   ne
        COND(mvn,s,ne)  ip, r5, asr #21
        beq     LSYM(Lad_i)

        teq     r4, r5
        do_it   eq
        teqeq   xl, yl
        beq     1f

        @ Result is x + 0.0 = x or 0.0 + y = y.
        orrs    ip, r4, xl
        do_it   eq, t
        moveq   xh, yh
        moveq   xl, yl
        RETLDM  "r4, r5"

1:      teq     xh, yh

        @ Result is x - x = 0.
        do_it   ne, tt
        movne   xh, #0
        movne   xl, #0
        RETLDM  "r4, r5" ne

        @ Result is x + x = 2x.
        movs    ip, r4, lsr #21
        bne     2f
        movs    xl, xl, lsl #1
        adcs    xh, xh, xh
        do_it   cs
        orrcs   xh, xh, #0x80000000
        RETLDM  "r4, r5"
2:      adds    r4, r4, #(2 << 21)
        do_it   cc, t
        addcc   xh, xh, #(1 << 20)
        RETLDM  "r4, r5" cc
        and     r5, xh, #0x80000000

        @ Overflow: return INF.
LSYM(Lad_o):
        orr     xh, r5, #0x7f000000
        orr     xh, xh, #0x00f00000
        mov     xl, #0
        RETLDM  "r4, r5"

        @ At least one of x or y is INF/NAN.
        @   if xh-xl != INF/NAN: return yh-yl (which is INF/NAN)
        @   if yh-yl != INF/NAN: return xh-xl (which is INF/NAN)
        @   if either is NAN: return NAN
        @   if opposite sign: return NAN
        @   otherwise return xh-xl (which is INF or -INF)
LSYM(Lad_i):
        mvns    ip, r4, asr #21
        do_it   ne, te
        movne   xh, yh
        movne   xl, yl
        COND(mvn,s,eq)  ip, r5, asr #21
        do_it   ne, t
        movne   yh, xh
        movne   yl, xl
        orrs    r4, xl, xh, lsl #12
        do_it   eq, te
        COND(orr,s,eq)  r5, yl, yh, lsl #12
        teqeq   xh, yh
        orrne   xh, xh, #0x00080000     @ quiet NAN
        RETLDM  "r4, r5"

        FUNC_END aeabi_dsub
        FUNC_END subdf3
        FUNC_END aeabi_dadd
        FUNC_END adddf3

ARM_FUNC_START floatunsidf
ARM_FUNC_ALIAS aeabi_ui2d floatunsidf

        teq     r0, #0
        do_it   eq, t
        moveq   r1, #0
        RETc(eq)
        do_push {r4, r5, lr}
        mov     r4, #0x400              @ initial exponent
        add     r4, r4, #(52-1 - 1)
        mov     r5, #0                  @ sign bit is 0
        .ifnc   xl, r0
        mov     xl, r0
        .endif
        mov     xh, #0
        b       LSYM(Lad_l)

        FUNC_END aeabi_ui2d
        FUNC_END floatunsidf

ARM_FUNC_START floatsidf
ARM_FUNC_ALIAS aeabi_i2d floatsidf

        teq     r0, #0
        do_it   eq, t
        moveq   r1, #0
        RETc(eq)
        do_push {r4, r5, lr}
        mov     r4, #0x400              @ initial exponent
        add     r4, r4, #(52-1 - 1)
        ands    r5, r0, #0x80000000     @ sign bit in r5
        do_it   mi
        rsbmi   r0, r0, #0              @ absolute value
        .ifnc   xl, r0
        mov     xl, r0
        .endif
        mov     xh, #0
        b       LSYM(Lad_l)

        FUNC_END aeabi_i2d
        FUNC_END floatsidf

ARM_FUNC_START extendsfdf2
ARM_FUNC_ALIAS aeabi_f2d extendsfdf2

        movs    r2, r0, lsl #1          @ toss sign bit
        mov     xh, r2, asr #3          @ stretch exponent
        mov     xh, xh, rrx             @ retrieve sign bit
        mov     xl, r2, lsl #28         @ retrieve remaining bits
        do_it   ne, ttt
        COND(and,s,ne)  r3, r2, #0xff000000     @ isolate exponent
        teqne   r3, #0xff000000         @ if not 0, check if INF or NAN
        eorne   xh, xh, #0x38000000     @ fixup exponent otherwise.
        RETc(ne)                        @ and return it.

        teq     r2, #0                  @ if actually 0
        do_it   ne, e
        teqne   r3, #0xff000000         @ or INF or NAN
        RETc(eq)                        @ we are done already.

        @ value was denormalized.  We can normalize it now.
        do_push {r4, r5, lr}
        mov     r4, #0x380              @ setup corresponding exponent
        and     r5, xh, #0x80000000     @ move sign bit in r5
        bic     xh, xh, #0x80000000
        b       LSYM(Lad_l)

        FUNC_END aeabi_f2d
        FUNC_END extendsfdf2

ARM_FUNC_START floatundidf
ARM_FUNC_ALIAS aeabi_ul2d floatundidf

        orrs    r2, r0, r1
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
        do_it   eq, t
        mvfeqd  f0, #0.0
#else
        do_it   eq
#endif
        RETc(eq)

#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
        @ For hard FPA code we want to return via the tail below so that
        @ we can return the result in f0 as well as in r0/r1 for backwards
        @ compatibility.
        adr     ip, LSYM(f0_ret)
        @ Push pc as well so that RETLDM works correctly.
        do_push {r4, r5, ip, lr, pc}
#else
        do_push {r4, r5, lr}
#endif

        mov     r5, #0
        b       2f

ARM_FUNC_START floatdidf
ARM_FUNC_ALIAS aeabi_l2d floatdidf

        orrs    r2, r0, r1
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
        do_it   eq, t
        mvfeqd  f0, #0.0
#else
        do_it   eq
#endif
        RETc(eq)

#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
        @ For hard FPA code we want to return via the tail below so that
        @ we can return the result in f0 as well as in r0/r1 for backwards
        @ compatibility.
        adr     ip, LSYM(f0_ret)
        @ Push pc as well so that RETLDM works correctly.
        do_push {r4, r5, ip, lr, pc}
#else
        do_push {r4, r5, lr}
#endif

        ands    r5, ah, #0x80000000     @ sign bit in r5
        bpl     2f
#if defined(__thumb2__)
        negs    al, al
        sbc     ah, ah, ah, lsl #1
#else
        rsbs    al, al, #0
        rsc     ah, ah, #0
#endif
2:
        mov     r4, #0x400              @ initial exponent
        add     r4, r4, #(52-1 - 1)

        @ FPA little-endian: must swap the word order.
        .ifnc   xh, ah
        mov     ip, al
        mov     xh, ah
        mov     xl, ip
        .endif

        movs    ip, xh, lsr #22
        beq     LSYM(Lad_p)

        @ The value is too big.  Scale it down a bit...
        mov     r2, #3
        movs    ip, ip, lsr #3
        do_it   ne
        addne   r2, r2, #3
        movs    ip, ip, lsr #3
        do_it   ne
        addne   r2, r2, #3
        add     r2, r2, ip, lsr #3

        rsb     r3, r2, #32
        shift1  lsl, ip, xl, r3
        shift1  lsr, xl, xl, r2
        shiftop orr xl xl xh lsl r3 lr
        shift1  lsr, xh, xh, r2
        add     r4, r4, r2
        b       LSYM(Lad_p)

#if !defined (__VFP_FP__) && !defined(__SOFTFP__)

        @ Legacy code expects the result to be returned in f0.  Copy it
        @ there as well.
LSYM(f0_ret):
        do_push {r0, r1}
        ldfd    f0, [sp], #8
        RETLDM

#endif

        FUNC_END floatdidf
        FUNC_END aeabi_l2d
        FUNC_END floatundidf
        FUNC_END aeabi_ul2d

#endif /* L_addsubdf3 */

#ifdef L_arm_muldivdf3

ARM_FUNC_START muldf3
ARM_FUNC_ALIAS aeabi_dmul muldf3
        do_push {r4, r5, r6, lr}

        @ Mask out exponents, trap any zero/denormal/INF/NAN.
        mov     ip, #0xff
        orr     ip, ip, #0x700
        ands    r4, ip, xh, lsr #20
        do_it   ne, tte
        COND(and,s,ne)  r5, ip, yh, lsr #20
        teqne   r4, ip
        teqne   r5, ip
        bleq    LSYM(Lml_s)

        @ Add exponents together
        add     r4, r4, r5

        @ Determine final sign.
        eor     r6, xh, yh

        @ Convert mantissa to unsigned integer.
        @ If power of two, branch to a separate path.
        bic     xh, xh, ip, lsl #21
        bic     yh, yh, ip, lsl #21
        orrs    r5, xl, xh, lsl #12
        do_it   ne
        COND(orr,s,ne)  r5, yl, yh, lsl #12
        orr     xh, xh, #0x00100000
        orr     yh, yh, #0x00100000
        beq     LSYM(Lml_1)

#if __ARM_ARCH__ < 4

        @ Put sign bit in r6, which will be restored in yl later.
        and   r6, r6, #0x80000000

        @ Well, no way to make it shorter without the umull instruction.
        stmfd   sp!, {r6, r7, r8, r9, sl, fp}
        mov     r7, xl, lsr #16
        mov     r8, yl, lsr #16
        mov     r9, xh, lsr #16
        mov     sl, yh, lsr #16
        bic     xl, xl, r7, lsl #16
        bic     yl, yl, r8, lsl #16
        bic     xh, xh, r9, lsl #16
        bic     yh, yh, sl, lsl #16
        mul     ip, xl, yl
        mul     fp, xl, r8
        mov     lr, #0
        adds    ip, ip, fp, lsl #16
        adc     lr, lr, fp, lsr #16
        mul     fp, r7, yl
        adds    ip, ip, fp, lsl #16
        adc     lr, lr, fp, lsr #16
        mul     fp, xl, sl
        mov     r5, #0
        adds    lr, lr, fp, lsl #16
        adc     r5, r5, fp, lsr #16
        mul     fp, r7, yh
        adds    lr, lr, fp, lsl #16
        adc     r5, r5, fp, lsr #16
        mul     fp, xh, r8
        adds    lr, lr, fp, lsl #16
        adc     r5, r5, fp, lsr #16
        mul     fp, r9, yl
        adds    lr, lr, fp, lsl #16
        adc     r5, r5, fp, lsr #16
        mul     fp, xh, sl
        mul     r6, r9, sl
        adds    r5, r5, fp, lsl #16
        adc     r6, r6, fp, lsr #16
        mul     fp, r9, yh
        adds    r5, r5, fp, lsl #16
        adc     r6, r6, fp, lsr #16
        mul     fp, xl, yh
        adds    lr, lr, fp
        mul     fp, r7, sl
        adcs    r5, r5, fp
        mul     fp, xh, yl
        adc     r6, r6, #0
        adds    lr, lr, fp
        mul     fp, r9, r8
        adcs    r5, r5, fp
        mul     fp, r7, r8
        adc     r6, r6, #0
        adds    lr, lr, fp
        mul     fp, xh, yh
        adcs    r5, r5, fp
        adc     r6, r6, #0
        ldmfd   sp!, {yl, r7, r8, r9, sl, fp}

#else

        @ Here is the actual multiplication.
        umull   ip, lr, xl, yl
        mov     r5, #0
        umlal   lr, r5, xh, yl
        and     yl, r6, #0x80000000
        umlal   lr, r5, xl, yh
        mov     r6, #0
        umlal   r5, r6, xh, yh

#endif

        @ The LSBs in ip are only significant for the final rounding.
        @ Fold them into lr.
        teq     ip, #0
        do_it   ne
        orrne   lr, lr, #1

        @ Adjust result upon the MSB position.
        sub     r4, r4, #0xff
        cmp     r6, #(1 << (20-11))
        sbc     r4, r4, #0x300
        bcs     1f
        movs    lr, lr, lsl #1
        adcs    r5, r5, r5
        adc     r6, r6, r6
1:
        @ Shift to final position, add sign to result.
        orr     xh, yl, r6, lsl #11
        orr     xh, xh, r5, lsr #21
        mov     xl, r5, lsl #11
        orr     xl, xl, lr, lsr #21
        mov     lr, lr, lsl #11

        @ Check exponent range for under/overflow.
        subs    ip, r4, #(254 - 1)
        do_it   hi
        cmphi   ip, #0x700
        bhi     LSYM(Lml_u)

        @ Round the result, merge final exponent.
        cmp     lr, #0x80000000
        do_it   eq
        COND(mov,s,eq)  lr, xl, lsr #1
        adcs    xl, xl, #0
        adc     xh, xh, r4, lsl #20
        RETLDM  "r4, r5, r6"

        @ Multiplication by 0x1p*: let''s shortcut a lot of code.
LSYM(Lml_1):
        and     r6, r6, #0x80000000
        orr     xh, r6, xh
        orr     xl, xl, yl
        eor     xh, xh, yh
        subs    r4, r4, ip, lsr #1
        do_it   gt, tt
        COND(rsb,s,gt)  r5, r4, ip
        orrgt   xh, xh, r4, lsl #20
        RETLDM  "r4, r5, r6" gt

        @ Under/overflow: fix things up for the code below.
        orr     xh, xh, #0x00100000
        mov     lr, #0
        subs    r4, r4, #1

LSYM(Lml_u):
        @ Overflow?
        bgt     LSYM(Lml_o)

        @ Check if denormalized result is possible, otherwise return signed 0.
        cmn     r4, #(53 + 1)
        do_it   le, tt
        movle   xl, #0
        bicle   xh, xh, #0x7fffffff
        RETLDM  "r4, r5, r6" le

        @ Find out proper shift value.
        rsb     r4, r4, #0
        subs    r4, r4, #32
        bge     2f
        adds    r4, r4, #12
        bgt     1f

        @ shift result right of 1 to 20 bits, preserve sign bit, round, etc.
        add     r4, r4, #20
        rsb     r5, r4, #32
        shift1  lsl, r3, xl, r5
        shift1  lsr, xl, xl, r4
        shiftop orr xl xl xh lsl r5 r2
        and     r2, xh, #0x80000000
        bic     xh, xh, #0x80000000
        adds    xl, xl, r3, lsr #31
        shiftop adc xh r2 xh lsr r4 r6
        orrs    lr, lr, r3, lsl #1
        do_it   eq
        biceq   xl, xl, r3, lsr #31
        RETLDM  "r4, r5, r6"

        @ shift result right of 21 to 31 bits, or left 11 to 1 bits after
        @ a register switch from xh to xl. Then round.
1:      rsb     r4, r4, #12
        rsb     r5, r4, #32
        shift1  lsl, r3, xl, r4
        shift1  lsr, xl, xl, r5
        shiftop orr xl xl xh lsl r4 r2
        bic     xh, xh, #0x7fffffff
        adds    xl, xl, r3, lsr #31
        adc     xh, xh, #0
        orrs    lr, lr, r3, lsl #1
        do_it   eq
        biceq   xl, xl, r3, lsr #31
        RETLDM  "r4, r5, r6"

        @ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch
        @ from xh to xl.  Leftover bits are in r3-r6-lr for rounding.
2:      rsb     r5, r4, #32
        shiftop orr lr lr xl lsl r5 r2
        shift1  lsr, r3, xl, r4
        shiftop orr r3 r3 xh lsl r5 r2
        shift1  lsr, xl, xh, r4
        bic     xh, xh, #0x7fffffff
        shiftop bic xl xl xh lsr r4 r2
        add     xl, xl, r3, lsr #31
        orrs    lr, lr, r3, lsl #1
        do_it   eq
        biceq   xl, xl, r3, lsr #31
        RETLDM  "r4, r5, r6"

        @ One or both arguments are denormalized.
        @ Scale them leftwards and preserve sign bit.
LSYM(Lml_d):
        teq     r4, #0
        bne     2f
        and     r6, xh, #0x80000000
1:      movs    xl, xl, lsl #1
        adc     xh, xh, xh
        tst     xh, #0x00100000
        do_it   eq
        subeq   r4, r4, #1
        beq     1b
        orr     xh, xh, r6
        teq     r5, #0
        do_it   ne
        RETc(ne)
2:      and     r6, yh, #0x80000000
3:      movs    yl, yl, lsl #1
        adc     yh, yh, yh
        tst     yh, #0x00100000
        do_it   eq
        subeq   r5, r5, #1
        beq     3b
        orr     yh, yh, r6
        RET

LSYM(Lml_s):
        @ Isolate the INF and NAN cases away
        teq     r4, ip
        and     r5, ip, yh, lsr #20
        do_it   ne
        teqne   r5, ip
        beq     1f

        @ Here, one or more arguments are either denormalized or zero.
        orrs    r6, xl, xh, lsl #1
        do_it   ne
        COND(orr,s,ne)  r6, yl, yh, lsl #1
        bne     LSYM(Lml_d)

        @ Result is 0, but determine sign anyway.
LSYM(Lml_z):
        eor     xh, xh, yh
        and     xh, xh, #0x80000000
        mov     xl, #0
        RETLDM  "r4, r5, r6"

1:      @ One or both args are INF or NAN.
        orrs    r6, xl, xh, lsl #1
        do_it   eq, te
        moveq   xl, yl
        moveq   xh, yh
        COND(orr,s,ne)  r6, yl, yh, lsl #1
        beq     LSYM(Lml_n)             @ 0 * INF or INF * 0 -> NAN
        teq     r4, ip
        bne     1f
        orrs    r6, xl, xh, lsl #12
        bne     LSYM(Lml_n)             @ NAN * <anything> -> NAN
1:      teq     r5, ip
        bne     LSYM(Lml_i)
        orrs    r6, yl, yh, lsl #12
        do_it   ne, t
        movne   xl, yl
        movne   xh, yh
        bne     LSYM(Lml_n)             @ <anything> * NAN -> NAN

        @ Result is INF, but we need to determine its sign.
LSYM(Lml_i):
        eor     xh, xh, yh

        @ Overflow: return INF (sign already in xh).
LSYM(Lml_o):
        and     xh, xh, #0x80000000
        orr     xh, xh, #0x7f000000
        orr     xh, xh, #0x00f00000
        mov     xl, #0
        RETLDM  "r4, r5, r6"

        @ Return a quiet NAN.
LSYM(Lml_n):
        orr     xh, xh, #0x7f000000
        orr     xh, xh, #0x00f80000
        RETLDM  "r4, r5, r6"

        FUNC_END aeabi_dmul
        FUNC_END muldf3

ARM_FUNC_START divdf3
ARM_FUNC_ALIAS aeabi_ddiv divdf3
        
        do_push {r4, r5, r6, lr}

        @ Mask out exponents, trap any zero/denormal/INF/NAN.
        mov     ip, #0xff
        orr     ip, ip, #0x700
        ands    r4, ip, xh, lsr #20
        do_it   ne, tte
        COND(and,s,ne)  r5, ip, yh, lsr #20
        teqne   r4, ip
        teqne   r5, ip
        bleq    LSYM(Ldv_s)

        @ Substract divisor exponent from dividend''s.
        sub     r4, r4, r5

        @ Preserve final sign into lr.
        eor     lr, xh, yh

        @ Convert mantissa to unsigned integer.
        @ Dividend -> r5-r6, divisor -> yh-yl.
        orrs    r5, yl, yh, lsl #12
        mov     xh, xh, lsl #12
        beq     LSYM(Ldv_1)
        mov     yh, yh, lsl #12
        mov     r5, #0x10000000
        orr     yh, r5, yh, lsr #4
        orr     yh, yh, yl, lsr #24
        mov     yl, yl, lsl #8
        orr     r5, r5, xh, lsr #4
        orr     r5, r5, xl, lsr #24
        mov     r6, xl, lsl #8

        @ Initialize xh with final sign bit.
        and     xh, lr, #0x80000000

        @ Ensure result will land to known bit position.
        @ Apply exponent bias accordingly.
        cmp     r5, yh
        do_it   eq
        cmpeq   r6, yl
        adc     r4, r4, #(255 - 2)
        add     r4, r4, #0x300
        bcs     1f
        movs    yh, yh, lsr #1
        mov     yl, yl, rrx
1:
        @ Perform first substraction to align result to a nibble.
        subs    r6, r6, yl
        sbc     r5, r5, yh
        movs    yh, yh, lsr #1
        mov     yl, yl, rrx
        mov     xl, #0x00100000
        mov     ip, #0x00080000

        @ The actual division loop.
1:      subs    lr, r6, yl
        sbcs    lr, r5, yh
        do_it   cs, tt
        subcs   r6, r6, yl
        movcs   r5, lr
        orrcs   xl, xl, ip
        movs    yh, yh, lsr #1
        mov     yl, yl, rrx
        subs    lr, r6, yl
        sbcs    lr, r5, yh
        do_it   cs, tt
        subcs   r6, r6, yl
        movcs   r5, lr
        orrcs   xl, xl, ip, lsr #1
        movs    yh, yh, lsr #1
        mov     yl, yl, rrx
        subs    lr, r6, yl
        sbcs    lr, r5, yh
        do_it   cs, tt
        subcs   r6, r6, yl
        movcs   r5, lr
        orrcs   xl, xl, ip, lsr #2
        movs    yh, yh, lsr #1
        mov     yl, yl, rrx
        subs    lr, r6, yl
        sbcs    lr, r5, yh
        do_it   cs, tt
        subcs   r6, r6, yl
        movcs   r5, lr
        orrcs   xl, xl, ip, lsr #3

        orrs    lr, r5, r6
        beq     2f
        mov     r5, r5, lsl #4
        orr     r5, r5, r6, lsr #28
        mov     r6, r6, lsl #4
        mov     yh, yh, lsl #3
        orr     yh, yh, yl, lsr #29
        mov     yl, yl, lsl #3
        movs    ip, ip, lsr #4
        bne     1b

        @ We are done with a word of the result.
        @ Loop again for the low word if this pass was for the high word.
        tst     xh, #0x00100000
        bne     3f
        orr     xh, xh, xl
        mov     xl, #0
        mov     ip, #0x80000000
        b       1b
2:
        @ Be sure result starts in the high word.
        tst     xh, #0x00100000
        do_it   eq, t
        orreq   xh, xh, xl
        moveq   xl, #0
3:
        @ Check exponent range for under/overflow.
        subs    ip, r4, #(254 - 1)
        do_it   hi
        cmphi   ip, #0x700
        bhi     LSYM(Lml_u)

        @ Round the result, merge final exponent.
        subs    ip, r5, yh
        do_it   eq, t
        COND(sub,s,eq)  ip, r6, yl
        COND(mov,s,eq)  ip, xl, lsr #1
        adcs    xl, xl, #0
        adc     xh, xh, r4, lsl #20
        RETLDM  "r4, r5, r6"

        @ Division by 0x1p*: shortcut a lot of code.
LSYM(Ldv_1):
        and     lr, lr, #0x80000000
        orr     xh, lr, xh, lsr #12
        adds    r4, r4, ip, lsr #1
        do_it   gt, tt
        COND(rsb,s,gt)  r5, r4, ip
        orrgt   xh, xh, r4, lsl #20
        RETLDM  "r4, r5, r6" gt

        orr     xh, xh, #0x00100000
        mov     lr, #0
        subs    r4, r4, #1
        b       LSYM(Lml_u)

        @ Result mightt need to be denormalized: put remainder bits
        @ in lr for rounding considerations.
LSYM(Ldv_u):
        orr     lr, r5, r6
        b       LSYM(Lml_u)

        @ One or both arguments is either INF, NAN or zero.
LSYM(Ldv_s):
        and     r5, ip, yh, lsr #20
        teq     r4, ip
        do_it   eq
        teqeq   r5, ip
        beq     LSYM(Lml_n)             @ INF/NAN / INF/NAN -> NAN
        teq     r4, ip
        bne     1f
        orrs    r4, xl, xh, lsl #12
        bne     LSYM(Lml_n)             @ NAN / <anything> -> NAN
        teq     r5, ip
        bne     LSYM(Lml_i)             @ INF / <anything> -> INF
        mov     xl, yl
        mov     xh, yh
        b       LSYM(Lml_n)             @ INF / (INF or NAN) -> NAN
1:      teq     r5, ip
        bne     2f
        orrs    r5, yl, yh, lsl #12
        beq     LSYM(Lml_z)             @ <anything> / INF -> 0
        mov     xl, yl
        mov     xh, yh
        b       LSYM(Lml_n)             @ <anything> / NAN -> NAN
2:      @ If both are nonzero, we need to normalize and resume above.
        orrs    r6, xl, xh, lsl #1
        do_it   ne
        COND(orr,s,ne)  r6, yl, yh, lsl #1
        bne     LSYM(Lml_d)
        @ One or both arguments are 0.
        orrs    r4, xl, xh, lsl #1
        bne     LSYM(Lml_i)             @ <non_zero> / 0 -> INF
        orrs    r5, yl, yh, lsl #1
        bne     LSYM(Lml_z)             @ 0 / <non_zero> -> 0
        b       LSYM(Lml_n)             @ 0 / 0 -> NAN

        FUNC_END aeabi_ddiv
        FUNC_END divdf3

#endif /* L_muldivdf3 */

#ifdef L_arm_cmpdf2

@ Note: only r0 (return value) and ip are clobbered here.

ARM_FUNC_START gtdf2
ARM_FUNC_ALIAS gedf2 gtdf2
        mov     ip, #-1
        b       1f

ARM_FUNC_START ltdf2
ARM_FUNC_ALIAS ledf2 ltdf2
        mov     ip, #1
        b       1f

ARM_FUNC_START cmpdf2
ARM_FUNC_ALIAS nedf2 cmpdf2
ARM_FUNC_ALIAS eqdf2 cmpdf2
        mov     ip, #1                  @ how should we specify unordered here?

1:      str     ip, [sp, #-4]!

        @ Trap any INF/NAN first.
        mov     ip, xh, lsl #1
        mvns    ip, ip, asr #21
        mov     ip, yh, lsl #1
        do_it   ne
        COND(mvn,s,ne)  ip, ip, asr #21
        beq     3f

        @ Test for equality.
        @ Note that 0.0 is equal to -0.0.
2:      add     sp, sp, #4
        orrs    ip, xl, xh, lsl #1      @ if x == 0.0 or -0.0
        do_it   eq, e
        COND(orr,s,eq)  ip, yl, yh, lsl #1      @ and y == 0.0 or -0.0
        teqne   xh, yh                  @ or xh == yh
        do_it   eq, tt
        teqeq   xl, yl                  @ and xl == yl
        moveq   r0, #0                  @ then equal.
        RETc(eq)

        @ Clear C flag
        cmn     r0, #0

        @ Compare sign, 
        teq     xh, yh

        @ Compare values if same sign
        do_it   pl
        cmppl   xh, yh
        do_it   eq
        cmpeq   xl, yl

        @ Result:
        do_it   cs, e
        movcs   r0, yh, asr #31
        mvncc   r0, yh, asr #31
        orr     r0, r0, #1
        RET

        @ Look for a NAN.
3:      mov     ip, xh, lsl #1
        mvns    ip, ip, asr #21
        bne     4f
        orrs    ip, xl, xh, lsl #12
        bne     5f                      @ x is NAN
4:      mov     ip, yh, lsl #1
        mvns    ip, ip, asr #21
        bne     2b
        orrs    ip, yl, yh, lsl #12
        beq     2b                      @ y is not NAN
5:      ldr     r0, [sp], #4            @ unordered return code
        RET

        FUNC_END gedf2
        FUNC_END gtdf2
        FUNC_END ledf2
        FUNC_END ltdf2
        FUNC_END nedf2
        FUNC_END eqdf2
        FUNC_END cmpdf2

ARM_FUNC_START aeabi_cdrcmple

        mov     ip, r0
        mov     r0, r2
        mov     r2, ip
        mov     ip, r1
        mov     r1, r3
        mov     r3, ip
        b       6f
        
ARM_FUNC_START aeabi_cdcmpeq
ARM_FUNC_ALIAS aeabi_cdcmple aeabi_cdcmpeq

        @ The status-returning routines are required to preserve all
        @ registers except ip, lr, and cpsr.
6:      do_push {r0, lr}
        ARM_CALL cmpdf2
        @ Set the Z flag correctly, and the C flag unconditionally.
        cmp     r0, #0
        @ Clear the C flag if the return value was -1, indicating
        @ that the first operand was smaller than the second.
        do_it   mi
        cmnmi   r0, #0
        RETLDM  "r0"

        FUNC_END aeabi_cdcmple
        FUNC_END aeabi_cdcmpeq
        FUNC_END aeabi_cdrcmple
        
ARM_FUNC_START  aeabi_dcmpeq

        str     lr, [sp, #-8]!
        ARM_CALL aeabi_cdcmple
        do_it   eq, e
        moveq   r0, #1  @ Equal to.
        movne   r0, #0  @ Less than, greater than, or unordered.
        RETLDM

        FUNC_END aeabi_dcmpeq

ARM_FUNC_START  aeabi_dcmplt

        str     lr, [sp, #-8]!
        ARM_CALL aeabi_cdcmple
        do_it   cc, e
        movcc   r0, #1  @ Less than.
        movcs   r0, #0  @ Equal to, greater than, or unordered.
        RETLDM

        FUNC_END aeabi_dcmplt

ARM_FUNC_START  aeabi_dcmple

        str     lr, [sp, #-8]!
        ARM_CALL aeabi_cdcmple
        do_it   ls, e
        movls   r0, #1  @ Less than or equal to.
        movhi   r0, #0  @ Greater than or unordered.
        RETLDM

        FUNC_END aeabi_dcmple

ARM_FUNC_START  aeabi_dcmpge

        str     lr, [sp, #-8]!
        ARM_CALL aeabi_cdrcmple
        do_it   ls, e
        movls   r0, #1  @ Operand 2 is less than or equal to operand 1.
        movhi   r0, #0  @ Operand 2 greater than operand 1, or unordered.
        RETLDM

        FUNC_END aeabi_dcmpge

ARM_FUNC_START  aeabi_dcmpgt

        str     lr, [sp, #-8]!
        ARM_CALL aeabi_cdrcmple
        do_it   cc, e
        movcc   r0, #1  @ Operand 2 is less than operand 1.
        movcs   r0, #0  @ Operand 2 is greater than or equal to operand 1,
                        @ or they are unordered.
        RETLDM

        FUNC_END aeabi_dcmpgt

#endif /* L_cmpdf2 */

#ifdef L_arm_unorddf2

ARM_FUNC_START unorddf2
ARM_FUNC_ALIAS aeabi_dcmpun unorddf2

        mov     ip, xh, lsl #1
        mvns    ip, ip, asr #21
        bne     1f
        orrs    ip, xl, xh, lsl #12
        bne     3f                      @ x is NAN
1:      mov     ip, yh, lsl #1
        mvns    ip, ip, asr #21
        bne     2f
        orrs    ip, yl, yh, lsl #12
        bne     3f                      @ y is NAN
2:      mov     r0, #0                  @ arguments are ordered.
        RET

3:      mov     r0, #1                  @ arguments are unordered.
        RET

        FUNC_END aeabi_dcmpun
        FUNC_END unorddf2

#endif /* L_unorddf2 */

#ifdef L_arm_fixdfsi

ARM_FUNC_START fixdfsi
ARM_FUNC_ALIAS aeabi_d2iz fixdfsi

        @ check exponent range.
        mov     r2, xh, lsl #1
        adds    r2, r2, #(1 << 21)
        bcs     2f                      @ value is INF or NAN
        bpl     1f                      @ value is too small
        mov     r3, #(0xfffffc00 + 31)
        subs    r2, r3, r2, asr #21
        bls     3f                      @ value is too large

        @ scale value
        mov     r3, xh, lsl #11
        orr     r3, r3, #0x80000000
        orr     r3, r3, xl, lsr #21
        tst     xh, #0x80000000         @ the sign bit
        shift1  lsr, r0, r3, r2
        do_it   ne
        rsbne   r0, r0, #0
        RET

1:      mov     r0, #0
        RET

2:      orrs    xl, xl, xh, lsl #12
        bne     4f                      @ x is NAN.
3:      ands    r0, xh, #0x80000000     @ the sign bit
        do_it   eq
        moveq   r0, #0x7fffffff         @ maximum signed positive si
        RET

4:      mov     r0, #0                  @ How should we convert NAN?
        RET

        FUNC_END aeabi_d2iz
        FUNC_END fixdfsi

#endif /* L_fixdfsi */

#ifdef L_arm_fixunsdfsi

ARM_FUNC_START fixunsdfsi
ARM_FUNC_ALIAS aeabi_d2uiz fixunsdfsi

        @ check exponent range.
        movs    r2, xh, lsl #1
        bcs     1f                      @ value is negative
        adds    r2, r2, #(1 << 21)
        bcs     2f                      @ value is INF or NAN
        bpl     1f                      @ value is too small
        mov     r3, #(0xfffffc00 + 31)
        subs    r2, r3, r2, asr #21
        bmi     3f                      @ value is too large

        @ scale value
        mov     r3, xh, lsl #11
        orr     r3, r3, #0x80000000
        orr     r3, r3, xl, lsr #21
        shift1  lsr, r0, r3, r2
        RET

1:      mov     r0, #0
        RET

2:      orrs    xl, xl, xh, lsl #12
        bne     4f                      @ value is NAN.
3:      mov     r0, #0xffffffff         @ maximum unsigned si
        RET

4:      mov     r0, #0                  @ How should we convert NAN?
        RET

        FUNC_END aeabi_d2uiz
        FUNC_END fixunsdfsi

#endif /* L_fixunsdfsi */

#ifdef L_arm_truncdfsf2

ARM_FUNC_START truncdfsf2
ARM_FUNC_ALIAS aeabi_d2f truncdfsf2

        @ check exponent range.
        mov     r2, xh, lsl #1
        subs    r3, r2, #((1023 - 127) << 21)
        do_it   cs, t
        COND(sub,s,cs)  ip, r3, #(1 << 21)
        COND(rsb,s,cs)  ip, ip, #(254 << 21)
        bls     2f                      @ value is out of range

1:      @ shift and round mantissa
        and     ip, xh, #0x80000000
        mov     r2, xl, lsl #3
        orr     xl, ip, xl, lsr #29
        cmp     r2, #0x80000000
        adc     r0, xl, r3, lsl #2
        do_it   eq
        biceq   r0, r0, #1
        RET

2:      @ either overflow or underflow
        tst     xh, #0x40000000
        bne     3f                      @ overflow

        @ check if denormalized value is possible
        adds    r2, r3, #(23 << 21)
        do_it   lt, t
        andlt   r0, xh, #0x80000000     @ too small, return signed 0.
        RETc(lt)

        @ denormalize value so we can resume with the code above afterwards.
        orr     xh, xh, #0x00100000
        mov     r2, r2, lsr #21
        rsb     r2, r2, #24
        rsb     ip, r2, #32
#if defined(__thumb2__)
        lsls    r3, xl, ip
#else
        movs    r3, xl, lsl ip
#endif
        shift1  lsr, xl, xl, r2
        do_it   ne
        orrne   xl, xl, #1              @ fold r3 for rounding considerations. 
        mov     r3, xh, lsl #11
        mov     r3, r3, lsr #11
        shiftop orr xl xl r3 lsl ip ip
        shift1  lsr, r3, r3, r2
        mov     r3, r3, lsl #1
        b       1b

3:      @ chech for NAN
        mvns    r3, r2, asr #21
        bne     5f                      @ simple overflow
        orrs    r3, xl, xh, lsl #12
        do_it   ne, tt
        movne   r0, #0x7f000000
        orrne   r0, r0, #0x00c00000
        RETc(ne)                        @ return NAN

5:      @ return INF with sign
        and     r0, xh, #0x80000000
        orr     r0, r0, #0x7f000000
        orr     r0, r0, #0x00800000
        RET

        FUNC_END aeabi_d2f
        FUNC_END truncdfsf2

#endif /* L_truncdfsf2 */

Go to most recent revision | Compare with Previous | Blame | View Log

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.