URL
https://opencores.org/ocsvn/openrisc/openrisc/trunk
Subversion Repositories openrisc
[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libgcc/] [config/] [arm/] [ieee754-df.S] - Rev 734
Compare with Previous | Blame | View Log
/* ieee754-df.S double-precision floating point support for ARM

   Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009 Free Software Foundation, Inc.
   Contributed by Nicolas Pitre (nico@cam.org)

   This file is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 3, or (at your option) any
   later version.

   This file is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/*
 * Notes:
 *
 * The goal of this code is to be as fast as possible.  This is
 * not meant to be easy to understand for the casual reader.
 * For slightly simpler code please see the single precision version
 * of this file.
 *
 * Only the default rounding mode is intended for best performances.
 * Exceptions aren't supported yet, but that can be added quite easily
 * if necessary without impacting performances.
 */

@ For FPA, float words are always big-endian.
@ For VFP, float words follow the memory system mode.
#if defined(__VFP_FP__) && !defined(__ARMEB__)
#define xl r0
#define xh r1
#define yl r2
#define yh r3
#else
#define xh r0
#define xl r1
#define yh r2
#define yl r3
#endif

#ifdef L_arm_negdf2

ARM_FUNC_START negdf2
ARM_FUNC_ALIAS aeabi_dneg negdf2

	@ flip sign bit
	eor	xh, xh, #0x80000000
	RET

	FUNC_END aeabi_dneg
	FUNC_END negdf2

#endif

#ifdef L_arm_addsubdf3

ARM_FUNC_START aeabi_drsub

	eor	xh, xh, #0x80000000	@ flip sign bit of first arg
	b	1f

ARM_FUNC_START subdf3
ARM_FUNC_ALIAS aeabi_dsub subdf3

	eor	yh, yh, #0x80000000	@ flip sign bit of second arg
#if defined(__INTERWORKING_STUBS__)
	b	1f			@ Skip Thumb-code prologue
#endif

ARM_FUNC_START adddf3
ARM_FUNC_ALIAS aeabi_dadd adddf3

1:	do_push	{r4, r5, lr}

	@ Look for zeroes, equal values, INF, or NAN.
	shift1	lsl, r4, xh, #1
	shift1	lsl, r5, yh, #1
	teq	r4, r5
	do_it	eq
	teqeq	xl, yl
	do_it	ne, ttt
	COND(orr,s,ne)	ip, r4, xl
	COND(orr,s,ne)	ip, r5, yl
	COND(mvn,s,ne)	ip, r4, asr #21
	COND(mvn,s,ne)	ip, r5, asr #21
	beq	LSYM(Lad_s)

	@ Compute exponent difference.  Make largest exponent in r4,
	@ corresponding arg in xh-xl, and positive exponent difference in r5.
	shift1	lsr, r4, r4, #21
	rsbs	r5, r4, r5, lsr #21
	do_it	lt
	rsblt	r5, r5, #0
	ble	1f
	add	r4, r4, r5
	eor	yl, xl, yl
	eor	yh, xh, yh
	eor	xl, yl, xl
	eor	xh, yh, xh
	eor	yl, xl, yl
	eor	yh, xh, yh
1:
	@ If exponent difference is too large, return largest argument
	@ already in xh-xl.  We need up to 54 bit to handle proper rounding
	@ of 0x1p54 - 1.1.
	cmp	r5, #54
	do_it	hi
	RETLDM	"r4, r5" hi

	@ Convert mantissa to signed integer.
	tst	xh, #0x80000000
	mov	xh, xh, lsl #12
	mov	ip, #0x00100000
	orr	xh, ip, xh, lsr #12
	beq	1f
#if defined(__thumb2__)
	negs	xl, xl
	sbc	xh, xh, xh, lsl #1
#else
	rsbs	xl, xl, #0
	rsc	xh, xh, #0
#endif
1:
	tst	yh, #0x80000000
	mov	yh, yh, lsl #12
	orr	yh, ip, yh, lsr #12
	beq	1f
#if defined(__thumb2__)
	negs	yl, yl
	sbc	yh, yh, yh, lsl #1
#else
	rsbs	yl, yl, #0
	rsc	yh, yh, #0
#endif
1:
	@ If exponent == difference, one or both args were denormalized.
	@ Since this is not common case, rescale them off line.
	teq	r4, r5
	beq	LSYM(Lad_d)
LSYM(Lad_x):

	@ Compensate for the exponent overlapping the mantissa MSB added later
	sub	r4, r4, #1

	@ Shift yh-yl right per r5, add to xh-xl, keep leftover bits into ip.
	rsbs	lr, r5, #32
	blt	1f
	shift1	lsl, ip, yl, lr
	shiftop adds xl xl yl lsr r5 yl
	adc	xh, xh, #0
	shiftop adds xl xl yh lsl lr yl
	shiftop adcs xh xh yh asr r5 yh
	b	2f
1:	sub	r5, r5, #32
	add	lr, lr, #32
	cmp	yl, #1
	shift1	lsl,ip, yh, lr
	do_it	cs
	orrcs	ip, ip, #2		@ 2 not 1, to allow lsr #1 later
	shiftop adds xl xl yh asr r5 yh
	adcs	xh, xh, yh, asr #31
2:
	@ We now have a result in xh-xl-ip.
	@ Keep absolute value in xh-xl-ip, sign in r5 (the n bit was set above)
	and	r5, xh, #0x80000000
	bpl	LSYM(Lad_p)
#if defined(__thumb2__)
	mov	lr, #0
	negs	ip, ip
	sbcs	xl, lr, xl
	sbc	xh, lr, xh
#else
	rsbs	ip, ip, #0
	rscs	xl, xl, #0
	rsc	xh, xh, #0
#endif

	@ Determine how to normalize the result.
LSYM(Lad_p):
	cmp	xh, #0x00100000
	bcc	LSYM(Lad_a)
	cmp	xh, #0x00200000
	bcc	LSYM(Lad_e)

	@ Result needs to be shifted right.
	movs	xh, xh, lsr #1
	movs	xl, xl, rrx
	mov	ip, ip, rrx
	add	r4, r4, #1

	@ Make sure we did not bust our exponent.
	mov	r2, r4, lsl #21
	cmn	r2, #(2 << 21)
	bcs	LSYM(Lad_o)

	@ Our result is now properly aligned into xh-xl, remaining bits in ip.
	@ Round with MSB of ip.  If halfway between two numbers, round towards
	@ LSB of xl = 0.
	@ Pack final result together.
LSYM(Lad_e):
	cmp	ip, #0x80000000
	do_it	eq
	COND(mov,s,eq)	ip, xl, lsr #1
	adcs	xl, xl, #0
	adc	xh, xh, r4, lsl #20
	orr	xh, xh, r5
	RETLDM	"r4, r5"

	@ Result must be shifted left and exponent adjusted.
LSYM(Lad_a):
	movs	ip, ip, lsl #1
	adcs	xl, xl, xl
	adc	xh, xh, xh
	tst	xh, #0x00100000
	sub	r4, r4, #1
	bne	LSYM(Lad_e)

	@ No rounding necessary since ip will always be 0 at this point.
LSYM(Lad_l):

#if __ARM_ARCH__ < 5

	teq	xh, #0
	movne	r3, #20
	moveq	r3, #52
	moveq	xh, xl
	moveq	xl, #0
	mov	r2, xh
	cmp	r2, #(1 << 16)
	movhs	r2, r2, lsr #16
	subhs	r3, r3, #16
	cmp	r2, #(1 << 8)
	movhs	r2, r2, lsr #8
	subhs	r3, r3, #8
	cmp	r2, #(1 << 4)
	movhs	r2, r2, lsr #4
	subhs	r3, r3, #4
	cmp	r2, #(1 << 2)
	subhs	r3, r3, #2
	sublo	r3, r3, r2, lsr #1
	sub	r3, r3, r2, lsr #3

#else

	teq	xh, #0
	do_it	eq, t
	moveq	xh, xl
	moveq	xl, #0
	clz	r3, xh
	do_it	eq
	addeq	r3, r3, #32
	sub	r3, r3, #11

#endif

	@ determine how to shift the value.
	subs	r2, r3, #32
	bge	2f
	adds	r2, r2, #12
	ble	1f

	@ shift value left 21 to 31 bits, or actually right 11 to 1 bits
	@ since a register switch happened above.
	add	ip, r2, #20
	rsb	r2, r2, #12
	shift1	lsl, xl, xh, ip
	shift1	lsr, xh, xh, r2
	b	3f

	@ actually shift value left 1 to 20 bits, which might also represent
	@ 32 to 52 bits if counting the register switch that happened earlier.
1:	add	r2, r2, #20
2:	do_it	le
	rsble	ip, r2, #32
	shift1	lsl, xh, xh, r2
#if defined(__thumb2__)
	lsr	ip, xl, ip
	itt	le
	orrle	xh, xh, ip
	lslle	xl, xl, r2
#else
	orrle	xh, xh, xl, lsr ip
	movle	xl, xl, lsl r2
#endif

	@ adjust exponent accordingly.
3:	subs	r4, r4, r3
	do_it	ge, tt
	addge	xh, xh, r4, lsl #20
	orrge	xh, xh, r5
	RETLDM	"r4, r5" ge

	@ Exponent too small, denormalize result.
	@ Find out proper shift value.
	mvn	r4, r4
	subs	r4, r4, #31
	bge	2f
	adds	r4, r4, #12
	bgt	1f

	@ shift result right of 1 to 20 bits, sign is in r5.
	add	r4, r4, #20
	rsb	r2, r4, #32
	shift1	lsr, xl, xl, r4
	shiftop orr xl xl xh lsl r2 yh
	shiftop orr xh r5 xh lsr r4 yh
	RETLDM	"r4, r5"

	@ shift result right of 21 to 31 bits, or left 11 to 1 bits after
	@ a register switch from xh to xl.
1:	rsb	r4, r4, #12
	rsb	r2, r4, #32
	shift1	lsr, xl, xl, r2
	shiftop orr xl xl xh lsl r4 yh
	mov	xh, r5
	RETLDM	"r4, r5"

	@ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch
	@ from xh to xl.
2:	shift1	lsr, xl, xh, r4
	mov	xh, r5
	RETLDM	"r4, r5"

	@ Adjust exponents for denormalized arguments.
	@ Note that r4 must not remain equal to 0.
LSYM(Lad_d):
	teq	r4, #0
	eor	yh, yh, #0x00100000
	do_it	eq, te
	eoreq	xh, xh, #0x00100000
	addeq	r4, r4, #1
	subne	r5, r5, #1
	b	LSYM(Lad_x)

LSYM(Lad_s):
	mvns	ip, r4, asr #21
	do_it	ne
	COND(mvn,s,ne)	ip, r5, asr #21
	beq	LSYM(Lad_i)

	teq	r4, r5
	do_it	eq
	teqeq	xl, yl
	beq	1f

	@ Result is x + 0.0 = x or 0.0 + y = y.
	orrs	ip, r4, xl
	do_it	eq, t
	moveq	xh, yh
	moveq	xl, yl
	RETLDM	"r4, r5"

1:	teq	xh, yh

	@ Result is x - x = 0.
	do_it	ne, tt
	movne	xh, #0
	movne	xl, #0
	RETLDM	"r4, r5" ne

	@ Result is x + x = 2x.
	movs	ip, r4, lsr #21
	bne	2f
	movs	xl, xl, lsl #1
	adcs	xh, xh, xh
	do_it	cs
	orrcs	xh, xh, #0x80000000
	RETLDM	"r4, r5"
2:	adds	r4, r4, #(2 << 21)
	do_it	cc, t
	addcc	xh, xh, #(1 << 20)
	RETLDM	"r4, r5" cc
	and	r5, xh, #0x80000000

	@ Overflow: return INF.
LSYM(Lad_o):
	orr	xh, r5, #0x7f000000
	orr	xh, xh, #0x00f00000
	mov	xl, #0
	RETLDM	"r4, r5"

	@ At least one of x or y is INF/NAN.
	@   if xh-xl != INF/NAN: return yh-yl (which is INF/NAN)
	@   if yh-yl != INF/NAN: return xh-xl (which is INF/NAN)
	@   if either is NAN: return NAN
	@   if opposite sign: return NAN
	@   otherwise return xh-xl (which is INF or -INF)
LSYM(Lad_i):
	mvns	ip, r4, asr #21
	do_it	ne, te
	movne	xh, yh
	movne	xl, yl
	COND(mvn,s,eq)	ip, r5, asr #21
	do_it	ne, t
	movne	yh, xh
	movne	yl, xl
	orrs	r4, xl, xh, lsl #12
	do_it	eq, te
	COND(orr,s,eq)	r5, yl, yh, lsl #12
	teqeq	xh, yh
	orrne	xh, xh, #0x00080000	@ quiet NAN
	RETLDM	"r4, r5"

	FUNC_END aeabi_dsub
	FUNC_END subdf3
	FUNC_END aeabi_dadd
	FUNC_END adddf3

ARM_FUNC_START floatunsidf
ARM_FUNC_ALIAS aeabi_ui2d floatunsidf

	teq	r0, #0
	do_it	eq, t
	moveq	r1, #0
	RETc(eq)
	do_push	{r4, r5, lr}
	mov	r4, #0x400		@ initial exponent
	add	r4, r4, #(52-1 - 1)
	mov	r5, #0			@ sign bit is 0
	.ifnc	xl, r0
	mov	xl, r0
	.endif
	mov	xh, #0
	b	LSYM(Lad_l)

	FUNC_END aeabi_ui2d
	FUNC_END floatunsidf

ARM_FUNC_START floatsidf
ARM_FUNC_ALIAS aeabi_i2d floatsidf

	teq	r0, #0
	do_it	eq, t
	moveq	r1, #0
	RETc(eq)
	do_push	{r4, r5, lr}
	mov	r4, #0x400		@ initial exponent
	add	r4, r4, #(52-1 - 1)
	ands	r5, r0, #0x80000000	@ sign bit in r5
	do_it	mi
	rsbmi	r0, r0, #0		@ absolute value
	.ifnc	xl, r0
	mov	xl, r0
	.endif
	mov	xh, #0
	b	LSYM(Lad_l)

	FUNC_END aeabi_i2d
	FUNC_END floatsidf

ARM_FUNC_START extendsfdf2
ARM_FUNC_ALIAS aeabi_f2d extendsfdf2

	movs	r2, r0, lsl #1		@ toss sign bit
	mov	xh, r2, asr #3		@ stretch exponent
	mov	xh, xh, rrx		@ retrieve sign bit
	mov	xl, r2, lsl #28		@ retrieve remaining bits
	do_it	ne, ttt
	COND(and,s,ne)	r3, r2, #0xff000000	@ isolate exponent
	teqne	r3, #0xff000000		@ if not 0, check if INF or NAN
	eorne	xh, xh, #0x38000000	@ fixup exponent otherwise.
	RETc(ne)			@ and return it.

	teq	r2, #0			@ if actually 0
	do_it	ne, e
	teqne	r3, #0xff000000		@ or INF or NAN
	RETc(eq)			@ we are done already.

	@ value was denormalized.  We can normalize it now.
	do_push	{r4, r5, lr}
	mov	r4, #0x380		@ setup corresponding exponent
	and	r5, xh, #0x80000000	@ move sign bit in r5
	bic	xh, xh, #0x80000000
	b	LSYM(Lad_l)

	FUNC_END aeabi_f2d
	FUNC_END extendsfdf2

ARM_FUNC_START floatundidf
ARM_FUNC_ALIAS aeabi_ul2d floatundidf

	orrs	r2, r0, r1
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
	do_it	eq, t
	mvfeqd	f0, #0.0
#else
	do_it	eq
#endif
	RETc(eq)

#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
	@ For hard FPA code we want to return via the tail below so that
	@ we can return the result in f0 as well as in r0/r1 for backwards
	@ compatibility.
	adr	ip, LSYM(f0_ret)
	@ Push pc as well so that RETLDM works correctly.
	do_push	{r4, r5, ip, lr, pc}
#else
	do_push	{r4, r5, lr}
#endif

	mov	r5, #0
	b	2f

ARM_FUNC_START floatdidf
ARM_FUNC_ALIAS aeabi_l2d floatdidf

	orrs	r2, r0, r1
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
	do_it	eq, t
	mvfeqd	f0, #0.0
#else
	do_it	eq
#endif
	RETc(eq)

#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
	@ For hard FPA code we want to return via the tail below so that
	@ we can return the result in f0 as well as in r0/r1 for backwards
	@ compatibility.
	adr	ip, LSYM(f0_ret)
	@ Push pc as well so that RETLDM works correctly.
	do_push	{r4, r5, ip, lr, pc}
#else
	do_push	{r4, r5, lr}
#endif

	ands	r5, ah, #0x80000000	@ sign bit in r5
	bpl	2f
#if defined(__thumb2__)
	negs	al, al
	sbc	ah, ah, ah, lsl #1
#else
	rsbs	al, al, #0
	rsc	ah, ah, #0
#endif
2:
	mov	r4, #0x400		@ initial exponent
	add	r4, r4, #(52-1 - 1)

	@ FPA little-endian: must swap the word order.
	.ifnc	xh, ah
	mov	ip, al
	mov	xh, ah
	mov	xl, ip
	.endif

	movs	ip, xh, lsr #22
	beq	LSYM(Lad_p)

	@ The value is too big.  Scale it down a bit...
	mov	r2, #3
	movs	ip, ip, lsr #3
	do_it	ne
	addne	r2, r2, #3
	movs	ip, ip, lsr #3
	do_it	ne
	addne	r2, r2, #3
	add	r2, r2, ip, lsr #3

	rsb	r3, r2, #32
	shift1	lsl, ip, xl, r3
	shift1	lsr, xl, xl, r2
	shiftop orr xl xl xh lsl r3 lr
	shift1	lsr, xh, xh, r2
	add	r4, r4, r2
	b	LSYM(Lad_p)

#if !defined (__VFP_FP__) && !defined(__SOFTFP__)

	@ Legacy code expects the result to be returned in f0.  Copy it
	@ there as well.
LSYM(f0_ret):
	do_push	{r0, r1}
	ldfd	f0, [sp], #8
	RETLDM

#endif

	FUNC_END floatdidf
	FUNC_END aeabi_l2d
	FUNC_END floatundidf
	FUNC_END aeabi_ul2d

#endif /* L_addsubdf3 */

#ifdef L_arm_muldivdf3

ARM_FUNC_START muldf3
ARM_FUNC_ALIAS aeabi_dmul muldf3

	do_push	{r4, r5, r6, lr}

	@ Mask out exponents, trap any zero/denormal/INF/NAN.
	mov	ip, #0xff
	orr	ip, ip, #0x700
	ands	r4, ip, xh, lsr #20
	do_it	ne, tte
	COND(and,s,ne)	r5, ip, yh, lsr #20
	teqne	r4, ip
	teqne	r5, ip
	bleq	LSYM(Lml_s)

	@ Add exponents together
	add	r4, r4, r5

	@ Determine final sign.
	eor	r6, xh, yh

	@ Convert mantissa to unsigned integer.
	@ If power of two, branch to a separate path.
	bic	xh, xh, ip, lsl #21
	bic	yh, yh, ip, lsl #21
	orrs	r5, xl, xh, lsl #12
	do_it	ne
	COND(orr,s,ne)	r5, yl, yh, lsl #12
	orr	xh, xh, #0x00100000
	orr	yh, yh, #0x00100000
	beq	LSYM(Lml_1)

#if __ARM_ARCH__ < 4

	@ Put sign bit in r6, which will be restored in yl later.
	and	r6, r6, #0x80000000

	@ Well, no way to make it shorter without the umull instruction.
	stmfd	sp!, {r6, r7, r8, r9, sl, fp}
	mov	r7, xl, lsr #16
	mov	r8, yl, lsr #16
	mov	r9, xh, lsr #16
	mov	sl, yh, lsr #16
	bic	xl, xl, r7, lsl #16
	bic	yl, yl, r8, lsl #16
	bic	xh, xh, r9, lsl #16
	bic	yh, yh, sl, lsl #16
	mul	ip, xl, yl
	mul	fp, xl, r8
	mov	lr, #0
	adds	ip, ip, fp, lsl #16
	adc	lr, lr, fp, lsr #16
	mul	fp, r7, yl
	adds	ip, ip, fp, lsl #16
	adc	lr, lr, fp, lsr #16
	mul	fp, xl, sl
	mov	r5, #0
	adds	lr, lr, fp, lsl #16
	adc	r5, r5, fp, lsr #16
	mul	fp, r7, yh
	adds	lr, lr, fp, lsl #16
	adc	r5, r5, fp, lsr #16
	mul	fp, xh, r8
	adds	lr, lr, fp, lsl #16
	adc	r5, r5, fp, lsr #16
	mul	fp, r9, yl
	adds	lr, lr, fp, lsl #16
	adc	r5, r5, fp, lsr #16
	mul	fp, xh, sl
	mul	r6, r9, sl
	adds	r5, r5, fp, lsl #16
	adc	r6, r6, fp, lsr #16
	mul	fp, r9, yh
	adds	r5, r5, fp, lsl #16
	adc	r6, r6, fp, lsr #16
	mul	fp, xl, yh
	adds	lr, lr, fp
	mul	fp, r7, sl
	adcs	r5, r5, fp
	mul	fp, xh, yl
	adc	r6, r6, #0
	adds	lr, lr, fp
	mul	fp, r9, r8
	adcs	r5, r5, fp
	mul	fp, r7, r8
	adc	r6, r6, #0
	adds	lr, lr, fp
	mul	fp, xh, yh
	adcs	r5, r5, fp
	adc	r6, r6, #0
	ldmfd	sp!, {yl, r7, r8, r9, sl, fp}

#else

	@ Here is the actual multiplication.
	umull	ip, lr, xl, yl
	mov	r5, #0
	umlal	lr, r5, xh, yl
	and	yl, r6, #0x80000000
	umlal	lr, r5, xl, yh
	mov	r6, #0
	umlal	r5, r6, xh, yh

#endif

	@ The LSBs in ip are only significant for the final rounding.
	@ Fold them into lr.
	teq	ip, #0
	do_it	ne
	orrne	lr, lr, #1

	@ Adjust result upon the MSB position.
	sub	r4, r4, #0xff
	cmp	r6, #(1 << (20-11))
	sbc	r4, r4, #0x300
	bcs	1f
	movs	lr, lr, lsl #1
	adcs	r5, r5, r5
	adc	r6, r6, r6
1:
	@ Shift to final position, add sign to result.
	orr	xh, yl, r6, lsl #11
	orr	xh, xh, r5, lsr #21
	mov	xl, r5, lsl #11
	orr	xl, xl, lr, lsr #21
	mov	lr, lr, lsl #11

	@ Check exponent range for under/overflow.
	subs	ip, r4, #(254 - 1)
	do_it	hi
	cmphi	ip, #0x700
	bhi	LSYM(Lml_u)

	@ Round the result, merge final exponent.
	cmp	lr, #0x80000000
	do_it	eq
	COND(mov,s,eq)	lr, xl, lsr #1
	adcs	xl, xl, #0
	adc	xh, xh, r4, lsl #20
	RETLDM	"r4, r5, r6"

	@ Multiplication by 0x1p*: let's shortcut a lot of code.
LSYM(Lml_1):
	and	r6, r6, #0x80000000
	orr	xh, r6, xh
	orr	xl, xl, yl
	eor	xh, xh, yh
	subs	r4, r4, ip, lsr #1
	do_it	gt, tt
	COND(rsb,s,gt)	r5, r4, ip
	orrgt	xh, xh, r4, lsl #20
	RETLDM	"r4, r5, r6" gt

	@ Under/overflow: fix things up for the code below.
	orr	xh, xh, #0x00100000
	mov	lr, #0
	subs	r4, r4, #1

LSYM(Lml_u):
	@ Overflow?
	bgt	LSYM(Lml_o)

	@ Check if denormalized result is possible, otherwise return signed 0.
	cmn	r4, #(53 + 1)
	do_it	le, tt
	movle	xl, #0
	bicle	xh, xh, #0x7fffffff
	RETLDM	"r4, r5, r6" le

	@ Find out proper shift value.
	rsb	r4, r4, #0
	subs	r4, r4, #32
	bge	2f
	adds	r4, r4, #12
	bgt	1f

	@ shift result right of 1 to 20 bits, preserve sign bit, round, etc.
	add	r4, r4, #20
	rsb	r5, r4, #32
	shift1	lsl, r3, xl, r5
	shift1	lsr, xl, xl, r4
	shiftop orr xl xl xh lsl r5 r2
	and	r2, xh, #0x80000000
	bic	xh, xh, #0x80000000
	adds	xl, xl, r3, lsr #31
	shiftop adc xh r2 xh lsr r4 r6
	orrs	lr, lr, r3, lsl #1
	do_it	eq
	biceq	xl, xl, r3, lsr #31
	RETLDM	"r4, r5, r6"

	@ shift result right of 21 to 31 bits, or left 11 to 1 bits after
	@ a register switch from xh to xl.  Then round.
1:	rsb	r4, r4, #12
	rsb	r5, r4, #32
	shift1	lsl, r3, xl, r4
	shift1	lsr, xl, xl, r5
	shiftop orr xl xl xh lsl r4 r2
	bic	xh, xh, #0x7fffffff
	adds	xl, xl, r3, lsr #31
	adc	xh, xh, #0
	orrs	lr, lr, r3, lsl #1
	do_it	eq
	biceq	xl, xl, r3, lsr #31
	RETLDM	"r4, r5, r6"

	@ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch
	@ from xh to xl.  Leftover bits are in r3-r6-lr for rounding.
2:	rsb	r5, r4, #32
	shiftop orr lr lr xl lsl r5 r2
	shift1	lsr, r3, xl, r4
	shiftop orr r3 r3 xh lsl r5 r2
	shift1	lsr, xl, xh, r4
	bic	xh, xh, #0x7fffffff
	shiftop bic xl xl xh lsr r4 r2
	add	xl, xl, r3, lsr #31
	orrs	lr, lr, r3, lsl #1
	do_it	eq
	biceq	xl, xl, r3, lsr #31
	RETLDM	"r4, r5, r6"

	@ One or both arguments are denormalized.
	@ Scale them leftwards and preserve sign bit.
LSYM(Lml_d):
	teq	r4, #0
	bne	2f
	and	r6, xh, #0x80000000
1:	movs	xl, xl, lsl #1
	adc	xh, xh, xh
	tst	xh, #0x00100000
	do_it	eq
	subeq	r4, r4, #1
	beq	1b
	orr	xh, xh, r6
	teq	r5, #0
	do_it	ne
	RETc(ne)
2:	and	r6, yh, #0x80000000
3:	movs	yl, yl, lsl #1
	adc	yh, yh, yh
	tst	yh, #0x00100000
	do_it	eq
	subeq	r5, r5, #1
	beq	3b
	orr	yh, yh, r6
	RET

LSYM(Lml_s):
	@ Isolate the INF and NAN cases away
	teq	r4, ip
	and	r5, ip, yh, lsr #20
	do_it	ne
	teqne	r5, ip
	beq	1f

	@ Here, one or more arguments are either denormalized or zero.
	orrs	r6, xl, xh, lsl #1
	do_it	ne
	COND(orr,s,ne)	r6, yl, yh, lsl #1
	bne	LSYM(Lml_d)

	@ Result is 0, but determine sign anyway.
LSYM(Lml_z):
	eor	xh, xh, yh
	and	xh, xh, #0x80000000
	mov	xl, #0
	RETLDM	"r4, r5, r6"

1:	@ One or both args are INF or NAN.
	orrs	r6, xl, xh, lsl #1
	do_it	eq, te
	moveq	xl, yl
	moveq	xh, yh
	COND(orr,s,ne)	r6, yl, yh, lsl #1
	beq	LSYM(Lml_n)		@ 0 * INF or INF * 0 -> NAN
	teq	r4, ip
	bne	1f
	orrs	r6, xl, xh, lsl #12
	bne	LSYM(Lml_n)		@ NAN * <anything> -> NAN
1:	teq	r5, ip
	bne	LSYM(Lml_i)
	orrs	r6, yl, yh, lsl #12
	do_it	ne, t
	movne	xl, yl
	movne	xh, yh
	bne	LSYM(Lml_n)		@ <anything> * NAN -> NAN

	@ Result is INF, but we need to determine its sign.
LSYM(Lml_i):
	eor	xh, xh, yh

	@ Overflow: return INF (sign already in xh).
LSYM(Lml_o):
	and	xh, xh, #0x80000000
	orr	xh, xh, #0x7f000000
	orr	xh, xh, #0x00f00000
	mov	xl, #0
	RETLDM	"r4, r5, r6"

	@ Return a quiet NAN.
LSYM(Lml_n):
	orr	xh, xh, #0x7f000000
	orr	xh, xh, #0x00f80000
	RETLDM	"r4, r5, r6"

	FUNC_END aeabi_dmul
	FUNC_END muldf3

ARM_FUNC_START divdf3
ARM_FUNC_ALIAS aeabi_ddiv divdf3

	do_push	{r4, r5, r6, lr}

	@ Mask out exponents, trap any zero/denormal/INF/NAN.
	mov	ip, #0xff
	orr	ip, ip, #0x700
	ands	r4, ip, xh, lsr #20
	do_it	ne, tte
	COND(and,s,ne)	r5, ip, yh, lsr #20
	teqne	r4, ip
	teqne	r5, ip
	bleq	LSYM(Ldv_s)

	@ Subtract divisor exponent from dividend's.
	sub	r4, r4, r5

	@ Preserve final sign into lr.
	eor	lr, xh, yh

	@ Convert mantissa to unsigned integer.
	@ Dividend -> r5-r6, divisor -> yh-yl.
	orrs	r5, yl, yh, lsl #12
	mov	xh, xh, lsl #12
	beq	LSYM(Ldv_1)
	mov	yh, yh, lsl #12
	mov	r5, #0x10000000
	orr	yh, r5, yh, lsr #4
	orr	yh, yh, yl, lsr #24
	mov	yl, yl, lsl #8
	orr	r5, r5, xh, lsr #4
	orr	r5, r5, xl, lsr #24
	mov	r6, xl, lsl #8

	@ Initialize xh with final sign bit.
	and	xh, lr, #0x80000000

	@ Ensure result will land to known bit position.
	@ Apply exponent bias accordingly.
	cmp	r5, yh
	do_it	eq
	cmpeq	r6, yl
	adc	r4, r4, #(255 - 2)
	add	r4, r4, #0x300
	bcs	1f
	movs	yh, yh, lsr #1
	mov	yl, yl, rrx
1:
	@ Perform first subtraction to align result to a nibble.
	subs	r6, r6, yl
	sbc	r5, r5, yh
	movs	yh, yh, lsr #1
	mov	yl, yl, rrx
	mov	xl, #0x00100000
	mov	ip, #0x00080000

	@ The actual division loop.
1:	subs	lr, r6, yl
	sbcs	lr, r5, yh
	do_it	cs, tt
	subcs	r6, r6, yl
	movcs	r5, lr
	orrcs	xl, xl, ip
	movs	yh, yh, lsr #1
	mov	yl, yl, rrx
	subs	lr, r6, yl
	sbcs	lr, r5, yh
	do_it	cs, tt
	subcs	r6, r6, yl
	movcs	r5, lr
	orrcs	xl, xl, ip, lsr #1
	movs	yh, yh, lsr #1
	mov	yl, yl, rrx
	subs	lr, r6, yl
	sbcs	lr, r5, yh
	do_it	cs, tt
	subcs	r6, r6, yl
	movcs	r5, lr
	orrcs	xl, xl, ip, lsr #2
	movs	yh, yh, lsr #1
	mov	yl, yl, rrx
	subs	lr, r6, yl
	sbcs	lr, r5, yh
	do_it	cs, tt
	subcs	r6, r6, yl
	movcs	r5, lr
	orrcs	xl, xl, ip, lsr #3

	orrs	lr, r5, r6
	beq	2f
	mov	r5, r5, lsl #4
	orr	r5, r5, r6, lsr #28
	mov	r6, r6, lsl #4
	mov	yh, yh, lsl #3
	orr	yh, yh, yl, lsr #29
	mov	yl, yl, lsl #3
	movs	ip, ip, lsr #4
	bne	1b

	@ We are done with a word of the result.
	@ Loop again for the low word if this pass was for the high word.
	tst	xh, #0x00100000
	bne	3f
	orr	xh, xh, xl
	mov	xl, #0
	mov	ip, #0x80000000
	b	1b
2:
	@ Be sure result starts in the high word.
	tst	xh, #0x00100000
	do_it	eq, t
	orreq	xh, xh, xl
	moveq	xl, #0
3:
	@ Check exponent range for under/overflow.
	subs	ip, r4, #(254 - 1)
	do_it	hi
	cmphi	ip, #0x700
	bhi	LSYM(Lml_u)

	@ Round the result, merge final exponent.
	subs	ip, r5, yh
	do_it	eq, t
	COND(sub,s,eq)	ip, r6, yl
	COND(mov,s,eq)	ip, xl, lsr #1
	adcs	xl, xl, #0
	adc	xh, xh, r4, lsl #20
	RETLDM	"r4, r5, r6"

	@ Division by 0x1p*: shortcut a lot of code.
LSYM(Ldv_1):
	and	lr, lr, #0x80000000
	orr	xh, lr, xh, lsr #12
	adds	r4, r4, ip, lsr #1
	do_it	gt, tt
	COND(rsb,s,gt)	r5, r4, ip
	orrgt	xh, xh, r4, lsl #20
	RETLDM	"r4, r5, r6" gt

	orr	xh, xh, #0x00100000
	mov	lr, #0
	subs	r4, r4, #1
	b	LSYM(Lml_u)

	@ Result might need to be denormalized: put remainder bits
	@ in lr for rounding considerations.
LSYM(Ldv_u):
	orr	lr, r5, r6
	b	LSYM(Lml_u)

	@ One or both arguments is either INF, NAN or zero.
LSYM(Ldv_s):
	and	r5, ip, yh, lsr #20
	teq	r4, ip
	do_it	eq
	teqeq	r5, ip
	beq	LSYM(Lml_n)		@ INF/NAN / INF/NAN -> NAN
	teq	r4, ip
	bne	1f
	orrs	r4, xl, xh, lsl #12
	bne	LSYM(Lml_n)		@ NAN / <anything> -> NAN
	teq	r5, ip
	bne	LSYM(Lml_i)		@ INF / <anything> -> INF
	mov	xl, yl
	mov	xh, yh
	b	LSYM(Lml_n)		@ INF / (INF or NAN) -> NAN
1:	teq	r5, ip
	bne	2f
	orrs	r5, yl, yh, lsl #12
	beq	LSYM(Lml_z)		@ <anything> / INF -> 0
	mov	xl, yl
	mov	xh, yh
	b	LSYM(Lml_n)		@ <anything> / NAN -> NAN
2:	@ If both are nonzero, we need to normalize and resume above.
	orrs	r6, xl, xh, lsl #1
	do_it	ne
	COND(orr,s,ne)	r6, yl, yh, lsl #1
	bne	LSYM(Lml_d)

	@ One or both arguments are 0.
	orrs	r4, xl, xh, lsl #1
	bne	LSYM(Lml_i)		@ <non_zero> / 0 -> INF
	orrs	r5, yl, yh, lsl #1
	bne	LSYM(Lml_z)		@ 0 / <non_zero> -> 0
	b	LSYM(Lml_n)		@ 0 / 0 -> NAN

	FUNC_END aeabi_ddiv
	FUNC_END divdf3

#endif /* L_muldivdf3 */

#ifdef L_arm_cmpdf2

@ Note: only r0 (return value) and ip are clobbered here.

ARM_FUNC_START gtdf2
ARM_FUNC_ALIAS gedf2 gtdf2
	mov	ip, #-1
	b	1f

ARM_FUNC_START ltdf2
ARM_FUNC_ALIAS ledf2 ltdf2
	mov	ip, #1
	b	1f

ARM_FUNC_START cmpdf2
ARM_FUNC_ALIAS nedf2 cmpdf2
ARM_FUNC_ALIAS eqdf2 cmpdf2
	mov	ip, #1			@ how should we specify unordered here?

1:	str	ip, [sp, #-4]!

	@ Trap any INF/NAN first.
	mov	ip, xh, lsl #1
	mvns	ip, ip, asr #21
	mov	ip, yh, lsl #1
	do_it	ne
	COND(mvn,s,ne)	ip, ip, asr #21
	beq	3f

	@ Test for equality.
	@ Note that 0.0 is equal to -0.0.
2:	add	sp, sp, #4
	orrs	ip, xl, xh, lsl #1	@ if x == 0.0 or -0.0
	do_it	eq, e
	COND(orr,s,eq)	ip, yl, yh, lsl #1	@ and y == 0.0 or -0.0
	teqne	xh, yh			@ or xh == yh
	do_it	eq, tt
	teqeq	xl, yl			@ and xl == yl
	moveq	r0, #0			@ then equal.
	RETc(eq)

	@ Clear C flag
	cmn	r0, #0

	@ Compare sign,
	teq	xh, yh

	@ Compare values if same sign
	do_it	pl
	cmppl	xh, yh
	do_it	eq
	cmpeq	xl, yl

	@ Result:
	do_it	cs, e
	movcs	r0, yh, asr #31
	mvncc	r0, yh, asr #31
	orr	r0, r0, #1
	RET

	@ Look for a NAN.
3:	mov	ip, xh, lsl #1
	mvns	ip, ip, asr #21
	bne	4f
	orrs	ip, xl, xh, lsl #12
	bne	5f			@ x is NAN
4:	mov	ip, yh, lsl #1
	mvns	ip, ip, asr #21
	bne	2b
	orrs	ip, yl, yh, lsl #12
	beq	2b			@ y is not NAN
5:	ldr	r0, [sp], #4		@ unordered return code
	RET

	FUNC_END gedf2
	FUNC_END gtdf2
	FUNC_END ledf2
	FUNC_END ltdf2
	FUNC_END nedf2
	FUNC_END eqdf2
	FUNC_END cmpdf2

ARM_FUNC_START aeabi_cdrcmple

	mov	ip, r0
	mov	r0, r2
	mov	r2, ip
	mov	ip, r1
	mov	r1, r3
	mov	r3, ip
	b	6f

ARM_FUNC_START aeabi_cdcmpeq
ARM_FUNC_ALIAS aeabi_cdcmple aeabi_cdcmpeq

	@ The status-returning routines are required to preserve all
	@ registers except ip, lr, and cpsr.
6:	do_push	{r0, lr}
	ARM_CALL cmpdf2
	@ Set the Z flag correctly, and the C flag unconditionally.
	cmp	r0, #0
	@ Clear the C flag if the return value was -1, indicating
	@ that the first operand was smaller than the second.
	do_it	mi
	cmnmi	r0, #0
	RETLDM	"r0"

	FUNC_END aeabi_cdcmple
	FUNC_END aeabi_cdcmpeq
	FUNC_END aeabi_cdrcmple

ARM_FUNC_START	aeabi_dcmpeq

	str	lr, [sp, #-8]!
	ARM_CALL aeabi_cdcmple
	do_it	eq, e
	moveq	r0, #1			@ Equal to.
	movne	r0, #0			@ Less than, greater than, or unordered.
	RETLDM

	FUNC_END aeabi_dcmpeq

ARM_FUNC_START	aeabi_dcmplt

	str	lr, [sp, #-8]!
	ARM_CALL aeabi_cdcmple
	do_it	cc, e
	movcc	r0, #1			@ Less than.
	movcs	r0, #0			@ Equal to, greater than, or unordered.
	RETLDM

	FUNC_END aeabi_dcmplt

ARM_FUNC_START	aeabi_dcmple

	str	lr, [sp, #-8]!
	ARM_CALL aeabi_cdcmple
	do_it	ls, e
	movls	r0, #1			@ Less than or equal to.
	movhi	r0, #0			@ Greater than or unordered.
	RETLDM

	FUNC_END aeabi_dcmple

ARM_FUNC_START	aeabi_dcmpge

	str	lr, [sp, #-8]!
	ARM_CALL aeabi_cdrcmple
	do_it	ls, e
	movls	r0, #1			@ Operand 2 is less than or equal to operand 1.
	movhi	r0, #0			@ Operand 2 greater than operand 1, or unordered.
	RETLDM

	FUNC_END aeabi_dcmpge

ARM_FUNC_START	aeabi_dcmpgt

	str	lr, [sp, #-8]!
	ARM_CALL aeabi_cdrcmple
	do_it	cc, e
	movcc	r0, #1			@ Operand 2 is less than operand 1.
	movcs	r0, #0			@ Operand 2 is greater than or equal to operand 1,
					@ or they are unordered.
	RETLDM

	FUNC_END aeabi_dcmpgt

#endif /* L_cmpdf2 */

#ifdef L_arm_unorddf2

ARM_FUNC_START unorddf2
ARM_FUNC_ALIAS aeabi_dcmpun unorddf2

	mov	ip, xh, lsl #1
	mvns	ip, ip, asr #21
	bne	1f
	orrs	ip, xl, xh, lsl #12
	bne	3f			@ x is NAN
1:	mov	ip, yh, lsl #1
	mvns	ip, ip, asr #21
	bne	2f
	orrs	ip, yl, yh, lsl #12
	bne	3f			@ y is NAN
2:	mov	r0, #0			@ arguments are ordered.
	RET
3:	mov	r0, #1			@ arguments are unordered.
	RET

	FUNC_END aeabi_dcmpun
	FUNC_END unorddf2

#endif /* L_unorddf2 */

#ifdef L_arm_fixdfsi

ARM_FUNC_START fixdfsi
ARM_FUNC_ALIAS aeabi_d2iz fixdfsi

	@ check exponent range.
	mov	r2, xh, lsl #1
	adds	r2, r2, #(1 << 21)
	bcs	2f			@ value is INF or NAN
	bpl	1f			@ value is too small
	mov	r3, #(0xfffffc00 + 31)
	subs	r2, r3, r2, asr #21
	bls	3f			@ value is too large

	@ scale value
	mov	r3, xh, lsl #11
	orr	r3, r3, #0x80000000
	orr	r3, r3, xl, lsr #21
	tst	xh, #0x80000000		@ the sign bit
	shift1	lsr, r0, r3, r2
	do_it	ne
	rsbne	r0, r0, #0
	RET

1:	mov	r0, #0
	RET

2:	orrs	xl, xl, xh, lsl #12
	bne	4f			@ x is NAN.
3:	ands	r0, xh, #0x80000000	@ the sign bit
	do_it	eq
	moveq	r0, #0x7fffffff		@ maximum signed positive si
	RET

4:	mov	r0, #0			@ How should we convert NAN?
	RET

	FUNC_END aeabi_d2iz
	FUNC_END fixdfsi

#endif /* L_fixdfsi */

#ifdef L_arm_fixunsdfsi

ARM_FUNC_START fixunsdfsi
ARM_FUNC_ALIAS aeabi_d2uiz fixunsdfsi

	@ check exponent range.
	movs	r2, xh, lsl #1
	bcs	1f			@ value is negative
	adds	r2, r2, #(1 << 21)
	bcs	2f			@ value is INF or NAN
	bpl	1f			@ value is too small
	mov	r3, #(0xfffffc00 + 31)
	subs	r2, r3, r2, asr #21
	bmi	3f			@ value is too large

	@ scale value
	mov	r3, xh, lsl #11
	orr	r3, r3, #0x80000000
	orr	r3, r3, xl, lsr #21
	shift1	lsr, r0, r3, r2
	RET

1:	mov	r0, #0
	RET

2:	orrs	xl, xl, xh, lsl #12
	bne	4f			@ value is NAN.
3:	mov	r0, #0xffffffff		@ maximum unsigned si
	RET

4:	mov	r0, #0			@ How should we convert NAN?
	RET

	FUNC_END aeabi_d2uiz
	FUNC_END fixunsdfsi

#endif /* L_fixunsdfsi */

#ifdef L_arm_truncdfsf2

ARM_FUNC_START truncdfsf2
ARM_FUNC_ALIAS aeabi_d2f truncdfsf2

	@ check exponent range.
	mov	r2, xh, lsl #1
	subs	r3, r2, #((1023 - 127) << 21)
	do_it	cs, t
	COND(sub,s,cs)	ip, r3, #(1 << 21)
	COND(rsb,s,cs)	ip, ip, #(254 << 21)
	bls	2f			@ value is out of range

1:	@ shift and round mantissa
	and	ip, xh, #0x80000000
	mov	r2, xl, lsl #3
	orr	xl, ip, xl, lsr #29
	cmp	r2, #0x80000000
	adc	r0, xl, r3, lsl #2
	do_it	eq
	biceq	r0, r0, #1
	RET

2:	@ either overflow or underflow
	tst	xh, #0x40000000
	bne	3f			@ overflow

	@ check if denormalized value is possible
	adds	r2, r3, #(23 << 21)
	do_it	lt, t
	andlt	r0, xh, #0x80000000	@ too small, return signed 0.
	RETc(lt)

	@ denormalize value so we can resume with the code above afterwards.
	orr	xh, xh, #0x00100000
	mov	r2, r2, lsr #21
	rsb	r2, r2, #24
	rsb	ip, r2, #32
#if defined(__thumb2__)
	lsls	r3, xl, ip
#else
	movs	r3, xl, lsl ip
#endif
	shift1	lsr, xl, xl, r2
	do_it	ne
	orrne	xl, xl, #1		@ fold r3 for rounding considerations.
	mov	r3, xh, lsl #11
	mov	r3, r3, lsr #11
	shiftop orr xl xl r3 lsl ip ip
	shift1	lsr, r3, r3, r2
	mov	r3, r3, lsl #1
	b	1b

3:	@ check for NAN
	mvns	r3, r2, asr #21
	bne	5f			@ simple overflow
	orrs	r3, xl, xh, lsl #12
	do_it	ne, tt
	movne	r0, #0x7f000000
	orrne	r0, r0, #0x00c00000
	RETc(ne)			@ return NAN

5:	@ return INF with sign
	and	r0, xh, #0x80000000
	orr	r0, r0, #0x7f000000
	orr	r0, r0, #0x00800000
	RET

	FUNC_END aeabi_d2f
	FUNC_END truncdfsf2

#endif /* L_truncdfsf2 */
