@ libgcc routines for ARM cpu.
@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)

/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005, 2007, 2008,
   2009, 2010 Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

/* An executable stack is *not* required for these functions.  */
#if defined(__ELF__) && defined(__linux__)
.section .note.GNU-stack,"",%progbits
.previous
#endif  /* __ELF__ and __linux__ */

#ifdef __ARM_EABI__
/* Some attributes that are common to all routines in this file.  */
	/* Tag_ABI_align_needed: This code does not require 8-byte
	   alignment from the caller.  */
	/* .eabi_attribute 24, 0  -- default setting.  */
	/* Tag_ABI_align_preserved: This code preserves 8-byte
	   alignment in any callee.  */
	.eabi_attribute 25, 1
#endif /* __ARM_EABI__ */

/* ------------------------------------------------------------------------ */

/* We need to know what prefix to add to function names.  */

#ifndef __USER_LABEL_PREFIX__
#error  __USER_LABEL_PREFIX__ not defined
#endif

/* ANSI concatenation macros.  */

#define CONCAT1(a, b) CONCAT2(a, b)
#define CONCAT2(a, b) a ## b

/* Use the right prefix for global labels.  */

#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)

#ifdef __ELF__
#ifdef __thumb__
#define __PLT__  /* Not supported in Thumb assembler (for now).  */
#elif defined __vxworks && !defined __PIC__
#define __PLT__ /* Not supported by the kernel loader.  */
#else
#define __PLT__ (PLT)
#endif
#define TYPE(x) .type SYM(x),function
#define SIZE(x) .size SYM(x), . - SYM(x)
#define LSYM(x) .x
#else
#define __PLT__
#define TYPE(x)
#define SIZE(x)
#define LSYM(x) x
#endif
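/* For example, on an ELF target __USER_LABEL_PREFIX__ is empty, so
   SYM (__udivsi3) expands to the global name __udivsi3 and LSYM (Ldiv0)
   to .Ldiv0, which the assembler keeps out of the symbol table; a target
   with a "_" prefix would instead produce ___udivsi3.  */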
/* Function end macros.  Variants for interworking.  */

#if defined(__ARM_ARCH_2__)
# define __ARM_ARCH__ 2
#endif

#if defined(__ARM_ARCH_3__)
# define __ARM_ARCH__ 3
#endif

#if defined(__ARM_ARCH_3M__) || defined(__ARM_ARCH_4__) \
	|| defined(__ARM_ARCH_4T__)
/* We use __ARM_ARCH__ set to 4 here, but in reality it's any processor with
   long multiply instructions.  That includes v3M.  */
# define __ARM_ARCH__ 4
#endif

#if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \
	|| defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
	|| defined(__ARM_ARCH_5TEJ__)
# define __ARM_ARCH__ 5
#endif

#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
	|| defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \
	|| defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) \
	|| defined(__ARM_ARCH_6M__)
# define __ARM_ARCH__ 6
#endif

#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
	|| defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
	|| defined(__ARM_ARCH_7EM__)
# define __ARM_ARCH__ 7
#endif

#ifndef __ARM_ARCH__
#error Unable to determine architecture.
#endif

/* There are times when we might prefer Thumb1 code even if ARM code is
   permitted, for example, the code might be smaller, or there might be
   interworking problems with switching to ARM state if interworking is
   disabled.  */
#if (defined(__thumb__)			\
     && !defined(__thumb2__)		\
     && (!defined(__THUMB_INTERWORK__)	\
	 || defined (__OPTIMIZE_SIZE__)	\
	 || defined(__ARM_ARCH_6M__)))
# define __prefer_thumb__
#endif

/* How to return from a function call depends on the architecture variant.  */

#if (__ARM_ARCH__ > 4) || defined(__ARM_ARCH_4T__)

# define RET		bx	lr
# define RETc(x)	bx##x	lr

/* Special precautions for interworking on armv4t.  */
# if (__ARM_ARCH__ == 4)

/* Always use bx, not ldr pc.  */
#  if (defined(__thumb__) || defined(__THUMB_INTERWORK__))
#   define __INTERWORKING__
#  endif /* __THUMB__ || __THUMB_INTERWORK__ */

/* Include thumb stub before arm mode code.  */
#  if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
#   define __INTERWORKING_STUBS__
#  endif /* __thumb__ && !__THUMB_INTERWORK__ */

#endif /* __ARM_ARCH == 4 */

#else

# define RET		mov	pc, lr
# define RETc(x)	mov##x	pc, lr

#endif

.macro	cfi_pop		advance, reg, cfa_offset
#ifdef __ELF__
	.pushsection	.debug_frame
	.byte	0x4		/* DW_CFA_advance_loc4 */
	.4byte	\advance
	.byte	(0xc0 | \reg)	/* DW_CFA_restore */
	.byte	0xe		/* DW_CFA_def_cfa_offset */
	.uleb128 \cfa_offset
	.popsection
#endif
.endm

.macro	cfi_push	advance, reg, offset, cfa_offset
#ifdef __ELF__
	.pushsection	.debug_frame
	.byte	0x4		/* DW_CFA_advance_loc4 */
	.4byte	\advance
	.byte	(0x80 | \reg)	/* DW_CFA_offset */
	.uleb128 (\offset / -4)
	.byte	0xe		/* DW_CFA_def_cfa_offset */
	.uleb128 \cfa_offset
	.popsection
#endif
.endm

.macro	cfi_start	start_label, end_label
#ifdef __ELF__
	.pushsection	.debug_frame
LSYM(Lstart_frame):
	.4byte	LSYM(Lend_cie) - LSYM(Lstart_cie) @ Length of CIE
LSYM(Lstart_cie):
	.4byte	0xffffffff	@ CIE Identifier Tag
	.byte	0x1	@ CIE Version
	.ascii	"\0"	@ CIE Augmentation
	.uleb128 0x1	@ CIE Code Alignment Factor
	.sleb128 -4	@ CIE Data Alignment Factor
	.byte	0xe	@ CIE RA Column
	.byte	0xc	@ DW_CFA_def_cfa
	.uleb128 0xd
	.uleb128 0x0

	.align 2
LSYM(Lend_cie):
	.4byte	LSYM(Lend_fde)-LSYM(Lstart_fde)	@ FDE Length
LSYM(Lstart_fde):
	.4byte	LSYM(Lstart_frame)	@ FDE CIE offset
	.4byte	\start_label	@ FDE initial location
	.4byte	\end_label-\start_label	@ FDE address range
	.popsection
#endif
.endm

.macro	cfi_end	end_label
#ifdef __ELF__
	.pushsection	.debug_frame
	.align	2
LSYM(Lend_fde):
	.popsection
\end_label:
#endif
.endm
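/* The cfi_* macros above hand-assemble DWARF .debug_frame records so that
   debuggers can unwind through these routines: cfi_start emits the CIE and
   opens an FDE covering the code between its two labels, cfi_push records
   at a given code offset that \reg was saved \offset bytes from the CFA
   (\offset is negative) with the CFA now at sp + \cfa_offset, and cfi_pop
   marks \reg as restored again.  Only the few routines here that actually
   touch the stack need them.  */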
/* Don't pass dirn, it's there just to get token pasting right.  */

.macro	RETLDM	regs=, cond=, unwind=, dirn=ia
#if defined (__INTERWORKING__)
	.ifc "\regs",""
	ldr\cond	lr, [sp], #8
	.else
# if defined(__thumb2__)
	pop\cond	{\regs, lr}
# else
	ldm\cond\dirn	sp!, {\regs, lr}
# endif
	.endif
	.ifnc "\unwind", ""
	/* Mark LR as restored.  */
97:	cfi_pop 97b - \unwind, 0xe, 0x0
	.endif
	bx\cond	lr
#else
	/* Caller is responsible for providing IT instruction.  */
	.ifc "\regs",""
	ldr\cond	pc, [sp], #8
	.else
# if defined(__thumb2__)
	pop\cond	{\regs, pc}
# else
	ldm\cond\dirn	sp!, {\regs, pc}
# endif
	.endif
#endif
.endm

/* The Unified assembly syntax allows the same code to be assembled for both
   ARM and Thumb-2.  However this is only supported by recent gas, so define
   a set of macros to allow ARM code on older assemblers.  */
#if defined(__thumb2__)
.macro	do_it	cond, suffix=""
	it\suffix	\cond
.endm
.macro	shift1	op, arg0, arg1, arg2
	\op	\arg0, \arg1, \arg2
.endm
#define do_push	push
#define do_pop	pop
#define COND(op1, op2, cond) op1 ## op2 ## cond
/* Perform an arithmetic operation with a variable shift operand.  This
   requires two instructions and a scratch register on Thumb-2.  */
.macro	shiftop name, dest, src1, src2, shiftop, shiftreg, tmp
	\shiftop \tmp, \src2, \shiftreg
	\name \dest, \src1, \tmp
.endm
#else
.macro	do_it	cond, suffix=""
.endm
.macro	shift1	op, arg0, arg1, arg2
	mov	\arg0, \arg1, \op \arg2
.endm
#define do_push	stmfd sp!,
#define do_pop	ldmfd sp!,
#define COND(op1, op2, cond) op1 ## cond ## op2
.macro	shiftop name, dest, src1, src2, shiftop, shiftreg, tmp
	\name \dest, \src1, \src2, \shiftop \shiftreg
.endm
#endif

#ifdef __ARM_EABI__
.macro	ARM_LDIV0 name signed
	cmp	r0, #0
	.ifc	\signed, unsigned
	movne	r0, #0xffffffff
	.else
	movgt	r0, #0x7fffffff
	movlt	r0, #0x80000000
	.endif
	b	SYM (__aeabi_idiv0) __PLT__
.endm
#else
.macro	ARM_LDIV0 name signed
	str	lr, [sp, #-8]!
98:	cfi_push 98b - __\name, 0xe, -0x8, 0x8
	bl	SYM (__div0) __PLT__
	mov	r0, #0			@ About as wrong as it could be.
	RETLDM	unwind=98b
.endm
#endif

#ifdef __ARM_EABI__
.macro	THUMB_LDIV0 name signed
#if defined(__ARM_ARCH_6M__)
	.ifc \signed, unsigned
	cmp	r0, #0
	beq	1f
	mov	r0, #0
	mvn	r0, r0		@ 0xffffffff
1:
	.else
	cmp	r0, #0
	beq	2f
	blt	3f
	mov	r0, #0
	mvn	r0, r0
	lsr	r0, r0, #1	@ 0x7fffffff
	b	2f
3:	mov	r0, #0x80
	lsl	r0, r0, #24	@ 0x80000000
2:
	.endif
	push	{r0, r1, r2}
	ldr	r0, 4f
	adr	r1, 4f
	add	r0, r1
	str	r0, [sp, #8]
	@ We know we are not on armv4t, so pop pc is safe.
	pop	{r0, r1, pc}
	.align	2
4:
	.word	__aeabi_idiv0 - 4b
#elif defined(__thumb2__)
	.syntax unified
	.ifc \signed, unsigned
	cbz	r0, 1f
	mov	r0, #0xffffffff
1:
	.else
	cmp	r0, #0
	do_it	gt
	movgt	r0, #0x7fffffff
	do_it	lt
	movlt	r0, #0x80000000
	.endif
	b.w	SYM(__aeabi_idiv0) __PLT__
#else
	.align	2
	bx	pc
	nop
	.arm
	cmp	r0, #0
	.ifc	\signed, unsigned
	movne	r0, #0xffffffff
	.else
	movgt	r0, #0x7fffffff
	movlt	r0, #0x80000000
	.endif
	b	SYM(__aeabi_idiv0) __PLT__
	.thumb
#endif
.endm
#else
.macro	THUMB_LDIV0 name signed
	push	{ r1, lr }
98:	cfi_push 98b - __\name, 0xe, -0x4, 0x8
	bl	SYM (__div0)
	mov	r0, #0			@ About as wrong as it could be.
#if defined (__INTERWORKING__)
	pop	{ r1, r2 }
	bx	r2
#else
	pop	{ r1, pc }
#endif
.endm
#endif

.macro FUNC_END name
	SIZE (__\name)
.endm

.macro DIV_FUNC_END name signed
	cfi_start	__\name, LSYM(Lend_div0)
LSYM(Ldiv0):
#ifdef __thumb__
	THUMB_LDIV0 \name \signed
#else
	ARM_LDIV0 \name \signed
#endif
	cfi_end	LSYM(Lend_div0)
	FUNC_END \name
.endm

.macro THUMB_FUNC_START name
	.globl	SYM (\name)
	TYPE	(\name)
	.thumb_func
SYM (\name):
.endm

/* Function start macros.  Variants for ARM and Thumb.  */

#ifdef __thumb__
#define THUMB_FUNC .thumb_func
#define THUMB_CODE .force_thumb
# if defined(__thumb2__)
#define THUMB_SYNTAX .syntax divided
# else
#define THUMB_SYNTAX
# endif
#else
#define THUMB_FUNC
#define THUMB_CODE
#define THUMB_SYNTAX
#endif

.macro FUNC_START name
	.text
	.globl SYM (__\name)
	TYPE (__\name)
	.align 0
	THUMB_CODE
	THUMB_FUNC
	THUMB_SYNTAX
SYM (__\name):
.endm
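/* A typical routine in this file is therefore bracketed as

	FUNC_START udivsi3
	...
	FUNC_END udivsi3

   which emits the .text header, the .globl/.type directives and the
   SYM (__udivsi3) entry label, and finally the matching .size directive
   on ELF targets.  */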
/* Special function that will always be coded in ARM assembly, even if
   in Thumb-only compilation.  */

#if defined(__thumb2__)

/* For Thumb-2 we build everything in thumb mode.  */
.macro	ARM_FUNC_START name
	FUNC_START \name
	.syntax unified
.endm
#define EQUIV .thumb_set
.macro	ARM_CALL name
	bl	__\name
.endm

#elif defined(__INTERWORKING_STUBS__)

.macro	ARM_FUNC_START name
	FUNC_START \name
	bx	pc
	nop
	.arm
/* A hook to tell gdb that we've switched to ARM mode.  Also used to call
   directly from other local arm routines.  */
_L__\name:
.endm
#define EQUIV .thumb_set
/* Branch directly to a function declared with ARM_FUNC_START.
   Must be called in arm mode.  */
.macro	ARM_CALL name
	bl	_L__\name
.endm

#else /* !(__INTERWORKING_STUBS__ || __thumb2__) */

#ifdef __ARM_ARCH_6M__
#define EQUIV .thumb_set
#else
.macro	ARM_FUNC_START name
	.text
	.globl SYM (__\name)
	TYPE (__\name)
	.align 0
	.arm
SYM (__\name):
.endm
#define EQUIV .set
.macro	ARM_CALL name
	bl	__\name
.endm
#endif

#endif

.macro	FUNC_ALIAS new old
	.globl	SYM (__\new)
#if defined (__thumb__)
	.thumb_set	SYM (__\new), SYM (__\old)
#else
	.set	SYM (__\new), SYM (__\old)
#endif
.endm

#ifndef __ARM_ARCH_6M__
.macro	ARM_FUNC_ALIAS new old
	.globl	SYM (__\new)
	EQUIV	SYM (__\new), SYM (__\old)
#if defined(__INTERWORKING_STUBS__)
	.set	SYM (_L__\new), SYM (_L__\old)
#endif
.endm
#endif

#ifdef __ARMEB__
#define xxh r0
#define xxl r1
#define yyh r2
#define yyl r3
#else
#define xxh r1
#define xxl r0
#define yyh r3
#define yyl r2
#endif

#ifdef __ARM_EABI__
.macro	WEAK name
	.weak SYM (__\name)
.endm
#endif

#ifdef __thumb__
/* Register aliases.  */

work		.req	r4	@ XXXX is this safe ?
dividend	.req	r0
divisor		.req	r1
overdone	.req	r2
result		.req	r2
curbit		.req	r3
#endif
#if 0
ip		.req	r12
sp		.req	r13
lr		.req	r14
pc		.req	r15
#endif

/* ------------------------------------------------------------------------ */
/*		Bodies of the division and modulo routines.		    */
/* ------------------------------------------------------------------------ */
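/* The divide and modulo bodies below all implement the classic binary
   shift-and-subtract (restoring division) algorithm.  A rough C sketch of
   the unsigned case, assuming a non-zero divisor (the entry points have
   already checked for zero):

	unsigned udiv (unsigned n, unsigned d)
	{
	  unsigned bit = 1, res = 0;
	  while (d < n && !(d & 0x80000000))
	    {
	      d <<= 1;			// align divisor with dividend
	      bit <<= 1;
	    }
	  while (bit)
	    {
	      if (n >= d)		// subtract and record a result bit
		{
		  n -= d;
		  res |= bit;
		}
	      d >>= 1;			// shift back down
	      bit >>= 1;
	    }
	  return res;			// the remainder is left in n
	}

   The bodies unroll this loop four bits per iteration, and the ARMv5+
   fast paths use clz and a computed branch into a fully unrolled sequence
   instead.  */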
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__)

#if defined (__thumb2__)
	clz	\curbit, \dividend
	clz	\result, \divisor
	sub	\curbit, \result, \curbit
	rsb	\curbit, \curbit, #31
	adr	\result, 1f
	add	\curbit, \result, \curbit, lsl #4
	mov	\result, #0
	mov	pc, \curbit
.p2align 3
1:
	.set	shift, 32
	.rept	32
	.set	shift, shift - 1
	cmp.w	\dividend, \divisor, lsl #shift
	nop.n
	adc.w	\result, \result, \result
	it	cs
	subcs.w	\dividend, \dividend, \divisor, lsl #shift
	.endr
#else
	clz	\curbit, \dividend
	clz	\result, \divisor
	sub	\curbit, \result, \curbit
	rsbs	\curbit, \curbit, #31
	addne	\curbit, \curbit, \curbit, lsl #1
	mov	\result, #0
	addne	pc, pc, \curbit, lsl #2
	nop
	.set	shift, 32
	.rept	32
	.set	shift, shift - 1
	cmp	\dividend, \divisor, lsl #shift
	adc	\result, \result, \result
	subcs	\dividend, \dividend, \divisor, lsl #shift
	.endr
#endif

#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */

#if __ARM_ARCH__ >= 5

	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result
	mov	\result, #0

#else /* __ARM_ARCH__ < 5 */

	@ Initially shift the divisor left 3 bits if possible,
	@ set curbit accordingly.  This allows for curbit to be located
	@ at the left end of each 4-bit nibbles in the division loop
	@ to save one loop in most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0

#endif /* __ARM_ARCH__ < 5 */

	@ Division loop
1:	cmp	\dividend, \divisor
	do_it	hs, t
	subhs	\dividend, \dividend, \divisor
	orrhs	\result,   \result,   \curbit
	cmp	\dividend, \divisor,  lsr #1
	do_it	hs, t
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result,   \result,   \curbit,  lsr #1
	cmp	\dividend, \divisor,  lsr #2
	do_it	hs, t
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result,   \result,   \curbit,  lsr #2
	cmp	\dividend, \divisor,  lsr #3
	do_it	hs, t
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result,   \result,   \curbit,  lsr #3
	cmp	\dividend, #0			@ Early termination?
	do_it	ne, t
	movnes	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
	movne	\divisor,  \divisor, lsr #4
	bne	1b

#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */

.endm
/* ------------------------------------------------------------------------ */
.macro ARM_DIV2_ORDER divisor, order

#if __ARM_ARCH__ >= 5
	clz	\order, \divisor
	rsb	\order, \order, #31
#else
	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1
#endif
.endm
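/* ARM_DIV2_ORDER computes only the base-2 logarithm of a (known)
   power-of-two divisor; the callers use it to reduce such divisions to a
   single right shift, e.g. n / 8 becomes n >> 3.  */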
/* ------------------------------------------------------------------------ */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__)

	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	rsbs	\order, \order, #31
	addne	pc, pc, \order, lsl #3
	nop
	.set	shift, 32
	.rept	32
	.set	shift, shift - 1
	cmp	\dividend, \divisor, lsl #shift
	subcs	\dividend, \dividend, \divisor, lsl #shift
	.endr

#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */

#if __ARM_ARCH__ >= 5

	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else /* __ARM_ARCH__ < 5 */

	mov	\order, #0

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	1b

#endif /* __ARM_ARCH__ < 5 */

	@ Perform all needed subtractions to keep only the remainder.
	@ Do comparisons in batches of 4 first.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	cmp	\dividend, #1
	mov	\divisor, \divisor, lsr #4
	subges	\order, \order, #4
	bge	1b

	tst	\order, #3
	teqne	\dividend, #0
	beq	5f

	@ Either 1, 2 or 3 comparison/subtractions are left.
2:	cmn	\order, #2
	blt	4f
	beq	3f
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:

#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */

.endm
/* ------------------------------------------------------------------------ */
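/* THUMB_DIV_MOD_BODY below is shared by the Thumb-1 divide and modulo
   entry points.  With \modulo = 0 it leaves the quotient in `result';
   with \modulo = 1 it leaves the remainder in `dividend', using
   `overdone' to record, and afterwards undo, any subtractions that the
   final, partially shifted-out pass should not have made.  */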
.macro THUMB_DIV_MOD_BODY modulo
	@ Load the constant 0x10000000 into our work register.
	mov	work, #1
	lsl	work, #28
LSYM(Loop1):
	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
	cmp	divisor, work
	bhs	LSYM(Lbignum)
	cmp	divisor, dividend
	bhs	LSYM(Lbignum)
	lsl	divisor, #4
	lsl	curbit,  #4
	b	LSYM(Loop1)
LSYM(Lbignum):
	@ Set work to 0x80000000
	lsl	work, #3
LSYM(Loop2):
	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
	cmp	divisor, work
	bhs	LSYM(Loop3)
	cmp	divisor, dividend
	bhs	LSYM(Loop3)
	lsl	divisor, #1
	lsl	curbit,  #1
	b	LSYM(Loop2)
LSYM(Loop3):
	@ Test for possible subtractions ...
  .if \modulo
	@ ... On the final pass, this may subtract too much from the dividend,
	@ so keep track of which subtractions are done, so that we can fix
	@ them up afterwards.
	mov	overdone, #0
	cmp	dividend, divisor
	blo	LSYM(Lover1)
	sub	dividend, dividend, divisor
LSYM(Lover1):
	lsr	work, divisor, #1
	cmp	dividend, work
	blo	LSYM(Lover2)
	sub	dividend, dividend, work
	mov	ip, curbit
	mov	work, #1
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
LSYM(Lover2):
	lsr	work, divisor, #2
	cmp	dividend, work
	blo	LSYM(Lover3)
	sub	dividend, dividend, work
	mov	ip, curbit
	mov	work, #2
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
LSYM(Lover3):
	lsr	work, divisor, #3
	cmp	dividend, work
	blo	LSYM(Lover4)
	sub	dividend, dividend, work
	mov	ip, curbit
	mov	work, #3
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
LSYM(Lover4):
	mov	ip, curbit
  .else
	@ ... and note which bits are done in the result.  On the final pass,
	@ this may subtract too much from the dividend, but the result will
	@ be ok, since the "bit" will have been shifted out at the bottom.
	cmp	dividend, divisor
	blo	LSYM(Lover1)
	sub	dividend, dividend, divisor
	orr	result, result, curbit
LSYM(Lover1):
	lsr	work, divisor, #1
	cmp	dividend, work
	blo	LSYM(Lover2)
	sub	dividend, dividend, work
	lsr	work, curbit, #1
	orr	result, work
LSYM(Lover2):
	lsr	work, divisor, #2
	cmp	dividend, work
	blo	LSYM(Lover3)
	sub	dividend, dividend, work
	lsr	work, curbit, #2
	orr	result, work
LSYM(Lover3):
	lsr	work, divisor, #3
	cmp	dividend, work
	blo	LSYM(Lover4)
	sub	dividend, dividend, work
	lsr	work, curbit, #3
	orr	result, work
LSYM(Lover4):
  .endif

	cmp	dividend, #0			@ Early termination?
	beq	LSYM(Lover5)
	lsr	curbit,  #4			@ No, any more bits to do?
	beq	LSYM(Lover5)
	lsr	divisor, #4
	b	LSYM(Loop3)
LSYM(Lover5):
  .if \modulo
	@ Any subtractions that we should not have done will be recorded in
	@ the top three bits of "overdone".  Exactly which were not needed
	@ are governed by the position of the bit, stored in ip.
	mov	work, #0xe
	lsl	work, #28
	and	overdone, work
	beq	LSYM(Lgot_result)

	@ If we terminated early, because dividend became zero, then the
	@ bit in ip will not be in the bottom nibble, and we should not
	@ perform the additions below.  We must test for this though
	@ (rather than relying upon the TSTs to prevent the additions)
	@ since the bit in ip could be in the top two bits which might
	@ then match with one of the smaller RORs.
	mov	curbit, ip
	mov	work, #0x7
	tst	curbit, work
	beq	LSYM(Lgot_result)

	mov	curbit, ip
	mov	work, #3
	ror	curbit, work
	tst	overdone, curbit
	beq	LSYM(Lover6)
	lsr	work, divisor, #3
	add	dividend, work
LSYM(Lover6):
	mov	curbit, ip
	mov	work, #2
	ror	curbit, work
	tst	overdone, curbit
	beq	LSYM(Lover7)
	lsr	work, divisor, #2
	add	dividend, work
LSYM(Lover7):
	mov	curbit, ip
	mov	work, #1
	ror	curbit, work
	tst	overdone, curbit
	beq	LSYM(Lgot_result)
	lsr	work, divisor, #1
	add	dividend, work
  .endif
LSYM(Lgot_result):
.endm

/* ------------------------------------------------------------------------ */
/*		Start of the Real Functions				    */
/* ------------------------------------------------------------------------ */
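/* In C terms, the entry points below behave as follows: __udivsi3 and
   __aeabi_uidiv return the unsigned quotient r0 / r1 in r0;
   __aeabi_uidivmod additionally returns the remainder in r1; __umodsi3
   returns only the remainder.  The signed variants (__divsi3, __modsi3,
   __aeabi_idiv, __aeabi_idivmod) are analogous.  A zero divisor is
   diverted to __aeabi_idiv0 (or __div0 on non-EABI targets).  */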
#ifdef L_udivsi3

#if defined(__prefer_thumb__)

	FUNC_START udivsi3
	FUNC_ALIAS aeabi_uidiv udivsi3

	cmp	divisor, #0
	beq	LSYM(Ldiv0)
LSYM(udivsi3_skip_div0_test):
	mov	curbit, #1
	mov	result, #0

	push	{ work }
	cmp	dividend, divisor
	blo	LSYM(Lgot_result)

	THUMB_DIV_MOD_BODY 0

	mov	r0, result
	pop	{ work }
	RET

#elif defined(__ARM_ARCH_EXT_IDIV__)

	ARM_FUNC_START udivsi3
	ARM_FUNC_ALIAS aeabi_uidiv udivsi3

	cmp	r1, #0
	beq	LSYM(Ldiv0)

	udiv	r0, r0, r1
	RET

#else /* ARM version/Thumb-2.  */

	ARM_FUNC_START udivsi3
	ARM_FUNC_ALIAS aeabi_uidiv udivsi3

	/* Note: if called via udivsi3_skip_div0_test, this will unnecessarily
	   check for division-by-zero a second time.  */
LSYM(udivsi3_skip_div0_test):
	subs	r2, r1, #1
	do_it	eq
	RETc(eq)
	bcc	LSYM(Ldiv0)
	cmp	r0, r1
	bls	11f
	tst	r1, r2
	beq	12f

	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2
	RET

11:	do_it	eq, e
	moveq	r0, #1
	movne	r0, #0
	RET

12:	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2
	RET

#endif /* ARM version */

	DIV_FUNC_END udivsi3 unsigned

#if defined(__prefer_thumb__)
FUNC_START aeabi_uidivmod
	cmp	r1, #0
	beq	LSYM(Ldiv0)
	push	{r0, r1, lr}
	bl	LSYM(udivsi3_skip_div0_test)
	POP	{r1, r2, r3}
	mul	r2, r0
	sub	r1, r1, r2
	bx	r3
#elif defined(__ARM_ARCH_EXT_IDIV__)
ARM_FUNC_START aeabi_uidivmod
	cmp	r1, #0
	beq	LSYM(Ldiv0)
	mov	r2, r0
	udiv	r0, r0, r1
	mls	r1, r0, r1, r2
	RET
#else
ARM_FUNC_START aeabi_uidivmod
	cmp	r1, #0
	beq	LSYM(Ldiv0)
	stmfd	sp!, { r0, r1, lr }
	bl	LSYM(udivsi3_skip_div0_test)
	ldmfd	sp!, { r1, r2, lr }
	mul	r3, r2, r0
	sub	r1, r1, r3
	RET
#endif
	FUNC_END aeabi_uidivmod

#endif /* L_udivsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_umodsi3

#ifdef __ARM_ARCH_EXT_IDIV__

	ARM_FUNC_START umodsi3

	cmp	r1, #0
	beq	LSYM(Ldiv0)
	udiv	r2, r0, r1
	mls	r0, r1, r2, r0
	RET

#elif defined(__thumb__)

	FUNC_START umodsi3

	cmp	divisor, #0
	beq	LSYM(Ldiv0)
	mov	curbit, #1
	cmp	dividend, divisor
	bhs	LSYM(Lover10)
	RET

LSYM(Lover10):
	push	{ work }

	THUMB_DIV_MOD_BODY 1

	pop	{ work }
	RET

#else  /* ARM version.  */

	FUNC_START umodsi3

	subs	r2, r1, #1			@ compare divisor with 1
	bcc	LSYM(Ldiv0)
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	RETc(ls)

	ARM_MOD_BODY r0, r1, r2, r3

	RET

#endif /* ARM version.  */

	DIV_FUNC_END umodsi3 unsigned

#endif /* L_umodsi3 */
/* ------------------------------------------------------------------------ */
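/* The signed routines below divide on magnitudes and patch up the sign
   afterwards: the EOR of the two operands' signs is saved in ip and the
   quotient is negated on exit when that saved sign is negative; for
   modsi3 the remainder instead takes the sign of the original dividend.  */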
#ifdef L_divsi3

#if defined(__prefer_thumb__)

	FUNC_START divsi3
	FUNC_ALIAS aeabi_idiv divsi3

	cmp	divisor, #0
	beq	LSYM(Ldiv0)
LSYM(divsi3_skip_div0_test):
	push	{ work }
	mov	work, dividend
	eor	work, divisor		@ Save the sign of the result.
	mov	ip, work
	mov	curbit, #1
	mov	result, #0
	cmp	divisor, #0
	bpl	LSYM(Lover10)
	neg	divisor, divisor	@ Loops below use unsigned.
LSYM(Lover10):
	cmp	dividend, #0
	bpl	LSYM(Lover11)
	neg	dividend, dividend
LSYM(Lover11):
	cmp	dividend, divisor
	blo	LSYM(Lgot_result)

	THUMB_DIV_MOD_BODY 0

	mov	r0, result
	mov	work, ip
	cmp	work, #0
	bpl	LSYM(Lover12)
	neg	r0, r0
LSYM(Lover12):
	pop	{ work }
	RET

#elif defined(__ARM_ARCH_EXT_IDIV__)

	ARM_FUNC_START divsi3
	ARM_FUNC_ALIAS aeabi_idiv divsi3

	cmp	r1, #0
	beq	LSYM(Ldiv0)
	sdiv	r0, r0, r1
	RET

#else /* ARM/Thumb-2 version.  */

	ARM_FUNC_START divsi3
	ARM_FUNC_ALIAS aeabi_idiv divsi3

	cmp	r1, #0
	beq	LSYM(Ldiv0)
LSYM(divsi3_skip_div0_test):
	eor	ip, r0, r1			@ save the sign of the result.
	do_it	mi
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	subs	r2, r1, #1			@ division by 1 or -1 ?
	beq	10f
	movs	r3, r0
	do_it	mi
	rsbmi	r3, r0, #0			@ positive dividend value
	cmp	r3, r1
	bls	11f
	tst	r1, r2				@ divisor is power of 2 ?
	beq	12f

	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0
	do_it	mi
	rsbmi	r0, r0, #0
	RET

10:	teq	ip, r0				@ same sign ?
	do_it	mi
	rsbmi	r0, r0, #0
	RET

11:	do_it	lo
	movlo	r0, #0
	do_it	eq,t
	moveq	r0, ip, asr #31
	orreq	r0, r0, #1
	RET

12:	ARM_DIV2_ORDER r1, r2

	cmp	ip, #0
	mov	r0, r3, lsr r2
	do_it	mi
	rsbmi	r0, r0, #0
	RET

#endif /* ARM version */

	DIV_FUNC_END divsi3 signed

#if defined(__prefer_thumb__)
FUNC_START aeabi_idivmod
	cmp	r1, #0
	beq	LSYM(Ldiv0)
	push	{r0, r1, lr}
	bl	LSYM(divsi3_skip_div0_test)
	POP	{r1, r2, r3}
	mul	r2, r0
	sub	r1, r1, r2
	bx	r3
#elif defined(__ARM_ARCH_EXT_IDIV__)
ARM_FUNC_START aeabi_idivmod
	cmp	r1, #0
	beq	LSYM(Ldiv0)
	mov	r2, r0
	sdiv	r0, r0, r1
	mls	r1, r0, r1, r2
	RET
#else
ARM_FUNC_START aeabi_idivmod
	cmp	r1, #0
	beq	LSYM(Ldiv0)
	stmfd	sp!, { r0, r1, lr }
	bl	LSYM(divsi3_skip_div0_test)
	ldmfd	sp!, { r1, r2, lr }
	mul	r3, r2, r0
	sub	r1, r1, r3
	RET
#endif
	FUNC_END aeabi_idivmod

#endif /* L_divsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_modsi3

#if defined(__ARM_ARCH_EXT_IDIV__)

	ARM_FUNC_START modsi3

	cmp	r1, #0
	beq	LSYM(Ldiv0)
	sdiv	r2, r0, r1
	mls	r0, r1, r2, r0
	RET

#elif defined(__thumb__)

	FUNC_START modsi3

	mov	curbit, #1
	cmp	divisor, #0
	beq	LSYM(Ldiv0)
	bpl	LSYM(Lover10)
	neg	divisor, divisor		@ Loops below use unsigned.
LSYM(Lover10):
	push	{ work }
	@ Need to save the sign of the dividend; unfortunately, we need
	@ work later on.  Must do this after saving the original value of
	@ the work register, because we will pop this value off first.
	push	{ dividend }
	cmp	dividend, #0
	bpl	LSYM(Lover11)
	neg	dividend, dividend
LSYM(Lover11):
	cmp	dividend, divisor
	blo	LSYM(Lgot_result)

	THUMB_DIV_MOD_BODY 1

	pop	{ work }
	cmp	work, #0
	bpl	LSYM(Lover12)
	neg	dividend, dividend
LSYM(Lover12):
	pop	{ work }
	RET

#else /* ARM version.  */

	FUNC_START modsi3

	cmp	r1, #0
	beq	LSYM(Ldiv0)
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	movs	ip, r0				@ preserve sign of dividend
	rsbmi	r0, r0, #0			@ if negative make positive
	subs	r2, r1, #1			@ compare divisor with 1
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	bls	10f

	ARM_MOD_BODY r0, r1, r2, r3

10:	cmp	ip, #0
	rsbmi	r0, r0, #0
	RET

#endif /* ARM version */

	DIV_FUNC_END modsi3 signed

#endif /* L_modsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_dvmd_tls

#ifdef __ARM_EABI__
	WEAK aeabi_idiv0
	WEAK aeabi_ldiv0
	FUNC_START aeabi_idiv0
	FUNC_START aeabi_ldiv0
	RET
	FUNC_END aeabi_ldiv0
	FUNC_END aeabi_idiv0
#else
	FUNC_START div0
	RET
	FUNC_END div0
#endif

#endif /* L_divmodsi_tools */
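/* The default handlers above simply return; the LDIV0 macros have already
   arranged for r0 to hold a saturated value (EABI) or zero.  On EABI
   targets they are declared WEAK precisely so that a program or OS can
   substitute a trapping version, as the GNU/Linux variant below does.  */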
/* ------------------------------------------------------------------------ */
#ifdef L_dvmd_lnx
@ GNU/Linux division-by-zero handler.  Used in place of L_dvmd_tls

/* Constant taken from <asm/signal.h>.  */
#define SIGFPE	8

#ifdef __ARM_EABI__
	WEAK aeabi_idiv0
	WEAK aeabi_ldiv0
	ARM_FUNC_START aeabi_idiv0
	ARM_FUNC_START aeabi_ldiv0
#else
	ARM_FUNC_START div0
#endif

	do_push	{r1, lr}
	mov	r0, #SIGFPE
	bl	SYM(raise) __PLT__
	RETLDM	r1

#ifdef __ARM_EABI__
	FUNC_END aeabi_ldiv0
	FUNC_END aeabi_idiv0
#else
	FUNC_END div0
#endif

#endif /* L_dvmd_lnx */

#ifdef L_clear_cache
#if defined __ARM_EABI__ && defined __linux__
@ EABI GNU/Linux call to cacheflush syscall.
	ARM_FUNC_START clear_cache
	do_push	{r7}
#if __ARM_ARCH__ >= 7 || defined(__ARM_ARCH_6T2__)
	movw	r7, #2
	movt	r7, #0xf
#else
	mov	r7, #0xf0000
	add	r7, r7, #2
#endif
	mov	r2, #0
	swi	0
	do_pop	{r7}
	RET
	FUNC_END clear_cache
#else
#error "This is only for ARM EABI GNU/Linux"
#endif
#endif /* L_clear_cache */
/* ------------------------------------------------------------------------ */
/* Dword shift operations.  */
/* All the following Dword shift variants rely on the fact that
	shft xxx, Reg
   is in fact done as
	shft xxx, (Reg & 255)
   so for Reg value in (32...63) and (-1...-31) we will get zero (in the
   case of logical shifts) or the sign (for asr).  */

#ifdef __ARMEB__
#define al	r1
#define ah	r0
#else
#define al	r0
#define ah	r1
#endif

/* Prevent __aeabi double-word shifts from being produced on SymbianOS.  */
#ifndef __symbian__

#ifdef L_lshrdi3

	FUNC_START lshrdi3
	FUNC_ALIAS aeabi_llsr lshrdi3

#ifdef __thumb__
	lsr	al, r2
	mov	r3, ah
	lsr	ah, r2
	mov	ip, r3
	sub	r2, #32
	lsr	r3, r2
	orr	al, r3
	neg	r2, r2
	mov	r3, ip
	lsl	r3, r2
	orr	al, r3
	RET
#else
	subs	r3, r2, #32
	rsb	ip, r2, #32
	movmi	al, al, lsr r2
	movpl	al, ah, lsr r3
	orrmi	al, al, ah, lsl ip
	mov	ah, ah, lsr r2
	RET
#endif
	FUNC_END aeabi_llsr
	FUNC_END lshrdi3

#endif
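/* For a shift count n in r2, the ARM-mode sequence above computes

	al = n < 32 ? (al >> n) | (ah << (32 - n)) : ah >> (n - 32);
	ah = ah >> n;

   where each sub-shift behaves like an ARM register-specified shift
   (the count taken modulo 256, with counts of 32..255 yielding zero),
   as the note at the top of this section explains; that is also what
   zeroes ah once n reaches 32.  */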
#ifdef L_ashrdi3

	FUNC_START ashrdi3
	FUNC_ALIAS aeabi_lasr ashrdi3

#ifdef __thumb__
	lsr	al, r2
	mov	r3, ah
	asr	ah, r2
	sub	r2, #32
	@ If r2 is negative at this point the following step would OR
	@ the sign bit into all of AL.  That's not what we want...
	bmi	1f
	mov	ip, r3
	asr	r3, r2
	orr	al, r3
	mov	r3, ip
1:
	neg	r2, r2
	lsl	r3, r2
	orr	al, r3
	RET
#else
	subs	r3, r2, #32
	rsb	ip, r2, #32
	movmi	al, al, lsr r2
	movpl	al, ah, asr r3
	orrmi	al, al, ah, lsl ip
	mov	ah, ah, asr r2
	RET
#endif

	FUNC_END aeabi_lasr
	FUNC_END ashrdi3

#endif

#ifdef L_ashldi3

	FUNC_START ashldi3
	FUNC_ALIAS aeabi_llsl ashldi3

#ifdef __thumb__
	lsl	ah, r2
	mov	r3, al
	lsl	al, r2
	mov	ip, r3
	sub	r2, #32
	lsl	r3, r2
	orr	ah, r3
	neg	r2, r2
	mov	r3, ip
	lsr	r3, r2
	orr	ah, r3
	RET
#else
	subs	r3, r2, #32
	rsb	ip, r2, #32
	movmi	ah, ah, lsl r2
	movpl	ah, al, lsl r3
	orrmi	ah, ah, al, lsr ip
	mov	al, al, lsl r2
	RET
#endif
	FUNC_END aeabi_llsl
	FUNC_END ashldi3

#endif

#endif /* __symbian__ */

#if ((__ARM_ARCH__ > 5) && !defined(__ARM_ARCH_6M__)) \
    || defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
    || defined(__ARM_ARCH_5TEJ__)
#define HAVE_ARM_CLZ 1
#endif

#ifdef L_clzsi2
#if defined(__ARM_ARCH_6M__)
FUNC_START clzsi2
	mov	r1, #28
	mov	r3, #1
	lsl	r3, r3, #16
	cmp	r0, r3 /* 0x10000 */
	bcc	2f
	lsr	r0, r0, #16
	sub	r1, r1, #16
2:	lsr	r3, r3, #8
	cmp	r0, r3 /* #0x100 */
	bcc	2f
	lsr	r0, r0, #8
	sub	r1, r1, #8
2:	lsr	r3, r3, #4
	cmp	r0, r3 /* #0x10 */
	bcc	2f
	lsr	r0, r0, #4
	sub	r1, r1, #4
2:	adr	r2, 1f
	ldrb	r0, [r2, r0]
	add	r0, r0, r1
	bx lr
.align 2
1:
.byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
	FUNC_END clzsi2
#else
ARM_FUNC_START clzsi2
# if defined(HAVE_ARM_CLZ)
	clz	r0, r0
	RET
# else
	mov	r1, #28
	cmp	r0, #0x10000
	do_it	cs, t
	movcs	r0, r0, lsr #16
	subcs	r1, r1, #16
	cmp	r0, #0x100
	do_it	cs, t
	movcs	r0, r0, lsr #8
	subcs	r1, r1, #8
	cmp	r0, #0x10
	do_it	cs, t
	movcs	r0, r0, lsr #4
	subcs	r1, r1, #4
	adr	r2, 1f
	ldrb	r0, [r2, r0]
	add	r0, r0, r1
	RET
.align 2
1:
.byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
# endif /* !HAVE_ARM_CLZ */
	FUNC_END clzsi2
#endif
#endif /* L_clzsi2 */

#ifdef L_clzdi2
#if !defined(HAVE_ARM_CLZ)

# if defined(__ARM_ARCH_6M__)
FUNC_START clzdi2
	push	{r4, lr}
# else
ARM_FUNC_START clzdi2
	do_push	{r4, lr}
# endif
	cmp	xxh, #0
	bne	1f
# ifdef __ARMEB__
	mov	r0, xxl
	bl	__clzsi2
	add	r0, r0, #32
	b	2f
1:
	bl	__clzsi2
# else
	bl	__clzsi2
	add	r0, r0, #32
	b	2f
1:
	mov	r0, xxh
	bl	__clzsi2
# endif
2:
# if defined(__ARM_ARCH_6M__)
	pop	{r4, pc}
# else
	RETLDM	r4
# endif
	FUNC_END clzdi2

#else /* HAVE_ARM_CLZ */

ARM_FUNC_START clzdi2
	cmp	xxh, #0
	do_it	eq, et
	clzeq	r0, xxl
	clzne	r0, xxh
	addeq	r0, r0, #32
	RET
	FUNC_END clzdi2

#endif
#endif /* L_clzdi2 */
/* ------------------------------------------------------------------------ */
/* These next two sections are here despite the fact that they contain Thumb
   assembler because their presence allows interworked code to be linked even
   when the GCC library is this one.  */

/* Do not build the interworking functions when the target architecture does
   not support Thumb instructions.  (This can be a multilib option).  */
#if defined __ARM_ARCH_4T__ || defined __ARM_ARCH_5T__ \
      || defined __ARM_ARCH_5TE__ || defined __ARM_ARCH_5TEJ__ \
      || __ARM_ARCH__ >= 6

#if defined L_call_via_rX

/* These labels & instructions are used by the Arm/Thumb interworking code.
   The address of function to be called is loaded into a register and then
   one of these labels is called via a BL instruction.  This puts the
   return address into the link register with the bottom bit set, and the
   code here switches to the correct mode before executing the function.  */

	.text
	.align 0
	.force_thumb

.macro call_via register
	THUMB_FUNC_START _call_via_\register

	bx	\register
	nop

	SIZE	(_call_via_\register)
.endm

	call_via r0
	call_via r1
	call_via r2
	call_via r3
	call_via r4
	call_via r5
	call_via r6
	call_via r7
	call_via r8
	call_via r9
	call_via sl
	call_via fp
	call_via ip
	call_via sp
	call_via lr

#endif /* L_call_via_rX */
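/* For a call through a function pointer held in, say, r3, the compiler
   emits `bl _call_via_r3' (with the usual __USER_LABEL_PREFIX__ applied);
   the bx above then enters the target in ARM or Thumb state according to
   the low bit of the address, the bl having already left a Thumb return
   address in lr.  */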
/* Don't bother with the old interworking routines for Thumb-2.  */
/* ??? Maybe only omit these on "m" variants.  */
#if !defined(__thumb2__) && !defined(__ARM_ARCH_6M__)

#if defined L_interwork_call_via_rX

/* These labels & instructions are used by the Arm/Thumb interworking code,
   when the target address is in an unknown instruction set.  The address
   of function to be called is loaded into a register and then one of these
   labels is called via a BL instruction.  This puts the return address
   into the link register with the bottom bit set, and the code here
   switches to the correct mode before executing the function.  Unfortunately
   the target code cannot be relied upon to return via a BX instruction, so
   instead we have to store the return address on the stack and allow the
   called function to return here instead.  Upon return we recover the real
   return address and use a BX to get back to Thumb mode.

   There are three variations of this code.  The first,
   _interwork_call_via_rN(), will push the return address onto the
   stack and pop it in _arm_return().  It should only be used if all
   arguments are passed in registers.

   The second, _interwork_r7_call_via_rN(), instead stores the return
   address at [r7, #-4].  It is the caller's responsibility to ensure
   that this address is valid and contains no useful data.

   The third, _interwork_r11_call_via_rN(), works in the same way but
   uses r11 instead of r7.  It is useful if the caller does not really
   need a frame pointer.  */

	.text
	.align 0

	.code	32
	.globl _arm_return
LSYM(Lstart_arm_return):
	cfi_start	LSYM(Lstart_arm_return) LSYM(Lend_arm_return)
	cfi_push	0, 0xe, -0x8, 0x8
	nop	@ This nop is for the benefit of debuggers, so that
		@ backtraces will use the correct unwind information.
_arm_return:
	RETLDM	unwind=LSYM(Lstart_arm_return)
	cfi_end	LSYM(Lend_arm_return)

	.globl _arm_return_r7
_arm_return_r7:
	ldr	lr, [r7, #-4]
	bx	lr

	.globl _arm_return_r11
_arm_return_r11:
	ldr	lr, [r11, #-4]
	bx	lr

.macro interwork_with_frame frame, register, name, return
	.code	16

	THUMB_FUNC_START \name

	bx	pc
	nop

	.code	32
	tst	\register, #1
	streq	lr, [\frame, #-4]
	adreq	lr, _arm_return_\frame
	bx	\register

	SIZE	(\name)
.endm

.macro interwork register
	.code	16

	THUMB_FUNC_START _interwork_call_via_\register

	bx	pc
	nop

	.code	32
	.globl LSYM(Lchange_\register)
LSYM(Lchange_\register):
	tst	\register, #1
	streq	lr, [sp, #-8]!
	adreq	lr, _arm_return
	bx	\register

	SIZE	(_interwork_call_via_\register)

	interwork_with_frame r7,\register,_interwork_r7_call_via_\register
	interwork_with_frame r11,\register,_interwork_r11_call_via_\register
.endm

	interwork r0
	interwork r1
	interwork r2
	interwork r3
	interwork r4
	interwork r5
	interwork r6
	interwork r7
	interwork r8
	interwork r9
	interwork sl
	interwork fp
	interwork ip
	interwork sp

	/* The LR case has to be handled a little differently...  */
	.code 16

	THUMB_FUNC_START _interwork_call_via_lr

	bx	pc
	nop

	.code 32
	.globl .Lchange_lr
.Lchange_lr:
	tst	lr, #1
	stmeqdb	r13!, {lr, pc}
	mov	ip, lr
	adreq	lr, _arm_return
	bx	ip

	SIZE	(_interwork_call_via_lr)

#endif /* L_interwork_call_via_rX */
#endif /* !__thumb2__ */

/* Functions to support compact pic switch tables in thumb1 state.
   All these routines take an index into the table in r0.  The
   table is at LR & ~1 (but this must be rounded up in the case
   of 32-bit entries).  They are only permitted to clobber r12
   and r14 and r0 must be preserved on exit.  */
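/* A Thumb-1 switch statement compiled against these helpers is laid out
   roughly as

	bl	__gnu_thumb1_case_sqi
	.byte	(Lcase0 - Ltable) / 2, (Lcase1 - Ltable) / 2, ...

   (Lcase0, Lcase1 and Ltable being illustrative labels): each helper
   masks the Thumb bit off LR to find the table that immediately follows
   the call, loads the entry indexed by r0, and adds twice its value back
   to LR, so that the return resumes at the selected case.  */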
#ifdef L_thumb1_case_sqi

	.text
	.align 0
	.force_thumb
	.syntax unified
	THUMB_FUNC_START __gnu_thumb1_case_sqi
	push	{r1}
	mov	r1, lr
	lsrs	r1, r1, #1
	lsls	r1, r1, #1
	ldrsb	r1, [r1, r0]
	lsls	r1, r1, #1
	add	lr, lr, r1
	pop	{r1}
	bx	lr
	SIZE (__gnu_thumb1_case_sqi)
#endif

#ifdef L_thumb1_case_uqi

	.text
	.align 0
	.force_thumb
	.syntax unified
	THUMB_FUNC_START __gnu_thumb1_case_uqi
	push	{r1}
	mov	r1, lr
	lsrs	r1, r1, #1
	lsls	r1, r1, #1
	ldrb	r1, [r1, r0]
	lsls	r1, r1, #1
	add	lr, lr, r1
	pop	{r1}
	bx	lr
	SIZE (__gnu_thumb1_case_uqi)
#endif

#ifdef L_thumb1_case_shi

	.text
	.align 0
	.force_thumb
	.syntax unified
	THUMB_FUNC_START __gnu_thumb1_case_shi
	push	{r0, r1}
	mov	r1, lr
	lsrs	r1, r1, #1
	lsls	r0, r0, #1
	lsls	r1, r1, #1
	ldrsh	r1, [r1, r0]
	lsls	r1, r1, #1
	add	lr, lr, r1
	pop	{r0, r1}
	bx	lr
	SIZE (__gnu_thumb1_case_shi)
#endif

#ifdef L_thumb1_case_uhi

	.text
	.align 0
	.force_thumb
	.syntax unified
	THUMB_FUNC_START __gnu_thumb1_case_uhi
	push	{r0, r1}
	mov	r1, lr
	lsrs	r1, r1, #1
	lsls	r0, r0, #1
	lsls	r1, r1, #1
	ldrh	r1, [r1, r0]
	lsls	r1, r1, #1
	add	lr, lr, r1
	pop	{r0, r1}
	bx	lr
	SIZE (__gnu_thumb1_case_uhi)
#endif

#ifdef L_thumb1_case_si

	.text
	.align 0
	.force_thumb
	.syntax unified
	THUMB_FUNC_START __gnu_thumb1_case_si
	push	{r0, r1}
	mov	r1, lr
	adds.n	r1, r1, #2	/* Align to word.  */
	lsrs	r1, r1, #2
	lsls	r0, r0, #2
	lsls	r1, r1, #2
	ldr	r0, [r1, r0]
	adds	r0, r0, r1
	mov	lr, r0
	pop	{r0, r1}
	mov	pc, lr		/* We know we were called from thumb code.  */
	SIZE (__gnu_thumb1_case_si)
#endif

#endif /* Arch supports thumb.  */

#ifndef __symbian__
#ifndef __ARM_ARCH_6M__
#include "ieee754-df.S"
#include "ieee754-sf.S"
#include "bpabi.S"
#else /* __ARM_ARCH_6M__ */
#include "bpabi-v6m.S"
#endif /* __ARM_ARCH_6M__ */
#endif /* !__symbian__ */
