URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

[/] [or1k/] [trunk/] [newlib-1.10.0/] [newlib/] [libc/] [machine/] [arm/] [memcpy.S] - Blame information for rev 1765

Details | Compare with Previous | View Log


/*      $NetBSD: memcpy.S,v 1.3 1997/11/22 03:27:12 mark Exp $  */
 
/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Neil A. Carson and Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the NetBSD
 *      Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
 
/* This was modified by Jay Monkman to
 *   save and restore r12. This is necessary for RTEMS.
 */
/* #include */
 
/*
 * ENTRY(sym): make `sym` a global symbol and define its label at the
 * current location.
 */
#define ENTRY(sym) .globl sym ; sym:
/*
 * memcpy stub.
 *
 * Pushes r0 (the value handed back to the caller), ip (r12) and lr,
 * calls the shared _memcpy routine, then pops the saved r0 and ip and
 * returns by loading the saved lr straight into pc.  Reloading r0 here
 * means the result does not depend on what _memcpy leaves in r0.
 */
ENTRY(memcpy)
        stmdb   sp!, {r0, ip, lr}       /* full-descending push */
        bl      _memcpy
        ldmia   sp!, {r0, ip, pc}       /* pop and return */
 
 
/*
 * memmove stub.
 *
 * Same sequence as the memcpy stub: push r0, ip (r12) and lr, call the
 * shared _memcpy routine (which chooses the copy direction itself by
 * comparing r1 with r0), then pop r0 and ip and return through pc.
 */
ENTRY(memmove)
        stmdb   sp!, {r0, ip, lr}       /* full-descending push */
        bl      _memcpy
        ldmia   sp!, {r0, ip, pc}       /* pop and return */
 
 
 
/*
 * This is one fun bit of code ...
 * Some easy listening music is suggested while trying to understand this
 * code e.g. Iron Maiden
 *
 * For anyone attempting to understand it :
 *
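 * The core code is implemented in _memcpy, with simple stubs for memcpy()
 * and memmove() that call it (the bcopy() stub mentioned by the original
 * comment is not present in this copy of the file).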
 *
 * All local labels are prefixed with Lmemcpy_
 * Following the prefix a label starting f is used in the forward copy code
 * while a label using b is used in the backwards copy code
 * The source and destination addresses determine whether a forward or
 * backward copy is performed.
 * Separate bits of code are used to deal with the following situations
 * for both the forward and backwards copy.
 * unaligned source address
 * unaligned destination address
 * Separate copy routines are used to produce an optimised result for each
 * of these cases.
 * The copy code will use LDM/STM instructions to copy up to 32 bytes at
 * a time where possible.
 *
 * Note: r12 (aka ip) can be trashed during the function along with
 * r0-r3, although r0-r2 have defined uses throughout, i.e. dest, src, len
 * (the code stores through r0, loads through r1 and counts bytes in r2).
 * Additional registers are preserved prior to use, i.e. r4, r5 & lr.
 *
 * Apologies for the state of the comments ;-)
 */
 
 
/*
 * _memcpy - core copy routine behind the memcpy() and memmove() stubs.
 *
 * In:   r0 = destination, r1 = source, r2 = length in bytes.
 * Out:  forward path: r0 = destination as passed in (popped from the stack);
 *       src == dest early exit: r0 = 0.  The stubs in this file reload r0
 *       themselves, so they do not depend on either value.
 * Uses: r3, r12 and lr as data registers; r4 is saved/restored around the
 *       32-byte loop.
 *
 * If src < dest (unsigned compare) the copy is done downwards from the end
 * of the buffers (Lmemcpy_backwards); otherwise it is done upwards here.
 *
 * Counting trick used throughout: r2 holds "bytes remaining minus a bias".
 * Each stage subtracts its block size with subs and branches on lt when
 * there is not enough left; the next, smaller stage adds the bias back.
 */
ENTRY(_memcpy)
        /* Determine copy direction */
        cmp     r1, r0
        bcc     Lmemcpy_backwards       /* src < dest: copy from the top down */
 
        /*
         * Z is still the result of "cmp r1, r0" above, so the next two
         * instructions execute only when src == dest (not when len == 0):
         * there is nothing to copy in that case.
         */
        moveq   r0, #0                  /* NB: leaves 0, not dest, in r0 */
        moveq   pc, lr
 
        stmdb   sp!, {r0, lr}           /* memcpy() returns dest addr */
        subs    r2, r2, #4              /* r2 = len - 4 */
        blt     Lmemcpy_fl4             /* less than 4 bytes */
        ands    r12, r0, #3
        bne     Lmemcpy_fdestul         /* oh unaligned destination addr */
        ands    r12, r1, #3
        bne     Lmemcpy_fsrcul          /* oh unaligned source addr */
 
Lmemcpy_ft8:
        /* We have aligned source and destination; r2 = remaining - 4 */
        subs    r2, r2, #8              /* r2 = remaining - 12 */
        blt     Lmemcpy_fl12            /* less than 12 bytes (4 from above) */
        subs    r2, r2, #0x14           /* r2 = remaining - 32 */
        blt     Lmemcpy_fl32            /* less than 32 bytes (12 from above) */
        stmdb   sp!, {r4}               /* borrow r4 */
 
        /* blat 32 bytes at a time */
        /* XXX for really big copies perhaps we should use more registers */
Lmemcpy_floop32:
        ldmia   r1!, {r3, r4, r12, lr}
        stmia   r0!, {r3, r4, r12, lr}
        ldmia   r1!, {r3, r4, r12, lr}
        stmia   r0!, {r3, r4, r12, lr}
        subs    r2, r2, #0x20
        bge     Lmemcpy_floop32
 
        cmn     r2, #0x10               /* ge if at least 16 bytes remain */
        ldmgeia r1!, {r3, r4, r12, lr}  /* blat a remaining 16 bytes */
        stmgeia r0!, {r3, r4, r12, lr}
        subge   r2, r2, #0x10
        ldmia   sp!, {r4}               /* return r4 */
 
Lmemcpy_fl32:
        adds    r2, r2, #0x14           /* r2 = remaining - 12 */
 
        /* blat 12 bytes at a time */
Lmemcpy_floop12:
        ldmgeia r1!, {r3, r12, lr}
        stmgeia r0!, {r3, r12, lr}
        subges  r2, r2, #0x0c
        bge     Lmemcpy_floop12
 
Lmemcpy_fl12:
        adds    r2, r2, #8              /* r2 = remaining - 4 */
        blt     Lmemcpy_fl4
 
        subs    r2, r2, #4              /* lt: 4..7 bytes left, ge: 8..11 */
        ldrlt   r3, [r1], #4            /* lt: copy one word */
        strlt   r3, [r0], #4
        ldmgeia r1!, {r3, r12}          /* ge: copy two words */
        stmgeia r0!, {r3, r12}
        subge   r2, r2, #4              /* either way r2 = remaining - 4 */
 
Lmemcpy_fl4:
        /* less than 4 bytes to go */
        adds    r2, r2, #4              /* r2 = remaining (0..3) */
        ldmeqia sp!, {r0, pc}           /* done */
 
        /* copy the crud byte at a time */
        cmp     r2, #2                  /* 1 left: first pair only; 2: +ge; 3: +gt */
        ldrb    r3, [r1], #1
        strb    r3, [r0], #1
        ldrgeb  r3, [r1], #1
        strgeb  r3, [r0], #1
        ldrgtb  r3, [r1], #1
        strgtb  r3, [r0], #1
        ldmia   sp!, {r0, pc}           /* restore dest into r0 and return */
 
        /*
         * erg - unaligned destination
         *
         * Entered with r12 = dest & 3 (non-zero) and r2 = remaining - 4 >= 0,
         * so at least 4 bytes remain and the 1..3 byte copies below stay
         * inside the buffers.
         */
Lmemcpy_fdestul:
        rsb     r12, r12, #4            /* r12 = bytes up to next word boundary */
        cmp     r12, #2                 /* 1: first pair only; 2: +ge; 3: +gt */
 
        /* align destination with byte copies */
        ldrb    r3, [r1], #1
        strb    r3, [r0], #1
        ldrgeb  r3, [r1], #1
        strgeb  r3, [r0], #1
        ldrgtb  r3, [r1], #1
        strgtb  r3, [r0], #1
        subs    r2, r2, r12             /* r2 = remaining - 4 again */
        blt     Lmemcpy_fl4             /* less than 4 bytes */
 
        ands    r12, r1, #3
        beq     Lmemcpy_ft8             /* we have an aligned source */
        /* otherwise fall through into the unaligned-source code below */
 
        /*
         * erg - unaligned source
         * This is where it gets nasty ...
         *
         * Entered with the destination word aligned, r12 = src & 3 (1..3)
         * and r2 = remaining - 4 >= 0.  The source is read as whole aligned
         * words; each output word is assembled from the not-yet-used upper
         * part of the previous word (lsr) and the lower part of the next
         * one (lsl).  lr always carries the most recently loaded word.
         * NOTE(review): these shift directions correspond to little-endian
         * byte order; no big-endian variant is visible in this file.
         */
Lmemcpy_fsrcul:
        bic     r1, r1, #3              /* round src down to a word boundary */
        ldr     lr, [r1], #4            /* prime lr with the first (partial) word */
        cmp     r12, #2
        bgt     Lmemcpy_fsrcul3         /* src & 3 == 3 */
        beq     Lmemcpy_fsrcul2         /* src & 3 == 2 */
        /* src & 3 == 1 falls through: shift pair is lsr #8 / lsl #24 */
        cmp     r2, #0x0c
        blt     Lmemcpy_fsrcul1loop4    /* fewer than 16 bytes: word loop only */
        sub     r2, r2, #0x0c           /* r2 = remaining - 16 */
        stmdb   sp!, {r4, r5}
 
        /* 16 bytes per iteration: four new words in, four merged words out */
Lmemcpy_fsrcul1loop16:
        mov     r3, lr, lsr #8
        ldmia   r1!, {r4, r5, r12, lr}
        orr     r3, r3, r4, lsl #24
        mov     r4, r4, lsr #8
        orr     r4, r4, r5, lsl #24
        mov     r5, r5, lsr #8
        orr     r5, r5, r12, lsl #24
        mov     r12, r12, lsr #8
        orr     r12, r12, lr, lsl #24
        stmia   r0!, {r3-r5, r12}
        subs    r2, r2, #0x10
        bge     Lmemcpy_fsrcul1loop16
        ldmia   sp!, {r4, r5}
        adds    r2, r2, #0x0c           /* r2 = remaining - 4 */
        blt     Lmemcpy_fsrcul1l4
 
        /* 4 bytes per iteration */
Lmemcpy_fsrcul1loop4:
        mov     r12, lr, lsr #8
        ldr     lr, [r1], #4
        orr     r12, r12, lr, lsl #24
        str     r12, [r0], #4
        subs    r2, r2, #4
        bge     Lmemcpy_fsrcul1loop4
 
Lmemcpy_fsrcul1l4:
        /* only 1 byte of the last loaded word was used: back r1 up by 3 */
        sub     r1, r1, #3
        b       Lmemcpy_fl4
 
/*
 * Forward copy, source 2 bytes past a word boundary.
 *
 * Reached from Lmemcpy_fsrcul with lr = the aligned word that contains the
 * first source bytes, r1 = address of the next aligned source word, r0 word
 * aligned and r2 = remaining - 4 >= 0.  Each output word is the upper half
 * of the previous source word (lsr #16) merged with the lower half of the
 * next one (lsl #16).
 */
Lmemcpy_fsrcul2:
        cmp     r2, #0x0c
        blt     Lmemcpy_fsrcul2loop4    /* fewer than 16 bytes: word loop only */
        sub     r2, r2, #0x0c           /* r2 = remaining - 16 */
        stmdb   sp!, {r4, r5}
 
        /* 16 bytes per iteration; lr keeps the last loaded word for the next pass */
Lmemcpy_fsrcul2loop16:
        mov     r3, lr, lsr #16
        ldmia   r1!, {r4, r5, r12, lr}
        orr     r3, r3, r4, lsl #16
        mov     r4, r4, lsr #16
        orr     r4, r4, r5, lsl #16
        mov     r5, r5, lsr #16
        orr     r5, r5, r12, lsl #16
        mov     r12, r12, lsr #16
        orr     r12, r12, lr, lsl #16
        stmia   r0!, {r3-r5, r12}
        subs    r2, r2, #0x10
        bge     Lmemcpy_fsrcul2loop16
        ldmia   sp!, {r4, r5}
        adds    r2, r2, #0x0c           /* r2 = remaining - 4 */
        blt     Lmemcpy_fsrcul2l4
 
        /* 4 bytes per iteration */
Lmemcpy_fsrcul2loop4:
        mov     r12, lr, lsr #16
        ldr     lr, [r1], #4
        orr     r12, r12, lr, lsl #16
        str     r12, [r0], #4
        subs    r2, r2, #4
        bge     Lmemcpy_fsrcul2loop4
 
Lmemcpy_fsrcul2l4:
        /* only 2 bytes of the last loaded word were used: back r1 up by 2 */
        sub     r1, r1, #2
        b       Lmemcpy_fl4
 
/*
 * Forward copy, source 3 bytes past a word boundary.
 *
 * Reached from Lmemcpy_fsrcul with lr = the aligned word that contains the
 * first source byte, r1 = address of the next aligned source word, r0 word
 * aligned and r2 = remaining - 4 >= 0.  Each output word is the top byte of
 * the previous source word (lsr #24) merged with the low three bytes of the
 * next one (lsl #8).
 */
Lmemcpy_fsrcul3:
        cmp     r2, #0x0c
        blt     Lmemcpy_fsrcul3loop4    /* fewer than 16 bytes: word loop only */
        sub     r2, r2, #0x0c           /* r2 = remaining - 16 */
        stmdb   sp!, {r4, r5}
 
        /* 16 bytes per iteration; lr keeps the last loaded word for the next pass */
Lmemcpy_fsrcul3loop16:
        mov     r3, lr, lsr #24
        ldmia   r1!, {r4, r5, r12, lr}
        orr     r3, r3, r4, lsl #8
        mov     r4, r4, lsr #24
        orr     r4, r4, r5, lsl #8
        mov     r5, r5, lsr #24
        orr     r5, r5, r12, lsl #8
        mov     r12, r12, lsr #24
        orr     r12, r12, lr, lsl #8
        stmia   r0!, {r3-r5, r12}
        subs    r2, r2, #0x10
        bge     Lmemcpy_fsrcul3loop16
        ldmia   sp!, {r4, r5}
        adds    r2, r2, #0x0c           /* r2 = remaining - 4 */
        blt     Lmemcpy_fsrcul3l4
 
        /* 4 bytes per iteration */
Lmemcpy_fsrcul3loop4:
        mov     r12, lr, lsr #24
        ldr     lr, [r1], #4
        orr     r12, r12, lr, lsl #8
        str     r12, [r0], #4
        subs    r2, r2, #4
        bge     Lmemcpy_fsrcul3loop4
 
Lmemcpy_fsrcul3l4:
        /* 3 bytes of the last loaded word were used: back r1 up by 1 */
        sub     r1, r1, #1
        b       Lmemcpy_fl4
 
/*
 * Backward (descending) copy, taken when src < dest.
 *
 * r0 and r1 are first advanced to one past the end of each buffer and every
 * access in this section then pre-decrements.  Unlike the forward path,
 * nothing is pushed on entry: lr is saved only around the section that uses
 * it as a data register, and the routine returns with "mov pc, lr" without
 * reloading r0.  r2 uses the same "remaining minus bias" convention as the
 * forward code.
 */
Lmemcpy_backwards:
        add     r1, r1, r2              /* r1 = src + len */
        add     r0, r0, r2              /* r0 = dest + len */
        subs    r2, r2, #4              /* r2 = len - 4 */
        blt     Lmemcpy_bl4             /* less than 4 bytes */
        ands    r12, r0, #3
        bne     Lmemcpy_bdestul         /* oh unaligned destination addr */
        ands    r12, r1, #3
        bne     Lmemcpy_bsrcul          /* oh unaligned source addr */
 
Lmemcpy_bt8:
        /* We have aligned source and destination; r2 = remaining - 4 */
        subs    r2, r2, #8              /* r2 = remaining - 12 */
        blt     Lmemcpy_bl12            /* less than 12 bytes (4 from above) */
        stmdb   sp!, {r4, lr}           /* r4 and lr are used as data registers below */
        subs    r2, r2, #0x14           /* less than 32 bytes (12 from above) */
        blt     Lmemcpy_bl32
 
        /* blat 32 bytes at a time */
        /* XXX for really big copies perhaps we should use more registers */
Lmemcpy_bloop32:
        ldmdb   r1!, {r3, r4, r12, lr}
        stmdb   r0!, {r3, r4, r12, lr}
        ldmdb   r1!, {r3, r4, r12, lr}
        stmdb   r0!, {r3, r4, r12, lr}
        subs    r2, r2, #0x20
        bge     Lmemcpy_bloop32
 
Lmemcpy_bl32:
        cmn     r2, #0x10               /* ge if at least 16 bytes remain */
        ldmgedb r1!, {r3, r4, r12, lr}  /* blat a remaining 16 bytes */
        stmgedb r0!, {r3, r4, r12, lr}
        subge   r2, r2, #0x10
        adds    r2, r2, #0x14           /* r2 = remaining - 12 */
        ldmgedb r1!, {r3, r12, lr}      /* blat a remaining 12 bytes */
        stmgedb r0!, {r3, r12, lr}
        subge   r2, r2, #0x0c
        ldmia   sp!, {r4, lr}
 
Lmemcpy_bl12:
        adds    r2, r2, #8              /* r2 = remaining - 4 */
        blt     Lmemcpy_bl4
        subs    r2, r2, #4              /* lt: 4..7 bytes left, ge: 8..11 */
        ldrlt   r3, [r1, #-4]!          /* lt: copy one word */
        strlt   r3, [r0, #-4]!
        ldmgedb r1!, {r3, r12}          /* ge: copy two words */
        stmgedb r0!, {r3, r12}
        subge   r2, r2, #4              /* either way r2 = remaining - 4 */
 
Lmemcpy_bl4:
        /* less than 4 bytes to go */
        adds    r2, r2, #4              /* r2 = remaining (0..3) */
        moveq   pc, lr                  /* done */
 
        /* copy the crud byte at a time */
        cmp     r2, #2                  /* 1 left: first pair only; 2: +ge; 3: +gt */
        ldrb    r3, [r1, #-1]!
        strb    r3, [r0, #-1]!
        ldrgeb  r3, [r1, #-1]!
        strgeb  r3, [r0, #-1]!
        ldrgtb  r3, [r1, #-1]!
        strgtb  r3, [r0, #-1]!
        mov     pc, lr
 
        /*
         * erg - unaligned destination
         *
         * Entered with r12 = (dest + len) & 3 (non-zero), i.e. the number of
         * bytes lying above the last word boundary of the destination, and
         * r2 = remaining - 4 >= 0, so at least 4 bytes remain.
         */
Lmemcpy_bdestul:
        cmp     r12, #2                 /* 1: first pair only; 2: +ge; 3: +gt */
 
        /* align destination with byte copies */
        ldrb    r3, [r1, #-1]!
        strb    r3, [r0, #-1]!
        ldrgeb  r3, [r1, #-1]!
        strgeb  r3, [r0, #-1]!
        ldrgtb  r3, [r1, #-1]!
        strgtb  r3, [r0, #-1]!
        subs    r2, r2, r12             /* r2 = remaining - 4 again */
        blt     Lmemcpy_bl4             /* less than 4 bytes to go */
        ands    r12, r1, #3
        beq     Lmemcpy_bt8             /* we have an aligned source */
        /* otherwise fall through into the unaligned-source code below */
 
        /*
         * erg - unaligned source
         * This is where it gets nasty ...
         *
         * Entered with the destination end pointer word aligned,
         * r12 = (source end pointer) & 3 (1..3) and r2 = remaining - 4 >= 0.
         * The source is read as whole aligned words going downwards; each
         * output word is assembled from the not-yet-used lower part of the
         * previous (higher-addressed) word (lsl) and the upper part of the
         * next lower one (lsr).  r3 always carries the most recently loaded
         * (lowest-addressed) word.
         * NOTE(review): these shift directions correspond to little-endian
         * byte order; no big-endian variant is visible in this file.
         */
Lmemcpy_bsrcul:
        bic     r1, r1, #3              /* round the end pointer down to a word */
        ldr     r3, [r1, #0]            /* prime r3 with the topmost (partial) word */
        cmp     r12, #2
        blt     Lmemcpy_bsrcul1         /* end & 3 == 1 */
        beq     Lmemcpy_bsrcul2         /* end & 3 == 2 */
        /* end & 3 == 3 falls through: shift pair is lsl #8 / lsr #24 */
        cmp     r2, #0x0c
        blt     Lmemcpy_bsrcul3loop4    /* fewer than 16 bytes: word loop only */
        sub     r2, r2, #0x0c           /* r2 = remaining - 16 */
        stmdb   sp!, {r4, r5, lr}       /* lr is used as a data register below */
 
        /* 16 bytes per iteration: four new words in, four merged words out */
Lmemcpy_bsrcul3loop16:
        mov     lr, r3, lsl #8
        ldmdb   r1!, {r3-r5, r12}
        orr     lr, lr, r12, lsr #24
        mov     r12, r12, lsl #8
        orr     r12, r12, r5, lsr #24
        mov     r5, r5, lsl #8
        orr     r5, r5, r4, lsr #24
        mov     r4, r4, lsl #8
        orr     r4, r4, r3, lsr #24
        stmdb   r0!, {r4, r5, r12, lr}
        subs    r2, r2, #0x10
        bge     Lmemcpy_bsrcul3loop16
        ldmia   sp!, {r4, r5, lr}
        adds    r2, r2, #0x0c           /* r2 = remaining - 4 */
        blt     Lmemcpy_bsrcul3l4
 
        /* 4 bytes per iteration */
Lmemcpy_bsrcul3loop4:
        mov     r12, r3, lsl #8
        ldr     r3, [r1, #-4]!
        orr     r12, r12, r3, lsr #24
        str     r12, [r0, #-4]!
        subs    r2, r2, #4
        bge     Lmemcpy_bsrcul3loop4
 
Lmemcpy_bsrcul3l4:
        /* 3 bytes of the last loaded word are still unused: move r1 up by 3 */
        add     r1, r1, #3
        b       Lmemcpy_bl4
 
/*
 * Backward copy, source end pointer 2 bytes past a word boundary.
 *
 * Reached from Lmemcpy_bsrcul with r1 rounded down to a word boundary,
 * r3 = the word loaded from that address, r0 word aligned and
 * r2 = remaining - 4 >= 0.  Each output word is the lower half of the
 * previous (higher-addressed) source word (lsl #16) merged with the upper
 * half of the next lower one (lsr #16).
 */
Lmemcpy_bsrcul2:
        cmp     r2, #0x0c
        blt     Lmemcpy_bsrcul2loop4    /* fewer than 16 bytes: word loop only */
        sub     r2, r2, #0x0c           /* r2 = remaining - 16 */
        stmdb   sp!, {r4, r5, lr}       /* lr is used as a data register below */
 
        /* 16 bytes per iteration; r3 keeps the lowest loaded word for the next pass */
Lmemcpy_bsrcul2loop16:
        mov     lr, r3, lsl #16
        ldmdb   r1!, {r3-r5, r12}
        orr     lr, lr, r12, lsr #16
        mov     r12, r12, lsl #16
        orr     r12, r12, r5, lsr #16
        mov     r5, r5, lsl #16
        orr     r5, r5, r4, lsr #16
        mov     r4, r4, lsl #16
        orr     r4, r4, r3, lsr #16
        stmdb   r0!, {r4, r5, r12, lr}
        subs    r2, r2, #0x10
        bge     Lmemcpy_bsrcul2loop16
        ldmia   sp!, {r4, r5, lr}
        adds    r2, r2, #0x0c           /* r2 = remaining - 4 */
        blt     Lmemcpy_bsrcul2l4
 
        /* 4 bytes per iteration */
Lmemcpy_bsrcul2loop4:
        mov     r12, r3, lsl #16
        ldr     r3, [r1, #-4]!
        orr     r12, r12, r3, lsr #16
        str     r12, [r0, #-4]!
        subs    r2, r2, #4
        bge     Lmemcpy_bsrcul2loop4
 
Lmemcpy_bsrcul2l4:
        /* 2 bytes of the last loaded word are still unused: move r1 up by 2 */
        add     r1, r1, #2
        b       Lmemcpy_bl4
 
/*
 * Backward copy, source end pointer 1 byte past a word boundary.
 *
 * Reached from Lmemcpy_bsrcul with r1 rounded down to a word boundary,
 * r3 = the word loaded from that address, r0 word aligned and
 * r2 = remaining - 4 >= 0.  Each output word is the low byte of the
 * previous (higher-addressed) source word (lsl #24) merged with the upper
 * three bytes of the next lower one (lsr #8).
 *
 * The unrolled loop moves 16 bytes per iteration, so its label is named
 * ...loop16 like the equivalent loops in the other unaligned-source paths.
 */
Lmemcpy_bsrcul1:
        cmp     r2, #0x0c
        blt     Lmemcpy_bsrcul1loop4    /* fewer than 16 bytes: word loop only */
        sub     r2, r2, #0x0c           /* r2 = remaining - 16 */
        stmdb   sp!, {r4, r5, lr}       /* lr is used as a data register below */
 
        /* 16 bytes per iteration; r3 keeps the lowest loaded word for the next pass */
Lmemcpy_bsrcul1loop16:
        mov     lr, r3, lsl #24
        ldmdb   r1!, {r3-r5, r12}
        orr     lr, lr, r12, lsr #8
        mov     r12, r12, lsl #24
        orr     r12, r12, r5, lsr #8
        mov     r5, r5, lsl #24
        orr     r5, r5, r4, lsr #8
        mov     r4, r4, lsl #24
        orr     r4, r4, r3, lsr #8
        stmdb   r0!, {r4, r5, r12, lr}
        subs    r2, r2, #0x10
        bge     Lmemcpy_bsrcul1loop16
        ldmia   sp!, {r4, r5, lr}
        adds    r2, r2, #0x0c           /* r2 = remaining - 4 */
        blt     Lmemcpy_bsrcul1l4
 
        /* 4 bytes per iteration */
Lmemcpy_bsrcul1loop4:
        mov     r12, r3, lsl #24
        ldr     r3, [r1, #-4]!
        orr     r12, r12, r3, lsr #8
        str     r12, [r0, #-4]!
        subs    r2, r2, #4
        bge     Lmemcpy_bsrcul1loop4
 
Lmemcpy_bsrcul1l4:
        /* 1 byte of the last loaded word is still unused: move r1 up by 1 */
        add     r1, r1, #1
        b       Lmemcpy_bl4
 

Browse

Tools

Subversion Repositories or1k

[/] [or1k/] [trunk/] [newlib-1.10.0/] [newlib/] [libc/] [machine/] [arm/] [memcpy.S] - Blame information for rev 1765

Line No.	Rev	Author	Line
1	1007	ivang	`/* $NetBSD: memcpy.S,v 1.3 1997/11/22 03:27:12 mark Exp $ */`
2
3			`/*-`
4			`* Copyright (c) 1997 The NetBSD Foundation, Inc.`
5			`* All rights reserved.`
6			`*`
7			`* This code is derived from software contributed to The NetBSD Foundation`
8			`* by Neil A. Carson and Mark Brinicombe`
9			`*`
10			`* Redistribution and use in source and binary forms, with or without`
11			`* modification, are permitted provided that the following conditions`
12			`* are met:`
13			`* 1. Redistributions of source code must retain the above copyright`
14			`* notice, this list of conditions and the following disclaimer.`
15			`* 2. Redistributions in binary form must reproduce the above copyright`
16			`* notice, this list of conditions and the following disclaimer in the`
17			`* documentation and/or other materials provided with the distribution.`
18			`* 3. All advertising materials mentioning features or use of this software`
19			`* must display the following acknowledgement:`
20			`* This product includes software developed by the NetBSD`
21			`* Foundation, Inc. and its contributors.`
22			`* 4. Neither the name of The NetBSD Foundation nor the names of its`
23			`* contributors may be used to endorse or promote products derived`
24			`* from this software without specific prior written permission.`
25			`*`
26			`* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS`
27			* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28			`* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR`
29			`* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS`
30			`* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR`
31			`* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF`
32			`* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS`
33			`* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN`
34			`* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)`
35			`* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE`
36			`* POSSIBILITY OF SUCH DAMAGE.`
37			`*/`
38
39			`/* This was modified by Jay Monkman to`
40			`* save and restore r12. This is necessary for RTEMS.`
41			`*/`
42			`/* #include */`
43
44			`#define ENTRY(_LABEL) \`
45			`.global _LABEL ; _LABEL:`
46			`/*`
47			`.globl memcpy`
48			`memcpy:`
49			`*/`
50			`ENTRY(memcpy)`
51			`stmfd sp!, {r0, r12, lr}`
52			`bl _memcpy`
53			`ldmfd sp!, {r0, r12, pc}`
54
55
56			`/*`
57			`.globl memove`
58			`memmove:`
59			`*/`
60			`ENTRY(memmove)`
61			`stmfd sp!, {r0, r12, lr}`
62			`bl _memcpy`
63			`ldmfd sp!, {r0, r12, pc}`
64
65
66
67			`/*`
68			`* This is one fun bit of code ...`
69			`* Some easy listening music is suggested while trying to understand this`
70			`* code e.g. Iron Maiden`
71			`*`
72			`* For anyone attempting to understand it :`
73			`*`
74			`* The core code is implemented here with simple stubs for memcpy()`
75			`* memmove() and bcopy().`
76			`*`
77			`* All local labels are prefixed with Lmemcpy_`
78			`* Following the prefix a label starting f is used in the forward copy code`
79			`* while a label using b is used in the backwards copy code`
80			`* The source and destination addresses determine whether a forward or`
81			`* backward copy is performed.`
82			`* Separate bits of code are used to deal with the following situations`
83			`* for both the forward and backwards copy.`
84			`* unaligned source address`
85			`* unaligned destination address`
86			`* Separate copy routines are used to produce an optimised result for each`
87			`* of these cases.`
88			`* The copy code will use LDM/STM instructions to copy up to 32 bytes at`
89			`* a time where possible.`
90			`*`
91			`* Note: r12 (aka ip) can be trashed during the function along with`
92			`* r0-r3 although r0-r2 have defined uses i.e. src, dest, len through out.`
93			`* Additional registers are preserved prior to use i.e. r4, r5 & lr`
94			`*`
95			`* Apologies for the state of the comments ;-)`
96			`*/`
97
98
99			`/*`
100			`_memcpy:`
101			`*/`
102			`ENTRY(_memcpy)`
103			`/* Determine copy direction */`
104			`cmp r1, r0`
105			`bcc Lmemcpy_backwards`
106
107			`moveq r0, #0 /* Quick abort for len=0 */`
108			`moveq pc, lr`
109
110			`stmdb sp!, {r0, lr} /* memcpy() returns dest addr */`
111			`subs r2, r2, #4`
112			`blt Lmemcpy_fl4 /* less than 4 bytes */`
113			`ands r12, r0, #3`
114			`bne Lmemcpy_fdestul /* oh unaligned destination addr */`
115			`ands r12, r1, #3`
116			`bne Lmemcpy_fsrcul /* oh unaligned source addr */`
117
118			`Lmemcpy_ft8:`
119			`/* We have aligned source and destination */`
120			`subs r2, r2, #8`
121			`blt Lmemcpy_fl12 /* less than 12 bytes (4 from above) */`
122			`subs r2, r2, #0x14`
123			`blt Lmemcpy_fl32 /* less than 32 bytes (12 from above) */`
124			`stmdb sp!, {r4} /* borrow r4 */`
125
126			`/* blat 32 bytes at a time */`
127			`/* XXX for really big copies perhaps we should use more registers */`
128			`Lmemcpy_floop32:`
129			`ldmia r1!, {r3, r4, r12, lr}`
130			`stmia r0!, {r3, r4, r12, lr}`
131			`ldmia r1!, {r3, r4, r12, lr}`
132			`stmia r0!, {r3, r4, r12, lr}`
133			`subs r2, r2, #0x20`
134			`bge Lmemcpy_floop32`
135
136			`cmn r2, #0x10`
137			`ldmgeia r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */`
138			`stmgeia r0!, {r3, r4, r12, lr}`
139			`subge r2, r2, #0x10`
140			`ldmia sp!, {r4} /* return r4 */`
141
142			`Lmemcpy_fl32:`
143			`adds r2, r2, #0x14`
144
145			`/* blat 12 bytes at a time */`
146			`Lmemcpy_floop12:`
147			`ldmgeia r1!, {r3, r12, lr}`
148			`stmgeia r0!, {r3, r12, lr}`
149			`subges r2, r2, #0x0c`
150			`bge Lmemcpy_floop12`
151
152			`Lmemcpy_fl12:`
153			`adds r2, r2, #8`
154			`blt Lmemcpy_fl4`
155
156			`subs r2, r2, #4`
157			`ldrlt r3, [r1], #4`
158			`strlt r3, [r0], #4`
159			`ldmgeia r1!, {r3, r12}`
160			`stmgeia r0!, {r3, r12}`
161			`subge r2, r2, #4`
162
163			`Lmemcpy_fl4:`
164			`/* less than 4 bytes to go */`
165			`adds r2, r2, #4`
166			`ldmeqia sp!, {r0, pc} /* done */`
167
168			`/* copy the crud byte at a time */`
169			`cmp r2, #2`
170			`ldrb r3, [r1], #1`
171			`strb r3, [r0], #1`
172			`ldrgeb r3, [r1], #1`
173			`strgeb r3, [r0], #1`
174			`ldrgtb r3, [r1], #1`
175			`strgtb r3, [r0], #1`
176			`ldmia sp!, {r0, pc}`
177
178			`/* erg - unaligned destination */`
179			`Lmemcpy_fdestul:`
180			`rsb r12, r12, #4`
181			`cmp r12, #2`
182
183			`/* align destination with byte copies */`
184			`ldrb r3, [r1], #1`
185			`strb r3, [r0], #1`
186			`ldrgeb r3, [r1], #1`
187			`strgeb r3, [r0], #1`
188			`ldrgtb r3, [r1], #1`
189			`strgtb r3, [r0], #1`
190			`subs r2, r2, r12`
191			`blt Lmemcpy_fl4 /* less the 4 bytes */`
192
193			`ands r12, r1, #3`
194			`beq Lmemcpy_ft8 /* we have an aligned source */`
195
196			`/* erg - unaligned source */`
197			`/* This is where it gets nasty ... */`
198			`Lmemcpy_fsrcul:`
199			`bic r1, r1, #3`
200			`ldr lr, [r1], #4`
201			`cmp r12, #2`
202			`bgt Lmemcpy_fsrcul3`
203			`beq Lmemcpy_fsrcul2`
204			`cmp r2, #0x0c`
205			`blt Lmemcpy_fsrcul1loop4`
206			`sub r2, r2, #0x0c`
207			`stmdb sp!, {r4, r5}`
208
209			`Lmemcpy_fsrcul1loop16:`
210			`mov r3, lr, lsr #8`
211			`ldmia r1!, {r4, r5, r12, lr}`
212			`orr r3, r3, r4, lsl #24`
213			`mov r4, r4, lsr #8`
214			`orr r4, r4, r5, lsl #24`
215			`mov r5, r5, lsr #8`
216			`orr r5, r5, r12, lsl #24`
217			`mov r12, r12, lsr #8`
218			`orr r12, r12, lr, lsl #24`
219			`stmia r0!, {r3-r5, r12}`
220			`subs r2, r2, #0x10`
221			`bge Lmemcpy_fsrcul1loop16`
222			`ldmia sp!, {r4, r5}`
223			`adds r2, r2, #0x0c`
224			`blt Lmemcpy_fsrcul1l4`
225
226			`Lmemcpy_fsrcul1loop4:`
227			`mov r12, lr, lsr #8`
228			`ldr lr, [r1], #4`
229			`orr r12, r12, lr, lsl #24`
230			`str r12, [r0], #4`
231			`subs r2, r2, #4`
232			`bge Lmemcpy_fsrcul1loop4`
233
234			`Lmemcpy_fsrcul1l4:`
235			`sub r1, r1, #3`
236			`b Lmemcpy_fl4`
237
238			`Lmemcpy_fsrcul2:`
239			`cmp r2, #0x0c`
240			`blt Lmemcpy_fsrcul2loop4`
241			`sub r2, r2, #0x0c`
242			`stmdb sp!, {r4, r5}`
243
244			`Lmemcpy_fsrcul2loop16:`
245			`mov r3, lr, lsr #16`
246			`ldmia r1!, {r4, r5, r12, lr}`
247			`orr r3, r3, r4, lsl #16`
248			`mov r4, r4, lsr #16`
249			`orr r4, r4, r5, lsl #16`
250			`mov r5, r5, lsr #16`
251			`orr r5, r5, r12, lsl #16`
252			`mov r12, r12, lsr #16`
253			`orr r12, r12, lr, lsl #16`
254			`stmia r0!, {r3-r5, r12}`
255			`subs r2, r2, #0x10`
256			`bge Lmemcpy_fsrcul2loop16`
257			`ldmia sp!, {r4, r5}`
258			`adds r2, r2, #0x0c`
259			`blt Lmemcpy_fsrcul2l4`
260
261			`Lmemcpy_fsrcul2loop4:`
262			`mov r12, lr, lsr #16`
263			`ldr lr, [r1], #4`
264			`orr r12, r12, lr, lsl #16`
265			`str r12, [r0], #4`
266			`subs r2, r2, #4`
267			`bge Lmemcpy_fsrcul2loop4`
268
269			`Lmemcpy_fsrcul2l4:`
270			`sub r1, r1, #2`
271			`b Lmemcpy_fl4`
272
273			`Lmemcpy_fsrcul3:`
274			`cmp r2, #0x0c`
275			`blt Lmemcpy_fsrcul3loop4`
276			`sub r2, r2, #0x0c`
277			`stmdb sp!, {r4, r5}`
278
279			`Lmemcpy_fsrcul3loop16:`
280			`mov r3, lr, lsr #24`
281			`ldmia r1!, {r4, r5, r12, lr}`
282			`orr r3, r3, r4, lsl #8`
283			`mov r4, r4, lsr #24`
284			`orr r4, r4, r5, lsl #8`
285			`mov r5, r5, lsr #24`
286			`orr r5, r5, r12, lsl #8`
287			`mov r12, r12, lsr #24`
288			`orr r12, r12, lr, lsl #8`
289			`stmia r0!, {r3-r5, r12}`
290			`subs r2, r2, #0x10`
291			`bge Lmemcpy_fsrcul3loop16`
292			`ldmia sp!, {r4, r5}`
293			`adds r2, r2, #0x0c`
294			`blt Lmemcpy_fsrcul3l4`
295
296			`Lmemcpy_fsrcul3loop4:`
297			`mov r12, lr, lsr #24`
298			`ldr lr, [r1], #4`
299			`orr r12, r12, lr, lsl #8`
300			`str r12, [r0], #4`
301			`subs r2, r2, #4`
302			`bge Lmemcpy_fsrcul3loop4`
303
304			`Lmemcpy_fsrcul3l4:`
305			`sub r1, r1, #1`
306			`b Lmemcpy_fl4`
307
308			`Lmemcpy_backwards:`
309			`add r1, r1, r2`
310			`add r0, r0, r2`
311			`subs r2, r2, #4`
312			`blt Lmemcpy_bl4 /* less than 4 bytes */`
313			`ands r12, r0, #3`
314			`bne Lmemcpy_bdestul /* oh unaligned destination addr */`
315			`ands r12, r1, #3`
316			`bne Lmemcpy_bsrcul /* oh unaligned source addr */`
317
318			`Lmemcpy_bt8:`
319			`/* We have aligned source and destination */`
320			`subs r2, r2, #8`
321			`blt Lmemcpy_bl12 /* less than 12 bytes (4 from above) */`
322			`stmdb sp!, {r4, lr}`
323			`subs r2, r2, #0x14 /* less than 32 bytes (12 from above) */`
324			`blt Lmemcpy_bl32`
325
326			`/* blat 32 bytes at a time */`
327			`/* XXX for really big copies perhaps we should use more registers */`
328			`Lmemcpy_bloop32:`
329			`ldmdb r1!, {r3, r4, r12, lr}`
330			`stmdb r0!, {r3, r4, r12, lr}`
331			`ldmdb r1!, {r3, r4, r12, lr}`
332			`stmdb r0!, {r3, r4, r12, lr}`
333			`subs r2, r2, #0x20`
334			`bge Lmemcpy_bloop32`
335
336			`Lmemcpy_bl32:`
337			`cmn r2, #0x10`
338			`ldmgedb r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */`
339			`stmgedb r0!, {r3, r4, r12, lr}`
340			`subge r2, r2, #0x10`
341			`adds r2, r2, #0x14`
342			`ldmgedb r1!, {r3, r12, lr} /* blat a remaining 12 bytes */`
343			`stmgedb r0!, {r3, r12, lr}`
344			`subge r2, r2, #0x0c`
345			`ldmia sp!, {r4, lr}`
346
347			`Lmemcpy_bl12:`
348			`adds r2, r2, #8`
349			`blt Lmemcpy_bl4`
350			`subs r2, r2, #4`
351			`ldrlt r3, [r1, #-4]!`
352			`strlt r3, [r0, #-4]!`
353			`ldmgedb r1!, {r3, r12}`
354			`stmgedb r0!, {r3, r12}`
355			`subge r2, r2, #4`
356
357			`Lmemcpy_bl4:`
358			`/* less than 4 bytes to go */`
359			`adds r2, r2, #4`
360			`moveq pc, lr /* done */`
361
362			`/* copy the crud byte at a time */`
363			`cmp r2, #2`
364			`ldrb r3, [r1, #-1]!`
365			`strb r3, [r0, #-1]!`
366			`ldrgeb r3, [r1, #-1]!`
367			`strgeb r3, [r0, #-1]!`
368			`ldrgtb r3, [r1, #-1]!`
369			`strgtb r3, [r0, #-1]!`
370			`mov pc, lr`
371
372			`/* erg - unaligned destination */`
373			`Lmemcpy_bdestul:`
374			`cmp r12, #2`
375
376			`/* align destination with byte copies */`
377			`ldrb r3, [r1, #-1]!`
378			`strb r3, [r0, #-1]!`
379			`ldrgeb r3, [r1, #-1]!`
380			`strgeb r3, [r0, #-1]!`
381			`ldrgtb r3, [r1, #-1]!`
382			`strgtb r3, [r0, #-1]!`
383			`subs r2, r2, r12`
384			`blt Lmemcpy_bl4 /* less than 4 bytes to go */`
385			`ands r12, r1, #3`
386			`beq Lmemcpy_bt8 /* we have an aligned source */`
387
388			`/* erg - unaligned source */`
389			`/* This is where it gets nasty ... */`
390			`Lmemcpy_bsrcul:`
391			`bic r1, r1, #3`
392			`ldr r3, [r1, #0]`
393			`cmp r12, #2`
394			`blt Lmemcpy_bsrcul1`
395			`beq Lmemcpy_bsrcul2`
396			`cmp r2, #0x0c`
397			`blt Lmemcpy_bsrcul3loop4`
398			`sub r2, r2, #0x0c`
399			`stmdb sp!, {r4, r5, lr}`
400
401			`Lmemcpy_bsrcul3loop16:`
402			`mov lr, r3, lsl #8`
403			`ldmdb r1!, {r3-r5, r12}`
404			`orr lr, lr, r12, lsr #24`
405			`mov r12, r12, lsl #8`
406			`orr r12, r12, r5, lsr #24`
407			`mov r5, r5, lsl #8`
408			`orr r5, r5, r4, lsr #24`
409			`mov r4, r4, lsl #8`
410			`orr r4, r4, r3, lsr #24`
411			`stmdb r0!, {r4, r5, r12, lr}`
412			`subs r2, r2, #0x10`
413			`bge Lmemcpy_bsrcul3loop16`
414			`ldmia sp!, {r4, r5, lr}`
415			`adds r2, r2, #0x0c`
416			`blt Lmemcpy_bsrcul3l4`
417
418			`Lmemcpy_bsrcul3loop4:`
419			`mov r12, r3, lsl #8`
420			`ldr r3, [r1, #-4]!`
421			`orr r12, r12, r3, lsr #24`
422			`str r12, [r0, #-4]!`
423			`subs r2, r2, #4`
424			`bge Lmemcpy_bsrcul3loop4`
425
426			`Lmemcpy_bsrcul3l4:`
427			`add r1, r1, #3`
428			`b Lmemcpy_bl4`
429
430			`Lmemcpy_bsrcul2:`
431			`cmp r2, #0x0c`
432			`blt Lmemcpy_bsrcul2loop4`
433			`sub r2, r2, #0x0c`
434			`stmdb sp!, {r4, r5, lr}`
435
436			`Lmemcpy_bsrcul2loop16:`
437			`mov lr, r3, lsl #16`
438			`ldmdb r1!, {r3-r5, r12}`
439			`orr lr, lr, r12, lsr #16`
440			`mov r12, r12, lsl #16`
441			`orr r12, r12, r5, lsr #16`
442			`mov r5, r5, lsl #16`
443			`orr r5, r5, r4, lsr #16`
444			`mov r4, r4, lsl #16`
445			`orr r4, r4, r3, lsr #16`
446			`stmdb r0!, {r4, r5, r12, lr}`
447			`subs r2, r2, #0x10`
448			`bge Lmemcpy_bsrcul2loop16`
449			`ldmia sp!, {r4, r5, lr}`
450			`adds r2, r2, #0x0c`
451			`blt Lmemcpy_bsrcul2l4`
452
453			`Lmemcpy_bsrcul2loop4:`
454			`mov r12, r3, lsl #16`
455			`ldr r3, [r1, #-4]!`
456			`orr r12, r12, r3, lsr #16`
457			`str r12, [r0, #-4]!`
458			`subs r2, r2, #4`
459			`bge Lmemcpy_bsrcul2loop4`
460
461			`Lmemcpy_bsrcul2l4:`
462			`add r1, r1, #2`
463			`b Lmemcpy_bl4`
464
465			`Lmemcpy_bsrcul1:`
466			`cmp r2, #0x0c`
467			`blt Lmemcpy_bsrcul1loop4`
468			`sub r2, r2, #0x0c`
469			`stmdb sp!, {r4, r5, lr}`
470
471			`Lmemcpy_bsrcul1loop32:`
472			`mov lr, r3, lsl #24`
473			`ldmdb r1!, {r3-r5, r12}`
474			`orr lr, lr, r12, lsr #8`
475			`mov r12, r12, lsl #24`
476			`orr r12, r12, r5, lsr #8`
477			`mov r5, r5, lsl #24`
478			`orr r5, r5, r4, lsr #8`
479			`mov r4, r4, lsl #24`
480			`orr r4, r4, r3, lsr #8`
481			`stmdb r0!, {r4, r5, r12, lr}`
482			`subs r2, r2, #0x10`
483			`bge Lmemcpy_bsrcul1loop32`
484			`ldmia sp!, {r4, r5, lr}`
485			`adds r2, r2, #0x0c`
486			`blt Lmemcpy_bsrcul1l4`
487
488			`Lmemcpy_bsrcul1loop4:`
489			`mov r12, r3, lsl #24`
490			`ldr r3, [r1, #-4]!`
491			`orr r12, r12, r3, lsr #8`
492			`str r12, [r0, #-4]!`
493			`subs r2, r2, #4`
494			`bge Lmemcpy_bsrcul1loop4`
495
496			`Lmemcpy_bsrcul1l4:`
497			`add r1, r1, #1`
498			`b Lmemcpy_bl4`
499