OpenCores
URL https://opencores.org/ocsvn/or1k_soc_on_altera_embedded_dev_kit/or1k_soc_on_altera_embedded_dev_kit/trunk

Subversion Repositories or1k_soc_on_altera_embedded_dev_kit

[/] [or1k_soc_on_altera_embedded_dev_kit/] [trunk/] [linux-2.6/] [linux-2.6.24/] [arch/] [powerpc/] [lib/] [memcpy_64.S] - Rev 3

Compare with Previous | Blame | View Log

/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>

        /*
         * memcpy: copy r5 bytes from the address in r4 to the address in r3.
         *
         * Register use, as read from the code in this file:
         *   r3 = destination (saved at 48(r1), reloaded as the return value)
         *   r4 = source
         *   r5 = byte count
         *
         * Dispatch performed here:
         *   count < 16                      -> .Lshort_copy
         *   destination not 8-byte aligned  -> .Ldst_unaligned
         *   otherwise                       -> falls through to .Ldst_aligned
         *
         * NOTE(review): .align 7 presumably requests 2^7 = 128-byte alignment
         * on this target; confirm against the assembler documentation.
         */
        .align  7
_GLOBAL(memcpy)
        std     r3,48(r1)       /* save destination pointer for return value */
        /*
         * Low 4 bits of the count go into cr7; later code tests them with
         * bf/bt cr7*4+0..3 as the 8, 4, 2 and 1 byte bits of the count.
         * (PPC_MTOCRF is defined outside this file.)
         */
        PPC_MTOCRF      0x01,r5
        /* cr1 = unsigned compare of the count against 16 */
        cmpldi  cr1,r5,16
        neg     r6,r3           # LS 3 bits = # bytes to 8-byte dest bdry
        /* r6 = bytes needed to reach the boundary; cr0.eq set if already aligned */
        andi.   r6,r6,7
        /* cache-touch hint for the start of the source */
        dcbt    0,r4
        blt     cr1,.Lshort_copy
        /* cr0 still comes from the andi. above: r6 != 0 means unaligned dest */
        bne     .Ldst_unaligned
/*
 * Destination is 8-byte aligned here.  r5 = bytes still to copy, cr7 holds
 * the low 4 bits of that count and cr1 holds its unsigned compare against 16.
 * If the source is also 8-byte aligned, whole doublewords are copied by a
 * loop that moves 16 bytes per iteration; any trailing 1-7 bytes are then
 * stored by .Ldo_tail.  Otherwise control goes to .Lsrc_unaligned.
 */
.Ldst_aligned:
        /* r0 = source offset within its doubleword; cr0.eq set if it is zero */
        andi.   r0,r4,7
        /* bias r3 by -16 so the stores below can use offsets 8 and 16 */
        addi    r3,r3,-16
        bne     .Lsrc_unaligned
        /* r7 = count / 16 = number of 16-byte loop iterations (moved to ctr) */
        srdi    r7,r5,4
        ld      r9,0(r4)
        /* bias r4 by -8 to suit the 8/16 load offsets used below */
        addi    r4,r4,-8
        mtctr   r7
        /* r5 = trailing byte count (count & 7); cr0 is tested by beq 3f below */
        andi.   r5,r5,7
        /* 8s bit of the count clear: even number of doublewords, enter at 2: */
        bf      cr7*4+0,2f
        /* odd number of doublewords: move the first one to r8, step both pointers */
        addi    r3,r3,8
        addi    r4,r4,8
        mr      r8,r9
        /* fewer than 16 bytes: only that one doubleword, skip the loop */
        blt     cr1,3f
        /*
         * Copy loop: two loads and two stores per iteration.  Each load is
         * issued before the store of the previously loaded doubleword.
         */
1:      ld      r9,8(r4)
        std     r8,8(r3)
2:      ldu     r8,16(r4)
        stdu    r9,16(r3)
        bdnz    1b
        /* store the last whole doubleword still held in r8 */
3:      std     r8,8(r3)
        /* no trailing bytes (cr0 from the andi. on r5 above): return */
        beq     3f
        /* r3 -> first byte after the doubleword just stored */
        addi    r3,r3,16
        /* load the source doubleword that contains the trailing bytes */
        ld      r9,8(r4)
/*
 * Store the final 1-7 bytes.  r9 holds them starting at its most-significant
 * byte, r3 points at the next destination byte, and cr7 bits 1, 2 and 3 say
 * whether a 4-, 2- and 1-byte piece remains.  Each rotate brings the next
 * piece down to the low-order end of r9, where stw/sth/stb take it from.
 */
.Ldo_tail:
        bf      cr7*4+1,1f
        rotldi  r9,r9,32
        stw     r9,0(r3)
        addi    r3,r3,4
1:      bf      cr7*4+2,2f
        rotldi  r9,r9,16
        sth     r9,0(r3)
        addi    r3,r3,2
2:      bf      cr7*4+3,3f
        rotldi  r9,r9,8
        stb     r9,0(r3)
3:      ld      r3,48(r1)       /* return dest pointer */
        blr

/*
 * Destination is 8-byte aligned but the source is not.  On entry r0 holds
 * the source offset within its doubleword and r3 is already biased by -16
 * (both set at .Ldst_aligned).  The source pointer is rounded down to a
 * doubleword boundary, and every destination doubleword is assembled from
 * two neighbouring source doublewords:
 *     (earlier << r10) | (later >> r11)
 * with r10 = 8 * r0 (bits) and r11 = 64 - r10.
 */
.Lsrc_unaligned:
        /* r6 = count / 8 (number of whole doublewords) */
        srdi    r6,r5,3
        addi    r5,r5,-16
        /* r4 = source address rounded down to an 8-byte boundary */
        subf    r4,r0,r4
        /* r7 = (count - 16) / 16, moved to ctr below */
        srdi    r7,r5,4
        /* r10 = left-shift amount in bits */
        sldi    r10,r0,3
        /* cr6 = doubleword count compared with 3; selects the short exits below */
        cmpdi   cr6,r6,3
        /*
         * r5 = low 3 bits of the count (subtracting 16 does not change them).
         * cr0 from this andi. is tested by the beq 4f near the end.
         */
        andi.   r5,r5,7
        mtctr   r7
        /* r11 = right-shift amount in bits */
        subfic  r11,r10,64
        /* r5 = trailing bytes + source offset; compared with 8 before the tail */
        add     r5,r5,r0

        /* 8s bit of the count set (odd number of doublewords): use path 0: */
        bt      cr7*4+0,0f

        ld      r9,0(r4)        # 3+2n loads, 2+2n stores
        ld      r0,8(r4)
        sld     r6,r9,r10
        ldu     r9,16(r4)
        srd     r7,r0,r11
        sld     r8,r0,r10
        or      r7,r7,r6
        /* fewer than 3 doublewords: skip the loop, store from 4: onward */
        blt     cr6,4f
        ld      r0,8(r4)
        # s1<< in r8, d0=(s0<<|s1>>) in r7, s3 in r0, s2 in r9, nix in r6 & r12
        b       2f

0:      ld      r0,0(r4)        # 4+2n loads, 3+2n stores
        ldu     r9,8(r4)
        sld     r8,r0,r10
        /* this path stores one more doubleword; pre-adjust r3 for the offsets used */
        addi    r3,r3,-8
        /* fewer than 3 doublewords: only the store at 5: is needed */
        blt     cr6,5f
        ld      r0,8(r4)
        srd     r12,r9,r11
        sld     r6,r9,r10
        ldu     r9,16(r4)
        or      r12,r8,r12
        srd     r7,r0,r11
        sld     r8,r0,r10
        addi    r3,r3,16
        /* exactly 3 doublewords: skip the loop and flush from 3: */
        beq     cr6,3f

        # d0=(s0<<|s1>>) in r12, s1<< in r6, s2>> in r7, s2<< in r8, s3 in r9
        /* Main loop: two source loads and two destination stores per iteration. */
1:      or      r7,r7,r6
        ld      r0,8(r4)
        std     r12,8(r3)
2:      srd     r12,r9,r11
        sld     r6,r9,r10
        ldu     r9,16(r4)
        or      r12,r8,r12
        stdu    r7,16(r3)
        srd     r7,r0,r11
        sld     r8,r0,r10
        bdnz    1b

        /* Loop exit: flush the destination doublewords still held in registers. */
3:      std     r12,8(r3)
        or      r7,r7,r6
4:      std     r7,16(r3)
5:      srd     r12,r9,r11
        or      r12,r8,r12
        std     r12,24(r3)
        /*
         * No trailing bytes: branch to the next 4: label.  In this file that
         * is the return sequence at the end of .Lshort_copy.
         */
        beq     4f
        cmpwi   cr1,r5,8
        /* r3 -> first destination byte not yet written */
        addi    r3,r3,32
        /* move the unused bytes of the last loaded doubleword to the top of r9 */
        sld     r9,r9,r10
        /* offset + trailing bytes <= 8: r9 already holds every remaining byte */
        ble     cr1,.Ldo_tail
        /* otherwise merge in the leading bytes of one more source doubleword */
        ld      r0,8(r4)
        srd     r7,r0,r11
        or      r9,r7,r9
        b       .Ldo_tail

/*
 * Destination is not 8-byte aligned.  Reached only when the count is at
 * least 16 (smaller counts branch to .Lshort_copy first).  On entry r6 holds
 * the number of bytes (1-7) needed to bring the destination up to the next
 * 8-byte boundary.  Exactly that many bytes are copied as an optional byte,
 * halfword and word, in that order, using r7 as the running offset from the
 * original r3/r4.  Then the count, cr1, cr7 and both pointers are updated
 * and control rejoins .Ldst_aligned.
 */
.Ldst_unaligned:
        PPC_MTOCRF      0x01,r6         # put #bytes to 8B bdry into cr7
        /* r5 = bytes left once the alignment bytes have been copied */
        subf    r5,r6,r5
        /* r7 = offset of the next byte to copy */
        li      r7,0
        /*
         * Recompute cr1 for the reduced count; .Ldst_aligned tests it with
         * blt cr1.  The first operand is a CR field, so it is written cr1
         * rather than the GPR name r1.
         */
        cmpldi  cr1,r5,16
        /* 1s bit of r6: copy one byte */
        bf      cr7*4+3,1f
        lbz     r0,0(r4)
        stb     r0,0(r3)
        addi    r7,r7,1
        /* 2s bit of r6: copy one halfword at offset r7 */
1:      bf      cr7*4+2,2f
        lhzx    r0,r7,r4
        sthx    r0,r7,r3
        addi    r7,r7,2
        /* 4s bit of r6: copy one word at offset r7 */
2:      bf      cr7*4+1,3f
        lwzx    r0,r7,r4
        stwx    r0,r7,r3
        /* reload cr7 with the low 4 bits of the remaining count */
3:      PPC_MTOCRF      0x01,r5
        /* step both pointers past the bytes just copied */
        add     r4,r6,r4
        add     r3,r6,r3
        b       .Ldst_aligned

/*
 * Copy fewer than 16 bytes.  cr7 holds the low 4 bits of the count, which is
 * the whole count on this path, so bits 0, 1, 2 and 3 select an 8-, 4-, 2-
 * and 1-byte move in turn.  Only word, halfword and byte accesses are used,
 * and r3/r4 are advanced after each piece.
 */
.Lshort_copy:
        /* 8s bit: copy 8 bytes as two words */
        bf      cr7*4+0,1f
        lwz     r0,0(r4)
        lwz     r9,4(r4)
        addi    r4,r4,8
        stw     r0,0(r3)
        stw     r9,4(r3)
        addi    r3,r3,8
        /* 4s bit: copy one word */
1:      bf      cr7*4+1,2f
        lwz     r0,0(r4)
        addi    r4,r4,4
        stw     r0,0(r3)
        addi    r3,r3,4
        /* 2s bit: copy one halfword */
2:      bf      cr7*4+2,3f
        lhz     r0,0(r4)
        addi    r4,r4,2
        sth     r0,0(r3)
        addi    r3,r3,2
        /* 1s bit: copy the last byte */
3:      bf      cr7*4+3,4f
        lbz     r0,0(r4)
        stb     r0,0(r3)
        /*
         * Common return.  This 4: label is also the target of the beq 4f in
         * .Lsrc_unaligned, so it must stay the first 4: after that branch.
         */
4:      ld      r3,48(r1)       /* return dest pointer */
        blr

Compare with Previous | Blame | View Log

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.