OpenCores
URL https://opencores.org/ocsvn/openrisc_me/openrisc_me/trunk

Subversion Repositories openrisc_me

[/] [openrisc/] [trunk/] [gnu-src/] [newlib-1.17.0/] [newlib/] [libc/] [machine/] [h8300/] [memcpy.S] - Rev 407

Go to most recent revision | Compare with Previous | Blame | View Log

#include "setarch.h"

#include "defines.h"

#ifdef __H8300SX__

        ; memcpy for H8SX.
        ;
        ; On entry er0 holds the destination, er1 the source and r2/er2 the
        ; byte count.  er0 is never written, so it still holds the
        ; destination pointer on return.  er4-er6 are saved on entry and
        ; restored by the final rts/l on every exit path.
        ;
        ; Inside the routine er5 is the running source pointer, er6 the
        ; running destination pointer and r4 the element count for movmd.
        ;
        ; NOTE(review): the LEN macro is presumably supplied by defines.h
        ; and picks the 16-bit or 32-bit view of a register to match the
        ; width of size_t -- confirm against that header.
        .global _memcpy
_memcpy:
        stm.l   er4-er6,@-er7

        ; Set up source and destination pointers for movmd.
        mov.l   er0,er6
        mov.l   er1,er5

        ; See whether the copy is long enough to use the movmd.l code.
        ; Although the code can handle anything longer than 6 bytes,
        ; it can be more expensive than movmd.b for small moves.
        ; It's better to use a higher threshold to account for this.
        ;
        ; Note that the exact overhead of the movmd.l checks depends on
        ; the alignments of the length and pointers.  They are faster when
        ; er0 & 3 == er1 & 3 == er2 & 3, faster still when these values
        ; are 0.  This threshold is a compromise between the various cases.
        cmp     #16,LEN(r2)
        blo     simple

        ; movmd.l only works for even addresses.  If one of the addresses
        ; is odd and the other is not, fall back on a simple move.
        ; The carry ends up as (bit 0 of source) xor (bit 0 of destination).
        bld     #0,r5l
        bxor    #0,r6l
        bcs     simple

        ; Make the addresses even.  Both pointers have the same parity
        ; here, so testing the source alone is enough; if it is odd, one
        ; byte is copied and both pointers advance together.
        bld     #0,r5l
        bcc     word_aligned
        mov.b   @er5+,@er6+
        sub     #1,LEN(r2)

word_aligned:
        ; See if copying one word would make the first operand longword
        ; aligned.  Although this is only really worthwhile if it aligns
        ; the second operand as well, it's no worse if it doesn't, so it
        ; hardly seems worth the overhead of a "band" check.
        bld     #1,r6l
        bcc     fast_copy
        mov.w   @er5+,@er6+
        sub     #2,LEN(r2)

fast_copy:
        ; Set (e)r4 to the number of longwords to copy.
        ; The length was at least 16 and at most 3 bytes have been
        ; consumed by the fix-ups above, so the quotient is nonzero.
        mov     LEN(r2),LEN(r4)
        shlr    #2,LEN(r4)

#ifdef __NORMAL_MODE__
        ; 16-bit pointers and size_ts: one movmd.l is enough.  This code
        ; is never reached with r4 == 0.
        movmd.l
        ; Keep only the 0-3 bytes left over after the longword copy.
        and.w   #3,r2
simple:
        ; Also the target for short or mismatched-parity copies, where r2
        ; still holds the whole length and may be zero.  A zero count is
        ; skipped rather than handed to movmd.b.
        mov.w   r2,r4
        beq     quit
        movmd.b
quit:
        rts/l   er4-er6
#else
        ; Skip the first iteration if the number of longwords is divisible
        ; by 0x10000.
        mov.w   r4,r4
        beq     fast_loop_next

        ; This loop copies r4 (!= 0) longwords the first time round and 65536
        ; longwords on each iteration after that.
fast_loop:
        movmd.l
fast_loop_next:
        ; e4 is the upper half of the longword count, i.e. the number of
        ; full 65536-longword passes still to do.  The subtraction borrows
        ; once none are left, and bhs then falls through.
        sub.w   #1,e4
        bhs     fast_loop

        ; Mop up any left-over bytes.  We could just fall through to the
        ; simple code after the "and" but the version below is quicker
        ; and only takes 10 more bytes.
        and.w   #3,r2
        beq     quit
        mov.w   r2,r4
        movmd.b
quit:
        rts/l   er4-er6

simple:
        ; Simple bytewise copy.  We need to handle all lengths, including zero.
        ; Same scheme as the longword loop: r4 takes the low 16 bits of
        ; the length for the first pass, and e2 (the upper 16 bits) counts
        ; the 65536-byte passes that follow.  If the low half is zero the
        ; first pass is skipped.
        mov.w   r2,r4
        beq     simple_loop_next
simple_loop:
        movmd.b
simple_loop_next:
        sub.w   #1,e2
        bhs     simple_loop
        rts/l   er4-er6
#endif

#else

        ; memcpy for the non-H8SX targets.
        ;
        ; Register roles, per the commented-out loads below: A0P = dst,
        ; A1P = src, A2P = len.  Those loads are disabled, so the arguments
        ; are evidently expected to be in these registers on entry.
        ;
        ; The copy runs backward: both pointers are first advanced to the
        ; end of their buffers and then pre-decremented until the
        ; destination pointer is back at the start of dst.  Whole words are
        ; used when dst, src and len are all even, single bytes otherwise.
        ; A3P and A2 are used as scratch.  On return A0P equals the
        ; original dst.
        ;
        ; NOTE(review): MOVP, ADDP, CMPP and the A0P/A1P/A2P/A3P names are
        ; presumably macros from defines.h that select 16-bit or 32-bit
        ; pointer operations -- confirm against that header.
        .global _memcpy
_memcpy:
;       MOVP    @(2/4,r7),A0P   ; dst
;       MOVP    @(4/8,r7),A1P   ; src
;       MOVP    @(6/12,r7),A2P  ; len

        MOVP    A0P,A3P ; keep copy of final dst
        ADDP    A2P,A0P ; point to end of dst
        CMPP    A0P,A3P ; see if anything to do
        beq     quit

        ADDP    A2P,A1P ; point to end of src

        ; let us see if we can do this in words
        ; A2L still holds the low byte of the length at this point.  The
        ; length itself is no longer needed now that both end pointers
        ; have been formed, so A2 can be overwritten here and then reused
        ; as the data register in the loops.
        or      A0L,A2L ; or in the end-of-dst address
        or      A3L,A2L ; or in the start-of-dst address
        or      A1L,A2L ; or in the end-of-src address
        btst    #0,A2L  ; see if the lsb is zero
        bne     byteloop

wordloop:
#ifdef __NORMAL_MODE__
        sub     #2,A1P
#else
        subs    #2,A1P          ; point to word
#endif
        mov.w   @A1P,A2         ; get word
        mov.w   A2,@-A0P        ; save word
        CMPP    A0P,A3P         ; at the front again ?
        bne     wordloop
        rts

byteloop:
#ifdef __NORMAL_MODE__
        sub     #1,A1P
#else
        subs    #1,A1P          ; point to byte
#endif
        mov.b   @A1P,A2L        ; get byte
        mov.b   A2L,@-A0P       ; save byte
        CMPP    A0P,A3P         ; at the front again ?
        bne     byteloop

        ; return with A0 pointing to dst
quit:   rts

#endif

Go to most recent revision | Compare with Previous | Blame | View Log

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.