OpenCores
URL https://opencores.org/ocsvn/openrisc_me/openrisc_me/trunk

Subversion Repositories openrisc_me

[/] [openrisc/] [trunk/] [gnu-src/] [newlib-1.18.0/] [newlib/] [libc/] [machine/] [m68k/] [memcpy.S] - Rev 307

Go to most recent revision | Compare with Previous | Blame | View Log

/* a-memcpy.s -- memcpy, optimised for m68k asm
 *
 * Copyright (c) 2007 mocom software GmbH & Co KG)
 *
 * The authors hereby grant permission to use, copy, modify, distribute,
 * and license this software and its documentation for any purpose, provided
 * that existing copyright notices are retained in all copies and that this
 * notice is included verbatim in any distributions. No written agreement,
 * license, or royalty fee is required for any of the authorized uses.
 * Modifications to this software may be copyrighted by their authors
 * and need not follow the licensing terms described here, provided that
 * the new terms are clearly indicated on the first page of each file where
 * they apply.
 */

#include "m68kasm.h"

/* MISALIGNED_OK is non-zero when the CPU performs word and long-word
   accesses at odd addresses in hardware.  memcpy then aligns only the
   destination pointer and lets the source stay misaligned.

   The 68000, 68010 and CPU32 raise an address error on a word or long
   access at an odd address, so they must not be listed here: they take
   the path that falls back to byte copies when either pointer is not
   4-byte aligned.  */
#if defined (__mcoldfire__) || defined (__mc68020__) || defined (__mc68030__) || defined (__mc68040__) || defined (__mc68060__)
# define MISALIGNED_OK 1
#else
# define MISALIGNED_OK 0
#endif
        
        /* Emit memcpy into the code section.  */
        .text
        /* Alignment request of 4 for the entry point.  GNU as documents
           .align as a byte count on m68k -- TODO confirm if another
           assembler is ever used.  */
        .align  4

        /* Export the symbol and mark it as a function.  SYM() comes from
           m68kasm.h; presumably it applies the target's symbol prefix --
           verify against that header.  */
        .globl  SYM(memcpy)
        .type   SYM(memcpy), @function

/*   memcpy, optimised
 *
 *   strategy:
 *       - no argument testing (the original memcpy from the GNU lib does
 *         no checking either)
 *       - make sure the destination pointer (the write pointer) is long word
 *         aligned. This is the best you can do, because writing to unaligned
 *         addresses can be the most costly thing you could do.
 *       - Once you have figured that out, we do a little loop unrolling
 *         to further improve speed.
 */

/*
 * void *memcpy (void *dest, const void *src, size_t len)
 *
 * In:     4(sp)  = dest
 *         8(sp)  = src
 *         12(sp) = len
 * Out:    d0     = dest (reloaded from the stack just before returning)
 * Uses:   d0, d1, a0, a1 and the condition codes; nothing is pushed on
 *         the stack.
 *
 * Register roles in the body:
 *         a0 = write pointer, a1 = read pointer
 *         d1 = bytes still to copy; once the long-word loop has been set
 *              up it holds only the 0-3 byte residue
 *         d0 = scratch, then the number of 16-byte groups
 *
 * Both counted loops on non-ColdFire parts use dbra, which only counts
 * the low 16 bits of its register.  Each dbra is therefore followed by a
 * subtract of 0x10000 and a bpl so that the high word is honoured too.
 */
SYM(memcpy):
        move.l  4(sp),a0        | dest ptr
        move.l  8(sp),a1        | src ptr
        move.l  12(sp),d1       | len
        cmp.l   #8,d1           | if fewer than 8 bytes to transfer,
        blo     .Lresidue       | do not optimise

#if !MISALIGNED_OK
        /* Word and long accesses need aligned pointers on these parts:
           copy everything byte by byte unless both dest and src are
           4-byte aligned.  The length may be arbitrarily large here.  */
        move.l  a0,d0
        and.l   #3,d0
        bne     .Lresidue
        move.l  a1,d0
        and.l   #3,d0
        bne     .Lresidue
#else /* MISALIGNED_OK */
        /* Align dest only; src may stay misaligned.  len >= 8 at this
           point, so the at most 3 alignment bytes cannot exhaust it.  */
        move.l  a0,d0           | copy of dest
        neg.l   d0
        and.l   #3,d0           | bytes up to the next long boundary
        beq     .Laligned       | is aligned?
        sub.l   d0,d1           | take the alignment bytes off the length
        lsr.l   #1,d0           | bit 0 into carry: single byte needed?
        bcc     .Lalign_word
        move.b  (a1)+,(a0)+
.Lalign_word:
        lsr.l   #1,d0           | bit 1 into carry: single word needed?
        bcc     .Laligned
        move.w  (a1)+,(a0)+
.Laligned:
#endif /* !MISALIGNED_OK */

        /* Long word transfers.  The remaining length is split up as:
             bits 0-1  byte residue, kept in d1
             bit  2    one extra long word
             bit  3    one extra pair of long words (the loop is entered
                       half way, at .Lcopy8)
             bits 4+   number of 16-byte groups, kept in d0  */
        move.l  d1,d0
        and.l   #3,d1           | byte residue
        lsr.l   #3,d0
        bcc     .Lno_long       | carry set for 4-byte residue
        move.l  (a1)+,(a0)+
.Lno_long:
        lsr.l   #1,d0           | number of 16-byte transfers
        bcc     .Lcopy          | carry set for 8-byte residue
        bra     .Lcopy8

.Lcopy16:
        move.l  (a1)+,(a0)+
        move.l  (a1)+,(a0)+
.Lcopy8:
        move.l  (a1)+,(a0)+
        move.l  (a1)+,(a0)+
.Lcopy:
#if !defined (__mcoldfire__)
        dbra    d0,.Lcopy16     | counts the low word only
        sub.l   #0x10000,d0     | borrow from the high word
#else
        subq.l  #1,d0
#endif
        bpl     .Lcopy16
        bra     .Lresidue

.Lbyte:
        move.b  (a1)+,(a0)+     | move residue bytes

.Lresidue:
#if !defined (__mcoldfire__)
        dbra    d1,.Lbyte       | counts the low word only
        sub.l   #0x10000,d1     | borrow from the high word, needed when
        bpl     .Lbyte          | a whole large copy is done bytewise
#else
        subq.l  #1,d1
        bpl     .Lbyte
#endif
        move.l  4(sp),d0        | return value
        rts

Go to most recent revision | Compare with Previous | Blame | View Log

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.