OpenCores
URL https://opencores.org/ocsvn/openrisc_2011-10-31/openrisc_2011-10-31/trunk

Subversion Repositories openrisc_2011-10-31

[/] [openrisc/] [tags/] [gnu-src/] [newlib-1.18.0/] [newlib-1.18.0-or32-1.0rc1/] [newlib/] [libc/] [machine/] [sh/] [memset.S] - Diff between revs 207 and 345

Only display areas with differences | Details | Blame | View Log

Rev 207 Rev 345
!
!
! Fast SH memset
! Fast SH memset
!
!
! by Toshiyasu Morita (tm@netcom.com)
! by Toshiyasu Morita (tm@netcom.com)
!
!
! SH5 code by J"orn Rennecke (joern.rennecke@superh.com)
! SH5 code by J"orn Rennecke (joern.rennecke@superh.com)
! Copyright 2002 SuperH Ltd.
! Copyright 2002 SuperH Ltd.
!
!
#include "asm.h"
#include "asm.h"
! memset, SHmedia variant: entry point and the path for counts 0..3.
! As used by the code below: r2 is the base address of the stores, r3 is
! the value stored, and r4 is the byte count.
ENTRY(memset)
ENTRY(memset)
#if __SHMEDIA__
#if __SHMEDIA__
! tr0 is loaded with the multiquad target; tr2 is loaded from r18 and is
! the target of every blink that leaves this routine (presumably the
! return address - confirm against the ABI).
        pta/l multiquad, tr0
        pta/l multiquad, tr0
        ptabs r18, tr2
        ptabs r18, tr2
! r25 = r2 with the low three bits cleared; r5 = r2 + r4.
        andi r2, -8, r25
        andi r2, -8, r25
        add r2, r4, r5
        add r2, r4, r5
        addi r5, -1, r20    // calculate end address.
        addi r5, -1, r20    // calculate end address.
! r20 = (r5 - 1) with the low three bits cleared.
        andi r20, -8, r20
        andi r20, -8, r20
! When r4 is zero, r20 is overwritten with r25 so that the comparison
! below does not branch to multiquad.
        cmveq r4, r25, r20
        cmveq r4, r25, r20
! Branch to multiquad when r25 and r20 differ.
        bne/u r25, r20, tr0 // multiquad
        bne/u r25, r20, tr0 // multiquad
!       This sequence could clobber volatile objects that are in the same
!       This sequence could clobber volatile objects that are in the same
!       quadword as a very short char array.
!       quadword as a very short char array.
!       ldlo.q r2, 0, r7
!       ldlo.q r2, 0, r7
!       shlli r4, 2, r4
!       shlli r4, 2, r4
!       movi -1, r8
!       movi -1, r8
!       SHHI r8, r4, r8
!       SHHI r8, r4, r8
!       SHHI r8, r4, r8
!       SHHI r8, r4, r8
!       mcmv r7, r8, r3
!       mcmv r7, r8, r3
!       stlo.q r2, 0, r3
!       stlo.q r2, 0, r3
! r25 == r20 here.  Counts of 4 or more (unsigned compare) go to setlongs.
        pta/l setlongs, tr0
        pta/l setlongs, tr0
        movi 4, r8
        movi 4, r8
        bgeu/u r4, r8, tr0
        bgeu/u r4, r8, tr0
! Counts 0..3: store one byte at a time, leaving through endset as soon
! as the count is reached.
        pta/l endset, tr0
        pta/l endset, tr0
        beqi/u r4, 0, tr0
        beqi/u r4, 0, tr0
        st.b r2, 0, r3
        st.b r2, 0, r3
        beqi/u r4, 1, tr0
        beqi/u r4, 1, tr0
        nop
        nop
        st.b r2, 1, r3
        st.b r2, 1, r3
        beqi/l r4, 2, tr0
        beqi/l r4, 2, tr0
        st.b r2,2,r3
        st.b r2,2,r3
! Leave through tr2; r2 has not been written on this path.
endset: blink tr2, r63
endset: blink tr2, r63
! setlongs: reached from the entry code when r4 >= 4 and the branch to
! multiquad was not taken.  The fill is done with one store based at r2
! and one based at r5 - 1 (r5 = r2 + r4 from the entry code).
setlongs:
setlongs:
        mshflo.b r3, r3, r3
        mshflo.b r3, r3, r3
        mperm.w r3, r63, r3     // Fill pattern now in every byte of r3
        mperm.w r3, r63, r3     // Fill pattern now in every byte of r3
! NOTE(review): stlo.l / sthi.l look like the unaligned longword store
! pair (low part starting at r2, high part ending at r5 - 1) - confirm
! against the SHmedia instruction reference.
        stlo.l r2, 0, r3
        stlo.l r2, 0, r3
        nop
        nop
        nop
        nop
        sthi.l r5, -1, r3
        sthi.l r5, -1, r3
! Leave through tr2.
        blink tr2, r63
        blink tr2, r63
! multiquad: reached from the entry code when r25 (r2 rounded down to a
! multiple of 8) differs from r20 (r5 - 1 rounded down to a multiple of 8).
multiquad:
multiquad:
        mshflo.b r3, r3, r3
        mshflo.b r3, r3, r3
        mperm.w r3, r63, r3     // Fill pattern now in every byte of r3
        mperm.w r3, r63, r3     // Fill pattern now in every byte of r3
        pta/l lastquad, tr0
        pta/l lastquad, tr0
! Store based at the original start address r2.
! NOTE(review): presumably the low part of an unaligned quadword store,
! covering the bytes from r2 up to the next 8-byte boundary - confirm.
        stlo.q r2, 0, r3
        stlo.q r2, 0, r3
! r24 = r20 - r25.
        sub r20, r25, r24
        sub r20, r25, r24
        movi 64, r9
        movi 64, r9
! When r24 is 8, go straight to lastquad.
        beqi/u r24, 8, tr0 // lastquad
        beqi/u r24, 8, tr0 // lastquad
        pta/l loop, tr1
        pta/l loop, tr1
        addi r20, -7*8, r8 // loop end address; This might overflow, so we need
        addi r20, -7*8, r8 // loop end address; This might overflow, so we need
                           // to use a different test before we start the loop
                           // to use a different test before we start the loop
! r24 >= 64 (unsigned compare) takes the loop path.
        bgeu/u r24, r9, tr1// loop
        bgeu/u r24, r9, tr1// loop
! r24 < 64 here.  Quadwords are stored in pairs, one at an increasing
! offset from r25 and one at a decreasing offset from r20, and r24 >> 4
! selects how many pairs are written before going to lastquad.
        st.q r25, 8, r3
        st.q r25, 8, r3
        shlri r24, 4, r24
        shlri r24, 4, r24
        st.q r20, -8, r3
        st.q r20, -8, r3
        beqi/u r24, 1, tr0 // lastquad
        beqi/u r24, 1, tr0 // lastquad
        st.q r25, 16, r3
        st.q r25, 16, r3
        st.q r20, -16, r3
        st.q r20, -16, r3
        beqi/u r24, 2, tr0 // lastquad
        beqi/u r24, 2, tr0 // lastquad
        st.q r25, 24, r3
        st.q r25, 24, r3
        st.q r20, -24, r3
        st.q r20, -24, r3
! lastquad: final store, based at r5 - 1, then leave through tr2.
! NOTE(review): presumably the high part of an unaligned quadword store,
! ending at the last byte of the region - confirm.
lastquad:
lastquad:
        sthi.q r5, -1, r3
        sthi.q r5, -1, r3
        blink tr2,r63
        blink tr2,r63
! loop: entered from multiquad when r24 >= 64.  Each iteration stores four
! quadwords at r25 + 8 .. r25 + 32 and then advances r25 by 32.
! r8 holds r20 - 56, the loop end address computed in multiquad.
loop:
loop:
! NOTE(review): alloco is presumably a cache-block allocate for the
! address r25 + 32 - confirm against the SHmedia instruction reference.
        alloco r25, 32
        alloco r25, 32
        st.q r25, 8, r3
        st.q r25, 8, r3
        st.q r25, 16, r3
        st.q r25, 16, r3
        st.q r25, 24, r3
        st.q r25, 24, r3
        st.q r25, 32, r3
        st.q r25, 32, r3
        addi r25, 32, r25
        addi r25, 32, r25
! Repeat while r8 >= r25 (unsigned compare).
        bgeu/l r8, r25, tr1 // loop
        bgeu/l r8, r25, tr1 // loop
! After the loop: five quadword stores at fixed offsets below r20, then
! the final store based at r5 - 1, then leave through tr2.
        st.q r20, -40, r3
        st.q r20, -40, r3
        st.q r20, -32, r3
        st.q r20, -32, r3
        st.q r20, -24, r3
        st.q r20, -24, r3
        st.q r20, -16, r3
        st.q r20, -16, r3
        st.q r20, -8, r3
        st.q r20, -8, r3
        sthi.q r5, -1, r3
        sthi.q r5, -1, r3
        blink tr2,r63
        blink tr2,r63
#else /* ! SHMEDIA, i.e. SH1 .. SH4 / SHcompact */
#else /* ! SHMEDIA, i.e. SH1 .. SH4 / SHcompact */
! Entry: r4: destination pointer
! Entry: r4: destination pointer
!        r5: fill value
!        r5: fill value
!        r6: byte count
!        r6: byte count
!
!
! Exit:  r0-r3: trashed
! Exit:  r0-r3: trashed
!
!
! This assumes that the first four bytes of the address space (0..3) are
! This assumes that the first four bytes of the address space (0..3) are
! reserved - usually by the linker script.  Otherwise, we would have to check
! reserved - usually by the linker script.  Otherwise, we would have to check
! for the case of objects of the size 12..15 at address 0..3 .
! for the case of objects of the size 12..15 at address 0..3 .
! Register names for the non-SHmedia code.  Two mappings are provided,
! selected by __SH5__: destination, fill value, byte count, scratch.
#ifdef __SH5__
#ifdef __SH5__
#define DST r2
#define DST r2
#define VAL r3
#define VAL r3
#define CNT r4
#define CNT r4
#define TMP r5
#define TMP r5
#else
#else
#define DST r4
#define DST r4
#define VAL r5
#define VAL r5
#define CNT r6
#define CNT r6
#define TMP r2
#define TMP r2
#endif
#endif
! Entry code.  r0 becomes the write cursor (copy of the destination) and
! the count register is turned into the end address (count + destination).
! Counts below 12 (signed compare) go straight to the byte loop.
! NOTE(review): the SL macro comes from asm.h and is not visible here; it
! appears to pair a branch with an instruction that is executed whether or
! not the branch is taken - confirm.
        mov     #12,r0  ! Check for small number of bytes
        mov     #12,r0  ! Check for small number of bytes
        cmp/gt  CNT,r0
        cmp/gt  CNT,r0
        mov     DST,r0
        mov     DST,r0
        SL(bt, L_store_byte_loop_check0, add DST,CNT)
        SL(bt, L_store_byte_loop_check0, add DST,CNT)
! If the low two bits of r0 are already clear, skip the align loop.
! NOTE(review): extu.b names r5 directly rather than the fill value macro.
! With the __SH5__ mapping the fill value is r3 and r5 is the scratch
! register, so the fill value would not be zero-extended - confirm intent.
        tst     #3,r0   ! Align destination
        tst     #3,r0   ! Align destination
        SL(bt,  L_dup_bytes, extu.b r5,r5)
        SL(bt,  L_dup_bytes, extu.b r5,r5)
! Pad to a 4-byte boundary with 0x0009 words (presumably the nop encoding).
        .balignw 4,0x0009
        .balignw 4,0x0009
! Store single bytes at r0, advancing r0 by one each time, until the low
! two bits of r0 are clear.
L_align_loop:
L_align_loop:
        mov.b   VAL,@r0
        mov.b   VAL,@r0
        add     #1,r0
        add     #1,r0
        tst     #3,r0
        tst     #3,r0
        bf      L_align_loop
        bf      L_align_loop
! Build the fill longword: OR the value with a copy that has its two low
! bytes swapped, then with a copy that has its two halves swapped.
L_dup_bytes:
L_dup_bytes:
        swap.b  VAL,TMP ! Duplicate bytes across longword
        swap.b  VAL,TMP ! Duplicate bytes across longword
        or      TMP,VAL
        or      TMP,VAL
        swap.w  VAL,TMP
        swap.w  VAL,TMP
        or      TMP,VAL
        or      TMP,VAL
! Bias the end address by -16 for the long loop test; the loop adds the
! 16 back once it is done.
        add     #-16,CNT
        add     #-16,CNT
! Pad to a 4-byte boundary with 0x0009 words (presumably the nop encoding).
        .balignw 4,0x0009
        .balignw 4,0x0009
! Store two longwords per iteration, at r0 and r0 + 4, and advance r0 by 8.
! The loop repeats while r0, as compared before the advance, is below the
! biased end address (unsigned compare).
L_store_long_loop:
L_store_long_loop:
        mov.l   VAL,@r0 ! Store double longs to memory
        mov.l   VAL,@r0 ! Store double longs to memory
        cmp/hs  CNT,r0
        cmp/hs  CNT,r0
        mov.l   VAL,@(4,r0)
        mov.l   VAL,@(4,r0)
        SL(bf, L_store_long_loop, add #8,r0)
        SL(bf, L_store_long_loop, add #8,r0)
! Undo the -16 bias so the register holds the real end address again.
        add     #16,CNT
        add     #16,CNT
! Byte tail.  Nothing is stored when r0 already equals the end address.
L_store_byte_loop_check0:
L_store_byte_loop_check0:
        cmp/eq  CNT,r0
        cmp/eq  CNT,r0
        bt      L_exit
        bt      L_exit
! Pad to a 4-byte boundary with 0x0009 words (presumably the nop encoding).
        .balignw 4,0x0009
        .balignw 4,0x0009
! Store one byte at r0 and advance r0 until it equals the end address.
L_store_byte_loop:
L_store_byte_loop:
        mov.b   VAL,@r0 ! Store bytes to memory
        mov.b   VAL,@r0 ! Store bytes to memory
        add     #1,r0
        add     #1,r0
        cmp/eq  CNT,r0
        cmp/eq  CNT,r0
        bf      L_store_byte_loop
        bf      L_store_byte_loop
! Return to the caller with r0 = r4; the mov sits directly after rts.
! With the non-__SH5__ mapping r4 is the destination pointer.
! NOTE(review): r4 is named literally here; with the __SH5__ mapping r4 is
! the count register, not the destination - confirm intent.
L_exit:
L_exit:
        rts
        rts
        mov     r4,r0
        mov     r4,r0
#endif /* ! SHMEDIA */
#endif /* ! SHMEDIA */
 
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.