OpenCores
URL https://opencores.org/ocsvn/openrisc_me/openrisc_me/trunk

Subversion Repositories openrisc_me

[/] [openrisc/] [trunk/] [gnu-src/] [newlib-1.17.0/] [newlib/] [libc/] [machine/] [sh/] [memset.S] - Blame information for rev 297

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 148 jeremybenn
!
2
! Fast SH memset
3
!
4
! by Toshiyasu Morita (tm@netcom.com)
5
!
6
! SH5 code by J"orn Rennecke (joern.rennecke@superh.com)
7
! Copyright 2002 SuperH Ltd.
8
!
9
 
10
#include "asm.h"
11
 
12
ENTRY(memset)
13
#if __SHMEDIA__
14
        pta/l multiquad, tr0    // tr0 = multiquad, used when the fill touches more than one aligned quadword
15
        ptabs r18, tr2          // tr2 = return target taken from r18 (presumably the link register - confirm)
16
 
17
        andi r2, -8, r25        // r25 = r2 & -8: start address rounded down to 8 bytes
18
        add r2, r4, r5          // r5 = start + count, one past the last byte to fill
19
        addi r5, -1, r20    // calculate end address.
20
        andi r20, -8, r20       // r20 = (r5 - 1) & -8: aligned quadword holding the last byte
21
        cmveq r4, r25, r20      // NOTE(review): presumably r20 = r25 when count r4 == 0, so a zero count stays on the short path
22
        bne/u r25, r20, tr0 // multiquad
23
 
24
!       This sequence could clobber volatile objects that are in the same
25
!       quadword as a very short char array.
26
!       ldlo.q r2, 0, r7
27
!       shlli r4, 2, r4
28
!       movi -1, r8
29
!       SHHI r8, r4, r8
30
!       SHHI r8, r4, r8
31
!       mcmv r7, r8, r3
32
!       stlo.q r2, 0, r3
33
 
34
        pta/l setlongs, tr0     // tr0 = setlongs
35
        movi 4, r8              // r8 = 4
36
        bgeu/u r4, r8, tr0      // count >= 4 (unsigned): go to setlongs
37
        pta/l endset, tr0       // tr0 = endset for the byte-at-a-time exits below
38
        beqi/u r4, 0, tr0       // count == 0: nothing to store
39
        st.b r2, 0, r3          // store byte 0
40
        beqi/u r4, 1, tr0       // count == 1: done
41
        nop
42
        st.b r2, 1, r3          // store byte 1
43
        beqi/l r4, 2, tr0       // count == 2: done
44
        st.b r2,2,r3            // store byte 2 (count == 3)
45
endset: blink tr2, r63          // branch to tr2, which was loaded from r18 on entry
46
setlongs:
47
        mshflo.b r3, r3, r3
48
        mperm.w r3, r63, r3     // Fill pattern now in every byte of r3
49
        stlo.l r2, 0, r3        // longword store addressed by the first byte, r2
50
        nop
51
        nop
52
        sthi.l r5, -1, r3       // longword store addressed by the last byte, r5 - 1
53
        blink tr2, r63          // branch to tr2 (return)
54
 
55
multiquad:
56
        mshflo.b r3, r3, r3
57
        mperm.w r3, r63, r3     // Fill pattern now in every byte of r3
58
        pta/l lastquad, tr0     // tr0 = lastquad
59
        stlo.q r2, 0, r3        // quadword store addressed by the first byte, r2
60
        sub r20, r25, r24       // r24 = r20 - r25: byte distance between first and last aligned quadword
61
        movi 64, r9             // r9 = 64, threshold for entering the main loop
62
        beqi/u r24, 8, tr0 // lastquad
63
        pta/l loop, tr1         // tr1 = loop
64
        addi r20, -7*8, r8 // loop end address; This might overflow, so we need
65
                           // to use a different test before we start the loop
66
        bgeu/u r24, r9, tr1// loop
67
        st.q r25, 8, r3         // store at r25 + 8
68
        shlri r24, 4, r24       // r24 >>= 4 (logical)
69
        st.q r20, -8, r3        // store at r20 - 8
70
        beqi/u r24, 1, tr0 // lastquad
71
        st.q r25, 16, r3        // store at r25 + 16
72
        st.q r20, -16, r3       // store at r20 - 16
73
        beqi/u r24, 2, tr0 // lastquad
74
        st.q r25, 24, r3        // store at r25 + 24
75
        st.q r20, -24, r3       // store at r20 - 24
76
lastquad:
77
        sthi.q r5, -1, r3       // quadword store addressed by the last byte, r5 - 1
78
        blink tr2,r63           // branch to tr2 (return)
79
 
80
loop:
81
        alloco r25, 32          // NOTE(review): presumably a cache allocate hint for r25 + 32 - confirm against the SH-5 manual
82
        st.q r25, 8, r3         // four quadword stores: r25 + 8 .. r25 + 32
83
        st.q r25, 16, r3
84
        st.q r25, 24, r3
85
        st.q r25, 32, r3
86
        addi r25, 32, r25       // r25 += 32
87
        bgeu/l r8, r25, tr1 // loop
88
 
89
        st.q r20, -40, r3       // tail: fixed stores at r20 - 40 .. r20 - 8
90
        st.q r20, -32, r3
91
        st.q r20, -24, r3
92
        st.q r20, -16, r3
93
        st.q r20, -8, r3
94
        sthi.q r5, -1, r3       // quadword store addressed by the last byte, r5 - 1
95
        blink tr2,r63           // branch to tr2 (return)
96
#else /* ! SHMEDIA, i.e. SH1 .. SH4 / SHcompact */
97
! Entry: r4: destination pointer
98
!        r5: fill value
99
!        r6: byte count
100
!
101
! Exit:  r0-r3: trashed
102
!
103
 
104
! This assumes that the first four bytes of the address space (0..3) are
105
! reserved - usually by the linker script.  Otherwise, we would have to check
106
! for the case of objects of the size 12..15 at address 0..3 .
107
 
108
#ifdef __SH5__
109
#define DST r2  /* destination pointer */
110
#define VAL r3  /* fill value */
111
#define CNT r4  /* byte count on entry, end address once DST has been added */
112
#define TMP r5  /* scratch for the byte duplication */
113
#else
114
#define DST r4  /* destination pointer */
115
#define VAL r5  /* fill value */
116
#define CNT r6  /* byte count on entry, end address once DST has been added */
117
#define TMP r2  /* scratch for the byte duplication */
118
#endif
119
 
120
        mov     #12,r0  ! Check for small number of bytes
121
        cmp/gt  CNT,r0  ! T = (12 > CNT), signed compare
122
        mov     DST,r0  ! r0 = running store pointer; DST itself is never modified
123
        SL(bt, L_store_byte_loop_check0, add DST,CNT)   ! CNT = DST + CNT = end address (SL is from asm.h; presumably branch plus delay-slot insn)
124
 
125
        tst     #3,r0   ! Align destination
126
        SL(bt,  L_dup_bytes, extu.b VAL,VAL)    ! zero-extend the fill byte; VAL rather than a literal r5 so the __SH5__ mapping (VAL = r3) is covered
127
        .balignw 4,0x0009       ! pad to a 4-byte boundary with 0x0009 words (the SH nop encoding)
128
L_align_loop:
129
        mov.b   VAL,@r0 ! store single bytes ...
130
        add     #1,r0
131
        tst     #3,r0   ! ... until r0 is a multiple of 4
132
        bf      L_align_loop
133
 
134
L_dup_bytes:
135
        swap.b  VAL,TMP ! Duplicate bytes across longword
136
        or      TMP,VAL ! low 16 bits now hold the byte twice
137
        swap.w  VAL,TMP
138
        or      TMP,VAL ! all four bytes now hold the fill byte
139
 
140
        add     #-16,CNT        ! bias the end address by -16 for the loop test below
141
 
142
        .balignw 4,0x0009       ! align the loop head, padding with 0x0009 words
143
L_store_long_loop:
144
        mov.l   VAL,@r0 ! Store double longs to memory
145
        cmp/hs  CNT,r0  ! T = (r0 >= CNT), unsigned, tested before r0 advances
146
        mov.l   VAL,@(4,r0)
147
        SL(bf, L_store_long_loop, add #8,r0)    ! repeat while T is clear; r0 += 8 each pass
148
 
149
        add     #16,CNT ! undo the bias: CNT is the end address again
150
 
151
L_store_byte_loop_check0:
152
        cmp/eq  CNT,r0  ! nothing left once r0 has reached the end address
153
        bt      L_exit
154
        .balignw 4,0x0009       ! align the loop head, padding with 0x0009 words
155
L_store_byte_loop:
156
        mov.b   VAL,@r0 ! Store bytes to memory
157
        add     #1,r0
158
        cmp/eq  CNT,r0  ! stop when r0 equals the end address
159
        bf      L_store_byte_loop
160
 
161
L_exit:
162
        rts
163
        mov     r4,r0   ! rts delay slot: r0 = r4, which is DST in the non-SH5 mapping. NOTE(review): under __SH5__ r4 is CNT - confirm r0 is unused there
164
#endif /* ! SHMEDIA */

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.