OpenCores
URL https://opencores.org/ocsvn/scarts/scarts/trunk

Subversion Repositories scarts

[/] [scarts/] [trunk/] [toolchain/] [scarts-newlib/] [newlib-1.17.0/] [newlib/] [libc/] [machine/] [sh/] [strncpy.S] - Blame information for rev 9

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 9 jlechner
/* Copyright 2003 SuperH Ltd.  */
2
 
3
#include "asm.h"
4
 
5
#ifdef __SH5__
6
#if __SHMEDIA__
7
 
8
/* ZPAD_MASK(src, dst): dst = src - 1, with the subtraction carried out in
   little-endian byte order (the big-endian variant byte-reverses the value
   before and after the addi).  The callers in this file pass an mcmpeq.b
   result and AND the outcome with the data in order to "mask out non-zero
   bytes after first zero byte".  */
#ifdef __LITTLE_ENDIAN__
9
#define ZPAD_MASK(src, dst) addi src, -1, dst
10
#else
11
#define ZPAD_MASK(src, dst) \
12
 byterev src, dst; addi dst, -1, dst; byterev dst, dst
13
#endif
14
 
15
 
16
/* We assume that the destination is not in the first 16 bytes of memory.
17
   A typical linker script will put the text section first, and as
18
   this code is longer than 16 bytes, you have to go out of your way
19
    to put data there.  */
20
/* strncpy, SHmedia implementation: entry, main copy loop and end handling.
   Register roles (taken from the register notes at L_small and L_found0):
     r2  store address          r3  read address        r4  size
     r20 store end address      r5  store end address minus 8
     r0  quadword being copied  r1  mcmpeq.b result for r0
     r22 store address plus 8 of the quadword held in r0
   Sizes of at most 8 are handed to L_small; a quadword containing a zero
   byte is handed to L_found0.  The path that runs to the end of this part
   leaves through the address in r18 (ptabs r18 / blink) -- presumably the
   return address; confirm against the SHmedia ABI.  */
ENTRY(strncpy)
21
 pt L_small, tr2
22
 // r0 = first (possibly partial) source quadword.
 ldlo.q r3, 0, r0
23
 shlli r3, 3, r19
24
 // r1 = byte-wise comparison of r0 against r63.
 mcmpeq.b r0, r63, r1
25
 // SHHI is not defined in this file (presumably it comes from asm.h).
 // It shifts r1 by r19 = r3 * 8; per the notes at L_small, r7 is nonzero
 // when a relevant zero byte was found in r0.
 SHHI r1, r19, r7
26
 // r20 = store end address, r5 = store end address minus 8.
 add r2, r4, r20
27
 addi r20, -8, r5
28
 /* If the size is greater than 8, we know we can read beyond the first
29
    (possibly partial) quadword, and write out a full first and last
30
    (possibly unaligned and/or overlapping) quadword.  */
31
 bge/u r2, r5, tr2 // L_small
32
 pt L_found0, tr0
33
 // r22 = store address plus 8, which is what L_found0 expects.
 addi r2, 8, r22
34
 bnei/u r7, 0, tr0  // L_found0
35
 // r38 = r3 | -8, i.e. minus the number of bytes from r3 up to the next
 // 8-byte boundary; r22 = r2 - r38 is the store address that corresponds
 // to that boundary in the source.
 ori r3, -8, r38
36
 pt L_end_early, tr1
37
 sub r2, r38, r22
38
 // Write r0 to the 8 bytes starting at r2 (low part, then high part).
 stlo.q r2, 0, r0
39
 sthi.q r2, 7, r0
40
 // r6 = r3 - r2, so r22 + r6 is the read address matching store address r22.
 sub r3, r2, r6
41
 ldx.q r22, r6, r0
42
 /* Before each iteration, check that we can store in full the next quad we
43
    are about to fetch.  */
44
 // r36 = store end address minus 16.
 addi r5, -8, r36
45
 bgtu/u r22, r36, tr1 // L_end_early
46
 pt L_scan0, tr1
47
// Main loop: r0 holds the quadword just fetched; after the addi, r22 is its
// store address plus 8.
L_scan0:
48
 addi r22, 8, r22
49
 mcmpeq.b r0, r63, r1
50
 stlo.q r22, -8, r0
51
 // Leave for L_found0 when the comparison result is nonzero; L_found0
 // stores this quadword again after masking it.
 bnei/u r1, 0, tr0   // L_found0
52
 sthi.q r22, -1, r0
53
 // Fetch the next quadword and continue the loop on r36 versus r22.
 ldx.q r22, r6, r0
54
 bgeu/l r36, r22, tr1 // L_scan0
55
L_end:
56
 // At end; we might re-read a few bytes when we fetch the last quad.
57
 // branch mispredict, so load is ready now.
58
 mcmpeq.b r0, r63, r1
59
 addi r22, 8, r22
60
 bnei/u r1, 0, tr0   // L_found0
61
 // r7 = r3 + r4 (read end address); load the 8 bytes ending there into r1
 // (low and high parts are combined by the or below).
 add r3, r4, r7
62
 ldlo.q r7, -8, r1
63
 ldhi.q r7, -1, r7
64
 ptabs r18, tr0
65
 // Store the pending quadword r0 at r22 - 8.
 stlo.q r22, -8, r0
66
 or r1, r7, r1
67
 mcmpeq.b r1, r63, r7
68
 sthi.q r22, -1, r0
69
 ZPAD_MASK (r7, r7)
70
 and r1, r7, r1 // mask out non-zero bytes after first zero byte
71
 // Store the masked last quadword so that it ends at the store end address.
 stlo.q r20, -8, r1
72
 sthi.q r20, -1, r1
73
 blink tr0, r63
74
 
75
// L_end_early: reached from the entry code, before the first L_scan0
// iteration, when the check against r36 (store end address minus 16)
// sends control here.  r0 holds the quadword fetched for store address r22.
L_end_early:
76
 /* Check if we can store the current quad in full.  */
77
 pt L_end, tr1
78
 // r7 = r3 + r4 (read end address), used by the loads below.
 add r3, r4, r7
79
 bgtu/u r5, r22, tr1 // L_end // Not really unlikely, but gap is short.
80
 /* If not, that means we can just proceed to process the last quad.
81
    Two pipeline stalls are unavoidable, as we don't have enough ILP.  */
82
 // Load the 8 bytes ending at the read end address into r1 (low and high
 // parts are combined by the or below).
 ldlo.q r7, -8, r1
83
 ldhi.q r7, -1, r7
84
 ptabs r18, tr0
85
 or r1, r7, r1
86
 mcmpeq.b r1, r63, r7
87
 ZPAD_MASK (r7, r7)
88
 and r1, r7, r1 // mask out non-zero bytes after first zero byte
89
 // Store the masked quadword so that it ends at the store end address r20,
 // then leave through the address in r18.
 stlo.q r20, -8, r1
90
 sthi.q r20, -1, r1
91
 blink tr0, r63
92
 
93
// L_found0: the quadword in r0 contains a zero byte.  Store it with
// everything after that zero byte cleared, then zero-fill the rest of the
// destination up to r20.
L_found0:
94
 // r0: string to store, not yet zero-padding normalized.
95
 // r1: result of mcmpeq.b r0, r63, r1.
96
 // r22: store address plus 8.  I.e. address where zero padding beyond the
97
 //      string in r0 goes.
98
 // r20: store end address.
99
 // r5: store end address minus 8.
100
 pt L_write0_multiquad, tr0
101
 ZPAD_MASK (r1, r1)
102
 and r0, r1, r0 // mask out non-zero bytes after first zero byte
103
 stlo.q r22, -8, r0
104
 sthi.q r22, -1, r0
105
 andi r22, -8, r1 // Check if zeros to write fit in one quad word.
106
 bgtu/l r5, r1, tr0 // L_write0_multiquad
107
 ptabs r18, tr1
108
 // r1 = r20 - r22 (bytes between the padding start and the store end),
 // scaled by 4 so that the two SHLO steps together shift by 8 bits per byte.
 // SHLO is not defined in this file (presumably it comes from asm.h).
 sub r20, r22, r1
109
 shlli r1, 2, r1 // Do shift in two steps so that 64 bit case is
110
 SHLO r0, r1, r0 // handled correctly.
111
 SHLO r0, r1, r0
112
 // Store the high part of the shifted value ending at r20 - 1, then leave
 // through the address in r18.
 sthi.q r20, -1, r0
113
 blink tr1, r63
114
 
115
// L_write0_multiquad: zero padding that spans more than one quadword.
// On entry (from L_found0): r22 = address where the padding starts,
// r1 = r22 & -8, r20 = store end address, r5 = store end address minus 8.
// r63 is the value stored as padding.
L_write0_multiquad:
116
 pt L_write0_loop, tr0
117
 ptabs r18, tr1
118
 // Pad the low part at the start of the range and the high part ending at
 // r20 - 1; whole quadwords in between are left to L_write0_loop.
 stlo.q r22, 0, r63
119
 sthi.q r20, -1, r63
120
 // Advance r1 by 8 and enter the loop on the r5 versus r1 check; otherwise
 // leave through the address in r18.
 addi r1, 8, r1
121
 bgeu/l r5, r1, tr0 // L_write0_loop
122
 blink tr1, r63
123
 
124
// L_write0_loop: store r63 as a whole quadword at r1, advance r1 by 8 and
// repeat on the r5 (store end address minus 8) versus r1 check.
// tr0 (this label) and tr1 (ptabs r18) were loaded in L_write0_multiquad.
L_write0_loop:
125
 st.q r1, 0 ,r63
126
 addi r1, 8, r1
127
 bgeu/l r5, r1, tr0 // L_write0_loop
128
 blink tr1, r63
129
 
130
// L_small: copy for sizes of at most 8 bytes.  Sizes of 4 and above are
// passed on to L_small_storelong; smaller sizes are written byte by byte.
L_small:
131
 // r0: string to store, not yet zero-padding normalized.
132
 // r1: result of mcmpeq.b r0, r63, r1.
133
 // r7: nonzero indicates relevant zero found in r0.
134
 // r2: store address.
135
 // r3: read address.
136
 // r4: size, max 8
137
 // r20: store end address.
138
 // r5: store end address minus 8.
139
 pt L_nohi, tr0
140
 pt L_small_storelong, tr1
141
 ptabs r18, tr2
142
 // r23 = r63 - r4; also used by L_small_storelong.
 sub r63, r4, r23
143
 bnei/u r7, 0, tr0  // L_nohi
144
 // r7 = r3 | -8, i.e. minus the number of bytes from r3 up to the next
 // 8-byte boundary.  The branch on r23 versus r7 decides whether the high
 // part of the source quadword is loaded and merged into r0.
 ori r3, -8, r7
145
 bge/l r23, r7, tr0 // L_nohi
146
 ldhi.q r3, 7, r1
147
 or r0, r1, r0
148
 mcmpeq.b r0, r63, r1
149
L_nohi:
150
 // Mask r0 with the ZPAD_MASK of the comparison result r1.
 ZPAD_MASK (r1, r1)
151
 and r0, r1, r0
152
 movi 4, r19
153
 bge/u r4, r19, tr1 // L_small_storelong
154
 
155
 // Size is below 4 here: write up to three bytes one at a time, shifting
 // r0 right by 8 between stores.  Big-endian builds byte-reverse r0 first.
 pt L_small_end, tr0
156
#ifndef __LITTLE_ENDIAN__
157
 byterev r0, r0
158
#endif
159
 beqi/u r4, 0, tr0 // L_small_end
160
 st.b r2, 0, r0
161
 beqi/u r4, 1, tr0 // L_small_end
162
 shlri r0, 8, r0
163
 st.b r2, 1, r0
164
 beqi/u r4, 2, tr0 // L_small_end
165
 shlri r0, 8, r0
166
 st.b r2, 2, r0
167
L_small_end:
168
 // Leave through the address in r18 (tr2 was loaded with ptabs r18 above).
 blink tr2, r63
169
 
170
// L_small_storelong: reached from L_nohi when r4 >= 4 (and, per the notes at
// L_small, at most 8).  Writes one longword at r2 and one ending at r20 - 1.
// On entry: r0 = masked data, r23 = r63 - r4, tr2 loaded with ptabs r18.
L_small_storelong:
171
 // r7 = r23 * 8.  SHHI is not defined in this file (presumably it comes
 // from asm.h); it derives r1 from r0 shifted by r7.
 shlli r23, 3, r7
172
 SHHI r0, r7, r1
173
 // Which register is shifted right by 32 depends on the byte order.
#ifdef __LITTLE_ENDIAN__
174
 shlri r1, 32, r1
175
#else
176
 shlri r0, 32, r0
177
#endif
178
 // r0 goes to r2 .. r2+3, r1 goes to r20-4 .. r20-1 (low and high part
 // stores each).
 stlo.l r2, 0, r0
179
 sthi.l r2, 3, r0
180
 stlo.l r20, -4, r1
181
 sthi.l r20, -1, r1
182
 blink tr2, r63
183
 
184
#else /* SHcompact */
185
 
186
/* This code is optimized for size.  Instruction selection is SH5 specific.
187
   SH4 should use a different version.  */
188
/* strncpy, SHcompact implementation: a byte-at-a-time loop.
   In:  r2 = destination, r3 = source, r4 = size (the same argument registers
        the SHmedia variant documents as store address / read address / size).
   r2 is never written here.  Scratch: r1 (current byte), r5 (write pointer),
   r6 (constant 0); r3 and r4 are modified.
   NOTE(review): the instruction following bt/s and bf/s is assumed to sit in
   the branch delay slot and to execute whether or not the branch is taken --
   confirm against the SH instruction set manual.  */
ENTRY(strncpy)
189
 mov #0, r6
190
 /* Size 0: nothing to copy.  */
 cmp/eq r4, r6
191
 bt return
192
 /* r5 = r2 - 1 (pre-incremented in the loop); r4 = r5 + size, the address
    of the last byte to write.  */
 mov r2, r5
193
 add #-1, r5
194
 add r5, r4
195
loop:
196
 /* T is set here once the previously stored byte was 0: the load is then
    skipped, so r1 stays 0 and the remaining bytes are written as 0.  */
 bt/s found0
197
 add #1, r5
198
 mov.b @r3+, r1
199
found0:
200
 /* T = (r5 == r4), i.e. this is the last byte; store r1 at r5 and loop
    while T is clear.  The cmp/eq after bf/s sets T = (r1 == 0) for the
    bt/s at the top of the loop.  */
 cmp/eq r5,r4
201
 mov.b r1, @r5
202
 bf/s loop
203
 cmp/eq r1, r6
204
return:
205
 rts
206
 nop
207
 
208
#endif /* SHcompact */
209
#endif /* __SH5__ */

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.