OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

[/] [or1k/] [trunk/] [linux/] [linux-2.4/] [arch/] [alpha/] [lib/] [stxcpy.S] - Blame information for rev 1765

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 1275 phoenix
/*
2
 * arch/alpha/lib/stxcpy.S
3
 * Contributed by Richard Henderson (rth@tamu.edu)
4
 *
5
 * Copy a null-terminated string from SRC to DST.
6
 *
7
 * This is an internal routine used by strcpy, stpcpy, and strcat.
8
 * As such, it uses special linkage conventions to make implementation
9
 * of these public functions more efficient.
10
 *
11
 * On input:
12
 *      t9 = return address
13
 *      a0 = DST
14
 *      a1 = SRC
15
 *
16
 * On output:
17
 *      t12 = bitmask (with one bit set) indicating the last byte written
18
 *      a0  = unaligned address of the last *word* written
19
 *
20
 * Furthermore, v0, a3-a5, and t11 are untouched.
21
 */
22
 
23
#include <alpha/regdef.h>
24
 
25
        .set noat
26
        .set noreorder
27
 
28
        .text
29
 
30
/* There is a problem with either gdb (as of 4.16) or gas (as of 2.7) that
31
   doesn't like putting the entry point for a procedure somewhere in the
32
   middle of the procedure descriptor.  Work around this by putting the
33
   aligned copy in its own procedure descriptor */
34
 
35
        /* stxcpy_aligned: copy path used when SRC and DST have the same
           offset within a quadword.  It is entered by a branch from
           __stxcpy, which has already loaded the first source word into
           t1 and advanced a1 past it.  Words are stored until one
           contains a null; unless the null is the top byte, that last
           word is merged with the existing destination word so the
           bytes after the null are preserved.  Returns through t9 with
           t12 = one-bit mask of the null byte and a0 = the address used
           for the final store.  */
        .ent stxcpy_aligned
36
        .align 3
37
stxcpy_aligned:
38
        .frame sp, 0, t9
39
        .prologue 0
40
 
41
        /* On entry to this basic block:
42
           t0 == the first destination word for masking back in
43
           t1 == the first source word.  */
44
 
45
        /* Create the 1st output word and detect 0's in the 1st input word.  */
46
        lda     t2, -1          # e1    : build a mask against false zero
47
        mskqh   t2, a1, t2      # e0    :   detection in the src word
48
        mskqh   t1, a1, t3      # e0    : src bytes from the start offset up
49
        ornot   t1, t2, t2      # .. e1 : bytes below the start read as non-zero
50
        mskql   t0, a1, t0      # e0    : assemble the first output word
51
        cmpbge  zero, t2, t8    # .. e1 : bits set iff null found
52
        or      t0, t3, t1      # e0    : t1 = dst low bytes | src high bytes
53
        bne     t8, $a_eos      # .. e1 : null is already in the first word
54
 
55
        /* On entry to this basic block:
56
           t0 == the first destination word for masking back in
57
           t1 == a source word not containing a null.  */
58
 
59
$a_loop:
60
        stq_u   t1, 0(a0)       # e0    : store the completed word
61
        addq    a0, 8, a0       # .. e1 :
62
        ldq_u   t1, 0(a1)       # e0    : fetch the next source word
63
        addq    a1, 8, a1       # .. e1 :
64
        cmpbge  zero, t1, t8    # e0 (stall)
65
        beq     t8, $a_loop     # .. e1 (zdb)
66
 
67
        /* Take care of the final (partial) word store.
68
           On entry to this basic block we have:
69
           t1 == the source word containing the null
70
           t8 == the cmpbge mask that found it.  */
71
$a_eos:
72
        negq    t8, t6          # e0    : find low bit set
73
        and     t8, t6, t12     # e1 (stall)
74
 
75
        /* For the sake of the cache, don't read a destination word
76
           if we're not going to need it.  */
77
        and     t12, 0x80, t6   # e0    : is the null in byte 7?
78
        bne     t6, 1f          # .. e1 (zdb)
79
 
80
        /* We're doing a partial word store and so need to combine
81
           our source and original destination words.  */
82
        ldq_u   t0, 0(a0)       # e0    : current destination word
83
        subq    t12, 1, t6      # .. e1 : t6 = mask of bytes below the null
84
        zapnot  t1, t6, t1      # e0    : clear src bytes >= null
85
        or      t12, t6, t8     # .. e1 : t8 = mask of bytes up to the null
86
        zap     t0, t8, t0      # e0    : clear dst bytes <= null
87
        or      t0, t1, t1      # e1    : merged final word
88
 
89
1:      stq_u   t1, 0(a0)       # e0    :
90
        ret     (t9)            # .. e1 :
91
 
92
        .end stxcpy_aligned
93
 
94
        /* __stxcpy: global entry point.  Uses a0 as DST, a1 as SRC and
           returns through t9.  It first tests whether SRC and DST have
           the same offset within a quadword.  If they do, it loads the
           first source word (and the first destination word when DST is
           not quadword aligned) and branches to stxcpy_aligned.
           Otherwise it continues at $unaligned, which assembles each
           output word from two source words.  Every exit path leaves
           the one-bit mask of the null byte in t12.  */
        .align 3
95
        .ent __stxcpy
96
        .globl __stxcpy
97
__stxcpy:
98
        .frame sp, 0, t9
99
        .prologue 0
100
 
101
        /* Are source and destination co-aligned?  */
102
        xor     a0, a1, t0      # e0    : low bits differ iff offsets differ
103
        unop                    #       :
104
        and     t0, 7, t0       # e0    :
105
        bne     t0, $unaligned  # .. e1 :
106
 
107
        /* We are co-aligned; take care of a partial first word.  */
108
        ldq_u   t1, 0(a1)       # e0    : load first src word
109
        and     a0, 7, t0       # .. e1 : take care not to load a word ...
110
        addq    a1, 8, a1               # e0    :
111
        beq     t0, stxcpy_aligned      # .. e1 : ... if we wont need it
112
        ldq_u   t0, 0(a0)       # e0    : first dst word, for masking back in
113
        br      stxcpy_aligned  # .. e1 :
114
 
115
 
116
/* The source and destination are not co-aligned.  Align the destination
117
   and cope.  We have to be very careful about not reading too much and
118
   causing a SEGV.  */
119
 
120
        .align 3
121
$u_head:
122
        /* We know just enough now to be able to assemble the first
123
           full source word.  We can still find a zero at the end of it
124
           that prevents us from outputting the whole thing.
125
 
126
           On entry to this basic block:
127
           t0 == the first dest word, for masking back in, if needed else 0
128
           t1 == the low bits of the first source word
129
           t6 == bytemask that is -1 in dest word bytes */
130
 
131
        ldq_u   t2, 8(a1)       # e0    : second source word
132
        addq    a1, 8, a1       # .. e1 :
133
 
134
        extql   t1, a1, t1      # e0    :
135
        extqh   t2, a1, t4      # e0    :
136
        mskql   t0, a0, t0      # e0    : keep dst bytes below the dst offset
137
        or      t1, t4, t1      # .. e1 :
138
        mskqh   t1, a0, t1      # e0    : keep src bytes from the dst offset up
139
        or      t0, t1, t1      # e1    : first output word
140
 
141
        or      t1, t6, t6      # e0    : protected dst bytes cannot test as null
142
        cmpbge  zero, t6, t8    # .. e1 :
143
        lda     t6, -1          # e0    : for masking just below
144
        bne     t8, $u_final    # .. e1 :
145
 
146
        mskql   t6, a1, t6              # e0    : mask out the bits we have
147
        or      t6, t2, t2              # e1    :   already extracted before
148
        cmpbge  zero, t2, t8            # e0    :   testing eos
149
        bne     t8, $u_late_head_exit   # .. e1 (zdb)
150
 
151
        /* Finally, we've got all the stupid leading edge cases taken care
152
           of and we can set up to enter the main loop.  */
153
 
154
        stq_u   t1, 0(a0)       # e0    : store first output word
155
        addq    a0, 8, a0       # .. e1 :
156
        extql   t2, a1, t0      # e0    : position ho-bits of lo word
157
        ldq_u   t2, 8(a1)       # .. e1 : read next high-order source word
158
        addq    a1, 8, a1       # e0    :
159
        cmpbge  zero, t2, t8    # .. e1 :
160
        nop                     # e0    :
161
        bne     t8, $u_eos      # .. e1 :
162
 
163
        /* Unaligned copy main loop.  In order to avoid reading too much,
164
           the loop is structured to detect zeros in aligned source words.
165
           This has, unfortunately, effectively pulled half of a loop
166
           iteration out into the head and half into the tail, but it does
167
           prevent nastiness from accumulating in the very thing we want
168
           to run as fast as possible.
169
 
170
           On entry to this basic block:
171
           t0 == the shifted high-order bits from the previous source word
172
           t2 == the unshifted current source word
173
 
174
           We further know that t2 does not contain a null terminator.  */
175
 
176
        .align 3
177
$u_loop:
178
        extqh   t2, a1, t1      # e0    : extract high bits for current word
179
        addq    a1, 8, a1       # .. e1 :
180
        extql   t2, a1, t3      # e0    : extract low bits for next time
181
        addq    a0, 8, a0       # .. e1 :
182
        or      t0, t1, t1      # e0    : current dst word now complete
183
        ldq_u   t2, 0(a1)       # .. e1 : load high word for next time
184
        stq_u   t1, -8(a0)      # e0    : save the current word
185
        mov     t3, t0          # .. e1 :
186
        cmpbge  zero, t2, t8    # e0    : test new word for eos
187
        beq     t8, $u_loop     # .. e1 :
188
 
189
        /* We've found a zero somewhere in the source word we just read.
190
           If it resides in the lower half, we have one (probably partial)
191
           word to write out, and if it resides in the upper half, we
192
           have one full and one partial word left to write out.
193
 
194
           On entry to this basic block:
195
           t0 == the shifted high-order bits from the previous source word
196
           t2 == the unshifted current source word.  */
197
$u_eos:
198
        extqh   t2, a1, t1      # e0    :
199
        or      t0, t1, t1      # e1    : first (partial) source word complete
200
 
201
        cmpbge  zero, t1, t8    # e0    : is the null in this first bit?
202
        bne     t8, $u_final    # .. e1 (zdb)
203
 
204
$u_late_head_exit:
205
        stq_u   t1, 0(a0)       # e0    : the null was in the high-order bits
206
        addq    a0, 8, a0       # .. e1 :
207
        extql   t2, a1, t1      # e0    :
208
        cmpbge  zero, t1, t8    # .. e1 :
209
 
210
        /* Take care of a final (probably partial) result word.
211
           On entry to this basic block:
212
           t1 == assembled source word
213
           t8 == cmpbge mask that found the null.  */
214
$u_final:
215
        negq    t8, t6          # e0    : isolate low bit set
216
        and     t6, t8, t12     # e1    :
217
 
218
        and     t12, 0x80, t6   # e0    : avoid dest word load if we can
219
        bne     t6, 1f          # .. e1 (zdb)
220
 
221
        ldq_u   t0, 0(a0)       # e0    : current destination word
222
        subq    t12, 1, t6      # .. e1 : t6 = mask of bytes below the null
223
        or      t6, t12, t8     # e0    : t8 = mask of bytes up to the null
224
        zapnot  t1, t6, t1      # .. e1 : kill source bytes >= null
225
        zap     t0, t8, t0      # e0    : kill dest bytes <= null
226
        or      t0, t1, t1      # e1    : merged final word
227
 
228
1:      stq_u   t1, 0(a0)       # e0    :
229
        ret     (t9)            # .. e1 :
230
 
231
        /* Unaligned copy entry point.  */
232
        .align 3
233
$unaligned:
234
 
235
        ldq_u   t1, 0(a1)       # e0    : load first source word
236
 
237
        and     a0, 7, t4       # .. e1 : find dest misalignment
238
        and     a1, 7, t5       # e0    : find src misalignment
239
 
240
        /* Conditionally load the first destination word and a bytemask
241
           with 0xff indicating that the destination byte is sacrosanct.  */
242
 
243
        mov     zero, t0        # .. e1 : default: no dest word to merge
244
        mov     zero, t6        # e0    : default: no protected dest bytes
245
        beq     t4, 1f          # .. e1 : dest is aligned, keep the defaults
246
        ldq_u   t0, 0(a0)       # e0    :
247
        lda     t6, -1          # .. e1 :
248
        mskql   t6, a0, t6      # e0    : 0xff in bytes below the dest offset
249
1:
250
        subq    a1, t4, a1      # .. e1 : sub dest misalignment from src addr
251
 
252
        /* If source misalignment is larger than dest misalignment, we need
253
           extra startup checks to avoid SEGV.  */
254
 
255
        cmplt   t4, t5, t12     # e0    : t12 = (dest misalign < src misalign)
256
        beq     t12, $u_head    # .. e1 (zdb)
257
 
258
        lda     t2, -1          # e1    : mask out leading garbage in source
259
        mskqh   t2, t5, t2      # e0    :
260
        nop                     # e0    :
261
        ornot   t1, t2, t3      # .. e1 : bytes below the src offset read as non-zero
262
        cmpbge  zero, t3, t8    # e0    : is there a zero?
263
        beq     t8, $u_head     # .. e1 (zdb)
264
 
265
        /* At this point we've found a zero in the first partial word of
266
           the source.  We need to isolate the valid source data and mask
267
           it into the original destination data.  (Incidentally, we know
268
           that we'll need at least one byte of that original dest word.) */
269
 
270
        ldq_u   t0, 0(a0)       # e0    :
271
 
272
        negq    t8, t6          # .. e1 : build bitmask of bytes <= zero
273
        and     t6, t8, t12     # e0    :
274
        and     a1, 7, t5       # .. e1 : t5 = src offset minus dest offset
275
        subq    t12, 1, t6      # e0    :
276
        or      t6, t12, t8     # e1    :
277
        srl     t12, t5, t12    # e0    : adjust final null return value
278
 
279
        zapnot  t2, t8, t2      # .. e1 : prepare source word; mirror changes
280
        and     t1, t2, t1      # e1    : to source validity mask
281
        extql   t2, a1, t2      # .. e0 :
282
        extql   t1, a1, t1      # e0    :
283
 
284
        andnot  t0, t2, t0      # .. e1 : zero place for source to reside
285
        or      t0, t1, t1      # e1    : and put it there
286
        stq_u   t1, 0(a0)       # .. e0 :
287
        ret     (t9)            # e1    :
288
 
289
        .end __stxcpy

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.