OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

[/] [or1k/] [trunk/] [newlib-1.10.0/] [newlib/] [libc/] [machine/] [arm/] [memcpy.S] - Blame information for rev 1765

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 1007 ivang
/*      $NetBSD: memcpy.S,v 1.3 1997/11/22 03:27:12 mark Exp $  */
2
 
3
/*-
4
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
5
 * All rights reserved.
6
 *
7
 * This code is derived from software contributed to The NetBSD Foundation
8
 * by Neil A. Carson and Mark Brinicombe
9
 *
10
 * Redistribution and use in source and binary forms, with or without
11
 * modification, are permitted provided that the following conditions
12
 * are met:
13
 * 1. Redistributions of source code must retain the above copyright
14
 *    notice, this list of conditions and the following disclaimer.
15
 * 2. Redistributions in binary form must reproduce the above copyright
16
 *    notice, this list of conditions and the following disclaimer in the
17
 *    documentation and/or other materials provided with the distribution.
18
 * 3. All advertising materials mentioning features or use of this software
19
 *    must display the following acknowledgement:
20
 *      This product includes software developed by the NetBSD
21
 *      Foundation, Inc. and its contributors.
22
 * 4. Neither the name of The NetBSD Foundation nor the names of its
23
 *    contributors may be used to endorse or promote products derived
24
 *    from this software without specific prior written permission.
25
 *
26
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36
 * POSSIBILITY OF SUCH DAMAGE.
37
 */
38
 
39
/* This was modified by Jay Monkman  to
40
 *   save and restore r12. This is necessary for RTEMS.
41
 */
42
/* #include */
43
 
44
/* ENTRY(sym): export `sym` as a global symbol and define it as a label here. */
#define ENTRY(sym)      .globl sym ; sym:
46
/*
47
.globl memcpy
48
memcpy:
49
*/
50
/*
 * memcpy stub: r0 = dest, r1 = src, r2 = len are passed straight through
 * to _memcpy.  r0 is kept on the stack so the original destination is what
 * comes back in r0, and ip (r12) is preserved around the call.
 */
ENTRY(memcpy)
        stmdb   sp!, {r0, ip, lr}       /* push dest, ip and return address */
        bl      _memcpy
        ldmia   sp!, {r0, ip, pc}       /* pop dest and ip; saved lr -> pc returns */
54
 
55
 
56
/*
57
.globl memmove
58
memmove:
59
*/
60
/*
 * memmove stub: identical wrapper to memcpy.  _memcpy picks the copy
 * direction from the pointer order, so the same core serves both entry
 * points.  r0 (dest) and ip (r12) are preserved around the call.
 */
ENTRY(memmove)
        stmdb   sp!, {r0, ip, lr}       /* push dest, ip and return address */
        bl      _memcpy
        ldmia   sp!, {r0, ip, pc}       /* pop dest and ip; saved lr -> pc returns */
64
 
65
 
66
 
67
/*
68
 * This is one fun bit of code ...
69
 * Some easy listening music is suggested while trying to understand this
70
 * code e.g. Iron Maiden
71
 *
72
 * For anyone attempting to understand it :
73
 *
74
 * The core code is implemented here with simple stubs for memcpy()
75
 * memmove() and bcopy().
76
 *
77
 * All local labels are prefixed with Lmemcpy_
78
 * Following the prefix a label starting f is used in the forward copy code
79
 * while a label using b is used in the backwards copy code
80
 * The source and destination addresses determine whether a forward or
81
 * backward copy is performed.
82
 * Separate bits of code are used to deal with the following situations
83
 * for both the forward and backwards copy.
84
 * unaligned source address
85
 * unaligned destination address
86
 * Separate copy routines are used to produce an optimised result for each
87
 * of these cases.
88
 * The copy code will use LDM/STM instructions to copy up to 32 bytes at
89
 * a time where possible.
90
 *
91
 * Note: r12 (aka ip) can be trashed during the function along with
92
 * r0-r3 although r0-r2 have defined uses i.e. dest, src, len throughout.
93
 * Additional registers are preserved prior to use i.e. r4, r5 & lr
94
 *
95
 * Apologies for the state of the comments ;-)
96
 */
97
 
98
 
99
/*
100
_memcpy:
101
*/
102
/*
 * _memcpy - copy core shared by the memcpy() and memmove() stubs in this file.
 *
 * On entry: r0 = destination, r1 = source, r2 = byte count, lr = return.
 * Direction: when the source address is below the destination (unsigned
 * compare) the copy runs backwards from the end of both buffers; otherwise
 * it runs forwards.
 * The forward path pushes {r0, lr} here and returns later by popping them
 * into {r0, pc}, so r0 comes back as the original destination.
 * From the "subs r2, r2, #4" below onwards r2 holds (bytes remaining - 4).
 */
ENTRY(_memcpy)
103
        /* Determine copy direction */
104
        cmp     r1, r0
105
        bcc     Lmemcpy_backwards       /* src < dest (unsigned): copy from the end */
106
 
107
        /*
         * EQ here still comes from the cmp above (bcc leaves the flags
         * alone), i.e. src == dest, so there is nothing to copy.  This is
         * not a test of the length.  r0 is zeroed before returning; the
         * memcpy()/memmove() stubs reload r0 from their own stack frame.
         */
        moveq   r0, #0                  /* Quick abort for src == dest */
108
        moveq   pc, lr
109
 
110
        stmdb   sp!, {r0, lr}           /* memcpy() returns dest addr */
111
        subs    r2, r2, #4              /* r2 = len - 4 (bias kept from here on) */
112
        blt     Lmemcpy_fl4             /* less than 4 bytes */
113
        ands    r12, r0, #3             /* r12 = dest & 3 */
114
        bne     Lmemcpy_fdestul         /* oh unaligned destination addr */
115
        ands    r12, r1, #3             /* r12 = src & 3 */
116
        bne     Lmemcpy_fsrcul          /* oh unaligned source addr */
117
 
118
/*
 * Forward copy with source and destination both word aligned.
 * Entry: r2 = bytes remaining - 4.
 * Copies in chunks of 32, then at most one 16, then 12s, then one 8 or 4,
 * and falls through to Lmemcpy_fl4 for the last 0-3 bytes.
 * lr is used as a data register; the real return address was pushed by the
 * entry code.
 */
Lmemcpy_ft8:
119
        /* We have aligned source and destination */
120
        subs    r2, r2, #8              /* r2 = remaining - 12 */
121
        blt     Lmemcpy_fl12            /* less than 12 bytes (4 from above) */
122
        subs    r2, r2, #0x14           /* r2 = remaining - 32 */
123
        blt     Lmemcpy_fl32            /* less than 32 bytes (12 from above) */
124
        stmdb   sp!, {r4}               /* borrow r4 */
125
 
126
        /* blat 32 bytes at a time */
127
        /* XXX for really big copies perhaps we should use more registers */
128
Lmemcpy_floop32:
129
        /* each pass moves two 16-byte groups */
        ldmia   r1!, {r3, r4, r12, lr}
130
        stmia   r0!, {r3, r4, r12, lr}
131
        ldmia   r1!, {r3, r4, r12, lr}
132
        stmia   r0!, {r3, r4, r12, lr}
133
        subs    r2, r2, #0x20
134
        bge     Lmemcpy_floop32
135
 
136
        cmn     r2, #0x10               /* flags from r2 + 16: GE if >= 16 bytes left */
137
        ldmgeia r1!, {r3, r4, r12, lr}  /* blat a remaining 16 bytes */
138
        stmgeia r0!, {r3, r4, r12, lr}
139
        subge   r2, r2, #0x10           /* keep r2 = remaining - 32 */
140
        ldmia   sp!, {r4}               /* return r4 */
141
 
142
Lmemcpy_fl32:
143
        adds    r2, r2, #0x14           /* r2 = remaining - 12; GE if >= 12 left */
144
 
145
        /* blat 12 bytes at a time */
146
Lmemcpy_floop12:
147
        /* all three conditional instructions are skipped once < 12 bytes remain */
        ldmgeia r1!, {r3, r12, lr}
148
        stmgeia r0!, {r3, r12, lr}
149
        subges  r2, r2, #0x0c
150
        bge     Lmemcpy_floop12
151
 
152
Lmemcpy_fl12:
153
        adds    r2, r2, #8              /* r2 = remaining - 4 */
154
        blt     Lmemcpy_fl4
155
 
156
        subs    r2, r2, #4              /* LT: 4-7 bytes left, GE: 8-11 bytes left */
157
        ldrlt   r3, [r1], #4            /* LT: copy one word */
158
        strlt   r3, [r0], #4
159
        ldmgeia r1!, {r3, r12}          /* GE: copy two words */
160
        stmgeia r0!, {r3, r12}
161
        subge   r2, r2, #4              /* back to r2 = remaining - 4 */
162
 
163
/*
 * Forward tail: copy the last 0-3 bytes and return.
 * Entry: r2 = bytes remaining - 4.  Returns by popping the {r0, lr} pair
 * pushed at function entry into {r0, pc}.
 */
Lmemcpy_fl4:
164
        /* less than 4 bytes to go */
165
        adds    r2, r2, #4              /* r2 = bytes remaining; Z set if none */
166
        ldmeqia sp!, {r0, pc}           /* done */
167
 
168
        /* copy the crud byte at a time */
169
        /*
         * One byte is always copied; GE (2 or 3 left) copies a second and
         * GT (3 left) a third.  ldrb/strb do not alter the flags set by cmp.
         */
        cmp     r2, #2
170
        ldrb    r3, [r1], #1
171
        strb    r3, [r0], #1
172
        ldrgeb  r3, [r1], #1
173
        strgeb  r3, [r0], #1
174
        ldrgtb  r3, [r1], #1
175
        strgtb  r3, [r0], #1
176
        ldmia   sp!, {r0, pc}
177
 
178
        /*
         * erg - unaligned destination (forward copy)
         * Entry: r12 = dest & 3 (non-zero), r2 = bytes remaining - 4.
         * Copies 1-3 single bytes so that r0 becomes word aligned, then
         * re-dispatches on the source alignment.
         */
179
Lmemcpy_fdestul:
180
        rsb     r12, r12, #4            /* r12 = 4 - (dest & 3) = bytes to the boundary */
181
        cmp     r12, #2                 /* GE: at least 2 bytes, GT: 3 bytes */
182
 
183
        /* align destination with byte copies */
184
        ldrb    r3, [r1], #1
185
        strb    r3, [r0], #1
186
        ldrgeb  r3, [r1], #1
187
        strgeb  r3, [r0], #1
188
        ldrgtb  r3, [r1], #1
189
        strgtb  r3, [r0], #1
190
        subs    r2, r2, r12             /* account for the bytes just copied */
191
        blt     Lmemcpy_fl4             /* less than 4 bytes */
192
 
193
        ands    r12, r1, #3             /* r12 = src & 3 */
194
        beq     Lmemcpy_ft8             /* we have an aligned source */
        /* otherwise fall through to Lmemcpy_fsrcul with r12 = src & 3 */
195
 
196
        /*
         * erg - unaligned source (forward copy, destination word aligned)
         * Entry: r12 = src & 3 (1..3), r2 = bytes remaining - 4.
         * r1 is rounded down to a word boundary and only whole words are
         * loaded.  Each output word is built from the upper part of the
         * previous source word (carried in lr) and the lower part of the
         * next one.  The lsr/lsl pairing matches little-endian byte order.
         * The code below the dispatch handles src & 3 == 1 (shift 8 / 24).
         */
197
        /* This is where it gets nasty ... */
198
Lmemcpy_fsrcul:
199
        bic     r1, r1, #3              /* round src down to its word */
200
        ldr     lr, [r1], #4            /* prime the carry word */
201
        cmp     r12, #2                 /* dispatch on src & 3 */
202
        bgt     Lmemcpy_fsrcul3
203
        beq     Lmemcpy_fsrcul2
204
        cmp     r2, #0x0c
205
        blt     Lmemcpy_fsrcul1loop4    /* fewer than 16 bytes left: word loop only */
206
        sub     r2, r2, #0x0c           /* r2 = remaining - 16 */
207
        stmdb   sp!, {r4, r5}           /* borrow r4, r5 */
208
 
209
Lmemcpy_fsrcul1loop16:
210
        /* 16 bytes per pass; lr leaves the loop holding the last word loaded */
        mov     r3, lr, lsr #8
211
        ldmia   r1!, {r4, r5, r12, lr}
212
        orr     r3, r3, r4, lsl #24
213
        mov     r4, r4, lsr #8
214
        orr     r4, r4, r5, lsl #24
215
        mov     r5, r5, lsr #8
216
        orr     r5, r5, r12, lsl #24
217
        mov     r12, r12, lsr #8
218
        orr     r12, r12, lr, lsl #24
219
        stmia   r0!, {r3-r5, r12}
220
        subs    r2, r2, #0x10
221
        bge     Lmemcpy_fsrcul1loop16
222
        ldmia   sp!, {r4, r5}           /* return r4, r5 */
223
        adds    r2, r2, #0x0c           /* back to r2 = remaining - 4 */
224
        blt     Lmemcpy_fsrcul1l4       /* fewer than 4 bytes left */
225
 
226
Lmemcpy_fsrcul1loop4:
227
        /* one output word per pass */
        mov     r12, lr, lsr #8
228
        ldr     lr, [r1], #4
229
        orr     r12, r12, lr, lsl #24
230
        str     r12, [r0], #4
231
        subs    r2, r2, #4
232
        bge     Lmemcpy_fsrcul1loop4
233
 
234
Lmemcpy_fsrcul1l4:
235
        /*
         * r1 points one word past the carry word, of which only one byte
         * has been consumed; step back to the true source position.
         */
        sub     r1, r1, #3
236
        b       Lmemcpy_fl4
237
 
238
/*
 * Forward copy, destination aligned, src & 3 == 2.
 * Entry: r1 rounded down and advanced one word, lr = carry word,
 * r2 = bytes remaining - 4.  Same scheme as the src & 3 == 1 variant, with
 * each output word built from 16 bits of the carry word and 16 bits of the
 * next source word.
 */
Lmemcpy_fsrcul2:
239
        cmp     r2, #0x0c
240
        blt     Lmemcpy_fsrcul2loop4    /* fewer than 16 bytes left: word loop only */
241
        sub     r2, r2, #0x0c           /* r2 = remaining - 16 */
242
        stmdb   sp!, {r4, r5}           /* borrow r4, r5 */
243
 
244
Lmemcpy_fsrcul2loop16:
245
        /* 16 bytes per pass */
        mov     r3, lr, lsr #16
246
        ldmia   r1!, {r4, r5, r12, lr}
247
        orr     r3, r3, r4, lsl #16
248
        mov     r4, r4, lsr #16
249
        orr     r4, r4, r5, lsl #16
250
        mov     r5, r5, lsr #16
251
        orr     r5, r5, r12, lsl #16
252
        mov     r12, r12, lsr #16
253
        orr     r12, r12, lr, lsl #16
254
        stmia   r0!, {r3-r5, r12}
255
        subs    r2, r2, #0x10
256
        bge     Lmemcpy_fsrcul2loop16
257
        ldmia   sp!, {r4, r5}           /* return r4, r5 */
258
        adds    r2, r2, #0x0c           /* back to r2 = remaining - 4 */
259
        blt     Lmemcpy_fsrcul2l4       /* fewer than 4 bytes left */
260
 
261
Lmemcpy_fsrcul2loop4:
262
        /* one output word per pass */
        mov     r12, lr, lsr #16
263
        ldr     lr, [r1], #4
264
        orr     r12, r12, lr, lsl #16
265
        str     r12, [r0], #4
266
        subs    r2, r2, #4
267
        bge     Lmemcpy_fsrcul2loop4
268
 
269
Lmemcpy_fsrcul2l4:
270
        /* two bytes of the carry word were consumed: true position is r1 - 2 */
        sub     r1, r1, #2
271
        b       Lmemcpy_fl4
272
 
273
/*
 * Forward copy, destination aligned, src & 3 == 3.
 * Entry: r1 rounded down and advanced one word, lr = carry word,
 * r2 = bytes remaining - 4.  Each output word takes the top 8 bits of the
 * carry word (lsr #24) and the low 24 bits of the next source word (lsl #8).
 */
Lmemcpy_fsrcul3:
274
        cmp     r2, #0x0c
275
        blt     Lmemcpy_fsrcul3loop4    /* fewer than 16 bytes left: word loop only */
276
        sub     r2, r2, #0x0c           /* r2 = remaining - 16 */
277
        stmdb   sp!, {r4, r5}           /* borrow r4, r5 */
278
 
279
Lmemcpy_fsrcul3loop16:
280
        /* 16 bytes per pass */
        mov     r3, lr, lsr #24
281
        ldmia   r1!, {r4, r5, r12, lr}
282
        orr     r3, r3, r4, lsl #8
283
        mov     r4, r4, lsr #24
284
        orr     r4, r4, r5, lsl #8
285
        mov     r5, r5, lsr #24
286
        orr     r5, r5, r12, lsl #8
287
        mov     r12, r12, lsr #24
288
        orr     r12, r12, lr, lsl #8
289
        stmia   r0!, {r3-r5, r12}
290
        subs    r2, r2, #0x10
291
        bge     Lmemcpy_fsrcul3loop16
292
        ldmia   sp!, {r4, r5}           /* return r4, r5 */
293
        adds    r2, r2, #0x0c           /* back to r2 = remaining - 4 */
294
        blt     Lmemcpy_fsrcul3l4       /* fewer than 4 bytes left */
295
 
296
Lmemcpy_fsrcul3loop4:
297
        /* one output word per pass */
        mov     r12, lr, lsr #24
298
        ldr     lr, [r1], #4
299
        orr     r12, r12, lr, lsl #8
300
        str     r12, [r0], #4
301
        subs    r2, r2, #4
302
        bge     Lmemcpy_fsrcul3loop4
303
 
304
Lmemcpy_fsrcul3l4:
305
        /* three bytes of the carry word were consumed: true position is r1 - 1 */
        sub     r1, r1, #1
306
        b       Lmemcpy_fl4
307
 
308
/*
 * Backward copy, taken when src < dest.
 * r0 and r1 are moved to one past the end of each buffer and the copy then
 * works downwards using pre-decrement addressing.
 * Nothing is pushed on entry to this path: it returns with "mov pc, lr"
 * and does not reload r0 from the stack.
 * From the subs below onwards r2 holds (bytes remaining - 4).
 */
Lmemcpy_backwards:
309
        add     r1, r1, r2              /* r1 = src + len */
310
        add     r0, r0, r2              /* r0 = dest + len */
311
        subs    r2, r2, #4
312
        blt     Lmemcpy_bl4             /* less than 4 bytes */
313
        ands    r12, r0, #3             /* r12 = (dest + len) & 3 */
314
        bne     Lmemcpy_bdestul         /* oh unaligned destination addr */
315
        ands    r12, r1, #3             /* r12 = (src + len) & 3 */
316
        bne     Lmemcpy_bsrcul          /* oh unaligned source addr */
317
 
318
/*
 * Backward copy with both end pointers word aligned.
 * Entry: r2 = bytes remaining - 4.
 * Copies in chunks of 32, then at most one 16 and one 12, then one 8 or 4,
 * and falls through to Lmemcpy_bl4 for the last 0-3 bytes.
 * lr still holds the return address on this path, so it is pushed with r4
 * before being used as a data register and popped again before Lmemcpy_bl12.
 */
Lmemcpy_bt8:
319
        /* We have aligned source and destination */
320
        subs    r2, r2, #8              /* r2 = remaining - 12 */
321
        blt     Lmemcpy_bl12            /* less than 12 bytes (4 from above) */
322
        stmdb   sp!, {r4, lr}           /* borrow r4 and lr */
323
        subs    r2, r2, #0x14           /* less than 32 bytes (12 from above) */
324
        blt     Lmemcpy_bl32
325
 
326
        /* blat 32 bytes at a time */
327
        /* XXX for really big copies perhaps we should use more registers */
328
Lmemcpy_bloop32:
329
        /* each pass moves two 16-byte groups, highest addresses first */
        ldmdb   r1!, {r3, r4, r12, lr}
330
        stmdb   r0!, {r3, r4, r12, lr}
331
        ldmdb   r1!, {r3, r4, r12, lr}
332
        stmdb   r0!, {r3, r4, r12, lr}
333
        subs    r2, r2, #0x20
334
        bge     Lmemcpy_bloop32
335
 
336
Lmemcpy_bl32:
337
        cmn     r2, #0x10               /* flags from r2 + 16: GE if >= 16 bytes left */
338
        ldmgedb r1!, {r3, r4, r12, lr}  /* blat a remaining 16 bytes */
339
        stmgedb r0!, {r3, r4, r12, lr}
340
        subge   r2, r2, #0x10
341
        adds    r2, r2, #0x14           /* r2 = remaining - 12; GE if >= 12 left */
342
        ldmgedb r1!, {r3, r12, lr}      /* blat a remaining 12 bytes */
343
        stmgedb r0!, {r3, r12, lr}
344
        subge   r2, r2, #0x0c
345
        ldmia   sp!, {r4, lr}           /* return r4 and the return address */
346
 
347
Lmemcpy_bl12:
348
        adds    r2, r2, #8              /* r2 = remaining - 4 */
349
        blt     Lmemcpy_bl4
350
        subs    r2, r2, #4              /* LT: 4-7 bytes left, GE: 8-11 bytes left */
351
        ldrlt   r3, [r1, #-4]!          /* LT: copy one word */
352
        strlt   r3, [r0, #-4]!
353
        ldmgedb r1!, {r3, r12}          /* GE: copy two words */
354
        stmgedb r0!, {r3, r12}
355
        subge   r2, r2, #4              /* back to r2 = remaining - 4 */
356
 
357
/*
 * Backward tail: copy the last 0-3 bytes and return through lr.
 * Entry: r2 = bytes remaining - 4; r0/r1 point just past the bytes still
 * to be copied.
 */
Lmemcpy_bl4:
358
        /* less than 4 bytes to go */
359
        adds    r2, r2, #4              /* r2 = bytes remaining; Z set if none */
360
        moveq   pc, lr                  /* done */
361
 
362
        /* copy the crud byte at a time */
363
        /*
         * One byte is always copied; GE (2 or 3 left) copies a second and
         * GT (3 left) a third.  ldrb/strb do not alter the flags set by cmp.
         */
        cmp     r2, #2
364
        ldrb    r3, [r1, #-1]!
365
        strb    r3, [r0, #-1]!
366
        ldrgeb  r3, [r1, #-1]!
367
        strgeb  r3, [r0, #-1]!
368
        ldrgtb  r3, [r1, #-1]!
369
        strgtb  r3, [r0, #-1]!
370
        mov     pc, lr
371
 
372
        /*
         * erg - unaligned destination (backward copy)
         * Entry: r12 = r0 & 3 (non-zero) where r0 is the one-past-end
         * destination pointer, r2 = bytes remaining - 4.
         * r12 is itself the number of bytes (1-3) to copy downwards so that
         * r0 lands on a word boundary.
         */
373
Lmemcpy_bdestul:
374
        cmp     r12, #2                 /* GE: at least 2 bytes, GT: 3 bytes */
375
 
376
        /* align destination with byte copies */
377
        ldrb    r3, [r1, #-1]!
378
        strb    r3, [r0, #-1]!
379
        ldrgeb  r3, [r1, #-1]!
380
        strgeb  r3, [r0, #-1]!
381
        ldrgtb  r3, [r1, #-1]!
382
        strgtb  r3, [r0, #-1]!
383
        subs    r2, r2, r12             /* account for the bytes just copied */
384
        blt     Lmemcpy_bl4             /* less than 4 bytes to go */
385
        ands    r12, r1, #3             /* r12 = source end pointer & 3 */
386
        beq     Lmemcpy_bt8             /* we have an aligned source */
        /* otherwise fall through to Lmemcpy_bsrcul with r12 = r1 & 3 */
387
 
388
        /*
         * erg - unaligned source (backward copy, destination word aligned)
         * Entry: r12 = r1 & 3 (1..3) where r1 is the one-past-end source
         * pointer, r2 = bytes remaining - 4.
         * r1 is rounded down to a word boundary and only whole words are
         * loaded.  Each output word is built from the lower part of the
         * previous (higher-addressed) source word, carried in r3, and the
         * upper part of the next lower one.  The lsl/lsr pairing matches
         * little-endian byte order.
         * The code below the dispatch handles r1 & 3 == 3 (shift 8 / 24).
         */
389
        /* This is where it gets nasty ... */
390
Lmemcpy_bsrcul:
391
        bic     r1, r1, #3              /* round the end pointer down to its word */
392
        ldr     r3, [r1, #0]            /* prime the carry word */
393
        cmp     r12, #2                 /* dispatch on r1 & 3 */
394
        blt     Lmemcpy_bsrcul1
395
        beq     Lmemcpy_bsrcul2
396
        cmp     r2, #0x0c
397
        blt     Lmemcpy_bsrcul3loop4    /* fewer than 16 bytes left: word loop only */
398
        sub     r2, r2, #0x0c           /* r2 = remaining - 16 */
399
        stmdb   sp!, {r4, r5, lr}       /* borrow r4, r5; lr holds the return address */
400
 
401
Lmemcpy_bsrcul3loop16:
402
        /* 16 bytes per pass; r3 leaves the loop holding the lowest word loaded */
        mov     lr, r3, lsl #8
403
        ldmdb   r1!, {r3-r5, r12}
404
        orr     lr, lr, r12, lsr #24
405
        mov     r12, r12, lsl #8
406
        orr     r12, r12, r5, lsr #24
407
        mov     r5, r5, lsl #8
408
        orr     r5, r5, r4, lsr #24
409
        mov     r4, r4, lsl #8
410
        orr     r4, r4, r3, lsr #24
411
        stmdb   r0!, {r4, r5, r12, lr}
412
        subs    r2, r2, #0x10
413
        bge     Lmemcpy_bsrcul3loop16
414
        ldmia   sp!, {r4, r5, lr}       /* return r4, r5 and the return address */
415
        adds    r2, r2, #0x0c           /* back to r2 = remaining - 4 */
416
        blt     Lmemcpy_bsrcul3l4       /* fewer than 4 bytes left */
417
 
418
Lmemcpy_bsrcul3loop4:
419
        /* one output word per pass */
        mov     r12, r3, lsl #8
420
        ldr     r3, [r1, #-4]!
421
        orr     r12, r12, r3, lsr #24
422
        str     r12, [r0, #-4]!
423
        subs    r2, r2, #4
424
        bge     Lmemcpy_bsrcul3loop4
425
 
426
Lmemcpy_bsrcul3l4:
427
        /*
         * r1 points at the carry word, of which the low three bytes are
         * still unconsumed; restore the true one-past-end position.
         */
        add     r1, r1, #3
428
        b       Lmemcpy_bl4
429
 
430
/*
 * Backward copy, destination aligned, source end pointer & 3 == 2.
 * Entry: r1 rounded down to a word boundary, r3 = carry word loaded from
 * [r1], r2 = bytes remaining - 4.  Each output word takes 16 bits of the
 * carry word (lsl #16) and 16 bits of the next lower source word (lsr #16).
 */
Lmemcpy_bsrcul2:
431
        cmp     r2, #0x0c
432
        blt     Lmemcpy_bsrcul2loop4    /* fewer than 16 bytes left: word loop only */
433
        sub     r2, r2, #0x0c           /* r2 = remaining - 16 */
434
        stmdb   sp!, {r4, r5, lr}       /* borrow r4, r5; lr holds the return address */
435
 
436
Lmemcpy_bsrcul2loop16:
437
        /* 16 bytes per pass */
        mov     lr, r3, lsl #16
438
        ldmdb   r1!, {r3-r5, r12}
439
        orr     lr, lr, r12, lsr #16
440
        mov     r12, r12, lsl #16
441
        orr     r12, r12, r5, lsr #16
442
        mov     r5, r5, lsl #16
443
        orr     r5, r5, r4, lsr #16
444
        mov     r4, r4, lsl #16
445
        orr     r4, r4, r3, lsr #16
446
        stmdb   r0!, {r4, r5, r12, lr}
447
        subs    r2, r2, #0x10
448
        bge     Lmemcpy_bsrcul2loop16
449
        ldmia   sp!, {r4, r5, lr}       /* return r4, r5 and the return address */
450
        adds    r2, r2, #0x0c           /* back to r2 = remaining - 4 */
451
        blt     Lmemcpy_bsrcul2l4       /* fewer than 4 bytes left */
452
 
453
Lmemcpy_bsrcul2loop4:
454
        /* one output word per pass */
        mov     r12, r3, lsl #16
455
        ldr     r3, [r1, #-4]!
456
        orr     r12, r12, r3, lsr #16
457
        str     r12, [r0, #-4]!
458
        subs    r2, r2, #4
459
        bge     Lmemcpy_bsrcul2loop4
460
 
461
Lmemcpy_bsrcul2l4:
462
        /* two bytes of the carry word are still unconsumed: true end is r1 + 2 */
        add     r1, r1, #2
463
        b       Lmemcpy_bl4
464
 
465
/*
 * Backward copy, destination aligned, source end pointer & 3 == 1.
 * Entry: r1 rounded down to a word boundary, r3 = carry word loaded from
 * [r1], r2 = bytes remaining - 4.  Each output word takes the low 8 bits of
 * the carry word (lsl #24) and the top 24 bits of the next lower source
 * word (lsr #8).
 */
Lmemcpy_bsrcul1:
466
        cmp     r2, #0x0c
467
        blt     Lmemcpy_bsrcul1loop4    /* fewer than 16 bytes left: word loop only */
468
        sub     r2, r2, #0x0c           /* r2 = remaining - 16 */
469
        stmdb   sp!, {r4, r5, lr}       /* borrow r4, r5; lr holds the return address */
470
 
471
Lmemcpy_bsrcul1loop32:
472
        /* despite the label name this moves 16 bytes per pass (one ldmdb of four words) */
        mov     lr, r3, lsl #24
473
        ldmdb   r1!, {r3-r5, r12}
474
        orr     lr, lr, r12, lsr #8
475
        mov     r12, r12, lsl #24
476
        orr     r12, r12, r5, lsr #8
477
        mov     r5, r5, lsl #24
478
        orr     r5, r5, r4, lsr #8
479
        mov     r4, r4, lsl #24
480
        orr     r4, r4, r3, lsr #8
481
        stmdb   r0!, {r4, r5, r12, lr}
482
        subs    r2, r2, #0x10
483
        bge     Lmemcpy_bsrcul1loop32
484
        ldmia   sp!, {r4, r5, lr}       /* return r4, r5 and the return address */
485
        adds    r2, r2, #0x0c           /* back to r2 = remaining - 4 */
486
        blt     Lmemcpy_bsrcul1l4       /* fewer than 4 bytes left */
487
 
488
Lmemcpy_bsrcul1loop4:
489
        /* one output word per pass */
        mov     r12, r3, lsl #24
490
        ldr     r3, [r1, #-4]!
491
        orr     r12, r12, r3, lsr #8
492
        str     r12, [r0, #-4]!
493
        subs    r2, r2, #4
494
        bge     Lmemcpy_bsrcul1loop4
495
 
496
Lmemcpy_bsrcul1l4:
497
        /* one byte of the carry word is still unconsumed: true end is r1 + 1 */
        add     r1, r1, #1
498
        b       Lmemcpy_bl4
499
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.