arch/powerpc/lib/copy_32.S (Linux 2.6.24, rev 3, author xianfeng), from the OpenCores or1k_soc_on_altera_embedded_dev_kit repository:
https://opencores.org/ocsvn/or1k_soc_on_altera_embedded_dev_kit/or1k_soc_on_altera_embedded_dev_kit/trunk

/*
 * Memory copy functions for 32-bit PowerPC.
 *
 * Copyright (C) 1996-2005 Paul Mackerras.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>

#define COPY_16_BYTES           \
        lwz     r7,4(r4);       \
        lwz     r8,8(r4);       \
        lwz     r9,12(r4);      \
        lwzu    r10,16(r4);     \
        stw     r7,4(r6);       \
        stw     r8,8(r6);       \
        stw     r9,12(r6);      \
        stwu    r10,16(r6)
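
/*
 * Note the addressing convention used throughout this file: r4 and r6
 * hold (source - 4) and (destination - 4), so the plain lwz/stw at
 * offsets 4..12 need no pointer updates and the final lwzu/stwu at
 * offset 16 leaves both pointers advanced by one 16-byte chunk.
 */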

#define COPY_16_BYTES_WITHEX(n) \
8 ## n ## 0:                    \
        lwz     r7,4(r4);       \
8 ## n ## 1:                    \
        lwz     r8,8(r4);       \
8 ## n ## 2:                    \
        lwz     r9,12(r4);      \
8 ## n ## 3:                    \
        lwzu    r10,16(r4);     \
8 ## n ## 4:                    \
        stw     r7,4(r6);       \
8 ## n ## 5:                    \
        stw     r8,8(r6);       \
8 ## n ## 6:                    \
        stw     r9,12(r6);      \
8 ## n ## 7:                    \
        stwu    r10,16(r6)
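
/*
 * The ## pasting gives every load and store in an instance its own
 * numeric label: COPY_16_BYTES_WITHEX(1), for example, labels its
 * eight instructions 810: through 817:, which the matching
 * COPY_16_BYTES_EXCODE(1) can then list in the exception table.
 */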

#define COPY_16_BYTES_EXCODE(n)                 \
9 ## n ## 0:                                    \
        addi    r5,r5,-(16 * n);                \
        b       104f;                           \
9 ## n ## 1:                                    \
        addi    r5,r5,-(16 * n);                \
        b       105f;                           \
.section __ex_table,"a";                        \
        .align  2;                              \
        .long   8 ## n ## 0b,9 ## n ## 0b;      \
        .long   8 ## n ## 1b,9 ## n ## 0b;      \
        .long   8 ## n ## 2b,9 ## n ## 0b;      \
        .long   8 ## n ## 3b,9 ## n ## 0b;      \
        .long   8 ## n ## 4b,9 ## n ## 1b;      \
        .long   8 ## n ## 5b,9 ## n ## 1b;      \
        .long   8 ## n ## 6b,9 ## n ## 1b;      \
        .long   8 ## n ## 7b,9 ## n ## 1b;      \
        .text
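
/*
 * Fixup code paired with COPY_16_BYTES_WITHEX(n): faults on the four
 * loads (8n0..8n3) are routed to 9n0 and handled as read faults
 * (branch to 104f); faults on the four stores (8n4..8n7) go to 9n1,
 * a write fault (105f).  Both paths first subtract 16*n from r5 to
 * credit the bytes of the current cache line already moved by the
 * lower-numbered macro instances.
 */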

        .text
        .stabs  "arch/powerpc/lib/",N_SO,0,0,0f
        .stabs  "copy_32.S",N_SO,0,0,0f
0:

CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)

/*
 * Use dcbz on the complete cache lines in the destination
 * to set them to zero.  This requires that the destination
 * area is cacheable.  -- paulus
 */
_GLOBAL(cacheable_memzero)
        mr      r5,r4
        li      r4,0
        addi    r6,r3,-4
        cmplwi  0,r5,4
        blt     7f
        stwu    r4,4(r6)
        beqlr
        andi.   r0,r6,3
        add     r5,r0,r5
        subf    r6,r0,r6
        clrlwi  r7,r6,32-LG_CACHELINE_BYTES
        add     r8,r7,r5
        srwi    r9,r8,LG_CACHELINE_BYTES
        addic.  r9,r9,-1        /* total number of complete cachelines */
        ble     2f
        xori    r0,r7,CACHELINE_MASK & ~3
        srwi.   r0,r0,2
        beq     3f
        mtctr   r0
4:      stwu    r4,4(r6)
        bdnz    4b
3:      mtctr   r9
        li      r7,4
#if !defined(CONFIG_8xx)
10:     dcbz    r7,r6
#else
10:     stw     r4, 4(r6)
        stw     r4, 8(r6)
        stw     r4, 12(r6)
        stw     r4, 16(r6)
#if CACHE_LINE_SIZE >= 32
        stw     r4, 20(r6)
        stw     r4, 24(r6)
        stw     r4, 28(r6)
        stw     r4, 32(r6)
#endif /* CACHE_LINE_SIZE */
#endif
        addi    r6,r6,CACHELINE_BYTES
        bdnz    10b
        clrlwi  r5,r8,32-LG_CACHELINE_BYTES
        addi    r5,r5,4
2:      srwi    r0,r5,2
        mtctr   r0
        bdz     6f
1:      stwu    r4,4(r6)
        bdnz    1b
6:      andi.   r5,r5,3
7:      cmpwi   0,r5,0
        beqlr
        mtctr   r5
        addi    r6,r6,3
8:      stbu    r4,1(r6)
        bdnz    8b
        blr
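
/*
 * Shape of the routine above: the loop at 4: stores words up to the
 * first cache-line boundary, the loop at 10: clears each complete
 * line with a single dcbz (or plain stw stores on 8xx, where dcbz is
 * avoided), and the loops at 1: and 8: mop up the trailing words and
 * bytes.
 */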

_GLOBAL(memset)
        rlwimi  r4,r4,8,16,23
        rlwimi  r4,r4,16,0,15
        addi    r6,r3,-4
        cmplwi  0,r5,4
        blt     7f
        stwu    r4,4(r6)
        beqlr
        andi.   r0,r6,3
        add     r5,r0,r5
        subf    r6,r0,r6
        srwi    r0,r5,2
        mtctr   r0
        bdz     6f
1:      stwu    r4,4(r6)
        bdnz    1b
6:      andi.   r5,r5,3
7:      cmpwi   0,r5,0
        beqlr
        mtctr   r5
        addi    r6,r6,3
8:      stbu    r4,1(r6)
        bdnz    8b
        blr
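
/*
 * The two rlwimi instructions at the top of memset splat the fill
 * byte across a full word: the first copies the low byte into bits
 * 16-23, the second copies the resulting halfword into the upper
 * half, so e.g. c = 0x5a becomes 0x5a5a5a5a and the body can store
 * whole words at a time.
 */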

/*
 * This version uses dcbz on the complete cache lines in the
 * destination area to reduce memory traffic.  This requires that
 * the destination area is cacheable.
 * We only use this version if the source and dest don't overlap.
 * -- paulus.
 */
_GLOBAL(cacheable_memcpy)
        add     r7,r3,r5                /* test if the src & dst overlap */
        add     r8,r4,r5
        cmplw   0,r4,r7
        cmplw   1,r3,r8
        crand   0,0,4                   /* cr0.lt &= cr1.lt */
        blt     memcpy                  /* if regions overlap */

        addi    r4,r4,-4
        addi    r6,r3,-4
        neg     r0,r3
        andi.   r0,r0,CACHELINE_MASK    /* # bytes to start of cache line */
        beq     58f

        cmplw   0,r5,r0                 /* is this more than total to do? */
        blt     63f                     /* if not much to do */
        andi.   r8,r0,3                 /* get it word-aligned first */
        subf    r5,r0,r5
        mtctr   r8
        beq+    61f
70:     lbz     r9,4(r4)                /* do some bytes */
        stb     r9,4(r6)
        addi    r4,r4,1
        addi    r6,r6,1
        bdnz    70b
61:     srwi.   r0,r0,2
        mtctr   r0
        beq     58f
72:     lwzu    r9,4(r4)                /* do some words */
        stwu    r9,4(r6)
        bdnz    72b

58:     srwi.   r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
        clrlwi  r5,r5,32-LG_CACHELINE_BYTES
        li      r11,4
        mtctr   r0
        beq     63f
53:
#if !defined(CONFIG_8xx)
        dcbz    r11,r6
#endif
        COPY_16_BYTES
#if L1_CACHE_BYTES >= 32
        COPY_16_BYTES
#if L1_CACHE_BYTES >= 64
        COPY_16_BYTES
        COPY_16_BYTES
#if L1_CACHE_BYTES >= 128
        COPY_16_BYTES
        COPY_16_BYTES
        COPY_16_BYTES
        COPY_16_BYTES
#endif
#endif
#endif
        bdnz    53b

63:     srwi.   r0,r5,2
        mtctr   r0
        beq     64f
30:     lwzu    r0,4(r4)
        stwu    r0,4(r6)
        bdnz    30b

64:     andi.   r0,r5,3
        mtctr   r0
        beq+    65f
40:     lbz     r0,4(r4)
        stb     r0,4(r6)
        addi    r4,r4,1
        addi    r6,r6,1
        bdnz    40b
65:     blr
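
/*
 * The overlap test above computes src < dst + len in cr0 and
 * dst < src + len in cr1; crand ANDs the two "less than" bits, so
 * the blt to memcpy is taken exactly when the regions overlap and
 * the dcbz fast path would be unsafe.
 */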

_GLOBAL(memmove)
        cmplw   0,r3,r4
        bgt     backwards_memcpy
        /* fall through */

_GLOBAL(memcpy)
        srwi.   r7,r5,3
        addi    r6,r3,-4
        addi    r4,r4,-4
        beq     2f                      /* if less than 8 bytes to do */
        andi.   r0,r6,3                 /* get dest word aligned */
        mtctr   r7
        bne     5f
1:      lwz     r7,4(r4)
        lwzu    r8,8(r4)
        stw     r7,4(r6)
        stwu    r8,8(r6)
        bdnz    1b
        andi.   r5,r5,7
2:      cmplwi  0,r5,4
        blt     3f
        lwzu    r0,4(r4)
        addi    r5,r5,-4
        stwu    r0,4(r6)
3:      cmpwi   0,r5,0
        beqlr
        mtctr   r5
        addi    r4,r4,3
        addi    r6,r6,3
4:      lbzu    r0,1(r4)
        stbu    r0,1(r6)
        bdnz    4b
        blr
5:      subfic  r0,r0,4
        mtctr   r0
6:      lbz     r7,4(r4)
        addi    r4,r4,1
        stb     r7,4(r6)
        addi    r6,r6,1
        bdnz    6b
        subf    r5,r0,r5
        rlwinm. r7,r5,32-3,3,31
        beq     2b
        mtctr   r7
        b       1b
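
/*
 * memcpy copies forwards: the loop at 1: moves two words (8 bytes)
 * per iteration, 2:/3:/4: handle the 0-7 byte tail, and 5:/6: copy
 * single bytes until the destination is word-aligned before
 * rejoining the main loop.  memmove falls through to it whenever
 * dst <= src, since a forward copy is then safe even if the buffers
 * overlap.
 */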

_GLOBAL(backwards_memcpy)
        rlwinm. r7,r5,32-3,3,31         /* r7 = r5 >> 3 */
        add     r6,r3,r5
        add     r4,r4,r5
        beq     2f
        andi.   r0,r6,3
        mtctr   r7
        bne     5f
1:      lwz     r7,-4(r4)
        lwzu    r8,-8(r4)
        stw     r7,-4(r6)
        stwu    r8,-8(r6)
        bdnz    1b
        andi.   r5,r5,7
2:      cmplwi  0,r5,4
        blt     3f
        lwzu    r0,-4(r4)
        subi    r5,r5,4
        stwu    r0,-4(r6)
3:      cmpwi   0,r5,0
        beqlr
        mtctr   r5
4:      lbzu    r0,-1(r4)
        stbu    r0,-1(r6)
        bdnz    4b
        blr
5:      mtctr   r0
6:      lbzu    r7,-1(r4)
        stbu    r7,-1(r6)
        bdnz    6b
        subf    r5,r0,r5
        rlwinm. r7,r5,32-3,3,31
        beq     2b
        mtctr   r7
        b       1b
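
/*
 * backwards_memcpy mirrors memcpy but starts at the top of both
 * buffers and works downwards, which is what makes memmove safe
 * when dst > src and the regions overlap.
 */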

_GLOBAL(__copy_tofrom_user)
        addi    r4,r4,-4
        addi    r6,r3,-4
        neg     r0,r3
        andi.   r0,r0,CACHELINE_MASK    /* # bytes to start of cache line */
        beq     58f

        cmplw   0,r5,r0                 /* is this more than total to do? */
        blt     63f                     /* if not much to do */
        andi.   r8,r0,3                 /* get it word-aligned first */
        mtctr   r8
        beq+    61f
70:     lbz     r9,4(r4)                /* do some bytes */
71:     stb     r9,4(r6)
        addi    r4,r4,1
        addi    r6,r6,1
        bdnz    70b
61:     subf    r5,r0,r5
        srwi.   r0,r0,2
        mtctr   r0
        beq     58f
72:     lwzu    r9,4(r4)                /* do some words */
73:     stwu    r9,4(r6)
        bdnz    72b

        .section __ex_table,"a"
        .align  2
        .long   70b,100f
        .long   71b,101f
        .long   72b,102f
        .long   73b,103f
        .text
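
/*
 * Each __ex_table entry pairs a potentially faulting instruction
 * with a fixup address: if a user access at 70/71/72/73 takes an
 * unresolvable page fault, execution resumes at the matching
 * 100/101/102/103 handler below rather than oopsing.
 */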

58:     srwi.   r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
        clrlwi  r5,r5,32-LG_CACHELINE_BYTES
        li      r11,4
        beq     63f

#ifdef CONFIG_8xx
        /* Don't use prefetch on 8xx */
        mtctr   r0
        li      r0,0
53:     COPY_16_BYTES_WITHEX(0)
        bdnz    53b

#else /* not CONFIG_8xx */
        /* Here we decide how far ahead to prefetch the source */
        li      r3,4
        cmpwi   r0,1
        li      r7,0
        ble     114f
        li      r7,1
#if MAX_COPY_PREFETCH > 1
        /* Heuristically, for large transfers we prefetch
           MAX_COPY_PREFETCH cachelines ahead.  For small transfers
           we prefetch 1 cacheline ahead. */
        cmpwi   r0,MAX_COPY_PREFETCH
        ble     112f
        li      r7,MAX_COPY_PREFETCH
112:    mtctr   r7
111:    dcbt    r3,r4
        addi    r3,r3,CACHELINE_BYTES
        bdnz    111b
#else
        dcbt    r3,r4
        addi    r3,r3,CACHELINE_BYTES
#endif /* MAX_COPY_PREFETCH > 1 */

114:    subf    r8,r7,r0
        mr      r0,r7
        mtctr   r8

53:     dcbt    r3,r4
54:     dcbz    r11,r6
        .section __ex_table,"a"
        .align  2
        .long   54b,105f
        .text
/* the main body of the cacheline loop */
        COPY_16_BYTES_WITHEX(0)
#if L1_CACHE_BYTES >= 32
        COPY_16_BYTES_WITHEX(1)
#if L1_CACHE_BYTES >= 64
        COPY_16_BYTES_WITHEX(2)
        COPY_16_BYTES_WITHEX(3)
#if L1_CACHE_BYTES >= 128
        COPY_16_BYTES_WITHEX(4)
        COPY_16_BYTES_WITHEX(5)
        COPY_16_BYTES_WITHEX(6)
        COPY_16_BYTES_WITHEX(7)
#endif
#endif
#endif
        bdnz    53b
        cmpwi   r0,0
        li      r3,4
        li      r7,0
        bne     114b
#endif /* CONFIG_8xx */
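
/*
 * The prefetch logic keeps the dcbt stream r7 cache lines ahead of
 * the copy: the warm-up loop at 111: issues the first r7 prefetches
 * (advancing the offset in r3), the main pass at 53: then touches
 * one line ahead for each line it copies, and the drain pass
 * (re-entered at 114: with r3 and r7 reset) copies the last r7
 * lines with no lookahead, so dcbt never reaches past the end of
 * the source.
 */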

63:     srwi.   r0,r5,2
        mtctr   r0
        beq     64f
30:     lwzu    r0,4(r4)
31:     stwu    r0,4(r6)
        bdnz    30b

64:     andi.   r0,r5,3
        mtctr   r0
        beq+    65f
40:     lbz     r0,4(r4)
41:     stb     r0,4(r6)
        addi    r4,r4,1
        addi    r6,r6,1
        bdnz    40b
65:     li      r3,0
        blr
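
/*
 * On success __copy_tofrom_user returns 0 in r3; the fixup code
 * below instead returns the number of bytes left uncopied, which is
 * the contract that copy_to_user()/copy_from_user() rely on.
 */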

/* read fault, initial single-byte copy */
100:    li      r9,0
        b       90f
/* write fault, initial single-byte copy */
101:    li      r9,1
90:     subf    r5,r8,r5
        li      r3,0
        b       99f
/* read fault, initial word copy */
102:    li      r9,0
        b       91f
/* write fault, initial word copy */
103:    li      r9,1
91:     li      r3,2
        b       99f

/*
 * this stuff handles faults in the cacheline loop and branches to either
 * 104f (if in read part) or 105f (if in write part), after updating r5
 */
        COPY_16_BYTES_EXCODE(0)
#if L1_CACHE_BYTES >= 32
        COPY_16_BYTES_EXCODE(1)
#if L1_CACHE_BYTES >= 64
        COPY_16_BYTES_EXCODE(2)
        COPY_16_BYTES_EXCODE(3)
#if L1_CACHE_BYTES >= 128
        COPY_16_BYTES_EXCODE(4)
        COPY_16_BYTES_EXCODE(5)
        COPY_16_BYTES_EXCODE(6)
        COPY_16_BYTES_EXCODE(7)
#endif
#endif
#endif

/* read fault in cacheline loop */
104:    li      r9,0
        b       92f
/* fault on dcbz (effectively a write fault) */
/* or write fault in cacheline loop */
105:    li      r9,1
92:     li      r3,LG_CACHELINE_BYTES
        mfctr   r8
        add     r0,r0,r8
        b       106f
/* read fault in final word loop */
108:    li      r9,0
        b       93f
/* write fault in final word loop */
109:    li      r9,1
93:     andi.   r5,r5,3
        li      r3,2
        b       99f
/* read fault in final byte loop */
110:    li      r9,0
        b       94f
/* write fault in final byte loop */
111:    li      r9,1
94:     li      r5,0
        li      r3,0
/*
 * At this stage the number of bytes not copied is
 * r5 + (ctr << r3), and r9 is 0 for read or 1 for write.
 */
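
/*
 * Worked example: with 32-byte cache lines (r3 = LG_CACHELINE_BYTES
 * = 5) and a fault leaving 3 lines in the count and 10 bytes in r5,
 * the code below computes (3 << 5) + 10 = 106 bytes not copied,
 * which is returned in r3 (after the byte-by-byte retry in the
 * read-fault case).
 */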
99:     mfctr   r0
106:    slw     r3,r0,r3
        add.    r3,r3,r5
        beq     120f                    /* shouldn't happen */
        cmpwi   0,r9,0
        bne     120f
/* for a read fault, first try to continue the copy one byte at a time */
        mtctr   r3
130:    lbz     r0,4(r4)
131:    stb     r0,4(r6)
        addi    r4,r4,1
        addi    r6,r6,1
        bdnz    130b
/* then clear out the destination: r3 bytes starting at 4(r6) */
132:    mfctr   r3
        srwi.   r0,r3,2
        li      r9,0
        mtctr   r0
        beq     113f
112:    stwu    r9,4(r6)
        bdnz    112b
113:    andi.   r0,r3,3
        mtctr   r0
        beq     120f
114:    stb     r9,4(r6)
        addi    r6,r6,1
        bdnz    114b
120:    blr

        .section __ex_table,"a"
        .align  2
        .long   30b,108b
        .long   31b,109b
        .long   40b,110b
        .long   41b,111b
        .long   130b,132b
        .long   131b,120b
        .long   112b,120b
        .long   114b,120b
        .text
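
/*
 * For a read fault the retry loop at 130: copies as far as it can a
 * byte at a time, then 132: zeroes whatever is left of the
 * destination so the caller never sees stale kernel memory; the
 * trailing exception entries cover faults taken inside this fixup
 * code itself.
 */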
