or1k_soc_on_altera_embedded_dev_kit (OpenCores Subversion), trunk/linux-2.6/linux-2.6.24/arch/mips/lib/csum_partial.S, rev 3
https://opencores.org/ocsvn/or1k_soc_on_altera_embedded_dev_kit/or1k_soc_on_altera_embedded_dev_kit/trunk

/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Quick'n'dirty IP checksum ...
 *
 * Copyright (C) 1998, 1999 Ralf Baechle
 * Copyright (C) 1999 Silicon Graphics, Inc.
 */
#include <linux/errno.h>
#include <asm/asm.h>
#include <asm/asm-offsets.h>
#include <asm/regdef.h>

#ifdef CONFIG_64BIT
/*
 * As we share the code base with the mips32 tree (which uses the o32 ABI
 * register definitions), we need to redefine the register definitions from
 * the n64 ABI register naming to the o32 ABI register naming.
 */
#undef t0
#undef t1
#undef t2
#undef t3
#define t0      $8
#define t1      $9
#define t2      $10
#define t3      $11
#define t4      $12
#define t5      $13
#define t6      $14
#define t7      $15

#define USE_DOUBLE
#endif

#ifdef USE_DOUBLE

#define LOAD   ld
#define ADD    daddu
#define NBYTES 8

#else

#define LOAD   lw
#define ADD    addu
#define NBYTES 4

#endif /* USE_DOUBLE */

#define UNIT(unit)  ((unit)*NBYTES)

#define ADDC(sum,reg)                                           \
        ADD     sum, reg;                                       \
        sltu    v1, sum, reg;                                   \
        ADD     sum, v1
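/*
 * ADDC() above adds reg into sum and then adds the carry-out back in
 * (sltu yields 1 when the addition wrapped), i.e. an end-around-carry
 * add: the ones' complement addition the Internet checksum is built on.
 */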

#define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)    \
        LOAD    _t0, (offset + UNIT(0))(src);                   \
        LOAD    _t1, (offset + UNIT(1))(src);                   \
        LOAD    _t2, (offset + UNIT(2))(src);                   \
        LOAD    _t3, (offset + UNIT(3))(src);                   \
        ADDC(sum, _t0);                                         \
        ADDC(sum, _t1);                                         \
        ADDC(sum, _t2);                                         \
        ADDC(sum, _t3)

#ifdef USE_DOUBLE
#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3)     \
        CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)
#else
#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3)     \
        CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3);   \
        CSUM_BIGCHUNK1(src, offset + 0x10, sum, _t0, _t1, _t2, _t3)
#endif
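
/*
 * In either configuration CSUM_BIGCHUNK() consumes 0x20 bytes of source:
 * four 8-byte loads when USE_DOUBLE is defined, otherwise two
 * CSUM_BIGCHUNK1 groups of four 4-byte loads.
 */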

/*
 * a0: source address
 * a1: length of the area to checksum
 * a2: partial checksum
 */
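
/*
 * The 32-bit partial checksum is returned in v0.  For large enough
 * buffers the code first brings src up to a 32-byte boundary
 * (hword_align through oword_align), then checksums 128-, 64- and
 * 32-byte blocks, then the remaining whole words, and finally the
 * sub-word tail in small_csumcpy.
 */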

#define src a0
#define sum v0

        .text
        .set    noreorder
        .align  5
LEAF(csum_partial)
        move    sum, zero
        move    t7, zero

        sltiu   t8, a1, 0x8
        bnez    t8, small_csumcpy               /* < 8 bytes to copy */
         move   t2, a1

        andi    t7, src, 0x1                    /* odd buffer? */

hword_align:
        beqz    t7, word_align
         andi   t8, src, 0x2

        lbu     t0, (src)
        LONG_SUBU       a1, a1, 0x1
#ifdef __MIPSEL__
        sll     t0, t0, 8
#endif
        ADDC(sum, t0)
        PTR_ADDU        src, src, 0x1
        andi    t8, src, 0x2

word_align:
        beqz    t8, dword_align
         sltiu  t8, a1, 56

        lhu     t0, (src)
        LONG_SUBU       a1, a1, 0x2
        ADDC(sum, t0)
        sltiu   t8, a1, 56
        PTR_ADDU        src, src, 0x2

dword_align:
        bnez    t8, do_end_words
         move   t8, a1

        andi    t8, src, 0x4
        beqz    t8, qword_align
         andi   t8, src, 0x8

        lw      t0, 0x00(src)
        LONG_SUBU       a1, a1, 0x4
        ADDC(sum, t0)
        PTR_ADDU        src, src, 0x4
        andi    t8, src, 0x8

qword_align:
        beqz    t8, oword_align
         andi   t8, src, 0x10

#ifdef USE_DOUBLE
        ld      t0, 0x00(src)
        LONG_SUBU       a1, a1, 0x8
        ADDC(sum, t0)
#else
        lw      t0, 0x00(src)
        lw      t1, 0x04(src)
        LONG_SUBU       a1, a1, 0x8
        ADDC(sum, t0)
        ADDC(sum, t1)
#endif
        PTR_ADDU        src, src, 0x8
        andi    t8, src, 0x10

oword_align:
        beqz    t8, begin_movement
         LONG_SRL       t8, a1, 0x7

#ifdef USE_DOUBLE
        ld      t0, 0x00(src)
        ld      t1, 0x08(src)
        ADDC(sum, t0)
        ADDC(sum, t1)
#else
        CSUM_BIGCHUNK1(src, 0x00, sum, t0, t1, t3, t4)
#endif
        LONG_SUBU       a1, a1, 0x10
        PTR_ADDU        src, src, 0x10
        LONG_SRL        t8, a1, 0x7

begin_movement:
        beqz    t8, 1f
         andi   t2, a1, 0x40

move_128bytes:
        CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
        CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
        CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4)
        CSUM_BIGCHUNK(src, 0x60, sum, t0, t1, t3, t4)
        LONG_SUBU       t8, t8, 0x01
        bnez    t8, move_128bytes
         PTR_ADDU       src, src, 0x80

1:
        beqz    t2, 1f
         andi   t2, a1, 0x20

move_64bytes:
        CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
        CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
        PTR_ADDU        src, src, 0x40

1:
        beqz    t2, do_end_words
         andi   t8, a1, 0x1c

move_32bytes:
        CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
        andi    t8, a1, 0x1c
        PTR_ADDU        src, src, 0x20

do_end_words:
        beqz    t8, small_csumcpy
         andi   t2, a1, 0x3
        LONG_SRL        t8, t8, 0x2

end_words:
        lw      t0, (src)
        LONG_SUBU       t8, t8, 0x1
        ADDC(sum, t0)
        bnez    t8, end_words
         PTR_ADDU       src, src, 0x4

/* unknown src alignment and < 8 bytes to go  */
small_csumcpy:
        move    a1, t2
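        /*
         * a1 now holds the number of bytes left (0..7); bits 2, 1 and 0
         * of it select a trailing word, halfword and byte.  The word and
         * halfword are fetched with ulw/ulhu since src may be unaligned.
         */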

        andi    t0, a1, 4
        beqz    t0, 1f
         andi   t0, a1, 2

        /* Still a full word to go  */
        ulw     t1, (src)
        PTR_ADDIU       src, 4
        ADDC(sum, t1)

1:      move    t1, zero
        beqz    t0, 1f
         andi   t0, a1, 1

        /* Still a halfword to go  */
        ulhu    t1, (src)
        PTR_ADDIU       src, 2

1:      beqz    t0, 1f
         sll    t1, t1, 16

        lbu     t2, (src)
         nop

#ifdef __MIPSEB__
        sll     t2, t2, 8
#endif
        or      t1, t2

1:      ADDC(sum, t1)

        /* fold checksum */
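        /*
         * On 64-bit the two 32-bit halves of sum are first added together
         * (dsll32/dsra32) with the carry folded back in.  The 32-bit value
         * is then reduced the same way: add the low 16 bits into the high
         * half, keep the carry, and take the top half, leaving a sum that
         * fits in 16 bits.
         */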
#ifdef USE_DOUBLE
        dsll32  v1, sum, 0
        daddu   sum, v1
        sltu    v1, sum, v1
        dsra32  sum, sum, 0
        addu    sum, v1
#endif
        sll     v1, sum, 16
        addu    sum, v1
        sltu    v1, sum, v1
        srl     sum, sum, 16
        addu    sum, v1

        /* odd buffer alignment? */
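        /*
         * If the buffer started on an odd address, every byte was summed
         * one position off within its 16-bit word, so the two bytes of
         * the result are swapped here.  Ones' complement addition
         * commutes with this byte rotation, which is why the final swap
         * is all that is needed.
         */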
        beqz    t7, 1f
         nop
        sll     v1, sum, 8
        srl     sum, sum, 8
        or      sum, v1
        andi    sum, 0xffff
1:
        .set    reorder
        /* Add the passed partial csum.  */
        ADDC(sum, a2)
        jr      ra
        .set    noreorder
        END(csum_partial)


/*
 * checksum and copy routines based on memcpy.S
 *
 *      csum_partial_copy_nocheck(src, dst, len, sum)
 *      __csum_partial_copy_user(src, dst, len, sum, errp)
 *
 * See "Spec" in memcpy.S for details.  Unlike __copy_user, all
 * functions in this file use the standard calling convention.
 */

#define src a0
#define dst a1
#define len a2
#define psum a3
#define sum v0
#define odd t8
#define errptr t9

/*
 * The exception handler for loads requires that:
 *  1- AT contain the address of the byte just past the end of the source
 *     of the copy,
 *  2- src_entry <= src < AT, and
 *  3- (dst - src) == (dst_entry - src_entry),
 * The _entry suffix denotes values when __copy_user was called.
 *
 * (1) is set up by __csum_partial_copy_from_user and maintained by
 *      not writing AT in __csum_partial_copy
 * (2) is met by incrementing src by the number of bytes copied
 * (3) is met by not doing loads between a pair of increments of dst and src
 *
 * The exception handlers for stores store -EFAULT to errptr and return.
 * These handlers do not need to overwrite any data.
 */

#define EXC(inst_reg,addr,handler)              \
9:      inst_reg, addr;                         \
        .section __ex_table,"a";                \
        PTR     9b, handler;                    \
        .previous
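
/*
 * Each EXC() expansion tags its instruction with the local label 9: and
 * records a (9b, handler) pair in the __ex_table section; if the access
 * faults, the kernel's exception-table fixup transfers control to the
 * named handler instead of taking an unrecoverable fault.
 */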

#ifdef USE_DOUBLE

#define LOAD   ld
#define LOADL  ldl
#define LOADR  ldr
#define STOREL sdl
#define STORER sdr
#define STORE  sd
#define ADD    daddu
#define SUB    dsubu
#define SRL    dsrl
#define SLL    dsll
#define SLLV   dsllv
#define SRLV   dsrlv
#define NBYTES 8
#define LOG_NBYTES 3

#else

#define LOAD   lw
#define LOADL  lwl
#define LOADR  lwr
#define STOREL swl
#define STORER swr
#define STORE  sw
#define ADD    addu
#define SUB    subu
#define SRL    srl
#define SLL    sll
#define SLLV   sllv
#define SRLV   srlv
#define NBYTES 4
#define LOG_NBYTES 2

#endif /* USE_DOUBLE */

#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define LDFIRST LOADR
#define LDREST  LOADL
#define STFIRST STORER
#define STREST  STOREL
#define SHIFT_DISCARD SLLV
#define SHIFT_DISCARD_REVERT SRLV
#else
#define LDFIRST LOADL
#define LDREST  LOADR
#define STFIRST STOREL
#define STREST  STORER
#define SHIFT_DISCARD SRLV
#define SHIFT_DISCARD_REVERT SLLV
#endif
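
/*
 * LDFIRST/LDREST pair lwl/lwr (ldl/ldr with USE_DOUBLE) so that one
 * unaligned NBYTES-wide load takes two instructions; which of the pair
 * touches the low-address part depends on endianness, hence the two
 * mappings above.  STFIRST/STREST do the same for stores, and
 * SHIFT_DISCARD/SHIFT_DISCARD_REVERT shift the highest-addressed bytes
 * out of (and back into) a register regardless of endianness.
 */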

#define FIRST(unit) ((unit)*NBYTES)
#define REST(unit)  (FIRST(unit)+NBYTES-1)

#define ADDRMASK (NBYTES-1)

        .set    noat

LEAF(__csum_partial_copy_user)
        PTR_ADDU        AT, src, len    /* See (1) above. */
#ifdef CONFIG_64BIT
        move    errptr, a4
#else
        lw      errptr, 16(sp)
#endif
FEXPORT(csum_partial_copy_nocheck)
        move    sum, zero
        move    odd, zero
        /*
         * Note: dst & src may be unaligned, len may be 0
         * Temps
         */
        /*
         * The "issue break"s below are very approximate.
         * Issue delays for dcache fills will perturb the schedule, as will
         * load queue full replay traps, etc.
         *
         * If len < NBYTES use byte operations.
         */
        sltu    t2, len, NBYTES
        and     t1, dst, ADDRMASK
        bnez    t2, copy_bytes_checklen
         and    t0, src, ADDRMASK
        andi    odd, dst, 0x1                   /* odd buffer? */
        bnez    t1, dst_unaligned
         nop
        bnez    t0, src_unaligned_dst_aligned
        /*
         * use delay slot for fall-through
         * src and dst are aligned; need to compute rem
         */
both_aligned:
         SRL    t0, len, LOG_NBYTES+3    # +3 for 8 units/iter
        beqz    t0, cleanup_both_aligned # len < 8*NBYTES
         nop
        SUB     len, 8*NBYTES           # subtract here for bgez loop
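        /*
         * Main loop: 8*NBYTES bytes per iteration.  The whole block is
         * loaded first, then each word is stored and folded into sum.
         * len was pre-decremented above so the bgez at the bottom exits
         * once fewer than 8*NBYTES bytes remain.
         */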
        .align  4
1:
EXC(    LOAD    t0, UNIT(0)(src),       l_exc)
EXC(    LOAD    t1, UNIT(1)(src),       l_exc_copy)
EXC(    LOAD    t2, UNIT(2)(src),       l_exc_copy)
EXC(    LOAD    t3, UNIT(3)(src),       l_exc_copy)
EXC(    LOAD    t4, UNIT(4)(src),       l_exc_copy)
EXC(    LOAD    t5, UNIT(5)(src),       l_exc_copy)
EXC(    LOAD    t6, UNIT(6)(src),       l_exc_copy)
EXC(    LOAD    t7, UNIT(7)(src),       l_exc_copy)
        SUB     len, len, 8*NBYTES
        ADD     src, src, 8*NBYTES
EXC(    STORE   t0, UNIT(0)(dst),       s_exc)
        ADDC(sum, t0)
EXC(    STORE   t1, UNIT(1)(dst),       s_exc)
        ADDC(sum, t1)
EXC(    STORE   t2, UNIT(2)(dst),       s_exc)
        ADDC(sum, t2)
EXC(    STORE   t3, UNIT(3)(dst),       s_exc)
        ADDC(sum, t3)
EXC(    STORE   t4, UNIT(4)(dst),       s_exc)
        ADDC(sum, t4)
EXC(    STORE   t5, UNIT(5)(dst),       s_exc)
        ADDC(sum, t5)
EXC(    STORE   t6, UNIT(6)(dst),       s_exc)
        ADDC(sum, t6)
EXC(    STORE   t7, UNIT(7)(dst),       s_exc)
        ADDC(sum, t7)
        bgez    len, 1b
         ADD    dst, dst, 8*NBYTES
        ADD     len, 8*NBYTES           # revert len (see above)

        /*
         * len == the number of bytes left to copy < 8*NBYTES
         */
cleanup_both_aligned:
#define rem t7
        beqz    len, done
         sltu   t0, len, 4*NBYTES
        bnez    t0, less_than_4units
         and    rem, len, (NBYTES-1)    # rem = len % NBYTES
        /*
         * len >= 4*NBYTES
         */
EXC(    LOAD    t0, UNIT(0)(src),       l_exc)
EXC(    LOAD    t1, UNIT(1)(src),       l_exc_copy)
EXC(    LOAD    t2, UNIT(2)(src),       l_exc_copy)
EXC(    LOAD    t3, UNIT(3)(src),       l_exc_copy)
        SUB     len, len, 4*NBYTES
        ADD     src, src, 4*NBYTES
EXC(    STORE   t0, UNIT(0)(dst),       s_exc)
        ADDC(sum, t0)
EXC(    STORE   t1, UNIT(1)(dst),       s_exc)
        ADDC(sum, t1)
EXC(    STORE   t2, UNIT(2)(dst),       s_exc)
        ADDC(sum, t2)
EXC(    STORE   t3, UNIT(3)(dst),       s_exc)
        ADDC(sum, t3)
        beqz    len, done
         ADD    dst, dst, 4*NBYTES
less_than_4units:
        /*
         * rem = len % NBYTES
         */
        beq     rem, len, copy_bytes
         nop
1:
EXC(    LOAD    t0, 0(src),             l_exc)
        ADD     src, src, NBYTES
        SUB     len, len, NBYTES
EXC(    STORE   t0, 0(dst),             s_exc)
        ADDC(sum, t0)
        bne     rem, len, 1b
         ADD    dst, dst, NBYTES

        /*
         * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
         * A loop would do only a byte at a time with possible branch
         * mispredicts.  Can't do an explicit LOAD dst,mask,or,STORE
         * because can't assume read-access to dst.  Instead, use
         * STREST dst, which doesn't require read access to dst.
         *
         * This code should perform better than a simple loop on modern,
         * wide-issue mips processors because the code has fewer branches and
         * more instruction-level parallelism.
         */
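        /*
         * SHIFT_DISCARD pushes the bytes beyond len out of t0 so that
         * STREST writes exactly the len remaining bytes ending at t1-1;
         * SHIFT_DISCARD_REVERT then moves the kept bytes back to their
         * original positions (the discarded ones become zero), so ADDC
         * only sums the bytes that were actually stored.
         */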
#define bits t2
        beqz    len, done
         ADD    t1, dst, len    # t1 is just past last byte of dst
        li      bits, 8*NBYTES
        SLL     rem, len, 3     # rem = number of bits to keep
EXC(    LOAD    t0, 0(src),             l_exc)
        SUB     bits, bits, rem # bits = number of bits to discard
        SHIFT_DISCARD t0, t0, bits
EXC(    STREST  t0, -1(t1),             s_exc)
        SHIFT_DISCARD_REVERT t0, t0, bits
        .set reorder
        ADDC(sum, t0)
        b       done
        .set noreorder
dst_unaligned:
        /*
         * dst is unaligned
         * t0 = src & ADDRMASK
         * t1 = dst & ADDRMASK; T1 > 0
         * len >= NBYTES
         *
         * Copy enough bytes to align dst
         * Set match = (src and dst have same alignment)
         */
#define match rem
EXC(    LDFIRST t3, FIRST(0)(src),      l_exc)
        ADD     t2, zero, NBYTES
EXC(    LDREST  t3, REST(0)(src),       l_exc_copy)
        SUB     t2, t2, t1      # t2 = number of bytes copied
        xor     match, t0, t1
EXC(    STFIRST t3, FIRST(0)(dst),      s_exc)
        SLL     t4, t1, 3               # t4 = number of bits to discard
        SHIFT_DISCARD t3, t3, t4
        /* no SHIFT_DISCARD_REVERT to handle odd buffer properly */
        ADDC(sum, t3)
        beq     len, t2, done
         SUB    len, len, t2
        ADD     dst, dst, t2
        beqz    match, both_aligned
         ADD    src, src, t2

src_unaligned_dst_aligned:
        SRL     t0, len, LOG_NBYTES+2    # +2 for 4 units/iter
        beqz    t0, cleanup_src_unaligned
         and    rem, len, (4*NBYTES-1)   # rem = len % 4*NBYTES
1:
/*
 * Avoid consecutive LD*'s to the same register since some mips
 * implementations can't issue them in the same cycle.
 * It's OK to load FIRST(N+1) before REST(N) because the two addresses
 * are to the same unit (unless src is aligned, but it's not).
 */
EXC(    LDFIRST t0, FIRST(0)(src),      l_exc)
EXC(    LDFIRST t1, FIRST(1)(src),      l_exc_copy)
        SUB     len, len, 4*NBYTES
EXC(    LDREST  t0, REST(0)(src),       l_exc_copy)
EXC(    LDREST  t1, REST(1)(src),       l_exc_copy)
EXC(    LDFIRST t2, FIRST(2)(src),      l_exc_copy)
EXC(    LDFIRST t3, FIRST(3)(src),      l_exc_copy)
EXC(    LDREST  t2, REST(2)(src),       l_exc_copy)
EXC(    LDREST  t3, REST(3)(src),       l_exc_copy)
        ADD     src, src, 4*NBYTES
#ifdef CONFIG_CPU_SB1
        nop                             # improves slotting
#endif
EXC(    STORE   t0, UNIT(0)(dst),       s_exc)
        ADDC(sum, t0)
EXC(    STORE   t1, UNIT(1)(dst),       s_exc)
        ADDC(sum, t1)
EXC(    STORE   t2, UNIT(2)(dst),       s_exc)
        ADDC(sum, t2)
EXC(    STORE   t3, UNIT(3)(dst),       s_exc)
        ADDC(sum, t3)
        bne     len, rem, 1b
         ADD    dst, dst, 4*NBYTES

cleanup_src_unaligned:
        beqz    len, done
         and    rem, len, NBYTES-1  # rem = len % NBYTES
        beq     rem, len, copy_bytes
         nop
1:
EXC(    LDFIRST t0, FIRST(0)(src),      l_exc)
EXC(    LDREST  t0, REST(0)(src),       l_exc_copy)
        ADD     src, src, NBYTES
        SUB     len, len, NBYTES
EXC(    STORE   t0, 0(dst),             s_exc)
        ADDC(sum, t0)
        bne     len, rem, 1b
         ADD    dst, dst, NBYTES

copy_bytes_checklen:
        beqz    len, done
         nop
copy_bytes:
        /* 0 < len < NBYTES  */
#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define SHIFT_START 0
#define SHIFT_INC 8
#else
#define SHIFT_START 8*(NBYTES-1)
#define SHIFT_INC -8
#endif
        move    t2, zero        # partial word
        li      t3, SHIFT_START # shift
/* use l_exc_copy here to return correct sum on fault */
#define COPY_BYTE(N)                    \
EXC(    lbu     t0, N(src), l_exc_copy);        \
        SUB     len, len, 1;            \
EXC(    sb      t0, N(dst), s_exc);     \
        SLLV    t0, t0, t3;             \
        addu    t3, SHIFT_INC;          \
        beqz    len, copy_bytes_done;   \
         or     t2, t0
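
/*
 * COPY_BYTE(N) copies one byte to dst and shifts it into its slot (t3,
 * starting at SHIFT_START and stepping by SHIFT_INC so the byte lands
 * where a native-endian word load would have put it) within the partial
 * word t2, exiting to copy_bytes_done once len reaches zero.  The
 * assembled word is added to sum there.
 */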

        COPY_BYTE(0)
        COPY_BYTE(1)
#ifdef USE_DOUBLE
        COPY_BYTE(2)
        COPY_BYTE(3)
        COPY_BYTE(4)
        COPY_BYTE(5)
#endif
EXC(    lbu     t0, NBYTES-2(src), l_exc_copy)
        SUB     len, len, 1
EXC(    sb      t0, NBYTES-2(dst), s_exc)
        SLLV    t0, t0, t3
        or      t2, t0
copy_bytes_done:
        ADDC(sum, t2)
done:
        /* fold checksum */
#ifdef USE_DOUBLE
        dsll32  v1, sum, 0
        daddu   sum, v1
        sltu    v1, sum, v1
        dsra32  sum, sum, 0
        addu    sum, v1
#endif
        sll     v1, sum, 16
        addu    sum, v1
        sltu    v1, sum, v1
        srl     sum, sum, 16
        addu    sum, v1

        /* odd buffer alignment? */
        beqz    odd, 1f
         nop
        sll     v1, sum, 8
        srl     sum, sum, 8
        or      sum, v1
        andi    sum, 0xffff
1:
        .set reorder
        ADDC(sum, psum)
        jr      ra
        .set noreorder

l_exc_copy:
        /*
         * Copy bytes from src until faulting load address (or until a
         * lb faults)
         *
         * When reached by a faulting LDFIRST/LDREST, THREAD_BUADDR($28)
         * may be more than a byte beyond the last address.
         * Hence, the lb below may get an exception.
         *
         * Assumes src < THREAD_BUADDR($28)
         */
        LOAD    t0, TI_TASK($28)
         li     t2, SHIFT_START
        LOAD    t0, THREAD_BUADDR(t0)
1:
EXC(    lbu     t1, 0(src),     l_exc)
        ADD     src, src, 1
        sb      t1, 0(dst)      # can't fault -- we're copy_from_user
        SLLV    t1, t1, t2
        addu    t2, SHIFT_INC
        ADDC(sum, t1)
        bne     src, t0, 1b
         ADD    dst, dst, 1
l_exc:
        LOAD    t0, TI_TASK($28)
         nop
        LOAD    t0, THREAD_BUADDR(t0)   # t0 is just past last good address
         nop
        SUB     len, AT, t0             # len = number of uncopied bytes
        /*
         * Here's where we rely on src and dst being incremented in tandem,
         *   See (3) above.
         * dst += (fault addr - src) to put dst at first byte to clear
         */
        ADD     dst, t0                 # compute start address in a1
        SUB     dst, src
        /*
         * Clear len bytes starting at dst.  Can't call __bzero because it
         * might modify len.  An inefficient loop for these rare times...
         */
        beqz    len, done
         SUB    src, len, 1
1:      sb      zero, 0(dst)
        ADD     dst, dst, 1
        bnez    src, 1b
         SUB    src, src, 1
        li      v1, -EFAULT
        b       done
         sw     v1, (errptr)

s_exc:
        li      v0, -1 /* invalid checksum */
        li      v1, -EFAULT
        jr      ra
         sw     v1, (errptr)
END(__csum_partial_copy_user)