OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

[/] [or1k/] [trunk/] [linux/] [linux-2.4/] [arch/] [sh/] [lib/] [checksum.S] - Blame information for rev 1275

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 1275 phoenix
/* $Id: checksum.S,v 1.1.1.1 2004-04-15 01:17:17 phoenix Exp $
2
 *
3
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
4
 *              operating system.  INET is implemented using the  BSD Socket
5
 *              interface as the means of communication with the user level.
6
 *
7
 *              IP/TCP/UDP checksumming routines
8
 *
9
 * Authors:     Jorge Cwik, 
10
 *              Arnt Gulbrandsen, 
11
 *              Tom May, 
12
 *              Pentium Pro/II routines:
13
 *              Alexander Kjeldaas 
14
 *              Finn Arne Gangstad 
15
 *              Lots of code moved from tcp.c and ip.c; see those files
16
 *              for more names.
17
 *
18
 * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
19
 *                           handling.
20
 *              Andi Kleen,  add zeroing on error
21
 *                   converted to pure assembler
22
 *
23
 * SuperH version:  Copyright (C) 1999  Niibe Yutaka
24
 *
25
 *              This program is free software; you can redistribute it and/or
26
 *              modify it under the terms of the GNU General Public License
27
 *              as published by the Free Software Foundation; either version
28
 *              2 of the License, or (at your option) any later version.
29
 */
30
 
31
#include <asm/errno.h>
32
#include <linux/linkage.h>
33
 
34
/*
35
 * computes a partial checksum, e.g. for TCP/UDP fragments
36
 */
37
 
38
/*
39
 * unsigned int csum_partial(const unsigned char *buf, int len,
40
 *                           unsigned int sum);
41
 */
42
 
43
.text
44
/*
 * csum_partial: add `len` bytes starting at `buf` into the running 32-bit
 * sum and return the new sum in r0.
 *
 * Register use, as read from the code below:
 *   r4          buf pointer, advanced by every post-increment load
 *   r5          len on entry; reused as the loop counter of each phase
 *   r6          running sum, accumulated with addc
 *   r1          copy of the length still to be summed, read again at 4: and 6:
 *   r0, r2, r3  scratch
 *
 * Phases: optional leading 16-bit word, 32-byte unrolled groups, remaining
 * longwords, then a tail of up to three bytes.
 *
 * Only bit 1 of the address is tested, so 2-byte and 4-byte aligned buffers
 * are the cases this code distinguishes.
 * NOTE(review): an odd `buf` would reach the mov.w/mov.l loads misaligned --
 * confirm that callers never pass one.
 */
ENTRY(csum_partial)
45
          /*
46
           * Experiments with Ethernet and SLIP connections show that buff
47
           * is aligned on either a 2-byte or 4-byte boundary.  We get at
48
           * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
49
           * Fortunately, it is easy to convert 2-byte alignment to 4-byte
50
           * alignment for the unrolled loop.
51
           */
52
        /* r1 = len for the later phases; r0 = buf so tst #imm can be used. */
        mov     r5, r1
53
        mov     r4, r0
54
        tst     #2, r0          ! Check alignment.
55
        bt      2f              ! Jump if alignment is ok.
56
        !
57
        /*
         * Bit 1 of the address is set.  Tentatively charge two bytes to the
         * leading word; if that makes len negative there were fewer than two
         * bytes, so undo it and let the tail code at 6: deal with them (6:
         * reloads the count from r1, which still holds the original len).
         */
        add     #-2, r5         ! Alignment uses up two bytes.
58
        cmp/pz  r5              !
59
        bt/s    1f              ! Jump if we had at least two bytes.
60
         clrt
61
        bra     6f
62
         add    #2, r5          ! r5 was < 2.  Deal with it.
63
1:
64
        /*
         * Sum one leading 16-bit word.  T was cleared in the delay slot above,
         * so the addc starts without a stray carry; a carry out of the add is
         * folded back in by the add #1.
         */
        mov     r5, r1          ! Save new len for later use.
65
        mov.w   @r4+, r0
66
        extu.w  r0, r0
67
        addc    r0, r6
68
        bf      2f
69
        add     #1, r6
70
2:
71
        /* r5 = len >> 5 (shld by a negative count shifts right): 32-byte groups. */
        mov     #-5, r0
72
        shld    r0, r5
73
        tst     r5, r5
74
        bt/s    4f              ! if it's =0, go to 4f
75
         clrt
76
        .align  2
77
3:
78
        /*
         * Unrolled loop: eight longword loads and eight addc per pass.
         * dt overwrites T, so the carry is parked in r0 by movt and put back
         * into T by the cmp/eq #1 in the branch delay slot.
         */
        mov.l   @r4+, r0
79
        mov.l   @r4+, r2
80
        mov.l   @r4+, r3
81
        addc    r0, r6
82
        mov.l   @r4+, r0
83
        addc    r2, r6
84
        mov.l   @r4+, r2
85
        addc    r3, r6
86
        mov.l   @r4+, r3
87
        addc    r0, r6
88
        mov.l   @r4+, r0
89
        addc    r2, r6
90
        mov.l   @r4+, r2
91
        addc    r3, r6
92
        addc    r0, r6
93
        addc    r2, r6
94
        movt    r0
95
        dt      r5
96
        bf/s    3b
97
         cmp/eq #1, r0
98
        ! here, we know r5==0
99
        addc    r5, r6                  ! add carry to r6
100
4:
101
        /* r5 = (len & 0x1c) >> 2: whole longwords left after the 32-byte groups. */
        mov     r1, r0
102
        and     #0x1c, r0
103
        tst     r0, r0
104
        bt/s    6f
105
         mov    r0, r5
106
        shlr2   r5
107
        mov     #0, r2
108
5:
109
        /*
         * Each pass adds the longword loaded by the previous pass (r2 starts
         * as 0); the final longword is added after the loop.  The carry is
         * saved and restored around dt in the same way as in loop 3.
         */
        addc    r2, r6
110
        mov.l   @r4+, r2
111
        movt    r0
112
        dt      r5
113
        bf/s    5b
114
         cmp/eq #1, r0
115
        addc    r2, r6
116
        addc    r5, r6          ! r5==0 here, so it means add carry-bit
117
6:
118
        /* r5 = len & 3: trailing bytes.  r1 is reused below as the constant 2. */
        mov     r1, r5
119
        mov     #3, r0
120
        and     r0, r5
121
        tst     r5, r5
122
        bt      9f              ! if it's =0 go to 9f
123
        mov     #2, r1
124
        cmp/hs  r1, r5
125
        bf      7f
126
        /*
         * Two or three bytes left: load a 16-bit word.  With exactly two it is
         * added at 8:.  With three it is shifted into the upper half, added
         * here, and the last byte is then handled at 7:.
         */
        mov.w   @r4+, r0
127
        extu.w  r0, r0
128
        cmp/eq  r1, r5
129
        bt/s    8f
130
         clrt
131
        shll16  r0
132
        addc    r0, r6
133
7:
134
        /* Final single byte; on big-endian builds it is shifted left by 8. */
        mov.b   @r4+, r0
135
        extu.b  r0, r0
136
#ifndef __LITTLE_ENDIAN__
137
        shll8   r0
138
#endif
139
8:
140
        /* Add the last piece, then add 0 with carry to fold in the final carry. */
        addc    r0, r6
141
        mov     #0, r0
142
        addc    r0, r6
143
9:
144
        /* Return the sum: the mov executes in the rts delay slot. */
        rts
145
         mov    r6, r0
146
 
147
/*
148
unsigned int csum_partial_copy_generic (const char *src, char *dst, int len,
149
                                        int sum, int *src_err_ptr, int *dst_err_ptr)
150
 */
151
 
152
/*
153
 * Copy from src to dst while checksumming, otherwise like csum_partial
154
 *
155
 * The macros SRC and DST specify the type of access for the instruction.
156
 * thus we can call a custom exception handler for all access types.
157
 *
158
 * FIXME: could someone double-check whether I haven't mixed up some SRC and
159
 *        DST definitions? It's damn hard to trigger all cases.  I hope I got
160
 *        them all but there's no guarantee.
161
 */
162
 
163
/*
 * SRC(insn): emit `insn` at local label 9999 and add the address pair
 * (9999b, 6001f) to the __ex_table section, then switch back to the
 * previous section.  6001 is the fixup entry used for source-side accesses.
 * NOTE(review): presumably the fault handler looks the faulting PC up in
 * __ex_table and resumes at the second address -- that lookup is not part of
 * this file.
 */
#define SRC(...)                        \
164
        9999: __VA_ARGS__ ;             \
165
        .section __ex_table, "a";       \
166
        .long 9999b, 6001f      ;       \
167
        .previous
168
 
169
/*
 * DST(insn): emit `insn` at local label 9999 and add the address pair
 * (9999b, 6002f) to the __ex_table section, then switch back to the
 * previous section.  6002 is the fixup entry used for destination-side
 * accesses.
 * NOTE(review): presumably the fault handler looks the faulting PC up in
 * __ex_table and resumes at the second address -- that lookup is not part of
 * this file.
 */
#define DST(...)                        \
170
        9999: __VA_ARGS__ ;             \
171
        .section __ex_table, "a";       \
172
        .long 9999b, 6002f      ;       \
173
        .previous
174
 
175
!
176
! r4:   const char *SRC
177
! r5:   char *DST
178
! r6:   int LEN
179
! r7:   int SUM
180
!
181
! on stack:
182
! int *SRC_ERR_PTR
183
! int *DST_ERR_PTR
184
!
185
/*
 * csum_partial_copy_generic - copy a buffer and checksum it in one pass.
 *
 * In:   r4 = src, r5 = dst, r6 = len, r7 = initial sum
 *       on the caller's stack: src_err_ptr, then dst_err_ptr
 * Out:  r0 = updated 32-bit partial sum
 *
 * dst and len are pushed on entry so the fixup code can find them.  After
 * the two pushes the frame is:
 *       @(0,r15)   len
 *       @(4,r15)   dst
 *       @(8,r15)   src_err_ptr
 *       @(12,r15)  dst_err_ptr
 *
 * Every load from src is wrapped in SRC() and every store to dst in DST(),
 * which adds an __ex_table entry pointing at 6001 or 6002 respectively:
 *   6001 stores -EFAULT through src_err_ptr, zeroes all len bytes of dst
 *        and resumes at 5000 with the sum forced to 0;
 *   6002 stores -EFAULT through dst_err_ptr and resumes at 5000 with
 *        whatever sum had been accumulated so far.
 *
 * r2 carries the byte count into the tail code at 4:, so every path that
 * reaches 4: must have loaded it first.  The "fewer than two bytes" exit of
 * the 2-byte alignment path used to branch to 4: without doing so, leaving
 * the tail count to whatever r2 happened to contain; it now sets r2 in the
 * branch delay slot.
 *
 * Carry handling: dt overwrites T, so each loop parks the addc carry in r0
 * with movt and restores it with cmp/eq #1,r0 in the branch delay slot.
 */
ENTRY(csum_partial_copy_generic)
        mov.l   r5,@-r15        ! save dst for the fixup code
        mov.l   r6,@-r15        ! save len for the fixup code

        mov     #3,r0           ! Check src and dest are equally aligned
        mov     r4,r1
        and     r0,r1
        and     r5,r0
        cmp/eq  r1,r0
        bf      3f              ! Different alignments, use slow version
        tst     #1,r0           ! Check dest word aligned
        bf      3f              ! If not, do it the slow way

        mov     #2,r0
        tst     r0,r5           ! Check dest alignment.
        bt      2f              ! Jump if alignment is ok.
        add     #-2,r6          ! Alignment uses up two bytes.
        cmp/pz  r6              ! Jump if we had at least two bytes.
        bt/s    1f
         clrt
        add     #2,r6           ! r6 was < 2.  Restore it (T is unaffected).
        bra     4f
         mov    r6,r2           ! 4: takes its byte count from r2

3:      ! Handle different src and dest alignments.
        ! This is not common, so simple byte by byte copy will do.
        mov     r6,r2           ! r2 = len, kept for the odd-byte test and for 4:
        shlr    r6              ! r6 = number of byte pairs
        tst     r6,r6
        bt      4f              ! fewer than two bytes: tail code copes
        clrt
        .align  2
5:
        ! Copy two bytes per pass and add them as one 16-bit value whose
        ! byte order matches what a mov.w load would have produced.
SRC(    mov.b   @r4+,r1         )
SRC(    mov.b   @r4+,r0         )
        extu.b  r1,r1
DST(    mov.b   r1,@r5          )
DST(    mov.b   r0,@(1,r5)      )
        extu.b  r0,r0
        add     #2,r5

#ifdef  __LITTLE_ENDIAN__
        shll8   r0
#else
        shll8   r1
#endif
        or      r1,r0

        addc    r0,r7
        movt    r0              ! park carry, dt is about to overwrite T
        dt      r6
        bf/s    5b
         cmp/eq #1,r0           ! restore carry into T
        mov     #0,r0
        addc    r0, r7          ! fold the final carry

        mov     r2, r0
        tst     #1, r0
        bt      7f              ! even length: done
        bra     5f              ! odd length: copy the last byte
         clrt

        ! src and dest equally aligned, but to a two byte boundary.
        ! Handle first two bytes as a special case
        .align  2
1:
SRC(    mov.w   @r4+,r0         )
DST(    mov.w   r0,@r5          )
        add     #2,r5
        extu.w  r0,r0
        addc    r0,r7           ! T was cleared in the delay slot above
        mov     #0,r0
        addc    r0,r7           ! fold the carry
2:
        mov     r6,r2           ! r2 = remaining len for the later phases
        mov     #-5,r0
        shld    r0,r6           ! r6 = len >> 5: number of 32-byte groups
        tst     r6,r6
        bt/s    2f
         clrt
        .align  2
1:
        ! Unrolled loop: 32 bytes per pass, stores use displacements off r5
        ! and r5 is advanced once at the end.
SRC(    mov.l   @r4+,r0         )
SRC(    mov.l   @r4+,r1         )
        addc    r0,r7
DST(    mov.l   r0,@r5          )
DST(    mov.l   r1,@(4,r5)      )
        addc    r1,r7

SRC(    mov.l   @r4+,r0         )
SRC(    mov.l   @r4+,r1         )
        addc    r0,r7
DST(    mov.l   r0,@(8,r5)      )
DST(    mov.l   r1,@(12,r5)     )
        addc    r1,r7

SRC(    mov.l   @r4+,r0         )
SRC(    mov.l   @r4+,r1         )
        addc    r0,r7
DST(    mov.l   r0,@(16,r5)     )
DST(    mov.l   r1,@(20,r5)     )
        addc    r1,r7

SRC(    mov.l   @r4+,r0         )
SRC(    mov.l   @r4+,r1         )
        addc    r0,r7
DST(    mov.l   r0,@(24,r5)     )
DST(    mov.l   r1,@(28,r5)     )
        addc    r1,r7
        add     #32,r5
        movt    r0              ! park carry across dt
        dt      r6
        bf/s    1b
         cmp/eq #1,r0           ! restore carry into T
        mov     #0,r0
        addc    r0,r7           ! fold the final carry

2:      mov     r2,r6
        mov     #0x1c,r0
        and     r0,r6           ! r6 = bytes held in whole longwords (0..28)
        cmp/pl  r6
        bf/s    4f
         clrt
        shlr2   r6              ! r6 = number of longwords
3:
SRC(    mov.l   @r4+,r0 )
        addc    r0,r7
DST(    mov.l   r0,@r5  )
        add     #4,r5
        movt    r0              ! park carry across dt
        dt      r6
        bf/s    3b
         cmp/eq #1,r0           ! restore carry into T
        mov     #0,r0
        addc    r0,r7           ! fold the final carry
4:      mov     r2,r6
        mov     #3,r0
        and     r0,r6           ! r6 = trailing bytes (0..3)
        cmp/pl  r6
        bf      7f
        mov     #2,r1
        cmp/hs  r1,r6
        bf      5f              ! exactly one byte left
SRC(    mov.w   @r4+,r0 )
DST(    mov.w   r0,@r5  )
        extu.w  r0,r0
        add     #2,r5
        cmp/eq  r1,r6
        bt/s    6f              ! exactly two bytes: add the word at 6:
         clrt
        shll16  r0              ! three bytes: word goes in the upper half
        addc    r0,r7
5:
SRC(    mov.b   @r4+,r0 )
DST(    mov.b   r0,@r5  )
        extu.b  r0,r0
#ifndef __LITTLE_ENDIAN__
        shll8   r0
#endif
6:      addc    r0,r7
        mov     #0,r0
        addc    r0,r7           ! fold the final carry
7:
5000:

! Exception handler:
.section .fixup, "ax"

6001:
        ! Fault on a load from src.
        mov.l   @(8,r15),r0                     ! src_err_ptr
        mov     #-EFAULT,r1
        mov.l   r1,@r0

        ! zero the complete destination - computing the rest
        ! is too much work
        mov.l   @(4,r15),r5             ! dst
        mov.l   @r15,r6                 ! len
        mov     #0,r7                   ! zero byte to store, and the sum returned
1:      mov.b   r7,@r5
        dt      r6
        bf/s    1b
         add    #1,r5
        mov.l   8000f,r0
        jmp     @r0
         nop
        .align  2
8000:   .long   5000b

6002:
        ! Fault on a store to dst.
        mov.l   @(12,r15),r0                    ! dst_err_ptr
        mov     #-EFAULT,r1
        mov.l   r1,@r0
        mov.l   8001f,r0
        jmp     @r0
         nop
        .align  2
8001:   .long   5000b

.previous
        add     #8,r15          ! drop the saved dst and len
        rts
         mov    r7,r0           ! return the sum (delay slot)

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.