OpenCores
URL https://opencores.org/ocsvn/or1k_soc_on_altera_embedded_dev_kit/or1k_soc_on_altera_embedded_dev_kit/trunk

Subversion Repositories or1k_soc_on_altera_embedded_dev_kit

[/] [or1k_soc_on_altera_embedded_dev_kit/] [trunk/] [linux-2.6/] [linux-2.6.24/] [arch/] [m32r/] [lib/] [checksum.S] - Blame information for rev 3

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 3 xianfeng
/*
2
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
3
 *              operating system.  INET is implemented using the  BSD Socket
4
 *              interface as the means of communication with the user level.
5
 *
6
 *              IP/TCP/UDP checksumming routines
7
 *
8
 * Authors:     Jorge Cwik, 
9
 *              Arnt Gulbrandsen, 
10
 *              Tom May, 
11
 *              Pentium Pro/II routines:
12
 *              Alexander Kjeldaas 
13
 *              Finn Arne Gangstad 
14
 *              Lots of code moved from tcp.c and ip.c; see those files
15
 *              for more names.
16
 *
17
 * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
18
 *                           handling.
19
 *              Andi Kleen,  add zeroing on error
20
 *                   converted to pure assembler
21
 *              Hirokazu Takata,Hiroyuki Kondo rewrite for the m32r architecture.
22
 *
23
 *              This program is free software; you can redistribute it and/or
24
 *              modify it under the terms of the GNU General Public License
25
 *              as published by the Free Software Foundation; either version
26
 *              2 of the License, or (at your option) any later version.
27
 */
28
 
29
#include 
30
#include 
31
#include 
32
 
33
/*
34
 * computes a partial checksum, e.g. for TCP/UDP fragments
35
 */
36
 
37
/*
38
unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
39
 */
40
 
41
 
42
#ifdef CONFIG_ISA_DUAL_ISSUE
43
 
44
        /*
45
         * Experiments with Ethernet and SLIP connections show that buff
46
         * is aligned on either a 2-byte or 4-byte boundary.  We get at
47
         * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
48
         * Fortunately, it is easy to convert 2-byte alignment to 4-byte
49
         * alignment for the unrolled loop.
50
         */
51
 
52
        .text
53
ENTRY(csum_partial)
        ; unsigned int csum_partial(const unsigned char *buff, int len,
        ;                           unsigned int sum)
        ;
        ; Dual-issue (CONFIG_ISA_DUAL_ISSUE) variant: "a || b" issues the
        ; two instructions as one pair.
        ;
        ; Function args
        ;  r0: unsigned char *buff
        ;  r1: int len
        ;  r2: unsigned int sum
        ;
        ; Returns (r0): the 16-bit folded ones'-complement sum of the
        ;  buffer, added to the caller's sum with end-around carry.
        ;  For len <= 0 nothing is read and sum is returned unchanged.
        ;
        ; Register use inside the routine:
        ;  r2     32-bit running accumulator (starts at 0; the caller's
        ;         sum is parked on the stack until the very end)
        ;  r3-r5  data words / zero for carry flushes
        ;  r6     loop counter
        ;  r7     1 if buff started on an odd address, else 0
        ; Modifies r0-r7 and the c-bit; stack use is one word, balanced.

        push    r2                  ||  ldi     r2, #0
        and3    r7, r0, #1              ; Check alignment.
        ; Nothing to sum: skip straight to the fold with r2 == 0.  Without
        ; this, an odd buff with len == 0 would consume a byte, drive len
        ; negative and send the loops below far past the buffer.
        blez    r1, 7f
        beqz    r7, 1f                  ; Jump if alignment is ok.
        ; 1-byte mis aligned
        ldub    r4, @r0             ||  addi    r0, #1
#ifdef __LITTLE_ENDIAN__
        ; The final byte swap (taken when r7 != 0) moves this byte to the
        ; other lane, so on little-endian it has to start in the upper one.
        ; Mirror image of the trailing-byte shift at 5: below.
        slli    r4, #8
#endif
        ; clear c-bit || Alignment uses up bytes.
        cmp     r0, r0              ||  addi    r1, #-1
        ldi     r3, #0              ||  addx    r2, r4
        addx    r2, r3                  ; flush carry
        .fillinsn
1:
        and3    r4, r0, #2              ; Check alignment.
        beqz    r4, 2f                  ; Jump if alignment is ok.
        ; clear c-bit || Alignment uses up two bytes.
        cmp     r0, r0              ||  addi    r1, #-2
        bgtz    r1, 1f                  ; More than two bytes were left.
        ; At most two bytes were left: undo the subtraction and let the
        ; tail code at 4: deal with them.
        bra     4f                  ||  addi    r1, #2
        .fillinsn
1:
        ; 2-byte aligned
        lduh    r4, @r0             ||  ldi     r3, #0
        addx    r2, r4              ||  addi    r0, #2
        addx    r2, r3                  ; flush carry
        .fillinsn
2:
        ; 4-byte aligned
        cmp     r0, r0                  ; clear c-bit
        srl3    r6, r1, #5              ; r6 = number of 32-byte chunks
        beqz    r6, 2f
        .fillinsn

        ; Unrolled loop: eight words per pass.  Each addx is paired with
        ; the load that refills the register it is summing.
1:      ld      r3, @r0+
        ld      r4, @r0+                                        ; +4
        ld      r5, @r0+                                        ; +8
        ld      r3, @r0+            ||  addx    r2, r3          ; +12
        ld      r4, @r0+            ||  addx    r2, r4          ; +16
        ld      r5, @r0+            ||  addx    r2, r5          ; +20
        ld      r3, @r0+            ||  addx    r2, r3          ; +24
        ld      r4, @r0+            ||  addx    r2, r4          ; +28
        addx    r2, r5              ||  addi    r6, #-1
        addx    r2, r3
        addx    r2, r4
        bnez    r6, 1b

        addx    r2, r6                  ; r6=0, flush carry
        cmp     r0, r0                  ; This clears c-bit
        .fillinsn
2:      and3    r6, r1, #0x1c           ; withdraw len
        beqz    r6, 4f
        srli    r6, #2                  ; r6 = remaining whole words (1..7)
        .fillinsn

3:      ld      r4, @r0+            ||  addi    r6, #-1
        addx    r2, r4
        bnez    r6, 3b

        addx    r2, r6                  ; r6=0, flush carry
        cmp     r0, r0                  ; This clears c-bit
        .fillinsn
4:      and3    r1, r1, #3              ; r1 = trailing bytes (0..3)
        beqz    r1, 7f                  ; if len == 0 goto end
        and3    r6, r1, #2
        beqz    r6, 5f                  ; if len < 2  goto 5f(1byte)
        lduh    r4, @r0             ||  addi    r0, #2
        addi    r1, #-2             ||  slli    r4, #16
        addx    r2, r4
        beqz    r1, 6f
        .fillinsn
5:      ldub    r4, @r0             ||  ldi     r1, #0
#ifndef __LITTLE_ENDIAN__
        slli    r4, #8                  ; lone byte is the upper half
#endif
        addx    r2, r4
        .fillinsn
6:      addx    r2, r1                  ; r1=0, flush carry
        .fillinsn
7:
        ; Fold the 32-bit accumulator into 16 bits.
        and3    r0, r2, #0xffff
        srli    r2, #16
        add     r0, r2
        srl3    r2, r0, #16
        beqz    r2, 1f
        addi    r0, #1                  ; end-around carry
        and3    r0, r0, #0xffff
        .fillinsn
1:
        beqz    r7, 1f                  ; swap the upper byte for the lower
        and3    r2, r0, #0xff
        srl3    r0, r0, #8
        slli    r2, #8
        or      r0, r2
        .fillinsn
1:
        ; Add the caller's sum (saved on entry) with end-around carry.
        pop     r2                  ||  cmp     r0, r0
        addx    r0, r2              ||  ldi     r2, #0
        addx    r0, r2
        jmp     r14
156
 
157
#else /* not CONFIG_ISA_DUAL_ISSUE */
158
 
159
        /*
160
         * Experiments with Ethernet and SLIP connections show that buff
161
         * is aligned on either a 2-byte or 4-byte boundary.  We get at
162
         * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
163
         * Fortunately, it is easy to convert 2-byte alignment to 4-byte
164
         * alignment for the unrolled loop.
165
         */
166
 
167
        .text
168
ENTRY(csum_partial)
        ; unsigned int csum_partial(const unsigned char *buff, int len,
        ;                           unsigned int sum)
        ;
        ; Single-issue variant (CONFIG_ISA_DUAL_ISSUE not set).
        ;
        ; Function args
        ;  r0: unsigned char *buff
        ;  r1: int len
        ;  r2: unsigned int sum
        ;
        ; Returns (r0): the 16-bit folded ones'-complement sum of the
        ;  buffer, added to the caller's sum with end-around carry.
        ;  For len <= 0 nothing is read and sum is returned unchanged.
        ;
        ; Register use inside the routine:
        ;  r2     32-bit running accumulator (starts at 0; the caller's
        ;         sum is parked on the stack until the very end)
        ;  r3-r5  data words / zero for carry flushes
        ;  r6     loop counter
        ;  r7     1 if buff started on an odd address, else 0
        ; Modifies r0-r7 and the c-bit; stack use is one word, balanced.

        push    r2
        ldi     r2, #0
        and3    r7, r0, #1              ; Check alignment.
        ; Nothing to sum: skip straight to the fold with r2 == 0.  Without
        ; this, an odd buff with len == 0 would consume a byte, drive len
        ; negative and send the loops below far past the buffer.
        blez    r1, 7f
        beqz    r7, 1f                  ; Jump if alignment is ok.
        ; 1-byte mis aligned
        ldub    r4, @r0
#ifdef __LITTLE_ENDIAN__
        ; The final byte swap (taken when r7 != 0) moves this byte to the
        ; other lane, so on little-endian it has to start in the upper one.
        ; Mirror image of the trailing-byte shift at 5: below.
        slli    r4, #8
#endif
        addi    r0, #1
        addi    r1, #-1                 ; Alignment uses up bytes.
        cmp     r0, r0                  ; clear c-bit
        ldi     r3, #0
        addx    r2, r4
        addx    r2, r3                  ; flush carry
        .fillinsn
1:
        and3    r4, r0, #2              ; Check alignment.
        beqz    r4, 2f                  ; Jump if alignment is ok.
        addi    r1, #-2                 ; Alignment uses up two bytes.
        cmp     r0, r0                  ; clear c-bit
        bgtz    r1, 1f                  ; More than two bytes were left.
        ; At most two bytes were left: undo the subtraction and let the
        ; tail code at 4: deal with them.
        addi    r1, #2
        bra     4f
        .fillinsn
1:
        ; 2-byte aligned
        lduh    r4, @r0
        addi    r0, #2
        ldi     r3, #0
        addx    r2, r4
        addx    r2, r3                  ; flush carry
        .fillinsn
2:
        ; 4-byte aligned
        cmp     r0, r0                  ; clear c-bit
        srl3    r6, r1, #5              ; r6 = number of 32-byte chunks
        beqz    r6, 2f
        .fillinsn

        ; Unrolled loop: eight words per pass, carry chained through addx.
1:      ld      r3, @r0+
        ld      r4, @r0+                ; +4
        ld      r5, @r0+                ; +8
        addx    r2, r3
        addx    r2, r4
        addx    r2, r5
        ld      r3, @r0+                ; +12
        ld      r4, @r0+                ; +16
        ld      r5, @r0+                ; +20
        addx    r2, r3
        addx    r2, r4
        addx    r2, r5
        ld      r3, @r0+                ; +24
        ld      r4, @r0+                ; +28
        addi    r6, #-1
        addx    r2, r3
        addx    r2, r4
        bnez    r6, 1b
        addx    r2, r6                  ; r6=0, flush carry
        cmp     r0, r0                  ; This clears c-bit
        .fillinsn

2:      and3    r6, r1, #0x1c           ; withdraw len
        beqz    r6, 4f
        srli    r6, #2                  ; r6 = remaining whole words (1..7)
        .fillinsn

3:      ld      r4, @r0+
        addi    r6, #-1
        addx    r2, r4
        bnez    r6, 3b
        addx    r2, r6                  ; r6=0, flush carry
        cmp     r0, r0                  ; This clears c-bit
        .fillinsn

4:      and3    r1, r1, #3              ; r1 = trailing bytes (0..3)
        beqz    r1, 7f                  ; if len == 0 goto end
        and3    r6, r1, #2
        beqz    r6, 5f                  ; if len < 2  goto 5f(1byte)

        lduh    r4, @r0
        addi    r0, #2
        addi    r1, #-2
        slli    r4, #16
        addx    r2, r4
        beqz    r1, 6f
        .fillinsn
5:      ldub    r4, @r0
#ifndef __LITTLE_ENDIAN__
        slli    r4, #8                  ; lone byte is the upper half
#endif
        addx    r2, r4
        .fillinsn
6:      ldi     r5, #0
        addx    r2, r5                  ; flush carry
        .fillinsn
7:
        ; Fold the 32-bit accumulator into 16 bits.
        and3    r0, r2, #0xffff
        srli    r2, #16
        add     r0, r2
        srl3    r2, r0, #16
        beqz    r2, 1f
        addi    r0, #1                  ; end-around carry
        and3    r0, r0, #0xffff
        .fillinsn
1:
        beqz    r7, 1f                  ; swap the upper byte for the lower
        mv      r2, r0
        srl3    r0, r2, #8
        and3    r2, r2, #0xff
        slli    r2, #8
        or      r0, r2
        .fillinsn
1:
        ; Add the caller's sum (saved on entry) with end-around carry.
        pop     r2
        cmp     r0, r0
        addx    r0, r2
        ldi     r2, #0
        addx    r0, r2
        jmp     r14
291
 
292
#endif /* not CONFIG_ISA_DUAL_ISSUE */
293
 
294
/*
295
unsigned int csum_partial_copy_generic (const char *src, char *dst,
296
                                  int len, int sum, int *src_err_ptr, int *dst_err_ptr)
297
 */
298
 
299
/*
300
 * Copy from src to dst while checksumming, otherwise like csum_partial
301
 *
302
 * The macros SRC and DST specify the type of access for the instruction.
303
 * thus we can call a custom exception handler for all access types.
304
 *
305
 * FIXME: could someone double-check whether I haven't mixed up some SRC and
306
 *        DST definitions? It's damn hard to trigger all cases.  I hope I got
307
 *        them all but there's no guarantee.
308
 */
309
 
310
ENTRY(csum_partial_copy_generic)
        ; Stub only: the body below performs no loads or stores and writes
        ; no registers, so no data is copied and no checksum is computed.
        ; It executes four nops and then jumps through r14, the same exit
        ; sequence csum_partial uses.  Whatever the caller had in r0 is
        ; still there on exit.
        ; NOTE(review): presumably the real copy-and-checksum work is done
        ; elsewhere and this entry point is unused -- confirm before
        ; relying on it.
311
        nop
312
        nop
313
        nop
314
        nop
315
        jmp r14                         ; leave without touching any state
316
        nop                             ; not reached by fall-through;
317
        nop                             ; presumably padding -- TODO confirm
318
        nop
319
 
320
        .end

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.