OpenCores
URL https://opencores.org/ocsvn/or1k_soc_on_altera_embedded_dev_kit/or1k_soc_on_altera_embedded_dev_kit/trunk

Subversion Repositories or1k_soc_on_altera_embedded_dev_kit

trunk/linux-2.6/linux-2.6.24/arch/sparc/lib/checksum.S (rev 3)

/* checksum.S: Sparc optimized checksum code.
 *
 *  Copyright(C) 1995 Linus Torvalds
 *  Copyright(C) 1995 Miguel de Icaza
 *  Copyright(C) 1996 David S. Miller
 *  Copyright(C) 1997 Jakub Jelinek
 *
 * derived from:
 *      Linux/Alpha checksum c-code
 *      Linux/ix86 inline checksum assembly
 *      RFC1071 Computing the Internet Checksum (esp. Jacobson's m68k code)
 *      David Mosberger-Tang for optimized reference c-code
 *      BSD4.4 portable checksum routine
 */

#include <asm/errno.h>
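
/* For orientation: csum_partial(buf, len, sum) accumulates the buffer into a
 * 32-bit ones'-complement partial sum; the caller folds it to 16 bits later
 * (csum_fold).  A minimal C sketch of that contract, assuming whole 32-bit
 * words and ignoring the alignment and unrolling work done below
 * (csum_partial_ref is an illustrative name, not part of this file):
 *
 *      static u32 csum_partial_ref(const u32 *buf, int words, u32 sum)
 *      {
 *              u64 acc = sum;
 *              while (words--)
 *                      acc += *buf++;          // keep every carry
 *              acc = (u32)acc + (acc >> 32);   // fold carries back in
 *              acc = (u32)acc + (acc >> 32);
 *              return (u32)acc;                // still unfolded, like %o2 here
 *      }
 */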

#define CSUM_BIGCHUNK(buf, offset, sum, t0, t1, t2, t3, t4, t5) \
        ldd     [buf + offset + 0x00], t0;                      \
        ldd     [buf + offset + 0x08], t2;                      \
        addxcc  t0, sum, sum;                                   \
        addxcc  t1, sum, sum;                                   \
        ldd     [buf + offset + 0x10], t4;                      \
        addxcc  t2, sum, sum;                                   \
        addxcc  t3, sum, sum;                                   \
        ldd     [buf + offset + 0x18], t0;                      \
        addxcc  t4, sum, sum;                                   \
        addxcc  t5, sum, sum;                                   \
        addxcc  t0, sum, sum;                                   \
        addxcc  t1, sum, sum;
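
        /* Note: each CSUM_BIGCHUNK expansion covers 0x20 bytes.  ldd pulls
         * eight bytes into an even/odd register pair (t0/t1, t2/t3, ...),
         * and the addxcc chain folds every carry into the next add, so
         * nothing is lost until the single addx after the unrolled loop.
         */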

#define CSUM_LASTCHUNK(buf, offset, sum, t0, t1, t2, t3)        \
        ldd     [buf - offset - 0x08], t0;                      \
        ldd     [buf - offset - 0x00], t2;                      \
        addxcc  t0, sum, sum;                                   \
        addxcc  t1, sum, sum;                                   \
        addxcc  t2, sum, sum;                                   \
        addxcc  t3, sum, sum;
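
        /* CSUM_LASTCHUNK handles 0x10 bytes with negative offsets: the code
         * below advances the buffer pointer past the remaining 16-byte
         * chunks before jumping into the table, so every entry reaches back
         * behind %o0.
         */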

        /* Do end cruft out of band to get better cache patterns. */
csum_partial_end_cruft:
        be      1f                              ! caller asks %o1 & 0x8
         andcc  %o1, 4, %g0                     ! nope, check for word remaining
        ldd     [%o0], %g2                      ! load two
        addcc   %g2, %o2, %o2                   ! add first word to sum
        addxcc  %g3, %o2, %o2                   ! add second word as well
        add     %o0, 8, %o0                     ! advance buf ptr
        addx    %g0, %o2, %o2                   ! add in final carry
        andcc   %o1, 4, %g0                     ! check again for word remaining
1:      be      1f                              ! nope, skip this code
         andcc  %o1, 3, %o1                     ! check for trailing bytes
        ld      [%o0], %g2                      ! load it
        addcc   %g2, %o2, %o2                   ! add to sum
        add     %o0, 4, %o0                     ! advance buf ptr
        addx    %g0, %o2, %o2                   ! add in final carry
        andcc   %o1, 3, %g0                     ! check again for trailing bytes
1:      be      1f                              ! no trailing bytes, return
         addcc  %o1, -1, %g0                    ! only one byte remains?
        bne     2f                              ! at least two bytes more
         subcc  %o1, 2, %o1                     ! only two bytes more?
        b       4f                              ! only one byte remains
         or     %g0, %g0, %o4                   ! clear fake hword value
2:      lduh    [%o0], %o4                      ! get hword
        be      6f                              ! jmp if only hword remains
         add    %o0, 2, %o0                     ! advance buf ptr either way
        sll     %o4, 16, %o4                    ! create upper hword
4:      ldub    [%o0], %o5                      ! get final byte
        sll     %o5, 8, %o5                     ! put into place
        or      %o5, %o4, %o4                   ! coalesce with hword (if any)
6:      addcc   %o4, %o2, %o2                   ! add to sum
1:      retl                                    ! get outta here
         addx   %g0, %o2, %o0                   ! add final carry into retval

        /* Also do alignment out of band to get better cache patterns. */
csum_partial_fix_alignment:
        cmp     %o1, 6
        bl      cpte - 0x4
         andcc  %o0, 0x2, %g0
        be      1f
         andcc  %o0, 0x4, %g0
        lduh    [%o0 + 0x00], %g2
        sub     %o1, 2, %o1
        add     %o0, 2, %o0
        sll     %g2, 16, %g2
        addcc   %g2, %o2, %o2
        srl     %o2, 16, %g3
        addx    %g0, %g3, %g2
        sll     %o2, 16, %o2
        sll     %g2, 16, %g3
        srl     %o2, 16, %o2
        andcc   %o0, 0x4, %g0
        or      %g3, %o2, %o2
1:      be      cpa
         andcc  %o1, 0xffffff80, %o3
        ld      [%o0 + 0x00], %g2
        sub     %o1, 4, %o1
        addcc   %g2, %o2, %o2
        add     %o0, 4, %o0
        addx    %g0, %o2, %o2
        b       cpa
         andcc  %o1, 0xffffff80, %o3

        /* The common case is to get called with a nicely aligned
         * buffer of size 0x20.  Follow the code path for that case.
         */
        .globl  csum_partial
csum_partial:                   /* %o0=buf, %o1=len, %o2=sum */
        andcc   %o0, 0x7, %g0                           ! alignment problems?
        bne     csum_partial_fix_alignment              ! yep, handle it
         sethi  %hi(cpte - 8), %g7                      ! prepare table jmp ptr
        andcc   %o1, 0xffffff80, %o3                    ! num loop iterations
cpa:    be      3f                                      ! none to do
         andcc  %o1, 0x70, %g1                          ! clears carry flag too
5:      CSUM_BIGCHUNK(%o0, 0x00, %o2, %o4, %o5, %g2, %g3, %g4, %g5)
        CSUM_BIGCHUNK(%o0, 0x20, %o2, %o4, %o5, %g2, %g3, %g4, %g5)
        CSUM_BIGCHUNK(%o0, 0x40, %o2, %o4, %o5, %g2, %g3, %g4, %g5)
        CSUM_BIGCHUNK(%o0, 0x60, %o2, %o4, %o5, %g2, %g3, %g4, %g5)
        addx    %g0, %o2, %o2                           ! sink in final carry
        subcc   %o3, 128, %o3                           ! detract from loop iters
        bne     5b                                      ! more to do
         add    %o0, 128, %o0                           ! advance buf ptr
        andcc   %o1, 0x70, %g1                          ! clears carry flag too
3:      be      cpte                                    ! nope
         andcc  %o1, 0xf, %g0                           ! anything left at all?
        srl     %g1, 1, %o4                             ! compute offset
        sub     %g7, %g1, %g7                           ! adjust jmp ptr
        sub     %g7, %o4, %g7                           ! final jmp ptr adjust
        jmp     %g7 + %lo(cpte - 8)                     ! enter the table
         add    %o0, %g1, %o0                           ! advance buf ptr
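        /* Table entry math, for reference: each CSUM_LASTCHUNK below is six
         * instructions (24 bytes) per 16 bytes of data, so the jump target
         * is cpte - 8 - (%g1 + %g1/2).  E.g. %g1 = 0x10 lands on the last
         * CSUM_LASTCHUNK, summing exactly the final 16 bytes; %g1 = 0x70
         * lands on cptbl and all seven entries run.
         */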
cptbl:  CSUM_LASTCHUNK(%o0, 0x68, %o2, %g2, %g3, %g4, %g5)
        CSUM_LASTCHUNK(%o0, 0x58, %o2, %g2, %g3, %g4, %g5)
        CSUM_LASTCHUNK(%o0, 0x48, %o2, %g2, %g3, %g4, %g5)
        CSUM_LASTCHUNK(%o0, 0x38, %o2, %g2, %g3, %g4, %g5)
        CSUM_LASTCHUNK(%o0, 0x28, %o2, %g2, %g3, %g4, %g5)
        CSUM_LASTCHUNK(%o0, 0x18, %o2, %g2, %g3, %g4, %g5)
        CSUM_LASTCHUNK(%o0, 0x08, %o2, %g2, %g3, %g4, %g5)
        addx    %g0, %o2, %o2                           ! fetch final carry
        andcc   %o1, 0xf, %g0                           ! anything left at all?
cpte:   bne     csum_partial_end_cruft                  ! yep, handle it
         andcc  %o1, 8, %g0                             ! check how much
cpout:  retl                                            ! get outta here
         mov    %o2, %o0                                ! return computed csum

        .globl __csum_partial_copy_start, __csum_partial_copy_end
__csum_partial_copy_start:

/* Work around cpp -rob */
#define ALLOC #alloc
#define EXECINSTR #execinstr
#define EX(x,y,a,b)                             \
98:     x,y;                                    \
        .section .fixup,ALLOC,EXECINSTR;        \
        .align  4;                              \
99:     ba 30f;                                 \
         a, b, %o3;                             \
        .section __ex_table,ALLOC;              \
        .align  4;                              \
        .word   98b, 99b;                       \
        .text;                                  \
        .align  4

#define EX2(x,y)                                \
98:     x,y;                                    \
        .section __ex_table,ALLOC;              \
        .align  4;                              \
        .word   98b, 30f;                       \
        .text;                                  \
        .align  4

#define EX3(x,y)                                \
98:     x,y;                                    \
        .section __ex_table,ALLOC;              \
        .align  4;                              \
        .word   98b, 96f;                       \
        .text;                                  \
        .align  4

#define EXT(start,end,handler)                  \
        .section __ex_table,ALLOC;              \
        .align  4;                              \
        .word   start, 0, end, handler;         \
        .text;                                  \
        .align  4
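
/* How these fixups hang together: EX() wraps a single faulting load; if the
 * access at 98: traps, the __ex_table entry points the trap handler at the
 * .fixup stub at 99:, which computes a byte count into %o3 and branches to
 * label 30 below.  EX2() goes straight to 30, EX3() to 96.  EXT() records a
 * whole (start, end) range of an unrolled loop plus a handler (20, 21 or 22
 * below) that works out from %g2 how much of the loop had completed.
 */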

        /* This aligned version executes typically in 8.5 superscalar cycles, this
         * is the best I can do.  I say 8.5 because the final add will pair with
         * the next ldd in the main unrolled loop.  Thus the pipe is always full.
         * If you change these macros (including order of instructions),
         * please check the fixup code below as well.
         */
#define CSUMCOPY_BIGCHUNK_ALIGNED(src, dst, sum, off, t0, t1, t2, t3, t4, t5, t6, t7)   \
        ldd     [src + off + 0x00], t0;                                                 \
        ldd     [src + off + 0x08], t2;                                                 \
        addxcc  t0, sum, sum;                                                           \
        ldd     [src + off + 0x10], t4;                                                 \
        addxcc  t1, sum, sum;                                                           \
        ldd     [src + off + 0x18], t6;                                                 \
        addxcc  t2, sum, sum;                                                           \
        std     t0, [dst + off + 0x00];                                                 \
        addxcc  t3, sum, sum;                                                           \
        std     t2, [dst + off + 0x08];                                                 \
        addxcc  t4, sum, sum;                                                           \
        std     t4, [dst + off + 0x10];                                                 \
        addxcc  t5, sum, sum;                                                           \
        std     t6, [dst + off + 0x18];                                                 \
        addxcc  t6, sum, sum;                                                           \
        addxcc  t7, sum, sum;

        /* 12 superscalar cycles seems to be the limit for this case,
         * because of this we thus do all the ldd's together to get
         * Viking MXCC into streaming mode.  Ho hum...
         */
#define CSUMCOPY_BIGCHUNK(src, dst, sum, off, t0, t1, t2, t3, t4, t5, t6, t7)   \
        ldd     [src + off + 0x00], t0;                                         \
        ldd     [src + off + 0x08], t2;                                         \
        ldd     [src + off + 0x10], t4;                                         \
        ldd     [src + off + 0x18], t6;                                         \
        st      t0, [dst + off + 0x00];                                         \
        addxcc  t0, sum, sum;                                                   \
        st      t1, [dst + off + 0x04];                                         \
        addxcc  t1, sum, sum;                                                   \
        st      t2, [dst + off + 0x08];                                         \
        addxcc  t2, sum, sum;                                                   \
        st      t3, [dst + off + 0x0c];                                         \
        addxcc  t3, sum, sum;                                                   \
        st      t4, [dst + off + 0x10];                                         \
        addxcc  t4, sum, sum;                                                   \
        st      t5, [dst + off + 0x14];                                         \
        addxcc  t5, sum, sum;                                                   \
        st      t6, [dst + off + 0x18];                                         \
        addxcc  t6, sum, sum;                                                   \
        st      t7, [dst + off + 0x1c];                                         \
        addxcc  t7, sum, sum;

        /* Yuck, 6 superscalar cycles... */
#define CSUMCOPY_LASTCHUNK(src, dst, sum, off, t0, t1, t2, t3)  \
        ldd     [src - off - 0x08], t0;                         \
        ldd     [src - off - 0x00], t2;                         \
        addxcc  t0, sum, sum;                                   \
        st      t0, [dst - off - 0x08];                         \
        addxcc  t1, sum, sum;                                   \
        st      t1, [dst - off - 0x04];                         \
        addxcc  t2, sum, sum;                                   \
        st      t2, [dst - off - 0x00];                         \
        addxcc  t3, sum, sum;                                   \
        st      t3, [dst - off + 0x04];

        /* Handle the end cruft code out of band for better cache patterns. */
cc_end_cruft:
        be      1f
         andcc  %o3, 4, %g0
        EX(ldd  [%o0 + 0x00], %g2, and %o3, 0xf)
        add     %o1, 8, %o1
        addcc   %g2, %g7, %g7
        add     %o0, 8, %o0
        addxcc  %g3, %g7, %g7
        EX2(st  %g2, [%o1 - 0x08])
        addx    %g0, %g7, %g7
        andcc   %o3, 4, %g0
        EX2(st  %g3, [%o1 - 0x04])
1:      be      1f
         andcc  %o3, 3, %o3
        EX(ld   [%o0 + 0x00], %g2, add %o3, 4)
        add     %o1, 4, %o1
        addcc   %g2, %g7, %g7
        EX2(st  %g2, [%o1 - 0x04])
        addx    %g0, %g7, %g7
        andcc   %o3, 3, %g0
        add     %o0, 4, %o0
1:      be      1f
         addcc  %o3, -1, %g0
        bne     2f
         subcc  %o3, 2, %o3
        b       4f
         or     %g0, %g0, %o4
2:      EX(lduh [%o0 + 0x00], %o4, add %o3, 2)
        add     %o0, 2, %o0
        EX2(sth %o4, [%o1 + 0x00])
        be      6f
         add    %o1, 2, %o1
        sll     %o4, 16, %o4
4:      EX(ldub [%o0 + 0x00], %o5, add %g0, 1)
        EX2(stb %o5, [%o1 + 0x00])
        sll     %o5, 8, %o5
        or      %o5, %o4, %o4
6:      addcc   %o4, %g7, %g7
1:      retl
         addx   %g0, %g7, %o0

        /* Also, handle the alignment code out of band. */
cc_dword_align:
        cmp     %g1, 6
        bl,a    ccte
         andcc  %g1, 0xf, %o3
        andcc   %o0, 0x1, %g0
        bne     ccslow
         andcc  %o0, 0x2, %g0
        be      1f
         andcc  %o0, 0x4, %g0
        EX(lduh [%o0 + 0x00], %g4, add %g1, 0)
        sub     %g1, 2, %g1
        EX2(sth %g4, [%o1 + 0x00])
        add     %o0, 2, %o0
        sll     %g4, 16, %g4
        addcc   %g4, %g7, %g7
        add     %o1, 2, %o1
        srl     %g7, 16, %g3
        addx    %g0, %g3, %g4
        sll     %g7, 16, %g7
        sll     %g4, 16, %g3
        srl     %g7, 16, %g7
        andcc   %o0, 0x4, %g0
        or      %g3, %g7, %g7
1:      be      3f
         andcc  %g1, 0xffffff80, %g0
        EX(ld   [%o0 + 0x00], %g4, add %g1, 0)
        sub     %g1, 4, %g1
        EX2(st  %g4, [%o1 + 0x00])
        add     %o0, 4, %o0
        addcc   %g4, %g7, %g7
        add     %o1, 4, %o1
        addx    %g0, %g7, %g7
        b       3f
         andcc  %g1, 0xffffff80, %g0

        /* Sun, you just can't beat me, you just can't.  Stop trying,
         * give up.  I'm serious, I am going to kick the living shit
         * out of you, game over, lights out.
         */
        .align  8
        .globl  __csum_partial_copy_sparc_generic
__csum_partial_copy_sparc_generic:
                                        /* %o0=src, %o1=dest, %g1=len, %g7=sum */
        xor     %o0, %o1, %o4           ! get changing bits
        andcc   %o4, 3, %g0             ! check for mismatched alignment
        bne     ccslow                  ! better this than unaligned/fixups
         andcc  %o0, 7, %g0             ! need to align things?
        bne     cc_dword_align          ! yes, we check for short lengths there
         andcc  %g1, 0xffffff80, %g0    ! can we use unrolled loop?
3:      be      3f                      ! nope, less than one loop remains
         andcc  %o1, 4, %g0             ! dest aligned on 4 or 8 byte boundary?
        be      ccdbl + 4               ! 8 byte aligned, kick ass
5:      CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x00,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
        CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x20,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
        CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x40,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
        CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x60,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
10:     EXT(5b, 10b, 20f)               ! note for exception handling
        sub     %g1, 128, %g1           ! detract from length
        addx    %g0, %g7, %g7           ! add in last carry bit
        andcc   %g1, 0xffffff80, %g0    ! more to csum?
        add     %o0, 128, %o0           ! advance src ptr
        bne     5b                      ! we did not go negative, continue looping
         add    %o1, 128, %o1           ! advance dest ptr
3:      andcc   %g1, 0x70, %o2          ! can use table?
ccmerge:be      ccte                    ! nope, go and check for end cruft
         andcc  %g1, 0xf, %o3           ! get low bits of length (clears carry btw)
        srl     %o2, 1, %o4             ! begin negative offset computation
        sethi   %hi(12f), %o5           ! set up table ptr end
        add     %o0, %o2, %o0           ! advance src ptr
        sub     %o5, %o4, %o5           ! continue table calculation
        sll     %o2, 1, %g2             ! constant multiplies are fun...
        sub     %o5, %g2, %o5           ! some more adjustments
        jmp     %o5 + %lo(12f)          ! jump into it, duff style, wheee...
         add    %o1, %o2, %o1           ! advance dest ptr (carry is clear btw)
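        /* Same trick as the cpte table in csum_partial: each entry below is
         * ten instructions (40 bytes) per 16 bytes of data, hence the
         * %o2/2 + 2*%o2 = 2.5*%o2 byte step back from label 12.
         */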
cctbl:  CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x68,%g2,%g3,%g4,%g5)
        CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x58,%g2,%g3,%g4,%g5)
        CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x48,%g2,%g3,%g4,%g5)
        CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x38,%g2,%g3,%g4,%g5)
        CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x28,%g2,%g3,%g4,%g5)
        CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x18,%g2,%g3,%g4,%g5)
        CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x08,%g2,%g3,%g4,%g5)
12:     EXT(cctbl, 12b, 22f)            ! note for exception table handling
        addx    %g0, %g7, %g7
        andcc   %o3, 0xf, %g0           ! check for low bits set
ccte:   bne     cc_end_cruft            ! something left, handle it out of band
         andcc  %o3, 8, %g0             ! begin checks for that code
        retl                            ! return
         mov    %g7, %o0                ! give em the computed checksum
ccdbl:  CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x00,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
        CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x20,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
        CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x40,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
        CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x60,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
11:     EXT(ccdbl, 11b, 21f)            ! note for exception table handling
        sub     %g1, 128, %g1           ! detract from length
        addx    %g0, %g7, %g7           ! add in last carry bit
        andcc   %g1, 0xffffff80, %g0    ! more to csum?
        add     %o0, 128, %o0           ! advance src ptr
        bne     ccdbl                   ! we did not go negative, continue looping
         add    %o1, 128, %o1           ! advance dest ptr
        b       ccmerge                 ! finish it off, above
         andcc  %g1, 0x70, %o2          ! can use table? (clears carry btw)

ccslow: cmp     %g1, 0
        mov     0, %g5
        bleu    4f
         andcc  %o0, 1, %o5
        be,a    1f
         srl    %g1, 1, %g4
        sub     %g1, 1, %g1
        EX(ldub [%o0], %g5, add %g1, 1)
        add     %o0, 1, %o0
        EX2(stb %g5, [%o1])
        srl     %g1, 1, %g4
        add     %o1, 1, %o1
1:      cmp     %g4, 0
        be,a    3f
         andcc  %g1, 1, %g0
        andcc   %o0, 2, %g0
        be,a    1f
         srl    %g4, 1, %g4
        EX(lduh [%o0], %o4, add %g1, 0)
        sub     %g1, 2, %g1
        srl     %o4, 8, %g2
        sub     %g4, 1, %g4
        EX2(stb %g2, [%o1])
        add     %o4, %g5, %g5
        EX2(stb %o4, [%o1 + 1])
        add     %o0, 2, %o0
        srl     %g4, 1, %g4
        add     %o1, 2, %o1
1:      cmp     %g4, 0
        be,a    2f
         andcc  %g1, 2, %g0
        EX3(ld  [%o0], %o4)
5:      srl     %o4, 24, %g2
        srl     %o4, 16, %g3
        EX2(stb %g2, [%o1])
        srl     %o4, 8, %g2
        EX2(stb %g3, [%o1 + 1])
        add     %o0, 4, %o0
        EX2(stb %g2, [%o1 + 2])
        addcc   %o4, %g5, %g5
        EX2(stb %o4, [%o1 + 3])
        addx    %g5, %g0, %g5   ! I am now too lazy to optimize this (question if
        add     %o1, 4, %o1     ! it is worth it). Maybe some day - with the sll/srl
        subcc   %g4, 1, %g4     ! tricks
        bne,a   5b
         EX3(ld [%o0], %o4)
        sll     %g5, 16, %g2
        srl     %g5, 16, %g5
        srl     %g2, 16, %g2
        andcc   %g1, 2, %g0
        add     %g2, %g5, %g5
2:      be,a    3f
         andcc  %g1, 1, %g0
        EX(lduh [%o0], %o4, and %g1, 3)
        andcc   %g1, 1, %g0
        srl     %o4, 8, %g2
        add     %o0, 2, %o0
        EX2(stb %g2, [%o1])
        add     %g5, %o4, %g5
        EX2(stb %o4, [%o1 + 1])
        add     %o1, 2, %o1
3:      be,a    1f
         sll    %g5, 16, %o4
        EX(ldub [%o0], %g2, add %g0, 1)
        sll     %g2, 8, %o4
        EX2(stb %g2, [%o1])
        add     %g5, %o4, %g5
        sll     %g5, 16, %o4
1:      addcc   %o4, %g5, %g5
        srl     %g5, 16, %o4
        addx    %g0, %o4, %g5
        orcc    %o5, %g0, %g0
        be      4f
         srl    %g5, 8, %o4
        and     %g5, 0xff, %g2
        and     %o4, 0xff, %o4
        sll     %g2, 8, %g2
        or      %g2, %o4, %g5
4:      addcc   %g7, %g5, %g7
        retl
         addx   %g0, %g7, %o0
__csum_partial_copy_end:

/* We do these strange calculations for the csum_*_from_user case only, i.e.
 * we only bother with faults on loads... */

/* o2 = ((g2%20)&3)*8
 * o3 = g1 - (g2/20)*32 - o2 */
20:
        cmp     %g2, 20
        blu,a   1f
         and    %g2, 3, %o2
        sub     %g1, 32, %g1
        b       20b
         sub    %g2, 20, %g2
1:
        sll     %o2, 3, %o2
        b       31f
         sub    %g1, %o2, %o3
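
/* A worked instance of the formulas above (illustrative numbers only):
 * a fault at %g2 = 42 means two full 20-instruction / 32-byte chunks were
 * finished and the third chunk trapped on its third ldd, so
 * o2 = ((42 % 20) & 3) * 8 = 16 bytes remain to be copied and
 * o3 = g1 - 2*32 - 16 bytes remain to be zeroed. */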

/* o2 = (!(g2 & 15) ? 0 : (((g2 & 15) + 1) & ~1)*8)
 * o3 = g1 - (g2/16)*32 - o2 */
21:
        andcc   %g2, 15, %o3
        srl     %g2, 4, %g2
        be,a    1f
         clr    %o2
        add     %o3, 1, %o3
        and     %o3, 14, %o3
        sll     %o3, 3, %o2
1:
        sll     %g2, 5, %g2
        sub     %g1, %g2, %o3
        b       31f
         sub    %o3, %o2, %o3
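
/* Same idea for the aligned loop (16 instructions per 32-byte chunk); just
 * instantiating the formula above: %g2 = 19 gives g2 & 15 = 3, hence
 * o2 = ((3 + 1) & ~1) * 8 = 32 and o3 = g1 - (19/16)*32 - 32 = g1 - 64. */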

/* o0 += (g2/10)*16 - 0x70
 * o1 += (g2/10)*16 - 0x70
 * o2 = (g2 % 10) ? 8 : 0
 * o3 += 0x70 - (g2/10)*16 - o2 */
22:
        cmp     %g2, 10
        blu,a   1f
         sub    %o0, 0x70, %o0
        add     %o0, 16, %o0
        add     %o1, 16, %o1
        sub     %o3, 16, %o3
        b       22b
         sub    %g2, 10, %g2
1:
        sub     %o1, 0x70, %o1
        add     %o3, 0x70, %o3
        clr     %o2
        tst     %g2
        bne,a   1f
         mov    8, %o2
1:
        b       31f
         sub    %o3, %o2, %o3
96:
        and     %g1, 3, %g1
        sll     %g4, 2, %g4
        add     %g1, %g4, %o3
30:
/* %o1 is dst
 * %o3 is # bytes to zero out
 * %o4 is faulting address
 * %o5 is %pc where fault occurred */
        clr     %o2
31:
/* %o0 is src
 * %o1 is dst
 * %o2 is # of bytes to copy from src to dst
 * %o3 is # bytes to zero out
 * %o4 is faulting address
 * %o5 is %pc where fault occurred */
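/* Recovery from here on: call lookup_fault to classify the trap; if it says
 * the access is handleable (returns 2), copy the %o2 source bytes that are
 * still readable with __memcpy and clear the remaining %o3 destination bytes
 * with __bzero; in either case store -EFAULT through the pointer loaded from
 * [%sp + 168] (the "struct_ptr of parent" noted below). */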
549
        save    %sp, -104, %sp
550
        mov     %i5, %o0
551
        mov     %i7, %o1
552
        mov     %i4, %o2
553
        call    lookup_fault
554
         mov    %g7, %i4
555
        cmp     %o0, 2
556
        bne     1f
557
         add    %g0, -EFAULT, %i5
558
        tst     %i2
559
        be      2f
560
         mov    %i0, %o1
561
        mov     %i1, %o0
562
5:
563
        call    __memcpy
564
         mov    %i2, %o2
565
        tst     %o0
566
        bne,a   2f
567
         add    %i3, %i2, %i3
568
        add     %i1, %i2, %i1
569
2:
570
        mov     %i1, %o0
571
6:
572
        call    __bzero
573
         mov    %i3, %o1
574
1:
575
        ld      [%sp + 168], %o2                ! struct_ptr of parent
576
        st      %i5, [%o2]
577
        ret
578
         restore
579
 
580
        .section __ex_table,#alloc
581
        .align 4
582
        .word 5b,2
583
        .word 6b,2
