or1k_old/trunk/rc203soc/sw/uClinux/include/asm-sparc/checksum.h (rev 1782)
https://opencores.org/ocsvn/or1k_old/or1k_old/trunk

/* $Id: checksum.h,v 1.1 2005-12-20 11:32:11 jcastillo Exp $ */
#ifndef __SPARC_CHECKSUM_H
#define __SPARC_CHECKSUM_H

/*  checksum.h:  IP/UDP/TCP checksum routines on the Sparc.
 *
 *  Copyright(C) 1995 Linus Torvalds
 *  Copyright(C) 1995 Miguel de Icaza
 *  Copyright(C) 1996 David S. Miller
 *
 * derived from:
 *      Alpha checksum c-code
 *      ix86 inline assembly
 */

/*
 * computes the checksum of the TCP/UDP pseudo-header
 * returns a 16-bit checksum, already complemented
 */

extern inline unsigned short csum_tcpudp_magic(unsigned long saddr,
                                               unsigned long daddr,
                                               unsigned short len,
                                               unsigned short proto,
                                               unsigned int sum)
{
        __asm__ __volatile__("
                addcc   %0, %1, %0
                addxcc  %0, %4, %0
                addxcc  %0, %5, %0
                addx    %0, %%g0, %0

                ! We need the carry from the addition of 16-bit
                ! significant addition, so we zap out the low bits
                ! in one half, zap out the high bits in another,
                ! shift them both up to the top 16-bits of a word
                ! and do the carry producing addition, finally
                ! shift the result back down to the low 16-bits.

                ! Actually, we can further optimize away two shifts
                ! because we know the low bits of the original
                ! value will be added to zero-only bits so cannot
                ! affect the addition result nor the final carry
                ! bit.

                sll     %0, 16, %1
                addcc   %0, %1, %0              ! add and set carry, neat eh?
                srl     %0, 16, %0              ! shift back down the result
                addx    %0, %%g0, %0            ! get remaining carry bit
                xnor    %%g0, %0, %0            ! negate, sparc is cool
                "
                : "=&r" (sum), "=&r" (saddr)
                : "0" (daddr), "1" (saddr), "r" (len+proto), "r" (sum));
                return ((unsigned short) sum);
}
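
/*
 * For reference only: a portable C sketch of the ones' complement
 * pseudo-header sum that the assembly above computes, written with a
 * 64-bit accumulator instead of the SPARC carry chain (ones' complement
 * arithmetic makes the fold order irrelevant).  The function name is
 * illustrative and is not part of this header.
 */
#if 0
static unsigned short csum_tcpudp_magic_ref(unsigned long saddr,
                                            unsigned long daddr,
                                            unsigned short len,
                                            unsigned short proto,
                                            unsigned int sum)
{
        unsigned long long s = sum;

        s += saddr;                             /* source address */
        s += daddr;                             /* destination address */
        s += len + proto;                       /* length and protocol words */
        s = (s & 0xffffffffULL) + (s >> 32);    /* fold carries out of 32 bits */
        s = (s & 0xffffULL) + (s >> 16);        /* fold 32 bits down to 16 */
        s = (s & 0xffffULL) + (s >> 16);        /* absorb a possible final carry */
        return (unsigned short) ~s;             /* already complemented */
}
#endif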

extern inline unsigned short from32to16(unsigned long x)
{
        __asm__ __volatile__("
                addcc   %0, %1, %0
                srl     %0, 16, %0
                addx    %%g0, %0, %0
                "
                : "=r" (x)
                : "r" (x << 16), "0" (x));
        return x;
}
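
/*
 * For reference only: a portable C sketch of the 32-to-16 bit fold the
 * assembly above performs (add the two 16-bit halves, then fold the
 * carry back in).  The name is illustrative, not part of this header.
 */
#if 0
static unsigned short from32to16_ref(unsigned long x)
{
        unsigned long folded = (x >> 16) + (x & 0xffff);        /* high half + low half */
        return (unsigned short) ((folded & 0xffff) + (folded >> 16));  /* end-around carry */
}
#endif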

extern inline unsigned long do_csum(unsigned char * buff, int len)
{
        int odd, count;
        unsigned long result = 0;

        if (len <= 0)
                goto out;
        odd = 1 & (unsigned long) buff;
        if (odd) {
                result = *buff;
                len--;
                buff++;
        }
        count = len >> 1;               /* nr of 16-bit words.. */
        if (count) {
                if (2 & (unsigned long) buff) {
                        result += *(unsigned short *) buff;
                        count--;
                        len -= 2;
                        buff += 2;
                }
                count >>= 1;            /* nr of 32-bit words.. */
                if (count) {
                        unsigned long carry = 0;
                        do {
                                unsigned long w = *(unsigned long *) buff;
                                count--;
                                buff += 4;
                                result += carry;
                                result += w;
                                carry = (w > result);
                        } while (count);
                        result += carry;
                        result = (result & 0xffff) + (result >> 16);
                }
                if (len & 2) {
                        result += *(unsigned short *) buff;
                        buff += 2;
                }
        }
        if (len & 1)
                result += (*buff << 8);
        result = from32to16(result);
        if (odd)
                result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
out:
        return result;
}

/* ihl is always 5 or greater, almost always is 5, iph is always word
 * aligned but can fail to be dword aligned very often.
 */
extern inline unsigned short ip_fast_csum(const unsigned char *iph, unsigned int ihl)
{
        unsigned int sum;

        __asm__ __volatile__("
                ld      [%1], %0
                sub     %2, 4, %2
                ld      [%1 + 0x4], %%g1
                ld      [%1 + 0x8], %%g2
                addcc   %%g1, %0, %0
                addxcc  %%g2, %0, %0
                ld      [%1 + 0xc], %%g1
                ld      [%1 + 0x10], %%g2
                addxcc  %%g1, %0, %0
                addxcc  %0, %%g0, %0
1:
                addcc   %%g2, %0, %0
                add     %1, 0x4, %1
                addxcc  %0, %%g0, %0
                subcc   %2, 0x1, %2
                bne,a   1b
                 ld     [%1 + 0x10], %%g2

                sll     %0, 16, %2
                addcc   %0, %2, %2
                srl     %2, 16, %0
                addx    %0, %%g0, %2
                xnor    %%g0, %2, %0
2:
                "
                : "=&r" (sum), "=&r" (iph), "=&r" (ihl)
                : "1" (iph), "2" (ihl)
                : "g1", "g2");
        return sum;
}
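
/*
 * For reference only: a portable C sketch of the IP header checksum the
 * unrolled assembly above computes - sum ihl 32-bit words (the header is
 * word aligned, see the comment above), fold to 16 bits and complement.
 * The name is illustrative, not part of this header.
 */
#if 0
static unsigned short ip_fast_csum_ref(const unsigned char *iph, unsigned int ihl)
{
        const unsigned int *p = (const unsigned int *) iph;
        unsigned long long sum = 0;
        unsigned int i;

        for (i = 0; i < ihl; i++)                       /* ihl counts 32-bit words */
                sum += p[i];
        sum = (sum & 0xffffffffULL) + (sum >> 32);      /* fold carries out of 32 bits */
        sum = (sum & 0xffffULL) + (sum >> 16);          /* fold to 16 bits */
        sum = (sum & 0xffffULL) + (sum >> 16);          /* absorb a possible final carry */
        return (unsigned short) ~sum;
}
#endif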

/*
 * computes the checksum of a memory block at buff, length len,
 * and adds in "sum" (32-bit)
 *
 * returns a 32-bit number suitable for feeding into itself
 * or csum_tcpudp_magic
 *
 * this function must be called with even lengths, except
 * for the last fragment, which may be odd
 *
 * it's best to have buff aligned on a 32-bit boundary
 */
extern inline unsigned int csum_partial(unsigned char * buff, int len, unsigned int sum)
{
        __asm__ __volatile__("
                mov     0, %%g5                 ! g5 = result
                cmp     %1, 0
                bgu,a   1f
                 andcc  %0, 1, %%g7             ! g7 = odd

                b,a     9f

1:
                be,a    1f
                 srl    %1, 1, %%g6             ! g6 = count = (len >> 1)

                sub     %1, 1, %1       ! if(odd) { result = *buff;
                ldub    [%0], %%g5      !           len--;
                add     %0, 1, %0       !           buff++ }

                srl     %1, 1, %%g6
1:
                cmp     %%g6, 0         ! if (count) {
                be,a    8f
                 andcc  %1, 1, %%g0

                andcc   %0, 2, %%g0     ! if (2 & buff) {
                be,a    1f
                 srl    %%g6, 1, %%g6

                sub     %1, 2, %1       !       result += *(unsigned short *) buff;
                lduh    [%0], %%g1      !       count--;
                sub     %%g6, 1, %%g6   !       len -= 2;
                add     %%g1, %%g5, %%g5!       buff += 2;
                add     %0, 2, %0       ! }

                srl     %%g6, 1, %%g6
1:
                cmp     %%g6, 0         ! if (count) {
                be,a    2f
                 andcc  %1, 2, %%g0

                ld      [%0], %%g1              ! csum aligned 32bit words
1:
                add     %0, 4, %0
                addcc   %%g1, %%g5, %%g5
                addx    %%g5, %%g0, %%g5
                subcc   %%g6, 1, %%g6
                bne,a   1b
                 ld     [%0], %%g1

                sethi   %%hi(0xffff), %%g3
                srl     %%g5, 16, %%g2
                or      %%g3, %%lo(0xffff), %%g3
                and     %%g5, %%g3, %%g5
                add     %%g2, %%g5, %%g5! }

                andcc   %1, 2, %%g0
2:
                be,a    8f              ! if (len & 2) {
                 andcc  %1, 1, %%g0

                lduh    [%0], %%g1      !       result += *(unsigned short *) buff;
                add     %%g5, %%g1, %%g5!       buff += 2;
                add     %0, 2, %0       ! }


                andcc   %1, 1, %%g0
8:
                be,a    1f              ! if (len & 1) {
                 sll    %%g5, 16, %%g1

                ldub    [%0], %%g1
                sll     %%g1, 8, %%g1   !       result += (*buff << 8);
                add     %%g5, %%g1, %%g5! }

                sll     %%g5, 16, %%g1
1:
                addcc   %%g1, %%g5, %%g5! result = from32to16(result);
                srl     %%g5, 16, %%g1
                addx    %%g0, %%g1, %%g5

                orcc    %%g7, %%g0, %%g0! if(odd) {
                be      9f
                 srl    %%g5, 8, %%g1

                and     %%g5, 0xff, %%g2!       result = ((result >> 8) & 0xff) |
                and     %%g1, 0xff, %%g1!               ((result & 0xff) << 8);
                sll     %%g2, 8, %%g2
                or      %%g2, %%g1, %%g5! }
9:
                addcc   %2, %%g5, %2    ! add result and sum with carry
                addx    %%g0, %2, %2
        " :
        "=&r" (buff), "=&r" (len), "=&r" (sum) :
        "0" (buff), "1" (len), "2" (sum) :
        "g1", "g2", "g3", "g5", "g6", "g7");

        return sum;
}
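
/*
 * For reference only: how csum_partial() is intended to be chained over
 * several fragments - each call feeds its 32-bit result back in as "sum",
 * and the final value is folded and complemented with csum_fold(), which
 * is defined further down in this file.  Names here are illustrative.
 */
#if 0
static unsigned short checksum_two_fragments(unsigned char *frag1, int len1,
                                             unsigned char *frag2, int len2)
{
        unsigned int sum;

        sum = csum_partial(frag1, len1, 0);     /* len1 must be even ... */
        sum = csum_partial(frag2, len2, sum);   /* ... only the last fragment may be odd */
        return (unsigned short) csum_fold(sum); /* fold to 16 bits and complement */
}
#endif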

/*
 * the same as csum_partial, but copies from fs:src while it
 * checksums
 *
 * here even more important to align src and dst on a 32-bit (or even
 * better 64-bit) boundary
 */
extern inline unsigned int csum_partial_copy(char *src, char *dst, int len, int sum)
{
        /*
         * The whole idea is to do the copy and the checksum at
         * the same time, but we do it the easy way now.
         *
         * At least csum on the source, not destination, for cache
         * reasons..
         */
        sum = csum_partial(src, len, sum);
        memcpy(dst, src, len);
        return sum;
}

/*
 * this routine is used for miscellaneous IP-like checksums, mainly
 * in icmp.c
 */
extern inline unsigned short ip_compute_csum(unsigned char * buff, int len)
{
        return ~from32to16(do_csum(buff,len));
}

#define csum_partial_copy_fromuser(s, d, l, w)  \
                       csum_partial_copy((char *) (s), (d), (l), (w))

/*
 *      Fold a partial checksum without adding pseudo headers
 */
extern inline unsigned int csum_fold(unsigned int sum)
{
        __asm__ __volatile__("
                addcc   %0, %1, %0
                srl     %0, 16, %0
                addx    %%g0, %0, %0
                xnor    %%g0, %0, %0
                "
                : "=r" (sum)
                : "r" (sum << 16), "0" (sum));
        return sum;
}

#endif /* !(__SPARC_CHECKSUM_H) */
