/* $Id: checksum.h,v 1.1 2005-12-20 11:32:11 jcastillo Exp $ */
#ifndef __SPARC_CHECKSUM_H
#define __SPARC_CHECKSUM_H

#include <linux/string.h>	/* memcpy(), used by csum_partial_copy() */

/* checksum.h: IP/UDP/TCP checksum routines on the Sparc.
 *
 * Copyright(C) 1995 Linus Torvalds
 * Copyright(C) 1995 Miguel de Icaza
 * Copyright(C) 1996 David S. Miller
 *
 * derived from:
 *	Alpha checksum c-code
 *	ix86 inline assembly
 */

/*
 * computes the checksum of the TCP/UDP pseudo-header
 * returns a 16-bit checksum, already complemented
 */

extern inline unsigned short csum_tcpudp_magic(unsigned long saddr,
					       unsigned long daddr,
					       unsigned short len,
					       unsigned short proto,
					       unsigned int sum)
{
	__asm__ __volatile__(
		"addcc	%0, %1, %0\n\t"
		"addxcc	%0, %4, %0\n\t"
		"addxcc	%0, %5, %0\n\t"
		"addx	%0, %%g0, %0\n\t"

	/* We need the carry from the high (most significant) 16-bit
	 * addition, so we zap out the low bits in one half, zap out
	 * the high bits in another, shift them both up to the top
	 * 16 bits of a word and do the carry producing addition,
	 * finally shifting the result back down to the low 16 bits.
	 *
	 * Actually, we can further optimize away two shifts because
	 * we know the low bits of the original value will be added
	 * to zero-only bits so they cannot affect the addition
	 * result nor the final carry bit.
	 */
		"sll	%0, 16, %1\n\t"
		"addcc	%0, %1, %0	! add and set carry, neat eh?\n\t"
		"srl	%0, 16, %0	! shift back down the result\n\t"
		"addx	%0, %%g0, %0	! get remaining carry bit\n\t"
		"xnor	%%g0, %0, %0	! negate, sparc is cool"
		: "=&r" (sum), "=&r" (saddr)
		: "0" (daddr), "1" (saddr), "r" (len+proto), "r" (sum));
	return ((unsigned short) sum);
}

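/* Illustration only, not part of the original header: a portable C
 * sketch of what the assembly above computes, assuming 32-bit longs.
 * The addcc/addxcc chain is modelled as one wide ones-complement sum;
 * the __..._ref name is made up for this sketch.
 */
static inline unsigned short __csum_tcpudp_magic_ref(unsigned long saddr,
						     unsigned long daddr,
						     unsigned short len,
						     unsigned short proto,
						     unsigned int sum)
{
	unsigned long long t = sum;

	t += saddr;
	t += daddr;
	t += len + proto;
	while (t >> 32)				/* end-around carries	*/
		t = (t & 0xffffffffUL) + (t >> 32);
	t = (t & 0xffff) + (t >> 16);		/* fold 32 -> 16 bits	*/
	t += t >> 16;				/* absorb the last carry */
	return (unsigned short) ~t;		/* already complemented	*/
}
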
/* Fold a 32-bit sum down to 16 bits, adding the shifted-out carry back in. */
extern inline unsigned short from32to16(unsigned long x)
{
	__asm__ __volatile__(
		"addcc	%0, %1, %0\n\t"
		"srl	%0, 16, %0\n\t"
		"addx	%%g0, %0, %0"
		: "=r" (x)
		: "r" (x << 16), "0" (x));
	return x;
}

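/* Illustration only (made-up name): the same fold in portable C.
 * Adding x << 16 to x places low16 + high16 in the top halfword;
 * the srl/addx pair then brings that sum back down plus its carry.
 */
static inline unsigned short __from32to16_ref(unsigned long x)
{
	x = (x & 0xffff) + (x >> 16);	/* add the two halves	*/
	x += x >> 16;			/* absorb the carry bit	*/
	return (unsigned short) x;
}
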
extern inline unsigned long do_csum(unsigned char * buff, int len)
{
	int odd, count;
	unsigned long result = 0;

	if (len <= 0)
		goto out;
	odd = 1 & (unsigned long) buff;
	if (odd) {
		result = *buff;
		len--;
		buff++;
	}
	count = len >> 1;		/* nr of 16-bit words.. */
	if (count) {
		if (2 & (unsigned long) buff) {
			result += *(unsigned short *) buff;
			count--;
			len -= 2;
			buff += 2;
		}
		count >>= 1;		/* nr of 32-bit words.. */
		if (count) {
			unsigned long carry = 0;
			do {
				unsigned long w = *(unsigned long *) buff;
				count--;
				buff += 4;
				result += carry;
				result += w;
				carry = (w > result);	/* unsigned wraparound => lost carry bit */
			} while (count);
			result += carry;
			result = (result & 0xffff) + (result >> 16);
		}
		if (len & 2) {
			result += *(unsigned short *) buff;
			buff += 2;
		}
	}
	if (len & 1)
		result += (*buff << 8);
	result = from32to16(result);
	if (odd)	/* started on an odd byte: swap the halves back */
		result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
out:
	return result;
}

/* ihl is always 5 or greater, almost always 5, and iph is always
 * word aligned but very often fails to be dword aligned.
 */
extern inline unsigned short ip_fast_csum(const unsigned char *iph, unsigned int ihl)
{
	unsigned int sum;

	__asm__ __volatile__(
		"ld	[%1], %0\n\t"
		"sub	%2, 4, %2\n\t"
		"ld	[%1 + 0x4], %%g1\n\t"
		"ld	[%1 + 0x8], %%g2\n\t"
		"addcc	%%g1, %0, %0\n\t"
		"addxcc	%%g2, %0, %0\n\t"
		"ld	[%1 + 0xc], %%g1\n\t"
		"ld	[%1 + 0x10], %%g2\n\t"
		"addxcc	%%g1, %0, %0\n\t"
		"addxcc	%0, %%g0, %0\n"
		"1:\n\t"
		"addcc	%%g2, %0, %0\n\t"
		"add	%1, 0x4, %1\n\t"
		"addxcc	%0, %%g0, %0\n\t"
		"subcc	%2, 0x1, %2\n\t"
		"bne,a	1b\n\t"
		" ld	[%1 + 0x10], %%g2	! annulled delay slot\n\t"

		"sll	%0, 16, %2		! fold the sum to 16 bits\n\t"
		"addcc	%0, %2, %2\n\t"
		"srl	%2, 16, %0\n\t"
		"addx	%0, %%g0, %2\n\t"
		"xnor	%%g0, %2, %0\n"
		"2:"
		: "=&r" (sum), "=&r" (iph), "=&r" (ihl)
		: "1" (iph), "2" (ihl)
		: "g1", "g2");
	return sum;
}

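/* Illustration only (made-up name): the same computation in portable
 * C, relying on the word alignment noted above.  The IP checksum is
 * the ones-complement sum of the header's 16-bit words, inverted.
 */
static inline unsigned short __ip_fast_csum_ref(const unsigned char *iph,
						unsigned int ihl)
{
	const unsigned short *p = (const unsigned short *) iph;
	unsigned long sum = 0;
	unsigned int i;

	for (i = 0; i < 2 * ihl; i++)		/* ihl counts 32-bit words */
		sum += p[i];
	sum = (sum & 0xffff) + (sum >> 16);	/* fold high into low	*/
	sum += sum >> 16;			/* absorb the carry	*/
	return (unsigned short) ~sum;
}
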
/*
 * computes the checksum of a memory block at buff, length len,
 * and adds in "sum" (32-bit)
 *
 * returns a 32-bit number suitable for feeding into itself
 * or csum_tcpudp_magic
 *
 * this function must be called with even lengths, except
 * for the last fragment, which may be odd
 *
 * it's best to have buff aligned on a 32-bit boundary
 */
extern inline unsigned int csum_partial(unsigned char * buff, int len, unsigned int sum)
{
	__asm__ __volatile__(
		"mov	0, %%g5			! g5 = result\n\t"
		"cmp	%1, 0\n\t"
		"bgu,a	1f\n\t"
		" andcc	%0, 1, %%g7		! g7 = odd\n\t"

		"b,a	9f\n"

		"1:\n\t"
		"be,a	1f\n\t"
		" srl	%1, 1, %%g6		! g6 = count = (len >> 1)\n\t"

		"sub	%1, 1, %1		! if(odd) { result = *buff;\n\t"
		"ldub	[%0], %%g5		!	len--;\n\t"
		"add	%0, 1, %0		!	buff++ }\n\t"

		"srl	%1, 1, %%g6\n"
		"1:\n\t"
		"cmp	%%g6, 0			! if (count) {\n\t"
		"be,a	8f\n\t"
		" andcc	%1, 1, %%g0\n\t"

		"andcc	%0, 2, %%g0		!	if (2 & buff) {\n\t"
		"be,a	1f\n\t"
		" srl	%%g6, 1, %%g6\n\t"

		"sub	%1, 2, %1		!		result += *(unsigned short *) buff;\n\t"
		"lduh	[%0], %%g1		!		count--;\n\t"
		"sub	%%g6, 1, %%g6		!		len -= 2;\n\t"
		"add	%%g1, %%g5, %%g5	!		buff += 2;\n\t"
		"add	%0, 2, %0		!	}\n\t"

		"srl	%%g6, 1, %%g6\n"
		"1:\n\t"
		"cmp	%%g6, 0			!	if (count) {\n\t"
		"be,a	2f\n\t"
		" andcc	%1, 2, %%g0\n\t"

		"ld	[%0], %%g1		!		csum aligned 32bit words\n"
		"1:\n\t"
		"add	%0, 4, %0\n\t"
		"addcc	%%g1, %%g5, %%g5\n\t"
		"addx	%%g5, %%g0, %%g5\n\t"
		"subcc	%%g6, 1, %%g6\n\t"
		"bne,a	1b\n\t"
		" ld	[%0], %%g1		! annulled delay slot\n\t"

		"sethi	%%hi(0xffff), %%g3\n\t"
		"srl	%%g5, 16, %%g2\n\t"
		"or	%%g3, %%lo(0xffff), %%g3\n\t"
		"and	%%g5, %%g3, %%g5\n\t"
		"add	%%g2, %%g5, %%g5	!	}\n\t"

		"andcc	%1, 2, %%g0\n"
		"2:\n\t"
		"be,a	8f			!	if (len & 2) {\n\t"
		" andcc	%1, 1, %%g0\n\t"

		"lduh	[%0], %%g1		!		result += *(unsigned short *) buff;\n\t"
		"add	%%g5, %%g1, %%g5	!		buff += 2;\n\t"
		"add	%0, 2, %0		!	}\n\t"

		"andcc	%1, 1, %%g0\n"
		"8:\n\t"
		"be,a	1f			! if (len & 1) {\n\t"
		" sll	%%g5, 16, %%g1\n\t"

		"ldub	[%0], %%g1\n\t"
		"sll	%%g1, 8, %%g1		!	result += (*buff << 8);\n\t"
		"add	%%g5, %%g1, %%g5	! }\n\t"

		"sll	%%g5, 16, %%g1\n"
		"1:\n\t"
		"addcc	%%g1, %%g5, %%g5	! result = from32to16(result);\n\t"
		"srl	%%g5, 16, %%g1\n\t"
		"addx	%%g0, %%g1, %%g5\n\t"

		"orcc	%%g7, %%g0, %%g0	! if(odd) {\n\t"
		"be	9f\n\t"
		" srl	%%g5, 8, %%g1\n\t"

		"and	%%g5, 0xff, %%g2	!	result = ((result >> 8) & 0xff) |\n\t"
		"and	%%g1, 0xff, %%g1	!		((result & 0xff) << 8);\n\t"
		"sll	%%g2, 8, %%g2\n\t"
		"or	%%g2, %%g1, %%g5	! }\n"
		"9:\n\t"
		"addcc	%2, %%g5, %2		! add result and sum with carry\n\t"
		"addx	%%g0, %2, %2"
		: "=&r" (buff), "=&r" (len), "=&r" (sum)
		: "0" (buff), "1" (len), "2" (sum)
		: "g1", "g2", "g3", "g5", "g6", "g7");

	return sum;
}

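/* Illustration only (made-up name): feeding csum_partial into itself
 * across two fragments, then folding.  from32to16 is used for the
 * final fold here only because csum_fold is defined further down.
 */
static inline unsigned short __csum_two_chunks_ref(unsigned char *a, int alen,
						   unsigned char *b, int blen)
{
	unsigned int s;

	s = csum_partial(a, alen, 0);	/* alen should be even	  */
	s = csum_partial(b, blen, s);	/* last chunk may be odd  */
	return (unsigned short) ~from32to16(s);
}
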
/*
 * the same as csum_partial, but copies from fs:src while it
 * checksums
 *
 * here even more important to align src and dst on a 32-bit (or even
 * better 64-bit) boundary
 */
extern inline unsigned int csum_partial_copy(char *src, char *dst, int len, int sum)
{
	/*
	 * The whole idea is to do the copy and the checksum at
	 * the same time, but we do it the easy way now.
	 *
	 * At least csum on the source, not destination, for cache
	 * reasons..
	 */
	sum = csum_partial(src, len, sum);
	memcpy(dst, src, len);
	return sum;
}

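/* Illustration only (made-up name): what "copy and checksum at the
 * same time" would look like for the easy aligned case, summing each
 * 32-bit word as it is stored.  Assumes len is a multiple of 4 and
 * both pointers are 32-bit aligned.
 */
static inline unsigned int __csum_copy_words_ref(unsigned long *dst,
						 const unsigned long *src,
						 int len, unsigned int sum)
{
	unsigned long long acc = sum;
	int i;

	for (i = 0; i < len >> 2; i++) {
		unsigned long w = src[i];
		dst[i] = w;		/* copy ...			*/
		acc += w;		/* ... and checksum in one pass	*/
	}
	while (acc >> 32)		/* fold the carries back in	*/
		acc = (acc & 0xffffffffUL) + (acc >> 32);
	return (unsigned int) acc;
}
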
/*
 * this routine is used for miscellaneous IP-like checksums, mainly
 * in icmp.c
 */
extern inline unsigned short ip_compute_csum(unsigned char * buff, int len)
{
	return ~from32to16(do_csum(buff,len));
}

#define csum_partial_copy_fromuser(s, d, l, w) \
	csum_partial_copy((char *) (s), (d), (l), (w))

/*
 * Fold a partial checksum without adding pseudo headers
 */
extern inline unsigned int csum_fold(unsigned int sum)
{
	__asm__ __volatile__(
		"addcc	%0, %1, %0\n\t"
		"srl	%0, 16, %0\n\t"
		"addx	%%g0, %0, %0\n\t"
		"xnor	%%g0, %0, %0"
		: "=r" (sum)
		: "r" (sum << 16), "0" (sum));
	return sum;
}

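/* Illustration only (made-up name): csum_fold in portable C.  Same
 * fold as from32to16, but the result is complemented so it can go
 * straight into a header's checksum field; callers use the low 16 bits.
 */
static inline unsigned int __csum_fold_ref(unsigned int sum)
{
	sum = (sum & 0xffff) + (sum >> 16);	/* fold high into low	*/
	sum += sum >> 16;			/* absorb the carry	*/
	return ~sum;				/* complement, as xnor	*/
}
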
#endif /* !(__SPARC_CHECKSUM_H) */