OpenCores WebSVN - Subversion repository or1k_old
URL: https://opencores.org/ocsvn/or1k_old/or1k_old/trunk
File: or1k_old/trunk/rc203soc/sw/uClinux/include/asm-sparc/checksum.h (Rev 1782)

/* $Id: checksum.h,v 1.1 2005-12-20 11:32:11 jcastillo Exp $ */
#ifndef __SPARC_CHECKSUM_H
#define __SPARC_CHECKSUM_H
 
/*  checksum.h:  IP/UDP/TCP checksum routines on the Sparc.
 *
 *  Copyright(C) 1995 Linus Torvalds
 *  Copyright(C) 1995 Miguel de Icaza
 *  Copyright(C) 1996 David S. Miller
 *
 * derived from:
 *	Alpha checksum c-code
 *      ix86 inline assembly
 */
 
/*
 * computes the checksum of the TCP/UDP pseudo-header
 * returns a 16-bit checksum, already complemented
 */
 
extern inline unsigned short csum_tcpudp_magic(unsigned long saddr,
					       unsigned long daddr,
					       unsigned short len,
					       unsigned short proto,
					       unsigned int sum)
{
	__asm__ __volatile__("
		addcc	%0, %1, %0
		addxcc	%0, %4, %0
		addxcc	%0, %5, %0
		addx	%0, %%g0, %0
 
		! We need the carry produced by adding the two 16-bit
		! halves, so we zap out the low bits in one half, zap
		! out the high bits in the other,
		! shift them both up to the top 16-bits of a word
		! and do the carry producing addition, finally
		! shift the result back down to the low 16-bits.
 
		! Actually, we can further optimize away two shifts
		! because we know the low bits of the original
		! value will be added to zero-only bits so cannot
		! affect the addition result nor the final carry
		! bit.
 
		sll	%0, 16, %1
		addcc	%0, %1, %0		! add and set carry, neat eh?
		srl	%0, 16, %0		! shift back down the result
		addx	%0, %%g0, %0		! get remaining carry bit
		xnor	%%g0, %0, %0		! negate, sparc is cool
		"
		: "=&r" (sum), "=&r" (saddr)
		: "0" (daddr), "1" (saddr), "r" (len+proto), "r" (sum));
		return ((unsigned short) sum); 
}
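
/* Illustrative sketch, not part of the original header: a hypothetical
 * portable C rendering of what the assembly in csum_tcpudp_magic()
 * computes, for readers who do not read SPARC.  The pseudo-header
 * fields and the running sum are accumulated in 64 bits, the carries
 * are folded back in, and the 16-bit result is complemented.  The name
 * csum_tcpudp_magic_ref is made up for this sketch.
 */
static __inline__ unsigned short csum_tcpudp_magic_ref(unsigned long saddr,
						       unsigned long daddr,
						       unsigned short len,
						       unsigned short proto,
						       unsigned int sum)
{
	unsigned long long acc = sum;

	acc += saddr;			/* pseudo-header source address */
	acc += daddr;			/* pseudo-header dest address   */
	acc += len + proto;		/* 16-bit length and protocol   */

	/* fold the accumulator down to 16 bits, end-around carry style */
	while (acc >> 16)
		acc = (acc & 0xffff) + (acc >> 16);

	return (unsigned short) ~acc;	/* complemented, like the xnor above */
}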
 
extern inline unsigned short from32to16(unsigned long x)
{
	__asm__ __volatile__("
		addcc	%0, %1, %0
		srl	%0, 16, %0
		addx	%%g0, %0, %0
		"
		: "=r" (x)
		: "r" (x << 16), "0" (x));
	return x;
}
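
/* Illustrative sketch, not part of the original header: the same
 * 32->16 fold as from32to16() above, written without the shift trick
 * described in csum_tcpudp_magic(); two plain folds absorb the
 * end-around carry.  The name from32to16_ref is made up for this sketch.
 */
static __inline__ unsigned short from32to16_ref(unsigned long x)
{
	x = (x & 0xffff) + (x >> 16);	/* fold; may produce a carry */
	x = (x & 0xffff) + (x >> 16);	/* fold the carry back in    */
	return x;
}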
 
/* Generic one's-complement sum over a byte buffer: consume the first
 * byte if buff starts on an odd address, then sum 16-bit and 32-bit
 * aligned runs and a trailing odd byte, fold the result to 16 bits,
 * and byte-swap it when the input was odd-aligned.
 */
extern inline unsigned long do_csum(unsigned char * buff, int len)
{
	int odd, count;
	unsigned long result = 0;
 
	if (len <= 0)
		goto out;
	odd = 1 & (unsigned long) buff;
	if (odd) {
		result = *buff;
		len--;
		buff++;
	}
	count = len >> 1;		/* nr of 16-bit words.. */
	if (count) {
		if (2 & (unsigned long) buff) {
			result += *(unsigned short *) buff;
			count--;
			len -= 2;
			buff += 2;
		}
		count >>= 1;		/* nr of 32-bit words.. */
		if (count) {
		        unsigned long carry = 0;
			do {
				unsigned long w = *(unsigned long *) buff;
				count--;
				buff += 4;
				result += carry;
				result += w;
				carry = (w > result);	/* the addition wrapped iff the new sum is smaller than w */
			} while (count);
			result += carry;
			result = (result & 0xffff) + (result >> 16);
		}
		if (len & 2) {
			result += *(unsigned short *) buff;
			buff += 2;
		}
	}
	if (len & 1)
		result += (*buff << 8);
	result = from32to16(result);
	if (odd)
		result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
out:
	return result;
}
 
/* ihl is always at least 5, and almost always exactly 5; iph is always
 * word (32-bit) aligned, but quite often fails to be dword (64-bit)
 * aligned.
 */
extern inline unsigned short ip_fast_csum(const unsigned char *iph, unsigned int ihl)
{
	unsigned int sum;
 
	__asm__ __volatile__("
		ld	[%1], %0
		sub	%2, 4, %2
		ld	[%1 + 0x4], %%g1
		ld	[%1 + 0x8], %%g2
		addcc	%%g1, %0, %0
		addxcc	%%g2, %0, %0
		ld	[%1 + 0xc], %%g1
		ld	[%1 + 0x10], %%g2
		addxcc	%%g1, %0, %0
		addxcc	%0, %%g0, %0
1:
		addcc	%%g2, %0, %0
		add	%1, 0x4, %1
		addxcc	%0, %%g0, %0
		subcc	%2, 0x1, %2
		bne,a	1b
		 ld	[%1 + 0x10], %%g2
 
		sll	%0, 16, %2
		addcc	%0, %2, %2
		srl	%2, 16, %0
		addx	%0, %%g0, %2
		xnor	%%g0, %2, %0
2:
		"
		: "=&r" (sum), "=&r" (iph), "=&r" (ihl)
		: "1" (iph), "2" (ihl)
		: "g1", "g2");
	return sum;
}
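
/* Illustrative sketch, not part of the original header: a hypothetical
 * portable rendering of ip_fast_csum() for readers who do not want to
 * trace the delay slots above.  It sums the IP header as 16-bit words
 * (so no 32-bit alignment is assumed), then folds and complements.
 * The name ip_fast_csum_ref is made up for this sketch.
 */
static __inline__ unsigned short ip_fast_csum_ref(const unsigned char *iph,
						  unsigned int ihl)
{
	const unsigned short *p = (const unsigned short *) iph;
	unsigned long acc = 0;
	unsigned int i;

	for (i = 0; i < ihl * 2; i++)	/* ihl counts 32-bit words */
		acc += p[i];

	while (acc >> 16)		/* fold to 16 bits */
		acc = (acc & 0xffff) + (acc >> 16);

	return (unsigned short) ~acc;
}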
 
/*
 * computes the checksum of a memory block at buff, length len,
 * and adds in "sum" (32-bit)
 *
 * returns a 32-bit number suitable for feeding into itself
 * or csum_tcpudp_magic
 *
 * this function must be called with even lengths, except
 * for the last fragment, which may be odd
 *
 * it's best to have buff aligned on a 32-bit boundary
 */
extern inline unsigned int csum_partial(unsigned char * buff, int len, unsigned int sum)
{
	__asm__ __volatile__("
		mov	0, %%g5			! g5 = result
		cmp	%1, 0
		bgu,a	1f
		 andcc	%0, 1, %%g7		! g7 = odd
 
		b,a	9f
 
1:
		be,a	1f
		 srl	%1, 1, %%g6		! g6 = count = (len >> 1)
 
		sub	%1, 1, %1	! if(odd) { result = *buff;
		ldub	[%0], %%g5	!           len--;
		add	%0, 1, %0	!           buff++ }
 
		srl	%1, 1, %%g6
1:
		cmp	%%g6, 0		! if (count) {
		be,a	8f
		 andcc	%1, 1, %%g0
 
		andcc	%0, 2, %%g0	! if (2 & buff) {
		be,a	1f
		 srl	%%g6, 1, %%g6
 
		sub	%1, 2, %1	!	result += *(unsigned short *) buff;
		lduh	[%0], %%g1	!	count--; 
		sub	%%g6, 1, %%g6	!	len -= 2;
		add	%%g1, %%g5, %%g5!	buff += 2; 
		add	%0, 2, %0	! }
 
		srl	%%g6, 1, %%g6
1:
		cmp	%%g6, 0		! if (count) {
		be,a	2f
		 andcc	%1, 2, %%g0
 
		ld	[%0], %%g1		! csum aligned 32bit words
1:
		add	%0, 4, %0
		addcc	%%g1, %%g5, %%g5
		addx	%%g5, %%g0, %%g5
		subcc	%%g6, 1, %%g6
		bne,a	1b
		 ld	[%0], %%g1
 
		sethi	%%hi(0xffff), %%g3
		srl	%%g5, 16, %%g2
		or	%%g3, %%lo(0xffff), %%g3
		and	%%g5, %%g3, %%g5
		add	%%g2, %%g5, %%g5! }
 
		andcc	%1, 2, %%g0
2:
		be,a	8f		! if (len & 2) {
		 andcc	%1, 1, %%g0
 
		lduh	[%0], %%g1	!	result += *(unsigned short *) buff; 
		add	%%g5, %%g1, %%g5!	buff += 2; 
		add	%0, 2, %0	! }
 
 
		andcc	%1, 1, %%g0
8:
		be,a	1f		! if (len & 1) {
		 sll	%%g5, 16, %%g1
 
		ldub	[%0], %%g1
		sll	%%g1, 8, %%g1	!	result += (*buff << 8); 
		add	%%g5, %%g1, %%g5! }
 
		sll	%%g5, 16, %%g1
1:
		addcc	%%g1, %%g5, %%g5! result = from32to16(result);
		srl	%%g5, 16, %%g1
		addx	%%g0, %%g1, %%g5
 
		orcc	%%g7, %%g0, %%g0! if(odd) {
		be	9f
		 srl	%%g5, 8, %%g1
 
		and	%%g5, 0xff, %%g2!	result = ((result >> 8) & 0xff) |
		and	%%g1, 0xff, %%g1!		((result & 0xff) << 8);
		sll	%%g2, 8, %%g2
		or	%%g2, %%g1, %%g5! }
9:
		addcc	%2, %%g5, %2	! add result and sum with carry
		addx	%%g0, %2, %2
	" :
        "=&r" (buff), "=&r" (len), "=&r" (sum) :
        "0" (buff), "1" (len), "2" (sum) :
	"g1", "g2", "g3", "g5", "g6", "g7"); 
 
	return sum;
}
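
/* Illustrative usage sketch, not part of the original header: checksum
 * a packet that arrives in two fragments by feeding the running 32-bit
 * sum of the first csum_partial() call into the second, then fold and
 * complement.  Only the final fragment may have an odd length.  The
 * function and parameter names are made up for this sketch.
 */
static __inline__ unsigned short example_fragmented_csum(unsigned char *frag1, int len1,
							 unsigned char *frag2, int len2)
{
	unsigned int sum;

	sum = csum_partial(frag1, len1, 0);	/* len1 must be even     */
	sum = csum_partial(frag2, len2, sum);	/* last part, may be odd */
	return ~from32to16(sum);		/* same fold csum_fold() does below */
}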
 
/*
 * the same as csum_partial, but copies from src to dst while it
 * checksums
 *
 * here it is even more important that src and dst are aligned on a
 * 32-bit (or, better, a 64-bit) boundary
 */
extern inline unsigned int csum_partial_copy(char *src, char *dst, int len, int sum)
{
	/*
	 * The whole idea is to do the copy and the checksum at
	 * the same time, but we do it the easy way now.
	 *
	 * At least we checksum the source, not the destination, for
	 * cache reasons.
	 */
	sum = csum_partial(src, len, sum);
	memcpy(dst, src, len);
	return sum;
}
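
/* Illustrative usage sketch, not part of the original header: copy a
 * block of outgoing data into the packet buffer while accumulating its
 * checksum, then combine the result with the pseudo-header via
 * csum_tcpudp_magic().  The function and parameter names are made up;
 * 17 is IPPROTO_UDP.
 */
static __inline__ unsigned short example_udp_copy_csum(char *payload, char *pkt_data, int len,
						       unsigned long saddr, unsigned long daddr)
{
	unsigned int sum;

	sum = csum_partial_copy(payload, pkt_data, len, 0);
	return csum_tcpudp_magic(saddr, daddr, len, 17 /* IPPROTO_UDP */, sum);
}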
 
/*
 * this routine is used for miscellaneous IP-like checksums, mainly
 * in icmp.c
 */
extern inline unsigned short ip_compute_csum(unsigned char * buff, int len)
{
	return ~from32to16(do_csum(buff,len));
}
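
/* Illustrative usage sketch, not part of the original header: a
 * received message that carries a correct checksum sums to 0xffff over
 * its whole length, so ip_compute_csum() returns 0 exactly when the
 * message is intact.  The function and parameter names are made up.
 */
static __inline__ int example_icmp_csum_ok(unsigned char *msg, int len)
{
	return ip_compute_csum(msg, len) == 0;
}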
 
#define csum_partial_copy_fromuser(s, d, l, w)  \
                       csum_partial_copy((char *) (s), (d), (l), (w))
 
/*
 *	Fold a partial checksum without adding pseudo headers
 */
extern inline unsigned int csum_fold(unsigned int sum)
{
	__asm__ __volatile__("
		addcc	%0, %1, %0
		srl	%0, 16, %0
		addx	%%g0, %0, %0
		xnor	%%g0, %0, %0
		"
		: "=r" (sum)
		: "r" (sum << 16), "0" (sum)); 
	return sum;
}
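
/* Illustrative sketch, not part of the original header: a hypothetical
 * portable rendering of csum_fold() above.  It is the complemented
 * 32->16 fold, so its low 16 bits match (unsigned short) ~from32to16(sum).
 * The name csum_fold_ref is made up for this sketch.
 */
static __inline__ unsigned int csum_fold_ref(unsigned int sum)
{
	sum = (sum & 0xffff) + (sum >> 16);	/* fold; may produce a carry  */
	sum = (sum & 0xffff) + (sum >> 16);	/* fold the carry back in     */
	return ~sum;				/* callers keep the low 16 bits */
}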
 
#endif /* !(__SPARC_CHECKSUM_H) */
 
