OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /or1k/trunk/linux/linux-2.4/net/core
    from Rev 1278 to Rev 1765

Rev 1278 → Rev 1765

/sock.c
0,0 → 1,1219
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
* interface as the means of communication with the user level.
*
* Generic socket support routines. Memory allocators, socket lock/release
* handler for protocols to use and generic option handler.
*
*
* Version: $Id: sock.c,v 1.1.1.1 2004-04-17 22:13:17 phoenix Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Florian La Roche, <flla@stud.uni-sb.de>
* Alan Cox, <A.Cox@swansea.ac.uk>
*
* Fixes:
* Alan Cox : Numerous verify_area() problems
* Alan Cox : Connecting on a connecting socket
* now returns an error for tcp.
* Alan Cox : sock->protocol is set correctly.
* and is not sometimes left as 0.
* Alan Cox : connect handles icmp errors on a
* connect properly. Unfortunately there
* is a restart syscall nasty there. I
* can't match BSD without hacking the C
* library. Ideas urgently sought!
* Alan Cox : Disallow bind() to addresses that are
* not ours - especially broadcast ones!!
* Alan Cox : Socket 1024 _IS_ ok for users. (fencepost)
* Alan Cox : sock_wfree/sock_rfree don't destroy sockets,
* instead they leave that for the DESTROY timer.
* Alan Cox : Clean up error flag in accept
* Alan Cox : TCP ack handling is buggy, the DESTROY timer
* was buggy. Put a remove_sock() in the handler
* for memory when we hit 0. Also altered the timer
* code. The ACK stuff can wait and needs major
* TCP layer surgery.
* Alan Cox : Fixed TCP ack bug, removed remove sock
* and fixed timer/inet_bh race.
* Alan Cox : Added zapped flag for TCP
* Alan Cox : Move kfree_skb into skbuff.c and tidied up surplus code
* Alan Cox : for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
* Alan Cox : kfree_s calls now are kfree_skbmem so we can track skb resources
* Alan Cox : Supports socket option broadcast now as does udp. Packet and raw need fixing.
* Alan Cox : Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
* Rick Sladkey : Relaxed UDP rules for matching packets.
* C.E.Hawkins : IFF_PROMISC/SIOCGHWADDR support
* Pauline Middelink : identd support
* Alan Cox : Fixed connect() taking signals I think.
* Alan Cox : SO_LINGER supported
* Alan Cox : Error reporting fixes
* Anonymous : inet_create tidied up (sk->reuse setting)
* Alan Cox : inet sockets don't set sk->type!
* Alan Cox : Split socket option code
* Alan Cox : Callbacks
* Alan Cox : Nagle flag for Charles & Johannes stuff
* Alex : Removed restriction on inet fioctl
* Alan Cox : Splitting INET from NET core
* Alan Cox : Fixed bogus SO_TYPE handling in getsockopt()
* Adam Caldwell : Missing return in SO_DONTROUTE/SO_DEBUG code
* Alan Cox : Split IP from generic code
* Alan Cox : New kfree_skbmem()
* Alan Cox : Make SO_DEBUG superuser only.
* Alan Cox : Allow anyone to clear SO_DEBUG
* (compatibility fix)
* Alan Cox : Added optimistic memory grabbing for AF_UNIX throughput.
* Alan Cox : Allocator for a socket is settable.
* Alan Cox : SO_ERROR includes soft errors.
* Alan Cox : Allow NULL arguments on some SO_ opts
* Alan Cox : Generic socket allocation to make hooks
* easier (suggested by Craig Metz).
* Michael Pall : SO_ERROR returns positive errno again
* Steve Whitehouse: Added default destructor to free
* protocol private data.
* Steve Whitehouse: Added various other default routines
* common to several socket families.
* Chris Evans : Call suser() check last on F_SETOWN
* Jay Schulist : Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
* Andi Kleen : Add sock_kmalloc()/sock_kfree_s()
* Andi Kleen : Fix write_space callback
* Chris Evans : Security fixes - signedness again
* Arnaldo C. Melo : cleanups, use skb_queue_purge
*
* To Fix:
*
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
 
#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/major.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/fcntl.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/poll.h>
#include <linux/tcp.h>
#include <linux/init.h>
 
#include <asm/uaccess.h>
#include <asm/system.h>
 
#include <linux/netdevice.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/ipsec.h>
 
#ifdef CONFIG_FILTER
#include <linux/filter.h>
#endif
 
#ifdef CONFIG_INET
#include <net/tcp.h>
#endif
 
/* Take into consideration the size of the struct sk_buff overhead in the
* determination of these values, since that is non-constant across
* platforms. This makes socket queueing behavior and performance
* not depend upon such differences.
*/
#define _SK_MEM_PACKETS 256
#define _SK_MEM_OVERHEAD (sizeof(struct sk_buff) + 256)
#define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
#define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
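/* A rough worked example of the figures above (assuming a struct sk_buff of
 * about 192 bytes, which varies by architecture and kernel configuration):
 * _SK_MEM_OVERHEAD ~= 192 + 256 = 448 bytes, so
 * SK_WMEM_MAX = SK_RMEM_MAX ~= 448 * 256 ~= 112 Kbytes per socket direction.
 */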
 
/* Run time adjustable parameters. */
__u32 sysctl_wmem_max = SK_WMEM_MAX;
__u32 sysctl_rmem_max = SK_RMEM_MAX;
__u32 sysctl_wmem_default = SK_WMEM_MAX;
__u32 sysctl_rmem_default = SK_RMEM_MAX;
 
/* Maximal space eaten by iovec or ancillary data plus some space */
int sysctl_optmem_max = sizeof(unsigned long)*(2*UIO_MAXIOV + 512);
 
static int sock_set_timeout(long *timeo_p, char *optval, int optlen)
{
struct timeval tv;
 
if (optlen < sizeof(tv))
return -EINVAL;
if (copy_from_user(&tv, optval, sizeof(tv)))
return -EFAULT;
 
*timeo_p = MAX_SCHEDULE_TIMEOUT;
if (tv.tv_sec == 0 && tv.tv_usec == 0)
return 0;
if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
*timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
return 0;
}
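/* A hypothetical worked example of the conversion above, assuming HZ == 100
 * (a 10 ms tick): for tv = { 1, 5000 } the result is
 * 1*100 + (5000 + 9999)/10000 = 100 + 1 = 101 jiffies,
 * i.e. the 5 ms remainder is rounded up to one full tick.
 */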
 
/*
* This is meant for all protocols to use and covers goings on
* at the socket level. Everything here is generic.
*/
 
int sock_setsockopt(struct socket *sock, int level, int optname,
char *optval, int optlen)
{
struct sock *sk=sock->sk;
#ifdef CONFIG_FILTER
struct sk_filter *filter;
#endif
int val;
int valbool;
struct linger ling;
int ret = 0;
/*
* Options without arguments
*/
 
#ifdef SO_DONTLINGER /* Compatibility item... */
switch(optname)
{
case SO_DONTLINGER:
sk->linger=0;
return 0;
}
#endif
if(optlen<sizeof(int))
return(-EINVAL);
if (get_user(val, (int *)optval))
return -EFAULT;
valbool = val?1:0;
 
lock_sock(sk);
 
switch(optname)
{
case SO_DEBUG:
if(val && !capable(CAP_NET_ADMIN))
{
ret = -EACCES;
}
else
sk->debug=valbool;
break;
case SO_REUSEADDR:
sk->reuse = valbool;
break;
case SO_TYPE:
case SO_ERROR:
ret = -ENOPROTOOPT;
break;
case SO_DONTROUTE:
sk->localroute=valbool;
break;
case SO_BROADCAST:
sk->broadcast=valbool;
break;
case SO_SNDBUF:
/* Don't error on this; BSD doesn't, and if you think
about it this is right. Otherwise apps have to
play 'guess the biggest size' games. RCVBUF/SNDBUF
are treated in BSD as hints */
if (val > sysctl_wmem_max)
val = sysctl_wmem_max;
 
sk->userlocks |= SOCK_SNDBUF_LOCK;
if ((val * 2) < SOCK_MIN_SNDBUF)
sk->sndbuf = SOCK_MIN_SNDBUF;
else
sk->sndbuf = (val * 2);
 
/*
* Wake up sending tasks if we
* upped the value.
*/
sk->write_space(sk);
break;
 
case SO_RCVBUF:
/* Don't error on this; BSD doesn't, and if you think
about it this is right. Otherwise apps have to
play 'guess the biggest size' games. RCVBUF/SNDBUF
are treated in BSD as hints */
if (val > sysctl_rmem_max)
val = sysctl_rmem_max;
 
sk->userlocks |= SOCK_RCVBUF_LOCK;
/* FIXME: is this lower bound the right one? */
if ((val * 2) < SOCK_MIN_RCVBUF)
sk->rcvbuf = SOCK_MIN_RCVBUF;
else
sk->rcvbuf = (val * 2);
break;
 
case SO_KEEPALIVE:
#ifdef CONFIG_INET
if (sk->protocol == IPPROTO_TCP)
{
tcp_set_keepalive(sk, valbool);
}
#endif
sk->keepopen = valbool;
break;
 
case SO_OOBINLINE:
sk->urginline = valbool;
break;
 
case SO_NO_CHECK:
sk->no_check = valbool;
break;
 
case SO_PRIORITY:
if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
sk->priority = val;
else
ret = -EPERM;
break;
 
case SO_LINGER:
if(optlen<sizeof(ling)) {
ret = -EINVAL; /* 1003.1g */
break;
}
if (copy_from_user(&ling,optval,sizeof(ling))) {
ret = -EFAULT;
break;
}
if(ling.l_onoff==0) {
sk->linger=0;
} else {
#if (BITS_PER_LONG == 32)
if (ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
sk->lingertime=MAX_SCHEDULE_TIMEOUT;
else
#endif
sk->lingertime=ling.l_linger*HZ;
sk->linger=1;
}
break;
 
case SO_BSDCOMPAT:
sk->bsdism = valbool;
break;
 
case SO_PASSCRED:
sock->passcred = valbool;
break;
 
case SO_TIMESTAMP:
sk->rcvtstamp = valbool;
break;
 
case SO_RCVLOWAT:
if (val < 0)
val = INT_MAX;
sk->rcvlowat = val ? : 1;
break;
 
case SO_RCVTIMEO:
ret = sock_set_timeout(&sk->rcvtimeo, optval, optlen);
break;
 
case SO_SNDTIMEO:
ret = sock_set_timeout(&sk->sndtimeo, optval, optlen);
break;
 
#ifdef CONFIG_NETDEVICES
case SO_BINDTODEVICE:
{
char devname[IFNAMSIZ];
 
/* Sorry... */
if (!capable(CAP_NET_RAW)) {
ret = -EPERM;
break;
}
 
/* Bind this socket to a particular device like "eth0",
* as specified in the passed interface name. If the
* name is "" or the option length is zero the socket
* is not bound.
*/
 
if (!valbool) {
sk->bound_dev_if = 0;
} else {
if (optlen > IFNAMSIZ)
optlen = IFNAMSIZ;
if (copy_from_user(devname, optval, optlen)) {
ret = -EFAULT;
break;
}
 
/* Remove any cached route for this socket. */
sk_dst_reset(sk);
 
if (devname[0] == '\0') {
sk->bound_dev_if = 0;
} else {
struct net_device *dev = dev_get_by_name(devname);
if (!dev) {
ret = -ENODEV;
break;
}
sk->bound_dev_if = dev->ifindex;
dev_put(dev);
}
}
break;
}
#endif
 
 
#ifdef CONFIG_FILTER
case SO_ATTACH_FILTER:
ret = -EINVAL;
if (optlen == sizeof(struct sock_fprog)) {
struct sock_fprog fprog;
 
ret = -EFAULT;
if (copy_from_user(&fprog, optval, sizeof(fprog)))
break;
 
ret = sk_attach_filter(&fprog, sk);
}
break;
 
case SO_DETACH_FILTER:
spin_lock_bh(&sk->lock.slock);
filter = sk->filter;
if (filter) {
sk->filter = NULL;
spin_unlock_bh(&sk->lock.slock);
sk_filter_release(sk, filter);
break;
}
spin_unlock_bh(&sk->lock.slock);
ret = -ENONET;
break;
#endif
/* We implement the SO_SNDLOWAT etc to
not be settable (1003.1g 5.3) */
default:
ret = -ENOPROTOOPT;
break;
}
release_sock(sk);
return ret;
}
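/* A hypothetical user-space sketch of the SO_SNDBUF handling above: the
 * kernel stores twice the requested size (to cover sk_buff overhead) after
 * clamping against sysctl_wmem_max, so reading the option back returns
 * roughly double the value that was set. Names and values are illustrative.
 *
 *	#include <stdio.h>
 *	#include <sys/socket.h>
 *
 *	int demo_sndbuf(int fd)
 *	{
 *		int req = 65536, got;
 *		socklen_t len = sizeof(got);
 *
 *		if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &req, sizeof(req)))
 *			return -1;
 *		if (getsockopt(fd, SOL_SOCKET, SO_SNDBUF, &got, &len))
 *			return -1;
 *		printf("asked for %d, kernel reports %d\n", req, got);
 *		return 0;
 *	}
 */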
 
 
int sock_getsockopt(struct socket *sock, int level, int optname,
char *optval, int *optlen)
{
struct sock *sk = sock->sk;
union
{
int val;
struct linger ling;
struct timeval tm;
} v;
unsigned int lv=sizeof(int),len;
if(get_user(len,optlen))
return -EFAULT;
if(len < 0)
return -EINVAL;
switch(optname)
{
case SO_DEBUG:
v.val = sk->debug;
break;
case SO_DONTROUTE:
v.val = sk->localroute;
break;
case SO_BROADCAST:
v.val= sk->broadcast;
break;
 
case SO_SNDBUF:
v.val=sk->sndbuf;
break;
case SO_RCVBUF:
v.val =sk->rcvbuf;
break;
 
case SO_REUSEADDR:
v.val = sk->reuse;
break;
 
case SO_KEEPALIVE:
v.val = sk->keepopen;
break;
 
case SO_TYPE:
v.val = sk->type;
break;
 
case SO_ERROR:
v.val = -sock_error(sk);
if(v.val==0)
v.val=xchg(&sk->err_soft,0);
break;
 
case SO_OOBINLINE:
v.val = sk->urginline;
break;
case SO_NO_CHECK:
v.val = sk->no_check;
break;
 
case SO_PRIORITY:
v.val = sk->priority;
break;
case SO_LINGER:
lv=sizeof(v.ling);
v.ling.l_onoff=sk->linger;
v.ling.l_linger=sk->lingertime/HZ;
break;
case SO_BSDCOMPAT:
v.val = sk->bsdism;
break;
 
case SO_TIMESTAMP:
v.val = sk->rcvtstamp;
break;
 
case SO_RCVTIMEO:
lv=sizeof(struct timeval);
if (sk->rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
v.tm.tv_sec = 0;
v.tm.tv_usec = 0;
} else {
v.tm.tv_sec = sk->rcvtimeo/HZ;
v.tm.tv_usec = ((sk->rcvtimeo%HZ)*1000)/HZ;
}
break;
 
case SO_SNDTIMEO:
lv=sizeof(struct timeval);
if (sk->sndtimeo == MAX_SCHEDULE_TIMEOUT) {
v.tm.tv_sec = 0;
v.tm.tv_usec = 0;
} else {
v.tm.tv_sec = sk->sndtimeo/HZ;
v.tm.tv_usec = ((sk->sndtimeo%HZ)*1000)/HZ;
}
break;
 
case SO_RCVLOWAT:
v.val = sk->rcvlowat;
break;
 
case SO_SNDLOWAT:
v.val=1;
break;
 
case SO_PASSCRED:
v.val = sock->passcred;
break;
 
case SO_PEERCRED:
if (len > sizeof(sk->peercred))
len = sizeof(sk->peercred);
if (copy_to_user(optval, &sk->peercred, len))
return -EFAULT;
goto lenout;
 
case SO_PEERNAME:
{
char address[128];
 
if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
return -ENOTCONN;
if (lv < len)
return -EINVAL;
if(copy_to_user((void*)optval, address, len))
return -EFAULT;
goto lenout;
}
 
/* Dubious BSD thing... Probably nobody even uses it, but
* the UNIX standard wants it for whatever reason... -DaveM
*/
case SO_ACCEPTCONN:
v.val = (sk->state == TCP_LISTEN);
break;
 
default:
return(-ENOPROTOOPT);
}
if (len > lv)
len = lv;
if (copy_to_user(optval, &v, len))
return -EFAULT;
lenout:
if (put_user(len, optlen))
return -EFAULT;
return 0;
}
 
static kmem_cache_t *sk_cachep;
 
/*
* All socket objects are allocated here. This is for future
* usage.
*/
struct sock *sk_alloc(int family, int priority, int zero_it)
{
struct sock *sk = kmem_cache_alloc(sk_cachep, priority);
 
if(sk && zero_it) {
memset(sk, 0, sizeof(struct sock));
sk->family = family;
sock_lock_init(sk);
}
 
return sk;
}
 
void sk_free(struct sock *sk)
{
#ifdef CONFIG_FILTER
struct sk_filter *filter;
#endif
 
if (sk->destruct)
sk->destruct(sk);
 
#ifdef CONFIG_FILTER
filter = sk->filter;
if (filter) {
sk_filter_release(sk, filter);
sk->filter = NULL;
}
#endif
 
if (atomic_read(&sk->omem_alloc))
printk(KERN_DEBUG "sk_free: optmem leakage (%d bytes) detected.\n", atomic_read(&sk->omem_alloc));
 
kmem_cache_free(sk_cachep, sk);
}
 
void __init sk_init(void)
{
sk_cachep = kmem_cache_create("sock", sizeof(struct sock), 0,
SLAB_HWCACHE_ALIGN, 0, 0);
if (!sk_cachep)
printk(KERN_CRIT "sk_init: Cannot create sock SLAB cache!");
 
if (num_physpages <= 4096) {
sysctl_wmem_max = 32767;
sysctl_rmem_max = 32767;
sysctl_wmem_default = 32767;
sysctl_rmem_default = 32767;
} else if (num_physpages >= 131072) {
sysctl_wmem_max = 131071;
sysctl_rmem_max = 131071;
}
}
 
/*
* Simple resource managers for sockets.
*/
 
 
/*
* Write buffer destructor automatically called from kfree_skb.
*/
void sock_wfree(struct sk_buff *skb)
{
struct sock *sk = skb->sk;
 
/* In case it might be waiting for more memory. */
atomic_sub(skb->truesize, &sk->wmem_alloc);
if (!sk->use_write_queue)
sk->write_space(sk);
sock_put(sk);
}
 
/*
* Read buffer destructor automatically called from kfree_skb.
*/
void sock_rfree(struct sk_buff *skb)
{
struct sock *sk = skb->sk;
 
atomic_sub(skb->truesize, &sk->rmem_alloc);
}
 
/*
* Allocate a skb from the socket's send buffer.
*/
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, int priority)
{
if (force || atomic_read(&sk->wmem_alloc) < sk->sndbuf) {
struct sk_buff * skb = alloc_skb(size, priority);
if (skb) {
skb_set_owner_w(skb, sk);
return skb;
}
}
return NULL;
}
 
/*
* Allocate a skb from the socket's receive buffer.
*/
struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int priority)
{
if (force || atomic_read(&sk->rmem_alloc) < sk->rcvbuf) {
struct sk_buff *skb = alloc_skb(size, priority);
if (skb) {
skb_set_owner_r(skb, sk);
return skb;
}
}
return NULL;
}
 
/*
* Allocate a memory block from the socket's option memory buffer.
*/
void *sock_kmalloc(struct sock *sk, int size, int priority)
{
if ((unsigned)size <= sysctl_optmem_max &&
atomic_read(&sk->omem_alloc)+size < sysctl_optmem_max) {
void *mem;
/* First do the add, to avoid the race if kmalloc
* might sleep.
*/
atomic_add(size, &sk->omem_alloc);
mem = kmalloc(size, priority);
if (mem)
return mem;
atomic_sub(size, &sk->omem_alloc);
}
return NULL;
}
 
/*
* Free an option memory block.
*/
void sock_kfree_s(struct sock *sk, void *mem, int size)
{
kfree(mem);
atomic_sub(size, &sk->omem_alloc);
}
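/* A minimal, hypothetical sketch of how a protocol might use the option
 * memory helpers above: the allocation is charged to sk->omem_alloc and
 * must be released with the same size. example_set_cookie() and its error
 * handling are illustrative only, not part of any real protocol.
 */
static int example_set_cookie(struct sock *sk, char *optval, int optlen)
{
	void *buf = sock_kmalloc(sk, optlen, GFP_KERNEL);

	if (buf == NULL)
		return -ENOBUFS;
	if (copy_from_user(buf, optval, optlen)) {
		sock_kfree_s(sk, buf, optlen);	/* undo the omem_alloc charge */
		return -EFAULT;
	}
	/* ... attach buf to protocol private state, freeing any old buffer ... */
	return 0;
}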
 
/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
I think, these locks should be removed for datagram sockets.
*/
static long sock_wait_for_wmem(struct sock * sk, long timeo)
{
DECLARE_WAITQUEUE(wait, current);
 
clear_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags);
add_wait_queue(sk->sleep, &wait);
for (;;) {
if (!timeo)
break;
if (signal_pending(current))
break;
set_bit(SOCK_NOSPACE, &sk->socket->flags);
set_current_state(TASK_INTERRUPTIBLE);
if (atomic_read(&sk->wmem_alloc) < sk->sndbuf)
break;
if (sk->shutdown & SEND_SHUTDOWN)
break;
if (sk->err)
break;
timeo = schedule_timeout(timeo);
}
__set_current_state(TASK_RUNNING);
remove_wait_queue(sk->sleep, &wait);
return timeo;
}
 
 
/*
* Generic send/receive buffer handlers
*/
 
struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
unsigned long data_len, int noblock, int *errcode)
{
struct sk_buff *skb;
long timeo;
int err;
 
timeo = sock_sndtimeo(sk, noblock);
while (1) {
err = sock_error(sk);
if (err != 0)
goto failure;
 
err = -EPIPE;
if (sk->shutdown & SEND_SHUTDOWN)
goto failure;
 
if (atomic_read(&sk->wmem_alloc) < sk->sndbuf) {
skb = alloc_skb(header_len, sk->allocation);
if (skb) {
int npages;
int i;
 
/* No pages, we're done... */
if (!data_len)
break;
 
npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
skb->truesize += data_len;
skb_shinfo(skb)->nr_frags = npages;
for (i = 0; i < npages; i++) {
struct page *page;
skb_frag_t *frag;
 
page = alloc_pages(sk->allocation, 0);
if (!page) {
err = -ENOBUFS;
skb_shinfo(skb)->nr_frags = i;
kfree_skb(skb);
goto failure;
}
 
frag = &skb_shinfo(skb)->frags[i];
frag->page = page;
frag->page_offset = 0;
frag->size = (data_len >= PAGE_SIZE ?
PAGE_SIZE :
data_len);
data_len -= PAGE_SIZE;
}
 
/* Full success... */
break;
}
err = -ENOBUFS;
goto failure;
}
set_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags);
set_bit(SOCK_NOSPACE, &sk->socket->flags);
err = -EAGAIN;
if (!timeo)
goto failure;
if (signal_pending(current))
goto interrupted;
timeo = sock_wait_for_wmem(sk, timeo);
}
 
skb_set_owner_w(skb, sk);
return skb;
 
interrupted:
err = sock_intr_errno(timeo);
failure:
*errcode = err;
return NULL;
}
 
struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
int noblock, int *errcode)
{
return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
}
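/* A hypothetical sketch of the usual caller of sock_alloc_send_skb(): a
 * datagram protocol's sendmsg() blocks (unless MSG_DONTWAIT is set) until
 * send-buffer space is available, then copies the user iovec into the new
 * skb. The 16-byte headroom and example_sendmsg() itself are illustrative.
 */
static int example_sendmsg(struct sock *sk, struct msghdr *msg, int len)
{
	struct sk_buff *skb;
	int err;

	skb = sock_alloc_send_skb(sk, len + 16,
				  msg->msg_flags & MSG_DONTWAIT, &err);
	if (skb == NULL)
		return err;
	skb_reserve(skb, 16);
	err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (err) {
		kfree_skb(skb);
		return err;
	}
	/* ... hand the skb to the device or protocol transmit path ... */
	return len;
}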
 
void __lock_sock(struct sock *sk)
{
DECLARE_WAITQUEUE(wait, current);
 
add_wait_queue_exclusive(&sk->lock.wq, &wait);
for(;;) {
current->state = TASK_UNINTERRUPTIBLE;
spin_unlock_bh(&sk->lock.slock);
schedule();
spin_lock_bh(&sk->lock.slock);
if(!sk->lock.users)
break;
}
current->state = TASK_RUNNING;
remove_wait_queue(&sk->lock.wq, &wait);
}
 
void __release_sock(struct sock *sk)
{
struct sk_buff *skb = sk->backlog.head;
 
do {
sk->backlog.head = sk->backlog.tail = NULL;
bh_unlock_sock(sk);
 
do {
struct sk_buff *next = skb->next;
 
skb->next = NULL;
sk->backlog_rcv(sk, skb);
skb = next;
} while (skb != NULL);
 
bh_lock_sock(sk);
} while((skb = sk->backlog.head) != NULL);
}
 
/*
* Generic socket manager library. Most simpler socket families
* use this to manage their socket lists. At some point we should
* hash these. By making this generic we get the lot hashed for free.
*
* It is broken by design. All the protocols using it must be fixed. --ANK
*/
 
rwlock_t net_big_sklist_lock = RW_LOCK_UNLOCKED;
void sklist_remove_socket(struct sock **list, struct sock *sk)
{
struct sock *s;
 
write_lock_bh(&net_big_sklist_lock);
 
while ((s = *list) != NULL) {
if (s == sk) {
*list = s->next;
break;
}
list = &s->next;
}
 
write_unlock_bh(&net_big_sklist_lock);
if (s)
sock_put(s);
}
 
void sklist_insert_socket(struct sock **list, struct sock *sk)
{
write_lock_bh(&net_big_sklist_lock);
sk->next= *list;
*list=sk;
sock_hold(sk);
write_unlock_bh(&net_big_sklist_lock);
}
 
/*
* This is only called from user mode. Thus it protects itself against
* interrupt users but doesn't worry about being called during work.
* Once it is removed from the queue no interrupt or bottom half will
* touch it and we are (fairly 8-) ) safe.
*/
 
void sklist_destroy_socket(struct sock **list, struct sock *sk);
 
/*
* Handler for deferred kills.
*/
 
static void sklist_destroy_timer(unsigned long data)
{
struct sock *sk=(struct sock *)data;
sklist_destroy_socket(NULL,sk);
}
 
/*
* Destroy a socket. We pass NULL for a list if we know the
* socket is not on a list.
*/
void sklist_destroy_socket(struct sock **list,struct sock *sk)
{
if(list)
sklist_remove_socket(list, sk);
 
skb_queue_purge(&sk->receive_queue);
 
if(atomic_read(&sk->wmem_alloc) == 0 &&
atomic_read(&sk->rmem_alloc) == 0 &&
sk->dead)
{
sock_put(sk);
}
else
{
/*
* Someone is using our buffers still.. defer
*/
init_timer(&sk->timer);
sk->timer.expires=jiffies+SOCK_DESTROY_TIME;
sk->timer.function=sklist_destroy_timer;
sk->timer.data = (unsigned long)sk;
add_timer(&sk->timer);
}
}
 
/*
* Set of default routines for initialising struct proto_ops when
* the protocol does not support a particular function. In certain
* cases where it makes no sense for a protocol to have a "do nothing"
* function, some default processing is provided.
*/
 
int sock_no_release(struct socket *sock)
{
return 0;
}
 
int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
{
return -EOPNOTSUPP;
}
 
int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
int len, int flags)
{
return -EOPNOTSUPP;
}
 
int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
{
return -EOPNOTSUPP;
}
 
int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
{
return -EOPNOTSUPP;
}
 
int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
int *len, int peer)
{
return -EOPNOTSUPP;
}
 
unsigned int sock_no_poll(struct file * file, struct socket *sock, poll_table *pt)
{
return 0;
}
 
int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
return -EOPNOTSUPP;
}
 
int sock_no_listen(struct socket *sock, int backlog)
{
return -EOPNOTSUPP;
}
 
int sock_no_shutdown(struct socket *sock, int how)
{
return -EOPNOTSUPP;
}
 
int sock_no_setsockopt(struct socket *sock, int level, int optname,
char *optval, int optlen)
{
return -EOPNOTSUPP;
}
 
int sock_no_getsockopt(struct socket *sock, int level, int optname,
char *optval, int *optlen)
{
return -EOPNOTSUPP;
}
 
/*
* Note: if you add something that sleeps here then change sock_fcntl()
* to do proper fd locking.
*/
int sock_no_fcntl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
struct sock *sk = sock->sk;
 
switch(cmd)
{
case F_SETOWN:
/*
* This is a little restrictive, but it's the only
* way to make sure that you can't send a sigurg to
* another process.
*/
if (current->pgrp != -arg &&
current->pid != arg &&
!capable(CAP_KILL)) return(-EPERM);
sk->proc = arg;
return(0);
case F_GETOWN:
return(sk->proc);
default:
return(-EINVAL);
}
}
 
int sock_no_sendmsg(struct socket *sock, struct msghdr *m, int flags,
struct scm_cookie *scm)
{
return -EOPNOTSUPP;
}
 
int sock_no_recvmsg(struct socket *sock, struct msghdr *m, int len, int flags,
struct scm_cookie *scm)
{
return -EOPNOTSUPP;
}
 
int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
/* Mirror missing mmap method error code */
return -ENODEV;
}
 
ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
{
ssize_t res;
struct msghdr msg;
struct iovec iov;
mm_segment_t old_fs;
char *kaddr;
 
kaddr = kmap(page);
 
msg.msg_name = NULL;
msg.msg_namelen = 0;
msg.msg_iov = &iov;
msg.msg_iovlen = 1;
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_flags = flags;
 
iov.iov_base = kaddr + offset;
iov.iov_len = size;
 
old_fs = get_fs();
set_fs(KERNEL_DS);
res = sock_sendmsg(sock, &msg, size);
set_fs(old_fs);
 
kunmap(page);
return res;
}
 
/*
* Default Socket Callbacks
*/
 
void sock_def_wakeup(struct sock *sk)
{
read_lock(&sk->callback_lock);
if (sk->sleep && waitqueue_active(sk->sleep))
wake_up_interruptible_all(sk->sleep);
read_unlock(&sk->callback_lock);
}
 
void sock_def_error_report(struct sock *sk)
{
read_lock(&sk->callback_lock);
if (sk->sleep && waitqueue_active(sk->sleep))
wake_up_interruptible(sk->sleep);
sk_wake_async(sk,0,POLL_ERR);
read_unlock(&sk->callback_lock);
}
 
void sock_def_readable(struct sock *sk, int len)
{
read_lock(&sk->callback_lock);
if (sk->sleep && waitqueue_active(sk->sleep))
wake_up_interruptible(sk->sleep);
sk_wake_async(sk,1,POLL_IN);
read_unlock(&sk->callback_lock);
}
 
void sock_def_write_space(struct sock *sk)
{
read_lock(&sk->callback_lock);
 
/* Do not wake up a writer until he can make "significant"
* progress. --DaveM
*/
if((atomic_read(&sk->wmem_alloc) << 1) <= sk->sndbuf) {
if (sk->sleep && waitqueue_active(sk->sleep))
wake_up_interruptible(sk->sleep);
 
/* Should agree with poll, otherwise some programs break */
if (sock_writeable(sk))
sk_wake_async(sk, 2, POLL_OUT);
}
 
read_unlock(&sk->callback_lock);
}
 
void sock_def_destruct(struct sock *sk)
{
if (sk->protinfo.destruct_hook)
kfree(sk->protinfo.destruct_hook);
}
 
void sock_init_data(struct socket *sock, struct sock *sk)
{
skb_queue_head_init(&sk->receive_queue);
skb_queue_head_init(&sk->write_queue);
skb_queue_head_init(&sk->error_queue);
 
init_timer(&sk->timer);
sk->allocation = GFP_KERNEL;
sk->rcvbuf = sysctl_rmem_default;
sk->sndbuf = sysctl_wmem_default;
sk->state = TCP_CLOSE;
sk->zapped = 1;
sk->socket = sock;
 
if(sock)
{
sk->type = sock->type;
sk->sleep = &sock->wait;
sock->sk = sk;
} else
sk->sleep = NULL;
 
sk->dst_lock = RW_LOCK_UNLOCKED;
sk->callback_lock = RW_LOCK_UNLOCKED;
 
sk->state_change = sock_def_wakeup;
sk->data_ready = sock_def_readable;
sk->write_space = sock_def_write_space;
sk->error_report = sock_def_error_report;
sk->destruct = sock_def_destruct;
 
sk->peercred.pid = 0;
sk->peercred.uid = -1;
sk->peercred.gid = -1;
sk->rcvlowat = 1;
sk->rcvtimeo = MAX_SCHEDULE_TIMEOUT;
sk->sndtimeo = MAX_SCHEDULE_TIMEOUT;
 
atomic_set(&sk->refcnt, 1);
}
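/* A hypothetical sketch of how a protocol family's create() hook typically
 * combines sk_alloc() and sock_init_data() above. The PF_INET constant and
 * example_create() are illustrative; a real family adds its own private
 * initialisation after this point.
 */
static int example_create(struct socket *sock, int protocol)
{
	struct sock *sk = sk_alloc(PF_INET, GFP_KERNEL, 1);

	if (sk == NULL)
		return -ENOBUFS;
	sock_init_data(sock, sk);	/* queues, timer, default callbacks */
	sk->protocol = protocol;
	return 0;
}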
/sysctl_net_core.c
0,0 → 1,98
/* -*- linux-c -*-
* sysctl_net_core.c: sysctl interface to net core subsystem.
*
* Begun April 1, 1996, Mike Shaver.
* Added /proc/sys/net/core directory entry (empty =) ). [MS]
*/
 
#include <linux/mm.h>
#include <linux/sysctl.h>
#include <linux/config.h>
 
#ifdef CONFIG_SYSCTL
 
extern int netdev_max_backlog;
extern int weight_p;
extern int no_cong_thresh;
extern int no_cong;
extern int lo_cong;
extern int mod_cong;
extern int netdev_fastroute;
extern int net_msg_cost;
extern int net_msg_burst;
 
extern __u32 sysctl_wmem_max;
extern __u32 sysctl_rmem_max;
extern __u32 sysctl_wmem_default;
extern __u32 sysctl_rmem_default;
 
extern int sysctl_core_destroy_delay;
extern int sysctl_optmem_max;
extern int sysctl_somaxconn;
extern int sysctl_hot_list_len;
 
#ifdef CONFIG_NET_DIVERT
extern char sysctl_divert_version[];
#endif /* CONFIG_NET_DIVERT */
 
ctl_table core_table[] = {
#ifdef CONFIG_NET
{NET_CORE_WMEM_MAX, "wmem_max",
&sysctl_wmem_max, sizeof(int), 0644, NULL,
&proc_dointvec},
{NET_CORE_RMEM_MAX, "rmem_max",
&sysctl_rmem_max, sizeof(int), 0644, NULL,
&proc_dointvec},
{NET_CORE_WMEM_DEFAULT, "wmem_default",
&sysctl_wmem_default, sizeof(int), 0644, NULL,
&proc_dointvec},
{NET_CORE_RMEM_DEFAULT, "rmem_default",
&sysctl_rmem_default, sizeof(int), 0644, NULL,
&proc_dointvec},
{NET_CORE_DEV_WEIGHT, "dev_weight",
&weight_p, sizeof(int), 0644, NULL,
&proc_dointvec},
{NET_CORE_MAX_BACKLOG, "netdev_max_backlog",
&netdev_max_backlog, sizeof(int), 0644, NULL,
&proc_dointvec},
{NET_CORE_NO_CONG_THRESH, "no_cong_thresh",
&no_cong_thresh, sizeof(int), 0644, NULL,
&proc_dointvec},
{NET_CORE_NO_CONG, "no_cong",
&no_cong, sizeof(int), 0644, NULL,
&proc_dointvec},
{NET_CORE_LO_CONG, "lo_cong",
&lo_cong, sizeof(int), 0644, NULL,
&proc_dointvec},
{NET_CORE_MOD_CONG, "mod_cong",
&mod_cong, sizeof(int), 0644, NULL,
&proc_dointvec},
#ifdef CONFIG_NET_FASTROUTE
{NET_CORE_FASTROUTE, "netdev_fastroute",
&netdev_fastroute, sizeof(int), 0644, NULL,
&proc_dointvec},
#endif
{NET_CORE_MSG_COST, "message_cost",
&net_msg_cost, sizeof(int), 0644, NULL,
&proc_dointvec_jiffies},
{NET_CORE_MSG_BURST, "message_burst",
&net_msg_burst, sizeof(int), 0644, NULL,
&proc_dointvec_jiffies},
{NET_CORE_OPTMEM_MAX, "optmem_max",
&sysctl_optmem_max, sizeof(int), 0644, NULL,
&proc_dointvec},
{NET_CORE_HOT_LIST_LENGTH, "hot_list_length",
&sysctl_hot_list_len, sizeof(int), 0644, NULL,
&proc_dointvec},
#ifdef CONFIG_NET_DIVERT
{NET_CORE_DIVERT_VERSION, "divert_version",
(void *)sysctl_divert_version, 32, 0444, NULL,
&proc_dostring},
#endif /* CONFIG_NET_DIVERT */
{NET_CORE_SOMAXCONN, "somaxconn",
&sysctl_somaxconn, sizeof(int), 0644, NULL,
&proc_dointvec },
#endif /* CONFIG_NET */
{ 0 }
};
#endif
/utils.c
0,0 → 1,73
/*
* Generic address resolution entity
*
* Authors:
* net_random Alan Cox
* net_ratelimit Andy Kleen
*
* Created by Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
 
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/mm.h>
 
static unsigned long net_rand_seed = 152L;
 
unsigned long net_random(void)
{
net_rand_seed=net_rand_seed*69069L+1;
return net_rand_seed^jiffies;
}
 
void net_srandom(unsigned long entropy)
{
net_rand_seed ^= entropy;
net_random();
}
 
int net_msg_cost = 5*HZ;
int net_msg_burst = 10*5*HZ;
 
/*
* This enforces a rate limit: not more than one kernel message
* every 5 seconds, to make a denial-of-service attack impossible.
*
* All warning printk()s should be guarded by this function.
*/
int net_ratelimit(void)
{
static spinlock_t ratelimit_lock = SPIN_LOCK_UNLOCKED;
static unsigned long toks = 10*5*HZ;
static unsigned long last_msg;
static int missed;
unsigned long flags;
unsigned long now = jiffies;
 
spin_lock_irqsave(&ratelimit_lock, flags);
toks += now - last_msg;
last_msg = now;
if (toks > net_msg_burst)
toks = net_msg_burst;
if (toks >= net_msg_cost) {
int lost = missed;
missed = 0;
toks -= net_msg_cost;
spin_unlock_irqrestore(&ratelimit_lock, flags);
if (lost)
printk(KERN_WARNING "NET: %d messages suppressed.\n", lost);
return 1;
}
missed++;
spin_unlock_irqrestore(&ratelimit_lock, flags);
return 0;
}
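/* A minimal sketch of the guard pattern the comment above asks for: a
 * printk() that can be provoked by remote packets is only emitted when
 * net_ratelimit() grants it. The message text is illustrative.
 */
static void example_warn(void)
{
	if (net_ratelimit())
		printk(KERN_WARNING "example: dropping malformed packet\n");
}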
/profile.c
0,0 → 1,293
#include <linux/config.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/netdevice.h>
#include <linux/string.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <linux/ip.h>
#include <linux/inet.h>
#include <net/checksum.h>
 
#include <asm/processor.h>
#include <asm/uaccess.h>
#include <asm/system.h>
 
#include <net/profile.h>
 
#ifdef CONFIG_NET_PROFILE
 
atomic_t net_profile_active;
struct timeval net_profile_adjust;
 
NET_PROFILE_DEFINE(total);
 
struct net_profile_slot *net_profile_chain = &net_prof_total;
 
#ifdef __alpha__
__u32 alpha_lo;
long alpha_hi;
 
static void alpha_tick(unsigned long);
 
static struct timer_list alpha_timer =
{ NULL, NULL, 0, 0L, alpha_tick };
 
void alpha_tick(unsigned long dummy)
{
struct timeval dummy_stamp;
net_profile_stamp(&dummy_stamp);
alpha_timer.expires = jiffies + 4*HZ;
add_timer(&alpha_timer);
}
 
#endif
 
void net_profile_irq_adjust(struct timeval *entered, struct timeval* leaved)
{
struct net_profile_slot *s;
 
net_profile_sub(entered, leaved);
for (s = net_profile_chain; s; s = s->next) {
if (s->active)
net_profile_add(leaved, &s->irq);
}
}
 
 
#ifdef CONFIG_PROC_FS
static int profile_read_proc(char *buffer, char **start, off_t offset,
int length, int *eof, void *data)
{
off_t pos=0;
off_t begin=0;
int len=0;
struct net_profile_slot *s;
 
len+= sprintf(buffer, "Slot Hits Hi Lo OnIrqHi OnIrqLo Ufl\n");
 
if (offset == 0) {
cli();
net_prof_total.active = 1;
atomic_inc(&net_profile_active);
NET_PROFILE_LEAVE(total);
sti();
}
for (s = net_profile_chain; s; s = s->next) {
struct net_profile_slot tmp;
 
cli();
tmp = *s;
 
/* Wrong, but pretty close to truth */
 
s->accumulator.tv_sec = 0;
s->accumulator.tv_usec = 0;
s->irq.tv_sec = 0;
s->irq.tv_usec = 0;
s->hits = 0;
s->underflow = 0;
/* Repair active count, it is possible, only if code has a bug */
if (s->active) {
s->active = 0;
atomic_dec(&net_profile_active);
}
sti();
 
net_profile_sub(&tmp.irq, &tmp.accumulator);
 
len += sprintf(buffer+len,"%-15s %-10d %-10ld %-10lu %-10lu %-10lu %d/%d",
tmp.id,
tmp.hits,
tmp.accumulator.tv_sec,
tmp.accumulator.tv_usec,
tmp.irq.tv_sec,
tmp.irq.tv_usec,
tmp.underflow, tmp.active);
 
buffer[len++]='\n';
pos=begin+len;
if(pos<offset) {
len=0;
begin=pos;
}
if(pos>offset+length)
goto done;
}
*eof = 1;
 
done:
*start=buffer+(offset-begin);
len-=(offset-begin);
if(len>length)
len=length;
if (len < 0)
len = 0;
if (offset == 0) {
cli();
net_prof_total.active = 0;
net_prof_total.hits = 0;
net_profile_stamp(&net_prof_total.entered);
sti();
}
return len;
}
#endif
 
struct iphdr whitehole_iph;
int whitehole_count;
 
static int whitehole_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct net_device_stats *stats;
 
stats = (struct net_device_stats *)dev->priv;
stats->tx_packets++;
stats->tx_bytes+=skb->len;
 
dev_kfree_skb(skb);
return 0;
}
 
static void whitehole_inject(unsigned long);
int whitehole_init(struct net_device *dev);
 
static struct timer_list whitehole_timer =
{ NULL, NULL, 0, 0L, whitehole_inject };
 
static struct net_device whitehole_dev = {
"whitehole", 0x0, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0, NULL, whitehole_init, };
 
static int whitehole_open(struct net_device *dev)
{
whitehole_count = 100000;
whitehole_timer.expires = jiffies + 5*HZ;
add_timer(&whitehole_timer);
return 0;
}
 
static int whitehole_close(struct net_device *dev)
{
del_timer(&whitehole_timer);
return 0;
}
 
static void whitehole_inject(unsigned long dummy)
{
struct net_device_stats *stats = (struct net_device_stats *)whitehole_dev.priv;
extern int netdev_dropping;
 
do {
struct iphdr *iph;
struct sk_buff *skb = alloc_skb(128, GFP_ATOMIC);
if (!skb)
break;
skb_reserve(skb, 32);
iph = (struct iphdr*)skb_put(skb, sizeof(*iph));
skb->mac.raw = ((u8*)iph) - 14;
memcpy(iph, &whitehole_iph, sizeof(*iph));
skb->protocol = __constant_htons(ETH_P_IP);
skb->dev = &whitehole_dev;
skb->pkt_type = PACKET_HOST;
stats->rx_packets++;
stats->rx_bytes += skb->len;
netif_rx(skb);
whitehole_count--;
} while (netdev_dropping == 0 && whitehole_count>0);
if (whitehole_count > 0) {
whitehole_timer.expires = jiffies + 1;
add_timer(&whitehole_timer);
}
}
 
static struct net_device_stats *whitehole_get_stats(struct net_device *dev)
{
struct net_device_stats *stats = (struct net_device_stats *) dev->priv;
return stats;
}
 
int __init whitehole_init(struct net_device *dev)
{
dev->priv = kmalloc(sizeof(struct net_device_stats), GFP_KERNEL);
if (dev->priv == NULL)
return -ENOBUFS;
memset(dev->priv, 0, sizeof(struct net_device_stats));
dev->get_stats = whitehole_get_stats;
dev->hard_start_xmit = whitehole_xmit;
dev->open = whitehole_open;
dev->stop = whitehole_close;
ether_setup(dev);
dev->tx_queue_len = 0;
dev->flags |= IFF_NOARP;
dev->flags &= ~(IFF_BROADCAST|IFF_MULTICAST);
dev->iflink = 0;
whitehole_iph.ihl = 5;
whitehole_iph.version = 4;
whitehole_iph.ttl = 2;
whitehole_iph.saddr = in_aton("193.233.7.21");
whitehole_iph.daddr = in_aton("193.233.7.10");
whitehole_iph.tot_len = htons(20);
whitehole_iph.check = ip_compute_csum((void *)&whitehole_iph, 20);
return 0;
}
 
int net_profile_register(struct net_profile_slot *slot)
{
cli();
slot->next = net_profile_chain;
net_profile_chain = slot;
sti();
return 0;
}
 
int net_profile_unregister(struct net_profile_slot *slot)
{
struct net_profile_slot **sp, *s;
 
for (sp = &net_profile_chain; (s = *sp) != NULL; sp = &s->next) {
if (s == slot) {
cli();
*sp = s->next;
sti();
return 0;
}
}
return -ESRCH;
}
 
 
int __init net_profile_init(void)
{
int i;
 
#ifdef CONFIG_PROC_FS
create_proc_read_entry("net/profile", 0, 0, profile_read_proc, NULL);
#endif
 
register_netdevice(&whitehole_dev);
 
printk("Evaluating net profiler cost ...");
#ifdef __alpha__
alpha_tick(0);
#endif
for (i=0; i<1024; i++) {
NET_PROFILE_ENTER(total);
NET_PROFILE_LEAVE(total);
}
if (net_prof_total.accumulator.tv_sec) {
printk(" too high!\n");
} else {
net_profile_adjust.tv_usec = net_prof_total.accumulator.tv_usec>>10;
printk("%ld units\n", net_profile_adjust.tv_usec);
}
net_prof_total.hits = 0;
net_profile_stamp(&net_prof_total.entered);
return 0;
}
 
#endif
/dev_mcast.c
0,0 → 1,275
/*
* Linux NET3: Multicast List maintenance.
*
* Authors:
* Tim Kordas <tjk@nostromo.eeap.cwru.edu>
* Richard Underwood <richard@wuzz.demon.co.uk>
*
* Stir fried together from the IP multicast and CAP patches above
* Alan Cox <Alan.Cox@linux.org>
*
* Fixes:
* Alan Cox : Update the device on a real delete
* rather than any time but...
* Alan Cox : IFF_ALLMULTI support.
* Alan Cox : New format set_multicast_list() calls.
* Gleb Natapov : Remove dev_mc_lock.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
 
#include <linux/config.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <net/ip.h>
#include <net/route.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/arp.h>
 
 
/*
* Device multicast list maintenance.
*
* This is used both by IP and by the user level maintenance functions.
* Unlike BSD we maintain a usage count on a given multicast address so
* that a casual user application can add/delete multicasts used by
* protocols without doing damage to the protocols when it deletes the
* entries. It also helps IP as it tracks overlapping maps.
*
* Device mc lists are changed by bh at least if IPv6 is enabled,
* so they must be bh protected.
*
* We block accesses to device mc filters with dev->xmit_lock.
*/
 
/*
* Update the multicast list into the physical NIC controller.
*/
static void __dev_mc_upload(struct net_device *dev)
{
/* Don't do anything till we up the interface
* [dev_open will call this function so the list will
* stay sane]
*/
 
if (!(dev->flags&IFF_UP))
return;
 
/*
* Devices with no set multicast or which have been
* detached don't get set.
*/
 
if (dev->set_multicast_list == NULL ||
!netif_device_present(dev))
return;
 
dev->set_multicast_list(dev);
}
 
void dev_mc_upload(struct net_device *dev)
{
spin_lock_bh(&dev->xmit_lock);
__dev_mc_upload(dev);
spin_unlock_bh(&dev->xmit_lock);
}
 
/*
* Delete a device level multicast
*/
int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl)
{
int err = 0;
struct dev_mc_list *dmi, **dmip;
 
spin_lock_bh(&dev->xmit_lock);
 
for (dmip = &dev->mc_list; (dmi = *dmip) != NULL; dmip = &dmi->next) {
/*
* Find the entry we want to delete. The device could
* have variable length entries so check these too.
*/
if (memcmp(dmi->dmi_addr, addr, dmi->dmi_addrlen) == 0 &&
alen == dmi->dmi_addrlen) {
if (glbl) {
int old_glbl = dmi->dmi_gusers;
dmi->dmi_gusers = 0;
if (old_glbl == 0)
break;
}
if (--dmi->dmi_users)
goto done;
 
/*
* Last user. So delete the entry.
*/
*dmip = dmi->next;
dev->mc_count--;
 
kfree(dmi);
 
/*
* We have altered the list, so the card
* loaded filter is now wrong. Fix it
*/
__dev_mc_upload(dev);
spin_unlock_bh(&dev->xmit_lock);
return 0;
}
}
err = -ENOENT;
done:
spin_unlock_bh(&dev->xmit_lock);
return err;
}
 
/*
* Add a device level multicast
*/
int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl)
{
int err = 0;
struct dev_mc_list *dmi, *dmi1;
 
dmi1 = (struct dev_mc_list *)kmalloc(sizeof(*dmi), GFP_ATOMIC);
 
spin_lock_bh(&dev->xmit_lock);
for (dmi = dev->mc_list; dmi != NULL; dmi = dmi->next) {
if (memcmp(dmi->dmi_addr, addr, dmi->dmi_addrlen) == 0 &&
dmi->dmi_addrlen == alen) {
if (glbl) {
int old_glbl = dmi->dmi_gusers;
dmi->dmi_gusers = 1;
if (old_glbl)
goto done;
}
dmi->dmi_users++;
goto done;
}
}
 
if ((dmi = dmi1) == NULL) {
spin_unlock_bh(&dev->xmit_lock);
return -ENOMEM;
}
memcpy(dmi->dmi_addr, addr, alen);
dmi->dmi_addrlen = alen;
dmi->next = dev->mc_list;
dmi->dmi_users = 1;
dmi->dmi_gusers = glbl ? 1 : 0;
dev->mc_list = dmi;
dev->mc_count++;
 
__dev_mc_upload(dev);
spin_unlock_bh(&dev->xmit_lock);
return 0;
 
done:
spin_unlock_bh(&dev->xmit_lock);
if (dmi1)
kfree(dmi1);
return err;
}
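/* A hypothetical sketch of a protocol joining and leaving a hardware
 * multicast group with the helpers above. dev_mc_add() uploads the new
 * filter itself, and the usage count means every add needs a matching
 * delete. The Ethernet group address is illustrative.
 */
static char example_mc_addr[ETH_ALEN] = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x01 };

static void example_join(struct net_device *dev)
{
	dev_mc_add(dev, example_mc_addr, ETH_ALEN, 0);
}

static void example_leave(struct net_device *dev)
{
	dev_mc_delete(dev, example_mc_addr, ETH_ALEN, 0);
}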
 
/*
* Discard multicast list when a device is downed
*/
 
void dev_mc_discard(struct net_device *dev)
{
spin_lock_bh(&dev->xmit_lock);
while (dev->mc_list != NULL) {
struct dev_mc_list *tmp = dev->mc_list;
dev->mc_list = tmp->next;
if (tmp->dmi_users > tmp->dmi_gusers)
printk("dev_mc_discard: multicast leakage! dmi_users=%d\n", tmp->dmi_users);
kfree(tmp);
}
dev->mc_count = 0;
 
spin_unlock_bh(&dev->xmit_lock);
}
 
#ifdef CONFIG_PROC_FS
static int dev_mc_read_proc(char *buffer, char **start, off_t offset,
int length, int *eof, void *data)
{
off_t pos = 0, begin = 0;
struct dev_mc_list *m;
int len = 0;
struct net_device *dev;
 
read_lock(&dev_base_lock);
for (dev = dev_base; dev; dev = dev->next) {
spin_lock_bh(&dev->xmit_lock);
for (m = dev->mc_list; m; m = m->next) {
int i;
 
len += sprintf(buffer+len,"%-4d %-15s %-5d %-5d ", dev->ifindex,
dev->name, m->dmi_users, m->dmi_gusers);
 
for (i = 0; i < m->dmi_addrlen; i++)
len += sprintf(buffer+len, "%02x", m->dmi_addr[i]);
 
len += sprintf(buffer+len, "\n");
 
pos = begin + len;
if (pos < offset) {
len = 0;
begin = pos;
}
if (pos > offset + length) {
spin_unlock_bh(&dev->xmit_lock);
goto done;
}
}
spin_unlock_bh(&dev->xmit_lock);
}
*eof = 1;
 
done:
read_unlock(&dev_base_lock);
*start = buffer + (offset - begin);
len -= (offset - begin);
if (len > length)
len = length;
if (len < 0)
len = 0;
return len;
}
#endif
 
void __init dev_mcast_init(void)
{
#ifdef CONFIG_PROC_FS
create_proc_read_entry("net/dev_mcast", 0, 0, dev_mc_read_proc, NULL);
#endif
}
 
/iovec.c
0,0 → 1,279
/*
* iovec manipulation routines.
*
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Fixes:
* Andrew Lunn : Errors in iovec copying.
* Pedro Roque : Added memcpy_fromiovecend and
* csum_..._fromiovecend.
* Andi Kleen : fixed error handling for 2.1
* Alexey Kuznetsov: 2.1 optimisations
* Andi Kleen : Fix csum*fromiovecend for IPv6.
*/
 
 
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/net.h>
#include <linux/in6.h>
#include <asm/uaccess.h>
#include <asm/byteorder.h>
#include <net/checksum.h>
#include <net/sock.h>
 
/*
* Verify iovec. The caller must ensure that the iovec is big enough
* to hold the message iovec.
*
* Save time not doing verify_area. copy_*_user will make this work
* in any case.
*/
 
int verify_iovec(struct msghdr *m, struct iovec *iov, char *address, int mode)
{
int size, err, ct;
if(m->msg_namelen)
{
if(mode==VERIFY_READ)
{
err=move_addr_to_kernel(m->msg_name, m->msg_namelen, address);
if(err<0)
goto out;
}
m->msg_name = address;
} else
m->msg_name = NULL;
 
err = -EFAULT;
size = m->msg_iovlen * sizeof(struct iovec);
if (copy_from_user(iov, m->msg_iov, size))
goto out;
m->msg_iov=iov;
 
for (err = 0, ct = 0; ct < m->msg_iovlen; ct++) {
err += iov[ct].iov_len;
/* Goal is not to verify user data, but to prevent returning
negative value, which is interpreted as errno.
Overflow is still possible, but it is harmless.
*/
if (err < 0)
return -EMSGSIZE;
}
out:
return err;
}
 
/*
* Copy kernel to iovec. Returns -EFAULT on error.
*
* Note: this modifies the original iovec.
*/
int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len)
{
int err = -EFAULT;
 
while(len>0)
{
if(iov->iov_len)
{
int copy = min_t(unsigned int, iov->iov_len, len);
if (copy_to_user(iov->iov_base, kdata, copy))
goto out;
kdata+=copy;
len-=copy;
iov->iov_len-=copy;
iov->iov_base+=copy;
}
iov++;
}
err = 0;
out:
return err;
}
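/* A hypothetical sketch of the typical recvmsg()-side caller: data already
 * gathered into a kernel buffer (an skb, say) is scattered out into the
 * user's iovec. Because the helper advances iov_base and iov_len, the iovec
 * must not be reused afterwards. example_deliver() is illustrative.
 */
static int example_deliver(struct sk_buff *skb, struct msghdr *msg, int copied)
{
	return memcpy_toiovec(msg->msg_iov, skb->data, copied);	/* 0 or -EFAULT */
}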
 
/*
* In kernel copy to iovec. Returns -EFAULT on error.
*
* Note: this modifies the original iovec.
*/
void memcpy_tokerneliovec(struct iovec *iov, unsigned char *kdata, int len)
{
while(len>0)
{
if(iov->iov_len)
{
int copy = min_t(unsigned int, iov->iov_len, len);
memcpy(iov->iov_base, kdata, copy);
kdata+=copy;
len-=copy;
iov->iov_len-=copy;
iov->iov_base+=copy;
}
iov++;
}
}
 
 
/*
* Copy iovec to kernel. Returns -EFAULT on error.
*
* Note: this modifies the original iovec.
*/
int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len)
{
int err = -EFAULT;
 
while(len>0)
{
if(iov->iov_len)
{
int copy = min_t(unsigned int, len, iov->iov_len);
if (copy_from_user(kdata, iov->iov_base, copy))
goto out;
len-=copy;
kdata+=copy;
iov->iov_base+=copy;
iov->iov_len-=copy;
}
iov++;
}
err = 0;
out:
return err;
}
 
 
/*
* For use with ip_build_xmit
*/
 
int memcpy_fromiovecend(unsigned char *kdata, struct iovec *iov, int offset,
int len)
{
int err = -EFAULT;
 
/* Skip over the finished iovecs */
while(offset >= iov->iov_len)
{
offset -= iov->iov_len;
iov++;
}
 
while (len > 0)
{
u8 *base = iov->iov_base + offset;
int copy = min_t(unsigned int, len, iov->iov_len - offset);
 
offset = 0;
if (copy_from_user(kdata, base, copy))
goto out;
len -= copy;
kdata += copy;
iov++;
}
err = 0;
out:
return err;
}
 
/*
* And now for the all-in-one: copy and checksum from a user iovec
* directly to a datagram
* All calls to csum_partial except the last must be in 32-bit chunks
*
* ip_build_xmit must ensure that when fragmenting only the last
* call to this function will be unaligned also.
*/
 
int csum_partial_copy_fromiovecend(unsigned char *kdata, struct iovec *iov,
int offset, unsigned int len, int *csump)
{
int csum = *csump;
int partial_cnt = 0, err = 0;
 
/* Skip over the finished iovecs */
while (offset >= iov->iov_len)
{
offset -= iov->iov_len;
iov++;
}
 
while (len > 0)
{
u8 *base = iov->iov_base + offset;
int copy = min_t(unsigned int, len, iov->iov_len - offset);
 
offset = 0;
/* There is a remnant from previous iov. */
if (partial_cnt)
{
int par_len = 4 - partial_cnt;
 
/* iov component is too short ... */
if (par_len > copy) {
if (copy_from_user(kdata, base, copy))
goto out_fault;
kdata += copy;
base += copy;
partial_cnt += copy;
len -= copy;
iov++;
if (len)
continue;
*csump = csum_partial(kdata - partial_cnt,
partial_cnt, csum);
goto out;
}
if (copy_from_user(kdata, base, par_len))
goto out_fault;
csum = csum_partial(kdata - partial_cnt, 4, csum);
kdata += par_len;
base += par_len;
copy -= par_len;
len -= par_len;
partial_cnt = 0;
}
 
if (len > copy)
{
partial_cnt = copy % 4;
if (partial_cnt)
{
copy -= partial_cnt;
if (copy_from_user(kdata + copy, base + copy,
partial_cnt))
goto out_fault;
}
}
 
if (copy) {
csum = csum_and_copy_from_user(base, kdata, copy,
csum, &err);
if (err)
goto out;
}
len -= copy + partial_cnt;
kdata += copy + partial_cnt;
iov++;
}
*csump = csum;
out:
return err;
 
out_fault:
err = -EFAULT;
goto out;
}
/dv.c
0,0 → 1,559
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
* interface as the means of communication with the user level.
*
* Generic frame diversion
*
* Version: @(#)eth.c 0.41 09/09/2000
*
* Authors:
* Benoit LOCHER: initial integration within the kernel with support for ethernet
* Dave Miller: improvement on the code (correctness, performance and source files)
*
*/
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/ip.h>
#include <linux/udp.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <net/dst.h>
#include <net/arp.h>
#include <net/sock.h>
#include <net/ipv6.h>
#include <net/ip.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/checksum.h>
#include <linux/divert.h>
#include <linux/sockios.h>
 
const char sysctl_divert_version[32]="0.46"; /* Current version */
 
int __init dv_init(void)
{
printk(KERN_INFO "NET4: Frame Diverter %s\n", sysctl_divert_version);
return 0;
}
 
/*
* Allocate a divert_blk for a device. This must be an ethernet nic.
*/
int alloc_divert_blk(struct net_device *dev)
{
int alloc_size = (sizeof(struct divert_blk) + 3) & ~3;
 
if (dev->type == ARPHRD_ETHER) {
printk(KERN_DEBUG "divert: allocating divert_blk for %s\n",
dev->name);
 
dev->divert = (struct divert_blk *)
kmalloc(alloc_size, GFP_KERNEL);
if (dev->divert == NULL) {
printk(KERN_DEBUG "divert: unable to allocate divert_blk for %s\n",
dev->name);
return -ENOMEM;
} else {
memset(dev->divert, 0, sizeof(struct divert_blk));
}
dev_hold(dev);
} else {
printk(KERN_DEBUG "divert: not allocating divert_blk for non-ethernet device %s\n",
dev->name);
 
dev->divert = NULL;
}
return 0;
}
 
/*
* Free a divert_blk allocated by the above function, if it was
* allocated on that device.
*/
void free_divert_blk(struct net_device *dev)
{
if (dev->divert) {
kfree(dev->divert);
dev->divert=NULL;
dev_put(dev);
printk(KERN_DEBUG "divert: freeing divert_blk for %s\n",
dev->name);
} else {
printk(KERN_DEBUG "divert: no divert_blk to free, %s not ethernet\n",
dev->name);
}
}
 
/*
* Adds a tcp/udp (source or dest) port to an array
*/
int add_port(u16 ports[], u16 port)
{
int i;
 
if (port == 0)
return -EINVAL;
 
/* Storing directly in network format for performance,
* thanks Dave :)
*/
port = htons(port);
 
for (i = 0; i < MAX_DIVERT_PORTS; i++) {
if (ports[i] == port)
return -EALREADY;
}
for (i = 0; i < MAX_DIVERT_PORTS; i++) {
if (ports[i] == 0) {
ports[i] = port;
return 0;
}
}
 
return -ENOBUFS;
}
 
/*
* Removes a port from an array tcp/udp (source or dest)
*/
int remove_port(u16 ports[], u16 port)
{
int i;
 
if (port == 0)
return -EINVAL;
/* Storing directly in network format for performance,
* thanks Dave !
*/
port = htons(port);
 
for (i = 0; i < MAX_DIVERT_PORTS; i++) {
if (ports[i] == port) {
ports[i] = 0;
return 0;
}
}
 
return -EINVAL;
}
 
/* Some basic sanity checks on the arguments passed to divert_ioctl() */
int check_args(struct divert_cf *div_cf, struct net_device **dev)
{
char devname[32];
int ret;
 
if (dev == NULL)
return -EFAULT;
/* GETVERSION: all other args are unused */
if (div_cf->cmd == DIVCMD_GETVERSION)
return 0;
/* Network device index should reasonably be between 0 and 1000 :) */
if (div_cf->dev_index < 0 || div_cf->dev_index > 1000)
return -EINVAL;
/* Let's try to find the ifname */
sprintf(devname, "eth%d", div_cf->dev_index);
*dev = dev_get_by_name(devname);
/* dev should NOT be null */
if (*dev == NULL)
return -EINVAL;
 
ret = 0;
 
/* user issuing the ioctl must be a super one :) */
if (!capable(CAP_SYS_ADMIN)) {
ret = -EPERM;
goto out;
}
 
/* Device must have a divert_blk member NOT null */
if ((*dev)->divert == NULL)
ret = -EINVAL;
out:
dev_put(*dev);
return ret;
}
 
/*
* control function of the diverter
*/
#define DVDBG(a) \
printk(KERN_DEBUG "divert_ioctl() line %d %s\n", __LINE__, (a))
 
int divert_ioctl(unsigned int cmd, struct divert_cf *arg)
{
struct divert_cf div_cf;
struct divert_blk *div_blk;
struct net_device *dev;
int ret;
 
switch (cmd) {
case SIOCGIFDIVERT:
DVDBG("SIOCGIFDIVERT, copy_from_user");
if (copy_from_user(&div_cf, arg, sizeof(struct divert_cf)))
return -EFAULT;
DVDBG("before check_args");
ret = check_args(&div_cf, &dev);
if (ret)
return ret;
DVDBG("after checkargs");
div_blk = dev->divert;
DVDBG("befre switch()");
switch (div_cf.cmd) {
case DIVCMD_GETSTATUS:
/* Now, just give the user the raw divert block
* for him to play with :)
*/
if (copy_to_user(div_cf.arg1.ptr, dev->divert,
sizeof(struct divert_blk)))
return -EFAULT;
break;
 
case DIVCMD_GETVERSION:
DVDBG("GETVERSION: checking ptr");
if (div_cf.arg1.ptr == NULL)
return -EINVAL;
DVDBG("GETVERSION: copying data to userland");
if (copy_to_user(div_cf.arg1.ptr,
sysctl_divert_version, 32))
return -EFAULT;
DVDBG("GETVERSION: data copied");
break;
 
default:
return -EINVAL;
};
 
break;
 
case SIOCSIFDIVERT:
if (copy_from_user(&div_cf, arg, sizeof(struct divert_cf)))
return -EFAULT;
 
ret = check_args(&div_cf, &dev);
if (ret)
return ret;
 
div_blk = dev->divert;
 
switch(div_cf.cmd) {
case DIVCMD_RESET:
div_blk->divert = 0;
div_blk->protos = DIVERT_PROTO_NONE;
memset(div_blk->tcp_dst, 0,
MAX_DIVERT_PORTS * sizeof(u16));
memset(div_blk->tcp_src, 0,
MAX_DIVERT_PORTS * sizeof(u16));
memset(div_blk->udp_dst, 0,
MAX_DIVERT_PORTS * sizeof(u16));
memset(div_blk->udp_src, 0,
MAX_DIVERT_PORTS * sizeof(u16));
return 0;
case DIVCMD_DIVERT:
switch(div_cf.arg1.int32) {
case DIVARG1_ENABLE:
if (div_blk->divert)
return -EALREADY;
div_blk->divert = 1;
break;
 
case DIVARG1_DISABLE:
if (!div_blk->divert)
return -EALREADY;
div_blk->divert = 0;
break;
 
default:
return -EINVAL;
};
 
break;
 
case DIVCMD_IP:
switch(div_cf.arg1.int32) {
case DIVARG1_ENABLE:
if (div_blk->protos & DIVERT_PROTO_IP)
return -EALREADY;
div_blk->protos |= DIVERT_PROTO_IP;
break;
 
case DIVARG1_DISABLE:
if (!(div_blk->protos & DIVERT_PROTO_IP))
return -EALREADY;
div_blk->protos &= ~DIVERT_PROTO_IP;
break;
 
default:
return -EINVAL;
};
 
break;
 
case DIVCMD_TCP:
switch(div_cf.arg1.int32) {
case DIVARG1_ENABLE:
if (div_blk->protos & DIVERT_PROTO_TCP)
return -EALREADY;
div_blk->protos |= DIVERT_PROTO_TCP;
break;
 
case DIVARG1_DISABLE:
if (!(div_blk->protos & DIVERT_PROTO_TCP))
return -EALREADY;
div_blk->protos &= ~DIVERT_PROTO_TCP;
break;
 
default:
return -EINVAL;
};
 
break;
 
case DIVCMD_TCPDST:
switch(div_cf.arg1.int32) {
case DIVARG1_ADD:
return add_port(div_blk->tcp_dst,
div_cf.arg2.uint16);
case DIVARG1_REMOVE:
return remove_port(div_blk->tcp_dst,
div_cf.arg2.uint16);
 
default:
return -EINVAL;
};
 
break;
 
case DIVCMD_TCPSRC:
switch(div_cf.arg1.int32) {
case DIVARG1_ADD:
return add_port(div_blk->tcp_src,
div_cf.arg2.uint16);
 
case DIVARG1_REMOVE:
return remove_port(div_blk->tcp_src,
div_cf.arg2.uint16);
 
default:
return -EINVAL;
};
 
break;
 
case DIVCMD_UDP:
switch(div_cf.arg1.int32) {
case DIVARG1_ENABLE:
if (div_blk->protos & DIVERT_PROTO_UDP)
return -EALREADY;
div_blk->protos |= DIVERT_PROTO_UDP;
break;
 
case DIVARG1_DISABLE:
if (!(div_blk->protos & DIVERT_PROTO_UDP))
return -EALREADY;
div_blk->protos &= ~DIVERT_PROTO_UDP;
break;
 
default:
return -EINVAL;
};
 
break;
 
case DIVCMD_UDPDST:
switch(div_cf.arg1.int32) {
case DIVARG1_ADD:
return add_port(div_blk->udp_dst,
div_cf.arg2.uint16);
 
case DIVARG1_REMOVE:
return remove_port(div_blk->udp_dst,
div_cf.arg2.uint16);
 
default:
return -EINVAL;
};
 
break;
 
case DIVCMD_UDPSRC:
switch(div_cf.arg1.int32) {
case DIVARG1_ADD:
return add_port(div_blk->udp_src,
div_cf.arg2.uint16);
 
case DIVARG1_REMOVE:
return remove_port(div_blk->udp_src,
div_cf.arg2.uint16);
 
default:
return -EINVAL;
};
 
break;
 
case DIVCMD_ICMP:
switch(div_cf.arg1.int32) {
case DIVARG1_ENABLE:
if (div_blk->protos & DIVERT_PROTO_ICMP)
return -EALREADY;
div_blk->protos |= DIVERT_PROTO_ICMP;
break;
 
case DIVARG1_DISABLE:
if (!(div_blk->protos & DIVERT_PROTO_ICMP))
return -EALREADY;
div_blk->protos &= ~DIVERT_PROTO_ICMP;
break;
 
default:
return -EINVAL;
};
 
break;
 
default:
return -EINVAL;
};
 
break;
 
default:
return -EINVAL;
};
 
return 0;
}
 
 
/*
* Check if packet should have its dest mac address set to the box itself
* for diversion
*/
 
#define ETH_DIVERT_FRAME(skb) \
memcpy(skb->mac.ethernet, skb->dev->dev_addr, ETH_ALEN); \
skb->pkt_type=PACKET_HOST
void divert_frame(struct sk_buff *skb)
{
struct ethhdr *eth = skb->mac.ethernet;
struct iphdr *iph;
struct tcphdr *tcph;
struct udphdr *udph;
struct divert_blk *divert = skb->dev->divert;
int i, src, dst;
unsigned char *skb_data_end = skb->data + skb->len;
 
/* Packet is already aimed at us, return */
if (!memcmp(eth, skb->dev->dev_addr, ETH_ALEN))
return;
/* proto is not IP, do nothing */
if (eth->h_proto != htons(ETH_P_IP))
return;
/* Divert all IP frames ? */
if (divert->protos & DIVERT_PROTO_IP) {
ETH_DIVERT_FRAME(skb);
return;
}
/* Check for possible (maliciously) malformed IP frame (thanks Dave) */
iph = (struct iphdr *) skb->data;
if (((iph->ihl<<2)+(unsigned char*)(iph)) >= skb_data_end) {
printk(KERN_INFO "divert: malformed IP packet !\n");
return;
}
 
switch (iph->protocol) {
/* Divert all ICMP frames ? */
case IPPROTO_ICMP:
if (divert->protos & DIVERT_PROTO_ICMP) {
ETH_DIVERT_FRAME(skb);
return;
}
break;
 
/* Divert all TCP frames ? */
case IPPROTO_TCP:
if (divert->protos & DIVERT_PROTO_TCP) {
ETH_DIVERT_FRAME(skb);
return;
}
 
/* Check for possible (maliciously) malformed IP
 * frame (thanks Dave)
*/
tcph = (struct tcphdr *)
(((unsigned char *)iph) + (iph->ihl<<2));
if (((unsigned char *)(tcph+1)) >= skb_data_end) {
printk(KERN_INFO "divert: malformed TCP packet !\n");
return;
}
 
/* Divert some tcp dst/src ports only? */
for (i = 0; i < MAX_DIVERT_PORTS; i++) {
dst = divert->tcp_dst[i];
src = divert->tcp_src[i];
if ((dst && dst == tcph->dest) ||
(src && src == tcph->source)) {
ETH_DIVERT_FRAME(skb);
return;
}
}
break;
 
/* Divert all UDP frames ? */
case IPPROTO_UDP:
if (divert->protos & DIVERT_PROTO_UDP) {
ETH_DIVERT_FRAME(skb);
return;
}
 
/* Check for possible (maliciously) malformed IP
* packet (thanks Dave)
*/
udph = (struct udphdr *)
(((unsigned char *)iph) + (iph->ihl<<2));
if (((unsigned char *)(udph+1)) >= skb_data_end) {
printk(KERN_INFO
"divert: malformed UDP packet !\n");
return;
}
 
/* Divert some udp dst/src ports only ? */
for (i = 0; i < MAX_DIVERT_PORTS; i++) {
dst = divert->udp_dst[i];
src = divert->udp_src[i];
if ((dst && dst == udph->dest) ||
(src && src == udph->source)) {
ETH_DIVERT_FRAME(skb);
return;
}
}
break;
};
 
return;
}
 
/rtnetlink.c
0,0 → 1,530
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
* interface as the means of communication with the user level.
*
* Routing netlink socket interface: protocol independent part.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Fixes:
* Vitaly E. Lavrov RTA_OK arithmetics was wrong.
*/
 
#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/kernel.h>
#include <linux/major.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/fcntl.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/capability.h>
#include <linux/skbuff.h>
#include <linux/init.h>
 
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/string.h>
 
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/arp.h>
#include <net/route.h>
#include <net/udp.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
 
DECLARE_MUTEX(rtnl_sem);
 
void rtnl_lock(void)
{
rtnl_shlock();
rtnl_exlock();
}
void rtnl_unlock(void)
{
rtnl_exunlock();
rtnl_shunlock();
}
 
int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len)
{
memset(tb, 0, sizeof(struct rtattr*)*maxattr);
 
while (RTA_OK(rta, len)) {
unsigned flavor = rta->rta_type;
if (flavor && flavor <= maxattr)
tb[flavor-1] = rta;
rta = RTA_NEXT(rta, len);
}
return 0;
}
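 
/*
 * A minimal usage sketch (not part of this file): a message handler that has
 * located its fixed header can collect the trailing attributes into a table
 * indexed by attribute type minus one. The names "ifm" and "len" below are
 * placeholders for illustration only.
 *
 *	struct rtattr *tb[IFLA_MAX];
 *	unsigned mtu;
 *
 *	rtattr_parse(tb, IFLA_MAX, IFLA_RTA(ifm), len);
 *	if (tb[IFLA_MTU-1])
 *		memcpy(&mtu, RTA_DATA(tb[IFLA_MTU-1]), sizeof(mtu));
 */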
 
struct sock *rtnl;
 
struct rtnetlink_link * rtnetlink_links[NPROTO];
 
static const int rtm_min[(RTM_MAX+1-RTM_BASE)/4] =
{
NLMSG_LENGTH(sizeof(struct ifinfomsg)),
NLMSG_LENGTH(sizeof(struct ifaddrmsg)),
NLMSG_LENGTH(sizeof(struct rtmsg)),
NLMSG_LENGTH(sizeof(struct ndmsg)),
NLMSG_LENGTH(sizeof(struct rtmsg)),
NLMSG_LENGTH(sizeof(struct tcmsg)),
NLMSG_LENGTH(sizeof(struct tcmsg)),
NLMSG_LENGTH(sizeof(struct tcmsg))
};
 
static const int rta_max[(RTM_MAX+1-RTM_BASE)/4] =
{
IFLA_MAX,
IFA_MAX,
RTA_MAX,
NDA_MAX,
RTA_MAX,
TCA_MAX,
TCA_MAX,
TCA_MAX
};
 
void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data)
{
struct rtattr *rta;
int size = RTA_LENGTH(attrlen);
 
rta = (struct rtattr*)skb_put(skb, RTA_ALIGN(size));
rta->rta_type = attrtype;
rta->rta_len = size;
memcpy(RTA_DATA(rta), data, attrlen);
}
 
int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo)
{
int err = 0;
 
NETLINK_CB(skb).dst_groups = group;
if (echo)
atomic_inc(&skb->users);
netlink_broadcast(rtnl, skb, pid, group, GFP_KERNEL);
if (echo)
err = netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
return err;
}
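 
/*
 * Note that netlink_broadcast() consumes one reference to the skb, so when
 * an echo is requested the extra reference taken above keeps the buffer
 * alive for the unicast back to the requesting pid.
 */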
 
int rtnetlink_put_metrics(struct sk_buff *skb, unsigned *metrics)
{
struct rtattr *mx = (struct rtattr*)skb->tail;
int i;
 
RTA_PUT(skb, RTA_METRICS, 0, NULL);
for (i=0; i<RTAX_MAX; i++) {
if (metrics[i])
RTA_PUT(skb, i+1, sizeof(unsigned), metrics+i);
}
mx->rta_len = skb->tail - (u8*)mx;
if (mx->rta_len == RTA_LENGTH(0))
skb_trim(skb, (u8*)mx - skb->data);
return 0;
 
rtattr_failure:
skb_trim(skb, (u8*)mx - skb->data);
return -1;
}
 
 
static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
int type, u32 pid, u32 seq, u32 change)
{
struct ifinfomsg *r;
struct nlmsghdr *nlh;
unsigned char *b = skb->tail;
 
nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*r));
if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
r = NLMSG_DATA(nlh);
r->ifi_family = AF_UNSPEC;
r->ifi_type = dev->type;
r->ifi_index = dev->ifindex;
r->ifi_flags = dev->flags;
r->ifi_change = change;
 
if (!netif_running(dev) || !netif_carrier_ok(dev))
r->ifi_flags &= ~IFF_RUNNING;
else
r->ifi_flags |= IFF_RUNNING;
 
RTA_PUT(skb, IFLA_IFNAME, strlen(dev->name)+1, dev->name);
if (dev->addr_len) {
RTA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr);
RTA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast);
}
if (1) {
unsigned mtu = dev->mtu;
RTA_PUT(skb, IFLA_MTU, sizeof(mtu), &mtu);
}
if (dev->ifindex != dev->iflink)
RTA_PUT(skb, IFLA_LINK, sizeof(int), &dev->iflink);
if (dev->qdisc_sleeping)
RTA_PUT(skb, IFLA_QDISC,
strlen(dev->qdisc_sleeping->ops->id) + 1,
dev->qdisc_sleeping->ops->id);
if (dev->master)
RTA_PUT(skb, IFLA_MASTER, sizeof(int), &dev->master->ifindex);
if (dev->get_stats) {
struct net_device_stats *stats = dev->get_stats(dev);
if (stats)
RTA_PUT(skb, IFLA_STATS, sizeof(*stats), stats);
}
nlh->nlmsg_len = skb->tail - b;
return skb->len;
 
nlmsg_failure:
rtattr_failure:
skb_trim(skb, b - skb->data);
return -1;
}
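 
/*
 * NLMSG_PUT() and RTA_PUT() jump to the nlmsg_failure/rtattr_failure labels
 * above when the skb runs out of tailroom; the partially built message is
 * trimmed off and -1 is returned so a dump can stop here and continue in a
 * later skb.
 */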
 
int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
int idx;
int s_idx = cb->args[0];
struct net_device *dev;
 
read_lock(&dev_base_lock);
for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
if (idx < s_idx)
continue;
if (rtnetlink_fill_ifinfo(skb, dev, RTM_NEWLINK, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, 0) <= 0)
break;
}
read_unlock(&dev_base_lock);
cb->args[0] = idx;
 
return skb->len;
}
 
int rtnetlink_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
{
int idx;
int s_idx = cb->family;
 
if (s_idx == 0)
s_idx = 1;
for (idx=1; idx<NPROTO; idx++) {
int type = cb->nlh->nlmsg_type-RTM_BASE;
if (idx < s_idx || idx == PF_PACKET)
continue;
if (rtnetlink_links[idx] == NULL ||
rtnetlink_links[idx][type].dumpit == NULL)
continue;
if (idx > s_idx)
memset(&cb->args[0], 0, sizeof(cb->args));
if (rtnetlink_links[idx][type].dumpit(skb, cb))
break;
}
cb->family = idx;
 
return skb->len;
}
 
void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
{
struct sk_buff *skb;
int size = NLMSG_GOODSIZE;
 
skb = alloc_skb(size, GFP_KERNEL);
if (!skb)
return;
 
if (rtnetlink_fill_ifinfo(skb, dev, type, 0, 0, change) < 0) {
kfree_skb(skb);
return;
}
NETLINK_CB(skb).dst_groups = RTMGRP_LINK;
netlink_broadcast(rtnl, skb, 0, RTMGRP_LINK, GFP_KERNEL);
}
 
static int rtnetlink_done(struct netlink_callback *cb)
{
return 0;
}
 
/* Process one rtnetlink message. */
 
static __inline__ int
rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
{
struct rtnetlink_link *link;
struct rtnetlink_link *link_tab;
struct rtattr *rta[RTATTR_MAX];
 
int exclusive = 0;
int sz_idx, kind;
int min_len;
int family;
int type;
int err;
 
/* Only requests are handled by the kernel now */
if (!(nlh->nlmsg_flags&NLM_F_REQUEST))
return 0;
 
type = nlh->nlmsg_type;
 
/* Control messages: ignore them */
if (type < RTM_BASE)
return 0;
 
/* Unknown message: reply with EINVAL */
if (type > RTM_MAX)
goto err_inval;
 
type -= RTM_BASE;
 
/* All messages must have at least 1 byte of payload: the rtgenmsg family */
if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct rtgenmsg)))
return 0;
 
family = ((struct rtgenmsg*)NLMSG_DATA(nlh))->rtgen_family;
if (family >= NPROTO) {
*errp = -EAFNOSUPPORT;
return -1;
}
 
link_tab = rtnetlink_links[family];
if (link_tab == NULL)
link_tab = rtnetlink_links[PF_UNSPEC];
link = &link_tab[type];
 
sz_idx = type>>2;
kind = type&3;
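 
/*
 * Once RTM_BASE is subtracted, the RTM_* types come in blocks of four per
 * object (link, addr, route, neigh, ...) in the order NEW, DEL, GET, SET.
 * sz_idx therefore selects the per-object minimum header length and
 * attribute count above, and kind == 2 identifies a GET request, the only
 * kind allowed without CAP_NET_ADMIN below.
 */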
 
if (kind != 2 && !cap_raised(NETLINK_CB(skb).eff_cap, CAP_NET_ADMIN)) {
*errp = -EPERM;
return -1;
}
 
if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
u32 rlen;
 
if (link->dumpit == NULL)
link = &(rtnetlink_links[PF_UNSPEC][type]);
 
if (link->dumpit == NULL)
goto err_inval;
 
if ((*errp = netlink_dump_start(rtnl, skb, nlh,
link->dumpit,
rtnetlink_done)) != 0) {
return -1;
}
rlen = NLMSG_ALIGN(nlh->nlmsg_len);
if (rlen > skb->len)
rlen = skb->len;
skb_pull(skb, rlen);
return -1;
}
 
if (kind != 2) {
if (rtnl_exlock_nowait()) {
*errp = 0;
return -1;
}
exclusive = 1;
}
 
memset(&rta, 0, sizeof(rta));
 
min_len = rtm_min[sz_idx];
if (nlh->nlmsg_len < min_len)
goto err_inval;
 
if (nlh->nlmsg_len > min_len) {
int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
struct rtattr *attr = (void*)nlh + NLMSG_ALIGN(min_len);
 
while (RTA_OK(attr, attrlen)) {
unsigned flavor = attr->rta_type;
if (flavor) {
if (flavor > rta_max[sz_idx])
goto err_inval;
rta[flavor-1] = attr;
}
attr = RTA_NEXT(attr, attrlen);
}
}
 
if (link->doit == NULL)
link = &(rtnetlink_links[PF_UNSPEC][type]);
if (link->doit == NULL)
goto err_inval;
err = link->doit(skb, nlh, (void *)&rta);
 
if (exclusive)
rtnl_exunlock();
*errp = err;
return err;
 
err_inval:
if (exclusive)
rtnl_exunlock();
*errp = -EINVAL;
return -1;
}
 
/*
* Process one packet of messages.
 * Malformed skbs with wrong message lengths are discarded silently.
*/
 
static inline int rtnetlink_rcv_skb(struct sk_buff *skb)
{
int err;
struct nlmsghdr * nlh;
 
while (skb->len >= NLMSG_SPACE(0)) {
u32 rlen;
 
nlh = (struct nlmsghdr *)skb->data;
if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
return 0;
rlen = NLMSG_ALIGN(nlh->nlmsg_len);
if (rlen > skb->len)
rlen = skb->len;
if (rtnetlink_rcv_msg(skb, nlh, &err)) {
/* Not an error, but we must interrupt processing here:
 * note that in this case we do not pull the message
 * from the skb; it will be processed later.
*/
if (err == 0)
return -1;
netlink_ack(skb, nlh, err);
} else if (nlh->nlmsg_flags&NLM_F_ACK)
netlink_ack(skb, nlh, 0);
skb_pull(skb, rlen);
}
 
return 0;
}
 
/*
* rtnetlink input queue processing routine:
 * - try to acquire the shared lock. If that fails, defer processing.
 * - feed skbs to rtnetlink_rcv_skb until it refuses a message, which
 * happens when a dump has started and/or acquisition of the
 * exclusive lock failed.
*/
 
static void rtnetlink_rcv(struct sock *sk, int len)
{
do {
struct sk_buff *skb;
 
if (rtnl_shlock_nowait())
return;
 
while ((skb = skb_dequeue(&sk->receive_queue)) != NULL) {
if (rtnetlink_rcv_skb(skb)) {
if (skb->len)
skb_queue_head(&sk->receive_queue, skb);
else
kfree_skb(skb);
break;
}
kfree_skb(skb);
}
 
up(&rtnl_sem);
} while (rtnl && rtnl->receive_queue.qlen);
}
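 
/*
 * The outer loop above re-checks the receive queue after the semaphore is
 * released: skbs queued while we held the shared lock (their sender's
 * rtnl_shlock_nowait() failed and simply returned) would otherwise be left
 * with nobody scheduled to process them.
 */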
 
static struct rtnetlink_link link_rtnetlink_table[RTM_MAX-RTM_BASE+1] =
{
{ NULL, NULL, },
{ NULL, NULL, },
{ NULL, rtnetlink_dump_ifinfo, },
{ NULL, NULL, },
 
{ NULL, NULL, },
{ NULL, NULL, },
{ NULL, rtnetlink_dump_all, },
{ NULL, NULL, },
 
{ NULL, NULL, },
{ NULL, NULL, },
{ NULL, rtnetlink_dump_all, },
{ NULL, NULL, },
 
{ neigh_add, NULL, },
{ neigh_delete, NULL, },
{ NULL, neigh_dump_info, },
{ NULL, NULL, },
 
{ NULL, NULL, },
{ NULL, NULL, },
{ NULL, NULL, },
{ NULL, NULL, },
};
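 
/*
 * Each entry above is a { doit, dumpit } pair; entries are grouped in fours
 * following the RTM_* numbering (NEW, DEL, GET, SET) for links, addresses,
 * routes and neighbours in turn. Only the GET dump handlers of the first
 * three groups and the neighbour NEW/DEL/GET handlers are wired up here;
 * other requests are served by the per-family tables registered in
 * rtnetlink_links[], with this table acting as the PF_UNSPEC fallback.
 */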
 
 
static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr)
{
struct net_device *dev = ptr;
switch (event) {
case NETDEV_UNREGISTER:
rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
break;
case NETDEV_REGISTER:
rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
break;
case NETDEV_UP:
case NETDEV_DOWN:
rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
break;
case NETDEV_CHANGE:
case NETDEV_GOING_DOWN:
break;
default:
rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
break;
}
return NOTIFY_DONE;
}
 
struct notifier_block rtnetlink_dev_notifier = {
rtnetlink_event,
NULL,
0
};
 
 
void __init rtnetlink_init(void)
{
#ifdef RTNL_DEBUG
printk("Initializing RT netlink socket\n");
#endif
rtnl = netlink_kernel_create(NETLINK_ROUTE, rtnetlink_rcv);
if (rtnl == NULL)
panic("rtnetlink_init: cannot initialize rtnetlink\n");
netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV);
register_netdevice_notifier(&rtnetlink_dev_notifier);
rtnetlink_links[PF_UNSPEC] = link_rtnetlink_table;
rtnetlink_links[PF_PACKET] = link_rtnetlink_table;
}
/ethtool.c
0,0 → 1,694
/*
* net/core/ethtool.c - Ethtool ioctl handler
* Copyright (c) 2003 Matthew Wilcox <matthew@wil.cx>
*
* This file is where we call all the ethtool_ops commands to get
* the information ethtool needs. We fall back to calling do_ioctl()
* for drivers which haven't been converted to ethtool_ops yet.
*
* It's GPL, stupid.
*/
 
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/ethtool.h>
#include <linux/netdevice.h>
#include <asm/uaccess.h>
 
/*
 * Some useful ethtool_ops methods that are device independent.
* If we find that all drivers want to do the same thing here,
* we can turn these into dev_() function calls.
*/
 
u32 ethtool_op_get_link(struct net_device *dev)
{
return netif_carrier_ok(dev) ? 1 : 0;
}
 
u32 ethtool_op_get_tx_csum(struct net_device *dev)
{
return (dev->features & NETIF_F_IP_CSUM) != 0;
}
 
int ethtool_op_set_tx_csum(struct net_device *dev, u32 data)
{
if (data)
dev->features |= NETIF_F_IP_CSUM;
else
dev->features &= ~NETIF_F_IP_CSUM;
 
return 0;
}
 
u32 ethtool_op_get_sg(struct net_device *dev)
{
return (dev->features & NETIF_F_SG) != 0;
}
 
int ethtool_op_set_sg(struct net_device *dev, u32 data)
{
if (data)
dev->features |= NETIF_F_SG;
else
dev->features &= ~NETIF_F_SG;
 
return 0;
}
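 
/*
 * A minimal wiring sketch (not part of this file): a driver that is happy
 * with the generic helpers above can simply point its ethtool_ops at them
 * (the name "my_ethtool_ops" is a placeholder for illustration):
 *
 *	static struct ethtool_ops my_ethtool_ops = {
 *		get_link:	ethtool_op_get_link,
 *		get_tx_csum:	ethtool_op_get_tx_csum,
 *		set_tx_csum:	ethtool_op_set_tx_csum,
 *		get_sg:		ethtool_op_get_sg,
 *		set_sg:		ethtool_op_set_sg,
 *	};
 *
 *	dev->ethtool_ops = &my_ethtool_ops;
 */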
 
/* Handlers for each ethtool command */
 
static int ethtool_get_settings(struct net_device *dev, void *useraddr)
{
struct ethtool_cmd cmd = { ETHTOOL_GSET };
int err;
 
if (!dev->ethtool_ops->get_settings)
return -EOPNOTSUPP;
 
err = dev->ethtool_ops->get_settings(dev, &cmd);
if (err < 0)
return err;
 
if (copy_to_user(useraddr, &cmd, sizeof(cmd)))
return -EFAULT;
return 0;
}
 
static int ethtool_set_settings(struct net_device *dev, void *useraddr)
{
struct ethtool_cmd cmd;
 
if (!dev->ethtool_ops->set_settings)
return -EOPNOTSUPP;
 
if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
return -EFAULT;
 
return dev->ethtool_ops->set_settings(dev, &cmd);
}
 
static int ethtool_get_drvinfo(struct net_device *dev, void *useraddr)
{
struct ethtool_drvinfo info;
struct ethtool_ops *ops = dev->ethtool_ops;
 
if (!ops->get_drvinfo)
return -EOPNOTSUPP;
 
memset(&info, 0, sizeof(info));
info.cmd = ETHTOOL_GDRVINFO;
ops->get_drvinfo(dev, &info);
 
if (ops->self_test_count)
info.testinfo_len = ops->self_test_count(dev);
if (ops->get_stats_count)
info.n_stats = ops->get_stats_count(dev);
if (ops->get_regs_len)
info.regdump_len = ops->get_regs_len(dev);
if (ops->get_eeprom_len)
info.eedump_len = ops->get_eeprom_len(dev);
 
if (copy_to_user(useraddr, &info, sizeof(info)))
return -EFAULT;
return 0;
}
 
static int ethtool_get_regs(struct net_device *dev, char *useraddr)
{
struct ethtool_regs regs;
struct ethtool_ops *ops = dev->ethtool_ops;
void *regbuf;
int reglen, ret;
 
if (!ops->get_regs || !ops->get_regs_len)
return -EOPNOTSUPP;
 
if (copy_from_user(&regs, useraddr, sizeof(regs)))
return -EFAULT;
 
reglen = ops->get_regs_len(dev);
if (regs.len > reglen)
regs.len = reglen;
 
regbuf = kmalloc(reglen, GFP_USER);
if (!regbuf)
return -ENOMEM;
 
ops->get_regs(dev, &regs, regbuf);
 
ret = -EFAULT;
if (copy_to_user(useraddr, &regs, sizeof(regs)))
goto out;
useraddr += offsetof(struct ethtool_regs, data);
if (copy_to_user(useraddr, regbuf, regs.len))
goto out;
ret = 0;
 
out:
kfree(regbuf);
return ret;
}
 
static int ethtool_get_wol(struct net_device *dev, char *useraddr)
{
struct ethtool_wolinfo wol = { ETHTOOL_GWOL };
 
if (!dev->ethtool_ops->get_wol)
return -EOPNOTSUPP;
 
dev->ethtool_ops->get_wol(dev, &wol);
 
if (copy_to_user(useraddr, &wol, sizeof(wol)))
return -EFAULT;
return 0;
}
 
static int ethtool_set_wol(struct net_device *dev, char *useraddr)
{
struct ethtool_wolinfo wol;
 
if (!dev->ethtool_ops->set_wol)
return -EOPNOTSUPP;
 
if (copy_from_user(&wol, useraddr, sizeof(wol)))
return -EFAULT;
 
return dev->ethtool_ops->set_wol(dev, &wol);
}
 
static int ethtool_get_msglevel(struct net_device *dev, char *useraddr)
{
struct ethtool_value edata = { ETHTOOL_GMSGLVL };
 
if (!dev->ethtool_ops->get_msglevel)
return -EOPNOTSUPP;
 
edata.data = dev->ethtool_ops->get_msglevel(dev);
 
if (copy_to_user(useraddr, &edata, sizeof(edata)))
return -EFAULT;
return 0;
}
 
static int ethtool_set_msglevel(struct net_device *dev, char *useraddr)
{
struct ethtool_value edata;
 
if (!dev->ethtool_ops->set_msglevel)
return -EOPNOTSUPP;
 
if (copy_from_user(&edata, useraddr, sizeof(edata)))
return -EFAULT;
 
dev->ethtool_ops->set_msglevel(dev, edata.data);
return 0;
}
 
static int ethtool_nway_reset(struct net_device *dev)
{
if (!dev->ethtool_ops->nway_reset)
return -EOPNOTSUPP;
 
return dev->ethtool_ops->nway_reset(dev);
}
 
static int ethtool_get_link(struct net_device *dev, void *useraddr)
{
struct ethtool_value edata = { ETHTOOL_GLINK };
 
if (!dev->ethtool_ops->get_link)
return -EOPNOTSUPP;
 
edata.data = dev->ethtool_ops->get_link(dev);
 
if (copy_to_user(useraddr, &edata, sizeof(edata)))
return -EFAULT;
return 0;
}
 
static int ethtool_get_eeprom(struct net_device *dev, void *useraddr)
{
struct ethtool_eeprom eeprom;
struct ethtool_ops *ops = dev->ethtool_ops;
u8 *data;
int ret;
 
if (!ops->get_eeprom || !ops->get_eeprom_len)
return -EOPNOTSUPP;
 
if (copy_from_user(&eeprom, useraddr, sizeof(eeprom)))
return -EFAULT;
 
/* Check for wrap and zero */
if (eeprom.offset + eeprom.len <= eeprom.offset)
return -EINVAL;
 
/* Check for exceeding total eeprom len */
if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev))
return -EINVAL;
 
data = kmalloc(eeprom.len, GFP_USER);
if (!data)
return -ENOMEM;
 
ret = -EFAULT;
if (copy_from_user(data, useraddr + sizeof(eeprom), eeprom.len))
goto out;
 
ret = ops->get_eeprom(dev, &eeprom, data);
if (ret)
goto out;
 
ret = -EFAULT;
if (copy_to_user(useraddr, &eeprom, sizeof(eeprom)))
goto out;
if (copy_to_user(useraddr + sizeof(eeprom), data, eeprom.len))
goto out;
ret = 0;
 
out:
kfree(data);
return ret;
}
 
static int ethtool_set_eeprom(struct net_device *dev, void *useraddr)
{
struct ethtool_eeprom eeprom;
struct ethtool_ops *ops = dev->ethtool_ops;
u8 *data;
int ret;
 
if (!ops->set_eeprom || !ops->get_eeprom_len)
return -EOPNOTSUPP;
 
if (copy_from_user(&eeprom, useraddr, sizeof(eeprom)))
return -EFAULT;
 
/* Check for wrap and zero */
if (eeprom.offset + eeprom.len <= eeprom.offset)
return -EINVAL;
 
/* Check for exceeding total eeprom len */
if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev))
return -EINVAL;
 
data = kmalloc(eeprom.len, GFP_USER);
if (!data)
return -ENOMEM;
 
ret = -EFAULT;
if (copy_from_user(data, useraddr + sizeof(eeprom), eeprom.len))
goto out;
 
ret = ops->set_eeprom(dev, &eeprom, data);
if (ret)
goto out;
 
if (copy_to_user(useraddr + sizeof(eeprom), data, eeprom.len))
ret = -EFAULT;
 
out:
kfree(data);
return ret;
}
 
static int ethtool_get_coalesce(struct net_device *dev, void *useraddr)
{
struct ethtool_coalesce coalesce = { ETHTOOL_GCOALESCE };
 
if (!dev->ethtool_ops->get_coalesce)
return -EOPNOTSUPP;
 
dev->ethtool_ops->get_coalesce(dev, &coalesce);
 
if (copy_to_user(useraddr, &coalesce, sizeof(coalesce)))
return -EFAULT;
return 0;
}
 
static int ethtool_set_coalesce(struct net_device *dev, void *useraddr)
{
struct ethtool_coalesce coalesce;
 
if (!dev->ethtool_ops->set_coalesce)
return -EOPNOTSUPP;
 
if (copy_from_user(&coalesce, useraddr, sizeof(coalesce)))
return -EFAULT;
 
return dev->ethtool_ops->set_coalesce(dev, &coalesce);
}
 
static int ethtool_get_ringparam(struct net_device *dev, void *useraddr)
{
struct ethtool_ringparam ringparam = { ETHTOOL_GRINGPARAM };
 
if (!dev->ethtool_ops->get_ringparam)
return -EOPNOTSUPP;
 
dev->ethtool_ops->get_ringparam(dev, &ringparam);
 
if (copy_to_user(useraddr, &ringparam, sizeof(ringparam)))
return -EFAULT;
return 0;
}
 
static int ethtool_set_ringparam(struct net_device *dev, void *useraddr)
{
struct ethtool_ringparam ringparam;
 
if (!dev->ethtool_ops->set_ringparam)
return -EOPNOTSUPP;
 
if (copy_from_user(&ringparam, useraddr, sizeof(ringparam)))
return -EFAULT;
 
return dev->ethtool_ops->set_ringparam(dev, &ringparam);
}
 
static int ethtool_get_pauseparam(struct net_device *dev, void *useraddr)
{
struct ethtool_pauseparam pauseparam = { ETHTOOL_GPAUSEPARAM };
 
if (!dev->ethtool_ops->get_pauseparam)
return -EOPNOTSUPP;
 
dev->ethtool_ops->get_pauseparam(dev, &pauseparam);
 
if (copy_to_user(useraddr, &pauseparam, sizeof(pauseparam)))
return -EFAULT;
return 0;
}
 
static int ethtool_set_pauseparam(struct net_device *dev, void *useraddr)
{
struct ethtool_pauseparam pauseparam;
 
if (!dev->ethtool_ops->set_pauseparam)
return -EOPNOTSUPP;
 
if (copy_from_user(&pauseparam, useraddr, sizeof(pauseparam)))
return -EFAULT;
 
return dev->ethtool_ops->set_pauseparam(dev, &pauseparam);
}
 
static int ethtool_get_rx_csum(struct net_device *dev, char *useraddr)
{
struct ethtool_value edata = { ETHTOOL_GRXCSUM };
 
if (!dev->ethtool_ops->get_rx_csum)
return -EOPNOTSUPP;
 
edata.data = dev->ethtool_ops->get_rx_csum(dev);
 
if (copy_to_user(useraddr, &edata, sizeof(edata)))
return -EFAULT;
return 0;
}
 
static int ethtool_set_rx_csum(struct net_device *dev, char *useraddr)
{
struct ethtool_value edata;
 
if (!dev->ethtool_ops->set_rx_csum)
return -EOPNOTSUPP;
 
if (copy_from_user(&edata, useraddr, sizeof(edata)))
return -EFAULT;
 
dev->ethtool_ops->set_rx_csum(dev, edata.data);
return 0;
}
 
static int ethtool_get_tx_csum(struct net_device *dev, char *useraddr)
{
struct ethtool_value edata = { ETHTOOL_GTXCSUM };
 
if (!dev->ethtool_ops->get_tx_csum)
return -EOPNOTSUPP;
 
edata.data = dev->ethtool_ops->get_tx_csum(dev);
 
if (copy_to_user(useraddr, &edata, sizeof(edata)))
return -EFAULT;
return 0;
}
 
static int ethtool_set_tx_csum(struct net_device *dev, char *useraddr)
{
struct ethtool_value edata;
 
if (!dev->ethtool_ops->set_tx_csum)
return -EOPNOTSUPP;
 
if (copy_from_user(&edata, useraddr, sizeof(edata)))
return -EFAULT;
 
return dev->ethtool_ops->set_tx_csum(dev, edata.data);
}
 
static int ethtool_get_sg(struct net_device *dev, char *useraddr)
{
struct ethtool_value edata = { ETHTOOL_GSG };
 
if (!dev->ethtool_ops->get_sg)
return -EOPNOTSUPP;
 
edata.data = dev->ethtool_ops->get_sg(dev);
 
if (copy_to_user(useraddr, &edata, sizeof(edata)))
return -EFAULT;
return 0;
}
 
static int ethtool_set_sg(struct net_device *dev, char *useraddr)
{
struct ethtool_value edata;
 
if (!dev->ethtool_ops->set_sg)
return -EOPNOTSUPP;
 
if (copy_from_user(&edata, useraddr, sizeof(edata)))
return -EFAULT;
 
return dev->ethtool_ops->set_sg(dev, edata.data);
}
 
static int ethtool_self_test(struct net_device *dev, char *useraddr)
{
struct ethtool_test test;
struct ethtool_ops *ops = dev->ethtool_ops;
u64 *data;
int ret;
 
if (!ops->self_test || !ops->self_test_count)
return -EOPNOTSUPP;
 
if (copy_from_user(&test, useraddr, sizeof(test)))
return -EFAULT;
 
test.len = ops->self_test_count(dev);
data = kmalloc(test.len * sizeof(u64), GFP_USER);
if (!data)
return -ENOMEM;
 
ops->self_test(dev, &test, data);
 
ret = -EFAULT;
if (copy_to_user(useraddr, &test, sizeof(test)))
goto out;
useraddr += sizeof(test);
if (copy_to_user(useraddr, data, test.len * sizeof(u64)))
goto out;
ret = 0;
 
out:
kfree(data);
return ret;
}
 
static int ethtool_get_strings(struct net_device *dev, void *useraddr)
{
struct ethtool_gstrings gstrings;
struct ethtool_ops *ops = dev->ethtool_ops;
u8 *data;
int ret;
 
if (!ops->get_strings)
return -EOPNOTSUPP;
 
if (copy_from_user(&gstrings, useraddr, sizeof(gstrings)))
return -EFAULT;
 
switch (gstrings.string_set) {
case ETH_SS_TEST:
if (!ops->self_test_count)
return -EOPNOTSUPP;
gstrings.len = ops->self_test_count(dev);
break;
case ETH_SS_STATS:
if (!ops->get_stats_count)
return -EOPNOTSUPP;
gstrings.len = ops->get_stats_count(dev);
break;
default:
return -EINVAL;
}
 
data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER);
if (!data)
return -ENOMEM;
 
ops->get_strings(dev, gstrings.string_set, data);
 
ret = -EFAULT;
if (copy_to_user(useraddr, &gstrings, sizeof(gstrings)))
goto out;
useraddr += sizeof(gstrings);
if (copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN))
goto out;
ret = 0;
 
out:
kfree(data);
return ret;
}
 
static int ethtool_phys_id(struct net_device *dev, void *useraddr)
{
struct ethtool_value id;
 
if (!dev->ethtool_ops->phys_id)
return -EOPNOTSUPP;
 
if (copy_from_user(&id, useraddr, sizeof(id)))
return -EFAULT;
 
return dev->ethtool_ops->phys_id(dev, id.data);
}
 
static int ethtool_get_stats(struct net_device *dev, void *useraddr)
{
struct ethtool_stats stats;
struct ethtool_ops *ops = dev->ethtool_ops;
u64 *data;
int ret;
 
if (!ops->get_ethtool_stats || !ops->get_stats_count)
return -EOPNOTSUPP;
 
if (copy_from_user(&stats, useraddr, sizeof(stats)))
return -EFAULT;
 
stats.n_stats = ops->get_stats_count(dev);
data = kmalloc(stats.n_stats * sizeof(u64), GFP_USER);
if (!data)
return -ENOMEM;
 
ops->get_ethtool_stats(dev, &stats, data);
 
ret = -EFAULT;
if (copy_to_user(useraddr, &stats, sizeof(stats)))
goto out;
useraddr += sizeof(stats);
if (copy_to_user(useraddr, data, stats.n_stats * sizeof(u64)))
goto out;
ret = 0;
 
out:
kfree(data);
return ret;
}
 
/* The main entry point in this file. Called from net/core/dev.c */
 
int dev_ethtool(struct ifreq *ifr)
{
struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
void *useraddr = (void *) ifr->ifr_data;
u32 ethcmd;
 
/*
* XXX: This can be pushed down into the ethtool_* handlers that
* need it. Keep existing behaviour for the moment.
*/
if (!capable(CAP_NET_ADMIN))
return -EPERM;
 
if (!dev || !netif_device_present(dev))
return -ENODEV;
 
if (!dev->ethtool_ops)
goto ioctl;
 
if (copy_from_user(&ethcmd, useraddr, sizeof (ethcmd)))
return -EFAULT;
 
switch (ethcmd) {
case ETHTOOL_GSET:
return ethtool_get_settings(dev, useraddr);
case ETHTOOL_SSET:
return ethtool_set_settings(dev, useraddr);
case ETHTOOL_GDRVINFO:
return ethtool_get_drvinfo(dev, useraddr);
case ETHTOOL_GREGS:
return ethtool_get_regs(dev, useraddr);
case ETHTOOL_GWOL:
return ethtool_get_wol(dev, useraddr);
case ETHTOOL_SWOL:
return ethtool_set_wol(dev, useraddr);
case ETHTOOL_GMSGLVL:
return ethtool_get_msglevel(dev, useraddr);
case ETHTOOL_SMSGLVL:
return ethtool_set_msglevel(dev, useraddr);
case ETHTOOL_NWAY_RST:
return ethtool_nway_reset(dev);
case ETHTOOL_GLINK:
return ethtool_get_link(dev, useraddr);
case ETHTOOL_GEEPROM:
return ethtool_get_eeprom(dev, useraddr);
case ETHTOOL_SEEPROM:
return ethtool_set_eeprom(dev, useraddr);
case ETHTOOL_GCOALESCE:
return ethtool_get_coalesce(dev, useraddr);
case ETHTOOL_SCOALESCE:
return ethtool_set_coalesce(dev, useraddr);
case ETHTOOL_GRINGPARAM:
return ethtool_get_ringparam(dev, useraddr);
case ETHTOOL_SRINGPARAM:
return ethtool_set_ringparam(dev, useraddr);
case ETHTOOL_GPAUSEPARAM:
return ethtool_get_pauseparam(dev, useraddr);
case ETHTOOL_SPAUSEPARAM:
return ethtool_set_pauseparam(dev, useraddr);
case ETHTOOL_GRXCSUM:
return ethtool_get_rx_csum(dev, useraddr);
case ETHTOOL_SRXCSUM:
return ethtool_set_rx_csum(dev, useraddr);
case ETHTOOL_GTXCSUM:
return ethtool_get_tx_csum(dev, useraddr);
case ETHTOOL_STXCSUM:
return ethtool_set_tx_csum(dev, useraddr);
case ETHTOOL_GSG:
return ethtool_get_sg(dev, useraddr);
case ETHTOOL_SSG:
return ethtool_set_sg(dev, useraddr);
case ETHTOOL_TEST:
return ethtool_self_test(dev, useraddr);
case ETHTOOL_GSTRINGS:
return ethtool_get_strings(dev, useraddr);
case ETHTOOL_PHYS_ID:
return ethtool_phys_id(dev, useraddr);
case ETHTOOL_GSTATS:
return ethtool_get_stats(dev, useraddr);
default:
return -EOPNOTSUPP;
}
 
ioctl:
if (dev->do_ioctl)
return dev->do_ioctl(dev, ifr, SIOCETHTOOL);
return -EOPNOTSUPP;
}
/dev.c
0,0 → 1,2910
/*
* NET3 Protocol independent device support routines.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Derived from the non IP parts of dev.c 1.0.19
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Mark Evans, <evansmp@uhura.aston.ac.uk>
*
* Additional Authors:
* Florian la Roche <rzsfl@rz.uni-sb.de>
* Alan Cox <gw4pts@gw4pts.ampr.org>
* David Hinds <dahinds@users.sourceforge.net>
* Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
* Adam Sulmicki <adam@cfar.umd.edu>
* Pekka Riikonen <priikone@poesidon.pspt.fi>
*
* Changes:
* D.J. Barrow : Fixed bug where dev->refcnt gets set to 2
* if register_netdev gets called before
* net_dev_init & also removed a few lines
* of code in the process.
* Alan Cox : device private ioctl copies fields back.
* Alan Cox : Transmit queue code does relevant stunts to
* keep the queue safe.
* Alan Cox : Fixed double lock.
* Alan Cox : Fixed promisc NULL pointer trap
* ???????? : Support the full private ioctl range
* Alan Cox : Moved ioctl permission check into drivers
* Tim Kordas : SIOCADDMULTI/SIOCDELMULTI
* Alan Cox : 100 backlog just doesn't cut it when
* you start doing multicast video 8)
* Alan Cox : Rewrote net_bh and list manager.
* Alan Cox : Fix ETH_P_ALL echoback lengths.
* Alan Cox : Took out transmit every packet pass
* Saved a few bytes in the ioctl handler
* Alan Cox : Network driver sets packet type before calling netif_rx. Saves
* a function call a packet.
* Alan Cox : Hashed net_bh()
* Richard Kooijman: Timestamp fixes.
* Alan Cox : Wrong field in SIOCGIFDSTADDR
* Alan Cox : Device lock protection.
* Alan Cox : Fixed nasty side effect of device close changes.
* Rudi Cilibrasi : Pass the right thing to set_mac_address()
* Dave Miller : 32bit quantity for the device lock to make it work out
* on a Sparc.
* Bjorn Ekwall : Added KERNELD hack.
* Alan Cox : Cleaned up the backlog initialise.
* Craig Metz : SIOCGIFCONF fix if space for under
* 1 device.
* Thomas Bogendoerfer : Return ENODEV for dev_open, if there
* is no device open function.
* Andi Kleen : Fix error reporting for SIOCGIFCONF
* Michael Chastain : Fix signed/unsigned for SIOCGIFCONF
* Cyrus Durgin : Cleaned for KMOD
* Adam Sulmicki : Bug Fix : Network Device Unload
* A network device unload needs to purge
* the backlog queue.
* Paul Rusty Russell : SIOCSIFNAME
* Pekka Riikonen : Netdev boot-time settings code
* Andrew Morton : Make unregister_netdevice wait indefinitely on dev->refcnt
* J Hadi Salim : - Backlog queue sampling
* - netif_rx() feedback
*/
 
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/bitops.h>
#include <linux/config.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include <linux/skbuff.h>
#include <linux/brlock.h>
#include <net/sock.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/stat.h>
#include <linux/if_bridge.h>
#include <linux/divert.h>
#include <net/dst.h>
#include <net/pkt_sched.h>
#include <net/profile.h>
#include <net/checksum.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/module.h>
#if defined(CONFIG_NET_RADIO) || defined(CONFIG_NET_PCMCIA_RADIO)
#include <linux/wireless.h> /* Note : will define WIRELESS_EXT */
#include <net/iw_handler.h>
#endif /* CONFIG_NET_RADIO || CONFIG_NET_PCMCIA_RADIO */
#ifdef CONFIG_PLIP
extern int plip_init(void);
#endif
 
 
/* This define, if set, will randomly drop a packet when congestion
* is more than moderate. It helps fairness in the multi-interface
* case when one of them is a hog, but it kills performance for the
* single interface case so it is off now by default.
*/
#undef RAND_LIE
 
/* Setting this will sample the queue lengths and thus congestion
* via a timer instead of as each packet is received.
*/
#undef OFFLINE_SAMPLE
 
NET_PROFILE_DEFINE(dev_queue_xmit)
NET_PROFILE_DEFINE(softnet_process)
 
const char *if_port_text[] = {
"unknown",
"BNC",
"10baseT",
"AUI",
"100baseT",
"100baseTX",
"100baseFX"
};
 
/*
* The list of packet types we will receive (as opposed to discard)
* and the routines to invoke.
*
* Why 16. Because with 16 the only overlap we get on a hash of the
* low nibble of the protocol value is RARP/SNAP/X.25.
*
* NOTE: That is no longer true with the addition of VLAN tags. Not
* sure which should go first, but I bet it won't make much
* difference if we are running VLANs. The good news is that
* this protocol won't be in the list unless compiled in, so
 * the average user (w/out VLANs) will not be adversely affected.
* --BLG
*
* 0800 IP
* 8100 802.1Q VLAN
* 0001 802.3
* 0002 AX.25
* 0004 802.2
* 8035 RARP
* 0005 SNAP
* 0805 X.25
* 0806 ARP
* 8137 IPX
* 0009 Localtalk
* 86DD IPv6
*/
 
static struct packet_type *ptype_base[16]; /* 16 way hashed list */
static struct packet_type *ptype_all = NULL; /* Taps */
 
#ifdef OFFLINE_SAMPLE
static void sample_queue(unsigned long dummy);
static struct timer_list samp_timer = { function: sample_queue };
#endif
 
#ifdef CONFIG_HOTPLUG
static int net_run_sbin_hotplug(struct net_device *dev, char *action);
#else
#define net_run_sbin_hotplug(dev, action) ({ 0; })
#endif
 
/*
* Our notifier list
*/
static struct notifier_block *netdev_chain=NULL;
 
/*
* Device drivers call our routines to queue packets here. We empty the
* queue in the local softnet handler.
*/
struct softnet_data softnet_data[NR_CPUS] __cacheline_aligned;
 
#ifdef CONFIG_NET_FASTROUTE
int netdev_fastroute;
int netdev_fastroute_obstacles;
#endif
 
 
/******************************************************************************************
 
Protocol management and registration routines
 
*******************************************************************************************/
 
/*
* For efficiency
*/
 
int netdev_nit=0;
 
/*
* Add a protocol ID to the list. Now that the input handler is
* smarter we can dispense with all the messy stuff that used to be
* here.
*
 * BEWARE!!! Protocol handlers that mangle input packets
 * MUST BE last in the hash buckets, and checking of protocol handlers
 * MUST start from the promiscuous ptype_all chain in net_bh.
 * It is true now, do not change it.
 * Explanation follows: if a protocol handler that mangles packets
 * were first on the list, it could not sense that the packet
 * is cloned and should be copied-on-write, so it would
 * change it and subsequent readers would get a broken packet.
* --ANK (980803)
*/
 
/**
* dev_add_pack - add packet handler
* @pt: packet type declaration
*
* Add a protocol handler to the networking stack. The passed &packet_type
* is linked into kernel lists and may not be freed until it has been
* removed from the kernel lists.
*/
void dev_add_pack(struct packet_type *pt)
{
int hash;
 
br_write_lock_bh(BR_NETPROTO_LOCK);
 
#ifdef CONFIG_NET_FASTROUTE
/* Hack to detect packet socket */
if ((pt->data) && ((int)(pt->data)!=1)) {
netdev_fastroute_obstacles++;
dev_clear_fastroute(pt->dev);
}
#endif
if (pt->type == htons(ETH_P_ALL)) {
netdev_nit++;
pt->next=ptype_all;
ptype_all=pt;
} else {
hash=ntohs(pt->type)&15;
pt->next = ptype_base[hash];
ptype_base[hash] = pt;
}
br_write_unlock_bh(BR_NETPROTO_LOCK);
}
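 
/*
 * A minimal usage sketch (not part of this file): a protocol registers a
 * handler for its ethertype once at init time. The fields of packet_type
 * are type, dev (NULL means any device), func, data and next; "my_rcv" and
 * "my_packet_type" are placeholders for illustration.
 *
 *	static struct packet_type my_packet_type = {
 *		__constant_htons(ETH_P_IP),
 *		NULL,
 *		my_rcv,
 *		NULL,
 *		NULL,
 *	};
 *
 *	dev_add_pack(&my_packet_type);
 */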
 
 
/**
* dev_remove_pack - remove packet handler
* @pt: packet type declaration
*
* Remove a protocol handler that was previously added to the kernel
* protocol handlers by dev_add_pack(). The passed &packet_type is removed
* from the kernel lists and can be freed or reused once this function
* returns.
*/
void dev_remove_pack(struct packet_type *pt)
{
struct packet_type **pt1;
 
br_write_lock_bh(BR_NETPROTO_LOCK);
 
if (pt->type == htons(ETH_P_ALL)) {
netdev_nit--;
pt1=&ptype_all;
} else {
pt1=&ptype_base[ntohs(pt->type)&15];
}
 
for (; (*pt1) != NULL; pt1 = &((*pt1)->next)) {
if (pt == (*pt1)) {
*pt1 = pt->next;
#ifdef CONFIG_NET_FASTROUTE
if (pt->data)
netdev_fastroute_obstacles--;
#endif
br_write_unlock_bh(BR_NETPROTO_LOCK);
return;
}
}
br_write_unlock_bh(BR_NETPROTO_LOCK);
printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
}
 
/******************************************************************************
 
Device Boot-time Settings Routines
 
*******************************************************************************/
 
/* Boot time configuration table */
static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
 
/**
* netdev_boot_setup_add - add new setup entry
* @name: name of the device
* @map: configured settings for the device
*
 * Adds a new setup entry to the dev_boot_setup list. The function
 * returns 0 on error and 1 on success. This is a generic routine for
 * all netdevices.
*/
int netdev_boot_setup_add(char *name, struct ifmap *map)
{
struct netdev_boot_setup *s;
int i;
 
s = dev_boot_setup;
for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
memset(s[i].name, 0, sizeof(s[i].name));
strcpy(s[i].name, name);
memcpy(&s[i].map, map, sizeof(s[i].map));
break;
}
}
 
if (i >= NETDEV_BOOT_SETUP_MAX)
return 0;
 
return 1;
}
 
/**
* netdev_boot_setup_check - check boot time settings
* @dev: the netdevice
*
* Check boot time settings for the device.
 * Any settings found are applied to the device so they can be used
 * later during device probing.
 * Returns 0 if no settings are found, 1 if they are.
*/
int netdev_boot_setup_check(struct net_device *dev)
{
struct netdev_boot_setup *s;
int i;
 
s = dev_boot_setup;
for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
!strncmp(dev->name, s[i].name, strlen(s[i].name))) {
dev->irq = s[i].map.irq;
dev->base_addr = s[i].map.base_addr;
dev->mem_start = s[i].map.mem_start;
dev->mem_end = s[i].map.mem_end;
return 1;
}
}
return 0;
}
 
/*
 * Saves the settings configured at boot time for any netdevice.
*/
int __init netdev_boot_setup(char *str)
{
int ints[5];
struct ifmap map;
 
str = get_options(str, ARRAY_SIZE(ints), ints);
if (!str || !*str)
return 0;
 
/* Save settings */
memset(&map, 0, sizeof(map));
if (ints[0] > 0)
map.irq = ints[1];
if (ints[0] > 1)
map.base_addr = ints[2];
if (ints[0] > 2)
map.mem_start = ints[3];
if (ints[0] > 3)
map.mem_end = ints[4];
 
/* Add new entry to the list */
return netdev_boot_setup_add(str, &map);
}
 
__setup("netdev=", netdev_boot_setup);
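 
/*
 * For example, booting with "netdev=5,0x340,eth0" should (assuming the
 * usual get_options() behaviour) record irq 5 and I/O base 0x340 for the
 * device that later probes as "eth0": up to four leading integers are
 * taken as irq, base_addr, mem_start and mem_end, and the rest of the
 * string is the device name.
 */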
 
/*****************************************************************************************
 
Device Interface Subroutines
 
******************************************************************************************/
 
/**
* __dev_get_by_name - find a device by its name
* @name: name to find
*
* Find an interface by name. Must be called under RTNL semaphore
* or @dev_base_lock. If the name is found a pointer to the device
* is returned. If the name is not found then %NULL is returned. The
* reference counters are not incremented so the caller must be
* careful with locks.
*/
 
struct net_device *__dev_get_by_name(const char *name)
{
struct net_device *dev;
 
for (dev = dev_base; dev != NULL; dev = dev->next) {
if (strncmp(dev->name, name, IFNAMSIZ) == 0)
return dev;
}
return NULL;
}
 
/**
* dev_get_by_name - find a device by its name
* @name: name to find
*
* Find an interface by name. This can be called from any
* context and does its own locking. The returned handle has
* the usage count incremented and the caller must use dev_put() to
* release it when it is no longer needed. %NULL is returned if no
* matching device is found.
*/
 
struct net_device *dev_get_by_name(const char *name)
{
struct net_device *dev;
 
read_lock(&dev_base_lock);
dev = __dev_get_by_name(name);
if (dev)
dev_hold(dev);
read_unlock(&dev_base_lock);
return dev;
}
 
/*
 The return value was changed to int to prevent illegal usage in the future.
 It is still legal to use it to check for device existence.
 
 Users should understand that the result returned by this function
 is meaningless unless the call was issued under the rtnl semaphore.
*/
 
/**
* dev_get - test if a device exists
* @name: name to test for
*
* Test if a name exists. Returns true if the name is found. In order
* to be sure the name is not allocated or removed during the test the
* caller must hold the rtnl semaphore.
*
 * This function primarily exists for backward compatibility with older
* drivers.
*/
int dev_get(const char *name)
{
struct net_device *dev;
 
read_lock(&dev_base_lock);
dev = __dev_get_by_name(name);
read_unlock(&dev_base_lock);
return dev != NULL;
}
 
/**
* __dev_get_by_index - find a device by its ifindex
* @ifindex: index of device
*
* Search for an interface by index. Returns %NULL if the device
* is not found or a pointer to the device. The device has not
* had its reference counter increased so the caller must be careful
* about locking. The caller must hold either the RTNL semaphore
* or @dev_base_lock.
*/
 
struct net_device * __dev_get_by_index(int ifindex)
{
struct net_device *dev;
 
for (dev = dev_base; dev != NULL; dev = dev->next) {
if (dev->ifindex == ifindex)
return dev;
}
return NULL;
}
 
 
/**
* dev_get_by_index - find a device by its ifindex
* @ifindex: index of device
*
* Search for an interface by index. Returns NULL if the device
* is not found or a pointer to the device. The device returned has
* had a reference added and the pointer is safe until the user calls
* dev_put to indicate they have finished with it.
*/
 
struct net_device * dev_get_by_index(int ifindex)
{
struct net_device *dev;
 
read_lock(&dev_base_lock);
dev = __dev_get_by_index(ifindex);
if (dev)
dev_hold(dev);
read_unlock(&dev_base_lock);
return dev;
}
 
/**
* dev_getbyhwaddr - find a device by its hardware address
* @type: media type of device
* @ha: hardware address
*
* Search for an interface by MAC address. Returns NULL if the device
* is not found or a pointer to the device. The caller must hold the
* rtnl semaphore. The returned device has not had its ref count increased
* and the caller must therefore be careful about locking
*
* BUGS:
* If the API was consistent this would be __dev_get_by_hwaddr
*/
 
struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
{
struct net_device *dev;
 
ASSERT_RTNL();
 
for (dev = dev_base; dev != NULL; dev = dev->next) {
if (dev->type == type &&
memcmp(dev->dev_addr, ha, dev->addr_len) == 0)
return dev;
}
return NULL;
}
 
/**
* dev_get_by_flags - find any device with given flags
* @if_flags: IFF_* values
* @mask: bitmask of bits in if_flags to check
*
* Search for any interface with the given flags. Returns NULL if a device
* is not found or a pointer to the device. The device returned has
* had a reference added and the pointer is safe until the user calls
* dev_put to indicate they have finished with it.
*/
 
struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask)
{
struct net_device *dev;
 
read_lock(&dev_base_lock);
dev = __dev_get_by_flags(if_flags, mask);
if (dev)
dev_hold(dev);
read_unlock(&dev_base_lock);
return dev;
}
 
/**
* __dev_get_by_flags - find any device with given flags
* @if_flags: IFF_* values
* @mask: bitmask of bits in if_flags to check
*
* Search for any interface with the given flags. Returns NULL if a device
* is not found or a pointer to the device. The caller must hold either
* the RTNL semaphore or @dev_base_lock.
*/
 
struct net_device *__dev_get_by_flags(unsigned short if_flags, unsigned short mask)
{
struct net_device *dev;
 
for (dev = dev_base; dev != NULL; dev = dev->next) {
if (((dev->flags ^ if_flags) & mask) == 0)
return dev;
}
return NULL;
}
 
/**
* dev_alloc_name - allocate a name for a device
* @dev: device
* @name: name format string
*
 * Passed a format string - eg "lt%d" - it will try to find a suitable
 * id. Not efficient for many devices, not called a lot. The caller
* must hold the dev_base or rtnl lock while allocating the name and
* adding the device in order to avoid duplicates. Returns the number
* of the unit assigned or a negative errno code.
*/
 
int dev_alloc_name(struct net_device *dev, const char *name)
{
int i;
char buf[32];
char *p;
 
/*
* Verify the string as this thing may have come from
* the user. There must be either one "%d" and no other "%"
* characters, or no "%" characters at all.
*/
p = strchr(name, '%');
if (p && (p[1] != 'd' || strchr(p+2, '%')))
return -EINVAL;
 
/*
* If you need over 100 please also fix the algorithm...
*/
for (i = 0; i < 100; i++) {
snprintf(buf,sizeof(buf),name,i);
if (__dev_get_by_name(buf) == NULL) {
strcpy(dev->name, buf);
return i;
}
}
return -ENFILE; /* Over 100 of the things .. bail out! */
}
 
/**
* dev_alloc - allocate a network device and name
* @name: name format string
* @err: error return pointer
*
* Passed a format string, eg. "lt%d", it will allocate a network device
* and space for the name. %NULL is returned if no memory is available.
* If the allocation succeeds then the name is assigned and the
* device pointer returned. %NULL is returned if the name allocation
* failed. The cause of an error is returned as a negative errno code
 * in the variable that @err points to.
*
* The caller must hold the @dev_base or RTNL locks when doing this in
* order to avoid duplicate name allocations.
*/
 
struct net_device *dev_alloc(const char *name, int *err)
{
struct net_device *dev=kmalloc(sizeof(struct net_device), GFP_KERNEL);
if (dev == NULL) {
*err = -ENOBUFS;
return NULL;
}
memset(dev, 0, sizeof(struct net_device));
*err = dev_alloc_name(dev, name);
if (*err < 0) {
kfree(dev);
return NULL;
}
return dev;
}
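 
/*
 * A minimal usage sketch (not part of this file): a driver wanting the next
 * free ethN slot might do
 *
 *	int err;
 *	struct net_device *dev = dev_alloc("eth%d", &err);
 *
 *	if (dev == NULL)
 *		return err;
 *
 * and then fill in its methods before registering the device.
 */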
 
/**
* netdev_state_change - device changes state
* @dev: device to cause notification
*
* Called to indicate a device has changed state. This function calls
* the notifier chains for netdev_chain and sends a NEWLINK message
* to the routing socket.
*/
void netdev_state_change(struct net_device *dev)
{
if (dev->flags&IFF_UP) {
notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
}
}
 
 
#ifdef CONFIG_KMOD
 
/**
* dev_load - load a network module
* @name: name of interface
*
* If a network interface is not present and the process has suitable
* privileges this function loads the module. If module loading is not
* available in this kernel then it becomes a nop.
*/
 
void dev_load(const char *name)
{
if (!dev_get(name) && capable(CAP_SYS_MODULE))
request_module(name);
}
 
#else
 
extern inline void dev_load(const char *unused){;}
 
#endif
 
static int default_rebuild_header(struct sk_buff *skb)
{
printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n", skb->dev ? skb->dev->name : "NULL!!!");
kfree_skb(skb);
return 1;
}
 
/**
* dev_open - prepare an interface for use.
* @dev: device to open
*
* Takes a device from down to up state. The device's private open
* function is invoked and then the multicast lists are loaded. Finally
* the device is moved into the up state and a %NETDEV_UP message is
* sent to the netdev notifier chain.
*
* Calling this function on an active interface is a nop. On a failure
* a negative errno code is returned.
*/
int dev_open(struct net_device *dev)
{
int ret = 0;
 
/*
* Is it already up?
*/
 
if (dev->flags&IFF_UP)
return 0;
 
/*
* Is it even present?
*/
if (!netif_device_present(dev))
return -ENODEV;
 
/*
* Call device private open method
*/
if (try_inc_mod_count(dev->owner)) {
set_bit(__LINK_STATE_START, &dev->state);
if (dev->open) {
ret = dev->open(dev);
if (ret != 0) {
clear_bit(__LINK_STATE_START, &dev->state);
if (dev->owner)
__MOD_DEC_USE_COUNT(dev->owner);
}
}
} else {
ret = -ENODEV;
}
 
/*
* If it went open OK then:
*/
if (ret == 0)
{
/*
* Set the flags.
*/
dev->flags |= IFF_UP;
 
/*
* Initialize multicasting status
*/
dev_mc_upload(dev);
 
/*
* Wakeup transmit queue engine
*/
dev_activate(dev);
 
/*
* ... and announce new interface.
*/
notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
}
return(ret);
}
 
#ifdef CONFIG_NET_FASTROUTE
 
static void dev_do_clear_fastroute(struct net_device *dev)
{
if (dev->accept_fastpath) {
int i;
 
for (i=0; i<=NETDEV_FASTROUTE_HMASK; i++) {
struct dst_entry *dst;
 
write_lock_irq(&dev->fastpath_lock);
dst = dev->fastpath[i];
dev->fastpath[i] = NULL;
write_unlock_irq(&dev->fastpath_lock);
 
dst_release(dst);
}
}
}
 
void dev_clear_fastroute(struct net_device *dev)
{
if (dev) {
dev_do_clear_fastroute(dev);
} else {
read_lock(&dev_base_lock);
for (dev = dev_base; dev; dev = dev->next)
dev_do_clear_fastroute(dev);
read_unlock(&dev_base_lock);
}
}
#endif
 
/**
* dev_close - shutdown an interface.
* @dev: device to shutdown
*
* This function moves an active device into down state. A
* %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
* is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
* chain.
*/
int dev_close(struct net_device *dev)
{
if (!(dev->flags&IFF_UP))
return 0;
 
/*
 * Tell people we are going down, so that they can
 * prepare for it while the device is still operating.
*/
notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
 
dev_deactivate(dev);
 
clear_bit(__LINK_STATE_START, &dev->state);
 
/* Synchronize with any scheduled poll. We cannot touch the poll list,
 * it may even be on a different cpu. So just clear netif_running()
 * and wait until the poll really happens. Actually, the best place
 * for this is inside dev->stop() after the device has stopped its irq
 * engine, but this requires more changes in devices. */
 
smp_mb__after_clear_bit(); /* Commit netif_running(). */
while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
/* No hurry. */
current->state = TASK_INTERRUPTIBLE;
schedule_timeout(1);
}
 
/*
* Call the device specific close. This cannot fail.
* Only if device is UP
*
* We allow it to be called even after a DETACH hot-plug
* event.
*/
if (dev->stop)
dev->stop(dev);
 
/*
* Device is now down.
*/
 
dev->flags &= ~IFF_UP;
#ifdef CONFIG_NET_FASTROUTE
dev_clear_fastroute(dev);
#endif
 
/*
* Tell people we are down
*/
notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
 
/*
* Drop the module refcount
*/
if (dev->owner)
__MOD_DEC_USE_COUNT(dev->owner);
 
return(0);
}
 
 
/*
* Device change register/unregister. These are not inline or static
* as we export them to the world.
*/
/**
* register_netdevice_notifier - register a network notifier block
* @nb: notifier
*
* Register a notifier to be called when network device events occur.
* The notifier passed is linked into the kernel structures and must
* not be reused until it has been unregistered. A negative errno code
* is returned on a failure.
*/
 
int register_netdevice_notifier(struct notifier_block *nb)
{
return notifier_chain_register(&netdev_chain, nb);
}
 
/**
* unregister_netdevice_notifier - unregister a network notifier block
* @nb: notifier
*
* Unregister a notifier previously registered by
 * register_netdevice_notifier(). The notifier is unlinked from the
* kernel structures and may then be reused. A negative errno code
* is returned on a failure.
*/
 
int unregister_netdevice_notifier(struct notifier_block *nb)
{
return notifier_chain_unregister(&netdev_chain,nb);
}
 
/*
* Support routine. Sends outgoing frames to any network
* taps currently in use.
*/
 
void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
{
struct packet_type *ptype;
do_gettimeofday(&skb->stamp);
 
br_read_lock(BR_NETPROTO_LOCK);
for (ptype = ptype_all; ptype!=NULL; ptype = ptype->next)
{
/* Never send packets back to the socket
* they originated from - MvS (miquels@drinkel.ow.org)
*/
if ((ptype->dev == dev || !ptype->dev) &&
((struct sock *)ptype->data != skb->sk))
{
struct sk_buff *skb2;
if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL)
break;
 
/* skb->nh should be correctly
set by sender, so that the second statement is
just protection against buggy protocols.
*/
skb2->mac.raw = skb2->data;
 
if (skb2->nh.raw < skb2->data || skb2->nh.raw > skb2->tail) {
if (net_ratelimit())
printk(KERN_CRIT "protocol %04x is buggy, dev %s\n", skb2->protocol, dev->name);
skb2->nh.raw = skb2->data;
}
 
skb2->h.raw = skb2->nh.raw;
skb2->pkt_type = PACKET_OUTGOING;
ptype->func(skb2, skb->dev, ptype);
}
}
br_read_unlock(BR_NETPROTO_LOCK);
}
 
/* Calculate the checksum in the case when the packet is misrouted.
 * If it fails for some reason, ignore it and send the skb with a wrong
 * checksum.
*/
struct sk_buff * skb_checksum_help(struct sk_buff *skb)
{
int offset;
unsigned int csum;
 
offset = skb->h.raw - skb->data;
if (offset > (int)skb->len)
BUG();
csum = skb_checksum(skb, offset, skb->len-offset, 0);
 
offset = skb->tail - skb->h.raw;
if (offset <= 0)
BUG();
if (skb->csum+2 > offset)
BUG();
 
*(u16*)(skb->h.raw + skb->csum) = csum_fold(csum);
skb->ip_summed = CHECKSUM_NONE;
return skb;
}
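 
/*
 * Used by dev_queue_xmit() below: when an skb is handed down with
 * CHECKSUM_HW but the device cannot checksum this protocol in hardware,
 * the checksum is completed in software here over the data starting at
 * skb->h.raw and folded into the 16-bit slot at skb->h.raw + skb->csum.
 */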
 
#ifdef CONFIG_HIGHMEM
/* Actually, we should eliminate this check as soon as we know that:
 * 1. An IOMMU is present and allows mapping all the memory.
* 2. No high memory really exists on this machine.
*/
 
static inline int
illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
int i;
 
if (dev->features&NETIF_F_HIGHDMA)
return 0;
 
for (i=0; i<skb_shinfo(skb)->nr_frags; i++)
if (skb_shinfo(skb)->frags[i].page >= highmem_start_page)
return 1;
 
return 0;
}
#else
#define illegal_highdma(dev, skb) (0)
#endif
 
/**
* dev_queue_xmit - transmit a buffer
* @skb: buffer to transmit
*
* Queue a buffer for transmission to a network device. The caller must
* have set the device and priority and built the buffer before calling this
* function. The function can be called from an interrupt.
*
* A negative errno code is returned on a failure. A success does not
* guarantee the frame will be transmitted as it may be dropped due
* to congestion or traffic shaping.
*/
 
int dev_queue_xmit(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
struct Qdisc *q;
 
if (skb_shinfo(skb)->frag_list &&
!(dev->features&NETIF_F_FRAGLIST) &&
skb_linearize(skb, GFP_ATOMIC) != 0) {
kfree_skb(skb);
return -ENOMEM;
}
 
/* A fragmented skb is linearized if the device does not support SG,
 * or if at least one of the fragments is in highmem and the device
 * does not support DMA from it.
*/
if (skb_shinfo(skb)->nr_frags &&
(!(dev->features&NETIF_F_SG) || illegal_highdma(dev, skb)) &&
skb_linearize(skb, GFP_ATOMIC) != 0) {
kfree_skb(skb);
return -ENOMEM;
}
 
/* If packet is not checksummed and device does not support
* checksumming for this protocol, complete checksumming here.
*/
if (skb->ip_summed == CHECKSUM_HW &&
(!(dev->features&(NETIF_F_HW_CSUM|NETIF_F_NO_CSUM)) &&
(!(dev->features&NETIF_F_IP_CSUM) ||
skb->protocol != htons(ETH_P_IP)))) {
if ((skb = skb_checksum_help(skb)) == NULL)
return -ENOMEM;
}
 
/* Grab device queue */
spin_lock_bh(&dev->queue_lock);
q = dev->qdisc;
if (q->enqueue) {
int ret = q->enqueue(skb, q);
 
qdisc_run(dev);
 
spin_unlock_bh(&dev->queue_lock);
return ret == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : ret;
}
 
/* The device has no queue. Common case for software devices:
 loopback, all sorts of tunnels...
 
 Really, it is unlikely that xmit_lock protection is necessary here.
 (E.g. loopback and IP tunnels are clean, ignoring statistics counters.)
 However, it is possible that they rely on the protection
 we provide here.
 
 Check this and shoot the lock. It is not prone to deadlocks.
 Or shoot the noqueue qdisc, it is even simpler 8)
*/
if (dev->flags&IFF_UP) {
int cpu = smp_processor_id();
 
if (dev->xmit_lock_owner != cpu) {
spin_unlock(&dev->queue_lock);
spin_lock(&dev->xmit_lock);
dev->xmit_lock_owner = cpu;
 
if (!netif_queue_stopped(dev)) {
if (netdev_nit)
dev_queue_xmit_nit(skb,dev);
 
if (dev->hard_start_xmit(skb, dev) == 0) {
dev->xmit_lock_owner = -1;
spin_unlock_bh(&dev->xmit_lock);
return 0;
}
}
dev->xmit_lock_owner = -1;
spin_unlock_bh(&dev->xmit_lock);
if (net_ratelimit())
printk(KERN_CRIT "Virtual device %s asks to queue packet!\n", dev->name);
kfree_skb(skb);
return -ENETDOWN;
} else {
/* Recursion is detected! It is possible, unfortunately */
if (net_ratelimit())
printk(KERN_CRIT "Dead loop on virtual device %s, fix it urgently!\n", dev->name);
}
}
spin_unlock_bh(&dev->queue_lock);
 
kfree_skb(skb);
return -ENETDOWN;
}
 
 
/*=======================================================================
Receiver routines
=======================================================================*/
 
int netdev_max_backlog = 300;
int weight_p = 64; /* old backlog weight */
/* These numbers are selected based on intuition and some
* experimentation; if you have a more scientific way of doing this,
* please go ahead and fix things.
*/
int no_cong_thresh = 10;
int no_cong = 20;
int lo_cong = 100;
int mod_cong = 290;
 
struct netif_rx_stats netdev_rx_stat[NR_CPUS];
 
 
#ifdef CONFIG_NET_HW_FLOWCONTROL
atomic_t netdev_dropping = ATOMIC_INIT(0);
static unsigned long netdev_fc_mask = 1;
unsigned long netdev_fc_xoff = 0;
spinlock_t netdev_fc_lock = SPIN_LOCK_UNLOCKED;
 
static struct
{
void (*stimul)(struct net_device *);
struct net_device *dev;
} netdev_fc_slots[BITS_PER_LONG];
 
int netdev_register_fc(struct net_device *dev, void (*stimul)(struct net_device *dev))
{
int bit = 0;
unsigned long flags;
 
spin_lock_irqsave(&netdev_fc_lock, flags);
if (netdev_fc_mask != ~0UL) {
bit = ffz(netdev_fc_mask);
netdev_fc_slots[bit].stimul = stimul;
netdev_fc_slots[bit].dev = dev;
set_bit(bit, &netdev_fc_mask);
clear_bit(bit, &netdev_fc_xoff);
}
spin_unlock_irqrestore(&netdev_fc_lock, flags);
return bit;
}
 
void netdev_unregister_fc(int bit)
{
unsigned long flags;
 
spin_lock_irqsave(&netdev_fc_lock, flags);
if (bit > 0) {
netdev_fc_slots[bit].stimul = NULL;
netdev_fc_slots[bit].dev = NULL;
clear_bit(bit, &netdev_fc_mask);
clear_bit(bit, &netdev_fc_xoff);
}
spin_unlock_irqrestore(&netdev_fc_lock, flags);
}
 
static void netdev_wakeup(void)
{
unsigned long xoff;
 
spin_lock(&netdev_fc_lock);
xoff = netdev_fc_xoff;
netdev_fc_xoff = 0;
while (xoff) {
int i = ffz(~xoff);
xoff &= ~(1<<i);
netdev_fc_slots[i].stimul(netdev_fc_slots[i].dev);
}
spin_unlock(&netdev_fc_lock);
}
#endif
 
static void get_sample_stats(int cpu)
{
#ifdef RAND_LIE
unsigned long rd;
int rq;
#endif
int blog = softnet_data[cpu].input_pkt_queue.qlen;
int avg_blog = softnet_data[cpu].avg_blog;
 
avg_blog = (avg_blog >> 1)+ (blog >> 1);
 
if (avg_blog > mod_cong) {
/* Above moderate congestion levels. */
softnet_data[cpu].cng_level = NET_RX_CN_HIGH;
#ifdef RAND_LIE
rd = net_random();
rq = rd % netdev_max_backlog;
if (rq < avg_blog) /* unlucky bastard */
softnet_data[cpu].cng_level = NET_RX_DROP;
#endif
} else if (avg_blog > lo_cong) {
softnet_data[cpu].cng_level = NET_RX_CN_MOD;
#ifdef RAND_LIE
rd = net_random();
rq = rd % netdev_max_backlog;
if (rq < avg_blog) /* unlucky bastard */
softnet_data[cpu].cng_level = NET_RX_CN_HIGH;
#endif
} else if (avg_blog > no_cong)
softnet_data[cpu].cng_level = NET_RX_CN_LOW;
else /* no congestion */
softnet_data[cpu].cng_level = NET_RX_SUCCESS;
 
softnet_data[cpu].avg_blog = avg_blog;
}
 
#ifdef OFFLINE_SAMPLE
static void sample_queue(unsigned long dummy)
{
/* 10 ms or 1 ms -- I don't care -- JHS */
int next_tick = 1;
int cpu = smp_processor_id();
 
get_sample_stats(cpu);
next_tick += jiffies;
mod_timer(&samp_timer, next_tick);
}
#endif
 
 
/**
* netif_rx - post buffer to the network code
* @skb: buffer to post
*
* This function receives a packet from a device driver and queues it for
* the upper (protocol) levels to process. It always succeeds. The buffer
* may be dropped during processing for congestion control or by the
* protocol layers.
*
* return values:
* NET_RX_SUCCESS (no congestion)
* NET_RX_CN_LOW (low congestion)
* NET_RX_CN_MOD (moderate congestion)
* NET_RX_CN_HIGH (high congestion)
* NET_RX_DROP (packet was dropped)
*
*
*/
 
int netif_rx(struct sk_buff *skb)
{
int this_cpu = smp_processor_id();
struct softnet_data *queue;
unsigned long flags;
 
if (skb->stamp.tv_sec == 0)
do_gettimeofday(&skb->stamp);
 
/* The code is rearranged so that the path is shortest
when the CPU is congested but still operating.
*/
queue = &softnet_data[this_cpu];
 
local_irq_save(flags);
 
netdev_rx_stat[this_cpu].total++;
if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
if (queue->input_pkt_queue.qlen) {
if (queue->throttle)
goto drop;
 
enqueue:
dev_hold(skb->dev);
__skb_queue_tail(&queue->input_pkt_queue,skb);
local_irq_restore(flags);
#ifndef OFFLINE_SAMPLE
get_sample_stats(this_cpu);
#endif
return queue->cng_level;
}
 
if (queue->throttle) {
queue->throttle = 0;
#ifdef CONFIG_NET_HW_FLOWCONTROL
if (atomic_dec_and_test(&netdev_dropping))
netdev_wakeup();
#endif
}
 
netif_rx_schedule(&queue->blog_dev);
goto enqueue;
}
 
if (queue->throttle == 0) {
queue->throttle = 1;
netdev_rx_stat[this_cpu].throttled++;
#ifdef CONFIG_NET_HW_FLOWCONTROL
atomic_inc(&netdev_dropping);
#endif
}
 
drop:
netdev_rx_stat[this_cpu].dropped++;
local_irq_restore(flags);
 
kfree_skb(skb);
return NET_RX_DROP;
}
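
/*
 * Illustrative sketch, not part of the original file: the usual receive
 * path of an Ethernet driver handing a frame to netif_rx(). The helper
 * name and the way the hardware data is obtained are assumptions.
 */
static void example_driver_rx(struct net_device *dev, u8 *buf, int len)
{
	struct sk_buff *skb = dev_alloc_skb(len + 2);

	if (skb == NULL)
		return;				/* out of memory: drop the frame */

	skb_reserve(skb, 2);			/* align the IP header on a 16-byte boundary */
	memcpy(skb_put(skb, len), buf, len);

	skb->dev = dev;
	skb->protocol = eth_type_trans(skb, dev);
	skb->ip_summed = CHECKSUM_NONE;		/* no hardware checksum assumed */

	netif_rx(skb);				/* the returned congestion level is often ignored */
}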
 
/* Deliver skb to an old protocol which is not well threaded
or which does not understand shared skbs.
*/
static int deliver_to_old_ones(struct packet_type *pt, struct sk_buff *skb, int last)
{
static spinlock_t net_bh_lock = SPIN_LOCK_UNLOCKED;
int ret = NET_RX_DROP;
 
 
if (!last) {
skb = skb_clone(skb, GFP_ATOMIC);
if (skb == NULL)
return ret;
}
if (skb_is_nonlinear(skb) && skb_linearize(skb, GFP_ATOMIC) != 0) {
kfree_skb(skb);
return ret;
}
 
/* The assumption (a correct one) is that old protocols
did not depend on BHs other than NET_BH and TIMER_BH.
*/
 
/* Emulate NET_BH with special spinlock */
spin_lock(&net_bh_lock);
 
/* Disable timers and wait for all timers completion */
tasklet_disable(bh_task_vec+TIMER_BH);
 
ret = pt->func(skb, skb->dev, pt);
 
tasklet_hi_enable(bh_task_vec+TIMER_BH);
spin_unlock(&net_bh_lock);
return ret;
}
 
static __inline__ void skb_bond(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
 
if (dev->master) {
skb->real_dev = skb->dev;
skb->dev = dev->master;
}
}
 
static void net_tx_action(struct softirq_action *h)
{
int cpu = smp_processor_id();
 
if (softnet_data[cpu].completion_queue) {
struct sk_buff *clist;
 
local_irq_disable();
clist = softnet_data[cpu].completion_queue;
softnet_data[cpu].completion_queue = NULL;
local_irq_enable();
 
while (clist != NULL) {
struct sk_buff *skb = clist;
clist = clist->next;
 
BUG_TRAP(atomic_read(&skb->users) == 0);
__kfree_skb(skb);
}
}
 
if (softnet_data[cpu].output_queue) {
struct net_device *head;
 
local_irq_disable();
head = softnet_data[cpu].output_queue;
softnet_data[cpu].output_queue = NULL;
local_irq_enable();
 
while (head != NULL) {
struct net_device *dev = head;
head = head->next_sched;
 
smp_mb__before_clear_bit();
clear_bit(__LINK_STATE_SCHED, &dev->state);
 
if (spin_trylock(&dev->queue_lock)) {
qdisc_run(dev);
spin_unlock(&dev->queue_lock);
} else {
netif_schedule(dev);
}
}
}
}
 
 
#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
void (*br_handle_frame_hook)(struct sk_buff *skb) = NULL;
#endif
 
static __inline__ int handle_bridge(struct sk_buff *skb,
struct packet_type *pt_prev)
{
int ret = NET_RX_DROP;
 
if (pt_prev) {
if (!pt_prev->data)
ret = deliver_to_old_ones(pt_prev, skb, 0);
else {
atomic_inc(&skb->users);
ret = pt_prev->func(skb, skb->dev, pt_prev);
}
}
 
br_handle_frame_hook(skb);
return ret;
}
 
 
#ifdef CONFIG_NET_DIVERT
static inline int handle_diverter(struct sk_buff *skb)
{
/* if diversion is supported on device, then divert */
if (skb->dev->divert && skb->dev->divert->divert)
divert_frame(skb);
return 0;
}
#endif /* CONFIG_NET_DIVERT */
 
int netif_receive_skb(struct sk_buff *skb)
{
struct packet_type *ptype, *pt_prev;
int ret = NET_RX_DROP;
unsigned short type;
 
if (skb->stamp.tv_sec == 0)
do_gettimeofday(&skb->stamp);
 
skb_bond(skb);
 
netdev_rx_stat[smp_processor_id()].total++;
 
#ifdef CONFIG_NET_FASTROUTE
if (skb->pkt_type == PACKET_FASTROUTE) {
netdev_rx_stat[smp_processor_id()].fastroute_deferred_out++;
return dev_queue_xmit(skb);
}
#endif
 
skb->h.raw = skb->nh.raw = skb->data;
 
pt_prev = NULL;
for (ptype = ptype_all; ptype; ptype = ptype->next) {
if (!ptype->dev || ptype->dev == skb->dev) {
if (pt_prev) {
if (!pt_prev->data) {
ret = deliver_to_old_ones(pt_prev, skb, 0);
} else {
atomic_inc(&skb->users);
ret = pt_prev->func(skb, skb->dev, pt_prev);
}
}
pt_prev = ptype;
}
}
 
#ifdef CONFIG_NET_DIVERT
if (skb->dev->divert && skb->dev->divert->divert)
ret = handle_diverter(skb);
#endif /* CONFIG_NET_DIVERT */
#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
if (skb->dev->br_port != NULL && br_handle_frame_hook != NULL &&
skb->pkt_type != PACKET_LOOPBACK) {
return handle_bridge(skb, pt_prev);
}
#endif
 
type = skb->protocol;
for (ptype=ptype_base[ntohs(type)&15];ptype;ptype=ptype->next) {
if (ptype->type == type &&
(!ptype->dev || ptype->dev == skb->dev)) {
if (pt_prev) {
if (!pt_prev->data) {
ret = deliver_to_old_ones(pt_prev, skb, 0);
} else {
atomic_inc(&skb->users);
ret = pt_prev->func(skb, skb->dev, pt_prev);
}
}
pt_prev = ptype;
}
}
 
if (pt_prev) {
if (!pt_prev->data) {
ret = deliver_to_old_ones(pt_prev, skb, 1);
} else {
ret = pt_prev->func(skb, skb->dev, pt_prev);
}
} else {
kfree_skb(skb);
/* Jamal, now you will not be able to escape explaining
* to me how you were going to use this. :-)
*/
ret = NET_RX_DROP;
}
 
return ret;
}
 
static int process_backlog(struct net_device *backlog_dev, int *budget)
{
int work = 0;
int quota = min(backlog_dev->quota, *budget);
int this_cpu = smp_processor_id();
struct softnet_data *queue = &softnet_data[this_cpu];
unsigned long start_time = jiffies;
 
for (;;) {
struct sk_buff *skb;
struct net_device *dev;
 
local_irq_disable();
skb = __skb_dequeue(&queue->input_pkt_queue);
if (skb == NULL)
goto job_done;
local_irq_enable();
 
dev = skb->dev;
 
netif_receive_skb(skb);
 
dev_put(dev);
 
work++;
 
if (work >= quota || jiffies - start_time > 1)
break;
 
#ifdef CONFIG_NET_HW_FLOWCONTROL
if (queue->throttle && queue->input_pkt_queue.qlen < no_cong_thresh ) {
queue->throttle = 0;
if (atomic_dec_and_test(&netdev_dropping)) {
netdev_wakeup();
break;
}
}
#endif
}
 
backlog_dev->quota -= work;
*budget -= work;
return -1;
 
job_done:
backlog_dev->quota -= work;
*budget -= work;
 
list_del(&backlog_dev->poll_list);
smp_mb__before_clear_bit();
netif_poll_enable(backlog_dev);
 
if (queue->throttle) {
queue->throttle = 0;
#ifdef CONFIG_NET_HW_FLOWCONTROL
if (atomic_dec_and_test(&netdev_dropping))
netdev_wakeup();
#endif
}
local_irq_enable();
return 0;
}
 
static void net_rx_action(struct softirq_action *h)
{
int this_cpu = smp_processor_id();
struct softnet_data *queue = &softnet_data[this_cpu];
unsigned long start_time = jiffies;
int budget = netdev_max_backlog;
 
br_read_lock(BR_NETPROTO_LOCK);
local_irq_disable();
 
while (!list_empty(&queue->poll_list)) {
struct net_device *dev;
 
if (budget <= 0 || jiffies - start_time > 1)
goto softnet_break;
 
local_irq_enable();
 
dev = list_entry(queue->poll_list.next, struct net_device, poll_list);
 
if (dev->quota <= 0 || dev->poll(dev, &budget)) {
local_irq_disable();
list_del(&dev->poll_list);
list_add_tail(&dev->poll_list, &queue->poll_list);
if (dev->quota < 0)
dev->quota += dev->weight;
else
dev->quota = dev->weight;
} else {
dev_put(dev);
local_irq_disable();
}
}
 
local_irq_enable();
br_read_unlock(BR_NETPROTO_LOCK);
return;
 
softnet_break:
netdev_rx_stat[this_cpu].time_squeeze++;
__cpu_raise_softirq(this_cpu, NET_RX_SOFTIRQ);
 
local_irq_enable();
br_read_unlock(BR_NETPROTO_LOCK);
}
 
static gifconf_func_t * gifconf_list [NPROTO];
 
/**
* register_gifconf - register a SIOCGIF handler
* @family: Address family
* @gifconf: Function handler
*
* Register protocol dependent address dumping routines. The handler
* that is passed must not be freed or reused until it has been replaced
* by another handler.
*/
int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
{
if (family>=NPROTO)
return -EINVAL;
gifconf_list[family] = gifconf;
return 0;
}
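
/*
 * Illustrative sketch, not part of the original file: an address family
 * registers its SIOCGIFCONF dumper once at protocol init time. The
 * handler, init function and the AF_INET family below are assumptions;
 * as dev_ifconf() above shows, the handler is called with a NULL buffer
 * when only the required length is wanted.
 */
static int example_gifconf(struct net_device *dev, char *buf, int len)
{
	/* A real handler writes one struct ifreq per address of this family
	 * configured on @dev (when buf != NULL) and returns the number of
	 * bytes consumed, or a negative error code. */
	return 0;				/* nothing to report in this sketch */
}

static void __init example_gifconf_init(void)
{
	register_gifconf(AF_INET, example_gifconf);
}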
 
 
/*
* Map an interface index to its name (SIOCGIFNAME)
*/
 
/*
* We need this ioctl for efficient implementation of the
* if_indextoname() function required by the IPv6 API. Without
* it, we would have to search all the interfaces to find a
* match. --pb
*/
 
static int dev_ifname(struct ifreq *arg)
{
struct net_device *dev;
struct ifreq ifr;
 
/*
* Fetch the caller's info block.
*/
if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
return -EFAULT;
 
read_lock(&dev_base_lock);
dev = __dev_get_by_index(ifr.ifr_ifindex);
if (!dev) {
read_unlock(&dev_base_lock);
return -ENODEV;
}
 
strcpy(ifr.ifr_name, dev->name);
read_unlock(&dev_base_lock);
 
if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
return -EFAULT;
return 0;
}
 
/*
* Perform a SIOCGIFCONF call. This structure will change
* size eventually, and there is nothing I can do about it.
* Thus we will need a 'compatibility mode'.
*/
 
static int dev_ifconf(char *arg)
{
struct ifconf ifc;
struct net_device *dev;
char *pos;
int len;
int total;
int i;
 
/*
* Fetch the caller's info block.
*/
if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
return -EFAULT;
 
pos = ifc.ifc_buf;
len = ifc.ifc_len;
 
/*
* Loop over the interfaces, and write an info block for each.
*/
 
total = 0;
for (dev = dev_base; dev != NULL; dev = dev->next) {
for (i=0; i<NPROTO; i++) {
if (gifconf_list[i]) {
int done;
if (pos==NULL) {
done = gifconf_list[i](dev, NULL, 0);
} else {
done = gifconf_list[i](dev, pos+total, len-total);
}
if (done<0) {
return -EFAULT;
}
total += done;
}
}
}
 
/*
* All done. Write the updated control block back to the caller.
*/
ifc.ifc_len = total;
 
if (copy_to_user(arg, &ifc, sizeof(struct ifconf)))
return -EFAULT;
 
/*
* Both BSD and Solaris return 0 here, so we do too.
*/
return 0;
}
 
/*
* This is invoked by the /proc filesystem handler to display a device
* in detail.
*/
 
#ifdef CONFIG_PROC_FS
 
static int sprintf_stats(char *buffer, struct net_device *dev)
{
struct net_device_stats *stats = (dev->get_stats ? dev->get_stats(dev): NULL);
int size;
if (stats)
size = sprintf(buffer, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu %8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
dev->name,
stats->rx_bytes,
stats->rx_packets, stats->rx_errors,
stats->rx_dropped + stats->rx_missed_errors,
stats->rx_fifo_errors,
stats->rx_length_errors + stats->rx_over_errors
+ stats->rx_crc_errors + stats->rx_frame_errors,
stats->rx_compressed, stats->multicast,
stats->tx_bytes,
stats->tx_packets, stats->tx_errors, stats->tx_dropped,
stats->tx_fifo_errors, stats->collisions,
stats->tx_carrier_errors + stats->tx_aborted_errors
+ stats->tx_window_errors + stats->tx_heartbeat_errors,
stats->tx_compressed);
else
size = sprintf(buffer, "%6s: No statistics available.\n", dev->name);
 
return size;
}
 
/*
* Called from the PROCfs module. This now uses the new arbitrary sized /proc/net interface
* to create /proc/net/dev
*/
static int dev_get_info(char *buffer, char **start, off_t offset, int length)
{
int len = 0;
off_t begin = 0;
off_t pos = 0;
int size;
struct net_device *dev;
 
 
size = sprintf(buffer,
"Inter-| Receive | Transmit\n"
" face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier compressed\n");
pos += size;
len += size;
 
read_lock(&dev_base_lock);
for (dev = dev_base; dev != NULL; dev = dev->next) {
size = sprintf_stats(buffer+len, dev);
len += size;
pos = begin + len;
if (pos < offset) {
len = 0;
begin = pos;
}
if (pos > offset + length)
break;
}
read_unlock(&dev_base_lock);
 
*start = buffer + (offset - begin); /* Start of wanted data */
len -= (offset - begin); /* Start slop */
if (len > length)
len = length; /* Ending slop */
if (len < 0)
len = 0;
return len;
}
 
static int dev_proc_stats(char *buffer, char **start, off_t offset,
int length, int *eof, void *data)
{
int i, lcpu;
int len=0;
 
for (lcpu=0; lcpu<smp_num_cpus; lcpu++) {
i = cpu_logical_map(lcpu);
len += sprintf(buffer+len, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
netdev_rx_stat[i].total,
netdev_rx_stat[i].dropped,
netdev_rx_stat[i].time_squeeze,
netdev_rx_stat[i].throttled,
netdev_rx_stat[i].fastroute_hit,
netdev_rx_stat[i].fastroute_success,
netdev_rx_stat[i].fastroute_defer,
netdev_rx_stat[i].fastroute_deferred_out,
#if 0
netdev_rx_stat[i].fastroute_latency_reduction
#else
netdev_rx_stat[i].cpu_collision
#endif
);
}
 
len -= offset;
 
if (len > length)
len = length;
if (len < 0)
len = 0;
 
*start = buffer + offset;
*eof = 1;
 
return len;
}
 
#endif /* CONFIG_PROC_FS */
 
 
/**
* netdev_set_master - set up master/slave pair
* @slave: slave device
* @master: new master device
*
* Changes the master device of the slave. Pass %NULL to break the
* bonding. The caller must hold the RTNL semaphore. On a failure
* a negative errno code is returned. On success the reference counts
* are adjusted, %RTM_NEWLINK is sent to the routing socket and the
* function returns zero.
*/
int netdev_set_master(struct net_device *slave, struct net_device *master)
{
struct net_device *old = slave->master;
 
ASSERT_RTNL();
 
if (master) {
if (old)
return -EBUSY;
dev_hold(master);
}
 
br_write_lock_bh(BR_NETPROTO_LOCK);
slave->master = master;
br_write_unlock_bh(BR_NETPROTO_LOCK);
 
if (old)
dev_put(old);
 
if (master)
slave->flags |= IFF_SLAVE;
else
slave->flags &= ~IFF_SLAVE;
 
rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
return 0;
}
 
/**
* dev_set_promiscuity - update promiscuity count on a device
* @dev: device
* @inc: modifier
*
* Add or remove promiscuity from a device. While the count in the device
* remains above zero the interface remains promiscuous. Once it hits zero
* the device reverts to normal filtering operation. A negative @inc
* value is used to drop promiscuity on the device.
*/
void dev_set_promiscuity(struct net_device *dev, int inc)
{
unsigned short old_flags = dev->flags;
 
dev->flags |= IFF_PROMISC;
if ((dev->promiscuity += inc) == 0)
dev->flags &= ~IFF_PROMISC;
if (dev->flags^old_flags) {
#ifdef CONFIG_NET_FASTROUTE
if (dev->flags&IFF_PROMISC) {
netdev_fastroute_obstacles++;
dev_clear_fastroute(dev);
} else
netdev_fastroute_obstacles--;
#endif
dev_mc_upload(dev);
printk(KERN_INFO "device %s %s promiscuous mode\n",
dev->name, (dev->flags&IFF_PROMISC) ? "entered" : "left");
}
}
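
/*
 * Illustrative sketch, not part of the original file: a capture facility
 * toggling promiscuous mode through the reference count. The helper
 * names are assumptions; other users may also hold the count, so the
 * interface only leaves promiscuous mode when the count drops to zero.
 */
static void example_start_capture(struct net_device *dev)
{
	dev_set_promiscuity(dev, 1);
}

static void example_stop_capture(struct net_device *dev)
{
	dev_set_promiscuity(dev, -1);
}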
 
/**
* dev_set_allmulti - update allmulti count on a device
* @dev: device
* @inc: modifier
*
* Add or remove reception of all multicast frames on a device. While the
* count in the device remains above zero the interface keeps listening
* to all multicast frames. Once it hits zero the device reverts to normal
* filtering operation. A negative @inc value is used to drop the counter
* when releasing a resource needing all multicasts.
*/
 
void dev_set_allmulti(struct net_device *dev, int inc)
{
unsigned short old_flags = dev->flags;
 
dev->flags |= IFF_ALLMULTI;
if ((dev->allmulti += inc) == 0)
dev->flags &= ~IFF_ALLMULTI;
if (dev->flags^old_flags)
dev_mc_upload(dev);
}
 
int dev_change_flags(struct net_device *dev, unsigned flags)
{
int ret;
int old_flags = dev->flags;
 
/*
* Set the flags on our device.
*/
 
dev->flags = (flags & (IFF_DEBUG|IFF_NOTRAILERS|IFF_NOARP|IFF_DYNAMIC|
IFF_MULTICAST|IFF_PORTSEL|IFF_AUTOMEDIA)) |
(dev->flags & (IFF_UP|IFF_VOLATILE|IFF_PROMISC|IFF_ALLMULTI));
 
/*
* Load in the correct multicast list now the flags have changed.
*/
 
dev_mc_upload(dev);
 
/*
* Have we downed the interface? We handle IFF_UP ourselves
* according to user attempts to set it, rather than blindly
* setting it.
*/
 
ret = 0;
if ((old_flags^flags)&IFF_UP) /* Bit is different ? */
{
ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
 
if (ret == 0)
dev_mc_upload(dev);
}
 
if (dev->flags&IFF_UP &&
((old_flags^dev->flags)&~(IFF_UP|IFF_PROMISC|IFF_ALLMULTI|IFF_VOLATILE)))
notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
 
if ((flags^dev->gflags)&IFF_PROMISC) {
int inc = (flags&IFF_PROMISC) ? +1 : -1;
dev->gflags ^= IFF_PROMISC;
dev_set_promiscuity(dev, inc);
}
 
/* NOTE: the order of synchronization of IFF_PROMISC and IFF_ALLMULTI
is important. Some (broken) drivers set IFF_PROMISC when
IFF_ALLMULTI is requested, without asking us and without reporting it.
*/
if ((flags^dev->gflags)&IFF_ALLMULTI) {
int inc = (flags&IFF_ALLMULTI) ? +1 : -1;
dev->gflags ^= IFF_ALLMULTI;
dev_set_allmulti(dev, inc);
}
 
if (old_flags^dev->flags)
rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags^dev->flags);
 
return ret;
}
 
/*
* Perform the SIOCxIFxxx calls.
*/
static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
{
struct net_device *dev;
int err;
 
if ((dev = __dev_get_by_name(ifr->ifr_name)) == NULL)
return -ENODEV;
 
switch(cmd)
{
case SIOCGIFFLAGS: /* Get interface flags */
ifr->ifr_flags = (dev->flags&~(IFF_PROMISC|IFF_ALLMULTI|IFF_RUNNING))
|(dev->gflags&(IFF_PROMISC|IFF_ALLMULTI));
if (netif_running(dev) && netif_carrier_ok(dev))
ifr->ifr_flags |= IFF_RUNNING;
return 0;
 
case SIOCSIFFLAGS: /* Set interface flags */
return dev_change_flags(dev, ifr->ifr_flags);
case SIOCGIFMETRIC: /* Get the metric on the interface (currently unused) */
ifr->ifr_metric = 0;
return 0;
case SIOCSIFMETRIC: /* Set the metric on the interface (currently unused) */
return -EOPNOTSUPP;
case SIOCGIFMTU: /* Get the MTU of a device */
ifr->ifr_mtu = dev->mtu;
return 0;
case SIOCSIFMTU: /* Set the MTU of a device */
if (ifr->ifr_mtu == dev->mtu)
return 0;
 
/*
* MTU must be positive.
*/
if (ifr->ifr_mtu<0)
return -EINVAL;
 
if (!netif_device_present(dev))
return -ENODEV;
 
if (dev->change_mtu)
err = dev->change_mtu(dev, ifr->ifr_mtu);
else {
dev->mtu = ifr->ifr_mtu;
err = 0;
}
if (!err && dev->flags&IFF_UP)
notifier_call_chain(&netdev_chain, NETDEV_CHANGEMTU, dev);
return err;
 
case SIOCGIFHWADDR:
memcpy(ifr->ifr_hwaddr.sa_data,dev->dev_addr, MAX_ADDR_LEN);
ifr->ifr_hwaddr.sa_family=dev->type;
return 0;
case SIOCSIFHWADDR:
if (dev->set_mac_address == NULL)
return -EOPNOTSUPP;
if (ifr->ifr_hwaddr.sa_family!=dev->type)
return -EINVAL;
if (!netif_device_present(dev))
return -ENODEV;
err = dev->set_mac_address(dev, &ifr->ifr_hwaddr);
if (!err)
notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
return err;
case SIOCSIFHWBROADCAST:
if (ifr->ifr_hwaddr.sa_family!=dev->type)
return -EINVAL;
memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, MAX_ADDR_LEN);
notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
return 0;
 
case SIOCGIFMAP:
ifr->ifr_map.mem_start=dev->mem_start;
ifr->ifr_map.mem_end=dev->mem_end;
ifr->ifr_map.base_addr=dev->base_addr;
ifr->ifr_map.irq=dev->irq;
ifr->ifr_map.dma=dev->dma;
ifr->ifr_map.port=dev->if_port;
return 0;
case SIOCSIFMAP:
if (dev->set_config) {
if (!netif_device_present(dev))
return -ENODEV;
return dev->set_config(dev,&ifr->ifr_map);
}
return -EOPNOTSUPP;
case SIOCADDMULTI:
if (dev->set_multicast_list == NULL ||
ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
return -EINVAL;
if (!netif_device_present(dev))
return -ENODEV;
dev_mc_add(dev,ifr->ifr_hwaddr.sa_data, dev->addr_len, 1);
return 0;
 
case SIOCDELMULTI:
if (dev->set_multicast_list == NULL ||
ifr->ifr_hwaddr.sa_family!=AF_UNSPEC)
return -EINVAL;
if (!netif_device_present(dev))
return -ENODEV;
dev_mc_delete(dev,ifr->ifr_hwaddr.sa_data,dev->addr_len, 1);
return 0;
 
case SIOCGIFINDEX:
ifr->ifr_ifindex = dev->ifindex;
return 0;
 
case SIOCGIFTXQLEN:
ifr->ifr_qlen = dev->tx_queue_len;
return 0;
 
case SIOCSIFTXQLEN:
if (ifr->ifr_qlen<0)
return -EINVAL;
dev->tx_queue_len = ifr->ifr_qlen;
return 0;
 
case SIOCSIFNAME:
if (dev->flags&IFF_UP)
return -EBUSY;
if (__dev_get_by_name(ifr->ifr_newname))
return -EEXIST;
memcpy(dev->name, ifr->ifr_newname, IFNAMSIZ);
dev->name[IFNAMSIZ-1] = 0;
notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
return 0;
 
/*
* Unknown or private ioctl
*/
 
default:
if ((cmd >= SIOCDEVPRIVATE &&
cmd <= SIOCDEVPRIVATE + 15) ||
cmd == SIOCBONDENSLAVE ||
cmd == SIOCBONDRELEASE ||
cmd == SIOCBONDSETHWADDR ||
cmd == SIOCBONDSLAVEINFOQUERY ||
cmd == SIOCBONDINFOQUERY ||
cmd == SIOCBONDCHANGEACTIVE ||
cmd == SIOCGMIIPHY ||
cmd == SIOCGMIIREG ||
cmd == SIOCSMIIREG ||
cmd == SIOCWANDEV) {
if (dev->do_ioctl) {
if (!netif_device_present(dev))
return -ENODEV;
return dev->do_ioctl(dev, ifr, cmd);
}
return -EOPNOTSUPP;
}
 
}
return -EINVAL;
}
 
/*
* This function handles all "interface"-type I/O control requests. The actual
* 'doing' part of this is dev_ifsioc above.
*/
 
/**
* dev_ioctl - network device ioctl
* @cmd: command to issue
* @arg: pointer to a struct ifreq in user space
*
* Issue ioctl functions to devices. This is normally called by the
* user space syscall interfaces but can sometimes be useful for
* other purposes. The return value is the return from the syscall if
* positive or a negative errno code on error.
*/
 
int dev_ioctl(unsigned int cmd, void *arg)
{
struct ifreq ifr;
int ret;
char *colon;
 
/* One special case: SIOCGIFCONF takes ifconf argument
and requires shared lock, because it sleeps writing
to user space.
*/
if (cmd == SIOCGIFCONF) {
rtnl_shlock();
ret = dev_ifconf((char *) arg);
rtnl_shunlock();
return ret;
}
if (cmd == SIOCGIFNAME) {
return dev_ifname((struct ifreq *)arg);
}
 
if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
return -EFAULT;
 
ifr.ifr_name[IFNAMSIZ-1] = 0;
 
colon = strchr(ifr.ifr_name, ':');
if (colon)
*colon = 0;
 
/*
* See which interface the caller is talking about.
*/
switch(cmd)
{
/*
* These ioctl calls:
* - can be done by all.
* - atomic and do not require locking.
* - return a value
*/
case SIOCGIFFLAGS:
case SIOCGIFMETRIC:
case SIOCGIFMTU:
case SIOCGIFHWADDR:
case SIOCGIFSLAVE:
case SIOCGIFMAP:
case SIOCGIFINDEX:
case SIOCGIFTXQLEN:
dev_load(ifr.ifr_name);
read_lock(&dev_base_lock);
ret = dev_ifsioc(&ifr, cmd);
read_unlock(&dev_base_lock);
if (!ret) {
if (colon)
*colon = ':';
if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
return -EFAULT;
}
return ret;
 
case SIOCETHTOOL:
dev_load(ifr.ifr_name);
rtnl_lock();
ret = dev_ethtool(&ifr);
rtnl_unlock();
if (!ret) {
if (colon)
*colon = ':';
if (copy_to_user(arg, &ifr,
sizeof(struct ifreq)))
ret = -EFAULT;
}
return ret;
 
/*
* These ioctl calls:
* - require superuser power.
* - require strict serialization.
* - return a value
*/
case SIOCGMIIPHY:
case SIOCGMIIREG:
if (!capable(CAP_NET_ADMIN))
return -EPERM;
dev_load(ifr.ifr_name);
dev_probe_lock();
rtnl_lock();
ret = dev_ifsioc(&ifr, cmd);
rtnl_unlock();
dev_probe_unlock();
if (!ret) {
if (colon)
*colon = ':';
if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
return -EFAULT;
}
return ret;
 
/*
* These ioctl calls:
* - require superuser power.
* - require strict serialization.
* - do not return a value
*/
case SIOCSIFFLAGS:
case SIOCSIFMETRIC:
case SIOCSIFMTU:
case SIOCSIFMAP:
case SIOCSIFHWADDR:
case SIOCSIFSLAVE:
case SIOCADDMULTI:
case SIOCDELMULTI:
case SIOCSIFHWBROADCAST:
case SIOCSIFTXQLEN:
case SIOCSIFNAME:
case SIOCSMIIREG:
case SIOCBONDENSLAVE:
case SIOCBONDRELEASE:
case SIOCBONDSETHWADDR:
case SIOCBONDSLAVEINFOQUERY:
case SIOCBONDINFOQUERY:
case SIOCBONDCHANGEACTIVE:
if (!capable(CAP_NET_ADMIN))
return -EPERM;
dev_load(ifr.ifr_name);
dev_probe_lock();
rtnl_lock();
ret = dev_ifsioc(&ifr, cmd);
rtnl_unlock();
dev_probe_unlock();
return ret;
case SIOCGIFMEM:
/* Get the per device memory space. We can add this but currently
do not support it */
case SIOCSIFMEM:
/* Set the per device memory buffer space. Not applicable in our case */
case SIOCSIFLINK:
return -EINVAL;
 
/*
* Unknown or private ioctl.
*/
default:
if (cmd == SIOCWANDEV ||
(cmd >= SIOCDEVPRIVATE &&
cmd <= SIOCDEVPRIVATE + 15)) {
dev_load(ifr.ifr_name);
dev_probe_lock();
rtnl_lock();
ret = dev_ifsioc(&ifr, cmd);
rtnl_unlock();
dev_probe_unlock();
if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq)))
return -EFAULT;
return ret;
}
#ifdef WIRELESS_EXT
/* Take care of Wireless Extensions */
if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
/* If command is `set a parameter', or
* `get the encoding parameters', check if
* the user has the right to do it */
if (IW_IS_SET(cmd) || (cmd == SIOCGIWENCODE)) {
if(!capable(CAP_NET_ADMIN))
return -EPERM;
}
dev_load(ifr.ifr_name);
rtnl_lock();
/* Follow me in net/core/wireless.c */
ret = wireless_process_ioctl(&ifr, cmd);
rtnl_unlock();
if (!ret && IW_IS_GET(cmd) &&
copy_to_user(arg, &ifr, sizeof(struct ifreq)))
return -EFAULT;
return ret;
}
#endif /* WIRELESS_EXT */
return -EINVAL;
}
}
 
 
/**
* dev_new_index - allocate an ifindex
*
* Returns a suitable unique value for a new device interface
* number. The caller must hold the rtnl semaphore or the
* dev_base_lock to be sure it remains unique.
*/
int dev_new_index(void)
{
static int ifindex;
for (;;) {
if (++ifindex <= 0)
ifindex=1;
if (__dev_get_by_index(ifindex) == NULL)
return ifindex;
}
}
 
static int dev_boot_phase = 1;
 
/**
* register_netdevice - register a network device
* @dev: device to register
*
* Take a completed network device structure and add it to the kernel
* interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
* chain. 0 is returned on success. A negative errno code is returned
* on a failure to set up the device, or if the name is a duplicate.
*
* Callers must hold the rtnl semaphore. See the comment at the
* end of Space.c for details about the locking. You may want
* register_netdev() instead of this.
*
* BUGS:
* The locking appears insufficient to guarantee two parallel registers
* will not get the same name.
*/
 
int net_dev_init(void);
 
int register_netdevice(struct net_device *dev)
{
struct net_device *d, **dp;
#ifdef CONFIG_NET_DIVERT
int ret;
#endif
 
spin_lock_init(&dev->queue_lock);
spin_lock_init(&dev->xmit_lock);
dev->xmit_lock_owner = -1;
#ifdef CONFIG_NET_FASTROUTE
dev->fastpath_lock=RW_LOCK_UNLOCKED;
#endif
 
if (dev_boot_phase)
net_dev_init();
 
#ifdef CONFIG_NET_DIVERT
ret = alloc_divert_blk(dev);
if (ret)
return ret;
#endif /* CONFIG_NET_DIVERT */
dev->iflink = -1;
 
/* Init, if this function is available */
if (dev->init && dev->init(dev) != 0) {
#ifdef CONFIG_NET_DIVERT
free_divert_blk(dev);
#endif
return -EIO;
}
 
dev->ifindex = dev_new_index();
if (dev->iflink == -1)
dev->iflink = dev->ifindex;
 
/* Check for existence, and append to tail of chain */
for (dp=&dev_base; (d=*dp) != NULL; dp=&d->next) {
if (d == dev || strcmp(d->name, dev->name) == 0) {
#ifdef CONFIG_NET_DIVERT
free_divert_blk(dev);
#endif
return -EEXIST;
}
}
/* Fix illegal SG+CSUM combinations. */
if ((dev->features & NETIF_F_SG) &&
!(dev->features & (NETIF_F_IP_CSUM |
NETIF_F_NO_CSUM |
NETIF_F_HW_CSUM))) {
printk("%s: Dropping NETIF_F_SG since no checksum feature.\n",
dev->name);
dev->features &= ~NETIF_F_SG;
}
 
/*
* A nil rebuild_header routine
* that should never be called; it is used just as a bug trap.
*/
 
if (dev->rebuild_header == NULL)
dev->rebuild_header = default_rebuild_header;
 
/*
* Default initial state at registration is that the
* device is present.
*/
 
set_bit(__LINK_STATE_PRESENT, &dev->state);
 
dev->next = NULL;
dev_init_scheduler(dev);
write_lock_bh(&dev_base_lock);
*dp = dev;
dev_hold(dev);
dev->deadbeaf = 0;
write_unlock_bh(&dev_base_lock);
 
/* Notify protocols, that a new device appeared. */
notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
 
net_run_sbin_hotplug(dev, "register");
 
return 0;
}
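
/*
 * Illustrative sketch, not part of the original file: a minimal driver
 * registering a statically allocated device under the rtnl semaphore,
 * as the comment above requires. The interface name and the open/stop/
 * xmit methods are hypothetical; most drivers simply call
 * register_netdev(), which takes the rtnl semaphore itself.
 */
static int example_open(struct net_device *dev)
{
	netif_start_queue(dev);
	return 0;
}

static int example_stop(struct net_device *dev)
{
	netif_stop_queue(dev);
	return 0;
}

static int example_hard_xmit(struct sk_buff *skb, struct net_device *dev)
{
	dev_kfree_skb(skb);			/* dummy transmitter: just drop */
	return 0;
}

static struct net_device example_dev;

static int __init example_register(void)
{
	int err;

	strcpy(example_dev.name, "exm0");	/* hypothetical interface name */
	ether_setup(&example_dev);		/* fill in generic Ethernet fields */
	example_dev.open = example_open;
	example_dev.stop = example_stop;
	example_dev.hard_start_xmit = example_hard_xmit;

	rtnl_lock();
	err = register_netdevice(&example_dev);
	rtnl_unlock();
	return err;
}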
 
/**
* netdev_finish_unregister - complete unregistration
* @dev: device
*
* Destroy and free a dead device. A value of zero is returned on
* success.
*/
int netdev_finish_unregister(struct net_device *dev)
{
BUG_TRAP(dev->ip_ptr==NULL);
BUG_TRAP(dev->ip6_ptr==NULL);
BUG_TRAP(dev->dn_ptr==NULL);
 
if (!dev->deadbeaf) {
printk(KERN_ERR "Freeing alive device %p, %s\n", dev, dev->name);
return 0;
}
#ifdef NET_REFCNT_DEBUG
printk(KERN_DEBUG "netdev_finish_unregister: %s%s.\n", dev->name,
(dev->features & NETIF_F_DYNALLOC)?"":", old style");
#endif
if (dev->destructor)
dev->destructor(dev);
if (dev->features & NETIF_F_DYNALLOC)
kfree(dev);
return 0;
}
 
/**
* unregister_netdevice - remove device from the kernel
* @dev: device
*
* This function shuts down a device interface and removes it
* from the kernel tables. On success 0 is returned, on a failure
* a negative errno code is returned.
*
* Callers must hold the rtnl semaphore. See the comment at the
* end of Space.c for details about the locking. You may want
* unregister_netdev() instead of this.
*/
 
int unregister_netdevice(struct net_device *dev)
{
unsigned long now, warning_time;
struct net_device *d, **dp;
 
/* If device is running, close it first. */
if (dev->flags & IFF_UP)
dev_close(dev);
 
BUG_TRAP(dev->deadbeaf==0);
dev->deadbeaf = 1;
 
/* And unlink it from device chain. */
for (dp = &dev_base; (d=*dp) != NULL; dp=&d->next) {
if (d == dev) {
write_lock_bh(&dev_base_lock);
*dp = d->next;
write_unlock_bh(&dev_base_lock);
break;
}
}
if (d == NULL) {
printk(KERN_DEBUG "unregister_netdevice: device %s/%p never was registered\n", dev->name, dev);
return -ENODEV;
}
 
/* Synchronize to net_rx_action. */
br_write_lock_bh(BR_NETPROTO_LOCK);
br_write_unlock_bh(BR_NETPROTO_LOCK);
 
if (dev_boot_phase == 0) {
#ifdef CONFIG_NET_FASTROUTE
dev_clear_fastroute(dev);
#endif
 
/* Shutdown queueing discipline. */
dev_shutdown(dev);
 
net_run_sbin_hotplug(dev, "unregister");
 
/* Notify protocols, that we are about to destroy
this device. They should clean all the things.
*/
notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
 
/*
* Flush the multicast chain
*/
dev_mc_discard(dev);
}
 
if (dev->uninit)
dev->uninit(dev);
 
/* Notifier chain MUST detach us from master device. */
BUG_TRAP(dev->master==NULL);
 
#ifdef CONFIG_NET_DIVERT
free_divert_blk(dev);
#endif
 
if (dev->features & NETIF_F_DYNALLOC) {
#ifdef NET_REFCNT_DEBUG
if (atomic_read(&dev->refcnt) != 1)
printk(KERN_DEBUG "unregister_netdevice: holding %s refcnt=%d\n", dev->name, atomic_read(&dev->refcnt)-1);
#endif
dev_put(dev);
return 0;
}
 
/* Last reference is our one */
if (atomic_read(&dev->refcnt) == 1) {
dev_put(dev);
return 0;
}
 
#ifdef NET_REFCNT_DEBUG
printk("unregister_netdevice: waiting %s refcnt=%d\n", dev->name, atomic_read(&dev->refcnt));
#endif
 
/* EXPLANATION. If dev->refcnt is not now 1 (our own reference)
it means that someone in the kernel still has a reference
to this device and we cannot release it.
 
"New style" devices have destructors, hence we can return from this
function and destructor will do all the work later. As of kernel 2.4.0
there are very few "New Style" devices.
 
"Old style" devices expect that the device is free of any references
upon exit from this function.
We cannot return from this function until all such references have
fallen away. This is because the caller of this function will probably
immediately kfree(*dev) and then be unloaded via sys_delete_module.
 
So, we linger until all references fall away. The duration of the
linger is basically unbounded! It is driven by, for example, the
current setting of sysctl_ipfrag_time.
 
After 1 second, we start to rebroadcast unregister notifications
in the hope that careless clients will release the device.
 
*/
 
now = warning_time = jiffies;
while (atomic_read(&dev->refcnt) != 1) {
if ((jiffies - now) > 1*HZ) {
/* Rebroadcast unregister notification */
notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
}
current->state = TASK_INTERRUPTIBLE;
schedule_timeout(HZ/4);
current->state = TASK_RUNNING;
if ((jiffies - warning_time) > 10*HZ) {
printk(KERN_EMERG "unregister_netdevice: waiting for %s to "
"become free. Usage count = %d\n",
dev->name, atomic_read(&dev->refcnt));
warning_time = jiffies;
}
}
dev_put(dev);
return 0;
}
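
/*
 * Illustrative sketch, not part of the original file: the matching
 * teardown for the hypothetical registration sketch shown after
 * register_netdevice() above. unregister_netdev() is the usual wrapper
 * that takes the rtnl semaphore around this call.
 */
static void __exit example_unregister(void)
{
	rtnl_lock();
	unregister_netdevice(&example_dev);
	rtnl_unlock();
}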
 
 
/*
* Initialize the DEV module. At boot time this walks the device list and
* unhooks any devices that fail to initialise (normally hardware not
* present) and leaves us with a valid list of present and active devices.
*
*/
 
extern void net_device_init(void);
extern void ip_auto_config(void);
struct proc_dir_entry *proc_net_drivers;
#ifdef CONFIG_NET_DIVERT
extern void dv_init(void);
#endif /* CONFIG_NET_DIVERT */
 
 
/*
* Callers must hold the rtnl semaphore. See the comment at the
* end of Space.c for details about the locking.
*/
int __init net_dev_init(void)
{
struct net_device *dev, **dp;
int i;
 
if (!dev_boot_phase)
return 0;
 
 
#ifdef CONFIG_NET_DIVERT
dv_init();
#endif /* CONFIG_NET_DIVERT */
/*
* Initialise the packet receive queues.
*/
 
for (i = 0; i < NR_CPUS; i++) {
struct softnet_data *queue;
 
queue = &softnet_data[i];
skb_queue_head_init(&queue->input_pkt_queue);
queue->throttle = 0;
queue->cng_level = 0;
queue->avg_blog = 10; /* arbitrary non-zero */
queue->completion_queue = NULL;
INIT_LIST_HEAD(&queue->poll_list);
set_bit(__LINK_STATE_START, &queue->blog_dev.state);
queue->blog_dev.weight = weight_p;
queue->blog_dev.poll = process_backlog;
atomic_set(&queue->blog_dev.refcnt, 1);
}
 
#ifdef CONFIG_NET_PROFILE
net_profile_init();
NET_PROFILE_REGISTER(dev_queue_xmit);
NET_PROFILE_REGISTER(softnet_process);
#endif
 
#ifdef OFFLINE_SAMPLE
samp_timer.expires = jiffies + (10 * HZ);
add_timer(&samp_timer);
#endif
 
/*
* Add the devices.
* If the call to dev->init fails, the dev is removed
* from the chain disconnecting the device until the
* next reboot.
*
* NB At boot phase networking is dead. No locking is required.
* But we still preserve dev_base_lock for sanity.
*/
 
dp = &dev_base;
while ((dev = *dp) != NULL) {
spin_lock_init(&dev->queue_lock);
spin_lock_init(&dev->xmit_lock);
#ifdef CONFIG_NET_FASTROUTE
dev->fastpath_lock = RW_LOCK_UNLOCKED;
#endif
dev->xmit_lock_owner = -1;
dev->iflink = -1;
dev_hold(dev);
 
/*
* Allocate name. If the init() fails
* the name will be reissued correctly.
*/
if (strchr(dev->name, '%'))
dev_alloc_name(dev, dev->name);
 
/*
* Check boot time settings for the device.
*/
netdev_boot_setup_check(dev);
 
if (dev->init && dev->init(dev)) {
/*
* It failed to come up. It will be unhooked later.
* dev_alloc_name can now advance to the next suitable
* name, which is checked next.
*/
dev->deadbeaf = 1;
dp = &dev->next;
} else {
dp = &dev->next;
dev->ifindex = dev_new_index();
if (dev->iflink == -1)
dev->iflink = dev->ifindex;
if (dev->rebuild_header == NULL)
dev->rebuild_header = default_rebuild_header;
dev_init_scheduler(dev);
set_bit(__LINK_STATE_PRESENT, &dev->state);
}
}
 
/*
* Unhook devices that failed to come up
*/
dp = &dev_base;
while ((dev = *dp) != NULL) {
if (dev->deadbeaf) {
write_lock_bh(&dev_base_lock);
*dp = dev->next;
write_unlock_bh(&dev_base_lock);
dev_put(dev);
} else {
dp = &dev->next;
}
}
 
#ifdef CONFIG_PROC_FS
proc_net_create("dev", 0, dev_get_info);
create_proc_read_entry("net/softnet_stat", 0, 0, dev_proc_stats, NULL);
proc_net_drivers = proc_mkdir("net/drivers", 0);
#ifdef WIRELESS_EXT
/* Available in net/core/wireless.c */
proc_net_create("wireless", 0, dev_get_wireless_info);
#endif /* WIRELESS_EXT */
#endif /* CONFIG_PROC_FS */
 
dev_boot_phase = 0;
 
open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
 
dst_init();
dev_mcast_init();
 
#ifdef CONFIG_NET_SCHED
pktsched_init();
#endif
/*
* Initialise network devices
*/
net_device_init();
 
return 0;
}
 
#ifdef CONFIG_HOTPLUG
 
/* Notify userspace when a netdevice event occurs,
* by running '/sbin/hotplug net' with certain
* environment variables set.
*/
 
static int net_run_sbin_hotplug(struct net_device *dev, char *action)
{
char *argv[3], *envp[5], ifname[12 + IFNAMSIZ], action_str[32];
int i;
 
sprintf(ifname, "INTERFACE=%s", dev->name);
sprintf(action_str, "ACTION=%s", action);
 
i = 0;
argv[i++] = hotplug_path;
argv[i++] = "net";
argv[i] = 0;
 
i = 0;
/* minimal command environment */
envp [i++] = "HOME=/";
envp [i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
envp [i++] = ifname;
envp [i++] = action_str;
envp [i] = 0;
return call_usermodehelper(argv [0], argv, envp);
}
#endif
/datagram.c
0,0 → 1,448
/*
* SUCS NET3:
*
* Generic datagram handling routines. These are generic for all protocols. Possibly a generic IP version on top
* of these would make sense. Not tonight however 8-).
* This is used because the UDP, RAW, PACKET, DDP, IPX, AX.25 and NetROM layers all have identical poll code and mostly
* identical recvmsg() code. So we share it here. The poll was shared before but buried in udp.c so I moved it.
*
* Authors: Alan Cox <alan@redhat.com>. (datagram_poll() from old udp.c code)
*
* Fixes:
* Alan Cox : NULL return from skb_peek_copy() understood
* Alan Cox : Rewrote skb_read_datagram to avoid the skb_peek_copy stuff.
* Alan Cox : Added support for SOCK_SEQPACKET. IPX can no longer use the SO_TYPE hack but
* AX.25 now works right, and SPX is feasible.
* Alan Cox : Fixed write poll of non IP protocol crash.
* Florian La Roche: Changed for my new skbuff handling.
* Darryl Miles : Fixed non-blocking SOCK_SEQPACKET.
* Linus Torvalds : BSD semantic fixes.
* Alan Cox : Datagram iovec handling
* Darryl Miles : Fixed non-blocking SOCK_STREAM.
* Alan Cox : POSIXisms
* Pete Wyckoff : Unconnected accept() fix.
*
*/
 
#include <linux/types.h>
#include <linux/kernel.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/poll.h>
#include <linux/highmem.h>
 
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/checksum.h>
 
 
/*
* Is a socket 'connection oriented' ?
*/
static inline int connection_based(struct sock *sk)
{
return (sk->type==SOCK_SEQPACKET || sk->type==SOCK_STREAM);
}
 
 
/*
* Wait for a packet..
*/
 
static int wait_for_packet(struct sock * sk, int *err, long *timeo_p)
{
int error;
 
DECLARE_WAITQUEUE(wait, current);
 
__set_current_state(TASK_INTERRUPTIBLE);
add_wait_queue_exclusive(sk->sleep, &wait);
 
/* Socket errors? */
error = sock_error(sk);
if (error)
goto out_err;
 
if (!skb_queue_empty(&sk->receive_queue))
goto ready;
 
/* Socket shut down? */
if (sk->shutdown & RCV_SHUTDOWN)
goto out_noerr;
 
/* Sequenced packets can come disconnected. If so we report the problem */
error = -ENOTCONN;
if(connection_based(sk) && !(sk->state==TCP_ESTABLISHED || sk->state==TCP_LISTEN))
goto out_err;
 
/* handle signals */
if (signal_pending(current))
goto interrupted;
 
*timeo_p = schedule_timeout(*timeo_p);
 
ready:
current->state = TASK_RUNNING;
remove_wait_queue(sk->sleep, &wait);
return 0;
 
interrupted:
error = sock_intr_errno(*timeo_p);
out_err:
*err = error;
out:
current->state = TASK_RUNNING;
remove_wait_queue(sk->sleep, &wait);
return error;
out_noerr:
*err = 0;
error = 1;
goto out;
}
 
/*
* Get a datagram skbuff, understands the peeking, nonblocking wakeups and possible
* races. This replaces identical code in packet,raw and udp, as well as the IPX
* AX.25 and Appletalk. It also finally fixes the long standing peek and read
* race for datagram sockets. If you alter this routine remember it must be
* re-entrant.
*
* This function will lock the socket if a skb is returned, so the caller
* needs to unlock the socket in that case (usually by calling skb_free_datagram)
*
* * As of today it does not lock the socket. This function is
* * free of race conditions. This measure should/can significantly
* * improve datagram socket latencies under high load,
* * when copying data to user space takes a lot of time.
* * (BTW I've just killed the last cli() in IP/IPv6/core/netlink/packet
* * 8) Great win.)
* * --ANK (980729)
*
* The order of the tests when we find no data waiting are specified
* quite explicitly by POSIX 1003.1g, don't change them without having
* the standard around please.
*/
 
struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, int *err)
{
int error;
struct sk_buff *skb;
long timeo;
 
/* Caller is allowed not to check sk->err before skb_recv_datagram() */
error = sock_error(sk);
if (error)
goto no_packet;
 
timeo = sock_rcvtimeo(sk, noblock);
 
do {
/* Again only user level code calls this function, so nothing interrupt level
will suddenly eat the receive_queue.
 
Look at the current NFS client, by the way...
However, this function was correct in any case. 8)
*/
if (flags & MSG_PEEK)
{
unsigned long cpu_flags;
 
spin_lock_irqsave(&sk->receive_queue.lock, cpu_flags);
skb = skb_peek(&sk->receive_queue);
if(skb!=NULL)
atomic_inc(&skb->users);
spin_unlock_irqrestore(&sk->receive_queue.lock, cpu_flags);
} else
skb = skb_dequeue(&sk->receive_queue);
 
if (skb)
return skb;
 
/* User doesn't want to wait */
error = -EAGAIN;
if (!timeo)
goto no_packet;
 
} while (wait_for_packet(sk, err, &timeo) == 0);
 
return NULL;
 
no_packet:
*err = error;
return NULL;
}
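
/*
 * Illustrative sketch, not part of the original file: the core of a
 * datagram recvmsg() implementation built on the helpers in this file.
 * The function name and the simplified signature are assumptions; UDP,
 * packet and the other datagram protocols follow essentially this
 * pattern.
 */
static int example_recvmsg(struct sock *sk, struct msghdr *msg, int len, int flags)
{
	struct sk_buff *skb;
	int err, copied;

	skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
	if (skb == NULL)
		return err;

	copied = skb->len;
	if (copied > len) {
		copied = len;			/* truncate to the user buffer */
		msg->msg_flags |= MSG_TRUNC;
	}

	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);

	skb_free_datagram(sk, skb);		/* drop our reference to the buffer */
	return err ? err : copied;
}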
 
void skb_free_datagram(struct sock * sk, struct sk_buff *skb)
{
kfree_skb(skb);
}
 
/*
* Copy a datagram to a linear buffer.
*/
 
int skb_copy_datagram(const struct sk_buff *skb, int offset, char *to, int size)
{
struct iovec iov = { to, size };
 
return skb_copy_datagram_iovec(skb, offset, &iov, size);
}
 
/*
* Copy a datagram to an iovec.
* Note: the iovec is modified during the copy.
*/
int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset, struct iovec *to,
int len)
{
int i, copy;
int start = skb->len - skb->data_len;
 
/* Copy header. */
if ((copy = start-offset) > 0) {
if (copy > len)
copy = len;
if (memcpy_toiovec(to, skb->data + offset, copy))
goto fault;
if ((len -= copy) == 0)
return 0;
offset += copy;
}
 
/* Copy paged appendix. Hmm... why does this look so complicated? */
for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
int end;
 
BUG_TRAP(start <= offset+len);
 
end = start + skb_shinfo(skb)->frags[i].size;
if ((copy = end-offset) > 0) {
int err;
u8 *vaddr;
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
struct page *page = frag->page;
 
if (copy > len)
copy = len;
vaddr = kmap(page);
err = memcpy_toiovec(to, vaddr + frag->page_offset +
offset-start, copy);
kunmap(page);
if (err)
goto fault;
if (!(len -= copy))
return 0;
offset += copy;
}
start = end;
}
 
if (skb_shinfo(skb)->frag_list) {
struct sk_buff *list;
 
for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
int end;
 
BUG_TRAP(start <= offset+len);
 
end = start + list->len;
if ((copy = end-offset) > 0) {
if (copy > len)
copy = len;
if (skb_copy_datagram_iovec(list, offset-start, to, copy))
goto fault;
if ((len -= copy) == 0)
return 0;
offset += copy;
}
start = end;
}
}
if (len == 0)
return 0;
 
fault:
return -EFAULT;
}
 
int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int *csump)
{
int i, copy;
int start = skb->len - skb->data_len;
int pos = 0;
 
/* Copy header. */
if ((copy = start-offset) > 0) {
int err = 0;
if (copy > len)
copy = len;
*csump = csum_and_copy_to_user(skb->data+offset, to, copy, *csump, &err);
if (err)
goto fault;
if ((len -= copy) == 0)
return 0;
offset += copy;
to += copy;
pos = copy;
}
 
for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
int end;
 
BUG_TRAP(start <= offset+len);
 
end = start + skb_shinfo(skb)->frags[i].size;
if ((copy = end-offset) > 0) {
unsigned int csum2;
int err = 0;
u8 *vaddr;
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
struct page *page = frag->page;
 
if (copy > len)
copy = len;
vaddr = kmap(page);
csum2 = csum_and_copy_to_user(vaddr + frag->page_offset +
offset-start, to, copy, 0, &err);
kunmap(page);
if (err)
goto fault;
*csump = csum_block_add(*csump, csum2, pos);
if (!(len -= copy))
return 0;
offset += copy;
to += copy;
pos += copy;
}
start = end;
}
 
if (skb_shinfo(skb)->frag_list) {
struct sk_buff *list;
 
for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
int end;
 
BUG_TRAP(start <= offset+len);
 
end = start + list->len;
if ((copy = end-offset) > 0) {
unsigned int csum2 = 0;
if (copy > len)
copy = len;
if (skb_copy_and_csum_datagram(list, offset-start, to, copy, &csum2))
goto fault;
*csump = csum_block_add(*csump, csum2, pos);
if ((len -= copy) == 0)
return 0;
offset += copy;
to += copy;
pos += copy;
}
start = end;
}
}
if (len == 0)
return 0;
 
fault:
return -EFAULT;
}
 
/* Copy and checksum skb to a user iovec. Caller _must_ check that
the skb will fit into this iovec.
 
Returns: 0 - success.
-EINVAL - checksum failure.
-EFAULT - fault during copy. Beware, in this case iovec can be
modified!
*/
 
int skb_copy_and_csum_datagram_iovec(const struct sk_buff *skb, int hlen, struct iovec *iov)
{
unsigned int csum;
int chunk = skb->len - hlen;
 
/* Skip filled elements. Pretty silly, look at memcpy_toiovec, though 8) */
while (iov->iov_len == 0)
iov++;
 
if (iov->iov_len < chunk) {
if ((unsigned short)csum_fold(skb_checksum(skb, 0, chunk+hlen, skb->csum)))
goto csum_error;
if (skb_copy_datagram_iovec(skb, hlen, iov, chunk))
goto fault;
} else {
csum = csum_partial(skb->data, hlen, skb->csum);
if (skb_copy_and_csum_datagram(skb, hlen, iov->iov_base, chunk, &csum))
goto fault;
if ((unsigned short)csum_fold(csum))
goto csum_error;
iov->iov_len -= chunk;
iov->iov_base += chunk;
}
return 0;
 
csum_error:
return -EINVAL;
 
fault:
return -EFAULT;
}
 
 
 
/*
* Datagram poll: Again totally generic. This also handles
* sequenced packet sockets providing the socket receive queue
* is only ever holding data ready to receive.
*
* Note: when you _don't_ use this routine for this protocol,
* and you use a different write policy from sock_writeable()
* then please supply your own write_space callback.
*/
 
unsigned int datagram_poll(struct file * file, struct socket *sock, poll_table *wait)
{
struct sock *sk = sock->sk;
unsigned int mask;
 
poll_wait(file, sk->sleep, wait);
mask = 0;
 
/* exceptional events? */
if (sk->err || !skb_queue_empty(&sk->error_queue))
mask |= POLLERR;
if (sk->shutdown == SHUTDOWN_MASK)
mask |= POLLHUP;
 
/* readable? */
if (!skb_queue_empty(&sk->receive_queue) || (sk->shutdown&RCV_SHUTDOWN))
mask |= POLLIN | POLLRDNORM;
 
/* Connection-based need to check for termination and startup */
if (connection_based(sk)) {
if (sk->state==TCP_CLOSE)
mask |= POLLHUP;
/* connection hasn't started yet? */
if (sk->state == TCP_SYN_SENT)
return mask;
}
 
/* writable? */
if (sock_writeable(sk))
mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
else
set_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags);
 
return mask;
}
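
/*
 * Illustrative sketch, not part of the original file: datagram protocols
 * normally reuse datagram_poll() directly in their proto_ops table. The
 * structure below is a trimmed, hypothetical example; real tables
 * contain the full set of operations.
 */
static struct proto_ops example_dgram_ops = {
	family:		AF_INET,		/* assumed address family */
	poll:		datagram_poll,		/* the shared generic poll above */
	/* release, bind, sendmsg, recvmsg, ... omitted in this sketch */
};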
/skbuff.c
0,0 → 1,1238
/*
* Routines having to do with the 'struct sk_buff' memory handlers.
*
* Authors: Alan Cox <iiitac@pyr.swan.ac.uk>
* Florian La Roche <rzsfl@rz.uni-sb.de>
*
* Version: $Id: skbuff.c,v 1.1.1.1 2004-04-17 22:13:13 phoenix Exp $
*
* Fixes:
* Alan Cox : Fixed the worst of the load balancer bugs.
* Dave Platt : Interrupt stacking fix.
* Richard Kooijman : Timestamp fixes.
* Alan Cox : Changed buffer format.
* Alan Cox : destructor hook for AF_UNIX etc.
* Linus Torvalds : Better skb_clone.
* Alan Cox : Added skb_copy.
* Alan Cox : Added all the changed routines Linus
* only put in the headers
* Ray VanTassle : Fixed --skb->lock in free
* Alan Cox : skb_copy copy arp field
* Andi Kleen : slabified it.
*
* NOTE:
* The __skb_ routines should be called with interrupts
* disabled, or you better be *real* sure that the operation is atomic
* with respect to whatever list is being frobbed (e.g. via lock_sock()
* or via disabling bottom half handlers, etc).
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
 
/*
* The functions in this file will not compile correctly with gcc 2.4.x
*/
 
#include <linux/config.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
#include <linux/string.h>
#include <linux/skbuff.h>
#include <linux/cache.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/highmem.h>
 
#include <net/protocol.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/checksum.h>
 
#include <asm/uaccess.h>
#include <asm/system.h>
 
int sysctl_hot_list_len = 128;
 
static kmem_cache_t *skbuff_head_cache;
 
static union {
struct sk_buff_head list;
char pad[SMP_CACHE_BYTES];
} skb_head_pool[NR_CPUS];
 
/*
* Keep out-of-line to prevent kernel bloat.
* __builtin_return_address is not used because it is not always
* reliable.
*/
 
/**
* skb_over_panic - private function
* @skb: buffer
* @sz: size
* @here: address
*
* Out of line support code for skb_put(). Not user callable.
*/
void skb_over_panic(struct sk_buff *skb, int sz, void *here)
{
printk("skput:over: %p:%d put:%d dev:%s",
here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
BUG();
}
 
/**
* skb_under_panic - private function
* @skb: buffer
* @sz: size
* @here: address
*
* Out of line support code for skb_push(). Not user callable.
*/
 
void skb_under_panic(struct sk_buff *skb, int sz, void *here)
{
printk("skput:under: %p:%d put:%d dev:%s",
here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
BUG();
}
 
static __inline__ struct sk_buff *skb_head_from_pool(void)
{
struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;
 
if (skb_queue_len(list)) {
struct sk_buff *skb;
unsigned long flags;
 
local_irq_save(flags);
skb = __skb_dequeue(list);
local_irq_restore(flags);
return skb;
}
return NULL;
}
 
static __inline__ void skb_head_to_pool(struct sk_buff *skb)
{
struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;
 
if (skb_queue_len(list) < sysctl_hot_list_len) {
unsigned long flags;
 
local_irq_save(flags);
__skb_queue_head(list, skb);
local_irq_restore(flags);
 
return;
}
kmem_cache_free(skbuff_head_cache, skb);
}
 
 
/* Allocate a new skbuff. We do this ourselves so we can fill in a few
* 'private' fields and also do memory statistics to find all the
* [BEEP] leaks.
*
*/
 
/**
* alloc_skb - allocate a network buffer
* @size: size to allocate
* @gfp_mask: allocation mask
*
* Allocate a new &sk_buff. The returned buffer has no headroom and a
* tail room of @size bytes. The object has a reference count of one.
* The return value is the buffer. On a failure the return is %NULL.
*
* Buffers may only be allocated from interrupts using a @gfp_mask of
* %GFP_ATOMIC.
*/
struct sk_buff *alloc_skb(unsigned int size,int gfp_mask)
{
struct sk_buff *skb;
u8 *data;
 
if (in_interrupt() && (gfp_mask & __GFP_WAIT)) {
static int count = 0;
if (++count < 5) {
printk(KERN_ERR "alloc_skb called nonatomically "
"from interrupt %p\n", NET_CALLER(size));
BUG();
}
gfp_mask &= ~__GFP_WAIT;
}
 
/* Get the HEAD */
skb = skb_head_from_pool();
if (skb == NULL) {
skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);
if (skb == NULL)
goto nohead;
}
 
/* Get the DATA. Size must match skb_add_mtu(). */
size = SKB_DATA_ALIGN(size);
data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
if (data == NULL)
goto nodata;
 
/* XXX: does not include slab overhead */
skb->truesize = size + sizeof(struct sk_buff);
 
/* Load the data pointers. */
skb->head = data;
skb->data = data;
skb->tail = data;
skb->end = data + size;
 
/* Set up other state */
skb->len = 0;
skb->cloned = 0;
skb->data_len = 0;
 
atomic_set(&skb->users, 1);
atomic_set(&(skb_shinfo(skb)->dataref), 1);
skb_shinfo(skb)->nr_frags = 0;
skb_shinfo(skb)->frag_list = NULL;
return skb;
 
nodata:
skb_head_to_pool(skb);
nohead:
return NULL;
}
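
/*
 * Illustrative sketch, not part of the original file: the canonical
 * allocate/reserve/put sequence for building a packet. The helper name
 * and the sizes are assumptions.
 */
static struct sk_buff *example_build_packet(int headroom, const void *data, int len)
{
	struct sk_buff *skb = alloc_skb(headroom + len, GFP_ATOMIC);

	if (skb == NULL)
		return NULL;

	skb_reserve(skb, headroom);		/* leave space for headers pushed later */
	memcpy(skb_put(skb, len), data, len);	/* append the payload, advancing skb->tail */

	return skb;
}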
 
 
/*
* Slab constructor for a skb head.
*/
static inline void skb_headerinit(void *p, kmem_cache_t *cache,
unsigned long flags)
{
struct sk_buff *skb = p;
 
skb->next = NULL;
skb->prev = NULL;
skb->list = NULL;
skb->sk = NULL;
skb->stamp.tv_sec=0; /* No idea about time */
skb->dev = NULL;
skb->real_dev = NULL;
skb->dst = NULL;
memset(skb->cb, 0, sizeof(skb->cb));
skb->pkt_type = PACKET_HOST; /* Default type */
skb->ip_summed = 0;
skb->priority = 0;
skb->security = 0; /* By default packets are insecure */
skb->destructor = NULL;
 
#ifdef CONFIG_NETFILTER
skb->nfmark = skb->nfcache = 0;
skb->nfct = NULL;
#ifdef CONFIG_NETFILTER_DEBUG
skb->nf_debug = 0;
#endif
#endif
#ifdef CONFIG_NET_SCHED
skb->tc_index = 0;
#endif
}
 
static void skb_drop_fraglist(struct sk_buff *skb)
{
struct sk_buff *list = skb_shinfo(skb)->frag_list;
 
skb_shinfo(skb)->frag_list = NULL;
 
do {
struct sk_buff *this = list;
list = list->next;
kfree_skb(this);
} while (list);
}
 
static void skb_clone_fraglist(struct sk_buff *skb)
{
struct sk_buff *list;
 
for (list = skb_shinfo(skb)->frag_list; list; list=list->next)
skb_get(list);
}
 
static void skb_release_data(struct sk_buff *skb)
{
if (!skb->cloned ||
atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) {
if (skb_shinfo(skb)->nr_frags) {
int i;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
put_page(skb_shinfo(skb)->frags[i].page);
}
 
if (skb_shinfo(skb)->frag_list)
skb_drop_fraglist(skb);
 
kfree(skb->head);
}
}
 
/*
* Free an skbuff by memory without cleaning the state.
*/
void kfree_skbmem(struct sk_buff *skb)
{
skb_release_data(skb);
skb_head_to_pool(skb);
}
 
/**
* __kfree_skb - private function
* @skb: buffer
*
* Free an sk_buff. Release anything attached to the buffer.
* Clean the state. This is an internal helper function. Users should
* always call kfree_skb
*/
 
void __kfree_skb(struct sk_buff *skb)
{
if (skb->list) {
printk(KERN_WARNING "Warning: kfree_skb passed an skb still "
"on a list (from %p).\n", NET_CALLER(skb));
BUG();
}
 
dst_release(skb->dst);
if(skb->destructor) {
if (in_irq()) {
printk(KERN_WARNING "Warning: kfree_skb on hard IRQ %p\n",
NET_CALLER(skb));
}
skb->destructor(skb);
}
#ifdef CONFIG_NETFILTER
nf_conntrack_put(skb->nfct);
#endif
skb_headerinit(skb, NULL, 0); /* clean state */
kfree_skbmem(skb);
}
 
/**
* skb_clone - duplicate an sk_buff
* @skb: buffer to clone
* @gfp_mask: allocation priority
*
* Duplicate an &sk_buff. The new one is not owned by a socket. Both
* copies share the same packet data but not structure. The new
* buffer has a reference count of 1. If the allocation fails the
* function returns %NULL otherwise the new buffer is returned.
*
 * If this function is called from an interrupt, @gfp_mask must be
 * %GFP_ATOMIC.
*/
 
struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
{
struct sk_buff *n;
 
n = skb_head_from_pool();
if (!n) {
n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
if (!n)
return NULL;
}
 
#define C(x) n->x = skb->x
 
n->next = n->prev = NULL;
n->list = NULL;
n->sk = NULL;
C(stamp);
C(dev);
C(real_dev);
C(h);
C(nh);
C(mac);
C(dst);
dst_clone(n->dst);
memcpy(n->cb, skb->cb, sizeof(skb->cb));
C(len);
C(data_len);
C(csum);
n->cloned = 1;
C(pkt_type);
C(ip_summed);
C(priority);
atomic_set(&n->users, 1);
C(protocol);
C(security);
C(truesize);
C(head);
C(data);
C(tail);
C(end);
n->destructor = NULL;
#ifdef CONFIG_NETFILTER
C(nfmark);
C(nfcache);
C(nfct);
#ifdef CONFIG_NETFILTER_DEBUG
C(nf_debug);
#endif
#endif /*CONFIG_NETFILTER*/
#if defined(CONFIG_HIPPI)
C(private);
#endif
#ifdef CONFIG_NET_SCHED
C(tc_index);
#endif
 
atomic_inc(&(skb_shinfo(skb)->dataref));
skb->cloned = 1;
#ifdef CONFIG_NETFILTER
nf_conntrack_get(skb->nfct);
#endif
return n;
}
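 
/*
 * A minimal usage sketch (example_tap_packet() is a hypothetical helper):
 * hand a second consumer its own &sk_buff while sharing the packet data,
 * which is exactly what skb_clone() above provides.
 */
#if 0
static int example_tap_packet(struct sk_buff *skb,
			      int (*deliver)(struct sk_buff *))
{
	struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);

	if (clone == NULL)
		return -ENOMEM;
	/* 'clone' shares skb's data; only the sk_buff structure is private. */
	return deliver(clone);
}
#endif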
 
static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
{
/*
* Shift between the two data areas in bytes
*/
unsigned long offset = new->data - old->data;
 
new->list=NULL;
new->sk=NULL;
new->dev=old->dev;
new->real_dev=old->real_dev;
new->priority=old->priority;
new->protocol=old->protocol;
new->dst=dst_clone(old->dst);
new->h.raw=old->h.raw+offset;
new->nh.raw=old->nh.raw+offset;
new->mac.raw=old->mac.raw+offset;
memcpy(new->cb, old->cb, sizeof(old->cb));
atomic_set(&new->users, 1);
new->pkt_type=old->pkt_type;
new->stamp=old->stamp;
new->destructor = NULL;
new->security=old->security;
#ifdef CONFIG_NETFILTER
new->nfmark=old->nfmark;
new->nfcache=old->nfcache;
new->nfct=old->nfct;
nf_conntrack_get(new->nfct);
#ifdef CONFIG_NETFILTER_DEBUG
new->nf_debug=old->nf_debug;
#endif
#endif
#ifdef CONFIG_NET_SCHED
new->tc_index = old->tc_index;
#endif
}
 
/**
* skb_copy - create private copy of an sk_buff
* @skb: buffer to copy
* @gfp_mask: allocation priority
*
* Make a copy of both an &sk_buff and its data. This is used when the
* caller wishes to modify the data and needs a private copy of the
* data to alter. Returns %NULL on failure or the pointer to the buffer
* on success. The returned buffer has a reference count of 1.
*
 * As a by-product, this function converts a non-linear &sk_buff into a
 * linear one, so the &sk_buff becomes completely private and the caller is
 * allowed to modify all the data of the returned buffer. This means the
 * function is not recommended in circumstances where only the header is
 * going to be modified. Use pskb_copy() instead.
*/
struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask)
{
struct sk_buff *n;
int headerlen = skb->data-skb->head;
 
/*
* Allocate the copy buffer
*/
n=alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask);
if(n==NULL)
return NULL;
 
/* Set the data pointer */
skb_reserve(n,headerlen);
/* Set the tail pointer and length */
skb_put(n,skb->len);
n->csum = skb->csum;
n->ip_summed = skb->ip_summed;
 
if (skb_copy_bits(skb, -headerlen, n->head, headerlen+skb->len))
BUG();
 
copy_skb_header(n, skb);
 
return n;
}
 
/* Keep head the same: replace data */
int skb_linearize(struct sk_buff *skb, int gfp_mask)
{
unsigned int size;
u8 *data;
long offset;
int headerlen = skb->data - skb->head;
int expand = (skb->tail+skb->data_len) - skb->end;
 
if (skb_shared(skb))
BUG();
 
if (expand <= 0)
expand = 0;
 
size = (skb->end - skb->head + expand);
size = SKB_DATA_ALIGN(size);
data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
if (data == NULL)
return -ENOMEM;
 
/* Copy entire thing */
if (skb_copy_bits(skb, -headerlen, data, headerlen+skb->len))
BUG();
 
/* Offset between the two in bytes */
offset = data - skb->head;
 
/* Free old data. */
skb_release_data(skb);
 
skb->head = data;
skb->end = data + size;
 
/* Set up new pointers */
skb->h.raw += offset;
skb->nh.raw += offset;
skb->mac.raw += offset;
skb->tail += offset;
skb->data += offset;
 
/* Set up shinfo */
atomic_set(&(skb_shinfo(skb)->dataref), 1);
skb_shinfo(skb)->nr_frags = 0;
skb_shinfo(skb)->frag_list = NULL;
 
/* We are no longer a clone, even if we were. */
skb->cloned = 0;
 
skb->tail += skb->data_len;
skb->data_len = 0;
return 0;
}
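 
/*
 * A minimal usage sketch (example_force_linear() is a hypothetical helper):
 * consumers that cannot walk fragments call skb_linearize() to pull
 * everything into the linear data area first. The skb must not be shared.
 */
#if 0
static int example_force_linear(struct sk_buff *skb)
{
	if (skb->data_len == 0)
		return 0;	/* already linear */
	return skb_linearize(skb, GFP_ATOMIC);	/* 0 or -ENOMEM */
}
#endif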
 
 
/**
* pskb_copy - create copy of an sk_buff with private head.
* @skb: buffer to copy
* @gfp_mask: allocation priority
*
 * Make a copy of both an &sk_buff and the part of its data located in the
 * header. Fragmented data remain shared. This is used when the caller
 * wishes to modify only the header of an &sk_buff and needs a private copy
 * of the header to alter. Returns %NULL on failure or a pointer to the
 * buffer on success.
 * The returned buffer has a reference count of 1.
*/
 
struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask)
{
struct sk_buff *n;
 
/*
* Allocate the copy buffer
*/
n=alloc_skb(skb->end - skb->head, gfp_mask);
if(n==NULL)
return NULL;
 
/* Set the data pointer */
skb_reserve(n,skb->data-skb->head);
/* Set the tail pointer and length */
skb_put(n,skb_headlen(skb));
/* Copy the bytes */
memcpy(n->data, skb->data, n->len);
n->csum = skb->csum;
n->ip_summed = skb->ip_summed;
 
n->data_len = skb->data_len;
n->len = skb->len;
 
if (skb_shinfo(skb)->nr_frags) {
int i;
 
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
get_page(skb_shinfo(n)->frags[i].page);
}
skb_shinfo(n)->nr_frags = i;
}
 
if (skb_shinfo(skb)->frag_list) {
skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
skb_clone_fraglist(n);
}
 
copy_skb_header(n, skb);
 
return n;
}
 
/**
* pskb_expand_head - reallocate header of &sk_buff
* @skb: buffer to reallocate
* @nhead: room to add at head
* @ntail: room to add at tail
* @gfp_mask: allocation priority
*
 * Expands (or creates an identical copy, if @nhead and @ntail are zero)
 * the header of @skb. The &sk_buff itself is not changed and MUST have a
 * reference count of 1. Returns zero on success, or a negative error code
 * if the expansion failed, in which case the &sk_buff is left unchanged.
 *
 * All the pointers pointing into the skb header may change and must be
 * reloaded after a call to this function.
*/
 
int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask)
{
int i;
u8 *data;
int size = nhead + (skb->end - skb->head) + ntail;
long off;
 
if (skb_shared(skb))
BUG();
 
size = SKB_DATA_ALIGN(size);
 
data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
if (data == NULL)
goto nodata;
 
/* Copy only real data... and, alas, header. This should be
* optimized for the cases when header is void. */
memcpy(data+nhead, skb->head, skb->tail-skb->head);
memcpy(data+size, skb->end, sizeof(struct skb_shared_info));
 
for (i=0; i<skb_shinfo(skb)->nr_frags; i++)
get_page(skb_shinfo(skb)->frags[i].page);
 
if (skb_shinfo(skb)->frag_list)
skb_clone_fraglist(skb);
 
skb_release_data(skb);
 
off = (data+nhead) - skb->head;
 
skb->head = data;
skb->end = data+size;
 
skb->data += off;
skb->tail += off;
skb->mac.raw += off;
skb->h.raw += off;
skb->nh.raw += off;
skb->cloned = 0;
atomic_set(&skb_shinfo(skb)->dataref, 1);
return 0;
 
nodata:
return -ENOMEM;
}
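 
/*
 * A minimal usage sketch (example_grow_tailroom() is a hypothetical helper):
 * add tail room to a non-shared skb. As documented above, every pointer into
 * the old header becomes invalid; pskb_expand_head() itself relocates
 * head/data/tail and the h/nh/mac pointers.
 */
#if 0
static int example_grow_tailroom(struct sk_buff *skb, int ntail)
{
	/* The skb must not be shared; pskb_expand_head() BUGs on shared skbs. */
	return pskb_expand_head(skb, 0, ntail, GFP_ATOMIC);	/* 0 or -ENOMEM */
}
#endif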
 
/* Make private copy of skb with writable head and some headroom */
 
struct sk_buff *
skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
{
struct sk_buff *skb2;
int delta = headroom - skb_headroom(skb);
 
if (delta <= 0)
return pskb_copy(skb, GFP_ATOMIC);
 
skb2 = skb_clone(skb, GFP_ATOMIC);
if (skb2 == NULL ||
!pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC))
return skb2;
 
kfree_skb(skb2);
return NULL;
}
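 
/*
 * A minimal usage sketch (example_make_room_for_header() is a hypothetical
 * helper): a tunnel-style transmit path that needs headroom for an extra
 * header switches to the private copy returned by skb_realloc_headroom()
 * and drops its own reference to the original buffer.
 */
#if 0
static struct sk_buff *example_make_room_for_header(struct sk_buff *skb,
						    unsigned int hdr_len)
{
	struct sk_buff *nskb;

	if (skb_headroom(skb) >= hdr_len && !skb_cloned(skb) && !skb_shared(skb))
		return skb;	/* enough private headroom already */

	nskb = skb_realloc_headroom(skb, hdr_len);
	if (nskb)
		kfree_skb(skb);	/* drop our reference to the old buffer */
	return nskb;
}
#endif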
 
 
/**
* skb_copy_expand - copy and expand sk_buff
* @skb: buffer to copy
* @newheadroom: new free bytes at head
* @newtailroom: new free bytes at tail
* @gfp_mask: allocation priority
*
* Make a copy of both an &sk_buff and its data and while doing so
* allocate additional space.
*
* This is used when the caller wishes to modify the data and needs a
* private copy of the data to alter as well as more space for new fields.
* Returns %NULL on failure or the pointer to the buffer
* on success. The returned buffer has a reference count of 1.
*
* You must pass %GFP_ATOMIC as the allocation priority if this function
* is called from an interrupt.
*/
 
struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
int newheadroom,
int newtailroom,
int gfp_mask)
{
struct sk_buff *n;
 
/*
* Allocate the copy buffer
*/
n=alloc_skb(newheadroom + skb->len + newtailroom,
gfp_mask);
if(n==NULL)
return NULL;
 
skb_reserve(n,newheadroom);
 
/* Set the tail pointer and length */
skb_put(n,skb->len);
 
/* Copy the data only. */
if (skb_copy_bits(skb, 0, n->data, skb->len))
BUG();
 
copy_skb_header(n, skb);
return n;
}
 
/**
* skb_pad - zero pad the tail of an skb
* @skb: buffer to pad
* @pad: space to pad
*
* Ensure that a buffer is followed by a padding area that is zero
* filled. Used by network drivers which may DMA or transfer data
* beyond the buffer end onto the wire.
*
* May return NULL in out of memory cases.
*/
struct sk_buff *skb_pad(struct sk_buff *skb, int pad)
{
struct sk_buff *nskb;
/* If the skbuff is non-linear, the tailroom is always zero.. */
if(skb_tailroom(skb) >= pad)
{
memset(skb->data+skb->len, 0, pad);
return skb;
}
nskb = skb_copy_expand(skb, skb_headroom(skb), skb_tailroom(skb) + pad, GFP_ATOMIC);
kfree_skb(skb);
if(nskb)
memset(nskb->data+nskb->len, 0, pad);
return nskb;
}
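 
/*
 * A minimal usage sketch (example_pad_to_min() is a hypothetical helper):
 * pad a short Ethernet frame up to the 60 byte minimum before handing it
 * to hardware. When skb_pad() has to reallocate it frees the original
 * buffer, so the caller must use the returned pointer (which may be %NULL).
 */
#if 0
static struct sk_buff *example_pad_to_min(struct sk_buff *skb)
{
	if (skb->len >= 60)
		return skb;
	return skb_pad(skb, 60 - skb->len);
}
#endif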
/* Trims skb to length len. It may change skb pointers if "realloc" is 1.
 * If realloc==0 and trimming is impossible without changing the data,
 * this is a BUG().
*/
 
int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc)
{
int offset = skb_headlen(skb);
int nfrags = skb_shinfo(skb)->nr_frags;
int i;
 
for (i=0; i<nfrags; i++) {
int end = offset + skb_shinfo(skb)->frags[i].size;
if (end > len) {
if (skb_cloned(skb)) {
if (!realloc)
BUG();
if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
return -ENOMEM;
}
if (len <= offset) {
put_page(skb_shinfo(skb)->frags[i].page);
skb_shinfo(skb)->nr_frags--;
} else {
skb_shinfo(skb)->frags[i].size = len-offset;
}
}
offset = end;
}
 
if (offset < len) {
skb->data_len -= skb->len - len;
skb->len = len;
} else {
if (len <= skb_headlen(skb)) {
skb->len = len;
skb->data_len = 0;
skb->tail = skb->data + len;
if (skb_shinfo(skb)->frag_list && !skb_cloned(skb))
skb_drop_fraglist(skb);
} else {
skb->data_len -= skb->len - len;
skb->len = len;
}
}
 
return 0;
}
 
/**
* __pskb_pull_tail - advance tail of skb header
* @skb: buffer to reallocate
* @delta: number of bytes to advance tail
*
 * This function only makes sense on a fragmented &sk_buff: it expands the
 * header by moving its tail forward and copying the necessary data from
 * the fragmented part.
 *
 * The &sk_buff MUST have a reference count of 1.
 *
 * Returns %NULL (and the &sk_buff is left unchanged) if the pull failed,
 * or the new tail of the skb on success.
 *
 * All the pointers pointing into the skb header may change and must be
 * reloaded after a call to this function.
*/
 
/* Moves the tail of the skb head forward, copying data from the fragmented
 * part when necessary.
* 1. It may fail due to malloc failure.
* 2. It may change skb pointers.
*
* It is pretty complicated. Luckily, it is called only in exceptional cases.
*/
unsigned char * __pskb_pull_tail(struct sk_buff *skb, int delta)
{
int i, k, eat;
 
	/* If the skb does not have enough free space at the tail, get a new one
	 * plus 128 bytes for future expansion. If we have enough room at the
	 * tail, reallocate without expansion only if the skb is cloned.
*/
eat = (skb->tail+delta) - skb->end;
 
if (eat > 0 || skb_cloned(skb)) {
if (pskb_expand_head(skb, 0, eat>0 ? eat+128 : 0, GFP_ATOMIC))
return NULL;
}
 
if (skb_copy_bits(skb, skb_headlen(skb), skb->tail, delta))
BUG();
 
	/* Optimization: no fragments, no reason to pre-estimate the
	 * size of the pulled pages. Superb.
*/
if (skb_shinfo(skb)->frag_list == NULL)
goto pull_pages;
 
/* Estimate size of pulled pages. */
eat = delta;
for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size >= eat)
goto pull_pages;
eat -= skb_shinfo(skb)->frags[i].size;
}
 
	/* If we need to update the frag list, we are in trouble.
	 * Certainly, it would be possible to add an offset to the skb data,
	 * but taking into account that pulling is expected to be a very rare
	 * operation, it is worth fighting against further bloating of the skb
	 * head and crucifying ourselves here instead.
	 * Pure masochism, indeed. 8)8)
*/
if (eat) {
struct sk_buff *list = skb_shinfo(skb)->frag_list;
struct sk_buff *clone = NULL;
struct sk_buff *insp = NULL;
 
do {
if (list == NULL)
BUG();
 
if (list->len <= eat) {
/* Eaten as whole. */
eat -= list->len;
list = list->next;
insp = list;
} else {
/* Eaten partially. */
 
if (skb_shared(list)) {
/* Sucks! We need to fork list. :-( */
clone = skb_clone(list, GFP_ATOMIC);
if (clone == NULL)
return NULL;
insp = list->next;
list = clone;
} else {
/* This may be pulled without
* problems. */
insp = list;
}
if (pskb_pull(list, eat) == NULL) {
if (clone)
kfree_skb(clone);
return NULL;
}
break;
}
} while (eat);
 
/* Free pulled out fragments. */
while ((list = skb_shinfo(skb)->frag_list) != insp) {
skb_shinfo(skb)->frag_list = list->next;
kfree_skb(list);
}
/* And insert new clone at head. */
if (clone) {
clone->next = list;
skb_shinfo(skb)->frag_list = clone;
}
}
/* Success! Now we may commit changes to skb data. */
 
pull_pages:
eat = delta;
k = 0;
for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
if (eat) {
skb_shinfo(skb)->frags[k].page_offset += eat;
skb_shinfo(skb)->frags[k].size -= eat;
eat = 0;
}
k++;
}
}
skb_shinfo(skb)->nr_frags = k;
 
skb->tail += delta;
skb->data_len -= delta;
 
return skb->tail;
}
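 
/*
 * A minimal usage sketch (example_parse_header() is a hypothetical helper):
 * protocol code normally reaches __pskb_pull_tail() through the
 * pskb_may_pull() wrapper to make sure at least hlen bytes of header sit
 * in the linear area before they are parsed.
 */
#if 0
static int example_parse_header(struct sk_buff *skb, unsigned int hlen)
{
	if (!pskb_may_pull(skb, hlen))
		return -EINVAL;	/* packet too short or reallocation failed */
	/* skb->data now covers hlen linear bytes; header pointers may have moved. */
	return 0;
}
#endif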
 
/* Copy some data bits from skb to kernel buffer. */
 
int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
{
int i, copy;
int start = skb->len - skb->data_len;
 
if (offset > (int)skb->len-len)
goto fault;
 
/* Copy header. */
if ((copy = start-offset) > 0) {
if (copy > len)
copy = len;
memcpy(to, skb->data + offset, copy);
if ((len -= copy) == 0)
return 0;
offset += copy;
to += copy;
}
 
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
int end;
 
BUG_TRAP(start <= offset+len);
 
end = start + skb_shinfo(skb)->frags[i].size;
if ((copy = end-offset) > 0) {
u8 *vaddr;
 
if (copy > len)
copy = len;
 
vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
memcpy(to, vaddr+skb_shinfo(skb)->frags[i].page_offset+
offset-start, copy);
kunmap_skb_frag(vaddr);
 
if ((len -= copy) == 0)
return 0;
offset += copy;
to += copy;
}
start = end;
}
 
if (skb_shinfo(skb)->frag_list) {
struct sk_buff *list;
 
for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
int end;
 
BUG_TRAP(start <= offset+len);
 
end = start + list->len;
if ((copy = end-offset) > 0) {
if (copy > len)
copy = len;
if (skb_copy_bits(list, offset-start, to, copy))
goto fault;
if ((len -= copy) == 0)
return 0;
offset += copy;
to += copy;
}
start = end;
}
}
if (len == 0)
return 0;
 
fault:
return -EFAULT;
}
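 
/*
 * A minimal usage sketch (example_peek_header() is a hypothetical helper):
 * copy a fixed-size header out of a possibly fragmented skb into a private
 * buffer without linearizing the packet.
 */
#if 0
static int example_peek_header(const struct sk_buff *skb, int offset,
			       void *hdr, int hdr_len)
{
	/* Returns 0 on success or -EFAULT if the requested range is not there. */
	return skb_copy_bits(skb, offset, hdr, hdr_len);
}
#endif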
 
/* Checksum skb data. */
 
unsigned int skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum)
{
int i, copy;
int start = skb->len - skb->data_len;
int pos = 0;
 
/* Checksum header. */
if ((copy = start-offset) > 0) {
if (copy > len)
copy = len;
csum = csum_partial(skb->data+offset, copy, csum);
if ((len -= copy) == 0)
return csum;
offset += copy;
pos = copy;
}
 
for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
int end;
 
BUG_TRAP(start <= offset+len);
 
end = start + skb_shinfo(skb)->frags[i].size;
if ((copy = end-offset) > 0) {
unsigned int csum2;
u8 *vaddr;
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 
if (copy > len)
copy = len;
vaddr = kmap_skb_frag(frag);
csum2 = csum_partial(vaddr + frag->page_offset +
offset-start, copy, 0);
kunmap_skb_frag(vaddr);
csum = csum_block_add(csum, csum2, pos);
if (!(len -= copy))
return csum;
offset += copy;
pos += copy;
}
start = end;
}
 
if (skb_shinfo(skb)->frag_list) {
struct sk_buff *list;
 
for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
int end;
 
BUG_TRAP(start <= offset+len);
 
end = start + list->len;
if ((copy = end-offset) > 0) {
unsigned int csum2;
if (copy > len)
copy = len;
csum2 = skb_checksum(list, offset-start, copy, 0);
csum = csum_block_add(csum, csum2, pos);
if ((len -= copy) == 0)
return csum;
offset += copy;
pos += copy;
}
start = end;
}
}
if (len == 0)
return csum;
 
BUG();
return csum;
}
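 
/*
 * A minimal usage sketch (example_payload_csum() is a hypothetical helper):
 * checksum the payload that starts at offset thoff and fold the running
 * 32 bit sum down to 16 bits, the way receive-side verification uses
 * skb_checksum().
 */
#if 0
static unsigned int example_payload_csum(const struct sk_buff *skb, int thoff)
{
	unsigned int csum = skb_checksum(skb, thoff, skb->len - thoff, 0);
	return csum_fold(csum);
}
#endif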
 
/* Both of above in one bottle. */
 
unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int csum)
{
int i, copy;
int start = skb->len - skb->data_len;
int pos = 0;
 
/* Copy header. */
if ((copy = start-offset) > 0) {
if (copy > len)
copy = len;
csum = csum_partial_copy_nocheck(skb->data+offset, to, copy, csum);
if ((len -= copy) == 0)
return csum;
offset += copy;
to += copy;
pos = copy;
}
 
for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
int end;
 
BUG_TRAP(start <= offset+len);
 
end = start + skb_shinfo(skb)->frags[i].size;
if ((copy = end-offset) > 0) {
unsigned int csum2;
u8 *vaddr;
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 
if (copy > len)
copy = len;
vaddr = kmap_skb_frag(frag);
csum2 = csum_partial_copy_nocheck(vaddr + frag->page_offset +
offset-start, to, copy, 0);
kunmap_skb_frag(vaddr);
csum = csum_block_add(csum, csum2, pos);
if (!(len -= copy))
return csum;
offset += copy;
to += copy;
pos += copy;
}
start = end;
}
 
if (skb_shinfo(skb)->frag_list) {
struct sk_buff *list;
 
for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
unsigned int csum2;
int end;
 
BUG_TRAP(start <= offset+len);
 
end = start + list->len;
if ((copy = end-offset) > 0) {
if (copy > len)
copy = len;
csum2 = skb_copy_and_csum_bits(list, offset-start, to, copy, 0);
csum = csum_block_add(csum, csum2, pos);
if ((len -= copy) == 0)
return csum;
offset += copy;
to += copy;
pos += copy;
}
start = end;
}
}
if (len == 0)
return csum;
 
BUG();
return csum;
}
 
void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
{
unsigned int csum;
long csstart;
 
if (skb->ip_summed == CHECKSUM_HW)
csstart = skb->h.raw - skb->data;
else
csstart = skb->len - skb->data_len;
 
if (csstart > skb->len - skb->data_len)
BUG();
 
memcpy(to, skb->data, csstart);
 
csum = 0;
if (csstart != skb->len)
csum = skb_copy_and_csum_bits(skb, csstart, to+csstart,
skb->len-csstart, 0);
 
if (skb->ip_summed == CHECKSUM_HW) {
long csstuff = csstart + skb->csum;
 
*((unsigned short *)(to + csstuff)) = csum_fold(csum);
}
}
 
#if 0
/*
* Tune the memory allocator for a new MTU size.
*/
void skb_add_mtu(int mtu)
{
/* Must match allocation in alloc_skb */
mtu = SKB_DATA_ALIGN(mtu) + sizeof(struct skb_shared_info);
 
kmem_add_cache_size(mtu);
}
#endif
 
void __init skb_init(void)
{
int i;
 
skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
sizeof(struct sk_buff),
0,
SLAB_HWCACHE_ALIGN,
skb_headerinit, NULL);
if (!skbuff_head_cache)
panic("cannot create skbuff cache");
 
for (i=0; i<NR_CPUS; i++)
skb_queue_head_init(&skb_head_pool[i].list);
}
/neighbour.c
0,0 → 1,1588
/*
* Generic address resolution entity
*
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
* Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Fixes:
* Vitaly E. Lavrov releasing NULL neighbor in neigh_add.
*/
 
#include <linux/config.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/socket.h>
#include <linux/sched.h>
#include <linux/netdevice.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <net/neighbour.h>
#include <net/dst.h>
#include <net/sock.h>
#include <linux/rtnetlink.h>
 
#define NEIGH_DEBUG 1
 
#define NEIGH_PRINTK(x...) printk(x)
#define NEIGH_NOPRINTK(x...) do { ; } while(0)
#define NEIGH_PRINTK0 NEIGH_PRINTK
#define NEIGH_PRINTK1 NEIGH_NOPRINTK
#define NEIGH_PRINTK2 NEIGH_NOPRINTK
 
#if NEIGH_DEBUG >= 1
#undef NEIGH_PRINTK1
#define NEIGH_PRINTK1 NEIGH_PRINTK
#endif
#if NEIGH_DEBUG >= 2
#undef NEIGH_PRINTK2
#define NEIGH_PRINTK2 NEIGH_PRINTK
#endif
 
static void neigh_timer_handler(unsigned long arg);
#ifdef CONFIG_ARPD
static void neigh_app_notify(struct neighbour *n);
#endif
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev);
 
static int neigh_glbl_allocs;
static struct neigh_table *neigh_tables;
 
/*
   Neighbour hash table buckets are protected with the rwlock tbl->lock.

   - All scans/updates of the hash buckets MUST be made under this lock.
   - NOTHING clever should be done under this lock: no callbacks
     into protocol backends, no attempts to send anything to the network.
     Doing so will result in deadlocks if the backend/driver wants to use
     the neighbour cache.
   - If an entry requires some non-trivial actions, increase
     its reference count and release the table lock.

   Neighbour entries are protected:
   - with a reference count.
   - with the rwlock neigh->lock.

   The reference count prevents destruction.

   neigh->lock mainly serializes the ll address data and its validity state.
   However, the same lock is also used to protect other entry fields:
   - the timer
   - the resolution queue

   Again, nothing clever shall be done under neigh->lock;
   the most complicated procedure we allow there is dev->hard_header.
   It is assumed that dev->hard_header is simplistic and does
   not make callbacks into neighbour tables.

   The last lock is neigh_tbl_lock. It is a pure SMP lock, protecting
   the list of neighbour tables. This list is used only in process context.
*/
 
static rwlock_t neigh_tbl_lock = RW_LOCK_UNLOCKED;
 
static int neigh_blackhole(struct sk_buff *skb)
{
kfree_skb(skb);
return -ENETDOWN;
}
 
/*
 * It is a uniform random distribution over the interval (1/2)*base...(3/2)*base.
 * It corresponds to the default IPv6 settings and is not overridable,
 * because it is a really reasonable choice.
*/
 
unsigned long neigh_rand_reach_time(unsigned long base)
{
return (net_random() % base) + (base>>1);
}
 
 
static int neigh_forced_gc(struct neigh_table *tbl)
{
int shrunk = 0;
int i;
 
for (i=0; i<=NEIGH_HASHMASK; i++) {
struct neighbour *n, **np;
 
np = &tbl->hash_buckets[i];
write_lock_bh(&tbl->lock);
while ((n = *np) != NULL) {
			/* A neighbour record may be discarded if:
			   - nobody refers to it.
			   - it is not permanent.
			   - (NEW and probably wrong)
			     INCOMPLETE entries are kept for at least
			     n->parms->retrans_time, otherwise we could
			     flood the network with resolution requests.
			     It is not clear which is worse: table
			     overflow or flooding.
*/
write_lock(&n->lock);
if (atomic_read(&n->refcnt) == 1 &&
!(n->nud_state&NUD_PERMANENT) &&
(n->nud_state != NUD_INCOMPLETE ||
jiffies - n->used > n->parms->retrans_time)) {
*np = n->next;
n->dead = 1;
shrunk = 1;
write_unlock(&n->lock);
neigh_release(n);
continue;
}
write_unlock(&n->lock);
np = &n->next;
}
write_unlock_bh(&tbl->lock);
}
tbl->last_flush = jiffies;
return shrunk;
}
 
static int neigh_del_timer(struct neighbour *n)
{
if (n->nud_state & NUD_IN_TIMER) {
if (del_timer(&n->timer)) {
neigh_release(n);
return 1;
}
}
return 0;
}
 
static void pneigh_queue_purge(struct sk_buff_head *list)
{
struct sk_buff *skb;
 
while ((skb = skb_dequeue(list)) != NULL) {
dev_put(skb->dev);
kfree_skb(skb);
}
}
 
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
int i;
 
write_lock_bh(&tbl->lock);
 
for (i=0; i <= NEIGH_HASHMASK; i++) {
struct neighbour *n, **np;
 
np = &tbl->hash_buckets[i];
while ((n = *np) != NULL) {
if (dev && n->dev != dev) {
np = &n->next;
continue;
}
*np = n->next;
write_lock_bh(&n->lock);
n->dead = 1;
neigh_del_timer(n);
write_unlock_bh(&n->lock);
neigh_release(n);
}
}
 
write_unlock_bh(&tbl->lock);
}
 
int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
int i;
 
write_lock_bh(&tbl->lock);
 
for (i=0; i<=NEIGH_HASHMASK; i++) {
struct neighbour *n, **np;
 
np = &tbl->hash_buckets[i];
while ((n = *np) != NULL) {
if (dev && n->dev != dev) {
np = &n->next;
continue;
}
*np = n->next;
write_lock(&n->lock);
neigh_del_timer(n);
n->dead = 1;
 
if (atomic_read(&n->refcnt) != 1) {
			/* The most unpleasant situation.
			   We must destroy the neighbour entry,
			   but someone still uses it.

			   The destruction will be delayed until
			   the last user releases us, but
			   we must kill timers etc. and move
			   it to a safe state.
*/
n->parms = &tbl->parms;
skb_queue_purge(&n->arp_queue);
n->output = neigh_blackhole;
if (n->nud_state&NUD_VALID)
n->nud_state = NUD_NOARP;
else
n->nud_state = NUD_NONE;
NEIGH_PRINTK2("neigh %p is stray.\n", n);
}
write_unlock(&n->lock);
neigh_release(n);
}
}
 
pneigh_ifdown(tbl, dev);
write_unlock_bh(&tbl->lock);
 
del_timer_sync(&tbl->proxy_timer);
pneigh_queue_purge(&tbl->proxy_queue);
return 0;
}
 
static struct neighbour *neigh_alloc(struct neigh_table *tbl)
{
struct neighbour *n;
unsigned long now = jiffies;
 
if (tbl->entries > tbl->gc_thresh3 ||
(tbl->entries > tbl->gc_thresh2 &&
now - tbl->last_flush > 5*HZ)) {
if (neigh_forced_gc(tbl) == 0 &&
tbl->entries > tbl->gc_thresh3)
return NULL;
}
 
n = kmem_cache_alloc(tbl->kmem_cachep, SLAB_ATOMIC);
if (n == NULL)
return NULL;
 
memset(n, 0, tbl->entry_size);
 
skb_queue_head_init(&n->arp_queue);
n->lock = RW_LOCK_UNLOCKED;
n->updated = n->used = now;
n->nud_state = NUD_NONE;
n->output = neigh_blackhole;
n->parms = &tbl->parms;
init_timer(&n->timer);
n->timer.function = neigh_timer_handler;
n->timer.data = (unsigned long)n;
tbl->stats.allocs++;
neigh_glbl_allocs++;
tbl->entries++;
n->tbl = tbl;
atomic_set(&n->refcnt, 1);
n->dead = 1;
return n;
}
 
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
struct net_device *dev)
{
struct neighbour *n;
u32 hash_val;
int key_len = tbl->key_len;
 
hash_val = tbl->hash(pkey, dev);
 
read_lock_bh(&tbl->lock);
for (n = tbl->hash_buckets[hash_val]; n; n = n->next) {
if (dev == n->dev &&
memcmp(n->primary_key, pkey, key_len) == 0) {
neigh_hold(n);
break;
}
}
read_unlock_bh(&tbl->lock);
return n;
}
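 
/*
 * A minimal usage sketch (example_copy_lladdr() is a hypothetical helper)
 * of the reference/locking discipline described above: the entry returned
 * by neigh_lookup() is held, its link-layer address is read under
 * neigh->lock, and the reference is dropped afterwards.
 */
#if 0
static int example_copy_lladdr(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev, u8 *buf)
{
	struct neighbour *n = neigh_lookup(tbl, pkey, dev);

	if (n == NULL)
		return -ENOENT;
	read_lock_bh(&n->lock);
	memcpy(buf, n->ha, dev->addr_len);
	read_unlock_bh(&n->lock);
	neigh_release(n);
	return 0;
}
#endif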
 
struct neighbour * neigh_create(struct neigh_table *tbl, const void *pkey,
struct net_device *dev)
{
struct neighbour *n, *n1;
u32 hash_val;
int key_len = tbl->key_len;
int error;
 
n = neigh_alloc(tbl);
if (n == NULL)
return ERR_PTR(-ENOBUFS);
 
memcpy(n->primary_key, pkey, key_len);
n->dev = dev;
dev_hold(dev);
 
/* Protocol specific setup. */
if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
neigh_release(n);
return ERR_PTR(error);
}
 
/* Device specific setup. */
if (n->parms->neigh_setup &&
(error = n->parms->neigh_setup(n)) < 0) {
neigh_release(n);
return ERR_PTR(error);
}
 
n->confirmed = jiffies - (n->parms->base_reachable_time<<1);
 
hash_val = tbl->hash(pkey, dev);
 
write_lock_bh(&tbl->lock);
for (n1 = tbl->hash_buckets[hash_val]; n1; n1 = n1->next) {
if (dev == n1->dev &&
memcmp(n1->primary_key, pkey, key_len) == 0) {
neigh_hold(n1);
write_unlock_bh(&tbl->lock);
neigh_release(n);
return n1;
}
}
 
n->next = tbl->hash_buckets[hash_val];
tbl->hash_buckets[hash_val] = n;
n->dead = 0;
neigh_hold(n);
write_unlock_bh(&tbl->lock);
NEIGH_PRINTK2("neigh %p is created.\n", n);
return n;
}
 
struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey,
struct net_device *dev, int creat)
{
struct pneigh_entry *n;
u32 hash_val;
int key_len = tbl->key_len;
 
hash_val = *(u32*)(pkey + key_len - 4);
hash_val ^= (hash_val>>16);
hash_val ^= hash_val>>8;
hash_val ^= hash_val>>4;
hash_val &= PNEIGH_HASHMASK;
 
read_lock_bh(&tbl->lock);
 
for (n = tbl->phash_buckets[hash_val]; n; n = n->next) {
if (memcmp(n->key, pkey, key_len) == 0 &&
(n->dev == dev || !n->dev)) {
read_unlock_bh(&tbl->lock);
return n;
}
}
read_unlock_bh(&tbl->lock);
if (!creat)
return NULL;
 
n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
if (n == NULL)
return NULL;
 
memcpy(n->key, pkey, key_len);
n->dev = dev;
 
if (tbl->pconstructor && tbl->pconstructor(n)) {
kfree(n);
return NULL;
}
 
write_lock_bh(&tbl->lock);
n->next = tbl->phash_buckets[hash_val];
tbl->phash_buckets[hash_val] = n;
write_unlock_bh(&tbl->lock);
return n;
}
 
 
int pneigh_delete(struct neigh_table *tbl, const void *pkey, struct net_device *dev)
{
struct pneigh_entry *n, **np;
u32 hash_val;
int key_len = tbl->key_len;
 
hash_val = *(u32*)(pkey + key_len - 4);
hash_val ^= (hash_val>>16);
hash_val ^= hash_val>>8;
hash_val ^= hash_val>>4;
hash_val &= PNEIGH_HASHMASK;
 
for (np = &tbl->phash_buckets[hash_val]; (n=*np) != NULL; np = &n->next) {
if (memcmp(n->key, pkey, key_len) == 0 && n->dev == dev) {
write_lock_bh(&tbl->lock);
*np = n->next;
write_unlock_bh(&tbl->lock);
if (tbl->pdestructor)
tbl->pdestructor(n);
kfree(n);
return 0;
}
}
return -ENOENT;
}
 
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
struct pneigh_entry *n, **np;
u32 h;
 
for (h=0; h<=PNEIGH_HASHMASK; h++) {
np = &tbl->phash_buckets[h];
while ((n=*np) != NULL) {
if (n->dev == dev || dev == NULL) {
*np = n->next;
if (tbl->pdestructor)
tbl->pdestructor(n);
kfree(n);
continue;
}
np = &n->next;
}
}
return -ENOENT;
}
 
 
/*
 *	neighbour must already be out of the table.
*/
void neigh_destroy(struct neighbour *neigh)
{
struct hh_cache *hh;
 
if (!neigh->dead) {
printk("Destroying alive neighbour %p\n", neigh);
dump_stack();
return;
}
 
if (neigh_del_timer(neigh))
printk("Impossible event.\n");
 
while ((hh = neigh->hh) != NULL) {
neigh->hh = hh->hh_next;
hh->hh_next = NULL;
write_lock_bh(&hh->hh_lock);
hh->hh_output = neigh_blackhole;
write_unlock_bh(&hh->hh_lock);
if (atomic_dec_and_test(&hh->hh_refcnt))
kfree(hh);
}
 
if (neigh->ops && neigh->ops->destructor)
(neigh->ops->destructor)(neigh);
 
skb_queue_purge(&neigh->arp_queue);
 
dev_put(neigh->dev);
 
NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);
 
neigh_glbl_allocs--;
neigh->tbl->entries--;
kmem_cache_free(neigh->tbl->kmem_cachep, neigh);
}
 
/* Neighbour state is suspicious;
disable fast path.
 
Called with write_locked neigh.
*/
static void neigh_suspect(struct neighbour *neigh)
{
struct hh_cache *hh;
 
NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
 
neigh->output = neigh->ops->output;
 
for (hh = neigh->hh; hh; hh = hh->hh_next)
hh->hh_output = neigh->ops->output;
}
 
/* Neighbour state is OK;
enable fast path.
 
Called with write_locked neigh.
*/
static void neigh_connect(struct neighbour *neigh)
{
struct hh_cache *hh;
 
NEIGH_PRINTK2("neigh %p is connected.\n", neigh);
 
neigh->output = neigh->ops->connected_output;
 
for (hh = neigh->hh; hh; hh = hh->hh_next)
hh->hh_output = neigh->ops->hh_output;
}
 
/*
   Transitions NUD_STALE <-> NUD_REACHABLE do not occur while the fast
   path is built: we have no timers associated with these states, and we
   do not have time to check the state when sending.
   neigh_periodic_timer periodically checks the neigh->confirmed time and
   moves NUD_REACHABLE -> NUD_STALE.

   If a routine wants to know the TRUE state of an entry, it calls
   neigh_sync before checking the state.

   Called with the neigh write-locked.
*/
 
static void neigh_sync(struct neighbour *n)
{
unsigned long now = jiffies;
u8 state = n->nud_state;
 
if (state&(NUD_NOARP|NUD_PERMANENT))
return;
if (state&NUD_REACHABLE) {
if (now - n->confirmed > n->parms->reachable_time) {
n->nud_state = NUD_STALE;
neigh_suspect(n);
}
} else if (state&NUD_VALID) {
if (now - n->confirmed < n->parms->reachable_time) {
neigh_del_timer(n);
n->nud_state = NUD_REACHABLE;
neigh_connect(n);
}
}
}
 
static void SMP_TIMER_NAME(neigh_periodic_timer)(unsigned long arg)
{
struct neigh_table *tbl = (struct neigh_table*)arg;
unsigned long now = jiffies;
int i;
 
 
write_lock(&tbl->lock);
 
/*
	 *	periodically recompute ReachableTime from the random function
*/
if (now - tbl->last_rand > 300*HZ) {
struct neigh_parms *p;
tbl->last_rand = now;
for (p=&tbl->parms; p; p = p->next)
p->reachable_time = neigh_rand_reach_time(p->base_reachable_time);
}
 
for (i=0; i <= NEIGH_HASHMASK; i++) {
struct neighbour *n, **np;
 
np = &tbl->hash_buckets[i];
while ((n = *np) != NULL) {
unsigned state;
 
write_lock(&n->lock);
 
state = n->nud_state;
if (state&(NUD_PERMANENT|NUD_IN_TIMER)) {
write_unlock(&n->lock);
goto next_elt;
}
 
if ((long)(n->used - n->confirmed) < 0)
n->used = n->confirmed;
 
if (atomic_read(&n->refcnt) == 1 &&
(state == NUD_FAILED || now - n->used > n->parms->gc_staletime)) {
*np = n->next;
n->dead = 1;
write_unlock(&n->lock);
neigh_release(n);
continue;
}
 
if (n->nud_state&NUD_REACHABLE &&
now - n->confirmed > n->parms->reachable_time) {
n->nud_state = NUD_STALE;
neigh_suspect(n);
}
write_unlock(&n->lock);
 
next_elt:
np = &n->next;
}
}
 
mod_timer(&tbl->gc_timer, now + tbl->gc_interval);
write_unlock(&tbl->lock);
}
 
#ifdef CONFIG_SMP
static void neigh_periodic_timer(unsigned long arg)
{
struct neigh_table *tbl = (struct neigh_table*)arg;
tasklet_schedule(&tbl->gc_task);
}
#endif
 
static __inline__ int neigh_max_probes(struct neighbour *n)
{
struct neigh_parms *p = n->parms;
return p->ucast_probes + p->app_probes + p->mcast_probes;
}
 
 
/* Called when a timer expires for a neighbour entry. */
 
static void neigh_timer_handler(unsigned long arg)
{
unsigned long now = jiffies;
struct neighbour *neigh = (struct neighbour*)arg;
unsigned state;
int notify = 0;
 
write_lock(&neigh->lock);
 
state = neigh->nud_state;
 
if (!(state&NUD_IN_TIMER)) {
#ifndef CONFIG_SMP
printk("neigh: timer & !nud_in_timer\n");
#endif
goto out;
}
 
if ((state&NUD_VALID) &&
now - neigh->confirmed < neigh->parms->reachable_time) {
neigh->nud_state = NUD_REACHABLE;
NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
neigh_connect(neigh);
goto out;
}
if (state == NUD_DELAY) {
NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
neigh->nud_state = NUD_PROBE;
atomic_set(&neigh->probes, 0);
}
 
if (atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
struct sk_buff *skb;
 
neigh->nud_state = NUD_FAILED;
notify = 1;
neigh->tbl->stats.res_failed++;
NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
 
		/* This is a very delicate place. report_unreachable is a very
		   complicated routine. In particular, it can hit the same
		   neighbour entry! So we try to be careful and avoid an
		   endless loop. --ANK
*/
while(neigh->nud_state==NUD_FAILED && (skb=__skb_dequeue(&neigh->arp_queue)) != NULL) {
write_unlock(&neigh->lock);
neigh->ops->error_report(neigh, skb);
write_lock(&neigh->lock);
}
skb_queue_purge(&neigh->arp_queue);
goto out;
}
 
neigh->timer.expires = now + neigh->parms->retrans_time;
add_timer(&neigh->timer);
write_unlock(&neigh->lock);
 
neigh->ops->solicit(neigh, skb_peek(&neigh->arp_queue));
atomic_inc(&neigh->probes);
return;
 
out:
write_unlock(&neigh->lock);
#ifdef CONFIG_ARPD
if (notify && neigh->parms->app_probes)
neigh_app_notify(neigh);
#endif
neigh_release(neigh);
}
 
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
write_lock_bh(&neigh->lock);
if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE))) {
if (!(neigh->nud_state&(NUD_STALE|NUD_INCOMPLETE))) {
if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
atomic_set(&neigh->probes, neigh->parms->ucast_probes);
neigh->nud_state = NUD_INCOMPLETE;
neigh_hold(neigh);
neigh->timer.expires = jiffies + neigh->parms->retrans_time;
add_timer(&neigh->timer);
write_unlock_bh(&neigh->lock);
neigh->ops->solicit(neigh, skb);
atomic_inc(&neigh->probes);
write_lock_bh(&neigh->lock);
} else {
neigh->nud_state = NUD_FAILED;
write_unlock_bh(&neigh->lock);
 
if (skb)
kfree_skb(skb);
return 1;
}
}
if (neigh->nud_state == NUD_INCOMPLETE) {
if (skb) {
if (skb_queue_len(&neigh->arp_queue) >= neigh->parms->queue_len) {
struct sk_buff *buff;
buff = neigh->arp_queue.next;
__skb_unlink(buff, &neigh->arp_queue);
kfree_skb(buff);
}
__skb_queue_tail(&neigh->arp_queue, skb);
}
write_unlock_bh(&neigh->lock);
return 1;
}
if (neigh->nud_state == NUD_STALE) {
NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
neigh_hold(neigh);
neigh->nud_state = NUD_DELAY;
neigh->timer.expires = jiffies + neigh->parms->delay_probe_time;
add_timer(&neigh->timer);
}
}
write_unlock_bh(&neigh->lock);
return 0;
}
 
static __inline__ void neigh_update_hhs(struct neighbour *neigh)
{
struct hh_cache *hh;
void (*update)(struct hh_cache*, struct net_device*, unsigned char*) =
neigh->dev->header_cache_update;
 
if (update) {
for (hh=neigh->hh; hh; hh=hh->hh_next) {
write_lock_bh(&hh->hh_lock);
update(hh, neigh->dev, neigh->ha);
write_unlock_bh(&hh->hh_lock);
}
}
}
 
 
 
/* Generic update routine.
   -- lladdr is the new lladdr, or NULL if it is not supplied.
   -- new is the new state.
   -- override==1 allows overriding an existing lladdr if it is different.
   -- arp==0 means that the change is administrative.

   The caller MUST hold a reference count on the entry.
*/
 
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, int override, int arp)
{
u8 old;
int err;
int notify = 0;
struct net_device *dev = neigh->dev;
 
write_lock_bh(&neigh->lock);
old = neigh->nud_state;
 
err = -EPERM;
if (arp && (old&(NUD_NOARP|NUD_PERMANENT)))
goto out;
 
if (!(new&NUD_VALID)) {
neigh_del_timer(neigh);
if (old&NUD_CONNECTED)
neigh_suspect(neigh);
neigh->nud_state = new;
err = 0;
notify = old&NUD_VALID;
goto out;
}
 
/* Compare new lladdr with cached one */
if (dev->addr_len == 0) {
/* First case: device needs no address. */
lladdr = neigh->ha;
} else if (lladdr) {
/* The second case: if something is already cached
and a new address is proposed:
- compare new & old
- if they are different, check override flag
*/
if (old&NUD_VALID) {
if (memcmp(lladdr, neigh->ha, dev->addr_len) == 0)
lladdr = neigh->ha;
else if (!override)
goto out;
}
} else {
/* No address is supplied; if we know something,
use it, otherwise discard the request.
*/
err = -EINVAL;
if (!(old&NUD_VALID))
goto out;
lladdr = neigh->ha;
}
 
neigh_sync(neigh);
old = neigh->nud_state;
if (new&NUD_CONNECTED)
neigh->confirmed = jiffies;
neigh->updated = jiffies;
 
	/* If the entry was valid and the address has not changed,
	   do not change the entry state if the new one is STALE.
*/
err = 0;
if (old&NUD_VALID) {
if (lladdr == neigh->ha)
if (new == old || (new == NUD_STALE && (old&NUD_CONNECTED)))
goto out;
}
neigh_del_timer(neigh);
neigh->nud_state = new;
if (lladdr != neigh->ha) {
memcpy(&neigh->ha, lladdr, dev->addr_len);
neigh_update_hhs(neigh);
if (!(new&NUD_CONNECTED))
neigh->confirmed = jiffies - (neigh->parms->base_reachable_time<<1);
#ifdef CONFIG_ARPD
notify = 1;
#endif
}
if (new == old)
goto out;
if (new&NUD_CONNECTED)
neigh_connect(neigh);
else
neigh_suspect(neigh);
if (!(old&NUD_VALID)) {
struct sk_buff *skb;
 
/* Again: avoid dead loop if something went wrong */
 
while (neigh->nud_state&NUD_VALID &&
(skb=__skb_dequeue(&neigh->arp_queue)) != NULL) {
struct neighbour *n1 = neigh;
write_unlock_bh(&neigh->lock);
/* On shaper/eql skb->dst->neighbour != neigh :( */
if (skb->dst && skb->dst->neighbour)
n1 = skb->dst->neighbour;
n1->output(skb);
write_lock_bh(&neigh->lock);
}
skb_queue_purge(&neigh->arp_queue);
}
out:
write_unlock_bh(&neigh->lock);
#ifdef CONFIG_ARPD
if (notify && neigh->parms->app_probes)
neigh_app_notify(neigh);
#endif
return err;
}
 
struct neighbour * neigh_event_ns(struct neigh_table *tbl,
u8 *lladdr, void *saddr,
struct net_device *dev)
{
struct neighbour *neigh;
 
neigh = __neigh_lookup(tbl, saddr, dev, lladdr || !dev->addr_len);
if (neigh)
neigh_update(neigh, lladdr, NUD_STALE, 1, 1);
return neigh;
}
 
static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst, u16 protocol)
{
struct hh_cache *hh = NULL;
struct net_device *dev = dst->dev;
 
for (hh=n->hh; hh; hh = hh->hh_next)
if (hh->hh_type == protocol)
break;
 
if (!hh && (hh = kmalloc(sizeof(*hh), GFP_ATOMIC)) != NULL) {
memset(hh, 0, sizeof(struct hh_cache));
hh->hh_lock = RW_LOCK_UNLOCKED;
hh->hh_type = protocol;
atomic_set(&hh->hh_refcnt, 0);
hh->hh_next = NULL;
if (dev->hard_header_cache(n, hh)) {
kfree(hh);
hh = NULL;
} else {
atomic_inc(&hh->hh_refcnt);
hh->hh_next = n->hh;
n->hh = hh;
if (n->nud_state&NUD_CONNECTED)
hh->hh_output = n->ops->hh_output;
else
hh->hh_output = n->ops->output;
}
}
if (hh) {
atomic_inc(&hh->hh_refcnt);
dst->hh = hh;
}
}
 
/* This function can be used in contexts where only the old dev_queue_xmit
   worked, e.g. if you want to override the normal output path (eql, shaper)
   but resolution has not been made yet.
*/
 
int neigh_compat_output(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
 
__skb_pull(skb, skb->nh.raw - skb->data);
 
if (dev->hard_header &&
dev->hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL, skb->len) < 0 &&
dev->rebuild_header(skb))
return 0;
 
return dev_queue_xmit(skb);
}
 
/* Slow and careful. */
 
int neigh_resolve_output(struct sk_buff *skb)
{
struct dst_entry *dst = skb->dst;
struct neighbour *neigh;
 
if (!dst || !(neigh = dst->neighbour))
goto discard;
 
__skb_pull(skb, skb->nh.raw - skb->data);
 
if (neigh_event_send(neigh, skb) == 0) {
int err;
struct net_device *dev = neigh->dev;
if (dev->hard_header_cache && dst->hh == NULL) {
write_lock_bh(&neigh->lock);
if (dst->hh == NULL)
neigh_hh_init(neigh, dst, dst->ops->protocol);
err = dev->hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len);
write_unlock_bh(&neigh->lock);
} else {
read_lock_bh(&neigh->lock);
err = dev->hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len);
read_unlock_bh(&neigh->lock);
}
if (err >= 0)
return neigh->ops->queue_xmit(skb);
kfree_skb(skb);
return -EINVAL;
}
return 0;
 
discard:
NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n", dst, dst ? dst->neighbour : NULL);
kfree_skb(skb);
return -EINVAL;
}
 
/* As fast as possible without hh cache */
 
int neigh_connected_output(struct sk_buff *skb)
{
int err;
struct dst_entry *dst = skb->dst;
struct neighbour *neigh = dst->neighbour;
struct net_device *dev = neigh->dev;
 
__skb_pull(skb, skb->nh.raw - skb->data);
 
read_lock_bh(&neigh->lock);
err = dev->hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len);
read_unlock_bh(&neigh->lock);
if (err >= 0)
return neigh->ops->queue_xmit(skb);
kfree_skb(skb);
return -EINVAL;
}
 
static void neigh_proxy_process(unsigned long arg)
{
struct neigh_table *tbl = (struct neigh_table *)arg;
long sched_next = 0;
unsigned long now = jiffies;
struct sk_buff *skb;
 
spin_lock(&tbl->proxy_queue.lock);
 
skb = tbl->proxy_queue.next;
 
while (skb != (struct sk_buff*)&tbl->proxy_queue) {
struct sk_buff *back = skb;
long tdif = back->stamp.tv_usec - now;
 
skb = skb->next;
if (tdif <= 0) {
struct net_device *dev = back->dev;
__skb_unlink(back, &tbl->proxy_queue);
if (tbl->proxy_redo && netif_running(dev))
tbl->proxy_redo(back);
else
kfree_skb(back);
 
dev_put(dev);
} else if (!sched_next || tdif < sched_next)
sched_next = tdif;
}
del_timer(&tbl->proxy_timer);
if (sched_next)
mod_timer(&tbl->proxy_timer, jiffies + sched_next);
spin_unlock(&tbl->proxy_queue.lock);
}
 
void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
struct sk_buff *skb)
{
unsigned long now = jiffies;
long sched_next = net_random()%p->proxy_delay;
 
if (tbl->proxy_queue.qlen > p->proxy_qlen) {
kfree_skb(skb);
return;
}
skb->stamp.tv_sec = 0;
skb->stamp.tv_usec = now + sched_next;
 
spin_lock(&tbl->proxy_queue.lock);
if (del_timer(&tbl->proxy_timer)) {
long tval = tbl->proxy_timer.expires - now;
if (tval < sched_next)
sched_next = tval;
}
dst_release(skb->dst);
skb->dst = NULL;
dev_hold(skb->dev);
__skb_queue_tail(&tbl->proxy_queue, skb);
mod_timer(&tbl->proxy_timer, now + sched_next);
spin_unlock(&tbl->proxy_queue.lock);
}
 
 
struct neigh_parms *neigh_parms_alloc(struct net_device *dev, struct neigh_table *tbl)
{
struct neigh_parms *p;
p = kmalloc(sizeof(*p), GFP_KERNEL);
if (p) {
memcpy(p, &tbl->parms, sizeof(*p));
p->tbl = tbl;
p->reachable_time = neigh_rand_reach_time(p->base_reachable_time);
if (dev && dev->neigh_setup) {
if (dev->neigh_setup(dev, p)) {
kfree(p);
return NULL;
}
}
p->sysctl_table = NULL;
write_lock_bh(&tbl->lock);
p->next = tbl->parms.next;
tbl->parms.next = p;
write_unlock_bh(&tbl->lock);
}
return p;
}
 
void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
struct neigh_parms **p;
if (parms == NULL || parms == &tbl->parms)
return;
write_lock_bh(&tbl->lock);
for (p = &tbl->parms.next; *p; p = &(*p)->next) {
if (*p == parms) {
*p = parms->next;
write_unlock_bh(&tbl->lock);
#ifdef CONFIG_SYSCTL
neigh_sysctl_unregister(parms);
#endif
kfree(parms);
return;
}
}
write_unlock_bh(&tbl->lock);
NEIGH_PRINTK1("neigh_parms_release: not found\n");
}
 
 
void neigh_table_init(struct neigh_table *tbl)
{
unsigned long now = jiffies;
 
tbl->parms.reachable_time = neigh_rand_reach_time(tbl->parms.base_reachable_time);
 
if (tbl->kmem_cachep == NULL)
tbl->kmem_cachep = kmem_cache_create(tbl->id,
(tbl->entry_size+15)&~15,
0, SLAB_HWCACHE_ALIGN,
NULL, NULL);
 
#ifdef CONFIG_SMP
tasklet_init(&tbl->gc_task, SMP_TIMER_NAME(neigh_periodic_timer), (unsigned long)tbl);
#endif
init_timer(&tbl->gc_timer);
tbl->lock = RW_LOCK_UNLOCKED;
tbl->gc_timer.data = (unsigned long)tbl;
tbl->gc_timer.function = neigh_periodic_timer;
tbl->gc_timer.expires = now + tbl->gc_interval + tbl->parms.reachable_time;
add_timer(&tbl->gc_timer);
 
init_timer(&tbl->proxy_timer);
tbl->proxy_timer.data = (unsigned long)tbl;
tbl->proxy_timer.function = neigh_proxy_process;
skb_queue_head_init(&tbl->proxy_queue);
 
tbl->last_flush = now;
tbl->last_rand = now + tbl->parms.reachable_time*20;
write_lock(&neigh_tbl_lock);
tbl->next = neigh_tables;
neigh_tables = tbl;
write_unlock(&neigh_tbl_lock);
}
 
int neigh_table_clear(struct neigh_table *tbl)
{
struct neigh_table **tp;
 
/* It is not clean... Fix it to unload IPv6 module safely */
del_timer_sync(&tbl->gc_timer);
tasklet_kill(&tbl->gc_task);
del_timer_sync(&tbl->proxy_timer);
pneigh_queue_purge(&tbl->proxy_queue);
neigh_ifdown(tbl, NULL);
if (tbl->entries)
printk(KERN_CRIT "neighbour leakage\n");
write_lock(&neigh_tbl_lock);
for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
if (*tp == tbl) {
*tp = tbl->next;
break;
}
}
write_unlock(&neigh_tbl_lock);
#ifdef CONFIG_SYSCTL
neigh_sysctl_unregister(&tbl->parms);
#endif
return 0;
}
 
int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
struct ndmsg *ndm = NLMSG_DATA(nlh);
struct rtattr **nda = arg;
struct neigh_table *tbl;
struct net_device *dev = NULL;
int err = 0;
 
if (ndm->ndm_ifindex) {
if ((dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL)
return -ENODEV;
}
 
read_lock(&neigh_tbl_lock);
for (tbl=neigh_tables; tbl; tbl = tbl->next) {
struct neighbour *n;
 
if (tbl->family != ndm->ndm_family)
continue;
read_unlock(&neigh_tbl_lock);
 
err = -EINVAL;
if (nda[NDA_DST-1] == NULL ||
nda[NDA_DST-1]->rta_len != RTA_LENGTH(tbl->key_len))
goto out;
 
if (ndm->ndm_flags&NTF_PROXY) {
err = pneigh_delete(tbl, RTA_DATA(nda[NDA_DST-1]), dev);
goto out;
}
 
if (dev == NULL)
return -EINVAL;
 
n = neigh_lookup(tbl, RTA_DATA(nda[NDA_DST-1]), dev);
if (n) {
err = neigh_update(n, NULL, NUD_FAILED, 1, 0);
neigh_release(n);
}
out:
if (dev)
dev_put(dev);
return err;
}
read_unlock(&neigh_tbl_lock);
 
if (dev)
dev_put(dev);
 
return -EADDRNOTAVAIL;
}
 
int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
struct ndmsg *ndm = NLMSG_DATA(nlh);
struct rtattr **nda = arg;
struct neigh_table *tbl;
struct net_device *dev = NULL;
 
if (ndm->ndm_ifindex) {
if ((dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL)
return -ENODEV;
}
 
read_lock(&neigh_tbl_lock);
for (tbl=neigh_tables; tbl; tbl = tbl->next) {
int err = 0;
int override = 1;
struct neighbour *n;
 
if (tbl->family != ndm->ndm_family)
continue;
read_unlock(&neigh_tbl_lock);
 
err = -EINVAL;
if (nda[NDA_DST-1] == NULL ||
nda[NDA_DST-1]->rta_len != RTA_LENGTH(tbl->key_len))
goto out;
if (ndm->ndm_flags&NTF_PROXY) {
err = -ENOBUFS;
if (pneigh_lookup(tbl, RTA_DATA(nda[NDA_DST-1]), dev, 1))
err = 0;
goto out;
}
if (dev == NULL)
return -EINVAL;
err = -EINVAL;
if (nda[NDA_LLADDR-1] != NULL &&
nda[NDA_LLADDR-1]->rta_len != RTA_LENGTH(dev->addr_len))
goto out;
err = 0;
n = neigh_lookup(tbl, RTA_DATA(nda[NDA_DST-1]), dev);
if (n) {
if (nlh->nlmsg_flags&NLM_F_EXCL)
err = -EEXIST;
override = nlh->nlmsg_flags&NLM_F_REPLACE;
} else if (!(nlh->nlmsg_flags&NLM_F_CREATE))
err = -ENOENT;
else {
n = __neigh_lookup_errno(tbl, RTA_DATA(nda[NDA_DST-1]), dev);
if (IS_ERR(n)) {
err = PTR_ERR(n);
n = NULL;
}
}
if (err == 0) {
err = neigh_update(n, nda[NDA_LLADDR-1] ? RTA_DATA(nda[NDA_LLADDR-1]) : NULL,
ndm->ndm_state,
override, 0);
}
if (n)
neigh_release(n);
out:
if (dev)
dev_put(dev);
return err;
}
read_unlock(&neigh_tbl_lock);
 
if (dev)
dev_put(dev);
return -EADDRNOTAVAIL;
}
 
 
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *n,
u32 pid, u32 seq, int event)
{
unsigned long now = jiffies;
struct ndmsg *ndm;
struct nlmsghdr *nlh;
unsigned char *b = skb->tail;
struct nda_cacheinfo ci;
int locked = 0;
 
nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ndm));
ndm = NLMSG_DATA(nlh);
ndm->ndm_family = n->ops->family;
ndm->ndm_flags = n->flags;
ndm->ndm_type = n->type;
ndm->ndm_ifindex = n->dev->ifindex;
RTA_PUT(skb, NDA_DST, n->tbl->key_len, n->primary_key);
read_lock_bh(&n->lock);
locked=1;
ndm->ndm_state = n->nud_state;
if (n->nud_state&NUD_VALID)
RTA_PUT(skb, NDA_LLADDR, n->dev->addr_len, n->ha);
ci.ndm_used = now - n->used;
ci.ndm_confirmed = now - n->confirmed;
ci.ndm_updated = now - n->updated;
ci.ndm_refcnt = atomic_read(&n->refcnt) - 1;
read_unlock_bh(&n->lock);
locked=0;
RTA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);
nlh->nlmsg_len = skb->tail - b;
return skb->len;
 
nlmsg_failure:
rtattr_failure:
if (locked)
read_unlock_bh(&n->lock);
skb_trim(skb, b - skb->data);
return -1;
}
 
 
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, struct netlink_callback *cb)
{
struct neighbour *n;
int h, s_h;
int idx, s_idx;
 
s_h = cb->args[1];
s_idx = idx = cb->args[2];
for (h=0; h <= NEIGH_HASHMASK; h++) {
if (h < s_h) continue;
if (h > s_h)
s_idx = 0;
read_lock_bh(&tbl->lock);
for (n = tbl->hash_buckets[h], idx = 0; n;
n = n->next, idx++) {
if (idx < s_idx)
continue;
if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq, RTM_NEWNEIGH) <= 0) {
read_unlock_bh(&tbl->lock);
cb->args[1] = h;
cb->args[2] = idx;
return -1;
}
}
read_unlock_bh(&tbl->lock);
}
 
cb->args[1] = h;
cb->args[2] = idx;
return skb->len;
}
 
int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
int t;
int s_t;
struct neigh_table *tbl;
int family = ((struct rtgenmsg*)NLMSG_DATA(cb->nlh))->rtgen_family;
 
s_t = cb->args[0];
 
read_lock(&neigh_tbl_lock);
for (tbl=neigh_tables, t=0; tbl; tbl = tbl->next, t++) {
if (t < s_t) continue;
if (family && tbl->family != family)
continue;
if (t > s_t)
memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
if (neigh_dump_table(tbl, skb, cb) < 0)
break;
}
read_unlock(&neigh_tbl_lock);
 
cb->args[0] = t;
 
return skb->len;
}
 
#ifdef CONFIG_ARPD
void neigh_app_ns(struct neighbour *n)
{
struct sk_buff *skb;
struct nlmsghdr *nlh;
int size = NLMSG_SPACE(sizeof(struct ndmsg)+256);
 
skb = alloc_skb(size, GFP_ATOMIC);
if (!skb)
return;
 
if (neigh_fill_info(skb, n, 0, 0, RTM_GETNEIGH) < 0) {
kfree_skb(skb);
return;
}
nlh = (struct nlmsghdr*)skb->data;
nlh->nlmsg_flags = NLM_F_REQUEST;
NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH;
netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC);
}
 
static void neigh_app_notify(struct neighbour *n)
{
struct sk_buff *skb;
struct nlmsghdr *nlh;
int size = NLMSG_SPACE(sizeof(struct ndmsg)+256);
 
skb = alloc_skb(size, GFP_ATOMIC);
if (!skb)
return;
 
if (neigh_fill_info(skb, n, 0, 0, RTM_NEWNEIGH) < 0) {
kfree_skb(skb);
return;
}
nlh = (struct nlmsghdr*)skb->data;
NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH;
netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC);
}
 
#endif /* CONFIG_ARPD */
 
#ifdef CONFIG_SYSCTL
 
struct neigh_sysctl_table
{
struct ctl_table_header *sysctl_header;
ctl_table neigh_vars[17];
ctl_table neigh_dev[2];
ctl_table neigh_neigh_dir[2];
ctl_table neigh_proto_dir[2];
ctl_table neigh_root_dir[2];
} neigh_sysctl_template = {
NULL,
{{NET_NEIGH_MCAST_SOLICIT, "mcast_solicit",
NULL, sizeof(int), 0644, NULL,
&proc_dointvec},
{NET_NEIGH_UCAST_SOLICIT, "ucast_solicit",
NULL, sizeof(int), 0644, NULL,
&proc_dointvec},
{NET_NEIGH_APP_SOLICIT, "app_solicit",
NULL, sizeof(int), 0644, NULL,
&proc_dointvec},
{NET_NEIGH_RETRANS_TIME, "retrans_time",
NULL, sizeof(int), 0644, NULL,
&proc_dointvec},
{NET_NEIGH_REACHABLE_TIME, "base_reachable_time",
NULL, sizeof(int), 0644, NULL,
&proc_dointvec_jiffies},
{NET_NEIGH_DELAY_PROBE_TIME, "delay_first_probe_time",
NULL, sizeof(int), 0644, NULL,
&proc_dointvec_jiffies},
{NET_NEIGH_GC_STALE_TIME, "gc_stale_time",
NULL, sizeof(int), 0644, NULL,
&proc_dointvec_jiffies},
{NET_NEIGH_UNRES_QLEN, "unres_qlen",
NULL, sizeof(int), 0644, NULL,
&proc_dointvec},
{NET_NEIGH_PROXY_QLEN, "proxy_qlen",
NULL, sizeof(int), 0644, NULL,
&proc_dointvec},
{NET_NEIGH_ANYCAST_DELAY, "anycast_delay",
NULL, sizeof(int), 0644, NULL,
&proc_dointvec},
{NET_NEIGH_PROXY_DELAY, "proxy_delay",
NULL, sizeof(int), 0644, NULL,
&proc_dointvec},
{NET_NEIGH_LOCKTIME, "locktime",
NULL, sizeof(int), 0644, NULL,
&proc_dointvec},
{NET_NEIGH_GC_INTERVAL, "gc_interval",
NULL, sizeof(int), 0644, NULL,
&proc_dointvec_jiffies},
{NET_NEIGH_GC_THRESH1, "gc_thresh1",
NULL, sizeof(int), 0644, NULL,
&proc_dointvec},
{NET_NEIGH_GC_THRESH2, "gc_thresh2",
NULL, sizeof(int), 0644, NULL,
&proc_dointvec},
{NET_NEIGH_GC_THRESH3, "gc_thresh3",
NULL, sizeof(int), 0644, NULL,
&proc_dointvec},
{0}},
 
{{NET_PROTO_CONF_DEFAULT, "default", NULL, 0, 0555, NULL},{0}},
{{0, "neigh", NULL, 0, 0555, NULL},{0}},
{{0, NULL, NULL, 0, 0555, NULL},{0}},
{{CTL_NET, "net", NULL, 0, 0555, NULL},{0}}
};
 
int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
int p_id, int pdev_id, char *p_name)
{
struct neigh_sysctl_table *t;
 
t = kmalloc(sizeof(*t), GFP_KERNEL);
if (t == NULL)
return -ENOBUFS;
memcpy(t, &neigh_sysctl_template, sizeof(*t));
t->neigh_vars[0].data = &p->mcast_probes;
t->neigh_vars[1].data = &p->ucast_probes;
t->neigh_vars[2].data = &p->app_probes;
t->neigh_vars[3].data = &p->retrans_time;
t->neigh_vars[4].data = &p->base_reachable_time;
t->neigh_vars[5].data = &p->delay_probe_time;
t->neigh_vars[6].data = &p->gc_staletime;
t->neigh_vars[7].data = &p->queue_len;
t->neigh_vars[8].data = &p->proxy_qlen;
t->neigh_vars[9].data = &p->anycast_delay;
t->neigh_vars[10].data = &p->proxy_delay;
t->neigh_vars[11].data = &p->locktime;
if (dev) {
t->neigh_dev[0].procname = dev->name;
t->neigh_dev[0].ctl_name = dev->ifindex;
memset(&t->neigh_vars[12], 0, sizeof(ctl_table));
} else {
t->neigh_vars[12].data = (int*)(p+1);
t->neigh_vars[13].data = (int*)(p+1) + 1;
t->neigh_vars[14].data = (int*)(p+1) + 2;
t->neigh_vars[15].data = (int*)(p+1) + 3;
}
t->neigh_neigh_dir[0].ctl_name = pdev_id;
 
t->neigh_proto_dir[0].procname = p_name;
t->neigh_proto_dir[0].ctl_name = p_id;
 
t->neigh_dev[0].child = t->neigh_vars;
t->neigh_neigh_dir[0].child = t->neigh_dev;
t->neigh_proto_dir[0].child = t->neigh_neigh_dir;
t->neigh_root_dir[0].child = t->neigh_proto_dir;
 
t->sysctl_header = register_sysctl_table(t->neigh_root_dir, 0);
if (t->sysctl_header == NULL) {
kfree(t);
return -ENOBUFS;
}
p->sysctl_table = t;
return 0;
}
 
void neigh_sysctl_unregister(struct neigh_parms *p)
{
if (p->sysctl_table) {
struct neigh_sysctl_table *t = p->sysctl_table;
p->sysctl_table = NULL;
unregister_sysctl_table(t->sysctl_header);
kfree(t);
}
}
 
#endif /* CONFIG_SYSCTL */
/scm.c
0,0 → 1,273
/* scm.c - Socket level control messages processing.
*
* Author: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
* Alignment and value checking mods by Craig Metz
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
 
#include <linux/signal.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/major.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/file.h>
#include <linux/fcntl.h>
#include <linux/net.h>
#include <linux/interrupt.h>
#include <linux/netdevice.h>
 
#include <asm/system.h>
#include <asm/uaccess.h>
 
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/scm.h>
 
 
/*
 * Only allow a user to send credentials that they could set with
 * setu(g)id.
*/
 
static __inline__ int scm_check_creds(struct ucred *creds)
{
if ((creds->pid == current->pid || capable(CAP_SYS_ADMIN)) &&
((creds->uid == current->uid || creds->uid == current->euid ||
creds->uid == current->suid) || capable(CAP_SETUID)) &&
((creds->gid == current->gid || creds->gid == current->egid ||
creds->gid == current->sgid) || capable(CAP_SETGID))) {
return 0;
}
return -EPERM;
}
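/*
 * Illustrative sketch (not part of the original file): scm_check_creds()
 * above is reached when userspace sends SCM_CREDENTIALS ancillary data with
 * sendmsg() on an AF_UNIX socket.  A minimal userspace sender might look
 * like this; sock_fd is assumed to be a connected AF_UNIX socket and
 * send_creds() is an invented helper name.
 */
#define _GNU_SOURCE
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <string.h>
#include <unistd.h>

/* Sketch: advertise our own pid/uid/gid to the peer. */
static ssize_t send_creds(int sock_fd)
{
	struct ucred creds = { .pid = getpid(), .uid = getuid(), .gid = getgid() };
	char cbuf[CMSG_SPACE(sizeof(creds))];
	char dummy = 'x';
	struct iovec iov = { .iov_base = &dummy, .iov_len = 1 };
	struct msghdr msg;
	struct cmsghdr *cmsg;

	memset(&msg, 0, sizeof(msg));
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = cbuf;
	msg.msg_controllen = sizeof(cbuf);

	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_CREDENTIALS;
	cmsg->cmsg_len = CMSG_LEN(sizeof(creds));
	memcpy(CMSG_DATA(cmsg), &creds, sizeof(creds));

	return sendmsg(sock_fd, &msg, 0);	/* ends up in scm_check_creds() */
}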
 
static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
{
int *fdp = (int*)CMSG_DATA(cmsg);
struct scm_fp_list *fpl = *fplp;
struct file **fpp;
int i, num;
 
num = (cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)))/sizeof(int);
 
if (num <= 0)
return 0;
 
if (num > SCM_MAX_FD)
return -EINVAL;
 
if (!fpl)
{
fpl = kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL);
if (!fpl)
return -ENOMEM;
*fplp = fpl;
fpl->count = 0;
}
fpp = &fpl->fp[fpl->count];
 
if (fpl->count + num > SCM_MAX_FD)
return -EINVAL;
/*
* Verify the descriptors and increment the usage count.
*/
for (i=0; i< num; i++)
{
int fd = fdp[i];
struct file *file;
 
if (fd < 0 || !(file = fget(fd)))
return -EBADF;
*fpp++ = file;
fpl->count++;
}
return num;
}
 
void __scm_destroy(struct scm_cookie *scm)
{
struct scm_fp_list *fpl = scm->fp;
int i;
 
if (fpl) {
scm->fp = NULL;
for (i=fpl->count-1; i>=0; i--)
fput(fpl->fp[i]);
kfree(fpl);
}
}
 
int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
{
struct cmsghdr *cmsg;
int err;
 
for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg))
{
err = -EINVAL;
 
/* Verify that cmsg_len is at least sizeof(struct cmsghdr) */
/* The first check was omitted in <= 2.2.5. The reasoning was
   that the parser checks cmsg_len in any case, so the
   additional check would be duplicated work.
   But if cmsg_level is not SOL_SOCKET, we do not check
   for a too-short ancillary data object at all! Oops.
   OK, let's add it...
*/
if (cmsg->cmsg_len < sizeof(struct cmsghdr) ||
(unsigned long)(((char*)cmsg - (char*)msg->msg_control)
+ cmsg->cmsg_len) > msg->msg_controllen)
goto error;
 
if (cmsg->cmsg_level != SOL_SOCKET)
continue;
 
switch (cmsg->cmsg_type)
{
case SCM_RIGHTS:
err=scm_fp_copy(cmsg, &p->fp);
if (err<0)
goto error;
break;
case SCM_CREDENTIALS:
if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct ucred)))
goto error;
memcpy(&p->creds, CMSG_DATA(cmsg), sizeof(struct ucred));
err = scm_check_creds(&p->creds);
if (err)
goto error;
break;
default:
goto error;
}
}
 
if (p->fp && !p->fp->count)
{
kfree(p->fp);
p->fp = NULL;
}
return 0;
error:
scm_destroy(p);
return err;
}
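/*
 * Illustrative sketch (not part of the original file): __scm_send() above is
 * how SCM_RIGHTS control messages from sendmsg() become the in-kernel
 * scm_fp_list.  A hedged userspace sketch of the sending side; sock_fd and
 * fd_to_pass are assumed to be open descriptors and send_fd() is an invented
 * helper name.
 */
#define _GNU_SOURCE
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <string.h>

/* Sketch: pass one open file descriptor across an AF_UNIX socket. */
static ssize_t send_fd(int sock_fd, int fd_to_pass)
{
	char cbuf[CMSG_SPACE(sizeof(int))];
	char dummy = 'x';
	struct iovec iov = { .iov_base = &dummy, .iov_len = 1 };
	struct msghdr msg;
	struct cmsghdr *cmsg;

	memset(&msg, 0, sizeof(msg));
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = cbuf;
	msg.msg_controllen = sizeof(cbuf);

	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_RIGHTS;
	cmsg->cmsg_len = CMSG_LEN(sizeof(int));
	memcpy(CMSG_DATA(cmsg), &fd_to_pass, sizeof(int));

	return sendmsg(sock_fd, &msg, 0);	/* handled by scm_fp_copy() above */
}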
 
int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data)
{
struct cmsghdr *cm = (struct cmsghdr*)msg->msg_control;
struct cmsghdr cmhdr;
int cmlen = CMSG_LEN(len);
int err;
 
if (cm==NULL || msg->msg_controllen < sizeof(*cm)) {
msg->msg_flags |= MSG_CTRUNC;
return 0; /* XXX: return error? check spec. */
}
if (msg->msg_controllen < cmlen) {
msg->msg_flags |= MSG_CTRUNC;
cmlen = msg->msg_controllen;
}
cmhdr.cmsg_level = level;
cmhdr.cmsg_type = type;
cmhdr.cmsg_len = cmlen;
 
err = -EFAULT;
if (copy_to_user(cm, &cmhdr, sizeof cmhdr))
goto out;
if (copy_to_user(CMSG_DATA(cm), data, cmlen - sizeof(struct cmsghdr)))
goto out;
cmlen = CMSG_SPACE(len);
msg->msg_control += cmlen;
msg->msg_controllen -= cmlen;
err = 0;
out:
return err;
}
 
void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
{
struct cmsghdr *cm = (struct cmsghdr*)msg->msg_control;
 
int fdmax = 0;
int fdnum = scm->fp->count;
struct file **fp = scm->fp->fp;
int *cmfptr;
int err = 0, i;
 
if (msg->msg_controllen > sizeof(struct cmsghdr))
fdmax = ((msg->msg_controllen - sizeof(struct cmsghdr))
/ sizeof(int));
 
if (fdnum < fdmax)
fdmax = fdnum;
 
for (i=0, cmfptr=(int*)CMSG_DATA(cm); i<fdmax; i++, cmfptr++)
{
int new_fd;
err = get_unused_fd();
if (err < 0)
break;
new_fd = err;
err = put_user(new_fd, cmfptr);
if (err) {
put_unused_fd(new_fd);
break;
}
/* Bump the usage count and install the file. */
get_file(fp[i]);
fd_install(new_fd, fp[i]);
}
 
if (i > 0)
{
int cmlen = CMSG_LEN(i*sizeof(int));
if (!err)
err = put_user(SOL_SOCKET, &cm->cmsg_level);
if (!err)
err = put_user(SCM_RIGHTS, &cm->cmsg_type);
if (!err)
err = put_user(cmlen, &cm->cmsg_len);
if (!err) {
cmlen = CMSG_SPACE(i*sizeof(int));
msg->msg_control += cmlen;
msg->msg_controllen -= cmlen;
}
}
if (i < fdnum || (fdnum && fdmax <= 0))
msg->msg_flags |= MSG_CTRUNC;
 
/*
* All of the files that fit in the message have had their
* usage counts incremented, so we just free the list.
*/
__scm_destroy(scm);
}
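/*
 * Illustrative sketch (not part of the original file): the receive-side
 * counterpart of scm_detach_fds() above, which installs the passed files and
 * writes the new descriptor numbers into the user's control buffer.  A
 * minimal sketch; sock_fd is assumed to be a connected AF_UNIX socket and
 * recv_fd() is an invented helper name.
 */
#define _GNU_SOURCE
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <string.h>

/* Sketch: receive one descriptor passed via SCM_RIGHTS, or -1 on failure. */
static int recv_fd(int sock_fd)
{
	char cbuf[CMSG_SPACE(sizeof(int))];
	char dummy;
	struct iovec iov = { .iov_base = &dummy, .iov_len = 1 };
	struct msghdr msg;
	struct cmsghdr *cmsg;
	int fd = -1;

	memset(&msg, 0, sizeof(msg));
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = cbuf;
	msg.msg_controllen = sizeof(cbuf);

	if (recvmsg(sock_fd, &msg, 0) < 0)
		return -1;

	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
		if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
			memcpy(&fd, CMSG_DATA(cmsg), sizeof(int));
			break;
		}
	}
	return fd;	/* the value put_user()'d by scm_detach_fds() */
}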
 
struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
{
struct scm_fp_list *new_fpl;
int i;
 
if (!fpl)
return NULL;
 
new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL);
if (new_fpl) {
for (i=fpl->count-1; i>=0; i--)
get_file(fpl->fp[i]);
memcpy(new_fpl, fpl, sizeof(*fpl));
}
return new_fpl;
}
/filter.c
0,0 → 1,497
/*
* Linux Socket Filter - Kernel level socket filtering
*
* Author:
* Jay Schulist <jschlst@samba.org>
*
* Based on the design of:
* - The Berkeley Packet Filter
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Andi Kleen - Fix a few bad bugs and races.
*/
 
#include <linux/config.h>
#if defined(CONFIG_FILTER)
 
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/filter.h>
 
/* No hurry in this branch */
 
static u8 *load_pointer(struct sk_buff *skb, int k)
{
u8 *ptr = NULL;
 
if (k>=SKF_NET_OFF)
ptr = skb->nh.raw + k - SKF_NET_OFF;
else if (k>=SKF_LL_OFF)
ptr = skb->mac.raw + k - SKF_LL_OFF;
 
if (ptr >= skb->head && ptr < skb->tail)
return ptr;
return NULL;
}
 
/**
* sk_run_filter - run a filter on a socket
* @skb: buffer to run the filter on
* @filter: filter to apply
* @flen: length of filter
*
* Decode and apply filter instructions to the skb->data.
* Return length to keep, 0 for none. skb is the data we are
* filtering, filter is the array of filter instructions, and
 * flen is the number of filter blocks in the array.
*/
int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
{
unsigned char *data = skb->data;
/* len is UNSIGNED. Byte-wide insns rely only on implicit
   type casts to prevent reading arbitrary memory locations.
*/
unsigned int len = skb->len-skb->data_len;
struct sock_filter *fentry; /* We walk down these */
u32 A = 0; /* Accumulator */
u32 X = 0; /* Index Register */
u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */
int k;
int pc;
 
/*
* Process array of filter instructions.
*/
 
for(pc = 0; pc < flen; pc++)
{
fentry = &filter[pc];
switch(fentry->code)
{
case BPF_ALU|BPF_ADD|BPF_X:
A += X;
continue;
 
case BPF_ALU|BPF_ADD|BPF_K:
A += fentry->k;
continue;
 
case BPF_ALU|BPF_SUB|BPF_X:
A -= X;
continue;
 
case BPF_ALU|BPF_SUB|BPF_K:
A -= fentry->k;
continue;
 
case BPF_ALU|BPF_MUL|BPF_X:
A *= X;
continue;
 
case BPF_ALU|BPF_MUL|BPF_K:
A *= fentry->k;
continue;
 
case BPF_ALU|BPF_DIV|BPF_X:
if(X == 0)
return (0);
A /= X;
continue;
 
case BPF_ALU|BPF_DIV|BPF_K:
if(fentry->k == 0)
return (0);
A /= fentry->k;
continue;
 
case BPF_ALU|BPF_AND|BPF_X:
A &= X;
continue;
 
case BPF_ALU|BPF_AND|BPF_K:
A &= fentry->k;
continue;
 
case BPF_ALU|BPF_OR|BPF_X:
A |= X;
continue;
 
case BPF_ALU|BPF_OR|BPF_K:
A |= fentry->k;
continue;
 
case BPF_ALU|BPF_LSH|BPF_X:
A <<= X;
continue;
 
case BPF_ALU|BPF_LSH|BPF_K:
A <<= fentry->k;
continue;
 
case BPF_ALU|BPF_RSH|BPF_X:
A >>= X;
continue;
 
case BPF_ALU|BPF_RSH|BPF_K:
A >>= fentry->k;
continue;
 
case BPF_ALU|BPF_NEG:
A = -A;
continue;
 
case BPF_JMP|BPF_JA:
pc += fentry->k;
continue;
 
case BPF_JMP|BPF_JGT|BPF_K:
pc += (A > fentry->k) ? fentry->jt : fentry->jf;
continue;
 
case BPF_JMP|BPF_JGE|BPF_K:
pc += (A >= fentry->k) ? fentry->jt : fentry->jf;
continue;
 
case BPF_JMP|BPF_JEQ|BPF_K:
pc += (A == fentry->k) ? fentry->jt : fentry->jf;
continue;
 
case BPF_JMP|BPF_JSET|BPF_K:
pc += (A & fentry->k) ? fentry->jt : fentry->jf;
continue;
 
case BPF_JMP|BPF_JGT|BPF_X:
pc += (A > X) ? fentry->jt : fentry->jf;
continue;
 
case BPF_JMP|BPF_JGE|BPF_X:
pc += (A >= X) ? fentry->jt : fentry->jf;
continue;
 
case BPF_JMP|BPF_JEQ|BPF_X:
pc += (A == X) ? fentry->jt : fentry->jf;
continue;
 
case BPF_JMP|BPF_JSET|BPF_X:
pc += (A & X) ? fentry->jt : fentry->jf;
continue;
 
case BPF_LD|BPF_W|BPF_ABS:
k = fentry->k;
load_w:
if(k >= 0 && (unsigned int)(k+sizeof(u32)) <= len) {
A = ntohl(*(u32*)&data[k]);
continue;
}
if (k<0) {
u8 *ptr;
 
if (k>=SKF_AD_OFF)
break;
if ((ptr = load_pointer(skb, k)) != NULL) {
A = ntohl(*(u32*)ptr);
continue;
}
} else {
u32 tmp;
if (!skb_copy_bits(skb, k, &tmp, 4)) {
A = ntohl(tmp);
continue;
}
}
return 0;
 
case BPF_LD|BPF_H|BPF_ABS:
k = fentry->k;
load_h:
if(k >= 0 && (unsigned int) (k + sizeof(u16)) <= len) {
A = ntohs(*(u16*)&data[k]);
continue;
}
if (k<0) {
u8 *ptr;
 
if (k>=SKF_AD_OFF)
break;
if ((ptr = load_pointer(skb, k)) != NULL) {
A = ntohs(*(u16*)ptr);
continue;
}
} else {
u16 tmp;
if (!skb_copy_bits(skb, k, &tmp, 2)) {
A = ntohs(tmp);
continue;
}
}
return 0;
 
case BPF_LD|BPF_B|BPF_ABS:
k = fentry->k;
load_b:
if(k >= 0 && (unsigned int)k < len) {
A = data[k];
continue;
}
if (k<0) {
u8 *ptr;
 
if (k>=SKF_AD_OFF)
break;
if ((ptr = load_pointer(skb, k)) != NULL) {
A = *ptr;
continue;
}
} else {
u8 tmp;
if (!skb_copy_bits(skb, k, &tmp, 1)) {
A = tmp;
continue;
}
}
return 0;
 
case BPF_LD|BPF_W|BPF_LEN:
A = len;
continue;
 
case BPF_LDX|BPF_W|BPF_LEN:
X = len;
continue;
 
case BPF_LD|BPF_W|BPF_IND:
k = X + fentry->k;
goto load_w;
 
case BPF_LD|BPF_H|BPF_IND:
k = X + fentry->k;
goto load_h;
 
case BPF_LD|BPF_B|BPF_IND:
k = X + fentry->k;
goto load_b;
 
case BPF_LDX|BPF_B|BPF_MSH:
if(fentry->k >= len)
return (0);
X = (data[fentry->k] & 0xf) << 2;
continue;
 
case BPF_LD|BPF_IMM:
A = fentry->k;
continue;
 
case BPF_LDX|BPF_IMM:
X = fentry->k;
continue;
 
case BPF_LD|BPF_MEM:
A = mem[fentry->k];
continue;
 
case BPF_LDX|BPF_MEM:
X = mem[fentry->k];
continue;
 
case BPF_MISC|BPF_TAX:
X = A;
continue;
 
case BPF_MISC|BPF_TXA:
A = X;
continue;
 
case BPF_RET|BPF_K:
return ((unsigned int)fentry->k);
 
case BPF_RET|BPF_A:
return ((unsigned int)A);
 
case BPF_ST:
mem[fentry->k] = A;
continue;
 
case BPF_STX:
mem[fentry->k] = X;
continue;
 
default:
/* Invalid instruction counts as RET */
return (0);
}
 
/* Handle ancillary data, which is impossible
   (or very difficult) to obtain by parsing packet contents.
*/
switch (k-SKF_AD_OFF) {
case SKF_AD_PROTOCOL:
A = htons(skb->protocol);
continue;
case SKF_AD_PKTTYPE:
A = skb->pkt_type;
continue;
case SKF_AD_IFINDEX:
A = skb->dev->ifindex;
continue;
default:
return 0;
}
}
 
return (0);
}
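/*
 * Illustrative sketch (not part of the original file): a small classic BPF
 * program of the kind sk_run_filter() above interprets.  It accepts IPv4/UDP
 * frames and drops everything else; offsets 12 and 23 assume skb->data starts
 * at an untagged Ethernet header (e.g. a PF_PACKET socket), and the array
 * name udp_only is invented for illustration.
 */
#include <linux/filter.h>

static struct sock_filter udp_only[] = {
	/* A = ethertype (frame bytes 12-13)                            */
	{ BPF_LD  | BPF_H   | BPF_ABS, 0, 0, 12     },
	/* equal to 0x0800 (IPv4)? fall through : jump +3 to "drop"     */
	{ BPF_JMP | BPF_JEQ | BPF_K,   0, 3, 0x0800 },
	/* A = IP protocol byte (14-byte MAC header + offset 9)         */
	{ BPF_LD  | BPF_B   | BPF_ABS, 0, 0, 23     },
	/* equal to 17 (IPPROTO_UDP)? fall through : jump +1 to "drop"  */
	{ BPF_JMP | BPF_JEQ | BPF_K,   0, 1, 17     },
	/* accept, keeping at most 0xffff bytes                         */
	{ BPF_RET | BPF_K,             0, 0, 0xffff },
	/* drop                                                         */
	{ BPF_RET | BPF_K,             0, 0, 0      },
};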
 
/**
* sk_chk_filter - verify socket filter code
* @filter: filter to verify
* @flen: length of filter
*
 * Check the user's filter code. If we let some ugly
 * filter code slip through, kaboom! The filter must contain
 * no references or jumps that are out of range, no illegal instructions,
 * and no backward jumps. It must end with a RET instruction.
*
* Returns 0 if the rule set is legal or a negative errno code if not.
*/
 
int sk_chk_filter(struct sock_filter *filter, int flen)
{
struct sock_filter *ftest;
int pc;
 
if ((unsigned int) flen >= (~0U / sizeof(struct sock_filter)))
return -EINVAL;
 
/*
* Check the filter code now.
*/
for(pc = 0; pc < flen; pc++)
{
/*
* All jumps are forward as they are not signed
*/
ftest = &filter[pc];
if(BPF_CLASS(ftest->code) == BPF_JMP)
{
/*
* But they mustn't jump off the end.
*/
if(BPF_OP(ftest->code) == BPF_JA)
{
/* Note, the large ftest->k might cause
loops. Compare this with conditional
jumps below, where offsets are limited. --ANK (981016)
*/
if (ftest->k >= (unsigned)(flen-pc-1))
return -EINVAL;
}
else
{
/*
* For conditionals both must be safe
*/
if(pc + ftest->jt +1 >= flen || pc + ftest->jf +1 >= flen)
return -EINVAL;
}
}
 
/*
* Check that memory operations use valid addresses.
*/
if (ftest->k >= BPF_MEMWORDS)
{
/*
* But it might not be a memory operation...
*/
switch (ftest->code) {
case BPF_ST:
case BPF_STX:
case BPF_LD|BPF_MEM:
case BPF_LDX|BPF_MEM:
return -EINVAL;
}
}
}
 
/*
* The program must end with a return. We don't care where they
 * jumped within the script (it's always forwards) but in the
* end they _will_ hit this.
*/
return (BPF_CLASS(filter[flen - 1].code) == BPF_RET)?0:-EINVAL;
}
 
/**
* sk_attach_filter - attach a socket filter
* @fprog: the filter program
* @sk: the socket to use
*
* Attach the user's filter code. We first run some sanity checks on
* it to make sure it does not explode on us later. If an error
 * occurs or there is insufficient memory for the filter, a negative
* errno code is returned. On success the return is zero.
*/
 
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
struct sk_filter *fp;
unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
int err;
 
/* Make sure the new filter is present and of a sane size. */
if (fprog->filter == NULL || fprog->len > BPF_MAXINSNS)
return (-EINVAL);
 
fp = (struct sk_filter *)sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL);
if(fp == NULL)
return (-ENOMEM);
 
if (copy_from_user(fp->insns, fprog->filter, fsize)) {
sock_kfree_s(sk, fp, fsize+sizeof(*fp));
return -EFAULT;
}
 
atomic_set(&fp->refcnt, 1);
fp->len = fprog->len;
 
if ((err = sk_chk_filter(fp->insns, fp->len))==0) {
struct sk_filter *old_fp;
 
spin_lock_bh(&sk->lock.slock);
old_fp = sk->filter;
sk->filter = fp;
spin_unlock_bh(&sk->lock.slock);
fp = old_fp;
}
 
if (fp)
sk_filter_release(sk, fp);
 
return (err);
}
#endif /* CONFIG_FILTER */
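/*
 * Illustrative sketch (not part of the original file): sk_attach_filter()
 * above is normally reached via setsockopt(SO_ATTACH_FILTER) from userspace.
 * A minimal sketch, assuming an open socket pack_fd and a sock_filter array
 * such as the udp_only example earlier in this file; attach_filter() is an
 * invented helper name.
 */
#include <sys/socket.h>
#include <linux/filter.h>

/* Sketch: hand a classic BPF program to the kernel for this socket. */
static int attach_filter(int pack_fd, struct sock_filter *insns, unsigned short count)
{
	struct sock_fprog prog;

	prog.len = count;	/* number of sock_filter instructions */
	prog.filter = insns;	/* copied from userspace by sk_attach_filter() */

	return setsockopt(pack_fd, SOL_SOCKET, SO_ATTACH_FILTER,
			  &prog, sizeof(prog));
}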
/pktgen.c
0,0 → 1,1405
/* -*-linux-c-*-
* $Id: pktgen.c,v 1.1.1.1 2004-04-17 22:13:21 phoenix Exp $
* pktgen.c: Packet Generator for performance evaluation.
*
* Copyright 2001, 2002 by Robert Olsson <robert.olsson@its.uu.se>
* Uppsala University, Sweden
*
 * A tool for loading the network with preconfigured packets.
 * The tool is implemented as a Linux module. Parameters are output
* device, IPG (interpacket gap), number of packets, and whether
* to use multiple SKBs or just the same one.
* pktgen uses the installed interface's output routine.
*
* Additional hacking by:
*
* Jens.Laas@data.slu.se
* Improved by ANK. 010120.
* Improved by ANK even more. 010212.
* MAC address typo fixed. 010417 --ro
* Integrated. 020301 --DaveM
* Added multiskb option 020301 --DaveM
* Scaling of results. 020417--sigurdur@linpro.no
* Significant re-work of the module:
* * Updated to support generation over multiple interfaces at once
* by creating 32 /proc/net/pg* files. Each file can be manipulated
* individually.
* * Converted many counters to __u64 to allow longer runs.
* * Allow configuration of ranges, like min/max IP address, MACs,
* and UDP-ports, for both source and destination, and can
* set to use a random distribution or sequentially walk the range.
* * Can now change some values after starting.
* * Place 12-byte packet in UDP payload with magic number,
* sequence number, and timestamp. Will write receiver next.
* * The new changes seem to have a performance impact of around 1%,
* as far as I can tell.
* --Ben Greear <greearb@candelatech.com>
*
* Renamed multiskb to clone_skb and cleaned up sending core for two distinct
 * skb modes. A clone_skb=0 mode for Ben's "ranges" work and a clone_skb != 0
 * mode as a "fastpath" with a configurable number of clones after each alloc.
*
 * clone_skb=0 means all packets are individually allocated; this also means
 * ranges, time stamps, etc. can be used. clone_skb=100 means 1 malloc is
 * followed by 100 clones.
*
* Also moved to /proc/net/pktgen/
* --ro
*
* Fix refcount off by one if first packet fails, potential null deref,
* memleak 030710- KJP
*
* See Documentation/networking/pktgen.txt for how to use this.
*/
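 
/*
 * Illustrative sketch (not part of the original file): the module is driven
 * by writing "name value" commands to the per-interface /proc files created
 * in init() below and parsed by proc_write().  A rough userspace sketch,
 * assuming the module is loaded and that eth0 and the addresses shown exist;
 * pktgen_demo_run() is an invented helper name.
 */
#include <stdio.h>

/* Sketch: configure /proc/net/pktgen/pg0 and start one run. */
static int pktgen_demo_run(void)
{
	static const char *cmds[] = {
		"odev eth0",
		"pkt_size 60",
		"count 100000",
		"ipg 0",
		"dst 10.0.0.2",
		"dstmac 00:11:22:33:44:55",
		"start",			/* blocks until the run completes */
	};
	unsigned int i;

	for (i = 0; i < sizeof(cmds) / sizeof(cmds[0]); i++) {
		FILE *f = fopen("/proc/net/pktgen/pg0", "w");

		if (!f)
			return -1;
		fprintf(f, "%s\n", cmds[i]);	/* one command per write */
		fclose(f);
	}
	return 0;
}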
 
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/ptrace.h>
#include <linux/errno.h>
#include <linux/ioport.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/pci.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/inet.h>
#include <asm/byteorder.h>
#include <asm/bitops.h>
#include <asm/io.h>
#include <asm/dma.h>
#include <asm/uaccess.h>
 
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/udp.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/if_arp.h>
#include <net/checksum.h>
#include <asm/timex.h>
 
#define cycles() ((u32)get_cycles())
 
 
#define VERSION "pktgen version 1.3"
static char version[] __initdata =
"pktgen.c: v1.3: Packet Generator for packet performance testing.\n";
 
/* Used to help identify pktgen packets on the receive side */
 
#define PKTGEN_MAGIC 0xbe9be955
 
 
/* Keep information per interface */
struct pktgen_info {
/* Parameters */
 
/* If min != max, then we will either do a linear iteration, or
* we will do a random selection from within the range.
*/
__u32 flags;
 
#define F_IPSRC_RND (1<<0) /* IP-Src Random */
#define F_IPDST_RND (1<<1) /* IP-Dst Random */
#define F_UDPSRC_RND (1<<2) /* UDP-Src Random */
#define F_UDPDST_RND (1<<3) /* UDP-Dst Random */
#define F_MACSRC_RND (1<<4) /* MAC-Src Random */
#define F_MACDST_RND (1<<5) /* MAC-Dst Random */
#define F_SET_SRCMAC (1<<6) /* Specify-Src-Mac
(default is to use Interface's MAC Addr) */
#define F_SET_SRCIP (1<<7) /* Specify-Src-IP
(default is to use Interface's IP Addr) */
 
int pkt_size; /* = ETH_ZLEN; */
int nfrags;
__u32 ipg; /* Default Interpacket gap in nsec */
__u64 count; /* Default No packets to send */
__u64 sofar; /* How many pkts we've sent so far */
__u64 errors; /* Errors when trying to transmit, pkts will be re-sent */
struct timeval started_at;
struct timeval stopped_at;
__u64 idle_acc;
__u32 seq_num;
int clone_skb; /* Use multiple SKBs during packet gen. If this number
 * is greater than 1, then that many copies of the same
* packet will be sent before a new packet is allocated.
* For instance, if you want to send 1024 identical packets
* before creating a new packet, set clone_skb to 1024.
*/
int busy;
int do_run_run; /* if this changes to false, the test will stop */
char outdev[32];
char dst_min[32];
char dst_max[32];
char src_min[32];
char src_max[32];
 
/* If we're doing ranges, random or incremental, then this
* defines the min/max for those ranges.
*/
__u32 saddr_min; /* inclusive, source IP address */
__u32 saddr_max; /* exclusive, source IP address */
__u32 daddr_min; /* inclusive, dest IP address */
__u32 daddr_max; /* exclusive, dest IP address */
 
__u16 udp_src_min; /* inclusive, source UDP port */
__u16 udp_src_max; /* exclusive, source UDP port */
__u16 udp_dst_min; /* inclusive, dest UDP port */
__u16 udp_dst_max; /* exclusive, dest UDP port */
 
__u32 src_mac_count; /* How many MACs to iterate through */
__u32 dst_mac_count; /* How many MACs to iterate through */
unsigned char dst_mac[6];
unsigned char src_mac[6];
__u32 cur_dst_mac_offset;
__u32 cur_src_mac_offset;
__u32 cur_saddr;
__u32 cur_daddr;
__u16 cur_udp_dst;
__u16 cur_udp_src;
__u8 hh[14];
/* = {
0x00, 0x80, 0xC8, 0x79, 0xB3, 0xCB,
We fill in SRC address later
0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x08, 0x00
};
*/
__u16 pad; /* pad out the hh struct to an even 16 bytes */
char result[512];
 
/* proc file names */
char fname[80];
char busy_fname[80];
struct proc_dir_entry *proc_ent;
struct proc_dir_entry *busy_proc_ent;
};
 
struct pktgen_hdr {
__u32 pgh_magic;
__u32 seq_num;
struct timeval timestamp;
};
 
static int cpu_speed;
static int debug;
 
/* Module parameters, defaults. */
static int count_d = 100000;
static int ipg_d = 0;
static int clone_skb_d = 0;
 
 
#define MAX_PKTGEN 8
static struct pktgen_info pginfos[MAX_PKTGEN];
 
 
/** Convert to milliseconds */
inline __u64 tv_to_ms(const struct timeval* tv) {
__u64 ms = tv->tv_usec / 1000;
ms += (__u64)tv->tv_sec * (__u64)1000;
return ms;
}
 
inline __u64 getCurMs(void) {
struct timeval tv;
do_gettimeofday(&tv);
return tv_to_ms(&tv);
}
 
#define PG_PROC_DIR "pktgen"
static struct proc_dir_entry *proc_dir = 0;
 
static struct net_device *setup_inject(struct pktgen_info* info)
{
struct net_device *odev;
 
rtnl_lock();
odev = __dev_get_by_name(info->outdev);
if (!odev) {
sprintf(info->result, "No such netdevice: \"%s\"", info->outdev);
goto out_unlock;
}
 
if (odev->type != ARPHRD_ETHER) {
sprintf(info->result, "Not ethernet device: \"%s\"", info->outdev);
goto out_unlock;
}
 
if (!netif_running(odev)) {
sprintf(info->result, "Device is down: \"%s\"", info->outdev);
goto out_unlock;
}
 
/* Default to the interface's mac if not explicitly set. */
if (!(info->flags & F_SET_SRCMAC)) {
memcpy(&(info->hh[6]), odev->dev_addr, 6);
}
else {
memcpy(&(info->hh[6]), info->src_mac, 6);
}
 
/* Set up Dest MAC */
memcpy(&(info->hh[0]), info->dst_mac, 6);
info->saddr_min = 0;
info->saddr_max = 0;
if (strlen(info->src_min) == 0) {
if (odev->ip_ptr) {
struct in_device *in_dev = odev->ip_ptr;
 
if (in_dev->ifa_list) {
info->saddr_min = in_dev->ifa_list->ifa_address;
info->saddr_max = info->saddr_min;
}
}
}
else {
info->saddr_min = in_aton(info->src_min);
info->saddr_max = in_aton(info->src_max);
}
 
info->daddr_min = in_aton(info->dst_min);
info->daddr_max = in_aton(info->dst_max);
 
/* Initialize current values. */
info->cur_dst_mac_offset = 0;
info->cur_src_mac_offset = 0;
info->cur_saddr = info->saddr_min;
info->cur_daddr = info->daddr_min;
info->cur_udp_dst = info->udp_dst_min;
info->cur_udp_src = info->udp_src_min;
atomic_inc(&odev->refcnt);
rtnl_unlock();
 
return odev;
 
out_unlock:
rtnl_unlock();
return NULL;
}
 
static void nanospin(int ipg, struct pktgen_info* info)
{
u32 idle_start, idle;
 
idle_start = cycles();
 
for (;;) {
barrier();
idle = cycles() - idle_start;
if (idle * 1000 >= ipg * cpu_speed)
break;
}
info->idle_acc += idle;
}
 
static int calc_mhz(void)
{
struct timeval start, stop;
u32 start_s, elapsed;
 
do_gettimeofday(&start);
start_s = cycles();
do {
barrier();
elapsed = cycles() - start_s;
if (elapsed == 0)
return 0;
} while (elapsed < 1000 * 50000);
do_gettimeofday(&stop);
return elapsed/(stop.tv_usec-start.tv_usec+1000000*(stop.tv_sec-start.tv_sec));
}
 
static void cycles_calibrate(void)
{
int i;
 
for (i = 0; i < 3; i++) {
int res = calc_mhz();
if (res > cpu_speed)
cpu_speed = res;
}
}
 
 
/* Increment/randomize headers according to flags and current values
* for IP src/dest, UDP src/dst port, MAC-Addr src/dst
*/
static void mod_cur_headers(struct pktgen_info* info) {
__u32 imn;
__u32 imx;
/* Deal with source MAC */
if (info->src_mac_count > 1) {
__u32 mc;
__u32 tmp;
if (info->flags & F_MACSRC_RND) {
mc = net_random() % (info->src_mac_count);
}
else {
mc = info->cur_src_mac_offset++;
if (info->cur_src_mac_offset > info->src_mac_count) {
info->cur_src_mac_offset = 0;
}
}
 
tmp = info->src_mac[5] + (mc & 0xFF);
info->hh[11] = tmp;
tmp = (info->src_mac[4] + ((mc >> 8) & 0xFF) + (tmp >> 8));
info->hh[10] = tmp;
tmp = (info->src_mac[3] + ((mc >> 16) & 0xFF) + (tmp >> 8));
info->hh[9] = tmp;
tmp = (info->src_mac[2] + ((mc >> 24) & 0xFF) + (tmp >> 8));
info->hh[8] = tmp;
tmp = (info->src_mac[1] + (tmp >> 8));
info->hh[7] = tmp;
}
 
/* Deal with Destination MAC */
if (info->dst_mac_count > 1) {
__u32 mc;
__u32 tmp;
if (info->flags & F_MACDST_RND) {
mc = net_random() % (info->dst_mac_count);
}
else {
mc = info->cur_dst_mac_offset++;
if (info->cur_dst_mac_offset > info->dst_mac_count) {
info->cur_dst_mac_offset = 0;
}
}
 
tmp = info->dst_mac[5] + (mc & 0xFF);
info->hh[5] = tmp;
tmp = (info->dst_mac[4] + ((mc >> 8) & 0xFF) + (tmp >> 8));
info->hh[4] = tmp;
tmp = (info->dst_mac[3] + ((mc >> 16) & 0xFF) + (tmp >> 8));
info->hh[3] = tmp;
tmp = (info->dst_mac[2] + ((mc >> 24) & 0xFF) + (tmp >> 8));
info->hh[2] = tmp;
tmp = (info->dst_mac[1] + (tmp >> 8));
info->hh[1] = tmp;
}
 
if (info->udp_src_min < info->udp_src_max) {
if (info->flags & F_UDPSRC_RND) {
info->cur_udp_src = ((net_random() % (info->udp_src_max - info->udp_src_min))
+ info->udp_src_min);
}
else {
info->cur_udp_src++;
if (info->cur_udp_src >= info->udp_src_max) {
info->cur_udp_src = info->udp_src_min;
}
}
}
 
if (info->udp_dst_min < info->udp_dst_max) {
if (info->flags & F_UDPDST_RND) {
info->cur_udp_dst = ((net_random() % (info->udp_dst_max - info->udp_dst_min))
+ info->udp_dst_min);
}
else {
info->cur_udp_dst++;
if (info->cur_udp_dst >= info->udp_dst_max) {
info->cur_udp_dst = info->udp_dst_min;
}
}
}
 
if ((imn = ntohl(info->saddr_min)) < (imx = ntohl(info->saddr_max))) {
__u32 t;
if (info->flags & F_IPSRC_RND) {
t = ((net_random() % (imx - imn)) + imn);
}
else {
t = ntohl(info->cur_saddr);
t++;
if (t >= imx) {
t = imn;
}
}
info->cur_saddr = htonl(t);
}
 
if ((imn = ntohl(info->daddr_min)) < (imx = ntohl(info->daddr_max))) {
__u32 t;
if (info->flags & F_IPDST_RND) {
t = ((net_random() % (imx - imn)) + imn);
}
else {
t = ntohl(info->cur_daddr);
t++;
if (t >= imx) {
t = imn;
}
}
info->cur_daddr = htonl(t);
}
}/* mod_cur_headers */
 
 
static struct sk_buff *fill_packet(struct net_device *odev, struct pktgen_info* info)
{
struct sk_buff *skb = NULL;
__u8 *eth;
struct udphdr *udph;
int datalen, iplen;
struct iphdr *iph;
struct pktgen_hdr *pgh = NULL;
skb = alloc_skb(info->pkt_size + 64 + 16, GFP_ATOMIC);
if (!skb) {
sprintf(info->result, "No memory");
return NULL;
}
 
skb_reserve(skb, 16);
 
/* Reserve for ethernet and IP header */
eth = (__u8 *) skb_push(skb, 14);
iph = (struct iphdr *)skb_put(skb, sizeof(struct iphdr));
udph = (struct udphdr *)skb_put(skb, sizeof(struct udphdr));
 
/* Update any of the values, used when we're incrementing various
* fields.
*/
mod_cur_headers(info);
 
memcpy(eth, info->hh, 14);
datalen = info->pkt_size - 14 - 20 - 8; /* Eth + IPh + UDPh */
if (datalen < sizeof(struct pktgen_hdr)) {
datalen = sizeof(struct pktgen_hdr);
}
udph->source = htons(info->cur_udp_src);
udph->dest = htons(info->cur_udp_dst);
udph->len = htons(datalen + 8); /* DATA + udphdr */
udph->check = 0; /* No checksum */
 
iph->ihl = 5;
iph->version = 4;
iph->ttl = 3;
iph->tos = 0;
iph->protocol = IPPROTO_UDP; /* UDP */
iph->saddr = info->cur_saddr;
iph->daddr = info->cur_daddr;
iph->frag_off = 0;
iplen = 20 + 8 + datalen;
iph->tot_len = htons(iplen);
iph->check = 0;
iph->check = ip_fast_csum((void *) iph, iph->ihl);
skb->protocol = __constant_htons(ETH_P_IP);
skb->mac.raw = ((u8 *)iph) - 14;
skb->dev = odev;
skb->pkt_type = PACKET_HOST;
 
if (info->nfrags <= 0) {
pgh = (struct pktgen_hdr *)skb_put(skb, datalen);
} else {
int frags = info->nfrags;
int i;
 
/* TODO: Verify this is OK...it sure is ugly. --Ben */
pgh = (struct pktgen_hdr*)(((char*)(udph)) + 8);
if (frags > MAX_SKB_FRAGS)
frags = MAX_SKB_FRAGS;
if (datalen > frags*PAGE_SIZE) {
skb_put(skb, datalen-frags*PAGE_SIZE);
datalen = frags*PAGE_SIZE;
}
 
i = 0;
while (datalen > 0) {
struct page *page = alloc_pages(GFP_KERNEL, 0);
skb_shinfo(skb)->frags[i].page = page;
skb_shinfo(skb)->frags[i].page_offset = 0;
skb_shinfo(skb)->frags[i].size =
(datalen < PAGE_SIZE ? datalen : PAGE_SIZE);
datalen -= skb_shinfo(skb)->frags[i].size;
skb->len += skb_shinfo(skb)->frags[i].size;
skb->data_len += skb_shinfo(skb)->frags[i].size;
i++;
skb_shinfo(skb)->nr_frags = i;
}
 
while (i < frags) {
int rem;
 
if (i == 0)
break;
 
rem = skb_shinfo(skb)->frags[i - 1].size / 2;
if (rem == 0)
break;
 
skb_shinfo(skb)->frags[i - 1].size -= rem;
 
skb_shinfo(skb)->frags[i] = skb_shinfo(skb)->frags[i - 1];
get_page(skb_shinfo(skb)->frags[i].page);
skb_shinfo(skb)->frags[i].page = skb_shinfo(skb)->frags[i - 1].page;
skb_shinfo(skb)->frags[i].page_offset += skb_shinfo(skb)->frags[i - 1].size;
skb_shinfo(skb)->frags[i].size = rem;
i++;
skb_shinfo(skb)->nr_frags = i;
}
}
 
/* Stamp the time, and sequence number, convert them to network byte order */
if (pgh) {
pgh->pgh_magic = htonl(PKTGEN_MAGIC);
do_gettimeofday(&(pgh->timestamp));
pgh->timestamp.tv_usec = htonl(pgh->timestamp.tv_usec);
pgh->timestamp.tv_sec = htonl(pgh->timestamp.tv_sec);
pgh->seq_num = htonl(info->seq_num);
}
return skb;
}
 
 
static void inject(struct pktgen_info* info)
{
struct net_device *odev = NULL;
struct sk_buff *skb = NULL;
__u64 total = 0;
__u64 idle = 0;
__u64 lcount = 0;
int nr_frags = 0;
int last_ok = 1; /* Was last skb sent?
* Or a failed transmit of some sort? This will keep
* sequence numbers in order, for example.
*/
__u64 fp = 0;
__u32 fp_tmp = 0;
 
odev = setup_inject(info);
if (!odev)
return;
 
info->do_run_run = 1; /* Cranke yeself! */
info->idle_acc = 0;
info->sofar = 0;
lcount = info->count;
 
 
/* Build our initial pkt and place it as a re-try pkt. */
skb = fill_packet(odev, info);
if (skb == NULL) goto out_reldev;
 
do_gettimeofday(&(info->started_at));
 
while(info->do_run_run) {
 
/* Set a time-stamp, so build a new pkt each time */
 
if (last_ok) {
if (++fp_tmp >= info->clone_skb ) {
kfree_skb(skb);
skb = fill_packet(odev, info);
if (skb == NULL) {
goto out_reldev;
}
fp++;
fp_tmp = 0; /* reset counter */
}
}
 
nr_frags = skb_shinfo(skb)->nr_frags;
spin_lock_bh(&odev->xmit_lock);
if (!netif_queue_stopped(odev)) {
 
atomic_inc(&skb->users);
 
if (odev->hard_start_xmit(skb, odev)) {
 
atomic_dec(&skb->users);
if (net_ratelimit()) {
printk(KERN_INFO "Hard xmit error\n");
}
info->errors++;
last_ok = 0;
}
else {
last_ok = 1;
info->sofar++;
info->seq_num++;
}
}
else {
/* Re-try it next time */
last_ok = 0;
}
 
spin_unlock_bh(&odev->xmit_lock);
 
if (info->ipg) {
/* Try not to busy-spin if we have larger sleep times.
* TODO: Investigate better ways to do this.
*/
if (info->ipg < 10000) { /* 10 usecs or less */
nanospin(info->ipg, info);
}
else if (info->ipg < 10000000) { /* 10ms or less */
udelay(info->ipg / 1000);
}
else {
mdelay(info->ipg / 1000000);
}
}
if (signal_pending(current)) {
break;
}
 
/* If lcount is zero, then run forever */
if ((lcount != 0) && (--lcount == 0)) {
if (atomic_read(&skb->users) != 1) {
u32 idle_start, idle;
 
idle_start = cycles();
while (atomic_read(&skb->users) != 1) {
if (signal_pending(current)) {
break;
}
schedule();
}
idle = cycles() - idle_start;
info->idle_acc += idle;
}
break;
}
 
if (netif_queue_stopped(odev) || current->need_resched) {
u32 idle_start, idle;
 
idle_start = cycles();
do {
if (signal_pending(current)) {
info->do_run_run = 0;
break;
}
if (!netif_running(odev)) {
info->do_run_run = 0;
break;
}
if (current->need_resched)
schedule();
else
do_softirq();
} while (netif_queue_stopped(odev));
idle = cycles() - idle_start;
info->idle_acc += idle;
}
}/* while we should be running */
 
do_gettimeofday(&(info->stopped_at));
 
total = (info->stopped_at.tv_sec - info->started_at.tv_sec) * 1000000 +
info->stopped_at.tv_usec - info->started_at.tv_usec;
 
idle = (__u32)(info->idle_acc)/(__u32)(cpu_speed);
 
{
char *p = info->result;
__u64 pps = (__u32)(info->sofar * 1000) / ((__u32)(total) / 1000);
__u64 bps = pps * 8 * (info->pkt_size + 4); /* take 32bit ethernet CRC into account */
p += sprintf(p, "OK: %llu(c%llu+d%llu) usec, %llu (%dbyte,%dfrags) %llupps %lluMb/sec (%llubps) errors: %llu",
(unsigned long long) total,
(unsigned long long) (total - idle),
(unsigned long long) idle,
(unsigned long long) info->sofar,
skb->len + 4, /* Add 4 to account for the ethernet checksum */
nr_frags,
(unsigned long long) pps,
(unsigned long long) (bps / (u64) 1024 / (u64) 1024),
(unsigned long long) bps,
(unsigned long long) info->errors
);
}
 
kfree_skb(skb);
 
out_reldev:
if (odev) {
dev_put(odev);
odev = NULL;
}
 
return;
 
}
 
/* proc/net/pktgen/pg */
 
static int proc_busy_read(char *buf , char **start, off_t offset,
int len, int *eof, void *data)
{
char *p;
int idx = (int)(long)(data);
struct pktgen_info* info = NULL;
if ((idx < 0) || (idx >= MAX_PKTGEN)) {
printk("ERROR: idx: %i is out of range in proc_write\n", idx);
return -EINVAL;
}
info = &(pginfos[idx]);
p = buf;
p += sprintf(p, "%d\n", info->busy);
*eof = 1;
return p-buf;
}
 
static int proc_read(char *buf , char **start, off_t offset,
int len, int *eof, void *data)
{
char *p;
int i;
int idx = (int)(long)(data);
struct pktgen_info* info = NULL;
__u64 sa;
__u64 stopped;
__u64 now = getCurMs();
if ((idx < 0) || (idx >= MAX_PKTGEN)) {
printk("ERROR: idx: %i is out of range in proc_write\n", idx);
return -EINVAL;
}
info = &(pginfos[idx]);
p = buf;
p += sprintf(p, "%s\n", VERSION); /* Help with parsing compatibility */
p += sprintf(p, "Params: count %llu pkt_size: %u frags: %d ipg: %u clone_skb: %d odev \"%s\"\n",
(unsigned long long) info->count,
info->pkt_size, info->nfrags, info->ipg,
info->clone_skb, info->outdev);
p += sprintf(p, " dst_min: %s dst_max: %s src_min: %s src_max: %s\n",
info->dst_min, info->dst_max, info->src_min, info->src_max);
p += sprintf(p, " src_mac: ");
for (i = 0; i < 6; i++) {
p += sprintf(p, "%02X%s", info->src_mac[i], i == 5 ? " " : ":");
}
p += sprintf(p, "dst_mac: ");
for (i = 0; i < 6; i++) {
p += sprintf(p, "%02X%s", info->dst_mac[i], i == 5 ? "\n" : ":");
}
p += sprintf(p, " udp_src_min: %d udp_src_max: %d udp_dst_min: %d udp_dst_max: %d\n",
info->udp_src_min, info->udp_src_max, info->udp_dst_min,
info->udp_dst_max);
p += sprintf(p, " src_mac_count: %d dst_mac_count: %d\n Flags: ",
info->src_mac_count, info->dst_mac_count);
if (info->flags & F_IPSRC_RND) {
p += sprintf(p, "IPSRC_RND ");
}
if (info->flags & F_IPDST_RND) {
p += sprintf(p, "IPDST_RND ");
}
if (info->flags & F_UDPSRC_RND) {
p += sprintf(p, "UDPSRC_RND ");
}
if (info->flags & F_UDPDST_RND) {
p += sprintf(p, "UDPDST_RND ");
}
if (info->flags & F_MACSRC_RND) {
p += sprintf(p, "MACSRC_RND ");
}
if (info->flags & F_MACDST_RND) {
p += sprintf(p, "MACDST_RND ");
}
p += sprintf(p, "\n");
sa = tv_to_ms(&(info->started_at));
stopped = tv_to_ms(&(info->stopped_at));
if (info->do_run_run) {
stopped = now; /* not really stopped, more like last-running-at */
}
p += sprintf(p, "Current:\n pkts-sofar: %llu errors: %llu\n started: %llums stopped: %llums now: %llums idle: %lluns\n",
(unsigned long long) info->sofar,
(unsigned long long) info->errors,
(unsigned long long) sa,
(unsigned long long) stopped,
(unsigned long long) now,
(unsigned long long) info->idle_acc);
p += sprintf(p, " seq_num: %d cur_dst_mac_offset: %d cur_src_mac_offset: %d\n",
info->seq_num, info->cur_dst_mac_offset, info->cur_src_mac_offset);
p += sprintf(p, " cur_saddr: 0x%x cur_daddr: 0x%x cur_udp_dst: %d cur_udp_src: %d\n",
info->cur_saddr, info->cur_daddr, info->cur_udp_dst, info->cur_udp_src);
if (info->result[0])
p += sprintf(p, "Result: %s\n", info->result);
else
p += sprintf(p, "Result: Idle\n");
*eof = 1;
 
return p - buf;
}
 
static int count_trail_chars(const char *user_buffer, unsigned int maxlen)
{
int i;
 
for (i = 0; i < maxlen; i++) {
char c;
 
if (get_user(c, &user_buffer[i]))
return -EFAULT;
switch (c) {
case '\"':
case '\n':
case '\r':
case '\t':
case ' ':
case '=':
break;
default:
goto done;
};
}
done:
return i;
}
 
static unsigned long num_arg(const char *user_buffer, unsigned long maxlen,
unsigned long *num)
{
int i = 0;
 
*num = 0;
for(; i < maxlen; i++) {
char c;
 
if (get_user(c, &user_buffer[i]))
return -EFAULT;
if ((c >= '0') && (c <= '9')) {
*num *= 10;
*num += c -'0';
} else
break;
}
return i;
}
 
static int strn_len(const char *user_buffer, unsigned int maxlen)
{
int i = 0;
 
for(; i < maxlen; i++) {
char c;
 
if (get_user(c, &user_buffer[i]))
return -EFAULT;
switch (c) {
case '\"':
case '\n':
case '\r':
case '\t':
case ' ':
goto done_str;
default:
break;
};
}
done_str:
return i;
}
 
static int proc_write(struct file *file, const char *user_buffer,
unsigned long count, void *data)
{
int i = 0, max, len;
char name[16], valstr[32];
unsigned long value = 0;
int idx = (int)(long)(data);
struct pktgen_info* info = NULL;
char* result = NULL;
int tmp;
if ((idx < 0) || (idx >= MAX_PKTGEN)) {
printk("ERROR: idx: %i is out of range in proc_write\n", idx);
return -EINVAL;
}
info = &(pginfos[idx]);
result = &(info->result[0]);
if (count < 1) {
sprintf(result, "Wrong command format");
return -EINVAL;
}
max = count - i;
tmp = count_trail_chars(&user_buffer[i], max);
if (tmp < 0)
return tmp;
i += tmp;
/* Read variable name */
 
len = strn_len(&user_buffer[i], sizeof(name) - 1);
if (len < 0)
return len;
memset(name, 0, sizeof(name));
if (copy_from_user(name, &user_buffer[i], len))
return -EFAULT;
i += len;
max = count -i;
len = count_trail_chars(&user_buffer[i], max);
if (len < 0)
return len;
i += len;
 
if (debug)
printk("pg: %s,%lu\n", name, count);
 
if (!strcmp(name, "stop")) {
if (info->do_run_run) {
strcpy(result, "Stopping");
}
else {
strcpy(result, "Already stopped...\n");
}
info->do_run_run = 0;
return count;
}
 
if (!strcmp(name, "pkt_size")) {
len = num_arg(&user_buffer[i], 10, &value);
if (len < 0)
return len;
i += len;
if (value < 14+20+8)
value = 14+20+8;
info->pkt_size = value;
sprintf(result, "OK: pkt_size=%u", info->pkt_size);
return count;
}
if (!strcmp(name, "frags")) {
len = num_arg(&user_buffer[i], 10, &value);
if (len < 0)
return len;
i += len;
info->nfrags = value;
sprintf(result, "OK: frags=%u", info->nfrags);
return count;
}
if (!strcmp(name, "ipg")) {
len = num_arg(&user_buffer[i], 10, &value);
if (len < 0)
return len;
i += len;
info->ipg = value;
sprintf(result, "OK: ipg=%u", info->ipg);
return count;
}
if (!strcmp(name, "udp_src_min")) {
len = num_arg(&user_buffer[i], 10, &value);
if (len < 0)
return len;
i += len;
info->udp_src_min = value;
sprintf(result, "OK: udp_src_min=%u", info->udp_src_min);
return count;
}
if (!strcmp(name, "udp_dst_min")) {
len = num_arg(&user_buffer[i], 10, &value);
if (len < 0)
return len;
i += len;
info->udp_dst_min = value;
sprintf(result, "OK: udp_dst_min=%u", info->udp_dst_min);
return count;
}
if (!strcmp(name, "udp_src_max")) {
len = num_arg(&user_buffer[i], 10, &value);
if (len < 0)
return len;
i += len;
info->udp_src_max = value;
sprintf(result, "OK: udp_src_max=%u", info->udp_src_max);
return count;
}
if (!strcmp(name, "udp_dst_max")) {
len = num_arg(&user_buffer[i], 10, &value);
if (len < 0)
return len;
i += len;
info->udp_dst_max = value;
sprintf(result, "OK: udp_dst_max=%u", info->udp_dst_max);
return count;
}
if (!strcmp(name, "clone_skb")) {
len = num_arg(&user_buffer[i], 10, &value);
if (len < 0)
return len;
i += len;
info->clone_skb = value;
sprintf(result, "OK: clone_skb=%d", info->clone_skb);
return count;
}
if (!strcmp(name, "count")) {
len = num_arg(&user_buffer[i], 10, &value);
if (len < 0)
return len;
i += len;
info->count = value;
sprintf(result, "OK: count=%llu", (unsigned long long) info->count);
return count;
}
if (!strcmp(name, "src_mac_count")) {
len = num_arg(&user_buffer[i], 10, &value);
if (len < 0)
return len;
i += len;
info->src_mac_count = value;
sprintf(result, "OK: src_mac_count=%d", info->src_mac_count);
return count;
}
if (!strcmp(name, "dst_mac_count")) {
len = num_arg(&user_buffer[i], 10, &value);
if (len < 0)
return len;
i += len;
info->dst_mac_count = value;
sprintf(result, "OK: dst_mac_count=%d", info->dst_mac_count);
return count;
}
if (!strcmp(name, "odev")) {
len = strn_len(&user_buffer[i], sizeof(info->outdev) - 1);
if (len < 0)
return len;
memset(info->outdev, 0, sizeof(info->outdev));
if (copy_from_user(info->outdev, &user_buffer[i], len))
return -EFAULT;
i += len;
sprintf(result, "OK: odev=%s", info->outdev);
return count;
}
if (!strcmp(name, "flag")) {
char f[32];
len = strn_len(&user_buffer[i], sizeof(f) - 1);
if (len < 0)
return len;
memset(f, 0, 32);
if (copy_from_user(f, &user_buffer[i], len))
return -EFAULT;
i += len;
if (strcmp(f, "IPSRC_RND") == 0) {
info->flags |= F_IPSRC_RND;
}
else if (strcmp(f, "!IPSRC_RND") == 0) {
info->flags &= ~F_IPSRC_RND;
}
else if (strcmp(f, "IPDST_RND") == 0) {
info->flags |= F_IPDST_RND;
}
else if (strcmp(f, "!IPDST_RND") == 0) {
info->flags &= ~F_IPDST_RND;
}
else if (strcmp(f, "UDPSRC_RND") == 0) {
info->flags |= F_UDPSRC_RND;
}
else if (strcmp(f, "!UDPSRC_RND") == 0) {
info->flags &= ~F_UDPSRC_RND;
}
else if (strcmp(f, "UDPDST_RND") == 0) {
info->flags |= F_UDPDST_RND;
}
else if (strcmp(f, "!UDPDST_RND") == 0) {
info->flags &= ~F_UDPDST_RND;
}
else if (strcmp(f, "MACSRC_RND") == 0) {
info->flags |= F_MACSRC_RND;
}
else if (strcmp(f, "!MACSRC_RND") == 0) {
info->flags &= ~F_MACSRC_RND;
}
else if (strcmp(f, "MACDST_RND") == 0) {
info->flags |= F_MACDST_RND;
}
else if (strcmp(f, "!MACDST_RND") == 0) {
info->flags &= ~F_MACDST_RND;
}
else {
sprintf(result, "Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s",
f,
"IPSRC_RND, IPDST_RND, UDPSRC_RND, UDPDST_RND, MACSRC_RND, MACDST_RND\n");
return count;
}
sprintf(result, "OK: flags=0x%x", info->flags);
return count;
}
if (!strcmp(name, "dst_min") || !strcmp(name, "dst")) {
len = strn_len(&user_buffer[i], sizeof(info->dst_min) - 1);
if (len < 0)
return len;
memset(info->dst_min, 0, sizeof(info->dst_min));
if (copy_from_user(info->dst_min, &user_buffer[i], len))
return -EFAULT;
if(debug)
printk("pg: dst_min set to: %s\n", info->dst_min);
i += len;
sprintf(result, "OK: dst_min=%s", info->dst_min);
return count;
}
if (!strcmp(name, "dst_max")) {
len = strn_len(&user_buffer[i], sizeof(info->dst_max) - 1);
if (len < 0)
return len;
memset(info->dst_max, 0, sizeof(info->dst_max));
if (copy_from_user(info->dst_max, &user_buffer[i], len))
return -EFAULT;
if(debug)
printk("pg: dst_max set to: %s\n", info->dst_max);
i += len;
sprintf(result, "OK: dst_max=%s", info->dst_max);
return count;
}
if (!strcmp(name, "src_min")) {
len = strn_len(&user_buffer[i], sizeof(info->src_min) - 1);
if (len < 0)
return len;
memset(info->src_min, 0, sizeof(info->src_min));
if (copy_from_user(info->src_min, &user_buffer[i], len))
return -EFAULT;
if(debug)
printk("pg: src_min set to: %s\n", info->src_min);
i += len;
sprintf(result, "OK: src_min=%s", info->src_min);
return count;
}
if (!strcmp(name, "src_max")) {
len = strn_len(&user_buffer[i], sizeof(info->src_max) - 1);
if (len < 0)
return len;
memset(info->src_max, 0, sizeof(info->src_max));
if (copy_from_user(info->src_max, &user_buffer[i], len))
return -EFAULT;
if(debug)
printk("pg: src_max set to: %s\n", info->src_max);
i += len;
sprintf(result, "OK: src_max=%s", info->src_max);
return count;
}
if (!strcmp(name, "dstmac")) {
char *v = valstr;
unsigned char *m = info->dst_mac;
 
len = strn_len(&user_buffer[i], sizeof(valstr) - 1);
if (len < 0)
return len;
memset(valstr, 0, sizeof(valstr));
if (copy_from_user(valstr, &user_buffer[i], len))
return -EFAULT;
i += len;
 
for(*m = 0;*v && m < info->dst_mac + 6; v++) {
if (*v >= '0' && *v <= '9') {
*m *= 16;
*m += *v - '0';
}
if (*v >= 'A' && *v <= 'F') {
*m *= 16;
*m += *v - 'A' + 10;
}
if (*v >= 'a' && *v <= 'f') {
*m *= 16;
*m += *v - 'a' + 10;
}
if (*v == ':') {
m++;
*m = 0;
}
}
sprintf(result, "OK: dstmac");
return count;
}
if (!strcmp(name, "srcmac")) {
char *v = valstr;
unsigned char *m = info->src_mac;
 
len = strn_len(&user_buffer[i], sizeof(valstr) - 1);
if (len < 0)
return len;
memset(valstr, 0, sizeof(valstr));
if (copy_from_user(valstr, &user_buffer[i], len))
return -EFAULT;
i += len;
 
for(*m = 0;*v && m < info->src_mac + 6; v++) {
if (*v >= '0' && *v <= '9') {
*m *= 16;
*m += *v - '0';
}
if (*v >= 'A' && *v <= 'F') {
*m *= 16;
*m += *v - 'A' + 10;
}
if (*v >= 'a' && *v <= 'f') {
*m *= 16;
*m += *v - 'a' + 10;
}
if (*v == ':') {
m++;
*m = 0;
}
}
sprintf(result, "OK: srcmac");
return count;
}
 
if (!strcmp(name, "inject") || !strcmp(name, "start")) {
MOD_INC_USE_COUNT;
if (info->busy) {
strcpy(info->result, "Already running...\n");
}
else {
info->busy = 1;
strcpy(info->result, "Starting");
inject(info);
info->busy = 0;
}
MOD_DEC_USE_COUNT;
return count;
}
 
sprintf(info->result, "No such parameter \"%s\"", name);
return -EINVAL;
}
 
 
int create_proc_dir(void)
{
int len;
/* does proc_dir already exist? */
len = strlen(PG_PROC_DIR);
 
for (proc_dir = proc_net->subdir; proc_dir;
proc_dir=proc_dir->next) {
if ((proc_dir->namelen == len) &&
(! memcmp(proc_dir->name, PG_PROC_DIR, len)))
break;
}
if (!proc_dir)
proc_dir = create_proc_entry(PG_PROC_DIR, S_IFDIR, proc_net);
if (!proc_dir) return -ENODEV;
return 1;
}
 
int remove_proc_dir(void)
{
remove_proc_entry(PG_PROC_DIR, proc_net);
return 1;
}
 
static int __init init(void)
{
int i;
printk(version);
cycles_calibrate();
if (cpu_speed == 0) {
printk("pktgen: Error: your machine does not have working cycle counter.\n");
return -EINVAL;
}
 
create_proc_dir();
 
for (i = 0; i<MAX_PKTGEN; i++) {
memset(&(pginfos[i]), 0, sizeof(pginfos[i]));
pginfos[i].pkt_size = ETH_ZLEN;
pginfos[i].nfrags = 0;
pginfos[i].clone_skb = clone_skb_d;
pginfos[i].ipg = ipg_d;
pginfos[i].count = count_d;
pginfos[i].sofar = 0;
pginfos[i].hh[12] = 0x08; /* fill in protocol. Rest is filled in later. */
pginfos[i].hh[13] = 0x00;
pginfos[i].udp_src_min = 9; /* sink NULL */
pginfos[i].udp_src_max = 9;
pginfos[i].udp_dst_min = 9;
pginfos[i].udp_dst_max = 9;
sprintf(pginfos[i].fname, "net/%s/pg%i", PG_PROC_DIR, i);
pginfos[i].proc_ent = create_proc_entry(pginfos[i].fname, 0600, 0);
if (!pginfos[i].proc_ent) {
printk("pktgen: Error: cannot create net/%s/pg procfs entry.\n", PG_PROC_DIR);
goto cleanup_mem;
}
pginfos[i].proc_ent->read_proc = proc_read;
pginfos[i].proc_ent->write_proc = proc_write;
pginfos[i].proc_ent->data = (void*)(long)(i);
 
sprintf(pginfos[i].busy_fname, "net/%s/pg_busy%i", PG_PROC_DIR, i);
pginfos[i].busy_proc_ent = create_proc_entry(pginfos[i].busy_fname, 0, 0);
if (!pginfos[i].busy_proc_ent) {
printk("pktgen: Error: cannot create net/%s/pg_busy procfs entry.\n", PG_PROC_DIR);
goto cleanup_mem;
}
pginfos[i].busy_proc_ent->read_proc = proc_busy_read;
pginfos[i].busy_proc_ent->data = (void*)(long)(i);
}
return 0;
cleanup_mem:
for (i = 0; i<MAX_PKTGEN; i++) {
if (strlen(pginfos[i].fname)) {
remove_proc_entry(pginfos[i].fname, NULL);
}
if (strlen(pginfos[i].busy_fname)) {
remove_proc_entry(pginfos[i].busy_fname, NULL);
}
}
return -ENOMEM;
}
 
 
static void __exit cleanup(void)
{
int i;
for (i = 0; i<MAX_PKTGEN; i++) {
if (strlen(pginfos[i].fname)) {
remove_proc_entry(pginfos[i].fname, NULL);
}
if (strlen(pginfos[i].busy_fname)) {
remove_proc_entry(pginfos[i].busy_fname, NULL);
}
}
remove_proc_dir();
}
 
module_init(init);
module_exit(cleanup);
 
MODULE_AUTHOR("Robert Olsson <robert.olsson@its.uu.se");
MODULE_DESCRIPTION("Packet Generator tool");
MODULE_LICENSE("GPL");
MODULE_PARM(count_d, "i");
MODULE_PARM(ipg_d, "i");
MODULE_PARM(cpu_speed, "i");
MODULE_PARM(clone_skb_d, "i");
 
 
 
/dst.c
0,0 → 1,219
/*
* net/dst.c Protocol independent destination cache.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
*/
 
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/init.h>
 
#include <net/dst.h>
 
/* Locking strategy:
* 1) Garbage collection state of dead destination cache
* entries is protected by dst_lock.
* 2) GC is run only from BH context, and is the only remover
* of entries.
* 3) Entries are added to the garbage list from both BH
* and non-BH context, so local BH disabling is needed.
* 4) All operations modify state, so a spinlock is used.
*/
static struct dst_entry *dst_garbage_list;
#if RT_CACHE_DEBUG >= 2
static atomic_t dst_total = ATOMIC_INIT(0);
#endif
static spinlock_t dst_lock = SPIN_LOCK_UNLOCKED;
 
static unsigned long dst_gc_timer_expires;
static unsigned long dst_gc_timer_inc = DST_GC_MAX;
static void dst_run_gc(unsigned long);
 
static struct timer_list dst_gc_timer =
{ data: DST_GC_MIN, function: dst_run_gc };
 
 
static void dst_run_gc(unsigned long dummy)
{
int delayed = 0;
struct dst_entry * dst, **dstp;
 
if (!spin_trylock(&dst_lock)) {
mod_timer(&dst_gc_timer, jiffies + HZ/10);
return;
}
 
 
del_timer(&dst_gc_timer);
dstp = &dst_garbage_list;
while ((dst = *dstp) != NULL) {
if (atomic_read(&dst->__refcnt)) {
dstp = &dst->next;
delayed++;
continue;
}
*dstp = dst->next;
dst_destroy(dst);
}
if (!dst_garbage_list) {
dst_gc_timer_inc = DST_GC_MAX;
goto out;
}
if ((dst_gc_timer_expires += dst_gc_timer_inc) > DST_GC_MAX)
dst_gc_timer_expires = DST_GC_MAX;
dst_gc_timer_inc += DST_GC_INC;
dst_gc_timer.expires = jiffies + dst_gc_timer_expires;
#if RT_CACHE_DEBUG >= 2
printk("dst_total: %d/%d %ld\n",
atomic_read(&dst_total), delayed, dst_gc_timer_expires);
#endif
add_timer(&dst_gc_timer);
 
out:
spin_unlock(&dst_lock);
}
 
static int dst_discard(struct sk_buff *skb)
{
kfree_skb(skb);
return 0;
}
 
static int dst_blackhole(struct sk_buff *skb)
{
kfree_skb(skb);
return 0;
}
 
void * dst_alloc(struct dst_ops * ops)
{
struct dst_entry * dst;
 
if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh) {
if (ops->gc())
return NULL;
}
dst = kmem_cache_alloc(ops->kmem_cachep, SLAB_ATOMIC);
if (!dst)
return NULL;
memset(dst, 0, ops->entry_size);
atomic_set(&dst->__refcnt, 0);
dst->ops = ops;
dst->lastuse = jiffies;
dst->input = dst_discard;
dst->output = dst_blackhole;
#if RT_CACHE_DEBUG >= 2
atomic_inc(&dst_total);
#endif
atomic_inc(&ops->entries);
return dst;
}
 
void __dst_free(struct dst_entry * dst)
{
spin_lock_bh(&dst_lock);
 
/* The first case (dev==NULL) is required when a
   protocol module is unloaded.
*/
if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) {
dst->input = dst_discard;
dst->output = dst_blackhole;
}
dst->obsolete = 2;
dst->next = dst_garbage_list;
dst_garbage_list = dst;
if (dst_gc_timer_inc > DST_GC_INC) {
dst_gc_timer_inc = DST_GC_INC;
dst_gc_timer_expires = DST_GC_MIN;
mod_timer(&dst_gc_timer, jiffies + dst_gc_timer_expires);
}
 
spin_unlock_bh(&dst_lock);
}
 
void dst_destroy(struct dst_entry * dst)
{
struct neighbour *neigh = dst->neighbour;
struct hh_cache *hh = dst->hh;
 
dst->hh = NULL;
if (hh && atomic_dec_and_test(&hh->hh_refcnt))
kfree(hh);
 
if (neigh) {
dst->neighbour = NULL;
neigh_release(neigh);
}
 
atomic_dec(&dst->ops->entries);
 
if (dst->ops->destroy)
dst->ops->destroy(dst);
if (dst->dev)
dev_put(dst->dev);
#if RT_CACHE_DEBUG >= 2
atomic_dec(&dst_total);
#endif
kmem_cache_free(dst->ops->kmem_cachep, dst);
}
 
static int dst_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
struct net_device *dev = ptr;
struct dst_entry *dst;
 
switch (event) {
case NETDEV_UNREGISTER:
case NETDEV_DOWN:
spin_lock_bh(&dst_lock);
for (dst = dst_garbage_list; dst; dst = dst->next) {
if (dst->dev == dev) {
/* Dirty hack. We did it in 2.2 (in __dst_free),
we have _very_ good reasons not to repeat
this mistake in 2.3, but we have no choice
now. _It_ _is_ _explicit_ _deliberate_
_race_ _condition_.
*/
if (event!=NETDEV_DOWN &&
!(dev->features & NETIF_F_DYNALLOC) &&
dst->output == dst_blackhole) {
dst->dev = &loopback_dev;
dev_put(dev);
dev_hold(&loopback_dev);
dst->output = dst_discard;
if (dst->neighbour && dst->neighbour->dev == dev) {
dst->neighbour->dev = &loopback_dev;
dev_put(dev);
dev_hold(&loopback_dev);
}
} else {
dst->input = dst_discard;
dst->output = dst_blackhole;
}
}
}
spin_unlock_bh(&dst_lock);
break;
}
return NOTIFY_DONE;
}
 
struct notifier_block dst_dev_notifier = {
dst_dev_event,
NULL,
0
};
 
void __init dst_init(void)
{
register_netdevice_notifier(&dst_dev_notifier);
}
/Makefile
0,0 → 1,35
#
# Makefile for the Linux networking core.
#
# Note! Dependencies are done automagically by 'make dep', which also
# removes any old dependencies. DON'T put your own dependencies here
# unless it's something special (ie not a .c file).
#
# Note 2! The CFLAGS definition is now in the main makefile...
 
O_TARGET := core.o
 
export-objs := netfilter.o profile.o
 
obj-y := sock.o skbuff.o iovec.o datagram.o scm.o
 
ifeq ($(CONFIG_SYSCTL),y)
ifeq ($(CONFIG_NET),y)
obj-y += sysctl_net_core.o
endif
endif
 
obj-$(CONFIG_FILTER) += filter.o
 
obj-$(CONFIG_NET) += dev.o ethtool.o dev_mcast.o dst.o neighbour.o \
rtnetlink.o utils.o
 
obj-$(CONFIG_NETFILTER) += netfilter.o
obj-$(CONFIG_NET_DIVERT) += dv.o
obj-$(CONFIG_NET_PROFILE) += profile.o
obj-$(CONFIG_NET_PKTGEN) += pktgen.o
obj-$(CONFIG_NET_RADIO) += wireless.o
# Ugly. I wish all wireless drivers were moved in drivers/net/wireless
obj-$(CONFIG_NET_PCMCIA_RADIO) += wireless.o
 
include $(TOPDIR)/Rules.make
/netfilter.c
0,0 → 1,639
/* netfilter.c: look after the filters for various protocols.
* Heavily influenced by the old firewall.c by David Bonn and Alan Cox.
*
* Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
* way.
*
* Rusty Russell (C)2000 -- This code is GPL.
*
* February 2000: Modified by James Morris to have 1 queue per protocol.
* 15-Mar-2000: Added NF_REPEAT --RR.
*/
#include <linux/config.h>
#include <linux/netfilter.h>
#include <net/protocol.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/wait.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/if.h>
#include <linux/netdevice.h>
#include <linux/brlock.h>
#include <linux/inetdevice.h>
#include <net/sock.h>
#include <net/route.h>
#include <linux/ip.h>
 
#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>
 
/* In this code, we can be waiting indefinitely for userspace to
* service a packet if a hook returns NF_QUEUE. We could keep a count
* of skbuffs queued for userspace, and not deregister a hook unless
* this is zero, but that sucks. Now, we simply check when the
* packets come back: if the hook is gone, the packet is discarded. */
#ifdef CONFIG_NETFILTER_DEBUG
#define NFDEBUG(format, args...) printk(format , ## args)
#else
#define NFDEBUG(format, args...)
#endif
 
/* Sockopts are only registered and called from user context, so
BR_NETPROTO_LOCK would be overkill. Also, [gs]etsockopt calls may
sleep. */
static DECLARE_MUTEX(nf_sockopt_mutex);
 
struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
static LIST_HEAD(nf_sockopts);
 
/*
 * A queue handler may be registered for each protocol. Each is protected by
 * a long-term mutex. The handler must provide an outfn() to accept packets
* for queueing and must reinject all packets it receives, no matter what.
*/
static struct nf_queue_handler_t {
nf_queue_outfn_t outfn;
void *data;
} queue_handler[NPROTO];
 
int nf_register_hook(struct nf_hook_ops *reg)
{
struct list_head *i;
 
br_write_lock_bh(BR_NETPROTO_LOCK);
for (i = nf_hooks[reg->pf][reg->hooknum].next;
i != &nf_hooks[reg->pf][reg->hooknum];
i = i->next) {
if (reg->priority < ((struct nf_hook_ops *)i)->priority)
break;
}
list_add(&reg->list, i->prev);
br_write_unlock_bh(BR_NETPROTO_LOCK);
return 0;
}
 
void nf_unregister_hook(struct nf_hook_ops *reg)
{
br_write_lock_bh(BR_NETPROTO_LOCK);
list_del(&reg->list);
br_write_unlock_bh(BR_NETPROTO_LOCK);
}
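/*
 * Illustrative sketch (not part of the original file): how a module would use
 * nf_register_hook()/nf_unregister_hook() above.  The hook body, its name,
 * and the counter are invented for illustration; the ops fields follow the
 * 2.4 struct nf_hook_ops layout.
 */
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>

static unsigned long sample_pkt_count;

/* Count every IPv4 packet seen at PRE_ROUTING, then let it continue. */
static unsigned int sample_hook(unsigned int hooknum,
				struct sk_buff **pskb,
				const struct net_device *in,
				const struct net_device *out,
				int (*okfn)(struct sk_buff *))
{
	sample_pkt_count++;
	return NF_ACCEPT;	/* NF_DROP, NF_QUEUE, NF_STOLEN are the alternatives */
}

static struct nf_hook_ops sample_ops = {
	hook:		sample_hook,
	pf:		PF_INET,
	hooknum:	NF_IP_PRE_ROUTING,
	priority:	0,
};

/* nf_register_hook(&sample_ops) at module init time;
 * nf_unregister_hook(&sample_ops) at cleanup. */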
 
/* Do exclusive ranges overlap? */
static inline int overlap(int min1, int max1, int min2, int max2)
{
return max1 > min2 && min1 < max2;
}
 
/* Functions to register sockopt ranges (exclusive). */
int nf_register_sockopt(struct nf_sockopt_ops *reg)
{
struct list_head *i;
int ret = 0;
 
if (down_interruptible(&nf_sockopt_mutex) != 0)
return -EINTR;
 
for (i = nf_sockopts.next; i != &nf_sockopts; i = i->next) {
struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i;
if (ops->pf == reg->pf
&& (overlap(ops->set_optmin, ops->set_optmax,
reg->set_optmin, reg->set_optmax)
|| overlap(ops->get_optmin, ops->get_optmax,
reg->get_optmin, reg->get_optmax))) {
NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
ops->set_optmin, ops->set_optmax,
ops->get_optmin, ops->get_optmax,
reg->set_optmin, reg->set_optmax,
reg->get_optmin, reg->get_optmax);
ret = -EBUSY;
goto out;
}
}
 
list_add(&reg->list, &nf_sockopts);
out:
up(&nf_sockopt_mutex);
return ret;
}
 
void nf_unregister_sockopt(struct nf_sockopt_ops *reg)
{
/* No point being interruptible: we're probably in cleanup_module() */
restart:
down(&nf_sockopt_mutex);
if (reg->use != 0) {
/* To be woken by nf_sockopt call... */
/* FIXME: Stuart Young's name appears gratuitously. */
set_current_state(TASK_UNINTERRUPTIBLE);
reg->cleanup_task = current;
up(&nf_sockopt_mutex);
schedule();
goto restart;
}
list_del(&reg->list);
up(&nf_sockopt_mutex);
}
 
#ifdef CONFIG_NETFILTER_DEBUG
#include <net/ip.h>
#include <net/route.h>
#include <net/tcp.h>
#include <linux/netfilter_ipv4.h>
 
static void debug_print_hooks_ip(unsigned int nf_debug)
{
if (nf_debug & (1 << NF_IP_PRE_ROUTING)) {
printk("PRE_ROUTING ");
nf_debug ^= (1 << NF_IP_PRE_ROUTING);
}
if (nf_debug & (1 << NF_IP_LOCAL_IN)) {
printk("LOCAL_IN ");
nf_debug ^= (1 << NF_IP_LOCAL_IN);
}
if (nf_debug & (1 << NF_IP_FORWARD)) {
printk("FORWARD ");
nf_debug ^= (1 << NF_IP_FORWARD);
}
if (nf_debug & (1 << NF_IP_LOCAL_OUT)) {
printk("LOCAL_OUT ");
nf_debug ^= (1 << NF_IP_LOCAL_OUT);
}
if (nf_debug & (1 << NF_IP_POST_ROUTING)) {
printk("POST_ROUTING ");
nf_debug ^= (1 << NF_IP_POST_ROUTING);
}
if (nf_debug)
printk("Crap bits: 0x%04X", nf_debug);
printk("\n");
}
 
void nf_dump_skb(int pf, struct sk_buff *skb)
{
printk("skb: pf=%i %s dev=%s len=%u\n",
pf,
skb->sk ? "(owned)" : "(unowned)",
skb->dev ? skb->dev->name : "(no dev)",
skb->len);
switch (pf) {
case PF_INET: {
const struct iphdr *ip = skb->nh.iph;
__u32 *opt = (__u32 *) (ip + 1);
int opti;
__u16 src_port = 0, dst_port = 0;
 
if (ip->protocol == IPPROTO_TCP
|| ip->protocol == IPPROTO_UDP) {
struct tcphdr *tcp=(struct tcphdr *)((__u32 *)ip+ip->ihl);
src_port = ntohs(tcp->source);
dst_port = ntohs(tcp->dest);
}
printk("PROTO=%d %u.%u.%u.%u:%hu %u.%u.%u.%u:%hu"
" L=%hu S=0x%2.2hX I=%hu F=0x%4.4hX T=%hu",
ip->protocol, NIPQUAD(ip->saddr),
src_port, NIPQUAD(ip->daddr),
dst_port,
ntohs(ip->tot_len), ip->tos, ntohs(ip->id),
ntohs(ip->frag_off), ip->ttl);
 
for (opti = 0; opti < (ip->ihl - sizeof(struct iphdr) / 4); opti++)
printk(" O=0x%8.8X", *opt++);
printk("\n");
}
}
}
 
void nf_debug_ip_local_deliver(struct sk_buff *skb)
{
/* If it's a loopback packet, it must have come through
* NF_IP_LOCAL_OUT, NF_IP_POST_ROUTING, NF_IP_PRE_ROUTING and
* NF_IP_LOCAL_IN. Otherwise, it must have gone through
* NF_IP_PRE_ROUTING and NF_IP_LOCAL_IN. */
if (!skb->dev) {
printk("ip_local_deliver: skb->dev is NULL.\n");
}
else if (strcmp(skb->dev->name, "lo") == 0) {
if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
| (1 << NF_IP_POST_ROUTING)
| (1 << NF_IP_PRE_ROUTING)
| (1 << NF_IP_LOCAL_IN))) {
printk("ip_local_deliver: bad loopback skb: ");
debug_print_hooks_ip(skb->nf_debug);
nf_dump_skb(PF_INET, skb);
}
}
else {
if (skb->nf_debug != ((1<<NF_IP_PRE_ROUTING)
| (1<<NF_IP_LOCAL_IN))) {
printk("ip_local_deliver: bad non-lo skb: ");
debug_print_hooks_ip(skb->nf_debug);
nf_dump_skb(PF_INET, skb);
}
}
}
 
void nf_debug_ip_loopback_xmit(struct sk_buff *newskb)
{
if (newskb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
| (1 << NF_IP_POST_ROUTING))) {
printk("ip_dev_loopback_xmit: bad owned skb = %p: ",
newskb);
debug_print_hooks_ip(newskb->nf_debug);
nf_dump_skb(PF_INET, newskb);
}
/* Clear to avoid confusing input check */
newskb->nf_debug = 0;
}
 
void nf_debug_ip_finish_output2(struct sk_buff *skb)
{
/* If it's owned, it must have gone through the
* NF_IP_LOCAL_OUT and NF_IP_POST_ROUTING.
* Otherwise, must have gone through
* NF_IP_PRE_ROUTING, NF_IP_FORWARD and NF_IP_POST_ROUTING.
*/
if (skb->sk) {
if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
| (1 << NF_IP_POST_ROUTING))) {
printk("ip_finish_output: bad owned skb = %p: ", skb);
debug_print_hooks_ip(skb->nf_debug);
nf_dump_skb(PF_INET, skb);
}
} else {
if (skb->nf_debug != ((1 << NF_IP_PRE_ROUTING)
| (1 << NF_IP_FORWARD)
| (1 << NF_IP_POST_ROUTING))) {
/* Fragments, entunnelled packets, TCP RSTs
generated by ipt_REJECT will have no
owners, but still may be local */
if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
| (1 << NF_IP_POST_ROUTING))){
printk("ip_finish_output:"
" bad unowned skb = %p: ",skb);
debug_print_hooks_ip(skb->nf_debug);
nf_dump_skb(PF_INET, skb);
}
}
}
}
#endif /*CONFIG_NETFILTER_DEBUG*/
 
/* Call get/setsockopt() */
static int nf_sockopt(struct sock *sk, int pf, int val,
char *opt, int *len, int get)
{
struct list_head *i;
struct nf_sockopt_ops *ops;
int ret;
 
if (down_interruptible(&nf_sockopt_mutex) != 0)
return -EINTR;
 
for (i = nf_sockopts.next; i != &nf_sockopts; i = i->next) {
ops = (struct nf_sockopt_ops *)i;
if (ops->pf == pf) {
if (get) {
if (val >= ops->get_optmin
&& val < ops->get_optmax) {
ops->use++;
up(&nf_sockopt_mutex);
ret = ops->get(sk, val, opt, len);
goto out;
}
} else {
if (val >= ops->set_optmin
&& val < ops->set_optmax) {
ops->use++;
up(&nf_sockopt_mutex);
ret = ops->set(sk, val, opt, *len);
goto out;
}
}
}
}
up(&nf_sockopt_mutex);
return -ENOPROTOOPT;
out:
down(&nf_sockopt_mutex);
ops->use--;
if (ops->cleanup_task)
wake_up_process(ops->cleanup_task);
up(&nf_sockopt_mutex);
return ret;
}
 
int nf_setsockopt(struct sock *sk, int pf, int val, char *opt,
int len)
{
return nf_sockopt(sk, pf, val, opt, &len, 0);
}
 
int nf_getsockopt(struct sock *sk, int pf, int val, char *opt, int *len)
{
return nf_sockopt(sk, pf, val, opt, len, 1);
}
 
static unsigned int nf_iterate(struct list_head *head,
struct sk_buff **skb,
int hook,
const struct net_device *indev,
const struct net_device *outdev,
struct list_head **i,
int (*okfn)(struct sk_buff *))
{
for (*i = (*i)->next; *i != head; *i = (*i)->next) {
struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;
switch (elem->hook(hook, skb, indev, outdev, okfn)) {
case NF_QUEUE:
return NF_QUEUE;
 
case NF_STOLEN:
return NF_STOLEN;
 
case NF_DROP:
return NF_DROP;
 
case NF_REPEAT:
*i = (*i)->prev;
break;
 
#ifdef CONFIG_NETFILTER_DEBUG
case NF_ACCEPT:
break;
 
default:
NFDEBUG("Evil return from %p(%u).\n",
elem->hook, hook);
#endif
}
}
return NF_ACCEPT;
}
 
int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data)
{
int ret;
 
br_write_lock_bh(BR_NETPROTO_LOCK);
if (queue_handler[pf].outfn)
ret = -EBUSY;
else {
queue_handler[pf].outfn = outfn;
queue_handler[pf].data = data;
ret = 0;
}
br_write_unlock_bh(BR_NETPROTO_LOCK);
 
return ret;
}
 
/* The caller must flush their queue before this */
int nf_unregister_queue_handler(int pf)
{
br_write_lock_bh(BR_NETPROTO_LOCK);
queue_handler[pf].outfn = NULL;
queue_handler[pf].data = NULL;
br_write_unlock_bh(BR_NETPROTO_LOCK);
return 0;
}
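 
/*
 * Illustrative sketch (not part of the original file): the smallest possible
 * queue handler. The outfn takes ownership of both skb and info and must
 * hand them back to nf_reinject() exactly once; the hypothetical
 * example_outfn() below simply accepts the packet straight away, where a
 * real handler (such as ip_queue) would first pass it to user space.
 */
static int example_outfn(struct sk_buff *skb, struct nf_info *info,
			 void *data)
{
	nf_reinject(skb, info, NF_ACCEPT);
	return 0;
}

/*
 * Registered with nf_register_queue_handler(PF_INET, example_outfn, NULL)
 * and torn down, after flushing the queue, with
 * nf_unregister_queue_handler(PF_INET).
 */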
 
/*
* Any packet that leaves via this function must come back
* through nf_reinject().
*/
static void nf_queue(struct sk_buff *skb,
struct list_head *elem,
int pf, unsigned int hook,
struct net_device *indev,
struct net_device *outdev,
int (*okfn)(struct sk_buff *))
{
int status;
struct nf_info *info;
 
if (!queue_handler[pf].outfn) {
kfree_skb(skb);
return;
}
 
info = kmalloc(sizeof(*info), GFP_ATOMIC);
if (!info) {
if (net_ratelimit())
printk(KERN_ERR "OOM queueing packet %p\n",
skb);
kfree_skb(skb);
return;
}
 
*info = (struct nf_info) {
(struct nf_hook_ops *)elem, pf, hook, indev, outdev, okfn };
 
/* Bump dev refs so they don't vanish while packet is out */
if (indev) dev_hold(indev);
if (outdev) dev_hold(outdev);
 
status = queue_handler[pf].outfn(skb, info, queue_handler[pf].data);
if (status < 0) {
/* Queueing failed: release device references and free the packet. */
if (indev) dev_put(indev);
if (outdev) dev_put(outdev);
kfree(info);
kfree_skb(skb);
return;
}
}
 
int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb,
struct net_device *indev,
struct net_device *outdev,
int (*okfn)(struct sk_buff *))
{
struct list_head *elem;
unsigned int verdict;
int ret = 0;
 
/* This stopgap cannot be removed until all the hooks are audited. */
if (skb_is_nonlinear(skb) && skb_linearize(skb, GFP_ATOMIC) != 0) {
kfree_skb(skb);
return -ENOMEM;
}
if (skb->ip_summed == CHECKSUM_HW) {
if (outdev == NULL) {
skb->ip_summed = CHECKSUM_NONE;
} else {
skb_checksum_help(skb);
}
}
 
/* We may already have this, but read-locks nest anyway */
br_read_lock_bh(BR_NETPROTO_LOCK);
 
#ifdef CONFIG_NETFILTER_DEBUG
if (skb->nf_debug & (1 << hook)) {
printk("nf_hook: hook %i already set.\n", hook);
nf_dump_skb(pf, skb);
}
skb->nf_debug |= (1 << hook);
#endif
 
elem = &nf_hooks[pf][hook];
verdict = nf_iterate(&nf_hooks[pf][hook], &skb, hook, indev,
outdev, &elem, okfn);
if (verdict == NF_QUEUE) {
NFDEBUG("nf_hook: Verdict = QUEUE.\n");
nf_queue(skb, elem, pf, hook, indev, outdev, okfn);
}
 
switch (verdict) {
case NF_ACCEPT:
ret = okfn(skb);
break;
 
case NF_DROP:
kfree_skb(skb);
ret = -EPERM;
break;
}
 
br_read_unlock_bh(BR_NETPROTO_LOCK);
return ret;
}
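 
/*
 * For reference (assumed from <linux/netfilter.h> of this era): protocol
 * code normally enters nf_hook_slow() through the NF_HOOK() macro, which
 * short-circuits to okfn() when no hooks are registered. IPv4 input, for
 * instance, does roughly:
 *
 *	return NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, dev, NULL,
 *		       ip_rcv_finish);
 */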
 
void nf_reinject(struct sk_buff *skb, struct nf_info *info,
unsigned int verdict)
{
struct list_head *elem = &info->elem->list;
struct list_head *i;
 
/* We don't have BR_NETPROTO_LOCK here */
br_read_lock_bh(BR_NETPROTO_LOCK);
for (i = nf_hooks[info->pf][info->hook].next; i != elem; i = i->next) {
if (i == &nf_hooks[info->pf][info->hook]) {
/* The module which sent it to userspace is gone. */
NFDEBUG("%s: module disappeared, dropping packet.\n",
__FUNCTION__);
verdict = NF_DROP;
break;
}
}
 
/* Continue traversal iff userspace said ok... */
if (verdict == NF_REPEAT) {
elem = elem->prev;
verdict = NF_ACCEPT;
}
 
if (verdict == NF_ACCEPT) {
verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
&skb, info->hook,
info->indev, info->outdev, &elem,
info->okfn);
}
 
switch (verdict) {
case NF_ACCEPT:
info->okfn(skb);
break;
 
case NF_QUEUE:
nf_queue(skb, elem, info->pf, info->hook,
info->indev, info->outdev, info->okfn);
break;
 
case NF_DROP:
kfree_skb(skb);
break;
}
br_read_unlock_bh(BR_NETPROTO_LOCK);
 
/* Release those devices we held, or Alexey will kill me. */
if (info->indev) dev_put(info->indev);
if (info->outdev) dev_put(info->outdev);
kfree(info);
return;
}
 
#ifdef CONFIG_INET
/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
int ip_route_me_harder(struct sk_buff **pskb)
{
struct iphdr *iph = (*pskb)->nh.iph;
struct rtable *rt;
struct rt_key key = {};
struct dst_entry *odst;
unsigned int hh_len;
 
/* some non-standard hacks like ipt_REJECT.c:send_reset() can cause
* packets with foreign saddr to appear on the NF_IP_LOCAL_OUT hook.
*/
if (inet_addr_type(iph->saddr) == RTN_LOCAL) {
key.dst = iph->daddr;
key.src = iph->saddr;
key.oif = (*pskb)->sk ? (*pskb)->sk->bound_dev_if : 0;
key.tos = RT_TOS(iph->tos);
#ifdef CONFIG_IP_ROUTE_FWMARK
key.fwmark = (*pskb)->nfmark;
#endif
if (ip_route_output_key(&rt, &key) != 0)
return -1;
 
/* Drop old route. */
dst_release((*pskb)->dst);
(*pskb)->dst = &rt->u.dst;
} else {
/* non-local src, find valid iif to satisfy
* rp-filter when calling ip_route_input. */
key.dst = iph->saddr;
if (ip_route_output_key(&rt, &key) != 0)
return -1;
 
odst = (*pskb)->dst;
if (ip_route_input(*pskb, iph->daddr, iph->saddr,
RT_TOS(iph->tos), rt->u.dst.dev) != 0) {
dst_release(&rt->u.dst);
return -1;
}
dst_release(&rt->u.dst);
dst_release(odst);
}
if ((*pskb)->dst->error)
return -1;
 
/* Change in oif may mean change in hh_len. */
hh_len = (*pskb)->dst->dev->hard_header_len;
if (skb_headroom(*pskb) < hh_len) {
struct sk_buff *nskb;
 
nskb = skb_realloc_headroom(*pskb, hh_len);
if (!nskb)
return -1;
if ((*pskb)->sk)
skb_set_owner_w(nskb, (*pskb)->sk);
kfree_skb(*pskb);
*pskb = nskb;
}
 
return 0;
}
#endif /*CONFIG_INET*/
 
/* This does not belong here, but ipt_REJECT needs it if connection
tracking is in use: without this, the connection may not be in the hash
table, and hence manufactured ICMP or RST packets will not be associated
with it. */
void (*ip_ct_attach)(struct sk_buff *, struct nf_ct_info *);
 
void __init netfilter_init(void)
{
int i, h;
 
for (i = 0; i < NPROTO; i++) {
for (h = 0; h < NF_MAX_HOOKS; h++)
INIT_LIST_HEAD(&nf_hooks[i][h]);
}
}
/wireless.c
0,0 → 1,1282
/*
* This file implements the Wireless Extensions APIs.
*
* Authors : Jean Tourrilhes - HPL - <jt@hpl.hp.com>
* Copyright (c) 1997-2003 Jean Tourrilhes, All Rights Reserved.
*
* (As all part of the Linux kernel, this file is GPL)
*/
 
/************************** DOCUMENTATION **************************/
/*
* API definition :
* --------------
* See <linux/wireless.h> for details of the APIs and the rest.
*
* History :
* -------
*
* v1 - 5.12.01 - Jean II
* o Created this file.
*
* v2 - 13.12.01 - Jean II
* o Move /proc/net/wireless stuff from net/core/dev.c to here
* o Make Wireless Extension IOCTLs go through here
* o Added iw_handler handling ;-)
* o Added standard ioctl description
* o Initial dumb commit strategy based on orinoco.c
*
* v3 - 19.12.01 - Jean II
* o Make sure we don't go out of standard_ioctl[] in ioctl_standard_call
* o Add event dispatcher function
* o Add event description
* o Propagate events as rtnetlink IFLA_WIRELESS option
* o Generate event on selected SET requests
*
* v4 - 18.04.02 - Jean II
* o Fix stupid off by one in iw_ioctl_description : IW_ESSID_MAX_SIZE + 1
*
* v5 - 21.06.02 - Jean II
* o Add IW_PRIV_TYPE_ADDR in priv_type_size (+cleanup)
* o Reshuffle IW_HEADER_TYPE_XXX to map IW_PRIV_TYPE_XXX changes
* o Add IWEVCUSTOM for driver specific event/scanning token
* o Turn on WE_STRICT_WRITE by default + kernel warning
* o Fix WE_STRICT_WRITE in ioctl_export_private() (32 => iw_num)
* o Fix off-by-one in test (extra_size <= IFNAMSIZ)
*
* v6 - 9.01.03 - Jean II
* o Add common spy support : iw_handler_set_spy(), wireless_spy_update()
* o Add enhanced spy support : iw_handler_set_thrspy() and event.
* o Add WIRELESS_EXT version display in /proc/net/wireless
*/
 
/***************************** INCLUDES *****************************/
 
#include <asm/uaccess.h> /* copy_to_user() */
#include <linux/config.h> /* Not needed ??? */
#include <linux/types.h> /* off_t */
#include <linux/netdevice.h> /* struct ifreq, dev_get_by_name() */
#include <linux/rtnetlink.h> /* rtnetlink stuff */
#include <linux/if_arp.h> /* ARPHRD_ETHER */
 
#include <linux/wireless.h> /* Pretty obvious */
#include <net/iw_handler.h> /* New driver API */
 
/**************************** CONSTANTS ****************************/
 
/* Enough lenience, let's make sure things are proper... */
#define WE_STRICT_WRITE /* Check write buffer size */
/* I'll probably drop both the define and kernel message in the next version */
 
/* Debugging stuff */
#undef WE_IOCTL_DEBUG /* Debug IOCTL API */
#undef WE_EVENT_DEBUG /* Debug Event dispatcher */
#undef WE_SPY_DEBUG /* Debug enhanced spy support */
 
/* Options */
#define WE_EVENT_NETLINK /* Propagate events using rtnetlink */
#define WE_SET_EVENT /* Generate an event on some set commands */
 
/************************* GLOBAL VARIABLES *************************/
/*
* You should not use global variables, because of re-entrancy.
* In our case, it's only const, so it's OK...
*/
/*
* Meta-data about all the standard Wireless Extension requests we
* know about.
*/
static const struct iw_ioctl_description standard_ioctl[] = {
/* SIOCSIWCOMMIT */
{ IW_HEADER_TYPE_NULL, 0, 0, 0, 0, 0},
/* SIOCGIWNAME */
{ IW_HEADER_TYPE_CHAR, 0, 0, 0, 0, IW_DESCR_FLAG_DUMP},
/* SIOCSIWNWID */
{ IW_HEADER_TYPE_PARAM, 0, 0, 0, 0, IW_DESCR_FLAG_EVENT},
/* SIOCGIWNWID */
{ IW_HEADER_TYPE_PARAM, 0, 0, 0, 0, IW_DESCR_FLAG_DUMP},
/* SIOCSIWFREQ */
{ IW_HEADER_TYPE_FREQ, 0, 0, 0, 0, IW_DESCR_FLAG_EVENT},
/* SIOCGIWFREQ */
{ IW_HEADER_TYPE_FREQ, 0, 0, 0, 0, IW_DESCR_FLAG_DUMP},
/* SIOCSIWMODE */
{ IW_HEADER_TYPE_UINT, 0, 0, 0, 0, IW_DESCR_FLAG_EVENT},
/* SIOCGIWMODE */
{ IW_HEADER_TYPE_UINT, 0, 0, 0, 0, IW_DESCR_FLAG_DUMP},
/* SIOCSIWSENS */
{ IW_HEADER_TYPE_PARAM, 0, 0, 0, 0, 0},
/* SIOCGIWSENS */
{ IW_HEADER_TYPE_PARAM, 0, 0, 0, 0, 0},
/* SIOCSIWRANGE */
{ IW_HEADER_TYPE_NULL, 0, 0, 0, 0, 0},
/* SIOCGIWRANGE */
{ IW_HEADER_TYPE_POINT, 0, 1, 0, sizeof(struct iw_range), IW_DESCR_FLAG_DUMP},
/* SIOCSIWPRIV */
{ IW_HEADER_TYPE_NULL, 0, 0, 0, 0, 0},
/* SIOCGIWPRIV (handled directly by us) */
{ IW_HEADER_TYPE_NULL, 0, 0, 0, 0, 0},
/* SIOCSIWSTATS */
{ IW_HEADER_TYPE_NULL, 0, 0, 0, 0, 0},
/* SIOCGIWSTATS (handled directly by us) */
{ IW_HEADER_TYPE_NULL, 0, 0, 0, 0, IW_DESCR_FLAG_DUMP},
/* SIOCSIWSPY */
{ IW_HEADER_TYPE_POINT, 0, sizeof(struct sockaddr), 0, IW_MAX_SPY, 0},
/* SIOCGIWSPY */
{ IW_HEADER_TYPE_POINT, 0, (sizeof(struct sockaddr) + sizeof(struct iw_quality)), 0, IW_MAX_SPY, 0},
/* SIOCSIWTHRSPY */
{ IW_HEADER_TYPE_POINT, 0, sizeof(struct iw_thrspy), 1, 1, 0},
/* SIOCGIWTHRSPY */
{ IW_HEADER_TYPE_POINT, 0, sizeof(struct iw_thrspy), 1, 1, 0},
/* SIOCSIWAP */
{ IW_HEADER_TYPE_ADDR, 0, 0, 0, 0, 0},
/* SIOCGIWAP */
{ IW_HEADER_TYPE_ADDR, 0, 0, 0, 0, IW_DESCR_FLAG_DUMP},
/* -- hole -- */
{ IW_HEADER_TYPE_NULL, 0, 0, 0, 0, 0},
/* SIOCGIWAPLIST */
{ IW_HEADER_TYPE_POINT, 0, (sizeof(struct sockaddr) + sizeof(struct iw_quality)), 0, IW_MAX_AP, 0},
/* SIOCSIWSCAN */
{ IW_HEADER_TYPE_PARAM, 0, 0, 0, 0, 0},
/* SIOCGIWSCAN */
{ IW_HEADER_TYPE_POINT, 0, 1, 0, IW_SCAN_MAX_DATA, 0},
/* SIOCSIWESSID */
{ IW_HEADER_TYPE_POINT, 0, 1, 0, IW_ESSID_MAX_SIZE + 1, IW_DESCR_FLAG_EVENT},
/* SIOCGIWESSID */
{ IW_HEADER_TYPE_POINT, 0, 1, 0, IW_ESSID_MAX_SIZE + 1, IW_DESCR_FLAG_DUMP},
/* SIOCSIWNICKN */
{ IW_HEADER_TYPE_POINT, 0, 1, 0, IW_ESSID_MAX_SIZE + 1, 0},
/* SIOCGIWNICKN */
{ IW_HEADER_TYPE_POINT, 0, 1, 0, IW_ESSID_MAX_SIZE + 1, 0},
/* -- hole -- */
{ IW_HEADER_TYPE_NULL, 0, 0, 0, 0, 0},
/* -- hole -- */
{ IW_HEADER_TYPE_NULL, 0, 0, 0, 0, 0},
/* SIOCSIWRATE */
{ IW_HEADER_TYPE_PARAM, 0, 0, 0, 0, 0},
/* SIOCGIWRATE */
{ IW_HEADER_TYPE_PARAM, 0, 0, 0, 0, 0},
/* SIOCSIWRTS */
{ IW_HEADER_TYPE_PARAM, 0, 0, 0, 0, 0},
/* SIOCGIWRTS */
{ IW_HEADER_TYPE_PARAM, 0, 0, 0, 0, 0},
/* SIOCSIWFRAG */
{ IW_HEADER_TYPE_PARAM, 0, 0, 0, 0, 0},
/* SIOCGIWFRAG */
{ IW_HEADER_TYPE_PARAM, 0, 0, 0, 0, 0},
/* SIOCSIWTXPOW */
{ IW_HEADER_TYPE_PARAM, 0, 0, 0, 0, 0},
/* SIOCGIWTXPOW */
{ IW_HEADER_TYPE_PARAM, 0, 0, 0, 0, 0},
/* SIOCSIWRETRY */
{ IW_HEADER_TYPE_PARAM, 0, 0, 0, 0, 0},
/* SIOCGIWRETRY */
{ IW_HEADER_TYPE_PARAM, 0, 0, 0, 0, 0},
/* SIOCSIWENCODE */
{ IW_HEADER_TYPE_POINT, 0, 1, 0, IW_ENCODING_TOKEN_MAX, IW_DESCR_FLAG_EVENT | IW_DESCR_FLAG_RESTRICT},
/* SIOCGIWENCODE */
{ IW_HEADER_TYPE_POINT, 0, 1, 0, IW_ENCODING_TOKEN_MAX, IW_DESCR_FLAG_DUMP | IW_DESCR_FLAG_RESTRICT},
/* SIOCSIWPOWER */
{ IW_HEADER_TYPE_PARAM, 0, 0, 0, 0, 0},
/* SIOCGIWPOWER */
{ IW_HEADER_TYPE_PARAM, 0, 0, 0, 0, 0},
};
static const int standard_ioctl_num = (sizeof(standard_ioctl) /
sizeof(struct iw_ioctl_description));
 
/*
* Meta-data about all the additional standard Wireless Extension events
* we know about.
*/
static const struct iw_ioctl_description standard_event[] = {
/* IWEVTXDROP */
{ IW_HEADER_TYPE_ADDR, 0, 0, 0, 0, 0},
/* IWEVQUAL */
{ IW_HEADER_TYPE_QUAL, 0, 0, 0, 0, 0},
/* IWEVCUSTOM */
{ IW_HEADER_TYPE_POINT, 0, 1, 0, IW_CUSTOM_MAX, 0},
/* IWEVREGISTERED */
{ IW_HEADER_TYPE_ADDR, 0, 0, 0, 0, 0},
/* IWEVEXPIRED */
{ IW_HEADER_TYPE_ADDR, 0, 0, 0, 0, 0},
};
static const int standard_event_num = (sizeof(standard_event) /
sizeof(struct iw_ioctl_description));
 
/* Size (in bytes) of the various private data types */
static const char priv_type_size[] = {
0, /* IW_PRIV_TYPE_NONE */
1, /* IW_PRIV_TYPE_BYTE */
1, /* IW_PRIV_TYPE_CHAR */
0, /* Not defined */
sizeof(__u32), /* IW_PRIV_TYPE_INT */
sizeof(struct iw_freq), /* IW_PRIV_TYPE_FLOAT */
sizeof(struct sockaddr), /* IW_PRIV_TYPE_ADDR */
0, /* Not defined */
};
 
/* Size (in bytes) of various events */
static const int event_type_size[] = {
IW_EV_LCP_LEN, /* IW_HEADER_TYPE_NULL */
0,
IW_EV_CHAR_LEN, /* IW_HEADER_TYPE_CHAR */
0,
IW_EV_UINT_LEN, /* IW_HEADER_TYPE_UINT */
IW_EV_FREQ_LEN, /* IW_HEADER_TYPE_FREQ */
IW_EV_ADDR_LEN, /* IW_HEADER_TYPE_ADDR */
0,
IW_EV_POINT_LEN, /* Without variable payload */
IW_EV_PARAM_LEN, /* IW_HEADER_TYPE_PARAM */
IW_EV_QUAL_LEN, /* IW_HEADER_TYPE_QUAL */
};
 
/************************ COMMON SUBROUTINES ************************/
/*
* Stuff that may be used in various places or doesn't fit in one
* of the sections below.
*/
 
/* ---------------------------------------------------------------- */
/*
* Return the driver handler associated with a specific Wireless Extension.
* Called from various places, so make sure it remains efficient.
*/
static inline iw_handler get_handler(struct net_device *dev,
unsigned int cmd)
{
/* Don't "optimise" the following variable, it will crash */
unsigned int index; /* *MUST* be unsigned */
 
/* Check if we have some wireless handlers defined */
if(dev->wireless_handlers == NULL)
return NULL;
 
/* Try as a standard command */
index = cmd - SIOCIWFIRST;
if(index < dev->wireless_handlers->num_standard)
return dev->wireless_handlers->standard[index];
 
/* Try as a private command */
index = cmd - SIOCIWFIRSTPRIV;
if(index < dev->wireless_handlers->num_private)
return dev->wireless_handlers->private[index];
 
/* Not found */
return NULL;
}
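 
/*
 * Illustrative sketch (not part of the original file): the lookup above
 * assumes the driver laid out its standard handler table in SIOCIW* order,
 * with NULL slots for unsupported requests. example_wx_handlers and
 * example_handler_def are hypothetical names; the field names are assumed
 * from struct iw_handler_def in <net/iw_handler.h>.
 */
static const iw_handler example_wx_handlers[] = {
	(iw_handler) NULL,		/* SIOCSIWCOMMIT */
	(iw_handler) NULL,		/* SIOCGIWNAME */
	/* ... one slot per request, in SIOCIW* order ... */
};

/*
 *	example_handler_def.num_standard =
 *		sizeof(example_wx_handlers) / sizeof(iw_handler);
 *	example_handler_def.standard = (iw_handler *) example_wx_handlers;
 *	dev->wireless_handlers = &example_handler_def;
 */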
 
/* ---------------------------------------------------------------- */
/*
* Get statistics out of the driver
*/
static inline struct iw_statistics *get_wireless_stats(struct net_device *dev)
{
return (dev->get_wireless_stats ?
dev->get_wireless_stats(dev) :
(struct iw_statistics *) NULL);
/* In the future, get_wireless_stats may move from 'struct net_device'
* to 'struct iw_handler_def', to de-bloat struct net_device.
* Definitely worth a thought... */
}
 
/* ---------------------------------------------------------------- */
/*
* Call the commit handler in the driver
* (if exist and if conditions are right)
*
* Note : our commit strategy is currently pretty dumb,
* but we will be able to improve on that...
* The goal is to try to aggregate as many changes as possible
* before doing the commit. Drivers that define a commit handler
* are usually those that need a reset after changing parameters, so
* we want to minimise the number of resets.
* A cool idea is to use a timer : at each "set" command we re-arm the
* timer; when the timer eventually fires, we call the driver.
* Hopefully, more on that later.
*
* Also, I'm waiting to see how many people will complain about the
* netif_running(dev) test. I'm open on that one...
* Hopefully, the driver will remember to do a commit in "open()" ;-)
*/
static inline int call_commit_handler(struct net_device * dev)
{
if((netif_running(dev)) &&
(dev->wireless_handlers->standard[0] != NULL)) {
/* Call the commit handler on the driver */
return dev->wireless_handlers->standard[0](dev, NULL,
NULL, NULL);
} else
return 0; /* Command completed successfully */
}
 
/* ---------------------------------------------------------------- */
/*
* Size (in bytes) of the private arguments
*/
static inline int get_priv_size(__u16 args)
{
int num = args & IW_PRIV_SIZE_MASK;
int type = (args & IW_PRIV_TYPE_MASK) >> 12;
 
return num * priv_type_size[type];
}
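 
/*
 * Worked example, assuming the usual <linux/wireless.h> encodings
 * (IW_PRIV_TYPE_INT == 0x4000, IW_PRIV_TYPE_MASK == 0x7000,
 * IW_PRIV_SIZE_MASK == 0x07FF):
 *
 *	get_priv_size(IW_PRIV_TYPE_INT | 3)
 *		=> type = 4, num = 3, size = 3 * sizeof(__u32) = 12 bytes
 */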
 
 
/******************** /proc/net/wireless SUPPORT ********************/
/*
* The /proc/net/wireless file is a human readable user-space interface
* exporting various wireless specific statistics from the wireless devices.
* This is the most popular part of the Wireless Extensions ;-)
*
* This interface is a pure clone of /proc/net/dev (in net/core/dev.c).
* The content of the file is basically the content of "struct iw_statistics".
*/
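 
/*
 * For illustration, a single (made-up) entry as sprintf_wireless_stats()
 * below would print it, following its "%6s: %04x %3d%c %3d%c %3d%c ..."
 * format: interface name, status, link quality, signal level and noise
 * (each followed by a '.' when recently updated), then the discard and
 * miss counters:
 *
 *    eth1: 0000   54.  203.  161.       0      0      0      0      0      0
 */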
 
#ifdef CONFIG_PROC_FS
 
/* ---------------------------------------------------------------- */
/*
* Print one entry (line) of /proc/net/wireless
*/
static inline int sprintf_wireless_stats(char *buffer, struct net_device *dev)
{
/* Get stats from the driver */
struct iw_statistics *stats;
int size;
 
stats = get_wireless_stats(dev);
if (stats != (struct iw_statistics *) NULL) {
size = sprintf(buffer,
"%6s: %04x %3d%c %3d%c %3d%c %6d %6d %6d %6d %6d %6d\n",
dev->name,
stats->status,
stats->qual.qual,
stats->qual.updated & 1 ? '.' : ' ',
((__u8) stats->qual.level),
stats->qual.updated & 2 ? '.' : ' ',
((__u8) stats->qual.noise),
stats->qual.updated & 4 ? '.' : ' ',
stats->discard.nwid,
stats->discard.code,
stats->discard.fragment,
stats->discard.retries,
stats->discard.misc,
stats->miss.beacon);
stats->qual.updated = 0;
}
else
size = 0;
 
return size;
}
 
/* ---------------------------------------------------------------- */
/*
* Print info for /proc/net/wireless (print all entries)
*/
int dev_get_wireless_info(char * buffer, char **start, off_t offset,
int length)
{
int len = 0;
off_t begin = 0;
off_t pos = 0;
int size;
struct net_device * dev;
 
size = sprintf(buffer,
"Inter-| sta-| Quality | Discarded packets | Missed | WE\n"
" face | tus | link level noise | nwid crypt frag retry misc | beacon | %d\n",
WIRELESS_EXT);
pos += size;
len += size;
 
read_lock(&dev_base_lock);
for (dev = dev_base; dev != NULL; dev = dev->next) {
size = sprintf_wireless_stats(buffer + len, dev);
len += size;
pos = begin + len;
 
if (pos < offset) {
len = 0;
begin = pos;
}
if (pos > offset + length)
break;
}
read_unlock(&dev_base_lock);
 
*start = buffer + (offset - begin); /* Start of wanted data */
len -= (offset - begin); /* Start slop */
if (len > length)
len = length; /* Ending slop */
if (len < 0)
len = 0;
 
return len;
}
#endif /* CONFIG_PROC_FS */
 
/************************** IOCTL SUPPORT **************************/
/*
* The original user space API to configure all those Wireless Extensions
* is through IOCTLs.
* In there, we check if we need to call the new driver API (iw_handler)
* or just call the driver ioctl handler.
*/
 
/* ---------------------------------------------------------------- */
/*
* Allow programmatic access to /proc/net/wireless even if /proc
* doesn't exist... It's also more efficient...
*/
static inline int dev_iwstats(struct net_device *dev, struct ifreq *ifr)
{
/* Get stats from the driver */
struct iw_statistics *stats;
 
stats = get_wireless_stats(dev);
if (stats != (struct iw_statistics *) NULL) {
struct iwreq * wrq = (struct iwreq *)ifr;
 
/* Copy statistics to the user buffer */
if(copy_to_user(wrq->u.data.pointer, stats,
sizeof(struct iw_statistics)))
return -EFAULT;
 
/* Check if we need to clear the update flag */
if(wrq->u.data.flags != 0)
stats->qual.updated = 0;
return 0;
} else
return -EOPNOTSUPP;
}
 
/* ---------------------------------------------------------------- */
/*
* Export the driver private handler definition
* They will be picked up by tools like iwpriv...
*/
static inline int ioctl_export_private(struct net_device * dev,
struct ifreq * ifr)
{
struct iwreq * iwr = (struct iwreq *) ifr;
 
/* Check if the driver has something to export */
if((dev->wireless_handlers->num_private_args == 0) ||
(dev->wireless_handlers->private_args == NULL))
return -EOPNOTSUPP;
 
/* Check NULL pointer */
if(iwr->u.data.pointer == NULL)
return -EFAULT;
#ifdef WE_STRICT_WRITE
/* Check if there is enough buffer up there */
if(iwr->u.data.length < dev->wireless_handlers->num_private_args) {
printk(KERN_ERR "%s (WE) : Buffer for request SIOCGIWPRIV too small (%d<%d)\n", dev->name, iwr->u.data.length, dev->wireless_handlers->num_private_args);
return -E2BIG;
}
#endif /* WE_STRICT_WRITE */
 
/* Set the number of available ioctls. */
iwr->u.data.length = dev->wireless_handlers->num_private_args;
 
/* Copy structure to the user buffer. */
if (copy_to_user(iwr->u.data.pointer,
dev->wireless_handlers->private_args,
sizeof(struct iw_priv_args) * iwr->u.data.length))
return -EFAULT;
 
return 0;
}
 
/* ---------------------------------------------------------------- */
/*
* Wrapper to call a standard Wireless Extension handler.
* We do various checks and also take care of moving data between
* user space and kernel space.
*/
static inline int ioctl_standard_call(struct net_device * dev,
struct ifreq * ifr,
unsigned int cmd,
iw_handler handler)
{
struct iwreq * iwr = (struct iwreq *) ifr;
const struct iw_ioctl_description * descr;
struct iw_request_info info;
int ret = -EINVAL;
int user_size = 0;
 
/* Get the description of the IOCTL */
if((cmd - SIOCIWFIRST) >= standard_ioctl_num)
return -EOPNOTSUPP;
descr = &(standard_ioctl[cmd - SIOCIWFIRST]);
 
#ifdef WE_IOCTL_DEBUG
printk(KERN_DEBUG "%s (WE) : Found standard handler for 0x%04X\n",
ifr->ifr_name, cmd);
printk(KERN_DEBUG "%s (WE) : Header type : %d, Token type : %d, size : %d, token : %d\n", dev->name, descr->header_type, descr->token_type, descr->token_size, descr->max_tokens);
#endif /* WE_IOCTL_DEBUG */
 
/* Prepare the call */
info.cmd = cmd;
info.flags = 0;
 
/* Check if we have a pointer to user space data or not */
if(descr->header_type != IW_HEADER_TYPE_POINT) {
 
/* No extra arguments. Trivial to handle */
ret = handler(dev, &info, &(iwr->u), NULL);
 
#ifdef WE_SET_EVENT
/* Generate an event to notify listeners of the change */
if((descr->flags & IW_DESCR_FLAG_EVENT) &&
((ret == 0) || (ret == -EIWCOMMIT)))
wireless_send_event(dev, cmd, &(iwr->u), NULL);
#endif /* WE_SET_EVENT */
} else {
char * extra;
int err;
 
/* Check what user space is giving us */
if(IW_IS_SET(cmd)) {
/* Check NULL pointer */
if((iwr->u.data.pointer == NULL) &&
(iwr->u.data.length != 0))
return -EFAULT;
/* Check if number of token fits within bounds */
if(iwr->u.data.length > descr->max_tokens)
return -E2BIG;
if(iwr->u.data.length < descr->min_tokens)
return -EINVAL;
} else {
/* Check NULL pointer */
if(iwr->u.data.pointer == NULL)
return -EFAULT;
/* Save user space buffer size for checking */
user_size = iwr->u.data.length;
}
 
#ifdef WE_IOCTL_DEBUG
printk(KERN_DEBUG "%s (WE) : Malloc %d bytes\n",
dev->name, descr->max_tokens * descr->token_size);
#endif /* WE_IOCTL_DEBUG */
 
/* Always allocate for max space. Easier, and won't last
* long... */
extra = kmalloc(descr->max_tokens * descr->token_size,
GFP_KERNEL);
if (extra == NULL) {
return -ENOMEM;
}
 
/* If it is a SET, get all the extra data in here */
if(IW_IS_SET(cmd) && (iwr->u.data.length != 0)) {
err = copy_from_user(extra, iwr->u.data.pointer,
iwr->u.data.length *
descr->token_size);
if (err) {
kfree(extra);
return -EFAULT;
}
#ifdef WE_IOCTL_DEBUG
printk(KERN_DEBUG "%s (WE) : Got %d bytes\n",
dev->name,
iwr->u.data.length * descr->token_size);
#endif /* WE_IOCTL_DEBUG */
}
 
/* Call the handler */
ret = handler(dev, &info, &(iwr->u), extra);
 
/* If we have something to return to the user */
if (!ret && IW_IS_GET(cmd)) {
#ifdef WE_STRICT_WRITE
/* Check if there is enough buffer up there */
if(user_size < iwr->u.data.length) {
printk(KERN_ERR "%s (WE) : Buffer for request %04X too small (%d<%d)\n", dev->name, cmd, user_size, iwr->u.data.length);
kfree(extra);
return -E2BIG;
}
#endif /* WE_STRICT_WRITE */
 
err = copy_to_user(iwr->u.data.pointer, extra,
iwr->u.data.length *
descr->token_size);
if (err)
ret = -EFAULT;
#ifdef WE_IOCTL_DEBUG
printk(KERN_DEBUG "%s (WE) : Wrote %d bytes\n",
dev->name,
iwr->u.data.length * descr->token_size);
#endif /* WE_IOCTL_DEBUG */
}
 
#ifdef WE_SET_EVENT
/* Generate an event to notify listeners of the change */
if((descr->flags & IW_DESCR_FLAG_EVENT) &&
((ret == 0) || (ret == -EIWCOMMIT))) {
if(descr->flags & IW_DESCR_FLAG_RESTRICT)
/* If the event is restricted, don't
* export the payload */
wireless_send_event(dev, cmd, &(iwr->u), NULL);
else
wireless_send_event(dev, cmd, &(iwr->u),
extra);
}
#endif /* WE_SET_EVENT */
 
/* Cleanup - I told you it wasn't that long ;-) */
kfree(extra);
}
 
/* Call commit handler if needed and defined */
if(ret == -EIWCOMMIT)
ret = call_commit_handler(dev);
 
/* Here, we will generate the appropriate event if needed */
 
return ret;
}
 
/* ---------------------------------------------------------------- */
/*
* Wrapper to call a private Wireless Extension handler.
* We do various checks and also take care of moving data between
* user space and kernel space.
* It's not as nice and slimline as the standard wrapper. The cause
* is struct iw_priv_args, which was not really designed for the
* job we are doing here.
*
* IMPORTANT : This function prevents setting and getting data on the same
* IOCTL and enforces the SET/GET convention. Not doing it would be
* far too hairy...
* If you need to set and get data at the same time, please don't use
* an iw_handler but process it in your ioctl handler (i.e. use the
* old driver API).
*/
static inline int ioctl_private_call(struct net_device * dev,
struct ifreq * ifr,
unsigned int cmd,
iw_handler handler)
{
struct iwreq * iwr = (struct iwreq *) ifr;
struct iw_priv_args * descr = NULL;
struct iw_request_info info;
int extra_size = 0;
int i;
int ret = -EINVAL;
 
/* Get the description of the IOCTL */
for(i = 0; i < dev->wireless_handlers->num_private_args; i++)
if(cmd == dev->wireless_handlers->private_args[i].cmd) {
descr = &(dev->wireless_handlers->private_args[i]);
break;
}
 
#ifdef WE_IOCTL_DEBUG
printk(KERN_DEBUG "%s (WE) : Found private handler for 0x%04X\n",
ifr->ifr_name, cmd);
if(descr) {
printk(KERN_DEBUG "%s (WE) : Name %s, set %X, get %X\n",
dev->name, descr->name,
descr->set_args, descr->get_args);
}
#endif /* WE_IOCTL_DEBUG */
 
/* Compute the size of the set/get arguments */
if(descr != NULL) {
if(IW_IS_SET(cmd)) {
int offset = 0; /* For sub-ioctls */
/* Check for sub-ioctl handler */
if(descr->name[0] == '\0')
/* Reserve one int for sub-ioctl index */
offset = sizeof(__u32);
 
/* Size of set arguments */
extra_size = get_priv_size(descr->set_args);
 
/* Does it fit in iwr ? */
if((descr->set_args & IW_PRIV_SIZE_FIXED) &&
((extra_size + offset) <= IFNAMSIZ))
extra_size = 0;
} else {
/* Size of get arguments */
extra_size = get_priv_size(descr->get_args);
 
/* Does it fit in iwr ? */
if((descr->get_args & IW_PRIV_SIZE_FIXED) &&
(extra_size <= IFNAMSIZ))
extra_size = 0;
}
}
 
/* Prepare the call */
info.cmd = cmd;
info.flags = 0;
 
/* Check if we have a pointer to user space data or not. */
if(extra_size == 0) {
/* No extra arguments. Trivial to handle */
ret = handler(dev, &info, &(iwr->u), (char *) &(iwr->u));
} else {
char * extra;
int err;
 
/* Check what user space is giving us */
if(IW_IS_SET(cmd)) {
/* Check NULL pointer */
if((iwr->u.data.pointer == NULL) &&
(iwr->u.data.length != 0))
return -EFAULT;
 
/* Does it fit within bounds ? */
if(iwr->u.data.length > (descr->set_args &
IW_PRIV_SIZE_MASK))
return -E2BIG;
} else {
/* Check NULL pointer */
if(iwr->u.data.pointer == NULL)
return -EFAULT;
}
 
#ifdef WE_IOCTL_DEBUG
printk(KERN_DEBUG "%s (WE) : Malloc %d bytes\n",
dev->name, extra_size);
#endif /* WE_IOCTL_DEBUG */
 
/* Always allocate for max space. Easier, and won't last
* long... */
extra = kmalloc(extra_size, GFP_KERNEL);
if (extra == NULL) {
return -ENOMEM;
}
 
/* If it is a SET, get all the extra data in here */
if(IW_IS_SET(cmd) && (iwr->u.data.length != 0)) {
err = copy_from_user(extra, iwr->u.data.pointer,
extra_size);
if (err) {
kfree(extra);
return -EFAULT;
}
#ifdef WE_IOCTL_DEBUG
printk(KERN_DEBUG "%s (WE) : Got %d elem\n",
dev->name, iwr->u.data.length);
#endif /* WE_IOCTL_DEBUG */
}
 
/* Call the handler */
ret = handler(dev, &info, &(iwr->u), extra);
 
/* If we have something to return to the user */
if (!ret && IW_IS_GET(cmd)) {
err = copy_to_user(iwr->u.data.pointer, extra,
extra_size);
if (err)
ret = -EFAULT;
#ifdef WE_IOCTL_DEBUG
printk(KERN_DEBUG "%s (WE) : Wrote %d elem\n",
dev->name, iwr->u.data.length);
#endif /* WE_IOCTL_DEBUG */
}
 
/* Cleanup - I told you it wasn't that long ;-) */
kfree(extra);
}
 
 
/* Call commit handler if needed and defined */
if(ret == -EIWCOMMIT)
ret = call_commit_handler(dev);
 
return ret;
}
 
/* ---------------------------------------------------------------- */
/*
* Main IOCTl dispatcher. Called from the main networking code
* (dev_ioctl() in net/core/dev.c).
* Check the type of IOCTL and call the appropriate wrapper...
*/
int wireless_process_ioctl(struct ifreq *ifr, unsigned int cmd)
{
struct net_device *dev;
iw_handler handler;
 
/* Permissions are already checked in dev_ioctl() before calling us.
* The copy_to/from_user() of ifr is also dealt with in there */
 
/* Make sure the device exists */
if ((dev = __dev_get_by_name(ifr->ifr_name)) == NULL)
return -ENODEV;
 
/* A bunch of special cases, then the generic case...
* Note that 'cmd' is already filtered in dev_ioctl() with
* (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) */
switch(cmd)
{
case SIOCGIWSTATS:
/* Get Wireless Stats */
return dev_iwstats(dev, ifr);
 
case SIOCGIWPRIV:
/* Check if we have some wireless handlers defined */
if(dev->wireless_handlers != NULL) {
/* We export to user space the definition of
* the private handler ourselves */
return ioctl_export_private(dev, ifr);
}
// ## Fall-through for old API ##
default:
/* Generic IOCTL */
/* Basic check */
if (!netif_device_present(dev))
return -ENODEV;
/* New driver API : try to find the handler */
handler = get_handler(dev, cmd);
if(handler != NULL) {
/* Standard and private are not the same */
if(cmd < SIOCIWFIRSTPRIV)
return ioctl_standard_call(dev,
ifr,
cmd,
handler);
else
return ioctl_private_call(dev,
ifr,
cmd,
handler);
}
/* Old driver API : call driver ioctl handler */
if (dev->do_ioctl) {
return dev->do_ioctl(dev, ifr, cmd);
}
return -EOPNOTSUPP;
}
/* Not reached */
return -EINVAL;
}
 
/************************* EVENT PROCESSING *************************/
/*
* Process events generated by the wireless layer or the driver.
* Most often, the event will be propagated through rtnetlink
*/
 
#ifdef WE_EVENT_NETLINK
/* "rtnl" is defined in net/core/rtnetlink.c, but we need it here.
* It is declared in <linux/rtnetlink.h> */
 
/* ---------------------------------------------------------------- */
/*
* Fill a rtnetlink message with our event data.
* Note that we propagate only the specified event and don't dump the
* current wireless config. Dumping the wireless config is far too
* expensive (for each parameter, the driver needs to query the hardware).
*/
static inline int rtnetlink_fill_iwinfo(struct sk_buff * skb,
struct net_device * dev,
int type,
char * event,
int event_len)
{
struct ifinfomsg *r;
struct nlmsghdr *nlh;
unsigned char *b = skb->tail;
 
nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(*r));
r = NLMSG_DATA(nlh);
r->ifi_family = AF_UNSPEC;
r->ifi_type = dev->type;
r->ifi_index = dev->ifindex;
r->ifi_flags = dev->flags;
r->ifi_change = 0; /* Wireless changes don't affect those flags */
 
/* Add the wireless events in the netlink packet */
RTA_PUT(skb, IFLA_WIRELESS,
event_len, event);
 
nlh->nlmsg_len = skb->tail - b;
return skb->len;
 
nlmsg_failure:
rtattr_failure:
skb_trim(skb, b - skb->data);
return -1;
}
 
/* ---------------------------------------------------------------- */
/*
* Create an event and broadcast it on the standard rtnetlink socket.
* This is a pure clone of rtmsg_ifinfo() in net/core/rtnetlink.c.
* Andrzej Krzysztofowicz mandated that I use an IFLA_XXX field
* within a RTM_NEWLINK event.
*/
static inline void rtmsg_iwinfo(struct net_device * dev,
char * event,
int event_len)
{
struct sk_buff *skb;
int size = NLMSG_GOODSIZE;
 
skb = alloc_skb(size, GFP_ATOMIC);
if (!skb)
return;
 
if (rtnetlink_fill_iwinfo(skb, dev, RTM_NEWLINK,
event, event_len) < 0) {
kfree_skb(skb);
return;
}
NETLINK_CB(skb).dst_groups = RTMGRP_LINK;
netlink_broadcast(rtnl, skb, 0, RTMGRP_LINK, GFP_ATOMIC);
}
#endif /* WE_EVENT_NETLINK */
 
/* ---------------------------------------------------------------- */
/*
* Main event dispatcher. Called from other parts and drivers.
* Send the event on the appropriate channels.
* May be called from interrupt context.
*/
void wireless_send_event(struct net_device * dev,
unsigned int cmd,
union iwreq_data * wrqu,
char * extra)
{
const struct iw_ioctl_description * descr = NULL;
int extra_len = 0;
struct iw_event *event; /* Mallocated whole event */
int event_len; /* Its size */
int hdr_len; /* Size of the event header */
/* Don't "optimise" the following variable, it will crash */
unsigned cmd_index; /* *MUST* be unsigned */
 
/* Get the description of the IOCTL */
if(cmd <= SIOCIWLAST) {
cmd_index = cmd - SIOCIWFIRST;
if(cmd_index < standard_ioctl_num)
descr = &(standard_ioctl[cmd_index]);
} else {
cmd_index = cmd - IWEVFIRST;
if(cmd_index < standard_event_num)
descr = &(standard_event[cmd_index]);
}
/* Don't accept unknown events */
if(descr == NULL) {
/* Note : we don't return an error to the driver, because
* the driver would not know what to do about it. It can't
* return an error to the user, because the event is not
* initiated by a user request.
* The best the driver could do is to log an error message.
* We will do it ourselves instead...
*/
printk(KERN_ERR "%s (WE) : Invalid/Unknown Wireless Event (0x%04X)\n",
dev->name, cmd);
return;
}
#ifdef WE_EVENT_DEBUG
printk(KERN_DEBUG "%s (WE) : Got event 0x%04X\n",
dev->name, cmd);
printk(KERN_DEBUG "%s (WE) : Header type : %d, Token type : %d, size : %d, token : %d\n", dev->name, descr->header_type, descr->token_type, descr->token_size, descr->max_tokens);
#endif /* WE_EVENT_DEBUG */
 
/* Check extra parameters and set extra_len */
if(descr->header_type == IW_HEADER_TYPE_POINT) {
/* Check if number of token fits within bounds */
if(wrqu->data.length > descr->max_tokens) {
printk(KERN_ERR "%s (WE) : Wireless Event too big (%d)\n", dev->name, wrqu->data.length);
return;
}
if(wrqu->data.length < descr->min_tokens) {
printk(KERN_ERR "%s (WE) : Wireless Event too small (%d)\n", dev->name, wrqu->data.length);
return;
}
/* Calculate extra_len - extra is NULL for restricted events */
if(extra != NULL)
extra_len = wrqu->data.length * descr->token_size;
#ifdef WE_EVENT_DEBUG
printk(KERN_DEBUG "%s (WE) : Event 0x%04X, tokens %d, extra_len %d\n", dev->name, cmd, wrqu->data.length, extra_len);
#endif /* WE_EVENT_DEBUG */
}
 
/* Total length of the event */
hdr_len = event_type_size[descr->header_type];
event_len = hdr_len + extra_len;
 
#ifdef WE_EVENT_DEBUG
printk(KERN_DEBUG "%s (WE) : Event 0x%04X, hdr_len %d, event_len %d\n", dev->name, cmd, hdr_len, event_len);
#endif /* WE_EVENT_DEBUG */
 
/* Create temporary buffer to hold the event */
event = kmalloc(event_len, GFP_ATOMIC);
if(event == NULL)
return;
 
/* Fill event */
event->len = event_len;
event->cmd = cmd;
memcpy(&event->u, wrqu, hdr_len - IW_EV_LCP_LEN);
if(extra != NULL)
memcpy(((char *) event) + hdr_len, extra, extra_len);
 
#ifdef WE_EVENT_NETLINK
/* rtnetlink event channel */
rtmsg_iwinfo(dev, (char *) event, event_len);
#endif /* WE_EVENT_NETLINK */
 
/* Cleanup */
kfree(event);
 
return; /* Always success, I guess ;-) */
}
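 
/*
 * Illustrative sketch (not part of the original file): how a driver might
 * report a new association to user space. SIOCGIWAP events carry the AP
 * address in wrqu.ap_addr; example_report_assoc and bssid are hypothetical.
 */
static void example_report_assoc(struct net_device *dev,
				 const unsigned char *bssid)
{
	union iwreq_data wrqu;

	memset(&wrqu, 0, sizeof(wrqu));
	wrqu.ap_addr.sa_family = ARPHRD_ETHER;
	memcpy(wrqu.ap_addr.sa_data, bssid, ETH_ALEN);
	wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL);
}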
 
/********************** ENHANCED IWSPY SUPPORT **********************/
/*
* In the old days, the driver was handling spy support all by itself.
* Now, the driver can delegate this task to Wireless Extensions.
* It needs to use the standard spy iw_handlers in struct iw_handler_def,
* push data to us via wireless_spy_update() and include struct iw_spy_data
* in its private part.
* One of the main advantages of centralising spy support here is that
* it becomes much easier to improve and extend it without having to touch
* the drivers. One example is the addition of the Spy-Threshold events.
* Note : IW_WIRELESS_SPY is defined in iw_handler.h
*/
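 
/*
 * Illustrative sketch (not part of the original file) of what a driver has
 * to provide to use this common spy code; example_priv, example_handler_def
 * and the receive-path call are hypothetical:
 *
 *	struct example_priv {
 *		...
 *		struct iw_spy_data	spy_data;
 *	};
 *
 *	example_handler_def.spy_offset = offsetof(struct example_priv,
 *						  spy_data);
 *	standard[SIOCSIWSPY - SIOCIWFIRST] = iw_handler_set_spy;
 *	standard[SIOCGIWSPY - SIOCIWFIRST] = iw_handler_get_spy;
 *
 * and, for each received frame:
 *
 *	wireless_spy_update(dev, source_mac_address, &wstats);
 */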
 
/*------------------------------------------------------------------*/
/*
* Standard Wireless Handler : set Spy List
*/
int iw_handler_set_spy(struct net_device * dev,
struct iw_request_info * info,
union iwreq_data * wrqu,
char * extra)
{
#ifdef IW_WIRELESS_SPY
struct iw_spy_data * spydata = (dev->priv +
dev->wireless_handlers->spy_offset);
struct sockaddr * address = (struct sockaddr *) extra;
 
/* Disable spy collection while we copy the addresses.
* As we don't disable interrupts, we need to do this to avoid races.
* As we are the only writer, this is good enough. */
spydata->spy_number = 0;
 
/* Are there addresses to copy? */
if(wrqu->data.length > 0) {
int i;
 
/* Copy addresses */
for(i = 0; i < wrqu->data.length; i++)
memcpy(spydata->spy_address[i], address[i].sa_data,
ETH_ALEN);
/* Reset stats */
memset(spydata->spy_stat, 0,
sizeof(struct iw_quality) * IW_MAX_SPY);
 
#ifdef WE_SPY_DEBUG
printk(KERN_DEBUG "iw_handler_set_spy() : offset %ld, spydata %p, num %d\n", dev->wireless_handlers->spy_offset, spydata, wrqu->data.length);
for (i = 0; i < wrqu->data.length; i++)
printk(KERN_DEBUG
"%02X:%02X:%02X:%02X:%02X:%02X \n",
spydata->spy_address[i][0],
spydata->spy_address[i][1],
spydata->spy_address[i][2],
spydata->spy_address[i][3],
spydata->spy_address[i][4],
spydata->spy_address[i][5]);
#endif /* WE_SPY_DEBUG */
}
/* Enable addresses */
spydata->spy_number = wrqu->data.length;
 
return 0;
#else /* IW_WIRELESS_SPY */
return -EOPNOTSUPP;
#endif /* IW_WIRELESS_SPY */
}
 
/*------------------------------------------------------------------*/
/*
* Standard Wireless Handler : get Spy List
*/
int iw_handler_get_spy(struct net_device * dev,
struct iw_request_info * info,
union iwreq_data * wrqu,
char * extra)
{
#ifdef IW_WIRELESS_SPY
struct iw_spy_data * spydata = (dev->priv +
dev->wireless_handlers->spy_offset);
struct sockaddr * address = (struct sockaddr *) extra;
int i;
 
wrqu->data.length = spydata->spy_number;
 
/* Copy addresses. */
for(i = 0; i < spydata->spy_number; i++) {
memcpy(address[i].sa_data, spydata->spy_address[i], ETH_ALEN);
address[i].sa_family = AF_UNIX;
}
/* Copy stats to the user buffer (just after). */
if(spydata->spy_number > 0)
memcpy(extra + (sizeof(struct sockaddr) *spydata->spy_number),
spydata->spy_stat,
sizeof(struct iw_quality) * spydata->spy_number);
/* Reset updated flags. */
for(i = 0; i < spydata->spy_number; i++)
spydata->spy_stat[i].updated = 0;
return 0;
#else /* IW_WIRELESS_SPY */
return -EOPNOTSUPP;
#endif /* IW_WIRELESS_SPY */
}
 
/*------------------------------------------------------------------*/
/*
* Standard Wireless Handler : set spy threshold
*/
int iw_handler_set_thrspy(struct net_device * dev,
struct iw_request_info *info,
union iwreq_data * wrqu,
char * extra)
{
#ifdef IW_WIRELESS_THRSPY
struct iw_spy_data * spydata = (dev->priv +
dev->wireless_handlers->spy_offset);
struct iw_thrspy * threshold = (struct iw_thrspy *) extra;
 
/* Just do it */
memcpy(&(spydata->spy_thr_low), &(threshold->low),
2 * sizeof(struct iw_quality));
 
/* Clear flag */
memset(spydata->spy_thr_under, '\0', sizeof(spydata->spy_thr_under));
 
#ifdef WE_SPY_DEBUG
printk(KERN_DEBUG "iw_handler_set_thrspy() : low %d ; high %d\n", spydata->spy_thr_low.level, spydata->spy_thr_high.level);
#endif /* WE_SPY_DEBUG */
 
return 0;
#else /* IW_WIRELESS_THRSPY */
return -EOPNOTSUPP;
#endif /* IW_WIRELESS_THRSPY */
}
 
/*------------------------------------------------------------------*/
/*
* Standard Wireless Handler : get spy threshold
*/
int iw_handler_get_thrspy(struct net_device * dev,
struct iw_request_info *info,
union iwreq_data * wrqu,
char * extra)
{
#ifdef IW_WIRELESS_THRSPY
struct iw_spy_data * spydata = (dev->priv +
dev->wireless_handlers->spy_offset);
struct iw_thrspy * threshold = (struct iw_thrspy *) extra;
 
/* Just do it */
memcpy(&(threshold->low), &(spydata->spy_thr_low),
2 * sizeof(struct iw_quality));
 
return 0;
#else /* IW_WIRELESS_THRSPY */
return -EOPNOTSUPP;
#endif /* IW_WIRELESS_THRSPY */
}
 
#ifdef IW_WIRELESS_THRSPY
/*------------------------------------------------------------------*/
/*
* Prepare and send a Spy Threshold event
*/
static void iw_send_thrspy_event(struct net_device * dev,
struct iw_spy_data * spydata,
unsigned char * address,
struct iw_quality * wstats)
{
union iwreq_data wrqu;
struct iw_thrspy threshold;
 
/* Init */
wrqu.data.length = 1;
wrqu.data.flags = 0;
/* Copy address */
memcpy(threshold.addr.sa_data, address, ETH_ALEN);
threshold.addr.sa_family = ARPHRD_ETHER;
/* Copy stats */
memcpy(&(threshold.qual), wstats, sizeof(struct iw_quality));
/* Copy also thresholds */
memcpy(&(threshold.low), &(spydata->spy_thr_low),
2 * sizeof(struct iw_quality));
 
#ifdef WE_SPY_DEBUG
printk(KERN_DEBUG "iw_send_thrspy_event() : address %02X:%02X:%02X:%02X:%02X:%02X, level %d, up = %d\n",
threshold.addr.sa_data[0],
threshold.addr.sa_data[1],
threshold.addr.sa_data[2],
threshold.addr.sa_data[3],
threshold.addr.sa_data[4],
threshold.addr.sa_data[5], threshold.qual.level);
#endif /* WE_SPY_DEBUG */
 
/* Send event to user space */
wireless_send_event(dev, SIOCGIWTHRSPY, &wrqu, (char *) &threshold);
}
#endif /* IW_WIRELESS_THRSPY */
 
/* ---------------------------------------------------------------- */
/*
* Call for the driver to update the spy data.
* For now, the spy data is a simple array. As the size of the array is
* small, this is good enough. If we wanted to support larger number of
* spy addresses, we should use something more efficient...
*/
void wireless_spy_update(struct net_device * dev,
unsigned char * address,
struct iw_quality * wstats)
{
#ifdef IW_WIRELESS_SPY
struct iw_spy_data * spydata = (dev->priv +
dev->wireless_handlers->spy_offset);
int i;
int match = -1;
 
#ifdef WE_SPY_DEBUG
printk(KERN_DEBUG "wireless_spy_update() : offset %ld, spydata %p, address %02X:%02X:%02X:%02X:%02X:%02X\n", dev->wireless_handlers->spy_offset, spydata, address[0], address[1], address[2], address[3], address[4], address[5]);
#endif /* WE_SPY_DEBUG */
 
/* Update all records that match */
for(i = 0; i < spydata->spy_number; i++)
if(!memcmp(address, spydata->spy_address[i], ETH_ALEN)) {
memcpy(&(spydata->spy_stat[i]), wstats,
sizeof(struct iw_quality));
match = i;
}
#ifdef IW_WIRELESS_THRSPY
/* Generate an event if we cross the spy threshold.
* To avoid event storms, we have a simple hysteresis : we generate an
* event only when we go under the low threshold or above the
* high threshold. */
if(match >= 0) {
if(spydata->spy_thr_under[match]) {
if(wstats->level > spydata->spy_thr_high.level) {
spydata->spy_thr_under[match] = 0;
iw_send_thrspy_event(dev, spydata,
address, wstats);
}
} else {
if(wstats->level < spydata->spy_thr_low.level) {
spydata->spy_thr_under[match] = 1;
iw_send_thrspy_event(dev, spydata,
address, wstats);
}
}
}
#endif /* IW_WIRELESS_THRSPY */
#endif /* IW_WIRELESS_SPY */
}
