URL
https://opencores.org/ocsvn/or1k/or1k/trunk
Subversion Repositories or1k
Compare Revisions
- This comparison shows the changes necessary to convert path
/or1k/trunk/linux/linux-2.4/net/core
- from Rev 1278 to Rev 1765
- ↔ Reverse comparison
Rev 1278 → Rev 1765
/sock.c
0,0 → 1,1219
/* |
* INET An implementation of the TCP/IP protocol suite for the LINUX |
* operating system. INET is implemented using the BSD Socket |
* interface as the means of communication with the user level. |
* |
* Generic socket support routines. Memory allocators, socket lock/release |
* handler for protocols to use and generic option handler. |
* |
* |
* Version: $Id: sock.c,v 1.1.1.1 2004-04-17 22:13:17 phoenix Exp $ |
* |
* Authors: Ross Biro, <bir7@leland.Stanford.Edu> |
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> |
* Florian La Roche, <flla@stud.uni-sb.de> |
* Alan Cox, <A.Cox@swansea.ac.uk> |
* |
* Fixes: |
* Alan Cox : Numerous verify_area() problems |
* Alan Cox : Connecting on a connecting socket |
* now returns an error for tcp. |
* Alan Cox : sock->protocol is set correctly. |
* and is not sometimes left as 0. |
* Alan Cox : connect handles icmp errors on a |
* connect properly. Unfortunately there |
* is a restart syscall nasty there. I |
* can't match BSD without hacking the C |
* library. Ideas urgently sought! |
* Alan Cox : Disallow bind() to addresses that are |
* not ours - especially broadcast ones!! |
* Alan Cox : Socket 1024 _IS_ ok for users. (fencepost) |
* Alan Cox : sock_wfree/sock_rfree don't destroy sockets, |
* instead they leave that for the DESTROY timer. |
* Alan Cox : Clean up error flag in accept |
* Alan Cox : TCP ack handling is buggy, the DESTROY timer |
* was buggy. Put a remove_sock() in the handler |
* for memory when we hit 0. Also altered the timer |
* code. The ACK stuff can wait and needs major |
* TCP layer surgery. |
* Alan Cox : Fixed TCP ack bug, removed remove sock |
* and fixed timer/inet_bh race. |
* Alan Cox : Added zapped flag for TCP |
* Alan Cox : Move kfree_skb into skbuff.c and tidied up surplus code |
* Alan Cox : for new sk_buff allocations wmalloc/rmalloc now call alloc_skb |
* Alan Cox : kfree_s calls now are kfree_skbmem so we can track skb resources |
* Alan Cox : Supports socket option broadcast now as does udp. Packet and raw need fixing. |
* Alan Cox : Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so... |
* Rick Sladkey : Relaxed UDP rules for matching packets. |
* C.E.Hawkins : IFF_PROMISC/SIOCGHWADDR support |
* Pauline Middelink : identd support |
* Alan Cox : Fixed connect() taking signals I think. |
* Alan Cox : SO_LINGER supported |
* Alan Cox : Error reporting fixes |
* Anonymous : inet_create tidied up (sk->reuse setting) |
* Alan Cox : inet sockets don't set sk->type! |
* Alan Cox : Split socket option code |
* Alan Cox : Callbacks |
* Alan Cox : Nagle flag for Charles & Johannes stuff |
* Alex : Removed restriction on inet fioctl |
* Alan Cox : Splitting INET from NET core |
* Alan Cox : Fixed bogus SO_TYPE handling in getsockopt() |
* Adam Caldwell : Missing return in SO_DONTROUTE/SO_DEBUG code |
* Alan Cox : Split IP from generic code |
* Alan Cox : New kfree_skbmem() |
* Alan Cox : Make SO_DEBUG superuser only. |
* Alan Cox : Allow anyone to clear SO_DEBUG |
* (compatibility fix) |
* Alan Cox : Added optimistic memory grabbing for AF_UNIX throughput. |
* Alan Cox : Allocator for a socket is settable. |
* Alan Cox : SO_ERROR includes soft errors. |
* Alan Cox : Allow NULL arguments on some SO_ opts |
* Alan Cox : Generic socket allocation to make hooks |
* easier (suggested by Craig Metz). |
* Michael Pall : SO_ERROR returns positive errno again |
* Steve Whitehouse: Added default destructor to free |
* protocol private data. |
* Steve Whitehouse: Added various other default routines |
* common to several socket families. |
* Chris Evans : Call suser() check last on F_SETOWN |
* Jay Schulist : Added SO_ATTACH_FILTER and SO_DETACH_FILTER. |
* Andi Kleen : Add sock_kmalloc()/sock_kfree_s() |
* Andi Kleen : Fix write_space callback |
* Chris Evans : Security fixes - signedness again |
* Arnaldo C. Melo : cleanups, use skb_queue_purge |
* |
* To Fix: |
* |
* |
* This program is free software; you can redistribute it and/or |
* modify it under the terms of the GNU General Public License |
* as published by the Free Software Foundation; either version |
* 2 of the License, or (at your option) any later version. |
*/ |
|
#include <linux/config.h> |
#include <linux/errno.h> |
#include <linux/types.h> |
#include <linux/socket.h> |
#include <linux/in.h> |
#include <linux/kernel.h> |
#include <linux/major.h> |
#include <linux/sched.h> |
#include <linux/timer.h> |
#include <linux/string.h> |
#include <linux/sockios.h> |
#include <linux/net.h> |
#include <linux/fcntl.h> |
#include <linux/mm.h> |
#include <linux/slab.h> |
#include <linux/interrupt.h> |
#include <linux/poll.h> |
#include <linux/tcp.h> |
#include <linux/init.h> |
|
#include <asm/uaccess.h> |
#include <asm/system.h> |
|
#include <linux/netdevice.h> |
#include <net/protocol.h> |
#include <linux/skbuff.h> |
#include <net/sock.h> |
#include <linux/ipsec.h> |
|
#ifdef CONFIG_FILTER |
#include <linux/filter.h> |
#endif |
|
#ifdef CONFIG_INET |
#include <net/tcp.h> |
#endif |
|
/* Take into consideration the size of the struct sk_buff overhead in the
 * determination of these values, since that is non-constant across
 * platforms.  This makes socket queueing behavior and performance
 * not depend upon such differences.
 */
#define _SK_MEM_PACKETS		256
#define _SK_MEM_OVERHEAD	(sizeof(struct sk_buff) + 256)
#define SK_WMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
#define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)

/* Run time adjustable parameters (exported via sysctl in sysctl_net_core.c). */
__u32 sysctl_wmem_max = SK_WMEM_MAX;
__u32 sysctl_rmem_max = SK_RMEM_MAX;
__u32 sysctl_wmem_default = SK_WMEM_MAX;
__u32 sysctl_rmem_default = SK_RMEM_MAX;

/* Maximal space eaten by iovec or ancillary data plus some space */
int sysctl_optmem_max = sizeof(unsigned long)*(2*UIO_MAXIOV + 512);
|
/*
 * Copy a struct timeval from userspace and convert it to a jiffies
 * timeout stored in *timeo_p.  A zero timeval means "no timeout"
 * (MAX_SCHEDULE_TIMEOUT).  Returns 0 on success, -EINVAL for a short
 * optlen, -EFAULT on a bad user pointer.
 */
static int sock_set_timeout(long *timeo_p, char *optval, int optlen)
{
	struct timeval tv;

	if (optlen < sizeof(tv))
		return -EINVAL;
	if (copy_from_user(&tv, optval, sizeof(tv)))
		return -EFAULT;

	*timeo_p = MAX_SCHEDULE_TIMEOUT;
	if (tv.tv_sec == 0 && tv.tv_usec == 0)
		return 0;
	/* Only convert if it fits; otherwise keep the "infinite" default.
	 * The usec part is rounded up to the next jiffy. */
	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
		*timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
	return 0;
}
|
/* |
* This is meant for all protocols to use and covers goings on |
* at the socket level. Everything here is generic. |
*/ |
|
/*
 * Generic SOL_SOCKET setsockopt handler used by all protocol families.
 * Copies the option value from userspace, takes the socket lock, and
 * updates the matching struct sock field.  Returns 0 or a negative errno.
 */
int sock_setsockopt(struct socket *sock, int level, int optname,
		    char *optval, int optlen)
{
	struct sock *sk=sock->sk;
#ifdef CONFIG_FILTER
	struct sk_filter *filter;
#endif
	int val;
	int valbool;
	struct linger ling;
	int ret = 0;

	/*
	 *	Options without arguments
	 */

#ifdef SO_DONTLINGER		/* Compatibility item... */
	switch(optname)
	{
		case SO_DONTLINGER:
			sk->linger=0;
			return 0;
	}
#endif

	if(optlen<sizeof(int))
		return(-EINVAL);

	if (get_user(val, (int *)optval))
		return -EFAULT;

	valbool = val?1:0;

	lock_sock(sk);

	switch(optname)
	{
		case SO_DEBUG:
			/* Enabling debug requires CAP_NET_ADMIN; clearing it
			 * is allowed for anyone (compatibility). */
			if(val && !capable(CAP_NET_ADMIN))
			{
				ret = -EACCES;
			}
			else
				sk->debug=valbool;
			break;
		case SO_REUSEADDR:
			sk->reuse = valbool;
			break;
		case SO_TYPE:
		case SO_ERROR:
			/* Read-only options; settable only via getsockopt. */
			ret = -ENOPROTOOPT;
			break;
		case SO_DONTROUTE:
			sk->localroute=valbool;
			break;
		case SO_BROADCAST:
			sk->broadcast=valbool;
			break;
		case SO_SNDBUF:
			/* Don't error on this BSD doesn't and if you think
			   about it this is right. Otherwise apps have to
			   play 'guess the biggest size' games. RCVBUF/SNDBUF
			   are treated in BSD as hints */

			if (val > sysctl_wmem_max)
				val = sysctl_wmem_max;

			sk->userlocks |= SOCK_SNDBUF_LOCK;
			/* Doubled to account for sk_buff overhead. */
			if ((val * 2) < SOCK_MIN_SNDBUF)
				sk->sndbuf = SOCK_MIN_SNDBUF;
			else
				sk->sndbuf = (val * 2);

			/*
			 *	Wake up sending tasks if we
			 *	upped the value.
			 */
			sk->write_space(sk);
			break;

		case SO_RCVBUF:
			/* Don't error on this BSD doesn't and if you think
			   about it this is right. Otherwise apps have to
			   play 'guess the biggest size' games. RCVBUF/SNDBUF
			   are treated in BSD as hints */

			if (val > sysctl_rmem_max)
				val = sysctl_rmem_max;

			sk->userlocks |= SOCK_RCVBUF_LOCK;
			/* FIXME: is this lower bound the right one? */
			if ((val * 2) < SOCK_MIN_RCVBUF)
				sk->rcvbuf = SOCK_MIN_RCVBUF;
			else
				sk->rcvbuf = (val * 2);
			break;

		case SO_KEEPALIVE:
#ifdef CONFIG_INET
			if (sk->protocol == IPPROTO_TCP)
			{
				/* TCP needs to (re)arm its keepalive timer. */
				tcp_set_keepalive(sk, valbool);
			}
#endif
			sk->keepopen = valbool;
			break;

	 	case SO_OOBINLINE:
			sk->urginline = valbool;
			break;

	 	case SO_NO_CHECK:
			sk->no_check = valbool;
			break;

		case SO_PRIORITY:
			/* Priorities 0..6 are free for all; higher ones
			 * require CAP_NET_ADMIN. */
			if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
				sk->priority = val;
			else
				ret = -EPERM;
			break;

		case SO_LINGER:
			if(optlen<sizeof(ling)) {
				ret = -EINVAL;	/* 1003.1g */
				break;
			}
			if (copy_from_user(&ling,optval,sizeof(ling))) {
				ret = -EFAULT;
				break;
			}
			if(ling.l_onoff==0) {
				sk->linger=0;
			} else {
#if (BITS_PER_LONG == 32)
				/* Guard the HZ multiply against 32-bit overflow. */
				if (ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
					sk->lingertime=MAX_SCHEDULE_TIMEOUT;
				else
#endif
					sk->lingertime=ling.l_linger*HZ;
				sk->linger=1;
			}
			break;

		case SO_BSDCOMPAT:
			sk->bsdism = valbool;
			break;

		case SO_PASSCRED:
			sock->passcred = valbool;
			break;

		case SO_TIMESTAMP:
			sk->rcvtstamp = valbool;
			break;

		case SO_RCVLOWAT:
			if (val < 0)
				val = INT_MAX;
			/* 0 is normalized to 1 (GNU "x ? : y" extension). */
			sk->rcvlowat = val ? : 1;
			break;

		case SO_RCVTIMEO:
			ret = sock_set_timeout(&sk->rcvtimeo, optval, optlen);
			break;

		case SO_SNDTIMEO:
			ret = sock_set_timeout(&sk->sndtimeo, optval, optlen);
			break;

#ifdef CONFIG_NETDEVICES
		case SO_BINDTODEVICE:
		{
			char devname[IFNAMSIZ];

			/* Sorry... */
			if (!capable(CAP_NET_RAW)) {
				ret = -EPERM;
				break;
			}

			/* Bind this socket to a particular device like "eth0",
			 * as specified in the passed interface name. If the
			 * name is "" or the option length is zero the socket
			 * is not bound.
			 */

			if (!valbool) {
				sk->bound_dev_if = 0;
			} else {
				if (optlen > IFNAMSIZ)
					optlen = IFNAMSIZ;
				if (copy_from_user(devname, optval, optlen)) {
					ret = -EFAULT;
					break;
				}

				/* Remove any cached route for this socket. */
				sk_dst_reset(sk);

				if (devname[0] == '\0') {
					sk->bound_dev_if = 0;
				} else {
					/* dev_get_by_name takes a reference;
					 * drop it once we have the ifindex. */
					struct net_device *dev = dev_get_by_name(devname);
					if (!dev) {
						ret = -ENODEV;
						break;
					}
					sk->bound_dev_if = dev->ifindex;
					dev_put(dev);
				}
			}
			break;
		}
#endif


#ifdef CONFIG_FILTER
		case SO_ATTACH_FILTER:
			ret = -EINVAL;
			if (optlen == sizeof(struct sock_fprog)) {
				struct sock_fprog fprog;

				ret = -EFAULT;
				if (copy_from_user(&fprog, optval, sizeof(fprog)))
					break;

				ret = sk_attach_filter(&fprog, sk);
			}
			break;

		case SO_DETACH_FILTER:
			/* Detach under the slock so we do not race with the
			 * softirq receive path using sk->filter. */
			spin_lock_bh(&sk->lock.slock);
			filter = sk->filter;
                        if (filter) {
				sk->filter = NULL;
				spin_unlock_bh(&sk->lock.slock);
				sk_filter_release(sk, filter);
				break;
			}
			spin_unlock_bh(&sk->lock.slock);
			ret = -ENONET;
			break;
#endif
		/* We implement the SO_SNDLOWAT etc to
		   not be settable (1003.1g 5.3) */
		default:
		  	ret = -ENOPROTOOPT;
			break;
  	}
	release_sock(sk);
	return ret;
}
|
|
int sock_getsockopt(struct socket *sock, int level, int optname, |
char *optval, int *optlen) |
{ |
struct sock *sk = sock->sk; |
|
union |
{ |
int val; |
struct linger ling; |
struct timeval tm; |
} v; |
|
unsigned int lv=sizeof(int),len; |
|
if(get_user(len,optlen)) |
return -EFAULT; |
if(len < 0) |
return -EINVAL; |
|
switch(optname) |
{ |
case SO_DEBUG: |
v.val = sk->debug; |
break; |
|
case SO_DONTROUTE: |
v.val = sk->localroute; |
break; |
|
case SO_BROADCAST: |
v.val= sk->broadcast; |
break; |
|
case SO_SNDBUF: |
v.val=sk->sndbuf; |
break; |
|
case SO_RCVBUF: |
v.val =sk->rcvbuf; |
break; |
|
case SO_REUSEADDR: |
v.val = sk->reuse; |
break; |
|
case SO_KEEPALIVE: |
v.val = sk->keepopen; |
break; |
|
case SO_TYPE: |
v.val = sk->type; |
break; |
|
case SO_ERROR: |
v.val = -sock_error(sk); |
if(v.val==0) |
v.val=xchg(&sk->err_soft,0); |
break; |
|
case SO_OOBINLINE: |
v.val = sk->urginline; |
break; |
|
case SO_NO_CHECK: |
v.val = sk->no_check; |
break; |
|
case SO_PRIORITY: |
v.val = sk->priority; |
break; |
|
case SO_LINGER: |
lv=sizeof(v.ling); |
v.ling.l_onoff=sk->linger; |
v.ling.l_linger=sk->lingertime/HZ; |
break; |
|
case SO_BSDCOMPAT: |
v.val = sk->bsdism; |
break; |
|
case SO_TIMESTAMP: |
v.val = sk->rcvtstamp; |
break; |
|
case SO_RCVTIMEO: |
lv=sizeof(struct timeval); |
if (sk->rcvtimeo == MAX_SCHEDULE_TIMEOUT) { |
v.tm.tv_sec = 0; |
v.tm.tv_usec = 0; |
} else { |
v.tm.tv_sec = sk->rcvtimeo/HZ; |
v.tm.tv_usec = ((sk->rcvtimeo%HZ)*1000)/HZ; |
} |
break; |
|
case SO_SNDTIMEO: |
lv=sizeof(struct timeval); |
if (sk->sndtimeo == MAX_SCHEDULE_TIMEOUT) { |
v.tm.tv_sec = 0; |
v.tm.tv_usec = 0; |
} else { |
v.tm.tv_sec = sk->sndtimeo/HZ; |
v.tm.tv_usec = ((sk->sndtimeo%HZ)*1000)/HZ; |
} |
break; |
|
case SO_RCVLOWAT: |
v.val = sk->rcvlowat; |
break; |
|
case SO_SNDLOWAT: |
v.val=1; |
break; |
|
case SO_PASSCRED: |
v.val = sock->passcred; |
break; |
|
case SO_PEERCRED: |
if (len > sizeof(sk->peercred)) |
len = sizeof(sk->peercred); |
if (copy_to_user(optval, &sk->peercred, len)) |
return -EFAULT; |
goto lenout; |
|
case SO_PEERNAME: |
{ |
char address[128]; |
|
if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2)) |
return -ENOTCONN; |
if (lv < len) |
return -EINVAL; |
if(copy_to_user((void*)optval, address, len)) |
return -EFAULT; |
goto lenout; |
} |
|
/* Dubious BSD thing... Probably nobody even uses it, but |
* the UNIX standard wants it for whatever reason... -DaveM |
*/ |
case SO_ACCEPTCONN: |
v.val = (sk->state == TCP_LISTEN); |
break; |
|
default: |
return(-ENOPROTOOPT); |
} |
if (len > lv) |
len = lv; |
if (copy_to_user(optval, &v, len)) |
return -EFAULT; |
lenout: |
if (put_user(len, optlen)) |
return -EFAULT; |
return 0; |
} |
|
/* Slab cache backing all struct sock allocations; created in sk_init(). */
static kmem_cache_t *sk_cachep;
|
/* |
* All socket objects are allocated here. This is for future |
* usage. |
*/ |
|
struct sock *sk_alloc(int family, int priority, int zero_it) |
{ |
struct sock *sk = kmem_cache_alloc(sk_cachep, priority); |
|
if(sk && zero_it) { |
memset(sk, 0, sizeof(struct sock)); |
sk->family = family; |
sock_lock_init(sk); |
} |
|
return sk; |
} |
|
/*
 * Final destruction of a struct sock: run the protocol's destructor,
 * drop any attached socket filter, warn about leaked option memory and
 * return the object to the slab cache.  Caller must hold the last
 * reference.
 */
void sk_free(struct sock *sk)
{
#ifdef CONFIG_FILTER
	struct sk_filter *filter;
#endif

	/* Protocol-private cleanup runs first, while sk is still intact. */
	if (sk->destruct)
		sk->destruct(sk);

#ifdef CONFIG_FILTER
	filter = sk->filter;
	if (filter) {
		sk_filter_release(sk, filter);
		sk->filter = NULL;
	}
#endif

	/* omem_alloc should be zero here; anything else is a leak. */
	if (atomic_read(&sk->omem_alloc))
		printk(KERN_DEBUG "sk_free: optmem leakage (%d bytes) detected.\n", atomic_read(&sk->omem_alloc));

	kmem_cache_free(sk_cachep, sk);
}
|
/*
 * Boot-time initialisation for the socket layer: create the struct sock
 * slab cache and scale the default/maximum buffer sysctls to the amount
 * of physical memory present.
 */
void __init sk_init(void)
{
	sk_cachep = kmem_cache_create("sock", sizeof(struct sock), 0,
				      SLAB_HWCACHE_ALIGN, 0, 0);
	if (sk_cachep == NULL)
		printk(KERN_CRIT "sk_init: Cannot create sock SLAB cache!");

	if (num_physpages <= 4096) {
		/* Small-memory box (<= 16MB with 4K pages): shrink everything. */
		sysctl_wmem_max = sysctl_wmem_default = 32767;
		sysctl_rmem_max = sysctl_rmem_default = 32767;
	} else if (num_physpages >= 131072) {
		/* Plenty of memory: raise only the hard limits. */
		sysctl_wmem_max = 131071;
		sysctl_rmem_max = 131071;
	}
}
|
/* |
* Simple resource managers for sockets. |
*/ |
|
|
/* |
* Write buffer destructor automatically called from kfree_skb. |
*/ |
/*
 *	Write buffer destructor automatically called from kfree_skb.
 *	Returns the skb's charge to sk->wmem_alloc, wakes any writer
 *	waiting for space, and drops the reference the skb held on sk.
 */
void sock_wfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	/* In case it might be waiting for more memory. */
	atomic_sub(skb->truesize, &sk->wmem_alloc);
	if (!sk->use_write_queue)
		sk->write_space(sk);
	sock_put(sk);
}
|
/* |
* Read buffer destructor automatically called from kfree_skb. |
*/ |
/*
 *	Read buffer destructor automatically called from kfree_skb.
 *	Simply uncharges the skb from the socket's receive allocation.
 */
void sock_rfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	atomic_sub(skb->truesize, &sk->rmem_alloc);
}
|
/* |
* Allocate a skb from the socket's send buffer. |
*/ |
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, int priority) |
{ |
if (force || atomic_read(&sk->wmem_alloc) < sk->sndbuf) { |
struct sk_buff * skb = alloc_skb(size, priority); |
if (skb) { |
skb_set_owner_w(skb, sk); |
return skb; |
} |
} |
return NULL; |
} |
|
/* |
* Allocate a skb from the socket's receive buffer. |
*/ |
struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int priority) |
{ |
if (force || atomic_read(&sk->rmem_alloc) < sk->rcvbuf) { |
struct sk_buff *skb = alloc_skb(size, priority); |
if (skb) { |
skb_set_owner_r(skb, sk); |
return skb; |
} |
} |
return NULL; |
} |
|
/* |
* Allocate a memory block from the socket's option memory buffer. |
*/ |
/*
 *	Allocate a memory block from the socket's option memory buffer.
 *	The allocation is charged against sk->omem_alloc and bounded by
 *	sysctl_optmem_max.  Returns NULL when the quota is exhausted or
 *	kmalloc fails.  Free with sock_kfree_s().
 */
void *sock_kmalloc(struct sock *sk, int size, int priority)
{
	if ((unsigned)size <= sysctl_optmem_max &&
	    atomic_read(&sk->omem_alloc)+size < sysctl_optmem_max) {
		void *mem;
		/* First do the add, to avoid the race if kmalloc
 		 * might sleep.
		 */
		atomic_add(size, &sk->omem_alloc);
		mem = kmalloc(size, priority);
		if (mem)
			return mem;
		/* Allocation failed: give the charge back. */
		atomic_sub(size, &sk->omem_alloc);
	}
	return NULL;
}
|
/* |
* Free an option memory block. |
*/ |
/*
 *	Free an option memory block obtained from sock_kmalloc().
 *	'size' must match the original allocation so the omem_alloc
 *	charge balances out.
 */
void sock_kfree_s(struct sock *sk, void *mem, int size)
{
	kfree(mem);
	atomic_sub(size, &sk->omem_alloc);
}
|
/* It is almost wait_for_tcp_memory minus release_sock/lock_sock. |
I think, these locks should be removed for datagram sockets. |
*/ |
/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
   I think, these locks should be removed for datagram sockets.
 */
/*
 * Sleep until the socket has free write space, a signal arrives, an
 * error is pending, send is shut down, or the timeout expires.
 * Returns the remaining timeout in jiffies.  Note the condition is
 * re-checked after setting TASK_INTERRUPTIBLE to avoid losing a wakeup.
 */
static long sock_wait_for_wmem(struct sock * sk, long timeo)
{
	DECLARE_WAITQUEUE(wait, current);

	clear_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags);
	add_wait_queue(sk->sleep, &wait);
	for (;;) {
		if (!timeo)
			break;
		if (signal_pending(current))
			break;
		set_bit(SOCK_NOSPACE, &sk->socket->flags);
		set_current_state(TASK_INTERRUPTIBLE);
		if (atomic_read(&sk->wmem_alloc) < sk->sndbuf)
			break;
		if (sk->shutdown & SEND_SHUTDOWN)
			break;
		if (sk->err)
			break;
		timeo = schedule_timeout(timeo);
	}
	__set_current_state(TASK_RUNNING);
	remove_wait_queue(sk->sleep, &wait);
	return timeo;
}
|
|
/* |
* Generic send/receive buffer handlers |
*/ |
|
/*
 * Allocate a send skb with header_len bytes of linear space plus
 * data_len bytes spread over page fragments, blocking (subject to the
 * socket's send timeout) until write space is available.  On failure
 * returns NULL and stores a negative errno in *errcode.
 */
struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
				     unsigned long data_len, int noblock, int *errcode)
{
	struct sk_buff *skb;
	long timeo;
	int err;

	timeo = sock_sndtimeo(sk, noblock);
	while (1) {
		err = sock_error(sk);
		if (err != 0)
			goto failure;

		err = -EPIPE;
		if (sk->shutdown & SEND_SHUTDOWN)
			goto failure;

		if (atomic_read(&sk->wmem_alloc) < sk->sndbuf) {
			skb = alloc_skb(header_len, sk->allocation);
			if (skb) {
				int npages;
				int i;

				/* No pages, we're done... */
				if (!data_len)
					break;

				npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
				skb->truesize += data_len;
				skb_shinfo(skb)->nr_frags = npages;
				for (i = 0; i < npages; i++) {
					struct page *page;
					skb_frag_t *frag;

					page = alloc_pages(sk->allocation, 0);
					if (!page) {
						err = -ENOBUFS;
						/* Trim nr_frags so kfree_skb only
						 * releases the pages we got. */
						skb_shinfo(skb)->nr_frags = i;
						kfree_skb(skb);
						goto failure;
					}

					frag = &skb_shinfo(skb)->frags[i];
					frag->page = page;
					frag->page_offset = 0;
					/* Last fragment may be partial. */
					frag->size = (data_len >= PAGE_SIZE ?
						      PAGE_SIZE :
						      data_len);
					data_len -= PAGE_SIZE;
				}

				/* Full success... */
				break;
			}
			err = -ENOBUFS;
			goto failure;
		}
		/* No write space: mark the socket and wait (or bail). */
		set_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags);
		set_bit(SOCK_NOSPACE, &sk->socket->flags);
		err = -EAGAIN;
		if (!timeo)
			goto failure;
		if (signal_pending(current))
			goto interrupted;
		timeo = sock_wait_for_wmem(sk, timeo);
	}

	skb_set_owner_w(skb, sk);
	return skb;

interrupted:
	err = sock_intr_errno(timeo);
failure:
	*errcode = err;
	return NULL;
}
|
/*
 * Convenience wrapper: allocate a purely linear send skb of 'size'
 * bytes (no page fragments).  See sock_alloc_send_pskb().
 */
struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
				    int noblock, int *errcode)
{
	return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
}
|
/*
 * Slow path of lock_sock(): sleep until the socket owner releases it.
 * Called with sk->lock.slock held; the spinlock is dropped around
 * schedule() and re-taken before re-checking lock.users.
 */
void __lock_sock(struct sock *sk)
{
	DECLARE_WAITQUEUE(wait, current);

	add_wait_queue_exclusive(&sk->lock.wq, &wait);
	for(;;) {
		current->state = TASK_UNINTERRUPTIBLE;
		spin_unlock_bh(&sk->lock.slock);
		schedule();
		spin_lock_bh(&sk->lock.slock);
		if(!sk->lock.users)
			break;
	}
	current->state = TASK_RUNNING;
	remove_wait_queue(&sk->lock.wq, &wait);
}
|
/*
 * Slow path of release_sock(): feed packets that were backlogged while
 * the socket was owned into the protocol's backlog_rcv handler.  The
 * bh lock is dropped while processing each batch so softirqs can keep
 * queueing; the outer loop re-checks for new arrivals.
 */
void __release_sock(struct sock *sk)
{
	struct sk_buff *skb = sk->backlog.head;

	do {
		sk->backlog.head = sk->backlog.tail = NULL;
		bh_unlock_sock(sk);

		do {
			struct sk_buff *next = skb->next;

			skb->next = NULL;
			sk->backlog_rcv(sk, skb);
			skb = next;
		} while (skb != NULL);

		bh_lock_sock(sk);
	} while((skb = sk->backlog.head) != NULL);
}
|
/* |
* Generic socket manager library. Most simpler socket families |
* use this to manage their socket lists. At some point we should |
* hash these. By making this generic we get the lot hashed for free. |
* |
* It is broken by design. All the protocols using it must be fixed. --ANK |
*/ |
|
/* Single lock protecting every protocol's simple sklist chain. */
rwlock_t net_big_sklist_lock = RW_LOCK_UNLOCKED;
|
void sklist_remove_socket(struct sock **list, struct sock *sk) |
{ |
struct sock *s; |
|
write_lock_bh(&net_big_sklist_lock); |
|
while ((s = *list) != NULL) { |
if (s == sk) { |
*list = s->next; |
break; |
} |
list = &s->next; |
} |
|
write_unlock_bh(&net_big_sklist_lock); |
if (s) |
sock_put(s); |
} |
|
void sklist_insert_socket(struct sock **list, struct sock *sk) |
{ |
write_lock_bh(&net_big_sklist_lock); |
sk->next= *list; |
*list=sk; |
sock_hold(sk); |
write_unlock_bh(&net_big_sklist_lock); |
} |
|
/*
 *	This is only called from user mode. Thus it protects itself against
 *	interrupt users but doesn't worry about being called during work.
 *	Once it is removed from the queue no interrupt or bottom half will
 *	touch it and we are (fairly 8-) ) safe.
 */

/* Forward declaration: needed by sklist_destroy_timer() below. */
void sklist_destroy_socket(struct sock **list, struct sock *sk);
|
/* |
* Handler for deferred kills. |
*/ |
|
/*
 *	Handler for deferred kills: retries the destroy once buffers may
 *	have drained.  'data' is the struct sock pointer; the socket has
 *	already been unlinked, hence the NULL list.
 */

static void sklist_destroy_timer(unsigned long data)
{
	struct sock *sk=(struct sock *)data;
	sklist_destroy_socket(NULL,sk);
}
|
/* |
* Destroy a socket. We pass NULL for a list if we know the |
* socket is not on a list. |
*/ |
|
/*
 *	Destroy a socket. We pass NULL for a list if we know the
 *	socket is not on a list.  If buffers are still charged to the
 *	socket the final sock_put() is deferred via sk->timer, which
 *	re-invokes this function after SOCK_DESTROY_TIME.
 */

void sklist_destroy_socket(struct sock **list,struct sock *sk)
{
	if(list)
		sklist_remove_socket(list, sk);

	skb_queue_purge(&sk->receive_queue);

	if(atomic_read(&sk->wmem_alloc) == 0 &&
	   atomic_read(&sk->rmem_alloc) == 0 &&
	   sk->dead)
	{
		sock_put(sk);
	}
	else
	{
		/*
		 *	Someone is using our buffers still.. defer
		 */
		init_timer(&sk->timer);
		sk->timer.expires=jiffies+SOCK_DESTROY_TIME;
		sk->timer.function=sklist_destroy_timer;
		sk->timer.data = (unsigned long)sk;
		add_timer(&sk->timer);
	}
}
|
/* |
* Set of default routines for initialising struct proto_ops when |
* the protocol does not support a particular function. In certain |
* cases where it makes no sense for a protocol to have a "do nothing" |
* function, some default processing is provided. |
*/ |
|
/* Default release op: nothing to tear down. */
int sock_no_release(struct socket *sock)
{
	return 0;
}
|
/* Default bind op for families that do not support bind(). */
int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
{
	return -EOPNOTSUPP;
}
|
/* Default connect op for families that do not support connect(). */
int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
		    int len, int flags)
{
	return -EOPNOTSUPP;
}
|
/* Default socketpair op for families that do not support it. */
int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
{
	return -EOPNOTSUPP;
}
|
/* Default accept op for non-listening families. */
int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
{
	return -EOPNOTSUPP;
}
|
/* Default getname op for families with no address to report. */
int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
		    int *len, int peer)
{
	return -EOPNOTSUPP;
}
|
/* Default poll op: never reports any readiness. */
unsigned int sock_no_poll(struct file * file, struct socket *sock, poll_table *pt)
{
	return 0;
}
|
/* Default ioctl op for families with no ioctls. */
int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return -EOPNOTSUPP;
}
|
/* Default listen op for non-listening families. */
int sock_no_listen(struct socket *sock, int backlog)
{
	return -EOPNOTSUPP;
}
|
/* Default shutdown op for families that do not support shutdown(). */
int sock_no_shutdown(struct socket *sock, int how)
{
	return -EOPNOTSUPP;
}
|
/* Default setsockopt op for families with no options. */
int sock_no_setsockopt(struct socket *sock, int level, int optname,
		       char *optval, int optlen)
{
	return -EOPNOTSUPP;
}
|
/* Default getsockopt op for families with no options. */
int sock_no_getsockopt(struct socket *sock, int level, int optname,
		       char *optval, int *optlen)
{
	return -EOPNOTSUPP;
}
|
/* |
* Note: if you add something that sleeps here then change sock_fcntl() |
* to do proper fd locking. |
*/ |
/*
 *	Note: if you add something that sleeps here then change sock_fcntl()
 *	to do proper fd locking.
 */
/*
 * Default fcntl handler supporting only F_SETOWN/F_GETOWN.  The
 * permission test runs before the capability check on purpose (see the
 * Chris Evans fix note in the file header) so CAP_KILL is consulted last.
 */
int sock_no_fcntl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	struct sock *sk = sock->sk;

	switch(cmd)
	{
		case F_SETOWN:
			/*
			 * This is a little restrictive, but it's the only
			 * way to make sure that you can't send a sigurg to
			 * another process.
			 */
			/* Negative arg names a process group, positive a pid. */
			if (current->pgrp != -arg &&
			    current->pid != arg &&
			    !capable(CAP_KILL)) return(-EPERM);
			sk->proc = arg;
			return(0);
		case F_GETOWN:
			return(sk->proc);
		default:
			return(-EINVAL);
	}
}
|
/* Default sendmsg op for families that cannot transmit. */
int sock_no_sendmsg(struct socket *sock, struct msghdr *m, int flags,
		    struct scm_cookie *scm)
{
	return -EOPNOTSUPP;
}
|
/* Default recvmsg op for families that cannot receive. */
int sock_no_recvmsg(struct socket *sock, struct msghdr *m, int len, int flags,
		    struct scm_cookie *scm)
{
	return -EOPNOTSUPP;
}
|
/* Default mmap op. */
int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
	/* Mirror missing mmap method error code */
	return -ENODEV;
}
|
/*
 * Fallback sendpage: map the page into kernel address space and push
 * it through the ordinary sendmsg path.  set_fs(KERNEL_DS) is required
 * because sendmsg expects user-space iovecs; the old fs is restored
 * before returning.  Returns bytes sent or a negative errno.
 */
ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
{
	ssize_t res;
	struct msghdr msg;
	struct iovec iov;
	mm_segment_t old_fs;
	char *kaddr;

	kaddr = kmap(page);

	msg.msg_name = NULL;
	msg.msg_namelen = 0;
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_flags = flags;

	/* Single iovec covering the requested slice of the page. */
	iov.iov_base = kaddr + offset;
	iov.iov_len = size;

	old_fs = get_fs();
	set_fs(KERNEL_DS);
	res = sock_sendmsg(sock, &msg, size);
	set_fs(old_fs);

	kunmap(page);
	return res;
}
|
/* |
* Default Socket Callbacks |
*/ |
|
/*
 * Default state_change callback: wake everyone sleeping on the socket.
 * callback_lock is taken read-side so the wait queue cannot be torn
 * down underneath us.
 */
void sock_def_wakeup(struct sock *sk)
{
	read_lock(&sk->callback_lock);
	if (sk->sleep && waitqueue_active(sk->sleep))
		wake_up_interruptible_all(sk->sleep);
	read_unlock(&sk->callback_lock);
}
|
/*
 * Default error_report callback: wake sleepers and send SIGIO/POLL_ERR
 * to async subscribers.
 */
void sock_def_error_report(struct sock *sk)
{
	read_lock(&sk->callback_lock);
	if (sk->sleep && waitqueue_active(sk->sleep))
		wake_up_interruptible(sk->sleep);
	sk_wake_async(sk,0,POLL_ERR);
	read_unlock(&sk->callback_lock);
}
|
/*
 * Default data_ready callback: wake sleepers and signal POLL_IN to
 * async subscribers.  'len' (bytes queued) is unused here.
 */
void sock_def_readable(struct sock *sk, int len)
{
	read_lock(&sk->callback_lock);
	if (sk->sleep && waitqueue_active(sk->sleep))
		wake_up_interruptible(sk->sleep);
	sk_wake_async(sk,1,POLL_IN);
	read_unlock(&sk->callback_lock);
}
|
/*
 * Default write_space callback: wake writers once at least half of the
 * send buffer is free, so they are not woken for trivial amounts of
 * space.
 */
void sock_def_write_space(struct sock *sk)
{
	read_lock(&sk->callback_lock);

	/* Do not wake up a writer until he can make "significant"
	 * progress.  --DaveM
	 */
	if((atomic_read(&sk->wmem_alloc) << 1) <= sk->sndbuf) {
		if (sk->sleep && waitqueue_active(sk->sleep))
			wake_up_interruptible(sk->sleep);

		/* Should agree with poll, otherwise some programs break */
		if (sock_writeable(sk))
			sk_wake_async(sk, 2, POLL_OUT);
	}

	read_unlock(&sk->callback_lock);
}
|
/*
 * Default destructor: free protocol-private data hung off protinfo.
 * NOTE(review): the NULL guard looks redundant if this kernel's kfree()
 * accepts NULL — confirm against mm/slab.c before simplifying.
 */
void sock_def_destruct(struct sock *sk)
{
	if (sk->protinfo.destruct_hook)
		kfree(sk->protinfo.destruct_hook);
}
|
/*
 * Initialise the generic part of a freshly allocated struct sock:
 * queues, buffer limits, default callbacks and credentials.  'sock'
 * may be NULL for kernel-internal sockets, in which case there is no
 * wait queue to attach.
 */
void sock_init_data(struct socket *sock, struct sock *sk)
{
	skb_queue_head_init(&sk->receive_queue);
	skb_queue_head_init(&sk->write_queue);
	skb_queue_head_init(&sk->error_queue);

	init_timer(&sk->timer);

	sk->allocation	=	GFP_KERNEL;
	sk->rcvbuf	=	sysctl_rmem_default;
	sk->sndbuf	=	sysctl_wmem_default;
	sk->state 	= 	TCP_CLOSE;
	sk->zapped	=	1;
	sk->socket	=	sock;

	if(sock)
	{
		sk->type	=	sock->type;
		sk->sleep	=	&sock->wait;
		sock->sk	=	sk;
	} else
		sk->sleep	=	NULL;

	sk->dst_lock		=	RW_LOCK_UNLOCKED;
	sk->callback_lock	=	RW_LOCK_UNLOCKED;

	/* Generic callbacks; protocols override these as needed. */
	sk->state_change	=	sock_def_wakeup;
	sk->data_ready		=	sock_def_readable;
	sk->write_space		=	sock_def_write_space;
	sk->error_report	=	sock_def_error_report;
	sk->destruct            =       sock_def_destruct;

	/* No peer credentials until a connection establishes them. */
	sk->peercred.pid	=	0;
	sk->peercred.uid	=	-1;
	sk->peercred.gid	=	-1;
	sk->rcvlowat		=	1;
	sk->rcvtimeo		=	MAX_SCHEDULE_TIMEOUT;
	sk->sndtimeo		=	MAX_SCHEDULE_TIMEOUT;

	atomic_set(&sk->refcnt, 1);
}
/sysctl_net_core.c
0,0 → 1,98
/* -*- linux-c -*- |
* sysctl_net_core.c: sysctl interface to net core subsystem. |
* |
* Begun April 1, 1996, Mike Shaver. |
* Added /proc/sys/net/core directory entry (empty =) ). [MS] |
*/ |
|
#include <linux/mm.h> |
#include <linux/sysctl.h> |
#include <linux/config.h> |
|
#ifdef CONFIG_SYSCTL |
|
extern int netdev_max_backlog; |
extern int weight_p; |
extern int no_cong_thresh; |
extern int no_cong; |
extern int lo_cong; |
extern int mod_cong; |
extern int netdev_fastroute; |
extern int net_msg_cost; |
extern int net_msg_burst; |
|
extern __u32 sysctl_wmem_max; |
extern __u32 sysctl_rmem_max; |
extern __u32 sysctl_wmem_default; |
extern __u32 sysctl_rmem_default; |
|
extern int sysctl_core_destroy_delay; |
extern int sysctl_optmem_max; |
extern int sysctl_somaxconn; |
extern int sysctl_hot_list_len; |
|
#ifdef CONFIG_NET_DIVERT |
extern char sysctl_divert_version[]; |
#endif /* CONFIG_NET_DIVERT */ |
|
/*
 * /proc/sys/net/core sysctl table.  Each entry is
 * {ctl_name, procname, data, maxlen, mode, child, proc_handler}.
 * Terminated by the zero entry.
 */
ctl_table core_table[] = {
#ifdef CONFIG_NET
	{NET_CORE_WMEM_MAX, "wmem_max",
	 &sysctl_wmem_max, sizeof(int), 0644, NULL,
	 &proc_dointvec},
	{NET_CORE_RMEM_MAX, "rmem_max",
	 &sysctl_rmem_max, sizeof(int), 0644, NULL,
	 &proc_dointvec},
	{NET_CORE_WMEM_DEFAULT, "wmem_default",
	 &sysctl_wmem_default, sizeof(int), 0644, NULL,
	 &proc_dointvec},
	{NET_CORE_RMEM_DEFAULT, "rmem_default",
	 &sysctl_rmem_default, sizeof(int), 0644, NULL,
	 &proc_dointvec},
	{NET_CORE_DEV_WEIGHT, "dev_weight",
	 &weight_p, sizeof(int), 0644, NULL,
	 &proc_dointvec},
	{NET_CORE_MAX_BACKLOG, "netdev_max_backlog",
	 &netdev_max_backlog, sizeof(int), 0644, NULL,
	 &proc_dointvec},
	{NET_CORE_NO_CONG_THRESH, "no_cong_thresh",
	 &no_cong_thresh, sizeof(int), 0644, NULL,
	 &proc_dointvec},
	{NET_CORE_NO_CONG, "no_cong",
	 &no_cong, sizeof(int), 0644, NULL,
	 &proc_dointvec},
	{NET_CORE_LO_CONG, "lo_cong",
	 &lo_cong, sizeof(int), 0644, NULL,
	 &proc_dointvec},
	{NET_CORE_MOD_CONG, "mod_cong",
	 &mod_cong, sizeof(int), 0644, NULL,
	 &proc_dointvec},
#ifdef CONFIG_NET_FASTROUTE
	{NET_CORE_FASTROUTE, "netdev_fastroute",
	 &netdev_fastroute, sizeof(int), 0644, NULL,
	 &proc_dointvec},
#endif
	/* message_cost/burst are stored in jiffies, hence the
	 * proc_dointvec_jiffies handler. */
	{NET_CORE_MSG_COST, "message_cost",
	 &net_msg_cost, sizeof(int), 0644, NULL,
	 &proc_dointvec_jiffies},
	{NET_CORE_MSG_BURST, "message_burst",
	 &net_msg_burst, sizeof(int), 0644, NULL,
	 &proc_dointvec_jiffies},
	{NET_CORE_OPTMEM_MAX, "optmem_max",
	 &sysctl_optmem_max, sizeof(int), 0644, NULL,
	 &proc_dointvec},
	{NET_CORE_HOT_LIST_LENGTH, "hot_list_length",
	 &sysctl_hot_list_len, sizeof(int), 0644, NULL,
	 &proc_dointvec},
#ifdef CONFIG_NET_DIVERT
	{NET_CORE_DIVERT_VERSION, "divert_version",
	 (void *)sysctl_divert_version, 32, 0444, NULL,
	 &proc_dostring},
#endif /* CONFIG_NET_DIVERT */
	{NET_CORE_SOMAXCONN, "somaxconn",
	 &sysctl_somaxconn, sizeof(int), 0644, NULL,
	 &proc_dointvec },
#endif /* CONFIG_NET */
	{ 0 }
};
#endif |
/utils.c
0,0 → 1,73
/* |
 * Generic address resolution entity
* |
* Authors: |
* net_random Alan Cox |
* net_ratelimit Andy Kleen |
* |
* Created by Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> |
* |
* This program is free software; you can redistribute it and/or |
* modify it under the terms of the GNU General Public License |
* as published by the Free Software Foundation; either version |
* 2 of the License, or (at your option) any later version. |
*/ |
|
#include <asm/uaccess.h> |
#include <asm/system.h> |
#include <linux/types.h> |
#include <linux/kernel.h> |
#include <linux/sched.h> |
#include <linux/string.h> |
#include <linux/mm.h> |
|
/* LCG state for net_random(); not cryptographically strong. */
static unsigned long net_rand_seed = 152L;
|
/*
 * Cheap pseudo-random number: step a linear congruential generator and
 * mix in jiffies.  Suitable for jitter/backoff, NOT for security.
 */
unsigned long net_random(void)
{
	net_rand_seed=net_rand_seed*69069L+1;
	return net_rand_seed^jiffies;
}
|
/*
 * Fold caller-supplied entropy into the generator state and step it
 * once to diffuse the new bits.
 */
void net_srandom(unsigned long entropy)
{
	net_rand_seed ^= entropy;
	net_random();
}
|
/* Token-bucket parameters for net_ratelimit(), in jiffies:
 * one message per 5s, bursts of up to 10. */
int net_msg_cost = 5*HZ;
int net_msg_burst = 10*5*HZ;
|
/* |
* This enforces a rate limit: not more than one kernel message |
* every 5secs to make a denial-of-service attack impossible. |
* |
* All warning printk()s should be guarded by this function. |
*/ |
int net_ratelimit(void)
{
	/* Token bucket: "toks" accumulates elapsed jiffies (capped at
	 * net_msg_burst); each permitted message costs net_msg_cost
	 * jiffies.  Returns 1 when the caller may print, 0 otherwise. */
	static spinlock_t ratelimit_lock = SPIN_LOCK_UNLOCKED;
	static unsigned long toks = 10*5*HZ;
	static unsigned long last_msg;	/* jiffies at the previous call */
	static int missed;		/* messages suppressed since last allowed one */
	unsigned long flags;
	unsigned long now = jiffies;

	spin_lock_irqsave(&ratelimit_lock, flags);
	/* Credit time elapsed since the previous call, then clamp. */
	toks += now - last_msg;
	last_msg = now;
	if (toks > net_msg_burst)
		toks = net_msg_burst;
	if (toks >= net_msg_cost) {
		int lost = missed;
		missed = 0;
		toks -= net_msg_cost;
		/* Drop the lock before printk: console output can be slow. */
		spin_unlock_irqrestore(&ratelimit_lock, flags);
		if (lost)
			printk(KERN_WARNING "NET: %d messages suppressed.\n", lost);
		return 1;
	}
	/* Out of tokens: count the suppression and tell the caller no. */
	missed++;
	spin_unlock_irqrestore(&ratelimit_lock, flags);
	return 0;
}
/profile.c
0,0 → 1,293
#include <linux/config.h> |
#include <linux/types.h> |
#include <linux/kernel.h> |
#include <linux/sched.h> |
#include <linux/mm.h> |
#include <linux/interrupt.h> |
#include <linux/netdevice.h> |
#include <linux/string.h> |
#include <linux/skbuff.h> |
#include <linux/proc_fs.h> |
#include <linux/init.h> |
#include <linux/ip.h> |
#include <linux/inet.h> |
#include <net/checksum.h> |
|
#include <asm/processor.h> |
#include <asm/uaccess.h> |
#include <asm/system.h> |
|
#include <net/profile.h> |
|
#ifdef CONFIG_NET_PROFILE |
|
atomic_t net_profile_active; |
struct timeval net_profile_adjust; |
|
NET_PROFILE_DEFINE(total); |
|
struct net_profile_slot *net_profile_chain = &net_prof_total; |
|
#ifdef __alpha__ |
__u32 alpha_lo; |
long alpha_hi; |
|
static void alpha_tick(unsigned long); |
|
static struct timer_list alpha_timer = |
{ NULL, NULL, 0, 0L, alpha_tick }; |
|
void alpha_tick(unsigned long dummy) |
{ |
struct timeval dummy_stamp; |
net_profile_stamp(&dummy_stamp); |
alpha_timer.expires = jiffies + 4*HZ; |
add_timer(&alpha_timer); |
} |
|
#endif |
|
void net_profile_irq_adjust(struct timeval *entered, struct timeval* leaved) |
{ |
struct net_profile_slot *s; |
|
net_profile_sub(entered, leaved); |
for (s = net_profile_chain; s; s = s->next) { |
if (s->active) |
net_profile_add(leaved, &s->irq); |
} |
} |
|
|
#ifdef CONFIG_PROC_FS |
/*
 * /proc read handler: dump one line per profiling slot (hits,
 * accumulated time, irq-adjusted time, underflow/active counters).
 * Reading has the side effect of zeroing every slot, so each read
 * reports activity since the previous one.  Uses the classic 2.4
 * proc_read offset/begin bookkeeping for multi-page output.
 */
static int profile_read_proc(char *buffer, char **start, off_t offset,
			     int length, int *eof, void *data)
{
	off_t pos=0;
	off_t begin=0;
	int len=0;
	struct net_profile_slot *s;

	len+= sprintf(buffer, "Slot            Hits       Hi         Lo         OnIrqHi    OnIrqLo    Ufl\n");

	/* First chunk of the read: close the running "total" interval and
	 * mark the total slot active for the duration of the dump. */
	if (offset == 0) {
		cli();
		net_prof_total.active = 1;
		atomic_inc(&net_profile_active);
		NET_PROFILE_LEAVE(total);
		sti();
	}
	for (s = net_profile_chain; s; s = s->next) {
		struct net_profile_slot tmp;

		/* Snapshot and reset each slot with irqs off; readers and
		 * the stamping macros run in irq context. */
		cli();
		tmp = *s;

		/* Wrong, but pretty close to truth */

		s->accumulator.tv_sec = 0;
		s->accumulator.tv_usec = 0;
		s->irq.tv_sec = 0;
		s->irq.tv_usec = 0;
		s->hits = 0;
		s->underflow = 0;
		/* Repair active count, it is possible, only if code has a bug */
		if (s->active) {
			s->active = 0;
			atomic_dec(&net_profile_active);
		}
		sti();

		/* Remove irq time from the accumulated interval. */
		net_profile_sub(&tmp.irq, &tmp.accumulator);

		len += sprintf(buffer+len,"%-15s %-10d %-10ld %-10lu %-10lu %-10lu %d/%d",
			       tmp.id,
			       tmp.hits,
			       tmp.accumulator.tv_sec,
			       tmp.accumulator.tv_usec,
			       tmp.irq.tv_sec,
			       tmp.irq.tv_usec,
			       tmp.underflow, tmp.active);

		buffer[len++]='\n';

		/* Standard proc_read windowing: discard output before
		 * "offset", stop once the requested window is filled. */
		pos=begin+len;
		if(pos<offset) {
			len=0;
			begin=pos;
		}
		if(pos>offset+length)
			goto done;
	}
	*eof = 1;

done:
	*start=buffer+(offset-begin);
	len-=(offset-begin);
	if(len>length)
		len=length;
	if (len < 0)
		len = 0;
	/* Restart the "total" measurement for the next read cycle. */
	if (offset == 0) {
		cli();
		net_prof_total.active = 0;
		net_prof_total.hits = 0;
		net_profile_stamp(&net_prof_total.entered);
		sti();
	}
	return len;
}
#endif |
|
struct iphdr whitehole_iph; |
int whitehole_count; |
|
static int whitehole_xmit(struct sk_buff *skb, struct net_device *dev) |
{ |
struct net_device_stats *stats; |
|
stats = (struct net_device_stats *)dev->priv; |
stats->tx_packets++; |
stats->tx_bytes+=skb->len; |
|
dev_kfree_skb(skb); |
return 0; |
} |
|
static void whitehole_inject(unsigned long); |
int whitehole_init(struct net_device *dev); |
|
static struct timer_list whitehole_timer = |
{ NULL, NULL, 0, 0L, whitehole_inject }; |
|
static struct net_device whitehole_dev = { |
"whitehole", 0x0, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0, NULL, whitehole_init, }; |
|
static int whitehole_open(struct net_device *dev) |
{ |
whitehole_count = 100000; |
whitehole_timer.expires = jiffies + 5*HZ; |
add_timer(&whitehole_timer); |
return 0; |
} |
|
static int whitehole_close(struct net_device *dev) |
{ |
del_timer(&whitehole_timer); |
return 0; |
} |
|
/*
 * Timer callback: synthesise minimal IP packets and feed them to
 * netif_rx() to exercise the receive path, re-arming one tick later
 * while the packet budget lasts and the backlog is not dropping.
 */
static void whitehole_inject(unsigned long dummy)
{
	struct net_device_stats *stats = (struct net_device_stats *)whitehole_dev.priv;
	extern int netdev_dropping;

	do {
		struct iphdr *iph;
		struct sk_buff *skb = alloc_skb(128, GFP_ATOMIC);
		if (!skb)
			break;
		/* Leave headroom, then append just an IP header. */
		skb_reserve(skb, 32);
		iph = (struct iphdr*)skb_put(skb, sizeof(*iph));
		/* Point mac.raw 14 bytes (an ethernet header) before the
		 * IP header; no real MAC header is ever built. */
		skb->mac.raw = ((u8*)iph) - 14;
		memcpy(iph, &whitehole_iph, sizeof(*iph));
		skb->protocol = __constant_htons(ETH_P_IP);
		skb->dev = &whitehole_dev;
		skb->pkt_type = PACKET_HOST;
		stats->rx_packets++;
		stats->rx_bytes += skb->len;
		netif_rx(skb);
		whitehole_count--;
	} while (netdev_dropping == 0 && whitehole_count>0);
	/* Budget left but backlog congested (or alloc failed): retry on
	 * the very next tick. */
	if (whitehole_count > 0) {
		whitehole_timer.expires = jiffies + 1;
		add_timer(&whitehole_timer);
	}
}
|
static struct net_device_stats *whitehole_get_stats(struct net_device *dev) |
{ |
struct net_device_stats *stats = (struct net_device_stats *) dev->priv; |
return stats; |
} |
|
/*
 * One-time setup for the whitehole device: allocate its statistics
 * block, wire up the device operations, configure it as an ethernet
 * device that neither ARPs nor broadcasts, and prebuild the template
 * IP header that whitehole_inject() copies into every packet.
 */
int __init whitehole_init(struct net_device *dev)
{
	dev->priv = kmalloc(sizeof(struct net_device_stats), GFP_KERNEL);
	if (dev->priv == NULL)
		return -ENOBUFS;
	memset(dev->priv, 0, sizeof(struct net_device_stats));
	dev->get_stats = whitehole_get_stats;
	dev->hard_start_xmit = whitehole_xmit;
	dev->open = whitehole_open;
	dev->stop = whitehole_close;
	ether_setup(dev);
	dev->tx_queue_len = 0;
	dev->flags |= IFF_NOARP;
	dev->flags &= ~(IFF_BROADCAST|IFF_MULTICAST);
	dev->iflink = 0;
	/* Template header: 20-byte IPv4, fixed test addresses, TTL 2.
	 * The checksum is computed last, over the finished header
	 * (the global whitehole_iph starts zeroed, so check==0 when
	 * ip_compute_csum runs). */
	whitehole_iph.ihl = 5;
	whitehole_iph.version = 4;
	whitehole_iph.ttl = 2;
	whitehole_iph.saddr = in_aton("193.233.7.21");
	whitehole_iph.daddr = in_aton("193.233.7.10");
	whitehole_iph.tot_len = htons(20);
	whitehole_iph.check = ip_compute_csum((void *)&whitehole_iph, 20);
	return 0;
}
|
int net_profile_register(struct net_profile_slot *slot) |
{ |
cli(); |
slot->next = net_profile_chain; |
net_profile_chain = slot; |
sti(); |
return 0; |
} |
|
int net_profile_unregister(struct net_profile_slot *slot) |
{ |
struct net_profile_slot **sp, *s; |
|
for (sp = &net_profile_chain; (s = *sp) != NULL; sp = &s->next) { |
if (s == slot) { |
cli(); |
*sp = s->next; |
sti(); |
return 0; |
} |
} |
return -ESRCH; |
} |
|
|
/*
 * Boot-time initialisation: publish /proc/net/profile, register the
 * whitehole test device, and calibrate the cost of one profiling
 * stamp by timing 1024 back-to-back enter/leave pairs.  The per-pair
 * cost (total >> 10) is stored in net_profile_adjust and later
 * subtracted from every measurement.
 */
int __init net_profile_init(void)
{
	int i;

#ifdef CONFIG_PROC_FS
	create_proc_read_entry("net/profile", 0, 0, profile_read_proc, NULL);
#endif

	register_netdevice(&whitehole_dev);

	printk("Evaluating net profiler cost ...");
#ifdef __alpha__
	/* Start the Alpha cycle-counter keep-alive timer. */
	alpha_tick(0);
#endif
	for (i=0; i<1024; i++) {
		NET_PROFILE_ENTER(total);
		NET_PROFILE_LEAVE(total);
	}
	/* A full second for 1024 empty pairs means something is wrong. */
	if (net_prof_total.accumulator.tv_sec) {
		printk(" too high!\n");
	} else {
		net_profile_adjust.tv_usec = net_prof_total.accumulator.tv_usec>>10;
		printk("%ld units\n", net_profile_adjust.tv_usec);
	}
	/* Reset the total slot so calibration doesn't pollute real data. */
	net_prof_total.hits = 0;
	net_profile_stamp(&net_prof_total.entered);
	return 0;
}
|
#endif |
/dev_mcast.c
0,0 → 1,275
/* |
* Linux NET3: Multicast List maintenance. |
* |
* Authors: |
* Tim Kordas <tjk@nostromo.eeap.cwru.edu> |
* Richard Underwood <richard@wuzz.demon.co.uk> |
* |
* Stir fried together from the IP multicast and CAP patches above |
* Alan Cox <Alan.Cox@linux.org> |
* |
* Fixes: |
* Alan Cox : Update the device on a real delete |
* rather than any time but... |
* Alan Cox : IFF_ALLMULTI support. |
* Alan Cox : New format set_multicast_list() calls. |
* Gleb Natapov : Remove dev_mc_lock. |
* |
* This program is free software; you can redistribute it and/or |
* modify it under the terms of the GNU General Public License |
* as published by the Free Software Foundation; either version |
* 2 of the License, or (at your option) any later version. |
*/ |
|
#include <linux/config.h> |
#include <asm/uaccess.h> |
#include <asm/system.h> |
#include <asm/bitops.h> |
#include <linux/types.h> |
#include <linux/kernel.h> |
#include <linux/sched.h> |
#include <linux/string.h> |
#include <linux/mm.h> |
#include <linux/socket.h> |
#include <linux/sockios.h> |
#include <linux/in.h> |
#include <linux/errno.h> |
#include <linux/interrupt.h> |
#include <linux/if_ether.h> |
#include <linux/inet.h> |
#include <linux/netdevice.h> |
#include <linux/etherdevice.h> |
#include <linux/proc_fs.h> |
#include <linux/init.h> |
#include <net/ip.h> |
#include <net/route.h> |
#include <linux/skbuff.h> |
#include <net/sock.h> |
#include <net/arp.h> |
|
|
/* |
* Device multicast list maintenance. |
* |
* This is used both by IP and by the user level maintenance functions. |
* Unlike BSD we maintain a usage count on a given multicast address so |
* that a casual user application can add/delete multicasts used by |
* protocols without doing damage to the protocols when it deletes the |
* entries. It also helps IP as it tracks overlapping maps. |
* |
* Device mc lists are changed by bh at least if IPv6 is enabled, |
* so that it must be bh protected. |
* |
* We block accesses to device mc filters with dev->xmit_lock. |
*/ |
|
/* |
* Update the multicast list into the physical NIC controller. |
*/ |
|
static void __dev_mc_upload(struct net_device *dev) |
{ |
/* Don't do anything till we up the interface |
* [dev_open will call this function so the list will |
* stay sane] |
*/ |
|
if (!(dev->flags&IFF_UP)) |
return; |
|
/* |
* Devices with no set multicast or which have been |
* detached don't get set. |
*/ |
|
if (dev->set_multicast_list == NULL || |
!netif_device_present(dev)) |
return; |
|
dev->set_multicast_list(dev); |
} |
|
void dev_mc_upload(struct net_device *dev) |
{ |
spin_lock_bh(&dev->xmit_lock); |
__dev_mc_upload(dev); |
spin_unlock_bh(&dev->xmit_lock); |
} |
|
/* |
* Delete a device level multicast |
*/ |
|
/*
 * Drop one reference on a device-level multicast address.  The entry
 * is removed (and the hardware filter reprogrammed) only when the
 * last user goes away.  "glbl" means the caller holds the single
 * IFF_ALLMULTI-style global reference rather than a per-user one.
 * Returns 0 on success, -ENOENT if the address is not on the list
 * or no global reference was held.
 */
int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl)
{
	int err = 0;
	struct dev_mc_list *dmi, **dmip;

	spin_lock_bh(&dev->xmit_lock);

	for (dmip = &dev->mc_list; (dmi = *dmip) != NULL; dmip = &dmi->next) {
		/*
		 * Find the entry we want to delete. The device could
		 * have variable length entries so check these too.
		 */
		if (memcmp(dmi->dmi_addr, addr, dmi->dmi_addrlen) == 0 &&
		    alen == dmi->dmi_addrlen) {
			if (glbl) {
				int old_glbl = dmi->dmi_gusers;
				dmi->dmi_gusers = 0;
				/* No global reference was held: fall out of
				 * the loop and report -ENOENT. */
				if (old_glbl == 0)
					break;
			}
			/* Other users remain: keep the entry, return 0. */
			if (--dmi->dmi_users)
				goto done;

			/*
			 * Last user. So delete the entry.
			 */
			*dmip = dmi->next;
			dev->mc_count--;

			kfree(dmi);

			/*
			 * We have altered the list, so the card
			 * loaded filter is now wrong. Fix it
			 */
			__dev_mc_upload(dev);

			spin_unlock_bh(&dev->xmit_lock);
			return 0;
		}
	}
	err = -ENOENT;
done:
	spin_unlock_bh(&dev->xmit_lock);
	return err;
}
|
/* |
* Add a device level multicast |
*/ |
|
/*
 * Add (or take another reference on) a device-level multicast
 * address.  A spare list node is allocated up front, outside the
 * lock, with GFP_ATOMIC; if the address is already present the
 * spare is freed on the "done" path.  "glbl" marks the single
 * global (ALLMULTI-style) reference.  Returns 0 on success,
 * -ENOMEM if a new node was needed but could not be allocated.
 */
int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl)
{
	int err = 0;
	struct dev_mc_list *dmi, *dmi1;

	/* Pre-allocate before taking the lock; may legitimately fail. */
	dmi1 = (struct dev_mc_list *)kmalloc(sizeof(*dmi), GFP_ATOMIC);

	spin_lock_bh(&dev->xmit_lock);
	for (dmi = dev->mc_list; dmi != NULL; dmi = dmi->next) {
		if (memcmp(dmi->dmi_addr, addr, dmi->dmi_addrlen) == 0 &&
		    dmi->dmi_addrlen == alen) {
			if (glbl) {
				int old_glbl = dmi->dmi_gusers;
				dmi->dmi_gusers = 1;
				/* Global reference already held: no change. */
				if (old_glbl)
					goto done;
			}
			dmi->dmi_users++;
			goto done;
		}
	}

	/* Address not on the list: need the pre-allocated node. */
	if ((dmi = dmi1) == NULL) {
		spin_unlock_bh(&dev->xmit_lock);
		return -ENOMEM;
	}
	memcpy(dmi->dmi_addr, addr, alen);
	dmi->dmi_addrlen = alen;
	dmi->next = dev->mc_list;
	dmi->dmi_users = 1;
	dmi->dmi_gusers = glbl ? 1 : 0;
	dev->mc_list = dmi;
	dev->mc_count++;

	/* List changed: reprogram the hardware filter. */
	__dev_mc_upload(dev);

	spin_unlock_bh(&dev->xmit_lock);
	return 0;

done:
	spin_unlock_bh(&dev->xmit_lock);
	/* Existing entry was reused: release the unused spare node. */
	if (dmi1)
		kfree(dmi1);
	return err;
}
|
/* |
* Discard multicast list when a device is downed |
*/ |
|
void dev_mc_discard(struct net_device *dev) |
{ |
spin_lock_bh(&dev->xmit_lock); |
|
while (dev->mc_list != NULL) { |
struct dev_mc_list *tmp = dev->mc_list; |
dev->mc_list = tmp->next; |
if (tmp->dmi_users > tmp->dmi_gusers) |
printk("dev_mc_discard: multicast leakage! dmi_users=%d\n", tmp->dmi_users); |
kfree(tmp); |
} |
dev->mc_count = 0; |
|
spin_unlock_bh(&dev->xmit_lock); |
} |
|
#ifdef CONFIG_PROC_FS |
/*
 * /proc read handler: one line per (device, multicast address) pair —
 * ifindex, name, user/global reference counts and the address in hex.
 * Walks the device list under dev_base_lock and each mc list under
 * that device's xmit_lock, using the standard 2.4 proc_read
 * offset/begin windowing.
 */
static int dev_mc_read_proc(char *buffer, char **start, off_t offset,
			    int length, int *eof, void *data)
{
	off_t pos = 0, begin = 0;
	struct dev_mc_list *m;
	int len = 0;
	struct net_device *dev;

	read_lock(&dev_base_lock);
	for (dev = dev_base; dev; dev = dev->next) {
		spin_lock_bh(&dev->xmit_lock);
		for (m = dev->mc_list; m; m = m->next) {
			int i;

			len += sprintf(buffer+len,"%-4d %-15s %-5d %-5d ", dev->ifindex,
				       dev->name, m->dmi_users, m->dmi_gusers);

			for (i = 0; i < m->dmi_addrlen; i++)
				len += sprintf(buffer+len, "%02x", m->dmi_addr[i]);

			len += sprintf(buffer+len, "\n");

			/* Window bookkeeping: skip output before "offset",
			 * stop once the requested window is full. */
			pos = begin + len;
			if (pos < offset) {
				len = 0;
				begin = pos;
			}
			if (pos > offset + length) {
				spin_unlock_bh(&dev->xmit_lock);
				goto done;
			}
		}
		spin_unlock_bh(&dev->xmit_lock);
	}
	*eof = 1;

done:
	read_unlock(&dev_base_lock);
	*start = buffer + (offset - begin);
	len -= (offset - begin);
	if (len > length)
		len = length;
	if (len < 0)
		len = 0;
	return len;
}
#endif |
|
/*
 * Boot-time initialisation for the multicast code: publish the
 * /proc/net/dev_mcast listing when procfs is configured in.
 */
void __init dev_mcast_init(void)
{
#ifdef CONFIG_PROC_FS
	create_proc_read_entry("net/dev_mcast", 0, 0, dev_mc_read_proc, NULL);
#endif
}
|
/iovec.c
0,0 → 1,279
/* |
* iovec manipulation routines. |
* |
* |
* This program is free software; you can redistribute it and/or |
* modify it under the terms of the GNU General Public License |
* as published by the Free Software Foundation; either version |
* 2 of the License, or (at your option) any later version. |
* |
* Fixes: |
* Andrew Lunn : Errors in iovec copying. |
* Pedro Roque : Added memcpy_fromiovecend and |
* csum_..._fromiovecend. |
* Andi Kleen : fixed error handling for 2.1 |
* Alexey Kuznetsov: 2.1 optimisations |
* Andi Kleen : Fix csum*fromiovecend for IPv6. |
*/ |
|
|
#include <linux/errno.h> |
#include <linux/sched.h> |
#include <linux/kernel.h> |
#include <linux/mm.h> |
#include <linux/slab.h> |
#include <linux/net.h> |
#include <linux/in6.h> |
#include <asm/uaccess.h> |
#include <asm/byteorder.h> |
#include <net/checksum.h> |
#include <net/sock.h> |
|
/* |
* Verify iovec. The caller must ensure that the iovec is big enough |
* to hold the message iovec. |
* |
* Save time not doing verify_area. copy_*_user will make this work |
* in any case. |
*/ |
|
/*
 * Copy a user msghdr's address and iovec array into kernel space and
 * compute the total payload length.  On success returns the summed
 * iov_len (>= 0) and leaves m->msg_name/m->msg_iov pointing at the
 * kernel copies; on failure returns a negative errno.
 *
 * NOTE(review): msg_iovlen is not bounded here, so the size
 * multiplication below could overflow for huge values — presumably
 * callers (sys_sendmsg/sys_recvmsg) check msg_iovlen against
 * UIO_MAXIOV first; confirm before reusing this from a new path.
 */
int verify_iovec(struct msghdr *m, struct iovec *iov, char *address, int mode)
{
	int size, err, ct;

	if(m->msg_namelen)
	{
		/* Only sends (VERIFY_READ) need the name copied in;
		 * for receives the buffer is just redirected. */
		if(mode==VERIFY_READ)
		{
			err=move_addr_to_kernel(m->msg_name, m->msg_namelen, address);
			if(err<0)
				goto out;
		}

		m->msg_name = address;
	} else
		m->msg_name = NULL;

	err = -EFAULT;
	size = m->msg_iovlen * sizeof(struct iovec);
	if (copy_from_user(iov, m->msg_iov, size))
		goto out;
	m->msg_iov=iov;

	/* Sum the segment lengths into the return value. */
	for (err = 0, ct = 0; ct < m->msg_iovlen; ct++) {
		err += iov[ct].iov_len;
		/* Goal is not to verify user data, but to prevent returning
		   negative value, which is interpreted as errno.
		   Overflow is still possible, but it is harmless.
		 */
		if (err < 0)
			return -EMSGSIZE;
	}
out:
	return err;
}
|
/* |
* Copy kernel to iovec. Returns -EFAULT on error. |
* |
* Note: this modifies the original iovec. |
*/ |
|
int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len) |
{ |
int err = -EFAULT; |
|
while(len>0) |
{ |
if(iov->iov_len) |
{ |
int copy = min_t(unsigned int, iov->iov_len, len); |
if (copy_to_user(iov->iov_base, kdata, copy)) |
goto out; |
kdata+=copy; |
len-=copy; |
iov->iov_len-=copy; |
iov->iov_base+=copy; |
} |
iov++; |
} |
err = 0; |
out: |
return err; |
} |
|
/* |
* In kernel copy to iovec. Returns -EFAULT on error. |
* |
* Note: this modifies the original iovec. |
*/ |
|
void memcpy_tokerneliovec(struct iovec *iov, unsigned char *kdata, int len) |
{ |
while(len>0) |
{ |
if(iov->iov_len) |
{ |
int copy = min_t(unsigned int, iov->iov_len, len); |
memcpy(iov->iov_base, kdata, copy); |
kdata+=copy; |
len-=copy; |
iov->iov_len-=copy; |
iov->iov_base+=copy; |
} |
iov++; |
} |
} |
|
|
/* |
* Copy iovec to kernel. Returns -EFAULT on error. |
* |
* Note: this modifies the original iovec. |
*/ |
|
int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len) |
{ |
int err = -EFAULT; |
|
while(len>0) |
{ |
if(iov->iov_len) |
{ |
int copy = min_t(unsigned int, len, iov->iov_len); |
if (copy_from_user(kdata, iov->iov_base, copy)) |
goto out; |
len-=copy; |
kdata+=copy; |
iov->iov_base+=copy; |
iov->iov_len-=copy; |
} |
iov++; |
} |
err = 0; |
out: |
return err; |
} |
|
|
/* |
* For use with ip_build_xmit |
*/ |
|
int memcpy_fromiovecend(unsigned char *kdata, struct iovec *iov, int offset, |
int len) |
{ |
int err = -EFAULT; |
|
/* Skip over the finished iovecs */ |
while(offset >= iov->iov_len) |
{ |
offset -= iov->iov_len; |
iov++; |
} |
|
while (len > 0) |
{ |
u8 *base = iov->iov_base + offset; |
int copy = min_t(unsigned int, len, iov->iov_len - offset); |
|
offset = 0; |
if (copy_from_user(kdata, base, copy)) |
goto out; |
len -= copy; |
kdata += copy; |
iov++; |
} |
err = 0; |
out: |
return err; |
} |
|
/* |
* And now for the all-in-one: copy and checksum from a user iovec |
* directly to a datagram |
* Calls to csum_partial but the last must be in 32 bit chunks |
* |
* ip_build_xmit must ensure that when fragmenting only the last |
* call to this function will be unaligned also. |
*/ |
|
/*
 * Copy from a user iovec (starting at "offset") into kernel memory
 * while folding the data into the Internet checksum *csump.  The
 * iovec is not consumed.  csum_partial requires all but the final
 * call to cover a multiple of 4 bytes, so up to 3 leftover bytes at
 * a segment boundary are carried in "partial_cnt" and checksummed
 * once 4 bytes (or the end of data) are available.  Returns 0 on
 * success, -EFAULT on a userspace fault.
 */
int csum_partial_copy_fromiovecend(unsigned char *kdata, struct iovec *iov,
				 int offset, unsigned int len, int *csump)
{
	int csum = *csump;
	int partial_cnt = 0, err = 0;

	/* Skip over the finished iovecs */
	while (offset >= iov->iov_len)
	{
		offset -= iov->iov_len;
		iov++;
	}

	while (len > 0)
	{
		u8 *base = iov->iov_base + offset;
		int copy = min_t(unsigned int, len, iov->iov_len - offset);

		offset = 0;
		/* There is a remnant from previous iov. */
		if (partial_cnt)
		{
			int par_len = 4 - partial_cnt;

			/* iov component is too short ... */
			if (par_len > copy) {
				/* Still fewer than 4 bytes in hand: copy this
				 * whole segment into the remnant and move on;
				 * checksum the tail only if data is exhausted. */
				if (copy_from_user(kdata, base, copy))
					goto out_fault;
				kdata += copy;
				base += copy;
				partial_cnt += copy;
				len -= copy;
				iov++;
				if (len)
					continue;
				*csump = csum_partial(kdata - partial_cnt,
						 partial_cnt, csum);
				goto out;
			}
			/* Complete the 4-byte remnant and fold it in. */
			if (copy_from_user(kdata, base, par_len))
				goto out_fault;
			csum = csum_partial(kdata - partial_cnt, 4, csum);
			kdata += par_len;
			base += par_len;
			copy -= par_len;
			len -= par_len;
			partial_cnt = 0;
		}

		/* Not the last segment: peel off copy%4 trailing bytes so
		 * the checksummed chunk stays 4-byte aligned; the tail is
		 * copied now and checksummed on a later iteration. */
		if (len > copy)
		{
			partial_cnt = copy % 4;
			if (partial_cnt)
			{
				copy -= partial_cnt;
				if (copy_from_user(kdata + copy, base + copy,
						 partial_cnt))
					goto out_fault;
			}
		}

		if (copy) {
			csum = csum_and_copy_from_user(base, kdata, copy,
						 csum, &err);
			if (err)
				goto out;
		}
		len -= copy + partial_cnt;
		kdata += copy + partial_cnt;
		iov++;
	}
	*csump = csum;
out:
	return err;

out_fault:
	err = -EFAULT;
	goto out;
}
/dv.c
0,0 → 1,559
/* |
* INET An implementation of the TCP/IP protocol suite for the LINUX |
* operating system. INET is implemented using the BSD Socket |
* interface as the means of communication with the user level. |
* |
* Generic frame diversion |
* |
* Version: @(#)eth.c 0.41 09/09/2000 |
* |
* Authors: |
* Benoit LOCHER: initial integration within the kernel with support for ethernet |
* Dave Miller: improvement on the code (correctness, performance and source files) |
* |
*/ |
#include <linux/types.h> |
#include <linux/kernel.h> |
#include <linux/sched.h> |
#include <linux/string.h> |
#include <linux/mm.h> |
#include <linux/socket.h> |
#include <linux/in.h> |
#include <linux/inet.h> |
#include <linux/ip.h> |
#include <linux/udp.h> |
#include <linux/netdevice.h> |
#include <linux/etherdevice.h> |
#include <linux/skbuff.h> |
#include <linux/errno.h> |
#include <linux/init.h> |
#include <net/dst.h> |
#include <net/arp.h> |
#include <net/sock.h> |
#include <net/ipv6.h> |
#include <net/ip.h> |
#include <asm/uaccess.h> |
#include <asm/system.h> |
#include <asm/checksum.h> |
#include <linux/divert.h> |
#include <linux/sockios.h> |
|
const char sysctl_divert_version[32]="0.46"; /* Current version */ |
|
/*
 * Boot-time hello for the frame diverter: announce the subsystem and
 * its version string.  Always succeeds.
 */
int __init dv_init(void)
{
	printk(KERN_INFO "NET4: Frame Diverter %s\n", sysctl_divert_version);
	return 0;
}
|
/* |
* Allocate a divert_blk for a device. This must be an ethernet nic. |
*/ |
/*
 * Attach a zeroed divert_blk to an ethernet device (diversion only
 * makes sense for ethernet).  Takes a device reference that
 * free_divert_blk() drops.  Non-ethernet devices get divert == NULL.
 * Returns 0 on success, -ENOMEM on allocation failure.
 */
int alloc_divert_blk(struct net_device *dev)
{
	/* Round the allocation up to a 4-byte multiple. */
	int alloc_size = (sizeof(struct divert_blk) + 3) & ~3;

	if (dev->type == ARPHRD_ETHER) {
		printk(KERN_DEBUG "divert: allocating divert_blk for %s\n",
		       dev->name);

		dev->divert = (struct divert_blk *)
			kmalloc(alloc_size, GFP_KERNEL);
		if (dev->divert == NULL) {
			printk(KERN_DEBUG "divert: unable to allocate divert_blk for %s\n",
			       dev->name);
			return -ENOMEM;
		} else {
			memset(dev->divert, 0, sizeof(struct divert_blk));
		}
		/* Pin the device while it carries a divert block. */
		dev_hold(dev);
	} else {
		printk(KERN_DEBUG "divert: not allocating divert_blk for non-ethernet device %s\n",
		       dev->name);

		dev->divert = NULL;
	}
	return 0;
}
|
/* |
* Free a divert_blk allocated by the above function, if it was |
* allocated on that device. |
*/ |
void free_divert_blk(struct net_device *dev) |
{ |
if (dev->divert) { |
kfree(dev->divert); |
dev->divert=NULL; |
dev_put(dev); |
printk(KERN_DEBUG "divert: freeing divert_blk for %s\n", |
dev->name); |
} else { |
printk(KERN_DEBUG "divert: no divert_blk to free, %s not ethernet\n", |
dev->name); |
} |
} |
|
/* |
* Adds a tcp/udp (source or dest) port to an array |
*/ |
/*
 * Add a tcp/udp (source or dest) port to a divert port array.
 *
 * Ports are stored in network byte order so the receive path can
 * compare them without conversion (thanks Dave :).
 *
 * Returns 0 on success, -EINVAL for port 0, -EALREADY if the port
 * is already present, -ENOBUFS if the array is full.
 */
int add_port(u16 ports[], u16 port)
{
	int i, free_slot = -1;

	if (port == 0)
		return -EINVAL;

	/* Storing directly in network format for performance,
	 * thanks Dave :)
	 */
	port = htons(port);

	/* One pass does both jobs the old double scan did: reject a
	 * duplicate anywhere in the array, and remember the first free
	 * slot for insertion. */
	for (i = 0; i < MAX_DIVERT_PORTS; i++) {
		if (ports[i] == port)
			return -EALREADY;
		if (ports[i] == 0 && free_slot < 0)
			free_slot = i;
	}

	if (free_slot < 0)
		return -ENOBUFS;

	ports[free_slot] = port;
	return 0;
}
|
/* |
* Removes a port from an array tcp/udp (source or dest) |
*/ |
/*
 * Remove a tcp/udp (source or dest) port from a divert port array.
 * Ports are stored in network byte order (see add_port).  Returns 0
 * on success, -EINVAL for port 0 or if the port is not present.
 */
int remove_port(u16 ports[], u16 port)
{
	int i;

	if (port == 0)
		return -EINVAL;

	/* Storing directly in network format for performance,
	 * thanks Dave !
	 */
	port = htons(port);

	for (i = 0; i < MAX_DIVERT_PORTS; i++) {
		if (ports[i] != port)
			continue;
		ports[i] = 0;
		return 0;
	}

	return -EINVAL;
}
|
/* Some basic sanity checks on the arguments passed to divert_ioctl() */ |
/*
 * Sanity-check divert_ioctl() arguments and resolve div_cf->dev_index
 * to a net_device ("eth<N>" by name).  Returns 0 with *dev set on
 * success, negative errno otherwise.
 *
 * NOTE(review): only devices named ethN can ever match, and the
 * reference from dev_get_by_name() is dropped below before the
 * caller dereferences *dev — looks racy against device unregister;
 * confirm against the callers before relying on it elsewhere.
 */
int check_args(struct divert_cf *div_cf, struct net_device **dev)
{
	char devname[32];
	int ret;

	if (dev == NULL)
		return -EFAULT;

	/* GETVERSION: all other args are unused */
	if (div_cf->cmd == DIVCMD_GETVERSION)
		return 0;

	/* Network device index should reasonably be between 0 and 1000 :) */
	if (div_cf->dev_index < 0 || div_cf->dev_index > 1000)
		return -EINVAL;

	/* Let's try to find the ifname */
	sprintf(devname, "eth%d", div_cf->dev_index);
	*dev = dev_get_by_name(devname);

	/* dev should NOT be null */
	if (*dev == NULL)
		return -EINVAL;

	ret = 0;

	/* user issuing the ioctl must be a super one :) */
	if (!capable(CAP_SYS_ADMIN)) {
		ret = -EPERM;
		goto out;
	}

	/* Device must have a divert_blk member NOT null */
	if ((*dev)->divert == NULL)
		ret = -EINVAL;
out:
	/* Drop the reference taken by dev_get_by_name() above. */
	dev_put(*dev);
	return ret;
}
|
/* |
* control function of the diverter |
*/ |
#define DVDBG(a) \ |
printk(KERN_DEBUG "divert_ioctl() line %d %s\n", __LINE__, (a)) |
|
/*
 * Control entry point for the frame diverter.
 *
 * SIOCGIFDIVERT reads state (raw divert_blk dump or version string);
 * SIOCSIFDIVERT changes it (reset, per-protocol enable/disable, and
 * per-port add/remove for tcp/udp src/dst lists).  "arg" is a user
 * pointer to a divert_cf; check_args() validates it and resolves the
 * target device.  Returns 0 or a negative errno.
 */
int divert_ioctl(unsigned int cmd, struct divert_cf *arg)
{
	struct divert_cf div_cf;
	struct divert_blk *div_blk;
	struct net_device *dev;
	int ret;

	switch (cmd) {
	case SIOCGIFDIVERT:
		DVDBG("SIOCGIFDIVERT, copy_from_user");
		if (copy_from_user(&div_cf, arg, sizeof(struct divert_cf)))
			return -EFAULT;
		DVDBG("before check_args");
		ret = check_args(&div_cf, &dev);
		if (ret)
			return ret;
		DVDBG("after checkargs");
		div_blk = dev->divert;

		DVDBG("befre switch()");
		/* Read-only sub-commands. */
		switch (div_cf.cmd) {
		case DIVCMD_GETSTATUS:
			/* Now, just give the user the raw divert block
			 * for him to play with :)
			 */
			if (copy_to_user(div_cf.arg1.ptr, dev->divert,
					 sizeof(struct divert_blk)))
				return -EFAULT;
			break;

		case DIVCMD_GETVERSION:
			DVDBG("GETVERSION: checking ptr");
			if (div_cf.arg1.ptr == NULL)
				return -EINVAL;
			DVDBG("GETVERSION: copying data to userland");
			if (copy_to_user(div_cf.arg1.ptr,
					 sysctl_divert_version, 32))
				return -EFAULT;
			DVDBG("GETVERSION: data copied");
			break;

		default:
			return -EINVAL;
		};

		break;

	case SIOCSIFDIVERT:
		if (copy_from_user(&div_cf, arg, sizeof(struct divert_cf)))
			return -EFAULT;

		ret = check_args(&div_cf, &dev);
		if (ret)
			return ret;

		div_blk = dev->divert;

		/* State-changing sub-commands. */
		switch(div_cf.cmd) {
		case DIVCMD_RESET:
			/* Disable diversion and clear every port list. */
			div_blk->divert = 0;
			div_blk->protos = DIVERT_PROTO_NONE;
			memset(div_blk->tcp_dst, 0,
			       MAX_DIVERT_PORTS * sizeof(u16));
			memset(div_blk->tcp_src, 0,
			       MAX_DIVERT_PORTS * sizeof(u16));
			memset(div_blk->udp_dst, 0,
			       MAX_DIVERT_PORTS * sizeof(u16));
			memset(div_blk->udp_src, 0,
			       MAX_DIVERT_PORTS * sizeof(u16));
			return 0;

		case DIVCMD_DIVERT:
			/* Master on/off switch for the whole device. */
			switch(div_cf.arg1.int32) {
			case DIVARG1_ENABLE:
				if (div_blk->divert)
					return -EALREADY;
				div_blk->divert = 1;
				break;

			case DIVARG1_DISABLE:
				if (!div_blk->divert)
					return -EALREADY;
				div_blk->divert = 0;
				break;

			default:
				return -EINVAL;
			};

			break;

		case DIVCMD_IP:
			/* Divert-all-IP flag. */
			switch(div_cf.arg1.int32) {
			case DIVARG1_ENABLE:
				if (div_blk->protos & DIVERT_PROTO_IP)
					return -EALREADY;
				div_blk->protos |= DIVERT_PROTO_IP;
				break;

			case DIVARG1_DISABLE:
				if (!(div_blk->protos & DIVERT_PROTO_IP))
					return -EALREADY;
				div_blk->protos &= ~DIVERT_PROTO_IP;
				break;

			default:
				return -EINVAL;
			};

			break;

		case DIVCMD_TCP:
			/* Divert-all-TCP flag. */
			switch(div_cf.arg1.int32) {
			case DIVARG1_ENABLE:
				if (div_blk->protos & DIVERT_PROTO_TCP)
					return -EALREADY;
				div_blk->protos |= DIVERT_PROTO_TCP;
				break;

			case DIVARG1_DISABLE:
				if (!(div_blk->protos & DIVERT_PROTO_TCP))
					return -EALREADY;
				div_blk->protos &= ~DIVERT_PROTO_TCP;
				break;

			default:
				return -EINVAL;
			};

			break;

		case DIVCMD_TCPDST:
			/* Per-port TCP destination list. */
			switch(div_cf.arg1.int32) {
			case DIVARG1_ADD:
				return add_port(div_blk->tcp_dst,
						div_cf.arg2.uint16);

			case DIVARG1_REMOVE:
				return remove_port(div_blk->tcp_dst,
						   div_cf.arg2.uint16);

			default:
				return -EINVAL;
			};

			break;

		case DIVCMD_TCPSRC:
			/* Per-port TCP source list. */
			switch(div_cf.arg1.int32) {
			case DIVARG1_ADD:
				return add_port(div_blk->tcp_src,
						div_cf.arg2.uint16);

			case DIVARG1_REMOVE:
				return remove_port(div_blk->tcp_src,
						   div_cf.arg2.uint16);

			default:
				return -EINVAL;
			};

			break;

		case DIVCMD_UDP:
			/* Divert-all-UDP flag. */
			switch(div_cf.arg1.int32) {
			case DIVARG1_ENABLE:
				if (div_blk->protos & DIVERT_PROTO_UDP)
					return -EALREADY;
				div_blk->protos |= DIVERT_PROTO_UDP;
				break;

			case DIVARG1_DISABLE:
				if (!(div_blk->protos & DIVERT_PROTO_UDP))
					return -EALREADY;
				div_blk->protos &= ~DIVERT_PROTO_UDP;
				break;

			default:
				return -EINVAL;
			};

			break;

		case DIVCMD_UDPDST:
			/* Per-port UDP destination list. */
			switch(div_cf.arg1.int32) {
			case DIVARG1_ADD:
				return add_port(div_blk->udp_dst,
						div_cf.arg2.uint16);

			case DIVARG1_REMOVE:
				return remove_port(div_blk->udp_dst,
						   div_cf.arg2.uint16);

			default:
				return -EINVAL;
			};

			break;

		case DIVCMD_UDPSRC:
			/* Per-port UDP source list. */
			switch(div_cf.arg1.int32) {
			case DIVARG1_ADD:
				return add_port(div_blk->udp_src,
						div_cf.arg2.uint16);

			case DIVARG1_REMOVE:
				return remove_port(div_blk->udp_src,
						   div_cf.arg2.uint16);

			default:
				return -EINVAL;
			};

			break;

		case DIVCMD_ICMP:
			/* Divert-all-ICMP flag. */
			switch(div_cf.arg1.int32) {
			case DIVARG1_ENABLE:
				if (div_blk->protos & DIVERT_PROTO_ICMP)
					return -EALREADY;
				div_blk->protos |= DIVERT_PROTO_ICMP;
				break;

			case DIVARG1_DISABLE:
				if (!(div_blk->protos & DIVERT_PROTO_ICMP))
					return -EALREADY;
				div_blk->protos &= ~DIVERT_PROTO_ICMP;
				break;

			default:
				return -EINVAL;
			};

			break;

		default:
			return -EINVAL;
		};

		break;

	default:
		return -EINVAL;
	};

	return 0;
}
|
|
/* |
* Check if packet should have its dest mac address set to the box itself |
* for diversion |
*/ |
|
#define ETH_DIVERT_FRAME(skb) \ |
memcpy(skb->mac.ethernet, skb->dev->dev_addr, ETH_ALEN); \ |
skb->pkt_type=PACKET_HOST |
|
void divert_frame(struct sk_buff *skb) |
{ |
struct ethhdr *eth = skb->mac.ethernet; |
struct iphdr *iph; |
struct tcphdr *tcph; |
struct udphdr *udph; |
struct divert_blk *divert = skb->dev->divert; |
int i, src, dst; |
unsigned char *skb_data_end = skb->data + skb->len; |
|
/* Packet is already aimed at us, return */ |
if (!memcmp(eth, skb->dev->dev_addr, ETH_ALEN)) |
return; |
|
/* proto is not IP, do nothing */ |
if (eth->h_proto != htons(ETH_P_IP)) |
return; |
|
/* Divert all IP frames ? */ |
if (divert->protos & DIVERT_PROTO_IP) { |
ETH_DIVERT_FRAME(skb); |
return; |
} |
|
/* Check for possible (maliciously) malformed IP frame (thanks Dave) */ |
iph = (struct iphdr *) skb->data; |
if (((iph->ihl<<2)+(unsigned char*)(iph)) >= skb_data_end) { |
printk(KERN_INFO "divert: malformed IP packet !\n"); |
return; |
} |
|
switch (iph->protocol) { |
/* Divert all ICMP frames ? */ |
case IPPROTO_ICMP: |
if (divert->protos & DIVERT_PROTO_ICMP) { |
ETH_DIVERT_FRAME(skb); |
return; |
} |
break; |
|
/* Divert all TCP frames ? */ |
case IPPROTO_TCP: |
if (divert->protos & DIVERT_PROTO_TCP) { |
ETH_DIVERT_FRAME(skb); |
return; |
} |
|
/* Check for possible (maliciously) malformed IP |
* frame (thanx Dave) |
*/ |
tcph = (struct tcphdr *) |
(((unsigned char *)iph) + (iph->ihl<<2)); |
if (((unsigned char *)(tcph+1)) >= skb_data_end) { |
printk(KERN_INFO "divert: malformed TCP packet !\n"); |
return; |
} |
|
/* Divert some tcp dst/src ports only ?*/ |
for (i = 0; i < MAX_DIVERT_PORTS; i++) { |
dst = divert->tcp_dst[i]; |
src = divert->tcp_src[i]; |
if ((dst && dst == tcph->dest) || |
(src && src == tcph->source)) { |
ETH_DIVERT_FRAME(skb); |
return; |
} |
} |
break; |
|
/* Divert all UDP frames ? */ |
case IPPROTO_UDP: |
if (divert->protos & DIVERT_PROTO_UDP) { |
ETH_DIVERT_FRAME(skb); |
return; |
} |
|
/* Check for possible (maliciously) malformed IP |
* packet (thanks Dave) |
*/ |
udph = (struct udphdr *) |
(((unsigned char *)iph) + (iph->ihl<<2)); |
if (((unsigned char *)(udph+1)) >= skb_data_end) { |
printk(KERN_INFO |
"divert: malformed UDP packet !\n"); |
return; |
} |
|
/* Divert some udp dst/src ports only ? */ |
for (i = 0; i < MAX_DIVERT_PORTS; i++) { |
dst = divert->udp_dst[i]; |
src = divert->udp_src[i]; |
if ((dst && dst == udph->dest) || |
(src && src == udph->source)) { |
ETH_DIVERT_FRAME(skb); |
return; |
} |
} |
break; |
}; |
|
return; |
} |
|
/rtnetlink.c
0,0 → 1,530
/* |
* INET An implementation of the TCP/IP protocol suite for the LINUX |
* operating system. INET is implemented using the BSD Socket |
* interface as the means of communication with the user level. |
* |
* Routing netlink socket interface: protocol independent part. |
* |
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> |
* |
* This program is free software; you can redistribute it and/or |
* modify it under the terms of the GNU General Public License |
* as published by the Free Software Foundation; either version |
* 2 of the License, or (at your option) any later version. |
* |
* Fixes: |
* Vitaly E. Lavrov RTA_OK arithmetics was wrong. |
*/ |
|
#include <linux/config.h> |
#include <linux/errno.h> |
#include <linux/types.h> |
#include <linux/socket.h> |
#include <linux/kernel.h> |
#include <linux/major.h> |
#include <linux/sched.h> |
#include <linux/timer.h> |
#include <linux/string.h> |
#include <linux/sockios.h> |
#include <linux/net.h> |
#include <linux/fcntl.h> |
#include <linux/mm.h> |
#include <linux/slab.h> |
#include <linux/interrupt.h> |
#include <linux/capability.h> |
#include <linux/skbuff.h> |
#include <linux/init.h> |
|
#include <asm/uaccess.h> |
#include <asm/system.h> |
#include <asm/string.h> |
|
#include <linux/inet.h> |
#include <linux/netdevice.h> |
#include <net/ip.h> |
#include <net/protocol.h> |
#include <net/arp.h> |
#include <net/route.h> |
#include <net/udp.h> |
#include <net/sock.h> |
#include <net/pkt_sched.h> |
|
/* Semaphore backing the rtnetlink lock; see rtnl_lock()/rtnl_unlock(). */
DECLARE_MUTEX(rtnl_sem);
|
/* Acquire the rtnetlink lock for configuration work: shared part
 * first, then the exclusive part.  Paired with rtnl_unlock(), which
 * releases in the reverse order. */
void rtnl_lock(void)
{
	rtnl_shlock();
	rtnl_exlock();
}
|
/* Release the rtnetlink lock taken by rtnl_lock(); drops the exclusive
 * part before the shared part (reverse of acquisition order). */
void rtnl_unlock(void)
{
	rtnl_exunlock();
	rtnl_shunlock();
}
|
int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len) |
{ |
memset(tb, 0, sizeof(struct rtattr*)*maxattr); |
|
while (RTA_OK(rta, len)) { |
unsigned flavor = rta->rta_type; |
if (flavor && flavor <= maxattr) |
tb[flavor-1] = rta; |
rta = RTA_NEXT(rta, len); |
} |
return 0; |
} |
|
/* Kernel-side netlink socket for NETLINK_ROUTE (set up in rtnetlink_init). */
struct sock *rtnl;

/* Per-protocol-family message handler tables; NULL entries fall back
 * to the PF_UNSPEC table in rtnetlink_rcv_msg(). */
struct rtnetlink_link * rtnetlink_links[NPROTO];
|
/* Minimum netlink message length per RTM message group, indexed by
 * (type - RTM_BASE) >> 2 (the NEW/DEL/GET/SET variants share a slot). */
static const int rtm_min[(RTM_MAX+1-RTM_BASE)/4] =
{
	NLMSG_LENGTH(sizeof(struct ifinfomsg)),	/* RTM_*LINK     */
	NLMSG_LENGTH(sizeof(struct ifaddrmsg)),	/* RTM_*ADDR     */
	NLMSG_LENGTH(sizeof(struct rtmsg)),	/* RTM_*ROUTE    */
	NLMSG_LENGTH(sizeof(struct ndmsg)),	/* RTM_*NEIGH    */
	NLMSG_LENGTH(sizeof(struct rtmsg)),	/* RTM_*RULE     */
	NLMSG_LENGTH(sizeof(struct tcmsg)),	/* RTM_*QDISC    */
	NLMSG_LENGTH(sizeof(struct tcmsg)),	/* RTM_*TCLASS   */
	NLMSG_LENGTH(sizeof(struct tcmsg))	/* RTM_*TFILTER  */
};
|
/* Highest valid attribute type per RTM message group; indexed like
 * rtm_min above, used to bound the rta[] table in rtnetlink_rcv_msg(). */
static const int rta_max[(RTM_MAX+1-RTM_BASE)/4] =
{
	IFLA_MAX,	/* RTM_*LINK    */
	IFA_MAX,	/* RTM_*ADDR    */
	RTA_MAX,	/* RTM_*ROUTE   */
	NDA_MAX,	/* RTM_*NEIGH   */
	RTA_MAX,	/* RTM_*RULE    */
	TCA_MAX,	/* RTM_*QDISC   */
	TCA_MAX,	/* RTM_*TCLASS  */
	TCA_MAX		/* RTM_*TFILTER */
};
|
void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data) |
{ |
struct rtattr *rta; |
int size = RTA_LENGTH(attrlen); |
|
rta = (struct rtattr*)skb_put(skb, RTA_ALIGN(size)); |
rta->rta_type = attrtype; |
rta->rta_len = size; |
memcpy(RTA_DATA(rta), data, attrlen); |
} |
|
/* Broadcast @skb to the multicast @group and, when @echo is set, also
 * unicast it back to the requester @pid.
 * netlink_broadcast() consumes one skb reference, so an extra
 * reference is taken up front when the skb must survive for the
 * follow-up unicast (which consumes the second one).
 * Returns the unicast result when echoing, otherwise 0. */
int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo)
{
	int err = 0;

	NETLINK_CB(skb).dst_groups = group;
	if (echo)
		atomic_inc(&skb->users);
	netlink_broadcast(rtnl, skb, pid, group, GFP_KERNEL);
	if (echo)
		err = netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
	return err;
}
|
/* Emit a nested RTA_METRICS attribute holding every non-zero entry of
 * @metrics (RTAX_MAX unsigned values) into @skb.
 * Note: RTA_PUT jumps to the rtattr_failure label when the skb lacks
 * room, trimming all partial output.  Returns 0 on success, -1 when
 * the skb ran out of space. */
int rtnetlink_put_metrics(struct sk_buff *skb, unsigned *metrics)
{
	struct rtattr *mx = (struct rtattr*)skb->tail;
	int i;

	/* Placeholder header; the real nested length is patched below. */
	RTA_PUT(skb, RTA_METRICS, 0, NULL);
	for (i=0; i<RTAX_MAX; i++) {
		if (metrics[i])
			RTA_PUT(skb, i+1, sizeof(unsigned), metrics+i);
	}
	mx->rta_len = skb->tail - (u8*)mx;
	/* No metric was emitted: remove the empty container again. */
	if (mx->rta_len == RTA_LENGTH(0))
		skb_trim(skb, (u8*)mx - skb->data);
	return 0;

rtattr_failure:
	skb_trim(skb, (u8*)mx - skb->data);
	return -1;
}
|
|
/* Build one RTM_NEWLINK/RTM_DELLINK message describing @dev into @skb.
 * Returns skb->len on success; on overflow all partial output is
 * trimmed and -1 is returned (NLMSG_PUT/RTA_PUT jump to the failure
 * labels below when the skb runs out of room). */
static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
				 int type, u32 pid, u32 seq, u32 change)
{
	struct nlmsghdr *nlh;
	struct ifinfomsg *r;
	unsigned char *start = skb->tail;
	unsigned mtu = dev->mtu;

	nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*r));
	if (pid)
		nlh->nlmsg_flags |= NLM_F_MULTI;

	r = NLMSG_DATA(nlh);
	r->ifi_family = AF_UNSPEC;
	r->ifi_type = dev->type;
	r->ifi_index = dev->ifindex;
	r->ifi_flags = dev->flags;
	r->ifi_change = change;

	/* Derive IFF_RUNNING from live device state rather than trusting
	 * the cached flags word. */
	if (netif_running(dev) && netif_carrier_ok(dev))
		r->ifi_flags |= IFF_RUNNING;
	else
		r->ifi_flags &= ~IFF_RUNNING;

	RTA_PUT(skb, IFLA_IFNAME, strlen(dev->name)+1, dev->name);
	if (dev->addr_len) {
		RTA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr);
		RTA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast);
	}
	RTA_PUT(skb, IFLA_MTU, sizeof(mtu), &mtu);
	if (dev->ifindex != dev->iflink)
		RTA_PUT(skb, IFLA_LINK, sizeof(int), &dev->iflink);
	if (dev->qdisc_sleeping)
		RTA_PUT(skb, IFLA_QDISC,
			strlen(dev->qdisc_sleeping->ops->id) + 1,
			dev->qdisc_sleeping->ops->id);
	if (dev->master)
		RTA_PUT(skb, IFLA_MASTER, sizeof(int), &dev->master->ifindex);
	if (dev->get_stats) {
		struct net_device_stats *stats = dev->get_stats(dev);

		if (stats)
			RTA_PUT(skb, IFLA_STATS, sizeof(*stats), stats);
	}
	nlh->nlmsg_len = skb->tail - start;
	return skb->len;

nlmsg_failure:
rtattr_failure:
	skb_trim(skb, start - skb->data);
	return -1;
}
|
int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) |
{ |
int idx; |
int s_idx = cb->args[0]; |
struct net_device *dev; |
|
read_lock(&dev_base_lock); |
for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) { |
if (idx < s_idx) |
continue; |
if (rtnetlink_fill_ifinfo(skb, dev, RTM_NEWLINK, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, 0) <= 0) |
break; |
} |
read_unlock(&dev_base_lock); |
cb->args[0] = idx; |
|
return skb->len; |
} |
|
int rtnetlink_dump_all(struct sk_buff *skb, struct netlink_callback *cb) |
{ |
int idx; |
int s_idx = cb->family; |
|
if (s_idx == 0) |
s_idx = 1; |
for (idx=1; idx<NPROTO; idx++) { |
int type = cb->nlh->nlmsg_type-RTM_BASE; |
if (idx < s_idx || idx == PF_PACKET) |
continue; |
if (rtnetlink_links[idx] == NULL || |
rtnetlink_links[idx][type].dumpit == NULL) |
continue; |
if (idx > s_idx) |
memset(&cb->args[0], 0, sizeof(cb->args)); |
if (rtnetlink_links[idx][type].dumpit(skb, cb)) |
break; |
} |
cb->family = idx; |
|
return skb->len; |
} |
|
void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) |
{ |
struct sk_buff *skb; |
int size = NLMSG_GOODSIZE; |
|
skb = alloc_skb(size, GFP_KERNEL); |
if (!skb) |
return; |
|
if (rtnetlink_fill_ifinfo(skb, dev, type, 0, 0, change) < 0) { |
kfree_skb(skb); |
return; |
} |
NETLINK_CB(skb).dst_groups = RTMGRP_LINK; |
netlink_broadcast(rtnl, skb, 0, RTMGRP_LINK, GFP_KERNEL); |
} |
|
/* Completion callback handed to netlink_dump_start(); rtnetlink dumps
 * keep no private state, so there is nothing to clean up. */
static int rtnetlink_done(struct netlink_callback *cb)
{
	return 0;
}
|
/* Process one rtnetlink message. */ |
|
static __inline__ int |
rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp) |
{ |
struct rtnetlink_link *link; |
struct rtnetlink_link *link_tab; |
struct rtattr *rta[RTATTR_MAX]; |
|
int exclusive = 0; |
int sz_idx, kind; |
int min_len; |
int family; |
int type; |
int err; |
|
/* Only requests are handled by kernel now */ |
if (!(nlh->nlmsg_flags&NLM_F_REQUEST)) |
return 0; |
|
type = nlh->nlmsg_type; |
|
/* A control message: ignore them */ |
if (type < RTM_BASE) |
return 0; |
|
/* Unknown message: reply with EINVAL */ |
if (type > RTM_MAX) |
goto err_inval; |
|
type -= RTM_BASE; |
|
/* All the messages must have at least 1 byte length */ |
if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct rtgenmsg))) |
return 0; |
|
family = ((struct rtgenmsg*)NLMSG_DATA(nlh))->rtgen_family; |
if (family > NPROTO) { |
*errp = -EAFNOSUPPORT; |
return -1; |
} |
|
link_tab = rtnetlink_links[family]; |
if (link_tab == NULL) |
link_tab = rtnetlink_links[PF_UNSPEC]; |
link = &link_tab[type]; |
|
sz_idx = type>>2; |
kind = type&3; |
|
if (kind != 2 && !cap_raised(NETLINK_CB(skb).eff_cap, CAP_NET_ADMIN)) { |
*errp = -EPERM; |
return -1; |
} |
|
if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { |
u32 rlen; |
|
if (link->dumpit == NULL) |
link = &(rtnetlink_links[PF_UNSPEC][type]); |
|
if (link->dumpit == NULL) |
goto err_inval; |
|
if ((*errp = netlink_dump_start(rtnl, skb, nlh, |
link->dumpit, |
rtnetlink_done)) != 0) { |
return -1; |
} |
rlen = NLMSG_ALIGN(nlh->nlmsg_len); |
if (rlen > skb->len) |
rlen = skb->len; |
skb_pull(skb, rlen); |
return -1; |
} |
|
if (kind != 2) { |
if (rtnl_exlock_nowait()) { |
*errp = 0; |
return -1; |
} |
exclusive = 1; |
} |
|
memset(&rta, 0, sizeof(rta)); |
|
min_len = rtm_min[sz_idx]; |
if (nlh->nlmsg_len < min_len) |
goto err_inval; |
|
if (nlh->nlmsg_len > min_len) { |
int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len); |
struct rtattr *attr = (void*)nlh + NLMSG_ALIGN(min_len); |
|
while (RTA_OK(attr, attrlen)) { |
unsigned flavor = attr->rta_type; |
if (flavor) { |
if (flavor > rta_max[sz_idx]) |
goto err_inval; |
rta[flavor-1] = attr; |
} |
attr = RTA_NEXT(attr, attrlen); |
} |
} |
|
if (link->doit == NULL) |
link = &(rtnetlink_links[PF_UNSPEC][type]); |
if (link->doit == NULL) |
goto err_inval; |
err = link->doit(skb, nlh, (void *)&rta); |
|
if (exclusive) |
rtnl_exunlock(); |
*errp = err; |
return err; |
|
err_inval: |
if (exclusive) |
rtnl_exunlock(); |
*errp = -EINVAL; |
return -1; |
} |
|
/* |
* Process one packet of messages. |
* Malformed skbs with wrong lengths of messages are discarded silently. |
*/ |
|
/*
 * Process one packet of messages.
 * Malformed skbs with wrong lengths of messages are discarded silently.
 *
 * Each well-formed nlmsghdr is handed to rtnetlink_rcv_msg() and then
 * pulled from the skb.  Returns 0 when the skb is exhausted (or found
 * malformed), -1 when processing must be interrupted and resumed later
 * (the unprocessed message is deliberately left in the skb).
 */
static inline int rtnetlink_rcv_skb(struct sk_buff *skb)
{
	int err;
	struct nlmsghdr * nlh;

	while (skb->len >= NLMSG_SPACE(0)) {
		u32 rlen;

		nlh = (struct nlmsghdr *)skb->data;
		/* Inconsistent header/skb lengths: drop the rest silently. */
		if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
			return 0;
		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
		if (rlen > skb->len)
			rlen = skb->len;
		if (rtnetlink_rcv_msg(skb, nlh, &err)) {
			/* Not error, but we must interrupt processing here:
			 * Note, that in this case we do not pull message
			 * from skb, it will be processed later.
			 */
			if (err == 0)
				return -1;
			netlink_ack(skb, nlh, err);
		} else if (nlh->nlmsg_flags&NLM_F_ACK)
			netlink_ack(skb, nlh, 0);
		skb_pull(skb, rlen);
	}

	return 0;
}
|
/* |
* rtnetlink input queue processing routine: |
* - try to acquire shared lock. If it is failed, defer processing. |
* - feed skbs to rtnetlink_rcv_skb, until it refuse a message, |
* that will occur, when a dump started and/or acquisition of |
* exclusive lock failed. |
*/ |
|
/* Data-ready callback for the rtnl kernel socket.
 * Drains the receive queue under the shared rtnl lock; an skb that
 * rtnetlink_rcv_skb() could not finish (dump started, or exclusive
 * lock unavailable) is requeued at the head if it still holds data.
 * The outer loop re-checks the queue because new skbs may arrive
 * between releasing the lock and returning. */
static void rtnetlink_rcv(struct sock *sk, int len)
{
	do {
		struct sk_buff *skb;

		/* Shared lock busy: a concurrent holder will re-run us. */
		if (rtnl_shlock_nowait())
			return;

		while ((skb = skb_dequeue(&sk->receive_queue)) != NULL) {
			if (rtnetlink_rcv_skb(skb)) {
				if (skb->len)
					skb_queue_head(&sk->receive_queue,
						       skb);
				else
					kfree_skb(skb);
				break;
			}
			kfree_skb(skb);
		}

		up(&rtnl_sem);
	} while (rtnl && rtnl->receive_queue.qlen);
}
|
/* PF_UNSPEC fallback handler table, indexed by (type - RTM_BASE).
 * Each row is { doit, dumpit }; rows come in groups of four
 * (NEW, DEL, GET, SET) per message family. */
static struct rtnetlink_link link_rtnetlink_table[RTM_MAX-RTM_BASE+1] =
{
	{ NULL, NULL, },			/* RTM_NEWLINK  */
	{ NULL, NULL, },			/* RTM_DELLINK  */
	{ NULL, rtnetlink_dump_ifinfo, },	/* RTM_GETLINK  */
	{ NULL, NULL, },			/* RTM_SETLINK  */

	{ NULL, NULL, },			/* RTM_NEWADDR  */
	{ NULL, NULL, },			/* RTM_DELADDR  */
	{ NULL, rtnetlink_dump_all, },		/* RTM_GETADDR  */
	{ NULL, NULL, },

	{ NULL, NULL, },			/* RTM_NEWROUTE */
	{ NULL, NULL, },			/* RTM_DELROUTE */
	{ NULL, rtnetlink_dump_all, },		/* RTM_GETROUTE */
	{ NULL, NULL, },

	{ neigh_add, NULL, },			/* RTM_NEWNEIGH */
	{ neigh_delete, NULL, },		/* RTM_DELNEIGH */
	{ NULL, neigh_dump_info, },		/* RTM_GETNEIGH */
	{ NULL, NULL, },

	{ NULL, NULL, },			/* RTM_NEWRULE  */
	{ NULL, NULL, },			/* RTM_DELRULE  */
	{ NULL, NULL, },			/* RTM_GETRULE  */
	{ NULL, NULL, },
};
|
|
static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr) |
{ |
struct net_device *dev = ptr; |
switch (event) { |
case NETDEV_UNREGISTER: |
rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); |
break; |
case NETDEV_REGISTER: |
rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U); |
break; |
case NETDEV_UP: |
case NETDEV_DOWN: |
rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); |
break; |
case NETDEV_CHANGE: |
case NETDEV_GOING_DOWN: |
break; |
default: |
rtmsg_ifinfo(RTM_NEWLINK, dev, 0); |
break; |
} |
return NOTIFY_DONE; |
} |
|
/* Notifier hooking rtnetlink_event into the netdevice event chain;
 * registered from rtnetlink_init().  Positional init: callback,
 * next-link, priority. */
struct notifier_block rtnetlink_dev_notifier = {
	rtnetlink_event,
	NULL,
	0
};
|
|
/* Boot-time setup: create the NETLINK_ROUTE kernel socket, allow
 * non-root receive, hook device notifications, and install the
 * fallback handler table for PF_UNSPEC and PF_PACKET. */
void __init rtnetlink_init(void)
{
#ifdef RTNL_DEBUG
	printk("Initializing RT netlink socket\n");
#endif
	rtnl = netlink_kernel_create(NETLINK_ROUTE, rtnetlink_rcv);
	if (rtnl == NULL)
		panic("rtnetlink_init: cannot initialize rtnetlink\n");
	netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV);
	register_netdevice_notifier(&rtnetlink_dev_notifier);
	rtnetlink_links[PF_UNSPEC] = link_rtnetlink_table;
	rtnetlink_links[PF_PACKET] = link_rtnetlink_table;
}
/ethtool.c
0,0 → 1,694
/* |
* net/core/ethtool.c - Ethtool ioctl handler |
* Copyright (c) 2003 Matthew Wilcox <matthew@wil.cx> |
* |
* This file is where we call all the ethtool_ops commands to get |
* the information ethtool needs. We fall back to calling do_ioctl() |
* for drivers which haven't been converted to ethtool_ops yet. |
* |
* It's GPL, stupid. |
*/ |
|
#include <linux/types.h> |
#include <linux/errno.h> |
#include <linux/ethtool.h> |
#include <linux/netdevice.h> |
#include <asm/uaccess.h> |
|
/* |
* Some useful ethtool_ops methods that're device independent. |
* If we find that all drivers want to do the same thing here, |
* we can turn these into dev_() function calls. |
*/ |
|
/* Generic get_link helper: report the device's carrier state. */
u32 ethtool_op_get_link(struct net_device *dev)
{
	if (netif_carrier_ok(dev))
		return 1;
	return 0;
}
|
u32 ethtool_op_get_tx_csum(struct net_device *dev) |
{ |
return (dev->features & NETIF_F_IP_CSUM) != 0; |
} |
|
/* Generic set_tx_csum helper: toggle NETIF_F_IP_CSUM per @data. */
int ethtool_op_set_tx_csum(struct net_device *dev, u32 data)
{
	if (data)
		dev->features |= NETIF_F_IP_CSUM;
	else
		dev->features &= ~NETIF_F_IP_CSUM;
	return 0;
}
|
u32 ethtool_op_get_sg(struct net_device *dev) |
{ |
return (dev->features & NETIF_F_SG) != 0; |
} |
|
/* Generic set_sg helper: toggle NETIF_F_SG per @data. */
int ethtool_op_set_sg(struct net_device *dev, u32 data)
{
	if (data)
		dev->features |= NETIF_F_SG;
	else
		dev->features &= ~NETIF_F_SG;
	return 0;
}
|
/* Handlers for each ethtool command */ |
|
static int ethtool_get_settings(struct net_device *dev, void *useraddr) |
{ |
struct ethtool_cmd cmd = { ETHTOOL_GSET }; |
int err; |
|
if (!dev->ethtool_ops->get_settings) |
return -EOPNOTSUPP; |
|
err = dev->ethtool_ops->get_settings(dev, &cmd); |
if (err < 0) |
return err; |
|
if (copy_to_user(useraddr, &cmd, sizeof(cmd))) |
return -EFAULT; |
return 0; |
} |
|
static int ethtool_set_settings(struct net_device *dev, void *useraddr) |
{ |
struct ethtool_cmd cmd; |
|
if (!dev->ethtool_ops->set_settings) |
return -EOPNOTSUPP; |
|
if (copy_from_user(&cmd, useraddr, sizeof(cmd))) |
return -EFAULT; |
|
return dev->ethtool_ops->set_settings(dev, &cmd); |
} |
|
static int ethtool_get_drvinfo(struct net_device *dev, void *useraddr) |
{ |
struct ethtool_drvinfo info; |
struct ethtool_ops *ops = dev->ethtool_ops; |
|
if (!ops->get_drvinfo) |
return -EOPNOTSUPP; |
|
memset(&info, 0, sizeof(info)); |
info.cmd = ETHTOOL_GDRVINFO; |
ops->get_drvinfo(dev, &info); |
|
if (ops->self_test_count) |
info.testinfo_len = ops->self_test_count(dev); |
if (ops->get_stats_count) |
info.n_stats = ops->get_stats_count(dev); |
if (ops->get_regs_len) |
info.regdump_len = ops->get_regs_len(dev); |
if (ops->get_eeprom_len) |
info.eedump_len = ops->get_eeprom_len(dev); |
|
if (copy_to_user(useraddr, &info, sizeof(info))) |
return -EFAULT; |
return 0; |
} |
|
static int ethtool_get_regs(struct net_device *dev, char *useraddr) |
{ |
struct ethtool_regs regs; |
struct ethtool_ops *ops = dev->ethtool_ops; |
void *regbuf; |
int reglen, ret; |
|
if (!ops->get_regs || !ops->get_regs_len) |
return -EOPNOTSUPP; |
|
if (copy_from_user(®s, useraddr, sizeof(regs))) |
return -EFAULT; |
|
reglen = ops->get_regs_len(dev); |
if (regs.len > reglen) |
regs.len = reglen; |
|
regbuf = kmalloc(reglen, GFP_USER); |
if (!regbuf) |
return -ENOMEM; |
|
ops->get_regs(dev, ®s, regbuf); |
|
ret = -EFAULT; |
if (copy_to_user(useraddr, ®s, sizeof(regs))) |
goto out; |
useraddr += offsetof(struct ethtool_regs, data); |
if (copy_to_user(useraddr, regbuf, reglen)) |
goto out; |
ret = 0; |
|
out: |
kfree(regbuf); |
return ret; |
} |
|
static int ethtool_get_wol(struct net_device *dev, char *useraddr) |
{ |
struct ethtool_wolinfo wol = { ETHTOOL_GWOL }; |
|
if (!dev->ethtool_ops->get_wol) |
return -EOPNOTSUPP; |
|
dev->ethtool_ops->get_wol(dev, &wol); |
|
if (copy_to_user(useraddr, &wol, sizeof(wol))) |
return -EFAULT; |
return 0; |
} |
|
static int ethtool_set_wol(struct net_device *dev, char *useraddr) |
{ |
struct ethtool_wolinfo wol; |
|
if (!dev->ethtool_ops->set_wol) |
return -EOPNOTSUPP; |
|
if (copy_from_user(&wol, useraddr, sizeof(wol))) |
return -EFAULT; |
|
return dev->ethtool_ops->set_wol(dev, &wol); |
} |
|
static int ethtool_get_msglevel(struct net_device *dev, char *useraddr) |
{ |
struct ethtool_value edata = { ETHTOOL_GMSGLVL }; |
|
if (!dev->ethtool_ops->get_msglevel) |
return -EOPNOTSUPP; |
|
edata.data = dev->ethtool_ops->get_msglevel(dev); |
|
if (copy_to_user(useraddr, &edata, sizeof(edata))) |
return -EFAULT; |
return 0; |
} |
|
static int ethtool_set_msglevel(struct net_device *dev, char *useraddr) |
{ |
struct ethtool_value edata; |
|
if (!dev->ethtool_ops->set_msglevel) |
return -EOPNOTSUPP; |
|
if (copy_from_user(&edata, useraddr, sizeof(edata))) |
return -EFAULT; |
|
dev->ethtool_ops->set_msglevel(dev, edata.data); |
return 0; |
} |
|
static int ethtool_nway_reset(struct net_device *dev) |
{ |
if (!dev->ethtool_ops->nway_reset) |
return -EOPNOTSUPP; |
|
return dev->ethtool_ops->nway_reset(dev); |
} |
|
static int ethtool_get_link(struct net_device *dev, void *useraddr) |
{ |
struct ethtool_value edata = { ETHTOOL_GLINK }; |
|
if (!dev->ethtool_ops->get_link) |
return -EOPNOTSUPP; |
|
edata.data = dev->ethtool_ops->get_link(dev); |
|
if (copy_to_user(useraddr, &edata, sizeof(edata))) |
return -EFAULT; |
return 0; |
} |
|
/* ETHTOOL_GEEPROM: read a byte range from the device EEPROM into the
 * user buffer that follows the ethtool_eeprom header.  The requested
 * range is validated against the total EEPROM size before the driver
 * is called.  Returns 0, the driver's error, or -EFAULT/-EINVAL/-ENOMEM. */
static int ethtool_get_eeprom(struct net_device *dev, void *useraddr)
{
	struct ethtool_eeprom eeprom;
	struct ethtool_ops *ops = dev->ethtool_ops;
	u8 *data;
	int ret;

	if (!ops->get_eeprom || !ops->get_eeprom_len)
		return -EOPNOTSUPP;

	if (copy_from_user(&eeprom, useraddr, sizeof(eeprom)))
		return -EFAULT;

	/* Check for wrap and zero */
	if (eeprom.offset + eeprom.len <= eeprom.offset)
		return -EINVAL;

	/* Check for exceeding total eeprom len */
	if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev))
		return -EINVAL;

	data = kmalloc(eeprom.len, GFP_USER);
	if (!data)
		return -ENOMEM;

	ret = -EFAULT;
	/* NOTE(review): pre-reading the user data area on a GET looks
	 * unnecessary (the driver overwrites it below); presumably it
	 * only proves the buffer is accessible — confirm before removing. */
	if (copy_from_user(data, useraddr + sizeof(eeprom), eeprom.len))
		goto out;

	ret = ops->get_eeprom(dev, &eeprom, data);
	if (ret)
		goto out;

	ret = -EFAULT;
	if (copy_to_user(useraddr, &eeprom, sizeof(eeprom)))
		goto out;
	if (copy_to_user(useraddr + sizeof(eeprom), data, eeprom.len))
		goto out;
	ret = 0;

 out:
	kfree(data);
	return ret;
}
|
static int ethtool_set_eeprom(struct net_device *dev, void *useraddr) |
{ |
struct ethtool_eeprom eeprom; |
struct ethtool_ops *ops = dev->ethtool_ops; |
u8 *data; |
int ret; |
|
if (!ops->set_eeprom || !ops->get_eeprom_len) |
return -EOPNOTSUPP; |
|
if (copy_from_user(&eeprom, useraddr, sizeof(eeprom))) |
return -EFAULT; |
|
/* Check for wrap and zero */ |
if (eeprom.offset + eeprom.len <= eeprom.offset) |
return -EINVAL; |
|
/* Check for exceeding total eeprom len */ |
if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev)) |
return -EINVAL; |
|
data = kmalloc(eeprom.len, GFP_USER); |
if (!data) |
return -ENOMEM; |
|
ret = -EFAULT; |
if (copy_from_user(data, useraddr + sizeof(eeprom), eeprom.len)) |
goto out; |
|
ret = ops->set_eeprom(dev, &eeprom, data); |
if (ret) |
goto out; |
|
if (copy_to_user(useraddr + sizeof(eeprom), data, eeprom.len)) |
ret = -EFAULT; |
|
out: |
kfree(data); |
return ret; |
} |
|
static int ethtool_get_coalesce(struct net_device *dev, void *useraddr) |
{ |
struct ethtool_coalesce coalesce = { ETHTOOL_GCOALESCE }; |
|
if (!dev->ethtool_ops->get_coalesce) |
return -EOPNOTSUPP; |
|
dev->ethtool_ops->get_coalesce(dev, &coalesce); |
|
if (copy_to_user(useraddr, &coalesce, sizeof(coalesce))) |
return -EFAULT; |
return 0; |
} |
|
static int ethtool_set_coalesce(struct net_device *dev, void *useraddr) |
{ |
struct ethtool_coalesce coalesce; |
|
if (!dev->ethtool_ops->get_coalesce) |
return -EOPNOTSUPP; |
|
if (copy_from_user(&coalesce, useraddr, sizeof(coalesce))) |
return -EFAULT; |
|
return dev->ethtool_ops->set_coalesce(dev, &coalesce); |
} |
|
static int ethtool_get_ringparam(struct net_device *dev, void *useraddr) |
{ |
struct ethtool_ringparam ringparam = { ETHTOOL_GRINGPARAM }; |
|
if (!dev->ethtool_ops->get_ringparam) |
return -EOPNOTSUPP; |
|
dev->ethtool_ops->get_ringparam(dev, &ringparam); |
|
if (copy_to_user(useraddr, &ringparam, sizeof(ringparam))) |
return -EFAULT; |
return 0; |
} |
|
static int ethtool_set_ringparam(struct net_device *dev, void *useraddr) |
{ |
struct ethtool_ringparam ringparam; |
|
if (!dev->ethtool_ops->set_ringparam) |
return -EOPNOTSUPP; |
|
if (copy_from_user(&ringparam, useraddr, sizeof(ringparam))) |
return -EFAULT; |
|
return dev->ethtool_ops->set_ringparam(dev, &ringparam); |
} |
|
static int ethtool_get_pauseparam(struct net_device *dev, void *useraddr) |
{ |
struct ethtool_pauseparam pauseparam = { ETHTOOL_GPAUSEPARAM }; |
|
if (!dev->ethtool_ops->get_pauseparam) |
return -EOPNOTSUPP; |
|
dev->ethtool_ops->get_pauseparam(dev, &pauseparam); |
|
if (copy_to_user(useraddr, &pauseparam, sizeof(pauseparam))) |
return -EFAULT; |
return 0; |
} |
|
static int ethtool_set_pauseparam(struct net_device *dev, void *useraddr) |
{ |
struct ethtool_pauseparam pauseparam; |
|
if (!dev->ethtool_ops->get_pauseparam) |
return -EOPNOTSUPP; |
|
if (copy_from_user(&pauseparam, useraddr, sizeof(pauseparam))) |
return -EFAULT; |
|
return dev->ethtool_ops->set_pauseparam(dev, &pauseparam); |
} |
|
static int ethtool_get_rx_csum(struct net_device *dev, char *useraddr) |
{ |
struct ethtool_value edata = { ETHTOOL_GRXCSUM }; |
|
if (!dev->ethtool_ops->get_rx_csum) |
return -EOPNOTSUPP; |
|
edata.data = dev->ethtool_ops->get_rx_csum(dev); |
|
if (copy_to_user(useraddr, &edata, sizeof(edata))) |
return -EFAULT; |
return 0; |
} |
|
static int ethtool_set_rx_csum(struct net_device *dev, char *useraddr) |
{ |
struct ethtool_value edata; |
|
if (!dev->ethtool_ops->set_rx_csum) |
return -EOPNOTSUPP; |
|
if (copy_from_user(&edata, useraddr, sizeof(edata))) |
return -EFAULT; |
|
dev->ethtool_ops->set_rx_csum(dev, edata.data); |
return 0; |
} |
|
static int ethtool_get_tx_csum(struct net_device *dev, char *useraddr) |
{ |
struct ethtool_value edata = { ETHTOOL_GTXCSUM }; |
|
if (!dev->ethtool_ops->get_tx_csum) |
return -EOPNOTSUPP; |
|
edata.data = dev->ethtool_ops->get_tx_csum(dev); |
|
if (copy_to_user(useraddr, &edata, sizeof(edata))) |
return -EFAULT; |
return 0; |
} |
|
static int ethtool_set_tx_csum(struct net_device *dev, char *useraddr) |
{ |
struct ethtool_value edata; |
|
if (!dev->ethtool_ops->set_tx_csum) |
return -EOPNOTSUPP; |
|
if (copy_from_user(&edata, useraddr, sizeof(edata))) |
return -EFAULT; |
|
return dev->ethtool_ops->set_tx_csum(dev, edata.data); |
} |
|
static int ethtool_get_sg(struct net_device *dev, char *useraddr) |
{ |
struct ethtool_value edata = { ETHTOOL_GSG }; |
|
if (!dev->ethtool_ops->get_sg) |
return -EOPNOTSUPP; |
|
edata.data = dev->ethtool_ops->get_sg(dev); |
|
if (copy_to_user(useraddr, &edata, sizeof(edata))) |
return -EFAULT; |
return 0; |
} |
|
static int ethtool_set_sg(struct net_device *dev, char *useraddr) |
{ |
struct ethtool_value edata; |
|
if (!dev->ethtool_ops->set_sg) |
return -EOPNOTSUPP; |
|
if (copy_from_user(&edata, useraddr, sizeof(edata))) |
return -EFAULT; |
|
return dev->ethtool_ops->set_sg(dev, edata.data); |
} |
|
static int ethtool_self_test(struct net_device *dev, char *useraddr) |
{ |
struct ethtool_test test; |
struct ethtool_ops *ops = dev->ethtool_ops; |
u64 *data; |
int ret; |
|
if (!ops->self_test || !ops->self_test_count) |
return -EOPNOTSUPP; |
|
if (copy_from_user(&test, useraddr, sizeof(test))) |
return -EFAULT; |
|
test.len = ops->self_test_count(dev); |
data = kmalloc(test.len * sizeof(u64), GFP_USER); |
if (!data) |
return -ENOMEM; |
|
ops->self_test(dev, &test, data); |
|
ret = -EFAULT; |
if (copy_to_user(useraddr, &test, sizeof(test))) |
goto out; |
useraddr += sizeof(test); |
if (copy_to_user(useraddr, data, test.len * sizeof(u64))) |
goto out; |
ret = 0; |
|
out: |
kfree(data); |
return ret; |
} |
|
static int ethtool_get_strings(struct net_device *dev, void *useraddr) |
{ |
struct ethtool_gstrings gstrings; |
struct ethtool_ops *ops = dev->ethtool_ops; |
u8 *data; |
int ret; |
|
if (!ops->get_strings) |
return -EOPNOTSUPP; |
|
if (copy_from_user(&gstrings, useraddr, sizeof(gstrings))) |
return -EFAULT; |
|
switch (gstrings.string_set) { |
case ETH_SS_TEST: |
if (!ops->self_test_count) |
return -EOPNOTSUPP; |
gstrings.len = ops->self_test_count(dev); |
break; |
case ETH_SS_STATS: |
if (!ops->get_stats_count) |
return -EOPNOTSUPP; |
gstrings.len = ops->get_stats_count(dev); |
break; |
default: |
return -EINVAL; |
} |
|
data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER); |
if (!data) |
return -ENOMEM; |
|
ops->get_strings(dev, gstrings.string_set, data); |
|
ret = -EFAULT; |
if (copy_to_user(useraddr, &gstrings, sizeof(gstrings))) |
goto out; |
useraddr += sizeof(gstrings); |
if (copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN)) |
goto out; |
ret = 0; |
|
out: |
kfree(data); |
return ret; |
} |
|
static int ethtool_phys_id(struct net_device *dev, void *useraddr) |
{ |
struct ethtool_value id; |
|
if (!dev->ethtool_ops->phys_id) |
return -EOPNOTSUPP; |
|
if (copy_from_user(&id, useraddr, sizeof(id))) |
return -EFAULT; |
|
return dev->ethtool_ops->phys_id(dev, id.data); |
} |
|
static int ethtool_get_stats(struct net_device *dev, void *useraddr) |
{ |
struct ethtool_stats stats; |
struct ethtool_ops *ops = dev->ethtool_ops; |
u64 *data; |
int ret; |
|
if (!ops->get_ethtool_stats || !ops->get_stats_count) |
return -EOPNOTSUPP; |
|
if (copy_from_user(&stats, useraddr, sizeof(stats))) |
return -EFAULT; |
|
stats.n_stats = ops->get_stats_count(dev); |
data = kmalloc(stats.n_stats * sizeof(u64), GFP_USER); |
if (!data) |
return -ENOMEM; |
|
ops->get_ethtool_stats(dev, &stats, data); |
|
ret = -EFAULT; |
if (copy_to_user(useraddr, &stats, sizeof(stats))) |
goto out; |
useraddr += sizeof(stats); |
if (copy_to_user(useraddr, data, stats.n_stats * sizeof(u64))) |
goto out; |
ret = 0; |
|
out: |
kfree(data); |
return ret; |
} |
|
/* The main entry point in this file. Called from net/core/dev.c */ |
|
/* Dispatch a SIOCETHTOOL ioctl: look up the device by name, read the
 * ethtool command word from the user buffer, and route to the matching
 * handler.  Devices without ethtool_ops fall back to the driver's
 * legacy do_ioctl().  Requires CAP_NET_ADMIN for every command. */
int dev_ethtool(struct ifreq *ifr)
{
	struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
	void *useraddr = (void *) ifr->ifr_data;
	u32 ethcmd;

	/*
	 * XXX: This can be pushed down into the ethtool_* handlers that
	 * need it. Keep existing behaviour for the moment.
	 */
	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (!dev || !netif_device_present(dev))
		return -ENODEV;

	/* Unconverted driver: hand the whole ioctl to do_ioctl() below. */
	if (!dev->ethtool_ops)
		goto ioctl;

	if (copy_from_user(&ethcmd, useraddr, sizeof (ethcmd)))
		return -EFAULT;

	switch (ethcmd) {
	case ETHTOOL_GSET:
		return ethtool_get_settings(dev, useraddr);
	case ETHTOOL_SSET:
		return ethtool_set_settings(dev, useraddr);
	case ETHTOOL_GDRVINFO:
		return ethtool_get_drvinfo(dev, useraddr);
	case ETHTOOL_GREGS:
		return ethtool_get_regs(dev, useraddr);
	case ETHTOOL_GWOL:
		return ethtool_get_wol(dev, useraddr);
	case ETHTOOL_SWOL:
		return ethtool_set_wol(dev, useraddr);
	case ETHTOOL_GMSGLVL:
		return ethtool_get_msglevel(dev, useraddr);
	case ETHTOOL_SMSGLVL:
		return ethtool_set_msglevel(dev, useraddr);
	case ETHTOOL_NWAY_RST:
		return ethtool_nway_reset(dev);
	case ETHTOOL_GLINK:
		return ethtool_get_link(dev, useraddr);
	case ETHTOOL_GEEPROM:
		return ethtool_get_eeprom(dev, useraddr);
	case ETHTOOL_SEEPROM:
		return ethtool_set_eeprom(dev, useraddr);
	case ETHTOOL_GCOALESCE:
		return ethtool_get_coalesce(dev, useraddr);
	case ETHTOOL_SCOALESCE:
		return ethtool_set_coalesce(dev, useraddr);
	case ETHTOOL_GRINGPARAM:
		return ethtool_get_ringparam(dev, useraddr);
	case ETHTOOL_SRINGPARAM:
		return ethtool_set_ringparam(dev, useraddr);
	case ETHTOOL_GPAUSEPARAM:
		return ethtool_get_pauseparam(dev, useraddr);
	case ETHTOOL_SPAUSEPARAM:
		return ethtool_set_pauseparam(dev, useraddr);
	case ETHTOOL_GRXCSUM:
		return ethtool_get_rx_csum(dev, useraddr);
	case ETHTOOL_SRXCSUM:
		return ethtool_set_rx_csum(dev, useraddr);
	case ETHTOOL_GTXCSUM:
		return ethtool_get_tx_csum(dev, useraddr);
	case ETHTOOL_STXCSUM:
		return ethtool_set_tx_csum(dev, useraddr);
	case ETHTOOL_GSG:
		return ethtool_get_sg(dev, useraddr);
	case ETHTOOL_SSG:
		return ethtool_set_sg(dev, useraddr);
	case ETHTOOL_TEST:
		return ethtool_self_test(dev, useraddr);
	case ETHTOOL_GSTRINGS:
		return ethtool_get_strings(dev, useraddr);
	case ETHTOOL_PHYS_ID:
		return ethtool_phys_id(dev, useraddr);
	case ETHTOOL_GSTATS:
		return ethtool_get_stats(dev, useraddr);
	default:
		return -EOPNOTSUPP;
	}

 ioctl:
	if (dev->do_ioctl)
		return dev->do_ioctl(dev, ifr, SIOCETHTOOL);
	return -EOPNOTSUPP;
}
/dev.c
0,0 → 1,2910
/* |
* NET3 Protocol independent device support routines. |
* |
* This program is free software; you can redistribute it and/or |
* modify it under the terms of the GNU General Public License |
* as published by the Free Software Foundation; either version |
* 2 of the License, or (at your option) any later version. |
* |
* Derived from the non IP parts of dev.c 1.0.19 |
* Authors: Ross Biro, <bir7@leland.Stanford.Edu> |
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> |
* Mark Evans, <evansmp@uhura.aston.ac.uk> |
* |
* Additional Authors: |
* Florian la Roche <rzsfl@rz.uni-sb.de> |
* Alan Cox <gw4pts@gw4pts.ampr.org> |
* David Hinds <dahinds@users.sourceforge.net> |
* Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> |
* Adam Sulmicki <adam@cfar.umd.edu> |
* Pekka Riikonen <priikone@poesidon.pspt.fi> |
* |
* Changes: |
* D.J. Barrow : Fixed bug where dev->refcnt gets set to 2 |
* if register_netdev gets called before |
* net_dev_init & also removed a few lines |
* of code in the process. |
* Alan Cox : device private ioctl copies fields back. |
* Alan Cox : Transmit queue code does relevant stunts to |
* keep the queue safe. |
* Alan Cox : Fixed double lock. |
* Alan Cox : Fixed promisc NULL pointer trap |
* ???????? : Support the full private ioctl range |
* Alan Cox : Moved ioctl permission check into drivers |
* Tim Kordas : SIOCADDMULTI/SIOCDELMULTI |
* Alan Cox : 100 backlog just doesn't cut it when |
* you start doing multicast video 8) |
* Alan Cox : Rewrote net_bh and list manager. |
* Alan Cox : Fix ETH_P_ALL echoback lengths. |
* Alan Cox : Took out transmit every packet pass |
* Saved a few bytes in the ioctl handler |
* Alan Cox : Network driver sets packet type before calling netif_rx. Saves |
* a function call a packet. |
* Alan Cox : Hashed net_bh() |
* Richard Kooijman: Timestamp fixes. |
* Alan Cox : Wrong field in SIOCGIFDSTADDR |
* Alan Cox : Device lock protection. |
* Alan Cox : Fixed nasty side effect of device close changes. |
* Rudi Cilibrasi : Pass the right thing to set_mac_address() |
* Dave Miller : 32bit quantity for the device lock to make it work out |
* on a Sparc. |
* Bjorn Ekwall : Added KERNELD hack. |
* Alan Cox : Cleaned up the backlog initialise. |
* Craig Metz : SIOCGIFCONF fix if space for under |
* 1 device. |
* Thomas Bogendoerfer : Return ENODEV for dev_open, if there |
* is no device open function. |
* Andi Kleen : Fix error reporting for SIOCGIFCONF |
* Michael Chastain : Fix signed/unsigned for SIOCGIFCONF |
* Cyrus Durgin : Cleaned for KMOD |
* Adam Sulmicki : Bug Fix : Network Device Unload |
* A network device unload needs to purge |
* the backlog queue. |
* Paul Rusty Russell : SIOCSIFNAME |
* Pekka Riikonen : Netdev boot-time settings code |
* Andrew Morton : Make unregister_netdevice wait indefinitely on dev->refcnt |
* J Hadi Salim : - Backlog queue sampling |
* - netif_rx() feedback |
*/ |
|
#include <asm/uaccess.h> |
#include <asm/system.h> |
#include <asm/bitops.h> |
#include <linux/config.h> |
#include <linux/types.h> |
#include <linux/kernel.h> |
#include <linux/sched.h> |
#include <linux/string.h> |
#include <linux/mm.h> |
#include <linux/socket.h> |
#include <linux/sockios.h> |
#include <linux/errno.h> |
#include <linux/interrupt.h> |
#include <linux/if_ether.h> |
#include <linux/netdevice.h> |
#include <linux/etherdevice.h> |
#include <linux/notifier.h> |
#include <linux/skbuff.h> |
#include <linux/brlock.h> |
#include <net/sock.h> |
#include <linux/rtnetlink.h> |
#include <linux/proc_fs.h> |
#include <linux/stat.h> |
#include <linux/if_bridge.h> |
#include <linux/divert.h> |
#include <net/dst.h> |
#include <net/pkt_sched.h> |
#include <net/profile.h> |
#include <net/checksum.h> |
#include <linux/highmem.h> |
#include <linux/init.h> |
#include <linux/kmod.h> |
#include <linux/module.h> |
#if defined(CONFIG_NET_RADIO) || defined(CONFIG_NET_PCMCIA_RADIO) |
#include <linux/wireless.h> /* Note : will define WIRELESS_EXT */ |
#include <net/iw_handler.h> |
#endif /* CONFIG_NET_RADIO || CONFIG_NET_PCMCIA_RADIO */ |
#ifdef CONFIG_PLIP |
extern int plip_init(void); |
#endif |
|
|
/* This define, if set, will randomly drop a packet when congestion |
* is more than moderate. It helps fairness in the multi-interface |
* case when one of them is a hog, but it kills performance for the |
* single interface case so it is off now by default. |
*/ |
#undef RAND_LIE |
|
/* Setting this will sample the queue lengths and thus congestion |
* via a timer instead of as each packet is received. |
*/ |
#undef OFFLINE_SAMPLE |
|
NET_PROFILE_DEFINE(dev_queue_xmit) |
NET_PROFILE_DEFINE(softnet_process) |
|
const char *if_port_text[] = { |
"unknown", |
"BNC", |
"10baseT", |
"AUI", |
"100baseT", |
"100baseTX", |
"100baseFX" |
}; |
|
/* |
* The list of packet types we will receive (as opposed to discard) |
* and the routines to invoke. |
* |
* Why 16. Because with 16 the only overlap we get on a hash of the |
* low nibble of the protocol value is RARP/SNAP/X.25. |
* |
* NOTE: That is no longer true with the addition of VLAN tags. Not |
* sure which should go first, but I bet it won't make much |
* difference if we are running VLANs. The good news is that |
* this protocol won't be in the list unless compiled in, so |
 * the average user (w/out VLANs) will not be adversely affected.
* --BLG |
* |
* 0800 IP |
* 8100 802.1Q VLAN |
* 0001 802.3 |
* 0002 AX.25 |
* 0004 802.2 |
* 8035 RARP |
* 0005 SNAP |
* 0805 X.25 |
* 0806 ARP |
* 8137 IPX |
* 0009 Localtalk |
* 86DD IPv6 |
*/ |
|
static struct packet_type *ptype_base[16]; /* 16 way hashed list */ |
static struct packet_type *ptype_all = NULL; /* Taps */ |
|
#ifdef OFFLINE_SAMPLE |
static void sample_queue(unsigned long dummy); |
static struct timer_list samp_timer = { function: sample_queue }; |
#endif |
|
#ifdef CONFIG_HOTPLUG |
static int net_run_sbin_hotplug(struct net_device *dev, char *action); |
#else |
#define net_run_sbin_hotplug(dev, action) ({ 0; }) |
#endif |
|
/* |
* Our notifier list |
*/ |
|
static struct notifier_block *netdev_chain=NULL; |
|
/* |
* Device drivers call our routines to queue packets here. We empty the |
* queue in the local softnet handler. |
*/ |
struct softnet_data softnet_data[NR_CPUS] __cacheline_aligned; |
|
#ifdef CONFIG_NET_FASTROUTE |
int netdev_fastroute; |
int netdev_fastroute_obstacles; |
#endif |
|
|
/****************************************************************************************** |
|
Protocol management and registration routines |
|
*******************************************************************************************/ |
|
/* |
* For efficiency |
*/ |
|
int netdev_nit=0; |
|
/* |
* Add a protocol ID to the list. Now that the input handler is |
* smarter we can dispense with all the messy stuff that used to be |
* here. |
* |
* BEWARE!!! Protocol handlers, mangling input packets, |
* MUST BE last in hash buckets and checking protocol handlers |
 * MUST start from promiscuous ptype_all chain in net_bh.
 * It is true now, do not change it.
 * Explanation follows: if protocol handler, mangling packet, will
* be the first on list, it is not able to sense, that packet |
* is cloned and should be copied-on-write, so that it will |
* change it and subsequent readers will get broken packet. |
* --ANK (980803) |
*/ |
|
/** |
* dev_add_pack - add packet handler |
* @pt: packet type declaration |
* |
* Add a protocol handler to the networking stack. The passed &packet_type |
* is linked into kernel lists and may not be freed until it has been |
* removed from the kernel lists. |
*/ |
|
void dev_add_pack(struct packet_type *pt) |
{ |
int hash; |
|
br_write_lock_bh(BR_NETPROTO_LOCK); |
|
#ifdef CONFIG_NET_FASTROUTE |
/* Hack to detect packet socket */ |
if ((pt->data) && ((int)(pt->data)!=1)) { |
netdev_fastroute_obstacles++; |
dev_clear_fastroute(pt->dev); |
} |
#endif |
if (pt->type == htons(ETH_P_ALL)) { |
netdev_nit++; |
pt->next=ptype_all; |
ptype_all=pt; |
} else { |
hash=ntohs(pt->type)&15; |
pt->next = ptype_base[hash]; |
ptype_base[hash] = pt; |
} |
br_write_unlock_bh(BR_NETPROTO_LOCK); |
} |
|
|
/** |
* dev_remove_pack - remove packet handler |
* @pt: packet type declaration |
* |
* Remove a protocol handler that was previously added to the kernel |
* protocol handlers by dev_add_pack(). The passed &packet_type is removed |
* from the kernel lists and can be freed or reused once this function |
* returns. |
*/ |
|
void dev_remove_pack(struct packet_type *pt) |
{ |
struct packet_type **pt1; |
|
br_write_lock_bh(BR_NETPROTO_LOCK); |
|
if (pt->type == htons(ETH_P_ALL)) { |
netdev_nit--; |
pt1=&ptype_all; |
} else { |
pt1=&ptype_base[ntohs(pt->type)&15]; |
} |
|
for (; (*pt1) != NULL; pt1 = &((*pt1)->next)) { |
if (pt == (*pt1)) { |
*pt1 = pt->next; |
#ifdef CONFIG_NET_FASTROUTE |
if (pt->data) |
netdev_fastroute_obstacles--; |
#endif |
br_write_unlock_bh(BR_NETPROTO_LOCK); |
return; |
} |
} |
br_write_unlock_bh(BR_NETPROTO_LOCK); |
printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt); |
} |
|
/****************************************************************************** |
|
Device Boot-time Settings Routines |
|
*******************************************************************************/ |
|
/* Boot time configuration table */ |
static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX]; |
|
/** |
* netdev_boot_setup_add - add new setup entry |
* @name: name of the device |
* @map: configured settings for the device |
* |
* Adds new setup entry to the dev_boot_setup list. The function |
* returns 0 on error and 1 on success. This is a generic routine to |
* all netdevices. |
*/ |
int netdev_boot_setup_add(char *name, struct ifmap *map) |
{ |
struct netdev_boot_setup *s; |
int i; |
|
s = dev_boot_setup; |
for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { |
if (s[i].name[0] == '\0' || s[i].name[0] == ' ') { |
memset(s[i].name, 0, sizeof(s[i].name)); |
strcpy(s[i].name, name); |
memcpy(&s[i].map, map, sizeof(s[i].map)); |
break; |
} |
} |
|
if (i >= NETDEV_BOOT_SETUP_MAX) |
return 0; |
|
return 1; |
} |
|
/** |
* netdev_boot_setup_check - check boot time settings |
* @dev: the netdevice |
* |
* Check boot time settings for the device. |
* The found settings are set for the device to be used |
* later in the device probing. |
* Returns 0 if no settings found, 1 if they are. |
*/ |
int netdev_boot_setup_check(struct net_device *dev) |
{ |
struct netdev_boot_setup *s; |
int i; |
|
s = dev_boot_setup; |
for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { |
if (s[i].name[0] != '\0' && s[i].name[0] != ' ' && |
!strncmp(dev->name, s[i].name, strlen(s[i].name))) { |
dev->irq = s[i].map.irq; |
dev->base_addr = s[i].map.base_addr; |
dev->mem_start = s[i].map.mem_start; |
dev->mem_end = s[i].map.mem_end; |
return 1; |
} |
} |
return 0; |
} |
|
/* |
* Saves at boot time configured settings for any netdevice. |
*/ |
int __init netdev_boot_setup(char *str) |
{ |
int ints[5]; |
struct ifmap map; |
|
str = get_options(str, ARRAY_SIZE(ints), ints); |
if (!str || !*str) |
return 0; |
|
/* Save settings */ |
memset(&map, 0, sizeof(map)); |
if (ints[0] > 0) |
map.irq = ints[1]; |
if (ints[0] > 1) |
map.base_addr = ints[2]; |
if (ints[0] > 2) |
map.mem_start = ints[3]; |
if (ints[0] > 3) |
map.mem_end = ints[4]; |
|
/* Add new entry to the list */ |
return netdev_boot_setup_add(str, &map); |
} |
|
__setup("netdev=", netdev_boot_setup); |
|
/***************************************************************************************** |
|
Device Interface Subroutines |
|
******************************************************************************************/ |
|
/** |
* __dev_get_by_name - find a device by its name |
* @name: name to find |
* |
* Find an interface by name. Must be called under RTNL semaphore |
* or @dev_base_lock. If the name is found a pointer to the device |
* is returned. If the name is not found then %NULL is returned. The |
* reference counters are not incremented so the caller must be |
* careful with locks. |
*/ |
|
|
struct net_device *__dev_get_by_name(const char *name) |
{ |
struct net_device *dev; |
|
for (dev = dev_base; dev != NULL; dev = dev->next) { |
if (strncmp(dev->name, name, IFNAMSIZ) == 0) |
return dev; |
} |
return NULL; |
} |
|
/** |
* dev_get_by_name - find a device by its name |
* @name: name to find |
* |
* Find an interface by name. This can be called from any |
* context and does its own locking. The returned handle has |
* the usage count incremented and the caller must use dev_put() to |
* release it when it is no longer needed. %NULL is returned if no |
* matching device is found. |
*/ |
|
struct net_device *dev_get_by_name(const char *name) |
{ |
struct net_device *dev; |
|
read_lock(&dev_base_lock); |
dev = __dev_get_by_name(name); |
if (dev) |
dev_hold(dev); |
read_unlock(&dev_base_lock); |
return dev; |
} |
|
/* |
Return value is changed to int to prevent illegal usage in future. |
It is still legal to use to check for device existence. |
|
User should understand, that the result returned by this function |
is meaningless, if it was not issued under rtnl semaphore. |
*/ |
|
/** |
* dev_get - test if a device exists |
* @name: name to test for |
* |
* Test if a name exists. Returns true if the name is found. In order |
* to be sure the name is not allocated or removed during the test the |
* caller must hold the rtnl semaphore. |
* |
* This function primarily exists for back compatibility with older |
* drivers. |
*/ |
|
int dev_get(const char *name) |
{ |
struct net_device *dev; |
|
read_lock(&dev_base_lock); |
dev = __dev_get_by_name(name); |
read_unlock(&dev_base_lock); |
return dev != NULL; |
} |
|
/** |
* __dev_get_by_index - find a device by its ifindex |
* @ifindex: index of device |
* |
* Search for an interface by index. Returns %NULL if the device |
* is not found or a pointer to the device. The device has not |
* had its reference counter increased so the caller must be careful |
* about locking. The caller must hold either the RTNL semaphore |
* or @dev_base_lock. |
*/ |
|
struct net_device * __dev_get_by_index(int ifindex) |
{ |
struct net_device *dev; |
|
for (dev = dev_base; dev != NULL; dev = dev->next) { |
if (dev->ifindex == ifindex) |
return dev; |
} |
return NULL; |
} |
|
|
/** |
* dev_get_by_index - find a device by its ifindex |
* @ifindex: index of device |
* |
* Search for an interface by index. Returns NULL if the device |
* is not found or a pointer to the device. The device returned has |
* had a reference added and the pointer is safe until the user calls |
* dev_put to indicate they have finished with it. |
*/ |
|
struct net_device * dev_get_by_index(int ifindex) |
{ |
struct net_device *dev; |
|
read_lock(&dev_base_lock); |
dev = __dev_get_by_index(ifindex); |
if (dev) |
dev_hold(dev); |
read_unlock(&dev_base_lock); |
return dev; |
} |
|
/** |
* dev_getbyhwaddr - find a device by its hardware address |
* @type: media type of device |
* @ha: hardware address |
* |
* Search for an interface by MAC address. Returns NULL if the device |
* is not found or a pointer to the device. The caller must hold the |
* rtnl semaphore. The returned device has not had its ref count increased |
* and the caller must therefore be careful about locking |
* |
* BUGS: |
* If the API was consistent this would be __dev_get_by_hwaddr |
*/ |
|
struct net_device *dev_getbyhwaddr(unsigned short type, char *ha) |
{ |
struct net_device *dev; |
|
ASSERT_RTNL(); |
|
for (dev = dev_base; dev != NULL; dev = dev->next) { |
if (dev->type == type && |
memcmp(dev->dev_addr, ha, dev->addr_len) == 0) |
return dev; |
} |
return NULL; |
} |
|
/** |
* dev_get_by_flags - find any device with given flags |
* @if_flags: IFF_* values |
* @mask: bitmask of bits in if_flags to check |
* |
* Search for any interface with the given flags. Returns NULL if a device |
* is not found or a pointer to the device. The device returned has |
* had a reference added and the pointer is safe until the user calls |
* dev_put to indicate they have finished with it. |
*/ |
|
struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask) |
{ |
struct net_device *dev; |
|
read_lock(&dev_base_lock); |
dev = __dev_get_by_flags(if_flags, mask); |
if (dev) |
dev_hold(dev); |
read_unlock(&dev_base_lock); |
return dev; |
} |
|
/** |
* __dev_get_by_flags - find any device with given flags |
* @if_flags: IFF_* values |
* @mask: bitmask of bits in if_flags to check |
* |
* Search for any interface with the given flags. Returns NULL if a device |
* is not found or a pointer to the device. The caller must hold either |
* the RTNL semaphore or @dev_base_lock. |
*/ |
|
struct net_device *__dev_get_by_flags(unsigned short if_flags, unsigned short mask) |
{ |
struct net_device *dev; |
|
for (dev = dev_base; dev != NULL; dev = dev->next) { |
if (((dev->flags ^ if_flags) & mask) == 0) |
return dev; |
} |
return NULL; |
} |
|
/** |
* dev_alloc_name - allocate a name for a device |
* @dev: device |
* @name: name format string |
* |
* Passed a format string - eg "lt%d" it will try and find a suitable |
* id. Not efficient for many devices, not called a lot. The caller |
* must hold the dev_base or rtnl lock while allocating the name and |
* adding the device in order to avoid duplicates. Returns the number |
* of the unit assigned or a negative errno code. |
*/ |
|
int dev_alloc_name(struct net_device *dev, const char *name) |
{ |
int i; |
char buf[32]; |
char *p; |
|
/* |
* Verify the string as this thing may have come from |
* the user. There must be either one "%d" and no other "%" |
* characters, or no "%" characters at all. |
*/ |
p = strchr(name, '%'); |
if (p && (p[1] != 'd' || strchr(p+2, '%'))) |
return -EINVAL; |
|
/* |
* If you need over 100 please also fix the algorithm... |
*/ |
for (i = 0; i < 100; i++) { |
snprintf(buf,sizeof(buf),name,i); |
if (__dev_get_by_name(buf) == NULL) { |
strcpy(dev->name, buf); |
return i; |
} |
} |
return -ENFILE; /* Over 100 of the things .. bail out! */ |
} |
|
/** |
* dev_alloc - allocate a network device and name |
* @name: name format string |
* @err: error return pointer |
* |
* Passed a format string, eg. "lt%d", it will allocate a network device |
* and space for the name. %NULL is returned if no memory is available. |
* If the allocation succeeds then the name is assigned and the |
* device pointer returned. %NULL is returned if the name allocation |
* failed. The cause of an error is returned as a negative errno code |
* in the variable @err points to. |
* |
* The caller must hold the @dev_base or RTNL locks when doing this in |
* order to avoid duplicate name allocations. |
*/ |
|
struct net_device *dev_alloc(const char *name, int *err) |
{ |
struct net_device *dev=kmalloc(sizeof(struct net_device), GFP_KERNEL); |
if (dev == NULL) { |
*err = -ENOBUFS; |
return NULL; |
} |
memset(dev, 0, sizeof(struct net_device)); |
*err = dev_alloc_name(dev, name); |
if (*err < 0) { |
kfree(dev); |
return NULL; |
} |
return dev; |
} |
|
/** |
* netdev_state_change - device changes state |
* @dev: device to cause notification |
* |
* Called to indicate a device has changed state. This function calls |
* the notifier chains for netdev_chain and sends a NEWLINK message |
* to the routing socket. |
*/ |
|
void netdev_state_change(struct net_device *dev) |
{ |
if (dev->flags&IFF_UP) { |
notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev); |
rtmsg_ifinfo(RTM_NEWLINK, dev, 0); |
} |
} |
|
|
#ifdef CONFIG_KMOD |
|
/** |
* dev_load - load a network module |
* @name: name of interface |
* |
* If a network interface is not present and the process has suitable |
* privileges this function loads the module. If module loading is not |
* available in this kernel then it becomes a nop. |
*/ |
|
void dev_load(const char *name) |
{ |
if (!dev_get(name) && capable(CAP_SYS_MODULE)) |
request_module(name); |
} |
|
#else |
|
extern inline void dev_load(const char *unused){;} |
|
#endif |
|
static int default_rebuild_header(struct sk_buff *skb) |
{ |
printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n", skb->dev ? skb->dev->name : "NULL!!!"); |
kfree_skb(skb); |
return 1; |
} |
|
/** |
* dev_open - prepare an interface for use. |
* @dev: device to open |
* |
* Takes a device from down to up state. The device's private open |
* function is invoked and then the multicast lists are loaded. Finally |
* the device is moved into the up state and a %NETDEV_UP message is |
* sent to the netdev notifier chain. |
* |
* Calling this function on an active interface is a nop. On a failure |
* a negative errno code is returned. |
*/ |
|
int dev_open(struct net_device *dev) |
{ |
int ret = 0; |
|
/* |
* Is it already up? |
*/ |
|
if (dev->flags&IFF_UP) |
return 0; |
|
/* |
* Is it even present? |
*/ |
if (!netif_device_present(dev)) |
return -ENODEV; |
|
/* |
* Call device private open method |
*/ |
if (try_inc_mod_count(dev->owner)) { |
set_bit(__LINK_STATE_START, &dev->state); |
if (dev->open) { |
ret = dev->open(dev); |
if (ret != 0) { |
clear_bit(__LINK_STATE_START, &dev->state); |
if (dev->owner) |
__MOD_DEC_USE_COUNT(dev->owner); |
} |
} |
} else { |
ret = -ENODEV; |
} |
|
/* |
* If it went open OK then: |
*/ |
|
if (ret == 0) |
{ |
/* |
* Set the flags. |
*/ |
dev->flags |= IFF_UP; |
|
/* |
* Initialize multicasting status |
*/ |
dev_mc_upload(dev); |
|
/* |
* Wakeup transmit queue engine |
*/ |
dev_activate(dev); |
|
/* |
* ... and announce new interface. |
*/ |
notifier_call_chain(&netdev_chain, NETDEV_UP, dev); |
} |
return(ret); |
} |
|
#ifdef CONFIG_NET_FASTROUTE |
|
static void dev_do_clear_fastroute(struct net_device *dev) |
{ |
if (dev->accept_fastpath) { |
int i; |
|
for (i=0; i<=NETDEV_FASTROUTE_HMASK; i++) { |
struct dst_entry *dst; |
|
write_lock_irq(&dev->fastpath_lock); |
dst = dev->fastpath[i]; |
dev->fastpath[i] = NULL; |
write_unlock_irq(&dev->fastpath_lock); |
|
dst_release(dst); |
} |
} |
} |
|
void dev_clear_fastroute(struct net_device *dev) |
{ |
if (dev) { |
dev_do_clear_fastroute(dev); |
} else { |
read_lock(&dev_base_lock); |
for (dev = dev_base; dev; dev = dev->next) |
dev_do_clear_fastroute(dev); |
read_unlock(&dev_base_lock); |
} |
} |
#endif |
|
/** |
* dev_close - shutdown an interface. |
* @dev: device to shutdown |
* |
* This function moves an active device into down state. A |
* %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device |
* is then deactivated and finally a %NETDEV_DOWN is sent to the notifier |
* chain. |
*/ |
|
int dev_close(struct net_device *dev) |
{ |
if (!(dev->flags&IFF_UP)) |
return 0; |
|
/* |
* Tell people we are going down, so that they can |
* prepare to death, when device is still operating. |
*/ |
notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev); |
|
dev_deactivate(dev); |
|
clear_bit(__LINK_STATE_START, &dev->state); |
|
/* Synchronize to scheduled poll. We cannot touch poll list, |
* it can be even on different cpu. So just clear netif_running(), |
* and wait when poll really will happen. Actually, the best place |
* for this is inside dev->stop() after device stopped its irq |
* engine, but this requires more changes in devices. */ |
|
smp_mb__after_clear_bit(); /* Commit netif_running(). */ |
while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) { |
/* No hurry. */ |
current->state = TASK_INTERRUPTIBLE; |
schedule_timeout(1); |
} |
|
/* |
* Call the device specific close. This cannot fail. |
* Only if device is UP |
* |
* We allow it to be called even after a DETACH hot-plug |
* event. |
*/ |
|
if (dev->stop) |
dev->stop(dev); |
|
/* |
* Device is now down. |
*/ |
|
dev->flags &= ~IFF_UP; |
#ifdef CONFIG_NET_FASTROUTE |
dev_clear_fastroute(dev); |
#endif |
|
/* |
* Tell people we are down |
*/ |
notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev); |
|
/* |
* Drop the module refcount |
*/ |
if (dev->owner) |
__MOD_DEC_USE_COUNT(dev->owner); |
|
return(0); |
} |
|
|
/* |
* Device change register/unregister. These are not inline or static |
* as we export them to the world. |
*/ |
|
/** |
* register_netdevice_notifier - register a network notifier block |
* @nb: notifier |
* |
* Register a notifier to be called when network device events occur. |
* The notifier passed is linked into the kernel structures and must |
* not be reused until it has been unregistered. A negative errno code |
* is returned on a failure. |
*/ |
|
int register_netdevice_notifier(struct notifier_block *nb) |
{ |
return notifier_chain_register(&netdev_chain, nb); |
} |
|
/** |
* unregister_netdevice_notifier - unregister a network notifier block |
* @nb: notifier |
* |
* Unregister a notifier previously registered by |
* register_netdevice_notifier(). The notifier is unlinked into the |
* kernel structures and may then be reused. A negative errno code |
* is returned on a failure. |
*/ |
|
int unregister_netdevice_notifier(struct notifier_block *nb) |
{ |
return notifier_chain_unregister(&netdev_chain,nb); |
} |
|
/* |
* Support routine. Sends outgoing frames to any network |
* taps currently in use. |
*/ |
|
void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) |
{ |
struct packet_type *ptype; |
do_gettimeofday(&skb->stamp); |
|
br_read_lock(BR_NETPROTO_LOCK); |
for (ptype = ptype_all; ptype!=NULL; ptype = ptype->next) |
{ |
/* Never send packets back to the socket |
* they originated from - MvS (miquels@drinkel.ow.org) |
*/ |
if ((ptype->dev == dev || !ptype->dev) && |
((struct sock *)ptype->data != skb->sk)) |
{ |
struct sk_buff *skb2; |
if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL) |
break; |
|
/* skb->nh should be correctly |
set by sender, so that the second statement is |
just protection against buggy protocols. |
*/ |
skb2->mac.raw = skb2->data; |
|
if (skb2->nh.raw < skb2->data || skb2->nh.raw > skb2->tail) { |
if (net_ratelimit()) |
printk(KERN_CRIT "protocol %04x is buggy, dev %s\n", skb2->protocol, dev->name); |
skb2->nh.raw = skb2->data; |
} |
|
skb2->h.raw = skb2->nh.raw; |
skb2->pkt_type = PACKET_OUTGOING; |
ptype->func(skb2, skb->dev, ptype); |
} |
} |
br_read_unlock(BR_NETPROTO_LOCK); |
} |
|
/* Calculate csum in the case, when packet is misrouted. |
* If it failed by some reason, ignore and send skb with wrong |
* checksum. |
*/ |
struct sk_buff * skb_checksum_help(struct sk_buff *skb) |
{ |
int offset; |
unsigned int csum; |
|
offset = skb->h.raw - skb->data; |
if (offset > (int)skb->len) |
BUG(); |
csum = skb_checksum(skb, offset, skb->len-offset, 0); |
|
offset = skb->tail - skb->h.raw; |
if (offset <= 0) |
BUG(); |
if (skb->csum+2 > offset) |
BUG(); |
|
*(u16*)(skb->h.raw + skb->csum) = csum_fold(csum); |
skb->ip_summed = CHECKSUM_NONE; |
return skb; |
} |
|
#ifdef CONFIG_HIGHMEM |
/* Actually, we should eliminate this check as soon as we know, that: |
* 1. IOMMU is present and allows to map all the memory. |
* 2. No high memory really exists on this machine. |
*/ |
|
static inline int |
illegal_highdma(struct net_device *dev, struct sk_buff *skb) |
{ |
int i; |
|
if (dev->features&NETIF_F_HIGHDMA) |
return 0; |
|
for (i=0; i<skb_shinfo(skb)->nr_frags; i++) |
if (skb_shinfo(skb)->frags[i].page >= highmem_start_page) |
return 1; |
|
return 0; |
} |
#else |
#define illegal_highdma(dev, skb) (0) |
#endif |
|
/** |
* dev_queue_xmit - transmit a buffer |
* @skb: buffer to transmit |
* |
* Queue a buffer for transmission to a network device. The caller must |
* have set the device and priority and built the buffer before calling this |
* function. The function can be called from an interrupt. |
* |
* A negative errno code is returned on a failure. A success does not |
* guarantee the frame will be transmitted as it may be dropped due |
* to congestion or traffic shaping. |
*/ |
|
int dev_queue_xmit(struct sk_buff *skb) |
{ |
struct net_device *dev = skb->dev; |
struct Qdisc *q; |
|
if (skb_shinfo(skb)->frag_list && |
!(dev->features&NETIF_F_FRAGLIST) && |
skb_linearize(skb, GFP_ATOMIC) != 0) { |
kfree_skb(skb); |
return -ENOMEM; |
} |
|
/* Fragmented skb is linearized if device does not support SG, |
* or if at least one of fragments is in highmem and device |
* does not support DMA from it. |
*/ |
if (skb_shinfo(skb)->nr_frags && |
(!(dev->features&NETIF_F_SG) || illegal_highdma(dev, skb)) && |
skb_linearize(skb, GFP_ATOMIC) != 0) { |
kfree_skb(skb); |
return -ENOMEM; |
} |
|
/* If packet is not checksummed and device does not support |
* checksumming for this protocol, complete checksumming here. |
*/ |
if (skb->ip_summed == CHECKSUM_HW && |
(!(dev->features&(NETIF_F_HW_CSUM|NETIF_F_NO_CSUM)) && |
(!(dev->features&NETIF_F_IP_CSUM) || |
skb->protocol != htons(ETH_P_IP)))) { |
if ((skb = skb_checksum_help(skb)) == NULL) |
return -ENOMEM; |
} |
|
/* Grab device queue */ |
spin_lock_bh(&dev->queue_lock); |
q = dev->qdisc; |
if (q->enqueue) { |
int ret = q->enqueue(skb, q); |
|
qdisc_run(dev); |
|
spin_unlock_bh(&dev->queue_lock); |
return ret == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : ret; |
} |
|
/* The device has no queue. Common case for software devices: |
loopback, all the sorts of tunnels... |
|
Really, it is unlikely that xmit_lock protection is necessary here. |
(f.e. loopback and IP tunnels are clean ignoring statistics counters.) |
However, it is possible, that they rely on protection |
made by us here. |
|
Check this and shot the lock. It is not prone from deadlocks. |
Either shot noqueue qdisc, it is even simpler 8) |
*/ |
if (dev->flags&IFF_UP) { |
int cpu = smp_processor_id(); |
|
if (dev->xmit_lock_owner != cpu) { |
spin_unlock(&dev->queue_lock); |
spin_lock(&dev->xmit_lock); |
dev->xmit_lock_owner = cpu; |
|
if (!netif_queue_stopped(dev)) { |
if (netdev_nit) |
dev_queue_xmit_nit(skb,dev); |
|
if (dev->hard_start_xmit(skb, dev) == 0) { |
dev->xmit_lock_owner = -1; |
spin_unlock_bh(&dev->xmit_lock); |
return 0; |
} |
} |
dev->xmit_lock_owner = -1; |
spin_unlock_bh(&dev->xmit_lock); |
if (net_ratelimit()) |
printk(KERN_CRIT "Virtual device %s asks to queue packet!\n", dev->name); |
kfree_skb(skb); |
return -ENETDOWN; |
} else { |
/* Recursion is detected! It is possible, unfortunately */ |
if (net_ratelimit()) |
printk(KERN_CRIT "Dead loop on virtual device %s, fix it urgently!\n", dev->name); |
} |
} |
spin_unlock_bh(&dev->queue_lock); |
|
kfree_skb(skb); |
return -ENETDOWN; |
} |
|
|
/*======================================================================= |
Receiver routines |
=======================================================================*/ |
|
int netdev_max_backlog = 300; |
int weight_p = 64; /* old backlog weight */ |
/* These numbers are selected based on intuition and some |
* experimentatiom, if you have more scientific way of doing this |
* please go ahead and fix things. |
*/ |
int no_cong_thresh = 10; |
int no_cong = 20; |
int lo_cong = 100; |
int mod_cong = 290; |
|
struct netif_rx_stats netdev_rx_stat[NR_CPUS]; |
|
|
#ifdef CONFIG_NET_HW_FLOWCONTROL |
atomic_t netdev_dropping = ATOMIC_INIT(0); |
static unsigned long netdev_fc_mask = 1; |
unsigned long netdev_fc_xoff = 0; |
spinlock_t netdev_fc_lock = SPIN_LOCK_UNLOCKED; |
|
static struct |
{ |
void (*stimul)(struct net_device *); |
struct net_device *dev; |
} netdev_fc_slots[BITS_PER_LONG]; |
|
int netdev_register_fc(struct net_device *dev, void (*stimul)(struct net_device *dev)) |
{ |
int bit = 0; |
unsigned long flags; |
|
spin_lock_irqsave(&netdev_fc_lock, flags); |
if (netdev_fc_mask != ~0UL) { |
bit = ffz(netdev_fc_mask); |
netdev_fc_slots[bit].stimul = stimul; |
netdev_fc_slots[bit].dev = dev; |
set_bit(bit, &netdev_fc_mask); |
clear_bit(bit, &netdev_fc_xoff); |
} |
spin_unlock_irqrestore(&netdev_fc_lock, flags); |
return bit; |
} |
|
void netdev_unregister_fc(int bit) |
{ |
unsigned long flags; |
|
spin_lock_irqsave(&netdev_fc_lock, flags); |
if (bit > 0) { |
netdev_fc_slots[bit].stimul = NULL; |
netdev_fc_slots[bit].dev = NULL; |
clear_bit(bit, &netdev_fc_mask); |
clear_bit(bit, &netdev_fc_xoff); |
} |
spin_unlock_irqrestore(&netdev_fc_lock, flags); |
} |
|
static void netdev_wakeup(void) |
{ |
unsigned long xoff; |
|
spin_lock(&netdev_fc_lock); |
xoff = netdev_fc_xoff; |
netdev_fc_xoff = 0; |
while (xoff) { |
int i = ffz(~xoff); |
xoff &= ~(1<<i); |
netdev_fc_slots[i].stimul(netdev_fc_slots[i].dev); |
} |
spin_unlock(&netdev_fc_lock); |
} |
#endif |
|
static void get_sample_stats(int cpu) |
{ |
#ifdef RAND_LIE |
unsigned long rd; |
int rq; |
#endif |
int blog = softnet_data[cpu].input_pkt_queue.qlen; |
int avg_blog = softnet_data[cpu].avg_blog; |
|
avg_blog = (avg_blog >> 1)+ (blog >> 1); |
|
if (avg_blog > mod_cong) { |
/* Above moderate congestion levels. */ |
softnet_data[cpu].cng_level = NET_RX_CN_HIGH; |
#ifdef RAND_LIE |
rd = net_random(); |
rq = rd % netdev_max_backlog; |
if (rq < avg_blog) /* unlucky bastard */ |
softnet_data[cpu].cng_level = NET_RX_DROP; |
#endif |
} else if (avg_blog > lo_cong) { |
softnet_data[cpu].cng_level = NET_RX_CN_MOD; |
#ifdef RAND_LIE |
rd = net_random(); |
rq = rd % netdev_max_backlog; |
if (rq < avg_blog) /* unlucky bastard */ |
softnet_data[cpu].cng_level = NET_RX_CN_HIGH; |
#endif |
} else if (avg_blog > no_cong) |
softnet_data[cpu].cng_level = NET_RX_CN_LOW; |
else /* no congestion */ |
softnet_data[cpu].cng_level = NET_RX_SUCCESS; |
|
softnet_data[cpu].avg_blog = avg_blog; |
} |
|
#ifdef OFFLINE_SAMPLE |
static void sample_queue(unsigned long dummy) |
{ |
/* 10 ms 0r 1ms -- i dont care -- JHS */ |
int next_tick = 1; |
int cpu = smp_processor_id(); |
|
get_sample_stats(cpu); |
next_tick += jiffies; |
mod_timer(&samp_timer, next_tick); |
} |
#endif |
|
|
/** |
* netif_rx - post buffer to the network code |
* @skb: buffer to post |
* |
* This function receives a packet from a device driver and queues it for |
* the upper (protocol) levels to process. It always succeeds. The buffer |
* may be dropped during processing for congestion control or by the |
* protocol layers. |
* |
* return values: |
* NET_RX_SUCCESS (no congestion) |
* NET_RX_CN_LOW (low congestion) |
* NET_RX_CN_MOD (moderate congestion) |
* NET_RX_CN_HIGH (high congestion) |
* NET_RX_DROP (packet was dropped) |
* |
* |
*/ |
|
int netif_rx(struct sk_buff *skb) |
{ |
int this_cpu = smp_processor_id(); |
struct softnet_data *queue; |
unsigned long flags; |
|
if (skb->stamp.tv_sec == 0) |
do_gettimeofday(&skb->stamp); |
|
/* The code is rearranged so that the path is the most |
short when CPU is congested, but is still operating. |
*/ |
queue = &softnet_data[this_cpu]; |
|
local_irq_save(flags); |
|
netdev_rx_stat[this_cpu].total++; |
if (queue->input_pkt_queue.qlen <= netdev_max_backlog) { |
if (queue->input_pkt_queue.qlen) { |
if (queue->throttle) |
goto drop; |
|
enqueue: |
dev_hold(skb->dev); |
__skb_queue_tail(&queue->input_pkt_queue,skb); |
local_irq_restore(flags); |
#ifndef OFFLINE_SAMPLE |
get_sample_stats(this_cpu); |
#endif |
return queue->cng_level; |
} |
|
if (queue->throttle) { |
queue->throttle = 0; |
#ifdef CONFIG_NET_HW_FLOWCONTROL |
if (atomic_dec_and_test(&netdev_dropping)) |
netdev_wakeup(); |
#endif |
} |
|
netif_rx_schedule(&queue->blog_dev); |
goto enqueue; |
} |
|
if (queue->throttle == 0) { |
queue->throttle = 1; |
netdev_rx_stat[this_cpu].throttled++; |
#ifdef CONFIG_NET_HW_FLOWCONTROL |
atomic_inc(&netdev_dropping); |
#endif |
} |
|
drop: |
netdev_rx_stat[this_cpu].dropped++; |
local_irq_restore(flags); |
|
kfree_skb(skb); |
return NET_RX_DROP; |
} |
|
/* Deliver skb to an old protocol, which is not threaded well
   or which does not understand shared skbs.

   @pt:   the packet_type handler to invoke (old style: pt->data == NULL)
   @skb:  the buffer to deliver
   @last: nonzero when this is the final delivery and the skb need not be
          cloned; zero means clone first so other handlers keep their copy.

   Returns the handler's verdict, or NET_RX_DROP on clone/linearize failure.
 */
static int deliver_to_old_ones(struct packet_type *pt, struct sk_buff *skb, int last)
{
        static spinlock_t net_bh_lock = SPIN_LOCK_UNLOCKED;
        int ret = NET_RX_DROP;


        if (!last) {
                /* Not the last consumer: give the old handler its own copy. */
                skb = skb_clone(skb, GFP_ATOMIC);
                if (skb == NULL)
                        return ret;
        }
        /* Old protocols cannot deal with paged/fragmented skbs. */
        if (skb_is_nonlinear(skb) && skb_linearize(skb, GFP_ATOMIC) != 0) {
                kfree_skb(skb);
                return ret;
        }

        /* The assumption (a correct one) is that old protocols
           did not depend on BHs other than NET_BH and TIMER_BH.
         */

        /* Emulate NET_BH with special spinlock */
        spin_lock(&net_bh_lock);

        /* Disable timers and wait for all timers completion */
        tasklet_disable(bh_task_vec+TIMER_BH);

        ret = pt->func(skb, skb->dev, pt);

        tasklet_hi_enable(bh_task_vec+TIMER_BH);
        spin_unlock(&net_bh_lock);
        return ret;
}
|
static __inline__ void skb_bond(struct sk_buff *skb) |
{ |
struct net_device *dev = skb->dev; |
|
if (dev->master) { |
skb->real_dev = skb->dev; |
skb->dev = dev->master; |
} |
} |
|
/*
 * TX softirq handler: frees skbs queued for destruction by drivers
 * (completion_queue) and restarts queueing disciplines of devices that
 * were scheduled for transmission (output_queue).  Both per-CPU lists
 * are detached under local_irq_disable() because drivers append to them
 * from hard-IRQ context.
 */
static void net_tx_action(struct softirq_action *h)
{
        int cpu = smp_processor_id();

        if (softnet_data[cpu].completion_queue) {
                struct sk_buff *clist;

                /* Atomically take the whole completion list. */
                local_irq_disable();
                clist = softnet_data[cpu].completion_queue;
                softnet_data[cpu].completion_queue = NULL;
                local_irq_enable();

                while (clist != NULL) {
                        struct sk_buff *skb = clist;
                        clist = clist->next;

                        /* Only skbs with no remaining users belong here. */
                        BUG_TRAP(atomic_read(&skb->users) == 0);
                        __kfree_skb(skb);
                }
        }

        if (softnet_data[cpu].output_queue) {
                struct net_device *head;

                /* Atomically take the whole output list. */
                local_irq_disable();
                head = softnet_data[cpu].output_queue;
                softnet_data[cpu].output_queue = NULL;
                local_irq_enable();

                while (head != NULL) {
                        struct net_device *dev = head;
                        head = head->next_sched;

                        /* Clear SCHED before running the qdisc so a
                           concurrent netif_schedule() is not lost. */
                        smp_mb__before_clear_bit();
                        clear_bit(__LINK_STATE_SCHED, &dev->state);

                        if (spin_trylock(&dev->queue_lock)) {
                                qdisc_run(dev);
                                spin_unlock(&dev->queue_lock);
                        } else {
                                /* Lock contended: reschedule for later. */
                                netif_schedule(dev);
                        }
                }
        }
}
|
|
#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) |
void (*br_handle_frame_hook)(struct sk_buff *skb) = NULL; |
#endif |
|
static __inline__ int handle_bridge(struct sk_buff *skb, |
struct packet_type *pt_prev) |
{ |
int ret = NET_RX_DROP; |
|
if (pt_prev) { |
if (!pt_prev->data) |
ret = deliver_to_old_ones(pt_prev, skb, 0); |
else { |
atomic_inc(&skb->users); |
ret = pt_prev->func(skb, skb->dev, pt_prev); |
} |
} |
|
br_handle_frame_hook(skb); |
return ret; |
} |
|
|
#ifdef CONFIG_NET_DIVERT
/* Pass the frame to the frame diverter when the receiving device has an
   active divert block.  Always reports success. */
static inline int handle_diverter(struct sk_buff *skb)
{
        if (skb->dev->divert) {
                if (skb->dev->divert->divert)
                        divert_frame(skb);
        }
        return 0;
}
#endif /* CONFIG_NET_DIVERT */
|
/*
 * Core packet delivery: runs every registered handler against the skb.
 *
 * Delivery is deliberately deferred by one iteration through pt_prev:
 * a handler is invoked only when the NEXT match is found, so the very
 * last match can be given the skb without cloning / taking an extra
 * reference.  Returns the last handler's verdict, or NET_RX_DROP when
 * nobody wanted the packet.
 */
int netif_receive_skb(struct sk_buff *skb)
{
        struct packet_type *ptype, *pt_prev;
        int ret = NET_RX_DROP;
        unsigned short type;

        /* Stamp arrival time unless the driver already did it. */
        if (skb->stamp.tv_sec == 0)
                do_gettimeofday(&skb->stamp);

        /* Redirect bonding-slave traffic to the master device. */
        skb_bond(skb);

        netdev_rx_stat[smp_processor_id()].total++;

#ifdef CONFIG_NET_FASTROUTE
        if (skb->pkt_type == PACKET_FASTROUTE) {
                /* Fastrouted packet: transmit it directly, bypassing
                   the protocol stack entirely. */
                netdev_rx_stat[smp_processor_id()].fastroute_deferred_out++;
                return dev_queue_xmit(skb);
        }
#endif

        /* Reset header pointers for the protocol layers. */
        skb->h.raw = skb->nh.raw = skb->data;

        /* First pass: taps that want every packet (e.g. packet sockets
           bound to all protocols), filtered by device if requested. */
        pt_prev = NULL;
        for (ptype = ptype_all; ptype; ptype = ptype->next) {
                if (!ptype->dev || ptype->dev == skb->dev) {
                        if (pt_prev) {
                                if (!pt_prev->data) {
                                        ret = deliver_to_old_ones(pt_prev, skb, 0);
                                } else {
                                        atomic_inc(&skb->users);
                                        ret = pt_prev->func(skb, skb->dev, pt_prev);
                                }
                        }
                        pt_prev = ptype;
                }
        }

#ifdef CONFIG_NET_DIVERT
        if (skb->dev->divert && skb->dev->divert->divert)
                ret = handle_diverter(skb);
#endif /* CONFIG_NET_DIVERT */

#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
        /* A bridged port: the bridge takes over (it consumes the skb). */
        if (skb->dev->br_port != NULL && br_handle_frame_hook != NULL &&
            skb->pkt_type != PACKET_LOOPBACK) {
                return handle_bridge(skb, pt_prev);
        }
#endif

        /* Second pass: handlers for this specific protocol type, hashed
           into 16 buckets by the low bits of the host-order type. */
        type = skb->protocol;
        for (ptype=ptype_base[ntohs(type)&15];ptype;ptype=ptype->next) {
                if (ptype->type == type &&
                    (!ptype->dev || ptype->dev == skb->dev)) {
                        if (pt_prev) {
                                if (!pt_prev->data) {
                                        ret = deliver_to_old_ones(pt_prev, skb, 0);
                                } else {
                                        atomic_inc(&skb->users);
                                        ret = pt_prev->func(skb, skb->dev, pt_prev);
                                }
                        }
                        pt_prev = ptype;
                }
        }

        /* Final deferred delivery: the last matching handler receives the
           skb itself (last=1, no extra reference taken). */
        if (pt_prev) {
                if (!pt_prev->data) {
                        ret = deliver_to_old_ones(pt_prev, skb, 1);
                } else {
                        ret = pt_prev->func(skb, skb->dev, pt_prev);
                }
        } else {
                kfree_skb(skb);
                /* Jamal, now you will not able to escape explaining
                 * me how you were going to use this. :-)
                 */
                ret = NET_RX_DROP;
        }

        return ret;
}
|
/*
 * Poll handler for the per-CPU backlog pseudo-device: drains skbs queued
 * by netif_rx() and feeds them to netif_receive_skb().
 *
 * Returns -1 while there is more work left (quota or 1-jiffy time slice
 * exhausted), 0 when the queue was fully drained and the device was
 * removed from the poll list.  NOTE: the job_done path is entered with
 * local IRQs still disabled (from the failed dequeue) and re-enables
 * them only at the end.
 */
static int process_backlog(struct net_device *backlog_dev, int *budget)
{
        int work = 0;
        int quota = min(backlog_dev->quota, *budget);
        int this_cpu = smp_processor_id();
        struct softnet_data *queue = &softnet_data[this_cpu];
        unsigned long start_time = jiffies;

        for (;;) {
                struct sk_buff *skb;
                struct net_device *dev;

                /* The input queue is filled from IRQ context. */
                local_irq_disable();
                skb = __skb_dequeue(&queue->input_pkt_queue);
                if (skb == NULL)
                        goto job_done;
                local_irq_enable();

                dev = skb->dev;

                netif_receive_skb(skb);

                /* Drop the reference taken by netif_rx(). */
                dev_put(dev);

                work++;

                /* Yield after the quota or after roughly one jiffy. */
                if (work >= quota || jiffies - start_time > 1)
                        break;

#ifdef CONFIG_NET_HW_FLOWCONTROL
                if (queue->throttle && queue->input_pkt_queue.qlen < no_cong_thresh ) {
                        queue->throttle = 0;
                        if (atomic_dec_and_test(&netdev_dropping)) {
                                netdev_wakeup();
                                break;
                        }
                }
#endif
        }

        backlog_dev->quota -= work;
        *budget -= work;
        return -1;

job_done:
        /* Reached with IRQs disabled: queue is empty, retire ourselves
           from the poll list. */
        backlog_dev->quota -= work;
        *budget -= work;

        list_del(&backlog_dev->poll_list);
        smp_mb__before_clear_bit();
        netif_poll_enable(backlog_dev);

        /* Queue drained: leave the throttled state if set. */
        if (queue->throttle) {
                queue->throttle = 0;
#ifdef CONFIG_NET_HW_FLOWCONTROL
                if (atomic_dec_and_test(&netdev_dropping))
                        netdev_wakeup();
#endif
        }
        local_irq_enable();
        return 0;
}
|
/*
 * RX softirq handler: round-robins over devices on the per-CPU poll
 * list, invoking their ->poll() until the overall budget or a 1-jiffy
 * time slice is used up.  Invariant: local IRQs are disabled whenever
 * the poll list is examined or modified, and re-enabled around the
 * (potentially long) ->poll() call.
 */
static void net_rx_action(struct softirq_action *h)
{
        int this_cpu = smp_processor_id();
        struct softnet_data *queue = &softnet_data[this_cpu];
        unsigned long start_time = jiffies;
        int budget = netdev_max_backlog;

        br_read_lock(BR_NETPROTO_LOCK);
        local_irq_disable();

        while (!list_empty(&queue->poll_list)) {
                struct net_device *dev;

                if (budget <= 0 || jiffies - start_time > 1)
                        goto softnet_break;

                local_irq_enable();

                dev = list_entry(queue->poll_list.next, struct net_device, poll_list);

                /* Nonzero return from ->poll() means "more work left". */
                if (dev->quota <= 0 || dev->poll(dev, &budget)) {
                        /* Rotate the device to the tail and refill its
                           quota for the next round. */
                        local_irq_disable();
                        list_del(&dev->poll_list);
                        list_add_tail(&dev->poll_list, &queue->poll_list);
                        if (dev->quota < 0)
                                dev->quota += dev->weight;
                        else
                                dev->quota = dev->weight;
                } else {
                        /* Device finished; ->poll() completed and left us
                           the reference to drop. */
                        dev_put(dev);
                        local_irq_disable();
                }
        }

        local_irq_enable();
        br_read_unlock(BR_NETPROTO_LOCK);
        return;

softnet_break:
        /* Out of budget/time with work pending: re-raise ourselves. */
        netdev_rx_stat[this_cpu].time_squeeze++;
        __cpu_raise_softirq(this_cpu, NET_RX_SOFTIRQ);

        local_irq_enable();
        br_read_unlock(BR_NETPROTO_LOCK);
}
|
static gifconf_func_t * gifconf_list [NPROTO]; |
|
/** |
* register_gifconf - register a SIOCGIF handler |
* @family: Address family |
* @gifconf: Function handler |
* |
* Register protocol dependent address dumping routines. The handler |
* that is passed must not be freed or reused until it has been replaced |
* by another handler. |
*/ |
|
int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
{
        /* Reject out-of-range families; NULL unregisters the handler. */
        if (family >= NPROTO)
                return -EINVAL;

        gifconf_list[family] = gifconf;
        return 0;
}
|
|
/* |
* Map an interface index to its name (SIOCGIFNAME) |
*/ |
|
/* |
* We need this ioctl for efficient implementation of the |
* if_indextoname() function required by the IPv6 API. Without |
* it, we would have to search all the interfaces to find a |
* match. --pb |
*/ |
|
static int dev_ifname(struct ifreq *arg) |
{ |
struct net_device *dev; |
struct ifreq ifr; |
|
/* |
* Fetch the caller's info block. |
*/ |
|
if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) |
return -EFAULT; |
|
read_lock(&dev_base_lock); |
dev = __dev_get_by_index(ifr.ifr_ifindex); |
if (!dev) { |
read_unlock(&dev_base_lock); |
return -ENODEV; |
} |
|
strcpy(ifr.ifr_name, dev->name); |
read_unlock(&dev_base_lock); |
|
if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) |
return -EFAULT; |
return 0; |
} |
|
/* |
* Perform a SIOCGIFCONF call. This structure will change |
* size eventually, and there is nothing I can do about it. |
* Thus we will need a 'compatibility mode'. |
*/ |
|
/*
 * SIOCGIFCONF: write one info block per (device, registered family)
 * pair into the user buffer.  With a NULL buffer the handlers only
 * report the space they would need, so callers can size the buffer.
 *
 * NOTE(review): the per-family gifconf handlers are trusted to bound
 * their output by the len-total argument they receive — confirm against
 * the handler implementations (e.g. inet_gifconf).
 */
static int dev_ifconf(char *arg)
{
        struct ifconf ifc;
        struct net_device *dev;
        char *pos;
        int len;
        int total;
        int i;

        /*
         *      Fetch the caller's info block.
         */

        if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
                return -EFAULT;

        pos = ifc.ifc_buf;
        len = ifc.ifc_len;

        /*
         *      Loop over the interfaces, and write an info block for each.
         */

        total = 0;
        for (dev = dev_base; dev != NULL; dev = dev->next) {
                for (i=0; i<NPROTO; i++) {
                        if (gifconf_list[i]) {
                                int done;
                                if (pos==NULL) {
                                        /* Size-probe mode: count only. */
                                        done = gifconf_list[i](dev, NULL, 0);
                                } else {
                                        done = gifconf_list[i](dev, pos+total, len-total);
                                }
                                if (done<0) {
                                        return -EFAULT;
                                }
                                total += done;
                        }
                }
        }

        /*
         *      All done.  Write the updated control block back to the caller.
         */
        ifc.ifc_len = total;

        if (copy_to_user(arg, &ifc, sizeof(struct ifconf)))
                return -EFAULT;

        /*
         *      Both BSD and Solaris return 0 here, so we do too.
         */
        return 0;
}
|
/* |
* This is invoked by the /proc filesystem handler to display a device |
* in detail. |
*/ |
|
#ifdef CONFIG_PROC_FS |
|
/*
 * Format one /proc/net/dev line for @dev into @buffer and return the
 * number of characters written.  Devices without a get_stats method
 * (or whose method returns NULL) get a placeholder line.  The column
 * layout must match the header printed by dev_get_info().
 */
static int sprintf_stats(char *buffer, struct net_device *dev)
{
        struct net_device_stats *stats = (dev->get_stats ? dev->get_stats(dev): NULL);
        int size;

        if (stats)
                size = sprintf(buffer, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu %8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
                   dev->name,
                   stats->rx_bytes,
                   stats->rx_packets, stats->rx_errors,
                   stats->rx_dropped + stats->rx_missed_errors,
                   stats->rx_fifo_errors,
                   /* "frame" column aggregates several RX error types. */
                   stats->rx_length_errors + stats->rx_over_errors
                   + stats->rx_crc_errors + stats->rx_frame_errors,
                   stats->rx_compressed, stats->multicast,
                   stats->tx_bytes,
                   stats->tx_packets, stats->tx_errors, stats->tx_dropped,
                   stats->tx_fifo_errors, stats->collisions,
                   /* "carrier" column aggregates several TX error types. */
                   stats->tx_carrier_errors + stats->tx_aborted_errors
                   + stats->tx_window_errors + stats->tx_heartbeat_errors,
                   stats->tx_compressed);
        else
                size = sprintf(buffer, "%6s: No statistics available.\n", dev->name);

        return size;
}
|
/* |
* Called from the PROCfs module. This now uses the new arbitrary sized /proc/net interface |
* to create /proc/net/dev |
*/ |
|
/*
 * /proc/net/dev read handler (classic get_info interface): emits the
 * two-line header plus one sprintf_stats() line per device, then
 * windows the result to the requested (offset, length) slice using the
 * usual begin/pos bookkeeping of old-style /proc handlers.
 */
static int dev_get_info(char *buffer, char **start, off_t offset, int length)
{
        int len = 0;
        off_t begin = 0;
        off_t pos = 0;
        int size;
        struct net_device *dev;


        size = sprintf(buffer,
                "Inter-|   Receive                                                |  Transmit\n"
                " face |bytes    packets errs drop fifo frame compressed multicast|bytes    packets errs drop fifo colls carrier compressed\n");

        pos += size;
        len += size;


        read_lock(&dev_base_lock);
        for (dev = dev_base; dev != NULL; dev = dev->next) {
                size = sprintf_stats(buffer+len, dev);
                len += size;
                pos = begin + len;

                if (pos < offset) {
                        /* Everything so far is before the window:
                           discard it and restart accumulation. */
                        len = 0;
                        begin = pos;
                }
                if (pos > offset + length)
                        break;
        }
        read_unlock(&dev_base_lock);

        *start = buffer + (offset - begin);     /* Start of wanted data */
        len -= (offset - begin);                /* Start slop */
        if (len > length)
                len = length;                   /* Ending slop */
        if (len < 0)
                len = 0;
        return len;
}
|
/*
 * /proc/net/softnet_stat read handler: one row of nine hex counters per
 * online CPU, taken from netdev_rx_stat[].  The whole output is built
 * in one call, so *eof is always set.
 */
static int dev_proc_stats(char *buffer, char **start, off_t offset,
                          int length, int *eof, void *data)
{
        int i, lcpu;
        int len=0;

        for (lcpu=0; lcpu<smp_num_cpus; lcpu++) {
                /* Translate logical CPU number to the physical index
                   used by the stats array. */
                i = cpu_logical_map(lcpu);
                len += sprintf(buffer+len, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
                               netdev_rx_stat[i].total,
                               netdev_rx_stat[i].dropped,
                               netdev_rx_stat[i].time_squeeze,
                               netdev_rx_stat[i].throttled,
                               netdev_rx_stat[i].fastroute_hit,
                               netdev_rx_stat[i].fastroute_success,
                               netdev_rx_stat[i].fastroute_defer,
                               netdev_rx_stat[i].fastroute_deferred_out,
#if 0
                               netdev_rx_stat[i].fastroute_latency_reduction
#else
                               netdev_rx_stat[i].cpu_collision
#endif
                               );
        }

        /* Window the buffer to the requested slice. */
        len -= offset;

        if (len > length)
                len = length;
        if (len < 0)
                len = 0;

        *start = buffer + offset;
        *eof = 1;

        return len;
}
|
#endif /* CONFIG_PROC_FS */ |
|
|
/** |
* netdev_set_master - set up master/slave pair |
* @slave: slave device |
* @master: new master device |
* |
* Changes the master device of the slave. Pass %NULL to break the |
* bonding. The caller must hold the RTNL semaphore. On a failure |
* a negative errno code is returned. On success the reference counts |
* are adjusted, %RTM_NEWLINK is sent to the routing socket and the |
* function returns zero. |
*/ |
|
int netdev_set_master(struct net_device *slave, struct net_device *master)
{
        struct net_device *old = slave->master;

        /* Caller must hold the RTNL semaphore (see kernel-doc above). */
        ASSERT_RTNL();

        if (master) {
                /* Refuse to re-enslave an already enslaved device. */
                if (old)
                        return -EBUSY;
                dev_hold(master);
        }

        /* Publish the new master under the netproto lock so the RX path
           (skb_bond) never sees a half-updated pointer. */
        br_write_lock_bh(BR_NETPROTO_LOCK);
        slave->master = master;
        br_write_unlock_bh(BR_NETPROTO_LOCK);

        if (old)
                dev_put(old);

        if (master)
                slave->flags |= IFF_SLAVE;
        else
                slave->flags &= ~IFF_SLAVE;

        /* Tell routing-socket listeners about the flag change. */
        rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
        return 0;
}
|
/** |
* dev_set_promiscuity - update promiscuity count on a device |
* @dev: device |
* @inc: modifier |
* |
 *	Add or remove promiscuity from a device. While the count in the device |
* remains above zero the interface remains promiscuous. Once it hits zero |
* the device reverts back to normal filtering operation. A negative inc |
* value is used to drop promiscuity on the device. |
*/ |
|
void dev_set_promiscuity(struct net_device *dev, int inc)
{
        unsigned short old_flags = dev->flags;

        /* Set the flag, then clear it again only if the reference count
           dropped back to zero.
           NOTE(review): a negative @inc with promiscuity already 0 would
           wrap the counter — callers are trusted to keep it balanced. */
        dev->flags |= IFF_PROMISC;
        if ((dev->promiscuity += inc) == 0)
                dev->flags &= ~IFF_PROMISC;
        if (dev->flags^old_flags) {
#ifdef CONFIG_NET_FASTROUTE
                /* Promiscuous mode is incompatible with fastroute. */
                if (dev->flags&IFF_PROMISC) {
                        netdev_fastroute_obstacles++;
                        dev_clear_fastroute(dev);
                } else
                        netdev_fastroute_obstacles--;
#endif
                /* Push the new filtering state down to the driver. */
                dev_mc_upload(dev);
                printk(KERN_INFO "device %s %s promiscuous mode\n",
                       dev->name, (dev->flags&IFF_PROMISC) ? "entered" : "left");
        }
}
|
/** |
* dev_set_allmulti - update allmulti count on a device |
* @dev: device |
* @inc: modifier |
* |
* Add or remove reception of all multicast frames to a device. While the |
* count in the device remains above zero the interface remains listening |
* to all interfaces. Once it hits zero the device reverts back to normal |
* filtering operation. A negative @inc value is used to drop the counter |
* when releasing a resource needing all multicasts. |
*/ |
|
void dev_set_allmulti(struct net_device *dev, int inc) |
{ |
unsigned short old_flags = dev->flags; |
|
dev->flags |= IFF_ALLMULTI; |
if ((dev->allmulti += inc) == 0) |
dev->flags &= ~IFF_ALLMULTI; |
if (dev->flags^old_flags) |
dev_mc_upload(dev); |
} |
|
/*
 * SIOCSIFFLAGS worker: apply a new flag word to @dev.  IFF_UP,
 * IFF_PROMISC and IFF_ALLMULTI are handled specially — the latter two
 * are reference-counted via dev_set_promiscuity()/dev_set_allmulti()
 * and the user's view of them is tracked in dev->gflags.  Returns 0 or
 * the error from dev_open()/dev_close().
 */
int dev_change_flags(struct net_device *dev, unsigned flags)
{
        int ret;
        int old_flags = dev->flags;

        /*
         *      Set the flags on our device.
         */

        dev->flags = (flags & (IFF_DEBUG|IFF_NOTRAILERS|IFF_NOARP|IFF_DYNAMIC|
                               IFF_MULTICAST|IFF_PORTSEL|IFF_AUTOMEDIA)) |
                                       (dev->flags & (IFF_UP|IFF_VOLATILE|IFF_PROMISC|IFF_ALLMULTI));

        /*
         *      Load in the correct multicast list now the flags have changed.
         */

        dev_mc_upload(dev);

        /*
         *      Have we downed the interface. We handle IFF_UP ourselves
         *      according to user attempts to set it, rather than blindly
         *      setting it.
         */

        ret = 0;
        if ((old_flags^flags)&IFF_UP)   /* Bit is different  ? */
        {
                ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);

                if (ret == 0)
                        dev_mc_upload(dev);
        }

        /* Notify only for "real" changes on a running interface. */
        if (dev->flags&IFF_UP &&
            ((old_flags^dev->flags)&~(IFF_UP|IFF_PROMISC|IFF_ALLMULTI|IFF_VOLATILE)))
                notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);

        /* Translate a user-requested IFF_PROMISC change into a +1/-1 on
           the reference-counted promiscuity, tracked via gflags. */
        if ((flags^dev->gflags)&IFF_PROMISC) {
                int inc = (flags&IFF_PROMISC) ? +1 : -1;
                dev->gflags ^= IFF_PROMISC;
                dev_set_promiscuity(dev, inc);
        }

        /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
           is important. Some (broken) drivers set IFF_PROMISC, when
           IFF_ALLMULTI is requested not asking us and not reporting.
         */
        if ((flags^dev->gflags)&IFF_ALLMULTI) {
                int inc = (flags&IFF_ALLMULTI) ? +1 : -1;
                dev->gflags ^= IFF_ALLMULTI;
                dev_set_allmulti(dev, inc);
        }

        /* Announce any effective flag change on the routing socket. */
        if (old_flags^dev->flags)
                rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags^dev->flags);

        return ret;
}
|
/* |
* Perform the SIOCxIFxxx calls. |
*/ |
|
/*
 * Worker for the SIOCxIFxxx device ioctls.  Looks up the device named
 * in @ifr and dispatches on @cmd.  Locking and permission checks are
 * the caller's (dev_ioctl's) responsibility.  Returns 0 on success or
 * a negative errno; -EINVAL for commands we do not recognise.
 */
static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
{
        struct net_device *dev;
        int err;

        if ((dev = __dev_get_by_name(ifr->ifr_name)) == NULL)
                return -ENODEV;

        switch(cmd)
        {
                case SIOCGIFFLAGS:      /* Get interface flags */
                        /* Report the user-visible (gflags) view of the
                           reference-counted PROMISC/ALLMULTI bits. */
                        ifr->ifr_flags = (dev->flags&~(IFF_PROMISC|IFF_ALLMULTI|IFF_RUNNING))
                                |(dev->gflags&(IFF_PROMISC|IFF_ALLMULTI));
                        if (netif_running(dev) && netif_carrier_ok(dev))
                                ifr->ifr_flags |= IFF_RUNNING;
                        return 0;

                case SIOCSIFFLAGS:      /* Set interface flags */
                        return dev_change_flags(dev, ifr->ifr_flags);

                case SIOCGIFMETRIC:     /* Get the metric on the interface (currently unused) */
                        ifr->ifr_metric = 0;
                        return 0;

                case SIOCSIFMETRIC:     /* Set the metric on the interface (currently unused) */
                        return -EOPNOTSUPP;

                case SIOCGIFMTU:        /* Get the MTU of a device */
                        ifr->ifr_mtu = dev->mtu;
                        return 0;

                case SIOCSIFMTU:        /* Set the MTU of a device */
                        if (ifr->ifr_mtu == dev->mtu)
                                return 0;

                        /*
                         *      MTU must be positive.
                         */

                        if (ifr->ifr_mtu<0)
                                return -EINVAL;

                        if (!netif_device_present(dev))
                                return -ENODEV;

                        /* Let the driver validate/apply, or set it
                           directly when no change_mtu hook exists. */
                        if (dev->change_mtu)
                                err = dev->change_mtu(dev, ifr->ifr_mtu);
                        else {
                                dev->mtu = ifr->ifr_mtu;
                                err = 0;
                        }
                        if (!err && dev->flags&IFF_UP)
                                notifier_call_chain(&netdev_chain, NETDEV_CHANGEMTU, dev);
                        return err;

                case SIOCGIFHWADDR:
                        memcpy(ifr->ifr_hwaddr.sa_data,dev->dev_addr, MAX_ADDR_LEN);
                        ifr->ifr_hwaddr.sa_family=dev->type;
                        return 0;

                case SIOCSIFHWADDR:
                        if (dev->set_mac_address == NULL)
                                return -EOPNOTSUPP;
                        if (ifr->ifr_hwaddr.sa_family!=dev->type)
                                return -EINVAL;
                        if (!netif_device_present(dev))
                                return -ENODEV;
                        err = dev->set_mac_address(dev, &ifr->ifr_hwaddr);
                        if (!err)
                                notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
                        return err;

                case SIOCSIFHWBROADCAST:
                        if (ifr->ifr_hwaddr.sa_family!=dev->type)
                                return -EINVAL;
                        memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, MAX_ADDR_LEN);
                        notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
                        return 0;

                case SIOCGIFMAP:
                        ifr->ifr_map.mem_start=dev->mem_start;
                        ifr->ifr_map.mem_end=dev->mem_end;
                        ifr->ifr_map.base_addr=dev->base_addr;
                        ifr->ifr_map.irq=dev->irq;
                        ifr->ifr_map.dma=dev->dma;
                        ifr->ifr_map.port=dev->if_port;
                        return 0;

                case SIOCSIFMAP:
                        if (dev->set_config) {
                                if (!netif_device_present(dev))
                                        return -ENODEV;
                                return dev->set_config(dev,&ifr->ifr_map);
                        }
                        return -EOPNOTSUPP;

                case SIOCADDMULTI:
                        /* Only AF_UNSPEC hardware addresses are accepted,
                           and only for devices with a multicast list. */
                        if (dev->set_multicast_list == NULL ||
                            ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
                                return -EINVAL;
                        if (!netif_device_present(dev))
                                return -ENODEV;
                        dev_mc_add(dev,ifr->ifr_hwaddr.sa_data, dev->addr_len, 1);
                        return 0;

                case SIOCDELMULTI:
                        if (dev->set_multicast_list == NULL ||
                            ifr->ifr_hwaddr.sa_family!=AF_UNSPEC)
                                return -EINVAL;
                        if (!netif_device_present(dev))
                                return -ENODEV;
                        dev_mc_delete(dev,ifr->ifr_hwaddr.sa_data,dev->addr_len, 1);
                        return 0;

                case SIOCGIFINDEX:
                        ifr->ifr_ifindex = dev->ifindex;
                        return 0;

                case SIOCGIFTXQLEN:
                        ifr->ifr_qlen = dev->tx_queue_len;
                        return 0;

                case SIOCSIFTXQLEN:
                        if (ifr->ifr_qlen<0)
                                return -EINVAL;
                        dev->tx_queue_len = ifr->ifr_qlen;
                        return 0;

                case SIOCSIFNAME:
                        /* Renaming is only allowed while the device is
                           down and the new name is unused. */
                        if (dev->flags&IFF_UP)
                                return -EBUSY;
                        if (__dev_get_by_name(ifr->ifr_newname))
                                return -EEXIST;
                        memcpy(dev->name, ifr->ifr_newname, IFNAMSIZ);
                        dev->name[IFNAMSIZ-1] = 0;
                        notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
                        return 0;

                /*
                 *      Unknown or private ioctl
                 */

                default:
                        /* Pass device-private and driver-level commands
                           through to the driver's do_ioctl hook. */
                        if ((cmd >= SIOCDEVPRIVATE &&
                            cmd <= SIOCDEVPRIVATE + 15) ||
                            cmd == SIOCBONDENSLAVE ||
                            cmd == SIOCBONDRELEASE ||
                            cmd == SIOCBONDSETHWADDR ||
                            cmd == SIOCBONDSLAVEINFOQUERY ||
                            cmd == SIOCBONDINFOQUERY ||
                            cmd == SIOCBONDCHANGEACTIVE ||
                            cmd == SIOCGMIIPHY ||
                            cmd == SIOCGMIIREG ||
                            cmd == SIOCSMIIREG ||
                            cmd == SIOCWANDEV) {
                                if (dev->do_ioctl) {
                                        if (!netif_device_present(dev))
                                                return -ENODEV;
                                        return dev->do_ioctl(dev, ifr, cmd);
                                }
                                return -EOPNOTSUPP;
                        }

        }
        return -EINVAL;
}
|
/* |
* This function handles all "interface"-type I/O control requests. The actual |
* 'doing' part of this is dev_ifsioc above. |
*/ |
|
/** |
* dev_ioctl - network device ioctl |
* @cmd: command to issue |
* @arg: pointer to a struct ifreq in user space |
* |
* Issue ioctl functions to devices. This is normally called by the |
* user space syscall interfaces but can sometimes be useful for |
* other purposes. The return value is the return from the syscall if |
* positive or a negative errno code on error. |
*/ |
|
int dev_ioctl(unsigned int cmd, void *arg)
{
        struct ifreq ifr;
        int ret;
        char *colon;

        /* One special case: SIOCGIFCONF takes ifconf argument
           and requires shared lock, because it sleeps writing
           to user space.
         */

        if (cmd == SIOCGIFCONF) {
                rtnl_shlock();
                ret = dev_ifconf((char *) arg);
                rtnl_shunlock();
                return ret;
        }
        if (cmd == SIOCGIFNAME) {
                return dev_ifname((struct ifreq *)arg);
        }

        if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
                return -EFAULT;

        ifr.ifr_name[IFNAMSIZ-1] = 0;

        /* Strip an alias suffix ("eth0:1" -> "eth0") for the lookup;
           the colon is restored before copying results back. */
        colon = strchr(ifr.ifr_name, ':');
        if (colon)
                *colon = 0;

        /*
         *      See which interface the caller is talking about.
         */

        switch(cmd)
        {
                /*
                 *      These ioctl calls:
                 *      - can be done by all.
                 *      - atomic and do not require locking.
                 *      - return a value
                 */

                case SIOCGIFFLAGS:
                case SIOCGIFMETRIC:
                case SIOCGIFMTU:
                case SIOCGIFHWADDR:
                case SIOCGIFSLAVE:
                case SIOCGIFMAP:
                case SIOCGIFINDEX:
                case SIOCGIFTXQLEN:
                        dev_load(ifr.ifr_name);
                        read_lock(&dev_base_lock);
                        ret = dev_ifsioc(&ifr, cmd);
                        read_unlock(&dev_base_lock);
                        if (!ret) {
                                if (colon)
                                        *colon = ':';
                                if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
                                        return -EFAULT;
                        }
                        return ret;

                case SIOCETHTOOL:
                        /* ethtool has its own dispatcher; it needs the
                           full rtnl lock, not just dev_base_lock. */
                        dev_load(ifr.ifr_name);
                        rtnl_lock();
                        ret = dev_ethtool(&ifr);
                        rtnl_unlock();
                        if (!ret) {
                                if (colon)
                                        *colon = ':';
                                if (copy_to_user(arg, &ifr,
                                                 sizeof(struct ifreq)))
                                        ret = -EFAULT;
                        }
                        return ret;

                /*
                 *      These ioctl calls:
                 *      - require superuser power.
                 *      - require strict serialization.
                 *      - return a value
                 */

                case SIOCGMIIPHY:
                case SIOCGMIIREG:
                        if (!capable(CAP_NET_ADMIN))
                                return -EPERM;
                        dev_load(ifr.ifr_name);
                        dev_probe_lock();
                        rtnl_lock();
                        ret = dev_ifsioc(&ifr, cmd);
                        rtnl_unlock();
                        dev_probe_unlock();
                        if (!ret) {
                                if (colon)
                                        *colon = ':';
                                if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
                                        return -EFAULT;
                        }
                        return ret;

                /*
                 *      These ioctl calls:
                 *      - require superuser power.
                 *      - require strict serialization.
                 *      - do not return a value
                 */

                case SIOCSIFFLAGS:
                case SIOCSIFMETRIC:
                case SIOCSIFMTU:
                case SIOCSIFMAP:
                case SIOCSIFHWADDR:
                case SIOCSIFSLAVE:
                case SIOCADDMULTI:
                case SIOCDELMULTI:
                case SIOCSIFHWBROADCAST:
                case SIOCSIFTXQLEN:
                case SIOCSIFNAME:
                case SIOCSMIIREG:
                case SIOCBONDENSLAVE:
                case SIOCBONDRELEASE:
                case SIOCBONDSETHWADDR:
                case SIOCBONDSLAVEINFOQUERY:
                case SIOCBONDINFOQUERY:
                case SIOCBONDCHANGEACTIVE:
                        if (!capable(CAP_NET_ADMIN))
                                return -EPERM;
                        dev_load(ifr.ifr_name);
                        dev_probe_lock();
                        rtnl_lock();
                        ret = dev_ifsioc(&ifr, cmd);
                        rtnl_unlock();
                        dev_probe_unlock();
                        return ret;

                case SIOCGIFMEM:
                        /* Get the per device memory space. We can add this but currently
                           do not support it */
                case SIOCSIFMEM:
                        /* Set the per device memory buffer space. Not applicable in our case */
                case SIOCSIFLINK:
                        return -EINVAL;

                /*
                 *      Unknown or private ioctl.
                 */

                default:
                        if (cmd == SIOCWANDEV ||
                            (cmd >= SIOCDEVPRIVATE &&
                             cmd <= SIOCDEVPRIVATE + 15)) {
                                /* Driver-private commands: serialized
                                   like the privileged setters, but the
                                   (possibly modified) ifreq is copied
                                   back to the caller. */
                                dev_load(ifr.ifr_name);
                                dev_probe_lock();
                                rtnl_lock();
                                ret = dev_ifsioc(&ifr, cmd);
                                rtnl_unlock();
                                dev_probe_unlock();
                                if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq)))
                                        return -EFAULT;
                                return ret;
                        }
#ifdef WIRELESS_EXT
                        /* Take care of Wireless Extensions */
                        if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
                                /* If command is `set a parameter', or
                                 * `get the encoding parameters', check if
                                 * the user has the right to do it */
                                if (IW_IS_SET(cmd) || (cmd == SIOCGIWENCODE)) {
                                        if(!capable(CAP_NET_ADMIN))
                                                return -EPERM;
                                }
                                dev_load(ifr.ifr_name);
                                rtnl_lock();
                                /* Follow me in net/core/wireless.c */
                                ret = wireless_process_ioctl(&ifr, cmd);
                                rtnl_unlock();
                                if (!ret && IW_IS_GET(cmd) &&
                                    copy_to_user(arg, &ifr, sizeof(struct ifreq)))
                                        return -EFAULT;
                                return ret;
                        }
#endif  /* WIRELESS_EXT */
                        return -EINVAL;
        }
}
|
|
/** |
* dev_new_index - allocate an ifindex |
* |
* Returns a suitable unique value for a new device interface |
* number. The caller must hold the rtnl semaphore or the |
* dev_base_lock to be sure it remains unique. |
*/ |
|
int dev_new_index(void) |
{ |
static int ifindex; |
for (;;) { |
if (++ifindex <= 0) |
ifindex=1; |
if (__dev_get_by_index(ifindex) == NULL) |
return ifindex; |
} |
} |
|
static int dev_boot_phase = 1; |
|
/** |
* register_netdevice - register a network device |
* @dev: device to register |
* |
* Take a completed network device structure and add it to the kernel |
* interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier |
* chain. 0 is returned on success. A negative errno code is returned |
* on a failure to set up the device, or if the name is a duplicate. |
* |
* Callers must hold the rtnl semaphore. See the comment at the |
* end of Space.c for details about the locking. You may want |
* register_netdev() instead of this. |
* |
* BUGS: |
* The locking appears insufficient to guarantee two parallel registers |
* will not get the same name. |
*/ |
|
int net_dev_init(void); |
|
int register_netdevice(struct net_device *dev)
{
        struct net_device *d, **dp;
#ifdef CONFIG_NET_DIVERT
        int ret;
#endif

        spin_lock_init(&dev->queue_lock);
        spin_lock_init(&dev->xmit_lock);
        dev->xmit_lock_owner = -1;
#ifdef CONFIG_NET_FASTROUTE
        dev->fastpath_lock=RW_LOCK_UNLOCKED;
#endif

        /* Very early (boot-time) registration: make sure the core is
           initialized first. */
        if (dev_boot_phase)
                net_dev_init();

#ifdef CONFIG_NET_DIVERT
        ret = alloc_divert_blk(dev);
        if (ret)
                return ret;
#endif /* CONFIG_NET_DIVERT */

        dev->iflink = -1;

        /* Init, if this function is available */
        if (dev->init && dev->init(dev) != 0) {
#ifdef CONFIG_NET_DIVERT
                free_divert_blk(dev);
#endif
                return -EIO;
        }

        dev->ifindex = dev_new_index();
        if (dev->iflink == -1)
                dev->iflink = dev->ifindex;

        /* Check for existence, and append to tail of chain */
        for (dp=&dev_base; (d=*dp) != NULL; dp=&d->next) {
                if (d == dev || strcmp(d->name, dev->name) == 0) {
#ifdef CONFIG_NET_DIVERT
                        free_divert_blk(dev);
#endif
                        return -EEXIST;
                }
        }

        /* Fix illegal SG+CSUM combinations. */
        if ((dev->features & NETIF_F_SG) &&
            !(dev->features & (NETIF_F_IP_CSUM |
                               NETIF_F_NO_CSUM |
                               NETIF_F_HW_CSUM))) {
                printk("%s: Dropping NETIF_F_SG since no checksum feature.\n",
                       dev->name);
                dev->features &= ~NETIF_F_SG;
        }

        /*
         *      nil rebuild_header routine,
         *      that should be never called and used as just bug trap.
         */

        if (dev->rebuild_header == NULL)
                dev->rebuild_header = default_rebuild_header;

        /*
         *      Default initial state at registry is that the
         *      device is present.
         */

        set_bit(__LINK_STATE_PRESENT, &dev->state);

        dev->next = NULL;
        dev_init_scheduler(dev);
        /* Link into dev_base under the write lock; dev->deadbeaf = 0
           marks the device as alive. */
        write_lock_bh(&dev_base_lock);
        *dp = dev;
        dev_hold(dev);
        dev->deadbeaf = 0;
        write_unlock_bh(&dev_base_lock);

        /* Notify protocols, that a new device appeared. */
        notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);

        net_run_sbin_hotplug(dev, "register");

        return 0;
}
|
/** |
* netdev_finish_unregister - complete unregistration |
* @dev: device |
* |
* Destroy and free a dead device. A value of zero is returned on |
* success. |
*/ |
|
int netdev_finish_unregister(struct net_device *dev)
{
        /* All protocol private state must already be detached. */
        BUG_TRAP(dev->ip_ptr==NULL);
        BUG_TRAP(dev->ip6_ptr==NULL);
        BUG_TRAP(dev->dn_ptr==NULL);

        /* Refuse to free a device that was never marked dead by
           unregister_netdevice(). */
        if (!dev->deadbeaf) {
                printk(KERN_ERR "Freeing alive device %p, %s\n", dev, dev->name);
                return 0;
        }
#ifdef NET_REFCNT_DEBUG
        printk(KERN_DEBUG "netdev_finish_unregister: %s%s.\n", dev->name,
               (dev->features & NETIF_F_DYNALLOC)?"":", old style");
#endif
        if (dev->destructor)
                dev->destructor(dev);
        /* Only dynamically allocated ("new style") devices are freed
           here; old-style devices are owned by their driver. */
        if (dev->features & NETIF_F_DYNALLOC)
                kfree(dev);
        return 0;
}
|
/** |
* unregister_netdevice - remove device from the kernel |
* @dev: device |
* |
* This function shuts down a device interface and removes it |
* from the kernel tables. On success 0 is returned, on a failure |
* a negative errno code is returned. |
* |
* Callers must hold the rtnl semaphore. See the comment at the |
* end of Space.c for details about the locking. You may want |
* unregister_netdev() instead of this. |
*/ |
|
int unregister_netdevice(struct net_device *dev)
{
        unsigned long now, warning_time;
        struct net_device *d, **dp;

        /* If device is running, close it first. */
        if (dev->flags & IFF_UP)
                dev_close(dev);

        /* Mark dead before unlinking so late users can detect it. */
        BUG_TRAP(dev->deadbeaf==0);
        dev->deadbeaf = 1;

        /* And unlink it from device chain. */
        for (dp = &dev_base; (d=*dp) != NULL; dp=&d->next) {
                if (d == dev) {
                        write_lock_bh(&dev_base_lock);
                        *dp = d->next;
                        write_unlock_bh(&dev_base_lock);
                        break;
                }
        }
        if (d == NULL) {
                printk(KERN_DEBUG "unregister_netdevice: device %s/%p never was registered\n", dev->name, dev);
                return -ENODEV;
        }

        /* Synchronize to net_rx_action: taking and releasing the write
           lock guarantees no RX softirq still sees the device. */
        br_write_lock_bh(BR_NETPROTO_LOCK);
        br_write_unlock_bh(BR_NETPROTO_LOCK);

        if (dev_boot_phase == 0) {
#ifdef CONFIG_NET_FASTROUTE
                dev_clear_fastroute(dev);
#endif

                /* Shutdown queueing discipline. */
                dev_shutdown(dev);

                net_run_sbin_hotplug(dev, "unregister");

                /* Notify protocols, that we are about to destroy
                   this device. They should clean all the things.
                 */
                notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);

                /*
                 *      Flush the multicast chain
                 */
                dev_mc_discard(dev);
        }

        if (dev->uninit)
                dev->uninit(dev);

        /* Notifier chain MUST detach us from master device. */
        BUG_TRAP(dev->master==NULL);

#ifdef CONFIG_NET_DIVERT
        free_divert_blk(dev);
#endif

        /* New-style device: the destructor (via netdev_finish_unregister)
           frees it once the last reference is dropped; we can return. */
        if (dev->features & NETIF_F_DYNALLOC) {
#ifdef NET_REFCNT_DEBUG
                if (atomic_read(&dev->refcnt) != 1)
                        printk(KERN_DEBUG "unregister_netdevice: holding %s refcnt=%d\n", dev->name, atomic_read(&dev->refcnt)-1);
#endif
                dev_put(dev);
                return 0;
        }

        /* Last reference is our one */
        if (atomic_read(&dev->refcnt) == 1) {
                dev_put(dev);
                return 0;
        }

#ifdef NET_REFCNT_DEBUG
        printk("unregister_netdevice: waiting %s refcnt=%d\n", dev->name, atomic_read(&dev->refcnt));
#endif

        /* EXPLANATION. If dev->refcnt is not now 1 (our own reference)
           it means that someone in the kernel still has a reference
           to this device and we cannot release it.

           "New style" devices have destructors, hence we can return from this
           function and destructor will do all the work later.  As of kernel 2.4.0
           there are very few "New Style" devices.

           "Old style" devices expect that the device is free of any references
           upon exit from this function.
           We cannot return from this function until all such references have
           fallen away.  This is because the caller of this function will probably
           immediately kfree(*dev) and then be unloaded via sys_delete_module.

           So, we linger until all references fall away.  The duration of the
           linger is basically unbounded! It is driven by, for example, the
           current setting of sysctl_ipfrag_time.

           After 1 second, we start to rebroadcast unregister notifications
           in hope that careless clients will release the device.

         */

        now = warning_time = jiffies;
        while (atomic_read(&dev->refcnt) != 1) {
                if ((jiffies - now) > 1*HZ) {
                        /* Rebroadcast unregister notification */
                        notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
                }
                /* Sleep a quarter second between refcount polls. */
                current->state = TASK_INTERRUPTIBLE;
                schedule_timeout(HZ/4);
                current->state = TASK_RUNNING;
                if ((jiffies - warning_time) > 10*HZ) {
                        printk(KERN_EMERG "unregister_netdevice: waiting for %s to "
                               "become free. Usage count = %d\n",
                               dev->name, atomic_read(&dev->refcnt));
                        warning_time = jiffies;
                }
        }
        dev_put(dev);
        return 0;
}
|
|
/* |
* Initialize the DEV module. At boot time this walks the device list and |
* unhooks any devices that fail to initialise (normally hardware not |
* present) and leaves us with a valid list of present and active devices. |
* |
*/ |
|
/* Initialisers defined elsewhere in the tree; presumably net_device_init
 * comes from drivers/net/Space.c and ip_auto_config from ipconfig —
 * TODO confirm against the build. */
extern void net_device_init(void);
extern void ip_auto_config(void);
/* /proc/net/drivers directory entry, created in net_dev_init(). */
struct proc_dir_entry *proc_net_drivers;
#ifdef CONFIG_NET_DIVERT
extern void dv_init(void);
#endif /* CONFIG_NET_DIVERT */
|
|
/* |
* Callers must hold the rtnl semaphore. See the comment at the |
* end of Space.c for details about the locking. |
*/ |
/*
 * net_dev_init - boot-time initialisation of the network device layer.
 *
 * Sets up the per-CPU softnet receive queues, walks the statically built
 * dev_base chain registering each device (unhooking any whose init()
 * fails), creates the /proc entries and opens the NET softirqs.
 * Runs exactly once, guarded by dev_boot_phase.  Always returns 0.
 */
int __init net_dev_init(void)
{
	struct net_device *dev, **dp;
	int i;

	/* Already initialised. */
	if (!dev_boot_phase)
		return 0;


#ifdef CONFIG_NET_DIVERT
	dv_init();
#endif /* CONFIG_NET_DIVERT */

	/*
	 * Initialise the packet receive queues.
	 */

	for (i = 0; i < NR_CPUS; i++) {
		struct softnet_data *queue;

		queue = &softnet_data[i];
		skb_queue_head_init(&queue->input_pkt_queue);
		queue->throttle = 0;
		queue->cng_level = 0;
		queue->avg_blog = 10; /* arbitrary non-zero */
		queue->completion_queue = NULL;
		INIT_LIST_HEAD(&queue->poll_list);
		/* blog_dev is the per-CPU backlog pseudo-device used to
		 * feed non-polling drivers through process_backlog(). */
		set_bit(__LINK_STATE_START, &queue->blog_dev.state);
		queue->blog_dev.weight = weight_p;
		queue->blog_dev.poll = process_backlog;
		atomic_set(&queue->blog_dev.refcnt, 1);
	}

#ifdef CONFIG_NET_PROFILE
	net_profile_init();
	NET_PROFILE_REGISTER(dev_queue_xmit);
	NET_PROFILE_REGISTER(softnet_process);
#endif

#ifdef OFFLINE_SAMPLE
	samp_timer.expires = jiffies + (10 * HZ);
	add_timer(&samp_timer);
#endif

	/*
	 * Add the devices.
	 * If the call to dev->init fails, the dev is removed
	 * from the chain disconnecting the device until the
	 * next reboot.
	 *
	 * NB At boot phase networking is dead. No locking is required.
	 * But we still preserve dev_base_lock for sanity.
	 */

	dp = &dev_base;
	while ((dev = *dp) != NULL) {
		spin_lock_init(&dev->queue_lock);
		spin_lock_init(&dev->xmit_lock);
#ifdef CONFIG_NET_FASTROUTE
		dev->fastpath_lock = RW_LOCK_UNLOCKED;
#endif
		dev->xmit_lock_owner = -1;
		dev->iflink = -1;
		dev_hold(dev);

		/*
		 * Allocate name. If the init() fails
		 * the name will be reissued correctly.
		 */
		if (strchr(dev->name, '%'))
			dev_alloc_name(dev, dev->name);

		/*
		 * Check boot time settings for the device.
		 */
		netdev_boot_setup_check(dev);

		if (dev->init && dev->init(dev)) {
			/*
			 * It failed to come up. It will be unhooked later.
			 * dev_alloc_name can now advance to next suitable
			 * name that is checked next.
			 */
			dev->deadbeaf = 1;
			dp = &dev->next;
		} else {
			dp = &dev->next;
			dev->ifindex = dev_new_index();
			if (dev->iflink == -1)
				dev->iflink = dev->ifindex;
			if (dev->rebuild_header == NULL)
				dev->rebuild_header = default_rebuild_header;
			dev_init_scheduler(dev);
			set_bit(__LINK_STATE_PRESENT, &dev->state);
		}
	}

	/*
	 * Unhook devices that failed to come up
	 */
	dp = &dev_base;
	while ((dev = *dp) != NULL) {
		if (dev->deadbeaf) {
			write_lock_bh(&dev_base_lock);
			*dp = dev->next;
			write_unlock_bh(&dev_base_lock);
			dev_put(dev);
		} else {
			dp = &dev->next;
		}
	}

#ifdef CONFIG_PROC_FS
	proc_net_create("dev", 0, dev_get_info);
	create_proc_read_entry("net/softnet_stat", 0, 0, dev_proc_stats, NULL);
	proc_net_drivers = proc_mkdir("net/drivers", 0);
#ifdef WIRELESS_EXT
	/* Available in net/core/wireless.c */
	proc_net_create("wireless", 0, dev_get_wireless_info);
#endif	/* WIRELESS_EXT */
#endif	/* CONFIG_PROC_FS */

	/* From here on, device registration takes the normal locked paths. */
	dev_boot_phase = 0;

	open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
	open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);

	dst_init();
	dev_mcast_init();

#ifdef CONFIG_NET_SCHED
	pktsched_init();
#endif
	/*
	 * Initialise network devices
	 */

	net_device_init();

	return 0;
}
|
#ifdef CONFIG_HOTPLUG |
|
/* Notify userspace when a netdevice event occurs, |
* by running '/sbin/hotplug net' with certain |
* environment variables set. |
*/ |
|
static int net_run_sbin_hotplug(struct net_device *dev, char *action) |
{ |
char *argv[3], *envp[5], ifname[12 + IFNAMSIZ], action_str[32]; |
int i; |
|
sprintf(ifname, "INTERFACE=%s", dev->name); |
sprintf(action_str, "ACTION=%s", action); |
|
i = 0; |
argv[i++] = hotplug_path; |
argv[i++] = "net"; |
argv[i] = 0; |
|
i = 0; |
/* minimal command environment */ |
envp [i++] = "HOME=/"; |
envp [i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; |
envp [i++] = ifname; |
envp [i++] = action_str; |
envp [i] = 0; |
|
return call_usermodehelper(argv [0], argv, envp); |
} |
#endif |
/datagram.c
0,0 → 1,448
/* |
* SUCS NET3: |
* |
* Generic datagram handling routines. These are generic for all protocols. Possibly a generic IP version on top |
* of these would make sense. Not tonight however 8-). |
* This is used because UDP, RAW, PACKET, DDP, IPX, AX.25 and NetROM layer all have identical poll code and mostly |
* identical recvmsg() code. So we share it here. The poll was shared before but buried in udp.c so I moved it. |
* |
* Authors: Alan Cox <alan@redhat.com>. (datagram_poll() from old udp.c code) |
* |
* Fixes: |
* Alan Cox : NULL return from skb_peek_copy() understood |
* Alan Cox : Rewrote skb_read_datagram to avoid the skb_peek_copy stuff. |
* Alan Cox : Added support for SOCK_SEQPACKET. IPX can no longer use the SO_TYPE hack but |
* AX.25 now works right, and SPX is feasible. |
* Alan Cox : Fixed write poll of non IP protocol crash. |
* Florian La Roche: Changed for my new skbuff handling. |
* Darryl Miles : Fixed non-blocking SOCK_SEQPACKET. |
* Linus Torvalds : BSD semantic fixes. |
* Alan Cox : Datagram iovec handling |
* Darryl Miles : Fixed non-blocking SOCK_STREAM. |
* Alan Cox : POSIXisms |
* Pete Wyckoff : Unconnected accept() fix. |
* |
*/ |
|
#include <linux/types.h> |
#include <linux/kernel.h> |
#include <asm/uaccess.h> |
#include <asm/system.h> |
#include <linux/mm.h> |
#include <linux/interrupt.h> |
#include <linux/errno.h> |
#include <linux/sched.h> |
#include <linux/inet.h> |
#include <linux/netdevice.h> |
#include <linux/rtnetlink.h> |
#include <linux/poll.h> |
#include <linux/highmem.h> |
|
#include <net/protocol.h> |
#include <linux/skbuff.h> |
#include <net/sock.h> |
#include <net/checksum.h> |
|
|
/* |
* Is a socket 'connection oriented' ? |
*/ |
|
static inline int connection_based(struct sock *sk) |
{ |
return (sk->type==SOCK_SEQPACKET || sk->type==SOCK_STREAM); |
} |
|
|
/* |
* Wait for a packet.. |
*/ |
|
/*
 * wait_for_packet - sleep until the receive queue may have data.
 * @sk:      socket to wait on
 * @err:     out-parameter receiving an error code on abort
 * @timeo_p: in/out remaining timeout in jiffies, updated by the sleep
 *
 * Returns 0 when the caller should re-scan the receive queue, non-zero
 * (with *err set) when the wait must stop: pending socket error,
 * RCV_SHUTDOWN (*err = 0, returns 1), disconnected connection-based
 * socket (-ENOTCONN), or a signal (sock_intr_errno of the remaining
 * timeout).
 */
static int wait_for_packet(struct sock * sk, int *err, long *timeo_p)
{
	int error;

	DECLARE_WAITQUEUE(wait, current);

	/* Queue ourselves and go INTERRUPTIBLE *before* testing state,
	 * so a wakeup racing with the checks below cannot be lost. */
	__set_current_state(TASK_INTERRUPTIBLE);
	add_wait_queue_exclusive(sk->sleep, &wait);

	/* Socket errors? */
	error = sock_error(sk);
	if (error)
		goto out_err;

	/* Data may have arrived while we were queueing ourselves. */
	if (!skb_queue_empty(&sk->receive_queue))
		goto ready;

	/* Socket shut down? */
	if (sk->shutdown & RCV_SHUTDOWN)
		goto out_noerr;

	/* Sequenced packets can come disconnected. If so we report the problem */
	error = -ENOTCONN;
	if(connection_based(sk) && !(sk->state==TCP_ESTABLISHED || sk->state==TCP_LISTEN))
		goto out_err;

	/* handle signals */
	if (signal_pending(current))
		goto interrupted;

	/* Sleep; schedule_timeout returns the jiffies left, which the
	 * caller reuses on the next pass. */
	*timeo_p = schedule_timeout(*timeo_p);

ready:
	current->state = TASK_RUNNING;
	remove_wait_queue(sk->sleep, &wait);
	return 0;

interrupted:
	error = sock_intr_errno(*timeo_p);
out_err:
	*err = error;
out:
	current->state = TASK_RUNNING;
	remove_wait_queue(sk->sleep, &wait);
	return error;
out_noerr:
	*err = 0;
	error = 1;
	goto out;
}
|
/* |
* Get a datagram skbuff, understands the peeking, nonblocking wakeups and possible |
* races. This replaces identical code in packet,raw and udp, as well as the IPX |
* AX.25 and Appletalk. It also finally fixes the long standing peek and read |
* race for datagram sockets. If you alter this routine remember it must be |
* re-entrant. |
* |
 * The returned skb must be released with skb_free_datagram().  Note that
 * this function does NOT lock the socket (see the note below) — the old
 * claim that the caller must unlock it was stale.
* |
* * It does not lock socket since today. This function is |
* * free of race conditions. This measure should/can improve |
* * significantly datagram socket latencies at high loads, |
* * when data copying to user space takes lots of time. |
* * (BTW I've just killed the last cli() in IP/IPv6/core/netlink/packet |
* * 8) Great win.) |
* * --ANK (980729) |
* |
* The order of the tests when we find no data waiting are specified |
* quite explicitly by POSIX 1003.1g, don't change them without having |
* the standard around please. |
*/ |
|
/*
 * skb_recv_datagram - receive one datagram from a socket.
 * @sk:      socket to read from
 * @flags:   MSG_* flags; MSG_PEEK leaves the skb on the queue
 * @noblock: non-zero for non-blocking operation
 * @err:     out-parameter for the error code on NULL return
 *
 * Returns the skb (with an extra reference when peeking), or NULL with
 * *err set (-EAGAIN when non-blocking and empty, or the socket error /
 * wait abort reason).  Lock-free; see the comment block above.
 */
struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, int *err)
{
	int error;
	struct sk_buff *skb;
	long timeo;

	/* Caller is allowed not to check sk->err before skb_recv_datagram() */
	error = sock_error(sk);
	if (error)
		goto no_packet;

	timeo = sock_rcvtimeo(sk, noblock);

	do {
		/* Again only user level code calls this function, so nothing interrupt level
		   will suddenly eat the receive_queue.

		   Look at current nfs client by the way...
		   However, this function was corrent in any case. 8)
		 */
		if (flags & MSG_PEEK)
		{
			unsigned long cpu_flags;

			/* Peek must hold the queue lock while taking the
			 * extra reference, or a concurrent dequeue+free
			 * could race with us. */
			spin_lock_irqsave(&sk->receive_queue.lock, cpu_flags);
			skb = skb_peek(&sk->receive_queue);
			if(skb!=NULL)
				atomic_inc(&skb->users);
			spin_unlock_irqrestore(&sk->receive_queue.lock, cpu_flags);
		} else
			skb = skb_dequeue(&sk->receive_queue);

		if (skb)
			return skb;

		/* User doesn't want to wait */
		error = -EAGAIN;
		if (!timeo)
			goto no_packet;

	} while (wait_for_packet(sk, err, &timeo) == 0);

	/* wait_for_packet() aborted and already filled in *err. */
	return NULL;

no_packet:
	*err = error;
	return NULL;
}
|
/*
 * Release a datagram obtained from skb_recv_datagram().  Nothing
 * socket-specific to undo in this implementation: just drop our
 * reference on the buffer.
 */
void skb_free_datagram(struct sock * sk, struct sk_buff *skb)
{
	kfree_skb(skb);
}
|
/* |
* Copy a datagram to a linear buffer. |
*/ |
|
int skb_copy_datagram(const struct sk_buff *skb, int offset, char *to, int size) |
{ |
struct iovec iov = { to, size }; |
|
return skb_copy_datagram_iovec(skb, offset, &iov, size); |
} |
|
/* |
* Copy a datagram to an iovec. |
* Note: the iovec is modified during the copy. |
*/ |
/*
 * Copy a datagram to an iovec.
 * Note: the iovec is modified during the copy.
 *
 * @skb:    source buffer (may be non-linear)
 * @offset: byte offset into the skb data at which to start
 * @to:     destination iovec (consumed as data is copied)
 * @len:    number of bytes to copy
 *
 * Walks the three storage areas of an skb in order — linear header,
 * paged fragments, then the frag_list of chained skbs (recursively) —
 * copying the bytes that intersect [offset, offset+len).
 * Returns 0 on success, -EFAULT on a userspace copy fault.
 */
int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset, struct iovec *to,
			    int len)
{
	int i, copy;
	/* Length of the linear header area. */
	int start = skb->len - skb->data_len;

	/* Copy header. */
	if ((copy = start-offset) > 0) {
		if (copy > len)
			copy = len;
		if (memcpy_toiovec(to, skb->data + offset, copy))
			goto fault;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
	}

	/* Copy paged appendix. Hmm... why does this look so complicated? */
	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
		int end;

		BUG_TRAP(start <= offset+len);

		/* [start, end) is this fragment's range in skb byte space. */
		end = start + skb_shinfo(skb)->frags[i].size;
		if ((copy = end-offset) > 0) {
			int err;
			u8  *vaddr;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
			struct page *page = frag->page;

			if (copy > len)
				copy = len;
			/* kmap: fragment pages may live in highmem. */
			vaddr = kmap(page);
			err = memcpy_toiovec(to, vaddr + frag->page_offset +
					     offset-start, copy);
			kunmap(page);
			if (err)
				goto fault;
			if (!(len -= copy))
				return 0;
			offset += copy;
		}
		start = end;
	}

	if (skb_shinfo(skb)->frag_list) {
		struct sk_buff *list;

		/* Chained skbs: recurse with the offset rebased to each one. */
		for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
			int end;

			BUG_TRAP(start <= offset+len);

			end = start + list->len;
			if ((copy = end-offset) > 0) {
				if (copy > len)
					copy = len;
				if (skb_copy_datagram_iovec(list, offset-start, to, copy))
					goto fault;
				if ((len -= copy) == 0)
					return 0;
				offset += copy;
			}
			start = end;
		}
	}
	if (len == 0)
		return 0;

	/* Falling through here means the request ran past the skb's data. */
fault:
	return -EFAULT;
}
|
/*
 * Copy a datagram to a user buffer while accumulating its checksum.
 *
 * @skb:    source buffer (may be non-linear)
 * @offset: byte offset into the skb at which to start
 * @to:     destination user buffer
 * @len:    number of bytes to copy
 * @csump:  in/out running checksum, folded with csum_block_add()
 *
 * Same three-area walk as skb_copy_datagram_iovec() (linear header,
 * page fragments, frag_list recursion), but each chunk goes through
 * csum_and_copy_to_user().  `pos` tracks how many bytes precede the
 * current chunk so partial checksums are combined at the right byte
 * parity.  Returns 0 on success, -EFAULT on fault.
 */
int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int *csump)
{
	int i, copy;
	/* Length of the linear header area. */
	int start = skb->len - skb->data_len;
	/* Bytes already checksummed; needed for csum_block_add() parity. */
	int pos = 0;

	/* Copy header. */
	if ((copy = start-offset) > 0) {
		int err = 0;
		if (copy > len)
			copy = len;
		*csump = csum_and_copy_to_user(skb->data+offset, to, copy, *csump, &err);
		if (err)
			goto fault;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
		to += copy;
		pos = copy;
	}

	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
		int end;

		BUG_TRAP(start <= offset+len);

		end = start + skb_shinfo(skb)->frags[i].size;
		if ((copy = end-offset) > 0) {
			unsigned int csum2;
			int err = 0;
			u8  *vaddr;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
			struct page *page = frag->page;

			if (copy > len)
				copy = len;
			/* kmap: fragment pages may live in highmem. */
			vaddr = kmap(page);
			csum2 = csum_and_copy_to_user(vaddr + frag->page_offset +
						      offset-start, to, copy, 0, &err);
			kunmap(page);
			if (err)
				goto fault;
			/* Merge the chunk checksum at byte offset `pos`. */
			*csump = csum_block_add(*csump, csum2, pos);
			if (!(len -= copy))
				return 0;
			offset += copy;
			to += copy;
			pos += copy;
		}
		start = end;
	}

	if (skb_shinfo(skb)->frag_list) {
		struct sk_buff *list;

		/* Chained skbs: recurse, then fold each sub-checksum in. */
		for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
			int end;

			BUG_TRAP(start <= offset+len);

			end = start + list->len;
			if ((copy = end-offset) > 0) {
				unsigned int csum2 = 0;
				if (copy > len)
					copy = len;
				if (skb_copy_and_csum_datagram(list, offset-start, to, copy, &csum2))
					goto fault;
				*csump = csum_block_add(*csump, csum2, pos);
				if ((len -= copy) == 0)
					return 0;
				offset += copy;
				to += copy;
				pos += copy;
			}
			start = end;
		}
	}
	if (len == 0)
		return 0;

fault:
	return -EFAULT;
}
|
/* Copy and checkum skb to user iovec. Caller _must_ check that |
skb will fit to this iovec. |
|
Returns: 0 - success. |
-EINVAL - checksum failure. |
-EFAULT - fault during copy. Beware, in this case iovec can be |
modified! |
*/ |
|
int skb_copy_and_csum_datagram_iovec(const struct sk_buff *skb, int hlen, struct iovec *iov)
{
	unsigned int csum;
	/* Payload size after the hlen bytes the caller already consumed. */
	int chunk = skb->len - hlen;

	/* Skip filled elements. Pretty silly, look at memcpy_toiovec, though 8) */
	while (iov->iov_len == 0)
		iov++;

	if (iov->iov_len < chunk) {
		/* Destination is scattered: verify the checksum of the
		 * whole packet first, then do a plain iovec copy. */
		if ((unsigned short)csum_fold(skb_checksum(skb, 0, chunk+hlen, skb->csum)))
			goto csum_error;
		if (skb_copy_datagram_iovec(skb, hlen, iov, chunk))
			goto fault;
	} else {
		/* Single destination buffer: checksum the header, then
		 * copy-and-checksum the payload in one pass, and only
		 * then fold and verify. */
		csum = csum_partial(skb->data, hlen, skb->csum);
		if (skb_copy_and_csum_datagram(skb, hlen, iov->iov_base, chunk, &csum))
			goto fault;
		if ((unsigned short)csum_fold(csum))
			goto csum_error;
		/* Consume the bytes from the iovec element ourselves,
		 * since skb_copy_and_csum_datagram doesn't touch it. */
		iov->iov_len -= chunk;
		iov->iov_base += chunk;
	}
	return 0;

csum_error:
	return -EINVAL;

fault:
	return -EFAULT;
}
|
|
|
/* |
* Datagram poll: Again totally generic. This also handles |
* sequenced packet sockets providing the socket receive queue |
* is only ever holding data ready to receive. |
* |
* Note: when you _don't_ use this routine for this protocol, |
* and you use a different write policy from sock_writeable() |
* then please supply your own write_space callback. |
*/ |
|
unsigned int datagram_poll(struct file * file, struct socket *sock, poll_table *wait)
{
	struct sock *sk = sock->sk;
	unsigned int mask;

	/* Register on the wait queue before sampling state, so a wakeup
	 * between the checks below and the caller sleeping is not lost. */
	poll_wait(file, sk->sleep, wait);
	mask = 0;

	/* exceptional events? */
	if (sk->err || !skb_queue_empty(&sk->error_queue))
		mask |= POLLERR;
	if (sk->shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;

	/* readable? */
	if (!skb_queue_empty(&sk->receive_queue) || (sk->shutdown&RCV_SHUTDOWN))
		mask |= POLLIN | POLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (connection_based(sk)) {
		if (sk->state==TCP_CLOSE)
			mask |= POLLHUP;
		/* connection hasn't started yet? */
		if (sk->state == TCP_SYN_SENT)
			return mask;
	}

	/* writable? */
	if (sock_writeable(sk))
		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
	else
		/* No room: arrange for a write-space notification. */
		set_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags);

	return mask;
}
/skbuff.c
0,0 → 1,1238
/* |
* Routines having to do with the 'struct sk_buff' memory handlers. |
* |
* Authors: Alan Cox <iiitac@pyr.swan.ac.uk> |
* Florian La Roche <rzsfl@rz.uni-sb.de> |
* |
* Version: $Id: skbuff.c,v 1.1.1.1 2004-04-17 22:13:13 phoenix Exp $ |
* |
* Fixes: |
* Alan Cox : Fixed the worst of the load balancer bugs. |
* Dave Platt : Interrupt stacking fix. |
* Richard Kooijman : Timestamp fixes. |
* Alan Cox : Changed buffer format. |
* Alan Cox : destructor hook for AF_UNIX etc. |
* Linus Torvalds : Better skb_clone. |
* Alan Cox : Added skb_copy. |
* Alan Cox : Added all the changed routines Linus |
* only put in the headers |
* Ray VanTassle : Fixed --skb->lock in free |
* Alan Cox : skb_copy copy arp field |
* Andi Kleen : slabified it. |
* |
* NOTE: |
* The __skb_ routines should be called with interrupts |
* disabled, or you better be *real* sure that the operation is atomic |
* with respect to whatever list is being frobbed (e.g. via lock_sock() |
* or via disabling bottom half handlers, etc). |
* |
* This program is free software; you can redistribute it and/or |
* modify it under the terms of the GNU General Public License |
* as published by the Free Software Foundation; either version |
* 2 of the License, or (at your option) any later version. |
*/ |
|
/* |
* The functions in this file will not compile correctly with gcc 2.4.x |
*/ |
|
#include <linux/config.h> |
#include <linux/types.h> |
#include <linux/kernel.h> |
#include <linux/sched.h> |
#include <linux/mm.h> |
#include <linux/interrupt.h> |
#include <linux/in.h> |
#include <linux/inet.h> |
#include <linux/slab.h> |
#include <linux/netdevice.h> |
#include <linux/string.h> |
#include <linux/skbuff.h> |
#include <linux/cache.h> |
#include <linux/rtnetlink.h> |
#include <linux/init.h> |
#include <linux/highmem.h> |
|
#include <net/protocol.h> |
#include <net/dst.h> |
#include <net/sock.h> |
#include <net/checksum.h> |
|
#include <asm/uaccess.h> |
#include <asm/system.h> |
|
/* Upper bound on cached skb heads kept in each per-CPU hot list. */
int sysctl_hot_list_len = 128;

/* Slab cache backing struct sk_buff head allocations. */
static kmem_cache_t *skbuff_head_cache;

/* Per-CPU recycling pool of skb heads; the union pads each entry to a
 * cache line to avoid false sharing between CPUs. */
static union {
	struct sk_buff_head	list;
	char			pad[SMP_CACHE_BYTES];
} skb_head_pool[NR_CPUS];
|
/* |
* Keep out-of-line to prevent kernel bloat. |
* __builtin_return_address is not used because it is not always |
* reliable. |
*/ |
|
/** |
* skb_over_panic - private function |
* @skb: buffer |
* @sz: size |
* @here: address |
* |
* Out of line support code for skb_put(). Not user callable. |
*/ |
|
void skb_over_panic(struct sk_buff *skb, int sz, void *here) |
{ |
printk("skput:over: %p:%d put:%d dev:%s", |
here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>"); |
BUG(); |
} |
|
/** |
* skb_under_panic - private function |
* @skb: buffer |
* @sz: size |
* @here: address |
* |
* Out of line support code for skb_push(). Not user callable. |
*/ |
|
|
void skb_under_panic(struct sk_buff *skb, int sz, void *here) |
{ |
printk("skput:under: %p:%d put:%d dev:%s", |
here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>"); |
BUG(); |
} |
|
/* Try to take a recycled skb head from this CPU's hot list; returns
 * NULL when the list is empty (caller falls back to the slab cache).
 * Interrupts are disabled around the dequeue because the list is also
 * manipulated from IRQ context on this CPU. */
static __inline__ struct sk_buff *skb_head_from_pool(void)
{
	struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;

	if (skb_queue_len(list)) {
		struct sk_buff *skb;
		unsigned long flags;

		local_irq_save(flags);
		skb = __skb_dequeue(list);
		local_irq_restore(flags);
		return skb;
	}
	return NULL;
}
|
/* Return an skb head to this CPU's hot list for reuse, unless the list
 * is already at sysctl_hot_list_len, in which case free it back to the
 * slab cache.  IRQs disabled around the queue op for the same reason as
 * in skb_head_from_pool(). */
static __inline__ void skb_head_to_pool(struct sk_buff *skb)
{
	struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;

	if (skb_queue_len(list) < sysctl_hot_list_len) {
		unsigned long flags;

		local_irq_save(flags);
		__skb_queue_head(list, skb);
		local_irq_restore(flags);

		return;
	}
	kmem_cache_free(skbuff_head_cache, skb);
}
|
|
/* Allocate a new skbuff. We do this ourselves so we can fill in a few |
* 'private' fields and also do memory statistics to find all the |
* [BEEP] leaks. |
* |
*/ |
|
/** |
* alloc_skb - allocate a network buffer |
* @size: size to allocate |
* @gfp_mask: allocation mask |
* |
* Allocate a new &sk_buff. The returned buffer has no headroom and a |
* tail room of size bytes. The object has a reference count of one. |
* The return is the buffer. On a failure the return is %NULL. |
* |
* Buffers may only be allocated from interrupts using a @gfp_mask of |
* %GFP_ATOMIC. |
*/ |
|
struct sk_buff *alloc_skb(unsigned int size,int gfp_mask)
{
	struct sk_buff *skb;
	u8 *data;

	/* Sleeping allocation from interrupt context is a caller bug:
	 * warn (rate-limited to 5 reports), BUG after that, and strip
	 * __GFP_WAIT so we can still try to satisfy the request. */
	if (in_interrupt() && (gfp_mask & __GFP_WAIT)) {
		static int count = 0;
		if (++count < 5) {
			printk(KERN_ERR "alloc_skb called nonatomically "
			       "from interrupt %p\n", NET_CALLER(size));
 			BUG();
		}
		gfp_mask &= ~__GFP_WAIT;
	}

	/* Get the HEAD */
	skb = skb_head_from_pool();
	if (skb == NULL) {
		skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);
		if (skb == NULL)
			goto nohead;
	}

	/* Get the DATA. Size must match skb_add_mtu(). */
	size = SKB_DATA_ALIGN(size);
	/* The skb_shared_info block lives immediately after the data. */
	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
	if (data == NULL)
		goto nodata;

	/* XXX: does not include slab overhead */ 
	skb->truesize = size + sizeof(struct sk_buff);

	/* Load the data pointers. */
	skb->head = data;
	skb->data = data;
	skb->tail = data;
	skb->end = data + size;

	/* Set up other state */
	skb->len = 0;
	skb->cloned = 0;
	skb->data_len = 0;

	atomic_set(&skb->users, 1); 
	atomic_set(&(skb_shinfo(skb)->dataref), 1);
	skb_shinfo(skb)->nr_frags = 0;
	skb_shinfo(skb)->frag_list = NULL;
	return skb;

nodata:
	/* Data allocation failed: recycle the head we already took. */
	skb_head_to_pool(skb);
nohead:
	return NULL;
}
|
|
/* |
* Slab constructor for a skb head. |
*/ |
/*
 * Slab constructor for a skb head.
 *
 * Resets every header field to its default so a recycled or freshly
 * slab-allocated head looks pristine; also invoked from __kfree_skb()
 * to scrub state before the head is pooled.  The cache/flags arguments
 * follow the kmem_cache_t constructor signature and are unused.
 */
static inline void skb_headerinit(void *p, kmem_cache_t *cache, 
				  unsigned long flags)
{
	struct sk_buff *skb = p;

	skb->next = NULL;
	skb->prev = NULL;
	skb->list = NULL;
	skb->sk = NULL;
	skb->stamp.tv_sec=0;	/* No idea about time */
	skb->dev = NULL;
	skb->real_dev = NULL;
	skb->dst = NULL;
	/* Control buffer is opaque per-protocol scratch space. */
	memset(skb->cb, 0, sizeof(skb->cb));
	skb->pkt_type = PACKET_HOST;	/* Default type */
	skb->ip_summed = 0;
	skb->priority = 0;
	skb->security = 0;	/* By default packets are insecure */
	skb->destructor = NULL;

#ifdef CONFIG_NETFILTER
	skb->nfmark = skb->nfcache = 0;
	skb->nfct = NULL;
#ifdef CONFIG_NETFILTER_DEBUG
	skb->nf_debug = 0;
#endif
#endif
#ifdef CONFIG_NET_SCHED
	skb->tc_index = 0;
#endif
}
|
static void skb_drop_fraglist(struct sk_buff *skb) |
{ |
struct sk_buff *list = skb_shinfo(skb)->frag_list; |
|
skb_shinfo(skb)->frag_list = NULL; |
|
do { |
struct sk_buff *this = list; |
list = list->next; |
kfree_skb(this); |
} while (list); |
} |
|
static void skb_clone_fraglist(struct sk_buff *skb) |
{ |
struct sk_buff *list; |
|
for (list = skb_shinfo(skb)->frag_list; list; list=list->next) |
skb_get(list); |
} |
|
/* Release skb's data area.  For a cloned skb the data is shared, so it
 * is only freed when this was the last reference (dataref hits zero);
 * the page fragments and frag_list drop their references first, then
 * the kmalloc'd head buffer goes. */
static void skb_release_data(struct sk_buff *skb)
{
	if (!skb->cloned ||
	    atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) {
		if (skb_shinfo(skb)->nr_frags) {
			int i;
			for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
				put_page(skb_shinfo(skb)->frags[i].page);
		}

		if (skb_shinfo(skb)->frag_list)
			skb_drop_fraglist(skb);

		kfree(skb->head);
	}
}
|
/* 
 *	Free an skbuff by memory without cleaning the state. 
 *	Releases the (possibly shared) data area, then recycles the head
 *	via the per-CPU pool.  Callers must have scrubbed state first
 *	(see __kfree_skb).
 */
void kfree_skbmem(struct sk_buff *skb)
{
	skb_release_data(skb);
	skb_head_to_pool(skb);
}
|
/** |
* __kfree_skb - private function |
* @skb: buffer |
* |
* Free an sk_buff. Release anything attached to the buffer. |
* Clean the state. This is an internal helper function. Users should |
* always call kfree_skb |
*/ |
|
void __kfree_skb(struct sk_buff *skb)
{
	/* Freeing an skb still linked on a queue is a fatal bug. */
	if (skb->list) {
	 	printk(KERN_WARNING "Warning: kfree_skb passed an skb still "
		       "on a list (from %p).\n", NET_CALLER(skb));
		BUG();
	}

	dst_release(skb->dst);
	if(skb->destructor) {
		/* Destructors (e.g. socket accounting) may take locks
		 * that are unsafe in hard IRQ context — warn. */
		if (in_irq()) {
			printk(KERN_WARNING "Warning: kfree_skb on hard IRQ %p\n",
				NET_CALLER(skb));
		}
		skb->destructor(skb);
	}
#ifdef CONFIG_NETFILTER
	nf_conntrack_put(skb->nfct);
#endif
	skb_headerinit(skb, NULL, 0);  /* clean state */
	kfree_skbmem(skb);
}
|
/** |
* skb_clone - duplicate an sk_buff |
* @skb: buffer to clone |
* @gfp_mask: allocation priority |
* |
* Duplicate an &sk_buff. The new one is not owned by a socket. Both |
* copies share the same packet data but not structure. The new |
* buffer has a reference count of 1. If the allocation fails the |
* function returns %NULL otherwise the new buffer is returned. |
* |
* If this function is called from an interrupt gfp_mask() must be |
* %GFP_ATOMIC. |
*/ |
|
struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
{
	struct sk_buff *n;

	/* Head from the per-CPU pool if possible, slab otherwise. */
	n = skb_head_from_pool();
	if (!n) {
		n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
		if (!n)
			return NULL;
	}

	/* Shorthand: copy field x from the original into the clone. */
#define C(x) n->x = skb->x

	/* The clone is owned by nobody and sits on no list. */
	n->next = n->prev = NULL;
	n->list = NULL;
	n->sk = NULL;
	C(stamp);
	C(dev);
	C(real_dev);
	C(h);
	C(nh);
	C(mac);
	C(dst);
	dst_clone(n->dst);
	memcpy(n->cb, skb->cb, sizeof(skb->cb));
	C(len);
	C(data_len);
	C(csum);
	n->cloned = 1;
	C(pkt_type);
	C(ip_summed);
	C(priority);
	atomic_set(&n->users, 1);
	C(protocol);
	C(security);
	C(truesize);
	/* Data pointers are shared, not copied — both skbs now point at
	 * the same buffer, protected by the dataref count below. */
	C(head);
	C(data);
	C(tail);
	C(end);
	n->destructor = NULL;
#ifdef CONFIG_NETFILTER
	C(nfmark);
	C(nfcache);
	C(nfct);
#ifdef CONFIG_NETFILTER_DEBUG
	C(nf_debug);
#endif
#endif /*CONFIG_NETFILTER*/
#if defined(CONFIG_HIPPI)
	C(private);
#endif
#ifdef CONFIG_NET_SCHED
	C(tc_index);
#endif

	/* Account the shared data area and mark both sides cloned. */
	atomic_inc(&(skb_shinfo(skb)->dataref));
	skb->cloned = 1;
#ifdef CONFIG_NETFILTER
	nf_conntrack_get(skb->nfct);
#endif
	return n;
}
|
/* Copy header state from @old into the freshly allocated @new after a
 * full data copy.  The h/nh/mac pointers are rebased by the byte offset
 * between the two data areas; ownership fields (list, sk, destructor)
 * are reset since @new belongs to nobody yet. */
static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
{
	/*
	 *	Shift between the two data areas in bytes
	 */
	unsigned long offset = new->data - old->data;

	new->list=NULL;
	new->sk=NULL;
	new->dev=old->dev;
	new->real_dev=old->real_dev;
	new->priority=old->priority;
	new->protocol=old->protocol;
	new->dst=dst_clone(old->dst);
	new->h.raw=old->h.raw+offset;
	new->nh.raw=old->nh.raw+offset;
	new->mac.raw=old->mac.raw+offset;
	memcpy(new->cb, old->cb, sizeof(old->cb));
	atomic_set(&new->users, 1);
	new->pkt_type=old->pkt_type;
	new->stamp=old->stamp;
	new->destructor = NULL;
	new->security=old->security;
#ifdef CONFIG_NETFILTER
	new->nfmark=old->nfmark;
	new->nfcache=old->nfcache;
	new->nfct=old->nfct;
	/* Sharing the conntrack entry needs an extra reference. */
	nf_conntrack_get(new->nfct);
#ifdef CONFIG_NETFILTER_DEBUG
	new->nf_debug=old->nf_debug;
#endif
#endif
#ifdef CONFIG_NET_SCHED
	new->tc_index = old->tc_index;
#endif
}
|
/** |
* skb_copy - create private copy of an sk_buff |
* @skb: buffer to copy |
* @gfp_mask: allocation priority |
* |
* Make a copy of both an &sk_buff and its data. This is used when the |
* caller wishes to modify the data and needs a private copy of the |
* data to alter. Returns %NULL on failure or the pointer to the buffer |
* on success. The returned buffer has a reference count of 1. |
* |
* As by-product this function converts non-linear &sk_buff to linear |
* one, so that &sk_buff becomes completely private and caller is allowed |
* to modify all the data of returned buffer. This means that this |
* function is not recommended for use in circumstances when only |
* header is going to be modified. Use pskb_copy() instead. |
*/ |
|
struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask)
{
	struct sk_buff *n;
	/* Headroom in the original, reproduced in the copy. */
	int headerlen = skb->data-skb->head;

	/*
	 *	Allocate the copy buffer
	 */
	
	/* end-head covers headroom + linear data + tailroom; data_len is
	 * the non-linear part being flattened into the copy. */
	n=alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask);
	if(n==NULL)
		return NULL;

	/* Set the data pointer */
	skb_reserve(n,headerlen);
	/* Set the tail pointer and length */
	skb_put(n,skb->len);
	n->csum = skb->csum;
	n->ip_summed = skb->ip_summed;

	/* Negative offset pulls in the headroom bytes too. */
	if (skb_copy_bits(skb, -headerlen, n->head, headerlen+skb->len))
		BUG();

	copy_skb_header(n, skb);

	return n;
}
|
/* Keep head the same: replace data */ |
/* Keep head the same: replace data.
 *
 * Flattens a possibly non-linear skb into one contiguous kmalloc'd
 * buffer in place, growing the area if the fragments don't fit in the
 * existing tailroom.  The skb must not be shared.  Returns 0 or -ENOMEM.
 */
int skb_linearize(struct sk_buff *skb, int gfp_mask)
{
	unsigned int size;
	u8 *data;
	long offset;
	int headerlen = skb->data - skb->head;
	/* Extra room needed to hold the fragment data past current end. */
	int expand = (skb->tail+skb->data_len) - skb->end;

	if (skb_shared(skb))
		BUG();

	if (expand <= 0)
		expand = 0;

	size = (skb->end - skb->head + expand);
	size = SKB_DATA_ALIGN(size);
	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
	if (data == NULL)
		return -ENOMEM;

	/* Copy entire thing */
	if (skb_copy_bits(skb, -headerlen, data, headerlen+skb->len))
		BUG();

	/* Offset between the two in bytes */
	offset = data - skb->head;

	/* Free old data. */
	skb_release_data(skb);

	skb->head = data;
	skb->end  = data + size;

	/* Set up new pointers */
	skb->h.raw += offset;
	skb->nh.raw += offset;
	skb->mac.raw += offset;
	skb->tail += offset;
	skb->data += offset;

	/* Set up shinfo */
	atomic_set(&(skb_shinfo(skb)->dataref), 1);
	skb_shinfo(skb)->nr_frags = 0;
	skb_shinfo(skb)->frag_list = NULL;

	/* We are no longer a clone, even if we were. */
	skb->cloned = 0;

	/* Fragment bytes are now part of the linear area. */
	skb->tail += skb->data_len;
	skb->data_len = 0;
	return 0;
}
|
|
/** |
* pskb_copy - create copy of an sk_buff with private head. |
* @skb: buffer to copy |
* @gfp_mask: allocation priority |
* |
* Make a copy of both an &sk_buff and part of its data, located |
* in header. Fragmented data remain shared. This is used when |
* the caller wishes to modify only header of &sk_buff and needs |
* private copy of the header to alter. Returns %NULL on failure |
* or the pointer to the buffer on success. |
* The returned buffer has a reference count of 1. |
*/ |
|
struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask)
{
	struct sk_buff *n;

	/*
	 *	Allocate the copy buffer
	 */
	/* Only the linear head area is duplicated; fragments stay shared. */
	n=alloc_skb(skb->end - skb->head, gfp_mask);
	if(n==NULL)
		return NULL;

	/* Set the data pointer */
	skb_reserve(n,skb->data-skb->head);
	/* Set the tail pointer and length */
	skb_put(n,skb_headlen(skb));
	/* Copy the bytes */
	memcpy(n->data, skb->data, n->len);
	n->csum = skb->csum;
	n->ip_summed = skb->ip_summed;

	n->data_len = skb->data_len;
	n->len = skb->len;

	if (skb_shinfo(skb)->nr_frags) {
		int i;

		/* Share each page fragment, taking a page reference. */
		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
			skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
			get_page(skb_shinfo(n)->frags[i].page);
		}
		skb_shinfo(n)->nr_frags = i;
	}

	if (skb_shinfo(skb)->frag_list) {
		/* Share the chained skbs too (extra skb references). */
		skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
		skb_clone_fraglist(n);
	}

	copy_skb_header(n, skb);

	return n;
}
|
/** |
* pskb_expand_head - reallocate header of &sk_buff |
* @skb: buffer to reallocate |
* @nhead: room to add at head |
* @ntail: room to add at tail |
* @gfp_mask: allocation priority |
* |
* Expands (or creates identical copy, if &nhead and &ntail are zero) |
* header of skb. &sk_buff itself is not changed. &sk_buff MUST have |
* reference count of 1. Returns zero in the case of success or error, |
* if expansion failed. In the last case, &sk_buff is not changed. |
* |
* All the pointers pointing into skb header may change and must be |
* reloaded after call to this function. |
*/ |
|
int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask)
{
	int i;
	u8 *data;
	int size = nhead + (skb->end - skb->head) + ntail;
	long off;

	/* Contract (see comment above): caller must hold the only
	 * reference to this skb.
	 */
	if (skb_shared(skb))
		BUG();

	size = SKB_DATA_ALIGN(size);

	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
	if (data == NULL)
		goto nodata;

	/* Copy only real data... and, alas, header. This should be
	 * optimized for the cases when header is void. */
	memcpy(data+nhead, skb->head, skb->tail-skb->head);
	memcpy(data+size, skb->end, sizeof(struct skb_shared_info));

	/* Take extra references on the shared frag pages / frag_list
	 * before skb_release_data() drops the old head's references.
	 */
	for (i=0; i<skb_shinfo(skb)->nr_frags; i++)
		get_page(skb_shinfo(skb)->frags[i].page);

	if (skb_shinfo(skb)->frag_list)
		skb_clone_fraglist(skb);

	skb_release_data(skb);

	/* Relocate every pointer that referenced the old head area. */
	off = (data+nhead) - skb->head;

	skb->head = data;
	skb->end = data+size;

	skb->data += off;
	skb->tail += off;
	skb->mac.raw += off;
	skb->h.raw += off;
	skb->nh.raw += off;
	skb->cloned = 0;
	atomic_set(&skb_shinfo(skb)->dataref, 1);
	return 0;

nodata:
	return -ENOMEM;
}
|
/* Make private copy of skb with writable head and some headroom */ |
|
struct sk_buff * |
skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom) |
{ |
struct sk_buff *skb2; |
int delta = headroom - skb_headroom(skb); |
|
if (delta <= 0) |
return pskb_copy(skb, GFP_ATOMIC); |
|
skb2 = skb_clone(skb, GFP_ATOMIC); |
if (skb2 == NULL || |
!pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) |
return skb2; |
|
kfree_skb(skb2); |
return NULL; |
} |
|
|
/** |
* skb_copy_expand - copy and expand sk_buff |
* @skb: buffer to copy |
* @newheadroom: new free bytes at head |
* @newtailroom: new free bytes at tail |
* @gfp_mask: allocation priority |
* |
* Make a copy of both an &sk_buff and its data and while doing so |
* allocate additional space. |
* |
* This is used when the caller wishes to modify the data and needs a |
* private copy of the data to alter as well as more space for new fields. |
* Returns %NULL on failure or the pointer to the buffer |
* on success. The returned buffer has a reference count of 1. |
* |
* You must pass %GFP_ATOMIC as the allocation priority if this function |
* is called from an interrupt. |
*/ |
|
|
struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
				int newheadroom,
				int newtailroom,
				int gfp_mask)
{
	struct sk_buff *n;

	/*
	 *	Allocate the copy buffer
	 */

	n=alloc_skb(newheadroom + skb->len + newtailroom,
		    gfp_mask);
	if(n==NULL)
		return NULL;

	skb_reserve(n,newheadroom);

	/* Set the tail pointer and length */
	skb_put(n,skb->len);

	/* Copy the data only. skb_copy_bits() linearizes: it gathers
	 * from the head, the page frags and the frag_list, so the
	 * result always fits the linear buffer allocated above.
	 */
	if (skb_copy_bits(skb, 0, n->data, skb->len))
		BUG();

	copy_skb_header(n, skb);
	return n;
}
|
/** |
* skb_pad - zero pad the tail of an skb |
* @skb: buffer to pad |
* @pad: space to pad |
* |
* Ensure that a buffer is followed by a padding area that is zero |
* filled. Used by network drivers which may DMA or transfer data |
* beyond the buffer end onto the wire. |
* |
* May return NULL in out of memory cases. |
*/ |
|
struct sk_buff *skb_pad(struct sk_buff *skb, int pad) |
{ |
struct sk_buff *nskb; |
|
/* If the skbuff is non linear tailroom is always zero.. */ |
if(skb_tailroom(skb) >= pad) |
{ |
memset(skb->data+skb->len, 0, pad); |
return skb; |
} |
|
nskb = skb_copy_expand(skb, skb_headroom(skb), skb_tailroom(skb) + pad, GFP_ATOMIC); |
kfree_skb(skb); |
if(nskb) |
memset(nskb->data+nskb->len, 0, pad); |
return nskb; |
} |
|
/* Trims skb to length len. It can change skb pointers, if "realloc" is 1. |
* If realloc==0 and trimming is impossible without change of data, |
* it is BUG(). |
*/ |
|
int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc)
{
	int offset = skb_headlen(skb);
	int nfrags = skb_shinfo(skb)->nr_frags;
	int i;

	/* Walk the page fragments; 'offset' is the running byte offset
	 * of each fragment's start within the whole skb.
	 */
	for (i=0; i<nfrags; i++) {
		int end = offset + skb_shinfo(skb)->frags[i].size;
		if (end > len) {
			/* This fragment extends past the new length.
			 * A cloned skb needs a private head before its
			 * shared_info may be modified.
			 */
			if (skb_cloned(skb)) {
				if (!realloc)
					BUG();
				if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
					return -ENOMEM;
			}
			if (len <= offset) {
				/* Entirely beyond the cut point: drop
				 * it (all later frags are dropped too,
				 * so only trailing slots go unused).
				 */
				put_page(skb_shinfo(skb)->frags[i].page);
				skb_shinfo(skb)->nr_frags--;
			} else {
				/* Straddles the cut point: shrink it. */
				skb_shinfo(skb)->frags[i].size = len-offset;
			}
		}
		offset = end;
	}

	if (offset < len) {
		/* Cut falls inside the frag_list part; only length
		 * accounting changes here.
		 */
		skb->data_len -= skb->len - len;
		skb->len = len;
	} else {
		if (len <= skb_headlen(skb)) {
			/* Trimmed back into the linear head. */
			skb->len = len;
			skb->data_len = 0;
			skb->tail = skb->data + len;
			if (skb_shinfo(skb)->frag_list && !skb_cloned(skb))
				skb_drop_fraglist(skb);
		} else {
			skb->data_len -= skb->len - len;
			skb->len = len;
		}
	}

	return 0;
}
|
/** |
* __pskb_pull_tail - advance tail of skb header |
* @skb: buffer to reallocate |
* @delta: number of bytes to advance tail |
* |
* The function makes a sense only on a fragmented &sk_buff, |
* it expands header moving its tail forward and copying necessary |
* data from fragmented part. |
* |
* &sk_buff MUST have reference count of 1. |
* |
* Returns %NULL (and &sk_buff does not change) if pull failed |
* or value of new tail of skb in the case of success. |
* |
* All the pointers pointing into skb header may change and must be |
* reloaded after call to this function. |
*/ |
|
/* Moves tail of skb head forward, copying data from fragmented part, |
* when it is necessary. |
* 1. It may fail due to malloc failure. |
* 2. It may change skb pointers. |
* |
* It is pretty complicated. Luckily, it is called only in exceptional cases. |
*/ |
unsigned char * __pskb_pull_tail(struct sk_buff *skb, int delta)
{
	int i, k, eat;

	/* If skb has not enough free space at tail, get new one
	 * plus 128 bytes for future expansions. If we have enough
	 * room at tail, reallocate without expansion only if skb is cloned.
	 */
	eat = (skb->tail+delta) - skb->end;

	if (eat > 0 || skb_cloned(skb)) {
		if (pskb_expand_head(skb, 0, eat>0 ? eat+128 : 0, GFP_ATOMIC))
			return NULL;
	}

	/* Gather 'delta' bytes from the fragmented part into the (now
	 * private and large enough) area past the current tail.
	 */
	if (skb_copy_bits(skb, skb_headlen(skb), skb->tail, delta))
		BUG();

	/* Optimization: no fragments, no reasons to preestimate
	 * size of pulled pages. Superb.
	 */
	if (skb_shinfo(skb)->frag_list == NULL)
		goto pull_pages;

	/* Estimate size of pulled pages. */
	eat = delta;
	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
		if (skb_shinfo(skb)->frags[i].size >= eat)
			goto pull_pages;
		eat -= skb_shinfo(skb)->frags[i].size;
	}

	/* If we need update frag list, we are in troubles.
	 * Certainly, it possible to add an offset to skb data,
	 * but taking into account that pulling is expected to
	 * be very rare operation, it is worth to fight against
	 * further bloating skb head and crucify ourselves here instead.
	 * Pure masohism, indeed. 8)8)
	 */
	if (eat) {
		struct sk_buff *list = skb_shinfo(skb)->frag_list;
		struct sk_buff *clone = NULL;
		/* 'insp' marks the first frag_list member that survives
		 * the pull; everything before it is freed below.
		 */
		struct sk_buff *insp = NULL;

		do {
			if (list == NULL)
				BUG();

			if (list->len <= eat) {
				/* Eaten as whole. */
				eat -= list->len;
				list = list->next;
				insp = list;
			} else {
				/* Eaten partially. */

				if (skb_shared(list)) {
					/* Sucks! We need to fork list. :-( */
					clone = skb_clone(list, GFP_ATOMIC);
					if (clone == NULL)
						return NULL;
					insp = list->next;
					list = clone;
				} else {
					/* This may be pulled without
					 * problems. */
					insp = list;
				}
				if (pskb_pull(list, eat) == NULL) {
					if (clone)
						kfree_skb(clone);
					return NULL;
				}
				break;
			}
		} while (eat);

		/* Free pulled out fragments. */
		while ((list = skb_shinfo(skb)->frag_list) != insp) {
			skb_shinfo(skb)->frag_list = list->next;
			kfree_skb(list);
		}
		/* And insert new clone at head. */
		if (clone) {
			clone->next = list;
			skb_shinfo(skb)->frag_list = clone;
		}
	}
	/* Success! Now we may commit changes to skb data. */

pull_pages:
	/* Drop fully-consumed page frags, advance into a partially
	 * consumed one; 'k' compacts the surviving frag array.
	 */
	eat = delta;
	k = 0;
	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
		if (skb_shinfo(skb)->frags[i].size <= eat) {
			put_page(skb_shinfo(skb)->frags[i].page);
			eat -= skb_shinfo(skb)->frags[i].size;
		} else {
			skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
			if (eat) {
				skb_shinfo(skb)->frags[k].page_offset += eat;
				skb_shinfo(skb)->frags[k].size -= eat;
				eat = 0;
			}
			k++;
		}
	}
	skb_shinfo(skb)->nr_frags = k;

	skb->tail += delta;
	skb->data_len -= delta;

	return skb->tail;
}
|
/* Copy some data bits from skb to kernel buffer. */ |
|
/* Copy 'len' bytes starting at byte 'offset' of the skb (counting the
 * linear head, the page frags and the frag_list as one byte stream)
 * into the flat kernel buffer 'to'. Returns 0 on success, -EFAULT if
 * the requested range exceeds skb->len.
 */
int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
{
	int i, copy;
	int start = skb->len - skb->data_len;

	if (offset > (int)skb->len-len)
		goto fault;

	/* Copy header. */
	if ((copy = start-offset) > 0) {
		if (copy > len)
			copy = len;
		memcpy(to, skb->data + offset, copy);
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
		to += copy;
	}

	/* Copy from page fragments; [start, end) brackets each frag's
	 * byte range within the whole skb.
	 */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;

		BUG_TRAP(start <= offset+len);

		end = start + skb_shinfo(skb)->frags[i].size;
		if ((copy = end-offset) > 0) {
			u8 *vaddr;

			if (copy > len)
				copy = len;

			vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
			memcpy(to, vaddr+skb_shinfo(skb)->frags[i].page_offset+
			       offset-start, copy);
			kunmap_skb_frag(vaddr);

			if ((len -= copy) == 0)
				return 0;
			offset += copy;
			to += copy;
		}
		start = end;
	}

	/* Recurse into the frag_list members for whatever remains. */
	if (skb_shinfo(skb)->frag_list) {
		struct sk_buff *list;

		for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
			int end;

			BUG_TRAP(start <= offset+len);

			end = start + list->len;
			if ((copy = end-offset) > 0) {
				if (copy > len)
					copy = len;
				if (skb_copy_bits(list, offset-start, to, copy))
					goto fault;
				if ((len -= copy) == 0)
					return 0;
				offset += copy;
				to += copy;
			}
			start = end;
		}
	}
	if (len == 0)
		return 0;

fault:
	return -EFAULT;
}
|
/* Checksum skb data. */ |
|
/* Fold 'len' bytes starting at 'offset' of the skb (head + frags +
 * frag_list) into the running Internet checksum 'csum'. 'pos' tracks
 * the byte position of each partial sum within the overall stream and
 * is handed to csum_block_add() when combining.
 */
unsigned int skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum)
{
	int i, copy;
	int start = skb->len - skb->data_len;
	int pos = 0;

	/* Checksum header. */
	if ((copy = start-offset) > 0) {
		if (copy > len)
			copy = len;
		csum = csum_partial(skb->data+offset, copy, csum);
		if ((len -= copy) == 0)
			return csum;
		offset += copy;
		pos = copy;
	}

	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
		int end;

		BUG_TRAP(start <= offset+len);

		end = start + skb_shinfo(skb)->frags[i].size;
		if ((copy = end-offset) > 0) {
			unsigned int csum2;
			u8 *vaddr;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

			if (copy > len)
				copy = len;
			vaddr = kmap_skb_frag(frag);
			csum2 = csum_partial(vaddr + frag->page_offset +
					     offset-start, copy, 0);
			kunmap_skb_frag(vaddr);
			csum = csum_block_add(csum, csum2, pos);
			if (!(len -= copy))
				return csum;
			offset += copy;
			pos += copy;
		}
		start = end;
	}

	/* Recurse into frag_list members for the remainder. */
	if (skb_shinfo(skb)->frag_list) {
		struct sk_buff *list;

		for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
			int end;

			BUG_TRAP(start <= offset+len);

			end = start + list->len;
			if ((copy = end-offset) > 0) {
				unsigned int csum2;
				if (copy > len)
					copy = len;
				csum2 = skb_checksum(list, offset-start, copy, 0);
				csum = csum_block_add(csum, csum2, pos);
				if ((len -= copy) == 0)
					return csum;
				offset += copy;
				pos += copy;
			}
			start = end;
		}
	}
	/* Reaching here with len != 0 means the caller asked for bytes
	 * past the end of the skb.
	 */
	if (len == 0)
		return csum;

	BUG();
	return csum;
}
|
/* Both of above in one bottle. */ |
|
/* Combined skb_copy_bits() + skb_checksum(): copy 'len' bytes starting
 * at 'offset' into 'to' while folding them into 'csum' in one pass.
 */
unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int csum)
{
	int i, copy;
	int start = skb->len - skb->data_len;
	int pos = 0;

	/* Copy header. */
	if ((copy = start-offset) > 0) {
		if (copy > len)
			copy = len;
		csum = csum_partial_copy_nocheck(skb->data+offset, to, copy, csum);
		if ((len -= copy) == 0)
			return csum;
		offset += copy;
		to += copy;
		pos = copy;
	}

	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
		int end;

		BUG_TRAP(start <= offset+len);

		end = start + skb_shinfo(skb)->frags[i].size;
		if ((copy = end-offset) > 0) {
			unsigned int csum2;
			u8 *vaddr;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

			if (copy > len)
				copy = len;
			vaddr = kmap_skb_frag(frag);
			csum2 = csum_partial_copy_nocheck(vaddr + frag->page_offset +
						      offset-start, to, copy, 0);
			kunmap_skb_frag(vaddr);
			/* 'pos' gives the byte position of csum2 within
			 * the overall stream for csum_block_add().
			 */
			csum = csum_block_add(csum, csum2, pos);
			if (!(len -= copy))
				return csum;
			offset += copy;
			to += copy;
			pos += copy;
		}
		start = end;
	}

	/* Recurse into frag_list members for the remainder. */
	if (skb_shinfo(skb)->frag_list) {
		struct sk_buff *list;

		for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
			unsigned int csum2;
			int end;

			BUG_TRAP(start <= offset+len);

			end = start + list->len;
			if ((copy = end-offset) > 0) {
				if (copy > len)
					copy = len;
				csum2 = skb_copy_and_csum_bits(list, offset-start, to, copy, 0);
				csum = csum_block_add(csum, csum2, pos);
				if ((len -= copy) == 0)
					return csum;
				offset += copy;
				to += copy;
				pos += copy;
			}
			start = end;
		}
	}
	/* len != 0 here means the requested range exceeded the skb. */
	if (len == 0)
		return csum;

	BUG();
	return csum;
}
|
/* Copy a whole skb into the flat buffer 'to', checksumming on the fly.
 * For CHECKSUM_HW the bytes before h.raw are copied verbatim, the rest
 * is checksummed and the folded sum is stored into the packet at
 * csstart + skb->csum (skb->csum is used as the checksum-field offset
 * relative to h.raw in this mode).
 */
void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
{
	unsigned int csum;
	long csstart;

	if (skb->ip_summed == CHECKSUM_HW)
		csstart = skb->h.raw - skb->data;
	else
		csstart = skb->len - skb->data_len;

	/* The checksum start must lie within the linear head. */
	if (csstart > skb->len - skb->data_len)
		BUG();

	memcpy(to, skb->data, csstart);

	csum = 0;
	if (csstart != skb->len)
		csum = skb_copy_and_csum_bits(skb, csstart, to+csstart,
				skb->len-csstart, 0);

	if (skb->ip_summed == CHECKSUM_HW) {
		long csstuff = csstart + skb->csum;

		*((unsigned short *)(to + csstuff)) = csum_fold(csum);
	}
}
|
#if 0 |
/* |
* Tune the memory allocator for a new MTU size. |
*/ |
/* Dead code: compiled out by the surrounding #if 0. Kept for
 * reference; would register an slab size matching alloc_skb()'s
 * layout for a given MTU.
 */
void skb_add_mtu(int mtu)
{
	/* Must match allocation in alloc_skb */
	mtu = SKB_DATA_ALIGN(mtu) + sizeof(struct skb_shared_info);

	kmem_add_cache_size(mtu);
}
#endif |
|
/* Boot-time initialisation: create the sk_buff head slab cache and
 * initialise the per-CPU lists of recycled skb heads.
 */
void __init skb_init(void)
{
	int cpu;

	skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
					      sizeof(struct sk_buff), 0,
					      SLAB_HWCACHE_ALIGN,
					      skb_headerinit, NULL);
	if (skbuff_head_cache == NULL)
		panic("cannot create skbuff cache");

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		skb_queue_head_init(&skb_head_pool[cpu].list);
}
/neighbour.c
0,0 → 1,1588
/* |
* Generic address resolution entity |
* |
* Authors: |
* Pedro Roque <roque@di.fc.ul.pt> |
* Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> |
* |
* This program is free software; you can redistribute it and/or |
* modify it under the terms of the GNU General Public License |
* as published by the Free Software Foundation; either version |
* 2 of the License, or (at your option) any later version. |
* |
* Fixes: |
* Vitaly E. Lavrov releasing NULL neighbor in neigh_add. |
*/ |
|
#include <linux/config.h> |
#include <linux/types.h> |
#include <linux/kernel.h> |
#include <linux/socket.h> |
#include <linux/sched.h> |
#include <linux/netdevice.h> |
#ifdef CONFIG_SYSCTL |
#include <linux/sysctl.h> |
#endif |
#include <net/neighbour.h> |
#include <net/dst.h> |
#include <net/sock.h> |
#include <linux/rtnetlink.h> |
|
#define NEIGH_DEBUG 1 |
|
#define NEIGH_PRINTK(x...) printk(x) |
#define NEIGH_NOPRINTK(x...) do { ; } while(0) |
#define NEIGH_PRINTK0 NEIGH_PRINTK |
#define NEIGH_PRINTK1 NEIGH_NOPRINTK |
#define NEIGH_PRINTK2 NEIGH_NOPRINTK |
|
#if NEIGH_DEBUG >= 1 |
#undef NEIGH_PRINTK1 |
#define NEIGH_PRINTK1 NEIGH_PRINTK |
#endif |
#if NEIGH_DEBUG >= 2 |
#undef NEIGH_PRINTK2 |
#define NEIGH_PRINTK2 NEIGH_PRINTK |
#endif |
|
static void neigh_timer_handler(unsigned long arg); |
#ifdef CONFIG_ARPD |
static void neigh_app_notify(struct neighbour *n); |
#endif |
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev); |
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev); |
|
static int neigh_glbl_allocs; |
static struct neigh_table *neigh_tables; |
|
/* |
Neighbour hash table buckets are protected with rwlock tbl->lock. |
|
- All the scans/updates to hash buckets MUST be made under this lock. |
- NOTHING clever should be made under this lock: no callbacks |
to protocol backends, no attempts to send something to network. |
It will result in deadlocks, if backend/driver wants to use neighbour |
cache. |
- If the entry requires some non-trivial actions, increase |
its reference count and release table lock. |
|
Neighbour entries are protected: |
- with reference count. |
- with rwlock neigh->lock |
|
Reference count prevents destruction. |
|
neigh->lock mainly serializes ll address data and its validity state. |
However, the same lock is used to protect another entry fields: |
- timer |
- resolution queue |
|
Again, nothing clever shall be made under neigh->lock, |
the most complicated procedure, which we allow is dev->hard_header. |
It is supposed, that dev->hard_header is simplistic and does |
not make callbacks to neighbour tables. |
|
The last lock is neigh_tbl_lock. It is pure SMP lock, protecting |
list of neighbour tables. This list is used only in process context, |
*/ |
|
static rwlock_t neigh_tbl_lock = RW_LOCK_UNLOCKED; |
|
/* Output stub installed on dead/failed neighbour entries: drop the
 * packet and report the path as down.
 */
static int neigh_blackhole(struct sk_buff *skb)
{
	kfree_skb(skb);
	return -ENETDOWN;
}
|
/* |
* It is random distribution in the interval (1/2)*base...(3/2)*base. |
* It corresponds to default IPv6 settings and is not overridable, |
* because it is really reasonable choice. |
*/ |
|
unsigned long neigh_rand_reach_time(unsigned long base)
{
	/* Uniform random value in the interval [base/2, 3*base/2). */
	return (base >> 1) + (net_random() % base);
}
|
|
/* Synchronous garbage collection: walk every hash bucket and evict
 * entries that nobody references. Returns 1 if anything was freed.
 * Called from neigh_alloc() when the table is over its gc thresholds.
 */
static int neigh_forced_gc(struct neigh_table *tbl)
{
	int shrunk = 0;
	int i;

	for (i=0; i<=NEIGH_HASHMASK; i++) {
		struct neighbour *n, **np;

		np = &tbl->hash_buckets[i];
		/* tbl->lock taken _bh; n->lock may then be plain. */
		write_lock_bh(&tbl->lock);
		while ((n = *np) != NULL) {
			/* Neighbour record may be discarded if:
			   - nobody refers to it.
			   - it is not permanent
			   - (NEW and probably wrong)
			     INCOMPLETE entries are kept at least for
			     n->parms->retrans_time, otherwise we could
			     flood network with resolution requests.
			     It is not clear, what is better table overflow
			     or flooding.
			 */
			write_lock(&n->lock);
			if (atomic_read(&n->refcnt) == 1 &&
			    !(n->nud_state&NUD_PERMANENT) &&
			    (n->nud_state != NUD_INCOMPLETE ||
			     jiffies - n->used > n->parms->retrans_time)) {
				*np = n->next;
				n->dead = 1;
				shrunk = 1;
				write_unlock(&n->lock);
				/* Drop the table's reference; frees the
				 * entry if that was the last one.
				 */
				neigh_release(n);
				continue;
			}
			write_unlock(&n->lock);
			np = &n->next;
		}
		write_unlock_bh(&tbl->lock);
	}

	tbl->last_flush = jiffies;
	return shrunk;
}
|
static int neigh_del_timer(struct neighbour *n) |
{ |
if (n->nud_state & NUD_IN_TIMER) { |
if (del_timer(&n->timer)) { |
neigh_release(n); |
return 1; |
} |
} |
return 0; |
} |
|
static void pneigh_queue_purge(struct sk_buff_head *list) |
{ |
struct sk_buff *skb; |
|
while ((skb = skb_dequeue(list)) != NULL) { |
dev_put(skb->dev); |
kfree_skb(skb); |
} |
} |
|
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev) |
{ |
int i; |
|
write_lock_bh(&tbl->lock); |
|
for (i=0; i <= NEIGH_HASHMASK; i++) { |
struct neighbour *n, **np; |
|
np = &tbl->hash_buckets[i]; |
while ((n = *np) != NULL) { |
if (dev && n->dev != dev) { |
np = &n->next; |
continue; |
} |
*np = n->next; |
write_lock_bh(&n->lock); |
n->dead = 1; |
neigh_del_timer(n); |
write_unlock_bh(&n->lock); |
neigh_release(n); |
} |
} |
|
write_unlock_bh(&tbl->lock); |
} |
|
/* Device is going down (or dev == NULL: flush everything): kill all
 * matching neighbour and proxy entries, stop the proxy timer and
 * drain its queue. Always returns 0.
 */
int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	int i;

	write_lock_bh(&tbl->lock);

	for (i=0; i<=NEIGH_HASHMASK; i++) {
		struct neighbour *n, **np;

		np = &tbl->hash_buckets[i];
		while ((n = *np) != NULL) {
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
			*np = n->next;
			write_lock(&n->lock);
			neigh_del_timer(n);
			n->dead = 1;

			if (atomic_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				   We must destroy neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to safe state.
				 */
				n->parms = &tbl->parms;
				skb_queue_purge(&n->arp_queue);
				n->output = neigh_blackhole;
				if (n->nud_state&NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				NEIGH_PRINTK2("neigh %p is stray.\n", n);
			}
			write_unlock(&n->lock);
			neigh_release(n);
		}
	}

	/* pneigh_ifdown() is called with tbl->lock still held. */
	pneigh_ifdown(tbl, dev);
	write_unlock_bh(&tbl->lock);

	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	return 0;
}
|
/* Allocate and initialise a new neighbour entry, forcing a garbage
 * collection first when the table is over its gc thresholds. Returns
 * NULL if the table stays over gc_thresh3 or allocation fails. The
 * entry starts with refcnt 1 and dead = 1 (it is not yet hashed;
 * neigh_create() clears dead on insert).
 */
static struct neighbour *neigh_alloc(struct neigh_table *tbl)
{
	struct neighbour *n;
	unsigned long now = jiffies;

	if (tbl->entries > tbl->gc_thresh3 ||
	    (tbl->entries > tbl->gc_thresh2 &&
	     now - tbl->last_flush > 5*HZ)) {
		if (neigh_forced_gc(tbl) == 0 &&
		    tbl->entries > tbl->gc_thresh3)
			return NULL;
	}

	n = kmem_cache_alloc(tbl->kmem_cachep, SLAB_ATOMIC);
	if (n == NULL)
		return NULL;

	memset(n, 0, tbl->entry_size);

	skb_queue_head_init(&n->arp_queue);
	n->lock = RW_LOCK_UNLOCKED;
	n->updated = n->used = now;
	n->nud_state = NUD_NONE;
	/* Unresolved entries drop packets until a real output is set. */
	n->output = neigh_blackhole;
	n->parms = &tbl->parms;
	init_timer(&n->timer);
	n->timer.function = neigh_timer_handler;
	n->timer.data = (unsigned long)n;
	tbl->stats.allocs++;
	neigh_glbl_allocs++;
	tbl->entries++;
	n->tbl = tbl;
	atomic_set(&n->refcnt, 1);
	n->dead = 1;
	return n;
}
|
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, |
struct net_device *dev) |
{ |
struct neighbour *n; |
u32 hash_val; |
int key_len = tbl->key_len; |
|
hash_val = tbl->hash(pkey, dev); |
|
read_lock_bh(&tbl->lock); |
for (n = tbl->hash_buckets[hash_val]; n; n = n->next) { |
if (dev == n->dev && |
memcmp(n->primary_key, pkey, key_len) == 0) { |
neigh_hold(n); |
break; |
} |
} |
read_unlock_bh(&tbl->lock); |
return n; |
} |
|
/* Create and insert the entry for (pkey, dev). Returns the new entry
 * with a reference held, an existing entry if another CPU inserted one
 * concurrently, or ERR_PTR on allocation/constructor failure.
 */
struct neighbour * neigh_create(struct neigh_table *tbl, const void *pkey,
				struct net_device *dev)
{
	struct neighbour *n, *n1;
	u32 hash_val;
	int key_len = tbl->key_len;
	int error;

	n = neigh_alloc(tbl);
	if (n == NULL)
		return ERR_PTR(-ENOBUFS);

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
		neigh_release(n);
		return ERR_PTR(error);
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		neigh_release(n);
		return ERR_PTR(error);
	}

	/* Start out as long-unconfirmed so the entry is not treated
	 * as freshly reachable.
	 */
	n->confirmed = jiffies - (n->parms->base_reachable_time<<1);

	hash_val = tbl->hash(pkey, dev);

	/* Re-scan under the write lock: someone may have inserted the
	 * same key while we were constructing; if so, use theirs.
	 */
	write_lock_bh(&tbl->lock);
	for (n1 = tbl->hash_buckets[hash_val]; n1; n1 = n1->next) {
		if (dev == n1->dev &&
		    memcmp(n1->primary_key, pkey, key_len) == 0) {
			neigh_hold(n1);
			write_unlock_bh(&tbl->lock);
			neigh_release(n);
			return n1;
		}
	}

	n->next = tbl->hash_buckets[hash_val];
	tbl->hash_buckets[hash_val] = n;
	n->dead = 0;
	/* Second reference: one for the table, one for the caller. */
	neigh_hold(n);
	write_unlock_bh(&tbl->lock);
	NEIGH_PRINTK2("neigh %p is created.\n", n);
	return n;
}
|
/* Look up a proxy entry for (pkey, dev); when 'creat' is nonzero and
 * no entry exists, allocate and insert one (may sleep: GFP_KERNEL).
 */
struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey,
				    struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	u32 hash_val;
	int key_len = tbl->key_len;

	/* Hash the last 4 bytes of the key. */
	hash_val = *(u32*)(pkey + key_len - 4);
	hash_val ^= (hash_val>>16);
	hash_val ^= hash_val>>8;
	hash_val ^= hash_val>>4;
	hash_val &= PNEIGH_HASHMASK;

	read_lock_bh(&tbl->lock);

	/* A device-less entry (n->dev == NULL) matches any device. */
	for (n = tbl->phash_buckets[hash_val]; n; n = n->next) {
		if (memcmp(n->key, pkey, key_len) == 0 &&
		    (n->dev == dev || !n->dev)) {
			read_unlock_bh(&tbl->lock);
			return n;
		}
	}
	read_unlock_bh(&tbl->lock);
	if (!creat)
		return NULL;

	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (n == NULL)
		return NULL;

	memcpy(n->key, pkey, key_len);
	n->dev = dev;

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		kfree(n);
		return NULL;
	}

	/* NOTE(review): the bucket is not re-checked under the write
	 * lock here, so two concurrent creators could insert duplicate
	 * entries — confirm whether callers serialize this externally.
	 */
	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
	return n;
}
|
|
int pneigh_delete(struct neigh_table *tbl, const void *pkey, struct net_device *dev) |
{ |
struct pneigh_entry *n, **np; |
u32 hash_val; |
int key_len = tbl->key_len; |
|
hash_val = *(u32*)(pkey + key_len - 4); |
hash_val ^= (hash_val>>16); |
hash_val ^= hash_val>>8; |
hash_val ^= hash_val>>4; |
hash_val &= PNEIGH_HASHMASK; |
|
for (np = &tbl->phash_buckets[hash_val]; (n=*np) != NULL; np = &n->next) { |
if (memcmp(n->key, pkey, key_len) == 0 && n->dev == dev) { |
write_lock_bh(&tbl->lock); |
*np = n->next; |
write_unlock_bh(&tbl->lock); |
if (tbl->pdestructor) |
tbl->pdestructor(n); |
kfree(n); |
return 0; |
} |
} |
return -ENOENT; |
} |
|
/* Remove all proxy entries for 'dev' (or all entries when dev is
 * NULL). Called from neigh_ifdown() with tbl->lock already held for
 * writing. Always returns -ENOENT (the value is unused by callers
 * visible here).
 */
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	u32 h;

	for (h=0; h<=PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n=*np) != NULL) {
			if (n->dev == dev || dev == NULL) {
				*np = n->next;
				if (tbl->pdestructor)
					tbl->pdestructor(n);
				kfree(n);
				continue;
			}
			np = &n->next;
		}
	}
	return -ENOENT;
}
|
|
/* |
* neighbour must already be out of the table; |
* |
*/ |
/* Final destruction of a neighbour entry. The entry must already be
 * unhashed (dead != 0); otherwise we complain and bail out.
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct hh_cache *hh;

	if (!neigh->dead) {
		printk("Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	/* All timers must have been cancelled when the entry died. */
	if (neigh_del_timer(neigh))
		printk("Impossible event.\n");

	/* Detach every cached hardware header: blackhole its output
	 * so racing users drop packets, then drop our reference.
	 */
	while ((hh = neigh->hh) != NULL) {
		neigh->hh = hh->hh_next;
		hh->hh_next = NULL;
		write_lock_bh(&hh->hh_lock);
		hh->hh_output = neigh_blackhole;
		write_unlock_bh(&hh->hh_lock);
		if (atomic_dec_and_test(&hh->hh_refcnt))
			kfree(hh);
	}

	if (neigh->ops && neigh->ops->destructor)
		(neigh->ops->destructor)(neigh);

	skb_queue_purge(&neigh->arp_queue);

	dev_put(neigh->dev);

	NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);

	neigh_glbl_allocs--;
	neigh->tbl->entries--;
	kmem_cache_free(neigh->tbl->kmem_cachep, neigh);
}
|
/* Neighbour state is suspicious; |
disable fast path. |
|
Called with write_locked neigh. |
*/ |
static void neigh_suspect(struct neighbour *neigh) |
{ |
struct hh_cache *hh; |
|
NEIGH_PRINTK2("neigh %p is suspected.\n", neigh); |
|
neigh->output = neigh->ops->output; |
|
for (hh = neigh->hh; hh; hh = hh->hh_next) |
hh->hh_output = neigh->ops->output; |
} |
|
/* Neighbour state is OK; |
enable fast path. |
|
Called with write_locked neigh. |
*/ |
static void neigh_connect(struct neighbour *neigh) |
{ |
struct hh_cache *hh; |
|
NEIGH_PRINTK2("neigh %p is connected.\n", neigh); |
|
neigh->output = neigh->ops->connected_output; |
|
for (hh = neigh->hh; hh; hh = hh->hh_next) |
hh->hh_output = neigh->ops->hh_output; |
} |
|
/* |
Transitions NUD_STALE <-> NUD_REACHABLE do not occur |
when fast path is built: we have no timers associated with |
these states, we do not have time to check state when sending. |
neigh_periodic_timer check periodically neigh->confirmed |
time and moves NUD_REACHABLE -> NUD_STALE. |
|
If a routine wants to know TRUE entry state, it calls |
neigh_sync before checking state. |
|
Called with write_locked neigh. |
*/ |
|
/* Resynchronise NUD state with n->confirmed (see comment above):
 * demote REACHABLE entries whose confirmation expired, promote other
 * VALID entries that were recently confirmed. Caller holds n->lock
 * for writing.
 */
static void neigh_sync(struct neighbour *n)
{
	unsigned long now = jiffies;
	u8 state = n->nud_state;

	/* NOARP/PERMANENT entries never change state here. */
	if (state&(NUD_NOARP|NUD_PERMANENT))
		return;
	if (state&NUD_REACHABLE) {
		if (now - n->confirmed > n->parms->reachable_time) {
			n->nud_state = NUD_STALE;
			neigh_suspect(n);
		}
	} else if (state&NUD_VALID) {
		if (now - n->confirmed < n->parms->reachable_time) {
			neigh_del_timer(n);
			n->nud_state = NUD_REACHABLE;
			neigh_connect(n);
		}
	}
}
|
/* Periodic table scan: refresh the randomised reachable_time, drop
 * unreferenced stale/failed entries, and demote entries whose
 * confirmation has expired. Re-arms itself via tbl->gc_timer.
 */
static void SMP_TIMER_NAME(neigh_periodic_timer)(unsigned long arg)
{
	struct neigh_table *tbl = (struct neigh_table*)arg;
	unsigned long now = jiffies;
	int i;


	write_lock(&tbl->lock);

	/*
	 *	periodicly recompute ReachableTime from random function
	 */

	if (now - tbl->last_rand > 300*HZ) {
		struct neigh_parms *p;
		tbl->last_rand = now;
		for (p=&tbl->parms; p; p = p->next)
			p->reachable_time = neigh_rand_reach_time(p->base_reachable_time);
	}

	for (i=0; i <= NEIGH_HASHMASK; i++) {
		struct neighbour *n, **np;

		np = &tbl->hash_buckets[i];
		while ((n = *np) != NULL) {
			unsigned state;

			write_lock(&n->lock);

			/* Entries with an armed timer or PERMANENT are
			 * left to their own state machine.
			 */
			state = n->nud_state;
			if (state&(NUD_PERMANENT|NUD_IN_TIMER)) {
				write_unlock(&n->lock);
				goto next_elt;
			}

			/* 'used' must never lag behind 'confirmed'. */
			if ((long)(n->used - n->confirmed) < 0)
				n->used = n->confirmed;

			if (atomic_read(&n->refcnt) == 1 &&
			    (state == NUD_FAILED || now - n->used > n->parms->gc_staletime)) {
				*np = n->next;
				n->dead = 1;
				write_unlock(&n->lock);
				neigh_release(n);
				continue;
			}

			if (n->nud_state&NUD_REACHABLE &&
			    now - n->confirmed > n->parms->reachable_time) {
				n->nud_state = NUD_STALE;
				neigh_suspect(n);
			}
			write_unlock(&n->lock);

next_elt:
			np = &n->next;
		}
	}

	mod_timer(&tbl->gc_timer, now + tbl->gc_interval);
	write_unlock(&tbl->lock);
}
|
#ifdef CONFIG_SMP |
/* SMP: the timer handler only kicks a tasklet; the real scan (the
 * SMP_TIMER_NAME variant above) runs from tasklet context.
 */
static void neigh_periodic_timer(unsigned long arg)
{
	struct neigh_table *tbl = (struct neigh_table*)arg;

	tasklet_schedule(&tbl->gc_task);
}
#endif |
|
static __inline__ int neigh_max_probes(struct neighbour *n) |
{ |
struct neigh_parms *p = n->parms; |
return p->ucast_probes + p->app_probes + p->mcast_probes; |
} |
|
|
/* Called when a timer expires for a neighbour entry. */ |
|
/* Called when a timer expires for a neighbour entry: either confirm
 * reachability, advance DELAY -> PROBE, fail the entry after too many
 * probes, or send another solicitation and re-arm. Drops the
 * reference the timer held on every path that does not re-arm.
 */
static void neigh_timer_handler(unsigned long arg)
{
	unsigned long now = jiffies;
	struct neighbour *neigh = (struct neighbour*)arg;
	unsigned state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;

	if (!(state&NUD_IN_TIMER)) {
#ifndef CONFIG_SMP
		printk("neigh: timer & !nud_in_timer\n");
#endif
		goto out;
	}

	/* Confirmed recently enough: back to REACHABLE. */
	if ((state&NUD_VALID) &&
	    now - neigh->confirmed < neigh->parms->reachable_time) {
		neigh->nud_state = NUD_REACHABLE;
		NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
		neigh_connect(neigh);
		goto out;
	}
	if (state == NUD_DELAY) {
		NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
		neigh->nud_state = NUD_PROBE;
		atomic_set(&neigh->probes, 0);
	}

	if (atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		struct sk_buff *skb;

		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh->tbl->stats.res_failed++;
		NEIGH_PRINTK2("neigh %p is failed.\n", neigh);

		/* It is very thin place. report_unreachable is very complicated
		   routine. Particularly, it can hit the same neighbour entry!

		   So that, we try to be accurate and avoid dead loop. --ANK
		 */
		while(neigh->nud_state==NUD_FAILED && (skb=__skb_dequeue(&neigh->arp_queue)) != NULL) {
			write_unlock(&neigh->lock);
			neigh->ops->error_report(neigh, skb);
			write_lock(&neigh->lock);
		}
		skb_queue_purge(&neigh->arp_queue);
		goto out;
	}

	/* Re-arm and send the next solicitation outside the lock; the
	 * timer keeps its reference, so no release on this path.
	 */
	neigh->timer.expires = now + neigh->parms->retrans_time;
	add_timer(&neigh->timer);
	write_unlock(&neigh->lock);

	neigh->ops->solicit(neigh, skb_peek(&neigh->arp_queue));
	atomic_inc(&neigh->probes);
	return;

out:
	write_unlock(&neigh->lock);
#ifdef CONFIG_ARPD
	if (notify && neigh->parms->app_probes)
		neigh_app_notify(neigh);
#endif
	neigh_release(neigh);
}
|
/* Kick the resolution state machine for an outgoing packet. Returns 1
 * when the packet was queued (or dropped because resolution failed)
 * and the caller must not transmit it; 0 when the entry is usable and
 * the caller may send immediately.
 */
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	write_lock_bh(&neigh->lock);
	if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE))) {
		if (!(neigh->nud_state&(NUD_STALE|NUD_INCOMPLETE))) {
			if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
				/* Start resolving: arm the retransmit
				 * timer (which takes a reference) and
				 * solicit outside the lock.
				 */
				atomic_set(&neigh->probes, neigh->parms->ucast_probes);
				neigh->nud_state = NUD_INCOMPLETE;
				neigh_hold(neigh);
				neigh->timer.expires = jiffies + neigh->parms->retrans_time;
				add_timer(&neigh->timer);
				write_unlock_bh(&neigh->lock);
				neigh->ops->solicit(neigh, skb);
				atomic_inc(&neigh->probes);
				write_lock_bh(&neigh->lock);
			} else {
				/* No probing configured: fail at once. */
				neigh->nud_state = NUD_FAILED;
				write_unlock_bh(&neigh->lock);

				if (skb)
					kfree_skb(skb);
				return 1;
			}
		}
		if (neigh->nud_state == NUD_INCOMPLETE) {
			/* Park the packet until resolution completes,
			 * dropping the oldest one if the queue is full.
			 */
			if (skb) {
				if (skb_queue_len(&neigh->arp_queue) >= neigh->parms->queue_len) {
					struct sk_buff *buff;
					buff = neigh->arp_queue.next;
					__skb_unlink(buff, &neigh->arp_queue);
					kfree_skb(buff);
				}
				__skb_queue_tail(&neigh->arp_queue, skb);
			}
			write_unlock_bh(&neigh->lock);
			return 1;
		}
		if (neigh->nud_state == NUD_STALE) {
			/* Usable but suspect: send now, verify later. */
			NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
			neigh_hold(neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->timer.expires = jiffies + neigh->parms->delay_probe_time;
			add_timer(&neigh->timer);
		}
	}
	write_unlock_bh(&neigh->lock);
	return 0;
}
|
static __inline__ void neigh_update_hhs(struct neighbour *neigh) |
{ |
struct hh_cache *hh; |
void (*update)(struct hh_cache*, struct net_device*, unsigned char*) = |
neigh->dev->header_cache_update; |
|
if (update) { |
for (hh=neigh->hh; hh; hh=hh->hh_next) { |
write_lock_bh(&hh->hh_lock); |
update(hh, neigh->dev, neigh->ha); |
write_unlock_bh(&hh->hh_lock); |
} |
} |
} |
|
|
|
/* Generic update routine. |
-- lladdr is new lladdr or NULL, if it is not supplied. |
-- new is new state. |
-- override==1 allows to override existing lladdr, if it is different. |
-- arp==0 means that the change is administrative. |
|
Caller MUST hold reference count on the entry. |
*/ |
|
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, int override, int arp)
{
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev = neigh->dev;

	write_lock_bh(&neigh->lock);
	old = neigh->nud_state;

	/* ARP-driven updates may not touch administratively configured
	   entries. */
	err = -EPERM;
	if (arp && (old&(NUD_NOARP|NUD_PERMANENT)))
		goto out;

	if (!(new&NUD_VALID)) {
		/* Transition to an invalid state: stop pending timers and
		   mark a formerly reachable entry suspect. */
		neigh_del_timer(neigh);
		if (old&NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old&NUD_VALID;
		goto out;
	}

	/* Compare new lladdr with cached one */
	if (dev->addr_len == 0) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if (old&NUD_VALID) {
			if (memcmp(lladdr, neigh->ha, dev->addr_len) == 0)
				lladdr = neigh->ha;
			else if (!override)
				goto out;
		}
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old&NUD_VALID))
			goto out;
		lladdr = neigh->ha;
	}

	neigh_sync(neigh);
	old = neigh->nud_state;
	if (new&NUD_CONNECTED)
		neigh->confirmed = jiffies;
	neigh->updated = jiffies;

	/* If entry was valid and address is not changed,
	   do not change entry state, if new one is STALE.
	 */
	err = 0;
	if (old&NUD_VALID) {
		if (lladdr == neigh->ha)
			if (new == old || (new == NUD_STALE && (old&NUD_CONNECTED)))
				goto out;
	}
	neigh_del_timer(neigh);
	neigh->nud_state = new;
	if (lladdr != neigh->ha) {
		/* Address really changed: update the entry and every
		   cached hardware header derived from it. */
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		neigh_update_hhs(neigh);
		/* Back-date 'confirmed' so an unconfirmed entry cannot be
		   mistaken for a recently reachable one. */
		if (!(new&NUD_CONNECTED))
			neigh->confirmed = jiffies - (neigh->parms->base_reachable_time<<1);
#ifdef CONFIG_ARPD
		notify = 1;
#endif
	}
	if (new == old)
		goto out;
	if (new&NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old&NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		/* Entry just became valid: flush packets that were queued
		   while resolution was pending.  The lock is dropped around
		   each transmit, so re-check nud_state every iteration. */
		while (neigh->nud_state&NUD_VALID &&
		       (skb=__skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct neighbour *n1 = neigh;
			write_unlock_bh(&neigh->lock);
			/* On shaper/eql skb->dst->neighbour != neigh :( */
			if (skb->dst && skb->dst->neighbour)
				n1 = skb->dst->neighbour;
			n1->output(skb);
			write_lock_bh(&neigh->lock);
		}
		skb_queue_purge(&neigh->arp_queue);
	}
out:
	write_unlock_bh(&neigh->lock);
#ifdef CONFIG_ARPD
	if (notify && neigh->parms->app_probes)
		neigh_app_notify(neigh);
#endif
	return err;
}
|
struct neighbour * neigh_event_ns(struct neigh_table *tbl, |
u8 *lladdr, void *saddr, |
struct net_device *dev) |
{ |
struct neighbour *neigh; |
|
neigh = __neigh_lookup(tbl, saddr, dev, lladdr || !dev->addr_len); |
if (neigh) |
neigh_update(neigh, lladdr, NUD_STALE, 1, 1); |
return neigh; |
} |
|
/* Attach a cached hardware header (hh_cache) for @protocol to @dst,
   creating one on the neighbour if none exists for that protocol yet.
   Called with n->lock held for writing (see neigh_resolve_output). */
static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst, u16 protocol)
{
	struct hh_cache *hh = NULL;
	struct net_device *dev = dst->dev;

	/* Reuse an existing entry for this protocol when present. */
	for (hh=n->hh; hh; hh = hh->hh_next)
		if (hh->hh_type == protocol)
			break;

	if (!hh && (hh = kmalloc(sizeof(*hh), GFP_ATOMIC)) != NULL) {
		memset(hh, 0, sizeof(struct hh_cache));
		hh->hh_lock = RW_LOCK_UNLOCKED;
		hh->hh_type = protocol;
		atomic_set(&hh->hh_refcnt, 0);
		hh->hh_next = NULL;
		if (dev->hard_header_cache(n, hh)) {
			/* Device declined to fill the header: discard. */
			kfree(hh);
			hh = NULL;
		} else {
			/* One reference for the neighbour's hh list. */
			atomic_inc(&hh->hh_refcnt);
			hh->hh_next = n->hh;
			n->hh = hh;
			if (n->nud_state&NUD_CONNECTED)
				hh->hh_output = n->ops->hh_output;
			else
				hh->hh_output = n->ops->output;
		}
	}
	if (hh) {
		/* Second reference for dst->hh. */
		atomic_inc(&hh->hh_refcnt);
		dst->hh = hh;
	}
}
|
/* This function can be used in contexts, where only old dev_queue_xmit |
worked, f.e. if you want to override normal output path (eql, shaper), |
but resolution is not made yet. |
*/ |
|
int neigh_compat_output(struct sk_buff *skb) |
{ |
struct net_device *dev = skb->dev; |
|
__skb_pull(skb, skb->nh.raw - skb->data); |
|
if (dev->hard_header && |
dev->hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL, skb->len) < 0 && |
dev->rebuild_header(skb)) |
return 0; |
|
return dev_queue_xmit(skb); |
} |
|
/* Slow and careful. */ |
|
/* Output path used while a neighbour may still need resolution.
   Returns 0 if the packet was handled (transmitted or queued for
   later), a negative errno if it had to be dropped. */
int neigh_resolve_output(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;
	struct neighbour *neigh;

	if (!dst || !(neigh = dst->neighbour))
		goto discard;

	__skb_pull(skb, skb->nh.raw - skb->data);

	/* neigh_event_send() returns 0 when the entry is usable now;
	   otherwise it has consumed the skb (queued or freed). */
	if (neigh_event_send(neigh, skb) == 0) {
		int err;
		struct net_device *dev = neigh->dev;
		if (dev->hard_header_cache && dst->hh == NULL) {
			/* First packet on this dst: populate the hh cache.
			   dst->hh is re-checked under the write lock to
			   avoid a duplicate init race. */
			write_lock_bh(&neigh->lock);
			if (dst->hh == NULL)
				neigh_hh_init(neigh, dst, dst->ops->protocol);
			err = dev->hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len);
			write_unlock_bh(&neigh->lock);
		} else {
			/* Only neigh->ha needs protection here. */
			read_lock_bh(&neigh->lock);
			err = dev->hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len);
			read_unlock_bh(&neigh->lock);
		}
		if (err >= 0)
			return neigh->ops->queue_xmit(skb);
		kfree_skb(skb);
		return -EINVAL;
	}
	return 0;

discard:
	NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n", dst, dst ? dst->neighbour : NULL);
	kfree_skb(skb);
	return -EINVAL;
}
|
/* As fast as possible without hh cache */ |
|
int neigh_connected_output(struct sk_buff *skb) |
{ |
int err; |
struct dst_entry *dst = skb->dst; |
struct neighbour *neigh = dst->neighbour; |
struct net_device *dev = neigh->dev; |
|
__skb_pull(skb, skb->nh.raw - skb->data); |
|
read_lock_bh(&neigh->lock); |
err = dev->hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len); |
read_unlock_bh(&neigh->lock); |
if (err >= 0) |
return neigh->ops->queue_xmit(skb); |
kfree_skb(skb); |
return -EINVAL; |
} |
|
/* Timer handler for the table's proxy queue: replay (or drop) queued
   proxy requests whose artificial delay expired, and re-arm the timer
   for the earliest remaining one.
   NOTE(review): skb->stamp.tv_usec is used by pneigh_enqueue() to hold
   the target time in jiffies, not microseconds — confirm no other
   reader of stamp sees these skbs. */
static void neigh_proxy_process(unsigned long arg)
{
	struct neigh_table *tbl = (struct neigh_table *)arg;
	long sched_next = 0;
	unsigned long now = jiffies;
	struct sk_buff *skb;

	spin_lock(&tbl->proxy_queue.lock);

	skb = tbl->proxy_queue.next;

	while (skb != (struct sk_buff*)&tbl->proxy_queue) {
		struct sk_buff *back = skb;
		long tdif = back->stamp.tv_usec - now;

		skb = skb->next;	/* advance before possible unlink */
		if (tdif <= 0) {
			/* Due: feed back through the protocol's proxy_redo
			   hook if the device is still running, else drop. */
			struct net_device *dev = back->dev;
			__skb_unlink(back, &tbl->proxy_queue);
			if (tbl->proxy_redo && netif_running(dev))
				tbl->proxy_redo(back);
			else
				kfree_skb(back);

			dev_put(dev);	/* drop ref taken in pneigh_enqueue() */
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
|
/* Queue a proxy request for delayed handling by neigh_proxy_process(),
   adding a random delay of up to p->proxy_delay jiffies.
   NOTE(review): assumes p->proxy_delay != 0 — the modulo below would
   otherwise divide by zero; confirm callers only enqueue when a
   proxy_delay is configured. */
void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
		    struct sk_buff *skb)
{
	unsigned long now = jiffies;
	long sched_next = net_random()%p->proxy_delay;

	if (tbl->proxy_queue.qlen > p->proxy_qlen) {
		kfree_skb(skb);
		return;
	}
	/* stamp.tv_usec carries the target time in jiffies (read back by
	   neigh_proxy_process). */
	skb->stamp.tv_sec = 0;
	skb->stamp.tv_usec = now + sched_next;

	spin_lock(&tbl->proxy_queue.lock);
	if (del_timer(&tbl->proxy_timer)) {
		/* Timer was pending: keep the earlier of the two expiries. */
		long tval = tbl->proxy_timer.expires - now;
		if (tval < sched_next)
			sched_next = tval;
	}
	dst_release(skb->dst);
	skb->dst = NULL;
	dev_hold(skb->dev);	/* released by neigh_proxy_process() */
	__skb_queue_tail(&tbl->proxy_queue, skb);
	mod_timer(&tbl->proxy_timer, now + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
|
|
struct neigh_parms *neigh_parms_alloc(struct net_device *dev, struct neigh_table *tbl) |
{ |
struct neigh_parms *p; |
p = kmalloc(sizeof(*p), GFP_KERNEL); |
if (p) { |
memcpy(p, &tbl->parms, sizeof(*p)); |
p->tbl = tbl; |
p->reachable_time = neigh_rand_reach_time(p->base_reachable_time); |
if (dev && dev->neigh_setup) { |
if (dev->neigh_setup(dev, p)) { |
kfree(p); |
return NULL; |
} |
} |
p->sysctl_table = NULL; |
write_lock_bh(&tbl->lock); |
p->next = tbl->parms.next; |
tbl->parms.next = p; |
write_unlock_bh(&tbl->lock); |
} |
return p; |
} |
|
void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms) |
{ |
struct neigh_parms **p; |
|
if (parms == NULL || parms == &tbl->parms) |
return; |
write_lock_bh(&tbl->lock); |
for (p = &tbl->parms.next; *p; p = &(*p)->next) { |
if (*p == parms) { |
*p = parms->next; |
write_unlock_bh(&tbl->lock); |
#ifdef CONFIG_SYSCTL |
neigh_sysctl_unregister(parms); |
#endif |
kfree(parms); |
return; |
} |
} |
write_unlock_bh(&tbl->lock); |
NEIGH_PRINTK1("neigh_parms_release: not found\n"); |
} |
|
|
/* Register @tbl in the global neigh_tables list and start its
   machinery: entry slab cache, periodic GC timer/tasklet and the
   proxy-queue timer. */
void neigh_table_init(struct neigh_table *tbl)
{
	unsigned long now = jiffies;

	tbl->parms.reachable_time = neigh_rand_reach_time(tbl->parms.base_reachable_time);

	/* Slab cache for neighbour entries, size rounded up to a
	   16-byte multiple. */
	if (tbl->kmem_cachep == NULL)
		tbl->kmem_cachep = kmem_cache_create(tbl->id,
						     (tbl->entry_size+15)&~15,
						     0, SLAB_HWCACHE_ALIGN,
						     NULL, NULL);

#ifdef CONFIG_SMP
	tasklet_init(&tbl->gc_task, SMP_TIMER_NAME(neigh_periodic_timer), (unsigned long)tbl);
#endif
	init_timer(&tbl->gc_timer);
	tbl->lock = RW_LOCK_UNLOCKED;
	tbl->gc_timer.data = (unsigned long)tbl;
	tbl->gc_timer.function = neigh_periodic_timer;
	tbl->gc_timer.expires = now + tbl->gc_interval + tbl->parms.reachable_time;
	add_timer(&tbl->gc_timer);

	/* Proxy timer is armed lazily by pneigh_enqueue(). */
	init_timer(&tbl->proxy_timer);
	tbl->proxy_timer.data = (unsigned long)tbl;
	tbl->proxy_timer.function = neigh_proxy_process;
	skb_queue_head_init(&tbl->proxy_queue);

	tbl->last_flush = now;
	tbl->last_rand = now + tbl->parms.reachable_time*20;
	/* Publish the table. */
	write_lock(&neigh_tbl_lock);
	tbl->next = neigh_tables;
	neigh_tables = tbl;
	write_unlock(&neigh_tbl_lock);
}
|
/* Unregister @tbl: stop its timers, purge pending proxy packets and
   all neighbour entries, and unlink it from the global table list.
   Always returns 0. */
int neigh_table_clear(struct neigh_table *tbl)
{
	struct neigh_table **tp;

	/* It is not clean... Fix it to unload IPv6 module safely */
	del_timer_sync(&tbl->gc_timer);
	tasklet_kill(&tbl->gc_task);
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	neigh_ifdown(tbl, NULL);	/* NULL dev => flush every entry */
	if (tbl->entries)
		printk(KERN_CRIT "neighbour leakage\n");
	write_lock(&neigh_tbl_lock);
	for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
		if (*tp == tbl) {
			*tp = tbl->next;
			break;
		}
	}
	write_unlock(&neigh_tbl_lock);
#ifdef CONFIG_SYSCTL
	neigh_sysctl_unregister(&tbl->parms);
#endif
	return 0;
}
|
/* rtnetlink RTM_DELNEIGH handler: mark the neighbour (or delete the
   proxy entry) described by @nlh/@arg as NUD_FAILED.  At most one
   table matches ndm_family, so once the loop body is entered it
   always returns. */
int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct ndmsg *ndm = NLMSG_DATA(nlh);
	struct rtattr **nda = arg;
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	int err = 0;

	if (ndm->ndm_ifindex) {
		if ((dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL)
			return -ENODEV;
	}

	read_lock(&neigh_tbl_lock);
	for (tbl=neigh_tables; tbl; tbl = tbl->next) {
		struct neighbour *n;

		if (tbl->family != ndm->ndm_family)
			continue;
		read_unlock(&neigh_tbl_lock);

		/* The destination attribute is mandatory and must match
		   the table's key length exactly. */
		err = -EINVAL;
		if (nda[NDA_DST-1] == NULL ||
		    nda[NDA_DST-1]->rta_len != RTA_LENGTH(tbl->key_len))
			goto out;

		if (ndm->ndm_flags&NTF_PROXY) {
			err = pneigh_delete(tbl, RTA_DATA(nda[NDA_DST-1]), dev);
			goto out;
		}

		/* dev is NULL here, so no dev_put() is needed. */
		if (dev == NULL)
			return -EINVAL;

		n = neigh_lookup(tbl, RTA_DATA(nda[NDA_DST-1]), dev);
		if (n) {
			err = neigh_update(n, NULL, NUD_FAILED, 1, 0);
			neigh_release(n);
		}
out:
		if (dev)
			dev_put(dev);
		return err;
	}
	read_unlock(&neigh_tbl_lock);

	if (dev)
		dev_put(dev);

	return -EADDRNOTAVAIL;
}
|
/* rtnetlink RTM_NEWNEIGH handler: create or update the neighbour (or
   proxy entry) described by @nlh/@arg, honouring the NLM_F_EXCL /
   NLM_F_REPLACE / NLM_F_CREATE semantics.  Like neigh_delete(), the
   loop body returns as soon as the matching table is found. */
int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct ndmsg *ndm = NLMSG_DATA(nlh);
	struct rtattr **nda = arg;
	struct neigh_table *tbl;
	struct net_device *dev = NULL;

	if (ndm->ndm_ifindex) {
		if ((dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL)
			return -ENODEV;
	}

	read_lock(&neigh_tbl_lock);
	for (tbl=neigh_tables; tbl; tbl = tbl->next) {
		int err = 0;
		int override = 1;
		struct neighbour *n;

		if (tbl->family != ndm->ndm_family)
			continue;
		read_unlock(&neigh_tbl_lock);

		/* Destination attribute is mandatory with exact key size. */
		err = -EINVAL;
		if (nda[NDA_DST-1] == NULL ||
		    nda[NDA_DST-1]->rta_len != RTA_LENGTH(tbl->key_len))
			goto out;
		if (ndm->ndm_flags&NTF_PROXY) {
			/* Proxy entry: existence is all that matters. */
			err = -ENOBUFS;
			if (pneigh_lookup(tbl, RTA_DATA(nda[NDA_DST-1]), dev, 1))
				err = 0;
			goto out;
		}
		/* dev is NULL here, so no dev_put() is needed. */
		if (dev == NULL)
			return -EINVAL;
		err = -EINVAL;
		if (nda[NDA_LLADDR-1] != NULL &&
		    nda[NDA_LLADDR-1]->rta_len != RTA_LENGTH(dev->addr_len))
			goto out;
		err = 0;
		n = neigh_lookup(tbl, RTA_DATA(nda[NDA_DST-1]), dev);
		if (n) {
			/* Entry exists: EXCL forbids it, REPLACE allows the
			   link-layer address to be overridden. */
			if (nlh->nlmsg_flags&NLM_F_EXCL)
				err = -EEXIST;
			override = nlh->nlmsg_flags&NLM_F_REPLACE;
		} else if (!(nlh->nlmsg_flags&NLM_F_CREATE))
			err = -ENOENT;
		else {
			n = __neigh_lookup_errno(tbl, RTA_DATA(nda[NDA_DST-1]), dev);
			if (IS_ERR(n)) {
				err = PTR_ERR(n);
				n = NULL;
			}
		}
		if (err == 0) {
			err = neigh_update(n, nda[NDA_LLADDR-1] ? RTA_DATA(nda[NDA_LLADDR-1]) : NULL,
					   ndm->ndm_state,
					   override, 0);
		}
		if (n)
			neigh_release(n);
out:
		if (dev)
			dev_put(dev);
		return err;
	}
	read_unlock(&neigh_tbl_lock);

	if (dev)
		dev_put(dev);
	return -EADDRNOTAVAIL;
}
|
|
/* Serialize neighbour @n as an ndmsg plus attributes into @skb for
   rtnetlink.  Returns skb->len on success, -1 when the skb ran out of
   room (any partial output is trimmed away). */
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *n,
			   u32 pid, u32 seq, int event)
{
	unsigned long now = jiffies;
	struct ndmsg *ndm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb->tail;
	struct nda_cacheinfo ci;
	/* RTA_PUT hides a goto to rtattr_failure; 'locked' tracks whether
	   n->lock must be released on that path. */
	int locked = 0;

	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ndm));
	ndm = NLMSG_DATA(nlh);
	ndm->ndm_family = n->ops->family;
	ndm->ndm_flags = n->flags;
	ndm->ndm_type = n->type;
	ndm->ndm_ifindex = n->dev->ifindex;
	RTA_PUT(skb, NDA_DST, n->tbl->key_len, n->primary_key);
	read_lock_bh(&n->lock);
	locked=1;
	ndm->ndm_state = n->nud_state;
	if (n->nud_state&NUD_VALID)
		RTA_PUT(skb, NDA_LLADDR, n->dev->addr_len, n->ha);
	ci.ndm_used = now - n->used;
	ci.ndm_confirmed = now - n->confirmed;
	ci.ndm_updated = now - n->updated;
	ci.ndm_refcnt = atomic_read(&n->refcnt) - 1;
	read_unlock_bh(&n->lock);
	locked=0;
	RTA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);
	nlh->nlmsg_len = skb->tail - b;
	return skb->len;

nlmsg_failure:
rtattr_failure:
	if (locked)
		read_unlock_bh(&n->lock);
	skb_trim(skb, b - skb->data);	/* discard the partial message */
	return -1;
}
|
|
/* Dump one table's neighbours into @skb, resuming from the hash
   bucket / entry index saved in cb->args[1..2].  Returns skb->len, or
   -1 when the skb filled up (resume state saved for the next call). */
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, struct netlink_callback *cb)
{
	struct neighbour *n;
	int h, s_h;
	int idx, s_idx;

	s_h = cb->args[1];
	s_idx = idx = cb->args[2];
	for (h=0; h <= NEIGH_HASHMASK; h++) {
		if (h < s_h) continue;	/* skip buckets already dumped */
		if (h > s_h)
			s_idx = 0;	/* new bucket: restart entry index */
		read_lock_bh(&tbl->lock);
		for (n = tbl->hash_buckets[h], idx = 0; n;
		     n = n->next, idx++) {
			if (idx < s_idx)
				continue;
			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
					    cb->nlh->nlmsg_seq, RTM_NEWNEIGH) <= 0) {
				/* Out of room: record where to resume. */
				read_unlock_bh(&tbl->lock);
				cb->args[1] = h;
				cb->args[2] = idx;
				return -1;
			}
		}
		read_unlock_bh(&tbl->lock);
	}

	cb->args[1] = h;
	cb->args[2] = idx;
	return skb->len;
}
|
/* rtnetlink RTM_GETNEIGH dump entry point: iterate every registered
   table (filtered by rtgen_family when non-zero), resuming from the
   table number saved in cb->args[0]. */
int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	int t;
	int s_t;
	struct neigh_table *tbl;
	int family = ((struct rtgenmsg*)NLMSG_DATA(cb->nlh))->rtgen_family;

	s_t = cb->args[0];

	read_lock(&neigh_tbl_lock);
	for (tbl=neigh_tables, t=0; tbl; tbl = tbl->next, t++) {
		if (t < s_t) continue;
		if (family && tbl->family != family)
			continue;
		/* Starting a new table: reset the per-table resume state. */
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
		if (neigh_dump_table(tbl, skb, cb) < 0)
			break;
	}
	read_unlock(&neigh_tbl_lock);

	cb->args[0] = t;

	return skb->len;
}
|
#ifdef CONFIG_ARPD |
void neigh_app_ns(struct neighbour *n) |
{ |
struct sk_buff *skb; |
struct nlmsghdr *nlh; |
int size = NLMSG_SPACE(sizeof(struct ndmsg)+256); |
|
skb = alloc_skb(size, GFP_ATOMIC); |
if (!skb) |
return; |
|
if (neigh_fill_info(skb, n, 0, 0, RTM_GETNEIGH) < 0) { |
kfree_skb(skb); |
return; |
} |
nlh = (struct nlmsghdr*)skb->data; |
nlh->nlmsg_flags = NLM_F_REQUEST; |
NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH; |
netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC); |
} |
|
static void neigh_app_notify(struct neighbour *n) |
{ |
struct sk_buff *skb; |
struct nlmsghdr *nlh; |
int size = NLMSG_SPACE(sizeof(struct ndmsg)+256); |
|
skb = alloc_skb(size, GFP_ATOMIC); |
if (!skb) |
return; |
|
if (neigh_fill_info(skb, n, 0, 0, RTM_NEWNEIGH) < 0) { |
kfree_skb(skb); |
return; |
} |
nlh = (struct nlmsghdr*)skb->data; |
NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH; |
netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC); |
} |
|
#endif /* CONFIG_ARPD */ |
|
#ifdef CONFIG_SYSCTL |
|
/* Template for the per-protocol / per-device neighbour sysctl tree:
   net/<proto>/neigh/{<dev>|default}/<variables>.
   neigh_sysctl_register() copies this template and patches in the
   data pointers, directory names and ctl ids. */
struct neigh_sysctl_table
{
	struct ctl_table_header *sysctl_header;
	ctl_table neigh_vars[17];	/* 16 variables + terminator */
	ctl_table neigh_dev[2];		/* device (or "default") dir */
	ctl_table neigh_neigh_dir[2];	/* "neigh" dir */
	ctl_table neigh_proto_dir[2];	/* protocol dir, name patched in */
	ctl_table neigh_root_dir[2];	/* "net" root */
} neigh_sysctl_template = {
	NULL,
	{{NET_NEIGH_MCAST_SOLICIT, "mcast_solicit",
	 NULL, sizeof(int), 0644, NULL,
	 &proc_dointvec},
	{NET_NEIGH_UCAST_SOLICIT, "ucast_solicit",
	 NULL, sizeof(int), 0644, NULL,
	 &proc_dointvec},
	{NET_NEIGH_APP_SOLICIT, "app_solicit",
	 NULL, sizeof(int), 0644, NULL,
	 &proc_dointvec},
	{NET_NEIGH_RETRANS_TIME, "retrans_time",
	 NULL, sizeof(int), 0644, NULL,
	 &proc_dointvec},
	{NET_NEIGH_REACHABLE_TIME, "base_reachable_time",
	 NULL, sizeof(int), 0644, NULL,
	 &proc_dointvec_jiffies},
	{NET_NEIGH_DELAY_PROBE_TIME, "delay_first_probe_time",
	 NULL, sizeof(int), 0644, NULL,
	 &proc_dointvec_jiffies},
	{NET_NEIGH_GC_STALE_TIME, "gc_stale_time",
	 NULL, sizeof(int), 0644, NULL,
	 &proc_dointvec_jiffies},
	{NET_NEIGH_UNRES_QLEN, "unres_qlen",
	 NULL, sizeof(int), 0644, NULL,
	 &proc_dointvec},
	{NET_NEIGH_PROXY_QLEN, "proxy_qlen",
	 NULL, sizeof(int), 0644, NULL,
	 &proc_dointvec},
	{NET_NEIGH_ANYCAST_DELAY, "anycast_delay",
	 NULL, sizeof(int), 0644, NULL,
	 &proc_dointvec},
	{NET_NEIGH_PROXY_DELAY, "proxy_delay",
	 NULL, sizeof(int), 0644, NULL,
	 &proc_dointvec},
	{NET_NEIGH_LOCKTIME, "locktime",
	 NULL, sizeof(int), 0644, NULL,
	 &proc_dointvec},
	/* The last four entries are table-wide GC knobs, exposed only
	   for the "default" directory (see neigh_sysctl_register). */
	{NET_NEIGH_GC_INTERVAL, "gc_interval",
	 NULL, sizeof(int), 0644, NULL,
	 &proc_dointvec_jiffies},
	{NET_NEIGH_GC_THRESH1, "gc_thresh1",
	 NULL, sizeof(int), 0644, NULL,
	 &proc_dointvec},
	{NET_NEIGH_GC_THRESH2, "gc_thresh2",
	 NULL, sizeof(int), 0644, NULL,
	 &proc_dointvec},
	{NET_NEIGH_GC_THRESH3, "gc_thresh3",
	 NULL, sizeof(int), 0644, NULL,
	 &proc_dointvec},
	 {0}},

	{{NET_PROTO_CONF_DEFAULT, "default", NULL, 0, 0555, NULL},{0}},
	{{0, "neigh", NULL, 0, 0555, NULL},{0}},
	{{0, NULL, NULL, 0, 0555, NULL},{0}},
	{{CTL_NET, "net", NULL, 0, 0555, NULL},{0}}
};
|
/* Build and register the sysctl subtree for parameter block @p under
   net/<p_name>/neigh/{<dev->name>|default}.  Returns 0 on success,
   -ENOBUFS on allocation or registration failure. */
int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
			  int p_id, int pdev_id, char *p_name)
{
	struct neigh_sysctl_table *t;

	t = kmalloc(sizeof(*t), GFP_KERNEL);
	if (t == NULL)
		return -ENOBUFS;
	memcpy(t, &neigh_sysctl_template, sizeof(*t));
	/* Wire the first twelve variables to the fields of @p. */
	t->neigh_vars[0].data = &p->mcast_probes;
	t->neigh_vars[1].data = &p->ucast_probes;
	t->neigh_vars[2].data = &p->app_probes;
	t->neigh_vars[3].data = &p->retrans_time;
	t->neigh_vars[4].data = &p->base_reachable_time;
	t->neigh_vars[5].data = &p->delay_probe_time;
	t->neigh_vars[6].data = &p->gc_staletime;
	t->neigh_vars[7].data = &p->queue_len;
	t->neigh_vars[8].data = &p->proxy_qlen;
	t->neigh_vars[9].data = &p->anycast_delay;
	t->neigh_vars[10].data = &p->proxy_delay;
	t->neigh_vars[11].data = &p->locktime;
	if (dev) {
		/* Per-device directory: no table-wide GC entries. */
		t->neigh_dev[0].procname = dev->name;
		t->neigh_dev[0].ctl_name = dev->ifindex;
		memset(&t->neigh_vars[12], 0, sizeof(ctl_table));
	} else {
		/* "default" directory: expose four table-wide GC knobs.
		   NOTE(review): this assumes four ints live immediately
		   after the neigh_parms embedded in the table — verify
		   against struct neigh_table's layout. */
		t->neigh_vars[12].data = (int*)(p+1);
		t->neigh_vars[13].data = (int*)(p+1) + 1;
		t->neigh_vars[14].data = (int*)(p+1) + 2;
		t->neigh_vars[15].data = (int*)(p+1) + 3;
	}
	t->neigh_neigh_dir[0].ctl_name = pdev_id;

	t->neigh_proto_dir[0].procname = p_name;
	t->neigh_proto_dir[0].ctl_name = p_id;

	/* Chain the directories: net -> proto -> neigh -> dev -> vars. */
	t->neigh_dev[0].child = t->neigh_vars;
	t->neigh_neigh_dir[0].child = t->neigh_dev;
	t->neigh_proto_dir[0].child = t->neigh_neigh_dir;
	t->neigh_root_dir[0].child = t->neigh_proto_dir;

	t->sysctl_header = register_sysctl_table(t->neigh_root_dir, 0);
	if (t->sysctl_header == NULL) {
		kfree(t);
		return -ENOBUFS;
	}
	p->sysctl_table = t;
	return 0;
}
|
void neigh_sysctl_unregister(struct neigh_parms *p) |
{ |
if (p->sysctl_table) { |
struct neigh_sysctl_table *t = p->sysctl_table; |
p->sysctl_table = NULL; |
unregister_sysctl_table(t->sysctl_header); |
kfree(t); |
} |
} |
|
#endif /* CONFIG_SYSCTL */ |
/scm.c
0,0 → 1,273
/* scm.c - Socket level control messages processing. |
* |
* Author: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> |
* Alignment and value checking mods by Craig Metz |
* |
* This program is free software; you can redistribute it and/or |
* modify it under the terms of the GNU General Public License |
* as published by the Free Software Foundation; either version |
* 2 of the License, or (at your option) any later version. |
*/ |
|
#include <linux/signal.h> |
#include <linux/errno.h> |
#include <linux/sched.h> |
#include <linux/mm.h> |
#include <linux/kernel.h> |
#include <linux/major.h> |
#include <linux/stat.h> |
#include <linux/socket.h> |
#include <linux/file.h> |
#include <linux/fcntl.h> |
#include <linux/net.h> |
#include <linux/interrupt.h> |
#include <linux/netdevice.h> |
|
#include <asm/system.h> |
#include <asm/uaccess.h> |
|
#include <net/protocol.h> |
#include <linux/skbuff.h> |
#include <net/sock.h> |
#include <net/scm.h> |
|
|
/* |
* Only allow a user to send credentials, that they could set with |
* setu(g)id. |
*/ |
|
static __inline__ int scm_check_creds(struct ucred *creds) |
{ |
if ((creds->pid == current->pid || capable(CAP_SYS_ADMIN)) && |
((creds->uid == current->uid || creds->uid == current->euid || |
creds->uid == current->suid) || capable(CAP_SETUID)) && |
((creds->gid == current->gid || creds->gid == current->egid || |
creds->gid == current->sgid) || capable(CAP_SETGID))) { |
return 0; |
} |
return -EPERM; |
} |
|
/* Parse one SCM_RIGHTS control message: validate each descriptor,
   take a reference with fget(), and append the file to *fplp
   (allocated on first use).  Returns the number of fds consumed,
   0 for an empty message, or a negative errno.  On -EBADF the files
   already acquired remain in the list (with count updated), so the
   caller's scm_destroy() path releases them. */
static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
{
	int *fdp = (int*)CMSG_DATA(cmsg);
	struct scm_fp_list *fpl = *fplp;
	struct file **fpp;
	int i, num;

	num = (cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)))/sizeof(int);

	if (num <= 0)
		return 0;

	if (num > SCM_MAX_FD)
		return -EINVAL;

	if (!fpl)
	{
		fpl = kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL);
		if (!fpl)
			return -ENOMEM;
		*fplp = fpl;
		fpl->count = 0;
	}
	fpp = &fpl->fp[fpl->count];

	/* Guard the combined total across multiple SCM_RIGHTS cmsgs. */
	if (fpl->count + num > SCM_MAX_FD)
		return -EINVAL;

	/*
	 *	Verify the descriptors and increment the usage count.
	 */
	 
	for (i=0; i< num; i++)
	{
		int fd = fdp[i];
		struct file *file;

		if (fd < 0 || !(file = fget(fd)))
			return -EBADF;
		*fpp++ = file;
		fpl->count++;
	}
	return num;
}
|
void __scm_destroy(struct scm_cookie *scm) |
{ |
struct scm_fp_list *fpl = scm->fp; |
int i; |
|
if (fpl) { |
scm->fp = NULL; |
for (i=fpl->count-1; i>=0; i--) |
fput(fpl->fp[i]); |
kfree(fpl); |
} |
} |
|
/* Walk the control messages of a sendmsg() call and collect the
   SOL_SOCKET ones (SCM_RIGHTS descriptors, SCM_CREDENTIALS) into @p.
   Returns 0 on success; on any error everything gathered so far is
   destroyed and a negative errno is returned. */
int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
{
	struct cmsghdr *cmsg;
	int err;

	for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg))
	{
		err = -EINVAL;

		/* Verify that cmsg_len is at least sizeof(struct cmsghdr) */
		/* The first check was omitted in <= 2.2.5. The reasoning was
		   that parser checks cmsg_len in any case, so that
		   additional check would be work duplication.
		   But if cmsg_level is not SOL_SOCKET, we do not check 
		   for too short ancillary data object at all! Oops.
		   OK, let's add it...
		 */
		if (cmsg->cmsg_len < sizeof(struct cmsghdr) ||
		    (unsigned long)(((char*)cmsg - (char*)msg->msg_control)
				    + cmsg->cmsg_len) > msg->msg_controllen)
			goto error;

		/* Non-SOL_SOCKET messages are left for the protocol. */
		if (cmsg->cmsg_level != SOL_SOCKET)
			continue;

		switch (cmsg->cmsg_type)
		{
		case SCM_RIGHTS:
			err=scm_fp_copy(cmsg, &p->fp);
			if (err<0)
				goto error;
			break;
		case SCM_CREDENTIALS:
			if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct ucred)))
				goto error;
			memcpy(&p->creds, CMSG_DATA(cmsg), sizeof(struct ucred));
			err = scm_check_creds(&p->creds);
			if (err)
				goto error;
			break;
		default:
			goto error;
		}
	}

	/* scm_fp_copy() may have allocated a list that ended up empty. */
	if (p->fp && !p->fp->count)
	{
		kfree(p->fp);
		p->fp = NULL;
	}
	return 0;
	
error:
	scm_destroy(p);
	return err;
}
|
int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data) |
{ |
struct cmsghdr *cm = (struct cmsghdr*)msg->msg_control; |
struct cmsghdr cmhdr; |
int cmlen = CMSG_LEN(len); |
int err; |
|
if (cm==NULL || msg->msg_controllen < sizeof(*cm)) { |
msg->msg_flags |= MSG_CTRUNC; |
return 0; /* XXX: return error? check spec. */ |
} |
if (msg->msg_controllen < cmlen) { |
msg->msg_flags |= MSG_CTRUNC; |
cmlen = msg->msg_controllen; |
} |
cmhdr.cmsg_level = level; |
cmhdr.cmsg_type = type; |
cmhdr.cmsg_len = cmlen; |
|
err = -EFAULT; |
if (copy_to_user(cm, &cmhdr, sizeof cmhdr)) |
goto out; |
if (copy_to_user(CMSG_DATA(cm), data, cmlen - sizeof(struct cmsghdr))) |
goto out; |
cmlen = CMSG_SPACE(len); |
msg->msg_control += cmlen; |
msg->msg_controllen -= cmlen; |
err = 0; |
out: |
return err; |
} |
|
/* Deliver received SCM_RIGHTS files to the process: allocate a new fd
   for each file that fits in the user control buffer, write the fd
   numbers into the cmsg payload, and fill in the cmsg header.  Sets
   MSG_CTRUNC when not every file could be delivered.  Always releases
   the scm file list. */
void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
{
	struct cmsghdr *cm = (struct cmsghdr*)msg->msg_control;

	int fdmax = 0;
	int fdnum = scm->fp->count;
	struct file **fp = scm->fp->fp;
	int *cmfptr;
	int err = 0, i;

	/* How many fds fit in the remaining control buffer. */
	if (msg->msg_controllen > sizeof(struct cmsghdr))
		fdmax = ((msg->msg_controllen - sizeof(struct cmsghdr))
			 / sizeof(int));

	if (fdnum < fdmax)
		fdmax = fdnum;

	for (i=0, cmfptr=(int*)CMSG_DATA(cm); i<fdmax; i++, cmfptr++)
	{
		int new_fd;
		err = get_unused_fd();
		if (err < 0)
			break;
		new_fd = err;
		/* Publish the fd number before installing the file, so a
		   put_user failure can still roll back cleanly. */
		err = put_user(new_fd, cmfptr);
		if (err) {
			put_unused_fd(new_fd);
			break;
		}
		/* Bump the usage count and install the file. */
		get_file(fp[i]);
		fd_install(new_fd, fp[i]);
	}

	if (i > 0)
	{
		/* At least one fd was delivered: write the cmsg header and
		   consume the aligned space. */
		int cmlen = CMSG_LEN(i*sizeof(int));
		if (!err)
			err = put_user(SOL_SOCKET, &cm->cmsg_level);
		if (!err)
			err = put_user(SCM_RIGHTS, &cm->cmsg_type);
		if (!err)
			err = put_user(cmlen, &cm->cmsg_len);
		if (!err) {
			cmlen = CMSG_SPACE(i*sizeof(int));
			msg->msg_control += cmlen;
			msg->msg_controllen -= cmlen;
		}
	}
	if (i < fdnum || (fdnum && fdmax <= 0))
		msg->msg_flags |= MSG_CTRUNC;

	/*
	 * All of the files that fit in the message have had their
	 * usage counts incremented, so we just free the list.
	 */
	__scm_destroy(scm);
}
|
struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl) |
{ |
struct scm_fp_list *new_fpl; |
int i; |
|
if (!fpl) |
return NULL; |
|
new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL); |
if (new_fpl) { |
for (i=fpl->count-1; i>=0; i--) |
get_file(fpl->fp[i]); |
memcpy(new_fpl, fpl, sizeof(*fpl)); |
} |
return new_fpl; |
} |
/filter.c
0,0 → 1,497
/* |
* Linux Socket Filter - Kernel level socket filtering |
* |
* Author: |
* Jay Schulist <jschlst@samba.org> |
* |
* Based on the design of: |
* - The Berkeley Packet Filter |
* |
* This program is free software; you can redistribute it and/or |
* modify it under the terms of the GNU General Public License |
* as published by the Free Software Foundation; either version |
* 2 of the License, or (at your option) any later version. |
* |
* Andi Kleen - Fix a few bad bugs and races. |
*/ |
|
#include <linux/config.h> |
#if defined(CONFIG_FILTER) |
|
#include <linux/module.h> |
#include <linux/types.h> |
#include <linux/sched.h> |
#include <linux/mm.h> |
#include <linux/fcntl.h> |
#include <linux/socket.h> |
#include <linux/in.h> |
#include <linux/inet.h> |
#include <linux/netdevice.h> |
#include <linux/if_packet.h> |
#include <net/ip.h> |
#include <net/protocol.h> |
#include <linux/skbuff.h> |
#include <net/sock.h> |
#include <linux/errno.h> |
#include <linux/timer.h> |
#include <asm/system.h> |
#include <asm/uaccess.h> |
#include <linux/filter.h> |
|
/* No hurry in this branch */ |
|
static u8 *load_pointer(struct sk_buff *skb, int k) |
{ |
u8 *ptr = NULL; |
|
if (k>=SKF_NET_OFF) |
ptr = skb->nh.raw + k - SKF_NET_OFF; |
else if (k>=SKF_LL_OFF) |
ptr = skb->mac.raw + k - SKF_LL_OFF; |
|
if (ptr >= skb->head && ptr < skb->tail) |
return ptr; |
return NULL; |
} |
|
/** |
* sk_run_filter - run a filter on a socket |
* @skb: buffer to run the filter on |
* @filter: filter to apply |
* @flen: length of filter |
* |
* Decode and apply filter instructions to the skb->data. |
* Return length to keep, 0 for none. skb is the data we are |
* filtering, filter is the array of filter instructions, and |
* len is the number of filter blocks in the array. |
*/ |
|
int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
{
	unsigned char *data = skb->data;
	/* len is UNSIGNED. Byte wide insns relies only on implicit
	   type casts to prevent reading arbitrary memory locations.
	 */
	unsigned int len = skb->len-skb->data_len;	/* linear part only */
	struct sock_filter *fentry;	/* We walk down these */
	u32 A = 0;			/* Accumulator */
	u32 X = 0;			/* Index Register */
	u32 mem[BPF_MEMWORDS];		/* Scratch Memory Store */
	int k;
	int pc;

	/*
	 * Process array of filter instructions.
	 */

	for(pc = 0; pc < flen; pc++)
	{
		fentry = &filter[pc];
			
		switch(fentry->code)
		{
			case BPF_ALU|BPF_ADD|BPF_X:
				A += X;
				continue;

			case BPF_ALU|BPF_ADD|BPF_K:
				A += fentry->k;
				continue;

			case BPF_ALU|BPF_SUB|BPF_X:
				A -= X;
				continue;

			case BPF_ALU|BPF_SUB|BPF_K:
				A -= fentry->k;
				continue;

			case BPF_ALU|BPF_MUL|BPF_X:
				A *= X;
				continue;

			case BPF_ALU|BPF_MUL|BPF_K:
				A *= fentry->k;
				continue;

			case BPF_ALU|BPF_DIV|BPF_X:
				/* Division by zero terminates the filter. */
				if(X == 0)
					return (0);
				A /= X;
				continue;

			case BPF_ALU|BPF_DIV|BPF_K:
				if(fentry->k == 0)
					return (0);
				A /= fentry->k;
				continue;

			case BPF_ALU|BPF_AND|BPF_X:
				A &= X;
				continue;

			case BPF_ALU|BPF_AND|BPF_K:
				A &= fentry->k;
				continue;

			case BPF_ALU|BPF_OR|BPF_X:
				A |= X;
				continue;

			case BPF_ALU|BPF_OR|BPF_K:
				A |= fentry->k;
				continue;

			case BPF_ALU|BPF_LSH|BPF_X:
				A <<= X;
				continue;

			case BPF_ALU|BPF_LSH|BPF_K:
				A <<= fentry->k;
				continue;

			case BPF_ALU|BPF_RSH|BPF_X:
				A >>= X;
				continue;

			case BPF_ALU|BPF_RSH|BPF_K:
				A >>= fentry->k;
				continue;

			case BPF_ALU|BPF_NEG:
				A = -A;
				continue;

			/* Jump offsets are relative to the NEXT instruction
			   (pc is incremented again by the for loop). */
			case BPF_JMP|BPF_JA:
				pc += fentry->k;
				continue;

			case BPF_JMP|BPF_JGT|BPF_K:
				pc += (A > fentry->k) ? fentry->jt : fentry->jf;
				continue;

			case BPF_JMP|BPF_JGE|BPF_K:
				pc += (A >= fentry->k) ? fentry->jt : fentry->jf;
				continue;

			case BPF_JMP|BPF_JEQ|BPF_K:
				pc += (A == fentry->k) ? fentry->jt : fentry->jf;
				continue;

			case BPF_JMP|BPF_JSET|BPF_K:
				pc += (A & fentry->k) ? fentry->jt : fentry->jf;
				continue;

			case BPF_JMP|BPF_JGT|BPF_X:
				pc += (A > X) ? fentry->jt : fentry->jf;
				continue;

			case BPF_JMP|BPF_JGE|BPF_X:
				pc += (A >= X) ? fentry->jt : fentry->jf;
				continue;

			case BPF_JMP|BPF_JEQ|BPF_X:
				pc += (A == X) ? fentry->jt : fentry->jf;
				continue;

			case BPF_JMP|BPF_JSET|BPF_X:
				pc += (A & X) ? fentry->jt : fentry->jf;
				continue;

			/* Packet loads: fast path reads the linear data;
			   negative k is either an ancillary-data offset
			   (>= SKF_AD_OFF; handled by the break into the
			   switch below) or a mac/net-header relative offset;
			   non-linear data goes through skb_copy_bits(). */
			case BPF_LD|BPF_W|BPF_ABS:
				k = fentry->k;
 load_w:
				if(k >= 0 && (unsigned int)(k+sizeof(u32)) <= len) {
					A = ntohl(*(u32*)&data[k]);
					continue;
				}
				if (k<0) {
					u8 *ptr;

					if (k>=SKF_AD_OFF)
						break;
					if ((ptr = load_pointer(skb, k)) != NULL) {
						A = ntohl(*(u32*)ptr);
						continue;
					}
				} else {
					u32 tmp;
					if (!skb_copy_bits(skb, k, &tmp, 4)) {
						A = ntohl(tmp);
						continue;
					}
				}
				return 0;

			case BPF_LD|BPF_H|BPF_ABS:
				k = fentry->k;
 load_h:
				if(k >= 0 && (unsigned int) (k + sizeof(u16)) <= len) {
					A = ntohs(*(u16*)&data[k]);
					continue;
				}
				if (k<0) {
					u8 *ptr;

					if (k>=SKF_AD_OFF)
						break;
					if ((ptr = load_pointer(skb, k)) != NULL) {
						A = ntohs(*(u16*)ptr);
						continue;
					}
				} else {
					u16 tmp;
					if (!skb_copy_bits(skb, k, &tmp, 2)) {
						A = ntohs(tmp);
						continue;
					}
				}
				return 0;

			case BPF_LD|BPF_B|BPF_ABS:
				k = fentry->k;
load_b:
				if(k >= 0 && (unsigned int)k < len) {
					A = data[k];
					continue;
				}
				if (k<0) {
					u8 *ptr;

					if (k>=SKF_AD_OFF)
						break;
					if ((ptr = load_pointer(skb, k)) != NULL) {
						A = *ptr;
						continue;
					}
				} else {
					u8 tmp;
					if (!skb_copy_bits(skb, k, &tmp, 1)) {
						A = tmp;
						continue;
					}
				}
				return 0;

			case BPF_LD|BPF_W|BPF_LEN:
				A = len;
				continue;

			case BPF_LDX|BPF_W|BPF_LEN:
				X = len;
				continue;

			case BPF_LD|BPF_W|BPF_IND:
				k = X + fentry->k;
				goto load_w;

			case BPF_LD|BPF_H|BPF_IND:
				k = X + fentry->k;
				goto load_h;

			case BPF_LD|BPF_B|BPF_IND:
				k = X + fentry->k;
				goto load_b;

			case BPF_LDX|BPF_B|BPF_MSH:
				/* X = 4*(P[k] & 0xf): IP header length idiom. */
				if(fentry->k >= len)
					return (0);
				X = (data[fentry->k] & 0xf) << 2;
				continue;

			case BPF_LD|BPF_IMM:
				A = fentry->k;
				continue;

			case BPF_LDX|BPF_IMM:
				X = fentry->k;
				continue;

			case BPF_LD|BPF_MEM:
				A = mem[fentry->k];
				continue;

			case BPF_LDX|BPF_MEM:
				X = mem[fentry->k];
				continue;

			case BPF_MISC|BPF_TAX:
				X = A;
				continue;

			case BPF_MISC|BPF_TXA:
				A = X;
				continue;

			case BPF_RET|BPF_K:
				return ((unsigned int)fentry->k);

			case BPF_RET|BPF_A:
				return ((unsigned int)A);

			case BPF_ST:
				mem[fentry->k] = A;
				continue;

			case BPF_STX:
				mem[fentry->k] = X;
				continue;

			default:
				/* Invalid instruction counts as RET */
				return (0);
		}

		/* Handle ancillary data, which are impossible
		   (or very difficult) to get parsing packet contents.
		 */
		/* Reached only via 'break' from the load cases above
		   with k >= SKF_AD_OFF. */
		switch (k-SKF_AD_OFF) {
		case SKF_AD_PROTOCOL:
			A = htons(skb->protocol);
			continue;
		case SKF_AD_PKTTYPE:
			A = skb->pkt_type;
			continue;
		case SKF_AD_IFINDEX:
			A = skb->dev->ifindex;
			continue;
		default:
			return 0;
		}
	}

	return (0);
}
|
/** |
* sk_chk_filter - verify socket filter code |
* @filter: filter to verify |
* @flen: length of filter |
* |
* Check the user's filter code. If we let some ugly |
* filter code slip through kaboom! The filter must contain |
* no references or jumps that are out of range, no illegal instructions |
* and no backward jumps. It must end with a RET instruction |
* |
* Returns 0 if the rule set is legal or a negative errno code if not. |
*/ |
|
int sk_chk_filter(struct sock_filter *filter, int flen) |
{ |
struct sock_filter *ftest; |
int pc; |
|
if ((unsigned int) flen >= (~0U / sizeof(struct sock_filter))) |
return -EINVAL; |
|
/* |
* Check the filter code now. |
*/ |
for(pc = 0; pc < flen; pc++) |
{ |
/* |
* All jumps are forward as they are not signed |
*/ |
|
ftest = &filter[pc]; |
if(BPF_CLASS(ftest->code) == BPF_JMP) |
{ |
/* |
* But they mustn't jump off the end. |
*/ |
if(BPF_OP(ftest->code) == BPF_JA) |
{ |
/* Note, the large ftest->k might cause |
loops. Compare this with conditional |
jumps below, where offsets are limited. --ANK (981016) |
*/ |
if (ftest->k >= (unsigned)(flen-pc-1)) |
return -EINVAL; |
} |
else |
{ |
/* |
* For conditionals both must be safe |
*/ |
if(pc + ftest->jt +1 >= flen || pc + ftest->jf +1 >= flen) |
return -EINVAL; |
} |
} |
|
/* |
* Check that memory operations use valid addresses. |
*/ |
|
if (ftest->k >= BPF_MEMWORDS) |
{ |
/* |
* But it might not be a memory operation... |
*/ |
switch (ftest->code) { |
case BPF_ST: |
case BPF_STX: |
case BPF_LD|BPF_MEM: |
case BPF_LDX|BPF_MEM: |
return -EINVAL; |
} |
} |
} |
|
/* |
* The program must end with a return. We don't care where they |
* jumped within the script (its always forwards) but in the |
* end they _will_ hit this. |
*/ |
|
return (BPF_CLASS(filter[flen - 1].code) == BPF_RET)?0:-EINVAL; |
} |
|
/** |
* sk_attach_filter - attach a socket filter |
* @fprog: the filter program |
* @sk: the socket to use |
* |
* Attach the user's filter code. We first run some sanity checks on |
* it to make sure it does not explode on us later. If an error |
* occurs or there is insufficient memory for the filter a negative |
* errno code is returned. On success the return is zero. |
*/ |
|
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) |
{ |
struct sk_filter *fp; |
unsigned int fsize = sizeof(struct sock_filter) * fprog->len; |
int err; |
|
/* Make sure new filter is there and in the right amounts. */ |
if (fprog->filter == NULL || fprog->len > BPF_MAXINSNS) |
return (-EINVAL); |
|
fp = (struct sk_filter *)sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL); |
if(fp == NULL) |
return (-ENOMEM); |
|
if (copy_from_user(fp->insns, fprog->filter, fsize)) { |
sock_kfree_s(sk, fp, fsize+sizeof(*fp)); |
return -EFAULT; |
} |
|
atomic_set(&fp->refcnt, 1); |
fp->len = fprog->len; |
|
if ((err = sk_chk_filter(fp->insns, fp->len))==0) { |
struct sk_filter *old_fp; |
|
spin_lock_bh(&sk->lock.slock); |
old_fp = sk->filter; |
sk->filter = fp; |
spin_unlock_bh(&sk->lock.slock); |
fp = old_fp; |
} |
|
if (fp) |
sk_filter_release(sk, fp); |
|
return (err); |
} |
#endif /* CONFIG_FILTER */ |
/pktgen.c
0,0 → 1,1405
/* -*-linux-c-*- |
* $Id: pktgen.c,v 1.1.1.1 2004-04-17 22:13:21 phoenix Exp $ |
* pktgen.c: Packet Generator for performance evaluation. |
* |
* Copyright 2001, 2002 by Robert Olsson <robert.olsson@its.uu.se> |
* Uppsala University, Sweden |
* |
 * A tool for loading the network with preconfigured packets.
* The tool is implemented as a linux module. Parameters are output |
* device, IPG (interpacket gap), number of packets, and whether |
* to use multiple SKBs or just the same one. |
* pktgen uses the installed interface's output routine. |
* |
* Additional hacking by: |
* |
* Jens.Laas@data.slu.se |
* Improved by ANK. 010120. |
* Improved by ANK even more. 010212. |
* MAC address typo fixed. 010417 --ro |
* Integrated. 020301 --DaveM |
* Added multiskb option 020301 --DaveM |
* Scaling of results. 020417--sigurdur@linpro.no |
* Significant re-work of the module: |
* * Updated to support generation over multiple interfaces at once |
* by creating 32 /proc/net/pg* files. Each file can be manipulated |
* individually. |
* * Converted many counters to __u64 to allow longer runs. |
* * Allow configuration of ranges, like min/max IP address, MACs, |
* and UDP-ports, for both source and destination, and can |
* set to use a random distribution or sequentially walk the range. |
* * Can now change some values after starting. |
* * Place 12-byte packet in UDP payload with magic number, |
* sequence number, and timestamp. Will write receiver next. |
* * The new changes seem to have a performance impact of around 1%, |
* as far as I can tell. |
* --Ben Greear <greearb@candelatech.com> |
* |
* Renamed multiskb to clone_skb and cleaned up sending core for two distinct |
* skb modes. A clone_skb=0 mode for Ben "ranges" work and a clone_skb != 0 |
* as a "fastpath" with a configurable number of clones after alloc's. |
* |
 * clone_skb=0 means all packets are allocated; this also means that ranges,
 * time stamps etc. can be used. clone_skb=100 means 1 malloc is followed
 * by 100 clones.
* |
* Also moved to /proc/net/pktgen/ |
* --ro |
* |
* Fix refcount off by one if first packet fails, potential null deref, |
* memleak 030710- KJP |
* |
* See Documentation/networking/pktgen.txt for how to use this. |
*/ |
|
#include <linux/module.h> |
#include <linux/kernel.h> |
#include <linux/sched.h> |
#include <linux/types.h> |
#include <linux/string.h> |
#include <linux/ptrace.h> |
#include <linux/errno.h> |
#include <linux/ioport.h> |
#include <linux/slab.h> |
#include <linux/interrupt.h> |
#include <linux/pci.h> |
#include <linux/delay.h> |
#include <linux/init.h> |
#include <linux/inet.h> |
#include <asm/byteorder.h> |
#include <asm/bitops.h> |
#include <asm/io.h> |
#include <asm/dma.h> |
#include <asm/uaccess.h> |
|
#include <linux/in.h> |
#include <linux/ip.h> |
#include <linux/udp.h> |
#include <linux/skbuff.h> |
#include <linux/netdevice.h> |
#include <linux/inetdevice.h> |
#include <linux/rtnetlink.h> |
#include <linux/proc_fs.h> |
#include <linux/if_arp.h> |
#include <net/checksum.h> |
#include <asm/timex.h> |
|
#define cycles() ((u32)get_cycles()) |
|
|
#define VERSION "pktgen version 1.3" |
static char version[] __initdata = |
"pktgen.c: v1.3: Packet Generator for packet performance testing.\n"; |
|
/* Used to help with determining the pkts on receive */ |
|
#define PKTGEN_MAGIC 0xbe9be955 |
|
|
/* Keep information per interface.
 * One of these exists for each /proc/net/pktgen/pg* file; it holds the
 * user-configured parameters, the per-run counters, and the "cur_*"
 * iteration state that mod_cur_headers() advances for each packet.
 */
struct pktgen_info {
	/* Parameters */

	/* If min != max, then we will either do a linear iteration, or
	 * we will do a random selection from within the range.
	 */
	__u32 flags;

#define F_IPSRC_RND (1<<0) /* IP-Src Random */
#define F_IPDST_RND (1<<1) /* IP-Dst Random */
#define F_UDPSRC_RND (1<<2) /* UDP-Src Random */
#define F_UDPDST_RND (1<<3) /* UDP-Dst Random */
#define F_MACSRC_RND (1<<4) /* MAC-Src Random */
#define F_MACDST_RND (1<<5) /* MAC-Dst Random */
#define F_SET_SRCMAC (1<<6) /* Specify-Src-Mac
			       (default is to use Interface's MAC Addr) */
#define F_SET_SRCIP (1<<7) /* Specify-Src-IP
			      (default is to use Interface's IP Addr) */


	int pkt_size; /* = ETH_ZLEN; total on-wire size excluding CRC */
	int nfrags; /* number of page fragments per skb (<= 0 means linear) */
	__u32 ipg; /* Default Interpacket gap in nsec */
	__u64 count; /* Default No packets to send */
	__u64 sofar; /* How many pkts we've sent so far */
	__u64 errors; /* Errors when trying to transmit, pkts will be re-sent */
	struct timeval started_at; /* wall-clock start of the current/last run */
	struct timeval stopped_at; /* wall-clock end of the last run */
	__u64 idle_acc; /* cycles accumulated while idle/spinning in a run */
	__u32 seq_num; /* next sequence number stamped into pktgen_hdr */

	int clone_skb; /* Use multiple SKBs during packet gen. If this number
			* is greater than 1, then that many copies of the same
			* packet will be sent before a new packet is allocated.
			* For instance, if you want to send 1024 identical packets
			* before creating a new packet, set clone_skb to 1024.
			*/
	int busy; /* non-zero while a run is in progress (see proc_write) */
	int do_run_run; /* if this changes to false, the test will stop */

	char outdev[32]; /* output device name */
	char dst_min[32]; /* dest IP range as dotted-quad strings */
	char dst_max[32];
	char src_min[32]; /* source IP range as dotted-quad strings */
	char src_max[32];

	/* If we're doing ranges, random or incremental, then this
	 * defines the min/max for those ranges.
	 */
	__u32 saddr_min; /* inclusive, source IP address */
	__u32 saddr_max; /* exclusive, source IP address */
	__u32 daddr_min; /* inclusive, dest IP address */
	__u32 daddr_max; /* exclusive, dest IP address */

	__u16 udp_src_min; /* inclusive, source UDP port */
	__u16 udp_src_max; /* exclusive, source UDP port */
	__u16 udp_dst_min; /* inclusive, dest UDP port */
	__u16 udp_dst_max; /* exclusive, dest UDP port */

	__u32 src_mac_count; /* How many MACs to iterate through */
	__u32 dst_mac_count; /* How many MACs to iterate through */

	unsigned char dst_mac[6];
	unsigned char src_mac[6];

	/* "cur_*" fields track where we are within the configured ranges;
	 * reset by setup_inject(), advanced by mod_cur_headers().
	 */
	__u32 cur_dst_mac_offset;
	__u32 cur_src_mac_offset;
	__u32 cur_saddr;
	__u32 cur_daddr;
	__u16 cur_udp_dst;
	__u16 cur_udp_src;

	/* Cached 14-byte ethernet header: dst MAC, src MAC, ethertype */
	__u8 hh[14];
	/* = {
		0x00, 0x80, 0xC8, 0x79, 0xB3, 0xCB,

		We fill in SRC address later
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x08, 0x00
	};
	*/
	__u16 pad; /* pad out the hh struct to an even 16 bytes */
	char result[512]; /* human-readable status of the last command/run */

	/* proc file names */
	char fname[80];
	char busy_fname[80];

	struct proc_dir_entry *proc_ent;
	struct proc_dir_entry *busy_proc_ent;
};
|
/* Magic header placed at the start of the UDP payload (see fill_packet)
 * so a receiver can recognise and order pktgen packets.
 */
struct pktgen_hdr {
	__u32 pgh_magic; /* PKTGEN_MAGIC, network byte order */
	__u32 seq_num; /* per-run sequence number, network byte order */
	struct timeval timestamp; /* send time; tv_sec/tv_usec each htonl()ed */
};
|
static int cpu_speed; |
static int debug; |
|
/* Module parameters, defaults. */ |
static int count_d = 100000; |
static int ipg_d = 0; |
static int clone_skb_d = 0; |
|
|
#define MAX_PKTGEN 8 |
static struct pktgen_info pginfos[MAX_PKTGEN]; |
|
|
/** Convert to miliseconds */ |
inline __u64 tv_to_ms(const struct timeval* tv) { |
__u64 ms = tv->tv_usec / 1000; |
ms += (__u64)tv->tv_sec * (__u64)1000; |
return ms; |
} |
|
inline __u64 getCurMs(void) { |
struct timeval tv; |
do_gettimeofday(&tv); |
return tv_to_ms(&tv); |
} |
|
#define PG_PROC_DIR "pktgen" |
static struct proc_dir_entry *proc_dir = 0; |
|
/* Resolve and validate the configured output device, prime the cached
 * ethernet header and the "current" iteration state, and return the
 * device with a reference held (the caller must dev_put() it).
 * Returns NULL on failure with info->result describing the problem.
 */
static struct net_device *setup_inject(struct pktgen_info* info)
{
	struct net_device *odev;

	/* rtnl_lock keeps the device table stable while we look up and
	 * validate odev; the reference is taken before unlocking.
	 */
	rtnl_lock();
	odev = __dev_get_by_name(info->outdev);
	if (!odev) {
		sprintf(info->result, "No such netdevice: \"%s\"", info->outdev);
		goto out_unlock;
	}

	if (odev->type != ARPHRD_ETHER) {
		sprintf(info->result, "Not ethernet device: \"%s\"", info->outdev);
		goto out_unlock;
	}

	if (!netif_running(odev)) {
		sprintf(info->result, "Device is down: \"%s\"", info->outdev);
		goto out_unlock;
	}

	/* Default to the interface's mac if not explicitly set. */
	if (!(info->flags & F_SET_SRCMAC)) {
		memcpy(&(info->hh[6]), odev->dev_addr, 6);
	}
	else {
		memcpy(&(info->hh[6]), info->src_mac, 6);
	}

	/* Set up Dest MAC */
	memcpy(&(info->hh[0]), info->dst_mac, 6);

	/* Source IP range: fall back to the device's first configured
	 * address when no src_min was given.
	 */
	info->saddr_min = 0;
	info->saddr_max = 0;
	if (strlen(info->src_min) == 0) {
		if (odev->ip_ptr) {
			struct in_device *in_dev = odev->ip_ptr;

			if (in_dev->ifa_list) {
				info->saddr_min = in_dev->ifa_list->ifa_address;
				info->saddr_max = info->saddr_min;
			}
		}
	}
	else {
		info->saddr_min = in_aton(info->src_min);
		info->saddr_max = in_aton(info->src_max);
	}

	info->daddr_min = in_aton(info->dst_min);
	info->daddr_max = in_aton(info->dst_max);

	/* Initialize current values. */
	info->cur_dst_mac_offset = 0;
	info->cur_src_mac_offset = 0;
	info->cur_saddr = info->saddr_min;
	info->cur_daddr = info->daddr_min;
	info->cur_udp_dst = info->udp_dst_min;
	info->cur_udp_src = info->udp_src_min;

	/* Hold a reference on the device (open-coded dev_hold()) so it
	 * cannot go away while the test runs; released via dev_put().
	 */
	atomic_inc(&odev->refcnt);
	rtnl_unlock();

	return odev;

out_unlock:
	rtnl_unlock();
	return NULL;
}
|
static void nanospin(int ipg, struct pktgen_info* info) |
{ |
u32 idle_start, idle; |
|
idle_start = cycles(); |
|
for (;;) { |
barrier(); |
idle = cycles() - idle_start; |
if (idle * 1000 >= ipg * cpu_speed) |
break; |
} |
info->idle_acc += idle; |
} |
|
static int calc_mhz(void) |
{ |
struct timeval start, stop; |
u32 start_s, elapsed; |
|
do_gettimeofday(&start); |
start_s = cycles(); |
do { |
barrier(); |
elapsed = cycles() - start_s; |
if (elapsed == 0) |
return 0; |
} while (elapsed < 1000 * 50000); |
do_gettimeofday(&stop); |
return elapsed/(stop.tv_usec-start.tv_usec+1000000*(stop.tv_sec-start.tv_sec)); |
} |
|
static void cycles_calibrate(void) |
{ |
int i; |
|
for (i = 0; i < 3; i++) { |
int res = calc_mhz(); |
if (res > cpu_speed) |
cpu_speed = res; |
} |
} |
|
|
/* Increment/randomize headers according to flags and current values |
* for IP src/dest, UDP src/dst port, MAC-Addr src/dst |
*/ |
static void mod_cur_headers(struct pktgen_info* info) { |
__u32 imn; |
__u32 imx; |
|
/* Deal with source MAC */ |
if (info->src_mac_count > 1) { |
__u32 mc; |
__u32 tmp; |
if (info->flags & F_MACSRC_RND) { |
mc = net_random() % (info->src_mac_count); |
} |
else { |
mc = info->cur_src_mac_offset++; |
if (info->cur_src_mac_offset > info->src_mac_count) { |
info->cur_src_mac_offset = 0; |
} |
} |
|
tmp = info->src_mac[5] + (mc & 0xFF); |
info->hh[11] = tmp; |
tmp = (info->src_mac[4] + ((mc >> 8) & 0xFF) + (tmp >> 8)); |
info->hh[10] = tmp; |
tmp = (info->src_mac[3] + ((mc >> 16) & 0xFF) + (tmp >> 8)); |
info->hh[9] = tmp; |
tmp = (info->src_mac[2] + ((mc >> 24) & 0xFF) + (tmp >> 8)); |
info->hh[8] = tmp; |
tmp = (info->src_mac[1] + (tmp >> 8)); |
info->hh[7] = tmp; |
} |
|
/* Deal with Destination MAC */ |
if (info->dst_mac_count > 1) { |
__u32 mc; |
__u32 tmp; |
if (info->flags & F_MACDST_RND) { |
mc = net_random() % (info->dst_mac_count); |
} |
else { |
mc = info->cur_dst_mac_offset++; |
if (info->cur_dst_mac_offset > info->dst_mac_count) { |
info->cur_dst_mac_offset = 0; |
} |
} |
|
tmp = info->dst_mac[5] + (mc & 0xFF); |
info->hh[5] = tmp; |
tmp = (info->dst_mac[4] + ((mc >> 8) & 0xFF) + (tmp >> 8)); |
info->hh[4] = tmp; |
tmp = (info->dst_mac[3] + ((mc >> 16) & 0xFF) + (tmp >> 8)); |
info->hh[3] = tmp; |
tmp = (info->dst_mac[2] + ((mc >> 24) & 0xFF) + (tmp >> 8)); |
info->hh[2] = tmp; |
tmp = (info->dst_mac[1] + (tmp >> 8)); |
info->hh[1] = tmp; |
} |
|
if (info->udp_src_min < info->udp_src_max) { |
if (info->flags & F_UDPSRC_RND) { |
info->cur_udp_src = ((net_random() % (info->udp_src_max - info->udp_src_min)) |
+ info->udp_src_min); |
} |
else { |
info->cur_udp_src++; |
if (info->cur_udp_src >= info->udp_src_max) { |
info->cur_udp_src = info->udp_src_min; |
} |
} |
} |
|
if (info->udp_dst_min < info->udp_dst_max) { |
if (info->flags & F_UDPDST_RND) { |
info->cur_udp_dst = ((net_random() % (info->udp_dst_max - info->udp_dst_min)) |
+ info->udp_dst_min); |
} |
else { |
info->cur_udp_dst++; |
if (info->cur_udp_dst >= info->udp_dst_max) { |
info->cur_udp_dst = info->udp_dst_min; |
} |
} |
} |
|
if ((imn = ntohl(info->saddr_min)) < (imx = ntohl(info->saddr_max))) { |
__u32 t; |
if (info->flags & F_IPSRC_RND) { |
t = ((net_random() % (imx - imn)) + imn); |
} |
else { |
t = ntohl(info->cur_saddr); |
t++; |
if (t >= imx) { |
t = imn; |
} |
} |
info->cur_saddr = htonl(t); |
} |
|
if ((imn = ntohl(info->daddr_min)) < (imx = ntohl(info->daddr_max))) { |
__u32 t; |
if (info->flags & F_IPDST_RND) { |
t = ((net_random() % (imx - imn)) + imn); |
} |
else { |
t = ntohl(info->cur_daddr); |
t++; |
if (t >= imx) { |
t = imn; |
} |
} |
info->cur_daddr = htonl(t); |
} |
}/* mod_cur_headers */ |
|
|
/* Build one UDP-in-IP-in-ethernet test packet according to the current
 * iteration state (which this also advances via mod_cur_headers()).
 * Returns a freshly allocated skb, or NULL with info->result set on
 * allocation failure.  When possible the payload starts with a
 * pktgen_hdr carrying magic, sequence number and timestamp.
 */
static struct sk_buff *fill_packet(struct net_device *odev, struct pktgen_info* info)
{
	struct sk_buff *skb = NULL;
	__u8 *eth;
	struct udphdr *udph;
	int datalen, iplen;
	struct iphdr *iph;
	struct pktgen_hdr *pgh = NULL;

	/* 16 bytes of headroom are reserved just below; the extra 64
	 * leaves slack for lower layers.
	 */
	skb = alloc_skb(info->pkt_size + 64 + 16, GFP_ATOMIC);
	if (!skb) {
		sprintf(info->result, "No memory");
		return NULL;
	}

	skb_reserve(skb, 16);

	/* Reserve for ethernet and IP header */
	eth = (__u8 *) skb_push(skb, 14);
	iph = (struct iphdr *)skb_put(skb, sizeof(struct iphdr));
	udph = (struct udphdr *)skb_put(skb, sizeof(struct udphdr));

	/* Update any of the values, used when we're incrementing various
	 * fields.
	 */
	mod_cur_headers(info);

	memcpy(eth, info->hh, 14);

	/* Payload size; clamped so the pktgen_hdr always fits. */
	datalen = info->pkt_size - 14 - 20 - 8; /* Eth + IPh + UDPh */
	if (datalen < sizeof(struct pktgen_hdr)) {
		datalen = sizeof(struct pktgen_hdr);
	}

	udph->source = htons(info->cur_udp_src);
	udph->dest = htons(info->cur_udp_dst);
	udph->len = htons(datalen + 8); /* DATA + udphdr */
	udph->check = 0; /* No checksum */

	iph->ihl = 5;
	iph->version = 4;
	iph->ttl = 3;
	iph->tos = 0;
	iph->protocol = IPPROTO_UDP; /* UDP */
	iph->saddr = info->cur_saddr;
	iph->daddr = info->cur_daddr;
	iph->frag_off = 0;
	iplen = 20 + 8 + datalen;
	iph->tot_len = htons(iplen);
	iph->check = 0;
	iph->check = ip_fast_csum((void *) iph, iph->ihl);
	skb->protocol = __constant_htons(ETH_P_IP);
	skb->mac.raw = ((u8 *)iph) - 14;
	skb->dev = odev;
	skb->pkt_type = PACKET_HOST;

	if (info->nfrags <= 0) {
		/* Linear skb: payload lives directly after the UDP header */
		pgh = (struct pktgen_hdr *)skb_put(skb, datalen);
	} else {
		/* Fragmented skb: payload is spread over page fragments;
		 * pgh points into the linear area right after the UDP
		 * header regardless.
		 */
		int frags = info->nfrags;
		int i;

		/* TODO: Verify this is OK...it sure is ugly. --Ben */
		pgh = (struct pktgen_hdr*)(((char*)(udph)) + 8);

		if (frags > MAX_SKB_FRAGS)
			frags = MAX_SKB_FRAGS;
		if (datalen > frags*PAGE_SIZE) {
			/* Overflow beyond the frag capacity goes into the
			 * linear area instead.
			 */
			skb_put(skb, datalen-frags*PAGE_SIZE);
			datalen = frags*PAGE_SIZE;
		}

		i = 0;
		while (datalen > 0) {
			/* NOTE(review): alloc_pages() result is not checked;
			 * a NULL page here would be dereferenced when the skb
			 * is transmitted or freed.  GFP_KERNEL may sleep --
			 * appears safe since fill_packet() is only called
			 * from process context in this file, but confirm.
			 */
			struct page *page = alloc_pages(GFP_KERNEL, 0);
			skb_shinfo(skb)->frags[i].page = page;
			skb_shinfo(skb)->frags[i].page_offset = 0;
			skb_shinfo(skb)->frags[i].size =
				(datalen < PAGE_SIZE ? datalen : PAGE_SIZE);
			datalen -= skb_shinfo(skb)->frags[i].size;
			skb->len += skb_shinfo(skb)->frags[i].size;
			skb->data_len += skb_shinfo(skb)->frags[i].size;
			i++;
			skb_shinfo(skb)->nr_frags = i;
		}

		/* Fill the remaining frag slots by repeatedly halving the
		 * last fragment, so the requested frag count is reached
		 * without changing the total payload length.
		 */
		while (i < frags) {
			int rem;

			if (i == 0)
				break;

			rem = skb_shinfo(skb)->frags[i - 1].size / 2;
			if (rem == 0)
				break;

			skb_shinfo(skb)->frags[i - 1].size -= rem;

			skb_shinfo(skb)->frags[i] = skb_shinfo(skb)->frags[i - 1];
			get_page(skb_shinfo(skb)->frags[i].page);
			skb_shinfo(skb)->frags[i].page = skb_shinfo(skb)->frags[i - 1].page;
			skb_shinfo(skb)->frags[i].page_offset += skb_shinfo(skb)->frags[i - 1].size;
			skb_shinfo(skb)->frags[i].size = rem;
			i++;
			skb_shinfo(skb)->nr_frags = i;
		}
	}

	/* Stamp the time, and sequence number, convert them to network byte order */
	if (pgh) {
		pgh->pgh_magic = htonl(PKTGEN_MAGIC);
		do_gettimeofday(&(pgh->timestamp));
		pgh->timestamp.tv_usec = htonl(pgh->timestamp.tv_usec);
		pgh->timestamp.tv_sec = htonl(pgh->timestamp.tv_sec);
		pgh->seq_num = htonl(info->seq_num);
	}

	return skb;
}
|
|
/* Main transmit loop: sends packets on the device resolved by
 * setup_inject() until 'count' packets have gone out, do_run_run is
 * cleared, or a signal arrives; then formats a human-readable summary
 * into info->result.  Runs synchronously in the proc-write caller's
 * process context.
 */
static void inject(struct pktgen_info* info)
{
	struct net_device *odev = NULL;
	struct sk_buff *skb = NULL;
	__u64 total = 0;
	__u64 idle = 0;
	__u64 lcount = 0;
	int nr_frags = 0;
	int last_ok = 1; /* Was last skb sent?
			  * Or a failed transmit of some sort? This will keep
			  * sequence numbers in order, for example.
			  */
	__u64 fp = 0;		/* number of fill_packet() allocations */
	__u32 fp_tmp = 0;	/* sends since the current skb was built */

	odev = setup_inject(info);
	if (!odev)
		return;

	info->do_run_run = 1; /* Cranke yeself! */
	info->idle_acc = 0;
	info->sofar = 0;
	lcount = info->count;


	/* Build our initial pkt and place it as a re-try pkt. */
	skb = fill_packet(odev, info);
	if (skb == NULL) goto out_reldev;

	do_gettimeofday(&(info->started_at));

	while(info->do_run_run) {

		/* Set a time-stamp, so build a new pkt each time */

		/* Only replace the skb after a successful send, and only
		 * once it has been sent clone_skb times; a failed send is
		 * retried with the same skb.
		 */
		if (last_ok) {
			if (++fp_tmp >= info->clone_skb ) {
				kfree_skb(skb);
				skb = fill_packet(odev, info);
				if (skb == NULL) {
					goto out_reldev;
				}
				fp++;
				fp_tmp = 0; /* reset counter */
			}
		}

		nr_frags = skb_shinfo(skb)->nr_frags;

		/* Transmit directly via the driver, under its xmit lock.
		 * skb->users is bumped first so the skb survives for a
		 * possible re-send even if the driver consumes it.
		 */
		spin_lock_bh(&odev->xmit_lock);
		if (!netif_queue_stopped(odev)) {

			atomic_inc(&skb->users);

			if (odev->hard_start_xmit(skb, odev)) {

				atomic_dec(&skb->users);
				if (net_ratelimit()) {
					printk(KERN_INFO "Hard xmit error\n");
				}
				info->errors++;
				last_ok = 0;
			}
			else {
				last_ok = 1;
				info->sofar++;
				info->seq_num++;
			}
		}
		else {
			/* Re-try it next time */
			last_ok = 0;
		}


		spin_unlock_bh(&odev->xmit_lock);

		if (info->ipg) {
			/* Try not to busy-spin if we have larger sleep times.
			 * TODO: Investigate better ways to do this.
			 */
			if (info->ipg < 10000) { /* 10 usecs or less */
				nanospin(info->ipg, info);
			}
			else if (info->ipg < 10000000) { /* 10ms or less */
				udelay(info->ipg / 1000);
			}
			else {
				mdelay(info->ipg / 1000000);
			}
		}

		if (signal_pending(current)) {
			break;
		}

		/* If lcount is zero, then run forever */
		if ((lcount != 0) && (--lcount == 0)) {
			/* Done: wait for the driver to release its last
			 * reference on our skb before accounting and exit.
			 */
			if (atomic_read(&skb->users) != 1) {
				u32 idle_start, idle;

				idle_start = cycles();
				while (atomic_read(&skb->users) != 1) {
					if (signal_pending(current)) {
						break;
					}
					schedule();
				}
				idle = cycles() - idle_start;
				info->idle_acc += idle;
			}
			break;
		}

		/* Device queue full (or we should yield): idle until the
		 * queue drains, the device goes down, or a signal arrives.
		 */
		if (netif_queue_stopped(odev) || current->need_resched) {
			u32 idle_start, idle;

			idle_start = cycles();
			do {
				if (signal_pending(current)) {
					info->do_run_run = 0;
					break;
				}
				if (!netif_running(odev)) {
					info->do_run_run = 0;
					break;
				}
				if (current->need_resched)
					schedule();
				else
					do_softirq();
			} while (netif_queue_stopped(odev));
			idle = cycles() - idle_start;
			info->idle_acc += idle;
		}
	}/* while we should be running */

	do_gettimeofday(&(info->stopped_at));

	/* Elapsed run time in microseconds */
	total = (info->stopped_at.tv_sec - info->started_at.tv_sec) * 1000000 +
		info->stopped_at.tv_usec - info->started_at.tv_usec;

	/* Convert accumulated idle cycles to microseconds */
	idle = (__u32)(info->idle_acc)/(__u32)(cpu_speed);

	{
		char *p = info->result;
		__u64 pps = (__u32)(info->sofar * 1000) / ((__u32)(total) / 1000);
		__u64 bps = pps * 8 * (info->pkt_size + 4); /* take 32bit ethernet CRC into account */
		p += sprintf(p, "OK: %llu(c%llu+d%llu) usec, %llu (%dbyte,%dfrags) %llupps %lluMb/sec (%llubps) errors: %llu",
			     (unsigned long long) total,
			     (unsigned long long) (total - idle),
			     (unsigned long long) idle,
			     (unsigned long long) info->sofar,
			     skb->len + 4, /* Add 4 to account for the ethernet checksum */
			     nr_frags,
			     (unsigned long long) pps,
			     (unsigned long long) (bps / (u64) 1024 / (u64) 1024),
			     (unsigned long long) bps,
			     (unsigned long long) info->errors
			     );
	}

	kfree_skb(skb);

out_reldev:
	if (odev) {
		dev_put(odev);
		odev = NULL;
	}

	return;

}
|
/* proc/net/pktgen/pg */ |
|
/* /proc read handler for the per-interface "busy" file: reports whether
 * a test run is currently in progress (prints info->busy, 0 or 1).
 * 'data' carries the pginfos[] index.
 */
static int proc_busy_read(char *buf , char **start, off_t offset,
			  int len, int *eof, void *data)
{
	char *p;
	int idx = (int)(long)(data);
	struct pktgen_info* info = NULL;

	if ((idx < 0) || (idx >= MAX_PKTGEN)) {
		/* Fixed: this message previously claimed the error came
		 * from proc_write (copy-paste).
		 */
		printk("ERROR: idx: %i is out of range in proc_busy_read\n", idx);
		return -EINVAL;
	}
	info = &(pginfos[idx]);

	p = buf;
	p += sprintf(p, "%d\n", info->busy);
	*eof = 1;

	return p-buf;
}
|
/* /proc read handler for a pktgen interface file: dumps the configured
 * parameters, the flag bits, the current iteration state and the last
 * run's result.  'data' carries the pginfos[] index.
 */
static int proc_read(char *buf , char **start, off_t offset,
		     int len, int *eof, void *data)
{
	char *p;
	int i;
	int idx = (int)(long)(data);
	struct pktgen_info* info = NULL;
	__u64 sa;
	__u64 stopped;
	__u64 now = getCurMs();

	if ((idx < 0) || (idx >= MAX_PKTGEN)) {
		/* Fixed: this message previously claimed the error came
		 * from proc_write (copy-paste).
		 */
		printk("ERROR: idx: %i is out of range in proc_read\n", idx);
		return -EINVAL;
	}
	info = &(pginfos[idx]);

	p = buf;
	p += sprintf(p, "%s\n", VERSION); /* Help with parsing compatibility */
	p += sprintf(p, "Params: count %llu pkt_size: %u frags: %d ipg: %u clone_skb: %d odev \"%s\"\n",
		     (unsigned long long) info->count,
		     info->pkt_size, info->nfrags, info->ipg,
		     info->clone_skb, info->outdev);
	p += sprintf(p, " dst_min: %s dst_max: %s src_min: %s src_max: %s\n",
		     info->dst_min, info->dst_max, info->src_min, info->src_max);
	p += sprintf(p, " src_mac: ");
	for (i = 0; i < 6; i++) {
		p += sprintf(p, "%02X%s", info->src_mac[i], i == 5 ? " " : ":");
	}
	p += sprintf(p, "dst_mac: ");
	for (i = 0; i < 6; i++) {
		p += sprintf(p, "%02X%s", info->dst_mac[i], i == 5 ? "\n" : ":");
	}
	p += sprintf(p, " udp_src_min: %d udp_src_max: %d udp_dst_min: %d udp_dst_max: %d\n",
		     info->udp_src_min, info->udp_src_max, info->udp_dst_min,
		     info->udp_dst_max);
	p += sprintf(p, " src_mac_count: %d dst_mac_count: %d\n Flags: ",
		     info->src_mac_count, info->dst_mac_count);
	if (info->flags & F_IPSRC_RND) {
		p += sprintf(p, "IPSRC_RND ");
	}
	if (info->flags & F_IPDST_RND) {
		p += sprintf(p, "IPDST_RND ");
	}
	if (info->flags & F_UDPSRC_RND) {
		p += sprintf(p, "UDPSRC_RND ");
	}
	if (info->flags & F_UDPDST_RND) {
		p += sprintf(p, "UDPDST_RND ");
	}
	if (info->flags & F_MACSRC_RND) {
		p += sprintf(p, "MACSRC_RND ");
	}
	if (info->flags & F_MACDST_RND) {
		p += sprintf(p, "MACDST_RND ");
	}
	p += sprintf(p, "\n");

	/* While a run is in progress, report "stopped" as the current
	 * time so the delta reflects the run so far.
	 */
	sa = tv_to_ms(&(info->started_at));
	stopped = tv_to_ms(&(info->stopped_at));
	if (info->do_run_run) {
		stopped = now; /* not really stopped, more like last-running-at */
	}
	p += sprintf(p, "Current:\n pkts-sofar: %llu errors: %llu\n started: %llums stopped: %llums now: %llums idle: %lluns\n",
		     (unsigned long long) info->sofar,
		     (unsigned long long) info->errors,
		     (unsigned long long) sa,
		     (unsigned long long) stopped,
		     (unsigned long long) now,
		     (unsigned long long) info->idle_acc);
	p += sprintf(p, " seq_num: %d cur_dst_mac_offset: %d cur_src_mac_offset: %d\n",
		     info->seq_num, info->cur_dst_mac_offset, info->cur_src_mac_offset);
	p += sprintf(p, " cur_saddr: 0x%x cur_daddr: 0x%x cur_udp_dst: %d cur_udp_src: %d\n",
		     info->cur_saddr, info->cur_daddr, info->cur_udp_dst, info->cur_udp_src);

	if (info->result[0])
		p += sprintf(p, "Result: %s\n", info->result);
	else
		p += sprintf(p, "Result: Idle\n");
	*eof = 1;

	return p - buf;
}
|
static int count_trail_chars(const char *user_buffer, unsigned int maxlen) |
{ |
int i; |
|
for (i = 0; i < maxlen; i++) { |
char c; |
|
if (get_user(c, &user_buffer[i])) |
return -EFAULT; |
switch (c) { |
case '\"': |
case '\n': |
case '\r': |
case '\t': |
case ' ': |
case '=': |
break; |
default: |
goto done; |
}; |
} |
done: |
return i; |
} |
|
static unsigned long num_arg(const char *user_buffer, unsigned long maxlen, |
unsigned long *num) |
{ |
int i = 0; |
|
*num = 0; |
|
for(; i < maxlen; i++) { |
char c; |
|
if (get_user(c, &user_buffer[i])) |
return -EFAULT; |
if ((c >= '0') && (c <= '9')) { |
*num *= 10; |
*num += c -'0'; |
} else |
break; |
} |
return i; |
} |
|
static int strn_len(const char *user_buffer, unsigned int maxlen) |
{ |
int i = 0; |
|
for(; i < maxlen; i++) { |
char c; |
|
if (get_user(c, &user_buffer[i])) |
return -EFAULT; |
switch (c) { |
case '\"': |
case '\n': |
case '\r': |
case '\t': |
case ' ': |
goto done_str; |
default: |
break; |
}; |
} |
done_str: |
return i; |
} |
|
/* /proc write handler: parses a single "name value" command and applies
 * it to pginfos[idx] (idx arrives via 'data').  Returns 'count' on
 * success -- even for some recognised-but-rejected values, with the
 * explanation left in info->result -- or a negative errno.
 */
static int proc_write(struct file *file, const char *user_buffer,
			   unsigned long count, void *data)
{
	int i = 0, max, len;
	char name[16], valstr[32];
	unsigned long value = 0;
	int idx = (int)(long)(data);
	struct pktgen_info* info = NULL;
	char* result = NULL;
	int tmp;

	if ((idx < 0) || (idx >= MAX_PKTGEN)) {
		printk("ERROR: idx: %i is out of range in proc_write\n", idx);
		return -EINVAL;
	}
	info = &(pginfos[idx]);
	result = &(info->result[0]);

	if (count < 1) {
		sprintf(result, "Wrong command format");
		return -EINVAL;
	}

	/* Skip leading separators */
	max = count - i;
	tmp = count_trail_chars(&user_buffer[i], max);
	if (tmp < 0)
		return tmp;
	i += tmp;

	/* Read variable name */

	len = strn_len(&user_buffer[i], sizeof(name) - 1);
	if (len < 0)
		return len;
	memset(name, 0, sizeof(name));
	if (copy_from_user(name, &user_buffer[i], len))
		return -EFAULT;
	i += len;

	/* Skip the separators between name and value */
	max = count -i;
	len = count_trail_chars(&user_buffer[i], max);
	if (len < 0)
		return len;
	i += len;

	if (debug)
		printk("pg: %s,%lu\n", name, count);

	/* Command dispatch: each recognised name consumes its value (if
	 * any), records a status string in 'result', and returns 'count'.
	 */
	if (!strcmp(name, "stop")) {
		if (info->do_run_run) {
			strcpy(result, "Stopping");
		}
		else {
			strcpy(result, "Already stopped...\n");
		}
		info->do_run_run = 0;
		return count;
	}

	if (!strcmp(name, "pkt_size")) {
		len = num_arg(&user_buffer[i], 10, &value);
		if (len < 0)
			return len;
		i += len;
		/* Clamp to the minimum Eth + IP + UDP header size */
		if (value < 14+20+8)
			value = 14+20+8;
		info->pkt_size = value;
		sprintf(result, "OK: pkt_size=%u", info->pkt_size);
		return count;
	}
	if (!strcmp(name, "frags")) {
		len = num_arg(&user_buffer[i], 10, &value);
		if (len < 0)
			return len;
		i += len;
		info->nfrags = value;
		sprintf(result, "OK: frags=%u", info->nfrags);
		return count;
	}
	if (!strcmp(name, "ipg")) {
		len = num_arg(&user_buffer[i], 10, &value);
		if (len < 0)
			return len;
		i += len;
		info->ipg = value;
		sprintf(result, "OK: ipg=%u", info->ipg);
		return count;
	}
	if (!strcmp(name, "udp_src_min")) {
		len = num_arg(&user_buffer[i], 10, &value);
		if (len < 0)
			return len;
		i += len;
		info->udp_src_min = value;
		sprintf(result, "OK: udp_src_min=%u", info->udp_src_min);
		return count;
	}
	if (!strcmp(name, "udp_dst_min")) {
		len = num_arg(&user_buffer[i], 10, &value);
		if (len < 0)
			return len;
		i += len;
		info->udp_dst_min = value;
		sprintf(result, "OK: udp_dst_min=%u", info->udp_dst_min);
		return count;
	}
	if (!strcmp(name, "udp_src_max")) {
		len = num_arg(&user_buffer[i], 10, &value);
		if (len < 0)
			return len;
		i += len;
		info->udp_src_max = value;
		sprintf(result, "OK: udp_src_max=%u", info->udp_src_max);
		return count;
	}
	if (!strcmp(name, "udp_dst_max")) {
		len = num_arg(&user_buffer[i], 10, &value);
		if (len < 0)
			return len;
		i += len;
		info->udp_dst_max = value;
		sprintf(result, "OK: udp_dst_max=%u", info->udp_dst_max);
		return count;
	}
	if (!strcmp(name, "clone_skb")) {
		len = num_arg(&user_buffer[i], 10, &value);
		if (len < 0)
			return len;
		i += len;
		info->clone_skb = value;

		sprintf(result, "OK: clone_skb=%d", info->clone_skb);
		return count;
	}
	if (!strcmp(name, "count")) {
		len = num_arg(&user_buffer[i], 10, &value);
		if (len < 0)
			return len;
		i += len;
		info->count = value;
		sprintf(result, "OK: count=%llu", (unsigned long long) info->count);
		return count;
	}
	if (!strcmp(name, "src_mac_count")) {
		len = num_arg(&user_buffer[i], 10, &value);
		if (len < 0)
			return len;
		i += len;
		info->src_mac_count = value;
		sprintf(result, "OK: src_mac_count=%d", info->src_mac_count);
		return count;
	}
	if (!strcmp(name, "dst_mac_count")) {
		len = num_arg(&user_buffer[i], 10, &value);
		if (len < 0)
			return len;
		i += len;
		info->dst_mac_count = value;
		sprintf(result, "OK: dst_mac_count=%d", info->dst_mac_count);
		return count;
	}
	if (!strcmp(name, "odev")) {
		len = strn_len(&user_buffer[i], sizeof(info->outdev) - 1);
		if (len < 0)
			return len;
		memset(info->outdev, 0, sizeof(info->outdev));
		if (copy_from_user(info->outdev, &user_buffer[i], len))
			return -EFAULT;
		i += len;
		sprintf(result, "OK: odev=%s", info->outdev);
		return count;
	}
	if (!strcmp(name, "flag")) {
		/* Set or (with leading '!') clear one F_* flag by name */
		char f[32];
		len = strn_len(&user_buffer[i], sizeof(f) - 1);
		if (len < 0)
			return len;
		memset(f, 0, 32);
		if (copy_from_user(f, &user_buffer[i], len))
			return -EFAULT;
		i += len;
		if (strcmp(f, "IPSRC_RND") == 0) {
			info->flags |= F_IPSRC_RND;
		}
		else if (strcmp(f, "!IPSRC_RND") == 0) {
			info->flags &= ~F_IPSRC_RND;
		}
		else if (strcmp(f, "IPDST_RND") == 0) {
			info->flags |= F_IPDST_RND;
		}
		else if (strcmp(f, "!IPDST_RND") == 0) {
			info->flags &= ~F_IPDST_RND;
		}
		else if (strcmp(f, "UDPSRC_RND") == 0) {
			info->flags |= F_UDPSRC_RND;
		}
		else if (strcmp(f, "!UDPSRC_RND") == 0) {
			info->flags &= ~F_UDPSRC_RND;
		}
		else if (strcmp(f, "UDPDST_RND") == 0) {
			info->flags |= F_UDPDST_RND;
		}
		else if (strcmp(f, "!UDPDST_RND") == 0) {
			info->flags &= ~F_UDPDST_RND;
		}
		else if (strcmp(f, "MACSRC_RND") == 0) {
			info->flags |= F_MACSRC_RND;
		}
		else if (strcmp(f, "!MACSRC_RND") == 0) {
			info->flags &= ~F_MACSRC_RND;
		}
		else if (strcmp(f, "MACDST_RND") == 0) {
			info->flags |= F_MACDST_RND;
		}
		else if (strcmp(f, "!MACDST_RND") == 0) {
			info->flags &= ~F_MACDST_RND;
		}
		else {
			sprintf(result, "Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s",
				f,
				"IPSRC_RND, IPDST_RND, UDPSRC_RND, UDPDST_RND, MACSRC_RND, MACDST_RND\n");
			return count;
		}
		sprintf(result, "OK: flags=0x%x", info->flags);
		return count;
	}
	if (!strcmp(name, "dst_min") || !strcmp(name, "dst")) {
		len = strn_len(&user_buffer[i], sizeof(info->dst_min) - 1);
		if (len < 0)
			return len;
		memset(info->dst_min, 0, sizeof(info->dst_min));
		if (copy_from_user(info->dst_min, &user_buffer[i], len))
			return -EFAULT;
		if(debug)
			printk("pg: dst_min set to: %s\n", info->dst_min);
		i += len;
		sprintf(result, "OK: dst_min=%s", info->dst_min);
		return count;
	}
	if (!strcmp(name, "dst_max")) {
		len = strn_len(&user_buffer[i], sizeof(info->dst_max) - 1);
		if (len < 0)
			return len;
		memset(info->dst_max, 0, sizeof(info->dst_max));
		if (copy_from_user(info->dst_max, &user_buffer[i], len))
			return -EFAULT;
		if(debug)
			printk("pg: dst_max set to: %s\n", info->dst_max);
		i += len;
		sprintf(result, "OK: dst_max=%s", info->dst_max);
		return count;
	}
	if (!strcmp(name, "src_min")) {
		len = strn_len(&user_buffer[i], sizeof(info->src_min) - 1);
		if (len < 0)
			return len;
		memset(info->src_min, 0, sizeof(info->src_min));
		if (copy_from_user(info->src_min, &user_buffer[i], len))
			return -EFAULT;
		if(debug)
			printk("pg: src_min set to: %s\n", info->src_min);
		i += len;
		sprintf(result, "OK: src_min=%s", info->src_min);
		return count;
	}
	if (!strcmp(name, "src_max")) {
		len = strn_len(&user_buffer[i], sizeof(info->src_max) - 1);
		if (len < 0)
			return len;
		memset(info->src_max, 0, sizeof(info->src_max));
		if (copy_from_user(info->src_max, &user_buffer[i], len))
			return -EFAULT;
		if(debug)
			printk("pg: src_max set to: %s\n", info->src_max);
		i += len;
		sprintf(result, "OK: src_max=%s", info->src_max);
		return count;
	}
	if (!strcmp(name, "dstmac")) {
		char *v = valstr;
		unsigned char *m = info->dst_mac;

		len = strn_len(&user_buffer[i], sizeof(valstr) - 1);
		if (len < 0)
			return len;
		memset(valstr, 0, sizeof(valstr));
		if (copy_from_user(valstr, &user_buffer[i], len))
			return -EFAULT;
		i += len;

		/* Parse "aa:bb:cc:dd:ee:ff"; characters other than hex
		 * digits and ':' are silently ignored.
		 */
		for(*m = 0;*v && m < info->dst_mac + 6; v++) {
			if (*v >= '0' && *v <= '9') {
				*m *= 16;
				*m += *v - '0';
			}
			if (*v >= 'A' && *v <= 'F') {
				*m *= 16;
				*m += *v - 'A' + 10;
			}
			if (*v >= 'a' && *v <= 'f') {
				*m *= 16;
				*m += *v - 'a' + 10;
			}
			if (*v == ':') {
				m++;
				*m = 0;
			}
		}
		sprintf(result, "OK: dstmac");
		return count;
	}
	if (!strcmp(name, "srcmac")) {
		char *v = valstr;
		unsigned char *m = info->src_mac;

		len = strn_len(&user_buffer[i], sizeof(valstr) - 1);
		if (len < 0)
			return len;
		memset(valstr, 0, sizeof(valstr));
		if (copy_from_user(valstr, &user_buffer[i], len))
			return -EFAULT;
		i += len;

		/* Same parser as "dstmac" above */
		for(*m = 0;*v && m < info->src_mac + 6; v++) {
			if (*v >= '0' && *v <= '9') {
				*m *= 16;
				*m += *v - '0';
			}
			if (*v >= 'A' && *v <= 'F') {
				*m *= 16;
				*m += *v - 'A' + 10;
			}
			if (*v >= 'a' && *v <= 'f') {
				*m *= 16;
				*m += *v - 'a' + 10;
			}
			if (*v == ':') {
				m++;
				*m = 0;
			}
		}
		sprintf(result, "OK: srcmac");
		return count;
	}

	if (!strcmp(name, "inject") || !strcmp(name, "start")) {
		/* Runs the generator synchronously: this write does not
		 * return until the run completes or is interrupted.
		 */
		MOD_INC_USE_COUNT;
		if (info->busy) {
			strcpy(info->result, "Already running...\n");
		}
		else {
			info->busy = 1;
			strcpy(info->result, "Starting");
			inject(info);
			info->busy = 0;
		}
		MOD_DEC_USE_COUNT;
		return count;
	}

	sprintf(info->result, "No such parameter \"%s\"", name);
	return -EINVAL;
}
|
|
int create_proc_dir(void) |
{ |
int len; |
/* does proc_dir already exists */ |
len = strlen(PG_PROC_DIR); |
|
for (proc_dir = proc_net->subdir; proc_dir; |
proc_dir=proc_dir->next) { |
if ((proc_dir->namelen == len) && |
(! memcmp(proc_dir->name, PG_PROC_DIR, len))) |
break; |
} |
if (!proc_dir) |
proc_dir = create_proc_entry(PG_PROC_DIR, S_IFDIR, proc_net); |
if (!proc_dir) return -ENODEV; |
return 1; |
} |
|
/*
 * Remove the pktgen directory from /proc/net.
 * Always returns 1; the only caller in this file (cleanup()) ignores it.
 */
int remove_proc_dir(void)
{
	remove_proc_entry(PG_PROC_DIR, proc_net);
	return 1;
}
|
/*
 * Module init: calibrate the cycle counter, create the /proc/net
 * pktgen directory, and register one pair of procfs files per pktgen
 * instance (pgN: read/write control, pg_busyN: read-only status).
 *
 * Returns 0 on success, -EINVAL if no usable cycle counter,
 * -ENODEV if the proc directory cannot be created, -ENOMEM if a
 * per-instance proc entry cannot be created.
 */
static int __init init(void)
{
	int i;
	printk(version);
	cycles_calibrate();
	if (cpu_speed == 0) {
		printk("pktgen: Error: your machine does not have working cycle counter.\n");
		return -EINVAL;
	}

	/* Fix: the return value was previously ignored, so a failed
	 * directory creation cascaded into failing create_proc_entry()
	 * calls below.  Bail out early instead. */
	if (create_proc_dir() < 0) {
		printk("pktgen: Error: cannot create net/%s procfs directory.\n", PG_PROC_DIR);
		return -ENODEV;
	}

	for (i = 0; i<MAX_PKTGEN; i++) {
		/* Per-instance defaults. */
		memset(&(pginfos[i]), 0, sizeof(pginfos[i]));
		pginfos[i].pkt_size = ETH_ZLEN;
		pginfos[i].nfrags = 0;
		pginfos[i].clone_skb = clone_skb_d;
		pginfos[i].ipg = ipg_d;
		pginfos[i].count = count_d;
		pginfos[i].sofar = 0;
		pginfos[i].hh[12] = 0x08; /* fill in protocol. Rest is filled in later. */
		pginfos[i].hh[13] = 0x00;
		pginfos[i].udp_src_min = 9; /* sink NULL */
		pginfos[i].udp_src_max = 9;
		pginfos[i].udp_dst_min = 9;
		pginfos[i].udp_dst_max = 9;

		/* Control file: /proc/net/<dir>/pgN (mode 0600). */
		sprintf(pginfos[i].fname, "net/%s/pg%i", PG_PROC_DIR, i);
		pginfos[i].proc_ent = create_proc_entry(pginfos[i].fname, 0600, 0);
		if (!pginfos[i].proc_ent) {
			printk("pktgen: Error: cannot create net/%s/pg procfs entry.\n", PG_PROC_DIR);
			goto cleanup_mem;
		}
		pginfos[i].proc_ent->read_proc = proc_read;
		pginfos[i].proc_ent->write_proc = proc_write;
		pginfos[i].proc_ent->data = (void*)(long)(i);

		/* Status file: /proc/net/<dir>/pg_busyN (read-only). */
		sprintf(pginfos[i].busy_fname, "net/%s/pg_busy%i", PG_PROC_DIR, i);
		pginfos[i].busy_proc_ent = create_proc_entry(pginfos[i].busy_fname, 0, 0);
		if (!pginfos[i].busy_proc_ent) {
			printk("pktgen: Error: cannot create net/%s/pg_busy procfs entry.\n", PG_PROC_DIR);
			goto cleanup_mem;
		}
		pginfos[i].busy_proc_ent->read_proc = proc_busy_read;
		pginfos[i].busy_proc_ent->data = (void*)(long)(i);
	}
	return 0;

 cleanup_mem:
	/* Undo every proc entry registered before the failure; an empty
	 * fname means that instance was never registered. */
	for (i = 0; i<MAX_PKTGEN; i++) {
		if (strlen(pginfos[i].fname)) {
			remove_proc_entry(pginfos[i].fname, NULL);
		}
		if (strlen(pginfos[i].busy_fname)) {
			remove_proc_entry(pginfos[i].busy_fname, NULL);
		}
	}
	/* Fix: the directory itself was previously leaked on this path. */
	remove_proc_dir();
	return -ENOMEM;
}
|
|
/*
 * Module exit: unregister every per-instance proc entry that init()
 * created, then remove the pktgen proc directory itself.
 */
static void __exit cleanup(void)
{
	int idx;

	/* An empty fname means the entry was never registered. */
	for (idx = 0; idx < MAX_PKTGEN; idx++) {
		if (pginfos[idx].fname[0] != '\0')
			remove_proc_entry(pginfos[idx].fname, NULL);
		if (pginfos[idx].busy_fname[0] != '\0')
			remove_proc_entry(pginfos[idx].busy_fname, NULL);
	}
	remove_proc_dir();
}
|
module_init(init); |
module_exit(cleanup); |
|
MODULE_AUTHOR("Robert Olsson <robert.olsson@its.uu.se"); |
MODULE_DESCRIPTION("Packet Generator tool"); |
MODULE_LICENSE("GPL"); |
MODULE_PARM(count_d, "i"); |
MODULE_PARM(ipg_d, "i"); |
MODULE_PARM(cpu_speed, "i"); |
MODULE_PARM(clone_skb_d, "i"); |
|
|
|
/dst.c
0,0 → 1,219
/* |
* net/dst.c Protocol independent destination cache. |
* |
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> |
* |
*/ |
|
#include <linux/bitops.h> |
#include <linux/types.h> |
#include <linux/kernel.h> |
#include <linux/sched.h> |
#include <linux/mm.h> |
#include <linux/string.h> |
#include <linux/errno.h> |
#include <linux/netdevice.h> |
#include <linux/skbuff.h> |
#include <linux/init.h> |
|
#include <net/dst.h> |
|
/* Locking strategy: |
* 1) Garbage collection state of dead destination cache |
* entries is protected by dst_lock. |
* 2) GC is run only from BH context, and is the only remover |
* of entries. |
* 3) Entries are added to the garbage list from both BH |
* and non-BH context, so local BH disabling is needed. |
* 4) All operations modify state, so a spinlock is used. |
*/ |
/* Singly-linked (via dst->next) list of dead entries awaiting destruction. */
static struct dst_entry *dst_garbage_list;
#if RT_CACHE_DEBUG >= 2
static atomic_t			 dst_total = ATOMIC_INIT(0); /* live entry count (debug) */
#endif
/* Protects dst_garbage_list and the GC timer state below. */
static spinlock_t		 dst_lock = SPIN_LOCK_UNLOCKED;

static unsigned long dst_gc_timer_expires;	/* current GC delay, in jiffies */
static unsigned long dst_gc_timer_inc = DST_GC_MAX;	/* back-off increment */
static void dst_run_gc(unsigned long);

/* GC timer; dst_run_gc() ignores its argument, so `data' is unused. */
static struct timer_list dst_gc_timer =
	{ data: DST_GC_MIN, function: dst_run_gc };
|
|
/*
 * Timer-driven garbage collector: walk dst_garbage_list and destroy
 * every entry whose refcount has dropped to zero.  Entries still
 * referenced stay queued, and the timer is re-armed with a growing
 * (capped) delay until the list drains.
 */
static void dst_run_gc(unsigned long dummy)
{
	int delayed = 0;
	struct dst_entry * dst, **dstp;

	/* Lock contended: retry shortly rather than spin in timer context. */
	if (!spin_trylock(&dst_lock)) {
		mod_timer(&dst_gc_timer, jiffies + HZ/10);
		return;
	}


	del_timer(&dst_gc_timer);
	dstp = &dst_garbage_list;
	while ((dst = *dstp) != NULL) {
		if (atomic_read(&dst->__refcnt)) {
			/* Still referenced: leave it for a later pass. */
			dstp = &dst->next;
			delayed++;
			continue;
		}
		/* Unlink and free. */
		*dstp = dst->next;
		dst_destroy(dst);
	}
	if (!dst_garbage_list) {
		/* List drained: reset back-off and leave the timer stopped. */
		dst_gc_timer_inc = DST_GC_MAX;
		goto out;
	}
	/* Back off: lengthen the delay each pass, capped at DST_GC_MAX. */
	if ((dst_gc_timer_expires += dst_gc_timer_inc) > DST_GC_MAX)
		dst_gc_timer_expires = DST_GC_MAX;
	dst_gc_timer_inc += DST_GC_INC;
	dst_gc_timer.expires = jiffies + dst_gc_timer_expires;
#if RT_CACHE_DEBUG >= 2
	printk("dst_total: %d/%d %ld\n",
	       atomic_read(&dst_total), delayed, dst_gc_timer_expires);
#endif
	add_timer(&dst_gc_timer);

out:
	spin_unlock(&dst_lock);
}
|
/* Default dst->input handler: drop the packet. */
static int dst_discard(struct sk_buff *skb)
{
	kfree_skb(skb);
	return 0;
}

/* Default dst->output handler: also drops the packet.  Same body as
 * dst_discard; presumably kept separate so the two roles can be told
 * apart by function pointer — NOTE(review): confirm. */
static int dst_blackhole(struct sk_buff *skb)
{
	kfree_skb(skb);
	return 0;
}
|
void * dst_alloc(struct dst_ops * ops) |
{ |
struct dst_entry * dst; |
|
if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh) { |
if (ops->gc()) |
return NULL; |
} |
dst = kmem_cache_alloc(ops->kmem_cachep, SLAB_ATOMIC); |
if (!dst) |
return NULL; |
memset(dst, 0, ops->entry_size); |
atomic_set(&dst->__refcnt, 0); |
dst->ops = ops; |
dst->lastuse = jiffies; |
dst->input = dst_discard; |
dst->output = dst_blackhole; |
#if RT_CACHE_DEBUG >= 2 |
atomic_inc(&dst_total); |
#endif |
atomic_inc(&ops->entries); |
return dst; |
} |
|
/*
 * Queue a dst_entry for destruction.  The entry may still be
 * referenced; actual freeing happens in dst_run_gc() once its
 * refcount reaches zero.
 */
void __dst_free(struct dst_entry * dst)
{
	spin_lock_bh(&dst_lock);

	/* The first case (dev==NULL) is required, when
	   protocol module is unloaded.
	 */
	if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) {
		/* Device gone or down: neuter the entry's I/O hooks. */
		dst->input = dst_discard;
		dst->output = dst_blackhole;
	}
	dst->obsolete = 2;
	/* Push onto the garbage list... */
	dst->next = dst_garbage_list;
	dst_garbage_list = dst;
	/* ...and make sure the GC fires soon, resetting its back-off. */
	if (dst_gc_timer_inc > DST_GC_INC) {
		dst_gc_timer_inc = DST_GC_INC;
		dst_gc_timer_expires = DST_GC_MIN;
		mod_timer(&dst_gc_timer, jiffies + dst_gc_timer_expires);
	}

	spin_unlock_bh(&dst_lock);
}
|
void dst_destroy(struct dst_entry * dst) |
{ |
struct neighbour *neigh = dst->neighbour; |
struct hh_cache *hh = dst->hh; |
|
dst->hh = NULL; |
if (hh && atomic_dec_and_test(&hh->hh_refcnt)) |
kfree(hh); |
|
if (neigh) { |
dst->neighbour = NULL; |
neigh_release(neigh); |
} |
|
atomic_dec(&dst->ops->entries); |
|
if (dst->ops->destroy) |
dst->ops->destroy(dst); |
if (dst->dev) |
dev_put(dst->dev); |
#if RT_CACHE_DEBUG >= 2 |
atomic_dec(&dst_total); |
#endif |
kmem_cache_free(dst->ops->kmem_cachep, dst); |
} |
|
/*
 * Netdevice notifier: when a device goes down or unregisters, sanitize
 * every garbage-list entry that still points at it so the device can
 * be released.
 */
static int dst_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct dst_entry *dst;

	switch (event) {
	case NETDEV_UNREGISTER:
	case NETDEV_DOWN:
		spin_lock_bh(&dst_lock);
		for (dst = dst_garbage_list; dst; dst = dst->next) {
			if (dst->dev == dev) {
				/* Dirty hack. We did it in 2.2 (in __dst_free),
				   we have _very_ good reasons not to repeat
				   this mistake in 2.3, but we have no choice
				   now. _It_ _is_ _explicit_ _deliberate_
				   _race_ _condition_.
				 */
				if (event!=NETDEV_DOWN &&
				    !(dev->features & NETIF_F_DYNALLOC) &&
				    dst->output == dst_blackhole) {
					/* Retarget the entry (and its neighbour)
					 * at loopback, swapping device refs. */
					dst->dev = &loopback_dev;
					dev_put(dev);
					dev_hold(&loopback_dev);
					dst->output = dst_discard;
					if (dst->neighbour && dst->neighbour->dev == dev) {
						dst->neighbour->dev = &loopback_dev;
						dev_put(dev);
						dev_hold(&loopback_dev);
					}
				} else {
					/* Otherwise just neuter the I/O hooks. */
					dst->input = dst_discard;
					dst->output = dst_blackhole;
				}
			}
		}
		spin_unlock_bh(&dst_lock);
		break;
	}
	return NOTIFY_DONE;
}
|
/* Positional initializer: {notifier_call, next, priority}. */
struct notifier_block dst_dev_notifier = {
	dst_dev_event,
	NULL,
	0
};

/* Hook the dst layer into netdevice state-change notifications. */
void __init dst_init(void)
{
	register_netdevice_notifier(&dst_dev_notifier);
}
/Makefile
0,0 → 1,35
#
# Makefile for the Linux networking core.
#
# Note! Dependencies are done automagically by 'make dep', which also
# removes any old dependencies. DON'T put your own dependencies here
# unless it's something special (ie not a .c file).
#
# Note 2! The CFLAGS definition is now in the main makefile...

O_TARGET := core.o

# Objects that export symbols for use by modules.
export-objs := netfilter.o profile.o

# Always built when this directory is compiled.
obj-y := sock.o skbuff.o iovec.o datagram.o scm.o

ifeq ($(CONFIG_SYSCTL),y)
ifeq ($(CONFIG_NET),y)
obj-y += sysctl_net_core.o
endif
endif

obj-$(CONFIG_FILTER) += filter.o

obj-$(CONFIG_NET) += dev.o ethtool.o dev_mcast.o dst.o neighbour.o \
			rtnetlink.o utils.o

obj-$(CONFIG_NETFILTER) += netfilter.o
obj-$(CONFIG_NET_DIVERT) += dv.o
obj-$(CONFIG_NET_PROFILE) += profile.o
obj-$(CONFIG_NET_PKTGEN) += pktgen.o
obj-$(CONFIG_NET_RADIO) += wireless.o
# Ugly. I wish all wireless drivers were moved in drivers/net/wireless
obj-$(CONFIG_NET_PCMCIA_RADIO) += wireless.o

include $(TOPDIR)/Rules.make
/netfilter.c
0,0 → 1,639
/* netfilter.c: look after the filters for various protocols. |
* Heavily influenced by the old firewall.c by David Bonn and Alan Cox. |
* |
* Thanks to Rob `CmdrTaco' Malda for not influencing this code in any |
* way. |
* |
* Rusty Russell (C)2000 -- This code is GPL. |
* |
* February 2000: Modified by James Morris to have 1 queue per protocol. |
* 15-Mar-2000: Added NF_REPEAT --RR. |
*/ |
#include <linux/config.h> |
#include <linux/netfilter.h> |
#include <net/protocol.h> |
#include <linux/init.h> |
#include <linux/skbuff.h> |
#include <linux/wait.h> |
#include <linux/module.h> |
#include <linux/interrupt.h> |
#include <linux/if.h> |
#include <linux/netdevice.h> |
#include <linux/brlock.h> |
#include <linux/inetdevice.h> |
#include <net/sock.h> |
#include <net/route.h> |
#include <linux/ip.h> |
|
#define __KERNEL_SYSCALLS__ |
#include <linux/unistd.h> |
|
/* In this code, we can be waiting indefinitely for userspace to |
* service a packet if a hook returns NF_QUEUE. We could keep a count |
* of skbuffs queued for userspace, and not deregister a hook unless |
* this is zero, but that sucks. Now, we simply check when the |
* packets come back: if the hook is gone, the packet is discarded. */ |
#ifdef CONFIG_NETFILTER_DEBUG
/* Debug builds: NFDEBUG() forwards to printk(). */
#define NFDEBUG(format, args...) printk(format , ## args)
#else
/* Non-debug builds: NFDEBUG() compiles away entirely. */
#define NFDEBUG(format, args...)
#endif

/* Sockopts only registered and called from user context, so
   BR_NETPROTO_LOCK would be overkill. Also, [gs]etsockopt calls may
   sleep. */
static DECLARE_MUTEX(nf_sockopt_mutex);

/* One hook chain per (protocol family, hook point); traversals and
 * updates are serialized via BR_NETPROTO_LOCK. */
struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
/* All registered sockopt ranges; guarded by nf_sockopt_mutex. */
static LIST_HEAD(nf_sockopts);

/*
 * A queue handler may be registered for each protocol.  Each is protected by
 * long term mutex.  The handler must provide an outfn() to accept packets
 * for queueing and must reinject all packets it receives, no matter what.
 */
static struct nf_queue_handler_t {
	nf_queue_outfn_t outfn;
	void *data;
} queue_handler[NPROTO];
|
int nf_register_hook(struct nf_hook_ops *reg) |
{ |
struct list_head *i; |
|
br_write_lock_bh(BR_NETPROTO_LOCK); |
for (i = nf_hooks[reg->pf][reg->hooknum].next; |
i != &nf_hooks[reg->pf][reg->hooknum]; |
i = i->next) { |
if (reg->priority < ((struct nf_hook_ops *)i)->priority) |
break; |
} |
list_add(®->list, i->prev); |
br_write_unlock_bh(BR_NETPROTO_LOCK); |
return 0; |
} |
|
void nf_unregister_hook(struct nf_hook_ops *reg) |
{ |
br_write_lock_bh(BR_NETPROTO_LOCK); |
list_del(®->list); |
br_write_unlock_bh(BR_NETPROTO_LOCK); |
} |
|
/* Do the half-open ranges [min1,max1) and [min2,max2) intersect? */
static inline int overlap(int min1, int max1, int min2, int max2)
{
	return min2 < max1 && min1 < max2;
}
|
/* Functions to register sockopt ranges (exclusive). */ |
int nf_register_sockopt(struct nf_sockopt_ops *reg) |
{ |
struct list_head *i; |
int ret = 0; |
|
if (down_interruptible(&nf_sockopt_mutex) != 0) |
return -EINTR; |
|
for (i = nf_sockopts.next; i != &nf_sockopts; i = i->next) { |
struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i; |
if (ops->pf == reg->pf |
&& (overlap(ops->set_optmin, ops->set_optmax, |
reg->set_optmin, reg->set_optmax) |
|| overlap(ops->get_optmin, ops->get_optmax, |
reg->get_optmin, reg->get_optmax))) { |
NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n", |
ops->set_optmin, ops->set_optmax, |
ops->get_optmin, ops->get_optmax, |
reg->set_optmin, reg->set_optmax, |
reg->get_optmin, reg->get_optmax); |
ret = -EBUSY; |
goto out; |
} |
} |
|
list_add(®->list, &nf_sockopts); |
out: |
up(&nf_sockopt_mutex); |
return ret; |
} |
|
void nf_unregister_sockopt(struct nf_sockopt_ops *reg) |
{ |
/* No point being interruptible: we're probably in cleanup_module() */ |
restart: |
down(&nf_sockopt_mutex); |
if (reg->use != 0) { |
/* To be woken by nf_sockopt call... */ |
/* FIXME: Stuart Young's name appears gratuitously. */ |
set_current_state(TASK_UNINTERRUPTIBLE); |
reg->cleanup_task = current; |
up(&nf_sockopt_mutex); |
schedule(); |
goto restart; |
} |
list_del(®->list); |
up(&nf_sockopt_mutex); |
} |
|
#ifdef CONFIG_NETFILTER_DEBUG |
#include <net/ip.h> |
#include <net/route.h> |
#include <net/tcp.h> |
#include <linux/netfilter_ipv4.h> |
|
static void debug_print_hooks_ip(unsigned int nf_debug) |
{ |
if (nf_debug & (1 << NF_IP_PRE_ROUTING)) { |
printk("PRE_ROUTING "); |
nf_debug ^= (1 << NF_IP_PRE_ROUTING); |
} |
if (nf_debug & (1 << NF_IP_LOCAL_IN)) { |
printk("LOCAL_IN "); |
nf_debug ^= (1 << NF_IP_LOCAL_IN); |
} |
if (nf_debug & (1 << NF_IP_FORWARD)) { |
printk("FORWARD "); |
nf_debug ^= (1 << NF_IP_FORWARD); |
} |
if (nf_debug & (1 << NF_IP_LOCAL_OUT)) { |
printk("LOCAL_OUT "); |
nf_debug ^= (1 << NF_IP_LOCAL_OUT); |
} |
if (nf_debug & (1 << NF_IP_POST_ROUTING)) { |
printk("POST_ROUTING "); |
nf_debug ^= (1 << NF_IP_POST_ROUTING); |
} |
if (nf_debug) |
printk("Crap bits: 0x%04X", nf_debug); |
printk("\n"); |
} |
|
/*
 * Debug helper: dump a summary of an skb — and, for IPv4, its header
 * fields, ports and options — to the kernel log.
 */
void nf_dump_skb(int pf, struct sk_buff *skb)
{
	printk("skb: pf=%i %s dev=%s len=%u\n",
	       pf,
	       skb->sk ? "(owned)" : "(unowned)",
	       skb->dev ? skb->dev->name : "(no dev)",
	       skb->len);
	switch (pf) {
	case PF_INET: {
		const struct iphdr *ip = skb->nh.iph;
		__u32 *opt = (__u32 *) (ip + 1);
		int opti;
		__u16 src_port = 0, dst_port = 0;

		/* TCP and UDP both begin with source/dest ports, so the
		 * tcphdr cast is valid for either protocol here. */
		if (ip->protocol == IPPROTO_TCP
		    || ip->protocol == IPPROTO_UDP) {
			struct tcphdr *tcp=(struct tcphdr *)((__u32 *)ip+ip->ihl);
			src_port = ntohs(tcp->source);
			dst_port = ntohs(tcp->dest);
		}

		printk("PROTO=%d %u.%u.%u.%u:%hu %u.%u.%u.%u:%hu"
		       " L=%hu S=0x%2.2hX I=%hu F=0x%4.4hX T=%hu",
		       ip->protocol, NIPQUAD(ip->saddr),
		       src_port, NIPQUAD(ip->daddr),
		       dst_port,
		       ntohs(ip->tot_len), ip->tos, ntohs(ip->id),
		       ntohs(ip->frag_off), ip->ttl);

		/* Dump IP option words beyond the fixed 20-byte header. */
		for (opti = 0; opti < (ip->ihl - sizeof(struct iphdr) / 4); opti++)
			printk(" O=0x%8.8X", *opt++);
		printk("\n");
	}
	}
}
|
/*
 * Debug check at ip_local_deliver(): verify the skb traversed exactly
 * the hook set expected for its arrival path (loopback vs. external)
 * and log a dump otherwise.
 */
void nf_debug_ip_local_deliver(struct sk_buff *skb)
{
	/* If it's a loopback packet, it must have come through
	 * NF_IP_LOCAL_OUT, NF_IP_RAW_INPUT, NF_IP_PRE_ROUTING and
	 * NF_IP_LOCAL_IN. Otherwise, must have gone through
	 * NF_IP_RAW_INPUT and NF_IP_PRE_ROUTING. */
	if (!skb->dev) {
		printk("ip_local_deliver: skb->dev is NULL.\n");
	}
	else if (strcmp(skb->dev->name, "lo") == 0) {
		if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
				      | (1 << NF_IP_POST_ROUTING)
				      | (1 << NF_IP_PRE_ROUTING)
				      | (1 << NF_IP_LOCAL_IN))) {
			printk("ip_local_deliver: bad loopback skb: ");
			debug_print_hooks_ip(skb->nf_debug);
			nf_dump_skb(PF_INET, skb);
		}
	}
	else {
		if (skb->nf_debug != ((1<<NF_IP_PRE_ROUTING)
				      | (1<<NF_IP_LOCAL_IN))) {
			printk("ip_local_deliver: bad non-lo skb: ");
			debug_print_hooks_ip(skb->nf_debug);
			nf_dump_skb(PF_INET, skb);
		}
	}
}
|
/*
 * Debug check for packets looped back at xmit time: they must have
 * passed exactly LOCAL_OUT and POST_ROUTING.  Clears nf_debug so the
 * input-side check starts fresh.
 */
void nf_debug_ip_loopback_xmit(struct sk_buff *newskb)
{
	if (newskb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
				 | (1 << NF_IP_POST_ROUTING))) {
		printk("ip_dev_loopback_xmit: bad owned skb = %p: ",
		       newskb);
		debug_print_hooks_ip(newskb->nf_debug);
		nf_dump_skb(PF_INET, newskb);
	}
	/* Clear to avoid confusing input check */
	newskb->nf_debug = 0;
}
|
/*
 * Debug check at final output: locally-owned packets must have passed
 * LOCAL_OUT + POST_ROUTING; forwarded ones PRE_ROUTING + FORWARD +
 * POST_ROUTING (with an allowance for unowned but locally generated
 * packets).  Logs a dump on any mismatch.
 */
void nf_debug_ip_finish_output2(struct sk_buff *skb)
{
	/* If it's owned, it must have gone through the
	 * NF_IP_LOCAL_OUT and NF_IP_POST_ROUTING.
	 * Otherwise, must have gone through
	 * NF_IP_PRE_ROUTING, NF_IP_FORWARD and NF_IP_POST_ROUTING.
	 */
	if (skb->sk) {
		if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
				      | (1 << NF_IP_POST_ROUTING))) {
			printk("ip_finish_output: bad owned skb = %p: ", skb);
			debug_print_hooks_ip(skb->nf_debug);
			nf_dump_skb(PF_INET, skb);
		}
	} else {
		if (skb->nf_debug != ((1 << NF_IP_PRE_ROUTING)
				      | (1 << NF_IP_FORWARD)
				      | (1 << NF_IP_POST_ROUTING))) {
			/* Fragments, entunnelled packets, TCP RSTs
			   generated by ipt_REJECT will have no
			   owners, but still may be local */
			if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
					      | (1 << NF_IP_POST_ROUTING))){
				printk("ip_finish_output:"
				       " bad unowned skb = %p: ",skb);
				debug_print_hooks_ip(skb->nf_debug);
				nf_dump_skb(PF_INET, skb);
			}
		}
	}
}
#endif /*CONFIG_NETFILTER_DEBUG*/ |
|
/*
 * Call get/setsockopt(): dispatch to the nf_sockopt_ops registration
 * covering (pf, val).  The ops' use count is held non-zero while its
 * handler runs so nf_unregister_sockopt() can wait for in-flight
 * calls.  Returns -ENOPROTOOPT if no registration matches.
 */
static int nf_sockopt(struct sock *sk, int pf, int val,
		      char *opt, int *len, int get)
{
	struct list_head *i;
	struct nf_sockopt_ops *ops;
	int ret;

	if (down_interruptible(&nf_sockopt_mutex) != 0)
		return -EINTR;

	for (i = nf_sockopts.next; i != &nf_sockopts; i = i->next) {
		ops = (struct nf_sockopt_ops *)i;
		if (ops->pf == pf) {
			if (get) {
				if (val >= ops->get_optmin
				    && val < ops->get_optmax) {
					/* Pin ops, then drop the mutex so the
					 * handler is free to sleep. */
					ops->use++;
					up(&nf_sockopt_mutex);
					ret = ops->get(sk, val, opt, len);
					goto out;
				}
			} else {
				if (val >= ops->set_optmin
				    && val < ops->set_optmax) {
					ops->use++;
					up(&nf_sockopt_mutex);
					ret = ops->set(sk, val, opt, *len);
					goto out;
				}
			}
		}
	}
	up(&nf_sockopt_mutex);
	return -ENOPROTOOPT;

 out:
	/* Unpin; wake nf_unregister_sockopt() if it is waiting on us. */
	down(&nf_sockopt_mutex);
	ops->use--;
	if (ops->cleanup_task)
		wake_up_process(ops->cleanup_task);
	up(&nf_sockopt_mutex);
	return ret;
}
|
/* setsockopt() entry point: len is passed by value (handler gets *len). */
int nf_setsockopt(struct sock *sk, int pf, int val, char *opt,
		  int len)
{
	return nf_sockopt(sk, pf, val, opt, &len, 0);
}

/* getsockopt() entry point: len is a pointer so the handler can update it. */
int nf_getsockopt(struct sock *sk, int pf, int val, char *opt, int *len)
{
	return nf_sockopt(sk, pf, val, opt, len, 1);
}
|
/*
 * Walk the hook chain starting after *i, presenting the packet to each
 * hook in turn.  Returns the first QUEUE/STOLEN/DROP verdict, or
 * NF_ACCEPT once the chain is exhausted.  *i is left at the element
 * that produced the verdict so traversal can resume there later
 * (see nf_reinject()).
 */
static unsigned int nf_iterate(struct list_head *head,
			       struct sk_buff **skb,
			       int hook,
			       const struct net_device *indev,
			       const struct net_device *outdev,
			       struct list_head **i,
			       int (*okfn)(struct sk_buff *))
{
	for (*i = (*i)->next; *i != head; *i = (*i)->next) {
		struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;
		switch (elem->hook(hook, skb, indev, outdev, okfn)) {
		case NF_QUEUE:
			return NF_QUEUE;

		case NF_STOLEN:
			return NF_STOLEN;

		case NF_DROP:
			return NF_DROP;

		case NF_REPEAT:
			/* Step back one so the same hook runs again. */
			*i = (*i)->prev;
			break;

#ifdef CONFIG_NETFILTER_DEBUG
		case NF_ACCEPT:
			break;

		default:
			NFDEBUG("Evil return from %p(%u).\n",
				elem->hook, hook);
#endif
		}
	}
	return NF_ACCEPT;
}
|
/*
 * Install the userspace queue handler for a protocol family.
 * Only one handler per family: returns -EBUSY if one is installed.
 */
int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data)
{
	int ret = 0;

	br_write_lock_bh(BR_NETPROTO_LOCK);
	if (queue_handler[pf].outfn) {
		ret = -EBUSY;
	} else {
		queue_handler[pf].outfn = outfn;
		queue_handler[pf].data = data;
	}
	br_write_unlock_bh(BR_NETPROTO_LOCK);

	return ret;
}
|
/* The caller must flush their queue before this */
/* Clear the protocol family's queue handler.  Always returns 0. */
int nf_unregister_queue_handler(int pf)
{
	br_write_lock_bh(BR_NETPROTO_LOCK);
	queue_handler[pf].outfn = NULL;
	queue_handler[pf].data = NULL;
	br_write_unlock_bh(BR_NETPROTO_LOCK);
	return 0;
}
|
/*
 * Any packet that leaves via this function must come back
 * through nf_reinject().
 *
 * Hands the skb to the family's registered queue handler, packaging
 * the traversal state into a kmalloc'ed nf_info so nf_reinject() can
 * resume.  Frees the skb when no handler is registered or on failure.
 */
static void nf_queue(struct sk_buff *skb,
		     struct list_head *elem,
		     int pf, unsigned int hook,
		     struct net_device *indev,
		     struct net_device *outdev,
		     int (*okfn)(struct sk_buff *))
{
	int status;
	struct nf_info *info;

	if (!queue_handler[pf].outfn) {
		kfree_skb(skb);
		return;
	}

	info = kmalloc(sizeof(*info), GFP_ATOMIC);
	if (!info) {
		if (net_ratelimit())
			printk(KERN_ERR "OOM queueing packet %p\n",
			       skb);
		kfree_skb(skb);
		return;
	}

	/* Positional init: elem, pf, hook, indev, outdev, okfn. */
	*info = (struct nf_info) {
		(struct nf_hook_ops *)elem, pf, hook, indev, outdev, okfn };

	/* Bump dev refs so they don't vanish while packet is out */
	if (indev) dev_hold(indev);
	if (outdev) dev_hold(outdev);

	status = queue_handler[pf].outfn(skb, info, queue_handler[pf].data);
	if (status < 0) {
		/* Handler refused the packet: roll back refs and free. */
		if (indev) dev_put(indev);
		if (outdev) dev_put(outdev);
		kfree(info);
		kfree_skb(skb);
		return;
	}
}
|
/*
 * Slow path of NF_HOOK(): run the (pf, hook) chain over the packet and
 * act on the verdict — ACCEPT continues via okfn(), DROP frees the skb
 * and returns -EPERM, QUEUE hands it to the userspace queue handler.
 */
int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb,
		 struct net_device *indev,
		 struct net_device *outdev,
		 int (*okfn)(struct sk_buff *))
{
	struct list_head *elem;
	unsigned int verdict;
	int ret = 0;

	/* This stopgap cannot be removed until all the hooks are audited. */
	if (skb_is_nonlinear(skb) && skb_linearize(skb, GFP_ATOMIC) != 0) {
		kfree_skb(skb);
		return -ENOMEM;
	}
	/* Resolve hardware checksums up front — NOTE(review): presumably
	 * because hooks may modify packet data; confirm. */
	if (skb->ip_summed == CHECKSUM_HW) {
		if (outdev == NULL) {
			skb->ip_summed = CHECKSUM_NONE;
		} else {
			skb_checksum_help(skb);
		}
	}

	/* We may already have this, but read-locks nest anyway */
	br_read_lock_bh(BR_NETPROTO_LOCK);

#ifdef CONFIG_NETFILTER_DEBUG
	/* Track which hooks this skb has traversed (debug builds only). */
	if (skb->nf_debug & (1 << hook)) {
		printk("nf_hook: hook %i already set.\n", hook);
		nf_dump_skb(pf, skb);
	}
	skb->nf_debug |= (1 << hook);
#endif

	elem = &nf_hooks[pf][hook];
	verdict = nf_iterate(&nf_hooks[pf][hook], &skb, hook, indev,
			     outdev, &elem, okfn);
	if (verdict == NF_QUEUE) {
		NFDEBUG("nf_hook: Verdict = QUEUE.\n");
		nf_queue(skb, elem, pf, hook, indev, outdev, okfn);
	}

	switch (verdict) {
	case NF_ACCEPT:
		ret = okfn(skb);
		break;

	case NF_DROP:
		kfree_skb(skb);
		ret = -EPERM;
		break;
	}

	br_read_unlock_bh(BR_NETPROTO_LOCK);
	return ret;
}
|
/*
 * Re-entry point for packets previously queued by nf_queue().  Checks
 * that the hook which queued the packet is still registered (drops the
 * packet otherwise), then resumes chain traversal according to the
 * userspace verdict.  Consumes `info' and the device references taken
 * by nf_queue().
 */
void nf_reinject(struct sk_buff *skb, struct nf_info *info,
		 unsigned int verdict)
{
	struct list_head *elem = &info->elem->list;
	struct list_head *i;

	/* We don't have BR_NETPROTO_LOCK here */
	br_read_lock_bh(BR_NETPROTO_LOCK);
	/* Walk the chain looking for our element; reaching the list head
	 * first means its module was unloaded while we were queued. */
	for (i = nf_hooks[info->pf][info->hook].next; i != elem; i = i->next) {
		if (i == &nf_hooks[info->pf][info->hook]) {
			/* The module which sent it to userspace is gone. */
			NFDEBUG("%s: module disappeared, dropping packet.\n",
				__FUNCTION__);
			verdict = NF_DROP;
			break;
		}
	}

	/* Continue traversal iff userspace said ok... */
	if (verdict == NF_REPEAT) {
		/* Re-run the hook that queued the packet. */
		elem = elem->prev;
		verdict = NF_ACCEPT;
	}

	if (verdict == NF_ACCEPT) {
		verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
				     &skb, info->hook,
				     info->indev, info->outdev, &elem,
				     info->okfn);
	}

	switch (verdict) {
	case NF_ACCEPT:
		info->okfn(skb);
		break;

	case NF_QUEUE:
		nf_queue(skb, elem, info->pf, info->hook,
			 info->indev, info->outdev, info->okfn);
		break;

	case NF_DROP:
		kfree_skb(skb);
		break;
	}
	br_read_unlock_bh(BR_NETPROTO_LOCK);

	/* Release those devices we held, or Alexey will kill me. */
	if (info->indev) dev_put(info->indev);
	if (info->outdev) dev_put(info->outdev);

	kfree(info);
	return;
}
|
#ifdef CONFIG_INET |
/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ |
int ip_route_me_harder(struct sk_buff **pskb) |
{ |
struct iphdr *iph = (*pskb)->nh.iph; |
struct rtable *rt; |
struct rt_key key = {}; |
struct dst_entry *odst; |
unsigned int hh_len; |
|
/* some non-standard hacks like ipt_REJECT.c:send_reset() can cause |
* packets with foreign saddr to be appear on the NF_IP_LOCAL_OUT hook. |
*/ |
if (inet_addr_type(iph->saddr) == RTN_LOCAL) { |
key.dst = iph->daddr; |
key.src = iph->saddr; |
key.oif = (*pskb)->sk ? (*pskb)->sk->bound_dev_if : 0; |
key.tos = RT_TOS(iph->tos); |
#ifdef CONFIG_IP_ROUTE_FWMARK |
key.fwmark = (*pskb)->nfmark; |
#endif |
if (ip_route_output_key(&rt, &key) != 0) |
return -1; |
|
/* Drop old route. */ |
dst_release((*pskb)->dst); |
(*pskb)->dst = &rt->u.dst; |
} else { |
/* non-local src, find valid iif to satisfy |
* rp-filter when calling ip_route_input. */ |
key.dst = iph->saddr; |
if (ip_route_output_key(&rt, &key) != 0) |
return -1; |
|
odst = (*pskb)->dst; |
if (ip_route_input(*pskb, iph->daddr, iph->saddr, |
RT_TOS(iph->tos), rt->u.dst.dev) != 0) { |
dst_release(&rt->u.dst); |
return -1; |
} |
dst_release(&rt->u.dst); |
dst_release(odst); |
} |
|
if ((*pskb)->dst->error) |
return -1; |
|
/* Change in oif may mean change in hh_len. */ |
hh_len = (*pskb)->dst->dev->hard_header_len; |
if (skb_headroom(*pskb) < hh_len) { |
struct sk_buff *nskb; |
|
nskb = skb_realloc_headroom(*pskb, hh_len); |
if (!nskb) |
return -1; |
if ((*pskb)->sk) |
skb_set_owner_w(nskb, (*pskb)->sk); |
kfree_skb(*pskb); |
*pskb = nskb; |
} |
|
return 0; |
} |
#endif /*CONFIG_INET*/ |
|
/* This does not belong here, but ipt_REJECT needs it if connection
   tracking in use: without this, connection may not be in hash table,
   and hence manufactured ICMP or RST packets will not be associated
   with it. */
/* Function pointer, NULL here; presumably assigned by the connection
 * tracking module when it loads — it is never set in this file. */
void (*ip_ct_attach)(struct sk_buff *, struct nf_ct_info *);
|
/* Boot-time setup: start every (family, hook) chain out empty. */
void __init netfilter_init(void)
{
	int pf, hook;

	for (pf = 0; pf < NPROTO; pf++)
		for (hook = 0; hook < NF_MAX_HOOKS; hook++)
			INIT_LIST_HEAD(&nf_hooks[pf][hook]);
}
/wireless.c
0,0 → 1,1282
/* |
* This file implement the Wireless Extensions APIs. |
* |
* Authors : Jean Tourrilhes - HPL - <jt@hpl.hp.com> |
* Copyright (c) 1997-2003 Jean Tourrilhes, All Rights Reserved. |
* |
* (As all part of the Linux kernel, this file is GPL) |
*/ |
|
/************************** DOCUMENTATION **************************/ |
/* |
* API definition : |
* -------------- |
* See <linux/wireless.h> for details of the APIs and the rest. |
* |
* History : |
* ------- |
* |
* v1 - 5.12.01 - Jean II |
* o Created this file. |
* |
* v2 - 13.12.01 - Jean II |
* o Move /proc/net/wireless stuff from net/core/dev.c to here |
* o Make Wireless Extension IOCTLs go through here |
* o Added iw_handler handling ;-) |
* o Added standard ioctl description |
* o Initial dumb commit strategy based on orinoco.c |
* |
* v3 - 19.12.01 - Jean II |
* o Make sure we don't go out of standard_ioctl[] in ioctl_standard_call |
* o Add event dispatcher function |
* o Add event description |
* o Propagate events as rtnetlink IFLA_WIRELESS option |
* o Generate event on selected SET requests |
* |
* v4 - 18.04.02 - Jean II |
* o Fix stupid off by one in iw_ioctl_description : IW_ESSID_MAX_SIZE + 1 |
* |
* v5 - 21.06.02 - Jean II |
* o Add IW_PRIV_TYPE_ADDR in priv_type_size (+cleanup) |
* o Reshuffle IW_HEADER_TYPE_XXX to map IW_PRIV_TYPE_XXX changes |
* o Add IWEVCUSTOM for driver specific event/scanning token |
* o Turn on WE_STRICT_WRITE by default + kernel warning |
* o Fix WE_STRICT_WRITE in ioctl_export_private() (32 => iw_num) |
* o Fix off-by-one in test (extra_size <= IFNAMSIZ) |
* |
* v6 - 9.01.03 - Jean II |
* o Add common spy support : iw_handler_set_spy(), wireless_spy_update() |
* o Add enhanced spy support : iw_handler_set_thrspy() and event. |
* o Add WIRELESS_EXT version display in /proc/net/wireless |
*/ |
|
/***************************** INCLUDES *****************************/ |
|
#include <asm/uaccess.h> /* copy_to_user() */ |
#include <linux/config.h> /* Not needed ??? */ |
#include <linux/types.h> /* off_t */ |
#include <linux/netdevice.h> /* struct ifreq, dev_get_by_name() */ |
#include <linux/rtnetlink.h> /* rtnetlink stuff */ |
#include <linux/if_arp.h> /* ARPHRD_ETHER */ |
|
#include <linux/wireless.h> /* Pretty obvious */ |
#include <net/iw_handler.h> /* New driver API */ |
|
/**************************** CONSTANTS ****************************/

/* Enough lenience, let's make sure things are proper... */
#define WE_STRICT_WRITE		/* Check write buffer size */
/* I'll probably drop both the define and kernel message in the next version */

/* Debugging stuff */
#undef WE_IOCTL_DEBUG		/* Debug IOCTL API */
#undef WE_EVENT_DEBUG		/* Debug Event dispatcher */
#undef WE_SPY_DEBUG		/* Debug enhanced spy support */

/* Options */
#define WE_EVENT_NETLINK	/* Propagate events using rtnetlink */
#define WE_SET_EVENT		/* Generate an event on some set commands */
|
/************************* GLOBAL VARIABLES *************************/ |
/* |
* You should not use global variables, because of re-entrancy. |
* On our case, it's only const, so it's OK... |
*/ |
/* |
* Meta-data about all the standard Wireless Extension request we |
* know about. |
*/ |
/* Each entry is { header_type, token_type, token_size,
 *                 min_tokens, max_tokens, flags }
 * (field order per struct iw_ioctl_description in <net/iw_handler.h> -
 * the debug printk in ioctl_standard_call() shows the same names).
 * The table is indexed by (cmd - SIOCIWFIRST), so entries must stay in
 * exact ioctl-number order, with explicit place-holders for the holes. */
static const struct iw_ioctl_description standard_ioctl[] = {
	/* SIOCSIWCOMMIT */
	{ IW_HEADER_TYPE_NULL, 0, 0, 0, 0, 0},
	/* SIOCGIWNAME */
	{ IW_HEADER_TYPE_CHAR, 0, 0, 0, 0, IW_DESCR_FLAG_DUMP},
	/* SIOCSIWNWID */
	{ IW_HEADER_TYPE_PARAM, 0, 0, 0, 0, IW_DESCR_FLAG_EVENT},
	/* SIOCGIWNWID */
	{ IW_HEADER_TYPE_PARAM, 0, 0, 0, 0, IW_DESCR_FLAG_DUMP},
	/* SIOCSIWFREQ */
	{ IW_HEADER_TYPE_FREQ, 0, 0, 0, 0, IW_DESCR_FLAG_EVENT},
	/* SIOCGIWFREQ */
	{ IW_HEADER_TYPE_FREQ, 0, 0, 0, 0, IW_DESCR_FLAG_DUMP},
	/* SIOCSIWMODE */
	{ IW_HEADER_TYPE_UINT, 0, 0, 0, 0, IW_DESCR_FLAG_EVENT},
	/* SIOCGIWMODE */
	{ IW_HEADER_TYPE_UINT, 0, 0, 0, 0, IW_DESCR_FLAG_DUMP},
	/* SIOCSIWSENS */
	{ IW_HEADER_TYPE_PARAM, 0, 0, 0, 0, 0},
	/* SIOCGIWSENS */
	{ IW_HEADER_TYPE_PARAM, 0, 0, 0, 0, 0},
	/* SIOCSIWRANGE */
	{ IW_HEADER_TYPE_NULL, 0, 0, 0, 0, 0},
	/* SIOCGIWRANGE */
	{ IW_HEADER_TYPE_POINT, 0, 1, 0, sizeof(struct iw_range), IW_DESCR_FLAG_DUMP},
	/* SIOCSIWPRIV */
	{ IW_HEADER_TYPE_NULL, 0, 0, 0, 0, 0},
	/* SIOCGIWPRIV (handled directly by us) */
	{ IW_HEADER_TYPE_NULL, 0, 0, 0, 0, 0},
	/* SIOCSIWSTATS */
	{ IW_HEADER_TYPE_NULL, 0, 0, 0, 0, 0},
	/* SIOCGIWSTATS (handled directly by us) */
	{ IW_HEADER_TYPE_NULL, 0, 0, 0, 0, IW_DESCR_FLAG_DUMP},
	/* SIOCSIWSPY */
	{ IW_HEADER_TYPE_POINT, 0, sizeof(struct sockaddr), 0, IW_MAX_SPY, 0},
	/* SIOCGIWSPY */
	{ IW_HEADER_TYPE_POINT, 0, (sizeof(struct sockaddr) + sizeof(struct iw_quality)), 0, IW_MAX_SPY, 0},
	/* SIOCSIWTHRSPY */
	{ IW_HEADER_TYPE_POINT, 0, sizeof(struct iw_thrspy), 1, 1, 0},
	/* SIOCGIWTHRSPY */
	{ IW_HEADER_TYPE_POINT, 0, sizeof(struct iw_thrspy), 1, 1, 0},
	/* SIOCSIWAP */
	{ IW_HEADER_TYPE_ADDR, 0, 0, 0, 0, 0},
	/* SIOCGIWAP */
	{ IW_HEADER_TYPE_ADDR, 0, 0, 0, 0, IW_DESCR_FLAG_DUMP},
	/* -- hole -- */
	{ IW_HEADER_TYPE_NULL, 0, 0, 0, 0, 0},
	/* SIOCGIWAPLIST */
	{ IW_HEADER_TYPE_POINT, 0, (sizeof(struct sockaddr) + sizeof(struct iw_quality)), 0, IW_MAX_AP, 0},
	/* SIOCSIWSCAN */
	{ IW_HEADER_TYPE_PARAM, 0, 0, 0, 0, 0},
	/* SIOCGIWSCAN */
	{ IW_HEADER_TYPE_POINT, 0, 1, 0, IW_SCAN_MAX_DATA, 0},
	/* SIOCSIWESSID */
	{ IW_HEADER_TYPE_POINT, 0, 1, 0, IW_ESSID_MAX_SIZE + 1, IW_DESCR_FLAG_EVENT},
	/* SIOCGIWESSID */
	{ IW_HEADER_TYPE_POINT, 0, 1, 0, IW_ESSID_MAX_SIZE + 1, IW_DESCR_FLAG_DUMP},
	/* SIOCSIWNICKN */
	{ IW_HEADER_TYPE_POINT, 0, 1, 0, IW_ESSID_MAX_SIZE + 1, 0},
	/* SIOCGIWNICKN */
	{ IW_HEADER_TYPE_POINT, 0, 1, 0, IW_ESSID_MAX_SIZE + 1, 0},
	/* -- hole -- */
	{ IW_HEADER_TYPE_NULL, 0, 0, 0, 0, 0},
	/* -- hole -- */
	{ IW_HEADER_TYPE_NULL, 0, 0, 0, 0, 0},
	/* SIOCSIWRATE */
	{ IW_HEADER_TYPE_PARAM, 0, 0, 0, 0, 0},
	/* SIOCGIWRATE */
	{ IW_HEADER_TYPE_PARAM, 0, 0, 0, 0, 0},
	/* SIOCSIWRTS */
	{ IW_HEADER_TYPE_PARAM, 0, 0, 0, 0, 0},
	/* SIOCGIWRTS */
	{ IW_HEADER_TYPE_PARAM, 0, 0, 0, 0, 0},
	/* SIOCSIWFRAG */
	{ IW_HEADER_TYPE_PARAM, 0, 0, 0, 0, 0},
	/* SIOCGIWFRAG */
	{ IW_HEADER_TYPE_PARAM, 0, 0, 0, 0, 0},
	/* SIOCSIWTXPOW */
	{ IW_HEADER_TYPE_PARAM, 0, 0, 0, 0, 0},
	/* SIOCGIWTXPOW */
	{ IW_HEADER_TYPE_PARAM, 0, 0, 0, 0, 0},
	/* SIOCSIWRETRY */
	{ IW_HEADER_TYPE_PARAM, 0, 0, 0, 0, 0},
	/* SIOCGIWRETRY */
	{ IW_HEADER_TYPE_PARAM, 0, 0, 0, 0, 0},
	/* SIOCSIWENCODE */
	{ IW_HEADER_TYPE_POINT, 0, 1, 0, IW_ENCODING_TOKEN_MAX, IW_DESCR_FLAG_EVENT | IW_DESCR_FLAG_RESTRICT},
	/* SIOCGIWENCODE */
	{ IW_HEADER_TYPE_POINT, 0, 1, 0, IW_ENCODING_TOKEN_MAX, IW_DESCR_FLAG_DUMP | IW_DESCR_FLAG_RESTRICT},
	/* SIOCSIWPOWER */
	{ IW_HEADER_TYPE_PARAM, 0, 0, 0, 0, 0},
	/* SIOCGIWPOWER */
	{ IW_HEADER_TYPE_PARAM, 0, 0, 0, 0, 0},
};
/* Number of entries above, for bound checking the command index */
static const int standard_ioctl_num = (sizeof(standard_ioctl) /
				       sizeof(struct iw_ioctl_description));
|
/* |
* Meta-data about all the additional standard Wireless Extension events |
* we know about. |
*/ |
/* Same entry layout as standard_ioctl[] above ;
 * this table is indexed by (cmd - IWEVFIRST) in wireless_send_event() */
static const struct iw_ioctl_description standard_event[] = {
	/* IWEVTXDROP */
	{ IW_HEADER_TYPE_ADDR, 0, 0, 0, 0, 0},
	/* IWEVQUAL */
	{ IW_HEADER_TYPE_QUAL, 0, 0, 0, 0, 0},
	/* IWEVCUSTOM */
	{ IW_HEADER_TYPE_POINT, 0, 1, 0, IW_CUSTOM_MAX, 0},
	/* IWEVREGISTERED */
	{ IW_HEADER_TYPE_ADDR, 0, 0, 0, 0, 0},
	/* IWEVEXPIRED */
	{ IW_HEADER_TYPE_ADDR, 0, 0, 0, 0, 0},
};
/* Number of entries above, for bound checking the event index */
static const int standard_event_num = (sizeof(standard_event) /
				       sizeof(struct iw_ioctl_description));
|
/* Size (in bytes) of the various private data types */ |
/* Indexed by the type nibble of a private argument descriptor,
 * i.e. (args & IW_PRIV_TYPE_MASK) >> 12 - see get_priv_size() */
static const char priv_type_size[] = {
	0,				/* IW_PRIV_TYPE_NONE */
	1,				/* IW_PRIV_TYPE_BYTE */
	1,				/* IW_PRIV_TYPE_CHAR */
	0,				/* Not defined */
	sizeof(__u32),			/* IW_PRIV_TYPE_INT */
	sizeof(struct iw_freq),		/* IW_PRIV_TYPE_FLOAT */
	sizeof(struct sockaddr),	/* IW_PRIV_TYPE_ADDR */
	0,				/* Not defined */
};
|
/* Size (in bytes) of various events */ |
/* Indexed by IW_HEADER_TYPE_XXX ; gives the size of the event header
 * (fixed part) used by wireless_send_event() to size its buffer */
static const int event_type_size[] = {
	IW_EV_LCP_LEN,		/* IW_HEADER_TYPE_NULL */
	0,
	IW_EV_CHAR_LEN,		/* IW_HEADER_TYPE_CHAR */
	0,
	IW_EV_UINT_LEN,		/* IW_HEADER_TYPE_UINT */
	IW_EV_FREQ_LEN,		/* IW_HEADER_TYPE_FREQ */
	IW_EV_ADDR_LEN,		/* IW_HEADER_TYPE_ADDR */
	0,
	IW_EV_POINT_LEN,	/* Without variable payload */
	IW_EV_PARAM_LEN,	/* IW_HEADER_TYPE_PARAM */
	IW_EV_QUAL_LEN,		/* IW_HEADER_TYPE_QUAL */
};
|
/************************ COMMON SUBROUTINES ************************/ |
/* |
* Stuff that may be used in various place or doesn't fit in one |
 * of the sections below.
*/ |
|
/* ---------------------------------------------------------------- */ |
/* |
* Return the driver handler associated with a specific Wireless Extension. |
* Called from various place, so make sure it remains efficient. |
*/ |
static inline iw_handler get_handler(struct net_device *dev, |
unsigned int cmd) |
{ |
/* Don't "optimise" the following variable, it will crash */ |
unsigned int index; /* *MUST* be unsigned */ |
|
/* Check if we have some wireless handlers defined */ |
if(dev->wireless_handlers == NULL) |
return NULL; |
|
/* Try as a standard command */ |
index = cmd - SIOCIWFIRST; |
if(index < dev->wireless_handlers->num_standard) |
return dev->wireless_handlers->standard[index]; |
|
/* Try as a private command */ |
index = cmd - SIOCIWFIRSTPRIV; |
if(index < dev->wireless_handlers->num_private) |
return dev->wireless_handlers->private[index]; |
|
/* Not found */ |
return NULL; |
} |
|
/* ---------------------------------------------------------------- */ |
/* |
* Get statistics out of the driver |
*/ |
static inline struct iw_statistics *get_wireless_stats(struct net_device *dev) |
{ |
return (dev->get_wireless_stats ? |
dev->get_wireless_stats(dev) : |
(struct iw_statistics *) NULL); |
/* In the future, get_wireless_stats may move from 'struct net_device' |
* to 'struct iw_handler_def', to de-bloat struct net_device. |
* Definitely worse a thought... */ |
} |
|
/* ---------------------------------------------------------------- */ |
/* |
* Call the commit handler in the driver |
* (if exist and if conditions are right) |
* |
* Note : our current commit strategy is currently pretty dumb, |
* but we will be able to improve on that... |
 * The goal is to try to aggregate as many changes as possible
* before doing the commit. Drivers that will define a commit handler |
* are usually those that need a reset after changing parameters, so |
* we want to minimise the number of reset. |
* A cool idea is to use a timer : at each "set" command, we re-set the |
* timer, when the timer eventually fires, we call the driver. |
* Hopefully, more on that later. |
* |
* Also, I'm waiting to see how many people will complain about the |
* netif_running(dev) test. I'm open on that one... |
* Hopefully, the driver will remember to do a commit in "open()" ;-) |
*/ |
static inline int call_commit_handler(struct net_device * dev) |
{ |
if((netif_running(dev)) && |
(dev->wireless_handlers->standard[0] != NULL)) { |
/* Call the commit handler on the driver */ |
return dev->wireless_handlers->standard[0](dev, NULL, |
NULL, NULL); |
} else |
return 0; /* Command completed successfully */ |
} |
|
/* ---------------------------------------------------------------- */ |
/* |
* Number of private arguments |
*/ |
/* Total payload size (bytes) described by a private argument word :
 * element count times per-element size */
static inline int get_priv_size(__u16 args)
{
	int count = args & IW_PRIV_SIZE_MASK;

	/* The argument type lives in the top four bits of the word */
	return count * priv_type_size[(args & IW_PRIV_TYPE_MASK) >> 12];
}
|
|
/******************** /proc/net/wireless SUPPORT ********************/ |
/* |
* The /proc/net/wireless file is a human readable user-space interface |
* exporting various wireless specific statistics from the wireless devices. |
* This is the most popular part of the Wireless Extensions ;-) |
* |
* This interface is a pure clone of /proc/net/dev (in net/core/dev.c). |
* The content of the file is basically the content of "struct iw_statistics". |
*/ |
|
#ifdef CONFIG_PROC_FS |
|
/* ---------------------------------------------------------------- */ |
/* |
* Print one entry (line) of /proc/net/wireless |
*/ |
static inline int sprintf_wireless_stats(char *buffer, struct net_device *dev) |
{ |
/* Get stats from the driver */ |
struct iw_statistics *stats; |
int size; |
|
stats = get_wireless_stats(dev); |
if (stats != (struct iw_statistics *) NULL) { |
size = sprintf(buffer, |
"%6s: %04x %3d%c %3d%c %3d%c %6d %6d %6d %6d %6d %6d\n", |
dev->name, |
stats->status, |
stats->qual.qual, |
stats->qual.updated & 1 ? '.' : ' ', |
((__u8) stats->qual.level), |
stats->qual.updated & 2 ? '.' : ' ', |
((__u8) stats->qual.noise), |
stats->qual.updated & 4 ? '.' : ' ', |
stats->discard.nwid, |
stats->discard.code, |
stats->discard.fragment, |
stats->discard.retries, |
stats->discard.misc, |
stats->miss.beacon); |
stats->qual.updated = 0; |
} |
else |
size = 0; |
|
return size; |
} |
|
/* ---------------------------------------------------------------- */ |
/* |
* Print info for /proc/net/wireless (print all entries) |
*/ |
int dev_get_wireless_info(char * buffer, char **start, off_t offset,
			  int length)
{
	/* Classic 2.4 procfs read callback : 'offset'/'length' describe
	 * the window of the virtual file the caller wants ; we format
	 * lines sequentially and then trim the result to that window. */
	int len = 0;			/* Bytes currently held in 'buffer' */
	off_t begin = 0;		/* File offset of buffer[0] */
	off_t pos = 0;			/* File offset of the end of data */
	int size;

	struct net_device * dev;

	/* Header line, including the Wireless Extension version */
	size = sprintf(buffer,
		       "Inter-| sta-| Quality | Discarded packets | Missed | WE\n"
		       " face | tus | link level noise | nwid crypt frag retry misc | beacon | %d\n",
		       WIRELESS_EXT);

	pos += size;
	len += size;

	/* One line per device, under the device list read lock
	 * (devices without wireless stats contribute nothing) */
	read_lock(&dev_base_lock);
	for (dev = dev_base; dev != NULL; dev = dev->next) {
		size = sprintf_wireless_stats(buffer + len, dev);
		len += size;
		pos = begin + len;

		if (pos < offset) {
			/* Everything so far lies before the requested
			 * window : discard it and restart at this offset */
			len = 0;
			begin = pos;
		}
		if (pos > offset + length)
			/* Past the end of the window : we have enough */
			break;
	}
	read_unlock(&dev_base_lock);

	*start = buffer + (offset - begin);	/* Start of wanted data */
	len -= (offset - begin);	/* Start slop */
	if (len > length)
		len = length;	/* Ending slop */
	if (len < 0)
		len = 0;

	return len;
}
#endif /* CONFIG_PROC_FS */ |
|
/************************** IOCTL SUPPORT **************************/ |
/* |
* The original user space API to configure all those Wireless Extensions |
* is through IOCTLs. |
* In there, we check if we need to call the new driver API (iw_handler) |
* or just call the driver ioctl handler. |
*/ |
|
/* ---------------------------------------------------------------- */ |
/* |
 * Allow programmatic access to /proc/net/wireless even if /proc
* doesn't exist... Also more efficient... |
*/ |
static inline int dev_iwstats(struct net_device *dev, struct ifreq *ifr) |
{ |
/* Get stats from the driver */ |
struct iw_statistics *stats; |
|
stats = get_wireless_stats(dev); |
if (stats != (struct iw_statistics *) NULL) { |
struct iwreq * wrq = (struct iwreq *)ifr; |
|
/* Copy statistics to the user buffer */ |
if(copy_to_user(wrq->u.data.pointer, stats, |
sizeof(struct iw_statistics))) |
return -EFAULT; |
|
/* Check if we need to clear the update flag */ |
if(wrq->u.data.flags != 0) |
stats->qual.updated = 0; |
return 0; |
} else |
return -EOPNOTSUPP; |
} |
|
/* ---------------------------------------------------------------- */ |
/* |
* Export the driver private handler definition |
* They will be picked up by tools like iwpriv... |
*/ |
static inline int ioctl_export_private(struct net_device * dev, |
struct ifreq * ifr) |
{ |
struct iwreq * iwr = (struct iwreq *) ifr; |
|
/* Check if the driver has something to export */ |
if((dev->wireless_handlers->num_private_args == 0) || |
(dev->wireless_handlers->private_args == NULL)) |
return -EOPNOTSUPP; |
|
/* Check NULL pointer */ |
if(iwr->u.data.pointer == NULL) |
return -EFAULT; |
#ifdef WE_STRICT_WRITE |
/* Check if there is enough buffer up there */ |
if(iwr->u.data.length < dev->wireless_handlers->num_private_args) { |
printk(KERN_ERR "%s (WE) : Buffer for request SIOCGIWPRIV too small (%d<%d)\n", dev->name, iwr->u.data.length, dev->wireless_handlers->num_private_args); |
return -E2BIG; |
} |
#endif /* WE_STRICT_WRITE */ |
|
/* Set the number of available ioctls. */ |
iwr->u.data.length = dev->wireless_handlers->num_private_args; |
|
/* Copy structure to the user buffer. */ |
if (copy_to_user(iwr->u.data.pointer, |
dev->wireless_handlers->private_args, |
sizeof(struct iw_priv_args) * iwr->u.data.length)) |
return -EFAULT; |
|
return 0; |
} |
|
/* ---------------------------------------------------------------- */ |
/* |
* Wrapper to call a standard Wireless Extension handler. |
* We do various checks and also take care of moving data between |
* user space and kernel space. |
*/ |
static inline int ioctl_standard_call(struct net_device * dev,
				      struct ifreq * ifr,
				      unsigned int cmd,
				      iw_handler handler)
{
	struct iwreq * iwr = (struct iwreq *) ifr;
	const struct iw_ioctl_description * descr;
	struct iw_request_info info;
	int ret = -EINVAL;
	int user_size = 0;	/* GET only : room claimed by user space */

	/* Get the description of the IOCTL */
	if((cmd - SIOCIWFIRST) >= standard_ioctl_num)
		return -EOPNOTSUPP;
	descr = &(standard_ioctl[cmd - SIOCIWFIRST]);

#ifdef WE_IOCTL_DEBUG
	printk(KERN_DEBUG "%s (WE) : Found standard handler for 0x%04X\n",
	       ifr->ifr_name, cmd);
	printk(KERN_DEBUG "%s (WE) : Header type : %d, Token type : %d, size : %d, token : %d\n", dev->name, descr->header_type, descr->token_type, descr->token_size, descr->max_tokens);
#endif	/* WE_IOCTL_DEBUG */

	/* Prepare the call */
	info.cmd = cmd;
	info.flags = 0;

	/* Check if we have a pointer to user space data or not */
	if(descr->header_type != IW_HEADER_TYPE_POINT) {

		/* No extra arguments. Trivial to handle */
		ret = handler(dev, &info, &(iwr->u), NULL);

#ifdef WE_SET_EVENT
		/* Generate an event to notify listeners of the change */
		if((descr->flags & IW_DESCR_FLAG_EVENT) &&
		   ((ret == 0) || (ret == -EIWCOMMIT)))
			wireless_send_event(dev, cmd, &(iwr->u), NULL);
#endif	/* WE_SET_EVENT */
	} else {
		char * extra;
		int err;

		/* Check what user space is giving us */
		if(IW_IS_SET(cmd)) {
			/* Check NULL pointer */
			if((iwr->u.data.pointer == NULL) &&
			   (iwr->u.data.length != 0))
				return -EFAULT;
			/* Check if number of token fits within bounds */
			if(iwr->u.data.length > descr->max_tokens)
				return -E2BIG;
			if(iwr->u.data.length < descr->min_tokens)
				return -EINVAL;
		} else {
			/* Check NULL pointer */
			if(iwr->u.data.pointer == NULL)
				return -EFAULT;
			/* Save user space buffer size for checking */
			user_size = iwr->u.data.length;
		}

#ifdef WE_IOCTL_DEBUG
		printk(KERN_DEBUG "%s (WE) : Malloc %d bytes\n",
		       dev->name, descr->max_tokens * descr->token_size);
#endif	/* WE_IOCTL_DEBUG */

		/* Always allocate for max space. Easier, and won't last
		 * long... */
		extra = kmalloc(descr->max_tokens * descr->token_size,
				GFP_KERNEL);
		if (extra == NULL) {
			return -ENOMEM;
		}

		/* If it is a SET, get all the extra data in here */
		if(IW_IS_SET(cmd) && (iwr->u.data.length != 0)) {
			err = copy_from_user(extra, iwr->u.data.pointer,
					     iwr->u.data.length *
					     descr->token_size);
			if (err) {
				/* Don't leak the kernel buffer */
				kfree(extra);
				return -EFAULT;
			}
#ifdef WE_IOCTL_DEBUG
			printk(KERN_DEBUG "%s (WE) : Got %d bytes\n",
			       dev->name,
			       iwr->u.data.length * descr->token_size);
#endif	/* WE_IOCTL_DEBUG */
		}

		/* Call the handler */
		ret = handler(dev, &info, &(iwr->u), extra);

		/* If we have something to return to the user */
		if (!ret && IW_IS_GET(cmd)) {
#ifdef WE_STRICT_WRITE
			/* Check if there is enough buffer up there.
			 * Note : the handler may have updated
			 * iwr->u.data.length to the number of tokens it
			 * actually produced, hence this post-call check. */
			if(user_size < iwr->u.data.length) {
				printk(KERN_ERR "%s (WE) : Buffer for request %04X too small (%d<%d)\n", dev->name, cmd, user_size, iwr->u.data.length);
				kfree(extra);
				return -E2BIG;
			}
#endif	/* WE_STRICT_WRITE */

			err = copy_to_user(iwr->u.data.pointer, extra,
					   iwr->u.data.length *
					   descr->token_size);
			if (err)
				ret =  -EFAULT;
#ifdef WE_IOCTL_DEBUG
			printk(KERN_DEBUG "%s (WE) : Wrote %d bytes\n",
			       dev->name,
			       iwr->u.data.length * descr->token_size);
#endif	/* WE_IOCTL_DEBUG */
		}

#ifdef WE_SET_EVENT
		/* Generate an event to notify listeners of the change */
		if((descr->flags & IW_DESCR_FLAG_EVENT) &&
		   ((ret == 0) || (ret == -EIWCOMMIT))) {
			if(descr->flags & IW_DESCR_FLAG_RESTRICT)
				/* If the event is restricted, don't
				 * export the payload */
				wireless_send_event(dev, cmd, &(iwr->u), NULL);
			else
				wireless_send_event(dev, cmd, &(iwr->u),
						    extra);
		}
#endif	/* WE_SET_EVENT */

		/* Cleanup - I told you it wasn't that long ;-) */
		kfree(extra);
	}

	/* Call commit handler if needed and defined */
	if(ret == -EIWCOMMIT)
		ret = call_commit_handler(dev);

	/* Here, we will generate the appropriate event if needed */

	return ret;
}
|
/* ---------------------------------------------------------------- */ |
/* |
* Wrapper to call a private Wireless Extension handler. |
* We do various checks and also take care of moving data between |
* user space and kernel space. |
* It's not as nice and slimline as the standard wrapper. The cause |
* is struct iw_priv_args, which was not really designed for the |
 * job we are doing here.
* |
 * IMPORTANT : This function prevents setting and getting data on the same
* IOCTL and enforce the SET/GET convention. Not doing it would be |
* far too hairy... |
* If you need to set and get data at the same time, please don't use |
* a iw_handler but process it in your ioctl handler (i.e. use the |
* old driver API). |
*/ |
static inline int ioctl_private_call(struct net_device * dev,
				     struct ifreq * ifr,
				     unsigned int cmd,
				     iw_handler handler)
{
	struct iwreq * iwr = (struct iwreq *) ifr;
	struct iw_priv_args * descr = NULL;	/* Stays NULL if the driver
						 * gave us no description
						 * for this cmd */
	struct iw_request_info info;
	int extra_size = 0;	/* 0 means the args fit inline in iwr */
	int i;
	int ret = -EINVAL;

	/* Get the description of the IOCTL */
	for(i = 0; i < dev->wireless_handlers->num_private_args; i++)
		if(cmd == dev->wireless_handlers->private_args[i].cmd) {
			descr = &(dev->wireless_handlers->private_args[i]);
			break;
		}

#ifdef WE_IOCTL_DEBUG
	printk(KERN_DEBUG "%s (WE) : Found private handler for 0x%04X\n",
	       ifr->ifr_name, cmd);
	if(descr) {
		printk(KERN_DEBUG "%s (WE) : Name %s, set %X, get %X\n",
		       dev->name, descr->name,
		       descr->set_args, descr->get_args);
	}
#endif	/* WE_IOCTL_DEBUG */

	/* Compute the size of the set/get arguments */
	if(descr != NULL) {
		if(IW_IS_SET(cmd)) {
			int offset = 0; /* For sub-ioctls */
			/* Check for sub-ioctl handler */
			if(descr->name[0] == '\0')
				/* Reserve one int for sub-ioctl index */
				offset = sizeof(__u32);

			/* Size of set arguments */
			extra_size = get_priv_size(descr->set_args);

			/* Does it fits in iwr ? */
			if((descr->set_args & IW_PRIV_SIZE_FIXED) &&
			   ((extra_size + offset) <= IFNAMSIZ))
				extra_size = 0;
		} else {
			/* Size of get arguments */
			extra_size = get_priv_size(descr->get_args);

			/* Does it fits in iwr ? */
			if((descr->get_args & IW_PRIV_SIZE_FIXED) &&
			   (extra_size <= IFNAMSIZ))
				extra_size = 0;
		}
	}

	/* Prepare the call */
	info.cmd = cmd;
	info.flags = 0;

	/* Check if we have a pointer to user space data or not.
	 * Note : extra_size != 0 implies descr != NULL (it is only set
	 * inside the descr != NULL branch above), so the descr
	 * dereference in the else branch below is safe. */
	if(extra_size == 0) {
		/* No extra arguments. Trivial to handle */
		ret = handler(dev, &info, &(iwr->u), (char *) &(iwr->u));
	} else {
		char * extra;
		int err;

		/* Check what user space is giving us */
		if(IW_IS_SET(cmd)) {
			/* Check NULL pointer */
			if((iwr->u.data.pointer == NULL) &&
			   (iwr->u.data.length != 0))
				return -EFAULT;

			/* Does it fits within bounds ? */
			if(iwr->u.data.length > (descr->set_args &
						 IW_PRIV_SIZE_MASK))
				return -E2BIG;
		} else {
			/* Check NULL pointer */
			if(iwr->u.data.pointer == NULL)
				return -EFAULT;
		}

#ifdef WE_IOCTL_DEBUG
		printk(KERN_DEBUG "%s (WE) : Malloc %d bytes\n",
		       dev->name, extra_size);
#endif	/* WE_IOCTL_DEBUG */

		/* Always allocate for max space. Easier, and won't last
		 * long... */
		extra = kmalloc(extra_size, GFP_KERNEL);
		if (extra == NULL) {
			return -ENOMEM;
		}

		/* If it is a SET, get all the extra data in here */
		if(IW_IS_SET(cmd) && (iwr->u.data.length != 0)) {
			err = copy_from_user(extra, iwr->u.data.pointer,
					     extra_size);
			if (err) {
				/* Don't leak the kernel buffer */
				kfree(extra);
				return -EFAULT;
			}
#ifdef WE_IOCTL_DEBUG
			printk(KERN_DEBUG "%s (WE) : Got %d elem\n",
			       dev->name, iwr->u.data.length);
#endif	/* WE_IOCTL_DEBUG */
		}

		/* Call the handler */
		ret = handler(dev, &info, &(iwr->u), extra);

		/* If we have something to return to the user */
		if (!ret && IW_IS_GET(cmd)) {
			err = copy_to_user(iwr->u.data.pointer, extra,
					   extra_size);
			if (err)
				ret =  -EFAULT;
#ifdef WE_IOCTL_DEBUG
			printk(KERN_DEBUG "%s (WE) : Wrote %d elem\n",
			       dev->name, iwr->u.data.length);
#endif	/* WE_IOCTL_DEBUG */
		}

		/* Cleanup - I told you it wasn't that long ;-) */
		kfree(extra);
	}


	/* Call commit handler if needed and defined */
	if(ret == -EIWCOMMIT)
		ret = call_commit_handler(dev);

	return ret;
}
|
/* ---------------------------------------------------------------- */ |
/* |
* Main IOCTl dispatcher. Called from the main networking code |
* (dev_ioctl() in net/core/dev.c). |
* Check the type of IOCTL and call the appropriate wrapper... |
*/ |
int wireless_process_ioctl(struct ifreq *ifr, unsigned int cmd)
{
	struct net_device *dev;
	iw_handler handler;

	/* Permissions are already checked in dev_ioctl() before calling us.
	 * The copy_to/from_user() of ifr is also dealt with in there */

	/* Make sure the device exist.
	 * NOTE(review) : __dev_get_by_name() takes no reference on the
	 * device - presumably the caller holds the appropriate lock
	 * (dev_ioctl()) ; confirm against the caller. */
	if ((dev = __dev_get_by_name(ifr->ifr_name)) == NULL)
		return -ENODEV;

	/* A bunch of special cases, then the generic case...
	 * Note that 'cmd' is already filtered in dev_ioctl() with
	 * (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) */
	switch(cmd)
	{
		case SIOCGIWSTATS:
			/* Get Wireless Stats */
			return dev_iwstats(dev, ifr);

		case SIOCGIWPRIV:
			/* Check if we have some wireless handlers defined */
			if(dev->wireless_handlers != NULL) {
				/* We export to user space the definition of
				 * the private handler ourselves */
				return ioctl_export_private(dev, ifr);
			}
			// ## Fall-through for old API ##
		default:
			/* Generic IOCTL */
			/* Basic check */
			if (!netif_device_present(dev))
				return -ENODEV;
			/* New driver API : try to find the handler */
			handler = get_handler(dev, cmd);
			if(handler != NULL) {
				/* Standard and private are not the same */
				if(cmd < SIOCIWFIRSTPRIV)
					return ioctl_standard_call(dev,
								   ifr,
								   cmd,
								   handler);
				else
					return ioctl_private_call(dev,
								  ifr,
								  cmd,
								  handler);
			}
			/* Old driver API : call driver ioctl handler */
			if (dev->do_ioctl) {
				return dev->do_ioctl(dev, ifr, cmd);
			}
			return -EOPNOTSUPP;
	}
	/* Not reached */
	return -EINVAL;
}
|
/************************* EVENT PROCESSING *************************/ |
/* |
* Process events generated by the wireless layer or the driver. |
* Most often, the event will be propagated through rtnetlink |
*/ |
|
#ifdef WE_EVENT_NETLINK |
/* "rtnl" is defined in net/core/rtnetlink.c, but we need it here. |
* It is declared in <linux/rtnetlink.h> */ |
|
/* ---------------------------------------------------------------- */ |
/* |
* Fill a rtnetlink message with our event data. |
 * Note that we propagate only the specified event and don't dump the
* current wireless config. Dumping the wireless config is far too |
* expensive (for each parameter, the driver need to query the hardware). |
*/ |
static inline int rtnetlink_fill_iwinfo(struct sk_buff * skb,
					struct net_device * dev,
					int type,
					char * event,
					int event_len)
{
	struct ifinfomsg *r;
	struct nlmsghdr *nlh;
	unsigned char *b = skb->tail;	/* Start of our message, kept to
					 * compute its final length and to
					 * roll back on failure */

	/* Careful : NLMSG_PUT() and RTA_PUT() contain hidden gotos to the
	 * nlmsg_failure/rtattr_failure labels below when the skb runs out
	 * of room */
	nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(*r));
	r = NLMSG_DATA(nlh);
	r->ifi_family = AF_UNSPEC;
	r->ifi_type = dev->type;
	r->ifi_index = dev->ifindex;
	r->ifi_flags = dev->flags;
	r->ifi_change = 0;	/* Wireless changes don't affect those flags */

	/* Add the wireless events in the netlink packet */
	RTA_PUT(skb, IFLA_WIRELESS,
		event_len, event);

	nlh->nlmsg_len = skb->tail - b;
	return skb->len;

nlmsg_failure:
rtattr_failure:
	/* Undo the partially built message */
	skb_trim(skb, b - skb->data);
	return -1;
}
|
/* ---------------------------------------------------------------- */ |
/* |
 * Create an rtnetlink message and broadcast it on the standard
 * rtnetlink socket. This is a pure clone of rtmsg_ifinfo() in
 * net/core/rtnetlink.c
* Andrzej Krzysztofowicz mandated that I used a IFLA_XXX field |
* within a RTM_NEWLINK event. |
*/ |
static inline void rtmsg_iwinfo(struct net_device * dev, |
char * event, |
int event_len) |
{ |
struct sk_buff *skb; |
int size = NLMSG_GOODSIZE; |
|
skb = alloc_skb(size, GFP_ATOMIC); |
if (!skb) |
return; |
|
if (rtnetlink_fill_iwinfo(skb, dev, RTM_NEWLINK, |
event, event_len) < 0) { |
kfree_skb(skb); |
return; |
} |
NETLINK_CB(skb).dst_groups = RTMGRP_LINK; |
netlink_broadcast(rtnl, skb, 0, RTMGRP_LINK, GFP_ATOMIC); |
} |
#endif /* WE_EVENT_NETLINK */ |
|
/* ---------------------------------------------------------------- */ |
/* |
* Main event dispatcher. Called from other parts and drivers. |
 * Send the event on the appropriate channels.
* May be called from interrupt context. |
*/ |
void wireless_send_event(struct net_device * dev,
			 unsigned int cmd,
			 union iwreq_data * wrqu,
			 char * extra)
{
	const struct iw_ioctl_description * descr = NULL;
	int extra_len = 0;
	struct iw_event *event;		/* Mallocated whole event */
	int event_len;			/* Its size */
	int hdr_len;			/* Size of the event header */
	/* Don't "optimise" the following variable, it will crash */
	unsigned cmd_index;		/* *MUST* be unsigned */

	/* Get the description of the IOCTL : set/get commands are looked up
	 * in standard_ioctl[], pure events (IWEVXXX) in standard_event[] */
	if(cmd <= SIOCIWLAST) {
		cmd_index = cmd - SIOCIWFIRST;
		if(cmd_index < standard_ioctl_num)
			descr = &(standard_ioctl[cmd_index]);
	} else {
		cmd_index = cmd - IWEVFIRST;
		if(cmd_index < standard_event_num)
			descr = &(standard_event[cmd_index]);
	}
	/* Don't accept unknown events */
	if(descr == NULL) {
		/* Note : we don't return an error to the driver, because
		 * the driver would not know what to do about it. It can't
		 * return an error to the user, because the event is not
		 * initiated by a user request.
		 * The best the driver could do is to log an error message.
		 * We will do it ourselves instead...
		 */
		printk(KERN_ERR "%s (WE) : Invalid/Unknown Wireless Event (0x%04X)\n",
		       dev->name, cmd);
		return;
	}
#ifdef WE_EVENT_DEBUG
	printk(KERN_DEBUG "%s (WE) : Got event 0x%04X\n",
	       dev->name, cmd);
	printk(KERN_DEBUG "%s (WE) : Header type : %d, Token type : %d, size : %d, token : %d\n", dev->name, descr->header_type, descr->token_type, descr->token_size, descr->max_tokens);
#endif	/* WE_EVENT_DEBUG */

	/* Check extra parameters and set extra_len */
	if(descr->header_type == IW_HEADER_TYPE_POINT) {
		/* Check if number of token fits within bounds */
		if(wrqu->data.length > descr->max_tokens) {
			printk(KERN_ERR "%s (WE) : Wireless Event too big (%d)\n", dev->name, wrqu->data.length);
			return;
		}
		if(wrqu->data.length < descr->min_tokens) {
			printk(KERN_ERR "%s (WE) : Wireless Event too small (%d)\n", dev->name, wrqu->data.length);
			return;
		}
		/* Calculate extra_len - extra is NULL for restricted events */
		if(extra != NULL)
			extra_len = wrqu->data.length * descr->token_size;
#ifdef WE_EVENT_DEBUG
		printk(KERN_DEBUG "%s (WE) : Event 0x%04X, tokens %d, extra_len %d\n", dev->name, cmd, wrqu->data.length, extra_len);
#endif	/* WE_EVENT_DEBUG */
	}

	/* Total length of the event */
	hdr_len = event_type_size[descr->header_type];
	event_len = hdr_len + extra_len;

#ifdef WE_EVENT_DEBUG
	printk(KERN_DEBUG "%s (WE) : Event 0x%04X, hdr_len %d, event_len %d\n", dev->name, cmd, hdr_len, event_len);
#endif	/* WE_EVENT_DEBUG */

	/* Create temporary buffer to hold the event.
	 * GFP_ATOMIC because this may be called from interrupt context ;
	 * on failure the event is silently dropped. */
	event = kmalloc(event_len, GFP_ATOMIC);
	if(event == NULL)
		return;

	/* Fill event : hdr_len includes the len/cmd prefix (IW_EV_LCP_LEN)
	 * which is filled explicitly just below, so only the remainder of
	 * the header is copied from wrqu */
	event->len = event_len;
	event->cmd = cmd;
	memcpy(&event->u, wrqu, hdr_len - IW_EV_LCP_LEN);
	if(extra != NULL)
		memcpy(((char *) event) + hdr_len, extra, extra_len);

#ifdef WE_EVENT_NETLINK
	/* rtnetlink event channel */
	rtmsg_iwinfo(dev, (char *) event, event_len);
#endif	/* WE_EVENT_NETLINK */

	/* Cleanup */
	kfree(event);

	return;		/* Always success, I guess ;-) */
}
|
/********************** ENHANCED IWSPY SUPPORT **********************/ |
/* |
* In the old days, the driver was handling spy support all by itself. |
* Now, the driver can delegate this task to Wireless Extensions. |
* It needs to use those standard spy iw_handler in struct iw_handler_def, |
 * push data to us via wireless_spy_update() and include struct iw_spy_data in its
* private part. |
* One of the main advantage of centralising spy support here is that |
* it becomes much easier to improve and extend it without having to touch |
* the drivers. One example is the addition of the Spy-Threshold events. |
* Note : IW_WIRELESS_SPY is defined in iw_handler.h |
*/ |
|
/*------------------------------------------------------------------*/ |
/* |
* Standard Wireless Handler : set Spy List |
*/ |
int iw_handler_set_spy(struct net_device * dev,
		       struct iw_request_info * info,
		       union iwreq_data * wrqu,
		       char * extra)
{
#ifdef IW_WIRELESS_SPY
	/* The spy data lives in the driver's private area, at an offset
	 * the driver declared in its iw_handler_def */
	struct iw_spy_data * spydata = (dev->priv +
					dev->wireless_handlers->spy_offset);
	struct sockaddr * address = (struct sockaddr *) extra;

	/* Disable spy collection while we copy the addresses.
	 * As we don't disable interrupts, we need to do this to avoid races.
	 * As we are the only writer, this is good enough. */
	spydata->spy_number = 0;

	/* Are there any addresses to copy? */
	if(wrqu->data.length > 0) {
		int i;

		/* Copy addresses */
		for(i = 0; i < wrqu->data.length; i++)
			memcpy(spydata->spy_address[i], address[i].sa_data,
			       ETH_ALEN);
		/* Reset stats */
		memset(spydata->spy_stat, 0,
		       sizeof(struct iw_quality) * IW_MAX_SPY);

#ifdef WE_SPY_DEBUG
		printk(KERN_DEBUG "iw_handler_set_spy() : offset %ld, spydata %p, num %d\n", dev->wireless_handlers->spy_offset, spydata, wrqu->data.length);
		for (i = 0; i < wrqu->data.length; i++)
			printk(KERN_DEBUG
			       "%02X:%02X:%02X:%02X:%02X:%02X \n",
			       spydata->spy_address[i][0],
			       spydata->spy_address[i][1],
			       spydata->spy_address[i][2],
			       spydata->spy_address[i][3],
			       spydata->spy_address[i][4],
			       spydata->spy_address[i][5]);
#endif	/* WE_SPY_DEBUG */
	}
	/* Enable addresses - re-enables collection (see comment above) */
	spydata->spy_number = wrqu->data.length;

	return 0;
#else /* IW_WIRELESS_SPY */
	return -EOPNOTSUPP;
#endif /* IW_WIRELESS_SPY */
}
|
/*------------------------------------------------------------------*/ |
/* |
* Standard Wireless Handler : get Spy List |
*/ |
int iw_handler_get_spy(struct net_device * dev,
		       struct iw_request_info * info,
		       union iwreq_data * wrqu,
		       char * extra)
{
#ifdef IW_WIRELESS_SPY
	/* The spy data lives in the driver's private area, at an offset
	 * the driver declared in its iw_handler_def */
	struct iw_spy_data * spydata = (dev->priv +
					dev->wireless_handlers->spy_offset);
	struct sockaddr * address = (struct sockaddr *) extra;
	int i;

	/* Tell the caller how many entries follow */
	wrqu->data.length = spydata->spy_number;

	/* Copy addresses. */
	for(i = 0; i < spydata->spy_number; i++) {
		memcpy(address[i].sa_data, spydata->spy_address[i], ETH_ALEN);
		address[i].sa_family = AF_UNIX;
	}
	/* Copy stats to the user buffer (just after the addresses). */
	if(spydata->spy_number > 0)
		memcpy(extra + (sizeof(struct sockaddr) *spydata->spy_number),
		       spydata->spy_stat,
		       sizeof(struct iw_quality) * spydata->spy_number);
	/* Reset updated flags - reading the stats consumes the markers. */
	for(i = 0; i < spydata->spy_number; i++)
		spydata->spy_stat[i].updated = 0;
	return 0;
#else /* IW_WIRELESS_SPY */
	return -EOPNOTSUPP;
#endif /* IW_WIRELESS_SPY */
}
|
/*------------------------------------------------------------------*/ |
/* |
* Standard Wireless Handler : set spy threshold |
*/ |
int iw_handler_set_thrspy(struct net_device * dev, |
struct iw_request_info *info, |
union iwreq_data * wrqu, |
char * extra) |
{ |
#ifdef IW_WIRELESS_THRSPY |
struct iw_spy_data * spydata = (dev->priv + |
dev->wireless_handlers->spy_offset); |
struct iw_thrspy * threshold = (struct iw_thrspy *) extra; |
|
/* Just do it */ |
memcpy(&(spydata->spy_thr_low), &(threshold->low), |
2 * sizeof(struct iw_quality)); |
|
/* Clear flag */ |
memset(spydata->spy_thr_under, '\0', sizeof(spydata->spy_thr_under)); |
|
#ifdef WE_SPY_DEBUG |
printk(KERN_DEBUG "iw_handler_set_thrspy() : low %d ; high %d\n", spydata->spy_thr_low.level, spydata->spy_thr_high.level); |
#endif /* WE_SPY_DEBUG */ |
|
return 0; |
#else /* IW_WIRELESS_THRSPY */ |
return -EOPNOTSUPP; |
#endif /* IW_WIRELESS_THRSPY */ |
} |
|
/*------------------------------------------------------------------*/ |
/* |
* Standard Wireless Handler : get spy threshold |
*/ |
int iw_handler_get_thrspy(struct net_device * dev, |
struct iw_request_info *info, |
union iwreq_data * wrqu, |
char * extra) |
{ |
#ifdef IW_WIRELESS_THRSPY |
struct iw_spy_data * spydata = (dev->priv + |
dev->wireless_handlers->spy_offset); |
struct iw_thrspy * threshold = (struct iw_thrspy *) extra; |
|
/* Just do it */ |
memcpy(&(threshold->low), &(spydata->spy_thr_low), |
2 * sizeof(struct iw_quality)); |
|
return 0; |
#else /* IW_WIRELESS_THRSPY */ |
return -EOPNOTSUPP; |
#endif /* IW_WIRELESS_THRSPY */ |
} |
|
#ifdef IW_WIRELESS_THRSPY |
/*------------------------------------------------------------------*/ |
/* |
* Prepare and send a Spy Threshold event |
*/ |
static void iw_send_thrspy_event(struct net_device * dev, |
struct iw_spy_data * spydata, |
unsigned char * address, |
struct iw_quality * wstats) |
{ |
union iwreq_data wrqu; |
struct iw_thrspy threshold; |
|
/* Init */ |
wrqu.data.length = 1; |
wrqu.data.flags = 0; |
/* Copy address */ |
memcpy(threshold.addr.sa_data, address, ETH_ALEN); |
threshold.addr.sa_family = ARPHRD_ETHER; |
/* Copy stats */ |
memcpy(&(threshold.qual), wstats, sizeof(struct iw_quality)); |
/* Copy also thresholds */ |
memcpy(&(threshold.low), &(spydata->spy_thr_low), |
2 * sizeof(struct iw_quality)); |
|
#ifdef WE_SPY_DEBUG |
printk(KERN_DEBUG "iw_send_thrspy_event() : address %02X:%02X:%02X:%02X:%02X:%02X, level %d, up = %d\n", |
threshold.addr.sa_data[0], |
threshold.addr.sa_data[1], |
threshold.addr.sa_data[2], |
threshold.addr.sa_data[3], |
threshold.addr.sa_data[4], |
threshold.addr.sa_data[5], threshold.qual.level); |
#endif /* WE_SPY_DEBUG */ |
|
/* Send event to user space */ |
wireless_send_event(dev, SIOCGIWTHRSPY, &wrqu, (char *) &threshold); |
} |
#endif /* IW_WIRELESS_THRSPY */ |
|
/* ---------------------------------------------------------------- */ |
/* |
* Call for the driver to update the spy data. |
* For now, the spy data is a simple array. As the size of the array is |
* small, this is good enough. If we wanted to support larger number of |
* spy addresses, we should use something more efficient... |
*/ |
void wireless_spy_update(struct net_device *	dev,
			 unsigned char *	address,
			 struct iw_quality *	wstats)
{
#ifdef IW_WIRELESS_SPY
	struct iw_spy_data *	spydata = (dev->priv +
					   dev->wireless_handlers->spy_offset);
	int			index;
	int			found = -1;

#ifdef WE_SPY_DEBUG
	printk(KERN_DEBUG "wireless_spy_update() : offset %ld, spydata %p, address %02X:%02X:%02X:%02X:%02X:%02X\n", dev->wireless_handlers->spy_offset, spydata, address[0], address[1], address[2], address[3], address[4], address[5]);
#endif /* WE_SPY_DEBUG */

	/* Refresh every record that tracks this address */
	for(index = 0; index < spydata->spy_number; index++) {
		if(memcmp(address, spydata->spy_address[index], ETH_ALEN) != 0)
			continue;
		memcpy(&(spydata->spy_stat[index]), wstats,
		       sizeof(struct iw_quality));
		found = index;
	}
#ifdef IW_WIRELESS_THRSPY
	/* Threshold crossing with hysteresis : an event fires only when
	 * the level rises above the high mark while we were "under", or
	 * drops below the low mark while we were not — this keeps a noisy
	 * signal from generating an event storm. */
	if(found >= 0) {
		int	was_under = spydata->spy_thr_under[found];

		if(was_under && (wstats->level > spydata->spy_thr_high.level)) {
			/* Climbed back above the high water mark */
			spydata->spy_thr_under[found] = 0;
			iw_send_thrspy_event(dev, spydata, address, wstats);
		} else if((!was_under) &&
			  (wstats->level < spydata->spy_thr_low.level)) {
			/* Dropped below the low water mark */
			spydata->spy_thr_under[found] = 1;
			iw_send_thrspy_event(dev, spydata, address, wstats);
		}
	}
#endif /* IW_WIRELESS_THRSPY */
#endif /* IW_WIRELESS_SPY */
}