/* netfilter.c: look after the filters for various protocols.
 * Heavily influenced by the old firewall.c by David Bonn and Alan Cox.
 *
 * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
 * way.
 *
 * Rusty Russell (C)2000 -- This code is GPL.
 *
 * February 2000: Modified by James Morris to have 1 queue per protocol.
 * 15-Mar-2000: Added NF_REPEAT --RR.
 */
|
#include <linux/config.h>
|
#include <linux/config.h>
|
#include <linux/netfilter.h>
|
#include <linux/netfilter.h>
|
#include <net/protocol.h>
|
#include <net/protocol.h>
|
#include <linux/init.h>
|
#include <linux/init.h>
|
#include <linux/skbuff.h>
|
#include <linux/skbuff.h>
|
#include <linux/wait.h>
|
#include <linux/wait.h>
|
#include <linux/module.h>
|
#include <linux/module.h>
|
#include <linux/interrupt.h>
|
#include <linux/interrupt.h>
|
#include <linux/if.h>
|
#include <linux/if.h>
|
#include <linux/netdevice.h>
|
#include <linux/netdevice.h>
|
#include <linux/brlock.h>
|
#include <linux/brlock.h>
|
#include <linux/inetdevice.h>
|
#include <linux/inetdevice.h>
|
#include <net/sock.h>
|
#include <net/sock.h>
|
#include <net/route.h>
|
#include <net/route.h>
|
#include <linux/ip.h>
|
#include <linux/ip.h>
|
|
|
#define __KERNEL_SYSCALLS__
|
#define __KERNEL_SYSCALLS__
|
#include <linux/unistd.h>
|
#include <linux/unistd.h>
|
|
|
/* In this code, we can be waiting indefinitely for userspace to
 * service a packet if a hook returns NF_QUEUE.  We could keep a count
 * of skbuffs queued for userspace, and not deregister a hook unless
 * this is zero, but that sucks.  Now, we simply check when the
 * packets come back: if the hook is gone, the packet is discarded. */

/* NFDEBUG() forwards its arguments to printk() when
 * CONFIG_NETFILTER_DEBUG is defined and expands to nothing otherwise. */
#ifdef CONFIG_NETFILTER_DEBUG
#define NFDEBUG(format, args...)  printk(format , ## args)
#else
#define NFDEBUG(format, args...)
#endif
|
|
|
/* Sockopts only registered and called from user context, so
|
/* Sockopts only registered and called from user context, so
|
BR_NETPROTO_LOCK would be overkill. Also, [gs]etsockopt calls may
|
BR_NETPROTO_LOCK would be overkill. Also, [gs]etsockopt calls may
|
sleep. */
|
sleep. */
|
static DECLARE_MUTEX(nf_sockopt_mutex);
|
static DECLARE_MUTEX(nf_sockopt_mutex);
|
|
|
struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
|
struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
|
static LIST_HEAD(nf_sockopts);
|
static LIST_HEAD(nf_sockopts);
|
|
|
/*
|
/*
|
* A queue handler may be registered for each protocol. Each is protected by
|
* A queue handler may be registered for each protocol. Each is protected by
|
* long term mutex. The handler must provide an an outfn() to accept packets
|
* long term mutex. The handler must provide an an outfn() to accept packets
|
* for queueing and must reinject all packets it receives, no matter what.
|
* for queueing and must reinject all packets it receives, no matter what.
|
*/
|
*/
|
static struct nf_queue_handler_t {
|
static struct nf_queue_handler_t {
|
nf_queue_outfn_t outfn;
|
nf_queue_outfn_t outfn;
|
void *data;
|
void *data;
|
} queue_handler[NPROTO];
|
} queue_handler[NPROTO];
|
|
|
int nf_register_hook(struct nf_hook_ops *reg)
|
int nf_register_hook(struct nf_hook_ops *reg)
|
{
|
{
|
struct list_head *i;
|
struct list_head *i;
|
|
|
br_write_lock_bh(BR_NETPROTO_LOCK);
|
br_write_lock_bh(BR_NETPROTO_LOCK);
|
for (i = nf_hooks[reg->pf][reg->hooknum].next;
|
for (i = nf_hooks[reg->pf][reg->hooknum].next;
|
i != &nf_hooks[reg->pf][reg->hooknum];
|
i != &nf_hooks[reg->pf][reg->hooknum];
|
i = i->next) {
|
i = i->next) {
|
if (reg->priority < ((struct nf_hook_ops *)i)->priority)
|
if (reg->priority < ((struct nf_hook_ops *)i)->priority)
|
break;
|
break;
|
}
|
}
|
list_add(®->list, i->prev);
|
list_add(®->list, i->prev);
|
br_write_unlock_bh(BR_NETPROTO_LOCK);
|
br_write_unlock_bh(BR_NETPROTO_LOCK);
|
return 0;
|
return 0;
|
}
|
}
|
|
|
void nf_unregister_hook(struct nf_hook_ops *reg)
|
void nf_unregister_hook(struct nf_hook_ops *reg)
|
{
|
{
|
br_write_lock_bh(BR_NETPROTO_LOCK);
|
br_write_lock_bh(BR_NETPROTO_LOCK);
|
list_del(®->list);
|
list_del(®->list);
|
br_write_unlock_bh(BR_NETPROTO_LOCK);
|
br_write_unlock_bh(BR_NETPROTO_LOCK);
|
}
|
}
|
|
|
/* Do exclusive ranges overlap?
 *
 * Each range is [min, max) -- max itself is not part of the range, so
 * two ranges that merely touch (max1 == min2) do not overlap.
 * Returns 1 if they share at least one value, 0 otherwise. */
static inline int overlap(int min1, int max1, int min2, int max2)
{
	return max1 > min2 && min1 < max2;
}
|
|
|
/* Functions to register sockopt ranges (exclusive). */
|
/* Functions to register sockopt ranges (exclusive). */
|
int nf_register_sockopt(struct nf_sockopt_ops *reg)
|
int nf_register_sockopt(struct nf_sockopt_ops *reg)
|
{
|
{
|
struct list_head *i;
|
struct list_head *i;
|
int ret = 0;
|
int ret = 0;
|
|
|
if (down_interruptible(&nf_sockopt_mutex) != 0)
|
if (down_interruptible(&nf_sockopt_mutex) != 0)
|
return -EINTR;
|
return -EINTR;
|
|
|
for (i = nf_sockopts.next; i != &nf_sockopts; i = i->next) {
|
for (i = nf_sockopts.next; i != &nf_sockopts; i = i->next) {
|
struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i;
|
struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i;
|
if (ops->pf == reg->pf
|
if (ops->pf == reg->pf
|
&& (overlap(ops->set_optmin, ops->set_optmax,
|
&& (overlap(ops->set_optmin, ops->set_optmax,
|
reg->set_optmin, reg->set_optmax)
|
reg->set_optmin, reg->set_optmax)
|
|| overlap(ops->get_optmin, ops->get_optmax,
|
|| overlap(ops->get_optmin, ops->get_optmax,
|
reg->get_optmin, reg->get_optmax))) {
|
reg->get_optmin, reg->get_optmax))) {
|
NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
|
NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
|
ops->set_optmin, ops->set_optmax,
|
ops->set_optmin, ops->set_optmax,
|
ops->get_optmin, ops->get_optmax,
|
ops->get_optmin, ops->get_optmax,
|
reg->set_optmin, reg->set_optmax,
|
reg->set_optmin, reg->set_optmax,
|
reg->get_optmin, reg->get_optmax);
|
reg->get_optmin, reg->get_optmax);
|
ret = -EBUSY;
|
ret = -EBUSY;
|
goto out;
|
goto out;
|
}
|
}
|
}
|
}
|
|
|
list_add(®->list, &nf_sockopts);
|
list_add(®->list, &nf_sockopts);
|
out:
|
out:
|
up(&nf_sockopt_mutex);
|
up(&nf_sockopt_mutex);
|
return ret;
|
return ret;
|
}
|
}
|
|
|
void nf_unregister_sockopt(struct nf_sockopt_ops *reg)
|
void nf_unregister_sockopt(struct nf_sockopt_ops *reg)
|
{
|
{
|
/* No point being interruptible: we're probably in cleanup_module() */
|
/* No point being interruptible: we're probably in cleanup_module() */
|
restart:
|
restart:
|
down(&nf_sockopt_mutex);
|
down(&nf_sockopt_mutex);
|
if (reg->use != 0) {
|
if (reg->use != 0) {
|
/* To be woken by nf_sockopt call... */
|
/* To be woken by nf_sockopt call... */
|
/* FIXME: Stuart Young's name appears gratuitously. */
|
/* FIXME: Stuart Young's name appears gratuitously. */
|
set_current_state(TASK_UNINTERRUPTIBLE);
|
set_current_state(TASK_UNINTERRUPTIBLE);
|
reg->cleanup_task = current;
|
reg->cleanup_task = current;
|
up(&nf_sockopt_mutex);
|
up(&nf_sockopt_mutex);
|
schedule();
|
schedule();
|
goto restart;
|
goto restart;
|
}
|
}
|
list_del(®->list);
|
list_del(®->list);
|
up(&nf_sockopt_mutex);
|
up(&nf_sockopt_mutex);
|
}
|
}
|
|
|
#ifdef CONFIG_NETFILTER_DEBUG
|
#ifdef CONFIG_NETFILTER_DEBUG
|
#include <net/ip.h>
|
#include <net/ip.h>
|
#include <net/route.h>
|
#include <net/route.h>
|
#include <net/tcp.h>
|
#include <net/tcp.h>
|
#include <linux/netfilter_ipv4.h>
|
#include <linux/netfilter_ipv4.h>
|
|
|
static void debug_print_hooks_ip(unsigned int nf_debug)
|
static void debug_print_hooks_ip(unsigned int nf_debug)
|
{
|
{
|
if (nf_debug & (1 << NF_IP_PRE_ROUTING)) {
|
if (nf_debug & (1 << NF_IP_PRE_ROUTING)) {
|
printk("PRE_ROUTING ");
|
printk("PRE_ROUTING ");
|
nf_debug ^= (1 << NF_IP_PRE_ROUTING);
|
nf_debug ^= (1 << NF_IP_PRE_ROUTING);
|
}
|
}
|
if (nf_debug & (1 << NF_IP_LOCAL_IN)) {
|
if (nf_debug & (1 << NF_IP_LOCAL_IN)) {
|
printk("LOCAL_IN ");
|
printk("LOCAL_IN ");
|
nf_debug ^= (1 << NF_IP_LOCAL_IN);
|
nf_debug ^= (1 << NF_IP_LOCAL_IN);
|
}
|
}
|
if (nf_debug & (1 << NF_IP_FORWARD)) {
|
if (nf_debug & (1 << NF_IP_FORWARD)) {
|
printk("FORWARD ");
|
printk("FORWARD ");
|
nf_debug ^= (1 << NF_IP_FORWARD);
|
nf_debug ^= (1 << NF_IP_FORWARD);
|
}
|
}
|
if (nf_debug & (1 << NF_IP_LOCAL_OUT)) {
|
if (nf_debug & (1 << NF_IP_LOCAL_OUT)) {
|
printk("LOCAL_OUT ");
|
printk("LOCAL_OUT ");
|
nf_debug ^= (1 << NF_IP_LOCAL_OUT);
|
nf_debug ^= (1 << NF_IP_LOCAL_OUT);
|
}
|
}
|
if (nf_debug & (1 << NF_IP_POST_ROUTING)) {
|
if (nf_debug & (1 << NF_IP_POST_ROUTING)) {
|
printk("POST_ROUTING ");
|
printk("POST_ROUTING ");
|
nf_debug ^= (1 << NF_IP_POST_ROUTING);
|
nf_debug ^= (1 << NF_IP_POST_ROUTING);
|
}
|
}
|
if (nf_debug)
|
if (nf_debug)
|
printk("Crap bits: 0x%04X", nf_debug);
|
printk("Crap bits: 0x%04X", nf_debug);
|
printk("\n");
|
printk("\n");
|
}
|
}
|
|
|
void nf_dump_skb(int pf, struct sk_buff *skb)
|
void nf_dump_skb(int pf, struct sk_buff *skb)
|
{
|
{
|
printk("skb: pf=%i %s dev=%s len=%u\n",
|
printk("skb: pf=%i %s dev=%s len=%u\n",
|
pf,
|
pf,
|
skb->sk ? "(owned)" : "(unowned)",
|
skb->sk ? "(owned)" : "(unowned)",
|
skb->dev ? skb->dev->name : "(no dev)",
|
skb->dev ? skb->dev->name : "(no dev)",
|
skb->len);
|
skb->len);
|
switch (pf) {
|
switch (pf) {
|
case PF_INET: {
|
case PF_INET: {
|
const struct iphdr *ip = skb->nh.iph;
|
const struct iphdr *ip = skb->nh.iph;
|
__u32 *opt = (__u32 *) (ip + 1);
|
__u32 *opt = (__u32 *) (ip + 1);
|
int opti;
|
int opti;
|
__u16 src_port = 0, dst_port = 0;
|
__u16 src_port = 0, dst_port = 0;
|
|
|
if (ip->protocol == IPPROTO_TCP
|
if (ip->protocol == IPPROTO_TCP
|
|| ip->protocol == IPPROTO_UDP) {
|
|| ip->protocol == IPPROTO_UDP) {
|
struct tcphdr *tcp=(struct tcphdr *)((__u32 *)ip+ip->ihl);
|
struct tcphdr *tcp=(struct tcphdr *)((__u32 *)ip+ip->ihl);
|
src_port = ntohs(tcp->source);
|
src_port = ntohs(tcp->source);
|
dst_port = ntohs(tcp->dest);
|
dst_port = ntohs(tcp->dest);
|
}
|
}
|
|
|
printk("PROTO=%d %u.%u.%u.%u:%hu %u.%u.%u.%u:%hu"
|
printk("PROTO=%d %u.%u.%u.%u:%hu %u.%u.%u.%u:%hu"
|
" L=%hu S=0x%2.2hX I=%hu F=0x%4.4hX T=%hu",
|
" L=%hu S=0x%2.2hX I=%hu F=0x%4.4hX T=%hu",
|
ip->protocol, NIPQUAD(ip->saddr),
|
ip->protocol, NIPQUAD(ip->saddr),
|
src_port, NIPQUAD(ip->daddr),
|
src_port, NIPQUAD(ip->daddr),
|
dst_port,
|
dst_port,
|
ntohs(ip->tot_len), ip->tos, ntohs(ip->id),
|
ntohs(ip->tot_len), ip->tos, ntohs(ip->id),
|
ntohs(ip->frag_off), ip->ttl);
|
ntohs(ip->frag_off), ip->ttl);
|
|
|
for (opti = 0; opti < (ip->ihl - sizeof(struct iphdr) / 4); opti++)
|
for (opti = 0; opti < (ip->ihl - sizeof(struct iphdr) / 4); opti++)
|
printk(" O=0x%8.8X", *opt++);
|
printk(" O=0x%8.8X", *opt++);
|
printk("\n");
|
printk("\n");
|
}
|
}
|
}
|
}
|
}
|
}
|
|
|
void nf_debug_ip_local_deliver(struct sk_buff *skb)
|
void nf_debug_ip_local_deliver(struct sk_buff *skb)
|
{
|
{
|
/* If it's a loopback packet, it must have come through
|
/* If it's a loopback packet, it must have come through
|
* NF_IP_LOCAL_OUT, NF_IP_RAW_INPUT, NF_IP_PRE_ROUTING and
|
* NF_IP_LOCAL_OUT, NF_IP_RAW_INPUT, NF_IP_PRE_ROUTING and
|
* NF_IP_LOCAL_IN. Otherwise, must have gone through
|
* NF_IP_LOCAL_IN. Otherwise, must have gone through
|
* NF_IP_RAW_INPUT and NF_IP_PRE_ROUTING. */
|
* NF_IP_RAW_INPUT and NF_IP_PRE_ROUTING. */
|
if (!skb->dev) {
|
if (!skb->dev) {
|
printk("ip_local_deliver: skb->dev is NULL.\n");
|
printk("ip_local_deliver: skb->dev is NULL.\n");
|
}
|
}
|
else if (strcmp(skb->dev->name, "lo") == 0) {
|
else if (strcmp(skb->dev->name, "lo") == 0) {
|
if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
|
if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
|
| (1 << NF_IP_POST_ROUTING)
|
| (1 << NF_IP_POST_ROUTING)
|
| (1 << NF_IP_PRE_ROUTING)
|
| (1 << NF_IP_PRE_ROUTING)
|
| (1 << NF_IP_LOCAL_IN))) {
|
| (1 << NF_IP_LOCAL_IN))) {
|
printk("ip_local_deliver: bad loopback skb: ");
|
printk("ip_local_deliver: bad loopback skb: ");
|
debug_print_hooks_ip(skb->nf_debug);
|
debug_print_hooks_ip(skb->nf_debug);
|
nf_dump_skb(PF_INET, skb);
|
nf_dump_skb(PF_INET, skb);
|
}
|
}
|
}
|
}
|
else {
|
else {
|
if (skb->nf_debug != ((1<<NF_IP_PRE_ROUTING)
|
if (skb->nf_debug != ((1<<NF_IP_PRE_ROUTING)
|
| (1<<NF_IP_LOCAL_IN))) {
|
| (1<<NF_IP_LOCAL_IN))) {
|
printk("ip_local_deliver: bad non-lo skb: ");
|
printk("ip_local_deliver: bad non-lo skb: ");
|
debug_print_hooks_ip(skb->nf_debug);
|
debug_print_hooks_ip(skb->nf_debug);
|
nf_dump_skb(PF_INET, skb);
|
nf_dump_skb(PF_INET, skb);
|
}
|
}
|
}
|
}
|
}
|
}
|
|
|
void nf_debug_ip_loopback_xmit(struct sk_buff *newskb)
|
void nf_debug_ip_loopback_xmit(struct sk_buff *newskb)
|
{
|
{
|
if (newskb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
|
if (newskb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
|
| (1 << NF_IP_POST_ROUTING))) {
|
| (1 << NF_IP_POST_ROUTING))) {
|
printk("ip_dev_loopback_xmit: bad owned skb = %p: ",
|
printk("ip_dev_loopback_xmit: bad owned skb = %p: ",
|
newskb);
|
newskb);
|
debug_print_hooks_ip(newskb->nf_debug);
|
debug_print_hooks_ip(newskb->nf_debug);
|
nf_dump_skb(PF_INET, newskb);
|
nf_dump_skb(PF_INET, newskb);
|
}
|
}
|
/* Clear to avoid confusing input check */
|
/* Clear to avoid confusing input check */
|
newskb->nf_debug = 0;
|
newskb->nf_debug = 0;
|
}
|
}
|
|
|
void nf_debug_ip_finish_output2(struct sk_buff *skb)
|
void nf_debug_ip_finish_output2(struct sk_buff *skb)
|
{
|
{
|
/* If it's owned, it must have gone through the
|
/* If it's owned, it must have gone through the
|
* NF_IP_LOCAL_OUT and NF_IP_POST_ROUTING.
|
* NF_IP_LOCAL_OUT and NF_IP_POST_ROUTING.
|
* Otherwise, must have gone through
|
* Otherwise, must have gone through
|
* NF_IP_PRE_ROUTING, NF_IP_FORWARD and NF_IP_POST_ROUTING.
|
* NF_IP_PRE_ROUTING, NF_IP_FORWARD and NF_IP_POST_ROUTING.
|
*/
|
*/
|
if (skb->sk) {
|
if (skb->sk) {
|
if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
|
if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
|
| (1 << NF_IP_POST_ROUTING))) {
|
| (1 << NF_IP_POST_ROUTING))) {
|
printk("ip_finish_output: bad owned skb = %p: ", skb);
|
printk("ip_finish_output: bad owned skb = %p: ", skb);
|
debug_print_hooks_ip(skb->nf_debug);
|
debug_print_hooks_ip(skb->nf_debug);
|
nf_dump_skb(PF_INET, skb);
|
nf_dump_skb(PF_INET, skb);
|
}
|
}
|
} else {
|
} else {
|
if (skb->nf_debug != ((1 << NF_IP_PRE_ROUTING)
|
if (skb->nf_debug != ((1 << NF_IP_PRE_ROUTING)
|
| (1 << NF_IP_FORWARD)
|
| (1 << NF_IP_FORWARD)
|
| (1 << NF_IP_POST_ROUTING))) {
|
| (1 << NF_IP_POST_ROUTING))) {
|
/* Fragments, entunnelled packets, TCP RSTs
|
/* Fragments, entunnelled packets, TCP RSTs
|
generated by ipt_REJECT will have no
|
generated by ipt_REJECT will have no
|
owners, but still may be local */
|
owners, but still may be local */
|
if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
|
if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
|
| (1 << NF_IP_POST_ROUTING))){
|
| (1 << NF_IP_POST_ROUTING))){
|
printk("ip_finish_output:"
|
printk("ip_finish_output:"
|
" bad unowned skb = %p: ",skb);
|
" bad unowned skb = %p: ",skb);
|
debug_print_hooks_ip(skb->nf_debug);
|
debug_print_hooks_ip(skb->nf_debug);
|
nf_dump_skb(PF_INET, skb);
|
nf_dump_skb(PF_INET, skb);
|
}
|
}
|
}
|
}
|
}
|
}
|
}
|
}
|
#endif /*CONFIG_NETFILTER_DEBUG*/
|
#endif /*CONFIG_NETFILTER_DEBUG*/
|
|
|
/* Call get/setsockopt() */
|
/* Call get/setsockopt() */
|
static int nf_sockopt(struct sock *sk, int pf, int val,
|
static int nf_sockopt(struct sock *sk, int pf, int val,
|
char *opt, int *len, int get)
|
char *opt, int *len, int get)
|
{
|
{
|
struct list_head *i;
|
struct list_head *i;
|
struct nf_sockopt_ops *ops;
|
struct nf_sockopt_ops *ops;
|
int ret;
|
int ret;
|
|
|
if (down_interruptible(&nf_sockopt_mutex) != 0)
|
if (down_interruptible(&nf_sockopt_mutex) != 0)
|
return -EINTR;
|
return -EINTR;
|
|
|
for (i = nf_sockopts.next; i != &nf_sockopts; i = i->next) {
|
for (i = nf_sockopts.next; i != &nf_sockopts; i = i->next) {
|
ops = (struct nf_sockopt_ops *)i;
|
ops = (struct nf_sockopt_ops *)i;
|
if (ops->pf == pf) {
|
if (ops->pf == pf) {
|
if (get) {
|
if (get) {
|
if (val >= ops->get_optmin
|
if (val >= ops->get_optmin
|
&& val < ops->get_optmax) {
|
&& val < ops->get_optmax) {
|
ops->use++;
|
ops->use++;
|
up(&nf_sockopt_mutex);
|
up(&nf_sockopt_mutex);
|
ret = ops->get(sk, val, opt, len);
|
ret = ops->get(sk, val, opt, len);
|
goto out;
|
goto out;
|
}
|
}
|
} else {
|
} else {
|
if (val >= ops->set_optmin
|
if (val >= ops->set_optmin
|
&& val < ops->set_optmax) {
|
&& val < ops->set_optmax) {
|
ops->use++;
|
ops->use++;
|
up(&nf_sockopt_mutex);
|
up(&nf_sockopt_mutex);
|
ret = ops->set(sk, val, opt, *len);
|
ret = ops->set(sk, val, opt, *len);
|
goto out;
|
goto out;
|
}
|
}
|
}
|
}
|
}
|
}
|
}
|
}
|
up(&nf_sockopt_mutex);
|
up(&nf_sockopt_mutex);
|
return -ENOPROTOOPT;
|
return -ENOPROTOOPT;
|
|
|
out:
|
out:
|
down(&nf_sockopt_mutex);
|
down(&nf_sockopt_mutex);
|
ops->use--;
|
ops->use--;
|
if (ops->cleanup_task)
|
if (ops->cleanup_task)
|
wake_up_process(ops->cleanup_task);
|
wake_up_process(ops->cleanup_task);
|
up(&nf_sockopt_mutex);
|
up(&nf_sockopt_mutex);
|
return ret;
|
return ret;
|
}
|
}
|
|
|
/* setsockopt entry point: dispatch (@pf, @val) to the matching handler's
 * set() callback via nf_sockopt().  Returns nf_sockopt()'s result. */
int nf_setsockopt(struct sock *sk, int pf, int val, char *opt,
		  int len)
{
	return nf_sockopt(sk, pf, val, opt, &len, 0);
}
|
|
|
/* getsockopt entry point: dispatch (@pf, @val) to the matching handler's
 * get() callback via nf_sockopt().  Returns nf_sockopt()'s result. */
int nf_getsockopt(struct sock *sk, int pf, int val, char *opt, int *len)
{
	return nf_sockopt(sk, pf, val, opt, len, 1);
}
|
|
|
static unsigned int nf_iterate(struct list_head *head,
|
static unsigned int nf_iterate(struct list_head *head,
|
struct sk_buff **skb,
|
struct sk_buff **skb,
|
int hook,
|
int hook,
|
const struct net_device *indev,
|
const struct net_device *indev,
|
const struct net_device *outdev,
|
const struct net_device *outdev,
|
struct list_head **i,
|
struct list_head **i,
|
int (*okfn)(struct sk_buff *))
|
int (*okfn)(struct sk_buff *))
|
{
|
{
|
for (*i = (*i)->next; *i != head; *i = (*i)->next) {
|
for (*i = (*i)->next; *i != head; *i = (*i)->next) {
|
struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;
|
struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;
|
switch (elem->hook(hook, skb, indev, outdev, okfn)) {
|
switch (elem->hook(hook, skb, indev, outdev, okfn)) {
|
case NF_QUEUE:
|
case NF_QUEUE:
|
return NF_QUEUE;
|
return NF_QUEUE;
|
|
|
case NF_STOLEN:
|
case NF_STOLEN:
|
return NF_STOLEN;
|
return NF_STOLEN;
|
|
|
case NF_DROP:
|
case NF_DROP:
|
return NF_DROP;
|
return NF_DROP;
|
|
|
case NF_REPEAT:
|
case NF_REPEAT:
|
*i = (*i)->prev;
|
*i = (*i)->prev;
|
break;
|
break;
|
|
|
#ifdef CONFIG_NETFILTER_DEBUG
|
#ifdef CONFIG_NETFILTER_DEBUG
|
case NF_ACCEPT:
|
case NF_ACCEPT:
|
break;
|
break;
|
|
|
default:
|
default:
|
NFDEBUG("Evil return from %p(%u).\n",
|
NFDEBUG("Evil return from %p(%u).\n",
|
elem->hook, hook);
|
elem->hook, hook);
|
#endif
|
#endif
|
}
|
}
|
}
|
}
|
return NF_ACCEPT;
|
return NF_ACCEPT;
|
}
|
}
|
|
|
/* Install @outfn (with its @data cookie) as the queue handler for
 * protocol family @pf, under the BR_NETPROTO_LOCK write lock.
 *
 * Returns 0 on success or -EBUSY if @pf already has a handler.
 * NOTE(review): @pf is used as an array index without a range check --
 * callers presumably pass a value below NPROTO; confirm. */
int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data)
{
	int ret;

	br_write_lock_bh(BR_NETPROTO_LOCK);
	if (queue_handler[pf].outfn)
		ret = -EBUSY;
	else {
		queue_handler[pf].outfn = outfn;
		queue_handler[pf].data = data;
		ret = 0;
	}
	br_write_unlock_bh(BR_NETPROTO_LOCK);

	return ret;
}
|
|
|
/* The caller must flush their queue before this */
|
/* The caller must flush their queue before this */
|
int nf_unregister_queue_handler(int pf)
|
int nf_unregister_queue_handler(int pf)
|
{
|
{
|
br_write_lock_bh(BR_NETPROTO_LOCK);
|
br_write_lock_bh(BR_NETPROTO_LOCK);
|
queue_handler[pf].outfn = NULL;
|
queue_handler[pf].outfn = NULL;
|
queue_handler[pf].data = NULL;
|
queue_handler[pf].data = NULL;
|
br_write_unlock_bh(BR_NETPROTO_LOCK);
|
br_write_unlock_bh(BR_NETPROTO_LOCK);
|
return 0;
|
return 0;
|
}
|
}
|
|
|
/*
|
/*
|
* Any packet that leaves via this function must come back
|
* Any packet that leaves via this function must come back
|
* through nf_reinject().
|
* through nf_reinject().
|
*/
|
*/
|
static void nf_queue(struct sk_buff *skb,
|
static void nf_queue(struct sk_buff *skb,
|
struct list_head *elem,
|
struct list_head *elem,
|
int pf, unsigned int hook,
|
int pf, unsigned int hook,
|
struct net_device *indev,
|
struct net_device *indev,
|
struct net_device *outdev,
|
struct net_device *outdev,
|
int (*okfn)(struct sk_buff *))
|
int (*okfn)(struct sk_buff *))
|
{
|
{
|
int status;
|
int status;
|
struct nf_info *info;
|
struct nf_info *info;
|
|
|
if (!queue_handler[pf].outfn) {
|
if (!queue_handler[pf].outfn) {
|
kfree_skb(skb);
|
kfree_skb(skb);
|
return;
|
return;
|
}
|
}
|
|
|
info = kmalloc(sizeof(*info), GFP_ATOMIC);
|
info = kmalloc(sizeof(*info), GFP_ATOMIC);
|
if (!info) {
|
if (!info) {
|
if (net_ratelimit())
|
if (net_ratelimit())
|
printk(KERN_ERR "OOM queueing packet %p\n",
|
printk(KERN_ERR "OOM queueing packet %p\n",
|
skb);
|
skb);
|
kfree_skb(skb);
|
kfree_skb(skb);
|
return;
|
return;
|
}
|
}
|
|
|
*info = (struct nf_info) {
|
*info = (struct nf_info) {
|
(struct nf_hook_ops *)elem, pf, hook, indev, outdev, okfn };
|
(struct nf_hook_ops *)elem, pf, hook, indev, outdev, okfn };
|
|
|
/* Bump dev refs so they don't vanish while packet is out */
|
/* Bump dev refs so they don't vanish while packet is out */
|
if (indev) dev_hold(indev);
|
if (indev) dev_hold(indev);
|
if (outdev) dev_hold(outdev);
|
if (outdev) dev_hold(outdev);
|
|
|
status = queue_handler[pf].outfn(skb, info, queue_handler[pf].data);
|
status = queue_handler[pf].outfn(skb, info, queue_handler[pf].data);
|
if (status < 0) {
|
if (status < 0) {
|
/* James M doesn't say fuck enough. */
|
/* James M doesn't say fuck enough. */
|
if (indev) dev_put(indev);
|
if (indev) dev_put(indev);
|
if (outdev) dev_put(outdev);
|
if (outdev) dev_put(outdev);
|
kfree(info);
|
kfree(info);
|
kfree_skb(skb);
|
kfree_skb(skb);
|
return;
|
return;
|
}
|
}
|
}
|
}
|
|
|
int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb,
|
int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb,
|
struct net_device *indev,
|
struct net_device *indev,
|
struct net_device *outdev,
|
struct net_device *outdev,
|
int (*okfn)(struct sk_buff *))
|
int (*okfn)(struct sk_buff *))
|
{
|
{
|
struct list_head *elem;
|
struct list_head *elem;
|
unsigned int verdict;
|
unsigned int verdict;
|
int ret = 0;
|
int ret = 0;
|
|
|
/* This stopgap cannot be removed until all the hooks are audited. */
|
/* This stopgap cannot be removed until all the hooks are audited. */
|
if (skb_is_nonlinear(skb) && skb_linearize(skb, GFP_ATOMIC) != 0) {
|
if (skb_is_nonlinear(skb) && skb_linearize(skb, GFP_ATOMIC) != 0) {
|
kfree_skb(skb);
|
kfree_skb(skb);
|
return -ENOMEM;
|
return -ENOMEM;
|
}
|
}
|
if (skb->ip_summed == CHECKSUM_HW) {
|
if (skb->ip_summed == CHECKSUM_HW) {
|
if (outdev == NULL) {
|
if (outdev == NULL) {
|
skb->ip_summed = CHECKSUM_NONE;
|
skb->ip_summed = CHECKSUM_NONE;
|
} else {
|
} else {
|
skb_checksum_help(skb);
|
skb_checksum_help(skb);
|
}
|
}
|
}
|
}
|
|
|
/* We may already have this, but read-locks nest anyway */
|
/* We may already have this, but read-locks nest anyway */
|
br_read_lock_bh(BR_NETPROTO_LOCK);
|
br_read_lock_bh(BR_NETPROTO_LOCK);
|
|
|
#ifdef CONFIG_NETFILTER_DEBUG
|
#ifdef CONFIG_NETFILTER_DEBUG
|
if (skb->nf_debug & (1 << hook)) {
|
if (skb->nf_debug & (1 << hook)) {
|
printk("nf_hook: hook %i already set.\n", hook);
|
printk("nf_hook: hook %i already set.\n", hook);
|
nf_dump_skb(pf, skb);
|
nf_dump_skb(pf, skb);
|
}
|
}
|
skb->nf_debug |= (1 << hook);
|
skb->nf_debug |= (1 << hook);
|
#endif
|
#endif
|
|
|
elem = &nf_hooks[pf][hook];
|
elem = &nf_hooks[pf][hook];
|
verdict = nf_iterate(&nf_hooks[pf][hook], &skb, hook, indev,
|
verdict = nf_iterate(&nf_hooks[pf][hook], &skb, hook, indev,
|
outdev, &elem, okfn);
|
outdev, &elem, okfn);
|
if (verdict == NF_QUEUE) {
|
if (verdict == NF_QUEUE) {
|
NFDEBUG("nf_hook: Verdict = QUEUE.\n");
|
NFDEBUG("nf_hook: Verdict = QUEUE.\n");
|
nf_queue(skb, elem, pf, hook, indev, outdev, okfn);
|
nf_queue(skb, elem, pf, hook, indev, outdev, okfn);
|
}
|
}
|
|
|
switch (verdict) {
|
switch (verdict) {
|
case NF_ACCEPT:
|
case NF_ACCEPT:
|
ret = okfn(skb);
|
ret = okfn(skb);
|
break;
|
break;
|
|
|
case NF_DROP:
|
case NF_DROP:
|
kfree_skb(skb);
|
kfree_skb(skb);
|
ret = -EPERM;
|
ret = -EPERM;
|
break;
|
break;
|
}
|
}
|
|
|
br_read_unlock_bh(BR_NETPROTO_LOCK);
|
br_read_unlock_bh(BR_NETPROTO_LOCK);
|
return ret;
|
return ret;
|
}
|
}
|
|
|
void nf_reinject(struct sk_buff *skb, struct nf_info *info,
|
void nf_reinject(struct sk_buff *skb, struct nf_info *info,
|
unsigned int verdict)
|
unsigned int verdict)
|
{
|
{
|
struct list_head *elem = &info->elem->list;
|
struct list_head *elem = &info->elem->list;
|
struct list_head *i;
|
struct list_head *i;
|
|
|
/* We don't have BR_NETPROTO_LOCK here */
|
/* We don't have BR_NETPROTO_LOCK here */
|
br_read_lock_bh(BR_NETPROTO_LOCK);
|
br_read_lock_bh(BR_NETPROTO_LOCK);
|
for (i = nf_hooks[info->pf][info->hook].next; i != elem; i = i->next) {
|
for (i = nf_hooks[info->pf][info->hook].next; i != elem; i = i->next) {
|
if (i == &nf_hooks[info->pf][info->hook]) {
|
if (i == &nf_hooks[info->pf][info->hook]) {
|
/* The module which sent it to userspace is gone. */
|
/* The module which sent it to userspace is gone. */
|
NFDEBUG("%s: module disappeared, dropping packet.\n",
|
NFDEBUG("%s: module disappeared, dropping packet.\n",
|
__FUNCTION__);
|
__FUNCTION__);
|
verdict = NF_DROP;
|
verdict = NF_DROP;
|
break;
|
break;
|
}
|
}
|
}
|
}
|
|
|
/* Continue traversal iff userspace said ok... */
|
/* Continue traversal iff userspace said ok... */
|
if (verdict == NF_REPEAT) {
|
if (verdict == NF_REPEAT) {
|
elem = elem->prev;
|
elem = elem->prev;
|
verdict = NF_ACCEPT;
|
verdict = NF_ACCEPT;
|
}
|
}
|
|
|
if (verdict == NF_ACCEPT) {
|
if (verdict == NF_ACCEPT) {
|
verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
|
verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
|
&skb, info->hook,
|
&skb, info->hook,
|
info->indev, info->outdev, &elem,
|
info->indev, info->outdev, &elem,
|
info->okfn);
|
info->okfn);
|
}
|
}
|
|
|
switch (verdict) {
|
switch (verdict) {
|
case NF_ACCEPT:
|
case NF_ACCEPT:
|
info->okfn(skb);
|
info->okfn(skb);
|
break;
|
break;
|
|
|
case NF_QUEUE:
|
case NF_QUEUE:
|
nf_queue(skb, elem, info->pf, info->hook,
|
nf_queue(skb, elem, info->pf, info->hook,
|
info->indev, info->outdev, info->okfn);
|
info->indev, info->outdev, info->okfn);
|
break;
|
break;
|
|
|
case NF_DROP:
|
case NF_DROP:
|
kfree_skb(skb);
|
kfree_skb(skb);
|
break;
|
break;
|
}
|
}
|
br_read_unlock_bh(BR_NETPROTO_LOCK);
|
br_read_unlock_bh(BR_NETPROTO_LOCK);
|
|
|
/* Release those devices we held, or Alexey will kill me. */
|
/* Release those devices we held, or Alexey will kill me. */
|
if (info->indev) dev_put(info->indev);
|
if (info->indev) dev_put(info->indev);
|
if (info->outdev) dev_put(info->outdev);
|
if (info->outdev) dev_put(info->outdev);
|
|
|
kfree(info);
|
kfree(info);
|
return;
|
return;
|
}
|
}
|
|
|
#ifdef CONFIG_INET
/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
/* Re-route *@pskb according to its current IP header.
 *
 * For a local source address an output route lookup is done and the
 * skb's old dst is replaced by the result.  For a non-local source an
 * output lookup towards the source address supplies a device, which
 * is then used as the input device for ip_route_input().
 *
 * If the new output device needs more link-layer headroom than the skb
 * has, the skb is reallocated and *@pskb is updated to the new buffer
 * (socket ownership is carried over; the old skb is freed).
 *
 * Returns 0 on success, -1 if a route lookup fails, the resulting dst
 * carries an error, or the reallocation fails.
 */
int ip_route_me_harder(struct sk_buff **pskb)
{
	struct iphdr *iph = (*pskb)->nh.iph;
	struct rtable *rt;
	struct rt_key key = {};
	struct dst_entry *odst;
	unsigned int hh_len;

	/* some non-standard hacks like ipt_REJECT.c:send_reset() can cause
	 * packets with foreign saddr to appear on the NF_IP_LOCAL_OUT hook.
	 */
	if (inet_addr_type(iph->saddr) == RTN_LOCAL) {
		key.dst = iph->daddr;
		key.src = iph->saddr;
		key.oif = (*pskb)->sk ? (*pskb)->sk->bound_dev_if : 0;
		key.tos = RT_TOS(iph->tos);
#ifdef CONFIG_IP_ROUTE_FWMARK
		key.fwmark = (*pskb)->nfmark;
#endif
		if (ip_route_output_key(&rt, &key) != 0)
			return -1;

		/* Drop old route. */
		dst_release((*pskb)->dst);
		(*pskb)->dst = &rt->u.dst;
	} else {
		/* non-local src, find valid iif to satisfy
		 * rp-filter when calling ip_route_input. */
		key.dst = iph->saddr;
		if (ip_route_output_key(&rt, &key) != 0)
			return -1;

		/* Keep the old dst so it can be released once the input
		 * lookup has succeeded. */
		odst = (*pskb)->dst;
		if (ip_route_input(*pskb, iph->daddr, iph->saddr,
				   RT_TOS(iph->tos), rt->u.dst.dev) != 0) {
			dst_release(&rt->u.dst);
			return -1;
		}
		dst_release(&rt->u.dst);
		dst_release(odst);
	}

	if ((*pskb)->dst->error)
		return -1;

	/* Change in oif may mean change in hh_len. */
	hh_len = (*pskb)->dst->dev->hard_header_len;
	if (skb_headroom(*pskb) < hh_len) {
		struct sk_buff *nskb;

		nskb = skb_realloc_headroom(*pskb, hh_len);
		if (!nskb)
			return -1;
		if ((*pskb)->sk)
			skb_set_owner_w(nskb, (*pskb)->sk);
		kfree_skb(*pskb);
		*pskb = nskb;
	}

	return 0;
}
#endif /*CONFIG_INET*/
|
|
|
/* This does not belong here, but ipt_REJECT needs it if connection
   tracking in use: without this, connection may not be in hash table,
   and hence manufactured ICMP or RST packets will not be associated
   with it. */
void (*ip_ct_attach)(struct sk_buff *, struct nf_ct_info *);
|
|
|
/* Initialise every list head in nf_hooks[][] to an empty list. */
void __init netfilter_init(void)
{
	int i, h;

	for (i = 0; i < NPROTO; i++) {
		for (h = 0; h < NF_MAX_HOOKS; h++)
			INIT_LIST_HEAD(&nf_hooks[i][h]);
	}
}
|
|
|