URL
https://opencores.org/ocsvn/or1k_old/or1k_old/trunk
Subversion Repositories or1k_old
[/] [or1k_old/] [trunk/] [uclinux/] [uClinux-2.0.x/] [net/] [ipv4/] [ipmr.c] - Rev 1782
Compare with Previous | Blame | View Log
/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *		(c) 1995 Alan Cox, <alan@cymru.net>
 *	  Linux Consultancy and Custom Driver Development
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *
 *	Fixes:
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code.
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Subset of bugfixes/changes pending for
 *					2.1. Doesn't include Alexey's PIM support.
 *
 *	Status:
 *		Cache manager under test. Forwarding in vague test mode
 *	Todo:
 *		Flow control
 *		Finish Tunnels
 *		Debug cache ttl handling properly
 *		Resolve IFF_ALLMULTI for rest of cards
 */

#include <linux/config.h>
#include <asm/system.h>
#include <asm/segment.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#include <linux/mroute.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <linux/notifier.h>
#include <net/checksum.h>

/*
 *	Multicast router control variables
 */

static struct vif_device vif_table[MAXVIFS];		/* Devices 		*/
static unsigned long vifc_map;				/* Active device map	*/
int mroute_do_pim = 0;					/* Set in PIM assert	*/
static struct mfc_cache *mfc_cache_array[MFC_LINES+1];	/* Forwarding cache	*/
#define cache_resolve_queue (mfc_cache_array[MFC_LINES])/* Unresolved cache	*/
int cache_resolve_queue_len = 0;			/* Size of unresolved	*/

/*
 *	Delete a VIF entry.
 *
 *	For a non-tunnel vif the IFF_ALLMULTI flag is cleared on the
 *	underlying device and its multicast list is re-uploaded. The
 *	device pointer is then dropped. The caller is responsible for
 *	clearing the matching bit in vifc_map.
 */

static void vif_delete(struct vif_device *v)
{
	if(!(v->flags&VIFF_TUNNEL))
	{
		v->dev->flags&=~IFF_ALLMULTI;
		dev_mc_upload(v->dev);
	}
	v->dev=NULL;
}

/*
 *	Delete a multicast route cache entry.
 *
 *	The entry is unlinked from whichever list holds it (the resolve
 *	queue if MFC_QUEUED is set, otherwise its hash line), any packets
 *	still waiting on it are freed, and the entry itself is released.
 */

static void ipmr_cache_delete(struct mfc_cache *cache)
{
	struct sk_buff *skb;
	int line;
	struct mfc_cache **cp;

	/*
	 *	Find the right cache line
	 */

	if(cache->mfc_flags&MFC_QUEUED)
	{
		cp=&cache_resolve_queue;
		del_timer(&cache->mfc_timer);
	}
	else
	{
		line=MFC_HASH(cache->mfc_mcastgrp,cache->mfc_origin);
		cp=&(mfc_cache_array[line]);
	}

	/*
	 *	Unlink the buffer
	 */

	while(*cp!=NULL)
	{
		if(*cp==cache)
		{
			*cp=cache->next;
			break;
		}
		cp=&((*cp)->next);
	}

	/*
	 *	Free the buffer. If it is a pending resolution
	 *	clean up the other resources.
	 */

	if(cache->mfc_flags&MFC_QUEUED)
	{
		cache_resolve_queue_len--;
		while((skb=skb_dequeue(&cache->mfc_unresolved)))
			kfree_skb(skb, FREE_WRITE);
	}

	/*
	 *	Pass the size of the object, not the size of the pointer.
	 */
	kfree_s(cache,sizeof(*cache));
}

/*
 *	Cache expiry timer. 'data' is the mfc_cache pointer stored in
 *	mfc_timer.data by ipmr_cache_alloc(); the entry is simply deleted.
 */

static void ipmr_cache_timer(unsigned long data)
{
	struct mfc_cache *cache=(struct mfc_cache *)data;
	ipmr_cache_delete(cache);
}

/*
 *	Insert a multicast cache entry at the head of its hash line.
 */

static void ipmr_cache_insert(struct mfc_cache *c)
{
	int line=MFC_HASH(c->mfc_mcastgrp,c->mfc_origin);
	c->next=mfc_cache_array[line];
	mfc_cache_array[line]=c;
}

/*
 *	Find a multicast cache entry.
 *
 *	The hash line for (mcastgrp, origin) is searched first, then the
 *	unresolved queue. Returns the matching entry or NULL. The caller
 *	must test MFC_QUEUED to tell a resolved entry from a pending one.
 */

struct mfc_cache *ipmr_cache_find(__u32 origin, __u32 mcastgrp)
{
	int line=MFC_HASH(mcastgrp,origin);
	struct mfc_cache *cache;

	cache=mfc_cache_array[line];
	while(cache!=NULL)
	{
		if(cache->mfc_origin==origin && cache->mfc_mcastgrp==mcastgrp)
			return cache;
		cache=cache->next;
	}
	cache=cache_resolve_queue;
	while(cache!=NULL)
	{
		if(cache->mfc_origin==origin && cache->mfc_mcastgrp==mcastgrp)
			return cache;
		cache=cache->next;
	}
	return NULL;
}

/*
 *	Allocate a multicast cache entry.
 *
 *	'priority' is handed straight to kmalloc(). Returns NULL on
 *	allocation failure. The pending queue, expiry timer, counters and
 *	ttl table are initialised here; origin, group, parent, flags and
 *	the list link are left for the caller to fill in.
 */

static struct mfc_cache *ipmr_cache_alloc(int priority)
{
	struct mfc_cache *c=(struct mfc_cache *)kmalloc(sizeof(struct mfc_cache), priority);
	if(c==NULL)
		return NULL;
	c->mfc_queuelen=0;
	skb_queue_head_init(&c->mfc_unresolved);
	init_timer(&c->mfc_timer);
	c->mfc_timer.data=(long)c;
	c->mfc_timer.function=ipmr_cache_timer;
	c->mfc_packets=0;
	c->mfc_bytes=0;
	/*
	 *	Queued entries never get their ttls filled in, yet
	 *	ipmr_mfc_info() prints them. Start from a clean table.
	 */
	memset(c->mfc_ttls, 0, sizeof(c->mfc_ttls));
	return c;
}

/*
 *	A cache entry has gone into a resolved state from queued.
 *
 *	The entry is moved from the resolve queue to the main cache and
 *	every packet that was parked on it is replayed through
 *	ipmr_forward(). Note that this routine calls sti() itself once the
 *	entry is off the resolve queue.
 */

static void ipmr_cache_resolve(struct mfc_cache *cache)
{
	struct mfc_cache **p;
	struct sk_buff *skb;

	/*
	 *	Kill the queue entry timer.
	 */

	del_timer(&cache->mfc_timer);
	cache->mfc_flags&=~MFC_QUEUED;

	/*
	 *	Remove from the resolve queue
	 */

	p=&cache_resolve_queue;
	while((*p)!=NULL)
	{
		if((*p)==cache)
		{
			*p=cache->next;
			break;
		}
		p=&((*p)->next);
	}
	cache_resolve_queue_len--;
	sti();

	/*
	 *	Insert into the main cache
	 */

	ipmr_cache_insert(cache);

	/*
	 *	Play the pending entries through our router.
	 *	skb->protocol carries the is_frag value stashed by
	 *	ipmr_cache_unresolved().
	 */

	while((skb=skb_dequeue(&cache->mfc_unresolved)))
		ipmr_forward(skb, skb->protocol);
}

/*
 *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 *	expects the following bizarre scheme..
 *
 *	A new buffer is built holding a copy of the packet's IP header
 *	(with the protocol field zeroed) followed by a dummy IGMP header of
 *	type IGMPMSG_NOCACHE, and queued on mroute_socket. If the buffer
 *	cannot be allocated the report is silently dropped.
 */

static void ipmr_cache_report(struct sk_buff *pkt)
{
	struct sk_buff *skb=alloc_skb(128, GFP_ATOMIC);
	int ihl=pkt->ip_hdr->ihl<<2;
	struct igmphdr *igmp;

	if(!skb)
		return;

	skb->free=1;

	/*
	 *	Copy the IP header
	 */

	skb->ip_hdr=(struct iphdr *)skb_put(skb,ihl);
	skb->h.iph=skb->ip_hdr;
	memcpy(skb->data,pkt->data,ihl);
	skb->ip_hdr->protocol = 0;			/* Flag to the kernel this is a route add */

	/*
	 *	Add our header
	 */

	igmp=(struct igmphdr *)skb_put(skb,sizeof(struct igmphdr));
	igmp->type	=	IGMPMSG_NOCACHE;		/* non IGMP dummy message */
	igmp->code 	=	0;
	skb->ip_hdr->tot_len=htons(skb->len);			/* Fix the length */

	/*
	 *	Deliver to mrouted
	 */

	if(sock_queue_rcv_skb(mroute_socket,skb)<0)
	{
		skb->sk=NULL;
		kfree_skb(skb, FREE_READ);
	}
}

/*
 *	Queue a packet for resolution.
 *
 *	cache	- existing queued entry for this (origin, group), or NULL
 *		  if none exists yet and one should be created
 *	vifi	- parent vif recorded in a newly created entry
 *	skb	- the packet; ownership passes to this routine
 *	is_frag	- value saved in skb->protocol for the later replay
 *
 *	At most 10 unresolved entries are kept; further packets needing a
 *	new entry are dropped. A new entry expires after 10 seconds and its
 *	first packet is reported to mrouted. Once an entry's mfc_queuelen
 *	exceeds 3, further packets for it are dropped.
 */

static void ipmr_cache_unresolved(struct mfc_cache *cache, vifi_t vifi, struct sk_buff *skb, int is_frag)
{
	if(cache==NULL)
	{
		/*
		 *	Create a new entry if allowable
		 */
		if(cache_resolve_queue_len>=10 || (cache=ipmr_cache_alloc(GFP_ATOMIC))==NULL)
		{
			kfree_skb(skb, FREE_WRITE);
			return;
		}
		/*
		 *	Fill in the new cache entry
		 */
		cache->mfc_parent=vifi;
		cache->mfc_origin=skb->ip_hdr->saddr;
		cache->mfc_mcastgrp=skb->ip_hdr->daddr;
		cache->mfc_flags=MFC_QUEUED;
		/*
		 *	Link to the unresolved list
		 */
		cache->next=cache_resolve_queue;
		cache_resolve_queue=cache;
		cache_resolve_queue_len++;
		/*
		 *	Fire off the expiry timer
		 */
		cache->mfc_timer.expires=jiffies+10*HZ;
		add_timer(&cache->mfc_timer);
		/*
		 *	Reflect first query at mrouted.
		 */
		if(mroute_socket)
			ipmr_cache_report(skb);
	}
	/*
	 *	See if we can append the packet
	 */
	if(cache->mfc_queuelen>3)
	{
		kfree_skb(skb, FREE_WRITE);
		return;
	}
	/*
	 *	Add to our 'pending' list. Cache the is_frag data
	 *	in skb->protocol now it is spare.
	 */
	cache->mfc_queuelen++;
	skb->protocol=is_frag;
	skb_queue_tail(&cache->mfc_unresolved,skb);
}

/*
 *	MFC cache manipulation by user space mroute daemon.
 *
 *	action	- MRT_DEL_MFC deletes; any other value adds or updates
 *	mfc	- kernel copy of the request
 *
 *	Returns 0 on success, -EINVAL if the group is not a multicast
 *	address, -ENOENT when deleting a missing entry, -ENOMEM if a new
 *	entry cannot be allocated. The lookup and update run between
 *	cli() and sti().
 */

int ipmr_mfc_modify(int action, struct mfcctl *mfc)
{
	struct mfc_cache *cache;

	if(!MULTICAST(mfc->mfcc_mcastgrp.s_addr))
		return -EINVAL;
	/*
	 *	Find the cache line
	 */

	cli();

	cache=ipmr_cache_find(mfc->mfcc_origin.s_addr,mfc->mfcc_mcastgrp.s_addr);

	/*
	 *	Delete an entry
	 */
	if(action==MRT_DEL_MFC)
	{
		if(cache)
		{
			ipmr_cache_delete(cache);
			sti();
			return 0;
		}
		sti();
		return -ENOENT;
	}
	if(cache)
	{
		/*
		 *	Update the cache, see if it frees a pending queue
		 */

		cache->mfc_flags|=MFC_RESOLVED;
		cache->mfc_parent=mfc->mfcc_parent;

		memcpy(cache->mfc_ttls, mfc->mfcc_ttls,sizeof(cache->mfc_ttls));

		/*
		 *	Check to see if we resolved a queued list. If so we
		 *	need to send on the frames and tidy up.
		 */

		if(cache->mfc_flags&MFC_QUEUED)
			ipmr_cache_resolve(cache);	/* Unhook & send the frames */
		sti();
		return 0;
	}
	/*
	 *	Unsolicited update - that's ok, add anyway.
	 */

	cache=ipmr_cache_alloc(GFP_ATOMIC);
	if(cache==NULL)
	{
		sti();
		return -ENOMEM;
	}
	cache->mfc_flags=MFC_RESOLVED;
	cache->mfc_origin=mfc->mfcc_origin.s_addr;
	cache->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr;
	cache->mfc_parent=mfc->mfcc_parent;
	memcpy(cache->mfc_ttls, mfc->mfcc_ttls,sizeof(cache->mfc_ttls));
	ipmr_cache_insert(cache);
	sti();
	return 0;
}

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 *
 *	Every option other than MRT_INIT is refused with -EACCES unless
 *	it arrives on the socket that currently owns multicast routing.
 *	optval is a user space pointer; it is verified and copied in
 *	before use. Returns 0 or a negative errno.
 */

int ip_mroute_setsockopt(struct sock *sk,int optname,char *optval,int optlen)
{
	int err;
	struct vifctl vif;
	struct mfcctl mfc;

	if(optname!=MRT_INIT)
	{
		if(sk!=mroute_socket)
			return -EACCES;
	}

	switch(optname)
	{
		case MRT_INIT:
			if(sk->type!=SOCK_RAW || sk->num!=IPPROTO_IGMP)
				return -EOPNOTSUPP;
			if(optlen!=sizeof(int))
				return -ENOPROTOOPT;
			if((err=verify_area(VERIFY_READ,optval,sizeof(int)))<0)
				return err;
			if(get_user((int *)optval)!=1)
				return -ENOPROTOOPT;
			if(mroute_socket)
				return -EADDRINUSE;
			mroute_socket=sk;
			/* Initialise state */
			return 0;
		case MRT_DONE:
			mroute_close(sk);
			mroute_socket=NULL;
			return 0;
		case MRT_ADD_VIF:
		case MRT_DEL_VIF:
			if(optlen!=sizeof(vif))
				return -EINVAL;
			if((err=verify_area(VERIFY_READ, optval, sizeof(vif)))<0)
				return err;
			memcpy_fromfs(&vif,optval,sizeof(vif));
			/*
			 *	Valid indices are 0..MAXVIFS-1; MAXVIFS itself
			 *	would index one past the end of vif_table.
			 */
			if(vif.vifc_vifi >= MAXVIFS)
				return -ENFILE;
			if(optname==MRT_ADD_VIF)
			{
				struct vif_device *v=&vif_table[vif.vifc_vifi];
				struct device *dev;
				/* Empty vif ? */
				if(vifc_map&(1<<vif.vifc_vifi))
					return -EADDRINUSE;
				/* Find the interface */
				dev=ip_dev_find(vif.vifc_lcl_addr.s_addr);
				if(!dev)
					return -EADDRNOTAVAIL;
				/* Must be tunnelled or multicastable */
				if(vif.vifc_flags&VIFF_TUNNEL)
				{
					if(vif.vifc_flags&VIFF_SRCRT)
						return -EOPNOTSUPP;
					/* IPIP will do all the work */
				}
				else
				{
					if(dev->flags&IFF_MULTICAST)
					{
						/* Most ethernet cards don't know
						   how to do this yet.. */
						dev->flags|=IFF_ALLMULTI;
						dev_mc_upload(dev);
					}
					else
					{
						/* We are stuck.. */
						return -EOPNOTSUPP;
					}
				}
				/*
				 *	Fill in the VIF structures
				 */
				cli();
				v->rate_limit=vif.vifc_rate_limit;
				v->local=vif.vifc_lcl_addr.s_addr;
				v->remote=vif.vifc_rmt_addr.s_addr;
				v->flags=vif.vifc_flags;
				v->threshold=vif.vifc_threshold;
				v->dev=dev;
				v->bytes_in = 0;
				v->bytes_out = 0;
				v->pkt_in = 0;
				v->pkt_out = 0;
				vifc_map|=(1<<vif.vifc_vifi);
				sti();
				return 0;
			}
			else
			/*
			 *	VIF deletion
			 */
			{
				struct vif_device *v=&vif_table[vif.vifc_vifi];
				if(vifc_map&(1<<vif.vifc_vifi))
				{
					vif_delete(v);
					vifc_map&=~(1<<vif.vifc_vifi);
					return 0;
				}
				else
					return -EADDRNOTAVAIL;
			}
		/*
		 *	Manipulate the forwarding caches. These live
		 *	in a sort of kernel/user symbiosis.
		 */
		case MRT_ADD_MFC:
		case MRT_DEL_MFC:
			/*
			 *	Refuse a short or oversized request, in line
			 *	with the VIF and ASSERT options.
			 */
			if(optlen!=sizeof(mfc))
				return -EINVAL;
			err=verify_area(VERIFY_READ, optval, sizeof(mfc));
			if(err)
				return err;
			memcpy_fromfs(&mfc,optval, sizeof(mfc));
			return ipmr_mfc_modify(optname, &mfc);
		/*
		 *	Control PIM assert.
		 */
		case MRT_ASSERT:
			if(optlen!=sizeof(int))
				return -EINVAL;
			if((err=verify_area(VERIFY_READ, optval,sizeof(int)))<0)
				return err;
			mroute_do_pim= get_user((int *)optval)?1:0;
			return 0;
		/*
		 *	Spurious command, or MRT_VERSION which you cannot
		 *	set.
		 */
		default:
			return -EOPNOTSUPP;
	}
}

/*
 *	Getsock opt support for the multicast routing system.
 *
 *	Only MRT_VERSION and MRT_ASSERT may be read, and only through the
 *	multicast routing socket. The user must supply an int sized
 *	buffer. Returns 0 or a negative errno.
 */

int ip_mroute_getsockopt(struct sock *sk,int optname,char *optval,int *optlen)
{
	int olr;
	int err;

	if(sk!=mroute_socket)
		return -EACCES;
	if(optname!=MRT_VERSION && optname!=MRT_ASSERT)
		return -EOPNOTSUPP;

	/* NOTE(review): optlen is read without a verify_area() here -
	   presumably the caller has already checked it; confirm. */
	olr=get_user(optlen);
	if(olr!=sizeof(int))
		return -EINVAL;
	err=verify_area(VERIFY_WRITE, optval,sizeof(int));
	if(err)
		return err;
	put_user(sizeof(int),optlen);
	if(optname==MRT_VERSION)
		put_user(0x0305,(int *)optval);
	else
		put_user(mroute_do_pim,(int *)optval);
	return 0;
}

/*
 *	The IP multicast ioctl support routines.
 *
 *	SIOCGETVIFCNT	- copy out the packet/byte counters of one vif
 *	SIOCGETSGCNT	- copy out the packet/byte counters of one
 *			  (source, group) cache entry, zeroes if absent
 *
 *	'arg' is a user space pointer to the request structure, which is
 *	read and then written back. Returns 0 or a negative errno.
 */

int ipmr_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	int err;
	struct sioc_sg_req sr;
	struct sioc_vif_req vr;
	struct vif_device *vif;
	struct mfc_cache *cl;

	switch(cmd)
	{
		case SIOCGETVIFCNT:
			err=verify_area(VERIFY_WRITE, (void *)arg, sizeof(vr));
			if(err)
				return err;
			memcpy_fromfs(&vr,(void *)arg,sizeof(vr));
			if(vr.vifi>=MAXVIFS)
				return -EINVAL;
			vif=&vif_table[vr.vifi];
			if(vifc_map&(1<<vr.vifi))
			{
				vr.icount=vif->pkt_in;
				vr.ocount=vif->pkt_out;
				vr.ibytes=vif->bytes_in;
				vr.obytes=vif->bytes_out;
				memcpy_tofs((void *)arg,&vr,sizeof(vr));
				return 0;
			}
			return -EADDRNOTAVAIL;
		case SIOCGETSGCNT:
			err=verify_area(VERIFY_WRITE, (void *)arg, sizeof(sr));
			if(err)
				return err;
			memcpy_fromfs(&sr,(void *)arg,sizeof(sr));
			cl=ipmr_cache_find(sr.src.s_addr,sr.grp.s_addr);
			if(cl==NULL)
			{
				sr.pktcnt=0;
				sr.bytecnt=0;
				sr.wrong_if=0;
			}
			else
			{
				sr.pktcnt=cl->mfc_packets;
				sr.bytecnt=cl->mfc_bytes;
				sr.wrong_if=0;
			}
			memcpy_tofs((void *)arg,&sr,sizeof(sr));
			return 0;
		default:
			return -EINVAL;
	}
}

/*
 *	Close the multicast socket, and clear the vif tables etc.
 *
 *	Every active vif is torn down through vif_delete() so that its
 *	device pointer is cleared as well, then all resolved cache lines
 *	are emptied. Entries on the resolve queue are left for their
 *	expiry timers.
 */

void mroute_close(struct sock *sk)
{
	int i;
	struct vif_device *v=&vif_table[0];

	/*
	 *	Shut down all active vif entries
	 */

	for(i=0;i<MAXVIFS;i++)
	{
		if(vifc_map&(1<<i))
			vif_delete(v);
		v++;
	}
	vifc_map=0;

	/*
	 *	Wipe the cache
	 */

	for(i=0;i<MFC_LINES;i++)
	{
		while(mfc_cache_array[i]!=NULL)
			ipmr_cache_delete(mfc_cache_array[i]);
	}
	/* The timer will clear any 'pending' stuff */
}

/*
 *	Network device notifier. On NETDEV_DOWN every active vif that
 *	sits on the device 'ptr' is deleted. All other events are ignored.
 *	Always returns NOTIFY_DONE.
 */

static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct vif_device *v;
	int ct;

	if(event!=NETDEV_DOWN)
		return NOTIFY_DONE;
	v=&vif_table[0];
	for(ct=0;ct<MAXVIFS;ct++)
	{
		if((vifc_map&(1<<ct)) && v->dev==ptr)
		{
			vif_delete(v);
			vifc_map&=~(1<<ct);
		}
		v++;
	}
	return NOTIFY_DONE;
}

static struct notifier_block ip_mr_notifier={
	ipmr_device_event,
	NULL,
	0
};

/*
 *	Processing handlers for ipmr_forward.
 *
 *	Account the packet against the outgoing vif and hand it to
 *	ip_forward(). For a tunnel vif the next hop is the tunnel's remote
 *	address and IPFWD_MULTITUNNEL is added to the flags. The skb is
 *	freed here if the vif has no device or ip_forward() returns -1.
 */

static void ipmr_queue_xmit(struct sk_buff *skb, struct vif_device *vif, struct device *in_dev, int frag)
{
	int tunnel=0;
	__u32 raddr=skb->raddr;

	if(vif->flags&VIFF_TUNNEL)
	{
		tunnel=IPFWD_MULTITUNNEL;
		raddr=vif->remote;
	}
	vif->pkt_out++;
	vif->bytes_out+=skb->len;
	skb->dev=vif->dev;
	skb->raddr=skb->h.iph->daddr;
	/*
	 *	If the vif went down as we were forwarding.. just throw the
	 *	frame.
	 */
	if(vif->dev==NULL || ip_forward(skb, in_dev, frag|IPFWD_MULTICASTING|tunnel, raddr)==-1)
		kfree_skb(skb, FREE_WRITE);
}

/*
 *	Multicast packets for forwarding arrive here.
 *
 *	The packet is parked for resolution if no resolved cache entry
 *	exists, and dropped if it did not arrive on the entry's parent
 *	vif. Otherwise it is sent on every vif whose ttl threshold is
 *	non zero and below the packet's ttl; copies are made for all but
 *	the last such vif, which receives the original skb.
 */

void ipmr_forward(struct sk_buff *skb, int is_frag)
{
	struct mfc_cache *cache;
	struct sk_buff *skb2;
	int psend = -1;
	int vif,ct=0;

	/*
	 *	Without the following addition, skb->h.iph points to something
	 *	different that is not the ip header.
	 */

	skb->h.iph = skb->ip_hdr;  /* Anand, ernet. */

	cache=ipmr_cache_find(skb->ip_hdr->saddr,skb->ip_hdr->daddr);

	/*
	 *	No usable cache entry
	 */

	if(cache==NULL || (cache->mfc_flags&MFC_QUEUED))
	{
		ipmr_cache_unresolved(cache,ALL_VIFS,skb, is_frag);
		return;
	}

	vif=cache->mfc_parent;

	if(vif>=MAXVIFS || !(vifc_map&(1<<vif)) ||
		vif_table[vif].dev != skb->dev)
	{
		kfree_skb(skb, FREE_READ);
		return;
	}

	vif_table[vif].pkt_in++;
	vif_table[vif].bytes_in+=skb->len;
	cache->mfc_packets++;
	cache->mfc_bytes+=skb->len;

	/*
	 *	Forward the frame
	 */

	while(ct<MAXVIFS)
	{
		/*
		 *	0 means don't do it. Silly idea, 255 as don't do it would be cleaner!
		 */
		if(skb->ip_hdr->ttl > cache->mfc_ttls[ct] && cache->mfc_ttls[ct]>0)
		{
			if(psend!=-1)
			{
				/*
				 *	May get variant mac headers
				 *	so must copy -- boo hoo.
				 */
				skb2=skb_copy(skb, GFP_ATOMIC);
				if(skb2)
				{
					skb2->free=1;
					ipmr_queue_xmit(skb2, &vif_table[psend], skb->dev, is_frag);
				}
			}
			psend=ct;
		}
		ct++;
	}
	if(psend==-1)
		kfree_skb(skb, FREE_WRITE);
	else
		ipmr_queue_xmit(skb, &vif_table[psend], skb->dev, is_frag);
}

/*
 *	The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
 */

/*
 *	Format one line per active vif into 'buffer'. 'offset' and
 *	'length' select the window of the listing the reader wants;
 *	'*start' is set to where that window begins in the buffer and
 *	the number of valid bytes is returned.
 */

int ipmr_vif_info(char *buffer, char **start, off_t offset, int length, int dummy)
{
	struct vif_device *vif;
	int len=0;
	off_t pos=0;
	off_t begin=0;
	int size;
	int ct;

	len += sprintf(buffer,
		 "Interface Bytes In Pkts In Bytes Out Pkts Out Flags Local Remote\n");
	pos=len;

	for (ct=0;ct<MAXVIFS;ct++)
	{
		vif=&vif_table[ct];
		if(!(vifc_map&(1<<ct)))
			continue;
		if(vif->dev==NULL)
			continue;
		size = sprintf(buffer+len, "%-10s %8ld %7ld %8ld %7ld %05X %08lX %08lX\n",
			vif->dev->name,vif->bytes_in, vif->pkt_in, vif->bytes_out,vif->pkt_out,
			vif->flags, vif->local, vif->remote);
		len+=size;
		pos+=size;
		if(pos<offset)
		{
			len=0;
			begin=pos;
		}
		if(pos>offset+length)
			break;
	}

	*start=buffer+(offset-begin);
	len-=(offset-begin);
	if(len>length)
		len=length;
	return len;
}

/*
 *	Format one line per cache entry into 'buffer', walking every
 *	hash line and then the unresolved queue (index MFC_LINES). Each
 *	line is walked between cli() and sti(). The windowing arguments
 *	and return value work as for ipmr_vif_info().
 */

int ipmr_mfc_info(char *buffer, char **start, off_t offset, int length, int dummy)
{
	struct mfc_cache *mfc;
	int len=0;
	off_t pos=0;
	off_t begin=0;
	int size;
	int ct;

	len += sprintf(buffer,
		 "Group Origin SrcIface VifTtls\n");
	pos=len;

	for (ct=0;ct<MFC_LINES+1;ct++)
	{
		cli();
		mfc=mfc_cache_array[ct];
		while(mfc!=NULL)
		{
			char *name="none";
			int n;
			/*
			 *	Device name. Skip a vif whose device has been
			 *	dropped, as ipmr_vif_info() does.
			 */
			if(mfc->mfc_parent < MAXVIFS && vifc_map&(1<<mfc->mfc_parent)
				&& vif_table[mfc->mfc_parent].dev!=NULL)
				name=vif_table[mfc->mfc_parent].dev->name;

			size = sprintf(buffer+len, "%08lX %08lX %-8s",
				(unsigned long)mfc->mfc_mcastgrp,
				(unsigned long)mfc->mfc_origin,
				name);
			for(n=0;n<MAXVIFS;n++)
			{
				if(vifc_map&(1<<n))
					size+=sprintf(buffer+len+size, " %-3d", mfc->mfc_ttls[n]);
				else
					size+=sprintf(buffer+len+size, " --- ");
			}
			size+=sprintf(buffer+len+size,"\n");
			len+=size;
			pos+=size;
			if(pos<offset)
			{
				len=0;
				begin=pos;
			}
			if(pos>offset+length)
			{
				sti();
				goto done;
			}
			mfc=mfc->next;
		}
		sti();
	}
done:
	*start=buffer+(offset-begin);
	len-=(offset-begin);
	if(len>length)
		len=length;
	return len;
}

/*
 *	Setup for IP multicast routing
 */

#ifdef CONFIG_PROC_FS
static struct proc_dir_entry pde1 = {
		PROC_NET_IPMR_VIF, 9 ,"ip_mr_vif",
		S_IFREG | S_IRUGO, 1, 0, 0,
		0, &proc_net_inode_operations,
		ipmr_vif_info
	};
/* Same entry type as pde1: both are handed to proc_net_register(). */
static struct proc_dir_entry pde2 = {
		PROC_NET_IPMR_MFC, 11 ,"ip_mr_cache",
		S_IFREG | S_IRUGO, 1, 0, 0,
		0, &proc_net_inode_operations,
		ipmr_mfc_info
	};
#endif

/*
 *	Announce the router, hook the device notifier and, when /proc
 *	support is configured, register the two /proc/net entries.
 */

void ip_mr_init(void)
{
	printk(KERN_INFO "Linux IP multicast router 0.07.\n");
	register_netdevice_notifier(&ip_mr_notifier);
#ifdef CONFIG_PROC_FS
	proc_net_register(&pde1);
	proc_net_register(&pde2);
#endif
}