OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

[/] [or1k/] [trunk/] [linux/] [linux-2.4/] [net/] [ipv4/] [ipmr.c] - Blame information for rev 1765

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 1275 phoenix
/*
2
 *      IP multicast routing support for mrouted 3.6/3.8
3
 *
4
 *              (c) 1995 Alan Cox, <alan@redhat.com>
5
 *        Linux Consultancy and Custom Driver Development
6
 *
7
 *      This program is free software; you can redistribute it and/or
8
 *      modify it under the terms of the GNU General Public License
9
 *      as published by the Free Software Foundation; either version
10
 *      2 of the License, or (at your option) any later version.
11
 *
12
 *      Version: $Id: ipmr.c,v 1.1.1.1 2004-04-15 01:13:51 phoenix Exp $
13
 *
14
 *      Fixes:
15
 *      Michael Chastain        :       Incorrect size of copying.
16
 *      Alan Cox                :       Added the cache manager code
17
 *      Alan Cox                :       Fixed the clone/copy bug and device race.
18
 *      Mike McLagan            :       Routing by source
19
 *      Malcolm Beattie         :       Buffer handling fixes.
20
 *      Alexey Kuznetsov        :       Double buffer free and other fixes.
21
 *      SVR Anand               :       Fixed several multicast bugs and problems.
22
 *      Alexey Kuznetsov        :       Status, optimisations and more.
23
 *      Brad Parker             :       Better behaviour on mrouted upcall
24
 *                                      overflow.
25
 *      Carlos Picoto           :       PIMv1 Support
26
 *      Pavlin Ivanov Radoslavov:       PIMv2 Registers must checksum only PIM header
27
 *                                      Relax this requirement to work with older peers.
28
 *
29
 */
30
 
31
#include <linux/config.h>
32
#include <asm/system.h>
33
#include <asm/uaccess.h>
34
#include <linux/types.h>
35
#include <linux/sched.h>
36
#include <linux/errno.h>
37
#include <linux/timer.h>
38
#include <linux/mm.h>
39
#include <linux/kernel.h>
40
#include <linux/fcntl.h>
41
#include <linux/stat.h>
42
#include <linux/socket.h>
43
#include <linux/in.h>
44
#include <linux/inet.h>
45
#include <linux/netdevice.h>
46
#include <linux/inetdevice.h>
47
#include <linux/igmp.h>
48
#include <linux/proc_fs.h>
49
#include <linux/mroute.h>
50
#include <linux/init.h>
51
#include <net/ip.h>
52
#include <net/protocol.h>
53
#include <linux/skbuff.h>
54
#include <net/sock.h>
55
#include <net/icmp.h>
56
#include <net/udp.h>
57
#include <net/raw.h>
58
#include <linux/notifier.h>
59
#include <linux/if_arp.h>
60
#include <linux/netfilter_ipv4.h>
61
#include <net/ipip.h>
62
#include <net/checksum.h>
63
 
64
#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
65
#define CONFIG_IP_PIMSM 1
66
#endif
67
 
68
static struct sock *mroute_socket;
69
 
70
 
71
/* Big lock, protecting vif table, mrt cache and mroute socket state.
72
   Note that the changes are semaphored via rtnl_lock.
73
 */
74
 
75
static rwlock_t mrt_lock = RW_LOCK_UNLOCKED;
76
 
77
/*
78
 *      Multicast router control variables
79
 */
80
 
81
static struct vif_device vif_table[MAXVIFS];            /* Devices              */
82
static int maxvif;
83
 
84
#define VIF_EXISTS(idx) (vif_table[idx].dev != NULL)
85
 
86
int mroute_do_assert;                                   /* Set in PIM assert    */
87
int mroute_do_pim;
88
 
89
static struct mfc_cache *mfc_cache_array[MFC_LINES];    /* Forwarding cache     */
90
 
91
static struct mfc_cache *mfc_unres_queue;               /* Queue of unresolved entries */
92
atomic_t cache_resolve_queue_len;                       /* Size of unresolved   */
93
 
94
/* Special spinlock for queue of unresolved entries */
95
static spinlock_t mfc_unres_lock = SPIN_LOCK_UNLOCKED;
96
 
97
/* We return to original Alan's scheme. Hash table of resolved
98
   entries is changed only in process context and protected
99
   with weak lock mrt_lock. Queue of unresolved entries is protected
100
   with strong spinlock mfc_unres_lock.
101
 
102
   In this case data path is free of exclusive locks at all.
103
 */
104
 
105
kmem_cache_t *mrt_cachep;
106
 
107
static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
108
static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
109
static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
110
 
111
extern struct inet_protocol pim_protocol;
112
 
113
static struct timer_list ipmr_expire_timer;
114
 
115
/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
116
 
117
static
118
struct net_device *ipmr_new_tunnel(struct vifctl *v)
119
{
120
        struct net_device  *dev;
121
 
122
        dev = __dev_get_by_name("tunl0");
123
 
124
        if (dev) {
125
                int err;
126
                struct ifreq ifr;
127
                mm_segment_t    oldfs;
128
                struct ip_tunnel_parm p;
129
                struct in_device  *in_dev;
130
 
131
                memset(&p, 0, sizeof(p));
132
                p.iph.daddr = v->vifc_rmt_addr.s_addr;
133
                p.iph.saddr = v->vifc_lcl_addr.s_addr;
134
                p.iph.version = 4;
135
                p.iph.ihl = 5;
136
                p.iph.protocol = IPPROTO_IPIP;
137
                sprintf(p.name, "dvmrp%d", v->vifc_vifi);
138
                ifr.ifr_ifru.ifru_data = (void*)&p;
139
 
140
                oldfs = get_fs(); set_fs(KERNEL_DS);
141
                err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL);
142
                set_fs(oldfs);
143
 
144
                dev = NULL;
145
 
146
                if (err == 0 && (dev = __dev_get_by_name(p.name)) != NULL) {
147
                        dev->flags |= IFF_MULTICAST;
148
 
149
                        in_dev = __in_dev_get(dev);
150
                        if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL)
151
                                goto failure;
152
                        in_dev->cnf.rp_filter = 0;
153
 
154
                        if (dev_open(dev))
155
                                goto failure;
156
                }
157
        }
158
        return dev;
159
 
160
failure:
161
        unregister_netdevice(dev);
162
        return NULL;
163
}
#ifdef CONFIG_IP_PIMSM

/* Index of the PIM register vif in vif_table, -1 when none exists.
   Written under write-locked mrt_lock. */
static int reg_vif_num = -1;

/*
 *      Transmit hook of the "pimreg" device: every packet routed to it
 *      is bounced up to the daemon as an IGMPMSG_WHOLEPKT report and
 *      then dropped.  Always consumes the skb; returns 0.
 */
static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
        /* mrt_lock guards reg_vif_num and the mroute socket. */
        read_lock(&mrt_lock);
        ((struct net_device_stats*)dev->priv)->tx_bytes += skb->len;
        ((struct net_device_stats*)dev->priv)->tx_packets++;
        ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
        read_unlock(&mrt_lock);
        kfree_skb(skb);
        return 0;
}
179
 
180
static struct net_device_stats *reg_vif_get_stats(struct net_device *dev)
181
{
182
        return (struct net_device_stats*)dev->priv;
183
}
184
 
185
static
186
struct net_device *ipmr_reg_vif(struct vifctl *v)
187
{
188
        struct net_device  *dev;
189
        struct in_device *in_dev;
190
        int size;
191
 
192
        size = sizeof(*dev) + sizeof(struct net_device_stats);
193
        dev = kmalloc(size, GFP_KERNEL);
194
        if (!dev)
195
                return NULL;
196
 
197
        memset(dev, 0, size);
198
 
199
        dev->priv = dev + 1;
200
 
201
        strcpy(dev->name, "pimreg");
202
 
203
        dev->type               = ARPHRD_PIMREG;
204
        dev->mtu                = 1500 - sizeof(struct iphdr) - 8;
205
        dev->flags              = IFF_NOARP;
206
        dev->hard_start_xmit    = reg_vif_xmit;
207
        dev->get_stats          = reg_vif_get_stats;
208
        dev->features           |= NETIF_F_DYNALLOC;
209
 
210
        if (register_netdevice(dev)) {
211
                kfree(dev);
212
                return NULL;
213
        }
214
        dev->iflink = 0;
215
 
216
        if ((in_dev = inetdev_init(dev)) == NULL)
217
                goto failure;
218
 
219
        in_dev->cnf.rp_filter = 0;
220
 
221
        if (dev_open(dev))
222
                goto failure;
223
 
224
        return dev;
225
 
226
failure:
227
        unregister_netdevice(dev);
228
        return NULL;
229
}
230
#endif
231
 
232
/*
233
 *      Delete a VIF entry
234
 */
235
 
236
static int vif_delete(int vifi)
237
{
238
        struct vif_device *v;
239
        struct net_device *dev;
240
        struct in_device *in_dev;
241
 
242
        if (vifi < 0 || vifi >= maxvif)
243
                return -EADDRNOTAVAIL;
244
 
245
        v = &vif_table[vifi];
246
 
247
        write_lock_bh(&mrt_lock);
248
        dev = v->dev;
249
        v->dev = NULL;
250
 
251
        if (!dev) {
252
                write_unlock_bh(&mrt_lock);
253
                return -EADDRNOTAVAIL;
254
        }
255
 
256
#ifdef CONFIG_IP_PIMSM
257
        if (vifi == reg_vif_num)
258
                reg_vif_num = -1;
259
#endif
260
 
261
        if (vifi+1 == maxvif) {
262
                int tmp;
263
                for (tmp=vifi-1; tmp>=0; tmp--) {
264
                        if (VIF_EXISTS(tmp))
265
                                break;
266
                }
267
                maxvif = tmp+1;
268
        }
269
 
270
        write_unlock_bh(&mrt_lock);
271
 
272
        dev_set_allmulti(dev, -1);
273
 
274
        if ((in_dev = __in_dev_get(dev)) != NULL) {
275
                in_dev->cnf.mc_forwarding--;
276
                ip_rt_multicast_event(in_dev);
277
        }
278
 
279
        if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
280
                unregister_netdevice(dev);
281
 
282
        dev_put(dev);
283
        return 0;
284
}
285
 
286
/* Destroy an unresolved cache entry, killing queued skbs
287
   and reporting error to netlink readers.
288
 */
289
 
290
static void ipmr_destroy_unres(struct mfc_cache *c)
291
{
292
        struct sk_buff *skb;
293
 
294
        atomic_dec(&cache_resolve_queue_len);
295
 
296
        while((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) {
297
                if (skb->nh.iph->version == 0) {
298
                        struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
299
                        nlh->nlmsg_type = NLMSG_ERROR;
300
                        nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
301
                        skb_trim(skb, nlh->nlmsg_len);
302
                        ((struct nlmsgerr*)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
303
                        netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
304
                } else
305
                        kfree_skb(skb);
306
        }
307
 
308
        kmem_cache_free(mrt_cachep, c);
309
}
310
 
311
 
312
/* Single timer process for all the unresolved queue. */
313
 
314
void ipmr_expire_process(unsigned long dummy)
315
{
316
        unsigned long now;
317
        unsigned long expires;
318
        struct mfc_cache *c, **cp;
319
 
320
        if (!spin_trylock(&mfc_unres_lock)) {
321
                mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
322
                return;
323
        }
324
 
325
        if (atomic_read(&cache_resolve_queue_len) == 0)
326
                goto out;
327
 
328
        now = jiffies;
329
        expires = 10*HZ;
330
        cp = &mfc_unres_queue;
331
 
332
        while ((c=*cp) != NULL) {
333
                long interval = c->mfc_un.unres.expires - now;
334
 
335
                if (interval > 0) {
336
                        if (interval < expires)
337
                                expires = interval;
338
                        cp = &c->next;
339
                        continue;
340
                }
341
 
342
                *cp = c->next;
343
 
344
                ipmr_destroy_unres(c);
345
        }
346
 
347
        if (atomic_read(&cache_resolve_queue_len))
348
                mod_timer(&ipmr_expire_timer, jiffies + expires);
349
 
350
out:
351
        spin_unlock(&mfc_unres_lock);
352
}
353
 
354
/* Fill oifs list. It is called under write locked mrt_lock. */
355
 
356
static void ipmr_update_threshoulds(struct mfc_cache *cache, unsigned char *ttls)
357
{
358
        int vifi;
359
 
360
        cache->mfc_un.res.minvif = MAXVIFS;
361
        cache->mfc_un.res.maxvif = 0;
362
        memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
363
 
364
        for (vifi=0; vifi<maxvif; vifi++) {
365
                if (VIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) {
366
                        cache->mfc_un.res.ttls[vifi] = ttls[vifi];
367
                        if (cache->mfc_un.res.minvif > vifi)
368
                                cache->mfc_un.res.minvif = vifi;
369
                        if (cache->mfc_un.res.maxvif <= vifi)
370
                                cache->mfc_un.res.maxvif = vifi + 1;
371
                }
372
        }
373
}
374
 
375
static int vif_add(struct vifctl *vifc, int mrtsock)
376
{
377
        int vifi = vifc->vifc_vifi;
378
        struct vif_device *v = &vif_table[vifi];
379
        struct net_device *dev;
380
        struct in_device *in_dev;
381
 
382
        /* Is vif busy ? */
383
        if (VIF_EXISTS(vifi))
384
                return -EADDRINUSE;
385
 
386
        switch (vifc->vifc_flags) {
387
#ifdef CONFIG_IP_PIMSM
388
        case VIFF_REGISTER:
389
                /*
390
                 * Special Purpose VIF in PIM
391
                 * All the packets will be sent to the daemon
392
                 */
393
                if (reg_vif_num >= 0)
394
                        return -EADDRINUSE;
395
                dev = ipmr_reg_vif(vifc);
396
                if (!dev)
397
                        return -ENOBUFS;
398
                break;
399
#endif
400
        case VIFF_TUNNEL:
401
                dev = ipmr_new_tunnel(vifc);
402
                if (!dev)
403
                        return -ENOBUFS;
404
                break;
405
        case 0:
406
                dev=ip_dev_find(vifc->vifc_lcl_addr.s_addr);
407
                if (!dev)
408
                        return -EADDRNOTAVAIL;
409
                __dev_put(dev);
410
                break;
411
        default:
412
                return -EINVAL;
413
        }
414
 
415
        if ((in_dev = __in_dev_get(dev)) == NULL)
416
                return -EADDRNOTAVAIL;
417
        in_dev->cnf.mc_forwarding++;
418
        dev_set_allmulti(dev, +1);
419
        ip_rt_multicast_event(in_dev);
420
 
421
        /*
422
         *      Fill in the VIF structures
423
         */
424
        v->rate_limit=vifc->vifc_rate_limit;
425
        v->local=vifc->vifc_lcl_addr.s_addr;
426
        v->remote=vifc->vifc_rmt_addr.s_addr;
427
        v->flags=vifc->vifc_flags;
428
        if (!mrtsock)
429
                v->flags |= VIFF_STATIC;
430
        v->threshold=vifc->vifc_threshold;
431
        v->bytes_in = 0;
432
        v->bytes_out = 0;
433
        v->pkt_in = 0;
434
        v->pkt_out = 0;
435
        v->link = dev->ifindex;
436
        if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
437
                v->link = dev->iflink;
438
 
439
        /* And finish update writing critical data */
440
        write_lock_bh(&mrt_lock);
441
        dev_hold(dev);
442
        v->dev=dev;
443
#ifdef CONFIG_IP_PIMSM
444
        if (v->flags&VIFF_REGISTER)
445
                reg_vif_num = vifi;
446
#endif
447
        if (vifi+1 > maxvif)
448
                maxvif = vifi+1;
449
        write_unlock_bh(&mrt_lock);
450
        return 0;
451
}
452
 
453
static struct mfc_cache *ipmr_cache_find(__u32 origin, __u32 mcastgrp)
454
{
455
        int line=MFC_HASH(mcastgrp,origin);
456
        struct mfc_cache *c;
457
 
458
        for (c=mfc_cache_array[line]; c; c = c->next) {
459
                if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
460
                        break;
461
        }
462
        return c;
463
}
464
 
465
/*
466
 *      Allocate a multicast cache entry
467
 */
468
static struct mfc_cache *ipmr_cache_alloc(void)
469
{
470
        struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_KERNEL);
471
        if(c==NULL)
472
                return NULL;
473
        memset(c, 0, sizeof(*c));
474
        c->mfc_un.res.minvif = MAXVIFS;
475
        return c;
476
}
477
 
478
static struct mfc_cache *ipmr_cache_alloc_unres(void)
479
{
480
        struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_ATOMIC);
481
        if(c==NULL)
482
                return NULL;
483
        memset(c, 0, sizeof(*c));
484
        skb_queue_head_init(&c->mfc_un.unres.unresolved);
485
        c->mfc_un.unres.expires = jiffies + 10*HZ;
486
        return c;
487
}
488
 
489
/*
490
 *      A cache entry has gone into a resolved state from queued
491
 */
492
 
493
static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
494
{
495
        struct sk_buff *skb;
496
 
497
        /*
498
         *      Play the pending entries through our router
499
         */
500
 
501
        while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
502
                if (skb->nh.iph->version == 0) {
503
                        int err;
504
                        struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
505
 
506
                        if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
507
                                nlh->nlmsg_len = skb->tail - (u8*)nlh;
508
                        } else {
509
                                nlh->nlmsg_type = NLMSG_ERROR;
510
                                nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
511
                                skb_trim(skb, nlh->nlmsg_len);
512
                                ((struct nlmsgerr*)NLMSG_DATA(nlh))->error = -EMSGSIZE;
513
                        }
514
                        err = netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
515
                } else
516
                        ip_mr_forward(skb, c, 0);
517
        }
518
}
519
 
520
/*
521
 *      Bounce a cache query up to mrouted. We could use netlink for this but mrouted
522
 *      expects the following bizarre scheme.
523
 *
524
 *      Called under mrt_lock.
525
 */
526
 
527
static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
528
{
529
        struct sk_buff *skb;
530
        int ihl = pkt->nh.iph->ihl<<2;
531
        struct igmphdr *igmp;
532
        struct igmpmsg *msg;
533
        int ret;
534
 
535
#ifdef CONFIG_IP_PIMSM
536
        if (assert == IGMPMSG_WHOLEPKT)
537
                skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
538
        else
539
#endif
540
                skb = alloc_skb(128, GFP_ATOMIC);
541
 
542
        if(!skb)
543
                return -ENOBUFS;
544
 
545
#ifdef CONFIG_IP_PIMSM
546
        if (assert == IGMPMSG_WHOLEPKT) {
547
                /* Ugly, but we have no choice with this interface.
548
                   Duplicate old header, fix ihl, length etc.
549
                   And all this only to mangle msg->im_msgtype and
550
                   to set msg->im_mbz to "mbz" :-)
551
                 */
552
                msg = (struct igmpmsg*)skb_push(skb, sizeof(struct iphdr));
553
                skb->nh.raw = skb->h.raw = (u8*)msg;
554
                memcpy(msg, pkt->nh.raw, sizeof(struct iphdr));
555
                msg->im_msgtype = IGMPMSG_WHOLEPKT;
556
                msg->im_mbz = 0;
557
                msg->im_vif = reg_vif_num;
558
                skb->nh.iph->ihl = sizeof(struct iphdr) >> 2;
559
                skb->nh.iph->tot_len = htons(ntohs(pkt->nh.iph->tot_len) + sizeof(struct iphdr));
560
        } else
561
#endif
562
        {
563
 
564
        /*
565
         *      Copy the IP header
566
         */
567
 
568
        skb->nh.iph = (struct iphdr *)skb_put(skb, ihl);
569
        memcpy(skb->data,pkt->data,ihl);
570
        skb->nh.iph->protocol = 0;                       /* Flag to the kernel this is a route add */
571
        msg = (struct igmpmsg*)skb->nh.iph;
572
        msg->im_vif = vifi;
573
        skb->dst = dst_clone(pkt->dst);
574
 
575
        /*
576
         *      Add our header
577
         */
578
 
579
        igmp=(struct igmphdr *)skb_put(skb,sizeof(struct igmphdr));
580
        igmp->type      =
581
        msg->im_msgtype = assert;
582
        igmp->code      =       0;
583
        skb->nh.iph->tot_len=htons(skb->len);                   /* Fix the length */
584
        skb->h.raw = skb->nh.raw;
585
        }
586
 
587
        if (mroute_socket == NULL) {
588
                kfree_skb(skb);
589
                return -EINVAL;
590
        }
591
 
592
        /*
593
         *      Deliver to mrouted
594
         */
595
        if ((ret=sock_queue_rcv_skb(mroute_socket,skb))<0) {
596
                if (net_ratelimit())
597
                        printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
598
                kfree_skb(skb);
599
        }
600
 
601
        return ret;
602
}
603
 
604
/*
605
 *      Queue a packet for resolution. It gets locked cache entry!
606
 */
607
 
608
static int
609
ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
610
{
611
        int err;
612
        struct mfc_cache *c;
613
 
614
        spin_lock_bh(&mfc_unres_lock);
615
        for (c=mfc_unres_queue; c; c=c->next) {
616
                if (c->mfc_mcastgrp == skb->nh.iph->daddr &&
617
                    c->mfc_origin == skb->nh.iph->saddr)
618
                        break;
619
        }
620
 
621
        if (c == NULL) {
622
                /*
623
                 *      Create a new entry if allowable
624
                 */
625
 
626
                if (atomic_read(&cache_resolve_queue_len)>=10 ||
627
                    (c=ipmr_cache_alloc_unres())==NULL) {
628
                        spin_unlock_bh(&mfc_unres_lock);
629
 
630
                        kfree_skb(skb);
631
                        return -ENOBUFS;
632
                }
633
 
634
                /*
635
                 *      Fill in the new cache entry
636
                 */
637
                c->mfc_parent=-1;
638
                c->mfc_origin=skb->nh.iph->saddr;
639
                c->mfc_mcastgrp=skb->nh.iph->daddr;
640
 
641
                /*
642
                 *      Reflect first query at mrouted.
643
                 */
644
                if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE))<0) {
645
                        /* If the report failed throw the cache entry
646
                           out - Brad Parker
647
                         */
648
                        spin_unlock_bh(&mfc_unres_lock);
649
 
650
                        kmem_cache_free(mrt_cachep, c);
651
                        kfree_skb(skb);
652
                        return err;
653
                }
654
 
655
                atomic_inc(&cache_resolve_queue_len);
656
                c->next = mfc_unres_queue;
657
                mfc_unres_queue = c;
658
 
659
                mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
660
        }
661
 
662
        /*
663
         *      See if we can append the packet
664
         */
665
        if (c->mfc_un.unres.unresolved.qlen>3) {
666
                kfree_skb(skb);
667
                err = -ENOBUFS;
668
        } else {
669
                skb_queue_tail(&c->mfc_un.unres.unresolved,skb);
670
                err = 0;
671
        }
672
 
673
        spin_unlock_bh(&mfc_unres_lock);
674
        return err;
675
}
676
 
677
/*
678
 *      MFC cache manipulation by user space mroute daemon
679
 */
680
 
681
int ipmr_mfc_delete(struct mfcctl *mfc)
682
{
683
        int line;
684
        struct mfc_cache *c, **cp;
685
 
686
        line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
687
 
688
        for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
689
                if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
690
                    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
691
                        write_lock_bh(&mrt_lock);
692
                        *cp = c->next;
693
                        write_unlock_bh(&mrt_lock);
694
 
695
                        kmem_cache_free(mrt_cachep, c);
696
                        return 0;
697
                }
698
        }
699
        return -ENOENT;
700
}
701
 
702
int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
703
{
704
        int line;
705
        struct mfc_cache *uc, *c, **cp;
706
 
707
        line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
708
 
709
        for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
710
                if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
711
                    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
712
                        break;
713
        }
714
 
715
        if (c != NULL) {
716
                write_lock_bh(&mrt_lock);
717
                c->mfc_parent = mfc->mfcc_parent;
718
                ipmr_update_threshoulds(c, mfc->mfcc_ttls);
719
                if (!mrtsock)
720
                        c->mfc_flags |= MFC_STATIC;
721
                write_unlock_bh(&mrt_lock);
722
                return 0;
723
        }
724
 
725
        if(!MULTICAST(mfc->mfcc_mcastgrp.s_addr))
726
                return -EINVAL;
727
 
728
        c=ipmr_cache_alloc();
729
        if (c==NULL)
730
                return -ENOMEM;
731
 
732
        c->mfc_origin=mfc->mfcc_origin.s_addr;
733
        c->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr;
734
        c->mfc_parent=mfc->mfcc_parent;
735
        ipmr_update_threshoulds(c, mfc->mfcc_ttls);
736
        if (!mrtsock)
737
                c->mfc_flags |= MFC_STATIC;
738
 
739
        write_lock_bh(&mrt_lock);
740
        c->next = mfc_cache_array[line];
741
        mfc_cache_array[line] = c;
742
        write_unlock_bh(&mrt_lock);
743
 
744
        /*
745
         *      Check to see if we resolved a queued list. If so we
746
         *      need to send on the frames and tidy up.
747
         */
748
        spin_lock_bh(&mfc_unres_lock);
749
        for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
750
             cp = &uc->next) {
751
                if (uc->mfc_origin == c->mfc_origin &&
752
                    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
753
                        *cp = uc->next;
754
                        if (atomic_dec_and_test(&cache_resolve_queue_len))
755
                                del_timer(&ipmr_expire_timer);
756
                        break;
757
                }
758
        }
759
        spin_unlock_bh(&mfc_unres_lock);
760
 
761
        if (uc) {
762
                ipmr_cache_resolve(uc, c);
763
                kmem_cache_free(mrt_cachep, uc);
764
        }
765
        return 0;
766
}
767
 
768
/*
769
 *      Close the multicast socket, and clear the vif tables etc
770
 */
771
 
772
static void mroute_clean_tables(struct sock *sk)
773
{
774
        int i;
775
 
776
        /*
777
         *      Shut down all active vif entries
778
         */
779
        for(i=0; i<maxvif; i++) {
780
                if (!(vif_table[i].flags&VIFF_STATIC))
781
                        vif_delete(i);
782
        }
783
 
784
        /*
785
         *      Wipe the cache
786
         */
787
        for (i=0;i<MFC_LINES;i++) {
788
                struct mfc_cache *c, **cp;
789
 
790
                cp = &mfc_cache_array[i];
791
                while ((c = *cp) != NULL) {
792
                        if (c->mfc_flags&MFC_STATIC) {
793
                                cp = &c->next;
794
                                continue;
795
                        }
796
                        write_lock_bh(&mrt_lock);
797
                        *cp = c->next;
798
                        write_unlock_bh(&mrt_lock);
799
 
800
                        kmem_cache_free(mrt_cachep, c);
801
                }
802
        }
803
 
804
        if (atomic_read(&cache_resolve_queue_len) != 0) {
805
                struct mfc_cache *c;
806
 
807
                spin_lock_bh(&mfc_unres_lock);
808
                while (mfc_unres_queue != NULL) {
809
                        c = mfc_unres_queue;
810
                        mfc_unres_queue = c->next;
811
                        spin_unlock_bh(&mfc_unres_lock);
812
 
813
                        ipmr_destroy_unres(c);
814
 
815
                        spin_lock_bh(&mfc_unres_lock);
816
                }
817
                spin_unlock_bh(&mfc_unres_lock);
818
        }
819
}
820
 
821
static void mrtsock_destruct(struct sock *sk)
822
{
823
        rtnl_lock();
824
        if (sk == mroute_socket) {
825
                ipv4_devconf.mc_forwarding--;
826
 
827
                write_lock_bh(&mrt_lock);
828
                mroute_socket=NULL;
829
                write_unlock_bh(&mrt_lock);
830
 
831
                mroute_clean_tables(sk);
832
        }
833
        rtnl_unlock();
834
}
835
 
836
/*
837
 *      Socket options and virtual interface manipulation. The whole
838
 *      virtual interface system is a complete heap, but unfortunately
839
 *      that's how BSD mrouted happens to think. Maybe one day with a proper
840
 *      MOSPF/PIM router set up we can clean this up.
841
 */
842
 
843
int ip_mroute_setsockopt(struct sock *sk,int optname,char *optval,int optlen)
844
{
845
        int ret;
846
        struct vifctl vif;
847
        struct mfcctl mfc;
848
 
849
        if(optname!=MRT_INIT)
850
        {
851
                if(sk!=mroute_socket && !capable(CAP_NET_ADMIN))
852
                        return -EACCES;
853
        }
854
 
855
        switch(optname)
856
        {
857
                case MRT_INIT:
858
                        if(sk->type!=SOCK_RAW || sk->num!=IPPROTO_IGMP)
859
                                return -EOPNOTSUPP;
860
                        if(optlen!=sizeof(int))
861
                                return -ENOPROTOOPT;
862
 
863
                        rtnl_lock();
864
                        if (mroute_socket) {
865
                                rtnl_unlock();
866
                                return -EADDRINUSE;
867
                        }
868
 
869
                        ret = ip_ra_control(sk, 1, mrtsock_destruct);
870
                        if (ret == 0) {
871
                                write_lock_bh(&mrt_lock);
872
                                mroute_socket=sk;
873
                                write_unlock_bh(&mrt_lock);
874
 
875
                                ipv4_devconf.mc_forwarding++;
876
                        }
877
                        rtnl_unlock();
878
                        return ret;
879
                case MRT_DONE:
880
                        if (sk!=mroute_socket)
881
                                return -EACCES;
882
                        return ip_ra_control(sk, 0, NULL);
883
                case MRT_ADD_VIF:
884
                case MRT_DEL_VIF:
885
                        if(optlen!=sizeof(vif))
886
                                return -EINVAL;
887
                        if (copy_from_user(&vif,optval,sizeof(vif)))
888
                                return -EFAULT;
889
                        if(vif.vifc_vifi >= MAXVIFS)
890
                                return -ENFILE;
891
                        rtnl_lock();
892
                        if (optname==MRT_ADD_VIF) {
893
                                ret = vif_add(&vif, sk==mroute_socket);
894
                        } else {
895
                                ret = vif_delete(vif.vifc_vifi);
896
                        }
897
                        rtnl_unlock();
898
                        return ret;
899
 
900
                /*
901
                 *      Manipulate the forwarding caches. These live
902
                 *      in a sort of kernel/user symbiosis.
903
                 */
904
                case MRT_ADD_MFC:
905
                case MRT_DEL_MFC:
906
                        if(optlen!=sizeof(mfc))
907
                                return -EINVAL;
908
                        if (copy_from_user(&mfc,optval, sizeof(mfc)))
909
                                return -EFAULT;
910
                        rtnl_lock();
911
                        if (optname==MRT_DEL_MFC)
912
                                ret = ipmr_mfc_delete(&mfc);
913
                        else
914
                                ret = ipmr_mfc_add(&mfc, sk==mroute_socket);
915
                        rtnl_unlock();
916
                        return ret;
917
                /*
918
                 *      Control PIM assert.
919
                 */
920
                case MRT_ASSERT:
921
                {
922
                        int v;
923
                        if(get_user(v,(int *)optval))
924
                                return -EFAULT;
925
                        mroute_do_assert=(v)?1:0;
926
                        return 0;
927
                }
928
#ifdef CONFIG_IP_PIMSM
929
                case MRT_PIM:
930
                {
931
                        int v;
932
                        if(get_user(v,(int *)optval))
933
                                return -EFAULT;
934
                        v = (v)?1:0;
935
                        rtnl_lock();
936
                        if (v != mroute_do_pim) {
937
                                mroute_do_pim = v;
938
                                mroute_do_assert = v;
939
#ifdef CONFIG_IP_PIMSM_V2
940
                                if (mroute_do_pim)
941
                                        inet_add_protocol(&pim_protocol);
942
                                else
943
                                        inet_del_protocol(&pim_protocol);
944
#endif
945
                        }
946
                        rtnl_unlock();
947
                        return 0;
948
                }
949
#endif
950
                /*
951
                 *      Spurious command, or MRT_VERSION which you cannot
952
                 *      set.
953
                 */
954
                default:
955
                        return -ENOPROTOOPT;
956
        }
957
}
958
 
959
/*
960
 *      Getsock opt support for the multicast routing system.
961
 */
962
 
963
int ip_mroute_getsockopt(struct sock *sk,int optname,char *optval,int *optlen)
964
{
965
        int olr;
966
        int val;
967
 
968
        if(optname!=MRT_VERSION &&
969
#ifdef CONFIG_IP_PIMSM
970
           optname!=MRT_PIM &&
971
#endif
972
           optname!=MRT_ASSERT)
973
                return -ENOPROTOOPT;
974
 
975
        if (get_user(olr, optlen))
976
                return -EFAULT;
977
 
978
        olr = min_t(unsigned int, olr, sizeof(int));
979
        if (olr < 0)
980
                return -EINVAL;
981
 
982
        if(put_user(olr,optlen))
983
                return -EFAULT;
984
        if(optname==MRT_VERSION)
985
                val=0x0305;
986
#ifdef CONFIG_IP_PIMSM
987
        else if(optname==MRT_PIM)
988
                val=mroute_do_pim;
989
#endif
990
        else
991
                val=mroute_do_assert;
992
        if(copy_to_user(optval,&val,olr))
993
                return -EFAULT;
994
        return 0;
995
}
996
 
997
/*
998
 *      The IP multicast ioctl support routines.
999
 */
1000
 
1001
/*
 * ioctl handler for the multicast routing statistics queries:
 * SIOCGETVIFCNT returns per-VIF packet/byte counters, SIOCGETSGCNT
 * returns per-(S,G) counters from the forwarding cache.
 */
int ipmr_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
        struct sioc_sg_req sr;
        struct sioc_vif_req vr;
        struct vif_device *vif;
        struct mfc_cache *c;

        switch(cmd)
        {
                case SIOCGETVIFCNT:
                        if (copy_from_user(&vr,(void *)arg,sizeof(vr)))
                                return -EFAULT;
                        if(vr.vifi>=maxvif)
                                return -EINVAL;
                        read_lock(&mrt_lock);
                        vif=&vif_table[vr.vifi];
                        if(VIF_EXISTS(vr.vifi)) {
                                /* Snapshot the counters under the lock ... */
                                vr.icount=vif->pkt_in;
                                vr.ocount=vif->pkt_out;
                                vr.ibytes=vif->bytes_in;
                                vr.obytes=vif->bytes_out;
                                /* ... and drop it before touching user
                                   memory, which may fault. */
                                read_unlock(&mrt_lock);

                                if (copy_to_user((void *)arg,&vr,sizeof(vr)))
                                        return -EFAULT;
                                return 0;
                        }
                        read_unlock(&mrt_lock);
                        return -EADDRNOTAVAIL;
                case SIOCGETSGCNT:
                        if (copy_from_user(&sr,(void *)arg,sizeof(sr)))
                                return -EFAULT;

                        read_lock(&mrt_lock);
                        c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
                        if (c) {
                                sr.pktcnt = c->mfc_un.res.pkt;
                                sr.bytecnt = c->mfc_un.res.bytes;
                                sr.wrong_if = c->mfc_un.res.wrong_if;
                                /* Same pattern: unlock before the copy
                                   back to user space. */
                                read_unlock(&mrt_lock);

                                if (copy_to_user((void *)arg,&sr,sizeof(sr)))
                                        return -EFAULT;
                                return 0;
                        }
                        read_unlock(&mrt_lock);
                        return -EADDRNOTAVAIL;
                default:
                        return -ENOIOCTLCMD;
        }
}
1052
 
1053
 
1054
static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1055
{
1056
        struct vif_device *v;
1057
        int ct;
1058
        if (event != NETDEV_UNREGISTER)
1059
                return NOTIFY_DONE;
1060
        v=&vif_table[0];
1061
        for(ct=0;ct<maxvif;ct++,v++) {
1062
                if (v->dev==ptr)
1063
                        vif_delete(ct);
1064
        }
1065
        return NOTIFY_DONE;
1066
}
1067
 
1068
 
1069
/* Hooked into the netdevice notifier chain by ip_mr_init() so VIFs on a
   disappearing device are cleaned up via ipmr_device_event(). */
static struct notifier_block ip_mr_notifier={
        ipmr_device_event,      /* notifier_call */
        NULL,                   /* next */

};
1074
 
1075
/*
1076
 *      Encapsulate a packet by attaching a valid IPIP header to it.
1077
 *      This avoids tunnel drivers and other mess and gives us the speed so
1078
 *      important for multicast video.
1079
 */
1080
 
1081
/*
 *      Encapsulate a packet by attaching a valid IPIP header to it.
 *      This avoids tunnel drivers and other mess and gives us the speed so
 *      important for multicast video.
 */

static void ip_encap(struct sk_buff *skb, u32 saddr, u32 daddr)
{
        struct iphdr *inner = skb->nh.iph;
        struct iphdr *outer;

        /* Make room in front of the packet for the outer IPv4 header. */
        outer = (struct iphdr *)skb_push(skb, sizeof(struct iphdr));

        outer->version  = 4;
        outer->ihl      = 5;                    /* no IP options */
        outer->tos      = inner->tos;           /* inherit TOS and TTL */
        outer->ttl      = inner->ttl;
        outer->frag_off = 0;
        outer->tot_len  = htons(skb->len);
        outer->saddr    = saddr;
        outer->daddr    = daddr;
        outer->protocol = IPPROTO_IPIP;
        ip_select_ident(outer, skb->dst, NULL);
        ip_send_check(outer);                   /* checksum last, after all fields */

        /* The old network header becomes the transport (IPIP) header. */
        skb->h.ipiph = inner;
        skb->nh.iph = outer;
        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
#ifdef CONFIG_NETFILTER
        nf_conntrack_put(skb->nfct);
        skb->nfct = NULL;
#endif
}
1105
 
1106
static inline int ipmr_forward_finish(struct sk_buff *skb)
1107
{
1108
        struct ip_options *opt = &(IPCB(skb)->opt);
1109
        struct dst_entry *dst = skb->dst;
1110
 
1111
        if (unlikely(opt->optlen))
1112
                ip_forward_options(skb);
1113
 
1114
        if (skb->len <= dst->pmtu)
1115
                return dst->output(skb);
1116
        else
1117
                return ip_fragment(skb, dst->output);
1118
}
1119
 
1120
/*
1121
 *      Processing handlers for ipmr_forward
1122
 */
1123
 
1124
/*
 * Transmit one copy of a forwarded multicast packet on VIF `vifi`.
 * `last` means this is the final transmission of this skb, which allows
 * it to be reused (reference bumped) rather than copied.  A PIM register
 * VIF never transmits: the packet is upcalled to the daemon instead.
 * Called under the mrt_lock read lock (from ip_mr_forward).
 */
static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c,
                           int vifi, int last)
{
        struct iphdr *iph = skb->nh.iph;
        struct vif_device *vif = &vif_table[vifi];
        struct net_device *dev;
        struct rtable *rt;
        int    encap = 0;
        struct sk_buff *skb2;

        if (vif->dev == NULL)
                return;

#ifdef CONFIG_IP_PIMSM
        if (vif->flags & VIFF_REGISTER) {
                /* Register VIF: count it and hand the whole packet to
                   the user-space PIM daemon. */
                vif->pkt_out++;
                vif->bytes_out+=skb->len;
                ((struct net_device_stats*)vif->dev->priv)->tx_bytes += skb->len;
                ((struct net_device_stats*)vif->dev->priv)->tx_packets++;
                ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
                return;
        }
#endif

        if (vif->flags&VIFF_TUNNEL) {
                /* Tunnel VIF: route towards the tunnel endpoint and
                   reserve headroom for the extra IPIP header. */
                if (ip_route_output(&rt, vif->remote, vif->local, RT_TOS(iph->tos), vif->link))
                        return;
                encap = sizeof(struct iphdr);
        } else {
                if (ip_route_output(&rt, iph->daddr, 0, RT_TOS(iph->tos), vif->link))
                        return;
        }

        dev = rt->u.dst.dev;

        if (skb->len+encap > rt->u.dst.pmtu && (ntohs(iph->frag_off) & IP_DF)) {
                /* Do not fragment multicasts. Alas, IPv4 does not
                   allow to send ICMP, so that packets will disappear
                   to blackhole.
                 */

                IP_INC_STATS_BH(IpFragFails);
                ip_rt_put(rt);
                return;
        }

        encap += dev->hard_header_len;

        /* Obtain a copy we may modify: reallocate when headroom is short,
         * the data is cloned, or more transmissions follow; clone when
         * only the reference count is shared; and reuse the skb itself
         * (extra reference) only for the last send of an exclusively
         * owned buffer.
         */
        if (skb_headroom(skb) < encap || skb_cloned(skb) || !last)
                skb2 = skb_realloc_headroom(skb, (encap + 15)&~15);
        else if (atomic_read(&skb->users) != 1)
                skb2 = skb_clone(skb, GFP_ATOMIC);
        else {
                atomic_inc(&skb->users);
                skb2 = skb;
        }

        if (skb2 == NULL) {
                ip_rt_put(rt);
                return;
        }

        vif->pkt_out++;
        vif->bytes_out+=skb->len;

        /* Attach the output route; the rt reference moves to skb2. */
        dst_release(skb2->dst);
        skb2->dst = &rt->u.dst;
        iph = skb2->nh.iph;
        ip_decrease_ttl(iph);

        /* FIXME: forward and output firewalls used to be called here.
         * What do we do with netfilter? -- RR */
        if (vif->flags & VIFF_TUNNEL) {
                ip_encap(skb2, vif->local, vif->remote);
                /* FIXME: extra output firewall step used to be here. --RR */
                ((struct ip_tunnel *)vif->dev->priv)->stat.tx_packets++;
                ((struct ip_tunnel *)vif->dev->priv)->stat.tx_bytes+=skb2->len;
        }

        /* Mark the copy so a looped-back instance is not forwarded again
           (checked at the top of ip_mr_input). */
        IPCB(skb2)->flags |= IPSKB_FORWARDED;

        /*
         * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
         * not only before forwarding, but after forwarding on all output
         * interfaces. It is clear, if mrouter runs a multicasting
         * program, it should receive packets not depending to what interface
         * program is joined.
         * If we will not make it, the program will have to join on all
         * interfaces. On the other hand, multihoming host (or router, but
         * not mrouter) cannot join to more than one interface - it will
         * result in receiving multiple packets.
         */
        NF_HOOK(PF_INET, NF_IP_FORWARD, skb2, skb->dev, dev,
                ipmr_forward_finish);
}
1219
 
1220
int ipmr_find_vif(struct net_device *dev)
1221
{
1222
        int ct;
1223
        for (ct=maxvif-1; ct>=0; ct--) {
1224
                if (vif_table[ct].dev == dev)
1225
                        break;
1226
        }
1227
        return ct;
1228
}
1229
 
1230
/* "local" means that we should preserve one skb (for local delivery) */
1231
 
1232
/*
 * Forward a resolved multicast packet on every output VIF whose TTL
 * threshold the packet passes.  "local" means the caller still needs
 * the skb for local delivery, so it must not be consumed here.
 * Called under the mrt_lock read lock (see ip_mr_input).  Returns 0.
 */
int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
{
        int psend = -1;
        int vif, ct;

        vif = cache->mfc_parent;
        cache->mfc_un.res.pkt++;
        cache->mfc_un.res.bytes += skb->len;

        /*
         * Wrong interface: drop packet and (maybe) send PIM assert.
         */
        if (vif_table[vif].dev != skb->dev) {
                int true_vifi;

                if (((struct rtable*)skb->dst)->key.iif == 0) {
                        /* It is our own packet, looped back.
                           Very complicated situation...

                           The best workaround until routing daemons will be
                           fixed is not to redistribute packet, if it was
                           send through wrong interface. It means, that
                           multicast applications WILL NOT work for
                           (S,G), which have default multicast route pointing
                           to wrong oif. In any case, it is not a good
                           idea to use multicasting applications on router.
                         */
                        goto dont_forward;
                }

                cache->mfc_un.res.wrong_if++;
                true_vifi = ipmr_find_vif(skb->dev);

                /* Rate-limited (MFC_ASSERT_THRESH) upcall telling the
                   daemon a packet arrived on the wrong interface. */
                if (true_vifi >= 0 && mroute_do_assert &&
                    /* pimsm uses asserts, when switching from RPT to SPT,
                       so that we cannot check that packet arrived on an oif.
                       It is bad, but otherwise we would need to move pretty
                       large chunk of pimd to kernel. Ough... --ANK
                     */
                    (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
                    jiffies - cache->mfc_un.res.last_assert > MFC_ASSERT_THRESH) {
                        cache->mfc_un.res.last_assert = jiffies;
                        ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF);
                }
                goto dont_forward;
        }

        vif_table[vif].pkt_in++;
        vif_table[vif].bytes_in+=skb->len;

        /*
         *      Forward the frame
         *
         *      Each transmit is deferred by one VIF so the final call can
         *      be flagged as "last", letting ipmr_queue_xmit() take over
         *      the skb instead of copying it - unless local delivery
         *      still needs the original.
         */
        for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
                if (skb->nh.iph->ttl > cache->mfc_un.res.ttls[ct]) {
                        if (psend != -1)
                                ipmr_queue_xmit(skb, cache, psend, 0);
                        psend=ct;
                }
        }
        if (psend != -1)
                ipmr_queue_xmit(skb, cache, psend, !local);

dont_forward:
        if (!local)
                kfree_skb(skb);
        return 0;
}
1300
 
1301
 
1302
/*
1303
 *      Multicast packets for forwarding arrive here
1304
 */
1305
 
1306
/*
 * Entry point for received multicast packets that may need forwarding.
 * Looks up the (S,G) in the forwarding cache; unresolved packets are
 * queued and reported to the routing daemon.  Packets destined locally
 * (RTCF_LOCAL) are additionally passed to ip_local_deliver().
 */
int ip_mr_input(struct sk_buff *skb)
{
        struct mfc_cache *cache;
        int local = ((struct rtable*)skb->dst)->rt_flags&RTCF_LOCAL;

        /* Packet is looped back after forward, it should not be
           forwarded second time, but still can be delivered locally.
         */
        if (IPCB(skb)->flags&IPSKB_FORWARDED)
                goto dont_forward;

        if (!local) {
                    /* Router-alert packets and IGMP go to the daemon
                       (mroute_socket) rather than being forwarded. */
                    if (IPCB(skb)->opt.router_alert) {
                            if (ip_call_ra_chain(skb))
                                    return 0;
                    } else if (skb->nh.iph->protocol == IPPROTO_IGMP){
                            /* IGMPv1 (and broken IGMPv2 implementations sort of
                               Cisco IOS <= 11.2(8)) do not put router alert
                               option to IGMP packets destined to routable
                               groups. It is very bad, because it means
                               that we can forward NO IGMP messages.
                             */
                            read_lock(&mrt_lock);
                            if (mroute_socket) {
                                    raw_rcv(mroute_socket, skb);
                                    read_unlock(&mrt_lock);
                                    return 0;
                            }
                            read_unlock(&mrt_lock);
                    }
        }

        read_lock(&mrt_lock);
        cache = ipmr_cache_find(skb->nh.iph->saddr, skb->nh.iph->daddr);

        /*
         *      No usable cache entry
         */
        if (cache==NULL) {
                int vif;

                if (local) {
                        /* Deliver the original locally now and keep
                           working on a clone for the unresolved queue. */
                        struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                        ip_local_deliver(skb);
                        if (skb2 == NULL) {
                                read_unlock(&mrt_lock);
                                return -ENOBUFS;
                        }
                        skb = skb2;
                }

                vif = ipmr_find_vif(skb->dev);
                if (vif >= 0) {
                        /* Queue the packet and ask the daemon to resolve
                           the route; this consumes the skb. */
                        int err = ipmr_cache_unresolved(vif, skb);
                        read_unlock(&mrt_lock);

                        return err;
                }
                read_unlock(&mrt_lock);
                kfree_skb(skb);
                return -ENODEV;
        }

        ip_mr_forward(skb, cache, local);

        read_unlock(&mrt_lock);

        if (local)
                return ip_local_deliver(skb);

        return 0;

dont_forward:
        if (local)
                return ip_local_deliver(skb);
        kfree_skb(skb);
        return 0;
}
1384
 
1385
#ifdef CONFIG_IP_PIMSM_V1
1386
/*
1387
 * Handle IGMP messages of PIMv1
1388
 */
1389
 
1390
/*
 * Handle IGMP messages of PIMv1.
 *
 * PIMv1 REGISTER messages arrive encoded in IGMP.  Validate the message,
 * then strip the outer headers and re-inject the encapsulated multicast
 * packet through the register VIF so it is routed like a packet that was
 * received natively on that interface.
 */
int pim_rcv_v1(struct sk_buff * skb)
{
        struct igmphdr *pim = (struct igmphdr*)skb->h.raw;
        struct iphdr   *encap;
        struct net_device  *reg_dev = NULL;

        if (skb_is_nonlinear(skb)) {
                /* Pull everything into the linear area so the headers
                   below can be read directly. */
                if (skb_linearize(skb, GFP_ATOMIC) != 0) {
                        kfree_skb(skb);
                        return -ENOMEM;
                }
                pim = (struct igmphdr*)skb->h.raw;
        }

        if (!mroute_do_pim ||
            skb->len < sizeof(*pim) + sizeof(*encap) ||
            pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) {
                kfree_skb(skb);
                return -EINVAL;
        }

        encap = (struct iphdr*)(skb->h.raw + sizeof(struct igmphdr));
        /*
           Check that:
           a. packet is really destined to a multicast group
           b. packet is not a NULL-REGISTER
           c. packet is not truncated
         */
        if (!MULTICAST(encap->daddr) ||
            ntohs(encap->tot_len) == 0 ||
            ntohs(encap->tot_len) + sizeof(*pim) > skb->len) {
                kfree_skb(skb);
                return -EINVAL;
        }

        read_lock(&mrt_lock);
        if (reg_vif_num >= 0)
                reg_dev = vif_table[reg_vif_num].dev;
        if (reg_dev)
                dev_hold(reg_dev);      /* pin the device across netif_rx() */
        read_unlock(&mrt_lock);

        if (reg_dev == NULL) {
                kfree_skb(skb);
                return -EINVAL;
        }

        /* Strip the outer IP + IGMP headers and re-receive the inner
           packet on the register VIF. */
        skb->mac.raw = skb->nh.raw;
        skb_pull(skb, (u8*)encap - skb->data);
        skb->nh.iph = (struct iphdr *)skb->data;
        skb->dev = reg_dev;
        memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
        skb->protocol = htons(ETH_P_IP);
        skb->ip_summed = 0;
        skb->pkt_type = PACKET_HOST;
        dst_release(skb->dst);
        skb->dst = NULL;
        ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len;
        ((struct net_device_stats*)reg_dev->priv)->rx_packets++;
#ifdef CONFIG_NETFILTER
        nf_conntrack_put(skb->nfct);
        skb->nfct = NULL;
#endif
        netif_rx(skb);
        dev_put(reg_dev);
        return 0;
}
1457
#endif
1458
 
1459
#ifdef CONFIG_IP_PIMSM_V2
1460
/*
 * PIMv2 REGISTER receive handler (IPPROTO_PIM), registered only while
 * MRT_PIM is enabled (see ip_mroute_setsockopt).  Validates the register
 * header, then decapsulates the inner multicast packet and re-injects it
 * through the register VIF.
 */
int pim_rcv(struct sk_buff * skb)
{
        struct pimreghdr *pim = (struct pimreghdr*)skb->h.raw;
        struct iphdr   *encap;
        struct net_device  *reg_dev = NULL;

        if (skb_is_nonlinear(skb)) {
                if (skb_linearize(skb, GFP_ATOMIC) != 0) {
                        kfree_skb(skb);
                        return -ENOMEM;
                }
                pim = (struct pimreghdr*)skb->h.raw;
        }

        /* Accept a checksum over the PIM header alone, or over the whole
         * packet for older peers (see the file-header note about relaxed
         * PIMv2 register checksums).
         */
        if (skb->len < sizeof(*pim) + sizeof(*encap) ||
            pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
            (pim->flags&PIM_NULL_REGISTER) ||
            (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
             ip_compute_csum((void *)pim, skb->len))) {
                kfree_skb(skb);
                return -EINVAL;
        }

        /* check if the inner packet is destined to mcast group */
        encap = (struct iphdr*)(skb->h.raw + sizeof(struct pimreghdr));
        if (!MULTICAST(encap->daddr) ||
            ntohs(encap->tot_len) == 0 ||
            ntohs(encap->tot_len) + sizeof(*pim) > skb->len) {
                kfree_skb(skb);
                return -EINVAL;
        }

        read_lock(&mrt_lock);
        if (reg_vif_num >= 0)
                reg_dev = vif_table[reg_vif_num].dev;
        if (reg_dev)
                dev_hold(reg_dev);      /* pin the device across netif_rx() */
        read_unlock(&mrt_lock);

        if (reg_dev == NULL) {
                kfree_skb(skb);
                return -EINVAL;
        }

        /* Strip the outer IP + PIM headers and re-receive the inner
           packet on the register VIF. */
        skb->mac.raw = skb->nh.raw;
        skb_pull(skb, (u8*)encap - skb->data);
        skb->nh.iph = (struct iphdr *)skb->data;
        skb->dev = reg_dev;
        memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
        skb->protocol = htons(ETH_P_IP);
        skb->ip_summed = 0;
        skb->pkt_type = PACKET_HOST;
        dst_release(skb->dst);
        ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len;
        ((struct net_device_stats*)reg_dev->priv)->rx_packets++;
        skb->dst = NULL;
#ifdef CONFIG_NETFILTER
        nf_conntrack_put(skb->nfct);
        skb->nfct = NULL;
#endif
        netif_rx(skb);
        dev_put(reg_dev);
        return 0;
}
1524
#endif
1525
 
1526
static int
1527
ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1528
{
1529
        int ct;
1530
        struct rtnexthop *nhp;
1531
        struct net_device *dev = vif_table[c->mfc_parent].dev;
1532
        u8 *b = skb->tail;
1533
        struct rtattr *mp_head;
1534
 
1535
        if (dev)
1536
                RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1537
 
1538
        mp_head = (struct rtattr*)skb_put(skb, RTA_LENGTH(0));
1539
 
1540
        for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1541
                if (c->mfc_un.res.ttls[ct] < 255) {
1542
                        if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1543
                                goto rtattr_failure;
1544
                        nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1545
                        nhp->rtnh_flags = 0;
1546
                        nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1547
                        nhp->rtnh_ifindex = vif_table[ct].dev->ifindex;
1548
                        nhp->rtnh_len = sizeof(*nhp);
1549
                }
1550
        }
1551
        mp_head->rta_type = RTA_MULTIPATH;
1552
        mp_head->rta_len = skb->tail - (u8*)mp_head;
1553
        rtm->rtm_type = RTN_MULTICAST;
1554
        return 1;
1555
 
1556
rtattr_failure:
1557
        skb_trim(skb, b - skb->data);
1558
        return -EMSGSIZE;
1559
}
1560
 
1561
/*
 * rtnetlink route lookup for a multicast (S,G): fill *rtm from the
 * matching cache entry, or - when nothing is known yet and !nowait -
 * queue the request on the unresolved cache so the daemon resolves it.
 */
int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
        int err;
        struct mfc_cache *cache;
        struct rtable *rt = (struct rtable*)skb->dst;

        read_lock(&mrt_lock);
        cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);

        if (cache==NULL) {
                struct net_device *dev;
                int vif;

                if (nowait) {
                        read_unlock(&mrt_lock);
                        return -EAGAIN;
                }

                dev = skb->dev;
                if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
                        read_unlock(&mrt_lock);
                        return -ENODEV;
                }
                /* Fake a minimal IP header in front of the message so the
                 * unresolved-queue code can treat it like a packet.
                 * NOTE(review): version is set to 0, presumably to mark
                 * the header as synthetic - confirm against the upcall
                 * path in ipmr_cache_report().
                 */
                skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
                skb->nh.iph->ihl = sizeof(struct iphdr)>>2;
                skb->nh.iph->saddr = rt->rt_src;
                skb->nh.iph->daddr = rt->rt_dst;
                skb->nh.iph->version = 0;
                err = ipmr_cache_unresolved(vif, skb);
                read_unlock(&mrt_lock);
                return err;
        }

        if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
                cache->mfc_flags |= MFC_NOTIFY;
        err = ipmr_fill_mroute(skb, cache, rtm);
        read_unlock(&mrt_lock);
        return err;
}
1600
 
1601
#ifdef CONFIG_PROC_FS   
1602
/*
1603
 *      The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
1604
 */
1605
 
1606
/* /proc/net/ip_mr_vif read handler: one line per active VIF with its
 * traffic counters, flags and endpoint addresses.  Uses the classic 2.4
 * procfs offset/length windowing (pos/begin/*start dance).
 */
static int ipmr_vif_info(char *buffer, char **start, off_t offset, int length)
{
        struct vif_device *vif;
        int len=0;
        off_t pos=0;
        off_t begin=0;
        int size;
        int ct;

        len += sprintf(buffer,
                 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
        pos=len;

        read_lock(&mrt_lock);
        for (ct=0;ct<maxvif;ct++)
        {
                char *name = "none";
                vif=&vif_table[ct];
                if(!VIF_EXISTS(ct))
                        continue;
                if (vif->dev)
                        name = vif->dev->name;
                size = sprintf(buffer+len, "%2d %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
                        ct, name, vif->bytes_in, vif->pkt_in, vif->bytes_out, vif->pkt_out,
                        vif->flags, vif->local, vif->remote);
                len+=size;
                pos+=size;
                /* Everything before `offset` is discarded ... */
                if(pos<offset)
                {
                        len=0;
                        begin=pos;
                }
                /* ... and generation stops once the window is filled. */
                if(pos>offset+length)
                        break;
        }
        read_unlock(&mrt_lock);

        *start=buffer+(offset-begin);
        len-=(offset-begin);
        if(len>length)
                len=length;
        if (len<0)
                len = 0;
        return len;
}
1651
 
1652
/* /proc/net/ip_mr_cache read handler: dumps resolved cache entries
 * (with their per-oif TTL thresholds) followed by the unresolved queue.
 * Same offset/length windowing scheme as ipmr_vif_info above.
 */
static int ipmr_mfc_info(char *buffer, char **start, off_t offset, int length)
{
        struct mfc_cache *mfc;
        int len=0;
        off_t pos=0;
        off_t begin=0;
        int size;
        int ct;

        len += sprintf(buffer,
                 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
        pos=len;

        read_lock(&mrt_lock);
        for (ct=0;ct<MFC_LINES;ct++)
        {
                for(mfc=mfc_cache_array[ct]; mfc; mfc=mfc->next)
                {
                        int n;

                        /*
                         *      Interface forwarding map
                         */
                        size = sprintf(buffer+len, "%08lX %08lX %-3d %8ld %8ld %8ld",
                                (unsigned long)mfc->mfc_mcastgrp,
                                (unsigned long)mfc->mfc_origin,
                                mfc->mfc_parent,
                                mfc->mfc_un.res.pkt,
                                mfc->mfc_un.res.bytes,
                                mfc->mfc_un.res.wrong_if);
                        for(n=mfc->mfc_un.res.minvif;n<mfc->mfc_un.res.maxvif;n++)
                        {
                                if(VIF_EXISTS(n) && mfc->mfc_un.res.ttls[n] < 255)
                                        size += sprintf(buffer+len+size, " %2d:%-3d", n, mfc->mfc_un.res.ttls[n]);
                        }
                        size += sprintf(buffer+len+size, "\n");
                        len+=size;
                        pos+=size;
                        if(pos<offset)
                        {
                                len=0;
                                begin=pos;
                        }
                        if(pos>offset+length)
                                goto done;
                }
        }

        /* Unresolved entries: the parent is shown as -1 and the queued
           packet count stands in for the pkt counter. */
        spin_lock_bh(&mfc_unres_lock);
        for(mfc=mfc_unres_queue; mfc; mfc=mfc->next) {
                size = sprintf(buffer+len, "%08lX %08lX %-3d %8ld %8ld %8ld\n",
                               (unsigned long)mfc->mfc_mcastgrp,
                               (unsigned long)mfc->mfc_origin,
                               -1,
                                (long)mfc->mfc_un.unres.unresolved.qlen,
                                0L, 0L);
                len+=size;
                pos+=size;
                if(pos<offset)
                {
                        len=0;
                        begin=pos;
                }
                if(pos>offset+length)
                        break;
        }
        spin_unlock_bh(&mfc_unres_lock);

done:
        read_unlock(&mrt_lock);
        *start=buffer+(offset-begin);
        len-=(offset-begin);
        if(len>length)
                len=length;
        if (len < 0) {
                len = 0;
        }
        return len;
}
1731
 
1732
#endif  
1733
 
1734
#ifdef CONFIG_IP_PIMSM_V2
1735
/* Transport-level handler for IPPROTO_PIM.  Registered/unregistered
   dynamically by the MRT_PIM setsockopt when PIM-SM is toggled. */
struct inet_protocol pim_protocol =
{
        pim_rcv,                /* PIM handler          */
        NULL,                   /* PIM error control    */
        NULL,                   /* next                 */
        IPPROTO_PIM,            /* protocol ID          */
        0,                       /* copy                 */
        NULL,                   /* data                 */
        "PIM"                   /* name                 */
};
1745
#endif
1746
 
1747
 
1748
/*
1749
 *      Setup for IP multicast routing
1750
 */
1751
 
1752
/* Boot-time initialisation: create the slab cache for mfc_cache entries,
 * arm the unresolved-entry expiry timer, hook the netdevice notifier and
 * create the /proc files.
 */
void __init ip_mr_init(void)
{
        printk(KERN_INFO "Linux IP multicast router 0.06 plus PIM-SM\n");
        mrt_cachep = kmem_cache_create("ip_mrt_cache",
                                       sizeof(struct mfc_cache),
                                       0, SLAB_HWCACHE_ALIGN,
                                       NULL, NULL);
        /* The multicast router cannot function without this cache;
         * failing loudly at boot beats oopsing on first allocation.
         */
        if (!mrt_cachep)
                panic("ipmr: cannot allocate ip_mrt_cache\n");
        init_timer(&ipmr_expire_timer);
        ipmr_expire_timer.function=ipmr_expire_process;
        register_netdevice_notifier(&ip_mr_notifier);
#ifdef CONFIG_PROC_FS
        proc_net_create("ip_mr_vif",0,ipmr_vif_info);
        proc_net_create("ip_mr_cache",0,ipmr_mfc_info);
#endif
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.