/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *		(c) 1995 Alan Cox, <alan@redhat.com>
 *	  Linux Consultancy and Custom Driver Development
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Version: $Id: ipmr.c,v 1.65 2001/10/31 21:55:54 davem Exp $
 *
 *	Fixes:
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Mike McLagan		:	Routing by source
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Status, optimisations and more.
 *	Brad Parker		:	Better behaviour on mrouted upcall
 *					overflow.
 *	Carlos Picoto		:	PIMv1 Support
 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
 *					Relax this requirement to work with older peers.
 *
 */

#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <net/ipip.h>
#include <net/checksum.h>
#include <net/netlink.h>

#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM 1
#endif

static struct sock *mroute_socket;


/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

static struct vif_device vif_table[MAXVIFS];		/* Devices		*/
static int maxvif;

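/* A vif slot is considered live only while its ->dev pointer is set:
 * vif_delete() clears ->dev under the write lock and, when the topmost
 * slot dies, shrinks maxvif to one past the highest remaining live slot.
 */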
#define VIF_EXISTS(idx) (vif_table[idx].dev != NULL)

static int mroute_do_assert;				/* Set in PIM assert	*/
static int mroute_do_pim;

static struct mfc_cache *mfc_cache_array[MFC_LINES];	/* Forwarding cache	*/

static struct mfc_cache *mfc_unres_queue;		/* Queue of unresolved entries */
static atomic_t cache_resolve_queue_len;		/* Size of unresolved	*/

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to Alan's original scheme. The hash table of resolved
   entries is changed only in process context and protected
   with the weak lock mrt_lock. The queue of unresolved entries is
   protected with the strong spinlock mfc_unres_lock.

   In this case the data path is entirely free of exclusive locks.
 */

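/* Concretely: the hot path (ip_mr_input, ipmr_cache_find) takes only
 * read_lock(&mrt_lock), while control-plane updates take write_lock_bh().
 * The unresolved queue needs its own spinlock because it is also touched
 * from timer context by ipmr_expire_process().
 */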
static struct kmem_cache *mrt_cachep __read_mostly;

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);

#ifdef CONFIG_IP_PIMSM_V2
static struct net_protocol pim_protocol;
#endif

static struct timer_list ipmr_expire_timer;

/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

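/* A DVMRP vif is backed by an IPIP tunnel: the ioctl below asks the
 * "tunl0" driver to create a "dvmrp%d" device. Since the request is
 * issued from kernel space, the get_fs()/set_fs(KERNEL_DS) pair
 * temporarily lifts the user-pointer check around do_ioctl().
 */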
static
struct net_device *ipmr_new_tunnel(struct vifctl *v)
{
	struct net_device  *dev;

	dev = __dev_get_by_name(&init_net, "tunl0");

	if (dev) {
		int err;
		struct ifreq ifr;
		mm_segment_t	oldfs;
		struct ip_tunnel_parm p;
		struct in_device  *in_dev;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (void*)&p;

		oldfs = get_fs(); set_fs(KERNEL_DS);
		err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL);
		set_fs(oldfs);

		dev = NULL;

		if (err == 0 && (dev = __dev_get_by_name(&init_net, p.name)) != NULL) {
			dev->flags |= IFF_MULTICAST;

			in_dev = __in_dev_get_rtnl(dev);
			if (in_dev == NULL)
				goto failure;

			ipv4_devconf_setall(in_dev);
			IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;

			if (dev_open(dev))
				goto failure;
		}
	}
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}

#ifdef CONFIG_IP_PIMSM

static int reg_vif_num = -1;

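/* The PIM register vif is a pseudo-device: packets "transmitted" on it
 * are never sent anywhere; reg_vif_xmit() just bounces them to the
 * daemon as IGMPMSG_WHOLEPKT upcalls and frees the skb.
 */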
static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	read_lock(&mrt_lock);
	((struct net_device_stats*)netdev_priv(dev))->tx_bytes += skb->len;
	((struct net_device_stats*)netdev_priv(dev))->tx_packets++;
	ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return 0;
}

static struct net_device_stats *reg_vif_get_stats(struct net_device *dev)
{
	return (struct net_device_stats*)netdev_priv(dev);
}

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->hard_start_xmit	= reg_vif_xmit;
	dev->get_stats		= reg_vif_get_stats;
	dev->destructor		= free_netdev;
}

static struct net_device *ipmr_reg_vif(void)
{
	struct net_device *dev;
	struct in_device *in_dev;

	dev = alloc_netdev(sizeof(struct net_device_stats), "pimreg",
			   reg_vif_setup);

	if (dev == NULL)
		return NULL;

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	rcu_read_lock();
	if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
		rcu_read_unlock();
		goto failure;
	}

	ipv4_devconf_setall(in_dev);
	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
	rcu_read_unlock();

	if (dev_open(dev))
		goto failure;

	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#endif

/*
 *	Delete a VIF entry
 */

static int vif_delete(int vifi)
{
	struct vif_device *v;
	struct net_device *dev;
	struct in_device *in_dev;

	if (vifi < 0 || vifi >= maxvif)
		return -EADDRNOTAVAIL;

	v = &vif_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IP_PIMSM
	if (vifi == reg_vif_num)
		reg_vif_num = -1;
#endif

	if (vifi+1 == maxvif) {
		int tmp;
		for (tmp=vifi-1; tmp>=0; tmp--) {
			if (VIF_EXISTS(tmp))
				break;
		}
		maxvif = tmp+1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
		ip_rt_multicast_event(in_dev);
	}

	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
		unregister_netdevice(dev);

	dev_put(dev);
	return 0;
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ipmr_destroy_unres(struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	atomic_dec(&cache_resolve_queue_len);

	while ((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			e = NLMSG_DATA(nlh);
			e->error = -ETIMEDOUT;
			memset(&e->msg, 0, sizeof(e->msg));

			rtnl_unicast(skb, NETLINK_CB(skb).pid);
		} else
			kfree_skb(skb);
	}

	kmem_cache_free(mrt_cachep, c);
}


/* Single timer process for all the unresolved queue. */

static void ipmr_expire_process(unsigned long dummy)
{
	unsigned long now;
	unsigned long expires;
	struct mfc_cache *c, **cp;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
		return;
	}

	if (atomic_read(&cache_resolve_queue_len) == 0)
		goto out;

	now = jiffies;
	expires = 10*HZ;
	cp = &mfc_unres_queue;

	while ((c=*cp) != NULL) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			cp = &c->next;
			continue;
		}

		*cp = c->next;

		ipmr_destroy_unres(c);
	}

	if (atomic_read(&cache_resolve_queue_len))
		mod_timer(&ipmr_expire_timer, jiffies + expires);

out:
	spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under write locked mrt_lock. */

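/* ttls[vifi] is the TTL threshold for that vif: a packet is forwarded
 * on the vif only if its TTL exceeds the threshold (see ip_mr_forward).
 * 0 and 255 both mean "never forward"; minvif/maxvif bound the scan.
 */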
static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXVIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

	for (vifi=0; vifi<maxvif; vifi++) {
		if (VIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}

static int vif_add(struct vifctl *vifc, int mrtsock)
{
	int vifi = vifc->vifc_vifi;
	struct vif_device *v = &vif_table[vifi];
	struct net_device *dev;
	struct in_device *in_dev;

	/* Is vif busy ? */
	if (VIF_EXISTS(vifi))
		return -EADDRINUSE;

	switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
	case VIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ipmr_reg_vif();
		if (!dev)
			return -ENOBUFS;
		break;
#endif
	case VIFF_TUNNEL:
		dev = ipmr_new_tunnel(vifc);
		if (!dev)
			return -ENOBUFS;
		break;
	case 0:
		dev = ip_dev_find(vifc->vifc_lcl_addr.s_addr);
		if (!dev)
			return -EADDRNOTAVAIL;
		dev_put(dev);
		break;
	default:
		return -EINVAL;
	}

	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
		return -EADDRNOTAVAIL;
	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
	dev_set_allmulti(dev, +1);
	ip_rt_multicast_event(in_dev);

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit=vifc->vifc_rate_limit;
	v->local=vifc->vifc_lcl_addr.s_addr;
	v->remote=vifc->vifc_rmt_addr.s_addr;
	v->flags=vifc->vifc_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold=vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	dev_hold(dev);
	v->dev=dev;
#ifdef CONFIG_IP_PIMSM
	if (v->flags&VIFF_REGISTER)
		reg_vif_num = vifi;
#endif
	if (vifi+1 > maxvif)
		maxvif = vifi+1;
	write_unlock_bh(&mrt_lock);
	return 0;
}

static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp)
{
	int line=MFC_HASH(mcastgrp,origin);
	struct mfc_cache *c;

	for (c=mfc_cache_array[line]; c; c = c->next) {
		if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
			break;
	}
	return c;
}

/*
 *	Allocate a multicast cache entry
 */
static struct mfc_cache *ipmr_cache_alloc(void)
{
	struct mfc_cache *c=kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (c==NULL)
		return NULL;
	c->mfc_un.res.minvif = MAXVIFS;
	return c;
}

static struct mfc_cache *ipmr_cache_alloc_unres(void)
{
	struct mfc_cache *c=kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (c==NULL)
		return NULL;
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10*HZ;
	return c;
}

/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

			if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = (skb_tail_pointer(skb) -
						  (u8 *)nlh);
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				e = NLMSG_DATA(nlh);
				e->error = -EMSGSIZE;
				memset(&e->msg, 0, sizeof(e->msg));
			}

			rtnl_unicast(skb, NETLINK_CB(skb).pid);
		} else
			ip_mr_forward(skb, c, 0);
	}
}

/*
 *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */

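/* Two upcall shapes are produced here: for IGMPMSG_WHOLEPKT the whole
 * packet goes up with a rebuilt outer IP header, while for
 * IGMPMSG_NOCACHE/WRONGVIF only the IP header plus a fake IGMP header
 * is copied, with ip_hdr->protocol zeroed to flag a route-add request.
 */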
static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
{
	struct sk_buff *skb;
	const int ihl = ip_hdrlen(pkt);
	struct igmphdr *igmp;
	struct igmpmsg *msg;
	int ret;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
#endif
		skb = alloc_skb(128, GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix ihl, length etc.
		   And all this only to mangle msg->im_msgtype and
		   to set msg->im_mbz to "mbz" :-)
		 */
		skb_push(skb, sizeof(struct iphdr));
		skb_reset_network_header(skb);
		skb_reset_transport_header(skb);
		msg = (struct igmpmsg *)skb_network_header(skb);
		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
		msg->im_msgtype = IGMPMSG_WHOLEPKT;
		msg->im_mbz = 0;
		msg->im_vif = reg_vif_num;
		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
					     sizeof(struct iphdr));
	} else
#endif
	{

	/*
	 *	Copy the IP header
	 */

	skb->network_header = skb->tail;
	skb_put(skb, ihl);
	skb_copy_to_linear_data(skb, pkt->data, ihl);
	ip_hdr(skb)->protocol = 0;			/* Flag to the kernel this is a route add */
	msg = (struct igmpmsg *)skb_network_header(skb);
	msg->im_vif = vifi;
	skb->dst = dst_clone(pkt->dst);

	/*
	 *	Add our header
	 */

	igmp=(struct igmphdr *)skb_put(skb,sizeof(struct igmphdr));
	igmp->type	=
	msg->im_msgtype = assert;
	igmp->code	=	0;
	ip_hdr(skb)->tot_len = htons(skb->len);			/* Fix the length */
	skb->transport_header = skb->network_header;
	}

	if (mroute_socket == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to mrouted
	 */
	if ((ret=sock_queue_rcv_skb(mroute_socket,skb))<0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}

/*
 *	Queue a packet for resolution. It gets locked cache entry!
 */

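/* Bounded in two ways: at most 10 unresolved entries exist at a time,
 * and each entry buffers at most four packets (the append is refused
 * once qlen exceeds 3). Entries not resolved within 10*HZ are reaped
 * by ipmr_expire_process().
 */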
static int
ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
{
	int err;
	struct mfc_cache *c;
	const struct iphdr *iph = ip_hdr(skb);

	spin_lock_bh(&mfc_unres_lock);
	for (c=mfc_unres_queue; c; c=c->next) {
		if (c->mfc_mcastgrp == iph->daddr &&
		    c->mfc_origin == iph->saddr)
			break;
	}

	if (c == NULL) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&cache_resolve_queue_len)>=10 ||
		    (c=ipmr_cache_alloc_unres())==NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mfc_parent	= -1;
		c->mfc_origin	= iph->saddr;
		c->mfc_mcastgrp	= iph->daddr;

		/*
		 *	Reflect first query at mrouted.
		 */
		if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE))<0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			kmem_cache_free(mrt_cachep, c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&cache_resolve_queue_len);
		c->next = mfc_unres_queue;
		mfc_unres_queue = c;

		mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen>3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved,skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 *	MFC cache manipulation by user space mroute daemon
 */

static int ipmr_mfc_delete(struct mfcctl *mfc)
{
	int line;
	struct mfc_cache *c, **cp;

	line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			kmem_cache_free(mrt_cachep, c);
			return 0;
		}
	}
	return -ENOENT;
}

static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
{
	int line;
	struct mfc_cache *uc, *c, **cp;

	line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
			break;
	}

	if (c != NULL) {
		write_lock_bh(&mrt_lock);
		c->mfc_parent = mfc->mfcc_parent;
		ipmr_update_thresholds(c, mfc->mfcc_ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!MULTICAST(mfc->mfcc_mcastgrp.s_addr))
		return -EINVAL;

	c=ipmr_cache_alloc();
	if (c==NULL)
		return -ENOMEM;

	c->mfc_origin=mfc->mfcc_origin.s_addr;
	c->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr;
	c->mfc_parent=mfc->mfcc_parent;
	ipmr_update_thresholds(c, mfc->mfcc_ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	c->next = mfc_cache_array[line];
	mfc_cache_array[line] = c;
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	spin_lock_bh(&mfc_unres_lock);
	for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
	     cp = &uc->next) {
		if (uc->mfc_origin == c->mfc_origin &&
		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
			*cp = uc->next;
			if (atomic_dec_and_test(&cache_resolve_queue_len))
				del_timer(&ipmr_expire_timer);
			break;
		}
	}
	spin_unlock_bh(&mfc_unres_lock);

	if (uc) {
		ipmr_cache_resolve(uc, c);
		kmem_cache_free(mrt_cachep, uc);
	}
	return 0;
}

/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct sock *sk)
{
	int i;

	/*
	 *	Shut down all active vif entries
	 */
	for (i=0; i<maxvif; i++) {
		if (!(vif_table[i].flags&VIFF_STATIC))
			vif_delete(i);
	}

	/*
	 *	Wipe the cache
	 */
	for (i=0;i<MFC_LINES;i++) {
		struct mfc_cache *c, **cp;

		cp = &mfc_cache_array[i];
		while ((c = *cp) != NULL) {
			if (c->mfc_flags&MFC_STATIC) {
				cp = &c->next;
				continue;
			}
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			kmem_cache_free(mrt_cachep, c);
		}
	}

	if (atomic_read(&cache_resolve_queue_len) != 0) {
		struct mfc_cache *c;

		spin_lock_bh(&mfc_unres_lock);
		while (mfc_unres_queue != NULL) {
			c = mfc_unres_queue;
			mfc_unres_queue = c->next;
			spin_unlock_bh(&mfc_unres_lock);

			ipmr_destroy_unres(c);

			spin_lock_bh(&mfc_unres_lock);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

static void mrtsock_destruct(struct sock *sk)
{
	rtnl_lock();
	if (sk == mroute_socket) {
		IPV4_DEVCONF_ALL(MC_FORWARDING)--;

		write_lock_bh(&mrt_lock);
		mroute_socket=NULL;
		write_unlock_bh(&mrt_lock);

		mroute_clean_tables(sk);
	}
	rtnl_unlock();
}

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int optlen)
{
	int ret;
	struct vifctl vif;
	struct mfcctl mfc;

	if (optname != MRT_INIT) {
		if (sk != mroute_socket && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT_INIT:
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->num != IPPROTO_IGMP)
			return -EOPNOTSUPP;
		if (optlen!=sizeof(int))
			return -ENOPROTOOPT;

		rtnl_lock();
		if (mroute_socket) {
			rtnl_unlock();
			return -EADDRINUSE;
		}

		ret = ip_ra_control(sk, 1, mrtsock_destruct);
		if (ret == 0) {
			write_lock_bh(&mrt_lock);
			mroute_socket=sk;
			write_unlock_bh(&mrt_lock);

			IPV4_DEVCONF_ALL(MC_FORWARDING)++;
		}
		rtnl_unlock();
		return ret;
	case MRT_DONE:
		if (sk!=mroute_socket)
			return -EACCES;
		return ip_ra_control(sk, 0, NULL);
	case MRT_ADD_VIF:
	case MRT_DEL_VIF:
		if (optlen!=sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif,optval,sizeof(vif)))
			return -EFAULT;
		if (vif.vifc_vifi >= MAXVIFS)
			return -ENFILE;
		rtnl_lock();
		if (optname==MRT_ADD_VIF) {
			ret = vif_add(&vif, sk==mroute_socket);
		} else {
			ret = vif_delete(vif.vifc_vifi);
		}
		rtnl_unlock();
		return ret;

		/*
		 *	Manipulate the forwarding caches. These live
		 *	in a sort of kernel/user symbiosis.
		 */
	case MRT_ADD_MFC:
	case MRT_DEL_MFC:
		if (optlen!=sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc,optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname==MRT_DEL_MFC)
			ret = ipmr_mfc_delete(&mfc);
		else
			ret = ipmr_mfc_add(&mfc, sk==mroute_socket);
		rtnl_unlock();
		return ret;
		/*
		 *	Control PIM assert.
		 */
	case MRT_ASSERT:
	{
		int v;
		if (get_user(v,(int __user *)optval))
			return -EFAULT;
		mroute_do_assert=(v)?1:0;
		return 0;
	}
#ifdef CONFIG_IP_PIMSM
	case MRT_PIM:
	{
		int v, ret;
		if (get_user(v,(int __user *)optval))
			return -EFAULT;
		v = (v)?1:0;
		rtnl_lock();
		ret = 0;
		if (v != mroute_do_pim) {
			mroute_do_pim = v;
			mroute_do_assert = v;
#ifdef CONFIG_IP_PIMSM_V2
			if (mroute_do_pim)
				ret = inet_add_protocol(&pim_protocol,
							IPPROTO_PIM);
			else
				ret = inet_del_protocol(&pim_protocol,
							IPPROTO_PIM);
			if (ret < 0)
				ret = -EAGAIN;
#endif
		}
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}

/*
 *	Getsockopt support for the multicast routing system.
 */

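/* Note: MRT_VERSION reports 0x0305 below, which reads as an
 * mrouted version 3.5 compatible API.
 */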
int ip_mroute_getsockopt(struct sock *sk,int optname,char __user *optval,int __user *optlen)
{
	int olr;
	int val;

	if (optname!=MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
	   optname!=MRT_PIM &&
#endif
	   optname!=MRT_ASSERT)
		return -ENOPROTOOPT;

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(unsigned int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr,optlen))
		return -EFAULT;
	if (optname==MRT_VERSION)
		val=0x0305;
#ifdef CONFIG_IP_PIMSM
	else if (optname==MRT_PIM)
		val=mroute_do_pim;
#endif
	else
		val=mroute_do_assert;
	if (copy_to_user(optval,&val,olr))
		return -EFAULT;
	return 0;
}

/*
 *	The IP multicast ioctl support routines.
 */

int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req sr;
	struct sioc_vif_req vr;
	struct vif_device *vif;
	struct mfc_cache *c;

	switch (cmd) {
	case SIOCGETVIFCNT:
		if (copy_from_user(&vr,arg,sizeof(vr)))
			return -EFAULT;
		if (vr.vifi>=maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif=&vif_table[vr.vifi];
		if (VIF_EXISTS(vr.vifi)) {
			vr.icount=vif->pkt_in;
			vr.ocount=vif->pkt_out;
			vr.ibytes=vif->bytes_in;
			vr.obytes=vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg,&vr,sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT:
		if (copy_from_user(&sr,arg,sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg,&sr,sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}


static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct vif_device *v;
	int ct;

	if (dev->nd_net != &init_net)
		return NOTIFY_DONE;

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;
	v=&vif_table[0];
	for (ct=0;ct<maxvif;ct++,v++) {
		if (v->dev==dev)
			vif_delete(ct);
	}
	return NOTIFY_DONE;
}


static struct notifier_block ip_mr_notifier = {
	.notifier_call = ipmr_device_event,
};

/*
 *	Encapsulate a packet by attaching a valid IPIP header to it.
 *	This avoids tunnel drivers and other mess and gives us the speed so
 *	important for multicast video.
 */

static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
	struct iphdr *iph;
	struct iphdr *old_iph = ip_hdr(skb);

	skb_push(skb, sizeof(struct iphdr));
	skb->transport_header = skb->network_header;
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);

	iph->version	=	4;
	iph->tos	=	old_iph->tos;
	iph->ttl	=	old_iph->ttl;
	iph->frag_off	=	0;
	iph->daddr	=	daddr;
	iph->saddr	=	saddr;
	iph->protocol	=	IPPROTO_IPIP;
	iph->ihl	=	5;
	iph->tot_len	=	htons(skb->len);
	ip_select_ident(iph, skb->dst, NULL);
	ip_send_check(iph);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	nf_reset(skb);
}

static inline int ipmr_forward_finish(struct sk_buff *skb)
{
	struct ip_options * opt = &(IPCB(skb)->opt);

	IP_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS);

	if (unlikely(opt->optlen))
		ip_forward_options(skb);

	return dst_output(skb);
}

/*
 *	Processing handlers for ipmr_forward
 */

static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct vif_device *vif = &vif_table[vifi];
	struct net_device *dev;
	struct rtable *rt;
	int    encap = 0;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IP_PIMSM
	if (vif->flags & VIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out+=skb->len;
		((struct net_device_stats*)netdev_priv(vif->dev))->tx_bytes += skb->len;
		((struct net_device_stats*)netdev_priv(vif->dev))->tx_packets++;
		ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
		kfree_skb(skb);
		return;
	}
#endif

	if (vif->flags&VIFF_TUNNEL) {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = vif->remote,
						.saddr = vif->local,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(&rt, &fl))
			goto out_free;
		encap = sizeof(struct iphdr);
	} else {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(&rt, &fl))
			goto out_free;
	}

	dev = rt->u.dst.dev;

	if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
		/* Do not fragment multicasts. Alas, IPv4 does not
		   allow us to send ICMP here, so such packets will
		   disappear into a black hole.
		 */

		IP_INC_STATS_BH(IPSTATS_MIB_FRAGFAILS);
		ip_rt_put(rt);
		goto out_free;
	}

	encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;

	if (skb_cow(skb, encap)) {
		ip_rt_put(rt);
		goto out_free;
	}

	vif->pkt_out++;
	vif->bytes_out+=skb->len;

	dst_release(skb->dst);
	skb->dst = &rt->u.dst;
	ip_decrease_ttl(ip_hdr(skb));

	/* FIXME: forward and output firewalls used to be called here.
	 * What do we do with netfilter? -- RR */
	if (vif->flags & VIFF_TUNNEL) {
		ip_encap(skb, vif->local, vif->remote);
		/* FIXME: extra output firewall step used to be here. --RR */
		((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_packets++;
		((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_bytes+=skb->len;
	}

	IPCB(skb)->flags |= IPSKB_FORWARDED;

	/*
	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but also after forwarding on all output
	 * interfaces. Clearly, if an mrouter runs a multicast program, that
	 * program should receive packets regardless of which interface it
	 * joined on. If we did not do this, the program would have to join on
	 * all interfaces. On the other hand, a multihomed host (or router, but
	 * not mrouter) cannot join on more than one interface - that would
	 * result in receiving multiple copies of each packet.
	 */
	NF_HOOK(PF_INET, NF_IP_FORWARD, skb, skb->dev, dev,
		ipmr_forward_finish);
	return;

out_free:
	kfree_skb(skb);
	return;
}

static int ipmr_find_vif(struct net_device *dev)
{
	int ct;
	for (ct=maxvif-1; ct>=0; ct--) {
		if (vif_table[ct].dev == dev)
			break;
	}
	return ct;
}

/* "local" means that we should preserve one skb (for local delivery) */

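/* Wrong-incoming-interface packets are dropped and may trigger an
 * IGMPMSG_WRONGVIF assert upcall, rate-limited by MFC_ASSERT_THRESH.
 * The forwarding loop clones the skb for every target vif except the
 * last, which consumes the original (unless a local copy is needed).
 */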
static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
{
	int psend = -1;
	int vif, ct;

	vif = cache->mfc_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (vif_table[vif].dev != skb->dev) {
		int true_vifi;

		if (((struct rtable*)skb->dst)->fl.iif == 0) {
			/* It is our own packet, looped back.
			   Very complicated situation...

			   The best workaround until routing daemons are
			   fixed is not to redistribute a packet if it was
			   sent through the wrong interface. It means that
			   multicast applications WILL NOT work for
			   (S,G) entries whose default multicast route points
			   to the wrong oif. In any case, it is not a good
			   idea to use multicasting applications on a router.
			 */
			goto dont_forward;
		}

		cache->mfc_un.res.wrong_if++;
		true_vifi = ipmr_find_vif(skb->dev);

		if (true_vifi >= 0 && mroute_do_assert &&
		    /* PIM-SM uses asserts when switching from RPT to SPT,
		       so we cannot check that the packet arrived on an oif.
		       It is bad, but otherwise we would need to move a pretty
		       large chunk of pimd into the kernel. Ough... --ANK
		     */
		    (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF);
		}
		goto dont_forward;
	}

	vif_table[vif].pkt_in++;
	vif_table[vif].bytes_in+=skb->len;

	/*
	 *	Forward the frame
	 */
	for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ipmr_queue_xmit(skb2, cache, psend);
			}
			psend=ct;
		}
	}
	if (psend != -1) {
		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			if (skb2)
				ipmr_queue_xmit(skb2, cache, psend);
		} else {
			ipmr_queue_xmit(skb, cache, psend);
			return 0;
		}
	}

dont_forward:
	if (!local)
		kfree_skb(skb);
	return 0;
}


/*
 *	Multicast packets for forwarding arrive here
 */

int ip_mr_input(struct sk_buff *skb)
{
	struct mfc_cache *cache;
	int local = ((struct rtable*)skb->dst)->rt_flags&RTCF_LOCAL;

	/* A packet looped back after forwarding must not be
	   forwarded a second time, but it can still be delivered locally.
	 */
	if (IPCB(skb)->flags&IPSKB_FORWARDED)
		goto dont_forward;

	if (!local) {
		if (IPCB(skb)->opt.router_alert) {
			if (ip_call_ra_chain(skb))
				return 0;
		} else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
			/* IGMPv1 (and broken IGMPv2 implementations such as
			   Cisco IOS <= 11.2(8)) do not put the router alert
			   option into IGMP packets destined to routable
			   groups. This is very bad, because it means
			   that we can forward NO IGMP messages.
			 */
			read_lock(&mrt_lock);
			if (mroute_socket) {
				nf_reset(skb);
				raw_rcv(mroute_socket, skb);
				read_unlock(&mrt_lock);
				return 0;
			}
			read_unlock(&mrt_lock);
		}
	}

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache==NULL) {
		int vif;

		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			ip_local_deliver(skb);
			if (skb2 == NULL) {
				read_unlock(&mrt_lock);
				return -ENOBUFS;
			}
			skb = skb2;
		}

		vif = ipmr_find_vif(skb->dev);
		if (vif >= 0) {
			int err = ipmr_cache_unresolved(vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip_mr_forward(skb, cache, local);

	read_unlock(&mrt_lock);

	if (local)
		return ip_local_deliver(skb);

	return 0;

dont_forward:
	if (local)
		return ip_local_deliver(skb);
	kfree_skb(skb);
	return 0;
}

#ifdef CONFIG_IP_PIMSM_V1
/*
 * Handle IGMP messages of PIMv1
 */

int pim_rcv_v1(struct sk_buff * skb)
{
	struct igmphdr *pim;
	struct iphdr   *encap;
	struct net_device  *reg_dev = NULL;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	pim = igmp_hdr(skb);

	if (!mroute_do_pim ||
	    skb->len < sizeof(*pim) + sizeof(*encap) ||
	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
		goto drop;

	encap = (struct iphdr *)(skb_transport_header(skb) +
				 sizeof(struct igmphdr));
	/*
	   Check that:
	   a. the packet is really destined to a multicast group
	   b. the packet is not a NULL-REGISTER
	   c. the packet is not truncated
	 */
	if (!MULTICAST(encap->daddr) ||
	    encap->tot_len == 0 ||
	    ntohs(encap->tot_len) + sizeof(*pim) > skb->len)
		goto drop;

	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = vif_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		goto drop;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8*)encap - skb->data);
	skb_reset_network_header(skb);
	skb->dev = reg_dev;
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;
	dst_release(skb->dst);
	skb->dst = NULL;
	((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len;
	((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++;
	nf_reset(skb);
	netif_rx(skb);
	dev_put(reg_dev);
	return 0;
 drop:
	kfree_skb(skb);
	return 0;
}
#endif

#ifdef CONFIG_IP_PIMSM_V2
static int pim_rcv(struct sk_buff * skb)
{
	struct pimreghdr *pim;
	struct iphdr   *encap;
	struct net_device  *reg_dev = NULL;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
	    (pim->flags&PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* check if the inner packet is destined to mcast group */
	encap = (struct iphdr *)(skb_transport_header(skb) +
				 sizeof(struct pimreghdr));
	if (!MULTICAST(encap->daddr) ||
	    encap->tot_len == 0 ||
	    ntohs(encap->tot_len) + sizeof(*pim) > skb->len)
		goto drop;

	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = vif_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		goto drop;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8*)encap - skb->data);
	skb_reset_network_header(skb);
	skb->dev = reg_dev;
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;
	dst_release(skb->dst);
	((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len;
	((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++;
	skb->dst = NULL;
	nf_reset(skb);
	netif_rx(skb);
	dev_put(reg_dev);
	return 0;
 drop:
	kfree_skb(skb);
	return 0;
}
#endif

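/* Builds the netlink answer for a resolved route: the parent vif's
 * device goes out as RTA_IIF and each forwarding vif becomes one
 * RTA_MULTIPATH nexthop whose rtnh_hops carries the TTL threshold.
 * Used by ipmr_get_route() and by ipmr_cache_resolve() for replies
 * queued on formerly unresolved entries.
 */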
static int
ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
{
        int ct;
        struct rtnexthop *nhp;
        struct net_device *dev = vif_table[c->mfc_parent].dev;
        u8 *b = skb_tail_pointer(skb);
        struct rtattr *mp_head;

        if (dev)
                RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);

        mp_head = (struct rtattr*)skb_put(skb, RTA_LENGTH(0));

        for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
                if (c->mfc_un.res.ttls[ct] < 255) {
                        if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
                                goto rtattr_failure;
                        nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
                        nhp->rtnh_flags = 0;
                        nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
                        nhp->rtnh_ifindex = vif_table[ct].dev->ifindex;
                        nhp->rtnh_len = sizeof(*nhp);
                }
        }
        mp_head->rta_type = RTA_MULTIPATH;
        mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
        rtm->rtm_type = RTN_MULTICAST;
        return 1;

rtattr_failure:
        nlmsg_trim(skb, b);
        return -EMSGSIZE;
}

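/*
 * Answer an rtnetlink route query for a multicast destination.  If the
 * flow is already in the cache the entry is dumped directly; otherwise a
 * clone of the skb gets a minimal pseudo IP header (iph->version == 0
 * marks it as a query rather than a real packet) and is queued on the
 * unresolved list so the routing daemon can be asked to resolve it.
 */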
int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
        int err;
        struct mfc_cache *cache;
        struct rtable *rt = (struct rtable*)skb->dst;

        read_lock(&mrt_lock);
        cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);

        if (cache == NULL) {
                struct sk_buff *skb2;
                struct iphdr *iph;
                struct net_device *dev;
                int vif;

                if (nowait) {
                        read_unlock(&mrt_lock);
                        return -EAGAIN;
                }

                dev = skb->dev;
                if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
                        read_unlock(&mrt_lock);
                        return -ENODEV;
                }
                skb2 = skb_clone(skb, GFP_ATOMIC);
                if (!skb2) {
                        read_unlock(&mrt_lock);
                        return -ENOMEM;
                }

                skb_push(skb2, sizeof(struct iphdr));
                skb_reset_network_header(skb2);
                iph = ip_hdr(skb2);
                iph->ihl = sizeof(struct iphdr) >> 2;
                iph->saddr = rt->rt_src;
                iph->daddr = rt->rt_dst;
                iph->version = 0;
                err = ipmr_cache_unresolved(vif, skb2);
                read_unlock(&mrt_lock);
                return err;
        }

        if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
                cache->mfc_flags |= MFC_NOTIFY;
        err = ipmr_fill_mroute(skb, cache, rtm);
        read_unlock(&mrt_lock);
        return err;
}

#ifdef CONFIG_PROC_FS
/*
 *      The /proc interfaces to multicast routing:
 *      /proc/net/ip_mr_cache and /proc/net/ip_mr_vif
 */
struct ipmr_vif_iter {
        int ct;
};

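/* The vif table is walked under mrt_lock, taken in ->start and released
 * in ->stop, so entries cannot be added or removed mid-dump. */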
static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter,
                                           loff_t pos)
{
        for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) {
                if (!VIF_EXISTS(iter->ct))
                        continue;
                if (pos-- == 0)
                        return &vif_table[iter->ct];
        }
        return NULL;
}

static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
{
        read_lock(&mrt_lock);
        return *pos ? ipmr_vif_seq_idx(seq->private, *pos - 1)
                : SEQ_START_TOKEN;
}

static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        struct ipmr_vif_iter *iter = seq->private;

        ++*pos;
        if (v == SEQ_START_TOKEN)
                return ipmr_vif_seq_idx(iter, 0);

        while (++iter->ct < maxvif) {
                if (!VIF_EXISTS(iter->ct))
                        continue;
                return &vif_table[iter->ct];
        }
        return NULL;
}

static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
{
        read_unlock(&mrt_lock);
}

static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
{
        if (v == SEQ_START_TOKEN) {
                seq_puts(seq,
                         "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
        } else {
                const struct vif_device *vif = v;
                const char *name = vif->dev ? vif->dev->name : "none";

                seq_printf(seq,
                           "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
                           vif - vif_table,
                           name, vif->bytes_in, vif->pkt_in,
                           vif->bytes_out, vif->pkt_out,
                           vif->flags, vif->local, vif->remote);
        }
        return 0;
}

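/*
 * With the format strings above, a dump of /proc/net/ip_mr_vif looks
 * roughly like this (values illustrative only; Local and Remote are raw
 * hex addresses):
 *
 *   Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote
 *    0 eth0            1500      10      3000      20 00000 0100000A 00000000
 */
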
static const struct seq_operations ipmr_vif_seq_ops = {
        .start = ipmr_vif_seq_start,
        .next  = ipmr_vif_seq_next,
        .stop  = ipmr_vif_seq_stop,
        .show  = ipmr_vif_seq_show,
};

static int ipmr_vif_open(struct inode *inode, struct file *file)
{
        return seq_open_private(file, &ipmr_vif_seq_ops,
                        sizeof(struct ipmr_vif_iter));
}

static const struct file_operations ipmr_vif_fops = {
        .owner   = THIS_MODULE,
        .open    = ipmr_vif_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release_private,
};

struct ipmr_mfc_iter {
        struct mfc_cache **cache;
        int ct;
};

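/*
 * The MFC dump walks two structures in turn: the hash table of resolved
 * entries (under mrt_lock) and then the queue of unresolved entries
 * (under mfc_unres_lock).  it->cache records which structure, and hence
 * which lock, is current, so that ->next and ->stop release the right one.
 */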
static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
{
        struct mfc_cache *mfc;

        it->cache = mfc_cache_array;
        read_lock(&mrt_lock);
        for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
                for (mfc = mfc_cache_array[it->ct]; mfc; mfc = mfc->next)
                        if (pos-- == 0)
                                return mfc;
        read_unlock(&mrt_lock);

        it->cache = &mfc_unres_queue;
        spin_lock_bh(&mfc_unres_lock);
        for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
                if (pos-- == 0)
                        return mfc;
        spin_unlock_bh(&mfc_unres_lock);

        it->cache = NULL;
        return NULL;
}

static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
        struct ipmr_mfc_iter *it = seq->private;
        it->cache = NULL;
        it->ct = 0;
        return *pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1)
                : SEQ_START_TOKEN;
}

static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        struct mfc_cache *mfc = v;
        struct ipmr_mfc_iter *it = seq->private;

        ++*pos;

        if (v == SEQ_START_TOKEN)
                return ipmr_mfc_seq_idx(seq->private, 0);

        if (mfc->next)
                return mfc->next;

        if (it->cache == &mfc_unres_queue)
                goto end_of_list;

        BUG_ON(it->cache != mfc_cache_array);

        while (++it->ct < MFC_LINES) {
                mfc = mfc_cache_array[it->ct];
                if (mfc)
                        return mfc;
        }

        /* exhausted cache_array, show unresolved */
        read_unlock(&mrt_lock);
        it->cache = &mfc_unres_queue;
        it->ct = 0;

        spin_lock_bh(&mfc_unres_lock);
        mfc = mfc_unres_queue;
        if (mfc)
                return mfc;

 end_of_list:
        spin_unlock_bh(&mfc_unres_lock);
        it->cache = NULL;

        return NULL;
}

static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
        struct ipmr_mfc_iter *it = seq->private;

        if (it->cache == &mfc_unres_queue)
                spin_unlock_bh(&mfc_unres_lock);
        else if (it->cache == mfc_cache_array)
                read_unlock(&mrt_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
        int n;

        if (v == SEQ_START_TOKEN) {
                seq_puts(seq,
                 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
        } else {
                const struct mfc_cache *mfc = v;
                const struct ipmr_mfc_iter *it = seq->private;

                seq_printf(seq, "%08lX %08lX %-3d %8ld %8ld %8ld",
                           (unsigned long) mfc->mfc_mcastgrp,
                           (unsigned long) mfc->mfc_origin,
                           mfc->mfc_parent,
                           mfc->mfc_un.res.pkt,
                           mfc->mfc_un.res.bytes,
                           mfc->mfc_un.res.wrong_if);

                if (it->cache != &mfc_unres_queue) {
                        for (n = mfc->mfc_un.res.minvif;
                             n < mfc->mfc_un.res.maxvif; n++) {
                                if (VIF_EXISTS(n) &&
                                    mfc->mfc_un.res.ttls[n] < 255)
                                        seq_printf(seq,
                                                   " %2d:%-3d",
                                                   n, mfc->mfc_un.res.ttls[n]);
                        }
                }
                seq_putc(seq, '\n');
        }
        return 0;
}

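/*
 * A dump of /proc/net/ip_mr_cache then looks roughly like this (values
 * illustrative only; Group and Origin are raw hex addresses, and each
 * Oifs entry is "vif:ttl-threshold"):
 *
 *   Group    Origin   Iif     Pkts    Bytes    Wrong Oifs
 *   010000E0 0100000A 0         10     1480        0  1:1  2:1
 */
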
static const struct seq_operations ipmr_mfc_seq_ops = {
        .start = ipmr_mfc_seq_start,
        .next  = ipmr_mfc_seq_next,
        .stop  = ipmr_mfc_seq_stop,
        .show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
        return seq_open_private(file, &ipmr_mfc_seq_ops,
                        sizeof(struct ipmr_mfc_iter));
}

static const struct file_operations ipmr_mfc_fops = {
        .owner   = THIS_MODULE,
        .open    = ipmr_mfc_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release_private,
};
#endif

#ifdef CONFIG_IP_PIMSM_V2
static struct net_protocol pim_protocol = {
        .handler        =       pim_rcv,
};
#endif


/*
 *      Setup for IP multicast routing
 */

void __init ip_mr_init(void)
{
        mrt_cachep = kmem_cache_create("ip_mrt_cache",
                                       sizeof(struct mfc_cache),
                                       0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
                                       NULL);
        init_timer(&ipmr_expire_timer);
        ipmr_expire_timer.function = ipmr_expire_process;
        register_netdevice_notifier(&ip_mr_notifier);
#ifdef CONFIG_PROC_FS
        proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops);
        proc_net_fops_create(&init_net, "ip_mr_cache", 0, &ipmr_mfc_fops);
#endif
}
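
/*
 * Illustrative userspace sketch (not part of ipmr.c): a routing daemon
 * such as mrouted or pimd takes ownership of mroute_socket by issuing
 * MRT_INIT on a raw IGMP socket.  The kernel returns -EADDRINUSE if
 * another daemon already holds it, and tears the multicast-routing state
 * down again when the socket is closed (or on MRT_DONE).
 */
#if 0   /* example only, never compiled */
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/mroute.h>

static int mroute_open(void)
{
        int one = 1;
        /* MRT_* setsockopts are only accepted on a raw IGMP socket. */
        int s = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);

        if (s < 0)
                return -1;
        /* Become the single multicast-routing daemon for this kernel. */
        if (setsockopt(s, IPPROTO_IP, MRT_INIT, &one, sizeof(one)) < 0)
                return -1;
        return s;
}
#endif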
