OpenCores
URL https://opencores.org/ocsvn/test_project/test_project/trunk

Subversion Repositories test_project

[/] [test_project/] [trunk/] [linux_sd_driver/] [net/] [sched/] [sch_api.c] - Blame information for rev 62

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 62 marcus.erl
/*
2
 * net/sched/sch_api.c  Packet scheduler API.
3
 *
4
 *              This program is free software; you can redistribute it and/or
5
 *              modify it under the terms of the GNU General Public License
6
 *              as published by the Free Software Foundation; either version
7
 *              2 of the License, or (at your option) any later version.
8
 *
9
 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10
 *
11
 * Fixes:
12
 *
13
 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
15
 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
16
 */
17
 
18
#include <linux/module.h>
19
#include <linux/types.h>
20
#include <linux/kernel.h>
21
#include <linux/string.h>
22
#include <linux/errno.h>
23
#include <linux/skbuff.h>
24
#include <linux/init.h>
25
#include <linux/proc_fs.h>
26
#include <linux/seq_file.h>
27
#include <linux/kmod.h>
28
#include <linux/list.h>
29
#include <linux/hrtimer.h>
30
 
31
#include <net/net_namespace.h>
32
#include <net/netlink.h>
33
#include <net/pkt_sched.h>
34
 
35
static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid,
36
                        struct Qdisc *old, struct Qdisc *new);
37
static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
38
                         struct Qdisc *q, unsigned long cl, int event);
39
 
40
/*
41
 
42
   Short review.
43
   -------------
44
 
45
   This file consists of two interrelated parts:
46
 
47
   1. queueing disciplines manager frontend.
48
   2. traffic classes manager frontend.
49
 
50
   Generally, queueing discipline ("qdisc") is a black box,
51
   which is able to enqueue packets and to dequeue them (when
52
   device is ready to send something) in order and at times
53
   determined by algorithm hidden in it.
54
 
55
   qdisc's are divided to two categories:
56
   - "queues", which have no internal structure visible from outside.
57
   - "schedulers", which split all the packets to "traffic classes",
58
     using "packet classifiers" (look at cls_api.c)
59
 
60
   In turn, classes may have child qdiscs (as rule, queues)
61
   attached to them etc. etc. etc.
62
 
63
   The goal of the routines in this file is to translate
64
   information supplied by user in the form of handles
65
   to more intelligible for kernel form, to make some sanity
66
   checks and part of work, which is common to all qdiscs
67
   and to provide rtnetlink notifications.
68
 
69
   All real intelligent work is done inside qdisc modules.
70
 
71
 
72
 
73
   Every discipline has two major routines: enqueue and dequeue.
74
 
75
   ---dequeue
76
 
77
   dequeue usually returns a skb to send. It is allowed to return NULL,
78
   but it does not mean that queue is empty, it just means that
79
   discipline does not want to send anything this time.
80
   Queue is really empty if q->q.qlen == 0.
81
   For complicated disciplines with multiple queues q->q is not
82
   real packet queue, but however q->q.qlen must be valid.
83
 
84
   ---enqueue
85
 
86
   enqueue returns 0, if packet was enqueued successfully.
87
   If packet (this one or another one) was dropped, it returns
88
   not zero error code.
89
   NET_XMIT_DROP        - this packet dropped
90
     Expected action: do not backoff, but wait until queue will clear.
91
   NET_XMIT_CN          - probably this packet enqueued, but another one dropped.
92
     Expected action: backoff or ignore
93
   NET_XMIT_POLICED     - dropped by police.
94
     Expected action: backoff or error to real-time apps.
95
 
96
   Auxiliary routines:
97
 
98
   ---requeue
99
 
100
   requeues once dequeued packet. It is used for non-standard or
101
   just buggy devices, which can defer output even if dev->tbusy=0.
102
 
103
   ---reset
104
 
105
   returns qdisc to initial state: purge all buffers, clear all
106
   timers, counters (except for statistics) etc.
107
 
108
   ---init
109
 
110
   initializes newly created qdisc.
111
 
112
   ---destroy
113
 
114
   destroys resources allocated by init and during lifetime of qdisc.
115
 
116
   ---change
117
 
118
   changes qdisc parameters.
119
 */
120
 
121
/* Protects list of registered TC modules. It is pure SMP lock. */
122
static DEFINE_RWLOCK(qdisc_mod_lock);
123
 
124
 
125
/************************************************
126
 *      Queueing disciplines manipulation.      *
127
 ************************************************/
128
 
129
 
130
/* The list of all installed queueing disciplines. */
131
 
132
static struct Qdisc_ops *qdisc_base;
133
 
134
/* Register/uregister queueing discipline */
135
 
136
int register_qdisc(struct Qdisc_ops *qops)
137
{
138
        struct Qdisc_ops *q, **qp;
139
        int rc = -EEXIST;
140
 
141
        write_lock(&qdisc_mod_lock);
142
        for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
143
                if (!strcmp(qops->id, q->id))
144
                        goto out;
145
 
146
        if (qops->enqueue == NULL)
147
                qops->enqueue = noop_qdisc_ops.enqueue;
148
        if (qops->requeue == NULL)
149
                qops->requeue = noop_qdisc_ops.requeue;
150
        if (qops->dequeue == NULL)
151
                qops->dequeue = noop_qdisc_ops.dequeue;
152
 
153
        qops->next = NULL;
154
        *qp = qops;
155
        rc = 0;
156
out:
157
        write_unlock(&qdisc_mod_lock);
158
        return rc;
159
}
160
 
161
int unregister_qdisc(struct Qdisc_ops *qops)
162
{
163
        struct Qdisc_ops *q, **qp;
164
        int err = -ENOENT;
165
 
166
        write_lock(&qdisc_mod_lock);
167
        for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next)
168
                if (q == qops)
169
                        break;
170
        if (q) {
171
                *qp = q->next;
172
                q->next = NULL;
173
                err = 0;
174
        }
175
        write_unlock(&qdisc_mod_lock);
176
        return err;
177
}
178
 
179
/* We know handle. Find qdisc among all qdisc's attached to device
180
   (root qdisc, all its children, children of children etc.)
181
 */
182
 
183
struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
184
{
185
        struct Qdisc *q;
186
 
187
        list_for_each_entry(q, &dev->qdisc_list, list) {
188
                if (q->handle == handle)
189
                        return q;
190
        }
191
        return NULL;
192
}
193
 
194
static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
195
{
196
        unsigned long cl;
197
        struct Qdisc *leaf;
198
        struct Qdisc_class_ops *cops = p->ops->cl_ops;
199
 
200
        if (cops == NULL)
201
                return NULL;
202
        cl = cops->get(p, classid);
203
 
204
        if (cl == 0)
205
                return NULL;
206
        leaf = cops->leaf(p, cl);
207
        cops->put(p, cl);
208
        return leaf;
209
}
210
 
211
/* Find queueing discipline by name */
212
 
213
static struct Qdisc_ops *qdisc_lookup_ops(struct rtattr *kind)
214
{
215
        struct Qdisc_ops *q = NULL;
216
 
217
        if (kind) {
218
                read_lock(&qdisc_mod_lock);
219
                for (q = qdisc_base; q; q = q->next) {
220
                        if (rtattr_strcmp(kind, q->id) == 0) {
221
                                if (!try_module_get(q->owner))
222
                                        q = NULL;
223
                                break;
224
                        }
225
                }
226
                read_unlock(&qdisc_mod_lock);
227
        }
228
        return q;
229
}
230
 
231
static struct qdisc_rate_table *qdisc_rtab_list;
232
 
233
struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct rtattr *tab)
234
{
235
        struct qdisc_rate_table *rtab;
236
 
237
        for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
238
                if (memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) == 0) {
239
                        rtab->refcnt++;
240
                        return rtab;
241
                }
242
        }
243
 
244
        if (tab == NULL || r->rate == 0 || r->cell_log == 0 || RTA_PAYLOAD(tab) != 1024)
245
                return NULL;
246
 
247
        rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
248
        if (rtab) {
249
                rtab->rate = *r;
250
                rtab->refcnt = 1;
251
                memcpy(rtab->data, RTA_DATA(tab), 1024);
252
                rtab->next = qdisc_rtab_list;
253
                qdisc_rtab_list = rtab;
254
        }
255
        return rtab;
256
}
257
 
258
void qdisc_put_rtab(struct qdisc_rate_table *tab)
259
{
260
        struct qdisc_rate_table *rtab, **rtabp;
261
 
262
        if (!tab || --tab->refcnt)
263
                return;
264
 
265
        for (rtabp = &qdisc_rtab_list; (rtab=*rtabp) != NULL; rtabp = &rtab->next) {
266
                if (rtab == tab) {
267
                        *rtabp = rtab->next;
268
                        kfree(rtab);
269
                        return;
270
                }
271
        }
272
}
273
 
274
static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
275
{
276
        struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
277
                                                 timer);
278
        struct net_device *dev = wd->qdisc->dev;
279
 
280
        wd->qdisc->flags &= ~TCQ_F_THROTTLED;
281
        smp_wmb();
282
        netif_schedule(dev);
283
 
284
        return HRTIMER_NORESTART;
285
}
286
 
287
void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
288
{
289
        hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
290
        wd->timer.function = qdisc_watchdog;
291
        wd->qdisc = qdisc;
292
}
293
EXPORT_SYMBOL(qdisc_watchdog_init);
294
 
295
void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
296
{
297
        ktime_t time;
298
 
299
        wd->qdisc->flags |= TCQ_F_THROTTLED;
300
        time = ktime_set(0, 0);
301
        time = ktime_add_ns(time, PSCHED_US2NS(expires));
302
        hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS);
303
}
304
EXPORT_SYMBOL(qdisc_watchdog_schedule);
305
 
306
void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
307
{
308
        hrtimer_cancel(&wd->timer);
309
        wd->qdisc->flags &= ~TCQ_F_THROTTLED;
310
}
311
EXPORT_SYMBOL(qdisc_watchdog_cancel);
312
 
313
/* Allocate an unique handle from space managed by kernel */
314
 
315
static u32 qdisc_alloc_handle(struct net_device *dev)
316
{
317
        int i = 0x10000;
318
        static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
319
 
320
        do {
321
                autohandle += TC_H_MAKE(0x10000U, 0);
322
                if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
323
                        autohandle = TC_H_MAKE(0x80000000U, 0);
324
        } while (qdisc_lookup(dev, autohandle) && --i > 0);
325
 
326
        return i>0 ? autohandle : 0;
327
}
328
 
329
/* Attach toplevel qdisc to device dev */
330
 
331
static struct Qdisc *
332
dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc)
333
{
334
        struct Qdisc *oqdisc;
335
 
336
        if (dev->flags & IFF_UP)
337
                dev_deactivate(dev);
338
 
339
        qdisc_lock_tree(dev);
340
        if (qdisc && qdisc->flags&TCQ_F_INGRESS) {
341
                oqdisc = dev->qdisc_ingress;
342
                /* Prune old scheduler */
343
                if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) {
344
                        /* delete */
345
                        qdisc_reset(oqdisc);
346
                        dev->qdisc_ingress = NULL;
347
                } else {  /* new */
348
                        dev->qdisc_ingress = qdisc;
349
                }
350
 
351
        } else {
352
 
353
                oqdisc = dev->qdisc_sleeping;
354
 
355
                /* Prune old scheduler */
356
                if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)
357
                        qdisc_reset(oqdisc);
358
 
359
                /* ... and graft new one */
360
                if (qdisc == NULL)
361
                        qdisc = &noop_qdisc;
362
                dev->qdisc_sleeping = qdisc;
363
                dev->qdisc = &noop_qdisc;
364
        }
365
 
366
        qdisc_unlock_tree(dev);
367
 
368
        if (dev->flags & IFF_UP)
369
                dev_activate(dev);
370
 
371
        return oqdisc;
372
}
373
 
374
void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
375
{
376
        struct Qdisc_class_ops *cops;
377
        unsigned long cl;
378
        u32 parentid;
379
 
380
        if (n == 0)
381
                return;
382
        while ((parentid = sch->parent)) {
383
                sch = qdisc_lookup(sch->dev, TC_H_MAJ(parentid));
384
                if (sch == NULL) {
385
                        WARN_ON(parentid != TC_H_ROOT);
386
                        return;
387
                }
388
                cops = sch->ops->cl_ops;
389
                if (cops->qlen_notify) {
390
                        cl = cops->get(sch, parentid);
391
                        cops->qlen_notify(sch, cl);
392
                        cops->put(sch, cl);
393
                }
394
                sch->q.qlen -= n;
395
        }
396
}
397
EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
398
 
399
/* Graft qdisc "new" to class "classid" of qdisc "parent" or
400
   to device "dev".
401
 
402
   Old qdisc is not destroyed but returned in *old.
403
 */
404
 
405
static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
406
                       u32 classid,
407
                       struct Qdisc *new, struct Qdisc **old)
408
{
409
        int err = 0;
410
        struct Qdisc *q = *old;
411
 
412
 
413
        if (parent == NULL) {
414
                if (q && q->flags&TCQ_F_INGRESS) {
415
                        *old = dev_graft_qdisc(dev, q);
416
                } else {
417
                        *old = dev_graft_qdisc(dev, new);
418
                }
419
        } else {
420
                struct Qdisc_class_ops *cops = parent->ops->cl_ops;
421
 
422
                err = -EINVAL;
423
 
424
                if (cops) {
425
                        unsigned long cl = cops->get(parent, classid);
426
                        if (cl) {
427
                                err = cops->graft(parent, cl, new, old);
428
                                cops->put(parent, cl);
429
                        }
430
                }
431
        }
432
        return err;
433
}
434
 
435
/*
436
   Allocate and initialize new qdisc.
437
 
438
   Parameters are passed via opt.
439
 */
440
 
441
static struct Qdisc *
442
qdisc_create(struct net_device *dev, u32 parent, u32 handle,
443
           struct rtattr **tca, int *errp)
444
{
445
        int err;
446
        struct rtattr *kind = tca[TCA_KIND-1];
447
        struct Qdisc *sch;
448
        struct Qdisc_ops *ops;
449
 
450
        ops = qdisc_lookup_ops(kind);
451
#ifdef CONFIG_KMOD
452
        if (ops == NULL && kind != NULL) {
453
                char name[IFNAMSIZ];
454
                if (rtattr_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
455
                        /* We dropped the RTNL semaphore in order to
456
                         * perform the module load.  So, even if we
457
                         * succeeded in loading the module we have to
458
                         * tell the caller to replay the request.  We
459
                         * indicate this using -EAGAIN.
460
                         * We replay the request because the device may
461
                         * go away in the mean time.
462
                         */
463
                        rtnl_unlock();
464
                        request_module("sch_%s", name);
465
                        rtnl_lock();
466
                        ops = qdisc_lookup_ops(kind);
467
                        if (ops != NULL) {
468
                                /* We will try again qdisc_lookup_ops,
469
                                 * so don't keep a reference.
470
                                 */
471
                                module_put(ops->owner);
472
                                err = -EAGAIN;
473
                                goto err_out;
474
                        }
475
                }
476
        }
477
#endif
478
 
479
        err = -ENOENT;
480
        if (ops == NULL)
481
                goto err_out;
482
 
483
        sch = qdisc_alloc(dev, ops);
484
        if (IS_ERR(sch)) {
485
                err = PTR_ERR(sch);
486
                goto err_out2;
487
        }
488
 
489
        sch->parent = parent;
490
 
491
        if (handle == TC_H_INGRESS) {
492
                sch->flags |= TCQ_F_INGRESS;
493
                sch->stats_lock = &dev->ingress_lock;
494
                handle = TC_H_MAKE(TC_H_INGRESS, 0);
495
        } else {
496
                sch->stats_lock = &dev->queue_lock;
497
                if (handle == 0) {
498
                        handle = qdisc_alloc_handle(dev);
499
                        err = -ENOMEM;
500
                        if (handle == 0)
501
                                goto err_out3;
502
                }
503
        }
504
 
505
        sch->handle = handle;
506
 
507
        if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) {
508
                if (tca[TCA_RATE-1]) {
509
                        err = gen_new_estimator(&sch->bstats, &sch->rate_est,
510
                                                sch->stats_lock,
511
                                                tca[TCA_RATE-1]);
512
                        if (err) {
513
                                /*
514
                                 * Any broken qdiscs that would require
515
                                 * a ops->reset() here? The qdisc was never
516
                                 * in action so it shouldn't be necessary.
517
                                 */
518
                                if (ops->destroy)
519
                                        ops->destroy(sch);
520
                                goto err_out3;
521
                        }
522
                }
523
                qdisc_lock_tree(dev);
524
                list_add_tail(&sch->list, &dev->qdisc_list);
525
                qdisc_unlock_tree(dev);
526
 
527
                return sch;
528
        }
529
err_out3:
530
        dev_put(dev);
531
        kfree((char *) sch - sch->padded);
532
err_out2:
533
        module_put(ops->owner);
534
err_out:
535
        *errp = err;
536
        return NULL;
537
}
538
 
539
static int qdisc_change(struct Qdisc *sch, struct rtattr **tca)
540
{
541
        if (tca[TCA_OPTIONS-1]) {
542
                int err;
543
 
544
                if (sch->ops->change == NULL)
545
                        return -EINVAL;
546
                err = sch->ops->change(sch, tca[TCA_OPTIONS-1]);
547
                if (err)
548
                        return err;
549
        }
550
        if (tca[TCA_RATE-1])
551
                gen_replace_estimator(&sch->bstats, &sch->rate_est,
552
                        sch->stats_lock, tca[TCA_RATE-1]);
553
        return 0;
554
}
555
 
556
struct check_loop_arg
557
{
558
        struct qdisc_walker     w;
559
        struct Qdisc            *p;
560
        int                     depth;
561
};
562
 
563
static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);
564
 
565
static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
566
{
567
        struct check_loop_arg   arg;
568
 
569
        if (q->ops->cl_ops == NULL)
570
                return 0;
571
 
572
        arg.w.stop = arg.w.skip = arg.w.count = 0;
573
        arg.w.fn = check_loop_fn;
574
        arg.depth = depth;
575
        arg.p = p;
576
        q->ops->cl_ops->walk(q, &arg.w);
577
        return arg.w.stop ? -ELOOP : 0;
578
}
579
 
580
static int
581
check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
582
{
583
        struct Qdisc *leaf;
584
        struct Qdisc_class_ops *cops = q->ops->cl_ops;
585
        struct check_loop_arg *arg = (struct check_loop_arg *)w;
586
 
587
        leaf = cops->leaf(q, cl);
588
        if (leaf) {
589
                if (leaf == arg->p || arg->depth > 7)
590
                        return -ELOOP;
591
                return check_loop(leaf, arg->p, arg->depth + 1);
592
        }
593
        return 0;
594
}
595
 
596
/*
597
 * Delete/get qdisc.
598
 */
599
 
600
static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
601
{
602
        struct tcmsg *tcm = NLMSG_DATA(n);
603
        struct rtattr **tca = arg;
604
        struct net_device *dev;
605
        u32 clid = tcm->tcm_parent;
606
        struct Qdisc *q = NULL;
607
        struct Qdisc *p = NULL;
608
        int err;
609
 
610
        if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
611
                return -ENODEV;
612
 
613
        if (clid) {
614
                if (clid != TC_H_ROOT) {
615
                        if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
616
                                if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
617
                                        return -ENOENT;
618
                                q = qdisc_leaf(p, clid);
619
                        } else { /* ingress */
620
                                q = dev->qdisc_ingress;
621
                        }
622
                } else {
623
                        q = dev->qdisc_sleeping;
624
                }
625
                if (!q)
626
                        return -ENOENT;
627
 
628
                if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
629
                        return -EINVAL;
630
        } else {
631
                if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
632
                        return -ENOENT;
633
        }
634
 
635
        if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
636
                return -EINVAL;
637
 
638
        if (n->nlmsg_type == RTM_DELQDISC) {
639
                if (!clid)
640
                        return -EINVAL;
641
                if (q->handle == 0)
642
                        return -ENOENT;
643
                if ((err = qdisc_graft(dev, p, clid, NULL, &q)) != 0)
644
                        return err;
645
                if (q) {
646
                        qdisc_notify(skb, n, clid, q, NULL);
647
                        qdisc_lock_tree(dev);
648
                        qdisc_destroy(q);
649
                        qdisc_unlock_tree(dev);
650
                }
651
        } else {
652
                qdisc_notify(skb, n, clid, NULL, q);
653
        }
654
        return 0;
655
}
656
 
657
/*
658
   Create/change qdisc.
659
 */
660
 
661
static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
662
{
663
        struct tcmsg *tcm;
664
        struct rtattr **tca;
665
        struct net_device *dev;
666
        u32 clid;
667
        struct Qdisc *q, *p;
668
        int err;
669
 
670
replay:
671
        /* Reinit, just in case something touches this. */
672
        tcm = NLMSG_DATA(n);
673
        tca = arg;
674
        clid = tcm->tcm_parent;
675
        q = p = NULL;
676
 
677
        if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
678
                return -ENODEV;
679
 
680
        if (clid) {
681
                if (clid != TC_H_ROOT) {
682
                        if (clid != TC_H_INGRESS) {
683
                                if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
684
                                        return -ENOENT;
685
                                q = qdisc_leaf(p, clid);
686
                        } else { /*ingress */
687
                                q = dev->qdisc_ingress;
688
                        }
689
                } else {
690
                        q = dev->qdisc_sleeping;
691
                }
692
 
693
                /* It may be default qdisc, ignore it */
694
                if (q && q->handle == 0)
695
                        q = NULL;
696
 
697
                if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
698
                        if (tcm->tcm_handle) {
699
                                if (q && !(n->nlmsg_flags&NLM_F_REPLACE))
700
                                        return -EEXIST;
701
                                if (TC_H_MIN(tcm->tcm_handle))
702
                                        return -EINVAL;
703
                                if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
704
                                        goto create_n_graft;
705
                                if (n->nlmsg_flags&NLM_F_EXCL)
706
                                        return -EEXIST;
707
                                if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
708
                                        return -EINVAL;
709
                                if (q == p ||
710
                                    (p && check_loop(q, p, 0)))
711
                                        return -ELOOP;
712
                                atomic_inc(&q->refcnt);
713
                                goto graft;
714
                        } else {
715
                                if (q == NULL)
716
                                        goto create_n_graft;
717
 
718
                                /* This magic test requires explanation.
719
                                 *
720
                                 *   We know, that some child q is already
721
                                 *   attached to this parent and have choice:
722
                                 *   either to change it or to create/graft new one.
723
                                 *
724
                                 *   1. We are allowed to create/graft only
725
                                 *   if CREATE and REPLACE flags are set.
726
                                 *
727
                                 *   2. If EXCL is set, requestor wanted to say,
728
                                 *   that qdisc tcm_handle is not expected
729
                                 *   to exist, so that we choose create/graft too.
730
                                 *
731
                                 *   3. The last case is when no flags are set.
732
                                 *   Alas, it is sort of hole in API, we
733
                                 *   cannot decide what to do unambiguously.
734
                                 *   For now we select create/graft, if
735
                                 *   user gave KIND, which does not match existing.
736
                                 */
737
                                if ((n->nlmsg_flags&NLM_F_CREATE) &&
738
                                    (n->nlmsg_flags&NLM_F_REPLACE) &&
739
                                    ((n->nlmsg_flags&NLM_F_EXCL) ||
740
                                     (tca[TCA_KIND-1] &&
741
                                      rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))))
742
                                        goto create_n_graft;
743
                        }
744
                }
745
        } else {
746
                if (!tcm->tcm_handle)
747
                        return -EINVAL;
748
                q = qdisc_lookup(dev, tcm->tcm_handle);
749
        }
750
 
751
        /* Change qdisc parameters */
752
        if (q == NULL)
753
                return -ENOENT;
754
        if (n->nlmsg_flags&NLM_F_EXCL)
755
                return -EEXIST;
756
        if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
757
                return -EINVAL;
758
        err = qdisc_change(q, tca);
759
        if (err == 0)
760
                qdisc_notify(skb, n, clid, NULL, q);
761
        return err;
762
 
763
create_n_graft:
764
        if (!(n->nlmsg_flags&NLM_F_CREATE))
765
                return -ENOENT;
766
        if (clid == TC_H_INGRESS)
767
                q = qdisc_create(dev, tcm->tcm_parent, tcm->tcm_parent,
768
                                 tca, &err);
769
        else
770
                q = qdisc_create(dev, tcm->tcm_parent, tcm->tcm_handle,
771
                                 tca, &err);
772
        if (q == NULL) {
773
                if (err == -EAGAIN)
774
                        goto replay;
775
                return err;
776
        }
777
 
778
graft:
779
        if (1) {
780
                struct Qdisc *old_q = NULL;
781
                err = qdisc_graft(dev, p, clid, q, &old_q);
782
                if (err) {
783
                        if (q) {
784
                                qdisc_lock_tree(dev);
785
                                qdisc_destroy(q);
786
                                qdisc_unlock_tree(dev);
787
                        }
788
                        return err;
789
                }
790
                qdisc_notify(skb, n, clid, old_q, q);
791
                if (old_q) {
792
                        qdisc_lock_tree(dev);
793
                        qdisc_destroy(old_q);
794
                        qdisc_unlock_tree(dev);
795
                }
796
        }
797
        return 0;
798
}
799
 
800
static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
801
                         u32 pid, u32 seq, u16 flags, int event)
802
{
803
        struct tcmsg *tcm;
804
        struct nlmsghdr  *nlh;
805
        unsigned char *b = skb_tail_pointer(skb);
806
        struct gnet_dump d;
807
 
808
        nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
809
        tcm = NLMSG_DATA(nlh);
810
        tcm->tcm_family = AF_UNSPEC;
811
        tcm->tcm__pad1 = 0;
812
        tcm->tcm__pad2 = 0;
813
        tcm->tcm_ifindex = q->dev->ifindex;
814
        tcm->tcm_parent = clid;
815
        tcm->tcm_handle = q->handle;
816
        tcm->tcm_info = atomic_read(&q->refcnt);
817
        RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id);
818
        if (q->ops->dump && q->ops->dump(q, skb) < 0)
819
                goto rtattr_failure;
820
        q->qstats.qlen = q->q.qlen;
821
 
822
        if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
823
                        TCA_XSTATS, q->stats_lock, &d) < 0)
824
                goto rtattr_failure;
825
 
826
        if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
827
                goto rtattr_failure;
828
 
829
        if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||
830
            gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
831
            gnet_stats_copy_queue(&d, &q->qstats) < 0)
832
                goto rtattr_failure;
833
 
834
        if (gnet_stats_finish_copy(&d) < 0)
835
                goto rtattr_failure;
836
 
837
        nlh->nlmsg_len = skb_tail_pointer(skb) - b;
838
        return skb->len;
839
 
840
nlmsg_failure:
841
rtattr_failure:
842
        nlmsg_trim(skb, b);
843
        return -1;
844
}
845
 
846
static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
847
                        u32 clid, struct Qdisc *old, struct Qdisc *new)
848
{
849
        struct sk_buff *skb;
850
        u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
851
 
852
        skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
853
        if (!skb)
854
                return -ENOBUFS;
855
 
856
        if (old && old->handle) {
857
                if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
858
                        goto err_out;
859
        }
860
        if (new) {
861
                if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
862
                        goto err_out;
863
        }
864
 
865
        if (skb->len)
866
                return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
867
 
868
err_out:
869
        kfree_skb(skb);
870
        return -EINVAL;
871
}
872
 
873
static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
874
{
875
        int idx, q_idx;
876
        int s_idx, s_q_idx;
877
        struct net_device *dev;
878
        struct Qdisc *q;
879
 
880
        s_idx = cb->args[0];
881
        s_q_idx = q_idx = cb->args[1];
882
        read_lock(&dev_base_lock);
883
        idx = 0;
884
        for_each_netdev(&init_net, dev) {
885
                if (idx < s_idx)
886
                        goto cont;
887
                if (idx > s_idx)
888
                        s_q_idx = 0;
889
                q_idx = 0;
890
                list_for_each_entry(q, &dev->qdisc_list, list) {
891
                        if (q_idx < s_q_idx) {
892
                                q_idx++;
893
                                continue;
894
                        }
895
                        if (tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
896
                                          cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
897
                                goto done;
898
                        q_idx++;
899
                }
900
cont:
901
                idx++;
902
        }
903
 
904
done:
905
        read_unlock(&dev_base_lock);
906
 
907
        cb->args[0] = idx;
908
        cb->args[1] = q_idx;
909
 
910
        return skb->len;
911
}
912
 
913
 
914
 
915
/************************************************
916
 *      Traffic classes manipulation.           *
917
 ************************************************/
918
 
919
 
920
 
921
static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
922
{
923
        struct tcmsg *tcm = NLMSG_DATA(n);
924
        struct rtattr **tca = arg;
925
        struct net_device *dev;
926
        struct Qdisc *q = NULL;
927
        struct Qdisc_class_ops *cops;
928
        unsigned long cl = 0;
929
        unsigned long new_cl;
930
        u32 pid = tcm->tcm_parent;
931
        u32 clid = tcm->tcm_handle;
932
        u32 qid = TC_H_MAJ(clid);
933
        int err;
934
 
935
        if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
936
                return -ENODEV;
937
 
938
        /*
939
           parent == TC_H_UNSPEC - unspecified parent.
940
           parent == TC_H_ROOT   - class is root, which has no parent.
941
           parent == X:0         - parent is root class.
942
           parent == X:Y         - parent is a node in hierarchy.
943
           parent == 0:Y         - parent is X:Y, where X:0 is qdisc.
944
 
945
           handle == 0:0         - generate handle from kernel pool.
946
           handle == 0:Y         - class is X:Y, where X:0 is qdisc.
947
           handle == X:Y         - clear.
948
           handle == X:0         - root class.
949
         */
950
 
951
        /* Step 1. Determine qdisc handle X:0 */
952
 
953
        if (pid != TC_H_ROOT) {
954
                u32 qid1 = TC_H_MAJ(pid);
955
 
956
                if (qid && qid1) {
957
                        /* If both majors are known, they must be identical. */
958
                        if (qid != qid1)
959
                                return -EINVAL;
960
                } else if (qid1) {
961
                        qid = qid1;
962
                } else if (qid == 0)
963
                        qid = dev->qdisc_sleeping->handle;
964
 
965
                /* Now qid is genuine qdisc handle consistent
966
                   both with parent and child.
967
 
968
                   TC_H_MAJ(pid) still may be unspecified, complete it now.
969
                 */
970
                if (pid)
971
                        pid = TC_H_MAKE(qid, pid);
972
        } else {
973
                if (qid == 0)
974
                        qid = dev->qdisc_sleeping->handle;
975
        }
976
 
977
        /* OK. Locate qdisc */
978
        if ((q = qdisc_lookup(dev, qid)) == NULL)
979
                return -ENOENT;
980
 
981
        /* An check that it supports classes */
982
        cops = q->ops->cl_ops;
983
        if (cops == NULL)
984
                return -EINVAL;
985
 
986
        /* Now try to get class */
987
        if (clid == 0) {
988
                if (pid == TC_H_ROOT)
989
                        clid = qid;
990
        } else
991
                clid = TC_H_MAKE(qid, clid);
992
 
993
        if (clid)
994
                cl = cops->get(q, clid);
995
 
996
        if (cl == 0) {
997
                err = -ENOENT;
998
                if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE))
999
                        goto out;
1000
        } else {
1001
                switch (n->nlmsg_type) {
1002
                case RTM_NEWTCLASS:
1003
                        err = -EEXIST;
1004
                        if (n->nlmsg_flags&NLM_F_EXCL)
1005
                                goto out;
1006
                        break;
1007
                case RTM_DELTCLASS:
1008
                        err = cops->delete(q, cl);
1009
                        if (err == 0)
1010
                                tclass_notify(skb, n, q, cl, RTM_DELTCLASS);
1011
                        goto out;
1012
                case RTM_GETTCLASS:
1013
                        err = tclass_notify(skb, n, q, cl, RTM_NEWTCLASS);
1014
                        goto out;
1015
                default:
1016
                        err = -EINVAL;
1017
                        goto out;
1018
                }
1019
        }
1020
 
1021
        new_cl = cl;
1022
        err = cops->change(q, clid, pid, tca, &new_cl);
1023
        if (err == 0)
1024
                tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS);
1025
 
1026
out:
1027
        if (cl)
1028
                cops->put(q, cl);
1029
 
1030
        return err;
1031
}
1032
 
1033
 
1034
static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1035
                          unsigned long cl,
1036
                          u32 pid, u32 seq, u16 flags, int event)
1037
{
1038
        struct tcmsg *tcm;
1039
        struct nlmsghdr  *nlh;
1040
        unsigned char *b = skb_tail_pointer(skb);
1041
        struct gnet_dump d;
1042
        struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
1043
 
1044
        nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
1045
        tcm = NLMSG_DATA(nlh);
1046
        tcm->tcm_family = AF_UNSPEC;
1047
        tcm->tcm_ifindex = q->dev->ifindex;
1048
        tcm->tcm_parent = q->handle;
1049
        tcm->tcm_handle = q->handle;
1050
        tcm->tcm_info = 0;
1051
        RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id);
1052
        if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
1053
                goto rtattr_failure;
1054
 
1055
        if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
1056
                        TCA_XSTATS, q->stats_lock, &d) < 0)
1057
                goto rtattr_failure;
1058
 
1059
        if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
1060
                goto rtattr_failure;
1061
 
1062
        if (gnet_stats_finish_copy(&d) < 0)
1063
                goto rtattr_failure;
1064
 
1065
        nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1066
        return skb->len;
1067
 
1068
nlmsg_failure:
1069
rtattr_failure:
1070
        nlmsg_trim(skb, b);
1071
        return -1;
1072
}
1073
 
1074
static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
1075
                          struct Qdisc *q, unsigned long cl, int event)
1076
{
1077
        struct sk_buff *skb;
1078
        u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
1079
 
1080
        skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1081
        if (!skb)
1082
                return -ENOBUFS;
1083
 
1084
        if (tc_fill_tclass(skb, q, cl, pid, n->nlmsg_seq, 0, event) < 0) {
1085
                kfree_skb(skb);
1086
                return -EINVAL;
1087
        }
1088
 
1089
        return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
1090
}
1091
 
1092
struct qdisc_dump_args
1093
{
1094
        struct qdisc_walker w;
1095
        struct sk_buff *skb;
1096
        struct netlink_callback *cb;
1097
};
1098
 
1099
static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
1100
{
1101
        struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
1102
 
1103
        return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).pid,
1104
                              a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
1105
}
1106
 
1107
static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
1108
{
1109
        int t;
1110
        int s_t;
1111
        struct net_device *dev;
1112
        struct Qdisc *q;
1113
        struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
1114
        struct qdisc_dump_args arg;
1115
 
1116
        if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
1117
                return 0;
1118
        if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
1119
                return 0;
1120
 
1121
        s_t = cb->args[0];
1122
        t = 0;
1123
 
1124
        list_for_each_entry(q, &dev->qdisc_list, list) {
1125
                if (t < s_t || !q->ops->cl_ops ||
1126
                    (tcm->tcm_parent &&
1127
                     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
1128
                        t++;
1129
                        continue;
1130
                }
1131
                if (t > s_t)
1132
                        memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
1133
                arg.w.fn = qdisc_class_dump;
1134
                arg.skb = skb;
1135
                arg.cb = cb;
1136
                arg.w.stop  = 0;
1137
                arg.w.skip = cb->args[1];
1138
                arg.w.count = 0;
1139
                q->ops->cl_ops->walk(q, &arg.w);
1140
                cb->args[1] = arg.w.count;
1141
                if (arg.w.stop)
1142
                        break;
1143
                t++;
1144
        }
1145
 
1146
        cb->args[0] = t;
1147
 
1148
        dev_put(dev);
1149
        return skb->len;
1150
}
1151
 
1152
/* Main classifier routine: scans classifier chain attached
1153
   to this qdisc, (optionally) tests for protocol and asks
1154
   specific classifiers.
1155
 */
1156
int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp,
1157
                       struct tcf_result *res)
1158
{
1159
        __be16 protocol = skb->protocol;
1160
        int err = 0;
1161
 
1162
        for (; tp; tp = tp->next) {
1163
                if ((tp->protocol == protocol ||
1164
                     tp->protocol == htons(ETH_P_ALL)) &&
1165
                    (err = tp->classify(skb, tp, res)) >= 0) {
1166
#ifdef CONFIG_NET_CLS_ACT
1167
                        if (err != TC_ACT_RECLASSIFY && skb->tc_verd)
1168
                                skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
1169
#endif
1170
                        return err;
1171
                }
1172
        }
1173
        return -1;
1174
}
1175
EXPORT_SYMBOL(tc_classify_compat);
1176
 
1177
int tc_classify(struct sk_buff *skb, struct tcf_proto *tp,
1178
                struct tcf_result *res)
1179
{
1180
        int err = 0;
1181
        __be16 protocol;
1182
#ifdef CONFIG_NET_CLS_ACT
1183
        struct tcf_proto *otp = tp;
1184
reclassify:
1185
#endif
1186
        protocol = skb->protocol;
1187
 
1188
        err = tc_classify_compat(skb, tp, res);
1189
#ifdef CONFIG_NET_CLS_ACT
1190
        if (err == TC_ACT_RECLASSIFY) {
1191
                u32 verd = G_TC_VERD(skb->tc_verd);
1192
                tp = otp;
1193
 
1194
                if (verd++ >= MAX_REC_LOOP) {
1195
                        printk("rule prio %u protocol %02x reclassify loop, "
1196
                               "packet dropped\n",
1197
                               tp->prio&0xffff, ntohs(tp->protocol));
1198
                        return TC_ACT_SHOT;
1199
                }
1200
                skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
1201
                goto reclassify;
1202
        }
1203
#endif
1204
        return err;
1205
}
1206
EXPORT_SYMBOL(tc_classify);
1207
 
1208
void tcf_destroy(struct tcf_proto *tp)
1209
{
1210
        tp->ops->destroy(tp);
1211
        module_put(tp->ops->owner);
1212
        kfree(tp);
1213
}
1214
 
1215
void tcf_destroy_chain(struct tcf_proto *fl)
1216
{
1217
        struct tcf_proto *tp;
1218
 
1219
        while ((tp = fl) != NULL) {
1220
                fl = tp->next;
1221
                tcf_destroy(tp);
1222
        }
1223
}
1224
EXPORT_SYMBOL(tcf_destroy_chain);
1225
 
1226
#ifdef CONFIG_PROC_FS
1227
static int psched_show(struct seq_file *seq, void *v)
1228
{
1229
        struct timespec ts;
1230
 
1231
        hrtimer_get_res(CLOCK_MONOTONIC, &ts);
1232
        seq_printf(seq, "%08x %08x %08x %08x\n",
1233
                   (u32)NSEC_PER_USEC, (u32)PSCHED_US2NS(1),
1234
                   1000000,
1235
                   (u32)NSEC_PER_SEC/(u32)ktime_to_ns(timespec_to_ktime(ts)));
1236
 
1237
        return 0;
1238
}
1239
 
1240
static int psched_open(struct inode *inode, struct file *file)
1241
{
1242
        return single_open(file, psched_show, PDE(inode)->data);
1243
}
1244
 
1245
static const struct file_operations psched_fops = {
1246
        .owner = THIS_MODULE,
1247
        .open = psched_open,
1248
        .read  = seq_read,
1249
        .llseek = seq_lseek,
1250
        .release = single_release,
1251
};
1252
#endif
1253
 
1254
static int __init pktsched_init(void)
1255
{
1256
        register_qdisc(&pfifo_qdisc_ops);
1257
        register_qdisc(&bfifo_qdisc_ops);
1258
        proc_net_fops_create(&init_net, "psched", 0, &psched_fops);
1259
 
1260
        rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL);
1261
        rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL);
1262
        rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc);
1263
        rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL);
1264
        rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL);
1265
        rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass);
1266
 
1267
        return 0;
1268
}
1269
 
1270
subsys_initcall(pktsched_init);
1271
 
1272
EXPORT_SYMBOL(qdisc_get_rtab);
1273
EXPORT_SYMBOL(qdisc_put_rtab);
1274
EXPORT_SYMBOL(register_qdisc);
1275
EXPORT_SYMBOL(unregister_qdisc);

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.