/*
 * net/sched/sch_api.c  Packet scheduler API.
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *
 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
 */

#include <linux/config.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/kmod.h>

#include <net/sock.h>
#include <net/pkt_sched.h>

#include <asm/processor.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/bitops.h>

static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid,
                        struct Qdisc *old, struct Qdisc *new);
static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
                         struct Qdisc *q, unsigned long cl, int event);

/*

   Short review.
   -------------

   This file consists of two interrelated parts:

   1. the queueing disciplines manager frontend.
   2. the traffic classes manager frontend.

   Generally, a queueing discipline ("qdisc") is a black box that is able
   to enqueue packets and to dequeue them (when the device is ready to
   send something) in an order and at times determined by the algorithm
   hidden inside it.

   qdiscs are divided into two categories:
   - "queues", which have no internal structure visible from outside.
   - "schedulers", which split all the packets into "traffic classes",
     using "packet classifiers" (see cls_api.c).

   In turn, classes may have child qdiscs (as a rule, queues)
   attached to them, and so on.

   The goal of the routines in this file is to translate the information
   supplied by the user in the form of handles into a form more
   intelligible to the kernel, to perform some sanity checks and the part
   of the work that is common to all qdiscs, and to provide rtnetlink
   notifications.

   All the real intelligent work is done inside the qdisc modules.


   Every discipline has two major routines: enqueue and dequeue.

   ---dequeue

   dequeue usually returns an skb to send. It is allowed to return NULL,
   but that does not mean the queue is empty; it just means that the
   discipline does not want to send anything this time.
   The queue is really empty if q->q.qlen == 0.
   For complicated disciplines with multiple queues q->q is not the
   real packet queue, but q->q.qlen must still be valid.

   ---enqueue

   enqueue returns 0 if the packet was enqueued successfully.
   If a packet (this one or another one) was dropped, it returns
   a non-zero error code.
   NET_XMIT_DROP        - this packet was dropped.
     Expected action: do not back off, but wait until the queue clears.
   NET_XMIT_CN          - probably this packet was enqueued, but another one was dropped.
     Expected action: back off or ignore.
   NET_XMIT_POLICED     - dropped by policing.
     Expected action: back off or report an error to real-time apps.

   Auxiliary routines:

   ---requeue

   requeues a packet that was dequeued once. It is used for non-standard
   or just buggy devices, which can defer output even if dev->tbusy=0.

   ---reset

   returns the qdisc to its initial state: purges all buffers, clears all
   timers, counters (except statistics) etc.

   ---init

   initializes a newly created qdisc.

   ---destroy

   destroys the resources allocated by init and during the lifetime of the qdisc.

   ---change

   changes the qdisc parameters.
 */
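
/* Example (sketch, not from the original sources): the enqueue/dequeue
 * contract described above, loosely following what a simple FIFO such as
 * net/sched/sch_fifo.c does.  The "example_" names are hypothetical and the
 * queue-limit check is simplified; the block is kept inside #if 0 so it is
 * not compiled.
 */
#if 0
static int example_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
        if (sch->q.qlen < sch->dev->tx_queue_len) {
                __skb_queue_tail(&sch->q, skb);
                sch->stats.bytes += skb->len;
                sch->stats.packets++;
                return 0;               /* enqueued successfully */
        }
        sch->stats.drops++;
        kfree_skb(skb);
        return NET_XMIT_DROP;           /* this very packet was dropped */
}

static struct sk_buff *example_dequeue(struct Qdisc *sch)
{
        /* For this trivial discipline a NULL return really does mean
         * "queue empty"; smarter schedulers may return NULL while
         * q.qlen != 0, as noted above. */
        return __skb_dequeue(&sch->q);
}
#endif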

/* Protects the list of registered TC modules. It is a pure SMP lock. */
static rwlock_t qdisc_mod_lock = RW_LOCK_UNLOCKED;


/************************************************
 *      Queueing disciplines manipulation.      *
 ************************************************/


/* The list of all installed queueing disciplines. */

static struct Qdisc_ops *qdisc_base = NULL;

/* Register/unregister queueing discipline */

int register_qdisc(struct Qdisc_ops *qops)
{
        struct Qdisc_ops *q, **qp;

        write_lock(&qdisc_mod_lock);
        for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next) {
                if (strcmp(qops->id, q->id) == 0) {
                        write_unlock(&qdisc_mod_lock);
                        return -EEXIST;
                }
        }

        if (qops->enqueue == NULL)
                qops->enqueue = noop_qdisc_ops.enqueue;
        if (qops->requeue == NULL)
                qops->requeue = noop_qdisc_ops.requeue;
        if (qops->dequeue == NULL)
                qops->dequeue = noop_qdisc_ops.dequeue;

        qops->next = NULL;
        *qp = qops;
        write_unlock(&qdisc_mod_lock);
        return 0;
}

int unregister_qdisc(struct Qdisc_ops *qops)
{
        struct Qdisc_ops *q, **qp;
        int err = -ENOENT;

        write_lock(&qdisc_mod_lock);
        for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next)
                if (q == qops)
                        break;
        if (q) {
                *qp = q->next;
                q->next = NULL;
                err = 0;
        }
        write_unlock(&qdisc_mod_lock);
        return err;
}
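
/* Example (sketch, not from the original sources): how a scheduler module
 * typically uses register_qdisc()/unregister_qdisc().  The "example_" names
 * are hypothetical; in-tree schedulers follow the same pattern (compare the
 * INIT_QDISC() calls in pktsched_init() below).  Kept inside #if 0 so it is
 * not compiled.
 */
#if 0
static struct Qdisc_ops example_qdisc_ops;      /* id, priv_size, enqueue,
                                                   dequeue, ... filled in
                                                   by the module */

static int __init example_module_init(void)
{
        return register_qdisc(&example_qdisc_ops);
}

static void __exit example_module_exit(void)
{
        unregister_qdisc(&example_qdisc_ops);
}

module_init(example_module_init);
module_exit(example_module_exit);
#endif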

/* We know the handle. Find the qdisc among all qdiscs attached to the
   device (root qdisc, all its children, children of children etc.)
 */

struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
        struct Qdisc *q;

        for (q = dev->qdisc_list; q; q = q->next) {
                if (q->handle == handle)
                        return q;
        }
        return NULL;
}

struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
{
        unsigned long cl;
        struct Qdisc *leaf;
        struct Qdisc_class_ops *cops = p->ops->cl_ops;

        if (cops == NULL)
                return NULL;
        cl = cops->get(p, classid);

        if (cl == 0)
                return NULL;
        leaf = cops->leaf(p, cl);
        cops->put(p, cl);
        return leaf;
}

/* Find a queueing discipline by name */

struct Qdisc_ops *qdisc_lookup_ops(struct rtattr *kind)
{
        struct Qdisc_ops *q = NULL;

        if (kind) {
                read_lock(&qdisc_mod_lock);
                for (q = qdisc_base; q; q = q->next) {
                        if (rtattr_strcmp(kind, q->id) == 0)
                                break;
                }
                read_unlock(&qdisc_mod_lock);
        }
        return q;
}

static struct qdisc_rate_table *qdisc_rtab_list;

struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct rtattr *tab)
{
        struct qdisc_rate_table *rtab;

        for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
                if (memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) == 0) {
                        rtab->refcnt++;
                        return rtab;
                }
        }

        if (tab == NULL || r->rate == 0 || r->cell_log == 0 || RTA_PAYLOAD(tab) != 1024)
                return NULL;

        rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
        if (rtab) {
                rtab->rate = *r;
                rtab->refcnt = 1;
                memcpy(rtab->data, RTA_DATA(tab), 1024);
                rtab->next = qdisc_rtab_list;
                qdisc_rtab_list = rtab;
        }
        return rtab;
}

void qdisc_put_rtab(struct qdisc_rate_table *tab)
{
        struct qdisc_rate_table *rtab, **rtabp;

        if (!tab || --tab->refcnt)
                return;

        for (rtabp = &qdisc_rtab_list; (rtab=*rtabp) != NULL; rtabp = &rtab->next) {
                if (rtab == tab) {
                        *rtabp = rtab->next;
                        kfree(rtab);
                        return;
                }
        }
}


/* Allocate a unique handle from the space managed by the kernel */

u32 qdisc_alloc_handle(struct net_device *dev)
{
        int i = 0x10000;
        static u32 autohandle = TC_H_MAKE(0x80000000U, 0);

        do {
                autohandle += TC_H_MAKE(0x10000U, 0);
                if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
                        autohandle = TC_H_MAKE(0x80000000U, 0);
        } while (qdisc_lookup(dev, autohandle) && --i > 0);

        return i>0 ? autohandle : 0;
}
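
/* Worked example (not from the original sources): a qdisc handle is a 32-bit
 * value with the major number in the upper 16 bits and the minor in the lower
 * 16, and TC_H_MAKE(maj, min) simply combines the two halves.  Starting from
 * the static value 0x80000000, the first handle qdisc_alloc_handle() hands
 * out is 0x80000000 + 0x00010000 = 0x80010000, i.e. "8001:" in tc notation;
 * the next free one is 8002:, and so on.  0 is returned only if roughly
 * 0x10000 candidate majors in a row are already taken.
 */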

/* Attach toplevel qdisc to device dev */

static struct Qdisc *
dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc)
{
        struct Qdisc *oqdisc;

        if (dev->flags & IFF_UP)
                dev_deactivate(dev);

        write_lock(&qdisc_tree_lock);
        spin_lock_bh(&dev->queue_lock);
        if (qdisc && qdisc->flags&TCQ_F_INGRES) {
                oqdisc = dev->qdisc_ingress;
                /* Prune old scheduler */
                if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) {
                        /* delete */
                        qdisc_reset(oqdisc);
                        dev->qdisc_ingress = NULL;
                } else {  /* new */
                        dev->qdisc_ingress = qdisc;
                }

        } else {

                oqdisc = dev->qdisc_sleeping;

                /* Prune old scheduler */
                if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)
                        qdisc_reset(oqdisc);

                /* ... and graft new one */
                if (qdisc == NULL)
                        qdisc = &noop_qdisc;
                dev->qdisc_sleeping = qdisc;
                dev->qdisc = &noop_qdisc;
        }

        spin_unlock_bh(&dev->queue_lock);
        write_unlock(&qdisc_tree_lock);

        if (dev->flags & IFF_UP)
                dev_activate(dev);

        return oqdisc;
}


/* Graft qdisc "new" to class "classid" of qdisc "parent" or
   to device "dev".

   Old qdisc is not destroyed but returned in *old.
 */

int qdisc_graft(struct net_device *dev, struct Qdisc *parent, u32 classid,
                struct Qdisc *new, struct Qdisc **old)
{
        int err = 0;
        struct Qdisc *q = *old;


        if (parent == NULL) {
                if (q && q->flags&TCQ_F_INGRES) {
                        *old = dev_graft_qdisc(dev, q);
                } else {
                        *old = dev_graft_qdisc(dev, new);
                }
        } else {
                struct Qdisc_class_ops *cops = parent->ops->cl_ops;

                err = -EINVAL;

                if (cops) {
                        unsigned long cl = cops->get(parent, classid);
                        if (cl) {
                                err = cops->graft(parent, cl, new, old);
                                cops->put(parent, cl);
                        }
                }
        }
        return err;
}

/*
   Allocate and initialize new qdisc.

   Parameters are passed via opt.
 */

static struct Qdisc *
qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp)
{
        int err;
        struct rtattr *kind = tca[TCA_KIND-1];
        struct Qdisc *sch = NULL;
        struct Qdisc_ops *ops;
        int size;

        ops = qdisc_lookup_ops(kind);
#ifdef CONFIG_KMOD
        if (ops==NULL && tca[TCA_KIND-1] != NULL) {
                char module_name[4 + IFNAMSIZ + 1];

                if (RTA_PAYLOAD(kind) <= IFNAMSIZ) {
                        sprintf(module_name, "sch_%s", (char*)RTA_DATA(kind));
                        request_module(module_name);
                        ops = qdisc_lookup_ops(kind);
                }
        }
#endif

        err = -EINVAL;
        if (ops == NULL)
                goto err_out;

        size = sizeof(*sch) + ops->priv_size;

        sch = kmalloc(size, GFP_KERNEL);
        err = -ENOBUFS;
        if (!sch)
                goto err_out;

        /* Grrr... Resolve race condition with module unload */

        err = -EINVAL;
        if (ops != qdisc_lookup_ops(kind))
                goto err_out;

        memset(sch, 0, size);

        skb_queue_head_init(&sch->q);

        if (handle == TC_H_INGRESS)
                sch->flags |= TCQ_F_INGRES;

        sch->ops = ops;
        sch->enqueue = ops->enqueue;
        sch->dequeue = ops->dequeue;
        sch->dev = dev;
        atomic_set(&sch->refcnt, 1);
        sch->stats.lock = &dev->queue_lock;
        if (handle == 0) {
                handle = qdisc_alloc_handle(dev);
                err = -ENOMEM;
                if (handle == 0)
                        goto err_out;
        }

        if (handle == TC_H_INGRESS)
                sch->handle = TC_H_MAKE(TC_H_INGRESS, 0);
        else
                sch->handle = handle;

        if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) {
                write_lock(&qdisc_tree_lock);
                sch->next = dev->qdisc_list;
                dev->qdisc_list = sch;
                write_unlock(&qdisc_tree_lock);
#ifdef CONFIG_NET_ESTIMATOR
                if (tca[TCA_RATE-1])
                        qdisc_new_estimator(&sch->stats, tca[TCA_RATE-1]);
#endif
                return sch;
        }

err_out:
        *errp = err;
        if (sch)
                kfree(sch);
        return NULL;
}

static int qdisc_change(struct Qdisc *sch, struct rtattr **tca)
{
        if (tca[TCA_OPTIONS-1]) {
                int err;

                if (sch->ops->change == NULL)
                        return -EINVAL;
                err = sch->ops->change(sch, tca[TCA_OPTIONS-1]);
                if (err)
                        return err;
        }
#ifdef CONFIG_NET_ESTIMATOR
        if (tca[TCA_RATE-1]) {
                qdisc_kill_estimator(&sch->stats);
                qdisc_new_estimator(&sch->stats, tca[TCA_RATE-1]);
        }
#endif
        return 0;
}

struct check_loop_arg
{
        struct qdisc_walker     w;
        struct Qdisc            *p;
        int                     depth;
};

static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);

static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
{
        struct check_loop_arg   arg;

        if (q->ops->cl_ops == NULL)
                return 0;

        arg.w.stop = arg.w.skip = arg.w.count = 0;
        arg.w.fn = check_loop_fn;
        arg.depth = depth;
        arg.p = p;
        q->ops->cl_ops->walk(q, &arg.w);
        return arg.w.stop ? -ELOOP : 0;
}

static int
check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
{
        struct Qdisc *leaf;
        struct Qdisc_class_ops *cops = q->ops->cl_ops;
        struct check_loop_arg *arg = (struct check_loop_arg *)w;

        leaf = cops->leaf(q, cl);
        if (leaf) {
                if (leaf == arg->p || arg->depth > 7)
                        return -ELOOP;
                return check_loop(leaf, arg->p, arg->depth + 1);
        }
        return 0;
}

/*
 * Delete/get qdisc.
 */

static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
        struct tcmsg *tcm = NLMSG_DATA(n);
        struct rtattr **tca = arg;
        struct net_device *dev;
        u32 clid = tcm->tcm_parent;
        struct Qdisc *q = NULL;
        struct Qdisc *p = NULL;
        int err;

        if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
                return -ENODEV;

        if (clid) {
                if (clid != TC_H_ROOT) {
                        if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
                                if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
                                        return -ENOENT;
                                q = qdisc_leaf(p, clid);
                        } else { /* ingress */
                                q = dev->qdisc_ingress;
                        }
                } else {
                        q = dev->qdisc_sleeping;
                }
                if (!q)
                        return -ENOENT;

                if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
                        return -EINVAL;
        } else {
                if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
                        return -ENOENT;
        }

        if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
                return -EINVAL;

        if (n->nlmsg_type == RTM_DELQDISC) {
                if (!clid)
                        return -EINVAL;
                if (q->handle == 0)
                        return -ENOENT;
                if ((err = qdisc_graft(dev, p, clid, NULL, &q)) != 0)
                        return err;
                if (q) {
                        qdisc_notify(skb, n, clid, q, NULL);
                        spin_lock_bh(&dev->queue_lock);
                        qdisc_destroy(q);
                        spin_unlock_bh(&dev->queue_lock);
                }
        } else {
                qdisc_notify(skb, n, clid, NULL, q);
        }
        return 0;
}

/*
   Create/change qdisc.
 */

static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
        struct tcmsg *tcm = NLMSG_DATA(n);
        struct rtattr **tca = arg;
        struct net_device *dev;
        u32 clid = tcm->tcm_parent;
        struct Qdisc *q = NULL;
        struct Qdisc *p = NULL;
        int err;

        if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
                return -ENODEV;

        if (clid) {
                if (clid != TC_H_ROOT) {
                        if (clid != TC_H_INGRESS) {
                                if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
                                        return -ENOENT;
                                q = qdisc_leaf(p, clid);
                        } else { /* ingress */
                                q = dev->qdisc_ingress;
                        }
                } else {
                        q = dev->qdisc_sleeping;
                }

                /* It may be the default qdisc, ignore it */
                if (q && q->handle == 0)
                        q = NULL;

                if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
                        if (tcm->tcm_handle) {
                                if (q && !(n->nlmsg_flags&NLM_F_REPLACE))
                                        return -EEXIST;
                                if (TC_H_MIN(tcm->tcm_handle))
                                        return -EINVAL;
                                if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
                                        goto create_n_graft;
                                if (n->nlmsg_flags&NLM_F_EXCL)
                                        return -EEXIST;
                                if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
                                        return -EINVAL;
                                if (q == p ||
                                    (p && check_loop(q, p, 0)))
                                        return -ELOOP;
                                atomic_inc(&q->refcnt);
                                goto graft;
                        } else {
                                if (q == NULL)
                                        goto create_n_graft;

                                /* This magic test requires explanation.
                                 *
                                 *   We know that some child q is already
                                 *   attached to this parent and we have a choice:
                                 *   either to change it or to create/graft a new one.
                                 *
                                 *   1. We are allowed to create/graft only
                                 *   if both the CREATE and REPLACE flags are set.
                                 *
                                 *   2. If EXCL is set, the requestor meant that
                                 *   the qdisc tcm_handle is not expected to exist,
                                 *   so we choose create/graft too.
                                 *
                                 *   3. The last case is when no flags are set.
                                 *   Alas, it is a sort of hole in the API; we
                                 *   cannot decide what to do unambiguously.
                                 *   For now we select create/graft if the
                                 *   user gave a KIND which does not match the existing one.
                                 */
                                if ((n->nlmsg_flags&NLM_F_CREATE) &&
                                    (n->nlmsg_flags&NLM_F_REPLACE) &&
                                    ((n->nlmsg_flags&NLM_F_EXCL) ||
                                     (tca[TCA_KIND-1] &&
                                      rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))))
                                        goto create_n_graft;
                        }
                }
        } else {
                if (!tcm->tcm_handle)
                        return -EINVAL;
                q = qdisc_lookup(dev, tcm->tcm_handle);
        }

        /* Change qdisc parameters */
        if (q == NULL)
                return -ENOENT;
        if (n->nlmsg_flags&NLM_F_EXCL)
                return -EEXIST;
        if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
                return -EINVAL;
        err = qdisc_change(q, tca);
        if (err == 0)
                qdisc_notify(skb, n, clid, NULL, q);
        return err;

create_n_graft:
        if (!(n->nlmsg_flags&NLM_F_CREATE))
                return -ENOENT;
        if (clid == TC_H_INGRESS)
                q = qdisc_create(dev, tcm->tcm_parent, tca, &err);
        else
                q = qdisc_create(dev, tcm->tcm_handle, tca, &err);
        if (q == NULL)
                return err;

graft:
        if (1) {
                struct Qdisc *old_q = NULL;
                err = qdisc_graft(dev, p, clid, q, &old_q);
                if (err) {
                        if (q) {
                                spin_lock_bh(&dev->queue_lock);
                                qdisc_destroy(q);
                                spin_unlock_bh(&dev->queue_lock);
                        }
                        return err;
                }
                qdisc_notify(skb, n, clid, old_q, q);
                if (old_q) {
                        spin_lock_bh(&dev->queue_lock);
                        qdisc_destroy(old_q);
                        spin_unlock_bh(&dev->queue_lock);
                }
        }
        return 0;
}
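
/* For reference (not from the original sources): the flag combinations
 * tested in tc_modify_qdisc() above correspond to what the user-space tc
 * utility typically sends, roughly:
 *
 *   tc qdisc add      ->  NLM_F_CREATE | NLM_F_EXCL     (fail if one exists)
 *   tc qdisc change   ->  no flags                      (modify in place only)
 *   tc qdisc replace  ->  NLM_F_CREATE | NLM_F_REPLACE  (create or swap)
 *
 * which is why the "magic test" treats CREATE together with REPLACE as
 * permission to graft a new qdisc over an existing child.
 */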

int qdisc_copy_stats(struct sk_buff *skb, struct tc_stats *st)
{
        spin_lock_bh(st->lock);
        RTA_PUT(skb, TCA_STATS, (char*)&st->lock - (char*)st, st);
        spin_unlock_bh(st->lock);
        return 0;

rtattr_failure:
        spin_unlock_bh(st->lock);
        return -1;
}


static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
                         u32 pid, u32 seq, unsigned flags, int event)
{
        struct tcmsg *tcm;
        struct nlmsghdr  *nlh;
        unsigned char    *b = skb->tail;

        nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm));
        nlh->nlmsg_flags = flags;
        tcm = NLMSG_DATA(nlh);
        tcm->tcm_family = AF_UNSPEC;
        tcm->tcm_ifindex = q->dev ? q->dev->ifindex : 0;
        tcm->tcm_parent = clid;
        tcm->tcm_handle = q->handle;
        tcm->tcm_info = atomic_read(&q->refcnt);
        RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id);
        if (q->ops->dump && q->ops->dump(q, skb) < 0)
                goto rtattr_failure;
        q->stats.qlen = q->q.qlen;
        if (qdisc_copy_stats(skb, &q->stats))
                goto rtattr_failure;
        nlh->nlmsg_len = skb->tail - b;
        return skb->len;

nlmsg_failure:
rtattr_failure:
        skb_trim(skb, b - skb->data);
        return -1;
}

static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
                        u32 clid, struct Qdisc *old, struct Qdisc *new)
{
        struct sk_buff *skb;
        u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;

        skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
        if (!skb)
                return -ENOBUFS;

        if (old && old->handle) {
                if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
                        goto err_out;
        }
        if (new) {
                if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
                        goto err_out;
        }

        if (skb->len)
                return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO);

err_out:
        kfree_skb(skb);
        return -EINVAL;
}

static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
{
        int idx, q_idx;
        int s_idx, s_q_idx;
        struct net_device *dev;
        struct Qdisc *q;

        s_idx = cb->args[0];
        s_q_idx = q_idx = cb->args[1];
        read_lock(&dev_base_lock);
        for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
                if (idx < s_idx)
                        continue;
                if (idx > s_idx)
                        s_q_idx = 0;
                read_lock(&qdisc_tree_lock);
                for (q = dev->qdisc_list, q_idx = 0; q;
                     q = q->next, q_idx++) {
                        if (q_idx < s_q_idx)
                                continue;
                        if (tc_fill_qdisc(skb, q, 0, NETLINK_CB(cb->skb).pid,
                                          cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) {
                                read_unlock(&qdisc_tree_lock);
                                goto done;
                        }
                }
                read_unlock(&qdisc_tree_lock);
        }

done:
        read_unlock(&dev_base_lock);

        cb->args[0] = idx;
        cb->args[1] = q_idx;

        return skb->len;
}


/************************************************
 *      Traffic classes manipulation.           *
 ************************************************/


static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
        struct tcmsg *tcm = NLMSG_DATA(n);
        struct rtattr **tca = arg;
        struct net_device *dev;
        struct Qdisc *q = NULL;
        struct Qdisc_class_ops *cops;
        unsigned long cl = 0;
        unsigned long new_cl;
        u32 pid = tcm->tcm_parent;
        u32 clid = tcm->tcm_handle;
        u32 qid = TC_H_MAJ(clid);
        int err;

        if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
                return -ENODEV;

        /*
           parent == TC_H_UNSPEC - unspecified parent.
           parent == TC_H_ROOT   - class is root, which has no parent.
           parent == X:0         - parent is root class.
           parent == X:Y         - parent is a node in hierarchy.
           parent == 0:Y         - parent is X:Y, where X:0 is qdisc.

           handle == 0:0         - generate handle from kernel pool.
           handle == 0:Y         - class is X:Y, where X:0 is qdisc.
           handle == X:Y         - clear (fully specified).
           handle == X:0         - root class.
         */

        /* Step 1. Determine qdisc handle X:0 */

        if (pid != TC_H_ROOT) {
                u32 qid1 = TC_H_MAJ(pid);

                if (qid && qid1) {
                        /* If both majors are known, they must be identical. */
                        if (qid != qid1)
                                return -EINVAL;
                } else if (qid1) {
                        qid = qid1;
                } else if (qid == 0)
                        qid = dev->qdisc_sleeping->handle;

                /* Now qid is a genuine qdisc handle consistent with
                   both parent and child.

                   TC_H_MAJ(pid) may still be unspecified, complete it now.
                 */
                if (pid)
                        pid = TC_H_MAKE(qid, pid);
        } else {
                if (qid == 0)
                        qid = dev->qdisc_sleeping->handle;
        }

        /* OK. Locate the qdisc */
        if ((q = qdisc_lookup(dev, qid)) == NULL)
                return -ENOENT;

        /* And check that it supports classes */
        cops = q->ops->cl_ops;
        if (cops == NULL)
                return -EINVAL;

        /* Now try to get the class */
        if (clid == 0) {
                if (pid == TC_H_ROOT)
                        clid = qid;
        } else
                clid = TC_H_MAKE(qid, clid);

        if (clid)
                cl = cops->get(q, clid);

        if (cl == 0) {
                err = -ENOENT;
                if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE))
                        goto out;
        } else {
                switch (n->nlmsg_type) {
                case RTM_NEWTCLASS:
                        err = -EEXIST;
                        if (n->nlmsg_flags&NLM_F_EXCL)
                                goto out;
                        break;
                case RTM_DELTCLASS:
                        err = cops->delete(q, cl);
                        if (err == 0)
                                tclass_notify(skb, n, q, cl, RTM_DELTCLASS);
                        goto out;
                case RTM_GETTCLASS:
                        err = tclass_notify(skb, n, q, cl, RTM_NEWTCLASS);
                        goto out;
                default:
                        err = -EINVAL;
                        goto out;
                }
        }

        new_cl = cl;
        err = cops->change(q, clid, pid, tca, &new_cl);
        if (err == 0)
                tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS);

out:
        if (cl)
                cops->put(q, cl);

        return err;
}


static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
                          unsigned long cl,
                          u32 pid, u32 seq, unsigned flags, int event)
{
        struct tcmsg *tcm;
        struct nlmsghdr  *nlh;
        unsigned char    *b = skb->tail;

        nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm));
        nlh->nlmsg_flags = flags;
        tcm = NLMSG_DATA(nlh);
        tcm->tcm_family = AF_UNSPEC;
        tcm->tcm_ifindex = q->dev ? q->dev->ifindex : 0;
        tcm->tcm_parent = q->handle;
        tcm->tcm_handle = q->handle;
        tcm->tcm_info = 0;
        RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id);
        if (q->ops->cl_ops->dump && q->ops->cl_ops->dump(q, cl, skb, tcm) < 0)
                goto rtattr_failure;
        nlh->nlmsg_len = skb->tail - b;
        return skb->len;

nlmsg_failure:
rtattr_failure:
        skb_trim(skb, b - skb->data);
        return -1;
}

static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
                          struct Qdisc *q, unsigned long cl, int event)
{
        struct sk_buff *skb;
        u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;

        skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
        if (!skb)
                return -ENOBUFS;

        if (tc_fill_tclass(skb, q, cl, pid, n->nlmsg_seq, 0, event) < 0) {
                kfree_skb(skb);
                return -EINVAL;
        }

        return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
}

struct qdisc_dump_args
{
        struct qdisc_walker w;
        struct sk_buff *skb;
        struct netlink_callback *cb;
};

static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
{
        struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;

        return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).pid,
                              a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
}

static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
        int t;
        int s_t;
        struct net_device *dev;
        struct Qdisc *q;
        struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
        struct qdisc_dump_args arg;

        if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
                return 0;
        if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL)
                return 0;

        s_t = cb->args[0];

        read_lock(&qdisc_tree_lock);
        for (q=dev->qdisc_list, t=0; q; q = q->next, t++) {
                if (t < s_t) continue;
                if (!q->ops->cl_ops) continue;
                if (tcm->tcm_parent && TC_H_MAJ(tcm->tcm_parent) != q->handle)
                        continue;
                if (t > s_t)
                        memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
                arg.w.fn = qdisc_class_dump;
                arg.skb = skb;
                arg.cb = cb;
                arg.w.stop  = 0;
                arg.w.skip = cb->args[1];
                arg.w.count = 0;
                q->ops->cl_ops->walk(q, &arg.w);
                cb->args[1] = arg.w.count;
                if (arg.w.stop)
                        break;
        }
        read_unlock(&qdisc_tree_lock);

        cb->args[0] = t;

        dev_put(dev);
        return skb->len;
}

int psched_us_per_tick = 1;
int psched_tick_per_us = 1;

#ifdef CONFIG_PROC_FS
static int psched_read_proc(char *buffer, char **start, off_t offset,
                             int length, int *eof, void *data)
{
        int len;

        len = sprintf(buffer, "%08x %08x %08x %08x\n",
                      psched_tick_per_us, psched_us_per_tick,
                      1000000, HZ);

        len -= offset;

        if (len > length)
                len = length;
        if (len < 0)
                len = 0;

        *start = buffer + offset;
        *eof = 1;

        return len;
}
#endif

#if PSCHED_CLOCK_SOURCE == PSCHED_GETTIMEOFDAY
int psched_tod_diff(int delta_sec, int bound)
{
        int delta;

        if (bound <= 1000000 || delta_sec > (0x7FFFFFFF/1000000)-1)
                return bound;
        delta = delta_sec * 1000000;
        if (delta > bound)
                delta = bound;
        return delta;
}
#endif

psched_time_t psched_time_base;

#if PSCHED_CLOCK_SOURCE == PSCHED_CPU
psched_tdiff_t psched_clock_per_hz;
int psched_clock_scale;
#endif

#ifdef PSCHED_WATCHER
PSCHED_WATCHER psched_time_mark;

static void psched_tick(unsigned long);

static struct timer_list psched_timer =
        { function: psched_tick };

static void psched_tick(unsigned long dummy)
{
#if PSCHED_CLOCK_SOURCE == PSCHED_CPU
        psched_time_t dummy_stamp;
        PSCHED_GET_TIME(dummy_stamp);
        /* It is OK up to 4GHz cpu */
        psched_timer.expires = jiffies + 1*HZ;
#else
        unsigned long now = jiffies;
        psched_time_base += ((u64)(now-psched_time_mark))<<PSCHED_JSCALE;
        psched_time_mark = now;
        psched_timer.expires = now + 60*60*HZ;
#endif
        add_timer(&psched_timer);
}
#endif

#if PSCHED_CLOCK_SOURCE == PSCHED_CPU
int __init psched_calibrate_clock(void)
{
        psched_time_t stamp, stamp1;
        struct timeval tv, tv1;
        psched_tdiff_t delay;
        long rdelay;
        unsigned long stop;

#ifdef PSCHED_WATCHER
        psched_tick(0);
#endif
        stop = jiffies + HZ/10;
        PSCHED_GET_TIME(stamp);
        do_gettimeofday(&tv);
        while (time_before(jiffies, stop)) {
                barrier();
                cpu_relax();
        }
        PSCHED_GET_TIME(stamp1);
        do_gettimeofday(&tv1);

        delay = PSCHED_TDIFF(stamp1, stamp);
        rdelay = tv1.tv_usec - tv.tv_usec;
        rdelay += (tv1.tv_sec - tv.tv_sec)*1000000;
        if (rdelay > delay)
                return -1;
        delay /= rdelay;
        psched_tick_per_us = delay;
        while ((delay>>=1) != 0)
                psched_clock_scale++;
        psched_us_per_tick = 1<<psched_clock_scale;
        psched_clock_per_hz = (psched_tick_per_us*(1000000/HZ))>>psched_clock_scale;
        return 0;
}
#endif
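
/* Worked example (not from the original sources): with HZ=100 the
 * calibration loop above spins for HZ/10 jiffies, i.e. roughly 100 ms.
 * On a hypothetical 500 MHz CPU that gives delay ~= 50,000,000 timestamp
 * ticks against rdelay ~= 100,000 us, so psched_tick_per_us = 500.
 * Shifting 500 right until it reaches zero increments psched_clock_scale
 * 8 times, so psched_us_per_tick = 1 << 8 = 256 and psched_clock_per_hz =
 * (500 * 10000) >> 8 = 19531.  psched_tick_per_us and psched_us_per_tick
 * are the first two values psched_read_proc() above exports through
 * /proc/net/psched.
 */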

int __init pktsched_init(void)
{
        struct rtnetlink_link *link_p;

#if PSCHED_CLOCK_SOURCE == PSCHED_CPU
        if (psched_calibrate_clock() < 0)
                return -1;
#elif PSCHED_CLOCK_SOURCE == PSCHED_JIFFIES
        psched_tick_per_us = HZ<<PSCHED_JSCALE;
        psched_us_per_tick = 1000000;
#ifdef PSCHED_WATCHER
        psched_tick(0);
#endif
#endif

        link_p = rtnetlink_links[PF_UNSPEC];

        /* Set up the rtnetlink links. It is done here to avoid
           exporting a large number of public symbols.
         */

        if (link_p) {
                link_p[RTM_NEWQDISC-RTM_BASE].doit = tc_modify_qdisc;
                link_p[RTM_DELQDISC-RTM_BASE].doit = tc_get_qdisc;
                link_p[RTM_GETQDISC-RTM_BASE].doit = tc_get_qdisc;
                link_p[RTM_GETQDISC-RTM_BASE].dumpit = tc_dump_qdisc;
                link_p[RTM_NEWTCLASS-RTM_BASE].doit = tc_ctl_tclass;
                link_p[RTM_DELTCLASS-RTM_BASE].doit = tc_ctl_tclass;
                link_p[RTM_GETTCLASS-RTM_BASE].doit = tc_ctl_tclass;
                link_p[RTM_GETTCLASS-RTM_BASE].dumpit = tc_dump_tclass;
        }

#define INIT_QDISC(name) { \
          extern struct Qdisc_ops name##_qdisc_ops; \
          register_qdisc(& name##_qdisc_ops);       \
        }

        INIT_QDISC(pfifo);
        INIT_QDISC(bfifo);

#ifdef CONFIG_NET_SCH_CBQ
        INIT_QDISC(cbq);
#endif
#ifdef CONFIG_NET_SCH_HTB
        INIT_QDISC(htb);
#endif
#ifdef CONFIG_NET_SCH_CSZ
        INIT_QDISC(csz);
#endif
#ifdef CONFIG_NET_SCH_HPFQ
        INIT_QDISC(hpfq);
#endif
#ifdef CONFIG_NET_SCH_HFSC
        INIT_QDISC(hfsc);
#endif
#ifdef CONFIG_NET_SCH_RED
        INIT_QDISC(red);
#endif
#ifdef CONFIG_NET_SCH_GRED
        INIT_QDISC(gred);
#endif
#ifdef CONFIG_NET_SCH_INGRESS
        INIT_QDISC(ingress);
#endif
#ifdef CONFIG_NET_SCH_DSMARK
        INIT_QDISC(dsmark);
#endif
#ifdef CONFIG_NET_SCH_SFQ
        INIT_QDISC(sfq);
#endif
#ifdef CONFIG_NET_SCH_TBF
        INIT_QDISC(tbf);
#endif
#ifdef CONFIG_NET_SCH_TEQL
        teql_init();
#endif
#ifdef CONFIG_NET_SCH_PRIO
        INIT_QDISC(prio);
#endif
#ifdef CONFIG_NET_SCH_ATM
        INIT_QDISC(atm);
#endif
#ifdef CONFIG_NET_CLS
        tc_filter_init();
#endif

#ifdef CONFIG_PROC_FS
        create_proc_read_entry("net/psched", 0, 0, psched_read_proc, NULL);
#endif

        return 0;
}
