/*
 * net/sched/sch_csz.c  Clark-Shenker-Zhang scheduler.
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 */

#include <linux/config.h>
#include <linux/module.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include <net/ip.h>
#include <net/route.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/pkt_sched.h>


/*      Clark-Shenker-Zhang algorithm.
        =======================================

        SOURCE.

        David D. Clark, Scott Shenker and Lixia Zhang
        "Supporting Real-Time Applications in an Integrated Services Packet
        Network: Architecture and Mechanism".

        CBQ presents a flexible universal algorithm for packet scheduling,
        but it has pretty poor delay characteristics.
        Round-robin scheduling and link-sharing goals
        apparently contradict minimization of network delay and jitter.
        Moreover, correct handling of predictive flows seems to be
        impossible in CBQ.

        CSZ presents a more precise but less flexible and less efficient
        approach. As I understand it, the main idea is to create
        WFQ flows for each guaranteed service and to allocate
        the rest of the bandwidth to the dummy flow-0. Flow-0 comprises
        the predictive services and the best effort traffic;
        it is handled by a priority scheduler with the highest
        priority band allocated for predictive services, and the rest ---
        to the best effort packets.

        Note that in CSZ flows are NOT limited to their bandwidth.  It
        is supposed that the flow passed admission control at the edge
        of the QoS network and does not need further shaping. Any
        attempt to improve the flow or to shape it to a token bucket
        at intermediate hops will introduce undesired delays and raise
        jitter.

        At the moment CSZ is the only scheduler that provides
        true guaranteed service. Other schemes (including CBQ)
        do not provide guaranteed delay and they randomize jitter.
        There is a proof (Sally Floyd) that delay
        can be estimated by an IntServ compliant formula.
        This result is true formally, but it is wrong in principle.
        It takes into account only round-robin delays,
        ignoring the delays introduced by link sharing, i.e. overlimiting.
        Note that temporary overlimits are inevitable because
        real links are not ideal, and the real algorithm must take this
        into account.

        ALGORITHM.

        --- Notations.

        $B$ is link bandwidth (bits/sec).

        $I$ is the set of all flows, including flow $0$.
        Every flow $a \in I$ has an associated bandwidth slice $r_a < 1$ and
        $\sum_{a \in I} r_a = 1$.

        --- Flow model.

        Let $m_a$ be the number of backlogged bits in flow $a$.
        The flow is {\em active} if $m_a > 0$.
        This number is a discontinuous function of time;
        when a packet $i$ arrives:
        \[
        m_a(t_i+0) - m_a(t_i-0) = L^i,
        \]
        where $L^i$ is the length of the arrived packet.
        The flow queue is drained continuously until $m_a == 0$:
        \[
        {d m_a \over dt} = - { B r_a \over \sum_{b \in A} r_b}.
        \]
        I.e. flow rates are their allocated rates proportionally
        scaled to take all the available link bandwidth. Apparently,
        it is not the only possible policy. E.g. CBQ classes
        without borrowing would be modelled by:
        \[
        {d m_a \over dt} = - B r_a .
        \]
        More complicated hierarchical bandwidth allocation
        policies are possible, but unfortunately, the basic
        flow equations have a simple solution only for proportional
        scaling.

        --- Departure times.

        We calculate the time until the last bit of the packet is sent:
        \[
        E_a^i(t) = { m_a(t_i) - \delta_a(t) \over r_a },
        \]
        where $\delta_a(t)$ is the number of bits drained since $t_i$.
        We have to evaluate $E_a^i$ for all queued packets,
        then find the packet with minimal $E_a^i$ and send it.

        This sounds good, but a direct implementation of the algorithm
        is absolutely infeasible. Luckily, if flow rates
        are scaled proportionally, the equations have a simple solution.

        The differential equation for $E_a^i$ is
        \[
        {d E_a^i (t) \over dt } = - { d \delta_a(t) \over dt} { 1 \over r_a} =
        - { B \over \sum_{b \in A} r_b}
        \]
        with initial condition
        \[
        E_a^i (t_i) = { m_a(t_i) \over r_a } .
        \]

        Let's introduce an auxiliary function $R(t)$:

        --- Round number.

        Consider the following model: we rotate over active flows,
        sending $r_a B$ bits from every flow, so that we send
        $B \sum_{a \in A} r_a$ bits per round, which takes
        $\sum_{a \in A} r_a$ seconds.

        Hence, $R(t)$ (round number) is a monotonically increasing
        linear function of time when $A$ does not change
        \[
        { d R(t) \over dt } = { 1 \over \sum_{a \in A} r_a }
        \]
        and it is continuous when $A$ changes.

        The central observation is that the quantity
        $F_a^i = R(t) + E_a^i(t)/B$ does not depend on time at all!
        $R(t)$ does not depend on the flow, so $F_a^i$ can be
        calculated only once, on packet arrival, and we need not
        recalculate the $E$ numbers or re-sort the queues.
        The number $F_a^i$ is called the finish number of the packet.
        It is just the value of $R(t)$ when the last bit of the packet
        is sent out.

        The maximal finish number on a flow is called the finish number
        of the flow, and the minimal one the start number of the flow.
        Apparently, a flow is active if and only if $F_a \geq R$.

        When a packet of length $L_i$ bits arrives on flow $a$ at time $t_i$,
        we calculate $F_a^i$ as:

        If the flow was inactive ($F_a < R$):
        $F_a^i = R(t) + {L_i \over B r_a}$
        otherwise
        $F_a^i = F_a + {L_i \over B r_a}$

        These equations complete the algorithm specification.

        It looks pretty hairy, but there is a simple
        procedure for solving these equations.
        See the procedure csz_update(), which is a generalization of
        the algorithm from Chapter 3 of S. Keshav's thesis,
        "Efficient Implementation of Fair Queueing".

        NOTES.

        * We implement only the simplest variant of CSZ,
        where flow-0 is an explicit 4-band priority FIFO.
        This is bad, but we would need a "peek" operation in addition
        to "dequeue" to implement complete CSZ.
        I do not want to do that unless it is absolutely
        necessary.

        * Primitive support for token bucket filtering
        is present too. It directly contradicts CSZ, but
        even though the Internet spans the globe ... :-)
        "the edges of the network" really do exist.

        BUGS.

        * Fixed-point arithmetic is overcomplicated, suboptimal and even
        wrong. Check it later.  */
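
/* An illustrative sketch, not used by the scheduler and kept under "#if 0":
   a user-space restatement of the finish-number update described above.
   The names toy_flow and toy_finish_on_arrival are invented for this sketch.
   On arrival of a packet of L bits on flow a, F_a advances by L/(B*r_a),
   either from the current round number R (the flow was inactive) or from
   the previous F_a (the flow was active); csz_enqueue() below does the
   same thing using L2R() and csz_update(). */
#if 0
struct toy_flow {
        double  r;              /* bandwidth slice r_a, 0 < r_a < 1 */
        double  finish;         /* finish number F_a */
};

/* Return the new finish number of flow "a" after a packet of "len" bits
   arrives while the round number is "R"; "B" is the link bandwidth in
   bits/sec. */
static double toy_finish_on_arrival(struct toy_flow *a, double R, double B,
                                    unsigned int len)
{
        double cost = len / (B * a->r);         /* L / (B * r_a) */

        if (a->finish >= R)                     /* flow is active */
                a->finish += cost;
        else                                    /* flow was inactive */
                a->finish = R + cost;
        return a->finish;
}
#endif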


/* This number is arbitrary */

#define CSZ_GUARANTEED          16
#define CSZ_FLOWS               (CSZ_GUARANTEED+4)

struct csz_head
{
        struct csz_head         *snext;
        struct csz_head         *sprev;
        struct csz_head         *fnext;
        struct csz_head         *fprev;
};

struct csz_flow
{
        struct csz_head         *snext;
        struct csz_head         *sprev;
        struct csz_head         *fnext;
        struct csz_head         *fprev;

/* Parameters */
        struct tc_ratespec      rate;
        struct tc_ratespec      slice;
        u32                     *L_tab; /* Lookup table for L/(B*r_a) values */
        unsigned long           limit;  /* Maximal length of queue */
#ifdef CSZ_PLUS_TBF
        struct tc_ratespec      peakrate;
        __u32                   buffer; /* Depth of token bucket, normalized
                                           as L/(B*r_a) */
        __u32                   mtu;
#endif

/* Variables */
#ifdef CSZ_PLUS_TBF
        unsigned long           tokens; /* Tokens number: usecs */
        psched_time_t           t_tbf;
        unsigned long           R_tbf;
        int                     throttled;
#endif
        unsigned                peeked;
        unsigned long           start;  /* Finish number of the first skb */
        unsigned long           finish; /* Finish number of the flow */

        struct sk_buff_head     q;      /* FIFO queue */
};

#define L2R(f,L) ((f)->L_tab[(L)>>(f)->slice.cell_log])
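
/* The L2R() macro above maps a packet length L to the precomputed cost
   L/(B*r_a) of sending that packet on flow f, in the same units as the
   flow's start/finish numbers.  The costs live in the 256-entry table
   f->L_tab (csz_change() below copies exactly 1024 bytes into it from the
   TCA_CSZ_RTAB attribute), indexed by the length shifted right by
   slice.cell_log. */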

struct csz_sched_data
{
/* Parameters */
        unsigned char   rate_log;       /* fixed point position for rate;
                                         * we do not really need it */
        unsigned char   R_log;          /* fixed point position for round number */
        unsigned char   delta_log;      /* 1<<delta_log is maximal timeout in usecs;
                                         * 21 <-> 2.1sec is the MAXIMAL value */

/* Variables */
        struct tcf_proto *filter_list;
        u8      prio2band[TC_PRIO_MAX+1];
#ifdef CSZ_PLUS_TBF
        struct timer_list wd_timer;
        long            wd_expires;
#endif
        psched_time_t   t_c;            /* Time check-point */
        unsigned long   R_c;            /* R-number check-point */
        unsigned long   rate;           /* Current sum of rates of active flows */
        struct csz_head s;              /* Flows sorted by "start" */
        struct csz_head f;              /* Flows sorted by "finish"     */

        struct sk_buff_head     other[4];/* Predictive (0) and the best effort
                                            classes (1,2,3) */
        struct csz_flow flow[CSZ_GUARANTEED]; /* Array of flows */
};

/* These routines (csz_insert_finish and csz_insert_start) are
   the most time consuming part of the whole algorithm.

   We insert into a sorted list, so the time is
   linear in the number of active flows in the worst case.
   Note that the number of guaranteed flows is not very large,
   so logarithmic algorithms (heap etc.) are useless:
   they are slower than the linear one when the list length is <= 32.

   A heap would make sense if we used WFQ for the best-effort
   flows, but SFQ is a better choice in this case.
 */


/* Insert flow "this" into the list "b" before the
   flow with a greater finish number.
 */

#if 0
/* Scan forward */
extern __inline__ void csz_insert_finish(struct csz_head *b,
                                         struct csz_flow *this)
{
        struct csz_head *f = b->fnext;
        unsigned long finish = this->finish;

        while (f != b) {
                if (((struct csz_flow*)f)->finish - finish > 0)
                        break;
                f = f->fnext;
        }
        this->fnext = f;
        this->fprev = f->fprev;
        this->fnext->fprev = this->fprev->fnext = (struct csz_head*)this;
}
#else
/* Scan backward */
extern __inline__ void csz_insert_finish(struct csz_head *b,
                                         struct csz_flow *this)
{
        struct csz_head *f = b->fprev;
        unsigned long finish = this->finish;

        while (f != b) {
                if (((struct csz_flow*)f)->finish - finish <= 0)
                        break;
                f = f->fprev;
        }
        this->fnext = f->fnext;
        this->fprev = f;
        this->fnext->fprev = this->fprev->fnext = (struct csz_head*)this;
}
#endif

/* Insert flow "this" into the list "b" before the
   flow with a greater start number.
 */

extern __inline__ void csz_insert_start(struct csz_head *b,
                                        struct csz_flow *this)
{
        struct csz_head *f = b->snext;
        unsigned long start = this->start;

        while (f != b) {
                if (((struct csz_flow*)f)->start - start > 0)
                        break;
                f = f->snext;
        }
        this->snext = f;
        this->sprev = f->sprev;
        this->snext->sprev = this->sprev->snext = (struct csz_head*)this;
}


/* Calculate and return the current round number.
   It is another time consuming part, but
   it is impossible to avoid it.

   It costs O(N), which makes the whole algorithm useful only
   for playing with a model closest to the ideal fluid one.

   There exist less academic, but more practical modifications,
   which might have even better characteristics (WF2Q+, HPFQ, HFSC)
 */

static unsigned long csz_update(struct Qdisc *sch)
{
        struct csz_sched_data   *q = (struct csz_sched_data*)sch->data;
        struct csz_flow         *a;
        unsigned long           F;
        unsigned long           tmp;
        psched_time_t           now;
        unsigned long           delay;
        unsigned long           R_c;

        PSCHED_GET_TIME(now);
        delay = PSCHED_TDIFF_SAFE(now, q->t_c, 0, goto do_reset);

        if (delay>>q->delta_log) {
do_reset:
                /* Delta is too large.
                   It is possible if MTU/BW > 1<<q->delta_log
                   (i.e. a configuration error) or because of a hardware
                   fault. We have no choice...
                 */
                qdisc_reset(sch);
                return 0;
        }

        q->t_c = now;

        for (;;) {
                a = (struct csz_flow*)q->f.fnext;

                /* No more active flows. Reset R and exit. */
                if (a == (struct csz_flow*)&q->f) {
#ifdef CSZ_DEBUG
                        if (q->rate) {
                                printk("csz_update: rate!=0 on inactive csz\n");
                                q->rate = 0;
                        }
#endif
                        q->R_c = 0;
                        return 0;
                }

                F = a->finish;

#ifdef CSZ_DEBUG
                if (q->rate == 0) {
                        printk("csz_update: rate=0 on active csz\n");
                        goto do_reset;
                }
#endif

                /*
                 *           tmp = (t - q->t_c)/q->rate;
                 */

                tmp = ((delay<<(31-q->delta_log))/q->rate)>>(31-q->delta_log+q->R_log);

                tmp += q->R_c;

                /* OK, this flow (and all flows with greater
                   finish numbers) is still active. */
                if (F - tmp > 0)
                        break;

                /* It is no longer active. */

                a->fprev->fnext = a->fnext;
                a->fnext->fprev = a->fprev;

                /*
                 * q->t_c += (F - q->R_c)*q->rate
                 */

                tmp = ((F-q->R_c)*q->rate)<<q->R_log;
                R_c = F;
                q->rate -= a->slice.rate;

                if ((long)(delay - tmp) >= 0) {
                        delay -= tmp;
                        continue;
                }
                delay = 0;
        }

        q->R_c = tmp;
        return tmp;
}
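
/* An illustrative sketch, not used by the scheduler and kept under "#if 0":
   a floating-point restatement of the round-number computation described in
   the comment above; csz_update() does the same thing in fixed point, up to
   the fixed-point caveats listed under BUGS.  The names toy_state and
   toy_round_number are invented for this sketch.  Starting from the
   checkpoint (R_c, t_c), R grows with slope 1/rate, where rate is the sum of
   the slices of the active flows; whenever the smallest finish number F is
   reached, that flow is retired at the moment R == F, the checkpoint moves
   there, and R grows faster because rate has shrunk. */
#if 0
struct toy_state {
        double  R_c;            /* round number at the last checkpoint */
        double  rate;           /* sum of r_a over the active flows */
};

/* "finish" holds the finish numbers of the active flows in ascending order,
   "slice" the matching r_a values, "n" how many there are, and "dt" the time
   elapsed since the checkpoint.  Returns the current round number and moves
   the checkpoint to "now". */
static double toy_round_number(struct toy_state *s, const double *finish,
                               const double *slice, int n, double dt)
{
        int i;

        for (i = 0; i < n; i++) {
                double R = s->R_c + dt / s->rate;

                if (finish[i] > R) {
                        /* This flow (and all later ones) is still active. */
                        s->R_c = R;
                        return R;
                }

                /* Flow i drained exactly when R reached finish[i]: spend the
                   time that took and continue with a smaller total rate. */
                dt -= (finish[i] - s->R_c) * s->rate;
                if (dt < 0.0)
                        dt = 0.0;
                s->R_c = finish[i];
                s->rate -= slice[i];
        }

        /* No active flows are left: like csz_update(), reset R to 0. */
        s->R_c = 0.0;
        return 0.0;
}
#endif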

unsigned csz_classify(struct sk_buff *skb, struct csz_sched_data *q)
{
        /* Classification is not implemented yet: everything is mapped to
           flow 0, band 0. */
        return CSZ_GUARANTEED;
}

static int
csz_enqueue(struct sk_buff *skb, struct Qdisc* sch)
{
        struct csz_sched_data *q = (struct csz_sched_data *)sch->data;
        unsigned flow_id = csz_classify(skb, q);
        unsigned long R;
        int prio = 0;
        struct csz_flow *this;

        if (flow_id >= CSZ_GUARANTEED) {
                prio = flow_id - CSZ_GUARANTEED;
                flow_id = 0;
        }

        this = &q->flow[flow_id];
        if (this->q.qlen >= this->limit || this->L_tab == NULL) {
                sch->stats.drops++;
                kfree_skb(skb);
                return NET_XMIT_DROP;
        }

        R = csz_update(sch);

        if ((long)(this->finish - R) >= 0) {
                /* It was active */
                this->finish += L2R(this,skb->len);
        } else {
                /* It is inactive; activate it */
                this->finish = R + L2R(this,skb->len);
                q->rate += this->slice.rate;
                csz_insert_finish(&q->f, this);
        }

        /* If this flow was empty, remember its start number
           and insert it into the start queue */
        if (this->q.qlen == 0) {
                this->start = this->finish;
                csz_insert_start(&q->s, this);
        }
        if (flow_id)
                skb_queue_tail(&this->q, skb);
        else
                skb_queue_tail(&q->other[prio], skb);
        sch->q.qlen++;
        sch->stats.bytes += skb->len;
        sch->stats.packets++;
        return 0;
}

static __inline__ struct sk_buff *
skb_dequeue_best(struct csz_sched_data * q)
{
        int i;
        struct sk_buff *skb;

        for (i=0; i<4; i++) {
                skb = skb_dequeue(&q->other[i]);
                if (skb) {
                        q->flow[0].q.qlen--;
                        return skb;
                }
        }
        return NULL;
}

static __inline__ struct sk_buff *
skb_peek_best(struct csz_sched_data * q)
{
        int i;
        struct sk_buff *skb;

        for (i=0; i<4; i++) {
                skb = skb_peek(&q->other[i]);
                if (skb)
                        return skb;
        }
        return NULL;
}

#ifdef CSZ_PLUS_TBF

static void csz_watchdog(unsigned long arg)
{
        struct Qdisc *sch = (struct Qdisc*)arg;

        qdisc_wakeup(sch->dev);
}

static __inline__ void
csz_move_queue(struct csz_flow *this, long delta)
{
        this->fprev->fnext = this->fnext;
        this->fnext->fprev = this->fprev;

        this->start += delta;
        this->finish += delta;

        csz_insert_finish(this);
}

static __inline__ int csz_enough_tokens(struct csz_sched_data *q,
                                        struct csz_flow *this,
                                        struct sk_buff *skb)
{
        long toks;
        long shift;
        psched_time_t now;

        PSCHED_GET_TIME(now);

        toks = PSCHED_TDIFF(now, t_tbf) + this->tokens - L2R(q,this,skb->len);

        shift = 0;
        if (this->throttled) {
                /* Remember the a posteriori delay */

                unsigned long R = csz_update(q);
                shift = R - this->R_tbf;
                this->R_tbf = R;
        }

        if (toks >= 0) {
                /* Now we have enough tokens to proceed */

                this->tokens = toks <= this->depth ? toks : this->depth;
                this->t_tbf = now;

                if (!this->throttled)
                        return 1;

                /* The flow was throttled. Update its start & finish numbers
                   with the delay calculated a posteriori.
                 */

                this->throttled = 0;
                if (shift > 0)
                        csz_move_queue(this, shift);
                return 1;
        }

        if (!this->throttled) {
                /* The flow has just been throttled; remember the
                   current round number to calculate the a posteriori delay
                 */
                this->throttled = 1;
                this->R_tbf = csz_update(q);
        }

        /* Move the whole queue to the time when it will be allowed to send.
           We should translate time to a round number, but that is impossible,
           so we make the most conservative estimate, i.e. we suppose
           that only this flow is active and, hence, R = t.
           Really toks <= R <= toks/r_a.

           This a priori shift in R will be adjusted later to reflect
           the real delay. We cannot avoid it because:
           - a throttled flow continues to be active from the viewpoint
             of CSZ, so that it would acquire the highest priority
             if the start numbers were not adjusted.
           - Eventually, the finish number would become less than the round
             number and the flow would be declared inactive.
         */

        toks = -toks;

        /* Remember that we should start the watchdog */
        if (toks < q->wd_expires)
                q->wd_expires = toks;

        toks >>= q->R_log;
        shift += toks;
        if (shift > 0) {
                this->R_tbf += toks;
                csz_move_queue(this, shift);
        }
        csz_insert_start(this);
        return 0;
}
#endif


static struct sk_buff *
csz_dequeue(struct Qdisc* sch)
{
        struct csz_sched_data *q = (struct csz_sched_data *)sch->data;
        struct sk_buff *skb;
        struct csz_flow *this;

#ifdef CSZ_PLUS_TBF
        q->wd_expires = 0;
#endif
        this = (struct csz_flow*)q->s.snext;

        while (this != (struct csz_flow*)&q->s) {

                /* First of all: unlink from the start list */
                this->sprev->snext = this->snext;
                this->snext->sprev = this->sprev;

                if (this != &q->flow[0]) {       /* Guaranteed flow */
                        skb = __skb_dequeue(&this->q);
                        if (skb) {
#ifdef CSZ_PLUS_TBF
                                if (this->depth) {
                                        if (!csz_enough_tokens(q, this, skb))
                                                continue;
                                }
#endif
                                if (this->q.qlen) {
                                        struct sk_buff *nskb = skb_peek(&this->q);
                                        this->start += L2R(this,nskb->len);
                                        csz_insert_start(&q->s, this);
                                }
                                sch->q.qlen--;
                                return skb;
                        }
                } else {        /* Predictive or best effort flow */
                        skb = skb_dequeue_best(q);
                        if (skb) {
                                unsigned peeked = this->peeked;
                                this->peeked = 0;

                                if (--this->q.qlen) {
                                        struct sk_buff *nskb;
                                        unsigned dequeued = L2R(this,skb->len);

                                        /* We dequeued something other than
                                           what we peeked earlier; adjust the
                                           start number.
                                           */
                                        if (peeked != dequeued && peeked)
                                                this->start += dequeued - peeked;

                                        nskb = skb_peek_best(q);
                                        peeked = L2R(this,nskb->len);
                                        this->start += peeked;
                                        this->peeked = peeked;
                                        csz_insert_start(&q->s, this);
                                }
                                sch->q.qlen--;
                                return skb;
                        }
                }
        }
#ifdef CSZ_PLUS_TBF
        /* We are about to return no skb.
           Schedule the watchdog timer if this happened because of shaping.
         */
        if (q->wd_expires) {
                unsigned long delay = PSCHED_US2JIFFIE(q->wd_expires);
                if (delay == 0)
                        delay = 1;
                mod_timer(&q->wd_timer, jiffies + delay);
                sch->stats.overlimits++;
        }
#endif
        return NULL;
}

static void
csz_reset(struct Qdisc* sch)
{
        struct csz_sched_data *q = (struct csz_sched_data *)sch->data;
        int    i;

        for (i=0; i<4; i++)
                skb_queue_purge(&q->other[i]);

        for (i=0; i<CSZ_GUARANTEED; i++) {
                struct csz_flow *this = q->flow + i;
                skb_queue_purge(&this->q);
                this->snext = this->sprev =
                this->fnext = this->fprev = (struct csz_head*)this;
                this->start = this->finish = 0;
        }
        q->s.snext = q->s.sprev = &q->s;
        q->f.fnext = q->f.fprev = &q->f;
        q->R_c = 0;
#ifdef CSZ_PLUS_TBF
        PSCHED_GET_TIME(&q->t_tbf);
        q->tokens = q->depth;
        del_timer(&q->wd_timer);
#endif
        sch->q.qlen = 0;
}

static void
csz_destroy(struct Qdisc* sch)
{
        struct csz_sched_data *q = (struct csz_sched_data *)sch->data;
        struct tcf_proto *tp;

        while ((tp = q->filter_list) != NULL) {
                q->filter_list = tp->next;
                tcf_destroy(tp);
        }

        MOD_DEC_USE_COUNT;
}

static int csz_init(struct Qdisc *sch, struct rtattr *opt)
{
        struct csz_sched_data *q = (struct csz_sched_data *)sch->data;
        struct rtattr *tb[TCA_CSZ_PTAB];
        struct tc_csz_qopt *qopt;
        int    i;

        rtattr_parse(tb, TCA_CSZ_PTAB, RTA_DATA(opt), RTA_PAYLOAD(opt));
        if (tb[TCA_CSZ_PARMS-1] == NULL ||
            RTA_PAYLOAD(tb[TCA_CSZ_PARMS-1]) < sizeof(*qopt))
                return -EINVAL;
        qopt = RTA_DATA(tb[TCA_CSZ_PARMS-1]);

        q->R_log = qopt->R_log;
        q->delta_log = qopt->delta_log;
        for (i=0; i<=TC_PRIO_MAX; i++) {
                if (qopt->priomap[i] >= CSZ_FLOWS)
                        return -EINVAL;
                q->prio2band[i] = qopt->priomap[i];
        }

        for (i=0; i<4; i++)
                skb_queue_head_init(&q->other[i]);

        for (i=0; i<CSZ_GUARANTEED; i++) {
                struct csz_flow *this = q->flow + i;
                skb_queue_head_init(&this->q);
                this->snext = this->sprev =
                this->fnext = this->fprev = (struct csz_head*)this;
                this->start = this->finish = 0;
        }
        q->s.snext = q->s.sprev = &q->s;
        q->f.fnext = q->f.fprev = &q->f;
        q->R_c = 0;
#ifdef CSZ_PLUS_TBF
        init_timer(&q->wd_timer);
        q->wd_timer.data = (unsigned long)sch;
        q->wd_timer.function = csz_watchdog;
#endif
        MOD_INC_USE_COUNT;
        return 0;
}

static int csz_dump(struct Qdisc *sch, struct sk_buff *skb)
{
        struct csz_sched_data *q = (struct csz_sched_data *)sch->data;
        unsigned char    *b = skb->tail;
        struct rtattr *rta;
        struct tc_csz_qopt opt;

        rta = (struct rtattr*)b;
        RTA_PUT(skb, TCA_OPTIONS, 0, NULL);

        opt.flows = CSZ_FLOWS;
        memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX+1);
        RTA_PUT(skb, TCA_CSZ_PARMS, sizeof(opt), &opt);
        rta->rta_len = skb->tail - b;

        return skb->len;

rtattr_failure:
        skb_trim(skb, b - skb->data);
        return -1;
}

static int csz_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
                     struct Qdisc **old)
{
        return -EINVAL;
}

static struct Qdisc * csz_leaf(struct Qdisc *sch, unsigned long cl)
{
        return NULL;
}


static unsigned long csz_get(struct Qdisc *sch, u32 classid)
{
        struct csz_sched_data *q = (struct csz_sched_data *)sch->data;
        unsigned long band = TC_H_MIN(classid) - 1;

        if (band >= CSZ_FLOWS)
                return 0;

        if (band < CSZ_GUARANTEED && q->flow[band].L_tab == NULL)
                return 0;

        return band+1;
}

static unsigned long csz_bind(struct Qdisc *sch, unsigned long parent, u32 classid)
{
        return csz_get(sch, classid);
}


static void csz_put(struct Qdisc *sch, unsigned long cl)
{
        return;
}

static int csz_change(struct Qdisc *sch, u32 handle, u32 parent, struct rtattr **tca, unsigned long *arg)
{
        unsigned long cl = *arg;
        struct csz_sched_data *q = (struct csz_sched_data *)sch->data;
        struct rtattr *opt = tca[TCA_OPTIONS-1];
        struct rtattr *tb[TCA_CSZ_PTAB];
        struct tc_csz_copt *copt;

        rtattr_parse(tb, TCA_CSZ_PTAB, RTA_DATA(opt), RTA_PAYLOAD(opt));
        if (tb[TCA_CSZ_PARMS-1] == NULL ||
            RTA_PAYLOAD(tb[TCA_CSZ_PARMS-1]) < sizeof(*copt))
                return -EINVAL;
        copt = RTA_DATA(tb[TCA_CSZ_PARMS-1]);

        if (tb[TCA_CSZ_RTAB-1] &&
            RTA_PAYLOAD(tb[TCA_CSZ_RTAB-1]) < 1024)
                return -EINVAL;

        if (cl) {
                struct csz_flow *a;
                cl--;
                if (cl >= CSZ_FLOWS)
                        return -ENOENT;
                if (cl >= CSZ_GUARANTEED || q->flow[cl].L_tab == NULL)
                        return -EINVAL;

                a = &q->flow[cl];

                spin_lock_bh(&sch->dev->queue_lock);
#if 0
                a->rate_log = copt->rate_log;
#endif
#ifdef CSZ_PLUS_TBF
                a->limit = copt->limit;
                a->rate = copt->rate;
                a->buffer = copt->buffer;
                a->mtu = copt->mtu;
#endif

                if (tb[TCA_CSZ_RTAB-1])
                        memcpy(a->L_tab, RTA_DATA(tb[TCA_CSZ_RTAB-1]), 1024);

                spin_unlock_bh(&sch->dev->queue_lock);
                return 0;
        }
        /* NI: creating a new class is not implemented. */
        return 0;
}

static int csz_delete(struct Qdisc *sch, unsigned long cl)
{
        struct csz_sched_data *q = (struct csz_sched_data *)sch->data;
        struct csz_flow *a;

        cl--;

        if (cl >= CSZ_FLOWS)
                return -ENOENT;
        if (cl >= CSZ_GUARANTEED || q->flow[cl].L_tab == NULL)
                return -EINVAL;

        a = &q->flow[cl];

        spin_lock_bh(&sch->dev->queue_lock);
        a->fprev->fnext = a->fnext;
        a->fnext->fprev = a->fprev;
        a->sprev->snext = a->snext;
        a->snext->sprev = a->sprev;
        a->start = a->finish = 0;
        kfree(xchg(&q->flow[cl].L_tab, NULL));
        spin_unlock_bh(&sch->dev->queue_lock);

        return 0;
}

static int csz_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb, struct tcmsg *tcm)
{
        struct csz_sched_data *q = (struct csz_sched_data *)sch->data;
        unsigned char    *b = skb->tail;
        struct rtattr *rta;
        struct tc_csz_copt opt;

        tcm->tcm_handle = sch->handle|cl;

        cl--;

        if (cl > CSZ_FLOWS)
                goto rtattr_failure;

        if (cl < CSZ_GUARANTEED) {
                struct csz_flow *f = &q->flow[cl];

                if (f->L_tab == NULL)
                        goto rtattr_failure;

                rta = (struct rtattr*)b;
                RTA_PUT(skb, TCA_OPTIONS, 0, NULL);

                opt.limit = f->limit;
                opt.rate = f->rate;
                opt.slice = f->slice;
                memset(&opt.peakrate, 0, sizeof(opt.peakrate));
#ifdef CSZ_PLUS_TBF
                opt.buffer = f->buffer;
                opt.mtu = f->mtu;
#else
                opt.buffer = 0;
                opt.mtu = 0;
#endif

                RTA_PUT(skb, TCA_CSZ_PARMS, sizeof(opt), &opt);
                rta->rta_len = skb->tail - b;
        }

        return skb->len;

rtattr_failure:
        skb_trim(skb, b - skb->data);
        return -1;
}

static void csz_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
        struct csz_sched_data *q = (struct csz_sched_data *)sch->data;
        int prio = 0;

        if (arg->stop)
                return;

        for (prio = 0; prio < CSZ_FLOWS; prio++) {
                if (arg->count < arg->skip) {
                        arg->count++;
                        continue;
                }
                if (prio < CSZ_GUARANTEED && q->flow[prio].L_tab == NULL) {
                        arg->count++;
                        continue;
                }
                if (arg->fn(sch, prio+1, arg) < 0) {
                        arg->stop = 1;
                        break;
                }
                arg->count++;
        }
}

static struct tcf_proto ** csz_find_tcf(struct Qdisc *sch, unsigned long cl)
{
        struct csz_sched_data *q = (struct csz_sched_data *)sch->data;

        if (cl)
                return NULL;

        return &q->filter_list;
}

struct Qdisc_class_ops csz_class_ops =
{
        csz_graft,
        csz_leaf,

        csz_get,
        csz_put,
        csz_change,
        csz_delete,
        csz_walk,

        csz_find_tcf,
        csz_bind,
        csz_put,

        csz_dump_class,
};

struct Qdisc_ops csz_qdisc_ops =
{
        NULL,
        &csz_class_ops,
        "csz",
        sizeof(struct csz_sched_data),

        csz_enqueue,
        csz_dequeue,
        NULL,
        NULL,

        csz_init,
        csz_reset,
        csz_destroy,
        NULL /* csz_change */,

        csz_dump,
};


#ifdef MODULE
int init_module(void)
{
        return register_qdisc(&csz_qdisc_ops);
}

void cleanup_module(void)
{
        unregister_qdisc(&csz_qdisc_ops);
}
#endif
MODULE_LICENSE("GPL");
