OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

[/] [or1k/] [trunk/] [linux/] [linux-2.4/] [net/] [sched/] [sch_red.c] - Blame information for rev 1765

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 1275 phoenix
/*
2
 * net/sched/sch_red.c  Random Early Detection queue.
3
 *
4
 *              This program is free software; you can redistribute it and/or
5
 *              modify it under the terms of the GNU General Public License
6
 *              as published by the Free Software Foundation; either version
7
 *              2 of the License, or (at your option) any later version.
8
 *
9
 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10
 *
11
 * Changes:
12
 * J Hadi Salim <hadi@nortel.com> 980914:       computation fixes
13
 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
14
 * J Hadi Salim <hadi@nortelnetworks.com> 980816:  ECN support
15
 */
16
 
17
#include <linux/config.h>
18
#include <linux/module.h>
19
#include <asm/uaccess.h>
20
#include <asm/system.h>
21
#include <asm/bitops.h>
22
#include <linux/types.h>
23
#include <linux/kernel.h>
24
#include <linux/sched.h>
25
#include <linux/string.h>
26
#include <linux/mm.h>
27
#include <linux/socket.h>
28
#include <linux/sockios.h>
29
#include <linux/in.h>
30
#include <linux/errno.h>
31
#include <linux/interrupt.h>
32
#include <linux/if_ether.h>
33
#include <linux/inet.h>
34
#include <linux/netdevice.h>
35
#include <linux/etherdevice.h>
36
#include <linux/notifier.h>
37
#include <net/ip.h>
38
#include <net/route.h>
39
#include <linux/skbuff.h>
40
#include <net/sock.h>
41
#include <net/pkt_sched.h>
42
#include <net/inet_ecn.h>
43
 
44
 
45
/*      Random Early Detection (RED) algorithm.
46
        =======================================
47
 
48
        Source: Sally Floyd and Van Jacobson, "Random Early Detection Gateways
49
        for Congestion Avoidance", 1993, IEEE/ACM Transactions on Networking.
50
 
51
        This file codes a "divisionless" version of RED algorithm
52
        as written down in Fig.17 of the paper.
53
 
54
Short description.
55
------------------
56
 
57
        When a new packet arrives we calculate the average queue length:
58
 
59
        avg = (1-W)*avg + W*current_queue_len,
60
 
61
        W is the filter time constant (chosen as 2^(-Wlog)); it controls
62
        the inertia of the algorithm. To allow larger bursts, W should be
63
        decreased.
64
 
65
        if (avg > th_max) -> packet marked (dropped).
66
        if (avg < th_min) -> packet passes.
67
        if (th_min < avg < th_max) we calculate probability:
68
 
69
        Pb = max_P * (avg - th_min)/(th_max-th_min)
70
 
71
        and mark (drop) packet with this probability.
72
        Pb changes from 0 (at avg==th_min) to max_P (avg==th_max).
73
        max_P should be small (not 1), usually 0.01..0.02 is good value.
74
 
75
        max_P is chosen as a number, so that max_P/(th_max-th_min)
76
        is a negative power of two, so that the arithmetic involves
77
        only shifts.
78
 
79
 
80
        Parameters, settable by user:
81
        -----------------------------
82
 
83
        limit           - bytes (must be > qth_max + burst)
84
 
85
        Hard limit on queue length, should be chosen >qth_max
86
        to allow packet bursts. This parameter does not
87
        affect the algorithms behaviour and can be chosen
88
        arbitrarily high (well, less than ram size)
89
        Really, this limit will never be reached
90
        if RED works correctly.
91
 
92
        qth_min         - bytes (should be < qth_max/2)
93
        qth_max         - bytes (should be at least 2*qth_min and less limit)
94
        Wlog            - bits (<32) log(1/W).
95
        Plog            - bits (<32)
96
 
97
        Plog is related to max_P by formula:
98
 
99
        max_P = (qth_max-qth_min)/2^Plog;
100
 
101
        F.e. if qth_max=128K and qth_min=32K, then Plog=22
102
        corresponds to max_P=0.02
103
 
104
        Scell_log
105
        Stab
106
 
107
        Lookup table for log((1-W)^(t/t_ave)).
108
 
109
 
110
NOTES:
111
 
112
Upper bound on W.
113
-----------------
114
 
115
        If you want to allow bursts of L packets of size S,
116
        you should choose W:
117
 
118
        L + 1 - th_min/S < (1-(1-W)^L)/W
119
 
120
        th_min/S = 32         th_min/S = 4
121
 
122
        log(W)  L
123
        -1      33
124
        -2      35
125
        -3      39
126
        -4      46
127
        -5      57
128
        -6      75
129
        -7      101
130
        -8      135
131
        -9      190
132
        etc.
133
 */
134
 
135
struct red_sched_data
{
/* Parameters */
        u32             limit;          /* HARD maximal queue length    */
        u32             qth_min;        /* Min average length threshold: A scaled */
        u32             qth_max;        /* Max average length threshold: A scaled */
        u32             Rmask;          /* Random-number mask: (1<<Plog)-1 (see red_change) */
        u32             Scell_max;      /* Clamp on measured idle time: 255<<Scell_log */
        unsigned char   flags;          /* TC_RED_* flags (TC_RED_ECN enables marking) */
        char            Wlog;           /* log(W)               */
        char            Plog;           /* random number bits   */
        char            Scell_log;      /* cell size for Stab idle-time lookup */
        u8              Stab[256];      /* Decay table: log((1-W)^(t/t_ave)), from userspace */

/* Variables */
        unsigned long   qave;           /* Average queue length: A scaled */
        int             qcount;         /* Packets since last random number generation */
        u32             qR;             /* Cached random number */

        psched_time_t   qidlestart;     /* Start of idle period         */
        struct tc_red_xstats st;        /* Extended stats reported via red_dump() */
};
157
 
158
static int red_ecn_mark(struct sk_buff *skb)
159
{
160
        if (skb->nh.raw + 20 > skb->tail)
161
                return 0;
162
 
163
        switch (skb->protocol) {
164
        case __constant_htons(ETH_P_IP):
165
                if (!INET_ECN_is_capable(skb->nh.iph->tos))
166
                        return 0;
167
                if (INET_ECN_is_not_ce(skb->nh.iph->tos))
168
                        IP_ECN_set_ce(skb->nh.iph);
169
                return 1;
170
        case __constant_htons(ETH_P_IPV6):
171
                if (!INET_ECN_is_capable(ip6_get_dsfield(skb->nh.ipv6h)))
172
                        return 0;
173
                IP6_ECN_set_ce(skb->nh.ipv6h);
174
                return 1;
175
        default:
176
                return 0;
177
        }
178
}
179
 
180
/* Enqueue entry point: update the EWMA of the queue length, then
 * accept, ECN-mark or drop the packet per the RED algorithm described
 * in the file header comment.  Returns NET_XMIT_SUCCESS, NET_XMIT_DROP
 * (hard-limit tail drop) or NET_XMIT_CN (early/congestion drop).
 */
static int
red_enqueue(struct sk_buff *skb, struct Qdisc* sch)
{
        struct red_sched_data *q = (struct red_sched_data *)sch->data;

        psched_time_t now;

        if (!PSCHED_IS_PASTPERFECT(q->qidlestart)) {
                /* Queue has been idle since red_dequeue()/red_drop()
                   found it empty: decay qave for the idle period instead
                   of feeding the (empty) backlog into the EWMA. */
                long us_idle;
                int  shift;

                PSCHED_GET_TIME(now);
                us_idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, q->Scell_max, 0);
                PSCHED_SET_PASTPERFECT(q->qidlestart);

/*
   The problem: ideally, average length queue recalculation should
   be done over constant clock intervals. This is too expensive, so that
   the calculation is driven by outgoing packets.
   When the queue is idle we have to model this clock by hand.

   SF+VJ proposed to "generate" m = idletime/(average_pkt_size/bandwidth)
   dummy packets as a burst after idle time, i.e.

          q->qave *= (1-W)^m

   This is an apparently overcomplicated solution (f.e. we have to precompute
   a table to make this calculation in reasonable time)
   I believe that a simpler model may be used here,
   but it is field for experiments.
*/
                shift = q->Stab[us_idle>>q->Scell_log];

                if (shift) {
                        q->qave >>= shift;
                } else {
                        /* Approximate initial part of exponent
                           with linear function:
                           (1-W)^m ~= 1-mW + ...

                           Seems, it is the best solution to
                           problem of too coarse exponent tabulation.
                         */

                        us_idle = (q->qave * us_idle)>>q->Scell_log;
                        if (us_idle < q->qave/2)
                                q->qave -= us_idle;
                        else
                                q->qave >>= 1;
                }
        } else {
                q->qave += sch->stats.backlog - (q->qave >> q->Wlog);
                /* NOTE:
                   q->qave is fixed point number with point at Wlog.
                   The formulae above is equivalent to floating point
                   version:

                   qave = qave*(1-W) + sch->stats.backlog*W;
                                                           --ANK (980924)
                 */
        }

        if (q->qave < q->qth_min) {
                /* Below min threshold: always accept (hard limit permitting). */
                q->qcount = -1;
enqueue:
                if (sch->stats.backlog + skb->len <= q->limit) {
                        __skb_queue_tail(&sch->q, skb);
                        sch->stats.backlog += skb->len;
                        sch->stats.bytes += skb->len;
                        sch->stats.packets++;
                        return NET_XMIT_SUCCESS;
                } else {
                        /* Hard byte limit exceeded -- tail drop. */
                        q->st.pdrop++;
                }
                kfree_skb(skb);
                sch->stats.drops++;
                return NET_XMIT_DROP;
        }
        if (q->qave >= q->qth_max) {
                /* Above max threshold: mark (or drop) unconditionally. */
                q->qcount = -1;
                sch->stats.overlimits++;
mark:
                if  (!(q->flags&TC_RED_ECN) || !red_ecn_mark(skb)) {
                        /* ECN disabled or packet not ECN-capable: early drop. */
                        q->st.early++;
                        goto drop;
                }
                q->st.marked++;
                goto enqueue;
        }

        /* Between the thresholds: mark with probability proportional to
           (qave - qth_min), using the cached random number q->qR. */
        if (++q->qcount) {
                /* The formula used below causes questions.

                   OK. qR is random number in the interval 0..Rmask
                   i.e. 0..(2^Plog). If we used floating point
                   arithmetics, it would be: (2^Plog)*rnd_num,
                   where rnd_num is less 1.

                   Taking into account, that qave have fixed
                   point at Wlog, and Plog is related to max_P by
                   max_P = (qth_max-qth_min)/2^Plog; two lines
                   below have the following floating point equivalent:

                   max_P*(qave - qth_min)/(qth_max-qth_min) < rnd/qcount

                   Any questions? --ANK (980924)
                 */
                if (((q->qave - q->qth_min)>>q->Wlog)*q->qcount < q->qR)
                        goto enqueue;
                q->qcount = 0;
                q->qR = net_random()&q->Rmask;
                sch->stats.overlimits++;
                goto mark;
        }
        /* qcount was -1 (first packet in the between-thresholds region):
           draw a fresh random number and accept this packet. */
        q->qR = net_random()&q->Rmask;
        goto enqueue;

drop:
        kfree_skb(skb);
        sch->stats.drops++;
        return NET_XMIT_CN;
}
302
 
303
static int
304
red_requeue(struct sk_buff *skb, struct Qdisc* sch)
305
{
306
        struct red_sched_data *q = (struct red_sched_data *)sch->data;
307
 
308
        PSCHED_SET_PASTPERFECT(q->qidlestart);
309
 
310
        __skb_queue_head(&sch->q, skb);
311
        sch->stats.backlog += skb->len;
312
        return 0;
313
}
314
 
315
static struct sk_buff *
316
red_dequeue(struct Qdisc* sch)
317
{
318
        struct sk_buff *skb;
319
        struct red_sched_data *q = (struct red_sched_data *)sch->data;
320
 
321
        skb = __skb_dequeue(&sch->q);
322
        if (skb) {
323
                sch->stats.backlog -= skb->len;
324
                return skb;
325
        }
326
        PSCHED_GET_TIME(q->qidlestart);
327
        return NULL;
328
}
329
 
330
static unsigned int red_drop(struct Qdisc* sch)
331
{
332
        struct sk_buff *skb;
333
        struct red_sched_data *q = (struct red_sched_data *)sch->data;
334
 
335
        skb = __skb_dequeue_tail(&sch->q);
336
        if (skb) {
337
                unsigned int len = skb->len;
338
                sch->stats.backlog -= len;
339
                sch->stats.drops++;
340
                q->st.other++;
341
                kfree_skb(skb);
342
                return len;
343
        }
344
        PSCHED_GET_TIME(q->qidlestart);
345
        return 0;
346
}
347
 
348
static void red_reset(struct Qdisc* sch)
349
{
350
        struct red_sched_data *q = (struct red_sched_data *)sch->data;
351
 
352
        __skb_queue_purge(&sch->q);
353
        sch->stats.backlog = 0;
354
        PSCHED_SET_PASTPERFECT(q->qidlestart);
355
        q->qave = 0;
356
        q->qcount = -1;
357
}
358
 
359
/* Parse and apply a new RED configuration from netlink attributes.
 * Requires both TCA_RED_PARMS (struct tc_red_qopt) and TCA_RED_STAB
 * (the 256-byte idle-time decay table).  Returns 0 or -EINVAL.
 */
static int red_change(struct Qdisc *sch, struct rtattr *opt)
{
        struct red_sched_data *q = (struct red_sched_data *)sch->data;
        struct rtattr *tb[TCA_RED_STAB];
        struct tc_red_qopt *ctl;

        /* Both attributes are mandatory and must be at least full size. */
        if (opt == NULL ||
            rtattr_parse(tb, TCA_RED_STAB, RTA_DATA(opt), RTA_PAYLOAD(opt)) ||
            tb[TCA_RED_PARMS-1] == 0 || tb[TCA_RED_STAB-1] == 0 ||
            RTA_PAYLOAD(tb[TCA_RED_PARMS-1]) < sizeof(*ctl) ||
            RTA_PAYLOAD(tb[TCA_RED_STAB-1]) < 256)
                return -EINVAL;

        ctl = RTA_DATA(tb[TCA_RED_PARMS-1]);

        /* Swap in the new parameters under the tree lock so red_enqueue()
         * never observes a half-written configuration.
         */
        sch_tree_lock(sch);
        q->flags = ctl->flags;
        q->Wlog = ctl->Wlog;
        q->Plog = ctl->Plog;
        /* NOTE(review): only Plog is range-checked here; Wlog and
         * Scell_log are used as shift counts below without validation --
         * presumably userspace (tc) guarantees sane (<32) values. TODO confirm.
         */
        q->Rmask = ctl->Plog < 32 ? ((1<<ctl->Plog) - 1) : ~0UL;
        q->Scell_log = ctl->Scell_log;
        q->Scell_max = (255<<q->Scell_log);
        q->qth_min = ctl->qth_min<<ctl->Wlog;   /* thresholds kept W-scaled internally */
        q->qth_max = ctl->qth_max<<ctl->Wlog;
        q->limit = ctl->limit;
        memcpy(q->Stab, RTA_DATA(tb[TCA_RED_STAB-1]), 256);

        q->qcount = -1;
        /* An empty queue restarts with no idle period in progress. */
        if (skb_queue_len(&sch->q) == 0)
                PSCHED_SET_PASTPERFECT(q->qidlestart);
        sch_tree_unlock(sch);
        return 0;
}
392
 
393
static int red_init(struct Qdisc* sch, struct rtattr *opt)
394
{
395
        int err;
396
 
397
        MOD_INC_USE_COUNT;
398
 
399
        if ((err = red_change(sch, opt)) != 0) {
400
                MOD_DEC_USE_COUNT;
401
        }
402
        return err;
403
}
404
 
405
 
406
/* Append the extended RED statistics as a TCA_XSTATS attribute.
 * Returns 0 on success, 1 if the skb ran out of tail room (RTA_PUT
 * jumps to rtattr_failure in that case).
 */
int red_copy_xstats(struct sk_buff *skb, struct tc_red_xstats *st)
{
        RTA_PUT(skb, TCA_XSTATS, sizeof(*st), st);
        return 0;

rtattr_failure:
        return 1;
}
414
 
415
/* Dump the current configuration and xstats into a netlink message.
 * Returns the new message length, or -1 with the skb trimmed back to
 * its original size if the attributes did not fit.
 */
static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
        struct red_sched_data *q = (struct red_sched_data *)sch->data;
        unsigned char    *b = skb->tail;        /* saved for rollback/length patch */
        struct rtattr *rta;
        struct tc_red_qopt opt;

        rta = (struct rtattr*)b;
        RTA_PUT(skb, TCA_OPTIONS, 0, NULL);     /* nested attr; length patched below */
        opt.limit = q->limit;
        /* Thresholds are stored W-scaled; convert back to bytes for userspace. */
        opt.qth_min = q->qth_min>>q->Wlog;
        opt.qth_max = q->qth_max>>q->Wlog;
        opt.Wlog = q->Wlog;
        opt.Plog = q->Plog;
        opt.Scell_log = q->Scell_log;
        opt.flags = q->flags;
        RTA_PUT(skb, TCA_RED_PARMS, sizeof(opt), &opt);
        rta->rta_len = skb->tail - b;           /* finalize TCA_OPTIONS length */

        if (red_copy_xstats(skb, &q->st))
                goto rtattr_failure;

        return skb->len;

rtattr_failure:
        skb_trim(skb, b - skb->data);           /* undo everything written above */
        return -1;
}
443
 
444
/* Per-instance teardown.  All RED state is embedded in sch->data
 * (presumably freed by the qdisc core), so only the module reference
 * taken in red_init() is dropped here.
 */
static void red_destroy(struct Qdisc *sch)
{
        MOD_DEC_USE_COUNT;
}
448
 
449
/* Qdisc operations table (positional initializer, 2.4 style).
 * NOTE(review): field comments inferred from struct Qdisc_ops in
 * <net/pkt_sched.h>; the NULL cl_ops presumably marks RED as a
 * classless qdisc -- confirm field order there before reordering.
 */
struct Qdisc_ops red_qdisc_ops =
{
        NULL,                           /* next */
        NULL,                           /* cl_ops */
        "red",                          /* id */
        sizeof(struct red_sched_data),  /* priv_size */

        red_enqueue,
        red_dequeue,
        red_requeue,
        red_drop,

        red_init,
        red_reset,
        red_destroy,
        red_change,

        red_dump,
};
468
 
469
 
470
#ifdef MODULE
/* Module entry: register the "red" queueing discipline with the
 * packet scheduler core.
 */
int init_module(void)
{
        return register_qdisc(&red_qdisc_ops);
}

/* Module exit: remove the registration added by init_module(). */
void cleanup_module(void)
{
        unregister_qdisc(&red_qdisc_ops);
}
#endif
MODULE_LICENSE("GPL");

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.