OpenCores Subversion repository: test_project
URL: https://opencores.org/ocsvn/test_project/test_project/trunk
File: linux_sd_driver/net/sched/sch_sfq.c (trunk, rev 62, committed by marcus.erl)

/*
 * net/sched/sch_sfq.c  Stochastic Fairness Queueing discipline.
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/string.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/ipv6.h>
#include <linux/skbuff.h>
#include <linux/jhash.h>
#include <net/ip.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>

/*      Stochastic Fairness Queuing algorithm.
        =======================================

        Source:
        Paul E. McKenney "Stochastic Fairness Queuing",
        IEEE INFOCOMM'90 Proceedings, San Francisco, 1990.

        Paul E. McKenney "Stochastic Fairness Queuing",
        "Interworking: Research and Experience", v.2, 1991, p.113-131.

        See also:
        M. Shreedhar and George Varghese "Efficient Fair
        Queuing using Deficit Round Robin", Proc. SIGCOMM 95.


        This is not the thing that is usually called (W)FQ nowadays.
        It does not use any timestamp mechanism, but instead
        processes queues in round-robin order.

        ADVANTAGE:

        - It is very cheap. Both CPU and memory requirements are minimal.

        DRAWBACKS:

        - "Stochastic" -> It is not 100% fair.
        When hash collisions occur, several flows are considered as one.

        - "Round-robin" -> It introduces larger delays than virtual clock
        based schemes, and should not be used for isolating interactive
        traffic from non-interactive traffic. This means that this scheduler
        should be used as a leaf of CBQ or P3, which put interactive traffic
        into the higher priority band.

        We still need true WFQ for top-level CSZ, but using WFQ
        for best-effort traffic is absolutely pointless:
        SFQ is superior for this purpose.

        IMPLEMENTATION:
        This implementation limits the maximal queue length to 128,
        the maximal mtu to 2^15-1 and the number of hash buckets to 1024.
        The only goal of these restrictions was that all data
        fit into one 4K page :-). Struct sfq_sched_data is
        organized in an anti-cache manner: all the data for a bucket
        are scattered over different locations. This is not good,
        but it allowed me to put it into 4K.

        It is easy to increase these values, but not in flight.  */
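
/* Illustrative usage (assuming the standard iproute2 "tc" front end; exact
 * syntax depends on the tc version):
 *
 *     tc qdisc add dev eth0 root sfq perturb 10
 *
 * "perturb" sets the hash re-keying interval in seconds (perturb_period
 * below); "quantum" and "limit" correspond to the other two parameters of
 * this qdisc. */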

#define SFQ_DEPTH               128
#define SFQ_HASH_DIVISOR        1024

/* This type should contain at least SFQ_DEPTH*2 values */
typedef unsigned char sfq_index;

struct sfq_head
{
        sfq_index       next;
        sfq_index       prev;
};

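/* Per-qdisc state.  Slots 0..SFQ_DEPTH-1 each hold one flow: its packet
 * queue (qs[]), its hash bucket (hash[]) and its per-round byte allotment
 * (allot[]).  ht[] maps a hash bucket to its slot, or to SFQ_DEPTH if the
 * bucket is empty.  dep[] stores doubly linked lists of slots keyed by
 * queue depth: entries 0..SFQ_DEPTH-1 are the per-slot list nodes, entries
 * SFQ_DEPTH..2*SFQ_DEPTH-1 are the list heads for depth 0, 1, ...; the
 * depth-0 list therefore doubles as the free-slot list. */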
struct sfq_sched_data
{
/* Parameters */
        int             perturb_period;
        unsigned        quantum;        /* Allotment per round: MUST BE >= MTU */
        int             limit;

/* Variables */
        struct timer_list perturb_timer;
        u32             perturbation;
        sfq_index       tail;           /* Index of current slot in round */
        sfq_index       max_depth;      /* Maximal depth */

        sfq_index       ht[SFQ_HASH_DIVISOR];   /* Hash table */
        sfq_index       next[SFQ_DEPTH];        /* Active slots link */
        short           allot[SFQ_DEPTH];       /* Current allotment per slot */
        unsigned short  hash[SFQ_DEPTH];        /* Hash value indexed by slots */
        struct sk_buff_head     qs[SFQ_DEPTH];          /* Slot queue */
        struct sfq_head dep[SFQ_DEPTH*2];       /* Linked list of slots, indexed by depth */
};

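/* Flow classification: fold source/destination addresses (and, for a few
 * well-known transports on unfragmented packets, the 32 bits following the
 * network header, i.e. the ports) into one of SFQ_HASH_DIVISOR buckets.
 * The jhash seed q->perturbation is re-randomized periodically by
 * sfq_perturbation(), so flows that collide in one period are likely to be
 * separated in the next. */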
static __inline__ unsigned sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1)
{
        return jhash_2words(h, h1, q->perturbation) & (SFQ_HASH_DIVISOR - 1);
}

static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
{
        u32 h, h2;

        switch (skb->protocol) {
        case __constant_htons(ETH_P_IP):
        {
                const struct iphdr *iph = ip_hdr(skb);
                h = iph->daddr;
                h2 = iph->saddr^iph->protocol;
                if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
                    (iph->protocol == IPPROTO_TCP ||
                     iph->protocol == IPPROTO_UDP ||
                     iph->protocol == IPPROTO_UDPLITE ||
                     iph->protocol == IPPROTO_SCTP ||
                     iph->protocol == IPPROTO_DCCP ||
                     iph->protocol == IPPROTO_ESP))
                        h2 ^= *(((u32*)iph) + iph->ihl);
                break;
        }
        case __constant_htons(ETH_P_IPV6):
        {
                struct ipv6hdr *iph = ipv6_hdr(skb);
                h = iph->daddr.s6_addr32[3];
                h2 = iph->saddr.s6_addr32[3]^iph->nexthdr;
                if (iph->nexthdr == IPPROTO_TCP ||
                    iph->nexthdr == IPPROTO_UDP ||
                    iph->nexthdr == IPPROTO_UDPLITE ||
                    iph->nexthdr == IPPROTO_SCTP ||
                    iph->nexthdr == IPPROTO_DCCP ||
                    iph->nexthdr == IPPROTO_ESP)
                        h2 ^= *(u32*)&iph[1];
                break;
        }
        default:
                h = (u32)(unsigned long)skb->dst^skb->protocol;
                h2 = (u32)(unsigned long)skb->sk;
        }
        return sfq_fold_hash(q, h, h2);
}

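/* Depth-list bookkeeping: sfq_link() inserts slot x into the dep[] list of
 * slots that currently hold qs[x].qlen packets.  sfq_inc()/sfq_dec() are
 * called after a packet has been added to or removed from slot x; they move
 * the slot to the list for its new depth and keep q->max_depth (the length
 * of the longest slot, used by sfq_drop() to pick a victim) up to date. */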
static inline void sfq_link(struct sfq_sched_data *q, sfq_index x)
{
        sfq_index p, n;
        int d = q->qs[x].qlen + SFQ_DEPTH;

        p = d;
        n = q->dep[d].next;
        q->dep[x].next = n;
        q->dep[x].prev = p;
        q->dep[p].next = q->dep[n].prev = x;
}

static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x)
{
        sfq_index p, n;

        n = q->dep[x].next;
        p = q->dep[x].prev;
        q->dep[p].next = n;
        q->dep[n].prev = p;

        if (n == p && q->max_depth == q->qs[x].qlen + 1)
                q->max_depth--;

        sfq_link(q, x);
}

static inline void sfq_inc(struct sfq_sched_data *q, sfq_index x)
{
        sfq_index p, n;
        int d;

        n = q->dep[x].next;
        p = q->dep[x].prev;
        q->dep[p].next = n;
        q->dep[n].prev = p;
        d = q->qs[x].qlen;
        if (q->max_depth < d)
                q->max_depth = d;

        sfq_link(q, x);
}

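/* Drop a packet when the qdisc is over limit: take the tail packet of the
 * longest slot (depth q->max_depth).  If every active slot holds exactly
 * one packet, drop from the slot that would be served next and unlink that
 * flow from the round-robin ring.  Returns the number of bytes freed, or 0
 * if there was nothing to drop. */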
static unsigned int sfq_drop(struct Qdisc *sch)
{
        struct sfq_sched_data *q = qdisc_priv(sch);
        sfq_index d = q->max_depth;
        struct sk_buff *skb;
        unsigned int len;

        /* Queue is full! Find the longest slot and
           drop a packet from it */

        if (d > 1) {
                sfq_index x = q->dep[d+SFQ_DEPTH].next;
                skb = q->qs[x].prev;
                len = skb->len;
                __skb_unlink(skb, &q->qs[x]);
                kfree_skb(skb);
                sfq_dec(q, x);
                sch->q.qlen--;
                sch->qstats.drops++;
                sch->qstats.backlog -= len;
                return len;
        }

        if (d == 1) {
                /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
                d = q->next[q->tail];
                q->next[q->tail] = q->next[d];
                q->allot[q->next[d]] += q->quantum;
                skb = q->qs[d].prev;
                len = skb->len;
                __skb_unlink(skb, &q->qs[d]);
                kfree_skb(skb);
                sfq_dec(q, d);
                sch->q.qlen--;
                q->ht[q->hash[d]] = SFQ_DEPTH;
                sch->qstats.drops++;
                sch->qstats.backlog -= len;
                return len;
        }

        return 0;
}

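/* Enqueue: hash the packet to a bucket, claim a free slot for the bucket if
 * it does not have one yet, and append the packet to that slot's queue.  A
 * newly active flow is linked into the round-robin ring with a fresh
 * quantum.  If the chosen slot is already at q->limit the packet itself is
 * dropped; if the whole qdisc exceeds q->limit after the insert, sfq_drop()
 * evicts a packet and NET_XMIT_CN is returned. */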
static int
sfq_enqueue(struct sk_buff *skb, struct Qdisc* sch)
{
        struct sfq_sched_data *q = qdisc_priv(sch);
        unsigned hash = sfq_hash(q, skb);
        sfq_index x;

        x = q->ht[hash];
        if (x == SFQ_DEPTH) {
                q->ht[hash] = x = q->dep[SFQ_DEPTH].next;
                q->hash[x] = hash;
        }
        /* If the selected queue has length q->limit, this means that
         * all other queues are empty and that we do a simple tail drop,
         * i.e. drop _this_ packet.
         */
        if (q->qs[x].qlen >= q->limit)
                return qdisc_drop(skb, sch);

        sch->qstats.backlog += skb->len;
        __skb_queue_tail(&q->qs[x], skb);
        sfq_inc(q, x);
        if (q->qs[x].qlen == 1) {               /* The flow is new */
                if (q->tail == SFQ_DEPTH) {     /* It is the first flow */
                        q->tail = x;
                        q->next[x] = x;
                        q->allot[x] = q->quantum;
                } else {
                        q->next[x] = q->next[q->tail];
                        q->next[q->tail] = x;
                        q->tail = x;
                }
        }
        if (++sch->q.qlen <= q->limit) {
                sch->bstats.bytes += skb->len;
                sch->bstats.packets++;
                return 0;
        }

        sfq_drop(sch);
        return NET_XMIT_CN;
}

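/* Requeue: like sfq_enqueue(), but the packet goes back to the head of its
 * slot, since it has already been dequeued once (typically because the
 * device could not take it).  If this pushes the slot past q->limit, the
 * slot's tail packet is dropped rather than the requeued one. */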
static int
sfq_requeue(struct sk_buff *skb, struct Qdisc* sch)
{
        struct sfq_sched_data *q = qdisc_priv(sch);
        unsigned hash = sfq_hash(q, skb);
        sfq_index x;

        x = q->ht[hash];
        if (x == SFQ_DEPTH) {
                q->ht[hash] = x = q->dep[SFQ_DEPTH].next;
                q->hash[x] = hash;
        }
        sch->qstats.backlog += skb->len;
        __skb_queue_head(&q->qs[x], skb);
        /* If the selected queue has length q->limit+1, this means that
         * all other queues are empty and we do a simple tail drop.
         * This packet is still requeued at the head of the queue; the
         * tail packet is dropped.
         */
        if (q->qs[x].qlen > q->limit) {
                skb = q->qs[x].prev;
                __skb_unlink(skb, &q->qs[x]);
                sch->qstats.drops++;
                sch->qstats.backlog -= skb->len;
                kfree_skb(skb);
                return NET_XMIT_CN;
        }
        sfq_inc(q, x);
        if (q->qs[x].qlen == 1) {               /* The flow is new */
                if (q->tail == SFQ_DEPTH) {     /* It is the first flow */
                        q->tail = x;
                        q->next[x] = x;
                        q->allot[x] = q->quantum;
                } else {
                        q->next[x] = q->next[q->tail];
                        q->next[q->tail] = x;
                        q->tail = x;
                }
        }
        if (++sch->q.qlen <= q->limit) {
                sch->qstats.requeues++;
                return 0;
        }

        sch->qstats.drops++;
        sfq_drop(sch);
        return NET_XMIT_CN;
}

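/* Dequeue: serve the active slots in round-robin order.  Each slot may send
 * up to allot[] bytes per round (replenished from q->quantum); when a slot
 * uses up its allotment, or its queue empties, the round moves on to the
 * next slot in the ring. */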
static struct sk_buff *
sfq_dequeue(struct Qdisc* sch)
{
        struct sfq_sched_data *q = qdisc_priv(sch);
        struct sk_buff *skb;
        sfq_index a, old_a;

        /* No active slots */
        if (q->tail == SFQ_DEPTH)
                return NULL;

        a = old_a = q->next[q->tail];

        /* Grab packet */
        skb = __skb_dequeue(&q->qs[a]);
        sfq_dec(q, a);
        sch->q.qlen--;
        sch->qstats.backlog -= skb->len;

        /* Is the slot empty? */
        if (q->qs[a].qlen == 0) {
                q->ht[q->hash[a]] = SFQ_DEPTH;
                a = q->next[a];
                if (a == old_a) {
                        q->tail = SFQ_DEPTH;
                        return skb;
                }
                q->next[q->tail] = a;
                q->allot[a] += q->quantum;
        } else if ((q->allot[a] -= skb->len) <= 0) {
                q->tail = a;
                a = q->next[a];
                q->allot[a] += q->quantum;
        }
        return skb;
}

static void
sfq_reset(struct Qdisc* sch)
{
        struct sk_buff *skb;

        while ((skb = sfq_dequeue(sch)) != NULL)
                kfree_skb(skb);
}

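/* Perturbation timer callback: pick a new random hash seed so that a hash
 * collision between two flows does not persist beyond one perturbation
 * period, then re-arm the timer if perturbation is still enabled. */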
static void sfq_perturbation(unsigned long arg)
{
        struct Qdisc *sch = (struct Qdisc*)arg;
        struct sfq_sched_data *q = qdisc_priv(sch);

        get_random_bytes(&q->perturbation, 4);

        if (q->perturb_period)
                mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
}

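/* Apply parameters supplied via netlink (struct tc_sfq_qopt): quantum,
 * perturbation period (seconds from user space, jiffies internally) and
 * per-flow limit.  Packets beyond the new limit are dropped immediately so
 * that the queue conforms to it. */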
static int sfq_change(struct Qdisc *sch, struct rtattr *opt)
{
        struct sfq_sched_data *q = qdisc_priv(sch);
        struct tc_sfq_qopt *ctl = RTA_DATA(opt);
        unsigned int qlen;

        if (opt->rta_len < RTA_LENGTH(sizeof(*ctl)))
                return -EINVAL;

        sch_tree_lock(sch);
        q->quantum = ctl->quantum ? : psched_mtu(sch->dev);
        q->perturb_period = ctl->perturb_period*HZ;
        if (ctl->limit)
                q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1);

        qlen = sch->q.qlen;
        while (sch->q.qlen > q->limit)
                sfq_drop(sch);
        qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);

        del_timer(&q->perturb_timer);
        if (q->perturb_period) {
                mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
                get_random_bytes(&q->perturbation, 4);
        }
        sch_tree_unlock(sch);
        return 0;
}

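/* One-time setup: initialize the perturbation timer, mark every hash bucket
 * and the round-robin ring as empty, put all slots on the free (depth 0)
 * list, and apply either the defaults (quantum = one MTU, no perturbation)
 * or the user-supplied parameters. */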
static int sfq_init(struct Qdisc *sch, struct rtattr *opt)
{
        struct sfq_sched_data *q = qdisc_priv(sch);
        int i;

        init_timer(&q->perturb_timer);
        q->perturb_timer.data = (unsigned long)sch;
        q->perturb_timer.function = sfq_perturbation;

        for (i=0; i<SFQ_HASH_DIVISOR; i++)
                q->ht[i] = SFQ_DEPTH;
        for (i=0; i<SFQ_DEPTH; i++) {
                skb_queue_head_init(&q->qs[i]);
                q->dep[i+SFQ_DEPTH].next = i+SFQ_DEPTH;
                q->dep[i+SFQ_DEPTH].prev = i+SFQ_DEPTH;
        }
        q->limit = SFQ_DEPTH - 1;
        q->max_depth = 0;
        q->tail = SFQ_DEPTH;
        if (opt == NULL) {
                q->quantum = psched_mtu(sch->dev);
                q->perturb_period = 0;
                get_random_bytes(&q->perturbation, 4);
        } else {
                int err = sfq_change(sch, opt);
                if (err)
                        return err;
        }
        for (i=0; i<SFQ_DEPTH; i++)
                sfq_link(q, i);
        return 0;
}

static void sfq_destroy(struct Qdisc *sch)
{
        struct sfq_sched_data *q = qdisc_priv(sch);
        del_timer(&q->perturb_timer);
}

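/* Report the current configuration to user space as a struct tc_sfq_qopt.
 * The number of flows reported equals q->limit and the divisor is the
 * compile-time hash table size. */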
static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
        struct sfq_sched_data *q = qdisc_priv(sch);
        unsigned char *b = skb_tail_pointer(skb);
        struct tc_sfq_qopt opt;

        opt.quantum = q->quantum;
        opt.perturb_period = q->perturb_period/HZ;

        opt.limit = q->limit;
        opt.divisor = SFQ_HASH_DIVISOR;
        opt.flows = q->limit;

        RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);

        return skb->len;

rtattr_failure:
        nlmsg_trim(skb, b);
        return -1;
}

static struct Qdisc_ops sfq_qdisc_ops = {
        .next           =       NULL,
        .cl_ops         =       NULL,
        .id             =       "sfq",
        .priv_size      =       sizeof(struct sfq_sched_data),
        .enqueue        =       sfq_enqueue,
        .dequeue        =       sfq_dequeue,
        .requeue        =       sfq_requeue,
        .drop           =       sfq_drop,
        .init           =       sfq_init,
        .reset          =       sfq_reset,
        .destroy        =       sfq_destroy,
        .change         =       NULL,
        .dump           =       sfq_dump,
        .owner          =       THIS_MODULE,
};

static int __init sfq_module_init(void)
{
        return register_qdisc(&sfq_qdisc_ops);
}
static void __exit sfq_module_exit(void)
{
        unregister_qdisc(&sfq_qdisc_ops);
}
module_init(sfq_module_init)
module_exit(sfq_module_exit)
MODULE_LICENSE("GPL");
