OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

[/] [or1k/] [trunk/] [linux/] [linux-2.4/] [net/] [sched/] [sch_sfq.c] - Blame information for rev 1275

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 1275 phoenix
/*
2
 * net/sched/sch_sfq.c  Stochastic Fairness Queueing discipline.
3
 *
4
 *              This program is free software; you can redistribute it and/or
5
 *              modify it under the terms of the GNU General Public License
6
 *              as published by the Free Software Foundation; either version
7
 *              2 of the License, or (at your option) any later version.
8
 *
9
 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10
 */
11
 
12
#include <linux/config.h>
13
#include <linux/module.h>
14
#include <asm/uaccess.h>
15
#include <asm/system.h>
16
#include <asm/bitops.h>
17
#include <linux/types.h>
18
#include <linux/kernel.h>
19
#include <linux/sched.h>
20
#include <linux/string.h>
21
#include <linux/mm.h>
22
#include <linux/socket.h>
23
#include <linux/sockios.h>
24
#include <linux/in.h>
25
#include <linux/errno.h>
26
#include <linux/interrupt.h>
27
#include <linux/if_ether.h>
28
#include <linux/inet.h>
29
#include <linux/netdevice.h>
30
#include <linux/etherdevice.h>
31
#include <linux/notifier.h>
32
#include <linux/init.h>
33
#include <net/ip.h>
34
#include <linux/ipv6.h>
35
#include <net/route.h>
36
#include <linux/skbuff.h>
37
#include <net/sock.h>
38
#include <net/pkt_sched.h>
39
 
40
 
41
/*      Stochastic Fairness Queuing algorithm.
42
        =======================================
43
 
44
        Source:
45
        Paul E. McKenney "Stochastic Fairness Queuing",
46
        IEEE INFOCOMM'90 Proceedings, San Francisco, 1990.
47
 
48
        Paul E. McKenney "Stochastic Fairness Queuing",
49
        "Interworking: Research and Experience", v.2, 1991, p.113-131.
50
 
51
 
52
        See also:
53
        M. Shreedhar and George Varghese "Efficient Fair
54
        Queuing using Deficit Round Robin", Proc. SIGCOMM 95.
55
 
56
 
57
        This is not the thing that is usually called (W)FQ nowadays.
58
        It does not use any timestamp mechanism, but instead
59
        processes queues in round-robin order.
60
 
61
        ADVANTAGE:
62
 
63
        - It is very cheap. Both CPU and memory requirements are minimal.
64
 
65
        DRAWBACKS:
66
 
67
        - "Stochastic" -> It is not 100% fair.
68
        When hash collisions occur, several flows are considered as one.
69
 
70
        - "Round-robin" -> It introduces larger delays than virtual clock
71
        based schemes, and should not be used for isolating interactive
72
        traffic from non-interactive. It means that this scheduler
73
        should be used as leaf of CBQ or P3, which put interactive traffic
74
        to higher priority band.
75
 
76
        We still need true WFQ for top level CSZ, but using WFQ
77
        for the best effort traffic is absolutely pointless:
78
        SFQ is superior for this purpose.
79
 
80
        IMPLEMENTATION:
81
        This implementation limits maximal queue length to 128;
82
        maximal mtu to 2^15-1; number of hash buckets to 1024.
83
        The only goal of these restrictions was that all data
84
        fit into one 4K page :-). Struct sfq_sched_data is
85
        organized in anti-cache manner: all the data for a bucket
86
        are scattered over different locations. This is not good,
87
        but it allowed me to put it into 4K.
88
 
89
        It is easy to increase these values, but not in flight.  */
90
 
91
#define SFQ_DEPTH               128     /* max packets queued == number of flow slots */
#define SFQ_HASH_DIVISOR        1024    /* number of hash buckets */

/* This type should contain at least SFQ_DEPTH*2 values */
typedef unsigned char sfq_index;
96
 
97
/* Doubly-linked list node.  dep[] in struct sfq_sched_data uses these
 * both as per-slot nodes (indices 0..SFQ_DEPTH-1) and as the per-depth
 * list heads (indices SFQ_DEPTH..2*SFQ_DEPTH-1); see sfq_link(). */
struct sfq_head
{
        sfq_index       next;
        sfq_index       prev;
};
102
 
103
/* Per-qdisc private state.  Array sizes are chosen so the whole
 * structure fits in one 4K page (see the comment block above).
 * Slots 0..SFQ_DEPTH-1 each hold one flow's packet queue; the value
 * SFQ_DEPTH itself is used throughout as the "no slot / empty" marker
 * (in ht[], tail, and as the free-list head index in dep[]). */
struct sfq_sched_data
{
/* Parameters */
        int             perturb_period;
        unsigned        quantum;        /* Allotment per round: MUST BE >= MTU */
        int             limit;

/* Variables */
        struct timer_list perturb_timer;        /* re-randomizes `perturbation' */
        int             perturbation;           /* 5-bit shift used by sfq_fold_hash() */
        sfq_index       tail;           /* Index of current slot in round */
        sfq_index       max_depth;      /* Maximal depth */

        sfq_index       ht[SFQ_HASH_DIVISOR];   /* Hash table */
        sfq_index       next[SFQ_DEPTH];        /* Active slots link */
        short           allot[SFQ_DEPTH];       /* Current allotment per slot */
        unsigned short  hash[SFQ_DEPTH];        /* Hash value indexed by slots */
        struct sk_buff_head     qs[SFQ_DEPTH];          /* Slot queue */
        struct sfq_head dep[SFQ_DEPTH*2];       /* Linked list of slots, indexed by depth */
};
123
 
124
/* Fold two 32-bit flow-key words into a bucket index in
 * [0, SFQ_HASH_DIVISOR).  h1 is mixed in via a perturbation-dependent
 * pseudo-rotation, so the flow->bucket mapping changes every time
 * q->perturbation is re-randomized by sfq_perturbation().
 * Note: with pert in [0,31] the shift counts are pert and 31-pert
 * (not 32-pert), so this is not an exact rotate — but any reasonable
 * mixing function serves the purpose, and no shift count can reach
 * the (undefined) value 32. */
static __inline__ unsigned sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1)
{
        int pert = q->perturbation;

        /* Have we any rotation primitives? If not, WHY? */
        h ^= (h1<<pert) ^ (h1>>(0x1F - pert));
        h ^= h>>10;
        return h & 0x3FF;       /* 0x3FF == SFQ_HASH_DIVISOR - 1 */
}
133
 
134
/* Classify skb into a hash bucket (i.e. a flow).
 * IPv4 key: daddr, saddr^protocol, plus — for unfragmented TCP/UDP/ESP
 * packets — the first 32 bits after the IP header (port pair / SPI).
 * IPv6 key: low 32 bits of each address, nexthdr, and likewise the first
 * word of the transport header.  Anything else falls back to the dst
 * entry pointer, protocol and socket pointer. */
static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
{
        u32 h, h2;

        switch (skb->protocol) {
        case __constant_htons(ETH_P_IP):
        {
                struct iphdr *iph = skb->nh.iph;
                h = iph->daddr;
                h2 = iph->saddr^iph->protocol;
                /* Skip the transport words on any fragment (MF set or
                 * nonzero offset): non-first fragments carry no ports,
                 * and all fragments must hash identically. */
                if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
                    (iph->protocol == IPPROTO_TCP ||
                     iph->protocol == IPPROTO_UDP ||
                     iph->protocol == IPPROTO_ESP))
                        h2 ^= *(((u32*)iph) + iph->ihl);
                break;
        }
        case __constant_htons(ETH_P_IPV6):
        {
                struct ipv6hdr *iph = skb->nh.ipv6h;
                h = iph->daddr.s6_addr32[3];
                h2 = iph->saddr.s6_addr32[3]^iph->nexthdr;
                if (iph->nexthdr == IPPROTO_TCP ||
                    iph->nexthdr == IPPROTO_UDP ||
                    iph->nexthdr == IPPROTO_ESP)
                        h2 ^= *(u32*)&iph[1];   /* first word past the fixed header */
                break;
        }
        default:
                h = (u32)(unsigned long)skb->dst^skb->protocol;
                h2 = (u32)(unsigned long)skb->sk;
        }
        return sfq_fold_hash(q, h, h2);
}
168
 
169
/* (Re)insert slot x at the head of the depth list matching its current
 * queue length: dep[qlen+SFQ_DEPTH] is that list's head.  A slot with
 * an empty queue thus lands on dep[SFQ_DEPTH], which doubles as the
 * free-slot list used by the enqueue path. */
extern __inline__ void sfq_link(struct sfq_sched_data *q, sfq_index x)
{
        sfq_index p, n;
        int d = q->qs[x].qlen + SFQ_DEPTH;

        p = d;                          /* new prev is the list head itself */
        n = q->dep[d].next;
        q->dep[x].next = n;
        q->dep[x].prev = p;
        q->dep[p].next = q->dep[n].prev = x;
}
180
 
181
/* Move slot x to the correct depth list after one packet was REMOVED
 * from its queue (the caller has already unlinked the skb, so
 * q->qs[x].qlen is the new, smaller length). */
extern __inline__ void sfq_dec(struct sfq_sched_data *q, sfq_index x)
{
        sfq_index p, n;

        n = q->dep[x].next;
        p = q->dep[x].prev;
        q->dep[p].next = n;
        q->dep[n].prev = p;

        /* n == p means x was the only slot on its old depth list (both
         * neighbours were the head); if that depth (old qlen == qlen+1)
         * was the maximum, the maximum just shrank by one. */
        if (n == p && q->max_depth == q->qs[x].qlen + 1)
                q->max_depth--;

        sfq_link(q, x);
}
195
 
196
/* Move slot x to the correct depth list after one packet was ADDED to
 * its queue (the caller has already queued the skb, so q->qs[x].qlen
 * is the new, larger length); track the new maximum depth. */
extern __inline__ void sfq_inc(struct sfq_sched_data *q, sfq_index x)
{
        sfq_index p, n;
        int d;

        n = q->dep[x].next;
        p = q->dep[x].prev;
        q->dep[p].next = n;
        q->dep[n].prev = p;
        d = q->qs[x].qlen;
        if (q->max_depth < d)
                q->max_depth = d;

        sfq_link(q, x);
}
211
 
212
/* Drop one packet from the longest slot (newest packet first, i.e.
 * tail-drop within the slot).  Called when the qdisc is over limit.
 * Returns the number of bytes freed, or 0 if nothing was dropped. */
static unsigned int sfq_drop(struct Qdisc *sch)
{
        struct sfq_sched_data *q = (struct sfq_sched_data *)sch->data;
        sfq_index d = q->max_depth;
        struct sk_buff *skb;
        unsigned int len;

        /* Queue is full! Find the longest slot and
           drop a packet from it */

        if (d > 1) {
                /* dep[d+SFQ_DEPTH] heads the list of slots of depth d;
                 * take the first such slot and drop its newest packet
                 * (qs[x].prev is the tail of the slot queue). */
                sfq_index x = q->dep[d+SFQ_DEPTH].next;
                skb = q->qs[x].prev;
                len = skb->len;
                __skb_unlink(skb, &q->qs[x]);
                kfree_skb(skb);
                sfq_dec(q, x);
                sch->q.qlen--;
                sch->stats.drops++;
                return len;
        }

        if (d == 1) {
                /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
                /* Every active slot holds exactly one packet: drop the
                 * slot at the head of the round-robin ring, unlink it
                 * from the ring and free its hash bucket. */
                d = q->next[q->tail];
                q->next[q->tail] = q->next[d];
                q->allot[q->next[d]] += q->quantum;
                skb = q->qs[d].prev;
                len = skb->len;
                __skb_unlink(skb, &q->qs[d]);
                kfree_skb(skb);
                sfq_dec(q, d);
                sch->q.qlen--;
                q->ht[q->hash[d]] = SFQ_DEPTH;
                sch->stats.drops++;
                /* NOTE(review): if d was the only active flow
                 * (d == q->tail), q->tail is left pointing at the now
                 * empty slot rather than being reset to SFQ_DEPTH —
                 * confirm this state is unreachable, or fix as done in
                 * later kernel versions. */
                return len;
        }

        return 0;
}
252
 
253
/* Enqueue skb on the slot selected by the flow hash.  A flow without a
 * slot gets one from the free list (dep[SFQ_DEPTH].next); a flow whose
 * queue just became non-empty is inserted into the round-robin ring
 * after `tail' with a fresh quantum.  Returns 0 on success, or
 * NET_XMIT_CN after dropping a packet from the longest slot when the
 * configured limit is reached. */
static int
sfq_enqueue(struct sk_buff *skb, struct Qdisc* sch)
{
        struct sfq_sched_data *q = (struct sfq_sched_data *)sch->data;
        unsigned hash = sfq_hash(q, skb);
        sfq_index x;

        x = q->ht[hash];
        if (x == SFQ_DEPTH) {   /* bucket empty: allocate a free slot */
                q->ht[hash] = x = q->dep[SFQ_DEPTH].next;
                q->hash[x] = hash;
        }
        __skb_queue_tail(&q->qs[x], skb);
        sfq_inc(q, x);
        if (q->qs[x].qlen == 1) {               /* The flow is new */
                if (q->tail == SFQ_DEPTH) {     /* It is the first flow */
                        q->tail = x;
                        q->next[x] = x;
                        q->allot[x] = q->quantum;
                } else {
                        q->next[x] = q->next[q->tail];
                        q->next[q->tail] = x;
                        q->tail = x;
                }
        }
        /* NOTE(review): the threshold is limit-1, so the queue is cut
         * back once it holds limit-1 packets rather than limit —
         * confirm this off-by-one is intentional (sfq_change() uses
         * the same threshold). */
        if (++sch->q.qlen < q->limit-1) {
                sch->stats.bytes += skb->len;
                sch->stats.packets++;
                return 0;
        }

        sfq_drop(sch);
        return NET_XMIT_CN;
}
287
 
288
/* Put a previously dequeued skb back at the HEAD of its flow's queue
 * (the driver could not transmit it).  Mirrors sfq_enqueue() except
 * that byte/packet stats are not counted again. */
static int
sfq_requeue(struct sk_buff *skb, struct Qdisc* sch)
{
        struct sfq_sched_data *q = (struct sfq_sched_data *)sch->data;
        unsigned hash = sfq_hash(q, skb);
        sfq_index x;

        x = q->ht[hash];
        if (x == SFQ_DEPTH) {   /* bucket empty: allocate a free slot */
                q->ht[hash] = x = q->dep[SFQ_DEPTH].next;
                q->hash[x] = hash;
        }
        /* Head insertion keeps the packet first in line for dequeue */
        __skb_queue_head(&q->qs[x], skb);
        sfq_inc(q, x);
        if (q->qs[x].qlen == 1) {               /* The flow is new */
                if (q->tail == SFQ_DEPTH) {     /* It is the first flow */
                        q->tail = x;
                        q->next[x] = x;
                        q->allot[x] = q->quantum;
                } else {
                        q->next[x] = q->next[q->tail];
                        q->next[q->tail] = x;
                        q->tail = x;
                }
        }
        if (++sch->q.qlen < q->limit - 1)
                return 0;

        sch->stats.drops++;
        sfq_drop(sch);
        return NET_XMIT_CN;
}
320
 
321
 
322
 
323
 
324
/* Dequeue in round-robin order: take the head packet of the slot after
 * `tail'.  A slot keeps the turn until its byte allotment is used up,
 * then the turn passes on and the next slot receives another quantum
 * (deficit-round-robin style). */
static struct sk_buff *
sfq_dequeue(struct Qdisc* sch)
{
        struct sfq_sched_data *q = (struct sfq_sched_data *)sch->data;
        struct sk_buff *skb;
        sfq_index a, old_a;

        /* No active slots */
        if (q->tail == SFQ_DEPTH)
                return NULL;

        a = old_a = q->next[q->tail];

        /* Grab packet */
        skb = __skb_dequeue(&q->qs[a]);
        sfq_dec(q, a);
        sch->q.qlen--;

        /* Is the slot empty? */
        if (q->qs[a].qlen == 0) {
                /* Flow finished: free its hash bucket and unlink it
                 * from the ring; if it was the only flow, mark the
                 * qdisc idle. */
                q->ht[q->hash[a]] = SFQ_DEPTH;
                a = q->next[a];
                if (a == old_a) {
                        q->tail = SFQ_DEPTH;
                        return skb;
                }
                q->next[q->tail] = a;
                q->allot[a] += q->quantum;
        } else if ((q->allot[a] -= skb->len) <= 0) {
                /* Allotment exhausted: pass the turn to the next slot */
                q->tail = a;
                a = q->next[a];
                q->allot[a] += q->quantum;
        }
        return skb;
}
359
 
360
static void
361
sfq_reset(struct Qdisc* sch)
362
{
363
        struct sk_buff *skb;
364
 
365
        while ((skb = sfq_dequeue(sch)) != NULL)
366
                kfree_skb(skb);
367
}
368
 
369
static void sfq_perturbation(unsigned long arg)
370
{
371
        struct Qdisc *sch = (struct Qdisc*)arg;
372
        struct sfq_sched_data *q = (struct sfq_sched_data *)sch->data;
373
 
374
        q->perturbation = net_random()&0x1F;
375
        q->perturb_timer.expires = jiffies + q->perturb_period;
376
 
377
        if (q->perturb_period) {
378
                q->perturb_timer.expires = jiffies + q->perturb_period;
379
                add_timer(&q->perturb_timer);
380
        }
381
}
382
 
383
/* Apply parameters from a netlink tc_sfq_qopt attribute.
 * quantum == 0 selects the device MTU; limit is clamped to SFQ_DEPTH.
 * Runs under the qdisc tree lock: excess packets are dropped right
 * away and the perturbation timer is restarted with the new period.
 * Returns 0, or -EINVAL if the attribute is too short. */
static int sfq_change(struct Qdisc *sch, struct rtattr *opt)
{
        struct sfq_sched_data *q = (struct sfq_sched_data *)sch->data;
        struct tc_sfq_qopt *ctl = RTA_DATA(opt);

        if (opt->rta_len < RTA_LENGTH(sizeof(*ctl)))
                return -EINVAL;

        sch_tree_lock(sch);
        q->quantum = ctl->quantum ? : psched_mtu(sch->dev);
        q->perturb_period = ctl->perturb_period*HZ;
        if (ctl->limit)
                q->limit = min_t(u32, ctl->limit, SFQ_DEPTH);

        /* Shrink to the new limit (same limit-1 threshold as enqueue) */
        while (sch->q.qlen >= q->limit-1)
                sfq_drop(sch);

        /* Restart perturbation with the new period (0 disables it) */
        del_timer(&q->perturb_timer);
        if (q->perturb_period) {
                q->perturb_timer.expires = jiffies + q->perturb_period;
                add_timer(&q->perturb_timer);
        }
        sch_tree_unlock(sch);
        return 0;
}
408
 
409
/* Initialize a new sfq qdisc: prepare the perturbation timer, mark all
 * hash buckets empty, self-link the per-depth list heads, then place
 * every slot on the depth-0 (free) list via sfq_link().  opt == NULL
 * selects defaults (quantum = device MTU, no perturbation); otherwise
 * parameters are taken from sfq_change(). */
static int sfq_init(struct Qdisc *sch, struct rtattr *opt)
{
        struct sfq_sched_data *q = (struct sfq_sched_data *)sch->data;
        int i;

        /* Timer must be fully set up before sfq_change() below may
         * arm it via add_timer(). */
        q->perturb_timer.data = (unsigned long)sch;
        q->perturb_timer.function = sfq_perturbation;
        init_timer(&q->perturb_timer);

        for (i=0; i<SFQ_HASH_DIVISOR; i++)
                q->ht[i] = SFQ_DEPTH;           /* SFQ_DEPTH == "no slot" */
        for (i=0; i<SFQ_DEPTH; i++) {
                skb_queue_head_init(&q->qs[i]);
                /* dep[i+SFQ_DEPTH]: initially empty (self-linked) head
                 * of the list of slots whose queue length is i. */
                q->dep[i+SFQ_DEPTH].next = i+SFQ_DEPTH;
                q->dep[i+SFQ_DEPTH].prev = i+SFQ_DEPTH;
        }
        q->limit = SFQ_DEPTH;
        q->max_depth = 0;
        q->tail = SFQ_DEPTH;                    /* no active flows yet */
        if (opt == NULL) {
                q->quantum = psched_mtu(sch->dev);
                q->perturb_period = 0;
        } else {
                int err = sfq_change(sch, opt);
                if (err)
                        return err;
        }
        /* All queues are empty, so this builds the free-slot list */
        for (i=0; i<SFQ_DEPTH; i++)
                sfq_link(q, i);
        MOD_INC_USE_COUNT;
        return 0;
}
441
 
442
/* Tear down the qdisc: stop the perturbation timer and drop the
 * module reference taken in sfq_init(). */
static void sfq_destroy(struct Qdisc *sch)
{
        struct sfq_sched_data *q = (struct sfq_sched_data *)sch->data;
        del_timer(&q->perturb_timer);
        MOD_DEC_USE_COUNT;
}
448
 
449
/* Dump the current parameters as a TCA_OPTIONS / tc_sfq_qopt netlink
 * attribute.  Returns the new skb length on success, or -1 with the
 * skb trimmed back if the attribute did not fit. */
static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
        struct sfq_sched_data *q = (struct sfq_sched_data *)sch->data;
        unsigned char    *b = skb->tail;        /* for rollback on failure */
        struct tc_sfq_qopt opt;

        opt.quantum = q->quantum;
        opt.perturb_period = q->perturb_period/HZ;      /* back to seconds */

        opt.limit = q->limit;
        opt.divisor = SFQ_HASH_DIVISOR;
        /* Active flow count is not tracked; report the limit instead */
        opt.flows = q->limit;

        RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);   /* jumps to rtattr_failure on overflow */

        return skb->len;

rtattr_failure:
        skb_trim(skb, b - skb->data);
        return -1;
}
470
 
471
/* Qdisc operations table, positional initializer (2.4 style).
 * NOTE(review): the field comments below follow the 2.4 struct
 * Qdisc_ops layout — verify against net/pkt_sched.h. */
struct Qdisc_ops sfq_qdisc_ops =
{
        NULL,           /* next: filled in by register_qdisc() */
        NULL,           /* cl_ops: sfq is classless */
        "sfq",          /* id */
        sizeof(struct sfq_sched_data),  /* priv_size */

        sfq_enqueue,
        sfq_dequeue,
        sfq_requeue,
        sfq_drop,

        sfq_init,
        sfq_reset,
        sfq_destroy,
        NULL, /* sfq_change */

        sfq_dump,
};
490
 
491
#ifdef MODULE
492
int init_module(void)
493
{
494
        return register_qdisc(&sfq_qdisc_ops);
495
}
496
 
497
/* Module exit point: unregister the "sfq" queueing discipline. */
void cleanup_module(void)
{
        unregister_qdisc(&sfq_qdisc_ops);
}
501
#endif
502
MODULE_LICENSE("GPL");

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.