OpenCores
URL https://opencores.org/ocsvn/test_project/test_project/trunk

Subversion Repositories test_project

[/] [test_project/] [trunk/] [linux_sd_driver/] [net/] [ipv4/] [tcp_cong.c] - Blame information for rev 62

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 62 marcus.erl
/*
2
 * Pluggable TCP congestion control support and newReno
3
 * congestion control.
4
 * Based on ideas from I/O scheduler support and Web100.
5
 *
6
 * Copyright (C) 2005 Stephen Hemminger <shemminger@osdl.org>
7
 */
8
 
9
#include <linux/module.h>
10
#include <linux/mm.h>
11
#include <linux/types.h>
12
#include <linux/list.h>
13
#include <net/tcp.h>
14
 
15
/*
 * Limited slow start threshold consulted by tcp_slow_start().
 * A value <= 0 (the zero-initialised default) disables the limit.
 */
int sysctl_tcp_max_ssthresh;

/*
 * List of registered congestion control algorithms.  Modifications are
 * made while holding tcp_cong_list_lock; the entry at the head of the
 * list is the one reported as the default.
 */
static LIST_HEAD(tcp_cong_list);
static DEFINE_SPINLOCK(tcp_cong_list_lock);
19
 
20
/* Simple linear search, don't expect many entries! */
21
static struct tcp_congestion_ops *tcp_ca_find(const char *name)
22
{
23
        struct tcp_congestion_ops *e;
24
 
25
        list_for_each_entry_rcu(e, &tcp_cong_list, list) {
26
                if (strcmp(e->name, name) == 0)
27
                        return e;
28
        }
29
 
30
        return NULL;
31
}
32
 
33
/*
34
 * Attach new congestion control algorithm to the list
35
 * of available options.
36
 */
37
int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
38
{
39
        int ret = 0;
40
 
41
        /* all algorithms must implement ssthresh and cong_avoid ops */
42
        if (!ca->ssthresh || !ca->cong_avoid) {
43
                printk(KERN_ERR "TCP %s does not implement required ops\n",
44
                       ca->name);
45
                return -EINVAL;
46
        }
47
 
48
        spin_lock(&tcp_cong_list_lock);
49
        if (tcp_ca_find(ca->name)) {
50
                printk(KERN_NOTICE "TCP %s already registered\n", ca->name);
51
                ret = -EEXIST;
52
        } else {
53
                list_add_tail_rcu(&ca->list, &tcp_cong_list);
54
                printk(KERN_INFO "TCP %s registered\n", ca->name);
55
        }
56
        spin_unlock(&tcp_cong_list_lock);
57
 
58
        return ret;
59
}
60
EXPORT_SYMBOL_GPL(tcp_register_congestion_control);
61
 
62
/*
63
 * Remove congestion control algorithm, called from
64
 * the module's remove function.  Module ref counts are used
65
 * to ensure that this can't be done till all sockets using
66
 * that method are closed.
67
 */
68
void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca)
69
{
70
        spin_lock(&tcp_cong_list_lock);
71
        list_del_rcu(&ca->list);
72
        spin_unlock(&tcp_cong_list_lock);
73
}
74
EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control);
75
 
76
/* Assign choice of congestion control. */
77
void tcp_init_congestion_control(struct sock *sk)
78
{
79
        struct inet_connection_sock *icsk = inet_csk(sk);
80
        struct tcp_congestion_ops *ca;
81
 
82
        /* if no choice made yet assign the current value set as default */
83
        if (icsk->icsk_ca_ops == &tcp_init_congestion_ops) {
84
                rcu_read_lock();
85
                list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
86
                        if (try_module_get(ca->owner)) {
87
                                icsk->icsk_ca_ops = ca;
88
                                break;
89
                        }
90
 
91
                        /* fallback to next available */
92
                }
93
                rcu_read_unlock();
94
        }
95
 
96
        if (icsk->icsk_ca_ops->init)
97
                icsk->icsk_ca_ops->init(sk);
98
}
99
 
100
/* Manage refcounts on socket close. */
101
void tcp_cleanup_congestion_control(struct sock *sk)
102
{
103
        struct inet_connection_sock *icsk = inet_csk(sk);
104
 
105
        if (icsk->icsk_ca_ops->release)
106
                icsk->icsk_ca_ops->release(sk);
107
        module_put(icsk->icsk_ca_ops->owner);
108
}
109
 
110
/* Used by sysctl to change default congestion control */
111
int tcp_set_default_congestion_control(const char *name)
112
{
113
        struct tcp_congestion_ops *ca;
114
        int ret = -ENOENT;
115
 
116
        spin_lock(&tcp_cong_list_lock);
117
        ca = tcp_ca_find(name);
118
#ifdef CONFIG_KMOD
119
        if (!ca && capable(CAP_SYS_MODULE)) {
120
                spin_unlock(&tcp_cong_list_lock);
121
 
122
                request_module("tcp_%s", name);
123
                spin_lock(&tcp_cong_list_lock);
124
                ca = tcp_ca_find(name);
125
        }
126
#endif
127
 
128
        if (ca) {
129
                ca->flags |= TCP_CONG_NON_RESTRICTED;   /* default is always allowed */
130
                list_move(&ca->list, &tcp_cong_list);
131
                ret = 0;
132
        }
133
        spin_unlock(&tcp_cong_list_lock);
134
 
135
        return ret;
136
}
137
 
138
/* Set default value from kernel configuration at bootup */
139
static int __init tcp_congestion_default(void)
140
{
141
        return tcp_set_default_congestion_control(CONFIG_DEFAULT_TCP_CONG);
142
}
143
late_initcall(tcp_congestion_default);
144
 
145
 
146
/* Build string with list of available congestion control values */
147
void tcp_get_available_congestion_control(char *buf, size_t maxlen)
148
{
149
        struct tcp_congestion_ops *ca;
150
        size_t offs = 0;
151
 
152
        rcu_read_lock();
153
        list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
154
                offs += snprintf(buf + offs, maxlen - offs,
155
                                 "%s%s",
156
                                 offs == 0 ? "" : " ", ca->name);
157
 
158
        }
159
        rcu_read_unlock();
160
}
161
 
162
/* Get current default congestion control */
163
void tcp_get_default_congestion_control(char *name)
164
{
165
        struct tcp_congestion_ops *ca;
166
        /* We will always have reno... */
167
        BUG_ON(list_empty(&tcp_cong_list));
168
 
169
        rcu_read_lock();
170
        ca = list_entry(tcp_cong_list.next, struct tcp_congestion_ops, list);
171
        strncpy(name, ca->name, TCP_CA_NAME_MAX);
172
        rcu_read_unlock();
173
}
174
 
175
/* Built list of non-restricted congestion control values */
176
void tcp_get_allowed_congestion_control(char *buf, size_t maxlen)
177
{
178
        struct tcp_congestion_ops *ca;
179
        size_t offs = 0;
180
 
181
        *buf = '\0';
182
        rcu_read_lock();
183
        list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
184
                if (!(ca->flags & TCP_CONG_NON_RESTRICTED))
185
                        continue;
186
                offs += snprintf(buf + offs, maxlen - offs,
187
                                 "%s%s",
188
                                 offs == 0 ? "" : " ", ca->name);
189
 
190
        }
191
        rcu_read_unlock();
192
}
193
 
194
/* Change list of non-restricted congestion control */
195
int tcp_set_allowed_congestion_control(char *val)
196
{
197
        struct tcp_congestion_ops *ca;
198
        char *clone, *name;
199
        int ret = 0;
200
 
201
        clone = kstrdup(val, GFP_USER);
202
        if (!clone)
203
                return -ENOMEM;
204
 
205
        spin_lock(&tcp_cong_list_lock);
206
        /* pass 1 check for bad entries */
207
        while ((name = strsep(&clone, " ")) && *name) {
208
                ca = tcp_ca_find(name);
209
                if (!ca) {
210
                        ret = -ENOENT;
211
                        goto out;
212
                }
213
        }
214
 
215
        /* pass 2 clear old values */
216
        list_for_each_entry_rcu(ca, &tcp_cong_list, list)
217
                ca->flags &= ~TCP_CONG_NON_RESTRICTED;
218
 
219
        /* pass 3 mark as allowed */
220
        while ((name = strsep(&val, " ")) && *name) {
221
                ca = tcp_ca_find(name);
222
                WARN_ON(!ca);
223
                if (ca)
224
                        ca->flags |= TCP_CONG_NON_RESTRICTED;
225
        }
226
out:
227
        spin_unlock(&tcp_cong_list_lock);
228
 
229
        return ret;
230
}
231
 
232
 
233
/* Change congestion control for socket */
234
int tcp_set_congestion_control(struct sock *sk, const char *name)
235
{
236
        struct inet_connection_sock *icsk = inet_csk(sk);
237
        struct tcp_congestion_ops *ca;
238
        int err = 0;
239
 
240
        rcu_read_lock();
241
        ca = tcp_ca_find(name);
242
 
243
        /* no change asking for existing value */
244
        if (ca == icsk->icsk_ca_ops)
245
                goto out;
246
 
247
#ifdef CONFIG_KMOD
248
        /* not found attempt to autoload module */
249
        if (!ca && capable(CAP_SYS_MODULE)) {
250
                rcu_read_unlock();
251
                request_module("tcp_%s", name);
252
                rcu_read_lock();
253
                ca = tcp_ca_find(name);
254
        }
255
#endif
256
        if (!ca)
257
                err = -ENOENT;
258
 
259
        else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || capable(CAP_NET_ADMIN)))
260
                err = -EPERM;
261
 
262
        else if (!try_module_get(ca->owner))
263
                err = -EBUSY;
264
 
265
        else {
266
                tcp_cleanup_congestion_control(sk);
267
                icsk->icsk_ca_ops = ca;
268
 
269
                if (sk->sk_state != TCP_CLOSE && icsk->icsk_ca_ops->init)
270
                        icsk->icsk_ca_ops->init(sk);
271
        }
272
 out:
273
        rcu_read_unlock();
274
        return err;
275
}
276
 
277
 
278
/*
279
 * Slow start is used when congestion window is less than slow start
280
 * threshold. This version implements the basic RFC2581 version
281
 * and optionally supports:
282
 *      RFC3742 Limited Slow Start        - growth limited to max_ssthresh
283
 *      RFC3465 Appropriate Byte Counting - growth limited by bytes acknowledged
284
 */
285
void tcp_slow_start(struct tcp_sock *tp)
286
{
287
        int cnt; /* increase in packets */
288
 
289
        /* RFC3465: ABC Slow start
290
         * Increase only after a full MSS of bytes is acked
291
         *
292
         * TCP sender SHOULD increase cwnd by the number of
293
         * previously unacknowledged bytes ACKed by each incoming
294
         * acknowledgment, provided the increase is not more than L
295
         */
296
        if (sysctl_tcp_abc && tp->bytes_acked < tp->mss_cache)
297
                return;
298
 
299
        if (sysctl_tcp_max_ssthresh > 0 && tp->snd_cwnd > sysctl_tcp_max_ssthresh)
300
                cnt = sysctl_tcp_max_ssthresh >> 1;     /* limited slow start */
301
        else
302
                cnt = tp->snd_cwnd;                     /* exponential increase */
303
 
304
        /* RFC3465: ABC
305
         * We MAY increase by 2 if discovered delayed ack
306
         */
307
        if (sysctl_tcp_abc > 1 && tp->bytes_acked >= 2*tp->mss_cache)
308
                cnt <<= 1;
309
        tp->bytes_acked = 0;
310
 
311
        tp->snd_cwnd_cnt += cnt;
312
        while (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
313
                tp->snd_cwnd_cnt -= tp->snd_cwnd;
314
                if (tp->snd_cwnd < tp->snd_cwnd_clamp)
315
                        tp->snd_cwnd++;
316
        }
317
}
318
EXPORT_SYMBOL_GPL(tcp_slow_start);
319
 
320
/*
321
 * TCP Reno congestion control
322
 * This is special case used for fallback as well.
323
 */
324
/* This is Jacobson's slow start and congestion avoidance.
325
 * SIGCOMM '88, p. 328.
326
 */
327
void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight, int flag)
328
{
329
        struct tcp_sock *tp = tcp_sk(sk);
330
 
331
        if (!tcp_is_cwnd_limited(sk, in_flight))
332
                return;
333
 
334
        /* In "safe" area, increase. */
335
        if (tp->snd_cwnd <= tp->snd_ssthresh)
336
                tcp_slow_start(tp);
337
 
338
        /* In dangerous area, increase slowly. */
339
        else if (sysctl_tcp_abc) {
340
                /* RFC3465: Appropriate Byte Count
341
                 * increase once for each full cwnd acked
342
                 */
343
                if (tp->bytes_acked >= tp->snd_cwnd*tp->mss_cache) {
344
                        tp->bytes_acked -= tp->snd_cwnd*tp->mss_cache;
345
                        if (tp->snd_cwnd < tp->snd_cwnd_clamp)
346
                                tp->snd_cwnd++;
347
                }
348
        } else {
349
                /* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd */
350
                if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
351
                        if (tp->snd_cwnd < tp->snd_cwnd_clamp)
352
                                tp->snd_cwnd++;
353
                        tp->snd_cwnd_cnt = 0;
354
                } else
355
                        tp->snd_cwnd_cnt++;
356
        }
357
}
358
EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);
359
 
360
/* Slow start threshold is half the congestion window (min 2) */
361
u32 tcp_reno_ssthresh(struct sock *sk)
362
{
363
        const struct tcp_sock *tp = tcp_sk(sk);
364
        return max(tp->snd_cwnd >> 1U, 2U);
365
}
366
EXPORT_SYMBOL_GPL(tcp_reno_ssthresh);
367
 
368
/* Lower bound on congestion window with halving. */
369
u32 tcp_reno_min_cwnd(const struct sock *sk)
370
{
371
        const struct tcp_sock *tp = tcp_sk(sk);
372
        return tp->snd_ssthresh/2;
373
}
374
EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd);
375
 
376
struct tcp_congestion_ops tcp_reno = {
377
        .flags          = TCP_CONG_NON_RESTRICTED,
378
        .name           = "reno",
379
        .owner          = THIS_MODULE,
380
        .ssthresh       = tcp_reno_ssthresh,
381
        .cong_avoid     = tcp_reno_cong_avoid,
382
        .min_cwnd       = tcp_reno_min_cwnd,
383
};
384
 
385
/* Initial congestion control used (until SYN)
386
 * really reno under another name so we can tell difference
387
 * during tcp_set_default_congestion_control
388
 */
389
struct tcp_congestion_ops tcp_init_congestion_ops  = {
390
        .name           = "",
391
        .owner          = THIS_MODULE,
392
        .ssthresh       = tcp_reno_ssthresh,
393
        .cong_avoid     = tcp_reno_cong_avoid,
394
        .min_cwnd       = tcp_reno_min_cwnd,
395
};
396
EXPORT_SYMBOL_GPL(tcp_init_congestion_ops);

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.