or1k/trunk/linux/linux-2.4/include/net/tcp.h  (OpenCores Subversion repository "or1k", https://opencores.org/ocsvn/or1k/or1k/trunk; blame view for rev 1774, line annotations from rev 1275, author phoenix)
/*
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system.  INET is implemented using the  BSD Socket
 *              interface as the means of communication with the user level.
 *
 *              Definitions for the TCP module.
 *
 * Version:     @(#)tcp.h       1.0.5   05/23/93
 *
 * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 */
#ifndef _TCP_H
#define _TCP_H

#define TCP_DEBUG 1
#define FASTRETRANS_DEBUG 1

/* Cancel timers, when they are not required. */
#undef TCP_CLEAR_TIMERS

#include <linux/config.h>
#include <linux/tcp.h>
#include <linux/slab.h>
#include <linux/cache.h>
#include <net/checksum.h>
#include <net/sock.h>
#include <net/snmp.h>

/* This is for all connections with a full identity, no wildcards.
 * New scheme, half the table is for TIME_WAIT, the other half is
 * for the rest.  I'll experiment with dynamic table growth later.
 */
struct tcp_ehash_bucket {
        rwlock_t        lock;
        struct sock     *chain;
} __attribute__((__aligned__(8)));

/* This is for listening sockets, thus all sockets which possess wildcards. */
#define TCP_LHTABLE_SIZE        32      /* Yes, really, this is all you need. */

/* There are a few simple rules, which allow for local port reuse by
 * an application.  In essence:
 *
 *      1) Sockets bound to different interfaces may share a local port.
 *         Failing that, goto test 2.
 *      2) If all sockets have sk->reuse set, and none of them are in
 *         TCP_LISTEN state, the port may be shared.
 *         Failing that, goto test 3.
 *      3) If all sockets are bound to a specific sk->rcv_saddr local
 *         address, and none of them are the same, the port may be
 *         shared.
 *         Failing this, the port cannot be shared.
 *
 * The interesting point, is test #2.  This is what an FTP server does
 * all day.  To optimize this case we use a specific flag bit defined
 * below.  As we add sockets to a bind bucket list, we perform a
 * check of: (newsk->reuse && (newsk->state != TCP_LISTEN))
 * As long as all sockets added to a bind bucket pass this test,
 * the flag bit will be set.
 * The resulting situation is that tcp_v[46]_verify_bind() can just check
 * for this flag bit, if it is set and the socket trying to bind has
 * sk->reuse set, we don't even have to walk the owners list at all,
 * we return that it is ok to bind this socket to the requested local port.
 *
 * Sounds like a lot of work, but it is worth it.  In a more naive
 * implementation (ie. current FreeBSD etc.) the entire list of ports
 * must be walked for each data port opened by an ftp server.  Needless
 * to say, this does not scale at all.  With a couple thousand FTP
 * users logged onto your box, isn't it nice to know that new data
 * ports are created in O(1) time?  I thought so. ;-)   -DaveM
 */
struct tcp_bind_bucket {
        unsigned short          port;
        signed short            fastreuse;
        struct tcp_bind_bucket  *next;
        struct sock             *owners;
        struct tcp_bind_bucket  **pprev;
};
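
/* Illustrative sketch, not part of the original header: one way the
 * "fastreuse" shortcut described in the comment above could be maintained
 * as sockets are added to a bind bucket.  The helper name is hypothetical;
 * the real logic lives in the bind/hash code in net/ipv4/tcp_ipv4.c.
 */
static __inline__ void example_bind_bucket_update_fastreuse(struct tcp_bind_bucket *tb,
                                                            struct sock *newsk)
{
        /* The bucket stays "fast reusable" only while every owner passes the
         * (sk->reuse && sk->state != TCP_LISTEN) test; one socket failing it
         * clears the flag for good.
         */
        if (tb->fastreuse && !(newsk->reuse && newsk->state != TCP_LISTEN))
                tb->fastreuse = 0;
}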

struct tcp_bind_hashbucket {
        spinlock_t              lock;
        struct tcp_bind_bucket  *chain;
};

extern struct tcp_hashinfo {
        /* This is for sockets with full identity only.  Sockets here will
         * always be without wildcards and will have the following invariant:
         *
         *          TCP_ESTABLISHED <= sk->state < TCP_CLOSE
         *
         * First half of the table is for sockets not in TIME_WAIT, second half
         * is for TIME_WAIT sockets only.
         */
        struct tcp_ehash_bucket *__tcp_ehash;

        /* Ok, let's try this, I give up, we do need a local binding
         * TCP hash as well as the others for fast bind/connect.
         */
        struct tcp_bind_hashbucket *__tcp_bhash;

        int __tcp_bhash_size;
        int __tcp_ehash_size;

        /* All sockets in TCP_LISTEN state will be in here.  This is the only
         * table where wildcard'd TCP sockets can exist.  Hash function here
         * is just local port number.
         */
        struct sock *__tcp_listening_hash[TCP_LHTABLE_SIZE];

        /* All the above members are written once at bootup and
         * never written again _or_ are predominantly read-access.
         *
         * Now align to a new cache line as all the following members
         * are often dirty.
         */
        rwlock_t __tcp_lhash_lock ____cacheline_aligned;
        atomic_t __tcp_lhash_users;
        wait_queue_head_t __tcp_lhash_wait;
        spinlock_t __tcp_portalloc_lock;
} tcp_hashinfo;

#define tcp_ehash       (tcp_hashinfo.__tcp_ehash)
#define tcp_bhash       (tcp_hashinfo.__tcp_bhash)
#define tcp_ehash_size  (tcp_hashinfo.__tcp_ehash_size)
#define tcp_bhash_size  (tcp_hashinfo.__tcp_bhash_size)
#define tcp_listening_hash (tcp_hashinfo.__tcp_listening_hash)
#define tcp_lhash_lock  (tcp_hashinfo.__tcp_lhash_lock)
#define tcp_lhash_users (tcp_hashinfo.__tcp_lhash_users)
#define tcp_lhash_wait  (tcp_hashinfo.__tcp_lhash_wait)
#define tcp_portalloc_lock (tcp_hashinfo.__tcp_portalloc_lock)

extern kmem_cache_t *tcp_bucket_cachep;
extern struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head,
                                                 unsigned short snum);
extern void tcp_bucket_unlock(struct sock *sk);
extern int tcp_port_rover;
extern struct sock *tcp_v4_lookup_listener(u32 addr, unsigned short hnum, int dif);

/* These are AF independent. */
static __inline__ int tcp_bhashfn(__u16 lport)
{
        return (lport & (tcp_bhash_size - 1));
}
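
/* Illustrative sketch, not part of the original header: how tcp_bhashfn()
 * and tcp_bhash are typically combined to locate the bind bucket for a
 * local port.  The helper name is hypothetical; the real users live in
 * net/ipv4/tcp_ipv4.c and keep the bucket lock held while using the result.
 */
static __inline__ struct tcp_bind_bucket *example_find_bind_bucket(unsigned short snum)
{
        struct tcp_bind_hashbucket *head = &tcp_bhash[tcp_bhashfn(snum)];
        struct tcp_bind_bucket *tb;

        spin_lock(&head->lock);
        for (tb = head->chain; tb != NULL; tb = tb->next)
                if (tb->port == snum)
                        break;
        spin_unlock(&head->lock);
        return tb;
}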

/* This is a TIME_WAIT bucket.  It works around the memory consumption
 * problems of sockets in such a state on heavily loaded servers, but
 * without violating the protocol specification.
 */
struct tcp_tw_bucket {
        /* These _must_ match the beginning of struct sock precisely.
         * XXX Yes I know this is gross, but I'd have to edit every single
         * XXX networking file if I created a "struct sock_header". -DaveM
         */
        __u32                   daddr;
        __u32                   rcv_saddr;
        __u16                   dport;
        unsigned short          num;
        int                     bound_dev_if;
        struct sock             *next;
        struct sock             **pprev;
        struct sock             *bind_next;
        struct sock             **bind_pprev;
        unsigned char           state,
                                substate; /* "zapped" is replaced with "substate" */
        __u16                   sport;
        unsigned short          family;
        unsigned char           reuse,
                                rcv_wscale; /* It is also TW bucket specific */
        atomic_t                refcnt;

        /* And these are ours. */
        int                     hashent;
        int                     timeout;
        __u32                   rcv_nxt;
        __u32                   snd_nxt;
        __u32                   rcv_wnd;
        __u32                   ts_recent;
        long                    ts_recent_stamp;
        unsigned long           ttd;
        struct tcp_bind_bucket  *tb;
        struct tcp_tw_bucket    *next_death;
        struct tcp_tw_bucket    **pprev_death;

#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
        struct in6_addr         v6_daddr;
        struct in6_addr         v6_rcv_saddr;
#endif
};

extern kmem_cache_t *tcp_timewait_cachep;

static inline void tcp_tw_put(struct tcp_tw_bucket *tw)
{
        if (atomic_dec_and_test(&tw->refcnt)) {
#ifdef INET_REFCNT_DEBUG
                printk(KERN_DEBUG "tw_bucket %p released\n", tw);
#endif
                kmem_cache_free(tcp_timewait_cachep, tw);
        }
}

extern atomic_t tcp_orphan_count;
extern int tcp_tw_count;
extern void tcp_time_wait(struct sock *sk, int state, int timeo);
extern void tcp_timewait_kill(struct tcp_tw_bucket *tw);
extern void tcp_tw_schedule(struct tcp_tw_bucket *tw, int timeo);
extern void tcp_tw_deschedule(struct tcp_tw_bucket *tw);


/* Socket demux engine toys. */
#ifdef __BIG_ENDIAN
#define TCP_COMBINED_PORTS(__sport, __dport) \
        (((__u32)(__sport)<<16) | (__u32)(__dport))
#else /* __LITTLE_ENDIAN */
#define TCP_COMBINED_PORTS(__sport, __dport) \
        (((__u32)(__dport)<<16) | (__u32)(__sport))
#endif

#if (BITS_PER_LONG == 64)
#ifdef __BIG_ENDIAN
#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \
        __u64 __name = (((__u64)(__saddr))<<32)|((__u64)(__daddr));
#else /* __LITTLE_ENDIAN */
#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \
        __u64 __name = (((__u64)(__daddr))<<32)|((__u64)(__saddr));
#endif /* __BIG_ENDIAN */
#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
        (((*((__u64 *)&((__sk)->daddr)))== (__cookie))  &&              \
         ((*((__u32 *)&((__sk)->dport)))== (__ports))   &&              \
         (!((__sk)->bound_dev_if) || ((__sk)->bound_dev_if == (__dif))))
#else /* 32-bit arch */
#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr)
#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
        (((__sk)->daddr                 == (__saddr))   &&              \
         ((__sk)->rcv_saddr             == (__daddr))   &&              \
         ((*((__u32 *)&((__sk)->dport)))== (__ports))   &&              \
         (!((__sk)->bound_dev_if) || ((__sk)->bound_dev_if == (__dif))))
#endif /* 64-bit arch */

#define TCP_IPV6_MATCH(__sk, __saddr, __daddr, __ports, __dif)                     \
        (((*((__u32 *)&((__sk)->dport)))== (__ports))                           && \
         ((__sk)->family                == AF_INET6)                            && \
         !ipv6_addr_cmp(&(__sk)->net_pinfo.af_inet6.daddr, (__saddr))           && \
         !ipv6_addr_cmp(&(__sk)->net_pinfo.af_inet6.rcv_saddr, (__daddr))       && \
         (!((__sk)->bound_dev_if) || ((__sk)->bound_dev_if == (__dif))))
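
/* Illustrative sketch, not part of the original header: how the demux
 * macros above are used while walking an established-hash chain for an
 * incoming IPv4 segment.  Here saddr/sport are the remote (packet source)
 * address and port in network byte order, daddr/hnum the local address and
 * host-order port, and the hash slot is assumed to be precomputed.  The
 * function name is hypothetical; the real lookup is
 * __tcp_v4_lookup_established() in net/ipv4/tcp_ipv4.c.
 */
static __inline__ struct sock *example_ehash_lookup(int hashent, u32 saddr, u16 sport,
                                                    u32 daddr, u16 hnum, int dif)
{
        struct tcp_ehash_bucket *head = &tcp_ehash[hashent];
        __u32 ports = TCP_COMBINED_PORTS(sport, htons(hnum));
        struct sock *sk;
        TCP_V4_ADDR_COOKIE(acookie, saddr, daddr)

        read_lock(&head->lock);
        for (sk = head->chain; sk != NULL; sk = sk->next)
                if (TCP_IPV4_MATCH(sk, acookie, saddr, daddr, ports, dif))
                        break;
        read_unlock(&head->lock);
        return sk;
}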

/* These can have wildcards, don't try too hard. */
static __inline__ int tcp_lhashfn(unsigned short num)
{
        return num & (TCP_LHTABLE_SIZE - 1);
}

static __inline__ int tcp_sk_listen_hashfn(struct sock *sk)
{
        return tcp_lhashfn(sk->num);
}

#define MAX_TCP_HEADER  (128 + MAX_HEADER)

/*
 * Never offer a window over 32767 without using window scaling. Some
 * poor stacks do signed 16bit maths!
 */
#define MAX_TCP_WINDOW          32767U

/* Minimal accepted MSS. It is (60+60+8) - (20+20). */
#define TCP_MIN_MSS             88U

/* Minimal RCV_MSS. */
#define TCP_MIN_RCVMSS          536U

/* After receiving this amount of duplicate ACKs fast retransmit starts. */
#define TCP_FASTRETRANS_THRESH 3

/* Maximal reordering. */
#define TCP_MAX_REORDERING      127

/* Maximal number of ACKs sent quickly to accelerate slow-start. */
#define TCP_MAX_QUICKACKS       16U

/* urg_data states */
#define TCP_URG_VALID   0x0100
#define TCP_URG_NOTYET  0x0200
#define TCP_URG_READ    0x0400

#define TCP_RETR1       3       /*
                                 * This is how many retries it does before it
                                 * tries to figure out if the gateway is
                                 * down. Minimal RFC value is 3; it corresponds
                                 * to ~3sec-8min depending on RTO.
                                 */

#define TCP_RETR2       15      /*
                                 * This should take at least
                                 * 90 minutes to time out.
                                 * RFC1122 says that the limit is 100 sec.
                                 * 15 is ~13-30min depending on RTO.
                                 */

#define TCP_SYN_RETRIES  5      /* number of times to retry active opening a
                                 * connection: ~180sec is RFC minimum   */

#define TCP_SYNACK_RETRIES 5    /* number of times to retry passive opening a
                                 * connection: ~180sec is RFC minimum   */


#define TCP_ORPHAN_RETRIES 7    /* number of times to retry on an orphaned
                                 * socket. 7 is ~50sec-16min.
                                 */


#define TCP_TIMEWAIT_LEN (60*HZ) /* how long to wait to destroy TIME-WAIT
                                  * state, about 60 seconds     */
#define TCP_FIN_TIMEOUT TCP_TIMEWAIT_LEN
                                 /* BSD style FIN_WAIT2 deadlock breaker.
                                  * It used to be 3min, new value is 60sec,
                                  * to combine FIN-WAIT-2 timeout with
                                  * TIME-WAIT timer.
                                  */

#define TCP_DELACK_MAX  ((unsigned)(HZ/5))      /* maximal time to delay before sending an ACK */
#if HZ >= 100
#define TCP_DELACK_MIN  ((unsigned)(HZ/25))     /* minimal time to delay before sending an ACK */
#define TCP_ATO_MIN     ((unsigned)(HZ/25))
#else
#define TCP_DELACK_MIN  4U
#define TCP_ATO_MIN     4U
#endif
#define TCP_RTO_MAX     ((unsigned)(120*HZ))
#define TCP_RTO_MIN     ((unsigned)(HZ/5))
#define TCP_TIMEOUT_INIT ((unsigned)(3*HZ))     /* RFC 1122 initial RTO value   */
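
/* Illustrative worked example, not part of the original header: with the
 * common HZ = 100 the timer constants above come out to
 *      TCP_DELACK_MAX   = HZ/5   =    20 jiffies = 200 ms
 *      TCP_DELACK_MIN   = HZ/25  =     4 jiffies =  40 ms
 *      TCP_RTO_MIN      = HZ/5   =    20 jiffies = 200 ms
 *      TCP_RTO_MAX      = 120*HZ = 12000 jiffies = 120 s
 *      TCP_TIMEOUT_INIT = 3*HZ   =   300 jiffies =   3 s
 */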

#define TCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ/2U)) /* Maximal interval between probes
                                                         * for local resources.
                                                         */

#define TCP_KEEPALIVE_TIME      (120*60*HZ)     /* two hours */
#define TCP_KEEPALIVE_PROBES    9               /* Max of 9 keepalive probes    */
#define TCP_KEEPALIVE_INTVL     (75*HZ)

#define MAX_TCP_KEEPIDLE        32767
#define MAX_TCP_KEEPINTVL       32767
#define MAX_TCP_KEEPCNT         127
#define MAX_TCP_SYNCNT          127

/* TIME_WAIT reaping mechanism. */
#define TCP_TWKILL_SLOTS        8       /* Please keep this a power of 2. */
#define TCP_TWKILL_PERIOD       (TCP_TIMEWAIT_LEN/TCP_TWKILL_SLOTS)

#define TCP_SYNQ_INTERVAL       (HZ/5)  /* Period of SYNACK timer */
#define TCP_SYNQ_HSIZE          512     /* Size of SYNACK hash table */

#define TCP_PAWS_24DAYS (60 * 60 * 24 * 24)
#define TCP_PAWS_MSL    60              /* Per-host timestamps are invalidated
                                         * after this time. It should be equal
                                         * to (or greater than) TCP_TIMEWAIT_LEN
                                         * to provide reliability equal to that
                                         * provided by the timewait state.
                                         */
#define TCP_PAWS_WINDOW 1               /* Replay window for per-host
                                         * timestamps. It must be less than
                                         * minimal timewait lifetime.
                                         */

#define TCP_TW_RECYCLE_SLOTS_LOG        5
#define TCP_TW_RECYCLE_SLOTS            (1<<TCP_TW_RECYCLE_SLOTS_LOG)

/* If time > 4sec, it is the "slow" path and no recycling is required,
   so we select the tick to get a range of about 4 seconds.
 */

#if HZ <= 16 || HZ > 4096
# error Unsupported: HZ <= 16 or HZ > 4096
#elif HZ <= 32
# define TCP_TW_RECYCLE_TICK (5+2-TCP_TW_RECYCLE_SLOTS_LOG)
#elif HZ <= 64
# define TCP_TW_RECYCLE_TICK (6+2-TCP_TW_RECYCLE_SLOTS_LOG)
#elif HZ <= 128
# define TCP_TW_RECYCLE_TICK (7+2-TCP_TW_RECYCLE_SLOTS_LOG)
#elif HZ <= 256
# define TCP_TW_RECYCLE_TICK (8+2-TCP_TW_RECYCLE_SLOTS_LOG)
#elif HZ <= 512
# define TCP_TW_RECYCLE_TICK (9+2-TCP_TW_RECYCLE_SLOTS_LOG)
#elif HZ <= 1024
# define TCP_TW_RECYCLE_TICK (10+2-TCP_TW_RECYCLE_SLOTS_LOG)
#elif HZ <= 2048
# define TCP_TW_RECYCLE_TICK (11+2-TCP_TW_RECYCLE_SLOTS_LOG)
#else
# define TCP_TW_RECYCLE_TICK (12+2-TCP_TW_RECYCLE_SLOTS_LOG)
#endif
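
/* Illustrative worked example, not part of the original header: with
 * HZ = 100 the ladder above selects TCP_TW_RECYCLE_TICK = 7+2-5 = 4, so
 * recycled timewait entries are binned in 2^4 = 16 jiffy slots and the
 * TCP_TW_RECYCLE_SLOTS = 32 slots together cover 32*16 = 512 jiffies,
 * i.e. roughly the ~4-5 second range the comment above asks for.
 */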

/*
 *      TCP option
 */

#define TCPOPT_NOP              1       /* Padding */
#define TCPOPT_EOL              0       /* End of options */
#define TCPOPT_MSS              2       /* Segment size negotiating */
#define TCPOPT_WINDOW           3       /* Window scaling */
#define TCPOPT_SACK_PERM        4       /* SACK Permitted */
#define TCPOPT_SACK             5       /* SACK Block */
#define TCPOPT_TIMESTAMP        8       /* Better RTT estimations/PAWS */

/*
 *     TCP option lengths
 */

#define TCPOLEN_MSS            4
#define TCPOLEN_WINDOW         3
#define TCPOLEN_SACK_PERM      2
#define TCPOLEN_TIMESTAMP      10

/* But this is what stacks really send out. */
#define TCPOLEN_TSTAMP_ALIGNED          12
#define TCPOLEN_WSCALE_ALIGNED          4
#define TCPOLEN_SACKPERM_ALIGNED        4
#define TCPOLEN_SACK_BASE               2
#define TCPOLEN_SACK_BASE_ALIGNED       4
#define TCPOLEN_SACK_PERBLOCK           8

#define TCP_TIME_RETRANS        1       /* Retransmit timer */
#define TCP_TIME_DACK           2       /* Delayed ack timer */
#define TCP_TIME_PROBE0         3       /* Zero window probe timer */
#define TCP_TIME_KEEPOPEN       4       /* Keepalive timer */

/* sysctl variables for tcp */
extern int sysctl_max_syn_backlog;
extern int sysctl_tcp_timestamps;
extern int sysctl_tcp_window_scaling;
extern int sysctl_tcp_sack;
extern int sysctl_tcp_fin_timeout;
extern int sysctl_tcp_tw_recycle;
extern int sysctl_tcp_keepalive_time;
extern int sysctl_tcp_keepalive_probes;
extern int sysctl_tcp_keepalive_intvl;
extern int sysctl_tcp_syn_retries;
extern int sysctl_tcp_synack_retries;
extern int sysctl_tcp_retries1;
extern int sysctl_tcp_retries2;
extern int sysctl_tcp_orphan_retries;
extern int sysctl_tcp_syncookies;
extern int sysctl_tcp_retrans_collapse;
extern int sysctl_tcp_stdurg;
extern int sysctl_tcp_rfc1337;
extern int sysctl_tcp_abort_on_overflow;
extern int sysctl_tcp_max_orphans;
extern int sysctl_tcp_max_tw_buckets;
extern int sysctl_tcp_fack;
extern int sysctl_tcp_reordering;
extern int sysctl_tcp_ecn;
extern int sysctl_tcp_dsack;
extern int sysctl_tcp_mem[3];
extern int sysctl_tcp_wmem[3];
extern int sysctl_tcp_rmem[3];
extern int sysctl_tcp_app_win;
extern int sysctl_tcp_adv_win_scale;
extern int sysctl_tcp_tw_reuse;
extern int sysctl_tcp_frto;
extern int sysctl_tcp_low_latency;
extern int sysctl_tcp_westwood;

extern atomic_t tcp_memory_allocated;
extern atomic_t tcp_sockets_allocated;
extern int tcp_memory_pressure;

struct open_request;

struct or_calltable {
        int  family;
        int  (*rtx_syn_ack)     (struct sock *sk, struct open_request *req, struct dst_entry*);
        void (*send_ack)        (struct sk_buff *skb, struct open_request *req);
        void (*destructor)      (struct open_request *req);
        void (*send_reset)      (struct sk_buff *skb);
};

struct tcp_v4_open_req {
        __u32                   loc_addr;
        __u32                   rmt_addr;
        struct ip_options       *opt;
};

#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
struct tcp_v6_open_req {
        struct in6_addr         loc_addr;
        struct in6_addr         rmt_addr;
        struct sk_buff          *pktopts;
        int                     iif;
};
#endif

/* this structure is too big */
struct open_request {
        struct open_request     *dl_next; /* Must be first member! */
        __u32                   rcv_isn;
        __u32                   snt_isn;
        __u16                   rmt_port;
        __u16                   mss;
        __u8                    retrans;
        __u8                    __pad;
        __u16   snd_wscale : 4,
                rcv_wscale : 4,
                tstamp_ok : 1,
                sack_ok : 1,
                wscale_ok : 1,
                ecn_ok : 1,
                acked : 1;
        /* The following two fields can be easily recomputed I think -AK */
        __u32                   window_clamp;   /* window clamp at creation time */
        __u32                   rcv_wnd;        /* rcv_wnd offered first time */
        __u32                   ts_recent;
        unsigned long           expires;
        struct or_calltable     *class;
        struct sock             *sk;
        union {
                struct tcp_v4_open_req v4_req;
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
                struct tcp_v6_open_req v6_req;
#endif
        } af;
};

/* SLAB cache for open requests. */
extern kmem_cache_t *tcp_openreq_cachep;

#define tcp_openreq_alloc()             kmem_cache_alloc(tcp_openreq_cachep, SLAB_ATOMIC)
#define tcp_openreq_fastfree(req)       kmem_cache_free(tcp_openreq_cachep, req)

static inline void tcp_openreq_free(struct open_request *req)
{
        req->class->destructor(req);
        tcp_openreq_fastfree(req);
}

#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
#define TCP_INET_FAMILY(fam) ((fam) == AF_INET)
#else
#define TCP_INET_FAMILY(fam) 1
#endif

/*
 *      Pointers to address related TCP functions
 *      (i.e. things that depend on the address family)
 *
 *      BUGGG_FUTURE: all the idea behind this struct is wrong.
 *      It mixes socket frontend with transport function.
 *      With port sharing between IPv6/v4 it gives the only advantage,
 *      only poor IPv6 needs to permanently recheck, that it
 *      is still IPv6 8)8) It must be cleaned up as soon as possible.
 *                                              --ANK (980802)
 */

struct tcp_func {
        int                     (*queue_xmit)           (struct sk_buff *skb,
                                                         int ipfragok);

        void                    (*send_check)           (struct sock *sk,
                                                         struct tcphdr *th,
                                                         int len,
                                                         struct sk_buff *skb);

        int                     (*rebuild_header)       (struct sock *sk);

        int                     (*conn_request)         (struct sock *sk,
                                                         struct sk_buff *skb);

        struct sock *           (*syn_recv_sock)        (struct sock *sk,
                                                         struct sk_buff *skb,
                                                         struct open_request *req,
                                                         struct dst_entry *dst);

        int                     (*remember_stamp)       (struct sock *sk);

        __u16                   net_header_len;

        int                     (*setsockopt)           (struct sock *sk,
                                                         int level,
                                                         int optname,
                                                         char *optval,
                                                         int optlen);

        int                     (*getsockopt)           (struct sock *sk,
                                                         int level,
                                                         int optname,
                                                         char *optval,
                                                         int *optlen);


        void                    (*addr2sockaddr)        (struct sock *sk,
                                                         struct sockaddr *);

        int sockaddr_len;
};

/*
 * The next routines deal with comparing 32 bit unsigned ints
 * and worry about wraparound (automatic with unsigned arithmetic).
 */

static inline int before(__u32 seq1, __u32 seq2)
{
        return (__s32)(seq1-seq2) < 0;
}

static inline int after(__u32 seq1, __u32 seq2)
{
        return (__s32)(seq2-seq1) < 0;
}


/* is s2<=s1<=s3 ? */
static inline int between(__u32 seq1, __u32 seq2, __u32 seq3)
{
        return seq3 - seq2 >= seq1 - seq2;
}
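
/* Illustrative examples, not part of the original header: because the
 * helpers above compare with 32-bit modular arithmetic, they stay correct
 * across sequence number wraparound, e.g.
 *
 *      before(0xfffffff0, 0x00000010) == 1     (0x20 bytes "behind" across the wrap)
 *      after (0x00000010, 0xfffffff0) == 1
 *      between(0x00000005, 0xfffffff0, 0x00000010) == 1
 */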


extern struct proto tcp_prot;

extern struct tcp_mib tcp_statistics[NR_CPUS*2];
#define TCP_INC_STATS(field)            SNMP_INC_STATS(tcp_statistics, field)
#define TCP_INC_STATS_BH(field)         SNMP_INC_STATS_BH(tcp_statistics, field)
#define TCP_INC_STATS_USER(field)       SNMP_INC_STATS_USER(tcp_statistics, field)
#define TCP_ADD_STATS_BH(field, val)    SNMP_ADD_STATS_BH(tcp_statistics, field, val)
#define TCP_ADD_STATS_USER(field, val)  SNMP_ADD_STATS_USER(tcp_statistics, field, val)

extern void                     tcp_put_port(struct sock *sk);
extern void                     __tcp_put_port(struct sock *sk);
extern void                     tcp_inherit_port(struct sock *sk, struct sock *child);

extern void                     tcp_v4_err(struct sk_buff *skb, u32);

extern void                     tcp_shutdown (struct sock *sk, int how);

extern int                      tcp_v4_rcv(struct sk_buff *skb);

extern int                      tcp_v4_remember_stamp(struct sock *sk);

extern int                      tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw);

extern int                      tcp_sendmsg(struct sock *sk, struct msghdr *msg, int size);
extern ssize_t                  tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags);

extern int                      tcp_ioctl(struct sock *sk,
                                          int cmd,
                                          unsigned long arg);

extern int                      tcp_rcv_state_process(struct sock *sk,
                                                      struct sk_buff *skb,
                                                      struct tcphdr *th,
                                                      unsigned len);

extern int                      tcp_rcv_established(struct sock *sk,
                                                    struct sk_buff *skb,
                                                    struct tcphdr *th,
                                                    unsigned len);

enum tcp_ack_state_t
{
        TCP_ACK_SCHED = 1,
        TCP_ACK_TIMER = 2,
        TCP_ACK_PUSHED= 4
};

static inline void tcp_schedule_ack(struct tcp_opt *tp)
{
        tp->ack.pending |= TCP_ACK_SCHED;
}

static inline int tcp_ack_scheduled(struct tcp_opt *tp)
{
        return tp->ack.pending&TCP_ACK_SCHED;
}

static __inline__ void tcp_dec_quickack_mode(struct tcp_opt *tp)
{
        if (tp->ack.quick && --tp->ack.quick == 0) {
                /* Leaving quickack mode we deflate ATO. */
                tp->ack.ato = TCP_ATO_MIN;
        }
}

extern void tcp_enter_quickack_mode(struct tcp_opt *tp);

static __inline__ void tcp_delack_init(struct tcp_opt *tp)
{
        memset(&tp->ack, 0, sizeof(tp->ack));
}

static inline void tcp_clear_options(struct tcp_opt *tp)
{
        tp->tstamp_ok = tp->sack_ok = tp->wscale_ok = tp->snd_wscale = 0;
}

enum tcp_tw_status
{
        TCP_TW_SUCCESS = 0,
        TCP_TW_RST = 1,
        TCP_TW_ACK = 2,
        TCP_TW_SYN = 3
};


extern enum tcp_tw_status       tcp_timewait_state_process(struct tcp_tw_bucket *tw,
                                                           struct sk_buff *skb,
                                                           struct tcphdr *th,
                                                           unsigned len);

extern struct sock *            tcp_check_req(struct sock *sk,struct sk_buff *skb,
                                              struct open_request *req,
                                              struct open_request **prev);
extern int                      tcp_child_process(struct sock *parent,
                                                  struct sock *child,
                                                  struct sk_buff *skb);
extern void                     tcp_enter_frto(struct sock *sk);
extern void                     tcp_enter_loss(struct sock *sk, int how);
extern void                     tcp_clear_retrans(struct tcp_opt *tp);
extern void                     tcp_update_metrics(struct sock *sk);

extern void                     tcp_close(struct sock *sk,
                                          long timeout);
extern struct sock *            tcp_accept(struct sock *sk, int flags, int *err);
extern unsigned int             tcp_poll(struct file * file, struct socket *sock, struct poll_table_struct *wait);
extern void                     tcp_write_space(struct sock *sk);

extern int                      tcp_getsockopt(struct sock *sk, int level,
                                               int optname, char *optval,
                                               int *optlen);
extern int                      tcp_setsockopt(struct sock *sk, int level,
                                               int optname, char *optval,
                                               int optlen);
extern void                     tcp_set_keepalive(struct sock *sk, int val);
extern int                      tcp_recvmsg(struct sock *sk,
                                            struct msghdr *msg,
                                            int len, int nonblock,
                                            int flags, int *addr_len);

extern int                      tcp_listen_start(struct sock *sk);

extern void                     tcp_parse_options(struct sk_buff *skb,
                                                  struct tcp_opt *tp,
                                                  int estab);

/*
 *      TCP v4 functions exported for the inet6 API
 */

extern int                      tcp_v4_rebuild_header(struct sock *sk);

extern int                      tcp_v4_build_header(struct sock *sk,
                                                    struct sk_buff *skb);

extern void                     tcp_v4_send_check(struct sock *sk,
                                                  struct tcphdr *th, int len,
                                                  struct sk_buff *skb);

extern int                      tcp_v4_conn_request(struct sock *sk,
                                                    struct sk_buff *skb);

extern struct sock *            tcp_create_openreq_child(struct sock *sk,
                                                         struct open_request *req,
                                                         struct sk_buff *skb);

extern struct sock *            tcp_v4_syn_recv_sock(struct sock *sk,
                                                     struct sk_buff *skb,
                                                     struct open_request *req,
                                                     struct dst_entry *dst);

extern int                      tcp_v4_do_rcv(struct sock *sk,
                                              struct sk_buff *skb);

extern int                      tcp_v4_connect(struct sock *sk,
                                               struct sockaddr *uaddr,
                                               int addr_len);

extern int                      tcp_connect(struct sock *sk);

extern struct sk_buff *         tcp_make_synack(struct sock *sk,
                                                struct dst_entry *dst,
                                                struct open_request *req);

extern int                      tcp_disconnect(struct sock *sk, int flags);

extern void                     tcp_unhash(struct sock *sk);

extern int                      tcp_v4_hash_connecting(struct sock *sk);


/* From syncookies.c */
extern struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
                                    struct ip_options *opt);
extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb,
                                     __u16 *mss);

/* tcp_output.c */

extern int tcp_write_xmit(struct sock *, int nonagle);
extern int tcp_retransmit_skb(struct sock *, struct sk_buff *);
extern void tcp_xmit_retransmit_queue(struct sock *);
extern void tcp_simple_retransmit(struct sock *);

extern void tcp_send_probe0(struct sock *);
extern void tcp_send_partial(struct sock *);
extern int  tcp_write_wakeup(struct sock *);
extern void tcp_send_fin(struct sock *sk);
extern void tcp_send_active_reset(struct sock *sk, int priority);
extern int  tcp_send_synack(struct sock *);
extern int  tcp_transmit_skb(struct sock *, struct sk_buff *);
extern void tcp_send_skb(struct sock *, struct sk_buff *, int force_queue, unsigned mss_now);
extern void tcp_push_one(struct sock *, unsigned mss_now);
extern void tcp_send_ack(struct sock *sk);
extern void tcp_send_delayed_ack(struct sock *sk);

/* tcp_timer.c */
extern void tcp_init_xmit_timers(struct sock *);
extern void tcp_clear_xmit_timers(struct sock *);

extern void tcp_delete_keepalive_timer (struct sock *);
extern void tcp_reset_keepalive_timer (struct sock *, unsigned long);
extern int tcp_sync_mss(struct sock *sk, u32 pmtu);

extern const char timer_bug_msg[];

/* Read 'sendfile()'-style from a TCP socket */
typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *,
                                unsigned int, size_t);
extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
                         sk_read_actor_t recv_actor);

static inline void tcp_clear_xmit_timer(struct sock *sk, int what)
{
        struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;

        switch (what) {
        case TCP_TIME_RETRANS:
        case TCP_TIME_PROBE0:
                tp->pending = 0;

#ifdef TCP_CLEAR_TIMERS
                if (timer_pending(&tp->retransmit_timer) &&
                    del_timer(&tp->retransmit_timer))
                        __sock_put(sk);
#endif
                break;
        case TCP_TIME_DACK:
                tp->ack.blocked = 0;
                tp->ack.pending = 0;

#ifdef TCP_CLEAR_TIMERS
                if (timer_pending(&tp->delack_timer) &&
                    del_timer(&tp->delack_timer))
                        __sock_put(sk);
#endif
                break;
        default:
                printk(timer_bug_msg);
                return;
        };

}

/*
 *      Reset the retransmission timer
 */
static inline void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long when)
{
        struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;

        if (when > TCP_RTO_MAX) {
#ifdef TCP_DEBUG
                printk(KERN_DEBUG "reset_xmit_timer sk=%p %d when=0x%lx, caller=%p\n", sk, what, when, current_text_addr());
#endif
                when = TCP_RTO_MAX;
        }

        switch (what) {
        case TCP_TIME_RETRANS:
        case TCP_TIME_PROBE0:
                tp->pending = what;
                tp->timeout = jiffies+when;
                if (!mod_timer(&tp->retransmit_timer, tp->timeout))
                        sock_hold(sk);
                break;

        case TCP_TIME_DACK:
                tp->ack.pending |= TCP_ACK_TIMER;
                tp->ack.timeout = jiffies+when;
                if (!mod_timer(&tp->delack_timer, tp->ack.timeout))
                        sock_hold(sk);
                break;

        default:
                printk(KERN_DEBUG "bug: unknown timer value\n");
        };
}

/* Compute the current effective MSS, taking SACKs and IP options,
 * and even PMTU discovery events into account.
 */

static __inline__ unsigned int tcp_current_mss(struct sock *sk)
{
        struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
        struct dst_entry *dst = __sk_dst_get(sk);
        int mss_now = tp->mss_cache;

        if (dst && dst->pmtu != tp->pmtu_cookie)
                mss_now = tcp_sync_mss(sk, dst->pmtu);

        if (tp->eff_sacks)
                mss_now -= (TCPOLEN_SACK_BASE_ALIGNED +
                            (tp->eff_sacks * TCPOLEN_SACK_PERBLOCK));
        return mss_now;
}
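
/* Illustrative worked example, not part of the original header: with
 * tp->mss_cache == 1460 and tp->eff_sacks == 2, the SACK option costs
 * TCPOLEN_SACK_BASE_ALIGNED + 2*TCPOLEN_SACK_PERBLOCK = 4 + 16 = 20 bytes,
 * so tcp_current_mss() returns 1440.
 */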

/* Initialize RCV_MSS value.
 * RCV_MSS is our guess about the MSS used by the peer.
 * We don't have any direct information about the MSS.
 * It's better to underestimate the RCV_MSS rather than overestimate it.
 * Overestimating it makes us ACK less frequently than needed.
 * Underestimates are easier to detect and fix by tcp_measure_rcv_mss().
 */

static inline void tcp_initialize_rcv_mss(struct sock *sk)
{
        struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
        unsigned int hint = min(tp->advmss, tp->mss_cache);

        hint = min(hint, tp->rcv_wnd/2);
        hint = min(hint, TCP_MIN_RCVMSS);
        hint = max(hint, TCP_MIN_MSS);

        tp->ack.rcv_mss = hint;
}

static __inline__ void __tcp_fast_path_on(struct tcp_opt *tp, u32 snd_wnd)
{
        tp->pred_flags = htonl((tp->tcp_header_len << 26) |
                               ntohl(TCP_FLAG_ACK) |
                               snd_wnd);
}

static __inline__ void tcp_fast_path_on(struct tcp_opt *tp)
{
        __tcp_fast_path_on(tp, tp->snd_wnd>>tp->snd_wscale);
}

static inline void tcp_fast_path_check(struct sock *sk, struct tcp_opt *tp)
{
        if (skb_queue_len(&tp->out_of_order_queue) == 0 &&
            tp->rcv_wnd &&
            atomic_read(&sk->rmem_alloc) < sk->rcvbuf &&
            !tp->urg_data)
                tcp_fast_path_on(tp);
}

/* Compute the actual receive window we are currently advertising.
 * Rcv_nxt can be after the window if our peer pushes more data
 * than the offered window.
 */
static __inline__ u32 tcp_receive_window(struct tcp_opt *tp)
{
        s32 win = tp->rcv_wup + tp->rcv_wnd - tp->rcv_nxt;

        if (win < 0)
                win = 0;
        return (u32) win;
}

/* Choose a new window, without checks for shrinking, and without
 * scaling applied to the result.  The caller does these things
 * if necessary.  This is a "raw" window selection.
 */
extern u32      __tcp_select_window(struct sock *sk);

/* TCP timestamps are only 32 bits; this causes a slight
 * complication on 64-bit systems since we store a snapshot
 * of jiffies in the buffer control blocks below.  We deliberately
 * use only the low 32 bits of jiffies and hide the ugly
 * casts with the following macro.
 */
#define tcp_time_stamp          ((__u32)(jiffies))

/* This is what the send packet queueing engine uses to pass
 * TCP per-packet control information to the transmission
 * code.  We also store the host-order sequence numbers in
 * here too.  This is 36 bytes on 32-bit architectures,
 * 40 bytes on 64-bit machines; if this grows, please adjust
 * skbuff.h:skbuff->cb[xxx] size appropriately.
 */
struct tcp_skb_cb {
        union {
                struct inet_skb_parm    h4;
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
                struct inet6_skb_parm   h6;
#endif
        } header;       /* For incoming frames          */
        __u32           seq;            /* Starting sequence number     */
        __u32           end_seq;        /* SEQ + FIN + SYN + datalen    */
        __u32           when;           /* used to compute rtt's        */
        __u8            flags;          /* TCP header flags.            */

        /* NOTE: These must match up to the flags byte in a
         *       real TCP header.
         */
#define TCPCB_FLAG_FIN          0x01
#define TCPCB_FLAG_SYN          0x02
#define TCPCB_FLAG_RST          0x04
#define TCPCB_FLAG_PSH          0x08
#define TCPCB_FLAG_ACK          0x10
#define TCPCB_FLAG_URG          0x20
#define TCPCB_FLAG_ECE          0x40
#define TCPCB_FLAG_CWR          0x80

        __u8            sacked;         /* State flags for SACK/FACK.   */
#define TCPCB_SACKED_ACKED      0x01    /* SKB ACK'd by a SACK block    */
#define TCPCB_SACKED_RETRANS    0x02    /* SKB retransmitted            */
#define TCPCB_LOST              0x04    /* SKB is lost                  */
#define TCPCB_TAGBITS           0x07    /* All tag bits                 */

#define TCPCB_EVER_RETRANS      0x80    /* Ever retransmitted frame     */
#define TCPCB_RETRANS           (TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS)

#define TCPCB_URG               0x20    /* Urgent pointer advanced here */

#define TCPCB_AT_TAIL           (TCPCB_URG)

        __u16           urg_ptr;        /* Valid when the URG flag is set. */
        __u32           ack_seq;        /* Sequence number ACK'd        */
};

#define TCP_SKB_CB(__skb)       ((struct tcp_skb_cb *)&((__skb)->cb[0]))
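
/* Illustrative sketch, not part of the original header: how transmit-side
 * code typically fills in the control block before a segment is queued.
 * The helper name is hypothetical; tcp_sendmsg()/tcp_send_skb() and friends
 * in net/ipv4 are the real users of TCP_SKB_CB().
 */
static __inline__ void example_init_tcp_skb_cb(struct sk_buff *skb, __u32 seq,
                                               unsigned int datalen, __u8 flags)
{
        struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);

        tcb->seq = seq;
        /* end_seq counts the data plus one for SYN and one for FIN */
        tcb->end_seq = seq + datalen +
                ((flags & TCPCB_FLAG_SYN) ? 1 : 0) +
                ((flags & TCPCB_FLAG_FIN) ? 1 : 0);
        tcb->flags = flags;
        tcb->sacked = 0;
        tcb->urg_ptr = 0;
        tcb->when = 0;          /* stamped with tcp_time_stamp at transmit time */
}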

#define for_retrans_queue(skb, sk, tp) \
                for (skb = (sk)->write_queue.next;                      \
                     (skb != (tp)->send_head) &&                        \
                     (skb != (struct sk_buff *)&(sk)->write_queue);     \
                     skb=skb->next)


#include <net/tcp_ecn.h>


/*
 *      Compute minimal free write space needed to queue new packets.
 */
static inline int tcp_min_write_space(struct sock *sk)
{
        return sk->wmem_queued/2;
}

static inline int tcp_wspace(struct sock *sk)
{
        return sk->sndbuf - sk->wmem_queued;
}


/* This determines how many packets are "in the network" to the best
 * of our knowledge.  In many cases it is conservative, but where
 * detailed information is available from the receiver (via SACK
 * blocks etc.) we can make more aggressive calculations.
 *
 * Use this for decisions involving congestion control, use just
 * tp->packets_out to determine if the send queue is empty or not.
 *
 * Read this equation as:
 *
 *      "Packets sent once on transmission queue" MINUS
 *      "Packets left network, but not honestly ACKed yet" PLUS
 *      "Packets fast retransmitted"
 */
static __inline__ unsigned int tcp_packets_in_flight(struct tcp_opt *tp)
{
        return tp->packets_out - tp->left_out + tp->retrans_out;
}
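
/* Illustrative worked example, not part of the original header: with
 * packets_out = 10, left_out = sacked_out + lost_out = 3 and
 * retrans_out = 2, tcp_packets_in_flight() is 10 - 3 + 2 = 9 segments
 * still assumed to be in the network.
 */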

/* Recalculate snd_ssthresh; we want to set it to:
 *
 *      one half the current congestion window, but no
 *      less than two segments
 */
static inline __u32 tcp_recalc_ssthresh(struct tcp_opt *tp)
{
        return max(tp->snd_cwnd >> 1U, 2U);
}

/* If cwnd > ssthresh, we may raise ssthresh to be half-way to cwnd.
 * The exception is the rate-halving phase, when cwnd is decreasing towards
 * ssthresh.
 */
static inline __u32 tcp_current_ssthresh(struct tcp_opt *tp)
{
        if ((1<<tp->ca_state)&(TCPF_CA_CWR|TCPF_CA_Recovery))
                return tp->snd_ssthresh;
        else
                return max(tp->snd_ssthresh,
                           ((tp->snd_cwnd >> 1) +
                            (tp->snd_cwnd >> 2)));
}
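
/* Illustrative worked example, not part of the original header: outside the
 * CWR/Recovery states, with snd_cwnd = 16 and snd_ssthresh = 8 the value
 * above is max(8, (16>>1) + (16>>2)) = 12, i.e. three quarters of cwnd and
 * thus half-way between ssthresh and cwnd as the comment describes.
 */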

static inline void tcp_sync_left_out(struct tcp_opt *tp)
{
        if (tp->sack_ok && tp->sacked_out >= tp->packets_out - tp->lost_out)
                tp->sacked_out = tp->packets_out - tp->lost_out;
        tp->left_out = tp->sacked_out + tp->lost_out;
}

extern void tcp_cwnd_application_limited(struct sock *sk);

/* Congestion window validation. (RFC2861) */

static inline void tcp_cwnd_validate(struct sock *sk, struct tcp_opt *tp)
{
        if (tp->packets_out >= tp->snd_cwnd) {
                /* Network is fed fully. */
                tp->snd_cwnd_used = 0;
                tp->snd_cwnd_stamp = tcp_time_stamp;
        } else {
                /* Network starves. */
                if (tp->packets_out > tp->snd_cwnd_used)
                        tp->snd_cwnd_used = tp->packets_out;

                if ((s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= tp->rto)
                        tcp_cwnd_application_limited(sk);
        }
}

/* Set slow start threshold and cwnd, without falling back to slow start */
static inline void __tcp_enter_cwr(struct tcp_opt *tp)
{
        tp->undo_marker = 0;
        tp->snd_ssthresh = tcp_recalc_ssthresh(tp);
        tp->snd_cwnd = min(tp->snd_cwnd,
                           tcp_packets_in_flight(tp) + 1U);
        tp->snd_cwnd_cnt = 0;
        tp->high_seq = tp->snd_nxt;
        tp->snd_cwnd_stamp = tcp_time_stamp;
        TCP_ECN_queue_cwr(tp);
}

static inline void tcp_enter_cwr(struct tcp_opt *tp)
{
        tp->prior_ssthresh = 0;
        if (tp->ca_state < TCP_CA_CWR) {
                __tcp_enter_cwr(tp);
                tp->ca_state = TCP_CA_CWR;
        }
}

extern __u32 tcp_init_cwnd(struct tcp_opt *tp);

/* Slow start with delack produces 3 packets of burst, so that
 * it is safe "de facto".
 */
static __inline__ __u32 tcp_max_burst(struct tcp_opt *tp)
{
        return 3;
}

static __inline__ int tcp_minshall_check(struct tcp_opt *tp)
{
        return after(tp->snd_sml,tp->snd_una) &&
                !after(tp->snd_sml, tp->snd_nxt);
}

static __inline__ void tcp_minshall_update(struct tcp_opt *tp, int mss, struct sk_buff *skb)
{
        if (skb->len < mss)
                tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
}

/* Return 0 if the packet can be sent now without violating Nagle's rules:
   1. It is full sized.
   2. Or it contains FIN.
   3. Or TCP_NODELAY was set.
   4. Or TCP_CORK is not set, and all sent packets are ACKed.
      With Minshall's modification: all sent small packets are ACKed.
 */

static __inline__ int
tcp_nagle_check(struct tcp_opt *tp, struct sk_buff *skb, unsigned mss_now, int nonagle)
{
        return (skb->len < mss_now &&
                !(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
                (nonagle == 2 ||
                 (!nonagle &&
                  tp->packets_out &&
                  tcp_minshall_check(tp))));
}
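
/* Illustrative worked example, not part of the original header, assuming
 * the usual 2.4 convention for tp->nonagle (0 = Nagle enabled,
 * 1 = TCP_NODELAY, 2 = TCP_CORK).  For a 100 byte segment with no FIN,
 * mss_now = 1460 and packets already in flight:
 *      nonagle == 2 (corked)      -> tcp_nagle_check() != 0, hold it back
 *      nonagle == 0 (Nagle on)    -> held back while unacked small data exists
 *      nonagle == 1 (TCP_NODELAY) -> tcp_nagle_check() == 0, may send now
 */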

/* This checks if the data bearing packet SKB (usually tp->send_head)
 * should be put on the wire right now.
 */
static __inline__ int tcp_snd_test(struct tcp_opt *tp, struct sk_buff *skb,
                                   unsigned cur_mss, int nonagle)
{
        /*      RFC 1122 - section 4.2.3.4
         *
         *      We must queue if
         *
         *      a) The right edge of this frame exceeds the window
         *      b) There are packets in flight and we have a small segment
         *         [SWS avoidance and Nagle algorithm]
         *         (part of SWS is done on packetization)
         *         Minshall version sounds: there are no _small_
         *         segments in flight. (tcp_nagle_check)
         *      c) We have too many packets 'in flight'
         *
         *      Don't use the nagle rule for urgent data (or
         *      for the final FIN -DaveM).
         *
         *      Also, Nagle rule does not apply to frames, which
         *      sit in the middle of queue (they have no chances
         *      to get new data) and if room at tail of skb is
         *      not enough to save something seriously (<32 for now).
         */

        /* Don't be strict about the congestion window for the
         * final FIN frame.  -DaveM
         */
        return ((nonagle==1 || tp->urg_mode
                 || !tcp_nagle_check(tp, skb, cur_mss, nonagle)) &&
                ((tcp_packets_in_flight(tp) < tp->snd_cwnd) ||
                 (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)) &&
                !after(TCP_SKB_CB(skb)->end_seq, tp->snd_una + tp->snd_wnd));
}

static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_opt *tp)
{
        if (!tp->packets_out && !tp->pending)
                tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0, tp->rto);
}

static __inline__ int tcp_skb_is_last(struct sock *sk, struct sk_buff *skb)
{
        return (skb->next == (struct sk_buff*)&sk->write_queue);
}

/* Push out any pending frames which were held back due to
 * TCP_CORK or attempt at coalescing tiny packets.
 * The socket must be locked by the caller.
 */
static __inline__ void __tcp_push_pending_frames(struct sock *sk,
                                                 struct tcp_opt *tp,
                                                 unsigned cur_mss,
                                                 int nonagle)
{
        struct sk_buff *skb = tp->send_head;

        if (skb) {
                if (!tcp_skb_is_last(sk, skb))
                        nonagle = 1;
                if (!tcp_snd_test(tp, skb, cur_mss, nonagle) ||
                    tcp_write_xmit(sk, nonagle))
                        tcp_check_probe_timer(sk, tp);
        }
        tcp_cwnd_validate(sk, tp);
}

static __inline__ void tcp_push_pending_frames(struct sock *sk,
                                               struct tcp_opt *tp)
{
        __tcp_push_pending_frames(sk, tp, tcp_current_mss(sk), tp->nonagle);
}

static __inline__ int tcp_may_send_now(struct sock *sk, struct tcp_opt *tp)
{
        struct sk_buff *skb = tp->send_head;

        return (skb &&
                tcp_snd_test(tp, skb, tcp_current_mss(sk),
                             tcp_skb_is_last(sk, skb) ? 1 : tp->nonagle));
}

static __inline__ void tcp_init_wl(struct tcp_opt *tp, u32 ack, u32 seq)
{
        tp->snd_wl1 = seq;
}

static __inline__ void tcp_update_wl(struct tcp_opt *tp, u32 ack, u32 seq)
{
        tp->snd_wl1 = seq;
}

extern void                     tcp_destroy_sock(struct sock *sk);


/*
 * Calculate(/check) TCP checksum
 */
static __inline__ u16 tcp_v4_check(struct tcphdr *th, int len,
                                   unsigned long saddr, unsigned long daddr,
                                   unsigned long base)
{
        return csum_tcpudp_magic(saddr,daddr,len,IPPROTO_TCP,base);
}
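
/* Illustrative sketch, not part of the original header: using the helper
 * above to checksum an outgoing segment that is linear in memory.  The
 * function name is hypothetical; the real transmit path does the
 * equivalent work in tcp_v4_send_check().
 */
static __inline__ void example_fill_tcp_checksum(struct sock *sk, struct tcphdr *th, int len)
{
        th->check = 0;
        th->check = tcp_v4_check(th, len, sk->saddr, sk->daddr,
                                 csum_partial((char *)th, len, 0));
}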
1302
 
1303
static __inline__ int __tcp_checksum_complete(struct sk_buff *skb)
1304
{
1305
        return (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
1306
}
1307
 
1308
static __inline__ int tcp_checksum_complete(struct sk_buff *skb)
1309
{
1310
        return skb->ip_summed != CHECKSUM_UNNECESSARY &&
1311
                __tcp_checksum_complete(skb);
1312
}
1313
 
/* Prequeue for VJ style copy to user, combined with checksumming. */

static __inline__ void tcp_prequeue_init(struct tcp_opt *tp)
{
        tp->ucopy.task = NULL;
        tp->ucopy.len = 0;
        tp->ucopy.memory = 0;
        skb_queue_head_init(&tp->ucopy.prequeue);
}

/* Packets are added to the VJ-style prequeue for processing in process
 * context, if a reader task is waiting. Apparently, this exciting
 * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
 * failed somewhere. Latency? Burstiness? Well, at least now we will
 * see why it failed. 8)8)                               --ANK
 *
 * NOTE: is this not too big to inline?
 */
static __inline__ int tcp_prequeue(struct sock *sk, struct sk_buff *skb)
{
        struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;

        if (!sysctl_tcp_low_latency && tp->ucopy.task) {
                __skb_queue_tail(&tp->ucopy.prequeue, skb);
                tp->ucopy.memory += skb->truesize;
                if (tp->ucopy.memory > sk->rcvbuf) {
                        struct sk_buff *skb1;

                        if (sk->lock.users)
                                out_of_line_bug();

                        while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
                                sk->backlog_rcv(sk, skb1);
                                NET_INC_STATS_BH(TCPPrequeueDropped);
                        }

                        tp->ucopy.memory = 0;
                } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
                        wake_up_interruptible(sk->sleep);
                        if (!tcp_ack_scheduled(tp))
                                tcp_reset_xmit_timer(sk, TCP_TIME_DACK, (3*TCP_RTO_MIN)/4);
                }
                return 1;
        }
        return 0;
}


#undef STATE_TRACE

#ifdef STATE_TRACE
static char *statename[]={
        "Unused","Established","Syn Sent","Syn Recv",
        "Fin Wait 1","Fin Wait 2","Time Wait", "Close",
        "Close Wait","Last ACK","Listen","Closing"
};
#endif

static __inline__ void tcp_set_state(struct sock *sk, int state)
{
        int oldstate = sk->state;

        switch (state) {
        case TCP_ESTABLISHED:
                if (oldstate != TCP_ESTABLISHED)
                        TCP_INC_STATS(TcpCurrEstab);
                break;

        case TCP_CLOSE:
                if (oldstate == TCP_CLOSE_WAIT || oldstate == TCP_ESTABLISHED)
                        TCP_INC_STATS(TcpEstabResets);

                sk->prot->unhash(sk);
                if (sk->prev && !(sk->userlocks&SOCK_BINDPORT_LOCK))
                        tcp_put_port(sk);
                /* fall through */
        default:
                if (oldstate==TCP_ESTABLISHED)
                        tcp_statistics[smp_processor_id()*2+!in_softirq()].TcpCurrEstab--;
        }

        /* Change state AFTER socket is unhashed to avoid closed
         * socket sitting in hash tables.
         */
        sk->state = state;

#ifdef STATE_TRACE
        SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n",sk, statename[oldstate],statename[state]);
#endif
}

static __inline__ void tcp_done(struct sock *sk)
{
        tcp_set_state(sk, TCP_CLOSE);
        tcp_clear_xmit_timers(sk);

        sk->shutdown = SHUTDOWN_MASK;

        if (!sk->dead)
                sk->state_change(sk);
        else
                tcp_destroy_sock(sk);
}

static __inline__ void tcp_sack_reset(struct tcp_opt *tp)
{
        tp->dsack = 0;
        tp->eff_sacks = 0;
        tp->num_sacks = 0;
}

static __inline__ void tcp_build_and_update_options(__u32 *ptr, struct tcp_opt *tp, __u32 tstamp)
{
        if (tp->tstamp_ok) {
                *ptr++ = __constant_htonl((TCPOPT_NOP << 24) |
                                          (TCPOPT_NOP << 16) |
                                          (TCPOPT_TIMESTAMP << 8) |
                                          TCPOLEN_TIMESTAMP);
                *ptr++ = htonl(tstamp);
                *ptr++ = htonl(tp->ts_recent);
        }
        if (tp->eff_sacks) {
                struct tcp_sack_block *sp = tp->dsack ? tp->duplicate_sack : tp->selective_acks;
                int this_sack;

                *ptr++ = __constant_htonl((TCPOPT_NOP << 24) |
                                          (TCPOPT_NOP << 16) |
                                          (TCPOPT_SACK << 8) |
                                          (TCPOLEN_SACK_BASE +
                                           (tp->eff_sacks * TCPOLEN_SACK_PERBLOCK)));
                for(this_sack = 0; this_sack < tp->eff_sacks; this_sack++) {
                        *ptr++ = htonl(sp[this_sack].start_seq);
                        *ptr++ = htonl(sp[this_sack].end_seq);
                }
                if (tp->dsack) {
                        tp->dsack = 0;
                        tp->eff_sacks--;
                }
        }
}
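
/* Illustrative example (assuming the standard option values
 * TCPOPT_NOP == 1, TCPOPT_TIMESTAMP == 8 and TCPOLEN_TIMESTAMP == 10):
 * the first word written above in the timestamp case is
 *
 *      (1 << 24) | (1 << 16) | (8 << 8) | 10 == 0x0101080a
 *
 * i.e. two NOP padding bytes followed by the timestamp option's
 * kind/length bytes, which keeps the TSVAL/TSECR words that follow
 * 32-bit aligned.
 */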

/* Construct a tcp options header for a SYN or SYN_ACK packet.
 * If this is ever changed, make sure to change the definition of
 * MAX_SYN_SIZE to match the new maximum number of options that you
 * can generate.
 */
static inline void tcp_syn_build_options(__u32 *ptr, int mss, int ts, int sack,
                                             int offer_wscale, int wscale, __u32 tstamp, __u32 ts_recent)
{
        /* We always get an MSS option.
         * The option bytes which will be seen in normal data
         * packets, should timestamps be used, must be included in
         * the MSS advertised.  But we subtract them from tp->mss_cache
         * so that calculations in tcp_sendmsg are simpler etc.
         * So account for this fact here if necessary.  If we
         * don't do this correctly, as a receiver we won't
         * recognize data packets as being full sized when we
         * should, and thus we won't abide by the delayed ACK
         * rules correctly.
         * SACKs don't matter; we never delay an ACK when we
         * have any of those going out.
         */
        *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss);
        if (ts) {
                if(sack)
                        *ptr++ = __constant_htonl((TCPOPT_SACK_PERM << 24) | (TCPOLEN_SACK_PERM << 16) |
                                                  (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
                else
                        *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                                                  (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
                *ptr++ = htonl(tstamp);         /* TSVAL */
                *ptr++ = htonl(ts_recent);      /* TSECR */
        } else if(sack)
                *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                                          (TCPOPT_SACK_PERM << 8) | TCPOLEN_SACK_PERM);
        if (offer_wscale)
                *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_WINDOW << 16) | (TCPOLEN_WINDOW << 8) | (wscale));
}

/* Determine a window scaling and initial window to offer.
 * Based on the assumption that the given amount of space
 * will be offered. Store the results in the tp structure.
 * NOTE: for smooth operation initial space offering should
 * be a multiple of mss if possible. We assume here that mss >= 1.
 * This MUST be enforced by all callers.
 */
static inline void tcp_select_initial_window(int __space, __u32 mss,
        __u32 *rcv_wnd,
        __u32 *window_clamp,
        int wscale_ok,
        __u8 *rcv_wscale)
{
        unsigned int space = (__space < 0 ? 0 : __space);

        /* If no clamp is set, set the clamp to the max possible scaled window */
        if (*window_clamp == 0)
                (*window_clamp) = (65535 << 14);
        space = min(*window_clamp, space);

        /* Quantize space offering to a multiple of mss if possible. */
        if (space > mss)
                space = (space / mss) * mss;

        /* NOTE: offering an initial window larger than 32767
         * will break some buggy TCP stacks. We try to be nice.
         * If we are not window scaling, then this truncates
         * our initial window offering to 32k. There should also
         * be a sysctl option to stop being nice.
         */
        (*rcv_wnd) = min(space, MAX_TCP_WINDOW);
        (*rcv_wscale) = 0;
        if (wscale_ok) {
                /* See RFC1323 for an explanation of the limit to 14 */
                while (space > 65535 && (*rcv_wscale) < 14) {
                        space >>= 1;
                        (*rcv_wscale)++;
                }
                if (*rcv_wscale && sysctl_tcp_app_win && space>=mss &&
                    space - max((space>>sysctl_tcp_app_win), mss>>*rcv_wscale) < 65536/2)
                        (*rcv_wscale)--;
        }

        /* Set the initial window to a value large enough for senders
         * following RFC 2414; senders not following this RFC will be
         * satisfied with 2.
         */
        if (mss > (1<<*rcv_wscale)) {
                int init_cwnd = 4;
                if (mss > 1460*3)
                        init_cwnd = 2;
                else if (mss > 1460)
                        init_cwnd = 3;
                if (*rcv_wnd > init_cwnd*mss)
                        *rcv_wnd = init_cwnd*mss;
        }
        /* Set the clamp no higher than max representable value */
        (*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp);
}
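
/* Illustrative trace of the scale selection above (a sketch, assuming a
 * 256 KB receive buffer, mss = 1460, window_clamp initially 0 and the
 * default tcp_app_win): space is quantized to 179 * 1460 = 261340 bytes;
 * the while loop halves it twice (261340 -> 130670 -> 65335) before it
 * drops below 65536, so rcv_wscale ends up as 2.  rcv_wnd is then capped
 * at init_cwnd * mss = 4 * 1460 = 5840, and window_clamp becomes
 * 65535 << 2 = 262140.
 */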

static inline int tcp_win_from_space(int space)
{
        return sysctl_tcp_adv_win_scale<=0 ?
                (space>>(-sysctl_tcp_adv_win_scale)) :
                space - (space>>sysctl_tcp_adv_win_scale);
}
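
/* Example (assuming the default sysctl_tcp_adv_win_scale of 2): the
 * window computed from the buffer space is space - space/4, i.e. 3/4 of
 * the receive buffer, leaving the remaining quarter as application and
 * metadata overhead.  A negative setting flips the meaning: with -2 the
 * window would be space >> 2, i.e. only a quarter of the buffer.
 */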

/* Note: caller must be prepared to deal with negative returns */
static inline int tcp_space(struct sock *sk)
{
        return tcp_win_from_space(sk->rcvbuf - atomic_read(&sk->rmem_alloc));
}

static inline int tcp_full_space(struct sock *sk)
{
        return tcp_win_from_space(sk->rcvbuf);
}

static inline void tcp_acceptq_removed(struct sock *sk)
{
        sk->ack_backlog--;
}

static inline void tcp_acceptq_added(struct sock *sk)
{
        sk->ack_backlog++;
}

static inline int tcp_acceptq_is_full(struct sock *sk)
{
        return sk->ack_backlog > sk->max_ack_backlog;
}

static inline void tcp_acceptq_queue(struct sock *sk, struct open_request *req,
                                         struct sock *child)
{
        struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;

        req->sk = child;
        tcp_acceptq_added(sk);

        if (!tp->accept_queue_tail) {
                tp->accept_queue = req;
        } else {
                tp->accept_queue_tail->dl_next = req;
        }
        tp->accept_queue_tail = req;
        req->dl_next = NULL;
}

struct tcp_listen_opt
{
        u8                      max_qlen_log;   /* log_2 of maximal queued SYNs */
        int                     qlen;
        int                     qlen_young;
        int                     clock_hand;
        u32                     hash_rnd;
        struct open_request     *syn_table[TCP_SYNQ_HSIZE];
};

static inline void
tcp_synq_removed(struct sock *sk, struct open_request *req)
{
        struct tcp_listen_opt *lopt = sk->tp_pinfo.af_tcp.listen_opt;

        if (--lopt->qlen == 0)
                tcp_delete_keepalive_timer(sk);
        if (req->retrans == 0)
                lopt->qlen_young--;
}

static inline void tcp_synq_added(struct sock *sk)
{
        struct tcp_listen_opt *lopt = sk->tp_pinfo.af_tcp.listen_opt;

        if (lopt->qlen++ == 0)
                tcp_reset_keepalive_timer(sk, TCP_TIMEOUT_INIT);
        lopt->qlen_young++;
}

static inline int tcp_synq_len(struct sock *sk)
{
        return sk->tp_pinfo.af_tcp.listen_opt->qlen;
}

static inline int tcp_synq_young(struct sock *sk)
{
        return sk->tp_pinfo.af_tcp.listen_opt->qlen_young;
}

static inline int tcp_synq_is_full(struct sock *sk)
{
        return tcp_synq_len(sk)>>sk->tp_pinfo.af_tcp.listen_opt->max_qlen_log;
}
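
/* The shift above is a cheap ">= 2^max_qlen_log" test: with
 * max_qlen_log == 10, for instance, qlen >> 10 becomes non-zero exactly
 * when qlen reaches 1024, at which point the SYN queue is treated as
 * full.
 */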

static inline void tcp_synq_unlink(struct tcp_opt *tp, struct open_request *req,
                                       struct open_request **prev)
{
        write_lock(&tp->syn_wait_lock);
        *prev = req->dl_next;
        write_unlock(&tp->syn_wait_lock);
}

static inline void tcp_synq_drop(struct sock *sk, struct open_request *req,
                                     struct open_request **prev)
{
        tcp_synq_unlink(&sk->tp_pinfo.af_tcp, req, prev);
        tcp_synq_removed(sk, req);
        tcp_openreq_free(req);
}

static __inline__ void tcp_openreq_init(struct open_request *req,
                                        struct tcp_opt *tp,
                                        struct sk_buff *skb)
{
        req->rcv_wnd = 0;                /* So that tcp_send_synack() knows! */
        req->rcv_isn = TCP_SKB_CB(skb)->seq;
        req->mss = tp->mss_clamp;
        req->ts_recent = tp->saw_tstamp ? tp->rcv_tsval : 0;
        req->tstamp_ok = tp->tstamp_ok;
        req->sack_ok = tp->sack_ok;
        req->snd_wscale = tp->snd_wscale;
        req->wscale_ok = tp->wscale_ok;
        req->acked = 0;
        req->ecn_ok = 0;
        req->rmt_port = skb->h.th->source;
}

#define TCP_MEM_QUANTUM ((int)PAGE_SIZE)

static inline void tcp_free_skb(struct sock *sk, struct sk_buff *skb)
{
        sk->tp_pinfo.af_tcp.queue_shrunk = 1;
        sk->wmem_queued -= skb->truesize;
        sk->forward_alloc += skb->truesize;
        __kfree_skb(skb);
}

static inline void tcp_charge_skb(struct sock *sk, struct sk_buff *skb)
{
        sk->wmem_queued += skb->truesize;
        sk->forward_alloc -= skb->truesize;
}

extern void __tcp_mem_reclaim(struct sock *sk);
extern int tcp_mem_schedule(struct sock *sk, int size, int kind);

static inline void tcp_mem_reclaim(struct sock *sk)
{
        if (sk->forward_alloc >= TCP_MEM_QUANTUM)
                __tcp_mem_reclaim(sk);
}

static inline void tcp_enter_memory_pressure(void)
{
        if (!tcp_memory_pressure) {
                NET_INC_STATS(TCPMemoryPressures);
                tcp_memory_pressure = 1;
        }
}

static inline void tcp_moderate_sndbuf(struct sock *sk)
{
        if (!(sk->userlocks&SOCK_SNDBUF_LOCK)) {
                sk->sndbuf = min(sk->sndbuf, sk->wmem_queued/2);
                sk->sndbuf = max(sk->sndbuf, SOCK_MIN_SNDBUF);
        }
}

static inline struct sk_buff *tcp_alloc_pskb(struct sock *sk, int size, int mem, int gfp)
{
        struct sk_buff *skb = alloc_skb(size+MAX_TCP_HEADER, gfp);

        if (skb) {
                skb->truesize += mem;
                if (sk->forward_alloc >= (int)skb->truesize ||
                    tcp_mem_schedule(sk, skb->truesize, 0)) {
                        skb_reserve(skb, MAX_TCP_HEADER);
                        return skb;
                }
                __kfree_skb(skb);
        } else {
                tcp_enter_memory_pressure();
                tcp_moderate_sndbuf(sk);
        }
        return NULL;
}

static inline struct sk_buff *tcp_alloc_skb(struct sock *sk, int size, int gfp)
{
        return tcp_alloc_pskb(sk, size, 0, gfp);
}

static inline struct page * tcp_alloc_page(struct sock *sk)
{
        if (sk->forward_alloc >= (int)PAGE_SIZE ||
            tcp_mem_schedule(sk, PAGE_SIZE, 0)) {
                struct page *page = alloc_pages(sk->allocation, 0);
                if (page)
                        return page;
        }
        tcp_enter_memory_pressure();
        tcp_moderate_sndbuf(sk);
        return NULL;
}

static inline void tcp_writequeue_purge(struct sock *sk)
{
        struct sk_buff *skb;

        while ((skb = __skb_dequeue(&sk->write_queue)) != NULL)
                tcp_free_skb(sk, skb);
        tcp_mem_reclaim(sk);
}

extern void tcp_rfree(struct sk_buff *skb);

static inline void tcp_set_owner_r(struct sk_buff *skb, struct sock *sk)
{
        skb->sk = sk;
        skb->destructor = tcp_rfree;
        atomic_add(skb->truesize, &sk->rmem_alloc);
        sk->forward_alloc -= skb->truesize;
}

extern void tcp_listen_wlock(void);

/* - We may sleep inside this lock.
 * - If sleeping is not required (or called from BH),
 *   use plain read_(un)lock(&tcp_lhash_lock).
 */

static inline void tcp_listen_lock(void)
{
        /* read_lock synchronizes us with candidate writers */
        read_lock(&tcp_lhash_lock);
        atomic_inc(&tcp_lhash_users);
        read_unlock(&tcp_lhash_lock);
}

static inline void tcp_listen_unlock(void)
{
        if (atomic_dec_and_test(&tcp_lhash_users))
                wake_up(&tcp_lhash_wait);
}

static inline int keepalive_intvl_when(struct tcp_opt *tp)
{
        return tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl;
}

static inline int keepalive_time_when(struct tcp_opt *tp)
{
        return tp->keepalive_time ? : sysctl_tcp_keepalive_time;
}

static inline int tcp_fin_time(struct tcp_opt *tp)
{
        int fin_timeout = tp->linger2 ? : sysctl_tcp_fin_timeout;

        if (fin_timeout < (tp->rto<<2) - (tp->rto>>1))
                fin_timeout = (tp->rto<<2) - (tp->rto>>1);

        return fin_timeout;
}
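
/* The lower bound applied above is (rto << 2) - (rto >> 1), i.e.
 * 4*RTO - RTO/2 = 3.5 * RTO, so linger2/tcp_fin_timeout can never shrink
 * the FIN_WAIT_2 lifetime below roughly three and a half retransmission
 * timeouts.
 */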

static inline int tcp_paws_check(struct tcp_opt *tp, int rst)
{
        if ((s32)(tp->rcv_tsval - tp->ts_recent) >= 0)
                return 0;
        if (xtime.tv_sec >= tp->ts_recent_stamp + TCP_PAWS_24DAYS)
                return 0;

        /* RST segments are not recommended to carry timestamps,
           and, if they do, it is recommended to ignore PAWS because
           "their cleanup function should take precedence over timestamps."
           Certainly, this is a mistake.  It is necessary to understand the
           reasons for this constraint before relaxing it: if the peer
           reboots, its clock may go out of sync and half-open connections
           will not be reset.
           Actually, the problem would not exist if all implementations
           followed the draft about maintaining clocks across reboots.
           Linux-2.2 DOES NOT!

           However, we can relax the time bounds for RST segments to MSL.
         */
        if (rst && xtime.tv_sec >= tp->ts_recent_stamp + TCP_PAWS_MSL)
                return 0;
        return 1;
}
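
/* Why the first test uses a signed 32-bit difference (an illustration,
 * assuming 32-bit timestamp arithmetic): if ts_recent == 0xfffffff0 and
 * a segment arrives with rcv_tsval == 0x10, then
 * rcv_tsval - ts_recent == 0x20, which is positive as an s32, so the
 * segment is accepted as in-order even though the timestamp clock has
 * wrapped around zero.
 */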

#define TCP_CHECK_TIMER(sk) do { } while (0)

static inline int tcp_use_frto(const struct sock *sk)
{
        const struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;

        /* F-RTO must be activated in sysctl and there must be some
         * unsent new data, and the advertised window should allow
         * sending it.
         */
        return (sysctl_tcp_frto && tp->send_head &&
                !after(TCP_SKB_CB(tp->send_head)->end_seq,
                       tp->snd_una + tp->snd_wnd));
}

static inline void tcp_mib_init(void)
{
        /* See RFC 2012 */
        TCP_ADD_STATS_USER(TcpRtoAlgorithm, 1);
        TCP_ADD_STATS_USER(TcpRtoMin, TCP_RTO_MIN*1000/HZ);
        TCP_ADD_STATS_USER(TcpRtoMax, TCP_RTO_MAX*1000/HZ);
        TCP_ADD_STATS_USER(TcpMaxConn, -1);
}


/* TCP Westwood functions and constants */

#define TCP_WESTWOOD_INIT_RTT               20*HZ           /* maybe too conservative?! */
#define TCP_WESTWOOD_RTT_MIN                HZ/20           /* 50ms */

static inline void tcp_westwood_update_rtt(struct tcp_opt *tp, __u32 rtt_seq)
{
        if (sysctl_tcp_westwood)
                tp->westwood.rtt = rtt_seq;
}

void __tcp_westwood_fast_bw(struct sock *, struct sk_buff *);
void __tcp_westwood_slow_bw(struct sock *, struct sk_buff *);

/*
 * This function initializes fields used in TCP Westwood+. We can't
 * get any information about RTTmin at this time, so we simply set it
 * to TCP_WESTWOOD_INIT_RTT. This value is deliberately conservative,
 * which makes sure it will be updated in a consistent way as soon as
 * possible. That will reasonably happen within the first RTT period
 * of the connection's lifetime.
 */

static inline void __tcp_init_westwood(struct sock *sk)
{
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

        tp->westwood.bw_ns_est = 0;
        tp->westwood.bw_est = 0;
        tp->westwood.accounted = 0;
        tp->westwood.cumul_ack = 0;
        tp->westwood.rtt_win_sx = tcp_time_stamp;
        tp->westwood.rtt = TCP_WESTWOOD_INIT_RTT;
        tp->westwood.rtt_min = TCP_WESTWOOD_INIT_RTT;
        tp->westwood.snd_una = tp->snd_una;
}

static inline void tcp_init_westwood(struct sock *sk)
{
        __tcp_init_westwood(sk);
}

static inline void tcp_westwood_fast_bw(struct sock *sk, struct sk_buff *skb)
{
        if (sysctl_tcp_westwood)
                __tcp_westwood_fast_bw(sk, skb);
}

static inline void tcp_westwood_slow_bw(struct sock *sk, struct sk_buff *skb)
{
        if (sysctl_tcp_westwood)
                __tcp_westwood_slow_bw(sk, skb);
}

static inline __u32 __tcp_westwood_bw_rttmin(struct tcp_opt *tp)
{
        return (__u32) ((tp->westwood.bw_est) * (tp->westwood.rtt_min) /
                        (__u32) (tp->mss_cache));
}
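
/* Reading the expression above as a sketch: bw_est * rtt_min is the
 * estimated bandwidth-delay product in bytes (given how bw_est and
 * rtt_min are maintained by the Westwood+ estimator elsewhere), and
 * dividing by mss_cache converts it into a window expressed in
 * segments.  For example, a product equivalent to 64 KB with
 * mss_cache == 1460 yields 44 segments.
 */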

static inline __u32 tcp_westwood_bw_rttmin(struct tcp_opt *tp)
{
        __u32 ret = 0;

        if (sysctl_tcp_westwood)
                ret = (__u32) (max(__tcp_westwood_bw_rttmin(tp), 2U));

        return ret;
}

static inline int tcp_westwood_ssthresh(struct tcp_opt *tp)
{
        int ret = 0;
        __u32 ssthresh;

        if (sysctl_tcp_westwood) {
                if (!(ssthresh = tcp_westwood_bw_rttmin(tp)))
                        return ret;

                tp->snd_ssthresh = ssthresh;
                ret = 1;
        }

        return ret;
}

static inline int tcp_westwood_cwnd(struct tcp_opt *tp)
{
        int ret = 0;
        __u32 cwnd;

        if (sysctl_tcp_westwood) {
                if (!(cwnd = tcp_westwood_bw_rttmin(tp)))
                        return ret;

                tp->snd_cwnd = cwnd;
                ret = 1;
        }

        return ret;
}

static inline int tcp_westwood_complete_cwr(struct tcp_opt *tp)
{
        int ret = 0;

        if (sysctl_tcp_westwood) {
                if (tcp_westwood_cwnd(tp)) {
                        tp->snd_ssthresh = tp->snd_cwnd;
                        ret = 1;
                }
        }

        return ret;
}

#endif  /* _TCP_H */
