OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

[/] [or1k/] [trunk/] [rc203soc/] [sw/] [uClinux/] [net/] [ipv4/] [tcp_input.c] - Blame information for rev 1771

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 1629 jcastillo
/*
2
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
3
 *              operating system.  INET is implemented using the  BSD Socket
4
 *              interface as the means of communication with the user level.
5
 *
6
 *              Implementation of the Transmission Control Protocol(TCP).
7
 *
8
 * Version:     @(#)tcp_input.c 1.0.16  05/25/93
9
 *
10
 * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
11
 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12
 *              Mark Evans, <evansmp@uhura.aston.ac.uk>
13
 *              Corey Minyard <wf-rch!minyard@relay.EU.net>
14
 *              Florian La Roche, <flla@stud.uni-sb.de>
15
 *              Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
16
 *              Linus Torvalds, <torvalds@cs.helsinki.fi>
17
 *              Alan Cox, <gw4pts@gw4pts.ampr.org>
18
 *              Matthew Dillon, <dillon@apollo.west.oic.com>
19
 *              Arnt Gulbrandsen, <agulbra@nvg.unit.no>
20
 *              Jorge Cwik, <jorge@laser.satlink.net>
21
 *
22
 * FIXES
23
 *              Pedro Roque     :       Double ACK bug
24
 *              Eric Schenk     :       Fixes to slow start algorithm.
25
 *              Eric Schenk     :       Yet another double ACK bug.
26
 *              Eric Schenk     :       Delayed ACK bug fixes.
27
 *              Eric Schenk     :       Floyd style fast retrans war avoidance.
28
 *              Eric Schenk     :       Skip fast retransmit on small windows.
29
 *              Eric Schenk     :       Fixes to retransmission code to
30
 *                              :       avoid extra retransmission.
31
 *              Theodore Ts'o   :       Do secure TCP sequence numbers.
32
 *              Eric Schenk     :       SYN and RST cookies for dealing
33
 *                              :       with SYN flooding attacks.
34
 *              David S. Miller :       New socket lookup architecture for ISS.
35
 *                                      This code is dedicated to John Dyson.
36
 *              Elliot Poger    :       Added support for SO_BINDTODEVICE.
37
 *      Willy Konynenberg       :       Transparent proxy adapted to new
38
 *                                      socket hash code.
39
 *      J Hadi Salim            :       We assumed that some idiot wasnt going
40
 *      Alan Cox                        to idly redefine bits of ToS in an
41
 *                                      experimental protocol for other things
42
 *                                      (ECN) - wrong!. Mask the bits off. Note
43
 *                                      masking the bits if they dont use ECN
44
 *                                      then use it for ToS is even more
45
 *                                      broken.
46
 *                                      </RANT>
47
 *      George Baeslack         :       SIGIO delivery on accept() bug that
48
 *                                      affected sun jdk.
49
 */
50
 
51
#include <linux/config.h>
52
#include <linux/types.h>
53
#include <linux/random.h>
54
#include <net/tcp.h>
55
 
56
/*
57
 *      Do we assume the IP ToS is entirely for its intended purpose
58
 */
59
 
60
/* Keep only the low six bits of the IP ToS byte.  NOTE(review): the two
 * bits masked off are presumably the ones the ECN experiment redefined
 * (see the rant in the file header) -- confirm bit positions. */
#define TOS_VALID_MASK(x)               ((x)&0x3F)
61
 
62
/*
63
 *      Policy code extracted so it's now separate
64
 */
65
 
66
/*
67
 *      Called each time to estimate the delayed ack timeout. This is
68
 *      how it should be done so a fast link isn't impacted by ack delay.
69
 */
70
 
71
extern __inline__ void tcp_delack_estimator(struct sock *sk)
72
{
73
        /*
74
         *      Delayed ACK time estimator.
75
         */
76
 
77
        if (sk->lrcvtime == 0)
78
        {
79
                sk->lrcvtime = jiffies;
80
                sk->ato = HZ/3;
81
        }
82
        else
83
        {
84
                int m;
85
 
86
                m = jiffies - sk->lrcvtime;
87
 
88
                sk->lrcvtime = jiffies;
89
 
90
                if (m <= 0)
91
                        m = 1;
92
 
93
                /* This used to test against sk->rtt.
94
                 * On a purely receiving link, there is no rtt measure.
95
                 * The result is that we lose delayed ACKs on one-way links.
96
                 * Therefore we test against sk->rto, which will always
97
                 * at least have a default value.
98
                 */
99
                if (m > sk->rto)
100
                {
101
                        sk->ato = sk->rto;
102
                        /*
103
                         * printk(KERN_DEBUG "ato: rtt %lu\n", sk->ato);
104
                         */
105
                }
106
                else
107
                {
108
                        /*
109
                         * Very fast acting estimator.
110
                         * May fluctuate too much. Probably we should be
111
                         * doing something like the rtt estimator here.
112
                         */
113
                        sk->ato = (sk->ato >> 1) + m;
114
                        /*
115
                         * printk(KERN_DEBUG "ato: m %lu\n", sk->ato);
116
                         */
117
                }
118
        }
119
}
120
 
121
/*
122
 *      Called on frames that were known _not_ to have been
123
 *      retransmitted [see Karn/Partridge Proceedings SIGCOMM 87].
124
 *      The algorithm is from the SIGCOMM 88 piece by Van Jacobson.
125
 */
126
 
127
extern __inline__ void tcp_rtt_estimator(struct sock *sk, struct sk_buff *oskb)
{
        long m;
        /*
         *      The following amusing code comes from Jacobson's
         *      article in SIGCOMM '88.  Note that rtt and mdev
         *      are scaled versions of rtt and mean deviation.
         *      This is designed to be as fast as possible
         *      m stands for "measurement".
         */

        m = jiffies - oskb->when;  /* RTT */

        if (sk->rtt != 0) {
                /* Normal case: fold the sample into the smoothed estimators. */
                if(m<=0)
                        m=1;            /* IS THIS RIGHT FOR <0 ??? */
                m -= (sk->rtt >> 3);    /* m is now error in rtt est */
                sk->rtt += m;           /* rtt = 7/8 rtt + 1/8 new */
                if (m < 0)
                        m = -m;         /* m is now abs(error) */
                m -= (sk->mdev >> 2);   /* similar update on mdev */
                sk->mdev += m;          /* mdev = 3/4 mdev + 1/4 new */
        } else {
                /* no previous measure. */
                sk->rtt = m<<3;         /* take the measured time to be rtt */
                sk->mdev = m<<1;        /* make sure rto = 3*rtt */
        }

        /*
         *      Now update timeout.  Note that this removes any backoff.
         */

        /* Jacobson's algorithm calls for rto = R + 4V.
         * We diverge from Jacobson's algorithm here. See the commentary
         * in tcp_ack to understand why.
         */
        sk->rto = (sk->rtt >> 3) + sk->mdev;
        /* NOTE(review): the (cong_window-1) shift count can reach or
         * exceed the width of the type when the congestion window grows
         * large, which is undefined behaviour in C -- confirm callers
         * bound cong_window appropriately. */
        sk->rto += (sk->rto>>2) + (sk->rto >> (sk->cong_window-1));
        if (sk->rto > 120*HZ)
                sk->rto = 120*HZ;       /* clamp rto to at most 120 seconds */
        if (sk->rto < HZ/5)     /* Was 1*HZ - keep .2 as minimum cos of the BSD delayed acks */
                sk->rto = HZ/5;
        sk->backoff = 0;        /* a fresh, clean sample cancels exponential backoff */
}
171
 
172
#if defined(CONFIG_RST_COOKIES)
173
 
174
/*
175
 * This code needs to be a bit more clever.
176
 * Does 300 second timeouts now. Still just a circular buffer.
177
 * At most 32 validations stored. New validations are ignored
178
 * if all 32 validations are currently valid. To do otherwise
179
 * allows a situation in which clearances are forgotten before
180
 * they can be used (provided valid traffic is coming fast enough).
181
 * The buffer should really be as long as the number of valid
182
 * connections we want to accept in an 300 second period.
183
 * 32 is maybe to small. On the other hand, the validation check
184
 * algorithm has to walk the whole table, which is also stupid.
185
 * It would be better to have a combined hash/circular buffer.
186
 * The hash could be used with chaining for fast lookup.
187
 * Really this is probably an argument against using RST cookies
188
 * at all, since they take up space for the clearances.
189
 */
190
 
191
static struct {
192
        u32 saddr;
193
        unsigned long tstamp;
194
} clearances[32] = {
195
{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
196
{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
197
{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
198
{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}};
199
 
200
static next_clearance = 0;
201
/* Does the address saddr have an active security clearance? */
202
int tcp_clearance(__u32 saddr)
{
        /* Walk the whole clearance table looking for an unexpired
         * (younger than 300 seconds) entry for this source address. */
        int slot;

        for (slot = 0; slot < 32; slot++) {
                if (clearances[slot].saddr != saddr)
                        continue;
                if (clearances[slot].tstamp > jiffies-HZ*300)
                        return 1;       /* still valid */
        }
        return 0;                       /* no active clearance */
}
211
 
212
void add_clearance(__u32 saddr)
{
        /* Record a clearance for saddr in the next circular-buffer slot,
         * but only if that slot's current entry has already expired --
         * otherwise the new clearance is silently dropped so that live
         * clearances are never forgotten early. */
        int slot = next_clearance;

        if (clearances[slot].tstamp > jiffies-HZ*300)
                return;         /* all 32 entries still valid; ignore */

        clearances[slot].saddr = saddr;
        clearances[slot].tstamp = jiffies;
        next_clearance = (slot + 1) % 32;
}
223
 
224
#endif
225
 
226
#ifdef CONFIG_SYN_COOKIES
227
/*
228
 *      MTU values we can represent in fall back mode.
229
 *      These values are partially borrowed from Jeff Weisberg's SunOS
230
 *      implementation of SYNCOOKIES. I have added an extra limiting
231
 *      value of 64 to deal with the case of very small MTU values.
232
 *      (e.g. long delay packet radio links, 1200 baud modems.)
233
 */
234
/* Eight representable MTU values for SYN-cookie fall back mode (index is
 * encoded in the cookie; see the comment above for their provenance). */
static __u32 cookie_mtu[8] = { 64, 256, 512, 536, 1024, 1440, 1460, 4312 };
/* Running count of SYN cookies sent (incremented in tcp_conn_request). */
unsigned int ui_c_send_cookies = 0;
236
#endif
237
 
238
extern void tcp_v4_hash(struct sock *sk);
239
extern void tcp_v4_unhash(struct sock *sk);
240
extern void tcp_v4_rehash(struct sock *sk);
241
 
242
/* Don't inline this cruft.  Here are some nice properties to
243
 * exploit here.  The BSD API does not allow a listening TCP
244
 * to specify the remote port nor the remote address for the
245
 * connection.  So always assume those are both wildcarded
246
 * during the search since they can never be otherwise.
247
 */
248
static struct sock *tcp_v4_lookup_longway(u32 daddr, unsigned short hnum,
                                          struct device *dev)
{
        struct sock *sk = tcp_listening_hash[tcp_lhashfn(hnum)];
        struct sock *best = NULL;
        int best_score = 0;

        for (; sk; sk = sk->next) {
                int score;

                if (sk->num != hnum)
                        continue;
                score = 1;

                /* If this socket is bound to a particular IP address,
                 * the packet's destination address must match it. */
                if (sk->rcv_saddr) {
                        if (sk->rcv_saddr != daddr)
                                continue;
                        score++;
                }

                /* If this socket is bound to a particular interface,
                 * the packet must have arrived on that interface. */
                if (sk->bound_device) {
                        if (sk->bound_device != dev)
                                continue;
                        score++;
                }

                /* A score of 3 (port + address + device) cannot be beaten. */
                if (score == 3)
                        return sk;
                if (score > best_score) {
                        best_score = score;
                        best = sk;
                }
        }
        return best;
}
288
 
289
/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
290
 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
291
 */
292
static inline struct sock *__tcp_v4_lookup(struct tcphdr *th,
                                           u32 saddr, u16 sport, u32 daddr,
                                           u16 dport, struct device *dev)
{
        unsigned short hnum = ntohs(dport);
        struct sock *sk;

        /* Optimize here for direct hit, only listening connections can
         * have wildcards anyways.  It is assumed that this code only
         * gets called from within NET_BH.
         */
        for (sk = tcp_established_hash[tcp_hashfn(daddr, hnum, saddr, sport)];
             sk; sk = sk->next) {
                if (sk->daddr           == saddr &&     /* remote address */
                    sk->dummy_th.dest   == sport &&     /* remote port    */
                    sk->num             == hnum  &&     /* local port     */
                    sk->rcv_saddr       == daddr &&     /* local address  */
                    (sk->bound_device == NULL || sk->bound_device == dev))
                        return sk;      /* You sunk my battleship! */
        }

        /* No established connection: fall back to the scored search of
         * the listening hash, which handles wildcards. */
        return tcp_v4_lookup_longway(daddr, hnum, dev);
}
315
 
316
__inline__ struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport,
                                      struct device *dev)
{
        /* Convenience wrapper: the tcphdr argument is not needed for a
         * plain lookup, so pass NULL (the integer 0 was being implicitly
         * converted to a struct tcphdr pointer). */
        return __tcp_v4_lookup(NULL, saddr, sport, daddr, dport, dev);
}
321
 
322
#ifdef CONFIG_IP_TRANSPARENT_PROXY
323
/* I am not entirely sure this is fully equivalent to the old lookup code, but it does
324
 * look reasonable.  WFK
325
 */
326
struct sock *tcp_v4_proxy_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, u32 paddr, u16 rport,
                                 struct device *dev)
{
        unsigned short hnum = ntohs(dport);     /* original destination port, host order */
        unsigned short hrnum = ntohs(rport);    /* redirect port, host order */
        struct sock *sk;

        /* Optimize here for direct hit, only listening connections can
         * have wildcards anyways.  It is assumed that this code only
         * gets called from within NET_BH.
         */
        sk = tcp_established_hash[tcp_hashfn(daddr, hnum, saddr, sport)];
        for(; sk; sk = sk->next)
                if(sk->daddr            == saddr                && /* remote address */
                   sk->dummy_th.dest    == sport                && /* remote port    */
                   sk->num              == hnum                 && /* local port     */
                   sk->rcv_saddr        == daddr                && /* local address  */
                   ((sk->bound_device==NULL) || (sk->bound_device==dev))  )
                        goto hit; /* You sunk my battleship! */
        /* If we don't match on a bound socket, try to find one explicitly listening
         * on the remote address (a proxy bind).
         */
        sk = tcp_v4_lookup_longway(daddr, hnum, dev);
        /* If that didn't yield an exact match, look for a socket listening on the
         * redirect port.
         */
        if (!sk || sk->rcv_saddr != daddr) {
                sk = tcp_v4_lookup_longway(paddr, hrnum, dev);
        }
hit:
        return sk;
}
358
#endif
359
 
360
/*
361
 * React to a out-of-window TCP sequence number in an incoming packet
362
 */
363
 
364
static void bad_tcp_sequence(struct sock *sk, struct tcphdr *th, u32 end_seq,
              struct device *dev)
{
        /* Never respond to an incoming reset. */
        if (th->rst)
                return;

        /*
         *      Send a reset if we get something not ours and we are
         *      unsynchronized. Note: We don't do anything to our end. We
         *      are just killing the bogus remote connection then we will
         *      connect again and it will work (with luck).
         */
        if (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV) {
                tcp_send_reset(sk->saddr, sk->daddr, th, sk->prot, NULL, dev, 0, 255);
                return;
        }

        /*
         *      This packet is old news. Usually this is just a resend
         *      from the far end, but sometimes it means the far end lost
         *      an ACK we sent, so we better send an ACK.
         *
         *      BEWARE! Unconditional answering by ack to out-of-window ack
         *      can result in infinite exchange of empty acks.
         *      This check cures bug, found by Michiel Boland, but
         *      not another possible cases.
         *      If we are in TCP_TIME_WAIT, we have already received
         *      FIN, so that our peer need not window update. If our
         *      ACK were lost, peer would retransmit his FIN anyway. --ANK
         */
        if (sk->state == TCP_TIME_WAIT && ntohl(th->seq) == end_seq)
                return;

        tcp_send_ack(sk);
}
400
 
401
/*
402
 *      This functions checks to see if the tcp header is actually acceptable.
403
 */
404
 
405
extern __inline__ int tcp_sequence(struct sock *sk, u32 seq, u32 end_seq)
{
        u32 end_window = sk->lastwin_seq + sk->window;

        /* Zero-window special case: a segment starting exactly at the
         * window edge is acceptable only if it is also empty. */
        if (seq == end_window && seq == end_seq)
                return 1;

        /* Otherwise the segment must start before the window closes and
         * must not end before data we have already acknowledged. */
        return before(seq, end_window) && !before(end_seq, sk->acked_seq);
}
413
 
414
/*
415
 *      When we get a reset we do this. This probably is a tcp_output routine
416
 *      really.
417
 */
418
 
419
static int tcp_reset(struct sock *sk, struct sk_buff *skb)
{
        sk->zapped = 1;

        /*
         *      We want the right error as BSD sees it (and indeed as we do).
         *      TIME_WAIT gets no error at all.
         */
        if (sk->state == TCP_SYN_SENT)
                sk->err = ECONNREFUSED;
        else if (sk->state == TCP_CLOSE_WAIT)
                sk->err = EPIPE;
        else if (sk->state != TCP_TIME_WAIT)
                sk->err = ECONNRESET;

#ifdef CONFIG_TCP_RFC1337
        /*
         *      Time wait assassination protection [RFC1337]
         *
         *      This is a good idea, but causes more sockets to take time to close.
         *
         *      Ian Heavens has since shown this is an inadequate fix for the protocol
         *      bug in question.
         */
        if (sk->state != TCP_TIME_WAIT) {
                tcp_set_state(sk, TCP_CLOSE);
                sk->shutdown = SHUTDOWN_MASK;
        }
#else
        tcp_set_state(sk, TCP_CLOSE);
        sk->shutdown = SHUTDOWN_MASK;
#endif
        if (!sk->dead)
                sk->state_change(sk);
        kfree_skb(skb, FREE_READ);
        return 0;
}
460
 
461
 
462
/*
463
 *      Look for tcp options. Parses everything but only knows about MSS.
464
 *      This routine is always called with the packet containing the SYN.
465
 *      However it may also be called with the ack to the SYN.  So you
466
 *      can't assume this is always the SYN.  It's always called after
467
 *      we have set up sk->mtu to our own MTU.
468
 *
469
 *      We need at minimum to add PAWS support here. Possibly large windows
470
 *      as Linux gets deployed on 100Mb/sec networks.
471
 */
472
 
473
static void tcp_options(struct sock *sk, struct tcphdr *th)
{
        unsigned char *ptr;
        int length=(th->doff*4)-sizeof(struct tcphdr);  /* option bytes present */
        int mss_seen = 0;       /* did the peer send an explicit MSS option? */

        /* Options begin immediately after the fixed TCP header. */
        ptr = (unsigned char *)(th + 1);

        while(length>0)
        {
                int opcode=*ptr++;
                int opsize=*ptr++;      /* speculative length read; undone for NOP below */
                switch(opcode)
                {
                        case TCPOPT_EOL:
                                goto ende;
                        case TCPOPT_NOP:        /* Ref: RFC 793 section 3.1 */
                                length--;
                                ptr--;          /* the opsize=*ptr++ above was a mistake */
                                continue;

                        default:
                                if(opsize<=2)   /* Avoid silly options looping forever */
                                        goto ende;
                                switch(opcode)
                                {
                                        case TCPOPT_MSS:
                                                /* Honour MSS only on a SYN, and only to
                                                 * clamp our mtu down, never up. */
                                                if(opsize==4 && th->syn)
                                                {
                                                        sk->mtu=min(sk->mtu,ntohs(*(unsigned short *)ptr));
                                                        mss_seen = 1;
                                                }
                                                break;
                                                /* Add other options here as people feel the urge to implement stuff like large windows */
                                }
                                ptr+=opsize-2;
                                length-=opsize;
                }
        }
ende:   if (th->syn)
        {
                if (! mss_seen)
                      sk->mtu=min(sk->mtu, 536);  /* default MSS if none sent */
        }
#ifdef CONFIG_INET_PCTCP
        /* PCTCP build: never let mss exceed half the largest window seen. */
        sk->mss = min(sk->max_window >> 1, sk->mtu);
#else
        sk->mss = min(sk->max_window, sk->mtu);
        sk->max_unacked = 2 * sk->mss;
#endif
}
524
 
525
 
526
/*
527
 *      This routine handles a connection request.
528
 *      It should make sure we haven't already responded.
529
 *      Because of the way BSD works, we have to send a syn/ack now.
530
 *      This also means it will be harder to close a socket which is
531
 *      listening.
532
 */
533
 
534
static void tcp_conn_request(struct sock *sk, struct sk_buff *skb,
535
                 u32 daddr, u32 saddr, struct options *opt, struct device *dev, u32 seq)
536
{
537
        struct sock *newsk;
538
        struct tcphdr *th;
539
        struct rtable *rt;
540
#ifdef CONFIG_SYN_COOKIES
541
        int send_cookie = 0;
542
#endif
543
 
544
        th = skb->h.th;
545
 
546
        /* If the socket is dead, don't accept the connection. */
547
        if (!sk->dead)
548
        {
549
                /*
550
                 * This must wait for 3 way completion.
551
                 * sk->data_ready(sk,0);
552
                 */
553
        }
554
        else
555
        {
556
                if(sk->debug)
557
                        printk("Reset on %p: Connect on dead socket.\n",sk);
558
                tcp_send_reset(daddr, saddr, th, sk->prot, opt, dev, 0,255);
559
                tcp_statistics.TcpAttemptFails++;
560
                kfree_skb(skb, FREE_READ);
561
                return;
562
        }
563
 
564
        /*
565
         *      Make sure we can accept more.  This will prevent a
566
         *      flurry of syns from eating up all our memory.
567
         *
568
         *      BSD does some funnies here and allows 3/2 times the
569
         *      set backlog as a fudge factor. That's just too gross.
570
         *
571
         *      Well, now I'm making things even grosser for dealing
572
         *      with SYNACK flooding.
573
         */
574
 
575
        if (sk->ack_backlog >= sk->max_ack_backlog)
576
        {
577
#if defined(CONFIG_RST_COOKIES) || defined(CONFIG_SYN_COOKIES)
578
                static unsigned long warning_time = 0;
579
 
580
                /* We may be experiencing SYNACK flooding.
581
                 * We now must decide if we should accept this connection.
582
                 * If we have a security clearance for the incoming
583
                 * packet, i.e. it is from a location we where talking
584
                 * to succesfully recently, or that has responded to
585
                 * a security probe, then we go ahead and deal normally,
586
                 * accepting up to 2*max in the backlog.
587
                 * Otherwise, we send out either an RST security probe
588
                 * or a SYN cookie, or both. (depending on configuration).
589
                 * Note that we send out a cookie even if the backlog
590
                 * is full up to 2*max, since the backlog may clear
591
                 * by the time we get a response.
592
                 * WARNING: This code changes the semantics of the backlog
593
                 * a bit. I'm not entirely sure this is the right thing
594
                 * to do here.
595
                 */
596
                extern void tcp_send_synack_probe(unsigned long saddr,
597
                                                  unsigned long daddr, struct tcphdr *th,
598
                                                  struct proto *prot,
599
                                                  struct options *opt,
600
                                                  struct device *dev, int tos, int ttl);
601
 
602
#ifdef CONFIG_RST_COOKIES
603
                if (!tcp_clearance(saddr)) {
604
#endif
605
                        /* Only let this warning get printed once a minute. */
606
                        if (jiffies - warning_time > HZ*60) {
607
                                warning_time = jiffies;
608
                                printk(KERN_INFO "Warning: possible SYN flood from %d.%d.%d.%d on %d.%d.%d.%d:%d.  Sending cookies.\n",
609
                                        NIPQUAD(saddr), NIPQUAD(daddr), ntohs(th->dest));
610
                        }
611
#ifdef CONFIG_RST_COOKIES
612
                        tcp_send_synack_probe(daddr, saddr, th, &tcp_prot,
613
                                opt, dev, skb->ip_hdr->tos, 255);
614
#endif
615
#ifdef CONFIG_SYN_COOKIES
616
                        send_cookie = 1;
617
                        ui_c_send_cookies++;
618
#else
619
                        /* If we only have RST cookies we should
620
                         * not drop through to the rest of the response code.
621
                         */
622
                        kfree_skb(skb, FREE_READ);
623
                        return;
624
#endif
625
#ifdef CONFIG_RST_COOKIES
626
                } else if (sk->ack_backlog >= 2*sk->max_ack_backlog) {
627
                        tcp_statistics.TcpAttemptFails++;
628
                        kfree_skb(skb, FREE_READ);
629
                        return;
630
                }
631
#endif
632
#else
633
                tcp_statistics.TcpAttemptFails++;
634
                kfree_skb(skb, FREE_READ);
635
                return;
636
#endif
637
        }
638
 
639
        /*
640
         * We need to build a new sock struct.
641
         * It is sort of bad to have a socket without an inode attached
642
         * to it, but the wake_up's will just wake up the listening socket,
643
         * and if the listening socket is destroyed before this is taken
644
         * off of the queue, this will take care of it.
645
         */
646
 
647
        newsk = (struct sock *) kmalloc(sizeof(struct sock), GFP_ATOMIC);
648
        if (newsk == NULL)
649
        {
650
                /* just ignore the syn.  It will get retransmitted. */
651
                tcp_statistics.TcpAttemptFails++;
652
                kfree_skb(skb, FREE_READ);
653
                return;
654
        }
655
 
656
        memcpy(newsk, sk, sizeof(*newsk));
657
 
658
        /* Or else we die! -DaveM */
659
        newsk->sklist_next = NULL;
660
        /* and die again -- erics */
661
        newsk->pprev = NULL;
662
 
663
        newsk->opt = NULL;
664
        newsk->ip_route_cache  = NULL;
665
        if (opt && opt->optlen)
666
        {
667
                sk->opt = (struct options*)kmalloc(sizeof(struct options)+opt->optlen, GFP_ATOMIC);
668
                if (!sk->opt)
669
                {
670
                        kfree_s(newsk, sizeof(struct sock));
671
                        tcp_statistics.TcpAttemptFails++;
672
                        kfree_skb(skb, FREE_READ);
673
                        return;
674
                }
675
                if (ip_options_echo(sk->opt, opt, daddr, saddr, skb))
676
                {
677
                        kfree_s(sk->opt, sizeof(struct options)+opt->optlen);
678
                        kfree_s(newsk, sizeof(struct sock));
679
                        tcp_statistics.TcpAttemptFails++;
680
                        kfree_skb(skb, FREE_READ);
681
                        return;
682
                }
683
        }
684
 
685
        skb->when = jiffies;    /* For timeout */
686
        skb_queue_head_init(&newsk->write_queue);
687
        skb_queue_head_init(&newsk->receive_queue);
688
        newsk->send_head = NULL;
689
        newsk->send_tail = NULL;
690
        newsk->send_next = NULL;
691
        skb_queue_head_init(&newsk->back_log);
692
        newsk->rtt = 0;
693
        newsk->rto = TCP_TIMEOUT_INIT;
694
        newsk->mdev = TCP_TIMEOUT_INIT;
695
        newsk->max_window = 32; /* It cannot be left at zero. -DaveM */
696
        /*
697
         * See draft-stevens-tcpca-spec-01 for discussion of the
698
         * initialization of these values.
699
         */
700
        newsk->cong_window = 1;
701
        newsk->cong_count = 0;
702
        newsk->ssthresh = 0x7fffffff;
703
 
704
        newsk->lrcvtime = 0;
705
        newsk->idletime = 0;
706
        newsk->high_seq = 0;
707
        newsk->backoff = 0;
708
        newsk->blog = 0;
709
        newsk->intr = 0;
710
        newsk->proc = 0;
711
        newsk->done = 0;
712
        newsk->partial = NULL;
713
        newsk->pair = NULL;
714
        newsk->wmem_alloc = 0;
715
        newsk->rmem_alloc = 0;
716
        newsk->localroute = sk->localroute;
717
 
718
        newsk->max_unacked = MAX_WINDOW - TCP_WINDOW_DIFF;
719
 
720
        newsk->err = 0;
721
        newsk->shutdown = 0;
722
        newsk->ack_backlog = 0;
723
        newsk->acked_seq = skb->seq+1;
724
        newsk->lastwin_seq = skb->seq+1;
725
        newsk->delay_acks = 1;
726
        newsk->copied_seq = skb->seq+1;
727
        newsk->fin_seq = skb->seq;
728
        newsk->syn_seq = skb->seq;
729
        newsk->state = TCP_SYN_RECV;
730
        newsk->timeout = 0;
731
        newsk->ip_xmit_timeout = 0;
732
        newsk->urg_data = 0;
733
        newsk->retransmits = 0;
734
        newsk->linger=0;
735
        newsk->destroy = 0;
736
        init_timer(&newsk->timer);
737
        newsk->timer.data = (unsigned long)newsk;
738
        newsk->timer.function = &net_timer;
739
        init_timer(&newsk->delack_timer);
740
        newsk->delack_timer.data = (unsigned long)newsk;
741
        newsk->delack_timer.function = tcp_delack_timer;
742
        init_timer(&newsk->retransmit_timer);
743
        newsk->retransmit_timer.data = (unsigned long)newsk;
744
        newsk->retransmit_timer.function = tcp_retransmit_timer;
745
        newsk->dummy_th.source = skb->h.th->dest;
746
        newsk->dummy_th.dest = skb->h.th->source;
747
        newsk->users=0;
748
 
749
#ifdef CONFIG_IP_TRANSPARENT_PROXY
750
        /*
751
         *      Deal with possibly redirected traffic by setting num to
752
         *      the intended destination port of the received packet.
753
         */
754
        newsk->num = ntohs(skb->h.th->dest);
755
 
756
#endif
757
        /*
758
         *      Swap these two, they are from our point of view.
759
         */
760
 
761
        newsk->daddr = saddr;
762
        newsk->saddr = daddr;
763
        newsk->rcv_saddr = daddr;
764
#ifdef CONFIG_SYN_COOKIES
765
        /* Don't actually stuff the socket into the protocol lists
766
         * if we are going to just destroy it anyway. We don't want any
767
         * funnies happening if the next packet arrives before we get
768
         * a chance to clean this one up.
769
         */
770
        if (!send_cookie)
771
#endif
772
        {
773
                tcp_v4_hash(newsk);
774
                add_to_prot_sklist(newsk);
775
        }
776
 
777
        newsk->acked_seq = skb->seq + 1;
778
        newsk->copied_seq = skb->seq + 1;
779
        newsk->socket = NULL;
780
        newsk->listening = sk;
781
 
782
        /*
783
         *      Grab the ttl and tos values and use them
784
         */
785
 
786
        newsk->ip_ttl=sk->ip_ttl;
787
        newsk->ip_tos=TOS_VALID_MASK(skb->ip_hdr->tos);
788
 
789
        /*
790
         *      Use 512 or whatever user asked for
791
         */
792
 
793
        /*
794
         *      Note use of sk->user_mss, since user has no direct access to newsk
795
         */
796
 
797
        rt = ip_rt_route(newsk->opt && newsk->opt->srr ? newsk->opt->faddr : saddr, 0,
798
                         sk->bound_device);
799
        newsk->ip_route_cache = rt;
800
 
801
        if(rt!=NULL && (rt->rt_flags&RTF_WINDOW))
802
                newsk->window_clamp = rt->rt_window;
803
        else
804
                newsk->window_clamp = 0;
805
 
806
        if (sk->user_mss)
807
                newsk->mtu = sk->user_mss;
808
        else if (rt)
809
                newsk->mtu = rt->rt_mtu - sizeof(struct iphdr) - sizeof(struct tcphdr);
810
        else
811
                newsk->mtu = 576 - sizeof(struct iphdr) - sizeof(struct tcphdr);
812
 
813
        /*
814
         *      But not bigger than device MTU
815
         */
816
 
817
        newsk->mtu = min(newsk->mtu, dev->mtu - sizeof(struct iphdr) - sizeof(struct tcphdr));
818
 
819
        /* Must check it here, just to be absolutely safe.  If we end up
820
         * with a newsk->{max_window,mtu} of zero, we can thus end up with
821
         * a newsk->mss of zero, which causes us to bomb out in
822
         * tcp_do_sendmsg. -DaveM
823
         */
824
        if(newsk->mtu < 32)
825
                newsk->mtu = 32;
826
 
827
#ifdef CONFIG_SKIP
828
 
829
        /*
830
         *      SKIP devices set their MTU to 65535. This is so they can take packets
831
         *      unfragmented to security process then fragment. They could lie to the
832
         *      TCP layer about a suitable MTU, but it's easier to let skip sort it out
833
         *      simply because the final package we want unfragmented is going to be
834
         *
835
         *      [IPHDR][IPSP][Security data][Modified TCP data][Security data]
836
         */
837
 
838
        if(skip_pick_mtu!=NULL)         /* If SKIP is loaded.. */
839
                sk->mtu=skip_pick_mtu(sk->mtu,dev);
840
#endif
841
        /*
842
         *      This will min with what arrived in the packet
843
         */
844
 
845
        tcp_options(newsk,skb->h.th);
846
 
847
#ifdef CONFIG_SYN_COOKIES
848
        if (send_cookie) {
849
                int mtu_index = 0;
850
                /* Pick the largest MTU smaller than sk->mtu that we
851
                 * can represent in a cookies bottom 3 bits.
852
                 */
853
                while (newsk->mtu > cookie_mtu[mtu_index+1] && mtu_index < 7)
854
                        mtu_index++;
855
                newsk->mtu = cookie_mtu[mtu_index];
856
                /*
857
                 * Choose a cookie.
858
                 */
859
                seq = secure_tcp_syn_cookie(daddr,saddr,
860
                        ntohs(th->source),ntohs(th->dest),ntohl(th->seq),jiffies/(60*HZ));
861
                seq |= mtu_index;
862
        }
863
#endif
864
 
865
        /* Set up the right sequence numbers */
866
        newsk->write_seq = seq;
867
        newsk->window_seq = newsk->write_seq;
868
        newsk->rcv_ack_seq = newsk->write_seq;
869
 
870
#ifdef CONFIG_SYN_COOKIES
871
        tcp_send_synack(newsk, sk, skb, send_cookie);
872
#else
873
        tcp_send_synack(newsk, sk, skb, 0);
874
#endif
875
}
876
 
877
 
878
#ifdef CONFIG_SYN_COOKIES
879
/*
 *	This routine handles a faked connection request as a result
 *	of a valid SYN cookie being seen. This sets up a socket in the
 *	SYN_RECV state.
 */
884
 
885
/*
 *	Parameters:
 *	  sk		- listening socket the cookie handshake was run on
 *	  skb		- the ACK segment that carried the valid cookie
 *	  daddr/saddr	- our / the peer's address from the received packet
 *	  opt		- IP options from the received packet (echoed back)
 *	  dev		- device the packet arrived on (unused here)
 *	  seq		- our initial sequence number recovered from the cookie
 *	  mtu		- the MTU that was encoded in the cookie's low bits
 *
 *	Returns 1 if a new SYN_RECV socket was created and queued so that
 *	accept() can pick it up, 0 on failure (the caller must then NOT
 *	recurse into tcp_rcv, see the allocation-failure comment below).
 */
static int tcp_conn_request_fake(struct sock *sk, struct sk_buff *skb,
		 u32 daddr, u32 saddr, struct options *opt, struct device *dev, u32 seq, u32 mtu)
{
	struct sock *newsk;
	struct sk_buff *newskb;
	struct rtable *rt;

	/* If the socket is dead, don't accept the connection. */
	if (!sk->dead)
	{
		/*sk->data_ready(sk,0); */
	}
	else
	{
		if(sk->debug)
			printk("Reset on %p: Connect on dead socket.\n",sk);
		tcp_statistics.TcpAttemptFails++;
		return 0;
	}

	/*
	 * We need to build a new sock struct.
	 * It is sort of bad to have a socket without an inode attached
	 * to it, but the wake_up's will just wake up the listening socket,
	 * and if the listening socket is destroyed before this is taken
	 * off of the queue, this will take care of it.
	 */

	newsk = (struct sock *) kmalloc(sizeof(struct sock), GFP_ATOMIC);
	if (newsk == NULL)
	{
		/* Bad juju. If we ignore things now the remote side
		 * will be frozen. Really we should retrans the cookie,
		 * but that's a no go also, since we don't have enough
		 * memory to receive it either. So, we're stuck with
		 * this bad case, and a few others further down.
		 * We just have to hope it is a low probability event.
		 * Also, to avoid a loop we must not go down into
		 * the recursive call to tcp_rcv in the caller to this
		 * routine, so we should let them know we failed.
		 */
		tcp_statistics.TcpAttemptFails++;
		return 0;
	}

	memcpy(newsk, sk, sizeof(*newsk));

	/* Or else we die! -DaveM */
	newsk->sklist_next = NULL;

	newsk->opt = NULL;
	newsk->ip_route_cache  = NULL;
	if (opt && opt->optlen)
	{
		/* Echo the received IP options on the new connection.
		 * BUGFIX: these must hang off the *new* socket. The old
		 * code stored them in sk->opt, clobbering (and leaking)
		 * the listener's options while leaving newsk->opt NULL,
		 * so the echoed options were never consulted by the
		 * ip_rt_route() source-route check below.
		 */
		newsk->opt = (struct options*)kmalloc(sizeof(struct options)+opt->optlen, GFP_ATOMIC);
		if (!newsk->opt)
		{
			/* More bad juju. */
			kfree_s(newsk, sizeof(struct sock));
			tcp_statistics.TcpAttemptFails++;
			return 0;
		}
		if (ip_options_echo(newsk->opt, opt, daddr, saddr, skb))
		{
			/* More bad juju. */
			kfree_s(newsk->opt, sizeof(struct options)+opt->optlen);
			kfree_s(newsk, sizeof(struct sock));
			tcp_statistics.TcpAttemptFails++;
			return 0;
		}
	}

	/* Fresh queues/timers: the memcpy above copied the listener's,
	 * which must not be shared with the child.
	 */
	skb_queue_head_init(&newsk->write_queue);
	skb_queue_head_init(&newsk->receive_queue);
	newsk->send_head = NULL;
	newsk->send_tail = NULL;
	newsk->send_next = NULL;
	skb_queue_head_init(&newsk->back_log);
	newsk->rtt = 0;
	newsk->rto = TCP_TIMEOUT_INIT;
	newsk->mdev = TCP_TIMEOUT_INIT;
	newsk->max_window = 32; /* It cannot be left at zero. -DaveM */
	/*
	 * See draft-stevens-tcpca-spec-01 for discussion of the
	 * initialization of these values.
	 */
	newsk->cong_window = 1;
	newsk->cong_count = 0;
	newsk->ssthresh = 0x7fffffff;

	newsk->lrcvtime = 0;
	newsk->idletime = 0;
	newsk->high_seq = 0;
	newsk->backoff = 0;
	newsk->blog = 0;
	newsk->intr = 0;
	newsk->proc = 0;
	newsk->done = 0;
	newsk->partial = NULL;
	newsk->pair = NULL;
	newsk->wmem_alloc = 0;
	newsk->rmem_alloc = 0;
	newsk->localroute = sk->localroute;

	newsk->max_unacked = MAX_WINDOW - TCP_WINDOW_DIFF;

	newsk->err = 0;
	newsk->shutdown = 0;
	newsk->ack_backlog = 0;
	/* Unlike the normal SYN path (skb->seq+1), the peer's SYN was
	 * consumed during the cookie exchange: skb->seq here is already
	 * the first data byte, and the original SYN sat at skb->seq-1.
	 */
	newsk->acked_seq = skb->seq;
	newsk->lastwin_seq = skb->seq;
	newsk->delay_acks = 1;
	newsk->copied_seq = skb->seq;
	newsk->fin_seq = skb->seq-1;
	newsk->syn_seq = skb->seq-1;
	newsk->state = TCP_SYN_RECV;
	newsk->timeout = 0;
	newsk->ip_xmit_timeout = 0;
	newsk->urg_data = 0;
	newsk->retransmits = 0;
	newsk->linger=0;
	newsk->destroy = 0;
	init_timer(&newsk->timer);
	newsk->timer.data = (unsigned long)newsk;
	newsk->timer.function = &net_timer;
	init_timer(&newsk->delack_timer);
	newsk->delack_timer.data = (unsigned long)newsk;
	newsk->delack_timer.function = tcp_delack_timer;
	init_timer(&newsk->retransmit_timer);
	newsk->retransmit_timer.data = (unsigned long)newsk;
	newsk->retransmit_timer.function = tcp_retransmit_timer;
	newsk->dummy_th.source = skb->h.th->dest;
	newsk->dummy_th.dest = skb->h.th->source;
	newsk->users=0;

#ifdef CONFIG_IP_TRANSPARENT_PROXY
	/*
	 *	Deal with possibly redirected traffic by setting num to
	 *	the intended destination port of the received packet.
	 */
	newsk->num = ntohs(skb->h.th->dest);

#endif
	/*
	 *	Swap these two, they are from our point of view.
	 */

	newsk->daddr = saddr;
	newsk->saddr = daddr;
	newsk->rcv_saddr = daddr;
	tcp_v4_hash(newsk);
	add_to_prot_sklist(newsk);

	newsk->acked_seq = skb->seq;
	newsk->copied_seq = skb->seq;
	newsk->socket = NULL;
	newsk->listening = sk;

	/*
	 *	Grab the ttl and tos values and use them
	 */

	newsk->ip_ttl=sk->ip_ttl;
	newsk->ip_tos=TOS_VALID_MASK(skb->ip_hdr->tos);

	rt = ip_rt_route(newsk->opt && newsk->opt->srr ? newsk->opt->faddr : saddr, 0,
			 sk->bound_device);
	newsk->ip_route_cache = rt;

	if (rt!=NULL && (rt->rt_flags&RTF_WINDOW))
		newsk->window_clamp = rt->rt_window;
	else
		newsk->window_clamp = 0;

	/* The MTU was recovered from the cookie's low bits; no option
	 * negotiation is possible here since the SYN is long gone.
	 */
	newsk->mtu = mtu;

	/* Set up the right sequence numbers.
	 * Note that we have to make sure write_seq is correct for having
	 * sent off the handshake!
	 */
	newsk->write_seq = seq+1;
	newsk->sent_seq = seq+1;
	newsk->window_seq = seq;
	newsk->rcv_ack_seq = seq;
	newsk->max_unacked = 2 * newsk->mss;

	tcp_select_window(newsk);

	/* We need to get something into the receive queue to enable an
	 * accept. Possibly we should be faking up a SYN packet, but
	 * as far as I can tell the contents of this skb don't matter,
	 * so long as it points to our new socket.
	 */
	newskb = skb_clone(skb,GFP_ATOMIC);
	/* NOTE(review): skb_clone() can return NULL under memory
	 * pressure, which would oops on the next line. Recovering here
	 * would require unhashing newsk from the protocol lists again,
	 * so this is flagged rather than changed - confirm and fix
	 * with a proper teardown path.
	 */
	newskb->sk = newsk;
	atomic_add(skb->truesize, &newsk->rmem_alloc);
	sk->ack_backlog++;
	skb_queue_tail(&sk->receive_queue,newskb);
	return 1;
}
1085
#endif
1086
 
1087
/*
 * Handle a TCP window that shrunk on us. It shouldn't happen,
 * but..
 *
 * We may need to move packets from the send queue
 * to the write queue, if the window has been shrunk on us.
 * The RFC says you are not allowed to shrink your window
 * like this, but if the other end does, you must be able
 * to deal with it.
 */
1097
void tcp_window_shrunk(struct sock * sk, u32 window_seq)
{
	struct sk_buff *skb;
	struct sk_buff *skb2;
	struct sk_buff *wskb = NULL;	/* last packet moved to write_queue; keeps moved packets in order */

	/* Detach the whole retransmit list up front; packets that still
	 * fit in the new window are re-linked onto it below, the rest
	 * are pushed back onto the write_queue.
	 */
	skb2 = sk->send_head;
	sk->send_head = NULL;
	sk->send_tail = NULL;
	sk->send_next = NULL;

	/*
	 *	This is an artifact of a flawed concept. We want one
	 *	queue and a smarter send routine when we send all.
	 */
	cli();	/* interrupts off: the link3 chain and dev queues are touched from IRQ context */
	while (skb2 != NULL)
	{
		skb = skb2;
		skb2 = skb->link3;	/* link3 chains the retransmit list */
		skb->link3 = NULL;
		if (after(skb->end_seq, window_seq))
		{
			/* Packet ends beyond the shrunken right edge:
			 * it can no longer be transmitted, so take it
			 * back out of flight.
			 */
			if (sk->packets_out > 0)
				sk->packets_out--;
			/* We may need to remove this from the dev send list. */
			if (skb->next != NULL)
			{
				skb_unlink(skb);
			}
			/* Now add it to the write_queue. */
			if (wskb == NULL)
				skb_queue_head(&sk->write_queue,skb);
			else
				skb_append(wskb,skb);
			wskb = skb;
		}
		else
		{
			/* Still inside the window: keep it on the
			 * retransmit list. Appending at send_tail
			 * preserves the original sequence order.
			 */
			if (sk->send_head == NULL)
			{
				sk->send_head = skb;
				sk->send_tail = skb;
				sk->send_next = skb;
			}
			else
			{
				sk->send_tail->link3 = skb;
				sk->send_tail = skb;
			}
			skb->link3 = NULL;
		}
	}
	sti();
}
1152
 
1153
 
1154
/*
1155
 *      This routine deals with incoming acks, but not outgoing ones.
1156
 *
1157
 *      This routine is totally _WRONG_. The list structuring is wrong,
1158
 *      the algorithm is wrong, the code is wrong.
1159
 */
1160
 
1161
static int tcp_ack(struct sock *sk, struct tcphdr *th, u32 ack, int len)
1162
{
1163
        int flag = 0;
1164
        u32 window_seq;
1165
 
1166
        /*
1167
         * 1 - there was data in packet as well as ack or new data is sent or
1168
         *     in shutdown state
1169
         * 2 - data from retransmit queue was acked and removed
1170
         * 4 - window shrunk or data from retransmit queue was acked and removed
1171
         */
1172
 
1173
        if(sk->zapped)
1174
                return(1);      /* Dead, can't ack any more so why bother */
1175
 
1176
        /*
1177
         *      We have dropped back to keepalive timeouts. Thus we have
1178
         *      no retransmits pending.
1179
         */
1180
 
1181
        if (sk->ip_xmit_timeout == TIME_KEEPOPEN)
1182
                sk->retransmits = 0;
1183
 
1184
        /*
1185
         *      If the ack is newer than sent or older than previous acks
1186
         *      then we can probably ignore it.
1187
         */
1188
 
1189
        if (after(ack, sk->sent_seq) || before(ack, sk->rcv_ack_seq))
1190
                goto uninteresting_ack;
1191
 
1192
        /*
1193
         *      Have we discovered a larger window
1194
         */
1195
        window_seq = ntohs(th->window);
1196
        if (window_seq > sk->max_window)
1197
        {
1198
                sk->max_window = window_seq;
1199
#ifdef CONFIG_INET_PCTCP
1200
                /* Hack because we don't send partial packets to non SWS
1201
                   handling hosts */
1202
                sk->mss = min(window_seq>>1, sk->mtu);
1203
#else
1204
                sk->mss = min(window_seq, sk->mtu);
1205
#endif  
1206
        }
1207
        window_seq += ack;
1208
 
1209
        /*
1210
         *      See if our window has been shrunk.
1211
         */
1212
        if (after(sk->window_seq, window_seq))
1213
                tcp_window_shrunk(sk, window_seq);
1214
 
1215
        /*
1216
         *      Pipe has emptied
1217
         */
1218
        if (sk->send_tail == NULL || sk->send_head == NULL)
1219
        {
1220
                sk->send_head = NULL;
1221
                sk->send_tail = NULL;
1222
                sk->send_next = NULL;
1223
                sk->packets_out= 0;
1224
        }
1225
 
1226
        /*
1227
         *      We don't want too many packets out there.
1228
         */
1229
 
1230
        if (sk->ip_xmit_timeout == TIME_WRITE &&
1231
                sk->cong_window < 2048 && after(ack, sk->rcv_ack_seq))
1232
        {
1233
 
1234
                /*
1235
                 * This is Jacobson's slow start and congestion avoidance.
1236
                 * SIGCOMM '88, p. 328.  Because we keep cong_window in integral
1237
                 * mss's, we can't do cwnd += 1 / cwnd.  Instead, maintain a
1238
                 * counter and increment it once every cwnd times.  It's possible
1239
                 * that this should be done only if sk->retransmits == 0.  I'm
1240
                 * interpreting "new data is acked" as including data that has
1241
                 * been retransmitted but is just now being acked.
1242
                 */
1243
                if (sk->cong_window <= sk->ssthresh)
1244
                        /*
1245
                         *      In "safe" area, increase
1246
                         */
1247
                        sk->cong_window++;
1248
                else
1249
                {
1250
                        /*
1251
                         *      In dangerous area, increase slowly.  In theory this is
1252
                         *      sk->cong_window += 1 / sk->cong_window
1253
                         */
1254
                        if (sk->cong_count >= sk->cong_window)
1255
                        {
1256
                                sk->cong_window++;
1257
                                sk->cong_count = 0;
1258
                        }
1259
                        else
1260
                                sk->cong_count++;
1261
                }
1262
        }
1263
 
1264
        /*
1265
         *      Remember the highest ack received and update the
1266
         *      right hand window edge of the host.
1267
         *      We do a bit of work here to track number of times we've
1268
         *      seen this ack without a change in the right edge of the
1269
         *      window and no data in the packet.
1270
         *      This will allow us to do fast retransmits.
1271
         */
1272
 
1273
        /* We are looking for duplicate ACKs here.
1274
         * An ACK is a duplicate if:
1275
         * (1) it has the same sequence number as the largest number we've seen,
1276
         * (2) it has the same window as the last ACK,
1277
         * (3) we have outstanding data that has not been ACKed
1278
         * (4) The packet was not carrying any data.
1279
         * (5) [From Floyd's paper on fast retransmit wars]
1280
         *     The packet acked data after high_seq;
1281
         * I've tried to order these in occurrence of most likely to fail
1282
         * to least likely to fail.
1283
         * [These are an extension of the rules BSD stacks use to
1284
         *  determine if an ACK is a duplicate.]
1285
         */
1286
 
1287
        if (sk->rcv_ack_seq == ack
1288
                && sk->window_seq == window_seq
1289
                && len == th->doff*4
1290
                && before(ack, sk->sent_seq)
1291
                && after(ack, sk->high_seq))
1292
        {
1293
                /* Prevent counting of duplicate ACKs if the congestion
1294
                 * window is smaller than 3. Note that since we reduce
1295
                 * the congestion window when we do a fast retransmit,
1296
                 * we must be careful to keep counting if we were already
1297
                 * counting. The idea behind this is to avoid doing
1298
                 * fast retransmits if the congestion window is so small
1299
                 * that we cannot get 3 ACKs due to the loss of a packet
1300
                 * unless we are getting ACKs for retransmitted packets.
1301
                 */
1302
                if (sk->cong_window >= 3 || sk->rcv_ack_cnt > MAX_DUP_ACKS+1)
1303
                        sk->rcv_ack_cnt++;
1304
                /* See draft-stevens-tcpca-spec-01 for explanation
1305
                 * of what we are doing here.
1306
                 */
1307
                if (sk->rcv_ack_cnt == MAX_DUP_ACKS+1) {
1308
                        int tmp;
1309
 
1310
                        /* We need to be a bit careful to preserve the
1311
                         * count of packets that are out in the system here.
1312
                         */
1313
                        sk->ssthresh = max(
1314
                                min(sk->cong_window,
1315
                                (sk->window_seq-sk->rcv_ack_seq)/max(sk->mss,1))
1316
                                 >> 1, 2);
1317
                        sk->cong_window = sk->ssthresh+MAX_DUP_ACKS+1;
1318
                        sk->cong_count = 0;
1319
                        tmp = sk->packets_out;
1320
                        tcp_do_retransmit(sk,0);
1321
                        sk->packets_out = tmp;
1322
                } else if (sk->rcv_ack_cnt > MAX_DUP_ACKS+1) {
1323
                        sk->cong_window++;
1324
                        /*
1325
                        * At this point we are suppose to transmit a NEW
1326
                        * packet (not retransmit the missing packet,
1327
                        * this would only get us into a retransmit war.)
1328
                        * I think that having just adjusted cong_window
1329
                        * we will transmit the new packet below.
1330
                        */
1331
                }
1332
        }
1333
        else
1334
        {
1335
                if (sk->rcv_ack_cnt > MAX_DUP_ACKS) {
1336
                        /* Don't allow congestion window to drop to zero. */
1337
                        sk->cong_window = max(sk->ssthresh, 1);
1338
                        sk->cong_count = 0;
1339
                }
1340
                sk->window_seq = window_seq;
1341
                sk->rcv_ack_seq = ack;
1342
                sk->rcv_ack_cnt = 1;
1343
        }
1344
 
1345
        /*
1346
         *      We passed data and got it acked, remove any soft error
1347
         *      log. Something worked...
1348
         */
1349
 
1350
        sk->err_soft = 0;
1351
 
1352
        /*
1353
         *      If this ack opens up a zero window, clear backoff.  It was
1354
         *      being used to time the probes, and is probably far higher than
1355
         *      it needs to be for normal retransmission.
1356
         */
1357
 
1358
        if (sk->ip_xmit_timeout == TIME_PROBE0)
1359
        {
1360
                sk->retransmits = 0;     /* Our probe was answered */
1361
 
1362
                /*
1363
                 *      Was it a usable window open ?
1364
                 */
1365
 
1366
                if (!skb_queue_empty(&sk->write_queue) &&   /* should always be true */
1367
                    ! before (sk->window_seq, sk->write_queue.next->end_seq))
1368
                {
1369
                        sk->backoff = 0;
1370
 
1371
                        /*
1372
                         *      Recompute rto from rtt.  this eliminates any backoff.
1373
                         */
1374
 
1375
                        /*
1376
                         * Appendix C of Van Jacobson's final version of
1377
                         * the SIGCOMM 88 paper states that although
1378
                         * the original paper suggested that
1379
                         *  RTO = R*2V
1380
                         * was the correct calculation experience showed
1381
                         * better results using
1382
                         *  RTO = R*4V
1383
                         * In particular this gives better performance over
1384
                         * slow links, and should not effect fast links.
1385
                         *
1386
                         * Note: Jacobson's algorithm is fine on BSD which
1387
                         * has a 1/2 second granularity clock, but with our
1388
                         * 1/100 second granularity clock we become too
1389
                         * sensitive to minor changes in the round trip time.
1390
                         * We add in two compensating factors.
1391
                         * First we multiply by 5/4. For large congestion
1392
                         * windows this allows us to tolerate burst traffic
1393
                         * delaying up to 1/4 of our packets.
1394
                         * We also add in a rtt / cong_window term.
1395
                         * For small congestion windows this allows
1396
                         * a single packet delay, but has negligible effect
1397
                         * on the compensation for large windows.
1398
                         */
1399
                        sk->rto = (sk->rtt >> 3) + sk->mdev;
1400
                        sk->rto += (sk->rto>>2) + (sk->rto >> (sk->cong_window-1));
1401
                        if (sk->rto > 120*HZ)
1402
                                sk->rto = 120*HZ;
1403
                        if (sk->rto < HZ/5)     /* Was 1*HZ, then 1 - turns out we must allow about
1404
                                                   .2 of a second because of BSD delayed acks - on a 100Mb/sec link
1405
                                                   .2 of a second is going to need huge windows (SIGH) */
1406
                        sk->rto = HZ/5;
1407
                }
1408
        }
1409
 
1410
        /*
1411
         *      See if we can take anything off of the retransmit queue.
1412
         */
1413
 
1414
        for (;;) {
1415
                int was_locked;
1416
                struct sk_buff * skb = sk->send_head;
1417
                if (!skb)
1418
                        break;
1419
 
1420
                /* Check for a bug. */
1421
                if (skb->link3 && after(skb->end_seq, skb->link3->end_seq))
1422
                        printk("INET: tcp.c: *** bug send_list out of order.\n");
1423
 
1424
                /*
1425
                 *      If our packet is before the ack sequence we can
1426
                 *      discard it as it's confirmed to have arrived the other end.
1427
                 */
1428
 
1429
                if (after(skb->end_seq, ack))
1430
                        break;
1431
 
1432
                if (sk->retransmits)
1433
                {
1434
                        /*
1435
                         *      We were retransmitting.  don't count this in RTT est
1436
                         */
1437
                        flag |= 2;
1438
                }
1439
 
1440
                if ((sk->send_head = skb->link3) == NULL)
1441
                {
1442
                        sk->send_tail = NULL;
1443
                        sk->send_next = NULL;
1444
                        sk->retransmits = 0;
1445
                }
1446
 
1447
                /*
1448
                 * advance the send_next pointer if needed.
1449
                 */
1450
                if (sk->send_next == skb)
1451
                        sk->send_next = sk->send_head;
1452
 
1453
                /*
1454
                 * Note that we only reset backoff and rto in the
1455
                 * rtt recomputation code.  And that doesn't happen
1456
                 * if there were retransmissions in effect.  So the
1457
                 * first new packet after the retransmissions is
1458
                 * sent with the backoff still in effect.  Not until
1459
                 * we get an ack from a non-retransmitted packet do
1460
                 * we reset the backoff and rto.  This allows us to deal
1461
                 * with a situation where the network delay has increased
1462
                 * suddenly.  I.e. Karn's algorithm. (SIGCOMM '87, p5.)
1463
                 */
1464
 
1465
                /*
1466
                 *      We have one less packet out there.
1467
                 */
1468
 
1469
                if (sk->packets_out > 0)
1470
                        sk->packets_out --;
1471
 
1472
                /* This is really only supposed to be called when we
1473
                 * are actually ACKing new data, which should exclude
1474
                 * the ACK handshake on an initial SYN packet as well.
1475
                 * Rather than introducing a new test here for this
1476
                 * special case, we just reset the initial values for
1477
                 * rtt immediately after we move to the established state.
1478
                 */
1479
                if (!(flag&2))  /* Not retransmitting */
1480
                        tcp_rtt_estimator(sk,skb);
1481
                IS_SKB(skb);
1482
 
1483
                /*
1484
                 *      We may need to remove this from the dev send list.
1485
                 */
1486
                cli();
1487
                was_locked = skb_device_locked(skb);
1488
 
1489
                if (was_locked) {
1490
                        /* In this case, we are relying on the fact that kfree_skb
1491
                         * will just set the free flag to be 3, and increment
1492
                         * a counter. It will not actually free anything, and
1493
                         * will not take much time
1494
                         */
1495
                        kfree_skb(skb, FREE_WRITE);
1496
                } else {
1497
                        skb_unlink(skb);
1498
                }
1499
                sti();
1500
 
1501
                if (!was_locked)
1502
                    kfree_skb(skb, FREE_WRITE); /* write. */
1503
                if (!sk->dead)
1504
                        sk->write_space(sk);
1505
        }
1506
 
1507
        /*
1508
         * Maybe we can take some stuff off of the write queue,
1509
         * and put it onto the xmit queue.
1510
         * There is bizarre case being tested here, to check if
1511
         * the data at the head of the queue ends before the start of
1512
         * the sequence we already ACKed. This is not an error,
1513
         * it can occur when we send a packet directly off of the write_queue
1514
         * in a zero window probe.
1515
         */
1516
 
1517
        if (!skb_queue_empty(&sk->write_queue) &&
1518
                !before(sk->window_seq, sk->write_queue.next->end_seq) &&
1519
                (sk->retransmits == 0 ||
1520
                 sk->ip_xmit_timeout != TIME_WRITE ||
1521
                 !after(sk->write_queue.next->end_seq, sk->rcv_ack_seq)) &&
1522
                sk->packets_out < sk->cong_window)
1523
        {
1524
                /*
1525
                 *      Add more data to the send queue.
1526
                 */
1527
                tcp_write_xmit(sk);
1528
        }
1529
 
1530
        /*
1531
         * Reset timers to reflect the new state.
1532
         *
1533
         * from TIME_WAIT we stay in TIME_WAIT as long as we rx packets
1534
         * from TCP_CLOSE we don't do anything
1535
         *
1536
         * from anything else, if there is queued data (or fin) pending,
1537
         * we use a TIME_WRITE timeout, if there is data to write but
1538
         * no room in the window we use TIME_PROBE0, else if keepalive
1539
         * we reset to a KEEPALIVE timeout, else we delete the timer.
1540
         *
1541
         * We do not set flag for nominal write data, otherwise we may
1542
         * force a state where we start to write itsy bitsy tidbits
1543
         * of data.
1544
         */
1545
 
1546
        switch(sk->state) {
1547
        case TCP_TIME_WAIT:
1548
                /*
1549
                 * keep us in TIME_WAIT until we stop getting packets,
1550
                 * reset the timeout.
1551
                 */
1552
                tcp_reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
1553
                break;
1554
        case TCP_CLOSE:
1555
                /*
1556
                 * don't touch the timer.
1557
                 */
1558
                break;
1559
        default:
1560
                /*
1561
                 *      Must check send_head and write_queue
1562
                 *      to determine which timeout to use.
1563
                 */
1564
                if (sk->send_head) {
1565
                        tcp_reset_xmit_timer(sk, TIME_WRITE, sk->rto);
1566
                } else if (!skb_queue_empty(&sk->write_queue)
1567
                        && sk->ack_backlog == 0)
1568
                {
1569
                        /*
1570
                         * if the write queue is not empty when we get here
1571
                         * then we failed to move any data to the retransmit
1572
                         * queue above. (If we had send_head would be non-NULL).
1573
                         * Furthermore, since the send_head is NULL here
1574
                         * we must not be in retransmit mode at this point.
1575
                         * This implies we have no packets in flight,
1576
                         * hence sk->packets_out < sk->cong_window.
1577
                         * Examining the conditions for the test to move
1578
                         * data to the retransmission queue we find that
1579
                         * we must therefore have a zero window.
1580
                         * Hence, if the ack_backlog is 0 we should initiate
1581
                         * a zero probe.
1582
                         * We don't do a zero probe if we have a delayed
1583
                         * ACK in hand since the other side may have a
1584
                         * window opening, but they are waiting to hear
1585
                         * from us before they tell us about it.
1586
                         * (They are applying Nagle's rule).
1587
                         * So, we don't set up the zero window probe
1588
                         * just yet. We do have to clear the timer
1589
                         * though in this case...
1590
                         */
1591
                        tcp_reset_xmit_timer(sk, TIME_PROBE0, sk->rto);
1592
                } else if (sk->keepopen) {
1593
                        tcp_reset_xmit_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
1594
                } else {
1595
                        del_timer(&sk->retransmit_timer);
1596
                        sk->ip_xmit_timeout = 0;
1597
                }
1598
                break;
1599
        }
1600
 
1601
        /*
1602
         * In the LAST_ACK case, the other end FIN'd us.  We then FIN'd them, and
1603
         * we are now waiting for an acknowledge to our FIN.  The other end is
1604
         * already in TIME_WAIT.
1605
         *
1606
         * Move to TCP_CLOSE on success.
1607
         */
1608
 
1609
        if (sk->state == TCP_LAST_ACK)
1610
        {
1611
                if (!sk->dead)
1612
                        sk->state_change(sk);
1613
                if(sk->debug)
1614
                        printk("rcv_ack_seq: %X==%X, acked_seq: %X==%X\n",
1615
                                sk->rcv_ack_seq,sk->write_seq,sk->acked_seq,sk->fin_seq);
1616
                if (sk->rcv_ack_seq == sk->write_seq /*&& sk->acked_seq == sk->fin_seq*/)
1617
                {
1618
                        sk->shutdown = SHUTDOWN_MASK;
1619
                        tcp_set_state(sk,TCP_CLOSE);
1620
                        return 1;
1621
                }
1622
        }
1623
 
1624
        /*
1625
         *      Incoming ACK to a FIN we sent in the case of our initiating the close.
1626
         *
1627
         *      Move to FIN_WAIT2 to await a FIN from the other end. Set
1628
         *      SEND_SHUTDOWN but not RCV_SHUTDOWN as data can still be coming in.
1629
         */
1630
 
1631
        if (sk->state == TCP_FIN_WAIT1)
1632
        {
1633
 
1634
                if (!sk->dead)
1635
                        sk->state_change(sk);
1636
                if (sk->rcv_ack_seq == sk->write_seq)
1637
                {
1638
                        sk->shutdown |= SEND_SHUTDOWN;
1639
                        tcp_set_state(sk, TCP_FIN_WAIT2);
1640
                        /* If the socket is dead, then there is no
1641
                         * user process hanging around using it.
1642
                         * We want to set up a FIN_WAIT2 timeout ala BSD.
1643
                         */
1644
                        if (sk->dead)
1645
                                tcp_reset_msl_timer(sk, TIME_CLOSE, TCP_FIN_TIMEOUT);
1646
                }
1647
        }
1648
 
1649
        /*
1650
         *      Incoming ACK to a FIN we sent in the case of a simultaneous close.
1651
         *
1652
         *      Move to TIME_WAIT
1653
         */
1654
 
1655
        if (sk->state == TCP_CLOSING)
1656
        {
1657
 
1658
                if (!sk->dead)
1659
                        sk->state_change(sk);
1660
                if (sk->rcv_ack_seq == sk->write_seq)
1661
                {
1662
                        tcp_time_wait(sk);
1663
                }
1664
        }
1665
 
1666
        /*
1667
         *      Final ack of a three way shake
1668
         */
1669
 
1670
        if (sk->state==TCP_SYN_RECV)
1671
        {
1672
                tcp_set_state(sk, TCP_ESTABLISHED);
1673
 
1674
                /*
1675
                 *      We have a listening socket owning us. Wake it for
1676
                 *      the accept.
1677
                 */
1678
 
1679
                if ( sk->listening )
1680
                {
1681
                        /* The listener may be sk->dead. Dont worry
1682
                           data_ready traps this */
1683
                        sk->data_ready(sk->listening,0);
1684
                        sk->listening = NULL;
1685
                }
1686
 
1687
                /* Must check for peer advertising zero sized window
1688
                 * or else we get a sk->{mtu,mss} of zero and thus bomb out
1689
                 * in tcp_do_sendmsg. -DaveM
1690
                 */
1691
                if(sk->max_window == 0)
1692
                        sk->max_window = 32;
1693
 
1694
                tcp_options(sk,th);
1695
 
1696
#if 0
1697
                sk->dummy_th.dest=th->source;
1698
                tcp_v4_rehash(sk);
1699
#endif
1700
 
1701
                sk->copied_seq = sk->acked_seq;
1702
                if(!sk->dead)
1703
                        sk->state_change(sk);
1704
 
1705
                /* Reset the RTT estimator to the initial
1706
                 * state rather than testing to avoid
1707
                 * updating it on the ACK to the SYN packet.
1708
                 */
1709
                sk->rtt = 0;
1710
                sk->rto = TCP_TIMEOUT_INIT;
1711
                sk->mdev = TCP_TIMEOUT_INIT;
1712
        }
1713
 
1714
        /*
1715
         * The following code has been greatly simplified from the
1716
         * old hacked up stuff. The wonders of properly setting the
1717
         * retransmission timeouts.
1718
         *
1719
         * If we are retransmitting, and we acked a packet on the retransmit
1720
         * queue, and there is still something in the retransmit queue,
1721
         * then we can output some retransmission packets.
1722
         *
1723
         * Note that we need to be a bit careful here about getting the
1724
         * correct TIME_WRITE timer set. If we just got an ack of a
1725
         * packet we where retransmitting, we will retransmit the next
1726
         * packet in the retransmit queue below, and the timeout
1727
         * should now start from the time we retransmitted that packet.
1728
         * The resetting of the TIME_WRITE timer above will have set it
1729
         * relative to the prior transmission time, which would be wrong.
1730
         */
1731
 
1732
        if (sk->send_head != NULL && (flag&2) && sk->retransmits)
1733
        {
1734
                tcp_do_retransmit(sk, 1);
1735
                tcp_reset_xmit_timer(sk, TIME_WRITE, sk->rto);
1736
        }
1737
 
1738
        return 1;
1739
 
1740
uninteresting_ack:
1741
        if(sk->debug)
1742
                printk("Ack ignored %u %u\n",ack,sk->sent_seq);
1743
 
1744
        /*
1745
         *      Keepalive processing.
1746
         */
1747
 
1748
        if (after(ack, sk->sent_seq))
1749
        {
1750
                return 0;
1751
        }
1752
 
1753
        /*
1754
         *      Restart the keepalive timer.
1755
         */
1756
 
1757
        if (sk->keepopen)
1758
        {
1759
                if(sk->ip_xmit_timeout==TIME_KEEPOPEN)
1760
                        tcp_reset_xmit_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
1761
        }
1762
 
1763
        /*
1764
         * A zero return from tcp_ack(), while in SYN_RECV, means that the
1765
         * handshake has failed, and an RST packet should be generated. We
1766
         * really have to generate an RST here, or a blind spoofing attack
1767
         * would be possible.
1768
         */
1769
        return sk->state != TCP_SYN_RECV;
1770
}
1771
 
1772
 
1773
/*
 *      Process the FIN bit. This now behaves as it is supposed to work
 *      and the FIN takes effect when it is validly part of sequence
 *      space. Not before when we get holes.
 *
 *      If we are ESTABLISHED, a received fin moves us to CLOSE-WAIT
 *      (and thence onto LAST-ACK and finally, CLOSE, we never enter
 *      TIME-WAIT)
 *
 *      If we are in FINWAIT-1, a received FIN indicates simultaneous
 *      close and we go into CLOSING (and later onto TIME-WAIT)
 *
 *      If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT.
 */
1788
 
1789
static int tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
{
        /* Remember where the peer's stream ends; the FIN occupies end_seq. */
        sk->fin_seq = skb->end_seq;

        /* Wake any sleeping reader/selector so it can observe EOF. */
        if (!sk->dead)
        {
                sk->state_change(sk);
                sock_wake_async(sk->socket, 1);
        }

        switch(sk->state)
        {
                case TCP_SYN_RECV:
                case TCP_SYN_SENT:
                case TCP_ESTABLISHED:
                        /*
                         * move to CLOSE_WAIT, tcp_data() already handled
                         * sending the ack.
                         */
                        tcp_set_state(sk,TCP_CLOSE_WAIT);
                        /* A FIN carried with RST shuts both directions down. */
                        if (th->rst)
                                sk->shutdown = SHUTDOWN_MASK;
                        break;

                case TCP_CLOSE_WAIT:
                case TCP_CLOSING:
                        /*
                         * received a retransmission of the FIN, do
                         * nothing.
                         */
                        break;
                case TCP_TIME_WAIT:
                        /*
                         * received a retransmission of the FIN,
                         * restart the TIME_WAIT timer.
                         */
                        tcp_reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
                        return(0);
                case TCP_FIN_WAIT1:
                        /*
                         * This case occurs when a simultaneous close
                         * happens, we must ack the received FIN and
                         * enter the CLOSING state.
                         *
                         * This causes a WRITE timeout, which will either
                         * move on to TIME_WAIT when we timeout, or resend
                         * the FIN properly (maybe we get rid of that annoying
                         * FIN lost hang). The TIME_WRITE code is already correct
                         * for handling this timeout.
                         */

                        if (sk->ip_xmit_timeout != TIME_WRITE) {
                                if (sk->send_head)
                                        tcp_reset_xmit_timer(sk, TIME_WRITE, sk->rto);
                                else if (sk->ip_xmit_timeout != TIME_PROBE0
                                || skb_queue_empty(&sk->write_queue)) {
                                        /* BUG check case.
                                         * We have a problem here if there
                                         * is no timer running [leads to
                                         * frozen socket] or no data in the
                                         * write queue [means we sent a fin
                                         * and lost it from the queue before
                                         * changing the ack properly].
                                         */
                                        printk(KERN_ERR "Lost timer or fin packet in tcp_fin.\n");
                                }
                        }
                        tcp_set_state(sk,TCP_CLOSING);
                        break;
                case TCP_FIN_WAIT2:
                        /*
                         * received a FIN -- send ACK and enter TIME_WAIT
                         */
                        tcp_reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
                        sk->shutdown|=SHUTDOWN_MASK;
                        tcp_set_state(sk,TCP_TIME_WAIT);
                        break;
                case TCP_CLOSE:
                        /*
                         * already in CLOSE
                         */
                        break;
                default:
                        /* Remaining states (e.g. LAST_ACK): fall back to
                         * LAST_ACK and arm an MSL-style timer so the socket
                         * cannot hang forever waiting for the final ACK.
                         */
                        tcp_set_state(sk,TCP_LAST_ACK);

                        /* Start the timers. */
                        tcp_reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
                        return(0);
        }

        return(0);
}
1881
 
1882
/*
1883
 * Add a sk_buff to the TCP receive queue, calculating
1884
 * the ACK sequence as we go..
1885
 */
1886
static inline void tcp_insert_skb(struct sk_buff * skb, struct sk_buff_head * list)
1887
{
1888
        struct sk_buff * prev, * next;
1889
        u32 seq;
1890
 
1891
        /*
1892
         * Find where the new skb goes.. (This goes backwards,
1893
         * on the assumption that we get the packets in order)
1894
         */
1895
        seq = skb->seq;
1896
        prev = list->prev;
1897
        next = (struct sk_buff *) list;
1898
        for (;;) {
1899
                if (prev == (struct sk_buff *) list || !after(prev->seq, seq))
1900
                        break;
1901
                next = prev;
1902
                prev = prev->prev;
1903
        }
1904
        __skb_insert(skb, prev, next, list);
1905
}
1906
 
1907
/*
1908
 * Called for each packet when we find a new ACK endpoint sequence in it
1909
 */
1910
static inline u32 tcp_queue_ack(struct sk_buff * skb, struct sock * sk)
1911
{
1912
        /*
1913
         *      When we ack the fin, we do the FIN
1914
         *      processing.
1915
         */
1916
        skb->acked = 1;
1917
        if (skb->h.th->fin)
1918
                tcp_fin(skb,sk,skb->h.th);
1919
        return skb->end_seq;
1920
}
1921
 
1922
/*
 * Queue a received data segment and drive the ACK machinery:
 * advance acked_seq over any newly contiguous data, then either
 * send an immediate ACK or schedule a delayed one.
 */
static void tcp_queue(struct sk_buff * skb, struct sock * sk, struct tcphdr *th)
{
        u32 ack_seq;

        /* Insert into the sequence-ordered receive queue. */
        tcp_insert_skb(skb, &sk->receive_queue);

        /*
         * Did we get anything new to ack?
         */
        ack_seq = sk->acked_seq;


        if (!after(skb->seq, ack_seq)) {
                if (after(skb->end_seq, ack_seq)) {
                        /* the packet straddles our window end */
                        struct sk_buff_head * list = &sk->receive_queue;
                        struct sk_buff * next;
                        ack_seq = tcp_queue_ack(skb, sk);

                        /*
                         * Do we have any old packets to ack that the above
                         * made visible? (Go forward from skb)
                         */
                        next = skb->next;
                        while (next != (struct sk_buff *) list) {
                                if (after(next->seq, ack_seq))
                                        break;
                                if (after(next->end_seq, ack_seq))
                                        ack_seq = tcp_queue_ack(next, sk);
                                next = next->next;
                        }

                        /*
                         * Ok, we found new data, update acked_seq as
                         * necessary (and possibly send the actual
                         * ACK packet).
                         */
                        sk->acked_seq = ack_seq;

                } else {
                        /* Fully duplicate segment: ACK it again so the
                         * sender stops retransmitting, then drop out.
                         */
                        if (sk->debug)
                                printk("Ack duplicate packet.\n");
                        tcp_send_ack(sk);
                        return;
                }


                /*
                 * Delay the ack if possible.  Send ack's to
                 * fin frames immediately as there shouldn't be
                 * anything more to come.
                 */
                if (!sk->delay_acks || th->fin) {
                        tcp_send_ack(sk);
                } else {
                        /*
                         * If psh is set we assume it's an
                         * interactive session that wants quick
                         * acks to avoid nagling too much.
                         */
                        int delay = HZ/2;
                        if (th->psh)
                                delay = HZ/50;
                        tcp_send_delayed_ack(sk, delay, sk->ato);
                }

                /*
                 *      Tell the user we have some more data.
                 */

                if (!sk->dead)
                        sk->data_ready(sk,0);

        }
        else
        {
            /*
             *  If we've missed a packet, send an ack.
             *  Also start a timer to send another.
             *
             *  4.3reno machines look for these kind of acks so
             *  they can do fast recovery. Three identical 'old'
             *  acks lets it know that one frame has been lost
             *      and should be resent. Because this is before the
             *  whole window of data has timed out it can take
             *  one lost frame per window without stalling.
             *  [See Jacobson RFC1323, Stevens TCP/IP illus vol2]
             *
             *  We also should be spotting triple bad sequences.
             *  [We now do this.]
             *
             */

            if (!skb->acked)
            {
                    if(sk->debug)
                            printk("Ack past end of seq packet.\n");
                    tcp_send_ack(sk);
                    /*
                     * We need to be very careful here. We must
                     * not violate Jacobsons packet conservation condition.
                     * This means we should only send an ACK when a packet
                     * leaves the network. We can say a packet left the
                     * network when we see a packet leave the network, or
                     * when an rto measure expires.
                     */
                    tcp_send_delayed_ack(sk,sk->rto,sk->rto);
            }
        }
}
2032
 
2033
 
2034
/*
2035
 *      This routine handles the data.  If there is room in the buffer,
2036
 *      it will be have already been moved into it.  If there is no
2037
 *      room, then we will just have to discard the packet.
2038
 */
2039
 
2040
/*
 * Handle the data portion of an incoming segment: strip the TCP
 * header, police data arriving after a receive shutdown, and hand
 * the segment to tcp_queue().  Always returns 0.
 */
static int tcp_data(struct sk_buff *skb, struct sock *sk,
         unsigned long saddr, unsigned int len)
{
        struct tcphdr *th;
        u32 new_seq, shut_seq;

        th = skb->h.th;
        /* Trim the buffer down to just the payload bytes. */
        skb_pull(skb,th->doff*4);
        skb_trim(skb,len-(th->doff*4));

        /*
         *      The bytes in the receive read/assembly queue has increased. Needed for the
         *      low memory discard algorithm
         */

        sk->bytes_rcv += skb->len;

        if (skb->len == 0 && !th->fin)
        {
                /*
                 *      Don't want to keep passing ack's back and forth.
                 *      (someone sent us dataless, boring frame)
                 */
                if (!th->ack)
                        tcp_send_ack(sk);
                kfree_skb(skb, FREE_READ);
                return(0);
        }


        /*
         *      We no longer have anyone receiving data on this connection.
         */

#ifndef TCP_DONT_RST_SHUTDOWN

        if(sk->shutdown & RCV_SHUTDOWN)
        {
                /*
                 *      FIXME: BSD has some magic to avoid sending resets to
                 *      broken 4.2 BSD keepalives. Much to my surprise a few non
                 *      BSD stacks still have broken keepalives so we want to
                 *      cope with it.
                 */

                if(skb->len)    /* We don't care if it's just an ack or
                                   a keepalive/window probe */
                {
                        new_seq = skb->seq + skb->len + th->syn;        /* Right edge of _data_ part of frame */

                        /* Do this the way 4.4BSD treats it. Not what I'd
                           regard as the meaning of the spec but it's what BSD
                           does and clearly they know everything 8) */

                        /*
                         *      This is valid because of two things
                         *
                         *      a) The way tcp_data behaves at the bottom.
                         *      b) A fin takes effect when read not when received.
                         */

                        shut_seq = sk->acked_seq+1;     /* Last byte */

                        if(after(new_seq,shut_seq))
                        {
                                if(sk->debug)
                                        printk("Data arrived on %p after close [Data right edge %X, Socket shut on %X] %d\n",
                                                sk, new_seq, shut_seq, sk->blog);
                                if(sk->dead)
                                {
                                        /* Nobody can read this data any more:
                                         * reset the connection and tear the
                                         * socket down.
                                         */
                                        sk->acked_seq = new_seq + th->fin;
                                        tcp_send_reset(sk->saddr, sk->daddr, skb->h.th,
                                                sk->prot, NULL, skb->dev, 0, 255);
                                        tcp_statistics.TcpEstabResets++;
                                        sk->err = EPIPE;
                                        sk->error_report(sk);
                                        sk->shutdown = SHUTDOWN_MASK;
                                        tcp_set_state(sk,TCP_CLOSE);
                                        kfree_skb(skb, FREE_READ);
                                        return 0;
                                }
                        }
                }
        }

#endif

        /*
         * We should only call this if there is data in the frame.
         */
        tcp_delack_estimator(sk);

        tcp_queue(skb, sk, th);

        return(0);
}
2136
 
2137
 
2138
/*
2139
 *      This routine is only called when we have urgent data
2140
 *      signalled. Its the 'slow' part of tcp_urg. It could be
2141
 *      moved inline now as tcp_urg is only called from one
2142
 *      place. We handle URGent data wrong. We have to - as
2143
 *      BSD still doesn't use the correction from RFC961.
2144
 *
2145
 *      For 1003.1g we should support a new option TCP_STDURG to permit
2146
 *      either form.
2147
 */
2148
 
2149
static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
2150
{
2151
        u32 ptr = ntohs(th->urg_ptr);
2152
 
2153
        if (ptr)
2154
                ptr--;
2155
        ptr += ntohl(th->seq);
2156
 
2157
        /* ignore urgent data that we've already seen and read */
2158
        if (after(sk->copied_seq, ptr))
2159
                return;
2160
 
2161
        /* do we already have a newer (or duplicate) urgent pointer? */
2162
        if (sk->urg_data && !after(ptr, sk->urg_seq))
2163
                return;
2164
 
2165
        /* tell the world about our new urgent pointer */
2166
        if (sk->proc != 0) {
2167
                if (sk->proc > 0) {
2168
                        kill_proc(sk->proc, SIGURG, 1);
2169
                } else {
2170
                        kill_pg(-sk->proc, SIGURG, 1);
2171
                }
2172
        }
2173
        /*
2174
         *      We may be adding urgent data when the last byte read was
2175
         *      urgent. To do this requires some care. We cannot just ignore
2176
         *      sk->copied_seq since we would read the last urgent byte again
2177
         *      as data, nor can we alter copied_seq until this data arrives
2178
         *      or we break the sematics of SIOCATMARK (and thus sockatmark())
2179
         */
2180
        if (sk->urg_seq == sk->copied_seq)
2181
                sk->copied_seq++;       /* Move the copied sequence on correctly */
2182
        sk->urg_data = URG_NOTYET;
2183
        sk->urg_seq = ptr;
2184
}
2185
 
2186
/*
2187
 *      This is the 'fast' part of urgent handling.
2188
 */
2189
 
2190
static inline void tcp_urg(struct sock *sk, struct tcphdr *th, unsigned long len)
2191
{
2192
        /*
2193
         *      Check if we get a new urgent pointer - normally not
2194
         */
2195
 
2196
        if (th->urg)
2197
                tcp_check_urg(sk,th);
2198
 
2199
        /*
2200
         *      Do we wait for any urgent data? - normally not
2201
         */
2202
 
2203
        if (sk->urg_data == URG_NOTYET) {
2204
                u32 ptr;
2205
 
2206
                /*
2207
                 *      Is the urgent pointer pointing into this packet?
2208
                 */
2209
                ptr = sk->urg_seq - ntohl(th->seq) + th->doff*4;
2210
                if (ptr < len) {
2211
                        sk->urg_data = URG_VALID | *(ptr + (unsigned char *) th);
2212
                        if (!sk->dead)
2213
                                sk->data_ready(sk,0);
2214
                }
2215
        }
2216
}
2217
 
2218
/*
 * This should be a bit smarter and remove partially
 * overlapping stuff too, but this should be good
 * enough for any even remotely normal case (and the
 * worst that can happen is that we have a few
 * unnecessary packets in the receive queue).
 *
 * This function is never called with an empty list..
 */
2227
/*
 * Drop exact or fully-contained duplicate segments from a
 * sequence-ordered receive queue by comparing each adjacent pair.
 * Partial overlaps are deliberately left alone (harmless extras).
 */
static inline void tcp_remove_dups(struct sk_buff_head * list)
{
        struct sk_buff * next = list->next;

        for (;;) {
                struct sk_buff * skb = next;
                next = next->next;
                if (next == (struct sk_buff *) list)
                        break;
                /* The later segment ends inside the earlier one:
                 * it is fully contained, so discard it and re-examine
                 * the pair starting from skb again.
                 */
                if (before(next->end_seq, skb->end_seq)) {
                        __skb_unlink(next, list);
                        kfree_skb(next, FREE_READ);
                        next = skb;
                        continue;
                }
                if (next->seq != skb->seq)
                        continue;
                /* Same start, and next ends at or past skb's end:
                 * skb is redundant — drop the earlier one.
                 */
                __skb_unlink(skb, list);
                kfree_skb(skb, FREE_READ);
        }
}
2248
 
2249
/*
2250
 * Throw out all unnecessary packets: we've gone over the
2251
 * receive queue limit. This shouldn't happen in a normal
2252
 * TCP connection, but we might have gotten duplicates etc.
2253
 */
2254
static void prune_queue(struct sk_buff_head * list)
2255
{
2256
        for (;;) {
2257
                struct sk_buff * skb = list->prev;
2258
 
2259
                /* gone through it all? */
2260
                if (skb == (struct sk_buff *) list)
2261
                        break;
2262
                if (!skb->acked) {
2263
                        __skb_unlink(skb, list);
2264
                        kfree_skb(skb, FREE_READ);
2265
                        continue;
2266
                }
2267
                tcp_remove_dups(list);
2268
                break;
2269
        }
2270
}
2271
 
2272
#ifdef CONFIG_IP_TRANSPARENT_PROXY
2273
/*
2274
 *      Check whether a received TCP packet might be for one of our
2275
 *      connections.
2276
 */
2277
 
2278
int tcp_chkaddr(struct sk_buff *skb)
2279
{
2280
        struct iphdr *iph = skb->h.iph;
2281
        struct tcphdr *th = (struct tcphdr *)(skb->h.raw + iph->ihl*4);
2282
        struct sock *sk;
2283
 
2284
        sk = tcp_v4_lookup(iph->saddr, th->source, iph->daddr, th->dest,
2285
                           skb->dev);
2286
        if (!sk)
2287
                return 0;
2288
        /* 0 means accept all LOCAL addresses here, not all the world... */
2289
        if (sk->rcv_saddr == 0)
2290
                return 0;
2291
        return 1;
2292
}
2293
#endif
2294
 
2295
/*
2296
 *      A TCP packet has arrived.
2297
 *              skb->h.raw is the TCP header.
2298
 */
2299
 
2300
int tcp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt,
2301
        __u32 daddr, unsigned short len,
2302
        __u32 saddr, int redo, struct inet_protocol * protocol)
2303
{
2304
        struct tcphdr *th;
2305
        struct sock *sk;
2306
        __u32 seq;
2307
        int was_ack;
2308
#ifdef CONFIG_IP_TRANSPARENT_PROXY
2309
        int r;
2310
#endif
2311
 
2312
        /*
2313
         * "redo" is 1 if we have already seen this skb but couldn't
2314
         * use it at that time (the socket was locked).  In that case
2315
         * we have already done a lot of the work (looked up the socket
2316
         * etc).
2317
         */
2318
        th = skb->h.th;
2319
        was_ack = th->ack; /* Remember for later when we've freed the skb */
2320
        sk = skb->sk;
2321
#ifdef CONFIG_RST_COOKIES
2322
        if (th->rst && secure_tcp_probe_number(saddr,daddr,ntohs(th->source),ntohs(th->dest),ntohl(th->seq),1)) {
2323
                add_clearance(saddr);
2324
        }
2325
#endif
2326
        if (!redo) {
2327
                tcp_statistics.TcpInSegs++;
2328
                if (skb->pkt_type!=PACKET_HOST)
2329
                        goto discard_it;
2330
 
2331
                /*
2332
                 *      Pull up the IP header.
2333
                 */
2334
 
2335
                skb_pull(skb, skb->h.raw-skb->data);
2336
 
2337
                /*
2338
                 *      Try to use the device checksum if provided.
2339
                 */
2340
                switch (skb->ip_summed)
2341
                {
2342
                        case CHECKSUM_NONE:
2343
                                skb->csum = csum_partial((char *)th, len, 0);
2344
                        case CHECKSUM_HW:
2345
                                if (tcp_check(th, len, saddr, daddr, skb->csum))
2346
                                        goto discard_it;
2347
                        default:
2348
                                /* CHECKSUM_UNNECESSARY */
2349
                }
2350
#ifdef CONFIG_SYN_COOKIES
2351
retry_search:
2352
#endif
2353
#ifdef CONFIG_IP_TRANSPARENT_PROXY
2354
                if (skb->redirport)
2355
                        sk = tcp_v4_proxy_lookup(saddr, th->source, daddr, th->dest, dev->pa_addr, skb->redirport, dev);
2356
                else
2357
#endif
2358
                sk = __tcp_v4_lookup(th, saddr, th->source, daddr, th->dest, dev);
2359
                if (!sk)
2360
                        goto no_tcp_socket;
2361
                skb->sk = sk;
2362
                skb->seq = ntohl(th->seq);
2363
                skb->end_seq = skb->seq + th->syn + th->fin + len - th->doff*4;
2364
                skb->ack_seq = ntohl(th->ack_seq);
2365
 
2366
                skb->acked = 0;
2367
                skb->used = 0;
2368
                skb->free = 1;
2369
                skb->saddr = daddr;
2370
                skb->daddr = saddr;
2371
 
2372
                /*
2373
                 * We may need to add it to the backlog here.
2374
                 */
2375
                if (sk->users)
2376
                {
2377
                        __skb_queue_tail(&sk->back_log, skb);
2378
                        return(0);
2379
                }
2380
        }
2381
 
2382
        /*
2383
         *      If this socket has got a reset it's to all intents and purposes
2384
         *      really dead. Count closed sockets as dead.
2385
         *
2386
         *      Note: BSD appears to have a bug here. A 'closed' TCP in BSD
2387
         *      simply drops data. This seems incorrect as a 'closed' TCP doesn't
2388
         *      exist so should cause resets as if the port was unreachable.
2389
         */
2390
 
2391
        if (sk->zapped || sk->state==TCP_CLOSE) {
2392
                goto no_tcp_socket;
2393
        }
2394
 
2395
        if (!sk->prot)
2396
        {
2397
                printk(KERN_CRIT "IMPOSSIBLE 3\n");
2398
                return(0);
2399
        }
2400
 
2401
 
2402
        /*
2403
         *      Charge the memory to the socket.
2404
         */
2405
 
2406
        skb->sk=sk;
2407
        atomic_add(skb->truesize, &sk->rmem_alloc);
2408
 
2409
        /*
2410
         * Mark the time of the last received packet.
2411
         */
2412
        sk->idletime = jiffies;
2413
 
2414
        /*
2415
         *      We should now do header prediction.
2416
         */
2417
 
2418
        /*
2419
         *      This basically follows the flow suggested by RFC793, with the corrections in RFC1122. We
2420
         *      don't implement precedence and we process URG incorrectly (deliberately so) for BSD bug
2421
         *      compatibility. We also set up variables more thoroughly [Karn notes in the
2422
         *      KA9Q code the RFC793 incoming segment rules don't initialise the variables for all paths].
2423
         */
2424
 
2425
        if(sk->state!=TCP_ESTABLISHED)          /* Skip this lot for normal flow */
2426
        {
2427
 
2428
                /*
2429
                 *      Now deal with unusual cases.
2430
                 */
2431
 
2432
                if(sk->state==TCP_LISTEN)
2433
                {
2434
                        /* Don't start connections with illegal address
2435
                           ranges. Trying to talk TCP to a broken dhcp host
2436
                           isnt good on a lan with broken SunOS 4.x boxes
2437
                           who think its a broadcast */
2438
 
2439
                        if ((saddr | daddr) == 0)
2440
                                goto discard_it;
2441
 
2442
                        if (th->ack) {  /* These use the socket TOS.. might want to be the received TOS */
2443
#ifdef CONFIG_SYN_COOKIES
2444
                                if (!th->syn && !th->rst) {
2445
                                        __u32 acked_seq = ntohl(th->ack_seq)-1;
2446
                                        int mtu_index = (acked_seq&0x7); /* extract MTU */
2447
                                        __u32 count = jiffies/(60*HZ);
2448
 
2449
                                        acked_seq = acked_seq&0xfffffff8;
2450
 
2451
                                        /* Any time in the last 2 minutes is OK */
2452
                                        if (acked_seq == secure_tcp_syn_cookie(daddr,
2453
                                            saddr,ntohs(th->source),ntohs(th->dest),
2454
                                            ntohl(th->seq)-1,count)
2455
                                        || acked_seq == secure_tcp_syn_cookie(daddr,
2456
                                            saddr,ntohs(th->source),ntohs(th->dest),
2457
                                            ntohl(th->seq)-1,count-1)
2458
                                        || acked_seq == secure_tcp_syn_cookie(daddr,
2459
                                            saddr,ntohs(th->source),ntohs(th->dest),
2460
                                            ntohl(th->seq)-1,count-2)) {
2461
                                                /* If this passes, we need to fake up the
2462
                                                * new socket in TCP_SYN_SENT state and
2463
                                                * call ourselves recursively to handle
2464
                                                * the move to ESTABLISHED using the
2465
                                                * current packet. Nasty, but a cleaner
2466
                                                * solution would require major rewrites.
2467
                                                */
2468
                                                if (tcp_conn_request_fake(sk, skb, daddr, saddr, opt,
2469
                                                                          dev, (acked_seq | mtu_index), cookie_mtu[mtu_index])) {
2470
 
2471
                                                        goto retry_search;
2472
                                                }
2473
                                        }
2474
                                }
2475
#endif
2476
                                tcp_send_reset(daddr,saddr,th,sk->prot,opt,dev,0, 255);
2477
                        }
2478
 
2479
                        /*
2480
                         *      We don't care for RST, and non SYN are absorbed (old segments)
2481
                         *      Broadcast/multicast SYN isn't allowed. Note - bug if you change the
2482
                         *      netmask on a running connection it can go broadcast. Even Sun's have
2483
                         *      this problem so I'm ignoring it
2484
                         */
2485
 
2486
#ifdef CONFIG_IP_TRANSPARENT_PROXY
2487
                        /*
2488
                         * We may get non-local addresses and still want to
2489
                         * handle them locally, due to transparent proxying.
2490
                         * Thus, narrow down the test to what is really meant.
2491
                         */
2492
                        if(th->rst || !th->syn || th->ack || (r = ip_chk_addr(daddr)) == IS_BROADCAST || r == IS_MULTICAST)
2493
#else
2494
                        if(th->rst || !th->syn || th->ack || ip_chk_addr(daddr)!=IS_MYADDR)
2495
#endif
2496
                        {
2497
                                kfree_skb(skb, FREE_READ);
2498
                                return 0;
2499
                        }
2500
 
2501
                        /*
2502
                         *      Guess we need to make a new socket up
2503
                         */
2504
                        seq = secure_tcp_sequence_number(saddr, daddr,
2505
                                                         skb->h.th->dest,
2506
                                                         skb->h.th->source);
2507
                        tcp_conn_request(sk, skb, daddr, saddr, opt, dev, seq);
2508
 
2509
                        /*
2510
                         *      Now we have several options: In theory there is nothing else
2511
                         *      in the frame. KA9Q has an option to send data with the syn,
2512
                         *      BSD accepts data with the syn up to the [to be] advertised window
2513
                         *      and Solaris 2.1 gives you a protocol error. For now we just ignore
2514
                         *      it, that fits the spec precisely and avoids incompatibilities. It
2515
                         *      would be nice in future to drop through and process the data.
2516
                         *
2517
                         *      Now TTCP is starting to use we ought to queue this data.
2518
                         */
2519
 
2520
                        return 0;
2521
                }
2522
 
2523
                /*
2524
                 *      Retransmitted SYN for our socket. This is uninteresting. If sk->state==TCP_LISTEN
2525
                 *      then it's a new connection
2526
                 */
2527
 
2528
                if (sk->state == TCP_SYN_RECV)
2529
                {
2530
                        if(th->syn && skb->seq+1 == sk->acked_seq)
2531
                        {
2532
                                kfree_skb(skb, FREE_READ);
2533
                                return 0;
2534
                        }
2535
                        goto rfc_step4;
2536
                }
2537
 
2538
                /*
2539
                 *      SYN sent means we have to look for a suitable ack and either reset
2540
                 *      for bad matches or go to connected. The SYN_SENT case is unusual and should
2541
                 *      not be in line code. [AC]
2542
                 */
2543
 
2544
                if(sk->state==TCP_SYN_SENT)
2545
                {
2546
                        /* Crossed SYN or previous junk segment */
2547
                        if(th->ack)
2548
                        {
2549
                                /* We got an ack, but it's not a good ack.
2550
                                 * We used to test this with a call to tcp_ack,
2551
                                 * but this loses, because it takes the SYN
2552
                                 * packet out of the send queue, even if
2553
                                 * the ACK doesn't have the SYN bit sent, and
2554
                                 * therefore isn't the one we are waiting for.
2555
                                 */
2556
                                if (after(skb->ack_seq, sk->sent_seq) || before(skb->ack_seq, sk->rcv_ack_seq))
2557
                                {
2558
                                        /* Reset the ack - it's an ack from a
2559
                                           different connection  [ th->rst is checked in tcp_send_reset()] */
2560
                                        tcp_statistics.TcpAttemptFails++;
2561
                                        tcp_send_reset(daddr, saddr, th,
2562
                                                sk->prot, opt,dev,0,255);
2563
                                        kfree_skb(skb, FREE_READ);
2564
                                        return(0);
2565
                                }
2566
                                if(th->rst)
2567
                                        return tcp_reset(sk,skb);
2568
                                if(!th->syn)
2569
                                {
2570
                                        /* A valid ack from a different connection
2571
                                           start. Shouldn't happen but cover it */
2572
                                        tcp_statistics.TcpAttemptFails++;
2573
                                        tcp_send_reset(daddr, saddr, th,
2574
                                                sk->prot, opt,dev,0,255);
2575
                                        kfree_skb(skb, FREE_READ);
2576
                                        return 0;
2577
                                }
2578
 
2579
                                /* process the ACK, get the SYN packet out
2580
                                 * of the send queue, do other initial
2581
                                 * processing stuff. [We know it's good, and
2582
                                 * we know it's the SYN,ACK we want.]
2583
                                 */
2584
                                tcp_ack(sk,th,skb->ack_seq,len);
2585
 
2586
                                /* We must check here (before tcp_options) whether
2587
                                 * peer advertised a zero sized window on us, else
2588
                                 * we end up with a zero sk->{mtu,mss} and thus bomb
2589
                                 * out in tcp_do_sendmsg. -DaveM
2590
                                 */
2591
                                if(sk->max_window == 0)
2592
                                        sk->max_window = 32;
2593
 
2594
                                /*
2595
                                 *      Ok.. it's good. Set up sequence numbers and
2596
                                 *      move to established.
2597
                                 */
2598
                                sk->acked_seq = skb->seq+1;
2599
                                sk->lastwin_seq = skb->seq+1;
2600
                                sk->fin_seq = skb->seq;
2601
                                tcp_send_ack(sk);
2602
                                tcp_set_state(sk, TCP_ESTABLISHED);
2603
                                tcp_options(sk,th);
2604
 
2605
#if 0
2606
                                sk->dummy_th.dest=th->source;
2607
                                tcp_v4_rehash(sk);
2608
#endif
2609
 
2610
                                sk->copied_seq = sk->acked_seq;
2611
                                if(!sk->dead)
2612
                                {
2613
                                        sk->state_change(sk);
2614
                                        sock_wake_async(sk->socket, 0);
2615
                                }
2616
 
2617
                                /* Reset the RTT estimator to the initial
2618
                                 * state rather than testing to avoid
2619
                                 * updating it on the ACK to the SYN packet.
2620
                                 */
2621
                                sk->rtt = 0;
2622
                                sk->rto = TCP_TIMEOUT_INIT;
2623
                                sk->mdev = TCP_TIMEOUT_INIT;
2624
                                goto rfc_step6;
2625
                        }
2626
                        else
2627
                        {
2628
                                /* See if SYN's cross. Drop if boring */
2629
                                if(th->syn && !th->rst)
2630
                                {
2631
                                        /* Crossed SYN's are fine - but talking to
2632
                                           yourself is right out... */
2633
                                        if(sk->saddr==saddr && sk->daddr==daddr &&
2634
                                                sk->dummy_th.source==th->source &&
2635
                                                sk->dummy_th.dest==th->dest)
2636
                                        {
2637
                                                tcp_statistics.TcpAttemptFails++;
2638
                                                return tcp_reset(sk,skb);
2639
                                        }
2640
                                        tcp_set_state(sk,TCP_SYN_RECV);
2641
 
2642
                                        /*
2643
                                         *      FIXME:
2644
                                         *      Must send SYN|ACK here
2645
                                         */
2646
                                }
2647
                                /* Discard junk segment */
2648
                                kfree_skb(skb, FREE_READ);
2649
                                return 0;
2650
                        }
2651
 
2652
                        /*
2653
                         *      Data maybe.. drop through
2654
                         */
2655
 
2656
                }
2657
 
2658
        /*
2659
         *      BSD has a funny hack with TIME_WAIT and fast reuse of a port. There is
2660
         *      a more complex suggestion for fixing these reuse issues in RFC1644
2661
         *      but not yet ready for general use. Also see RFC1379.
2662
         *
2663
         *      Note the funny way we go back to the top of this function for
2664
         *      this case ("goto try_next_socket").  That also takes care of
2665
         *      checking "sk->users" for the new socket as well as doing all
2666
         *      the normal tests on the packet.
2667
         */
2668
 
2669
#define BSD_TIME_WAIT
2670
#ifdef BSD_TIME_WAIT
2671
                if (sk->state == TCP_TIME_WAIT && th->syn && sk->dead &&
2672
                        after(skb->seq, sk->acked_seq) && !th->rst)
2673
                {
2674
                        u32 seq = sk->write_seq;
2675
                        if(sk->debug)
2676
                                printk("Doing a BSD time wait\n");
2677
                        tcp_statistics.TcpEstabResets++;
2678
                        atomic_sub(skb->truesize, &sk->rmem_alloc);
2679
                        skb->sk = NULL;
2680
                        sk->err=ECONNRESET;
2681
                        tcp_set_state(sk, TCP_CLOSE);
2682
                        sk->shutdown = SHUTDOWN_MASK;
2683
#ifdef CONFIG_IP_TRANSPARENT_PROXY
2684
                        /* What to do here?
2685
                         * For the non-proxy case, this code is effectively almost a no-op,
2686
                         * due to the sk = NULL.  Is that intentional?  If so, why shouldn't we
2687
                         * do the same for the proxy case and get rid of some useless code?
2688
                         */
2689
                        if (skb->redirport)
2690
                                sk = tcp_v4_proxy_lookup(saddr, th->source, daddr, th->dest,
2691
                                                         dev->pa_addr, skb->redirport, dev);
2692
                        else
2693
#endif
2694
                        sk = __tcp_v4_lookup(th, saddr, th->source, daddr, th->dest, dev);
2695
                        /* this is not really correct: we should check sk->users */
2696
                        if (sk && sk->state==TCP_LISTEN)
2697
                        {
2698
                                skb->sk = sk;
2699
                                atomic_add(skb->truesize, &sk->rmem_alloc);
2700
                                /* FIXME: Is the sequence number addition
2701
                                 * of 128000 here enough for fast networks?
2702
                                 * Also, does this reduce the security of
2703
                                 * our tcp sequence numbers?
2704
                                 */
2705
                                tcp_conn_request(sk, skb, daddr, saddr,opt, dev,seq+128000);
2706
                                return 0;
2707
                        }
2708
                        kfree_skb(skb, FREE_READ);
2709
                        return 0;
2710
                }
2711
#endif  
2712
        }
2713
 
2714
rfc_step4:              /* I'll clean this up later */
2715
 
2716
        /*
2717
         *      We are now in normal data flow (see the step list in the RFC)
2718
         *      Note most of these are inline now. I'll inline the lot when
2719
         *      I have time to test it hard and look at what gcc outputs
2720
         */
2721
 
2722
        if (!tcp_sequence(sk, skb->seq, skb->end_seq-th->syn))
2723
        {
2724
                bad_tcp_sequence(sk, th, skb->end_seq-th->syn, dev);
2725
                kfree_skb(skb, FREE_READ);
2726
                return 0;
2727
        }
2728
 
2729
        if(th->rst)
2730
                return tcp_reset(sk,skb);
2731
 
2732
        /*
2733
         *      Check for a SYN, and ensure it matches the SYN we were
2734
         *      first sent. We have to handle the rather unusual (but valid)
2735
         *      sequence that KA9Q derived products may generate of
2736
         *
2737
         *      SYN
2738
         *                              SYN|ACK Data
2739
         *      ACK     (lost)
2740
         *                              SYN|ACK Data + More Data
2741
         *      .. we must ACK not RST...
2742
         *
2743
         *      We keep syn_seq as the sequence space occupied by the
2744
         *      original syn.
2745
         */
2746
 
2747
        if(th->syn && skb->seq!=sk->syn_seq)
2748
        {
2749
                tcp_send_reset(daddr,saddr,th, &tcp_prot, opt, dev,0, 255);
2750
                return tcp_reset(sk,skb);
2751
        }
2752
 
2753
        /*
2754
         *      Process the ACK
2755
         */
2756
 
2757
        if(!th->ack)
2758
        {
2759
                kfree_skb(skb, FREE_WRITE);
2760
                return 0;
2761
        }
2762
 
2763
        if(!tcp_ack(sk,th,skb->ack_seq,len))
2764
        {
2765
                /*
2766
                 *      Our three way handshake failed.
2767
                 */
2768
 
2769
                if(sk->state==TCP_SYN_RECV)
2770
                {
2771
                        tcp_send_reset(daddr, saddr, th,sk->prot, opt, dev,0,255);
2772
                }
2773
                kfree_skb(skb, FREE_READ);
2774
                return 0;
2775
        }
2776
 
2777
rfc_step6:
2778
        /*
2779
         *      If the accepted buffer put us over our queue size we
2780
         *      now drop it (we must process the ack first to avoid
2781
         *      deadlock cases).
2782
         */
2783
 
2784
        /*
2785
         *      Process urgent data
2786
         */
2787
 
2788
        tcp_urg(sk, th, len);
2789
 
2790
        /*
2791
         *      Process the encapsulated data
2792
         */
2793
 
2794
        if(tcp_data(skb,sk, saddr, len))
2795
                kfree_skb(skb, FREE_READ);
2796
 
2797
        /*
2798
         *      If we had a partial packet being help up due to
2799
         *      application of Nagle's rule we are now free to send it.
2800
         */
2801
        if (was_ack
2802
            && sk->packets_out == 0
2803
            && sk->partial != NULL
2804
            && skb_queue_empty(&sk->write_queue)
2805
            && sk->send_head == NULL)
2806
        {
2807
                tcp_send_partial(sk);
2808
        }
2809
 
2810
        /*
2811
         *      If our receive queue has grown past its limits,
2812
         *      try to prune away duplicates etc..
2813
         */
2814
        if (sk->rmem_alloc > sk->rcvbuf)
2815
                prune_queue(&sk->receive_queue);
2816
 
2817
        /*
2818
         *      And done
2819
         */
2820
 
2821
        return 0;
2822
 
2823
no_tcp_socket:
2824
        /*
2825
         * No such TCB. If th->rst is 0 send a reset (checked in tcp_send_reset)
2826
         */
2827
        tcp_send_reset(daddr, saddr, th, &tcp_prot, opt,dev,0,255);
2828
 
2829
discard_it:
2830
        /*
2831
         *      Discard frame
2832
         */
2833
        skb->sk = NULL;
2834
        kfree_skb(skb, FREE_READ);
2835
        return 0;
2836
}

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.