OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [rtos/] [ecos-3.0/] [packages/] [net/] [bsd_tcpip/] [current/] [src/] [sys/] [netinet/] [tcp_input.c] - Blame information for rev 838

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 786 skrzyp
//==========================================================================
2
//
3
//      src/sys/netinet/tcp_input.c
4
//
5
//==========================================================================
6
// ####BSDCOPYRIGHTBEGIN####                                    
7
// -------------------------------------------                  
8
// This file is part of eCos, the Embedded Configurable Operating System.
9
//
10
// Portions of this software may have been derived from FreeBSD 
11
// or other sources, and if so are covered by the appropriate copyright
12
// and license included herein.                                 
13
//
14
// Portions created by the Free Software Foundation are         
15
// Copyright (C) 2002 Free Software Foundation, Inc.            
16
// -------------------------------------------                  
17
// ####BSDCOPYRIGHTEND####                                      
18
//==========================================================================
19
 
20
/*
21
 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
22
 *      The Regents of the University of California.  All rights reserved.
23
 *
24
 * Redistribution and use in source and binary forms, with or without
25
 * modification, are permitted provided that the following conditions
26
 * are met:
27
 * 1. Redistributions of source code must retain the above copyright
28
 *    notice, this list of conditions and the following disclaimer.
29
 * 2. Redistributions in binary form must reproduce the above copyright
30
 *    notice, this list of conditions and the following disclaimer in the
31
 *    documentation and/or other materials provided with the distribution.
32
 * 3. All advertising materials mentioning features or use of this software
33
 *    must display the following acknowledgement:
34
 *      This product includes software developed by the University of
35
 *      California, Berkeley and its contributors.
36
 * 4. Neither the name of the University nor the names of its contributors
37
 *    may be used to endorse or promote products derived from this software
38
 *    without specific prior written permission.
39
 *
40
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
41
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
43
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
44
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
45
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
46
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
48
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
49
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50
 * SUCH DAMAGE.
51
 *
52
 *      @(#)tcp_input.c 8.12 (Berkeley) 5/24/95
53
 * $FreeBSD: src/sys/netinet/tcp_input.c,v 1.107.2.16 2001/08/22 00:59:12 silby Exp $
54
 */
55
 
56
#include <sys/param.h>
57
#include <sys/malloc.h>
58
#include <sys/mbuf.h>
59
#include <sys/protosw.h>
60
#include <sys/socket.h>
61
#include <sys/sysctl.h>
62
#include <sys/socketvar.h>
63
 
64
#include <net/if.h>
65
#include <net/route.h>
66
 
67
#include <netinet/in.h>
68
#include <netinet/in_systm.h>
69
#include <netinet/ip.h>
70
#include <netinet/ip_icmp.h>    /* for ICMP_BANDLIM             */
71
#include <netinet/in_var.h>
72
#include <netinet/icmp_var.h>   /* for ICMP_BANDLIM             */
73
#include <netinet/in_pcb.h>
74
#include <netinet/ip_var.h>
75
#ifdef INET6
76
#include <netinet/ip6.h>
77
#include <netinet/icmp6.h>
78
#include <netinet6/nd6.h>
79
#include <netinet6/ip6_var.h>
80
#include <netinet6/in6_pcb.h>
81
#endif
82
#include <netinet/tcp.h>
83
#include <netinet/tcp_fsm.h>
84
#include <netinet/tcp_seq.h>
85
#include <netinet/tcp_timer.h>
86
#include <netinet/tcp_var.h>
87
 
88
#ifdef INET6
89
#include <netinet6/tcp6_var.h>
90
#endif
91
#include <netinet/tcpip.h>
92
#ifdef TCPDEBUG
93
#include <netinet/tcp_debug.h>
94
 
95
u_char tcp_saveipgen[40]; /* the size must be of max ip header, now IPv6 */
96
struct tcphdr tcp_savetcp;
97
#endif /* TCPDEBUG */
98
 
99
#ifdef IPSEC
100
#include <netinet6/ipsec.h>
101
#include <netkey/key.h>
102
#endif /*IPSEC*/
103
 
104
static int      tcprexmtthresh = 3;
105
tcp_cc  tcp_ccgen;
106
 
107
struct  tcpstat tcpstat;
108
SYSCTL_STRUCT(_net_inet_tcp, TCPCTL_STATS, stats, CTLFLAG_RD,
109
    &tcpstat , tcpstat, "TCP statistics (struct tcpstat, netinet/tcp_var.h)");
110
 
111
static int log_in_vain = 0;
112
SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_in_vain, CTLFLAG_RW,
113
    &log_in_vain, 0, "Log all incoming TCP connections");
114
 
115
static int blackhole = 0;
116
SYSCTL_INT(_net_inet_tcp, OID_AUTO, blackhole, CTLFLAG_RW,
117
        &blackhole, 0, "Do not send RST when dropping refused connections");
118
 
119
int tcp_delack_enabled = 1;
120
SYSCTL_INT(_net_inet_tcp, OID_AUTO, delayed_ack, CTLFLAG_RW,
121
    &tcp_delack_enabled, 0,
122
    "Delay ACK to try and piggyback it onto a data packet");
123
 
124
int tcp_lq_overflow = 1;
125
SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_lq_overflow, CTLFLAG_RW,
126
    &tcp_lq_overflow, 0,
127
    "Listen Queue Overflow");
128
 
129
#ifdef TCP_DROP_SYNFIN
/*
 * When non-zero, tcp_input() discards every segment that has both the
 * SYN and the FIN flag set.
 */
static int drop_synfin = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, drop_synfin, CTLFLAG_RW,
    &drop_synfin, 0,
    "Drop TCP packets with SYN+FIN set");
#endif /* TCP_DROP_SYNFIN */
134
 
135
#ifdef CYGNUM_NET_TCP_REASS_DIVISOR
/* Global accounting for the out-of-order segment queues (see tcp_reass()). */
SYSCTL_NODE(_net_inet_tcp, OID_AUTO, reass, CTLFLAG_RW, 0,
    "TCP Segment Reassembly Queue");

/*
 * Limit compared against tcp_reass_qsize + 1 in tcp_reass().  It starts
 * at 0 here, which makes tcp_reass() reject every segment that is not at
 * rcv_nxt; presumably it is assigned its real value during stack
 * initialisation elsewhere (TODO confirm).
 */
int tcp_reass_maxseg = 0;
SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, maxsegments, CTLFLAG_RD,
    &tcp_reass_maxseg, 0,
    "Global maximum number of TCP Segments in Reassembly Queue");

/*
 * Number of queue entries currently allocated; tcp_reass() increments it
 * after each successful allocation and decrements it on every FREE().
 */
int tcp_reass_qsize = 0;
SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, cursegments, CTLFLAG_RD,
    &tcp_reass_qsize, 0,
    "Global number of TCP Segments currently in Reassembly Queue");

/* Count of segments tcp_reass() dropped because the limit was reached. */
static int tcp_reass_overflows = 0;
SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, overflows, CTLFLAG_RD,
    &tcp_reass_overflows, 0,
    "Global number of TCP Segment Reassembly Queue Overflows");
#endif /* CYGNUM_NET_TCP_REASS_DIVISOR */
154
 
155
struct inpcbhead tcb;
156
#define tcb6    tcb  /* for KAME src sync over BSD*'s */
157
struct inpcbinfo tcbinfo;
158
 
159
static void      tcp_dooptions __P((struct tcpcb *,
160
            u_char *, int, struct tcphdr *, struct tcpopt *));
161
static void      tcp_pulloutofband __P((struct socket *,
162
            struct tcphdr *, struct mbuf *, int));
163
static int       tcp_reass __P((struct tcpcb *, struct tcphdr *, int *,
164
                                struct mbuf *));
165
static void      tcp_xmit_timer __P((struct tcpcb *, int));
166
static int       tcp_newreno __P((struct tcpcb *, struct tcphdr *));
167
 
168
/*
 * Neighbor Discovery, Neighbor Unreachability Detection upper-layer hint.
 *
 * With INET6, ND6_HINT(tp) calls nd6_nud_hint() on the connection's cached
 * IPv6 route, but only when tp and its inpcb are non-NULL, the inpcb is
 * flagged INP_IPV6 and a route is actually cached.  Without INET6 the
 * macro expands to nothing.
 */
#ifdef INET6
#define ND6_HINT(tp) \
do { \
        if ((tp) != NULL && (tp)->t_inpcb != NULL && \
            ((tp)->t_inpcb->inp_vflag & INP_IPV6) && \
            (tp)->t_inpcb->in6p_route.ro_rt != NULL) \
                nd6_nud_hint((tp)->t_inpcb->in6p_route.ro_rt, NULL, 0); \
} while (0)
#else /* !INET6 */
#define ND6_HINT(tp)
#endif /* INET6 */
180
 
181
/*
 * Indicate whether this ack should be delayed.
 *
 * Evaluates to non-zero when tcp_delack_enabled is set and
 * callout_pending() reports false for the connection's tt_delack.
 * The argument is parenthesised so that any pointer-valued expression,
 * not only a plain identifier, expands correctly.
 */
#define DELAY_ACK(tp) \
        (tcp_delack_enabled && !callout_pending((tp)->tt_delack))
186
 
187
static int
188
tcp_reass(tp, th, tlenp, m)
189
        register struct tcpcb *tp;
190
        register struct tcphdr *th;
191
        int *tlenp;
192
        struct mbuf *m;
193
{
194
        struct tseg_qent *q;
195
        struct tseg_qent *p = NULL;
196
        struct tseg_qent *nq;
197
        struct tseg_qent *te;
198
        struct socket *so = tp->t_inpcb->inp_socket;
199
        int flags;
200
 
201
        /*
202
         * Call with th==0 after become established to
203
         * force pre-ESTABLISHED data up to user socket.
204
         */
205
        if (th == 0)
206
                goto present;
207
 
208
        /*
209
         * Limit the number of segments in the reassembly queue to prevent
210
         * holding on to too many segments (and thus running out of mbufs).
211
         * Make sure to let the missing segment through which caused this
212
         * queue.  Always keep one global queue entry spare to be able to
213
         * process the missing segment.
214
         */
215
#ifdef CYGNUM_NET_TCP_REASS_DIVISOR
216
        if (th->th_seq != tp->rcv_nxt &&
217
            tcp_reass_qsize + 1 >= tcp_reass_maxseg) {
218
                tcp_reass_overflows++;
219
                tcpstat.tcps_rcvmemdrop++;
220
                m_freem(m);
221
                return (0);
222
        }
223
#endif
224
 
225
        /* Allocate a new queue entry. If we can't, just drop the pkt. XXX */
226
        MALLOC(te, struct tseg_qent *, sizeof (struct tseg_qent), M_TSEGQ,
227
               M_NOWAIT);
228
        if (te == NULL) {
229
                tcpstat.tcps_rcvmemdrop++;
230
                m_freem(m);
231
                return (0);
232
        }
233
#ifdef CYGNUM_NET_TCP_REASS_DIVISOR
234
        tcp_reass_qsize++;
235
#endif
236
 
237
        /*
238
         * Find a segment which begins after this one does.
239
         */
240
        LIST_FOREACH(q, &tp->t_segq, tqe_q) {
241
                if (SEQ_GT(q->tqe_th->th_seq, th->th_seq))
242
                        break;
243
                p = q;
244
        }
245
 
246
        /*
247
         * If there is a preceding segment, it may provide some of
248
         * our data already.  If so, drop the data from the incoming
249
         * segment.  If it provides all of our data, drop us.
250
         */
251
        if (p != NULL) {
252
                register int i;
253
                /* conversion to int (in i) handles seq wraparound */
254
                i = p->tqe_th->th_seq + p->tqe_len - th->th_seq;
255
                if (i > 0) {
256
                        if (i >= *tlenp) {
257
                                tcpstat.tcps_rcvduppack++;
258
                                tcpstat.tcps_rcvdupbyte += *tlenp;
259
                                m_freem(m);
260
                                FREE(te, M_TSEGQ);
261
#ifdef CYGNUM_NET_TCP_REASS_DIVISOR
262
                                tcp_reass_qsize--;
263
#endif
264
                                /*
265
                                 * Try to present any queued data
266
                                 * at the left window edge to the user.
267
                                 * This is needed after the 3-WHS
268
                                 * completes.
269
                                 */
270
                                goto present;   /* ??? */
271
                        }
272
                        m_adj(m, i);
273
                        *tlenp -= i;
274
                        th->th_seq += i;
275
                }
276
        }
277
        tcpstat.tcps_rcvoopack++;
278
        tcpstat.tcps_rcvoobyte += *tlenp;
279
 
280
        /*
281
         * While we overlap succeeding segments trim them or,
282
         * if they are completely covered, dequeue them.
283
         */
284
        while (q) {
285
                register int i = (th->th_seq + *tlenp) - q->tqe_th->th_seq;
286
                if (i <= 0)
287
                        break;
288
                if (i < q->tqe_len) {
289
                        q->tqe_th->th_seq += i;
290
                        q->tqe_len -= i;
291
                        m_adj(q->tqe_m, i);
292
                        break;
293
                }
294
 
295
                nq = LIST_NEXT(q, tqe_q);
296
                LIST_REMOVE(q, tqe_q);
297
                m_freem(q->tqe_m);
298
                FREE(q, M_TSEGQ);
299
#ifdef CYGNUM_NET_TCP_REASS_DIVISOR
300
                tcp_reass_qsize--;
301
#endif
302
                q = nq;
303
        }
304
 
305
        /* Insert the new segment queue entry into place. */
306
        te->tqe_m = m;
307
        te->tqe_th = th;
308
        te->tqe_len = *tlenp;
309
 
310
        if (p == NULL) {
311
                LIST_INSERT_HEAD(&tp->t_segq, te, tqe_q);
312
        } else {
313
                LIST_INSERT_AFTER(p, te, tqe_q);
314
        }
315
 
316
present:
317
        /*
318
         * Present data to user, advancing rcv_nxt through
319
         * completed sequence space.
320
         */
321
        if (!TCPS_HAVEESTABLISHED(tp->t_state))
322
                return (0);
323
        q = LIST_FIRST(&tp->t_segq);
324
        if (!q || q->tqe_th->th_seq != tp->rcv_nxt)
325
                return (0);
326
        do {
327
                tp->rcv_nxt += q->tqe_len;
328
                flags = q->tqe_th->th_flags & TH_FIN;
329
                nq = LIST_NEXT(q, tqe_q);
330
                LIST_REMOVE(q, tqe_q);
331
                if (so->so_state & SS_CANTRCVMORE)
332
                        m_freem(q->tqe_m);
333
                else
334
                        sbappend(&so->so_rcv, q->tqe_m);
335
                FREE(q, M_TSEGQ);
336
#ifdef CYGNUM_NET_TCP_REASS_DIVISOR
337
                tcp_reass_qsize--;
338
#endif
339
                q = nq;
340
        } while (q && q->tqe_th->th_seq == tp->rcv_nxt);
341
        ND6_HINT(tp);
342
        sorwakeup(so);
343
        return (flags);
344
}
345
 
346
/*
347
 * TCP input routine, follows pages 65-76 of the
348
 * protocol specification dated September, 1981 very closely.
349
 */
350
#ifdef INET6
351
int
352
tcp6_input(mp, offp, proto)
353
        struct mbuf **mp;
354
        int *offp, proto;
355
{
356
        register struct mbuf *m = *mp;
357
        struct in6_ifaddr *ia6;
358
 
359
        IP6_EXTHDR_CHECK(m, *offp, sizeof(struct tcphdr), IPPROTO_DONE);
360
 
361
        /*
362
         * draft-itojun-ipv6-tcp-to-anycast
363
         * better place to put this in?
364
         */
365
        ia6 = ip6_getdstifaddr(m);
366
        if (ia6 && (ia6->ia6_flags & IN6_IFF_ANYCAST)) {
367
                struct ip6_hdr *ip6;
368
 
369
                ip6 = mtod(m, struct ip6_hdr *);
370
                icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADDR,
371
                            (caddr_t)&ip6->ip6_dst - (caddr_t)ip6);
372
                return IPPROTO_DONE;
373
        }
374
 
375
        tcp_input(m, *offp);
376
        return IPPROTO_DONE;
377
}
378
#endif
379
 
380
void
381
tcp_input(m, off0)
382
        register struct mbuf *m;
383
        int off0;
384
{
385
        register struct tcphdr *th;
386
        register struct ip *ip = NULL;
387
        register struct ipovly *ipov;
388
        register struct inpcb *inp;
389
        u_char *optp = NULL;
390
        int optlen = 0;
391
        int len, tlen, off;
392
        int drop_hdrlen;
393
        register struct tcpcb *tp = 0;
394
        register int thflags;
395
        struct socket *so = 0;
396
        int todrop, acked, ourfinisacked, needoutput = 0;
397
        struct in_addr laddr;
398
#ifdef INET6
399
        struct in6_addr laddr6;
400
#endif
401
        int dropsocket = 0;
402
        int iss = 0;
403
        u_long tiwin;
404
        struct tcpopt to;               /* options in this segment */
405
        struct rmxp_tao *taop;          /* pointer to our TAO cache entry */
406
        struct rmxp_tao tao_noncached;  /* in case there's no cached entry */
407
#ifdef TCPDEBUG
408
        short ostate = 0;
409
#endif
410
#ifdef INET6
411
        struct ip6_hdr *ip6 = NULL;
412
        int isipv6;
413
#endif /* INET6 */
414
        int rstreason CYGBLD_ATTRIB_UNUSED; /* For badport_bandlim accounting purposes */
415
 
416
#ifdef INET6
417
        isipv6 = (mtod(m, struct ip *)->ip_v == 6) ? 1 : 0;
418
#endif
419
        bzero((char *)&to, sizeof(to));
420
 
421
        tcpstat.tcps_rcvtotal++;
422
 
423
#ifdef INET6
424
        if (isipv6) {
425
                /* IP6_EXTHDR_CHECK() is already done at tcp6_input() */
426
                ip6 = mtod(m, struct ip6_hdr *);
427
                tlen = sizeof(*ip6) + ntohs(ip6->ip6_plen) - off0;
428
                if (in6_cksum(m, IPPROTO_TCP, off0, tlen)) {
429
                        tcpstat.tcps_rcvbadsum++;
430
                        goto drop;
431
                }
432
                th = (struct tcphdr *)((caddr_t)ip6 + off0);
433
 
434
                /*
435
                 * Be proactive about unspecified IPv6 address in source.
436
                 * As we use all-zero to indicate unbounded/unconnected pcb,
437
                 * unspecified IPv6 address can be used to confuse us.
438
                 *
439
                 * Note that packets with unspecified IPv6 destination is
440
                 * already dropped in ip6_input.
441
                 */
442
                if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) {
443
                        /* XXX stat */
444
                        goto drop;
445
                }
446
        } else
447
#endif /* INET6 */
448
      {
449
        /*
450
         * Get IP and TCP header together in first mbuf.
451
         * Note: IP leaves IP header in first mbuf.
452
         */
453
        if (off0 > sizeof (struct ip)) {
454
                ip_stripoptions(m, (struct mbuf *)0);
455
                off0 = sizeof(struct ip);
456
        }
457
        if (m->m_len < sizeof (struct tcpiphdr)) {
458
                if ((m = m_pullup(m, sizeof (struct tcpiphdr))) == 0) {
459
                        tcpstat.tcps_rcvshort++;
460
                        return;
461
                }
462
        }
463
        ip = mtod(m, struct ip *);
464
        ipov = (struct ipovly *)ip;
465
        th = (struct tcphdr *)((caddr_t)ip + off0);
466
        tlen = ip->ip_len;
467
 
468
        if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
469
                if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
470
                        th->th_sum = m->m_pkthdr.csum_data;
471
                else
472
                        th->th_sum = in_pseudo(ip->ip_src.s_addr,
473
                            ip->ip_dst.s_addr, htonl(m->m_pkthdr.csum_data +
474
                            ip->ip_len + IPPROTO_TCP));
475
                th->th_sum ^= 0xffff;
476
        } else {
477
                /*
478
                 * Checksum extended TCP header and data.
479
                 */
480
                len = sizeof (struct ip) + tlen;
481
                bzero(ipov->ih_x1, sizeof(ipov->ih_x1));
482
                ipov->ih_len = (u_short)tlen;
483
                HTONS(ipov->ih_len);
484
                th->th_sum = in_cksum(m, len);
485
        }
486
        if (th->th_sum) {
487
                tcpstat.tcps_rcvbadsum++;
488
                goto drop;
489
        }
490
#ifdef INET6
491
        /* Re-initialization for later version check */
492
        ip->ip_v = IPVERSION;
493
#endif
494
      }
495
 
496
        /*
497
         * Check that TCP offset makes sense,
498
         * pull out TCP options and adjust length.              XXX
499
         */
500
        off = th->th_off << 2;
501
        if (off < sizeof (struct tcphdr) || off > tlen) {
502
                tcpstat.tcps_rcvbadoff++;
503
                goto drop;
504
        }
505
        tlen -= off;    /* tlen is used instead of ti->ti_len */
506
        if (off > sizeof (struct tcphdr)) {
507
#ifdef INET6
508
                if (isipv6) {
509
                        IP6_EXTHDR_CHECK(m, off0, off, );
510
                        ip6 = mtod(m, struct ip6_hdr *);
511
                        th = (struct tcphdr *)((caddr_t)ip6 + off0);
512
                } else
513
#endif /* INET6 */
514
              {
515
                if (m->m_len < sizeof(struct ip) + off) {
516
                        if ((m = m_pullup(m, sizeof (struct ip) + off)) == 0) {
517
                                tcpstat.tcps_rcvshort++;
518
                                return;
519
                        }
520
                        ip = mtod(m, struct ip *);
521
                        ipov = (struct ipovly *)ip;
522
                        th = (struct tcphdr *)((caddr_t)ip + off0);
523
                }
524
              }
525
                optlen = off - sizeof (struct tcphdr);
526
                optp = (u_char *)(th + 1);
527
        }
528
        thflags = th->th_flags;
529
 
530
#ifdef TCP_DROP_SYNFIN
531
        /*
532
         * If the drop_synfin option is enabled, drop all packets with
533
         * both the SYN and FIN bits set. This prevents e.g. nmap from
534
         * identifying the TCP/IP stack.
535
         *
536
         * This is incompatible with RFC1644 extensions (T/TCP).
537
         */
538
        if (drop_synfin && (thflags & (TH_SYN|TH_FIN)) == (TH_SYN|TH_FIN))
539
                goto drop;
540
#endif
541
 
542
        /*
543
         * Convert TCP protocol specific fields to host format.
544
         */
545
        NTOHL(th->th_seq);
546
        NTOHL(th->th_ack);
547
        NTOHS(th->th_win);
548
        NTOHS(th->th_urp);
549
 
550
        /*
551
         * Delay dropping TCP, IP headers, IPv6 ext headers, and TCP options,
552
         * until after ip6_savecontrol() is called and before other functions
553
         * which don't want those proto headers.
554
         * Because ip6_savecontrol() is going to parse the mbuf to
555
         * search for data to be passed up to user-land, it wants mbuf
556
         * parameters to be unchanged.
557
         */
558
        drop_hdrlen = off0 + off;
559
 
560
        /*
561
         * Locate pcb for segment.
562
         */
563
findpcb:
564
#ifdef IPFIREWALL_FORWARD
565
        if (ip_fw_fwd_addr != NULL
566
#ifdef INET6
567
            && isipv6 == NULL /* IPv6 support is not yet */
568
#endif /* INET6 */
569
            ) {
570
                /*
571
                 * Diverted. Pretend to be the destination.
572
                 * already got one like this?
573
                 */
574
                inp = in_pcblookup_hash(&tcbinfo, ip->ip_src, th->th_sport,
575
                        ip->ip_dst, th->th_dport, 0, m->m_pkthdr.rcvif);
576
                if (!inp) {
577
                        /*
578
                         * No, then it's new. Try find the ambushing socket
579
                         */
580
                        if (!ip_fw_fwd_addr->sin_port) {
581
                                inp = in_pcblookup_hash(&tcbinfo, ip->ip_src,
582
                                    th->th_sport, ip_fw_fwd_addr->sin_addr,
583
                                    th->th_dport, 1, m->m_pkthdr.rcvif);
584
                        } else {
585
                                inp = in_pcblookup_hash(&tcbinfo,
586
                                    ip->ip_src, th->th_sport,
587
                                    ip_fw_fwd_addr->sin_addr,
588
                                    ntohs(ip_fw_fwd_addr->sin_port), 1,
589
                                    m->m_pkthdr.rcvif);
590
                        }
591
                }
592
                ip_fw_fwd_addr = NULL;
593
        } else
594
#endif  /* IPFIREWALL_FORWARD */
595
      {
596
#ifdef INET6
597
        if (isipv6)
598
                inp = in6_pcblookup_hash(&tcbinfo, &ip6->ip6_src, th->th_sport,
599
                                         &ip6->ip6_dst, th->th_dport, 1,
600
                                         m->m_pkthdr.rcvif);
601
        else
602
#endif /* INET6 */
603
        inp = in_pcblookup_hash(&tcbinfo, ip->ip_src, th->th_sport,
604
            ip->ip_dst, th->th_dport, 1, m->m_pkthdr.rcvif);
605
      }
606
 
607
#ifdef IPSEC
608
#ifdef INET6
609
        if (isipv6) {
610
                if (inp != NULL && ipsec6_in_reject_so(m, inp->inp_socket)) {
611
                        ipsec6stat.in_polvio++;
612
                        goto drop;
613
                }
614
        } else
615
#endif /* INET6 */
616
        if (inp != NULL && ipsec4_in_reject_so(m, inp->inp_socket)) {
617
                ipsecstat.in_polvio++;
618
                goto drop;
619
        }
620
#endif /*IPSEC*/
621
 
622
        /*
623
         * If the state is CLOSED (i.e., TCB does not exist) then
624
         * all data in the incoming segment is discarded.
625
         * If the TCB exists but is in CLOSED state, it is embryonic,
626
         * but should either do a listen or a connect soon.
627
         */
628
        if (inp == NULL) {
629
                if (log_in_vain) {
630
#ifdef INET6
631
                        char dbuf[INET6_ADDRSTRLEN], sbuf[INET6_ADDRSTRLEN];
632
#else /* INET6 */
633
                        char dbuf[4*sizeof "123"], sbuf[4*sizeof "123"];
634
#endif /* INET6 */
635
 
636
#ifdef INET6
637
                        if (isipv6) {
638
                                strcpy(dbuf, ip6_sprintf(&ip6->ip6_dst));
639
                                strcpy(sbuf, ip6_sprintf(&ip6->ip6_src));
640
                        } else
641
#endif
642
                      {
643
                        strcpy(dbuf, inet_ntoa(ip->ip_dst));
644
                        strcpy(sbuf, inet_ntoa(ip->ip_src));
645
                      }
646
                        switch (log_in_vain) {
647
                        case 1:
648
                                if(thflags & TH_SYN)
649
                                        log(LOG_INFO,
650
                                        "Connection attempt to TCP %s:%d from %s:%d\n",
651
                                        dbuf, ntohs(th->th_dport),
652
                                        sbuf,
653
                                        ntohs(th->th_sport));
654
                                break;
655
                        case 2:
656
                                log(LOG_INFO,
657
                                "Connection attempt to TCP %s:%d from %s:%d flags:0x%x\n",
658
                                dbuf, ntohs(th->th_dport), sbuf,
659
                                ntohs(th->th_sport), thflags);
660
                                break;
661
                        default:
662
                                break;
663
                        }
664
                }
665
                if (blackhole) {
666
                        switch (blackhole) {
667
                        case 1:
668
                                if (thflags & TH_SYN)
669
                                        goto drop;
670
                                break;
671
                        case 2:
672
                                goto drop;
673
                        default:
674
                                goto drop;
675
                        }
676
                }
677
                rstreason = BANDLIM_RST_CLOSEDPORT;
678
                goto dropwithreset;
679
        }
680
        tp = intotcpcb(inp);
681
        if (tp == 0) {
682
                rstreason = BANDLIM_RST_CLOSEDPORT;
683
                goto dropwithreset;
684
        }
685
        if (tp->t_state == TCPS_CLOSED)
686
                goto drop;
687
 
688
        /* Unscale the window into a 32-bit value. */
689
        if ((thflags & TH_SYN) == 0)
690
                tiwin = th->th_win << tp->snd_scale;
691
        else
692
                tiwin = th->th_win;
693
 
694
        so = inp->inp_socket;
695
        if (so->so_options & (SO_DEBUG|SO_ACCEPTCONN)) {
696
#ifdef TCPDEBUG
697
                if (so->so_options & SO_DEBUG) {
698
                        ostate = tp->t_state;
699
#ifdef INET6
700
                        if (isipv6)
701
                                bcopy((char *)ip6, (char *)tcp_saveipgen,
702
                                      sizeof(*ip6));
703
                        else
704
#endif /* INET6 */
705
                        bcopy((char *)ip, (char *)tcp_saveipgen, sizeof(*ip));
706
                        tcp_savetcp = *th;
707
                }
708
#endif
709
                if (so->so_options & SO_ACCEPTCONN) {
710
                        register struct tcpcb *tp0 = tp;
711
                        struct socket *so2;
712
#ifdef IPSEC
713
                        struct socket *oso;
714
#endif
715
#ifdef INET6
716
                        struct inpcb *oinp = sotoinpcb(so);
717
#endif /* INET6 */
718
 
719
#ifndef IPSEC
720
                        /*
721
                         * Current IPsec implementation makes incorrect IPsec
722
                         * cache if this check is done here.
723
                         * So delay this until duplicated socket is created.
724
                         */
725
                        if ((thflags & (TH_RST|TH_ACK|TH_SYN)) != TH_SYN) {
726
                                /*
727
                                 * Note: dropwithreset makes sure we don't
728
                                 * send a RST in response to a RST.
729
                                 */
730
                                if (thflags & TH_ACK) {
731
                                        tcpstat.tcps_badsyn++;
732
                                        rstreason = BANDLIM_RST_OPENPORT;
733
                                        goto dropwithreset;
734
                                }
735
                                goto drop;
736
                        }
737
#endif
738
 
739
#ifdef INET6
740
                        /*
741
                         * If deprecated address is forbidden,
742
                         * we do not accept SYN to deprecated interface
743
                         * address to prevent any new inbound connection from
744
                         * getting established.
745
                         * When we do not accept SYN, we send a TCP RST,
746
                         * with deprecated source address (instead of dropping
747
                         * it).  We compromise it as it is much better for peer
748
                         * to send a RST, and RST will be the final packet
749
                         * for the exchange.
750
                         *
751
                         * If we do not forbid deprecated addresses, we accept
752
                         * the SYN packet.  RFC2462 does not suggest dropping
753
                         * SYN in this case.
754
                         * If we decipher RFC2462 5.5.4, it says like this:
755
                         * 1. use of deprecated addr with existing
756
                         *    communication is okay - "SHOULD continue to be
757
                         *    used"
758
                         * 2. use of it with new communication:
759
                         *   (2a) "SHOULD NOT be used if alternate address
760
                         *        with sufficient scope is available"
761
                         *   (2b) nothing mentioned otherwise.
762
                         * Here we fall into (2b) case as we have no choice in
763
                         * our source address selection - we must obey the peer.
764
                         *
765
                         * The wording in RFC2462 is confusing, and there are
766
                         * multiple description text for deprecated address
767
                         * handling - worse, they are not exactly the same.
768
                         * I believe 5.5.4 is the best one, so we follow 5.5.4.
769
                         */
770
                        if (isipv6 && !ip6_use_deprecated) {
771
                                struct in6_ifaddr *ia6;
772
 
773
                                if ((ia6 = ip6_getdstifaddr(m)) &&
774
                                    (ia6->ia6_flags & IN6_IFF_DEPRECATED)) {
775
                                        tp = NULL;
776
                                        rstreason = BANDLIM_RST_OPENPORT;
777
                                        goto dropwithreset;
778
                                }
779
                        }
780
#endif
781
 
782
                        so2 = sonewconn(so, 0);
783
                        if (so2 == 0) {
784
                                tcpstat.tcps_listendrop++;
785
                                so2 = sodropablereq(so);
786
                                if (so2) {
787
                                        if (tcp_lq_overflow)
788
                                                sototcpcb(so2)->t_flags |=
789
                                                    TF_LQ_OVERFLOW;
790
                                        tcp_drop(sototcpcb(so2), ETIMEDOUT);
791
                                        so2 = sonewconn(so, 0);
792
                                }
793
                                if (!so2)
794
                                        goto drop;
795
                        }
796
#ifdef IPSEC
797
                        oso = so;
798
#endif
799
                        so = so2;
800
                        /*
801
                         * This is ugly, but ....
802
                         *
803
                         * Mark socket as temporary until we're
804
                         * committed to keeping it.  The code at
805
                         * ``drop'' and ``dropwithreset'' check the
806
                         * flag dropsocket to see if the temporary
807
                         * socket created here should be discarded.
808
                         * We mark the socket as discardable until
809
                         * we're committed to it below in TCPS_LISTEN.
810
                         */
811
                        dropsocket++;
812
                        inp = (struct inpcb *)so->so_pcb;
813
#ifdef INET6
814
                        if (isipv6)
815
                                inp->in6p_laddr = ip6->ip6_dst;
816
                        else {
817
                                inp->inp_vflag &= ~INP_IPV6;
818
                                inp->inp_vflag |= INP_IPV4;
819
#endif /* INET6 */
820
                        inp->inp_laddr = ip->ip_dst;
821
#ifdef INET6
822
                        }
823
#endif /* INET6 */
824
                        inp->inp_lport = th->th_dport;
825
                        if (in_pcbinshash(inp) != 0) {
826
                                /*
827
                                 * Undo the assignments above if we failed to
828
                                 * put the PCB on the hash lists.
829
                                 */
830
#ifdef INET6
831
                                if (isipv6)
832
                                        inp->in6p_laddr = in6addr_any;
833
                                else
834
#endif /* INET6 */
835
                                inp->inp_laddr.s_addr = INADDR_ANY;
836
                                inp->inp_lport = 0;
837
                                goto drop;
838
                        }
839
#ifdef IPSEC
840
                        /*
841
                         * To avoid creating incorrectly cached IPsec
842
                         * association, this needs to be done here.
843
                         *
844
                         * Subject: (KAME-snap 748)
845
                         * From: Wayne Knowles <w.knowles@niwa.cri.nz>
846
                         * ftp://ftp.kame.net/pub/mail-list/snap-users/748
847
                         */
848
                        if ((thflags & (TH_RST|TH_ACK|TH_SYN)) != TH_SYN) {
849
                                /*
850
                                 * Note: dropwithreset makes sure we don't
851
                                 * send a RST in response to a RST.
852
                                 */
853
                                if (thflags & TH_ACK) {
854
                                        tcpstat.tcps_badsyn++;
855
                                        rstreason = BANDLIM_RST_OPENPORT;
856
                                        goto dropwithreset;
857
                                }
858
                                goto drop;
859
                        }
860
#endif
861
#ifdef INET6
862
                        if (isipv6) {
863
                                /*
864
                                 * Inherit socket options from the listening
865
                                 * socket.
866
                                 * Note that in6p_inputopts are not (even
867
                                 * should not be) copied, since it stores
868
                                 * previously received options and is used to
869
                                 * detect if each new option is different than
870
                                 * the previous one and hence should be passed
871
                                 * to a user.
872
                                 * If we copied in6p_inputopts, a user would
873
                                 * not be able to receive options just after
874
                                 * calling the accept system call.
875
                                 */
876
                                inp->inp_flags |=
877
                                        oinp->inp_flags & INP_CONTROLOPTS;
878
                                if (oinp->in6p_outputopts)
879
                                        inp->in6p_outputopts =
880
                                                ip6_copypktopts(oinp->in6p_outputopts,
881
                                                                M_NOWAIT);
882
                        } else
883
#endif /* INET6 */
884
                        inp->inp_options = ip_srcroute();
885
#ifdef IPSEC
886
                        /* copy old policy into new socket's */
887
                        if (ipsec_copy_policy(sotoinpcb(oso)->inp_sp,
888
                                              inp->inp_sp))
889
                                printf("tcp_input: could not copy policy\n");
890
#endif
891
                        tp = intotcpcb(inp);
892
                        tp->t_state = TCPS_LISTEN;
893
                        tp->t_flags |= tp0->t_flags & (TF_NOPUSH|TF_NOOPT);
894
 
895
                        /* Compute proper scaling value from buffer space */
896
                        while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
897
                           TCP_MAXWIN << tp->request_r_scale <
898
                           so->so_rcv.sb_hiwat)
899
                                tp->request_r_scale++;
900
                }
901
        }
902
 
903
#ifdef INET6
904
        /* save packet options if user wanted */
905
        if (isipv6 && (inp->in6p_flags & INP_CONTROLOPTS) != 0) {
906
                struct ip6_recvpktopts opts6;
907
 
908
                /*
909
                 * Temporarily re-adjusting the mbuf before ip6_savecontrol(),
910
                 * which is necessary for FreeBSD only due to difference from
911
                 * other BSD stacks.
912
                 * XXX: we'll soon make a more natural fix after getting a
913
                 *      consensus.
914
                 */
915
                ip6_savecontrol(inp, ip6, m, &opts6, &inp->in6p_inputopts);
916
                if (inp->in6p_inputopts)
917
                        ip6_update_recvpcbopt(inp->in6p_inputopts, &opts6);
918
                if (opts6.head) {
919
                        if (sbappendcontrol(&inp->in6p_socket->so_rcv,
920
                                            NULL, opts6.head)
921
                            == 0)
922
                                m_freem(opts6.head);
923
                }
924
        }
925
#endif /* INET6 */
926
 
927
        /*
928
         * Segment received on connection.
929
         * Reset idle time and keep-alive timer.
930
         */
931
        tp->t_rcvtime = ticks;
932
        if (TCPS_HAVEESTABLISHED(tp->t_state))
933
                callout_reset(tp->tt_keep, tcp_keepidle, tcp_timer_keep, tp);
934
 
935
        /*
936
         * Process options if not in LISTEN state,
937
         * else do it below (after getting remote address).
938
         */
939
        if (tp->t_state != TCPS_LISTEN)
940
                tcp_dooptions(tp, optp, optlen, th, &to);
941
 
942
        /*
943
         * Header prediction: check for the two common cases
944
         * of a uni-directional data xfer.  If the packet has
945
         * no control flags, is in-sequence, the window didn't
946
         * change and we're not retransmitting, it's a
947
         * candidate.  If the length is zero and the ack moved
948
         * forward, we're the sender side of the xfer.  Just
949
         * free the data acked & wake any higher level process
950
         * that was blocked waiting for space.  If the length
951
         * is non-zero and the ack didn't move, we're the
952
         * receiver side.  If we're getting packets in-order
953
         * (the reassembly queue is empty), add the data to
954
         * the socket buffer and note that we need a delayed ack.
955
         * Make sure that the hidden state-flags are also off.
956
         * Since we check for TCPS_ESTABLISHED above, it can only
957
         * be TH_NEEDSYN.
958
         */
959
        if (tp->t_state == TCPS_ESTABLISHED &&
960
            (thflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK &&
961
            ((tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) == 0) &&
962
            ((to.to_flag & TOF_TS) == 0 ||
963
             TSTMP_GEQ(to.to_tsval, tp->ts_recent)) &&
964
            /*
965
             * Using the CC option is compulsory if once started:
966
             *   the segment is OK if no T/TCP was negotiated or
967
             *   if the segment has a CC option equal to CCrecv
968
             */
969
            ((tp->t_flags & (TF_REQ_CC|TF_RCVD_CC)) != (TF_REQ_CC|TF_RCVD_CC) ||
970
             ((to.to_flag & TOF_CC) != 0 && to.to_cc == tp->cc_recv)) &&
971
            th->th_seq == tp->rcv_nxt &&
972
            tiwin && tiwin == tp->snd_wnd &&
973
            tp->snd_nxt == tp->snd_max) {
974
 
975
                /*
976
                 * If last ACK falls within this segment's sequence numbers,
977
                 * record the timestamp.
978
                 * NOTE that the test is modified according to the latest
979
                 * proposal of the tcplw@cray.com list (Braden 1993/04/26).
980
                 */
981
                if ((to.to_flag & TOF_TS) != 0 &&
982
                   SEQ_LEQ(th->th_seq, tp->last_ack_sent)) {
983
                        tp->ts_recent_age = ticks;
984
                        tp->ts_recent = to.to_tsval;
985
                }
986
 
987
                if (tlen == 0) {
988
                        if (SEQ_GT(th->th_ack, tp->snd_una) &&
989
                            SEQ_LEQ(th->th_ack, tp->snd_max) &&
990
                            tp->snd_cwnd >= tp->snd_wnd &&
991
                            tp->t_dupacks < tcprexmtthresh) {
992
                                /*
993
                                 * this is a pure ack for outstanding data.
994
                                 */
995
                                ++tcpstat.tcps_predack;
996
                                /*
997
                                 * "bad retransmit" recovery
998
                                 */
999
                                if (tp->t_rxtshift == 1 &&
1000
                                    ticks < tp->t_badrxtwin) {
1001
                                        tp->snd_cwnd = tp->snd_cwnd_prev;
1002
                                        tp->snd_ssthresh =
1003
                                            tp->snd_ssthresh_prev;
1004
                                        tp->snd_nxt = tp->snd_max;
1005
                                        tp->t_badrxtwin = 0;
1006
                                }
1007
                                if ((to.to_flag & TOF_TS) != 0)
1008
                                        tcp_xmit_timer(tp,
1009
                                            ticks - to.to_tsecr + 1);
1010
                                else if (tp->t_rtttime &&
1011
                                            SEQ_GT(th->th_ack, tp->t_rtseq))
1012
                                        tcp_xmit_timer(tp, ticks - tp->t_rtttime);
1013
                                acked = th->th_ack - tp->snd_una;
1014
                                tcpstat.tcps_rcvackpack++;
1015
                                tcpstat.tcps_rcvackbyte += acked;
1016
                                sbdrop(&so->so_snd, acked);
1017
                                tp->snd_una = th->th_ack;
1018
                                m_freem(m);
1019
                                ND6_HINT(tp); /* some progress has been done */
1020
 
1021
                                /*
1022
                                 * If all outstanding data are acked, stop
1023
                                 * retransmit timer, otherwise restart timer
1024
                                 * using current (possibly backed-off) value.
1025
                                 * If process is waiting for space,
1026
                                 * wakeup/selwakeup/signal.  If data
1027
                                 * are ready to send, let tcp_output
1028
                                 * decide between more output or persist.
1029
                                 */
1030
                                if (tp->snd_una == tp->snd_max)
1031
                                        callout_stop(tp->tt_rexmt);
1032
                                else if (!callout_active(tp->tt_persist))
1033
                                        callout_reset(tp->tt_rexmt,
1034
                                                      tp->t_rxtcur,
1035
                                                      tcp_timer_rexmt, tp);
1036
 
1037
                                sowwakeup(so);
1038
                                if (so->so_snd.sb_cc)
1039
                                        (void) tcp_output(tp);
1040
                                return;
1041
                        }
1042
                } else if (th->th_ack == tp->snd_una &&
1043
                    LIST_EMPTY(&tp->t_segq) &&
1044
                    tlen <= sbspace(&so->so_rcv)) {
1045
                        /*
1046
                         * this is a pure, in-sequence data packet
1047
                         * with nothing on the reassembly queue and
1048
                         * we have enough buffer space to take it.
1049
                         */
1050
                        ++tcpstat.tcps_preddat;
1051
                        tp->rcv_nxt += tlen;
1052
                        tcpstat.tcps_rcvpack++;
1053
                        tcpstat.tcps_rcvbyte += tlen;
1054
                        ND6_HINT(tp);   /* some progress has been done */
1055
                        /*
1056
                         * Add data to socket buffer.
1057
                         */
1058
                        m_adj(m, drop_hdrlen);  /* delayed header drop */
1059
                        sbappend(&so->so_rcv, m);
1060
                        sorwakeup(so);
1061
                        if (DELAY_ACK(tp)) {
1062
                                callout_reset(tp->tt_delack, tcp_delacktime,
1063
                                    tcp_timer_delack, tp);
1064
                        } else {
1065
                                tp->t_flags |= TF_ACKNOW;
1066
                                tcp_output(tp);
1067
                        }
1068
                        return;
1069
                }
1070
        }
1071
 
1072
        /*
1073
         * Calculate amount of space in receive window,
1074
         * and then do TCP input processing.
1075
         * Receive window is amount of space in rcv queue,
1076
         * but not less than advertised window.
1077
         */
1078
        { int win;
1079
 
1080
        win = sbspace(&so->so_rcv);
1081
        if (win < 0)
1082
                win = 0;
1083
        tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt));
1084
        }
1085
 
1086
        switch (tp->t_state) {
1087
 
1088
        /*
1089
         * If the state is LISTEN then ignore segment if it contains an RST.
1090
         * If the segment contains an ACK then it is bad and send a RST.
1091
         * If it does not contain a SYN then it is not interesting; drop it.
1092
         * If it is from this socket, drop it, it must be forged.
1093
         * Don't bother responding if the destination was a broadcast.
1094
         * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial
1095
         * tp->iss, and send a segment:
1096
         *     <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK>
1097
         * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss.
1098
         * Fill in remote peer address fields if not previously specified.
1099
         * Enter SYN_RECEIVED state, and process any other fields of this
1100
         * segment in this state.
1101
         */
1102
        case TCPS_LISTEN: {
1103
                register struct sockaddr_in *sin;
1104
#ifdef INET6
1105
                register struct sockaddr_in6 *sin6;
1106
#endif
1107
 
1108
                if (thflags & TH_RST)
1109
                        goto drop;
1110
                if (thflags & TH_ACK) {
1111
                        rstreason = BANDLIM_RST_OPENPORT;
1112
                        goto dropwithreset;
1113
                }
1114
                if ((thflags & TH_SYN) == 0)
1115
                        goto drop;
1116
                if (th->th_dport == th->th_sport) {
1117
#ifdef INET6
1118
                        if (isipv6) {
1119
                                if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst,
1120
                                                       &ip6->ip6_src))
1121
                                        goto drop;
1122
                        } else
1123
#endif /* INET6 */
1124
                        if (ip->ip_dst.s_addr == ip->ip_src.s_addr)
1125
                                goto drop;
1126
                }
1127
                /*
1128
                 * RFC1122 4.2.3.10, p. 104: discard bcast/mcast SYN
1129
                 * in_broadcast() should never return true on a received
1130
                 * packet with M_BCAST not set.
1131
                 *
1132
                 * Packets with a multicast source address should also
1133
                 * be discarded.
1134
                 */
1135
                if (m->m_flags & (M_BCAST|M_MCAST))
1136
                        goto drop;
1137
#ifdef INET6
1138
                if (isipv6) {
1139
                        if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
1140
                            IN6_IS_ADDR_MULTICAST(&ip6->ip6_src))
1141
                                goto drop;
1142
                } else
1143
#endif
1144
                if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
1145
                    IN_MULTICAST(ntohl(ip->ip_src.s_addr)) ||
1146
                    ip->ip_src.s_addr == htonl(INADDR_BROADCAST))
1147
                        goto drop;
1148
#ifdef INET6
1149
                if (isipv6) {
1150
                        MALLOC(sin6, struct sockaddr_in6 *, sizeof *sin6,
1151
                               M_SONAME, M_NOWAIT);
1152
                        if (sin6 == NULL)
1153
                                goto drop;
1154
                        bzero(sin6, sizeof(*sin6));
1155
                        sin6->sin6_family = AF_INET6;
1156
                        sin6->sin6_len = sizeof(*sin6);
1157
                        sin6->sin6_addr = ip6->ip6_src;
1158
                        sin6->sin6_port = th->th_sport;
1159
                        laddr6 = inp->in6p_laddr;
1160
                        if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
1161
                                inp->in6p_laddr = ip6->ip6_dst;
1162
                        if (in6_pcbconnect(inp, (struct sockaddr *)sin6,
1163
                                           (struct proc *)&proc0)) {
1164
                                inp->in6p_laddr = laddr6;
1165
                                FREE(sin6, M_SONAME);
1166
                                goto drop;
1167
                        }
1168
                        FREE(sin6, M_SONAME);
1169
                } else
1170
#endif
1171
              {
1172
                MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME,
1173
                       M_NOWAIT);
1174
                if (sin == NULL)
1175
                        goto drop;
1176
                sin->sin_family = AF_INET;
1177
                sin->sin_len = sizeof(*sin);
1178
                sin->sin_addr = ip->ip_src;
1179
                sin->sin_port = th->th_sport;
1180
                bzero((caddr_t)sin->sin_zero, sizeof(sin->sin_zero));
1181
                laddr = inp->inp_laddr;
1182
                if (inp->inp_laddr.s_addr == INADDR_ANY)
1183
                        inp->inp_laddr = ip->ip_dst;
1184
                if (in_pcbconnect(inp, (struct sockaddr *)sin, proc0)) {
1185
                        inp->inp_laddr = laddr;
1186
                        FREE(sin, M_SONAME);
1187
                        goto drop;
1188
                }
1189
                FREE(sin, M_SONAME);
1190
              }
1191
                if ((taop = tcp_gettaocache(inp)) == NULL) {
1192
                        taop = &tao_noncached;
1193
                        bzero(taop, sizeof(*taop));
1194
                }
1195
                tcp_dooptions(tp, optp, optlen, th, &to);
1196
                if (iss)
1197
                        tp->iss = iss;
1198
                else {
1199
                        tp->iss = tcp_new_isn(tp);
1200
                }
1201
                tp->irs = th->th_seq;
1202
                tcp_sendseqinit(tp);
1203
                tcp_rcvseqinit(tp);
1204
                tp->snd_recover = tp->snd_una;
1205
                /*
1206
                 * Initialization of the tcpcb for transaction;
1207
                 *   set SND.WND = SEG.WND,
1208
                 *   initialize CCsend and CCrecv.
1209
                 */
1210
                tp->snd_wnd = tiwin;    /* initial send-window */
1211
                tp->cc_send = CC_INC(tcp_ccgen);
1212
                tp->cc_recv = to.to_cc;
1213
                /*
1214
                 * Perform TAO test on incoming CC (SEG.CC) option, if any.
1215
                 * - compare SEG.CC against cached CC from the same host,
1216
                 *      if any.
1217
                 * - if SEG.CC > cached value, SYN must be new and is accepted
1218
                 *      immediately: save new CC in the cache, mark the socket
1219
                 *      connected, enter ESTABLISHED state, turn on flag to
1220
                 *      send a SYN in the next segment.
1221
                 *      A virtual advertised window is set in rcv_adv to
1222
                 *      initialize SWS prevention.  Then enter normal segment
1223
                 *      processing: drop SYN, process data and FIN.
1224
                 * - otherwise do a normal 3-way handshake.
1225
                 */
1226
                if ((to.to_flag & TOF_CC) != 0) {
1227
                    if (((tp->t_flags & TF_NOPUSH) != 0) &&
1228
                        taop->tao_cc != 0 && CC_GT(to.to_cc, taop->tao_cc)) {
1229
 
1230
                        taop->tao_cc = to.to_cc;
1231
                        tp->t_starttime = ticks;
1232
                        tp->t_state = TCPS_ESTABLISHED;
1233
 
1234
                        /*
1235
                         * If there is a FIN, or if there is data and the
1236
                         * connection is local, then delay SYN,ACK(SYN) in
1237
                         * the hope of piggy-backing it on a response
1238
                         * segment.  Otherwise must send ACK now in case
1239
                         * the other side is slow starting.
1240
                         */
1241
                        if (DELAY_ACK(tp) && ((thflags & TH_FIN) ||
1242
                            (tlen != 0 &&
1243
#ifdef INET6
1244
                              ((isipv6 && in6_localaddr(&inp->in6p_faddr))
1245
                              ||
1246
                              (!isipv6 &&
1247
#endif
1248
                            in_localaddr(inp->inp_faddr)
1249
#ifdef INET6
1250
                               ))
1251
#endif
1252
                             ))) {
1253
                                callout_reset(tp->tt_delack, tcp_delacktime,
1254
                                    tcp_timer_delack, tp);
1255
                                tp->t_flags |= TF_NEEDSYN;
1256
                        } else
1257
                                tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN);
1258
 
1259
                        /*
1260
                         * Limit the `virtual advertised window' to TCP_MAXWIN
1261
                         * here.  Even if we requested window scaling, it will
1262
                         * become effective only later when our SYN is acked.
1263
                         */
1264
                        tp->rcv_adv += min(tp->rcv_wnd, TCP_MAXWIN);
1265
                        tcpstat.tcps_connects++;
1266
                        soisconnected(so);
1267
                        callout_reset(tp->tt_keep, tcp_keepinit,
1268
                                      tcp_timer_keep, tp);
1269
                        dropsocket = 0;          /* committed to socket */
1270
                        tcpstat.tcps_accepts++;
1271
                        goto trimthenstep6;
1272
                    }
1273
                /* else do standard 3-way handshake */
1274
                } else {
1275
                    /*
1276
                     * No CC option, but maybe CC.NEW:
1277
                     *   invalidate cached value.
1278
                     */
1279
                     taop->tao_cc = 0;
1280
                }
1281
                /*
1282
                 * TAO test failed or there was no CC option,
1283
                 *    do a standard 3-way handshake.
1284
                 */
1285
                tp->t_flags |= TF_ACKNOW;
1286
                tp->t_state = TCPS_SYN_RECEIVED;
1287
                callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp);
1288
                dropsocket = 0;          /* committed to socket */
1289
                tcpstat.tcps_accepts++;
1290
                goto trimthenstep6;
1291
                }
1292
 
1293
        /*
1294
         * If the state is SYN_RECEIVED:
1295
         *      if seg contains an ACK, but not for our SYN/ACK, send a RST.
1296
         */
1297
        case TCPS_SYN_RECEIVED:
1298
                if ((thflags & TH_ACK) &&
1299
                    (SEQ_LEQ(th->th_ack, tp->snd_una) ||
1300
                     SEQ_GT(th->th_ack, tp->snd_max))) {
1301
                                rstreason = BANDLIM_RST_OPENPORT;
1302
                                goto dropwithreset;
1303
                }
1304
                break;
1305
 
1306
        /*
1307
         * If the state is SYN_SENT:
1308
         *      if seg contains an ACK, but not for our SYN, drop the input.
1309
         *      if seg contains a RST, then drop the connection.
1310
         *      if seg does not contain SYN, then drop it.
1311
         * Otherwise this is an acceptable SYN segment
1312
         *      initialize tp->rcv_nxt and tp->irs
1313
         *      if seg contains ack then advance tp->snd_una
1314
         *      if SYN has been acked change to ESTABLISHED else SYN_RCVD state
1315
         *      arrange for segment to be acked (eventually)
1316
         *      continue processing rest of data/controls, beginning with URG
1317
         */
1318
        case TCPS_SYN_SENT:
1319
                if ((taop = tcp_gettaocache(inp)) == NULL) {
1320
                        taop = &tao_noncached;
1321
                        bzero(taop, sizeof(*taop));
1322
                }
1323
 
1324
                if ((thflags & TH_ACK) &&
1325
                    (SEQ_LEQ(th->th_ack, tp->iss) ||
1326
                     SEQ_GT(th->th_ack, tp->snd_max))) {
1327
                        /*
1328
                         * If we have a cached CCsent for the remote host,
1329
                         * hence we haven't just crashed and restarted,
1330
                         * do not send a RST.  This may be a retransmission
1331
                         * from the other side after our earlier ACK was lost.
1332
                         * Our new SYN, when it arrives, will serve as the
1333
                         * needed ACK.
1334
                         */
1335
                        if (taop->tao_ccsent != 0)
1336
                                goto drop;
1337
                        else {
1338
                                rstreason = BANDLIM_UNLIMITED;
1339
                                goto dropwithreset;
1340
                        }
1341
                }
1342
                if (thflags & TH_RST) {
1343
                        if (thflags & TH_ACK)
1344
                                tp = tcp_drop(tp, ECONNREFUSED);
1345
                        goto drop;
1346
                }
1347
                if ((thflags & TH_SYN) == 0)
1348
                        goto drop;
1349
                tp->snd_wnd = th->th_win;       /* initial send window */
1350
                tp->cc_recv = to.to_cc;         /* foreign CC */
1351
 
1352
                tp->irs = th->th_seq;
1353
                tcp_rcvseqinit(tp);
1354
                if (thflags & TH_ACK) {
1355
                        /*
1356
                         * Our SYN was acked.  If segment contains CC.ECHO
1357
                         * option, check it to make sure this segment really
1358
                         * matches our SYN.  If not, just drop it as old
1359
                         * duplicate, but send an RST if we're still playing
1360
                         * by the old rules.  If no CC.ECHO option, make sure
1361
                         * we don't get fooled into using T/TCP.
1362
                         */
1363
                        if (to.to_flag & TOF_CCECHO) {
1364
                                if (tp->cc_send != to.to_ccecho) {
1365
                                        if (taop->tao_ccsent != 0)
1366
                                                goto drop;
1367
                                        else {
1368
                                                rstreason = BANDLIM_UNLIMITED;
1369
                                                goto dropwithreset;
1370
                                        }
1371
                                }
1372
                        } else
1373
                                tp->t_flags &= ~TF_RCVD_CC;
1374
                        tcpstat.tcps_connects++;
1375
                        soisconnected(so);
1376
                        /* Do window scaling on this connection? */
1377
                        if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
1378
                                (TF_RCVD_SCALE|TF_REQ_SCALE)) {
1379
                                tp->snd_scale = tp->requested_s_scale;
1380
                                tp->rcv_scale = tp->request_r_scale;
1381
                        }
1382
                        /* Segment is acceptable, update cache if undefined. */
1383
                        if (taop->tao_ccsent == 0)
1384
                                taop->tao_ccsent = to.to_ccecho;
1385
 
1386
                        tp->rcv_adv += tp->rcv_wnd;
1387
                        tp->snd_una++;          /* SYN is acked */
1388
                        /*
1389
                         * If there's data, delay ACK; if there's also a FIN
1390
                         * ACKNOW will be turned on later.
1391
                         */
1392
                        if (DELAY_ACK(tp) && tlen != 0)
1393
                                callout_reset(tp->tt_delack, tcp_delacktime,
1394
                                    tcp_timer_delack, tp);
1395
                        else
1396
                                tp->t_flags |= TF_ACKNOW;
1397
                        /*
1398
                         * Received <SYN,ACK> in SYN_SENT[*] state.
1399
                         * Transitions:
1400
                         *      SYN_SENT  --> ESTABLISHED
1401
                         *      SYN_SENT* --> FIN_WAIT_1
1402
                         */
1403
                        tp->t_starttime = ticks;
1404
                        if (tp->t_flags & TF_NEEDFIN) {
1405
                                tp->t_state = TCPS_FIN_WAIT_1;
1406
                                tp->t_flags &= ~TF_NEEDFIN;
1407
                                thflags &= ~TH_SYN;
1408
                        } else {
1409
                                tp->t_state = TCPS_ESTABLISHED;
1410
                                callout_reset(tp->tt_keep, tcp_keepidle,
1411
                                              tcp_timer_keep, tp);
1412
                        }
1413
                } else {
1414
                /*
1415
                 *  Received initial SYN in SYN-SENT[*] state => simul-
1416
                 *  taneous open.  If segment contains CC option and there is
1417
                 *  a cached CC, apply TAO test; if it succeeds, connection is
1418
                 *  half-synchronized.  Otherwise, do 3-way handshake:
1419
                 *        SYN-SENT -> SYN-RECEIVED
1420
                 *        SYN-SENT* -> SYN-RECEIVED*
1421
                 *  If there was no CC option, clear cached CC value.
1422
                 */
1423
                        tp->t_flags |= TF_ACKNOW;
1424
                        callout_stop(tp->tt_rexmt);
1425
                        if (to.to_flag & TOF_CC) {
1426
                                if (taop->tao_cc != 0 &&
1427
                                    CC_GT(to.to_cc, taop->tao_cc)) {
1428
                                        /*
1429
                                         * update cache and make transition:
1430
                                         *        SYN-SENT -> ESTABLISHED*
1431
                                         *        SYN-SENT* -> FIN-WAIT-1*
1432
                                         */
1433
                                        taop->tao_cc = to.to_cc;
1434
                                        tp->t_starttime = ticks;
1435
                                        if (tp->t_flags & TF_NEEDFIN) {
1436
                                                tp->t_state = TCPS_FIN_WAIT_1;
1437
                                                tp->t_flags &= ~TF_NEEDFIN;
1438
                                        } else {
1439
                                                tp->t_state = TCPS_ESTABLISHED;
1440
                                                callout_reset(tp->tt_keep,
1441
                                                              tcp_keepidle,
1442
                                                              tcp_timer_keep,
1443
                                                              tp);
1444
                                        }
1445
                                        tp->t_flags |= TF_NEEDSYN;
1446
                                } else
1447
                                        tp->t_state = TCPS_SYN_RECEIVED;
1448
                        } else {
1449
                                /* CC.NEW or no option => invalidate cache */
1450
                                taop->tao_cc = 0;
1451
                                tp->t_state = TCPS_SYN_RECEIVED;
1452
                        }
1453
                }
1454
 
1455
trimthenstep6:
1456
                /*
1457
                 * Advance th->th_seq to correspond to first data byte.
1458
                 * If data, trim to stay within window,
1459
                 * dropping FIN if necessary.
1460
                 */
1461
                th->th_seq++;
1462
                if (tlen > tp->rcv_wnd) {
1463
                        todrop = tlen - tp->rcv_wnd;
1464
                        m_adj(m, -todrop);
1465
                        tlen = tp->rcv_wnd;
1466
                        thflags &= ~TH_FIN;
1467
                        tcpstat.tcps_rcvpackafterwin++;
1468
                        tcpstat.tcps_rcvbyteafterwin += todrop;
1469
                }
1470
                tp->snd_wl1 = th->th_seq - 1;
1471
                tp->rcv_up = th->th_seq;
1472
                /*
1473
                 *  Client side of transaction: already sent SYN and data.
1474
                 *  If the remote host used T/TCP to validate the SYN,
1475
                 *  our data will be ACK'd; if so, enter normal data segment
1476
                 *  processing in the middle of step 5, ack processing.
1477
                 *  Otherwise, goto step 6.
1478
                 */
1479
                if (thflags & TH_ACK)
1480
                        goto process_ACK;
1481
                goto step6;
1482
        /*
1483
         * If the state is LAST_ACK or CLOSING or TIME_WAIT:
1484
         *      if segment contains a SYN and CC [not CC.NEW] option:
1485
         *              if state == TIME_WAIT and connection duration > MSL,
1486
         *                  drop packet and send RST;
1487
         *
1488
         *              if SEG.CC > CCrecv then is new SYN, and can implicitly
1489
         *                  ack the FIN (and data) in retransmission queue.
1490
         *                  Complete close and delete TCPCB.  Then reprocess
1491
         *                  segment, hoping to find new TCPCB in LISTEN state;
1492
         *
1493
         *              else must be old SYN; drop it.
1494
         *      else do normal processing.
1495
         */
1496
        case TCPS_LAST_ACK:
1497
        case TCPS_CLOSING:
1498
        case TCPS_TIME_WAIT:
1499
                if ((thflags & TH_SYN) &&
1500
                    (to.to_flag & TOF_CC) && tp->cc_recv != 0) {
1501
                        if (tp->t_state == TCPS_TIME_WAIT &&
1502
                                        (ticks - tp->t_starttime) > tcp_msl) {
1503
                                rstreason = BANDLIM_UNLIMITED;
1504
                                goto dropwithreset;
1505
                        }
1506
                        if (CC_GT(to.to_cc, tp->cc_recv)) {
1507
                                tp = tcp_close(tp);
1508
                                goto findpcb;
1509
                        }
1510
                        else
1511
                                goto drop;
1512
                }
1513
                break;  /* continue normal processing */
1514
        }
1515
 
1516
        /*
1517
         * States other than LISTEN or SYN_SENT.
1518
         * First check the RST flag and sequence number since reset segments
1519
         * are exempt from the timestamp and connection count tests.  This
1520
         * fixes a bug introduced by the Stevens, vol. 2, p. 960 bugfix
1521
         * below which allowed reset segments in half the sequence space
1522
         * to fall through and be processed (which gives forged reset
1523
         * segments with a random sequence number a 50 percent chance of
1524
         * killing a connection).
1525
         * Then check timestamp, if present.
1526
         * Then check the connection count, if present.
1527
         * Then check that at least some bytes of segment are within
1528
         * receive window.  If segment begins before rcv_nxt,
1529
         * drop leading data (and SYN); if nothing left, just ack.
1530
         *
1531
         *
1532
         * If the RST bit is set, check the sequence number to see
1533
         * if this is a valid reset segment.
1534
         * RFC 793 page 37:
1535
         *   In all states except SYN-SENT, all reset (RST) segments
1536
         *   are validated by checking their SEQ-fields.  A reset is
1537
         *   valid if its sequence number is in the window.
1538
         * Note: this does not take into account delayed ACKs, so
1539
         *   we should test against last_ack_sent instead of rcv_nxt.
1540
         *   The sequence number in the reset segment is normally an
1541
         *   echo of our outgoing acknowledgement numbers, but some hosts
1542
         *   send a reset with the sequence number at the rightmost edge
1543
         *   of our receive window, and we have to handle this case.
1544
         * If we have multiple segments in flight, the initial reset
1545
         * segment sequence numbers will be to the left of last_ack_sent,
1546
         * but they will eventually catch up.
1547
         * In any case, it never made sense to trim reset segments to
1548
         * fit the receive window since RFC 1122 says:
1549
         *   4.2.2.12  RST Segment: RFC-793 Section 3.4
1550
         *
1551
         *    A TCP SHOULD allow a received RST segment to include data.
1552
         *
1553
         *    DISCUSSION
1554
         *         It has been suggested that a RST segment could contain
1555
         *         ASCII text that encoded and explained the cause of the
1556
         *         RST.  No standard has yet been established for such
1557
         *         data.
1558
         *
1559
         * If the reset segment passes the sequence number test examine
1560
         * the state:
1561
         *    SYN_RECEIVED STATE:
1562
         *      If passive open, return to LISTEN state.
1563
         *      If active open, inform user that connection was refused.
1564
         *    ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES:
1565
         *      Inform user that connection was reset, and close tcb.
1566
         *    CLOSING, LAST_ACK STATES:
1567
         *      Close the tcb.
1568
         *    TIME_WAIT STATE:
1569
         *      Drop the segment - see Stevens, vol. 2, p. 964 and
1570
         *      RFC 1337.
1571
         */
1572
        if (thflags & TH_RST) {
1573
                if (SEQ_GEQ(th->th_seq, tp->last_ack_sent) &&
1574
                    SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) {
1575
                        switch (tp->t_state) {
1576
 
1577
                        case TCPS_SYN_RECEIVED:
1578
                                so->so_error = ECONNREFUSED;
1579
                                goto close;
1580
 
1581
                        case TCPS_ESTABLISHED:
1582
                        case TCPS_FIN_WAIT_1:
1583
                        case TCPS_FIN_WAIT_2:
1584
                        case TCPS_CLOSE_WAIT:
1585
                                so->so_error = ECONNRESET;
1586
                        close:
1587
                                tp->t_state = TCPS_CLOSED;
1588
                                tcpstat.tcps_drops++;
1589
                                tp = tcp_close(tp);
1590
                                break;
1591
 
1592
                        case TCPS_CLOSING:
1593
                        case TCPS_LAST_ACK:
1594
                                tp = tcp_close(tp);
1595
                                break;
1596
 
1597
                        case TCPS_TIME_WAIT:
1598
                                break;
1599
                        }
1600
                }
1601
                goto drop;
1602
        }
1603
 
1604
        /*
1605
         * RFC 1323 PAWS: If we have a timestamp reply on this segment
1606
         * and it's less than ts_recent, drop it.
1607
         */
1608
        if ((to.to_flag & TOF_TS) != 0 && tp->ts_recent &&
1609
            TSTMP_LT(to.to_tsval, tp->ts_recent)) {
1610
 
1611
                /* Check to see if ts_recent is over 24 days old.  */
1612
                if ((int)(ticks - tp->ts_recent_age) > TCP_PAWS_IDLE) {
1613
                        /*
1614
                         * Invalidate ts_recent.  If this segment updates
1615
                         * ts_recent, the age will be reset later and ts_recent
1616
                         * will get a valid value.  If it does not, setting
1617
                         * ts_recent to zero will at least satisfy the
1618
                         * requirement that zero be placed in the timestamp
1619
                         * echo reply when ts_recent isn't valid.  The
1620
                         * age isn't reset until we get a valid ts_recent
1621
                         * because we don't want out-of-order segments to be
1622
                         * dropped when ts_recent is old.
1623
                         */
1624
                        tp->ts_recent = 0;
1625
                } else {
1626
                        tcpstat.tcps_rcvduppack++;
1627
                        tcpstat.tcps_rcvdupbyte += tlen;
1628
                        tcpstat.tcps_pawsdrop++;
1629
                        goto dropafterack;
1630
                }
1631
        }
1632
 
1633
        /*
1634
         * T/TCP mechanism
1635
         *   If T/TCP was negotiated and the segment doesn't have CC,
1636
         *   or if its CC is wrong then drop the segment.
1637
         *   RST segments do not have to comply with this.
1638
         */
1639
        if ((tp->t_flags & (TF_REQ_CC|TF_RCVD_CC)) == (TF_REQ_CC|TF_RCVD_CC) &&
1640
            ((to.to_flag & TOF_CC) == 0 || tp->cc_recv != to.to_cc))
1641
                goto dropafterack;
1642
 
1643
        /*
1644
         * In the SYN-RECEIVED state, validate that the packet belongs to
1645
         * this connection before trimming the data to fit the receive
1646
         * window.  Check the sequence number versus IRS since we know
1647
         * the sequence numbers haven't wrapped.  This is a partial fix
1648
         * for the "LAND" DoS attack.
1649
         */
1650
        if (tp->t_state == TCPS_SYN_RECEIVED && SEQ_LT(th->th_seq, tp->irs)) {
1651
                rstreason = BANDLIM_UNLIMITED;
1652
                goto dropwithreset;
1653
        }
1654
 
1655
        todrop = tp->rcv_nxt - th->th_seq;
1656
        if (todrop > 0) {
1657
                if (thflags & TH_SYN) {
1658
                        thflags &= ~TH_SYN;
1659
                        th->th_seq++;
1660
                        if (th->th_urp > 1)
1661
                                th->th_urp--;
1662
                        else
1663
                                thflags &= ~TH_URG;
1664
                        todrop--;
1665
                }
1666
                /*
1667
                 * Following if statement from Stevens, vol. 2, p. 960.
1668
                 */
1669
                if (todrop > tlen
1670
                    || (todrop == tlen && (thflags & TH_FIN) == 0)) {
1671
                        /*
1672
                         * Any valid FIN must be to the left of the window.
1673
                         * At this point the FIN must be a duplicate or out
1674
                         * of sequence; drop it.
1675
                         */
1676
                        thflags &= ~TH_FIN;
1677
 
1678
                        /*
1679
                         * Send an ACK to resynchronize and drop any data.
1680
                         * But keep on processing for RST or ACK.
1681
                         */
1682
                        tp->t_flags |= TF_ACKNOW;
1683
                        todrop = tlen;
1684
                        tcpstat.tcps_rcvduppack++;
1685
                        tcpstat.tcps_rcvdupbyte += todrop;
1686
                } else {
1687
                        tcpstat.tcps_rcvpartduppack++;
1688
                        tcpstat.tcps_rcvpartdupbyte += todrop;
1689
                }
1690
                drop_hdrlen += todrop;  /* drop from the top afterwards */
1691
                th->th_seq += todrop;
1692
                tlen -= todrop;
1693
                if (th->th_urp > todrop)
1694
                        th->th_urp -= todrop;
1695
                else {
1696
                        thflags &= ~TH_URG;
1697
                        th->th_urp = 0;
1698
                }
1699
        }
1700
 
1701
        /*
1702
         * If new data are received on a connection after the
1703
         * user processes are gone, then RST the other end.
1704
         */
1705
        if ((so->so_state & SS_NOFDREF) &&
1706
            tp->t_state > TCPS_CLOSE_WAIT && tlen) {
1707
                tp = tcp_close(tp);
1708
                tcpstat.tcps_rcvafterclose++;
1709
                rstreason = BANDLIM_UNLIMITED;
1710
                goto dropwithreset;
1711
        }
1712
 
1713
        /*
1714
         * If segment ends after window, drop trailing data
1715
         * (and PUSH and FIN); if nothing left, just ACK.
1716
         */
1717
        todrop = (th->th_seq+tlen) - (tp->rcv_nxt+tp->rcv_wnd);
1718
        if (todrop > 0) {
1719
                tcpstat.tcps_rcvpackafterwin++;
1720
                if (todrop >= tlen) {
1721
                        tcpstat.tcps_rcvbyteafterwin += tlen;
1722
                        /*
1723
                         * If a new connection request is received
1724
                         * while in TIME_WAIT, drop the old connection
1725
                         * and start over if the sequence numbers
1726
                         * are above the previous ones.
1727
                         */
1728
                        if (thflags & TH_SYN &&
1729
                            tp->t_state == TCPS_TIME_WAIT &&
1730
                            SEQ_GT(th->th_seq, tp->rcv_nxt)) {
1731
                                iss = tcp_new_isn(tp);
1732
                                tp = tcp_close(tp);
1733
                                goto findpcb;
1734
                        }
1735
                        /*
1736
                         * If window is closed can only take segments at
1737
                         * window edge, and have to drop data and PUSH from
1738
                         * incoming segments.  Continue processing, but
1739
                         * remember to ack.  Otherwise, drop segment
1740
                         * and ack.
1741
                         */
1742
                        if (tp->rcv_wnd == 0 && th->th_seq == tp->rcv_nxt) {
1743
                                tp->t_flags |= TF_ACKNOW;
1744
                                tcpstat.tcps_rcvwinprobe++;
1745
                        } else
1746
                                goto dropafterack;
1747
                } else
1748
                        tcpstat.tcps_rcvbyteafterwin += todrop;
1749
                m_adj(m, -todrop);
1750
                tlen -= todrop;
1751
                thflags &= ~(TH_PUSH|TH_FIN);
1752
        }
1753
 
1754
        /*
1755
         * If last ACK falls within this segment's sequence numbers,
1756
         * record its timestamp.
1757
         * NOTE that the test is modified according to the latest
1758
         * proposal of the tcplw@cray.com list (Braden 1993/04/26).
1759
         */
1760
        if ((to.to_flag & TOF_TS) != 0 &&
1761
            SEQ_LEQ(th->th_seq, tp->last_ack_sent)) {
1762
                tp->ts_recent_age = ticks;
1763
                tp->ts_recent = to.to_tsval;
1764
        }
1765
 
1766
        /*
1767
         * If a SYN is in the window, then this is an
1768
         * error and we send an RST and drop the connection.
1769
         */
1770
        if (thflags & TH_SYN) {
1771
                tp = tcp_drop(tp, ECONNRESET);
1772
                rstreason = BANDLIM_UNLIMITED;
1773
                goto dropwithreset;
1774
        }
1775
 
1776
        /*
1777
         * If the ACK bit is off:  if in SYN-RECEIVED state or SENDSYN
1778
         * flag is on (half-synchronized state), then queue data for
1779
         * later processing; else drop segment and return.
1780
         */
1781
        if ((thflags & TH_ACK) == 0) {
1782
                if (tp->t_state == TCPS_SYN_RECEIVED ||
1783
                    (tp->t_flags & TF_NEEDSYN))
1784
                        goto step6;
1785
                else
1786
                        goto drop;
1787
        }
1788
 
1789
        /*
1790
         * Ack processing.
1791
         */
1792
        switch (tp->t_state) {
1793
 
1794
        /*
1795
         * In SYN_RECEIVED state, the ack ACKs our SYN, so enter
1796
         * ESTABLISHED state and continue processing.
1797
         * The ACK was checked above.
1798
         */
1799
        case TCPS_SYN_RECEIVED:
1800
 
1801
                tcpstat.tcps_connects++;
1802
                soisconnected(so);
1803
                /* Do window scaling? */
1804
                if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
1805
                        (TF_RCVD_SCALE|TF_REQ_SCALE)) {
1806
                        tp->snd_scale = tp->requested_s_scale;
1807
                        tp->rcv_scale = tp->request_r_scale;
1808
                }
1809
                /*
1810
                 * Upon successful completion of 3-way handshake,
1811
                 * update cache.CC if it was undefined, pass any queued
1812
                 * data to the user, and advance state appropriately.
1813
                 */
1814
                if ((taop = tcp_gettaocache(inp)) != NULL &&
1815
                    taop->tao_cc == 0)
1816
                        taop->tao_cc = tp->cc_recv;
1817
 
1818
                /*
1819
                 * Make transitions:
1820
                 *      SYN-RECEIVED  -> ESTABLISHED
1821
                 *      SYN-RECEIVED* -> FIN-WAIT-1
1822
                 */
1823
                tp->t_starttime = ticks;
1824
                if (tp->t_flags & TF_NEEDFIN) {
1825
                        tp->t_state = TCPS_FIN_WAIT_1;
1826
                        tp->t_flags &= ~TF_NEEDFIN;
1827
                } else {
1828
                        tp->t_state = TCPS_ESTABLISHED;
1829
                        callout_reset(tp->tt_keep, tcp_keepidle,
1830
                                      tcp_timer_keep, tp);
1831
                }
1832
                /*
1833
                 * If segment contains data or ACK, will call tcp_reass()
1834
                 * later; if not, do so now to pass queued data to user.
1835
                 */
1836
                if (tlen == 0 && (thflags & TH_FIN) == 0)
1837
                        (void) tcp_reass(tp, (struct tcphdr *)0, 0,
1838
                            (struct mbuf *)0);
1839
                tp->snd_wl1 = th->th_seq - 1;
1840
                /* fall into ... */
1841
 
1842
        /*
1843
         * In ESTABLISHED state: drop duplicate ACKs; ACK out of range
1844
         * ACKs.  If the ack is in the range
1845
         *      tp->snd_una < th->th_ack <= tp->snd_max
1846
         * then advance tp->snd_una to th->th_ack and drop
1847
         * data from the retransmission queue.  If this ACK reflects
1848
         * more up to date window information we update our window information.
1849
         */
1850
        case TCPS_ESTABLISHED:
1851
        case TCPS_FIN_WAIT_1:
1852
        case TCPS_FIN_WAIT_2:
1853
        case TCPS_CLOSE_WAIT:
1854
        case TCPS_CLOSING:
1855
        case TCPS_LAST_ACK:
1856
        case TCPS_TIME_WAIT:
1857
 
1858
                if (SEQ_LEQ(th->th_ack, tp->snd_una)) {
1859
                        if (tlen == 0 && tiwin == tp->snd_wnd) {
1860
                                tcpstat.tcps_rcvdupack++;
1861
                                /*
1862
                                 * If we have outstanding data (other than
1863
                                 * a window probe), this is a completely
1864
                                 * duplicate ack (ie, window info didn't
1865
                                 * change), the ack is the biggest we've
1866
                                 * seen and we've seen exactly our rexmt
1867
                                 * threshold of them, assume a packet
1868
                                 * has been dropped and retransmit it.
1869
                                 * Kludge snd_nxt & the congestion
1870
                                 * window so we send only this one
1871
                                 * packet.
1872
                                 *
1873
                                 * We know we're losing at the current
1874
                                 * window size so do congestion avoidance
1875
                                 * (set ssthresh to half the current window
1876
                                 * and pull our congestion window back to
1877
                                 * the new ssthresh).
1878
                                 *
1879
                                 * Dup acks mean that packets have left the
1880
                                 * network (they're now cached at the receiver)
1881
                                 * so bump cwnd by the amount in the receiver
1882
                                 * to keep a constant cwnd packets in the
1883
                                 * network.
1884
                                 */
1885
                                if (!callout_active(tp->tt_rexmt) ||
1886
                                    th->th_ack != tp->snd_una)
1887
                                        tp->t_dupacks = 0;
1888
                                else if (++tp->t_dupacks == tcprexmtthresh) {
1889
                                        tcp_seq onxt = tp->snd_nxt;
1890
                                        u_int win =
1891
                                            min(tp->snd_wnd, tp->snd_cwnd) / 2 /
1892
                                                tp->t_maxseg;
1893
                                        if (tcp_do_newreno && SEQ_LT(th->th_ack,
1894
                                            tp->snd_recover)) {
1895
                                                /* False retransmit, should not
1896
                                                 * cut window
1897
                                                 */
1898
                                                tp->snd_cwnd += tp->t_maxseg;
1899
                                                tp->t_dupacks = 0;
1900
                                                (void) tcp_output(tp);
1901
                                                goto drop;
1902
                                        }
1903
                                        if (win < 2)
1904
                                                win = 2;
1905
                                        tp->snd_ssthresh = win * tp->t_maxseg;
1906
                                        tp->snd_recover = tp->snd_max;
1907
                                        callout_stop(tp->tt_rexmt);
1908
                                        tp->t_rtttime = 0;
1909
                                        tp->snd_nxt = th->th_ack;
1910
                                        tp->snd_cwnd = tp->t_maxseg;
1911
                                        (void) tcp_output(tp);
1912
                                        tp->snd_cwnd = tp->snd_ssthresh +
1913
                                               tp->t_maxseg * tp->t_dupacks;
1914
                                        if (SEQ_GT(onxt, tp->snd_nxt))
1915
                                                tp->snd_nxt = onxt;
1916
                                        goto drop;
1917
                                } else if (tp->t_dupacks > tcprexmtthresh) {
1918
                                        tp->snd_cwnd += tp->t_maxseg;
1919
                                        (void) tcp_output(tp);
1920
                                        goto drop;
1921
                                }
1922
                        } else
1923
                                tp->t_dupacks = 0;
1924
                        break;
1925
                }
1926
                /*
1927
                 * If the congestion window was inflated to account
1928
                 * for the other side's cached packets, retract it.
1929
                 */
1930
                if (tcp_do_newreno == 0) {
1931
                        if (tp->t_dupacks >= tcprexmtthresh &&
1932
                                tp->snd_cwnd > tp->snd_ssthresh)
1933
                                tp->snd_cwnd = tp->snd_ssthresh;
1934
                        tp->t_dupacks = 0;
1935
                } else if (tp->t_dupacks >= tcprexmtthresh &&
1936
                    !tcp_newreno(tp, th)) {
1937
                        /*
1938
                         * Window inflation should have left us with approx.
1939
                         * snd_ssthresh outstanding data.  But in case we
1940
                         * would be inclined to send a burst, better to do
1941
                         * it via the slow start mechanism.
1942
                         */
1943
                        if (SEQ_GT(th->th_ack + tp->snd_ssthresh, tp->snd_max))
1944
                                tp->snd_cwnd =
1945
                                    tp->snd_max - th->th_ack + tp->t_maxseg;
1946
                        else
1947
                                tp->snd_cwnd = tp->snd_ssthresh;
1948
                        tp->t_dupacks = 0;
1949
                }
1950
                if (SEQ_GT(th->th_ack, tp->snd_max)) {
1951
                        tcpstat.tcps_rcvacktoomuch++;
1952
                        goto dropafterack;
1953
                }
1954
                /*
1955
                 *  If we reach this point, ACK is not a duplicate,
1956
                 *     i.e., it ACKs something we sent.
1957
                 */
1958
                if (tp->t_flags & TF_NEEDSYN) {
1959
                        /*
1960
                         * T/TCP: Connection was half-synchronized, and our
1961
                         * SYN has been ACK'd (so connection is now fully
1962
                         * synchronized).  Go to non-starred state,
1963
                         * increment snd_una for ACK of SYN, and check if
1964
                         * we can do window scaling.
1965
                         */
1966
                        tp->t_flags &= ~TF_NEEDSYN;
1967
                        tp->snd_una++;
1968
                        /* Do window scaling? */
1969
                        if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
1970
                                (TF_RCVD_SCALE|TF_REQ_SCALE)) {
1971
                                tp->snd_scale = tp->requested_s_scale;
1972
                                tp->rcv_scale = tp->request_r_scale;
1973
                        }
1974
                }
1975
 
1976
process_ACK:
1977
                acked = th->th_ack - tp->snd_una;
1978
                tcpstat.tcps_rcvackpack++;
1979
                tcpstat.tcps_rcvackbyte += acked;
1980
 
1981
                /*
1982
                 * If we just performed our first retransmit, and the ACK
1983
                 * arrives within our recovery window, then it was a mistake
1984
                 * to do the retransmit in the first place.  Recover our
1985
                 * original cwnd and ssthresh, and proceed to transmit where
1986
                 * we left off.
1987
                 */
1988
                if (tp->t_rxtshift == 1 && ticks < tp->t_badrxtwin) {
1989
                        tp->snd_cwnd = tp->snd_cwnd_prev;
1990
                        tp->snd_ssthresh = tp->snd_ssthresh_prev;
1991
                        tp->snd_nxt = tp->snd_max;
1992
                        tp->t_badrxtwin = 0;     /* XXX probably not required */
1993
                }
1994
 
1995
                /*
1996
                 * If we have a timestamp reply, update smoothed
1997
                 * round trip time.  If no timestamp is present but
1998
                 * transmit timer is running and timed sequence
1999
                 * number was acked, update smoothed round trip time.
2000
                 * Since we now have an rtt measurement, cancel the
2001
                 * timer backoff (cf., Phil Karn's retransmit alg.).
2002
                 * Recompute the initial retransmit timer.
2003
                 */
2004
                if (to.to_flag & TOF_TS)
2005
                        tcp_xmit_timer(tp, ticks - to.to_tsecr + 1);
2006
                else if (tp->t_rtttime && SEQ_GT(th->th_ack, tp->t_rtseq))
2007
                        tcp_xmit_timer(tp, ticks - tp->t_rtttime);
2008
 
2009
                /*
2010
                 * If all outstanding data is acked, stop retransmit
2011
                 * timer and remember to restart (more output or persist).
2012
                 * If there is more data to be acked, restart retransmit
2013
                 * timer, using current (possibly backed-off) value.
2014
                 */
2015
                if (th->th_ack == tp->snd_max) {
2016
                        callout_stop(tp->tt_rexmt);
2017
                        needoutput = 1;
2018
                } else if (!callout_active(tp->tt_persist))
2019
                        callout_reset(tp->tt_rexmt, tp->t_rxtcur,
2020
                                      tcp_timer_rexmt, tp);
2021
 
2022
                /*
2023
                 * If no data (only SYN) was ACK'd,
2024
                 *    skip rest of ACK processing.
2025
                 */
2026
                if (acked == 0)
2027
                        goto step6;
2028
 
2029
                /*
2030
                 * When new data is acked, open the congestion window.
2031
                 * If the window gives us less than ssthresh packets
2032
                 * in flight, open exponentially (maxseg per packet).
2033
                 * Otherwise open linearly: maxseg per window
2034
                 * (maxseg^2 / cwnd per packet).
2035
                 */
2036
                {
2037
                register u_int cw = tp->snd_cwnd;
2038
                register u_int incr = tp->t_maxseg;
2039
 
2040
                if (cw > tp->snd_ssthresh)
2041
                        incr = incr * incr / cw;
2042
                /*
2043
                 * If t_dupacks != 0 here, it indicates that we are still
2044
                 * in NewReno fast recovery mode, so we leave the congestion
2045
                 * window alone.
2046
                 */
2047
                if (tcp_do_newreno == 0 || tp->t_dupacks == 0)
2048
                        tp->snd_cwnd = min(cw + incr,TCP_MAXWIN<<tp->snd_scale);
2049
                }
2050
                if (acked > so->so_snd.sb_cc) {
2051
                        tp->snd_wnd -= so->so_snd.sb_cc;
2052
                        sbdrop(&so->so_snd, (int)so->so_snd.sb_cc);
2053
                        ourfinisacked = 1;
2054
                } else {
2055
                        sbdrop(&so->so_snd, acked);
2056
                        tp->snd_wnd -= acked;
2057
                        ourfinisacked = 0;
2058
                }
2059
                sowwakeup(so);
2060
                tp->snd_una = th->th_ack;
2061
                if (SEQ_LT(tp->snd_nxt, tp->snd_una))
2062
                        tp->snd_nxt = tp->snd_una;
2063
 
2064
                switch (tp->t_state) {
2065
 
2066
                /*
2067
                 * In FIN_WAIT_1 STATE in addition to the processing
2068
                 * for the ESTABLISHED state if our FIN is now acknowledged
2069
                 * then enter FIN_WAIT_2.
2070
                 */
2071
                case TCPS_FIN_WAIT_1:
2072
                        if (ourfinisacked) {
2073
                                /*
2074
                                 * If we can't receive any more
2075
                                 * data, then closing user can proceed.
2076
                                 * Starting the timer is contrary to the
2077
                                 * specification, but if we don't get a FIN
2078
                                 * we'll hang forever.
2079
                                 */
2080
                                if (so->so_state & SS_CANTRCVMORE) {
2081
                                        soisdisconnected(so);
2082
                                        callout_reset(tp->tt_2msl, tcp_maxidle,
2083
                                                      tcp_timer_2msl, tp);
2084
                                }
2085
                                tp->t_state = TCPS_FIN_WAIT_2;
2086
                        }
2087
                        break;
2088
 
2089
                /*
2090
                 * In CLOSING STATE in addition to the processing for
2091
                 * the ESTABLISHED state if the ACK acknowledges our FIN
2092
                 * then enter the TIME-WAIT state, otherwise ignore
2093
                 * the segment.
2094
                 */
2095
                case TCPS_CLOSING:
2096
                        if (ourfinisacked) {
2097
                                tp->t_state = TCPS_TIME_WAIT;
2098
                                tcp_canceltimers(tp);
2099
                                /* Shorten TIME_WAIT [RFC-1644, p.28] */
2100
                                if (tp->cc_recv != 0 &&
2101
                                    (ticks - tp->t_starttime) < tcp_msl) {
2102
                                        callout_reset(tp->tt_2msl,
2103
                                                      tp->t_rxtcur *
2104
                                                      TCPTV_TWTRUNC,
2105
                                                      tcp_timer_2msl, tp);
2106
                                }
2107
                                else {
2108
                                        callout_reset(tp->tt_2msl, 2 * tcp_msl,
2109
                                                      tcp_timer_2msl, tp);
2110
                                }
2111
                                soisdisconnected(so);
2112
                        }
2113
                        break;
2114
 
2115
                /*
2116
                 * In LAST_ACK, we may still be waiting for data to drain
2117
                 * and/or to be acked, as well as for the ack of our FIN.
2118
                 * If our FIN is now acknowledged, delete the TCB,
2119
                 * enter the closed state and return.
2120
                 */
2121
                case TCPS_LAST_ACK:
2122
                        if (ourfinisacked) {
2123
                                tp = tcp_close(tp);
2124
                                goto drop;
2125
                        }
2126
                        break;
2127
 
2128
                /*
2129
                 * In TIME_WAIT state the only thing that should arrive
2130
                 * is a retransmission of the remote FIN.  Acknowledge
2131
                 * it and restart the finack timer.
2132
                 */
2133
                case TCPS_TIME_WAIT:
2134
                        callout_reset(tp->tt_2msl, 2 * tcp_msl,
2135
                                      tcp_timer_2msl, tp);
2136
                        goto dropafterack;
2137
                }
2138
        }
2139
 
2140
step6:
2141
        /*
2142
         * Update window information.
2143
         * Don't look at window if no ACK: TAC's send garbage on first SYN.
2144
         */
2145
        if ((thflags & TH_ACK) &&
2146
            (SEQ_LT(tp->snd_wl1, th->th_seq) ||
2147
            (tp->snd_wl1 == th->th_seq && (SEQ_LT(tp->snd_wl2, th->th_ack) ||
2148
             (tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd))))) {
2149
                /* keep track of pure window updates */
2150
                if (tlen == 0 &&
2151
                    tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd)
2152
                        tcpstat.tcps_rcvwinupd++;
2153
                tp->snd_wnd = tiwin;
2154
                tp->snd_wl1 = th->th_seq;
2155
                tp->snd_wl2 = th->th_ack;
2156
                if (tp->snd_wnd > tp->max_sndwnd)
2157
                        tp->max_sndwnd = tp->snd_wnd;
2158
                needoutput = 1;
2159
        }
2160
 
2161
        /*
2162
         * Process segments with URG.
2163
         */
2164
        if ((thflags & TH_URG) && th->th_urp &&
2165
            TCPS_HAVERCVDFIN(tp->t_state) == 0) {
2166
                /*
2167
                 * This is a kludge, but if we receive and accept
2168
                 * random urgent pointers, we'll crash in
2169
                 * soreceive.  It's hard to imagine someone
2170
                 * actually wanting to send this much urgent data.
2171
                 */
2172
                if (th->th_urp + so->so_rcv.sb_cc > sb_max) {
2173
                        th->th_urp = 0;                  /* XXX */
2174
                        thflags &= ~TH_URG;             /* XXX */
2175
                        goto dodata;                    /* XXX */
2176
                }
2177
                /*
2178
                 * If this segment advances the known urgent pointer,
2179
                 * then mark the data stream.  This should not happen
2180
                 * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since
2181
                 * a FIN has been received from the remote side.
2182
                 * In these states we ignore the URG.
2183
                 *
2184
                 * According to RFC961 (Assigned Protocols),
2185
                 * the urgent pointer points to the last octet
2186
                 * of urgent data.  We continue, however,
2187
                 * to consider it to indicate the first octet
2188
                 * of data past the urgent section as the original
2189
                 * spec states (in one of two places).
2190
                 */
2191
                if (SEQ_GT(th->th_seq+th->th_urp, tp->rcv_up)) {
2192
                        tp->rcv_up = th->th_seq + th->th_urp;
2193
                        so->so_oobmark = so->so_rcv.sb_cc +
2194
                            (tp->rcv_up - tp->rcv_nxt) - 1;
2195
                        if (so->so_oobmark == 0)
2196
                                so->so_state |= SS_RCVATMARK;
2197
                        sohasoutofband(so);
2198
                        tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA);
2199
                }
2200
                /*
2201
                 * Remove out of band data so it doesn't get presented to user.
2202
                 * This can happen independent of advancing the URG pointer,
2203
                 * but if two URG's are pending at once, some out-of-band
2204
                 * data may creep in... ick.
2205
                 */
2206
                if (th->th_urp <= (u_long)tlen
2207
#ifdef SO_OOBINLINE
2208
                     && (so->so_options & SO_OOBINLINE) == 0
2209
#endif
2210
                     )
2211
                        tcp_pulloutofband(so, th, m,
2212
                                drop_hdrlen);   /* hdr drop is delayed */
2213
        } else
2214
                /*
2215
                 * If no out of band data is expected,
2216
                 * pull receive urgent pointer along
2217
                 * with the receive window.
2218
                 */
2219
                if (SEQ_GT(tp->rcv_nxt, tp->rcv_up))
2220
                        tp->rcv_up = tp->rcv_nxt;
2221
dodata:                                                 /* XXX */
2222
 
2223
        /*
2224
         * Process the segment text, merging it into the TCP sequencing queue,
2225
         * and arranging for acknowledgment of receipt if necessary.
2226
         * This process logically involves adjusting tp->rcv_wnd as data
2227
         * is presented to the user (this happens in tcp_usrreq.c,
2228
         * case PRU_RCVD).  If a FIN has already been received on this
2229
         * connection then we just ignore the text.
2230
         */
2231
        if ((tlen || (thflags&TH_FIN)) &&
2232
            TCPS_HAVERCVDFIN(tp->t_state) == 0) {
2233
                m_adj(m, drop_hdrlen);  /* delayed header drop */
2234
                /*
2235
                 * Insert segment which includes th into reassembly queue of tcp with
2236
                 * control block tp.  Return TH_FIN if reassembly now includes
2237
                 * a segment with FIN.  This handles the common case inline (segment
2238
                 * is the next to be received on an established connection, and the
2239
                 * queue is empty), avoiding linkage into and removal from the queue
2240
                 * and repetition of various conversions.
2241
                 * Set DELACK for segments received in order, but ack immediately
2242
                 * when segments are out of order (so fast retransmit can work).
2243
                 */
2244
                if (th->th_seq == tp->rcv_nxt &&
2245
                    LIST_EMPTY(&tp->t_segq) &&
2246
                    TCPS_HAVEESTABLISHED(tp->t_state)) {
2247
                        if (DELAY_ACK(tp))
2248
                                callout_reset(tp->tt_delack, tcp_delacktime,
2249
                                    tcp_timer_delack, tp);
2250
                        else
2251
                                tp->t_flags |= TF_ACKNOW;
2252
                        tp->rcv_nxt += tlen;
2253
                        thflags = th->th_flags & TH_FIN;
2254
                        tcpstat.tcps_rcvpack++;
2255
                        tcpstat.tcps_rcvbyte += tlen;
2256
                        ND6_HINT(tp);
2257
                        sbappend(&so->so_rcv, m);
2258
                        sorwakeup(so);
2259
                } else {
2260
                        thflags = tcp_reass(tp, th, &tlen, m);
2261
                        tp->t_flags |= TF_ACKNOW;
2262
                }
2263
 
2264
                /*
2265
                 * Note the amount of data that peer has sent into
2266
                 * our window, in order to estimate the sender's
2267
                 * buffer size.
2268
                 */
2269
                len = so->so_rcv.sb_hiwat - (tp->rcv_adv - tp->rcv_nxt);
2270
        } else {
2271
                m_freem(m);
2272
                thflags &= ~TH_FIN;
2273
        }
2274
 
2275
        /*
2276
         * If FIN is received ACK the FIN and let the user know
2277
         * that the connection is closing.
2278
         */
2279
        if (thflags & TH_FIN) {
2280
                if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
2281
                        socantrcvmore(so);
2282
                        /*
2283
                         *  If connection is half-synchronized
2284
                         *  (ie NEEDSYN flag on) then delay ACK,
2285
                         *  so it may be piggybacked when SYN is sent.
2286
                         *  Otherwise, since we received a FIN then no
2287
                         *  more input can be expected, send ACK now.
2288
                         */
2289
                        if (DELAY_ACK(tp) && (tp->t_flags & TF_NEEDSYN))
2290
                                callout_reset(tp->tt_delack, tcp_delacktime,
2291
                                    tcp_timer_delack, tp);
2292
                        else
2293
                                tp->t_flags |= TF_ACKNOW;
2294
                        tp->rcv_nxt++;
2295
                }
2296
                switch (tp->t_state) {
2297
 
2298
                /*
2299
                 * In SYN_RECEIVED and ESTABLISHED STATES
2300
                 * enter the CLOSE_WAIT state.
2301
                 */
2302
                case TCPS_SYN_RECEIVED:
2303
                        tp->t_starttime = ticks;
2304
                        /*FALLTHROUGH*/
2305
                case TCPS_ESTABLISHED:
2306
                        tp->t_state = TCPS_CLOSE_WAIT;
2307
                        break;
2308
 
2309
                /*
2310
                 * If still in FIN_WAIT_1 STATE FIN has not been acked so
2311
                 * enter the CLOSING state.
2312
                 */
2313
                case TCPS_FIN_WAIT_1:
2314
                        tp->t_state = TCPS_CLOSING;
2315
                        break;
2316
 
2317
                /*
2318
                 * In FIN_WAIT_2 state enter the TIME_WAIT state,
2319
                 * starting the time-wait timer, turning off the other
2320
                 * standard timers.
2321
                 */
2322
                case TCPS_FIN_WAIT_2:
2323
                        tp->t_state = TCPS_TIME_WAIT;
2324
                        tcp_canceltimers(tp);
2325
                        /* Shorten TIME_WAIT [RFC-1644, p.28] */
2326
                        if (tp->cc_recv != 0 &&
2327
                            (ticks - tp->t_starttime) < tcp_msl) {
2328
                                callout_reset(tp->tt_2msl,
2329
                                              tp->t_rxtcur * TCPTV_TWTRUNC,
2330
                                              tcp_timer_2msl, tp);
2331
                                /* For transaction client, force ACK now. */
2332
                                tp->t_flags |= TF_ACKNOW;
2333
                        }
2334
                        else {
2335
                                callout_reset(tp->tt_2msl, 2 * tcp_msl,
2336
                                              tcp_timer_2msl, tp);
2337
                        }
2338
                        soisdisconnected(so);
2339
                        break;
2340
 
2341
                /*
2342
                 * In TIME_WAIT state restart the 2 MSL time_wait timer.
2343
                 */
2344
                case TCPS_TIME_WAIT:
2345
                        callout_reset(tp->tt_2msl, 2 * tcp_msl,
2346
                                      tcp_timer_2msl, tp);
2347
                        break;
2348
                }
2349
        }
2350
#ifdef TCPDEBUG
2351
        if (so->so_options & SO_DEBUG)
2352
                tcp_trace(TA_INPUT, ostate, tp, (void *)tcp_saveipgen,
2353
                          &tcp_savetcp, 0);
2354
#endif
2355
 
2356
        /*
2357
         * Return any desired output.
2358
         */
2359
        if (needoutput || (tp->t_flags & TF_ACKNOW))
2360
                (void) tcp_output(tp);
2361
        return;
2362
 
2363
dropafterack:
2364
        /*
2365
         * Generate an ACK dropping incoming segment if it occupies
2366
         * sequence space, where the ACK reflects our state.
2367
         *
2368
         * We can now skip the test for the RST flag since all
2369
         * paths to this code happen after packets containing
2370
         * RST have been dropped.
2371
         *
2372
         * In the SYN-RECEIVED state, don't send an ACK unless the
2373
         * segment we received passes the SYN-RECEIVED ACK test.
2374
         * If it fails send a RST.  This breaks the loop in the
2375
         * "LAND" DoS attack, and also prevents an ACK storm
2376
         * between two listening ports that have been sent forged
2377
         * SYN segments, each with the source address of the other.
2378
         */
2379
        if (tp->t_state == TCPS_SYN_RECEIVED && (thflags & TH_ACK) &&
2380
            (SEQ_GT(tp->snd_una, th->th_ack) ||
2381
             SEQ_GT(th->th_ack, tp->snd_max)) ) {
2382
                rstreason = BANDLIM_RST_OPENPORT;
2383
                goto dropwithreset;
2384
        }
2385
#ifdef TCPDEBUG
2386
        if (so->so_options & SO_DEBUG)
2387
                tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen,
2388
                          &tcp_savetcp, 0);
2389
#endif
2390
        m_freem(m);
2391
        tp->t_flags |= TF_ACKNOW;
2392
        (void) tcp_output(tp);
2393
        return;
2394
 
2395
dropwithreset:
2396
        /*
2397
         * Generate a RST, dropping incoming segment.
2398
         * Make ACK acceptable to originator of segment.
2399
         * Don't bother to respond if destination was broadcast/multicast.
2400
         */
2401
        if ((thflags & TH_RST) || m->m_flags & (M_BCAST|M_MCAST))
2402
                goto drop;
2403
#ifdef INET6
2404
        if (isipv6) {
2405
                if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
2406
                    IN6_IS_ADDR_MULTICAST(&ip6->ip6_src))
2407
                        goto drop;
2408
        } else
2409
#endif /* INET6 */
2410
        if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
2411
            IN_MULTICAST(ntohl(ip->ip_src.s_addr)) ||
2412
            ip->ip_src.s_addr == htonl(INADDR_BROADCAST))
2413
                goto drop;
2414
        /* IPv6 anycast check is done at tcp6_input() */
2415
 
2416
        /*
2417
         * Perform bandwidth limiting.
2418
         */
2419
#ifdef ICMP_BANDLIM
2420
        if (badport_bandlim(rstreason) < 0)
2421
                goto drop;
2422
#endif
2423
 
2424
#ifdef TCPDEBUG
2425
        if (tp == 0 || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
2426
                tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen,
2427
                          &tcp_savetcp, 0);
2428
#endif
2429
        if (thflags & TH_ACK)
2430
                /* mtod() below is safe as long as hdr dropping is delayed */
2431
                tcp_respond(tp, mtod(m, void *), th, m, (tcp_seq)0, th->th_ack,
2432
                            TH_RST);
2433
        else {
2434
                if (thflags & TH_SYN)
2435
                        tlen++;
2436
                /* mtod() below is safe as long as hdr dropping is delayed */
2437
                tcp_respond(tp, mtod(m, void *), th, m, th->th_seq+tlen,
2438
                            (tcp_seq)0, TH_RST|TH_ACK);
2439
        }
2440
        /* destroy temporarily created socket */
2441
        if (dropsocket)
2442
                (void) soabort(so);
2443
        return;
2444
 
2445
drop:
2446
        /*
2447
         * Drop space held by incoming segment and return.
2448
         */
2449
#ifdef TCPDEBUG
2450
        if (tp == 0 || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
2451
                tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen,
2452
                          &tcp_savetcp, 0);
2453
#endif
2454
        m_freem(m);
2455
        /* destroy temporarily created socket */
2456
        if (dropsocket)
2457
                (void) soabort(so);
2458
        return;
2459
}
2460
 
2461
static void
2462
tcp_dooptions(tp, cp, cnt, th, to)
2463
        struct tcpcb *tp;
2464
        u_char *cp;
2465
        int cnt;
2466
        struct tcphdr *th;
2467
        struct tcpopt *to;
2468
{
2469
        u_short mss = 0;
2470
        int opt, optlen;
2471
 
2472
        for (; cnt > 0; cnt -= optlen, cp += optlen) {
2473
                opt = cp[0];
2474
                if (opt == TCPOPT_EOL)
2475
                        break;
2476
                if (opt == TCPOPT_NOP)
2477
                        optlen = 1;
2478
                else {
2479
                        if (cnt < 2)
2480
                                break;
2481
                        optlen = cp[1];
2482
                        if (optlen < 2 || optlen > cnt)
2483
                                break;
2484
                }
2485
                switch (opt) {
2486
 
2487
                default:
2488
                        continue;
2489
 
2490
                case TCPOPT_MAXSEG:
2491
                        if (optlen != TCPOLEN_MAXSEG)
2492
                                continue;
2493
                        if (!(th->th_flags & TH_SYN))
2494
                                continue;
2495
                        bcopy((char *) cp + 2, (char *) &mss, sizeof(mss));
2496
                        NTOHS(mss);
2497
                        break;
2498
 
2499
                case TCPOPT_WINDOW:
2500
                        if (optlen != TCPOLEN_WINDOW)
2501
                                continue;
2502
                        if (!(th->th_flags & TH_SYN))
2503
                                continue;
2504
                        tp->t_flags |= TF_RCVD_SCALE;
2505
                        tp->requested_s_scale = min(cp[2], TCP_MAX_WINSHIFT);
2506
                        break;
2507
 
2508
                case TCPOPT_TIMESTAMP:
2509
                        if (optlen != TCPOLEN_TIMESTAMP)
2510
                                continue;
2511
                        to->to_flag |= TOF_TS;
2512
                        bcopy((char *)cp + 2,
2513
                            (char *)&to->to_tsval, sizeof(to->to_tsval));
2514
                        NTOHL(to->to_tsval);
2515
                        bcopy((char *)cp + 6,
2516
                            (char *)&to->to_tsecr, sizeof(to->to_tsecr));
2517
                        NTOHL(to->to_tsecr);
2518
 
2519
                        /*
2520
                         * A timestamp received in a SYN makes
2521
                         * it ok to send timestamp requests and replies.
2522
                         */
2523
                        if (th->th_flags & TH_SYN) {
2524
                                tp->t_flags |= TF_RCVD_TSTMP;
2525
                                tp->ts_recent = to->to_tsval;
2526
                                tp->ts_recent_age = ticks;
2527
                        }
2528
                        break;
2529
                case TCPOPT_CC:
2530
                        if (optlen != TCPOLEN_CC)
2531
                                continue;
2532
                        to->to_flag |= TOF_CC;
2533
                        bcopy((char *)cp + 2,
2534
                            (char *)&to->to_cc, sizeof(to->to_cc));
2535
                        NTOHL(to->to_cc);
2536
                        /*
2537
                         * A CC or CC.new option received in a SYN makes
2538
                         * it ok to send CC in subsequent segments.
2539
                         */
2540
                        if (th->th_flags & TH_SYN)
2541
                                tp->t_flags |= TF_RCVD_CC;
2542
                        break;
2543
                case TCPOPT_CCNEW:
2544
                        if (optlen != TCPOLEN_CC)
2545
                                continue;
2546
                        if (!(th->th_flags & TH_SYN))
2547
                                continue;
2548
                        to->to_flag |= TOF_CCNEW;
2549
                        bcopy((char *)cp + 2,
2550
                            (char *)&to->to_cc, sizeof(to->to_cc));
2551
                        NTOHL(to->to_cc);
2552
                        /*
2553
                         * A CC or CC.new option received in a SYN makes
2554
                         * it ok to send CC in subsequent segments.
2555
                         */
2556
                        tp->t_flags |= TF_RCVD_CC;
2557
                        break;
2558
                case TCPOPT_CCECHO:
2559
                        if (optlen != TCPOLEN_CC)
2560
                                continue;
2561
                        if (!(th->th_flags & TH_SYN))
2562
                                continue;
2563
                        to->to_flag |= TOF_CCECHO;
2564
                        bcopy((char *)cp + 2,
2565
                            (char *)&to->to_ccecho, sizeof(to->to_ccecho));
2566
                        NTOHL(to->to_ccecho);
2567
                        break;
2568
                }
2569
        }
2570
        if (th->th_flags & TH_SYN)
2571
                tcp_mss(tp, mss);       /* sets t_maxseg */
2572
}
2573
 
2574
/*
2575
 * Pull out of band byte out of a segment so
2576
 * it doesn't appear in the user's data queue.
2577
 * It is still reflected in the segment length for
2578
 * sequencing purposes.
2579
 */
2580
static void
2581
tcp_pulloutofband(so, th, m, off)
2582
        struct socket *so;
2583
        struct tcphdr *th;
2584
        register struct mbuf *m;
2585
        int off;                /* delayed to be droped hdrlen */
2586
{
2587
        int cnt = off + th->th_urp - 1;
2588
 
2589
        while (cnt >= 0) {
2590
                if (m->m_len > cnt) {
2591
                        char *cp = mtod(m, caddr_t) + cnt;
2592
                        struct tcpcb *tp = sototcpcb(so);
2593
 
2594
                        tp->t_iobc = *cp;
2595
                        tp->t_oobflags |= TCPOOB_HAVEDATA;
2596
                        bcopy(cp+1, cp, (unsigned)(m->m_len - cnt - 1));
2597
                        m->m_len--;
2598
                        if (m->m_flags & M_PKTHDR)
2599
                                m->m_pkthdr.len--;
2600
                        return;
2601
                }
2602
                cnt -= m->m_len;
2603
                m = m->m_next;
2604
                if (m == 0)
2605
                        break;
2606
        }
2607
        panic("tcp_pulloutofband");
2608
}
2609
 
2610
/*
2611
 * Collect new round-trip time estimate
2612
 * and update averages and current timeout.
2613
 */
2614
static void
2615
tcp_xmit_timer(tp, rtt)
2616
        register struct tcpcb *tp;
2617
        int rtt;
2618
{
2619
        register int delta;
2620
 
2621
        tcpstat.tcps_rttupdated++;
2622
        tp->t_rttupdated++;
2623
        if (tp->t_srtt != 0) {
2624
                /*
2625
                 * srtt is stored as fixed point with 5 bits after the
2626
                 * binary point (i.e., scaled by 8).  The following magic
2627
                 * is equivalent to the smoothing algorithm in rfc793 with
2628
                 * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed
2629
                 * point).  Adjust rtt to origin 0.
2630
                 */
2631
                delta = ((rtt - 1) << TCP_DELTA_SHIFT)
2632
                        - (tp->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT));
2633
 
2634
                if ((tp->t_srtt += delta) <= 0)
2635
                        tp->t_srtt = 1;
2636
 
2637
                /*
2638
                 * We accumulate a smoothed rtt variance (actually, a
2639
                 * smoothed mean difference), then set the retransmit
2640
                 * timer to smoothed rtt + 4 times the smoothed variance.
2641
                 * rttvar is stored as fixed point with 4 bits after the
2642
                 * binary point (scaled by 16).  The following is
2643
                 * equivalent to rfc793 smoothing with an alpha of .75
2644
                 * (rttvar = rttvar*3/4 + |delta| / 4).  This replaces
2645
                 * rfc793's wired-in beta.
2646
                 */
2647
                if (delta < 0)
2648
                        delta = -delta;
2649
                delta -= tp->t_rttvar >> (TCP_RTTVAR_SHIFT - TCP_DELTA_SHIFT);
2650
                if ((tp->t_rttvar += delta) <= 0)
2651
                        tp->t_rttvar = 1;
2652
        } else {
2653
                /*
2654
                 * No rtt measurement yet - use the unsmoothed rtt.
2655
                 * Set the variance to half the rtt (so our first
2656
                 * retransmit happens at 3*rtt).
2657
                 */
2658
                tp->t_srtt = rtt << TCP_RTT_SHIFT;
2659
                tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1);
2660
        }
2661
        tp->t_rtttime = 0;
2662
        tp->t_rxtshift = 0;
2663
 
2664
        /*
2665
         * the retransmit should happen at rtt + 4 * rttvar.
2666
         * Because of the way we do the smoothing, srtt and rttvar
2667
         * will each average +1/2 tick of bias.  When we compute
2668
         * the retransmit timer, we want 1/2 tick of rounding and
2669
         * 1 extra tick because of +-1/2 tick uncertainty in the
2670
         * firing of the timer.  The bias will give us exactly the
2671
         * 1.5 tick we need.  But, because the bias is
2672
         * statistical, we have to test that we don't drop below
2673
         * the minimum feasible timer (which is 2 ticks).
2674
         */
2675
        TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
2676
                      max(tp->t_rttmin, rtt + 2), TCPTV_REXMTMAX);
2677
 
2678
        /*
2679
         * We received an ack for a packet that wasn't retransmitted;
2680
         * it is probably safe to discard any error indications we've
2681
         * received recently.  This isn't quite right, but close enough
2682
         * for now (a route might have failed after we sent a segment,
2683
         * and the return path might not be symmetrical).
2684
         */
2685
        tp->t_softerror = 0;
2686
}
2687
 
2688
/*
2689
 * Determine a reasonable value for maxseg size.
2690
 * If the route is known, check route for mtu.
2691
 * If none, use an mss that can be handled on the outgoing
2692
 * interface without forcing IP to fragment; if bigger than
2693
 * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES
2694
 * to utilize large mbufs.  If no route is found, route has no mtu,
2695
 * or the destination isn't local, use a default, hopefully conservative
2696
 * size (usually 512 or the default IP max size, but no more than the mtu
2697
 * of the interface), as we can't discover anything about intervening
2698
 * gateways or networks.  We also initialize the congestion/slow start
2699
 * window to be a single segment if the destination isn't local.
2700
 * While looking at the routing entry, we also initialize other path-dependent
2701
 * parameters from pre-set or cached values in the routing entry.
2702
 *
2703
 * Also take into account the space needed for options that we
2704
 * send regularly.  Make maxseg shorter by that amount to assure
2705
 * that we can send maxseg amount of data even when the options
2706
 * are present.  Store the upper limit of the length of options plus
2707
 * data in maxopd.
2708
 *
2709
 * NOTE that this routine is only called when we process an incoming
2710
 * segment, for outgoing segments only tcp_mssopt is called.
2711
 *
2712
 * In case of T/TCP, we call this routine during implicit connection
2713
 * setup as well (offer = -1), to initialize maxseg from the cached
2714
 * MSS of our peer.
2715
 */
2716
void
2717
tcp_mss(tp, offer)
2718
        struct tcpcb *tp;
2719
        int offer;
2720
{
2721
        register struct rtentry *rt;
2722
        struct ifnet *ifp;
2723
        register int rtt, mss;
2724
        u_long bufsize;
2725
        struct inpcb *inp;
2726
        struct socket *so;
2727
        struct rmxp_tao *taop;
2728
        int origoffer = offer;
2729
#ifdef INET6
2730
        int isipv6;
2731
        int min_protoh;
2732
#endif
2733
 
2734
        inp = tp->t_inpcb;
2735
#ifdef INET6
2736
        isipv6 = ((inp->inp_vflag & INP_IPV6) != 0) ? 1 : 0;
2737
        min_protoh = isipv6 ? sizeof (struct ip6_hdr) + sizeof (struct tcphdr)
2738
                            : sizeof (struct tcpiphdr);
2739
#else
2740
#define min_protoh  (sizeof (struct tcpiphdr))
2741
#endif
2742
#ifdef INET6
2743
        if (isipv6)
2744
                rt = tcp_rtlookup6(inp);
2745
        else
2746
#endif
2747
        rt = tcp_rtlookup(inp);
2748
        if (rt == NULL) {
2749
                tp->t_maxopd = tp->t_maxseg =
2750
#ifdef INET6
2751
                isipv6 ? tcp_v6mssdflt :
2752
#endif /* INET6 */
2753
                tcp_mssdflt;
2754
                return;
2755
        }
2756
        ifp = rt->rt_ifp;
2757
        so = inp->inp_socket;
2758
 
2759
        taop = rmx_taop(rt->rt_rmx);
2760
        /*
2761
         * Offer == -1 means that we didn't receive SYN yet,
2762
         * use cached value in that case;
2763
         */
2764
        if (offer == -1)
2765
                offer = taop->tao_mssopt;
2766
        /*
2767
         * Offer == 0 means that there was no MSS on the SYN segment,
2768
         * in this case we use tcp_mssdflt.
2769
         */
2770
        if (offer == 0)
2771
                offer =
2772
#ifdef INET6
2773
                        isipv6 ? tcp_v6mssdflt :
2774
#endif /* INET6 */
2775
                        tcp_mssdflt;
2776
        else
2777
                /*
2778
                 * Sanity check: make sure that maxopd will be large
2779
                 * enough to allow some data on segments even is the
2780
                 * all the option space is used (40bytes).  Otherwise
2781
                 * funny things may happen in tcp_output.
2782
                 */
2783
                offer = max(offer, 64);
2784
        taop->tao_mssopt = offer;
2785
 
2786
        /*
2787
         * While we're here, check if there's an initial rtt
2788
         * or rttvar.  Convert from the route-table units
2789
         * to scaled multiples of the slow timeout timer.
2790
         */
2791
        if (tp->t_srtt == 0 && (rtt = rt->rt_rmx.rmx_rtt)) {
2792
                /*
2793
                 * XXX the lock bit for RTT indicates that the value
2794
                 * is also a minimum value; this is subject to time.
2795
                 */
2796
                if (rt->rt_rmx.rmx_locks & RTV_RTT)
2797
                        tp->t_rttmin = rtt / (RTM_RTTUNIT / hz);
2798
                tp->t_srtt = rtt / (RTM_RTTUNIT / (hz * TCP_RTT_SCALE));
2799
                tcpstat.tcps_usedrtt++;
2800
                if (rt->rt_rmx.rmx_rttvar) {
2801
                        tp->t_rttvar = rt->rt_rmx.rmx_rttvar /
2802
                            (RTM_RTTUNIT / (hz * TCP_RTTVAR_SCALE));
2803
                        tcpstat.tcps_usedrttvar++;
2804
                } else {
2805
                        /* default variation is +- 1 rtt */
2806
                        tp->t_rttvar =
2807
                            tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE;
2808
                }
2809
                TCPT_RANGESET(tp->t_rxtcur,
2810
                              ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
2811
                              tp->t_rttmin, TCPTV_REXMTMAX);
2812
        }
2813
        /*
2814
         * if there's an mtu associated with the route, use it
2815
         * else, use the link mtu.
2816
         */
2817
        if (rt->rt_rmx.rmx_mtu)
2818
                mss = rt->rt_rmx.rmx_mtu - min_protoh;
2819
        else
2820
        {
2821
                mss =
2822
#ifdef INET6
2823
                        (isipv6 ? nd_ifinfo[rt->rt_ifp->if_index].linkmtu :
2824
#endif
2825
                         ifp->if_mtu
2826
#ifdef INET6
2827
                         )
2828
#endif
2829
                        - min_protoh;
2830
#ifdef INET6
2831
                if (isipv6) {
2832
                        if (!in6_localaddr(&inp->in6p_faddr))
2833
                                mss = min(mss, tcp_v6mssdflt);
2834
                } else
2835
#endif
2836
                if (!in_localaddr(inp->inp_faddr))
2837
                        mss = min(mss, tcp_mssdflt);
2838
        }
2839
        mss = min(mss, offer);
2840
        /*
2841
         * maxopd stores the maximum length of data AND options
2842
         * in a segment; maxseg is the amount of data in a normal
2843
         * segment.  We need to store this value (maxopd) apart
2844
         * from maxseg, because now every segment carries options
2845
         * and thus we normally have somewhat less data in segments.
2846
         */
2847
        tp->t_maxopd = mss;
2848
 
2849
        /*
2850
         * In case of T/TCP, origoffer==-1 indicates, that no segments
2851
         * were received yet.  In this case we just guess, otherwise
2852
         * we do the same as before T/TCP.
2853
         */
2854
        if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
2855
            (origoffer == -1 ||
2856
             (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP))
2857
                mss -= TCPOLEN_TSTAMP_APPA;
2858
        if ((tp->t_flags & (TF_REQ_CC|TF_NOOPT)) == TF_REQ_CC &&
2859
            (origoffer == -1 ||
2860
             (tp->t_flags & TF_RCVD_CC) == TF_RCVD_CC))
2861
                mss -= TCPOLEN_CC_APPA;
2862
 
2863
#if     (MCLBYTES & (MCLBYTES - 1)) == 0
2864
                if (mss > MCLBYTES)
2865
                        mss &= ~(MCLBYTES-1);
2866
#else
2867
                if (mss > MCLBYTES)
2868
                        mss = mss / MCLBYTES * MCLBYTES;
2869
#endif
2870
        /*
2871
         * If there's a pipesize, change the socket buffer
2872
         * to that size.  Make the socket buffers an integral
2873
         * number of mss units; if the mss is larger than
2874
         * the socket buffer, decrease the mss.
2875
         */
2876
#ifdef RTV_SPIPE
2877
        if ((bufsize = rt->rt_rmx.rmx_sendpipe) == 0)
2878
#endif
2879
                bufsize = so->so_snd.sb_hiwat;
2880
        if (bufsize < mss)
2881
                mss = bufsize;
2882
        else {
2883
                bufsize = roundup(bufsize, mss);
2884
                if (bufsize > sb_max)
2885
                        bufsize = sb_max;
2886
                (void)sbreserve(&so->so_snd, bufsize, so, NULL);
2887
        }
2888
        tp->t_maxseg = mss;
2889
 
2890
#ifdef RTV_RPIPE
2891
        if ((bufsize = rt->rt_rmx.rmx_recvpipe) == 0)
2892
#endif
2893
                bufsize = so->so_rcv.sb_hiwat;
2894
        if (bufsize > mss) {
2895
                bufsize = roundup(bufsize, mss);
2896
                if (bufsize > sb_max)
2897
                        bufsize = sb_max;
2898
                (void)sbreserve(&so->so_rcv, bufsize, so, NULL);
2899
        }
2900
 
2901
        /*
2902
         * Set the slow-start flight size depending on whether this
2903
         * is a local network or not.
2904
         */
2905
        if (
2906
#ifdef INET6
2907
            (isipv6 && in6_localaddr(&inp->in6p_faddr)) ||
2908
            (!isipv6 &&
2909
#endif
2910
             in_localaddr(inp->inp_faddr)
2911
#ifdef INET6
2912
             )
2913
#endif
2914
            )
2915
                tp->snd_cwnd = mss * ss_fltsz_local;
2916
        else
2917
                tp->snd_cwnd = mss * ss_fltsz;
2918
 
2919
        if (rt->rt_rmx.rmx_ssthresh) {
2920
                /*
2921
                 * There's some sort of gateway or interface
2922
                 * buffer limit on the path.  Use this to set
2923
                 * the slow start threshhold, but set the
2924
                 * threshold to no less than 2*mss.
2925
                 */
2926
                tp->snd_ssthresh = max(2 * mss, rt->rt_rmx.rmx_ssthresh);
2927
                tcpstat.tcps_usedssthresh++;
2928
        }
2929
}
2930
 
2931
/*
2932
 * Determine the MSS option to send on an outgoing SYN.
2933
 */
2934
int
2935
tcp_mssopt(tp)
2936
        struct tcpcb *tp;
2937
{
2938
        struct rtentry *rt;
2939
#ifdef INET6
2940
        int isipv6;
2941
        int min_protoh;
2942
#endif
2943
 
2944
#ifdef INET6
2945
        isipv6 = ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) ? 1 : 0;
2946
        min_protoh = isipv6 ? sizeof (struct ip6_hdr) + sizeof (struct tcphdr)
2947
                            : sizeof (struct tcpiphdr);
2948
#else
2949
#define min_protoh  (sizeof (struct tcpiphdr))
2950
#endif
2951
#ifdef INET6
2952
        if (isipv6)
2953
                rt = tcp_rtlookup6(tp->t_inpcb);
2954
        else
2955
#endif /* INET6 */
2956
        rt = tcp_rtlookup(tp->t_inpcb);
2957
        if (rt == NULL)
2958
                return
2959
#ifdef INET6
2960
                        isipv6 ? tcp_v6mssdflt :
2961
#endif /* INET6 */
2962
                        tcp_mssdflt;
2963
 
2964
        return rt->rt_ifp->if_mtu - min_protoh;
2965
}
2966
 
2967
 
2968
/*
2969
 * Checks for partial ack.  If partial ack arrives, force the retransmission
2970
 * of the next unacknowledged segment, do not clear tp->t_dupacks, and return
2971
 * 1.  By setting snd_nxt to ti_ack, this forces retransmission timer to
2972
 * be started again.  If the ack advances at least to tp->snd_recover, return 0.
2973
 */
2974
static int
2975
tcp_newreno(tp, th)
2976
        struct tcpcb *tp;
2977
        struct tcphdr *th;
2978
{
2979
        if (SEQ_LT(th->th_ack, tp->snd_recover)) {
2980
                tcp_seq onxt = tp->snd_nxt;
2981
                u_long  ocwnd = tp->snd_cwnd;
2982
 
2983
                callout_stop(tp->tt_rexmt);
2984
                tp->t_rtttime = 0;
2985
                tp->snd_nxt = th->th_ack;
2986
                /*
2987
                 * Set snd_cwnd to one segment beyond acknowledged offset
2988
                 * (tp->snd_una has not yet been updated when this function
2989
                 *  is called)
2990
                 */
2991
                tp->snd_cwnd = tp->t_maxseg + (th->th_ack - tp->snd_una);
2992
                (void) tcp_output(tp);
2993
                tp->snd_cwnd = ocwnd;
2994
                if (SEQ_GT(onxt, tp->snd_nxt))
2995
                        tp->snd_nxt = onxt;
2996
                /*
2997
                 * Partial window deflation.  Relies on fact that tp->snd_una
2998
                 * not updated yet.
2999
                 */
3000
                tp->snd_cwnd -= (th->th_ack - tp->snd_una - tp->t_maxseg);
3001
                return (1);
3002
        }
3003
        return (0);
3004
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.