OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

[/] [or1k/] [trunk/] [ecos-2.0/] [packages/] [net/] [tcpip/] [v2_0/] [src/] [sys/] [netinet/] [tcp_input.c] - Blame information for rev 1765

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 1254 phoenix
//==========================================================================
2
//
3
//      sys/netinet/tcp_input.c
4
//
5
//     
6
//
7
//==========================================================================
8
//####BSDCOPYRIGHTBEGIN####
9
//
10
// -------------------------------------------
11
//
12
// Portions of this software may have been derived from OpenBSD or other sources,
13
// and are covered by the appropriate copyright disclaimers included herein.
14
//
15
// -------------------------------------------
16
//
17
//####BSDCOPYRIGHTEND####
18
//==========================================================================
19
//#####DESCRIPTIONBEGIN####
20
//
21
// Author(s):    gthomas
22
// Contributors: gthomas
23
// Date:         2000-01-10
24
// Purpose:      
25
// Description:  
26
//              
27
//
28
//####DESCRIPTIONEND####
29
//
30
//==========================================================================
31
 
32
 
33
/*      $OpenBSD: tcp_input.c,v 1.54 1999/12/15 16:37:20 provos Exp $   */
34
/*      $NetBSD: tcp_input.c,v 1.23 1996/02/13 23:43:44 christos Exp $  */
35
 
36
/*
37
 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994
38
 *      The Regents of the University of California.  All rights reserved.
39
 *
40
 * Redistribution and use in source and binary forms, with or without
41
 * modification, are permitted provided that the following conditions
42
 * are met:
43
 * 1. Redistributions of source code must retain the above copyright
44
 *    notice, this list of conditions and the following disclaimer.
45
 * 2. Redistributions in binary form must reproduce the above copyright
46
 *    notice, this list of conditions and the following disclaimer in the
47
 *    documentation and/or other materials provided with the distribution.
48
 * 3. All advertising materials mentioning features or use of this software
49
 *    must display the following acknowledgement:
50
 *      This product includes software developed by the University of
51
 *      California, Berkeley and its contributors.
52
 * 4. Neither the name of the University nor the names of its contributors
53
 *    may be used to endorse or promote products derived from this software
54
 *    without specific prior written permission.
55
 *
56
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
57
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
58
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
59
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
60
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
61
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
62
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
63
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
64
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
65
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
66
 * SUCH DAMAGE.
67
 *
68
 *      @(#)tcp_input.c 8.5 (Berkeley) 4/10/94
69
 */
70
 
71
/*
72
%%% portions-copyright-nrl-95
73
Portions of this software are Copyright 1995-1998 by Randall Atkinson,
74
Ronald Lee, Daniel McDonald, Bao Phan, and Chris Winters. All Rights
75
Reserved. All rights under this copyright have been assigned to the US
76
Naval Research Laboratory (NRL). The NRL Copyright Notice and License
77
Agreement Version 1.1 (January 17, 1995) applies to these portions of the
78
software.
79
You should have received a copy of the license with this software. If you
80
didn't get a copy, you may request one from <license@ipv6.nrl.navy.mil>.
81
*/
82
 
83
#ifndef TUBA_INCLUDE
84
#include <sys/param.h>
85
#ifndef __ECOS
86
#include <sys/systm.h>
87
#endif
88
#include <sys/malloc.h>
89
#include <sys/mbuf.h>
90
#include <sys/protosw.h>
91
#include <sys/socket.h>
92
#include <sys/socketvar.h>
93
#include <sys/errno.h>
94
 
95
#include <net/if.h>
96
#include <net/route.h>
97
 
98
#include <netinet/in.h>
99
#include <netinet/in_systm.h>
100
#include <netinet/ip.h>
101
#include <netinet/in_pcb.h>
102
#include <netinet/ip_var.h>
103
#include <netinet/tcp.h>
104
#include <netinet/tcp_fsm.h>
105
#include <netinet/tcp_seq.h>
106
#include <netinet/tcp_timer.h>
107
#include <netinet/tcp_var.h>
108
#include <netinet/tcpip.h>
109
#include <netinet/tcp_debug.h>
110
#ifndef __ECOS
111
#include <dev/rndvar.h>
112
#endif
113
#include <machine/stdarg.h>
114
#ifndef __ECOS
115
#include <sys/md5k.h>
116
#endif
117
 
118
#ifdef IPSEC
119
#include <netinet/ip_ipsp.h>
120
#endif /* IPSEC */
121
 
122
#ifdef INET6
123
#ifndef INET
124
#include <netinet/in.h>
125
#endif
126
#include <sys/domain.h>
127
#include <netinet6/in6_var.h>
128
#include <netinet/ip6.h>
129
#include <netinet6/ip6_var.h>
130
#include <netinet6/tcpipv6.h>
131
#include <netinet/icmp6.h>
132
#include <netinet6/nd6.h>
133
 
134
#ifndef CREATE_IPV6_MAPPED
/*
 * Fill a6 with the IPv4-mapped IPv6 form of a4: all bytes zero except
 * s6_addr[10] and s6_addr[11], which are set to 0xff, and s6_addr[12..15],
 * which receive the 32-bit value a4 unchanged (no byte swapping is done).
 *
 * a6 must be a modifiable lvalue with an s6_addr byte array; a4 must be
 * convertible to u_int32_t.  The IPv4 word is copied bytewise instead of
 * being stored through a casted u_int32_t pointer, so the macro neither
 * type-puns the byte array nor depends on its alignment.
 */
#define CREATE_IPV6_MAPPED(a6, a4) \
do { \
        u_int32_t create_ipv6_mapped_v4 = (a4);                 \
                                                                \
        bzero(&(a6), sizeof(a6));                               \
        (a6).s6_addr[10] = (a6).s6_addr[11] = 0xff;             \
        bcopy(&create_ipv6_mapped_v4, &(a6).s6_addr[12],        \
            sizeof(create_ipv6_mapped_v4));                     \
} while (0)
#endif
142
 
143
struct  tcpiphdr tcp_saveti;
144
struct  tcpipv6hdr tcp_saveti6;
145
 
146
/* for the packet header length in the mbuf */
147
#define M_PH_LEN(m)      (((struct mbuf *)(m))->m_pkthdr.len)
148
#define M_V6_LEN(m)      (M_PH_LEN(m) - sizeof(struct ip6_hdr))
149
#define M_V4_LEN(m)      (M_PH_LEN(m) - sizeof(struct ip))
150
#endif /* INET6 */
151
 
152
int     tcprexmtthresh = 3;
153
struct  tcpiphdr tcp_saveti;
154
int     tcptv_keep_init = TCPTV_KEEP_INIT;
155
 
156
extern u_long sb_max;
157
 
158
#endif /* TUBA_INCLUDE */
159
/*
 * 24 * 24 * 60 * 60 is the number of seconds in 24 days; the product is
 * scaled by PR_SLOWHZ.
 * NOTE(review): presumably PR_SLOWHZ is slow-timer ticks per second, making
 * this the PAWS idle limit in ticks -- confirm against the timer headers.
 */
#define TCP_PAWS_IDLE   (24 * 24 * 60 * 60 * PR_SLOWHZ)

/*
 * Modulo comparisons of timestamps: the difference is reinterpreted as a
 * signed int so that the ordering survives counter wraparound.
 */
#define TSTMP_GEQ(a,b)  ((int)((a)-(b)) >= 0)
#define TSTMP_LT(a,b)   (!TSTMP_GEQ(a,b))
164
 
165
/*
166
 * Neighbor Discovery, Neighbor Unreachability Detection Upper layer hint.
167
 */
168
#ifdef INET6
/*
 * ND6_HINT(tp): if tp is non-null, has an inpcb flagged INP_IPV6 but not
 * INP_IPV6_MAPPED, and that inpcb holds a cached IPv6 route, pass the route
 * to nd6_nud_hint().  Otherwise do nothing.
 *
 * The argument is parenthesized at every use so that any pointer-valued
 * expression can be passed safely; note that it is evaluated more than once.
 */
#define ND6_HINT(tp) \
do { \
        if ((tp) && (tp)->t_inpcb && \
            ((tp)->t_inpcb->inp_flags & INP_IPV6) && \
            !((tp)->t_inpcb->inp_flags & INP_IPV6_MAPPED) && \
            (tp)->t_inpcb->inp_route6.ro_rt) { \
                nd6_nud_hint((tp)->t_inpcb->inp_route6.ro_rt, NULL); \
        } \
} while (0)
#else
/* Without INET6 the hint expands to nothing. */
#define ND6_HINT(tp)
#endif
180
 
181
/*
182
 * Insert segment th into reassembly queue of tcp with
183
 * control block tp.  Return TH_FIN if reassembly now includes
184
 * a segment with FIN.  The macro form does the common case inline
185
 * (segment is the next to be received on an established connection,
186
 * and the queue is empty), avoiding linkage into and removal
187
 * from the queue and repetition of various conversions.
188
 * Set DELACK for segments received in order, but ack immediately
189
 * when segments are out of order (so fast retransmit can work).
190
 */
191
 
192
#ifndef TUBA_INCLUDE
193
 
194
int
195
tcp_reass(tp, th, m, tlen)
196
        register struct tcpcb *tp;
197
        register struct tcphdr *th;
198
        struct mbuf *m;
199
        int *tlen;
200
{
201
        register struct ipqent *p, *q, *nq, *tiqe;
202
        struct socket *so = tp->t_inpcb->inp_socket;
203
        int flags;
204
 
205
        /*
206
         * Call with th==0 after become established to
207
         * force pre-ESTABLISHED data up to user socket.
208
         */
209
        if (th == 0)
210
                goto present;
211
 
212
        /*
213
         * Allocate a new queue entry, before we throw away any data.
214
         * If we can't, just drop the packet.  XXX
215
         */
216
        MALLOC(tiqe, struct ipqent *, sizeof (struct ipqent), M_IPQ, M_NOWAIT);
217
        if (tiqe == NULL) {
218
                tcpstat.tcps_rcvmemdrop++;
219
                m_freem(m);
220
                return (0);
221
        }
222
 
223
        /*
224
         * Find a segment which begins after this one does.
225
         */
226
        for (p = NULL, q = tp->segq.lh_first; q != NULL;
227
            p = q, q = q->ipqe_q.le_next)
228
                if (SEQ_GT(q->ipqe_tcp->th_seq, th->th_seq))
229
                        break;
230
 
231
        /*
232
         * If there is a preceding segment, it may provide some of
233
         * our data already.  If so, drop the data from the incoming
234
         * segment.  If it provides all of our data, drop us.
235
         */
236
        if (p != NULL) {
237
                register struct tcphdr *phdr = p->ipqe_tcp;
238
                register int i;
239
 
240
                /* conversion to int (in i) handles seq wraparound */
241
                i = phdr->th_seq + phdr->th_reseqlen - th->th_seq;
242
                if (i > 0) {
243
                        if (i >= *tlen) {
244
                                tcpstat.tcps_rcvduppack++;
245
                                tcpstat.tcps_rcvdupbyte += *tlen;
246
                                m_freem(m);
247
                                FREE(tiqe, M_IPQ);
248
                                return (0);
249
                        }
250
                        m_adj(m, i);
251
                        *tlen -= i;
252
                        th->th_seq += i;
253
                }
254
        }
255
        tcpstat.tcps_rcvoopack++;
256
        tcpstat.tcps_rcvoobyte += *tlen;
257
 
258
        /*
259
         * While we overlap succeeding segments trim them or,
260
         * if they are completely covered, dequeue them.
261
         */
262
        for (; q != NULL; q = nq) {
263
                register struct tcphdr *qhdr = q->ipqe_tcp;
264
                register int i = (th->th_seq + *tlen) - qhdr->th_seq;
265
 
266
                if (i <= 0)
267
                        break;
268
                if (i < qhdr->th_reseqlen) {
269
                        qhdr->th_seq += i;
270
                        qhdr->th_reseqlen -= i;
271
                        m_adj(q->ipqe_m, i);
272
                        break;
273
                }
274
                nq = q->ipqe_q.le_next;
275
                m_freem(q->ipqe_m);
276
                LIST_REMOVE(q, ipqe_q);
277
                FREE(q, M_IPQ);
278
        }
279
 
280
        /* Insert the new fragment queue entry into place. */
281
        tiqe->ipqe_m = m;
282
        th->th_reseqlen = *tlen;
283
        tiqe->ipqe_tcp = th;
284
        if (p == NULL) {
285
                LIST_INSERT_HEAD(&tp->segq, tiqe, ipqe_q);
286
        } else {
287
                LIST_INSERT_AFTER(p, tiqe, ipqe_q);
288
        }
289
 
290
present:
291
        /*
292
         * Present data to user, advancing rcv_nxt through
293
         * completed sequence space.
294
         */
295
        if (TCPS_HAVEESTABLISHED(tp->t_state) == 0)
296
                return (0);
297
        q = tp->segq.lh_first;
298
        if (q == NULL || q->ipqe_tcp->th_seq != tp->rcv_nxt)
299
                return (0);
300
        if (tp->t_state == TCPS_SYN_RECEIVED && q->ipqe_tcp->th_reseqlen)
301
                return (0);
302
        do {
303
                tp->rcv_nxt += q->ipqe_tcp->th_reseqlen;
304
                flags = q->ipqe_tcp->th_flags & TH_FIN;
305
 
306
                nq = q->ipqe_q.le_next;
307
                LIST_REMOVE(q, ipqe_q);
308
                ND6_HINT(tp);
309
                if (so->so_state & SS_CANTRCVMORE)
310
                        m_freem(q->ipqe_m);
311
                else
312
                        sbappend(&so->so_rcv, q->ipqe_m);
313
                FREE(q, M_IPQ);
314
                q = nq;
315
        } while (q != NULL && q->ipqe_tcp->th_seq == tp->rcv_nxt);
316
        sorwakeup(so);
317
        return (flags);
318
}
319
 
320
/*
321
 * First check for a port-specific bomb. We do not want to drop half-opens
322
 * for other ports if this is the only port being bombed.  We only check
323
 * the bottom 40 half open connections, to avoid wasting too much time.
324
 *
325
 * Or, otherwise it is more likely a generic syn bomb, so delete the oldest
326
 * half-open connection.
327
 */
328
void
329
tcpdropoldhalfopen(avoidtp, port)
330
        struct tcpcb *avoidtp;
331
        u_int16_t port;
332
{
333
        register struct inpcb *inp;
334
        register struct tcpcb *tp;
335
        int ncheck = 40;
336
        int s;
337
 
338
        s = splnet();
339
        inp = tcbtable.inpt_queue.cqh_first;
340
        if (inp)                                                /* XXX */
341
        for (; inp != (struct inpcb *)&tcbtable.inpt_queue && --ncheck;
342
            inp = inp->inp_queue.cqe_prev) {
343
                if ((tp = (struct tcpcb *)inp->inp_ppcb) &&
344
                    tp != avoidtp &&
345
                    tp->t_state == TCPS_SYN_RECEIVED &&
346
                    port == inp->inp_lport) {
347
                        tcp_close(tp);
348
                        goto done;
349
                }
350
        }
351
 
352
        inp = tcbtable.inpt_queue.cqh_first;
353
        if (inp)                                                /* XXX */
354
        for (; inp != (struct inpcb *)&tcbtable.inpt_queue;
355
            inp = inp->inp_queue.cqe_prev) {
356
                if ((tp = (struct tcpcb *)inp->inp_ppcb) &&
357
                    tp != avoidtp &&
358
                    tp->t_state == TCPS_SYN_RECEIVED) {
359
                        tcp_close(tp);
360
                        goto done;
361
                }
362
        }
363
done:
364
        splx(s);
365
}
366
 
367
#if defined(INET6) && !defined(TCP6)
/*
 * tcp6_input: IPv6 entry point in front of tcp_input().
 *
 *   mp     points at the mbuf chain holding the packet
 *   offp   points at the offset passed on to tcp_input()
 *   proto  protocol number passed on to tcp_input()
 *
 * Packets received on a FAITH interface (when NFAITH is configured) are
 * freed.  Packets flagged M_ANYCAST6 are answered with an ICMPv6
 * destination-unreachable (address) error, or freed if the first mbuf is
 * shorter than an IPv6 header.  Everything else goes to tcp_input().
 * Always returns IPPROTO_DONE.
 */
int
tcp6_input(mp, offp, proto)
        struct mbuf **mp;
        int *offp, proto;
{
        struct mbuf *m = *mp;

#if defined(NFAITH) && 0 < NFAITH
        /* XXX send icmp6 host/port unreach? */
        if (m->m_pkthdr.rcvif &&
            m->m_pkthdr.rcvif->if_type == IFT_FAITH) {
                m_freem(m);
                return IPPROTO_DONE;
        }
#endif

        /* The ordinary case: not an anycast destination. */
        if ((m->m_flags & M_ANYCAST6) == 0) {
                tcp_input(m, *offp, proto);
                return IPPROTO_DONE;
        }

        /*
         * draft-itojun-ipv6-tcp-to-anycast
         * better place to put this in?
         */
        if (m->m_len < sizeof(struct ip6_hdr)) {
                m_freem(m);
        } else {
                struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);

                /* last argument: offset of ip6_dst within the header */
                icmp6_error(m, ICMP6_DST_UNREACH,
                        ICMP6_DST_UNREACH_ADDR,
                        (caddr_t)&ip6->ip6_dst - (caddr_t)ip6);
        }
        return IPPROTO_DONE;
}
#endif
404
 
405
/*
406
 * TCP input routine, follows pages 65-76 of the
407
 * protocol specification dated September, 1981 very closely.
408
 */
409
void
410
#if __STDC__
411
tcp_input(struct mbuf *m, ...)
412
#else
413
tcp_input(m, va_alist)
414
        register struct mbuf *m;
415
#endif
416
{
417
        register struct tcpiphdr *ti;
418
        register struct inpcb *inp;
419
        caddr_t optp = NULL;
420
        int optlen = 0;
421
        int len, tlen, off;
422
        register struct tcpcb *tp = 0;
423
        register int tiflags;
424
        struct socket *so = NULL;
425
        int todrop, acked, ourfinisacked, needoutput = 0;
426
        int hdroptlen = 0;
427
        short ostate = 0;
428
        struct in_addr laddr;
429
        int dropsocket = 0;
430
        int iss = 0;
431
        u_long tiwin;
432
        u_int32_t ts_val, ts_ecr;
433
        int ts_present = 0;
434
        int iphlen;
435
        va_list ap;
436
        register struct tcphdr *th;
437
#ifdef IPSEC
438
        struct tdb *tdb = NULL;
439
#endif /* IPSEC */
440
#ifdef INET6
441
        struct in6_addr laddr6;
442
        unsigned short is_ipv6;     /* Type of incoming datagram. */
443
        struct ip6_hdr *ipv6 = NULL;
444
#endif /* INET6 */
445
 
446
        va_start(ap, m);
447
        iphlen = va_arg(ap, int);
448
        va_end(ap);
449
 
450
        tcpstat.tcps_rcvtotal++;
451
 
452
#ifdef IPSEC
453
        /* Save the last SA which was used to process the mbuf */
454
        if ((m->m_flags & (M_CONF|M_AUTH)) && m->m_pkthdr.tdbi) {
455
                struct tdb_ident *tdbi = m->m_pkthdr.tdbi;
456
                /* XXX gettdb() should really be called at spltdb().      */
457
                /* XXX this is splsoftnet(), currently they are the same. */
458
                tdb = gettdb(tdbi->spi, &tdbi->dst, tdbi->proto);
459
                free(m->m_pkthdr.tdbi, M_TEMP);
460
                m->m_pkthdr.tdbi = NULL;
461
        }
462
#endif /* IPSEC */
463
#ifdef INET6
464
        /*
465
         * Before we do ANYTHING, we have to figure out if it's TCP/IPv6 or
466
         * TCP/IPv4.
467
         */
468
        is_ipv6 = mtod(m, struct ip *)->ip_v == 6;
469
#endif /* INET6 */
470
 
471
        /*
472
         * Get IP and TCP header together in first mbuf.
473
         * Note: IP leaves IP header in first mbuf.
474
         */
475
#ifndef INET6
476
        ti = mtod(m, struct tcpiphdr *);
477
#else /* INET6 */
478
        if (!is_ipv6)
479
#endif /* INET6 */
480
        if (iphlen > sizeof (struct ip)) {
481
#if 0   /*XXX*/
482
                ip_stripoptions(m, (struct mbuf *)0);
483
#else
484
#ifdef __ECOS
485
                diag_printf("extension headers are not allowed\n");
486
#else
487
                printf("extension headers are not allowed\n");
488
#endif
489
                m_freem(m);
490
                return;
491
#endif
492
        }
493
        if (m->m_len < iphlen + sizeof(struct tcphdr)) {
494
                if ((m = m_pullup2(m, iphlen + sizeof(struct tcphdr))) == 0) {
495
                        tcpstat.tcps_rcvshort++;
496
                        return;
497
                }
498
#ifndef INET6
499
                ti = mtod(m, struct tcpiphdr *);
500
#endif /* INET6 */
501
        }
502
 
503
        tlen = m->m_pkthdr.len - iphlen;
504
 
505
#ifdef INET6
506
        /*
507
         * After that, do initial segment processing which is still very
508
         * dependent on what IP version you're using.
509
         */
510
 
511
        if (is_ipv6) {
512
#ifdef DIAGNOSTIC
513
          if (iphlen < sizeof(struct ip6_hdr)) {
514
            m_freem(m);
515
            return;
516
          }
517
#endif /* DIAGNOSTIC */
518
 
519
          /* strip off any options */
520
          if (iphlen > sizeof(struct ip6_hdr)) {
521
#if 0 /*XXX*/
522
            ipv6_stripoptions(m, iphlen);
523
#else
524
#ifdef __ECOS
525
                diag_printf("extension headers are not allowed\n");
526
#else
527
                printf("extension headers are not allowed\n");
528
#endif
529
                m_freem(m);
530
                return;
531
#endif
532
            iphlen = sizeof(struct ip6_hdr);
533
          }
534
 
535
          ti = NULL;
536
          ipv6 = mtod(m, struct ip6_hdr *);
537
 
538
          if (in6_cksum(m, IPPROTO_TCP, sizeof(struct ip6_hdr), tlen)) {
539
            tcpstat.tcps_rcvbadsum++;
540
            goto drop;
541
          } /* endif in6_cksum */
542
        } else {
543
          ti = mtod(m, struct tcpiphdr *);
544
#endif /* INET6 */
545
 
546
        /*
547
         * Checksum extended TCP header and data.
548
         */
549
#ifndef INET6
550
        tlen = ((struct ip *)ti)->ip_len;
551
#endif /* INET6 */
552
        len = sizeof (struct ip) + tlen;
553
        bzero(ti->ti_x1, sizeof ti->ti_x1);
554
        ti->ti_len = (u_int16_t)tlen;
555
        HTONS(ti->ti_len);
556
        if ((ti->ti_sum = in_cksum(m, len)) != 0) {
557
                tcpstat.tcps_rcvbadsum++;
558
                goto drop;
559
        }
560
#ifdef INET6
561
        }
562
#endif /* INET6 */
563
#endif /* TUBA_INCLUDE */
564
 
565
        th = (struct tcphdr *)(mtod(m, caddr_t) + iphlen);
566
 
567
        /*
568
         * Check that TCP offset makes sense,
569
         * pull out TCP options and adjust length.              XXX
570
         */
571
        off = th->th_off << 2;
572
        if (off < sizeof (struct tcphdr) || off > tlen) {
573
                tcpstat.tcps_rcvbadoff++;
574
                goto drop;
575
        }
576
        tlen -= off;
577
        if (off > sizeof (struct tcphdr)) {
578
                if (m->m_len < iphlen + off) {
579
                        if ((m = m_pullup2(m, iphlen + off)) == 0) {
580
                                tcpstat.tcps_rcvshort++;
581
                                return;
582
                        }
583
#ifdef INET6
584
                        if (is_ipv6)
585
                          ipv6 = mtod(m, struct ip6_hdr *);
586
                        else
587
#endif /* INET6 */
588
                        ti = mtod(m, struct tcpiphdr *);
589
                        th = (struct tcphdr *)(mtod(m, caddr_t) + iphlen);
590
                }
591
                optlen = off - sizeof (struct tcphdr);
592
                optp = mtod(m, caddr_t) + iphlen + sizeof(struct tcphdr);
593
                /*
594
                 * Do quick retrieval of timestamp options ("options
595
                 * prediction?").  If timestamp is the only option and it's
596
                 * formatted as recommended in RFC 1323 appendix A, we
597
                 * quickly get the values now and not bother calling
598
                 * tcp_dooptions(), etc.
599
                 */
600
                if ((optlen == TCPOLEN_TSTAMP_APPA ||
601
                     (optlen > TCPOLEN_TSTAMP_APPA &&
602
                        optp[TCPOLEN_TSTAMP_APPA] == TCPOPT_EOL)) &&
603
                     *(u_int32_t *)optp == htonl(TCPOPT_TSTAMP_HDR) &&
604
                     (th->th_flags & TH_SYN) == 0) {
605
                        ts_present = 1;
606
                        ts_val = ntohl(*(u_int32_t *)(optp + 4));
607
                        ts_ecr = ntohl(*(u_int32_t *)(optp + 8));
608
                        optp = NULL;    /* we've parsed the options */
609
                }
610
        }
611
        tiflags = th->th_flags;
612
 
613
        /*
614
         * Convert TCP protocol specific fields to host format.
615
         */
616
        NTOHL(th->th_seq);
617
        NTOHL(th->th_ack);
618
        NTOHS(th->th_win);
619
        NTOHS(th->th_urp);
620
 
621
        /*
622
         * Locate pcb for segment.
623
         */
624
findpcb:
625
#ifdef INET6
626
        if (is_ipv6) {
627
          inp = in6_pcbhashlookup(&tcbtable, &ipv6->ip6_src, th->th_sport,
628
                                 &ipv6->ip6_dst, th->th_dport);
629
        } else
630
#endif /* INET6 */
631
        inp = in_pcbhashlookup(&tcbtable, ti->ti_src, ti->ti_sport,
632
            ti->ti_dst, ti->ti_dport);
633
        if (inp == 0) {
634
                ++tcpstat.tcps_pcbhashmiss;
635
#ifdef INET6
636
                if (is_ipv6)
637
                        inp = in_pcblookup(&tcbtable, &ipv6->ip6_src,
638
                            th->th_sport, &ipv6->ip6_dst, th->th_dport,
639
                            INPLOOKUP_WILDCARD | INPLOOKUP_IPV6);
640
                else
641
#endif /* INET6 */
642
                inp = in_pcblookup(&tcbtable, &ti->ti_src, ti->ti_sport,
643
                    &ti->ti_dst, ti->ti_dport, INPLOOKUP_WILDCARD);
644
                /*
645
                 * If the state is CLOSED (i.e., TCB does not exist) then
646
                 * all data in the incoming segment is discarded.
647
                 * If the TCB exists but is in CLOSED state, it is embryonic,
648
                 * but should either do a listen or a connect soon.
649
                 */
650
                if (inp == 0) {
651
                        ++tcpstat.tcps_noport;
652
                        goto dropwithreset;
653
                }
654
        }
655
 
656
        tp = intotcpcb(inp);
657
        if (tp == 0)
658
                goto dropwithreset;
659
        if (tp->t_state == TCPS_CLOSED)
660
                goto drop;
661
 
662
        /* Unscale the window into a 32-bit value. */
663
        if ((tiflags & TH_SYN) == 0)
664
                tiwin = th->th_win << tp->snd_scale;
665
        else
666
                tiwin = th->th_win;
667
 
668
        so = inp->inp_socket;
669
        if (so->so_options & (SO_DEBUG|SO_ACCEPTCONN)) {
670
                if (so->so_options & SO_DEBUG) {
671
                        ostate = tp->t_state;
672
#ifdef INET6
673
                        if (is_ipv6)
674
                          tcp_saveti6 = *(mtod(m, struct tcpipv6hdr *));
675
                        else
676
#endif /* INET6 */
677
                        tcp_saveti = *ti;
678
                }
679
                if (so->so_options & SO_ACCEPTCONN) {
680
                        struct socket *so1;
681
 
682
                        so1 = sonewconn(so, 0);
683
                        if (so1 == NULL) {
684
                                tcpdropoldhalfopen(tp, th->th_dport);
685
                                so1 = sonewconn(so, 0);
686
                                if (so1 == NULL)
687
                                        goto drop;
688
                        }
689
                        so = so1;
690
                        /*
691
                         * This is ugly, but ....
692
                         *
693
                         * Mark socket as temporary until we're
694
                         * committed to keeping it.  The code at
695
                         * ``drop'' and ``dropwithreset'' check the
696
                         * flag dropsocket to see if the temporary
697
                         * socket created here should be discarded.
698
                         * We mark the socket as discardable until
699
                         * we're committed to it below in TCPS_LISTEN.
700
                         */
701
                        dropsocket++;
702
#ifdef IPSEC
703
                        /*
704
                         * We need to copy the required security levels
705
                         * from the old pcb.
706
                         */
707
                        {
708
                          struct inpcb *newinp = (struct inpcb *)so->so_pcb;
709
                          bcopy(inp->inp_seclevel, newinp->inp_seclevel,
710
                                sizeof(inp->inp_seclevel));
711
                          newinp->inp_secrequire = inp->inp_secrequire;
712
                        }
713
#endif /* IPSEC */
714
#ifdef INET6
715
                        /*
716
                         * inp still has the OLD in_pcb stuff, set the
717
                         * v6-related flags on the new guy, too.   This is
718
                         * done particularly for the case where an AF_INET6
719
                         * socket is bound only to a port, and a v4 connection
720
                         * comes in on that port.
721
                         * we also copy the flowinfo from the original pcb
722
                         * to the new one.
723
                         */
724
                        {
725
                          int flags = inp->inp_flags;
726
                          struct inpcb *oldinpcb = inp;
727
 
728
                          inp = (struct inpcb *)so->so_pcb;
729
                          inp->inp_flags |= (flags & (INP_IPV6 | INP_IPV6_UNDEC
730
                                                      | INP_IPV6_MAPPED));
731
                          if ((inp->inp_flags & INP_IPV6) &&
732
                              !(inp->inp_flags & INP_IPV6_MAPPED)) {
733
                            inp->inp_ipv6.ip6_hlim =
734
                              oldinpcb->inp_ipv6.ip6_hlim;
735
                            inp->inp_ipv6.ip6_flow =
736
                              oldinpcb->inp_ipv6.ip6_flow;
737
                          }
738
                        }
739
#else /* INET6 */
740
                        inp = (struct inpcb *)so->so_pcb;
741
#endif /* INET6 */
742
                        inp->inp_lport = th->th_dport;
743
#ifdef INET6
744
                        if (is_ipv6) {
745
                          inp->inp_laddr6 = ipv6->ip6_dst;
746
                          inp->inp_fflowinfo = htonl(0x0fffffff) &
747
                            ipv6->ip6_flow;
748
 
749
                          /*inp->inp_options = ip6_srcroute();*/ /* soon. */
750
                          /* still need to tweak outbound options
751
                             processing to include this mbuf in
752
                             the right place and put the correct
753
                             NextHdr values in the right places.
754
                             XXX  rja */
755
                        } else {
756
                          if (inp->inp_flags & INP_IPV6) {/* v4 to v6 socket */
757
                            CREATE_IPV6_MAPPED(inp->inp_laddr6,
758
                              ti->ti_dst.s_addr);
759
                          } else {
760
#endif /* INET6 */
761
                            inp->inp_laddr = ti->ti_dst;
762
                            inp->inp_options = ip_srcroute();
763
#ifdef INET6
764
                          }
765
                        }
766
#endif /* INET6 */
767
                        in_pcbrehash(inp);
768
                        tp = intotcpcb(inp);
769
                        tp->t_state = TCPS_LISTEN;
770
 
771
                        /* Compute proper scaling value from buffer space
772
                         */
773
                        while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
774
                           TCP_MAXWIN << tp->request_r_scale < so->so_rcv.sb_hiwat)
775
                                tp->request_r_scale++;
776
                }
777
        }
778
 
779
#ifdef IPSEC
780
        /* Check if this socket requires security for incoming packets */
781
        if ((inp->inp_seclevel[SL_AUTH] >= IPSEC_LEVEL_REQUIRE &&
782
             !(m->m_flags & M_AUTH)) ||
783
            (inp->inp_seclevel[SL_ESP_TRANS] >= IPSEC_LEVEL_REQUIRE &&
784
             !(m->m_flags & M_CONF))) {
785
#ifdef notyet
786
#ifdef INET6
787
                if (is_ipv6)
788
                        icmp6_error(m, ICMPV6_BLAH, ICMPV6_BLAH, 0);
789
                else
790
#endif /* INET6 */
791
                icmp_error(m, ICMP_BLAH, ICMP_BLAH, 0, 0);
792
#endif /* notyet */
793
                tcpstat.tcps_rcvnosec++;
794
                goto drop;
795
        }
796
        /* Use tdb_bind_out for this inp's outbound communication */
797
        if (tdb)
798
                tdb_add_inp(tdb, inp);
799
#endif /*IPSEC */
800
 
801
        /*
802
         * Segment received on connection.
803
         * Reset idle time and keep-alive timer.
804
         */
805
        tp->t_idle = 0;
806
        if (tp->t_state != TCPS_SYN_RECEIVED)
807
                tp->t_timer[TCPT_KEEP] = tcp_keepidle;
808
 
809
#ifdef TCP_SACK
810
        if (!tp->sack_disable)
811
                tcp_del_sackholes(tp, th); /* Delete stale SACK holes */
812
#endif /* TCP_SACK */
813
 
814
        /*
815
         * Process options if not in LISTEN state,
816
         * else do it below (after getting remote address).
817
         */
818
        if (optp && tp->t_state != TCPS_LISTEN)
819
                tcp_dooptions(tp, optp, optlen, th,
820
                        &ts_present, &ts_val, &ts_ecr);
821
 
822
#ifdef TCP_SACK
823
        if (!tp->sack_disable) {
824
                tp->rcv_laststart = th->th_seq; /* last rec'vd segment*/
825
                tp->rcv_lastend = th->th_seq + tlen;
826
        }
827
#endif /* TCP_SACK */
828
        /*
829
         * Header prediction: check for the two common cases
830
         * of a uni-directional data xfer.  If the packet has
831
         * no control flags, is in-sequence, the window didn't
832
         * change and we're not retransmitting, it's a
833
         * candidate.  If the length is zero and the ack moved
834
         * forward, we're the sender side of the xfer.  Just
835
         * free the data acked & wake any higher level process
836
         * that was blocked waiting for space.  If the length
837
         * is non-zero and the ack didn't move, we're the
838
         * receiver side.  If we're getting packets in-order
839
         * (the reassembly queue is empty), add the data to
840
         * the socket buffer and note that we need a delayed ack.
841
         */
842
        if (tp->t_state == TCPS_ESTABLISHED &&
843
            (tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK &&
844
            (!ts_present || TSTMP_GEQ(ts_val, tp->ts_recent)) &&
845
            th->th_seq == tp->rcv_nxt &&
846
            tiwin && tiwin == tp->snd_wnd &&
847
            tp->snd_nxt == tp->snd_max) {
848
 
849
                /*
850
                 * If last ACK falls within this segment's sequence numbers,
851
                 *  record the timestamp.
852
                 * Fix from Braden, see Stevens p. 870
853
                 */
854
                if (ts_present && SEQ_LEQ(th->th_seq, tp->last_ack_sent)) {
855
                        tp->ts_recent_age = tcp_now;
856
                        tp->ts_recent = ts_val;
857
                }
858
 
859
                if (tlen == 0) {
860
                        if (SEQ_GT(th->th_ack, tp->snd_una) &&
861
                            SEQ_LEQ(th->th_ack, tp->snd_max) &&
862
                            tp->snd_cwnd >= tp->snd_wnd &&
863
                            tp->t_dupacks == 0) {
864
                                /*
865
                                 * this is a pure ack for outstanding data.
866
                                 */
867
                                ++tcpstat.tcps_predack;
868
                                if (ts_present)
869
                                        tcp_xmit_timer(tp, tcp_now-ts_ecr+1);
870
                                else if (tp->t_rtt &&
871
                                            SEQ_GT(th->th_ack, tp->t_rtseq))
872
                                        tcp_xmit_timer(tp, tp->t_rtt);
873
                                acked = th->th_ack - tp->snd_una;
874
                                tcpstat.tcps_rcvackpack++;
875
                                tcpstat.tcps_rcvackbyte += acked;
876
                                ND6_HINT(tp);
877
                                sbdrop(&so->so_snd, acked);
878
                                tp->snd_una = th->th_ack;
879
#if defined(TCP_SACK) || defined(TCP_NEWRENO)
880
                                /*
881
                                 * We want snd_last to track snd_una so
882
                                 * as to avoid sequence wraparound problems
883
                                 * for very large transfers.
884
                                 */
885
                                tp->snd_last = tp->snd_una;
886
#endif /* TCP_SACK or TCP_NEWRENO */
887
#if defined(TCP_SACK) && defined(TCP_FACK)
888
                                tp->snd_fack = tp->snd_una;
889
                                tp->retran_data = 0;
890
#endif /* TCP_FACK */
891
                                m_freem(m);
892
 
893
                                /*
894
                                 * If all outstanding data are acked, stop
895
                                 * retransmit timer, otherwise restart timer
896
                                 * using current (possibly backed-off) value.
897
                                 * If process is waiting for space,
898
                                 * wakeup/selwakeup/signal.  If data
899
                                 * are ready to send, let tcp_output
900
                                 * decide between more output or persist.
901
                                 */
902
                                if (tp->snd_una == tp->snd_max)
903
                                        tp->t_timer[TCPT_REXMT] = 0;
904
                                else if (tp->t_timer[TCPT_PERSIST] == 0)
905
                                        tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
906
 
907
                                if (sb_notify(&so->so_snd))
908
                                        sowwakeup(so);
909
                                if (so->so_snd.sb_cc)
910
                                        (void) tcp_output(tp);
911
                                return;
912
                        }
913
                } else if (th->th_ack == tp->snd_una &&
914
                    tp->segq.lh_first == NULL &&
915
                    tlen <= sbspace(&so->so_rcv)) {
916
                        /*
917
                         * This is a pure, in-sequence data packet
918
                         * with nothing on the reassembly queue and
919
                         * we have enough buffer space to take it.
920
                         */
921
#ifdef TCP_SACK
922
                        /* Clean receiver SACK report if present */
923
                        if (!tp->sack_disable && tp->rcv_numsacks)
924
                                tcp_clean_sackreport(tp);
925
#endif /* TCP_SACK */
926
                        ++tcpstat.tcps_preddat;
927
                        tp->rcv_nxt += tlen;
928
                        tcpstat.tcps_rcvpack++;
929
                        tcpstat.tcps_rcvbyte += tlen;
930
                        ND6_HINT(tp);
931
                        /*
932
                         * Drop TCP, IP headers and TCP options then add data
933
                         * to socket buffer.
934
                         */
935
                        m_adj(m, iphlen + off);
936
                        sbappend(&so->so_rcv, m);
937
                        sorwakeup(so);
938
                        if (th->th_flags & TH_PUSH)
939
                                tp->t_flags |= TF_ACKNOW;
940
                        else
941
                                tp->t_flags |= TF_DELACK;
942
                        return;
943
                }
944
        }
945
 
946
        /*
947
         * Compute mbuf offset to TCP data segment.
948
         */
949
        hdroptlen = iphlen + off;
950
 
951
        /*
952
         * Calculate amount of space in receive window,
953
         * and then do TCP input processing.
954
         * Receive window is amount of space in rcv queue,
955
         * but not less than advertised window.
956
         */
957
        { int win;
958
 
959
        win = sbspace(&so->so_rcv);
960
        if (win < 0)
961
                win = 0;
962
        tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt));
963
        }
964
 
965
        switch (tp->t_state) {
966
 
967
        /*
968
         * If the state is LISTEN then ignore segment if it contains an RST.
969
         * If the segment contains an ACK then it is bad and send a RST.
970
         * If it does not contain a SYN then it is not interesting; drop it.
971
         * If it is from this socket, drop it, it must be forged.
972
         * Don't bother responding if the destination was a broadcast.
973
         * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial
974
         * tp->iss, and send a segment:
975
         *     <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK>
976
         * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss.
977
         * Fill in remote peer address fields if not previously specified.
978
         * Enter SYN_RECEIVED state, and process any other fields of this
979
         * segment in this state.
980
         */
981
        case TCPS_LISTEN: {
982
                struct mbuf *am;
983
                register struct sockaddr_in *sin;
984
#ifdef INET6
985
                register struct sockaddr_in6 *sin6;
986
#endif /* INET6 */
987
 
988
                if (tiflags & TH_RST)
989
                        goto drop;
990
                if (tiflags & TH_ACK)
991
                        goto dropwithreset;
992
                if ((tiflags & TH_SYN) == 0)
993
                        goto drop;
994
                if (th->th_dport == th->th_sport) {
995
#ifdef INET6
996
                  if (is_ipv6) {
997
                    if (IN6_ARE_ADDR_EQUAL(&ipv6->ip6_src, &ipv6->ip6_dst))
998
                      goto drop;
999
                  } else {
1000
#endif /* INET6 */
1001
                    if (ti->ti_dst.s_addr == ti->ti_src.s_addr)
1002
                      goto drop;
1003
#ifdef INET6
1004
                  }
1005
#endif /* INET6 */
1006
                }
1007
 
1008
                /*
1009
                 * RFC1122 4.2.3.10, p. 104: discard bcast/mcast SYN
1010
                 * in_broadcast() should never return true on a received
1011
                 * packet with M_BCAST not set.
1012
                 */
1013
                if (m->m_flags & (M_BCAST|M_MCAST))
1014
                  goto drop;
1015
#ifdef INET6
1016
                if (is_ipv6) {
1017
                        /* XXX What about IPv6 Anycasting ?? :-(  rja */
1018
                        if (IN6_IS_ADDR_MULTICAST(&ipv6->ip6_dst))
1019
                                goto drop;
1020
                } else
1021
#endif /* INET6 */
1022
                if (IN_MULTICAST(ti->ti_dst.s_addr))
1023
                        goto drop;
1024
                am = m_get(M_DONTWAIT, MT_SONAME);      /* XXX */
1025
                if (am == NULL)
1026
                        goto drop;
1027
#ifdef INET6
1028
                if (is_ipv6) {
1029
                  /*
1030
                   * This is probably the place to set the tp->pf value.
1031
                   * (Don't forget to do it in the v4 code as well!)
1032
                   *
1033
                   * Also, remember to blank out things like flowlabel, or
1034
                   * set flowlabel for accepted sockets in v6.
1035
                   *
1036
                   * FURTHERMORE, this is PROBABLY the place where the whole
1037
                   * business of key munging is set up for passive
1038
                   * connections.
1039
                   */
1040
                  am->m_len = sizeof(struct sockaddr_in6);
1041
                  sin6 = mtod(am, struct sockaddr_in6 *);
1042
                  sin6->sin6_family = AF_INET6;
1043
                  sin6->sin6_len = sizeof(struct sockaddr_in6);
1044
                  sin6->sin6_addr = ipv6->ip6_src;
1045
                  sin6->sin6_port = th->th_sport;
1046
                  sin6->sin6_flowinfo = htonl(0x0fffffff) &
1047
                    inp->inp_ipv6.ip6_flow;
1048
                  laddr6 = inp->inp_laddr6;
1049
                  if (IN6_IS_ADDR_UNSPECIFIED(&inp->inp_laddr6))
1050
                    inp->inp_laddr6 = ipv6->ip6_dst;
1051
                  /* This is a good optimization. */
1052
                  if (in6_pcbconnect(inp, am)) {
1053
                    inp->inp_laddr6 = laddr6;
1054
                    (void) m_free(am);
1055
                    goto drop;
1056
                  } /* endif in6_pcbconnect() */
1057
                  tp->pf = PF_INET6;
1058
                } else {
1059
                  /*
1060
                   * Letting v4 incoming datagrams reach valid
1061
                   * PF_INET6 sockets causes some overhead here.
1062
                   */
1063
                  if (inp->inp_flags & INP_IPV6) {
1064
                    if (!(inp->inp_flags & (INP_IPV6_UNDEC|INP_IPV6_MAPPED))) {
1065
                      (void) m_free(am);
1066
                      goto drop;
1067
                    }
1068
 
1069
                    am->m_len = sizeof(struct sockaddr_in6);
1070
 
1071
                    sin6 = mtod(am, struct sockaddr_in6 *);
1072
                    sin6->sin6_family = AF_INET6;
1073
                    sin6->sin6_len = sizeof(*sin6);
1074
                    CREATE_IPV6_MAPPED(sin6->sin6_addr, ti->ti_src.s_addr);
1075
                    sin6->sin6_port = th->th_sport;
1076
                    sin6->sin6_flowinfo = 0;
1077
 
1078
                    laddr6 = inp->inp_laddr6;
1079
                    if (inp->inp_laddr.s_addr == INADDR_ANY)
1080
                      CREATE_IPV6_MAPPED(inp->inp_laddr6, ti->ti_dst.s_addr);
1081
 
1082
                    /*
1083
                     * The pcb initially has the v6 default hoplimit
1084
                     * set. We're sending v4 packets so we need to set
1085
                     * the v4 ttl and tos.
1086
                     */
1087
                    inp->inp_ip.ip_ttl = ip_defttl;
1088
                    inp->inp_ip.ip_tos = 0;
1089
 
1090
                    if (in6_pcbconnect(inp, am)) {
1091
                      inp->inp_laddr6 = laddr6;
1092
                      (void) m_freem(am);
1093
                      goto drop;
1094
                    }
1095
                    tp->pf = PF_INET;
1096
                  } else {
1097
#endif /* INET6 */
1098
                am->m_len = sizeof (struct sockaddr_in);
1099
                sin = mtod(am, struct sockaddr_in *);
1100
                sin->sin_family = AF_INET;
1101
                sin->sin_len = sizeof(*sin);
1102
                sin->sin_addr = ti->ti_src;
1103
                sin->sin_port = ti->ti_sport;
1104
                bzero((caddr_t)sin->sin_zero, sizeof(sin->sin_zero));
1105
                laddr = inp->inp_laddr;
1106
                if (inp->inp_laddr.s_addr == INADDR_ANY)
1107
                        inp->inp_laddr = ti->ti_dst;
1108
                if (in_pcbconnect(inp, am)) {
1109
                        inp->inp_laddr = laddr;
1110
                        (void) m_free(am);
1111
                        goto drop;
1112
                }
1113
                (void) m_free(am);
1114
                tp->pf = PF_INET;
1115
#ifdef INET6
1116
                  }  /* if (inp->inp_flags & INP_IPV6) */
1117
                } /* if (is_ipv6) */
1118
#endif /* INET6 */
1119
                tp->t_template = tcp_template(tp);
1120
                if (tp->t_template == 0) {
1121
                        tp = tcp_drop(tp, ENOBUFS);
1122
                        dropsocket = 0;          /* socket is already gone */
1123
                        goto drop;
1124
                }
1125
                if (optp)
1126
                        tcp_dooptions(tp, optp, optlen, th,
1127
                                &ts_present, &ts_val, &ts_ecr);
1128
#ifdef TCP_SACK
1129
                /*
1130
                 * If peer did not send a SACK_PERMITTED option (i.e., if
1131
                 * tcp_dooptions() did not set TF_SACK_PERMIT), set
1132
                 * sack_disable to 1 if it is currently 0.
1133
                 */
1134
                if (!tp->sack_disable)
1135
                        if ((tp->t_flags & TF_SACK_PERMIT) == 0)
1136
                                tp->sack_disable = 1;
1137
#endif
1138
 
1139
                if (iss)
1140
                        tp->iss = iss;
1141
                else
1142
                        tp->iss = tcp_iss;
1143
#ifdef TCP_COMPAT_42
1144
                tcp_iss += TCP_ISSINCR/2;
1145
#else /* TCP_COMPAT_42 */
1146
                tcp_iss += arc4random() % TCP_ISSINCR + 1;
1147
#endif /* !TCP_COMPAT_42 */
1148
                tp->irs = th->th_seq;
1149
                tcp_sendseqinit(tp);
1150
#if defined (TCP_SACK) || defined (TCP_NEWRENO)
1151
                tp->snd_last = tp->snd_una;
1152
#endif /* TCP_SACK || TCP_NEWRENO */
1153
#if defined(TCP_SACK) && defined(TCP_FACK)
1154
                tp->snd_fack = tp->snd_una;
1155
                tp->retran_data = 0;
1156
                tp->snd_awnd = 0;
1157
#endif /* TCP_FACK */
1158
                tcp_rcvseqinit(tp);
1159
                tp->t_flags |= TF_ACKNOW;
1160
                tp->t_state = TCPS_SYN_RECEIVED;
1161
                tp->t_timer[TCPT_KEEP] = tcptv_keep_init;
1162
                dropsocket = 0;          /* committed to socket */
1163
                tcpstat.tcps_accepts++;
1164
                goto trimthenstep6;
1165
                }
1166
 
1167
        /*
1168
         * If the state is SYN_RECEIVED:
1169
         *      if seg contains SYN/ACK, send an RST.
1170
         *      if seg contains an ACK, but not for our SYN/ACK, send an RST
1171
         */
1172
 
1173
        case TCPS_SYN_RECEIVED:
1174
                if (tiflags & TH_ACK) {
1175
                        if (tiflags & TH_SYN) {
1176
                                tcpstat.tcps_badsyn++;
1177
                                goto dropwithreset;
1178
                        }
1179
                        if (SEQ_LEQ(th->th_ack, tp->snd_una) ||
1180
                            SEQ_GT(th->th_ack, tp->snd_max))
1181
                                goto dropwithreset;
1182
                }
1183
                break;
1184
 
1185
        /*
1186
         * If the state is SYN_SENT:
1187
         *      if seg contains an ACK, but not for our SYN, drop the input.
1188
         *      if seg contains a RST, then drop the connection.
1189
         *      if seg does not contain SYN, then drop it.
1190
         * Otherwise this is an acceptable SYN segment
1191
         *      initialize tp->rcv_nxt and tp->irs
1192
         *      if seg contains ack then advance tp->snd_una
1193
         *      if SYN has been acked change to ESTABLISHED else SYN_RCVD state
1194
         *      arrange for segment to be acked (eventually)
1195
         *      continue processing rest of data/controls, beginning with URG
1196
         */
1197
        case TCPS_SYN_SENT:
1198
                if ((tiflags & TH_ACK) &&
1199
                    (SEQ_LEQ(th->th_ack, tp->iss) ||
1200
                     SEQ_GT(th->th_ack, tp->snd_max)))
1201
                        goto dropwithreset;
1202
                if (tiflags & TH_RST) {
1203
                        if (tiflags & TH_ACK)
1204
                                tp = tcp_drop(tp, ECONNREFUSED);
1205
                        goto drop;
1206
                }
1207
                if ((tiflags & TH_SYN) == 0)
1208
                        goto drop;
1209
                if (tiflags & TH_ACK) {
1210
                        tp->snd_una = th->th_ack;
1211
                        if (SEQ_LT(tp->snd_nxt, tp->snd_una))
1212
                                tp->snd_nxt = tp->snd_una;
1213
                }
1214
                tp->t_timer[TCPT_REXMT] = 0;
1215
                tp->irs = th->th_seq;
1216
                tcp_rcvseqinit(tp);
1217
                tp->t_flags |= TF_ACKNOW;
1218
#ifdef TCP_SACK
1219
                /*
1220
                 * If we've sent a SACK_PERMITTED option, and the peer
1221
                 * also replied with one, then TF_SACK_PERMIT should have
1222
                 * been set in tcp_dooptions().  If it was not, disable SACKs.
1223
                 */
1224
                if (!tp->sack_disable)
1225
                        if ((tp->t_flags & TF_SACK_PERMIT) == 0)
1226
                                tp->sack_disable = 1;
1227
#endif
1228
                if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) {
1229
                        tcpstat.tcps_connects++;
1230
                        soisconnected(so);
1231
                        tp->t_state = TCPS_ESTABLISHED;
1232
                        /* Do window scaling on this connection? */
1233
                        if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
1234
                                (TF_RCVD_SCALE|TF_REQ_SCALE)) {
1235
                                tp->snd_scale = tp->requested_s_scale;
1236
                                tp->rcv_scale = tp->request_r_scale;
1237
                        }
1238
                        (void) tcp_reass(tp, (struct tcphdr *)0,
1239
                                (struct mbuf *)0, &tlen);
1240
                        /*
1241
                         * if we didn't have to retransmit the SYN,
1242
                         * use its rtt as our initial srtt & rtt var.
1243
                         */
1244
                        if (tp->t_rtt)
1245
                                tcp_xmit_timer(tp, tp->t_rtt);
1246
                        /*
1247
                         * Since new data was acked (the SYN), open the
1248
                         * congestion window by one MSS.  We do this
1249
                         * here, because we won't go through the normal
1250
                         * ACK processing below.  And since this is the
1251
                         * start of the connection, we know we are in
1252
                         * the exponential phase of slow-start.
1253
                         */
1254
                        tp->snd_cwnd += tp->t_maxseg;
1255
                } else
1256
                        tp->t_state = TCPS_SYN_RECEIVED;
1257
 
1258
trimthenstep6:
1259
                /*
1260
                 * Advance th->th_seq to correspond to first data byte.
1261
                 * If data, trim to stay within window,
1262
                 * dropping FIN if necessary.
1263
                 */
1264
                th->th_seq++;
1265
                if (tlen > tp->rcv_wnd) {
1266
                        todrop = tlen - tp->rcv_wnd;
1267
                        m_adj(m, -todrop);
1268
                        tlen = tp->rcv_wnd;
1269
                        tiflags &= ~TH_FIN;
1270
                        tcpstat.tcps_rcvpackafterwin++;
1271
                        tcpstat.tcps_rcvbyteafterwin += todrop;
1272
                }
1273
                tp->snd_wl1 = th->th_seq - 1;
1274
                tp->rcv_up = th->th_seq;
1275
                goto step6;
1276
        }
1277
 
1278
        /*
1279
         * States other than LISTEN or SYN_SENT.
1280
         * First check timestamp, if present.
1281
         * Then check that at least some bytes of segment are within
1282
         * receive window.  If segment begins before rcv_nxt,
1283
         * drop leading data (and SYN); if nothing left, just ack.
1284
         *
1285
         * RFC 1323 PAWS: If we have a timestamp reply on this segment
1286
         * and it's less than ts_recent, drop it.
1287
         */
1288
        if (ts_present && (tiflags & TH_RST) == 0 && tp->ts_recent &&
1289
            TSTMP_LT(ts_val, tp->ts_recent)) {
1290
 
1291
                /* Check to see if ts_recent is over 24 days old.  */
1292
                if ((int)(tcp_now - tp->ts_recent_age) > TCP_PAWS_IDLE) {
1293
                        /*
1294
                         * Invalidate ts_recent.  If this segment updates
1295
                         * ts_recent, the age will be reset later and ts_recent
1296
                         * will get a valid value.  If it does not, setting
1297
                         * ts_recent to zero will at least satisfy the
1298
                         * requirement that zero be placed in the timestamp
1299
                         * echo reply when ts_recent isn't valid.  The
1300
                         * age isn't reset until we get a valid ts_recent
1301
                         * because we don't want out-of-order segments to be
1302
                         * dropped when ts_recent is old.
1303
                         */
1304
                        tp->ts_recent = 0;
1305
                } else {
1306
                        tcpstat.tcps_rcvduppack++;
1307
                        tcpstat.tcps_rcvdupbyte += tlen;
1308
                        tcpstat.tcps_pawsdrop++;
1309
                        goto dropafterack;
1310
                }
1311
        }
1312
 
1313
        todrop = tp->rcv_nxt - th->th_seq;
1314
        if (todrop > 0) {
1315
                if (tiflags & TH_SYN) {
1316
                        tiflags &= ~TH_SYN;
1317
                        th->th_seq++;
1318
                        if (th->th_urp > 1)
1319
                                th->th_urp--;
1320
                        else
1321
                                tiflags &= ~TH_URG;
1322
                        todrop--;
1323
                }
1324
                if (todrop >= tlen ||
1325
                    (todrop == tlen && (tiflags & TH_FIN) == 0)) {
1326
                        /*
1327
                         * Any valid FIN must be to the left of the
1328
                         * window.  At this point, FIN must be a
1329
                         * duplicate or out-of-sequence, so drop it.
1330
                         */
1331
                        tiflags &= ~TH_FIN;
1332
                        /*
1333
                         * Send ACK to resynchronize, and drop any data,
1334
                         * but keep on processing for RST or ACK.
1335
                         */
1336
                        tp->t_flags |= TF_ACKNOW;
1337
                        tcpstat.tcps_rcvdupbyte += todrop = tlen;
1338
                        tcpstat.tcps_rcvduppack++;
1339
                } else {
1340
                        tcpstat.tcps_rcvpartduppack++;
1341
                        tcpstat.tcps_rcvpartdupbyte += todrop;
1342
                }
1343
                hdroptlen += todrop;    /* drop from head afterwards */
1344
                th->th_seq += todrop;
1345
                tlen -= todrop;
1346
                if (th->th_urp > todrop)
1347
                        th->th_urp -= todrop;
1348
                else {
1349
                        tiflags &= ~TH_URG;
1350
                        th->th_urp = 0;
1351
                }
1352
        }
1353
 
1354
        /*
1355
         * If new data are received on a connection after the
1356
         * user processes are gone, then RST the other end.
1357
         */
1358
        if ((so->so_state & SS_NOFDREF) &&
1359
            tp->t_state > TCPS_CLOSE_WAIT && tlen) {
1360
                tp = tcp_close(tp);
1361
                tcpstat.tcps_rcvafterclose++;
1362
                goto dropwithreset;
1363
        }
1364
 
1365
        /*
1366
         * If segment ends after window, drop trailing data
1367
         * (and PUSH and FIN); if nothing left, just ACK.
1368
         */
1369
        todrop = (th->th_seq + tlen) - (tp->rcv_nxt+tp->rcv_wnd);
1370
        if (todrop > 0) {
1371
                tcpstat.tcps_rcvpackafterwin++;
1372
                if (todrop >= tlen) {
1373
                        tcpstat.tcps_rcvbyteafterwin += tlen;
1374
                        /*
1375
                         * If a new connection request is received
1376
                         * while in TIME_WAIT, drop the old connection
1377
                         * and start over if the sequence numbers
1378
                         * are above the previous ones.
1379
                         */
1380
                        if (tiflags & TH_SYN &&
1381
                            tp->t_state == TCPS_TIME_WAIT &&
1382
                            SEQ_GT(th->th_seq, tp->rcv_nxt)) {
1383
                                iss = tp->snd_nxt + TCP_ISSINCR;
1384
                                tp = tcp_close(tp);
1385
                                goto findpcb;
1386
                        }
1387
                        /*
1388
                         * If window is closed can only take segments at
1389
                         * window edge, and have to drop data and PUSH from
1390
                         * incoming segments.  Continue processing, but
1391
                         * remember to ack.  Otherwise, drop segment
1392
                         * and ack.
1393
                         */
1394
                        if (tp->rcv_wnd == 0 && th->th_seq == tp->rcv_nxt) {
1395
                                tp->t_flags |= TF_ACKNOW;
1396
                                tcpstat.tcps_rcvwinprobe++;
1397
                        } else
1398
                                goto dropafterack;
1399
                } else
1400
                        tcpstat.tcps_rcvbyteafterwin += todrop;
1401
                m_adj(m, -todrop);
1402
                tlen -= todrop;
1403
                tiflags &= ~(TH_PUSH|TH_FIN);
1404
        }
1405
 
1406
        /*
1407
         * If last ACK falls within this segment's sequence numbers,
1408
         * record its timestamp.
1409
         * Fix from Braden, see Stevens p. 870
1410
         */
1411
        if (ts_present && TSTMP_GEQ(ts_val, tp->ts_recent) &&
1412
            SEQ_LEQ(th->th_seq, tp->last_ack_sent)) {
1413
                tp->ts_recent_age = tcp_now;
1414
                tp->ts_recent = ts_val;
1415
        }
1416
 
1417
        /*
1418
         * If the RST bit is set examine the state:
1419
         *    SYN_RECEIVED STATE:
1420
         *      If passive open, return to LISTEN state.
1421
         *      If active open, inform user that connection was refused.
1422
         *    ESTABLISHED, FIN_WAIT_1, FIN_WAIT_2, CLOSE_WAIT STATES:
1423
         *      Inform user that connection was reset, and close tcb.
1424
         *    CLOSING, LAST_ACK, TIME_WAIT STATES
1425
         *      Close the tcb.
1426
         */
1427
        if (tiflags & TH_RST) {
1428
#ifndef INET6
1429
                if (ti->ti_seq != tp->last_ack_sent)
1430
#else
1431
                if (th->th_seq != tp->last_ack_sent)
1432
#endif
1433
                        goto drop;
1434
 
1435
                switch (tp->t_state) {
1436
                case TCPS_SYN_RECEIVED:
1437
                        so->so_error = ECONNREFUSED;
1438
                        goto close;
1439
 
1440
                case TCPS_ESTABLISHED:
1441
                case TCPS_FIN_WAIT_1:
1442
                case TCPS_FIN_WAIT_2:
1443
                case TCPS_CLOSE_WAIT:
1444
                        so->so_error = ECONNRESET;
1445
                close:
1446
                        tp->t_state = TCPS_CLOSED;
1447
                        tcpstat.tcps_drops++;
1448
                        tp = tcp_close(tp);
1449
                        goto drop;
1450
                case TCPS_CLOSING:
1451
                case TCPS_LAST_ACK:
1452
                case TCPS_TIME_WAIT:
1453
                        tp = tcp_close(tp);
1454
                        goto drop;
1455
                }
1456
        }
1457
 
1458
        /*
1459
         * If a SYN is in the window, then this is an
1460
         * error and we send an RST and drop the connection.
1461
         */
1462
        if (tiflags & TH_SYN) {
1463
                tp = tcp_drop(tp, ECONNRESET);
1464
                goto dropwithreset;
1465
        }
1466
 
1467
        /*
1468
         * If the ACK bit is off we drop the segment and return.
1469
         */
1470
        if ((tiflags & TH_ACK) == 0) {
1471
                if (tp->t_flags & TF_ACKNOW)
1472
                        goto dropafterack;
1473
                else
1474
                        goto drop;
1475
        }
1476
 
1477
        /*
1478
         * Ack processing.
1479
         */
1480
        switch (tp->t_state) {
1481
 
1482
        /*
1483
         * In SYN_RECEIVED state, the ack ACKs our SYN, so enter
1484
         * ESTABLISHED state and continue processing.
1485
         * The ACK was checked above.
1486
         */
1487
        case TCPS_SYN_RECEIVED:
1488
                tcpstat.tcps_connects++;
1489
                soisconnected(so);
1490
                tp->t_state = TCPS_ESTABLISHED;
1491
                /* Do window scaling? */
1492
                if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
1493
                        (TF_RCVD_SCALE|TF_REQ_SCALE)) {
1494
                        tp->snd_scale = tp->requested_s_scale;
1495
                        tp->rcv_scale = tp->request_r_scale;
1496
                }
1497
                (void) tcp_reass(tp, (struct tcphdr *)0, (struct mbuf *)0,
1498
                                 &tlen);
1499
                tp->snd_wl1 = th->th_seq - 1;
1500
                /* fall into ... */
1501
 
1502
        /*
1503
         * In ESTABLISHED state: drop duplicate ACKs; ACK out of range
1504
         * ACKs.  If the ack is in the range
1505
         *      tp->snd_una < ti->ti_ack <= tp->snd_max
1506
         * then advance tp->snd_una to ti->ti_ack and drop
1507
         * data from the retransmission queue.  If this ACK reflects
1508
         * more up to date window information we update our window information.
1509
         */
1510
        case TCPS_ESTABLISHED:
1511
        case TCPS_FIN_WAIT_1:
1512
        case TCPS_FIN_WAIT_2:
1513
        case TCPS_CLOSE_WAIT:
1514
        case TCPS_CLOSING:
1515
        case TCPS_LAST_ACK:
1516
        case TCPS_TIME_WAIT:
1517
                if (SEQ_LEQ(th->th_ack, tp->snd_una)) {
1518
                        /*
1519
                         * Duplicate/old ACK processing.
1520
                         * Increments t_dupacks:
1521
                         *      Pure duplicate (same seq/ack/window, no data)
1522
                         * Doesn't affect t_dupacks:
1523
                         *      Data packets.
1524
                         *      Normal window updates (window opens)
1525
                         * Resets t_dupacks:
1526
                         *      New data ACKed.
1527
                         *      Window shrinks
1528
                         *      Old ACK
1529
                         */
1530
                        if (tlen)
1531
                                break;
1532
                        /*
1533
                         * If we get an old ACK, there is probably packet
1534
                         * reordering going on.  Be conservative and reset
1535
                         * t_dupacks so that we are less aggressive in
1536
                         * doing a fast retransmit.
1537
                         */
1538
                        if (th->th_ack != tp->snd_una) {
1539
                                tp->t_dupacks = 0;
1540
                                break;
1541
                        }
1542
                        if (tiwin == tp->snd_wnd) {
1543
                                tcpstat.tcps_rcvdupack++;
1544
                                /*
1545
                                 * If we have outstanding data (other than
1546
                                 * a window probe), this is a completely
1547
                                 * duplicate ack (ie, window info didn't
1548
                                 * change), the ack is the biggest we've
1549
                                 * seen and we've seen exactly our rexmt
1550
                                 * threshold of them, assume a packet
1551
                                 * has been dropped and retransmit it.
1552
                                 * Kludge snd_nxt & the congestion
1553
                                 * window so we send only this one
1554
                                 * packet.
1555
                                 *
1556
                                 * We know we're losing at the current
1557
                                 * window size so do congestion avoidance
1558
                                 * (set ssthresh to half the current window
1559
                                 * and pull our congestion window back to
1560
                                 * the new ssthresh).
1561
                                 *
1562
                                 * Dup acks mean that packets have left the
1563
                                 * network (they're now cached at the receiver)
1564
                                 * so bump cwnd by the amount in the receiver
1565
                                 * to keep a constant cwnd packets in the
1566
                                 * network.
1567
                                 */
1568
                                if (tp->t_timer[TCPT_REXMT] == 0)
1569
                                        tp->t_dupacks = 0;
1570
#if defined(TCP_SACK) && defined(TCP_FACK)
1571
                                /*
1572
                                 * In FACK, can enter fast rec. if the receiver
1573
                                 * reports a reass. queue longer than 3 segs.
1574
                                 */
1575
                                else if (++tp->t_dupacks == tcprexmtthresh ||
1576
                                    ((SEQ_GT(tp->snd_fack, tcprexmtthresh *
1577
                                    tp->t_maxseg + tp->snd_una)) &&
1578
                                    SEQ_GT(tp->snd_una, tp->snd_last))) {
1579
#else
1580
                                else if (++tp->t_dupacks == tcprexmtthresh) {
1581
#endif /* TCP_FACK */
1582
                                        tcp_seq onxt = tp->snd_nxt;
1583
                                        u_long win =
1584
                                            ulmin(tp->snd_wnd, tp->snd_cwnd) /
1585
                                                2 / tp->t_maxseg;
1586
 
1587
#if defined(TCP_SACK) || defined(TCP_NEWRENO) 
1588
                                        if (SEQ_LT(th->th_ack, tp->snd_last)){
1589
                                                /*
1590
                                                 * False fast retx after
1591
                                                 * timeout.  Do not cut window.
1592
                                                 */
1593
                                                tp->snd_cwnd += tp->t_maxseg;
1594
                                                tp->t_dupacks = 0;
1595
                                                (void) tcp_output(tp);
1596
                                                goto drop;
1597
                                        }
1598
#endif
1599
                                        if (win < 2)
1600
                                                win = 2;
1601
                                        tp->snd_ssthresh = win * tp->t_maxseg;
1602
#if defined(TCP_SACK) || defined(TCP_NEWRENO)
1603
                                        tp->snd_last = tp->snd_max;
1604
#endif
1605
#ifdef TCP_SACK
1606
                                        if (!tp->sack_disable) {
1607
                                                tp->t_timer[TCPT_REXMT] = 0;
1608
                                                tp->t_rtt = 0;
1609
                                                tcpstat.tcps_sndrexmitfast++;
1610
#if defined(TCP_SACK) && defined(TCP_FACK) 
1611
                                                (void) tcp_output(tp);
1612
                                                /*
1613
                                                 * During FR, snd_cwnd is held
1614
                                                 * constant for FACK.
1615
                                                 */
1616
                                                tp->snd_cwnd = tp->snd_ssthresh;
1617
                                                tp->t_dupacks = tcprexmtthresh;
1618
#else
1619
                                                /*
1620
                                                 * tcp_output() will send
1621
                                                 * oldest SACK-eligible rtx.
1622
                                                 */
1623
                                                (void) tcp_output(tp);
1624
                                                tp->snd_cwnd = tp->snd_ssthresh+
1625
                                                   tp->t_maxseg * tp->t_dupacks;
1626
#endif /* TCP_FACK */
1627
                                                goto drop;
1628
                                        }
1629
#endif /* TCP_SACK */
1630
                                        tp->t_timer[TCPT_REXMT] = 0;
1631
                                        tp->t_rtt = 0;
1632
                                        tp->snd_nxt = th->th_ack;
1633
                                        tp->snd_cwnd = tp->t_maxseg;
1634
                                        tcpstat.tcps_sndrexmitfast++;
1635
                                        (void) tcp_output(tp);
1636
 
1637
                                        tp->snd_cwnd = tp->snd_ssthresh +
1638
                                            tp->t_maxseg * tp->t_dupacks;
1639
                                        if (SEQ_GT(onxt, tp->snd_nxt))
1640
                                                tp->snd_nxt = onxt;
1641
                                        goto drop;
1642
                                } else if (tp->t_dupacks > tcprexmtthresh) {
1643
#if defined(TCP_SACK) && defined(TCP_FACK)
1644
                                        /*
1645
                                         * while (awnd < cwnd)
1646
                                         *         sendsomething();
1647
                                         */
1648
                                        if (!tp->sack_disable) {
1649
                                                if (tp->snd_awnd < tp->snd_cwnd)
1650
                                                        tcp_output(tp);
1651
                                                goto drop;
1652
                                        }
1653
#endif /* TCP_FACK */
1654
                                        tp->snd_cwnd += tp->t_maxseg;
1655
                                        (void) tcp_output(tp);
1656
                                        goto drop;
1657
                                }
1658
                        } else if (tiwin < tp->snd_wnd) {
1659
                                /*
1660
                                 * The window was retracted!  Previous dup
1661
                                 * ACKs may have been due to packets arriving
1662
                                 * after the shrunken window, not a missing
1663
                                 * packet, so play it safe and reset t_dupacks
1664
                                 */
1665
                                tp->t_dupacks = 0;
1666
                        }
1667
                        break;
1668
                }
1669
                /*
1670
                 * If the congestion window was inflated to account
1671
                 * for the other side's cached packets, retract it.
1672
                 */
1673
#ifdef TCP_NEWRENO
1674
                if (tp->t_dupacks >= tcprexmtthresh && !tcp_newreno(tp, th)) {
1675
                        /* Out of fast recovery */
1676
                        tp->snd_cwnd = tp->snd_ssthresh;
1677
                        /*
1678
                         * Window inflation should have left us with approx.
1679
                         * snd_ssthresh outstanding data.  But in case we
1680
                         * would be inclined to send a burst, better to do
1681
                         * it via the slow start mechanism.
1682
                         */
1683
                        if (tcp_seq_subtract(tp->snd_max, th->th_ack) <
1684
                            tp->snd_ssthresh)
1685
                                tp->snd_cwnd = tcp_seq_subtract(tp->snd_max,
1686
                                    th->th_ack) + tp->t_maxseg;
1687
                        tp->t_dupacks = 0;
1688
                }
1689
#elif defined(TCP_SACK)
1690
                if (!tp->sack_disable) {
1691
                        if (tp->t_dupacks >= tcprexmtthresh) {
1692
                                /* Check for a partial ACK */
1693
                                if (tcp_sack_partialack(tp, th)) {
1694
#if defined(TCP_SACK) && defined(TCP_FACK)
1695
                                        /* Force call to tcp_output */
1696
                                        if (tp->snd_awnd < tp->snd_cwnd)
1697
                                                needoutput = 1;
1698
#else
1699
                                        tp->snd_cwnd += tp->t_maxseg;
1700
                                        needoutput = 1;
1701
#endif /* TCP_FACK */
1702
                                } else {
1703
                                        /* Out of fast recovery */
1704
                                        tp->snd_cwnd = tp->snd_ssthresh;
1705
                                        if (tcp_seq_subtract(tp->snd_max,
1706
                                            th->th_ack) < tp->snd_ssthresh)
1707
                                                tp->snd_cwnd =
1708
                                                   tcp_seq_subtract(tp->snd_max,
1709
                                                   th->th_ack) + tp->t_maxseg;
1710
                                        tp->t_dupacks = 0;
1711
#if defined(TCP_SACK) && defined(TCP_FACK)
1712
                                        if (SEQ_GT(th->th_ack, tp->snd_fack))
1713
                                                tp->snd_fack = th->th_ack;
1714
#endif /* TCP_FACK */
1715
                                }
1716
                        }
1717
                } else {
1718
                        if (tp->t_dupacks >= tcprexmtthresh &&
1719
                            !tcp_newreno(tp, th)) {
1720
                                /* Out of fast recovery */
1721
                                tp->snd_cwnd = tp->snd_ssthresh;
1722
                                if (tcp_seq_subtract(tp->snd_max, th->th_ack) <
1723
                                    tp->snd_ssthresh)
1724
                                        tp->snd_cwnd =
1725
                                            tcp_seq_subtract(tp->snd_max,
1726
                                            th->th_ack) + tp->t_maxseg;
1727
                                tp->t_dupacks = 0;
1728
                        }
1729
                }
1730
#else /* else neither TCP_NEWRENO nor TCP_SACK */
1731
                if (tp->t_dupacks >= tcprexmtthresh &&
1732
                    tp->snd_cwnd > tp->snd_ssthresh)
1733
                        tp->snd_cwnd = tp->snd_ssthresh;
1734
                tp->t_dupacks = 0;
1735
#endif
1736
                if (SEQ_GT(th->th_ack, tp->snd_max)) {
1737
                        tcpstat.tcps_rcvacktoomuch++;
1738
                        goto dropafterack;
1739
                }
1740
                acked = th->th_ack - tp->snd_una;
1741
                tcpstat.tcps_rcvackpack++;
1742
                tcpstat.tcps_rcvackbyte += acked;
1743
 
1744
                /*
1745
                 * If we have a timestamp reply, update smoothed
1746
                 * round trip time.  If no timestamp is present but
1747
                 * transmit timer is running and timed sequence
1748
                 * number was acked, update smoothed round trip time.
1749
                 * Since we now have an rtt measurement, cancel the
1750
                 * timer backoff (cf., Phil Karn's retransmit alg.).
1751
                 * Recompute the initial retransmit timer.
1752
                 */
1753
                if (ts_present)
1754
                        tcp_xmit_timer(tp, tcp_now-ts_ecr+1);
1755
                else if (tp->t_rtt && SEQ_GT(th->th_ack, tp->t_rtseq))
1756
                        tcp_xmit_timer(tp,tp->t_rtt);
1757
 
1758
                /*
1759
                 * If all outstanding data is acked, stop retransmit
1760
                 * timer and remember to restart (more output or persist).
1761
                 * If there is more data to be acked, restart retransmit
1762
                 * timer, using current (possibly backed-off) value.
1763
                 */
1764
                if (th->th_ack == tp->snd_max) {
1765
                        tp->t_timer[TCPT_REXMT] = 0;
1766
                        needoutput = 1;
1767
                } else if (tp->t_timer[TCPT_PERSIST] == 0)
1768
                        tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
1769
                /*
1770
                 * When new data is acked, open the congestion window.
1771
                 * If the window gives us less than ssthresh packets
1772
                 * in flight, open exponentially (maxseg per packet).
1773
                 * Otherwise open linearly: maxseg per window
1774
                 * (maxseg^2 / cwnd per packet).
1775
                 */
1776
                {
1777
                register u_int cw = tp->snd_cwnd;
1778
                register u_int incr = tp->t_maxseg;
1779
 
1780
                if (cw > tp->snd_ssthresh)
1781
                        incr = incr * incr / cw;
1782
#if defined (TCP_NEWRENO) || defined (TCP_SACK)
1783
                if (SEQ_GEQ(th->th_ack, tp->snd_last))
1784
#endif
1785
                tp->snd_cwnd = min(cw + incr, TCP_MAXWIN<<tp->snd_scale);
1786
                }
1787
                ND6_HINT(tp);
1788
                if (acked > so->so_snd.sb_cc) {
1789
                        tp->snd_wnd -= so->so_snd.sb_cc;
1790
                        sbdrop(&so->so_snd, (int)so->so_snd.sb_cc);
1791
                        ourfinisacked = 1;
1792
                } else {
1793
                        sbdrop(&so->so_snd, acked);
1794
                        tp->snd_wnd -= acked;
1795
                        ourfinisacked = 0;
1796
                }
1797
                if (sb_notify(&so->so_snd))
1798
                        sowwakeup(so);
1799
                tp->snd_una = th->th_ack;
1800
                if (SEQ_LT(tp->snd_nxt, tp->snd_una))
1801
                        tp->snd_nxt = tp->snd_una;
1802
#if defined (TCP_SACK) && defined (TCP_FACK)
1803
                if (SEQ_GT(tp->snd_una, tp->snd_fack))
1804
                        tp->snd_fack = tp->snd_una;
1805
#endif
1806
 
1807
                switch (tp->t_state) {
1808
 
1809
                /*
1810
                 * In FIN_WAIT_1 STATE in addition to the processing
1811
                 * for the ESTABLISHED state if our FIN is now acknowledged
1812
                 * then enter FIN_WAIT_2.
1813
                 */
1814
                case TCPS_FIN_WAIT_1:
1815
                        if (ourfinisacked) {
1816
                                /*
1817
                                 * If we can't receive any more
1818
                                 * data, then closing user can proceed.
1819
                                 * Starting the timer is contrary to the
1820
                                 * specification, but if we don't get a FIN
1821
                                 * we'll hang forever.
1822
                                 */
1823
                                if (so->so_state & SS_CANTRCVMORE) {
1824
                                        soisdisconnected(so);
1825
                                        tp->t_timer[TCPT_2MSL] = tcp_maxidle;
1826
                                }
1827
                                tp->t_state = TCPS_FIN_WAIT_2;
1828
                        }
1829
                        break;
1830
 
1831
                /*
1832
                 * In CLOSING STATE in addition to the processing for
1833
                 * the ESTABLISHED state if the ACK acknowledges our FIN
1834
                 * then enter the TIME-WAIT state, otherwise ignore
1835
                 * the segment.
1836
                 */
1837
                case TCPS_CLOSING:
1838
                        if (ourfinisacked) {
1839
                                tp->t_state = TCPS_TIME_WAIT;
1840
                                tcp_canceltimers(tp);
1841
                                tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
1842
                                soisdisconnected(so);
1843
                        }
1844
                        break;
1845
 
1846
                /*
1847
                 * In LAST_ACK, we may still be waiting for data to drain
1848
                 * and/or to be acked, as well as for the ack of our FIN.
1849
                 * If our FIN is now acknowledged, delete the TCB,
1850
                 * enter the closed state and return.
1851
                 */
1852
                case TCPS_LAST_ACK:
1853
                        if (ourfinisacked) {
1854
                                tp = tcp_close(tp);
1855
                                goto drop;
1856
                        }
1857
                        break;
1858
 
1859
                /*
1860
                 * In TIME_WAIT state the only thing that should arrive
1861
                 * is a retransmission of the remote FIN.  Acknowledge
1862
                 * it and restart the finack timer.
1863
                 */
1864
                case TCPS_TIME_WAIT:
1865
                        tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
1866
                        goto dropafterack;
1867
                }
1868
        }
1869
 
1870
step6:
1871
        /*
1872
         * Update window information.
1873
         * Don't look at window if no ACK: TAC's send garbage on first SYN.
1874
         */
1875
        if ((tiflags & TH_ACK) && (SEQ_LT(tp->snd_wl1, th->th_seq) ||
1876
            (tp->snd_wl1 == th->th_seq && SEQ_LT(tp->snd_wl2, th->th_ack)) ||
1877
            (tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd))) {
1878
                /* keep track of pure window updates */
1879
                if (tlen == 0 &&
1880
                    tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd)
1881
                        tcpstat.tcps_rcvwinupd++;
1882
                tp->snd_wnd = tiwin;
1883
                tp->snd_wl1 = th->th_seq;
1884
                tp->snd_wl2 = th->th_ack;
1885
                if (tp->snd_wnd > tp->max_sndwnd)
1886
                        tp->max_sndwnd = tp->snd_wnd;
1887
                needoutput = 1;
1888
        }
1889
 
1890
        /*
1891
         * Process segments with URG.
1892
         */
1893
        if ((tiflags & TH_URG) && th->th_urp &&
1894
            TCPS_HAVERCVDFIN(tp->t_state) == 0) {
1895
                /*
1896
                 * This is a kludge, but if we receive and accept
1897
                 * random urgent pointers, we'll crash in
1898
                 * soreceive.  It's hard to imagine someone
1899
                 * actually wanting to send this much urgent data.
1900
                 */
1901
                if (th->th_urp + so->so_rcv.sb_cc > sb_max) {
1902
                        th->th_urp = 0;                  /* XXX */
1903
                        tiflags &= ~TH_URG;             /* XXX */
1904
                        goto dodata;                    /* XXX */
1905
                }
1906
                /*
1907
                 * If this segment advances the known urgent pointer,
1908
                 * then mark the data stream.  This should not happen
1909
                 * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since
1910
                 * a FIN has been received from the remote side.
1911
                 * In these states we ignore the URG.
1912
                 *
1913
                 * According to RFC961 (Assigned Protocols),
1914
                 * the urgent pointer points to the last octet
1915
                 * of urgent data.  We continue, however,
1916
                 * to consider it to indicate the first octet
1917
                 * of data past the urgent section as the original
1918
                 * spec states (in one of two places).
1919
                 */
1920
                if (SEQ_GT(th->th_seq+th->th_urp, tp->rcv_up)) {
1921
                        tp->rcv_up = th->th_seq + th->th_urp;
1922
                        so->so_oobmark = so->so_rcv.sb_cc +
1923
                            (tp->rcv_up - tp->rcv_nxt) - 1;
1924
                        if (so->so_oobmark == 0)
1925
                                so->so_state |= SS_RCVATMARK;
1926
                        sohasoutofband(so);
1927
                        tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA);
1928
                }
1929
                /*
1930
                 * Remove out of band data so doesn't get presented to user.
1931
                 * This can happen independent of advancing the URG pointer,
1932
                 * but if two URG's are pending at once, some out-of-band
1933
                 * data may creep in... ick.
1934
                 */
1935
                if (th->th_urp <= (u_int16_t) tlen
1936
#ifdef SO_OOBINLINE
1937
                     && (so->so_options & SO_OOBINLINE) == 0
1938
#endif
1939
                     )
1940
                        tcp_pulloutofband(so, th->th_urp, m, hdroptlen);
1941
        } else
1942
                /*
1943
                 * If no out of band data is expected,
1944
                 * pull receive urgent pointer along
1945
                 * with the receive window.
1946
                 */
1947
                if (SEQ_GT(tp->rcv_nxt, tp->rcv_up))
1948
                        tp->rcv_up = tp->rcv_nxt;
1949
dodata:                                                 /* XXX */
1950
 
1951
        /*
1952
         * Process the segment text, merging it into the TCP sequencing queue,
1953
         * and arranging for acknowledgment of receipt if necessary.
1954
         * This process logically involves adjusting tp->rcv_wnd as data
1955
         * is presented to the user (this happens in tcp_usrreq.c,
1956
         * case PRU_RCVD).  If a FIN has already been received on this
1957
         * connection then we just ignore the text.
1958
         */
1959
        if ((tlen || (tiflags & TH_FIN)) &&
1960
            TCPS_HAVERCVDFIN(tp->t_state) == 0) {
1961
                if (th->th_seq == tp->rcv_nxt && tp->segq.lh_first == NULL &&
1962
                    tp->t_state == TCPS_ESTABLISHED) {
1963
                        if (th->th_flags & TH_PUSH)
1964
                                tp->t_flags |= TF_ACKNOW;
1965
                        else
1966
                                tp->t_flags |= TF_DELACK;
1967
                        tp->rcv_nxt += tlen;
1968
                        tiflags = th->th_flags & TH_FIN;
1969
                        tcpstat.tcps_rcvpack++;
1970
                        tcpstat.tcps_rcvbyte += tlen;
1971
                        ND6_HINT(tp);
1972
                        m_adj(m, hdroptlen);
1973
                        sbappend(&so->so_rcv, m);
1974
                        sorwakeup(so);
1975
                } else {
1976
                        m_adj(m, hdroptlen);
1977
                        tiflags = tcp_reass(tp, th, m, &tlen);
1978
                        tp->t_flags |= TF_ACKNOW;
1979
                }
1980
#ifdef TCP_SACK
1981
                if (!tp->sack_disable)
1982
                        tcp_update_sack_list(tp);
1983
#endif 
1984
 
1985
                /*
1986
                 * variable len never referenced again in modern BSD,
1987
                 * so why bother computing it ??
1988
                 */
1989
#if 0
1990
                /*
1991
                 * Note the amount of data that peer has sent into
1992
                 * our window, in order to estimate the sender's
1993
                 * buffer size.
1994
                 */
1995
                len = so->so_rcv.sb_hiwat - (tp->rcv_adv - tp->rcv_nxt);
1996
#endif /* 0 */
1997
        } else {
1998
                m_freem(m);
1999
                tiflags &= ~TH_FIN;
2000
        }
2001
 
2002
        /*
2003
         * If FIN is received ACK the FIN and let the user know
2004
         * that the connection is closing.  Ignore a FIN received before
2005
         * the connection is fully established.
2006
         */
2007
        if ((tiflags & TH_FIN) && TCPS_HAVEESTABLISHED(tp->t_state)) {
2008
                if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
2009
                        socantrcvmore(so);
2010
                        tp->t_flags |= TF_ACKNOW;
2011
                        tp->rcv_nxt++;
2012
                }
2013
                switch (tp->t_state) {
2014
 
2015
                /*
2016
                 * In ESTABLISHED STATE enter the CLOSE_WAIT state.
2017
                 */
2018
                case TCPS_ESTABLISHED:
2019
                        tp->t_state = TCPS_CLOSE_WAIT;
2020
                        break;
2021
 
2022
                /*
2023
                 * If still in FIN_WAIT_1 STATE FIN has not been acked so
2024
                 * enter the CLOSING state.
2025
                 */
2026
                case TCPS_FIN_WAIT_1:
2027
                        tp->t_state = TCPS_CLOSING;
2028
                        break;
2029
 
2030
                /*
2031
                 * In FIN_WAIT_2 state enter the TIME_WAIT state,
2032
                 * starting the time-wait timer, turning off the other
2033
                 * standard timers.
2034
                 */
2035
                case TCPS_FIN_WAIT_2:
2036
                        tp->t_state = TCPS_TIME_WAIT;
2037
                        tcp_canceltimers(tp);
2038
                        tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
2039
                        soisdisconnected(so);
2040
                        break;
2041
 
2042
                /*
2043
                 * In TIME_WAIT state restart the 2 MSL time_wait timer.
2044
                 */
2045
                case TCPS_TIME_WAIT:
2046
                        tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
2047
                        break;
2048
                }
2049
        }
2050
#ifdef TCPDEBUG
2051
        if (so->so_options & SO_DEBUG) {
2052
#ifdef INET6
2053
                if (tp->pf == PF_INET6)
2054
                        tcp_trace(TA_INPUT, ostate, tp, (caddr_t) &tcp_saveti6, 0, tlen);
2055
                else
2056
#endif /* INET6 */
2057
                        tcp_trace(TA_INPUT, ostate, tp, (caddr_t) &tcp_saveti, 0, tlen);
2058
        }
2059
#endif /* TCPDEBUG */
2060
 
2061
        /*
2062
         * Return any desired output.
2063
         */
2064
        if (needoutput || (tp->t_flags & TF_ACKNOW)) {
2065
                (void) tcp_output(tp);
2066
        }
2067
        return;
2068
 
2069
dropafterack:
2070
        /*
2071
         * Generate an ACK dropping incoming segment if it occupies
2072
         * sequence space, where the ACK reflects our state.
2073
         */
2074
        if (tiflags & TH_RST)
2075
                goto drop;
2076
        m_freem(m);
2077
        tp->t_flags |= TF_ACKNOW;
2078
        (void) tcp_output(tp);
2079
        return;
2080
 
2081
dropwithreset:
2082
        /*
2083
         * Generate a RST, dropping incoming segment.
2084
         * Make ACK acceptable to originator of segment.
2085
         * Don't bother to respond if destination was broadcast/multicast.
2086
         */
2087
        if ((tiflags & TH_RST) || m->m_flags & (M_BCAST|M_MCAST))
2088
          goto drop;
2089
#ifdef INET6
2090
        if (is_ipv6) {
2091
          /* For following calls to tcp_respond */
2092
          ti = mtod(m, struct tcpiphdr *);
2093
          if (IN6_IS_ADDR_MULTICAST(&ipv6->ip6_dst))
2094
            goto drop;
2095
        } else {
2096
#endif /* INET6 */
2097
            if (IN_MULTICAST(ti->ti_dst.s_addr))
2098
              goto drop;
2099
#ifdef INET6
2100
        }
2101
#endif /* INET6 */
2102
        if (tiflags & TH_ACK)
2103
                tcp_respond(tp, (caddr_t) ti, m, (tcp_seq)0, th->th_ack, TH_RST);
2104
        else {
2105
                if (tiflags & TH_SYN)
2106
                        tlen++;
2107
                tcp_respond(tp, (caddr_t) ti, m, th->th_seq+tlen, (tcp_seq)0,
2108
                    TH_RST|TH_ACK);
2109
        }
2110
        /* destroy temporarily created socket */
2111
        if (dropsocket)
2112
                (void) soabort(so);
2113
        return;
2114
 
2115
drop:
2116
        /*
2117
         * Drop space held by incoming segment and return.
2118
         */
2119
#ifdef TCPDEBUG
2120
        if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) {
2121
#ifdef INET6
2122
          if (tp->pf == PF_INET6)
2123
            tcp_trace(TA_DROP, ostate, tp, (caddr_t) &tcp_saveti6, 0, tlen);
2124
          else
2125
#endif /* INET6 */
2126
            tcp_trace(TA_DROP, ostate, tp, (caddr_t) &tcp_saveti, 0, tlen);
2127
        }
2128
#endif /* TCPDEBUG */
2129
 
2130
        m_freem(m);
2131
        /* destroy temporarily created socket */
2132
        if (dropsocket)
2133
                (void) soabort(so);
2134
        return;
2135
#ifndef TUBA_INCLUDE
2136
}
2137
 
2138
void
2139
tcp_dooptions(tp, cp, cnt, th, ts_present, ts_val, ts_ecr)
2140
        struct tcpcb *tp;
2141
        u_char *cp;
2142
        int cnt;
2143
        struct tcphdr *th;
2144
        int *ts_present;
2145
        u_int32_t *ts_val, *ts_ecr;
2146
{
2147
        u_int16_t mss = 0;
2148
        int opt, optlen;
2149
 
2150
        for (; cnt > 0; cnt -= optlen, cp += optlen) {
2151
                opt = cp[0];
2152
                if (opt == TCPOPT_EOL)
2153
                        break;
2154
                if (opt == TCPOPT_NOP)
2155
                        optlen = 1;
2156
                else {
2157
                        optlen = cp[1];
2158
                        if (optlen <= 0)
2159
                                break;
2160
                }
2161
                switch (opt) {
2162
 
2163
                default:
2164
                        continue;
2165
 
2166
                case TCPOPT_MAXSEG:
2167
                        if (optlen != TCPOLEN_MAXSEG)
2168
                                continue;
2169
                        if (!(th->th_flags & TH_SYN))
2170
                                continue;
2171
                        bcopy((char *) cp + 2, (char *) &mss, sizeof(mss));
2172
                        NTOHS(mss);
2173
                        break;
2174
 
2175
                case TCPOPT_WINDOW:
2176
                        if (optlen != TCPOLEN_WINDOW)
2177
                                continue;
2178
                        if (!(th->th_flags & TH_SYN))
2179
                                continue;
2180
                        tp->t_flags |= TF_RCVD_SCALE;
2181
                        tp->requested_s_scale = min(cp[2], TCP_MAX_WINSHIFT);
2182
                        break;
2183
 
2184
                case TCPOPT_TIMESTAMP:
2185
                        if (optlen != TCPOLEN_TIMESTAMP)
2186
                                continue;
2187
                        *ts_present = 1;
2188
                        bcopy((char *)cp + 2, (char *) ts_val, sizeof(*ts_val));
2189
                        NTOHL(*ts_val);
2190
                        bcopy((char *)cp + 6, (char *) ts_ecr, sizeof(*ts_ecr));
2191
                        NTOHL(*ts_ecr);
2192
 
2193
                        /*
2194
                         * A timestamp received in a SYN makes
2195
                         * it ok to send timestamp requests and replies.
2196
                         */
2197
                        if (th->th_flags & TH_SYN) {
2198
                                tp->t_flags |= TF_RCVD_TSTMP;
2199
                                tp->ts_recent = *ts_val;
2200
                                tp->ts_recent_age = tcp_now;
2201
                        }
2202
                        break;
2203
 
2204
#ifdef TCP_SACK 
2205
                case TCPOPT_SACK_PERMITTED:
2206
                        if (tp->sack_disable || optlen!=TCPOLEN_SACK_PERMITTED)
2207
                                continue;
2208
                        if (th->th_flags & TH_SYN)
2209
                                /* MUST only be set on SYN */
2210
                                tp->t_flags |= TF_SACK_PERMIT;
2211
                        break;
2212
                case TCPOPT_SACK:
2213
                        if (tcp_sack_option(tp, th, cp, optlen))
2214
                                continue;
2215
                        break;
2216
#endif          
2217
                }
2218
        }
2219
        /* Update t_maxopd and t_maxseg after all options are processed */
2220
        if (th->th_flags & TH_SYN)
2221
                (void) tcp_mss(tp, mss);        /* sets t_maxseg */
2222
}
2223
 
2224
#if defined(TCP_SACK) || defined(TCP_NEWRENO)
/*
 * Return a - b.  The subtraction is done on the u_long operands, the
 * result is passed through a (long) cast, and is then handed back in the
 * function's u_long return type.
 */
u_long
tcp_seq_subtract(a, b)
        u_long a, b;
{
        long delta;

        delta = (long)(a - b);
        return (delta);
}
#endif
2232
 
2233
 
2234
#ifdef TCP_SACK 
2235
/*
2236
 * This function is called upon receipt of new valid data (while not in header
2237
 * prediction mode), and it updates the ordered list of sacks.
2238
 */
2239
void
2240
tcp_update_sack_list(tp)
2241
        struct tcpcb *tp;
2242
{
2243
        /*
2244
         * First reported block MUST be the most recent one.  Subsequent
2245
         * blocks SHOULD be in the order in which they arrived at the
2246
         * receiver.  These two conditions make the implementation fully
2247
         * compliant with RFC 2018.
2248
         */
2249
        int i, j = 0, count = 0, lastpos = -1;
2250
        struct sackblk sack, firstsack, temp[MAX_SACK_BLKS];
2251
 
2252
        /* First clean up current list of sacks */
2253
        for (i = 0; i < tp->rcv_numsacks; i++) {
2254
                sack = tp->sackblks[i];
2255
                if (sack.start == 0 && sack.end == 0) {
2256
                        count++; /* count = number of blocks to be discarded */
2257
                        continue;
2258
                }
2259
                if (SEQ_LEQ(sack.end, tp->rcv_nxt)) {
2260
                        tp->sackblks[i].start = tp->sackblks[i].end = 0;
2261
                        count++;
2262
                } else {
2263
                        temp[j].start = tp->sackblks[i].start;
2264
                        temp[j++].end = tp->sackblks[i].end;
2265
                }
2266
        }
2267
        tp->rcv_numsacks -= count;
2268
        if (tp->rcv_numsacks == 0) { /* no sack blocks currently (fast path) */
2269
                tcp_clean_sackreport(tp);
2270
                if (SEQ_LT(tp->rcv_nxt, tp->rcv_laststart)) {
2271
                        /* ==> need first sack block */
2272
                        tp->sackblks[0].start = tp->rcv_laststart;
2273
                        tp->sackblks[0].end = tp->rcv_lastend;
2274
                        tp->rcv_numsacks = 1;
2275
                }
2276
                return;
2277
        }
2278
        /* Otherwise, sack blocks are already present. */
2279
        for (i = 0; i < tp->rcv_numsacks; i++)
2280
                tp->sackblks[i] = temp[i]; /* first copy back sack list */
2281
        if (SEQ_GEQ(tp->rcv_nxt, tp->rcv_lastend))
2282
                return;     /* sack list remains unchanged */
2283
        /*
2284
         * From here, segment just received should be (part of) the 1st sack.
2285
         * Go through list, possibly coalescing sack block entries.
2286
         */
2287
        firstsack.start = tp->rcv_laststart;
2288
        firstsack.end = tp->rcv_lastend;
2289
        for (i = 0; i < tp->rcv_numsacks; i++) {
2290
                sack = tp->sackblks[i];
2291
                if (SEQ_LT(sack.end, firstsack.start) ||
2292
                    SEQ_GT(sack.start, firstsack.end))
2293
                        continue; /* no overlap */
2294
                if (sack.start == firstsack.start && sack.end == firstsack.end){
2295
                        /*
2296
                         * identical block; delete it here since we will
2297
                         * move it to the front of the list.
2298
                         */
2299
                        tp->sackblks[i].start = tp->sackblks[i].end = 0;
2300
                        lastpos = i;    /* last posn with a zero entry */
2301
                        continue;
2302
                }
2303
                if (SEQ_LEQ(sack.start, firstsack.start))
2304
                        firstsack.start = sack.start; /* merge blocks */
2305
                if (SEQ_GEQ(sack.end, firstsack.end))
2306
                        firstsack.end = sack.end;     /* merge blocks */
2307
                tp->sackblks[i].start = tp->sackblks[i].end = 0;
2308
                lastpos = i;    /* last posn with a zero entry */
2309
        }
2310
        if (lastpos != -1) {    /* at least one merge */
2311
                for (i = 0, j = 1; i < tp->rcv_numsacks; i++) {
2312
                        sack = tp->sackblks[i];
2313
                        if (sack.start == 0 && sack.end == 0)
2314
                                continue;
2315
                        temp[j++] = sack;
2316
                }
2317
                tp->rcv_numsacks = j; /* including first blk (added later) */
2318
                for (i = 1; i < tp->rcv_numsacks; i++) /* now copy back */
2319
                        tp->sackblks[i] = temp[i];
2320
        } else {        /* no merges -- shift sacks by 1 */
2321
                if (tp->rcv_numsacks < MAX_SACK_BLKS)
2322
                        tp->rcv_numsacks++;
2323
                for (i = tp->rcv_numsacks-1; i > 0; i--)
2324
                        tp->sackblks[i] = tp->sackblks[i-1];
2325
        }
2326
        tp->sackblks[0] = firstsack;
2327
        return;
2328
}
2329
 
2330
/*
2331
 * Process the TCP SACK option.  Returns 1 if tcp_dooptions() should continue,
2332
 * and 0 otherwise, if the option was fine.  tp->snd_holes is an ordered list
2333
 * of holes (oldest to newest, in terms of the sequence space).
2334
 */
2335
int
2336
tcp_sack_option(tp, th, cp, optlen)
2337
        struct tcpcb *tp;
2338
        struct tcphdr *th;
2339
        u_char *cp;
2340
        int    optlen;
2341
{
2342
        int tmp_olen;
2343
        u_char *tmp_cp;
2344
        struct sackhole *cur, *p, *temp;
2345
 
2346
        if (tp->sack_disable)
2347
                return 1;
2348
 
2349
        /* Note: TCPOLEN_SACK must be 2*sizeof(tcp_seq) */
2350
        if (optlen <= 2 || (optlen - 2) % TCPOLEN_SACK != 0)
2351
                return 1;
2352
        tmp_cp = cp + 2;
2353
        tmp_olen = optlen - 2;
2354
        if (tp->snd_numholes < 0)
2355
                tp->snd_numholes = 0;
2356
        if (tp->t_maxseg == 0)
2357
                panic("tcp_sack_option"); /* Should never happen */
2358
        while (tmp_olen > 0) {
2359
                struct sackblk sack;
2360
 
2361
                bcopy((char *) tmp_cp, (char *) &(sack.start), sizeof(tcp_seq));
2362
                NTOHL(sack.start);
2363
                bcopy((char *) tmp_cp + sizeof(tcp_seq),
2364
                    (char *) &(sack.end), sizeof(tcp_seq));
2365
                NTOHL(sack.end);
2366
                tmp_olen -= TCPOLEN_SACK;
2367
                tmp_cp += TCPOLEN_SACK;
2368
                if (SEQ_LEQ(sack.end, sack.start))
2369
                        continue; /* bad SACK fields */
2370
                if (SEQ_LEQ(sack.end, tp->snd_una))
2371
                        continue; /* old block */
2372
#if defined(TCP_SACK) && defined(TCP_FACK)
2373
                /* Updates snd_fack.  */
2374
                if (SEQ_GEQ(sack.end, tp->snd_fack))
2375
                        tp->snd_fack = sack.end;
2376
#endif /* TCP_FACK */
2377
                if (SEQ_GT(th->th_ack, tp->snd_una)) {
2378
                        if (SEQ_LT(sack.start, th->th_ack))
2379
                                continue;
2380
                } else {
2381
                        if (SEQ_LT(sack.start, tp->snd_una))
2382
                                continue;
2383
                }
2384
                if (SEQ_GT(sack.end, tp->snd_max))
2385
                        continue;
2386
                if (tp->snd_holes == 0) { /* first hole */
2387
                        tp->snd_holes = (struct sackhole *)
2388
                            malloc(sizeof(struct sackhole), M_PCB, M_NOWAIT);
2389
                        if (tp->snd_holes == NULL) {
2390
                                /* ENOBUFS, so ignore SACKed block for now*/
2391
                                continue;
2392
                        }
2393
                        cur = tp->snd_holes;
2394
                        cur->start = th->th_ack;
2395
                        cur->end = sack.start;
2396
                        cur->rxmit = cur->start;
2397
                        cur->next = 0;
2398
                        tp->snd_numholes = 1;
2399
                        tp->rcv_lastsack = sack.end;
2400
                        /*
2401
                         * dups is at least one.  If more data has been
2402
                         * SACKed, it can be greater than one.
2403
                         */
2404
                        cur->dups = min(tcprexmtthresh,
2405
                            ((sack.end - cur->end)/tp->t_maxseg));
2406
                        if (cur->dups < 1)
2407
                                cur->dups = 1;
2408
                        continue; /* with next sack block */
2409
                }
2410
                /* Go thru list of holes:  p = previous,  cur = current */
2411
                p = cur = tp->snd_holes;
2412
                while (cur) {
2413
                        if (SEQ_LEQ(sack.end, cur->start))
2414
                                /* SACKs data before the current hole */
2415
                                break; /* no use going through more holes */
2416
                        if (SEQ_GEQ(sack.start, cur->end)) {
2417
                                /* SACKs data beyond the current hole */
2418
                                cur->dups++;
2419
                                if ( ((sack.end - cur->end)/tp->t_maxseg) >=
2420
                                        tcprexmtthresh)
2421
                                        cur->dups = tcprexmtthresh;
2422
                                p = cur;
2423
                                cur = cur->next;
2424
                                continue;
2425
                        }
2426
                        if (SEQ_LEQ(sack.start, cur->start)) {
2427
                                /* Data acks at least the beginning of hole */
2428
#if defined(TCP_SACK) && defined(TCP_FACK)
2429
                                if (SEQ_GT(sack.end, cur->rxmit))
2430
                                        tp->retran_data -=
2431
                                            tcp_seq_subtract(cur->rxmit,
2432
                                            cur->start);
2433
                                else
2434
                                        tp->retran_data -=
2435
                                            tcp_seq_subtract(sack.end,
2436
                                            cur->start);
2437
#endif /* TCP_FACK */
2438
                                if (SEQ_GEQ(sack.end,cur->end)){
2439
                                        /* Acks entire hole, so delete hole */
2440
                                        if (p != cur) {
2441
                                                p->next = cur->next;
2442
                                                free(cur, M_PCB);
2443
                                                cur = p->next;
2444
                                        } else {
2445
                                                cur=cur->next;
2446
                                                free(p, M_PCB);
2447
                                                p = cur;
2448
                                                tp->snd_holes = p;
2449
                                        }
2450
                                        tp->snd_numholes--;
2451
                                        continue;
2452
                                }
2453
                                /* otherwise, move start of hole forward */
2454
                                cur->start = sack.end;
2455
                                cur->rxmit = max (cur->rxmit, cur->start);
2456
                                p = cur;
2457
                                cur = cur->next;
2458
                                continue;
2459
                        }
2460
                        /* move end of hole backward */
2461
                        if (SEQ_GEQ(sack.end, cur->end)) {
2462
#if defined(TCP_SACK) && defined(TCP_FACK)
2463
                                if (SEQ_GT(cur->rxmit, sack.start))
2464
                                        tp->retran_data -=
2465
                                            tcp_seq_subtract(cur->rxmit,
2466
                                            sack.start);
2467
#endif /* TCP_FACK */
2468
                                cur->end = sack.start;
2469
                                cur->rxmit = min (cur->rxmit, cur->end);
2470
                                cur->dups++;
2471
                                if ( ((sack.end - cur->end)/tp->t_maxseg) >=
2472
                                        tcprexmtthresh)
2473
                                        cur->dups = tcprexmtthresh;
2474
                                p = cur;
2475
                                cur = cur->next;
2476
                                continue;
2477
                        }
2478
                        if (SEQ_LT(cur->start, sack.start) &&
2479
                            SEQ_GT(cur->end, sack.end)) {
2480
                                /*
2481
                                 * ACKs some data in middle of a hole; need to
2482
                                 * split current hole
2483
                                 */
2484
                                temp = (struct sackhole *)malloc(sizeof(*temp),
2485
                                    M_PCB,M_NOWAIT);
2486
                                if (temp == NULL)
2487
                                        continue; /* ENOBUFS */
2488
#if defined(TCP_SACK) && defined(TCP_FACK)
2489
                                if (SEQ_GT(cur->rxmit, sack.end))
2490
                                        tp->retran_data -=
2491
                                            tcp_seq_subtract(sack.end,
2492
                                            sack.start);
2493
                                else if (SEQ_GT(cur->rxmit, sack.start))
2494
                                        tp->retran_data -=
2495
                                            tcp_seq_subtract(cur->rxmit,
2496
                                            sack.start);
2497
#endif /* TCP_FACK */
2498
                                temp->next = cur->next;
2499
                                temp->start = sack.end;
2500
                                temp->end = cur->end;
2501
                                temp->dups = cur->dups;
2502
                                temp->rxmit = max (cur->rxmit, temp->start);
2503
                                cur->end = sack.start;
2504
                                cur->rxmit = min (cur->rxmit, cur->end);
2505
                                cur->dups++;
2506
                                if ( ((sack.end - cur->end)/tp->t_maxseg) >=
2507
                                        tcprexmtthresh)
2508
                                        cur->dups = tcprexmtthresh;
2509
                                cur->next = temp;
2510
                                p = temp;
2511
                                cur = p->next;
2512
                                tp->snd_numholes++;
2513
                        }
2514
                }
2515
                /* At this point, p points to the last hole on the list */
2516
                if (SEQ_LT(tp->rcv_lastsack, sack.start)) {
2517
                        /*
2518
                         * Need to append new hole at end.
2519
                         * Last hole is p (and it's not NULL).
2520
                         */
2521
                        temp = (struct sackhole *) malloc(sizeof(*temp),
2522
                            M_PCB, M_NOWAIT);
2523
                        if (temp == NULL)
2524
                                continue; /* ENOBUFS */
2525
                        temp->start = tp->rcv_lastsack;
2526
                        temp->end = sack.start;
2527
                        temp->dups = min(tcprexmtthresh,
2528
                            ((sack.end - sack.start)/tp->t_maxseg));
2529
                        if (temp->dups < 1)
2530
                                temp->dups = 1;
2531
                        temp->rxmit = temp->start;
2532
                        temp->next = 0;
2533
                        p->next = temp;
2534
                        tp->rcv_lastsack = sack.end;
2535
                        tp->snd_numholes++;
2536
                }
2537
        }
2538
#if defined(TCP_SACK) && defined(TCP_FACK)
2539
        /*
2540
         * Update retran_data and snd_awnd.  Go through the list of
2541
         * holes.   Increment retran_data by (hole->rxmit - hole->start).
2542
         */
2543
        tp->retran_data = 0;
2544
        cur = tp->snd_holes;
2545
        while (cur) {
2546
                tp->retran_data += cur->rxmit - cur->start;
2547
                cur = cur->next;
2548
        }
2549
        tp->snd_awnd = tcp_seq_subtract(tp->snd_nxt, tp->snd_fack) +
2550
            tp->retran_data;
2551
#endif /* TCP_FACK */
2552
 
2553
        return 0;
2554
}
2555
 
2556
/*
2557
 * Delete stale (i.e, cumulatively ack'd) holes.  Hole is deleted only if
2558
 * it is completely acked; otherwise, tcp_sack_option(), called from
2559
 * tcp_dooptions(), will fix up the hole.
2560
 */
2561
void
2562
tcp_del_sackholes(tp, th)
2563
        struct tcpcb *tp;
2564
        struct tcphdr *th;
2565
{
2566
        if (!tp->sack_disable && tp->t_state != TCPS_LISTEN) {
2567
                /* max because this could be an older ack just arrived */
2568
                tcp_seq lastack = SEQ_GT(th->th_ack, tp->snd_una) ?
2569
                        th->th_ack : tp->snd_una;
2570
                struct sackhole *cur = tp->snd_holes;
2571
                struct sackhole *prev = cur;
2572
                while (cur)
2573
                        if (SEQ_LEQ(cur->end, lastack)) {
2574
                                cur = cur->next;
2575
                                free(prev, M_PCB);
2576
                                prev = cur;
2577
                                tp->snd_numholes--;
2578
                        } else if (SEQ_LT(cur->start, lastack)) {
2579
                                cur->start = lastack;
2580
                                break;
2581
                        } else
2582
                                break;
2583
                tp->snd_holes = cur;
2584
        }
2585
}
2586
 
2587
/*
2588
 * Delete all receiver-side SACK information.
2589
 */
2590
void
2591
tcp_clean_sackreport(tp)
2592
        struct tcpcb *tp;
2593
{
2594
        int i;
2595
 
2596
        tp->rcv_numsacks = 0;
2597
        for (i = 0; i < MAX_SACK_BLKS; i++)
2598
                tp->sackblks[i].start = tp->sackblks[i].end=0;
2599
 
2600
}
2601
 
2602
/*
2603
 * Checks for partial ack.  If partial ack arrives, turn off retransmission
2604
 * timer, deflate the window, do not clear tp->t_dupacks, and return 1.
2605
 * If the ack advances at least to tp->snd_last, return 0.
2606
 */
2607
int
2608
tcp_sack_partialack(tp, th)
2609
        struct tcpcb *tp;
2610
        struct tcphdr *th;
2611
{
2612
        if (SEQ_LT(th->th_ack, tp->snd_last)) {
2613
                /* Turn off retx. timer (will start again next segment) */
2614
                tp->t_timer[TCPT_REXMT] = 0;
2615
                tp->t_rtt = 0;
2616
#ifndef TCP_FACK
2617
                /*
2618
                 * Partial window deflation.  This statement relies on the
2619
                 * fact that tp->snd_una has not been updated yet.  In FACK
2620
                 * hold snd_cwnd constant during fast recovery.
2621
                 */
2622
                tp->snd_cwnd -= (th->th_ack - tp->snd_una - tp->t_maxseg);
2623
#endif
2624
                return 1;
2625
        }
2626
        return 0;
2627
}
2628
#endif /* TCP_SACK */
2629
 
2630
/*
2631
 * Pull out of band byte out of a segment so
2632
 * it doesn't appear in the user's data queue.
2633
 * It is still reflected in the segment length for
2634
 * sequencing purposes.
2635
 */
2636
void
2637
tcp_pulloutofband(so, urgent, m, off)
2638
        struct socket *so;
2639
        u_int urgent;
2640
        register struct mbuf *m;
2641
        int off;
2642
{
2643
        int cnt = off + urgent - 1;
2644
 
2645
        while (cnt >= 0) {
2646
                if (m->m_len > cnt) {
2647
                        char *cp = mtod(m, caddr_t) + cnt;
2648
                        struct tcpcb *tp = sototcpcb(so);
2649
 
2650
                        tp->t_iobc = *cp;
2651
                        tp->t_oobflags |= TCPOOB_HAVEDATA;
2652
                        bcopy(cp+1, cp, (unsigned)(m->m_len - cnt - 1));
2653
                        m->m_len--;
2654
                        return;
2655
                }
2656
                cnt -= m->m_len;
2657
                m = m->m_next;
2658
                if (m == 0)
2659
                        break;
2660
        }
2661
        panic("tcp_pulloutofband");
2662
}
2663
 
2664
/*
2665
 * Collect new round-trip time estimate
2666
 * and update averages and current timeout.
2667
 */
2668
void
2669
tcp_xmit_timer(tp, rtt)
2670
        register struct tcpcb *tp;
2671
        short rtt;
2672
{
2673
        register short delta;
2674
        short rttmin;
2675
 
2676
        tcpstat.tcps_rttupdated++;
2677
        --rtt;
2678
        if (tp->t_srtt != 0) {
2679
                /*
2680
                 * srtt is stored as fixed point with 3 bits after the
2681
                 * binary point (i.e., scaled by 8).  The following magic
2682
                 * is equivalent to the smoothing algorithm in rfc793 with
2683
                 * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed
2684
                 * point).  Adjust rtt to origin 0.
2685
                 */
2686
                delta = (rtt << 2) - (tp->t_srtt >> TCP_RTT_SHIFT);
2687
                if ((tp->t_srtt += delta) <= 0)
2688
                        tp->t_srtt = 1;
2689
                /*
2690
                 * We accumulate a smoothed rtt variance (actually, a
2691
                 * smoothed mean difference), then set the retransmit
2692
                 * timer to smoothed rtt + 4 times the smoothed variance.
2693
                 * rttvar is stored as fixed point with 2 bits after the
2694
                 * binary point (scaled by 4).  The following is
2695
                 * equivalent to rfc793 smoothing with an alpha of .75
2696
                 * (rttvar = rttvar*3/4 + |delta| / 4).  This replaces
2697
                 * rfc793's wired-in beta.
2698
                 */
2699
                if (delta < 0)
2700
                        delta = -delta;
2701
                delta -= (tp->t_rttvar >> TCP_RTTVAR_SHIFT);
2702
                if ((tp->t_rttvar += delta) <= 0)
2703
                        tp->t_rttvar = 1;
2704
        } else {
2705
                /*
2706
                 * No rtt measurement yet - use the unsmoothed rtt.
2707
                 * Set the variance to half the rtt (so our first
2708
                 * retransmit happens at 3*rtt).
2709
                 */
2710
                tp->t_srtt = rtt << (TCP_RTT_SHIFT + 2);
2711
                tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT + 2 - 1);
2712
        }
2713
        tp->t_rtt = 0;
2714
        tp->t_rxtshift = 0;
2715
 
2716
        /*
2717
         * the retransmit should happen at rtt + 4 * rttvar.
2718
         * Because of the way we do the smoothing, srtt and rttvar
2719
         * will each average +1/2 tick of bias.  When we compute
2720
         * the retransmit timer, we want 1/2 tick of rounding and
2721
         * 1 extra tick because of +-1/2 tick uncertainty in the
2722
         * firing of the timer.  The bias will give us exactly the
2723
         * 1.5 tick we need.  But, because the bias is
2724
         * statistical, we have to test that we don't drop below
2725
         * the minimum feasible timer (which is 2 ticks).
2726
         */
2727
        if (tp->t_rttmin > rtt + 2)
2728
                rttmin = tp->t_rttmin;
2729
        else
2730
                rttmin = rtt + 2;
2731
        TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp), rttmin, TCPTV_REXMTMAX);
2732
 
2733
        /*
2734
         * We received an ack for a packet that wasn't retransmitted;
2735
         * it is probably safe to discard any error indications we've
2736
         * received recently.  This isn't quite right, but close enough
2737
         * for now (a route might have failed after we sent a segment,
2738
         * and the return path might not be symmetrical).
2739
         */
2740
        tp->t_softerror = 0;
2741
}
2742
 
2743
/*
2744
 * Determine a reasonable value for maxseg size.
2745
 * If the route is known, check route for mtu.
2746
 * If none, use an mss that can be handled on the outgoing
2747
 * interface without forcing IP to fragment; if bigger than
2748
 * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES
2749
 * to utilize large mbufs.  If no route is found, route has no mtu,
2750
 * or the destination isn't local, use a default, hopefully conservative
2751
 * size (usually 512 or the default IP max size, but no more than the mtu
2752
 * of the interface), as we can't discover anything about intervening
2753
 * gateways or networks.  We also initialize the congestion/slow start
2754
 * window to be a single segment if the destination isn't local.
2755
 * While looking at the routing entry, we also initialize other path-dependent
2756
 * parameters from pre-set or cached values in the routing entry.
2757
 *
2758
 * Also take into account the space needed for options that we
2759
 * send regularly.  Make maxseg shorter by that amount to assure
2760
 * that we can send maxseg amount of data even when the options
2761
 * are present.  Store the upper limit of the length of options plus
2762
 * data in maxopd.
2763
 */
2764
int
2765
tcp_mss(tp, offer)
2766
        register struct tcpcb *tp;
2767
        u_int offer;
2768
{
2769
        struct route *ro;
2770
        register struct rtentry *rt;
2771
        struct ifnet *ifp;
2772
        register int rtt, mss;
2773
        u_long bufsize;
2774
        struct inpcb *inp;
2775
        struct socket *so;
2776
 
2777
        inp = tp->t_inpcb;
2778
        ro = &inp->inp_route;
2779
        so = inp->inp_socket;
2780
 
2781
        if ((rt = ro->ro_rt) == (struct rtentry *)0) {
2782
                /* No route yet, so try to acquire one */
2783
#ifdef INET6
2784
          /*
2785
           * Get a new IPv6 route if an IPv6 destination, otherwise, get
2786
           * and IPv4 route (including those pesky IPv4-mapped addresses).
2787
           */
2788
          bzero(ro,sizeof(struct route_in6));
2789
          if (sotopf(so) == AF_INET6) {
2790
            if (IN6_IS_ADDR_V4MAPPED(&inp->inp_faddr6)) {
2791
              /* Get an IPv4 route. */
2792
              ro->ro_dst.sa_family = AF_INET;
2793
              ro->ro_dst.sa_len = sizeof(ro->ro_dst);
2794
              ((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
2795
                inp->inp_faddr;
2796
              rtalloc(ro);
2797
            } else {
2798
              ro->ro_dst.sa_family = AF_INET6;
2799
              ro->ro_dst.sa_len = sizeof(struct sockaddr_in6);
2800
              ((struct sockaddr_in6 *) &ro->ro_dst)->sin6_addr =
2801
                inp->inp_faddr6;
2802
              rtalloc(ro);
2803
            }
2804
          } else
2805
#endif /* INET6 */
2806
                if (inp->inp_faddr.s_addr != INADDR_ANY) {
2807
                        ro->ro_dst.sa_family = AF_INET;
2808
                        ro->ro_dst.sa_len = sizeof(ro->ro_dst);
2809
                        satosin(&ro->ro_dst)->sin_addr = inp->inp_faddr;
2810
                        rtalloc(ro);
2811
                }
2812
                if ((rt = ro->ro_rt) == (struct rtentry *)0) {
2813
                        tp->t_maxopd = tp->t_maxseg = tcp_mssdflt;
2814
                        return (tcp_mssdflt);
2815
                }
2816
        }
2817
        ifp = rt->rt_ifp;
2818
 
2819
#ifdef RTV_MTU  /* if route characteristics exist ... */
2820
        /*
2821
         * While we're here, check if there's an initial rtt
2822
         * or rttvar.  Convert from the route-table units
2823
         * to scaled multiples of the slow timeout timer.
2824
         */
2825
        if (tp->t_srtt == 0 && (rtt = rt->rt_rmx.rmx_rtt)) {
2826
                /*
2827
                 * XXX the lock bit for MTU indicates that the value
2828
                 * is also a minimum value; this is subject to time.
2829
                 */
2830
                if (rt->rt_rmx.rmx_locks & RTV_RTT)
2831
                        TCPT_RANGESET(tp->t_rttmin,
2832
                            rtt / (RTM_RTTUNIT / PR_SLOWHZ),
2833
                            TCPTV_MIN, TCPTV_REXMTMAX);
2834
                tp->t_srtt = rtt / (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTT_SCALE));
2835
                if (rt->rt_rmx.rmx_rttvar)
2836
                        tp->t_rttvar = rt->rt_rmx.rmx_rttvar /
2837
                            (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTTVAR_SCALE));
2838
                else
2839
                        /* default variation is +- 1 rtt */
2840
                        tp->t_rttvar =
2841
                            tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE;
2842
                TCPT_RANGESET((long) tp->t_rxtcur,
2843
                    ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
2844
                    tp->t_rttmin, TCPTV_REXMTMAX);
2845
        }
2846
        /*
2847
         * if there's an mtu associated with the route, use it
2848
         */
2849
        if (rt->rt_rmx.rmx_mtu)
2850
#ifdef INET6
2851
        {
2852
          /*
2853
           * One may wish to lower MSS to take into account options,
2854
           * especially security-related options.
2855
           */
2856
          if (tp->pf == AF_INET6)
2857
            mss = rt->rt_rmx.rmx_mtu - sizeof(struct tcpipv6hdr);
2858
          else
2859
#endif /* INET6 */
2860
                mss = rt->rt_rmx.rmx_mtu - sizeof(struct tcpiphdr);
2861
#ifdef INET6
2862
        }
2863
#endif /* INET6 */
2864
        else
2865
#endif /* RTV_MTU */
2866
        {
2867
          /*
2868
           *  ifp may be null and rmx_mtu may be zero in certain
2869
           *  v6 cases (e.g., if ND wasn't able to resolve the
2870
           *  destination host.
2871
           */
2872
                mss = ifp ? ifp->if_mtu - sizeof(struct tcpiphdr) : 0;
2873
#ifdef INET6
2874
                if (tp->pf == AF_INET)
2875
#endif /* INET6 */
2876
                if (!in_localaddr(inp->inp_faddr))
2877
                        mss = min(mss, tcp_mssdflt);
2878
        }
2879
        /*
2880
         * The current mss, t_maxseg, is initialized to the default value.
2881
         * If we compute a smaller value, reduce the current mss.
2882
         * If we compute a larger value, return it for use in sending
2883
         * a max seg size option, but don't store it for use
2884
         * unless we received an offer at least that large from peer.
2885
         * However, do not accept offers under 32 bytes.
2886
         */
2887
        if (offer)
2888
                mss = min(mss, offer);
2889
        mss = max(mss, 64);             /* sanity - at least max opt. space */
2890
        /*
2891
         * maxopd stores the maximum length of data AND options
2892
         * in a segment; maxseg is the amount of data in a normal
2893
         * segment.  We need to store this value (maxopd) apart
2894
         * from maxseg, because now every segment carries options
2895
         * and thus we normally have somewhat less data in segments.
2896
         */
2897
        tp->t_maxopd = mss;
2898
 
2899
        if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
2900
            (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP)
2901
                mss -= TCPOLEN_TSTAMP_APPA;
2902
 
2903
#if     (MCLBYTES & (MCLBYTES - 1)) == 0
2904
                if (mss > MCLBYTES)
2905
                        mss &= ~(MCLBYTES-1);
2906
#else
2907
                if (mss > MCLBYTES)
2908
                        mss = mss / MCLBYTES * MCLBYTES;
2909
#endif
2910
        /*
2911
         * If there's a pipesize, change the socket buffer
2912
         * to that size.  Make the socket buffers an integral
2913
         * number of mss units; if the mss is larger than
2914
         * the socket buffer, decrease the mss.
2915
         */
2916
#ifdef RTV_SPIPE
2917
        if ((bufsize = rt->rt_rmx.rmx_sendpipe) == 0)
2918
#endif
2919
                bufsize = so->so_snd.sb_hiwat;
2920
        if (bufsize < mss)
2921
                mss = bufsize;
2922
        else {
2923
                bufsize = roundup(bufsize, mss);
2924
                if (bufsize > sb_max)
2925
                        bufsize = sb_max;
2926
                (void)sbreserve(&so->so_snd, bufsize);
2927
        }
2928
        tp->t_maxseg = mss;
2929
 
2930
#ifdef RTV_RPIPE
2931
        if ((bufsize = rt->rt_rmx.rmx_recvpipe) == 0)
2932
#endif
2933
                bufsize = so->so_rcv.sb_hiwat;
2934
        if (bufsize > mss) {
2935
                bufsize = roundup(bufsize, mss);
2936
                if (bufsize > sb_max)
2937
                        bufsize = sb_max;
2938
                (void)sbreserve(&so->so_rcv, bufsize);
2939
        }
2940
        tp->snd_cwnd = mss;
2941
 
2942
#ifdef RTV_SSTHRESH
2943
        if (rt->rt_rmx.rmx_ssthresh) {
2944
                /*
2945
                 * There's some sort of gateway or interface
2946
                 * buffer limit on the path.  Use this to set
2947
                 * the slow start threshhold, but set the
2948
                 * threshold to no less than 2*mss.
2949
                 */
2950
                tp->snd_ssthresh = max(2 * mss, rt->rt_rmx.rmx_ssthresh);
2951
        }
2952
#endif /* RTV_MTU */
2953
        return (mss);
2954
}
2955
#endif /* TUBA_INCLUDE */
2956
 
2957
#if defined(TCP_NEWRENO) || defined (TCP_SACK)
2958
/*
2959
 * Checks for partial ack.  If partial ack arrives, force the retransmission
2960
 * of the next unacknowledged segment, do not clear tp->t_dupacks, and return
2961
 * 1.  By setting snd_nxt to ti_ack, this forces retransmission timer to
2962
 * be started again.  If the ack advances at least to tp->snd_last, return 0.
2963
 */
2964
int
2965
tcp_newreno(tp, th)
2966
        struct tcpcb *tp;
2967
        struct tcphdr *th;
2968
{
2969
        if (SEQ_LT(th->th_ack, tp->snd_last)) {
2970
                /*
2971
                 * snd_una has not been updated and the socket send buffer
2972
                 * not yet drained of the acked data, so we have to leave
2973
                 * snd_una as it was to get the correct data offset in
2974
                 * tcp_output().
2975
                 */
2976
                tcp_seq onxt = tp->snd_nxt;
2977
                u_long  ocwnd = tp->snd_cwnd;
2978
                tp->t_timer[TCPT_REXMT] = 0;
2979
                tp->t_rtt = 0;
2980
                tp->snd_nxt = th->th_ack;
2981
                /*
2982
                 * Set snd_cwnd to one segment beyond acknowledged offset
2983
                 * (tp->snd_una not yet updated when this function is called)
2984
                 */
2985
                tp->snd_cwnd = tp->t_maxseg + (th->th_ack - tp->snd_una);
2986
                (void) tcp_output(tp);
2987
                tp->snd_cwnd = ocwnd;
2988
                if (SEQ_GT(onxt, tp->snd_nxt))
2989
                        tp->snd_nxt = onxt;
2990
                /*
2991
                 * Partial window deflation.  Relies on fact that tp->snd_una
2992
                 * not updated yet.
2993
                 */
2994
                tp->snd_cwnd -= (th->th_ack - tp->snd_una - tp->t_maxseg);
2995
                return 1;
2996
    }
2997
    return 0;
2998
}
2999
#endif /* TCP_NEWRENO || TCP_SACK */

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.