URL https://opencores.org/ocsvn/openrisc/openrisc/trunk
Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [rtos/] [ecos-2.0/] [packages/] [net/] [bsd_tcpip/] [v2_0/] [src/] [sys/] [netinet/] [tcp_output.c] - Blame information for rev 27

Go to most recent revision | Details | Compare with Previous | View Log

//==========================================================================
//
//      src/sys/netinet/tcp_output.c
//
//==========================================================================
//####BSDCOPYRIGHTBEGIN####
//
// -------------------------------------------
//
// Portions of this software may have been derived from OpenBSD, 
// FreeBSD or other sources, and are covered by the appropriate
// copyright disclaimers included herein.
//
// Portions created by Red Hat are
// Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
//
// -------------------------------------------
//
//####BSDCOPYRIGHTEND####
//==========================================================================
 
/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
 *      The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the University of
 *      California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      @(#)tcp_output.c        8.4 (Berkeley) 5/24/95
 * $FreeBSD: src/sys/netinet/tcp_output.c,v 1.39.2.10 2001/07/07 04:30:38 silby Exp $
 */
 
#include <sys/param.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
 
#include <net/route.h>
 
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet/ip_var.h>
#ifdef INET6
#include <netinet6/in6_pcb.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#endif
#include <netinet/tcp.h>
#define TCPOUTFLAGS
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcpip.h>
#ifdef TCPDEBUG
#include <netinet/tcp_debug.h>
#endif
 
#ifdef IPSEC
#include <netinet6/ipsec.h>
#endif /*IPSEC*/
 
#ifdef notyet
extern struct mbuf *m_copypack();
#endif
 
/*
 * File-scope tunables for the TCP output path.
 */

/*
 * When non-zero, tcp_output() sets IP_DF on outgoing IPv4 segments,
 * provided the connection has a cached route that is up and whose MTU
 * is not locked.
 */
static int path_mtu_discovery = 1;
/*
 * Slow-start flight size, in segments: when a connection has been idle
 * for at least t_rxtcur ticks, tcp_output() resets snd_cwnd to
 * t_maxseg * ss_fltsz for peers that are not on a local network.
 */
int ss_fltsz = 1;
/*
 * Same as ss_fltsz, but applied when in_localaddr()/in6_localaddr()
 * reports the peer as local.
 */
int ss_fltsz_local = TCP_MAXWIN;               /* something large */
/*
 * NOTE(review): not referenced in the visible part of this file;
 * presumably enables NewReno handling elsewhere -- confirm against
 * the other TCP sources.
 */
int     tcp_do_newreno = 1;
 
/*
 * Tcp output routine: figure out what should be sent and send it.
 */
int
tcp_output(tp)
        register struct tcpcb *tp;
{
        register struct socket *so = tp->t_inpcb->inp_socket;
        register long len, win;
        int off, flags, error;
        register struct mbuf *m;
        struct ip *ip = NULL;
        register struct ipovly *ipov = NULL;
#ifdef INET6
        struct ip6_hdr *ip6 = NULL;
#endif /* INET6 */
        register struct tcphdr *th;
        u_char opt[TCP_MAXOLEN];
        unsigned ipoptlen, optlen, hdrlen;
        int idle, sendalot;
        int maxburst = TCP_MAXBURST;
        struct rmxp_tao *taop;
        struct rmxp_tao tao_noncached;
#ifdef INET6
        int isipv6;
#endif
 
#ifdef INET6
        isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) != 0;
#endif
 
        /*
         * Determine length of data that should be transmitted,
         * and flags that will be used.
         * If there is some data or critical controls (SYN, RST)
         * to send, then transmit; otherwise, investigate further.
         */
        idle = (tp->snd_max == tp->snd_una);
        if (idle && (ticks - tp->t_rcvtime) >= tp->t_rxtcur) {
                /*
                 * We have been idle for "a while" and no acks are
                 * expected to clock out any data we send --
                 * slow start to get ack "clock" running again.
                 *
                 * Set the slow-start flight size depending on whether
                 * this is a local network or not.
                 */
                if (
#ifdef INET6
                    (isipv6 && in6_localaddr(&tp->t_inpcb->in6p_faddr)) ||
                    (!isipv6 &&
#endif
                     in_localaddr(tp->t_inpcb->inp_faddr)
#ifdef INET6
                     )
#endif
                    )
                        tp->snd_cwnd = tp->t_maxseg * ss_fltsz_local;
                else
                        tp->snd_cwnd = tp->t_maxseg * ss_fltsz;
        }
again:
        sendalot = 0;
        off = tp->snd_nxt - tp->snd_una;
        win = min(tp->snd_wnd, tp->snd_cwnd);
 
        flags = tcp_outflags[tp->t_state];
        /*
         * Get standard flags, and add SYN or FIN if requested by 'hidden'
         * state flags.
         */
        if (tp->t_flags & TF_NEEDFIN)
                flags |= TH_FIN;
        if (tp->t_flags & TF_NEEDSYN)
                flags |= TH_SYN;
 
        /*
         * If in persist timeout with window of 0, send 1 byte.
         * Otherwise, if window is small but nonzero
         * and timer expired, we will send what we can
         * and go to transmit state.
         */
        if (tp->t_force) {
                if (win == 0) {
                        /*
                         * If we still have some data to send, then
                         * clear the FIN bit.  Usually this would
                         * happen below when it realizes that we
                         * aren't sending all the data.  However,
                         * if we have exactly 1 byte of unsent data,
                         * then it won't clear the FIN bit below,
                         * and if we are in persist state, we wind
                         * up sending the packet without recording
                         * that we sent the FIN bit.
                         *
                         * We can't just blindly clear the FIN bit,
                         * because if we don't have any more data
                         * to send then the probe will be the FIN
                         * itself.
                         */
                        if (off < so->so_snd.sb_cc)
                                flags &= ~TH_FIN;
                        win = 1;
                } else {
                        callout_stop(tp->tt_persist);
                        tp->t_rxtshift = 0;
                }
        }
 
        len = (long)ulmin(so->so_snd.sb_cc, win) - off;
 
        if ((taop = tcp_gettaocache(tp->t_inpcb)) == NULL) {
                taop = &tao_noncached;
                bzero(taop, sizeof(*taop));
        }
 
        /*
         * Lop off SYN bit if it has already been sent.  However, if this
         * is SYN-SENT state and if segment contains data and if we don't
         * know that foreign host supports TAO, suppress sending segment.
         */
        if ((flags & TH_SYN) && SEQ_GT(tp->snd_nxt, tp->snd_una)) {
                flags &= ~TH_SYN;
                off--, len++;
                if (len > 0 && tp->t_state == TCPS_SYN_SENT &&
                    taop->tao_ccsent == 0)
                        return 0;
        }
 
        /*
         * Be careful not to send data and/or FIN on SYN segments
         * in cases when no CC option will be sent.
         * This measure is needed to prevent interoperability problems
         * with not fully conformant TCP implementations.
         */
        if ((flags & TH_SYN) &&
            ((tp->t_flags & TF_NOOPT) || !(tp->t_flags & TF_REQ_CC) ||
             ((flags & TH_ACK) && !(tp->t_flags & TF_RCVD_CC)))) {
                len = 0;
                flags &= ~TH_FIN;
        }
 
        if (len < 0) {
                /*
                 * If FIN has been sent but not acked,
                 * but we haven't been called to retransmit,
                 * len will be -1.  Otherwise, window shrank
                 * after we sent into it.  If window shrank to 0,
                 * cancel pending retransmit, pull snd_nxt back
                 * to (closed) window, and set the persist timer
                 * if it isn't already going.  If the window didn't
                 * close completely, just wait for an ACK.
                 */
                len = 0;
                if (win == 0) {
                        callout_stop(tp->tt_rexmt);
                        tp->t_rxtshift = 0;
                        tp->snd_nxt = tp->snd_una;
                        if (!callout_active(tp->tt_persist))
                                tcp_setpersist(tp);
                }
        }
        if (len > tp->t_maxseg) {
                len = tp->t_maxseg;
                sendalot = 1;
        }
        if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc))
                flags &= ~TH_FIN;
 
        win = sbspace(&so->so_rcv);
 
        /*
         * Sender silly window avoidance.  If connection is idle
         * and can send all data, a maximum segment,
         * at least a maximum default-size segment do it,
         * or are forced, do it; otherwise don't bother.
         * If peer's buffer is tiny, then send
         * when window is at least half open.
         * If retransmitting (possibly after persist timer forced us
         * to send into a small window), then must resend.
         */
        if (len) {
                if (len == tp->t_maxseg)
                        goto send;
                if (!(tp->t_flags & TF_MORETOCOME) &&
                    (idle || tp->t_flags & TF_NODELAY) &&
                    (tp->t_flags & TF_NOPUSH) == 0 &&
                    len + off >= so->so_snd.sb_cc)
                        goto send;
                if (tp->t_force)
                        goto send;
                if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0)
                        goto send;
                if (SEQ_LT(tp->snd_nxt, tp->snd_max))
                        goto send;
        }
 
        /*
         * Compare available window to amount of window
         * known to peer (as advertised window less
         * next expected input).  If the difference is at least two
         * max size segments, or at least 50% of the maximum possible
         * window, then want to send a window update to peer.
         */
        if (win > 0) {
                /*
                 * "adv" is the amount we can increase the window,
                 * taking into account that we are limited by
                 * TCP_MAXWIN << tp->rcv_scale.
                 */
                long adv = min(win, (long)TCP_MAXWIN << tp->rcv_scale) -
                        (tp->rcv_adv - tp->rcv_nxt);
 
                if (adv >= (long) (2 * tp->t_maxseg))
                        goto send;
                if (2 * adv >= (long) so->so_rcv.sb_hiwat)
                        goto send;
        }
 
        /*
         * Send if we owe peer an ACK.
         */
        if (tp->t_flags & TF_ACKNOW)
                goto send;
        if ((flags & TH_RST) ||
            ((flags & TH_SYN) && (tp->t_flags & TF_NEEDSYN) == 0))
                goto send;
        if (SEQ_GT(tp->snd_up, tp->snd_una))
                goto send;
        /*
         * If our state indicates that FIN should be sent
         * and we have not yet done so, or we're retransmitting the FIN,
         * then we need to send.
         */
        if (flags & TH_FIN &&
            ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una))
                goto send;
 
        /*
         * TCP window updates are not reliable, rather a polling protocol
         * using ``persist'' packets is used to insure receipt of window
         * updates.  The three ``states'' for the output side are:
         *      idle                    not doing retransmits or persists
         *      persisting              to move a small or zero window
         *      (re)transmitting        and thereby not persisting
         *
         * callout_active(tp->tt_persist)
         *      is true when we are in persist state.
         * tp->t_force
         *      is set when we are called to send a persist packet.
         * callout_active(tp->tt_rexmt)
         *      is set when we are retransmitting
         * The output side is idle when both timers are zero.
         *
         * If send window is too small, there is data to transmit, and no
         * retransmit or persist is pending, then go to persist state.
         * If nothing happens soon, send when timer expires:
         * if window is nonzero, transmit what we can,
         * otherwise force out a byte.
         */
        if (so->so_snd.sb_cc && !callout_active(tp->tt_rexmt) &&
            !callout_active(tp->tt_persist)) {
                tp->t_rxtshift = 0;
                tcp_setpersist(tp);
        }
 
        /*
         * No reason to send a segment, just return.
         */
        return (0);
 
send:
        /*
         * Before ESTABLISHED, force sending of initial options
         * unless TCP set not to do any options.
         * NOTE: we assume that the IP/TCP header plus TCP options
         * always fit in a single mbuf, leaving room for a maximum
         * link header, i.e.
         *      max_linkhdr + sizeof (struct tcpiphdr) + optlen <= MCLBYTES
         */
        optlen = 0;
#ifdef INET6
        if (isipv6)
                hdrlen = sizeof (struct ip6_hdr) + sizeof (struct tcphdr);
        else
#endif
        hdrlen = sizeof (struct tcpiphdr);
        if (flags & TH_SYN) {
                tp->snd_nxt = tp->iss;
                if ((tp->t_flags & TF_NOOPT) == 0) {
                        u_short mss;
 
                        opt[0] = TCPOPT_MAXSEG;
                        opt[1] = TCPOLEN_MAXSEG;
                        mss = htons((u_short) tcp_mssopt(tp));
                        (void)memcpy(opt + 2, &mss, sizeof(mss));
                        optlen = TCPOLEN_MAXSEG;
 
                        if ((tp->t_flags & TF_REQ_SCALE) &&
                            ((flags & TH_ACK) == 0 ||
                            (tp->t_flags & TF_RCVD_SCALE))) {
                                *((u_int32_t *)(opt + optlen)) = htonl(
                                        TCPOPT_NOP << 24 |
                                        TCPOPT_WINDOW << 16 |
                                        TCPOLEN_WINDOW << 8 |
                                        tp->request_r_scale);
                                optlen += 4;
                        }
                }
        }
 
        /*
         * Send a timestamp and echo-reply if this is a SYN and our side
         * wants to use timestamps (TF_REQ_TSTMP is set) or both our side
         * and our peer have sent timestamps in our SYN's.
         */
        if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
            (flags & TH_RST) == 0 &&
            ((flags & TH_ACK) == 0 ||
             (tp->t_flags & TF_RCVD_TSTMP))) {
                u_int32_t *lp = (u_int32_t *)(opt + optlen);
 
                /* Form timestamp option as shown in appendix A of RFC 1323. */
                *lp++ = htonl(TCPOPT_TSTAMP_HDR);
                *lp++ = htonl(ticks);
                *lp   = htonl(tp->ts_recent);
                optlen += TCPOLEN_TSTAMP_APPA;
        }
 
        /*
         * Send `CC-family' options if our side wants to use them (TF_REQ_CC),
         * options are allowed (!TF_NOOPT) and it's not a RST.
         */
        if ((tp->t_flags & (TF_REQ_CC|TF_NOOPT)) == TF_REQ_CC &&
             (flags & TH_RST) == 0) {
                switch (flags & (TH_SYN|TH_ACK)) {
                /*
                 * This is a normal ACK, send CC if we received CC before
                 * from our peer.
                 */
                case TH_ACK:
                        if (!(tp->t_flags & TF_RCVD_CC))
                                break;
                        /*FALLTHROUGH*/
 
                /*
                 * We can only get here in T/TCP's SYN_SENT* state, when
                 * we're sending a non-SYN segment without waiting for
                 * the ACK of our SYN.  A check above assures that we only
                 * do this if our peer understands T/TCP.
                 */
                case 0:
                        opt[optlen++] = TCPOPT_NOP;
                        opt[optlen++] = TCPOPT_NOP;
                        opt[optlen++] = TCPOPT_CC;
                        opt[optlen++] = TCPOLEN_CC;
                        *(u_int32_t *)&opt[optlen] = htonl(tp->cc_send);
 
                        optlen += 4;
                        break;
 
                /*
                 * This is our initial SYN, check whether we have to use
                 * CC or CC.new.
                 */
                case TH_SYN:
                        opt[optlen++] = TCPOPT_NOP;
                        opt[optlen++] = TCPOPT_NOP;
                        opt[optlen++] = tp->t_flags & TF_SENDCCNEW ?
                                                TCPOPT_CCNEW : TCPOPT_CC;
                        opt[optlen++] = TCPOLEN_CC;
                        *(u_int32_t *)&opt[optlen] = htonl(tp->cc_send);
                        optlen += 4;
                        break;
 
                /*
                 * This is a SYN,ACK; send CC and CC.echo if we received
                 * CC from our peer.
                 */
                case (TH_SYN|TH_ACK):
                        if (tp->t_flags & TF_RCVD_CC) {
                                opt[optlen++] = TCPOPT_NOP;
                                opt[optlen++] = TCPOPT_NOP;
                                opt[optlen++] = TCPOPT_CC;
                                opt[optlen++] = TCPOLEN_CC;
                                *(u_int32_t *)&opt[optlen] =
                                        htonl(tp->cc_send);
                                optlen += 4;
                                opt[optlen++] = TCPOPT_NOP;
                                opt[optlen++] = TCPOPT_NOP;
                                opt[optlen++] = TCPOPT_CCECHO;
                                opt[optlen++] = TCPOLEN_CC;
                                *(u_int32_t *)&opt[optlen] =
                                        htonl(tp->cc_recv);
                                optlen += 4;
                        }
                        break;
                }
        }
 
        hdrlen += optlen;
 
#ifdef INET6
        if (isipv6)
                ipoptlen = ip6_optlen(tp->t_inpcb);
        else
#endif
      {
        if (tp->t_inpcb->inp_options) {
                ipoptlen = tp->t_inpcb->inp_options->m_len -
                                offsetof(struct ipoption, ipopt_list);
        } else {
                ipoptlen = 0;
        }
      }
#ifdef IPSEC
        ipoptlen += ipsec_hdrsiz_tcp(tp);
#endif
 
        /*
         * Adjust data length if insertion of options will
         * bump the packet length beyond the t_maxopd length.
         * Clear the FIN bit because we cut off the tail of
         * the segment.
         */
        if (len + optlen + ipoptlen > tp->t_maxopd) {
                /*
                 * If there is still more to send, don't close the connection.
                 */
                flags &= ~TH_FIN;
                len = tp->t_maxopd - optlen - ipoptlen;
                sendalot = 1;
        }
 
/*#ifdef DIAGNOSTIC*/
#ifdef INET6
        if (max_linkhdr + hdrlen > MCLBYTES)
                panic("tcphdr too big");
#else
        if (max_linkhdr + hdrlen > MHLEN)
                panic("tcphdr too big");
#endif
/*#endif*/
 
        /*
         * Grab a header mbuf, attaching a copy of data to
         * be transmitted, and initialize the header from
         * the template for sends on this connection.
         */
        if (len) {
                if (tp->t_force && len == 1)
                        tcpstat.tcps_sndprobe++;
                else if (SEQ_LT(tp->snd_nxt, tp->snd_max)) {
                        tcpstat.tcps_sndrexmitpack++;
                        tcpstat.tcps_sndrexmitbyte += len;
                } else {
                        tcpstat.tcps_sndpack++;
                        tcpstat.tcps_sndbyte += len;
                }
#ifdef notyet
                if ((m = m_copypack(so->so_snd.sb_mb, off,
                    (int)len, max_linkhdr + hdrlen)) == 0) {
                        error = ENOBUFS;
                        goto out;
                }
                /*
                 * m_copypack left space for our hdr; use it.
                 */
                m->m_len += hdrlen;
                m->m_data -= hdrlen;
#else
                MGETHDR(m, M_DONTWAIT, MT_HEADER);
                if (m == NULL) {
                        error = ENOBUFS;
                        goto out;
                }
#ifdef INET6
                if (MHLEN < hdrlen + max_linkhdr) {
                        MCLGET(m, M_DONTWAIT);
                        if ((m->m_flags & M_EXT) == 0) {
                                m_freem(m);
                                error = ENOBUFS;
                                goto out;
                        }
                }
#endif
                m->m_data += max_linkhdr;
                m->m_len = hdrlen;
                if (len <= MHLEN - hdrlen - max_linkhdr) {
                        m_copydata(so->so_snd.sb_mb, off, (int) len,
                            mtod(m, caddr_t) + hdrlen);
                        m->m_len += len;
                } else {
                        m->m_next = m_copy(so->so_snd.sb_mb, off, (int) len);
                        if (m->m_next == 0) {
                                (void) m_free(m);
                                error = ENOBUFS;
                                goto out;
                        }
                }
#endif
                /*
                 * If we're sending everything we've got, set PUSH.
                 * (This will keep happy those implementations which only
                 * give data to the user when a buffer fills or
                 * a PUSH comes in.)
                 */
                if (off + len == so->so_snd.sb_cc)
                        flags |= TH_PUSH;
        } else {
                if (tp->t_flags & TF_ACKNOW)
                        tcpstat.tcps_sndacks++;
                else if (flags & (TH_SYN|TH_FIN|TH_RST))
                        tcpstat.tcps_sndctrl++;
                else if (SEQ_GT(tp->snd_up, tp->snd_una))
                        tcpstat.tcps_sndurg++;
                else
                        tcpstat.tcps_sndwinup++;
 
                MGETHDR(m, M_DONTWAIT, MT_HEADER);
                if (m == NULL) {
                        error = ENOBUFS;
                        goto out;
                }
#ifdef INET6
                if (isipv6 && (MHLEN < hdrlen + max_linkhdr) &&
                    MHLEN >= hdrlen) {
                        MH_ALIGN(m, hdrlen);
                } else
#endif
                m->m_data += max_linkhdr;
                m->m_len = hdrlen;
        }
        m->m_pkthdr.rcvif = (struct ifnet *)0;
#ifdef INET6
        if (isipv6) {
                ip6 = mtod(m, struct ip6_hdr *);
                th = (struct tcphdr *)(ip6 + 1);
                tcp_fillheaders(tp, ip6, th);
        } else
#endif /* INET6 */
      {
        ip = mtod(m, struct ip *);
        ipov = (struct ipovly *)ip;
        th = (struct tcphdr *)(ip + 1);
        /* this picks up the pseudo header (w/o the length) */
        tcp_fillheaders(tp, ip, th);
      }
 
        /*
         * Fill in fields, remembering maximum advertised
         * window for use in delaying messages about window sizes.
         * If resending a FIN, be sure not to use a new sequence number.
         */
        if (flags & TH_FIN && tp->t_flags & TF_SENTFIN &&
            tp->snd_nxt == tp->snd_max)
                tp->snd_nxt--;
        /*
         * If we are doing retransmissions, then snd_nxt will
         * not reflect the first unsent octet.  For ACK only
         * packets, we do not want the sequence number of the
         * retransmitted packet, we want the sequence number
         * of the next unsent octet.  So, if there is no data
         * (and no SYN or FIN), use snd_max instead of snd_nxt
         * when filling in ti_seq.  But if we are in persist
         * state, snd_max might reflect one byte beyond the
         * right edge of the window, so use snd_nxt in that
         * case, since we know we aren't doing a retransmission.
         * (retransmit and persist are mutually exclusive...)
         */
        if (len || (flags & (TH_SYN|TH_FIN))
            || callout_active(tp->tt_persist))
                th->th_seq = htonl(tp->snd_nxt);
        else
                th->th_seq = htonl(tp->snd_max);
        th->th_ack = htonl(tp->rcv_nxt);
        if (optlen) {
                bcopy(opt, th + 1, optlen);
                th->th_off = (sizeof (struct tcphdr) + optlen) >> 2;
        }
        th->th_flags = flags;
        /*
         * Calculate receive window.  Don't shrink window,
         * but avoid silly window syndrome.
         */
        if (win < (long)(so->so_rcv.sb_hiwat / 4) && win < (long)tp->t_maxseg)
                win = 0;
        if (win < (long)(tp->rcv_adv - tp->rcv_nxt))
                win = (long)(tp->rcv_adv - tp->rcv_nxt);
        if (win > (long)TCP_MAXWIN << tp->rcv_scale)
                win = (long)TCP_MAXWIN << tp->rcv_scale;
        th->th_win = htons((u_short) (win>>tp->rcv_scale));
        if (SEQ_GT(tp->snd_up, tp->snd_nxt)) {
                th->th_urp = htons((u_short)(tp->snd_up - tp->snd_nxt));
                th->th_flags |= TH_URG;
        } else
                /*
                 * If no urgent pointer to send, then we pull
                 * the urgent pointer to the left edge of the send window
                 * so that it doesn't drift into the send window on sequence
                 * number wraparound.
                 */
                tp->snd_up = tp->snd_una;               /* drag it along */
 
        /*
         * Put TCP length in extended header, and then
         * checksum extended header and data.
         */
        m->m_pkthdr.len = hdrlen + len; /* in6_cksum() need this */
#ifdef INET6
        if (isipv6)
                /*
                 * ip6_plen does not need to be filled in now; it will be
                 * filled in by ip6_output.
                 */
                th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(struct ip6_hdr),
                                       sizeof(struct tcphdr) + optlen + len);
        else
#endif /* INET6 */
      {
        m->m_pkthdr.csum_flags = CSUM_TCP;
        m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
        if (len + optlen)
                th->th_sum = in_addword(th->th_sum,
                    htons((u_short)(optlen + len)));
 
        /* IP version must be set here for ipv4/ipv6 checking later */
      }
 
        /*
         * In transmit state, time the transmission and arrange for
         * the retransmit.  In persist state, just set snd_max.
         */
        if (tp->t_force == 0 || !callout_active(tp->tt_persist)) {
                tcp_seq startseq = tp->snd_nxt;
 
                /*
                 * Advance snd_nxt over sequence space of this segment.
                 */
                if (flags & (TH_SYN|TH_FIN)) {
                        if (flags & TH_SYN)
                                tp->snd_nxt++;
                        if (flags & TH_FIN) {
                                tp->snd_nxt++;
                                tp->t_flags |= TF_SENTFIN;
                        }
                }
                tp->snd_nxt += len;
                if (SEQ_GT(tp->snd_nxt, tp->snd_max)) {
                        tp->snd_max = tp->snd_nxt;
                        /*
                         * Time this transmission if not a retransmission and
                         * not currently timing anything.
                         */
                        if (tp->t_rtttime == 0) {
                                tp->t_rtttime = ticks;
                                tp->t_rtseq = startseq;
                                tcpstat.tcps_segstimed++;
                        }
                }
 
                /*
                 * Set retransmit timer if not currently set,
                 * and not doing an ack or a keep-alive probe.
                 * Initial value for retransmit timer is smoothed
                 * round-trip time + 2 * round-trip time variance.
                 * Initialize shift counter which is used for backoff
                 * of retransmit time.
                 */
                if (!callout_active(tp->tt_rexmt) &&
                    tp->snd_nxt != tp->snd_una) {
                        if (callout_active(tp->tt_persist)) {
                                callout_stop(tp->tt_persist);
                                tp->t_rxtshift = 0;
                        }
                        callout_reset(tp->tt_rexmt, tp->t_rxtcur,
                                      tcp_timer_rexmt, tp);
                }
        } else
                if (SEQ_GT(tp->snd_nxt + len, tp->snd_max))
                        tp->snd_max = tp->snd_nxt + len;
 
#ifdef TCPDEBUG
        /*
         * Trace.
         */
        if (so->so_options & SO_DEBUG)
                tcp_trace(TA_OUTPUT, tp->t_state, tp, mtod(m, void *), th, 0);
#endif
 
        /*
         * Fill in IP length and desired time to live and
         * send to IP level.  There should be a better way
         * to handle ttl and tos; we could keep them in
         * the template, but need a way to checksum without them.
         */
        /*
         * m->m_pkthdr.len should have been set before the checksum
         * calculation, because in6_cksum() needs it.
         */
#ifdef INET6
        if (isipv6) {
                /*
                 * we separately set hoplimit for every segment, since the
                 * user might want to change the value via setsockopt.
                 * Also, desired default hop limit might be changed via
                 * Neighbor Discovery.
                 */
                ip6->ip6_hlim = in6_selecthlim(tp->t_inpcb,
                                               tp->t_inpcb->in6p_route.ro_rt ?
                                               tp->t_inpcb->in6p_route.ro_rt->rt_ifp
                                               : NULL);
 
                /* TODO: IPv6 IP6TOS_ECT bit on */
#ifdef IPSEC
                if (ipsec_setsocket(m, so) != 0) {
                        m_freem(m);
                        error = ENOBUFS;
                        goto out;
                }
#endif /*IPSEC*/
                error = ip6_output(m,
                            tp->t_inpcb->in6p_outputopts,
                            &tp->t_inpcb->in6p_route,
                            (so->so_options & SO_DONTROUTE), NULL, NULL);
        } else
#endif /* INET6 */
    {
        struct rtentry *rt;
        ip->ip_len = m->m_pkthdr.len;
#ifdef INET6
        if (INP_CHECK_SOCKAF(so, AF_INET6))
                ip->ip_ttl = in6_selecthlim(tp->t_inpcb,
                                            tp->t_inpcb->in6p_route.ro_rt ?
                                            tp->t_inpcb->in6p_route.ro_rt->rt_ifp
                                            : NULL);
        else
#endif /* INET6 */
        ip->ip_ttl = tp->t_inpcb->inp_ip_ttl;   /* XXX */
        ip->ip_tos = tp->t_inpcb->inp_ip_tos;   /* XXX */
        /*
         * See if we should do MTU discovery.  We do it only if the following
         * are true:
         *      1) we have a valid route to the destination
         *      2) the MTU is not locked (if it is, then discovery has been
         *         disabled)
         */
        if (path_mtu_discovery
            && (rt = tp->t_inpcb->inp_route.ro_rt)
            && rt->rt_flags & RTF_UP
            && !(rt->rt_rmx.rmx_locks & RTV_MTU)) {
                ip->ip_off |= IP_DF;
        }
#ifdef IPSEC
        ipsec_setsocket(m, so);
#endif /*IPSEC*/
        error = ip_output(m, tp->t_inpcb->inp_options, &tp->t_inpcb->inp_route,
            (so->so_options & SO_DONTROUTE), 0);
    }
        if (error) {
 
                /*
                 * We know that the packet was lost, so back out the
                 * sequence number advance, if any.
                 */
                if (tp->t_force == 0 || !callout_active(tp->tt_persist)) {
                        /*
                         * No need to check for TH_FIN here because
                         * the TF_SENTFIN flag handles that case.
                         */
                        if ((flags & TH_SYN) == 0)
                                tp->snd_nxt -= len;
                }
 
out:
                if (error == ENOBUFS) {
                        if (!callout_active(tp->tt_rexmt) &&
                            !callout_active(tp->tt_persist))
                                callout_reset(tp->tt_rexmt, tp->t_rxtcur,
                                      tcp_timer_rexmt, tp);
                        tcp_quench(tp->t_inpcb, 0);
                        return (0);
                }
                if (error == EMSGSIZE) {
                        /*
                         * ip_output() will have already fixed the route
                         * for us.  tcp_mtudisc() will, as its last action,
                         * initiate retransmission, so it is important to
                         * not do so here.
                         */
                        tcp_mtudisc(tp->t_inpcb, 0);
                        return 0;
                }
                if ((error == EHOSTUNREACH || error == ENETDOWN)
                    && TCPS_HAVERCVDSYN(tp->t_state)) {
                        tp->t_softerror = error;
                        return (0);
                }
                return (error);
        }
        tcpstat.tcps_sndtotal++;
 
        /*
         * Data sent (as far as we can tell).
         * If this advertises a larger window than any other segment,
         * then remember the size of the advertised window.
         * Any pending ACK has now been sent.
         */
        if (win > 0 && SEQ_GT(tp->rcv_nxt+win, tp->rcv_adv))
                tp->rcv_adv = tp->rcv_nxt + win;
        tp->last_ack_sent = tp->rcv_nxt;
        tp->t_flags &= ~TF_ACKNOW;
        if (tcp_delack_enabled)
                callout_stop(tp->tt_delack);
        if (sendalot && (!tcp_do_newreno || --maxburst))
                goto again;
        return (0);
}
 
/*
 * Start (or restart) the persist timer for a connection.
 *
 * A base interval is computed from tp->t_srtt and tp->t_rttvar,
 * multiplied by the tcp_backoff[] entry selected by the current
 * tp->t_rxtshift, and handed to TCPT_RANGESET together with
 * TCPTV_PERSMIN and TCPTV_PERSMAX (presumably clamping the result to
 * that range -- see the macro definition).  The resulting value is used
 * to arm tp->tt_persist with tcp_timer_persist as the handler.
 *
 * Once the timer has been armed, tp->t_rxtshift is advanced by one
 * unless it has already reached TCP_MAXRXTSHIFT.
 *
 * The retransmit timer must not be active on entry; if it is, panic()
 * is called.
 */
void
tcp_setpersist(tp)
        register struct tcpcb *tp;
{
        int rtt_base;
        int persist_timeout;

        /* Persist and retransmit timers are not expected to coexist. */
        if (callout_active(tp->tt_rexmt)) {
                panic("tcp_setpersist: retransmit pending");
        }

        rtt_base = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1;

        /*
         * Start/restart persistence timer.
         */
        TCPT_RANGESET(persist_timeout,
                      rtt_base * tcp_backoff[tp->t_rxtshift],
                      TCPTV_PERSMIN, TCPTV_PERSMAX);
        callout_reset(tp->tt_persist, persist_timeout,
                      tcp_timer_persist, tp);

        /* Step to the next backoff slot, stopping at the last one. */
        if (tp->t_rxtshift < TCP_MAXRXTSHIFT) {
                tp->t_rxtshift++;
        }
}
Browse

Tools

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [rtos/] [ecos-2.0/] [packages/] [net/] [bsd_tcpip/] [v2_0/] [src/] [sys/] [netinet/] [tcp_output.c] - Blame information for rev 27

Line No.	Rev	Author	Line
1	27	unneback	`//==========================================================================`
2			`//`
3			`// src/sys/netinet/tcp_output.c`
4			`//`
5			`//==========================================================================`
6			`//####BSDCOPYRIGHTBEGIN####`
7			`//`
8			`// -------------------------------------------`
9			`//`
10			`// Portions of this software may have been derived from OpenBSD,`
11			`// FreeBSD or other sources, and are covered by the appropriate`
12			`// copyright disclaimers included herein.`
13			`//`
14			`// Portions created by Red Hat are`
15			`// Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.`
16			`//`
17			`// -------------------------------------------`
18			`//`
19			`//####BSDCOPYRIGHTEND####`
20			`//==========================================================================`
21
22			`/*`
23			`* Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995`
24			`* The Regents of the University of California. All rights reserved.`
25			`*`
26			`* Redistribution and use in source and binary forms, with or without`
27			`* modification, are permitted provided that the following conditions`
28			`* are met:`
29			`* 1. Redistributions of source code must retain the above copyright`
30			`* notice, this list of conditions and the following disclaimer.`
31			`* 2. Redistributions in binary form must reproduce the above copyright`
32			`* notice, this list of conditions and the following disclaimer in the`
33			`* documentation and/or other materials provided with the distribution.`
34			`* 3. All advertising materials mentioning features or use of this software`
35			`* must display the following acknowledgement:`
36			`* This product includes software developed by the University of`
37			`* California, Berkeley and its contributors.`
38			`* 4. Neither the name of the University nor the names of its contributors`
39			`* may be used to endorse or promote products derived from this software`
40			`* without specific prior written permission.`