keep USETRAILERS from SIOCSARP
[unix-history] / usr / src / sys / netinet / tcp_output.c
index 40cb16d..4d150a4 100644 (file)
@@ -1,9 +1,9 @@
 /*
- * Copyright (c) 1982 Regents of the University of California.
+ * Copyright (c) 1982, 1986 Regents of the University of California.
  * All rights reserved.  The Berkeley software License Agreement
  * specifies the terms and conditions for redistribution.
  *
- *     @(#)tcp_output.c        6.14 (Berkeley) %G%
+ *     @(#)tcp_output.c        7.10 (Berkeley) %G%
  */
 
 #include "param.h"
@@ -62,6 +62,7 @@ again:
        sendalot = 0;
        off = tp->snd_nxt - tp->snd_una;
        win = MIN(tp->snd_wnd, tp->snd_cwnd);
+
        /*
         * If in persist timeout with window of 0, send 1 byte.
         * Otherwise, if window is small but nonzero
@@ -69,7 +70,7 @@ again:
         * and go to transmit state.
         */
        if (tp->t_force) {
-               if (win == 0) 
+               if (win == 0)
                        win = 1;
                else {
                        tp->t_timer[TCPT_PERSIST] = 0;
@@ -78,29 +79,55 @@ again:
        }
 
        len = MIN(so->so_snd.sb_cc, win) - off;
-       if (len < 0)
-               return (0);     /* ??? */       /* past FIN */
-       if (len > tp->t_maxseg) {
-               len = tp->t_maxseg;
+       flags = tcp_outflags[tp->t_state];
+
+       if (len < 0) {
                /*
-                * Don't send more than one segment if retransmitting
-                * (or persisting, but then we shouldn't be here).
+                * If FIN has been sent but not acked,
+                * but we haven't been called to retransmit,
+                * len will be -1; transmit if acking, otherwise no need.
+                * Otherwise, window shrank after we sent into it.
+                * If window shrank to 0, cancel pending retransmit
+                * and pull snd_nxt back to (closed) window.
+                * We will enter persist state below.
+                * If the window didn't close completely,
+                * just wait for an ACK.
                 */
-               if (tp->t_rxtshift == 0)
-                       sendalot = 1;
+               if (flags & TH_FIN) {
+                       if (tp->t_flags & TF_ACKNOW)
+                               len = 0;
+                       else
+                               return (0);
+               } else if (win == 0) {
+                       tp->t_timer[TCPT_REXMT] = 0;
+                       tp->snd_nxt = tp->snd_una;
+                       len = 0;
+               } else
+                       return (0);
        }
-
-       win = sbspace(&so->so_rcv);
-       flags = tcp_outflags[tp->t_state];
-       if (tp->snd_nxt + len < tp->snd_una + so->so_snd.sb_cc)
+       if (len > tp->t_maxseg) {
+               len = tp->t_maxseg;
+               sendalot = 1;
+       }
+       if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc))
                flags &= ~TH_FIN;
-       if (flags & (TH_SYN|TH_RST|TH_FIN))
-               goto send;
+       win = sbspace(&so->so_rcv);
+
 
+       /*
+        * If our state indicates that FIN should be sent
+        * and we have not yet done so, or we're retransmitting the FIN,
+        * then we need to send.
+        */
+       if (flags & TH_FIN &&
+           ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una))
+               goto send;
        /*
         * Send if we owe peer an ACK.
         */
-       if (tp->t_flags&TF_ACKNOW)
+       if (tp->t_flags & TF_ACKNOW)
+               goto send;
+       if (flags & (TH_SYN|TH_RST))
                goto send;
        if (SEQ_GT(tp->snd_up, tp->snd_una))
                goto send;
@@ -116,7 +143,7 @@ again:
         * to send into a small window), then must resend.
         */
        if (len) {
-               if (len == tp->t_maxseg || len >= TCP_MSS)      /* a lot */
+               if (len == tp->t_maxseg)
                        goto send;
                if ((idle || tp->t_flags & TF_NODELAY) &&
                    len + off >= so->so_snd.sb_cc)
@@ -127,15 +154,7 @@ again:
                        goto send;
                if (SEQ_LT(tp->snd_nxt, tp->snd_max))
                        goto send;
-       } else
-               /*
-                * If window shrank after we sent into it,
-                * cancel pending retransmit.  We will enter
-                * persist state below.
-                */
-               if (off == 0 && SEQ_LT(tp->snd_nxt, tp->snd_max))
-                       tp->t_timer[TCPT_REXMT] = 0;
-
+       }
 
        /*
         * Compare available window to amount of window
@@ -143,9 +162,14 @@ again:
         * next expected input.)  If the difference is 35% or more of the
         * maximum possible window, then want to send a window update to peer.
         */
-       if (win > 0 &&
-           ((100*(win-(tp->rcv_adv-tp->rcv_nxt))/so->so_rcv.sb_hiwat) >= 35))
-               goto send;
+       if (win > 0) {
+               int adv = win - (tp->rcv_adv - tp->rcv_nxt);
+
+               if (100 * adv / so->so_rcv.sb_hiwat >= 35)
+                       goto send;
+               if (adv >= 2 * tp->t_maxseg && so->so_rcv.sb_cc == 0)
+                       goto send;
+       }
 
        /*
         * TCP window updates are not reliable, rather a polling protocol
@@ -192,10 +216,27 @@ send:
        m->m_off = MMAXOFF - sizeof (struct tcpiphdr);
        m->m_len = sizeof (struct tcpiphdr);
        if (len) {
+               if (tp->t_force && len == 1)
+                       tcpstat.tcps_sndprobe++;
+               else if (SEQ_LT(tp->snd_nxt, tp->snd_max)) {
+                       tcpstat.tcps_sndrexmitpack++;
+                       tcpstat.tcps_sndrexmitbyte += len;
+               } else {
+                       tcpstat.tcps_sndpack++;
+                       tcpstat.tcps_sndbyte += len;
+               }
                m->m_next = m_copy(so->so_snd.sb_mb, off, len);
                if (m->m_next == 0)
                        len = 0;
-       }
+       } else if (tp->t_flags & TF_ACKNOW)
+               tcpstat.tcps_sndacks++;
+       else if (flags & (TH_SYN|TH_FIN|TH_RST))
+               tcpstat.tcps_sndctrl++;
+       else if (SEQ_GT(tp->snd_up, tp->snd_una))
+               tcpstat.tcps_sndurg++;
+       else
+               tcpstat.tcps_sndwinup++;
+
        ti = mtod(m, struct tcpiphdr *);
        if (tp->t_template == 0)
                panic("tcp_output");
@@ -204,7 +245,11 @@ send:
        /*
         * Fill in fields, remembering maximum advertised
         * window for use in delaying messages about window sizes.
+        * If resending a FIN, be sure not to use a new sequence number.
         */
+       if (flags & TH_FIN && tp->t_flags & TF_SENTFIN && 
+           tp->snd_nxt == tp->snd_max)
+               tp->snd_nxt--;
        ti->ti_seq = htonl(tp->snd_nxt);
        ti->ti_ack = htonl(tp->rcv_nxt);
        /*
@@ -212,7 +257,7 @@ send:
         * unless TCP set to not do any options.
         */
        opt = NULL;
-       if (tp->t_state < TCPS_ESTABLISHED && (tp->t_flags & TF_NOOPT) == 0) {
+       if (flags & TH_SYN && (tp->t_flags & TF_NOOPT) == 0) {
                u_short mss;
 
                mss = MIN(so->so_rcv.sb_hiwat / 2, tcp_mss(tp));
@@ -221,9 +266,6 @@ send:
                        optlen = sizeof (tcp_initopt);
                        *(u_short *)(opt + 2) = htons(mss);
                }
-       } else if (tp->t_tcpopt) {
-               opt = mtod(tp->t_tcpopt, u_char *);
-               optlen = tp->t_tcpopt->m_len;
        }
        if (opt) {
                m0 = m->m_next;
@@ -288,11 +330,17 @@ send:
         * the retransmit.  In persist state, just set snd_max.
         */
        if (tp->t_force == 0 || tp->t_timer[TCPT_PERSIST] == 0) {
+               tcp_seq startseq = tp->snd_nxt;
+
                /*
                 * Advance snd_nxt over sequence space of this segment.
                 */
-               if (flags & (TH_SYN|TH_FIN))
+               if (flags & TH_SYN)
                        tp->snd_nxt++;
+               if (flags & TH_FIN) {
+                       tp->snd_nxt++;
+                       tp->t_flags |= TF_SENTFIN;
+               }
                tp->snd_nxt += len;
                if (SEQ_GT(tp->snd_nxt, tp->snd_max)) {
                        tp->snd_max = tp->snd_nxt;
@@ -302,28 +350,30 @@ send:
                         */
                        if (tp->t_rtt == 0) {
                                tp->t_rtt = 1;
-                               tp->t_rtseq = tp->snd_nxt - len;
+                               tp->t_rtseq = startseq;
+                               tcpstat.tcps_segstimed++;
                        }
                }
 
                /*
                 * Set retransmit timer if not currently set,
-                * and not doing a keep-alive probe.
-                * Initial value for retransmit timer is tcp_beta*tp->t_srtt.
-                * Initialize shift counter which is used for exponential
-                * backoff of retransmit time.
+                * and not doing an ack or a keep-alive probe.
+                * Initial value for retransmit timer is smoothed
+                * round-trip time + 2 * round-trip time variance.
+                * Initialize shift counter which is used for backoff
+                * of retransmit time.
                 */
                if (tp->t_timer[TCPT_REXMT] == 0 &&
                    tp->snd_nxt != tp->snd_una) {
-                       TCPT_RANGESET(tp->t_timer[TCPT_REXMT],
-                           tcp_beta * tp->t_srtt, TCPTV_MIN, TCPTV_MAX);
-                       tp->t_rxtshift = 0;
+                       tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
+                       if (tp->t_timer[TCPT_PERSIST]) {
+                               tp->t_timer[TCPT_PERSIST] = 0;
+                               tp->t_rxtshift = 0;
+                       }
                }
-               tp->t_timer[TCPT_PERSIST] = 0;
-       } else {
+       } else
                if (SEQ_GT(tp->snd_nxt + len, tp->snd_max))
                        tp->snd_max = tp->snd_nxt + len;
-       }
 
        /*
         * Trace.
@@ -341,6 +391,7 @@ send:
            so->so_options & SO_DONTROUTE);
        if (error)
                return (error);
+       tcpstat.tcps_sndtotal++;
 
        /*
         * Data sent (as far as we can tell).
@@ -359,6 +410,7 @@ send:
 tcp_setpersist(tp)
        register struct tcpcb *tp;
 {
+       register t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1;
 
        if (tp->t_timer[TCPT_REXMT])
                panic("tcp_output REXMT");
@@ -366,9 +418,8 @@ tcp_setpersist(tp)
         * Start/restart persistance timer.
         */
        TCPT_RANGESET(tp->t_timer[TCPT_PERSIST],
-           ((int)(tcp_beta * tp->t_srtt)) << tp->t_rxtshift,
-           TCPTV_PERSMIN, TCPTV_MAX);
-       tp->t_rxtshift++;
-       if (tp->t_rxtshift >= TCP_MAXRXTSHIFT)
-               tp->t_rxtshift = 0;
+           t * tcp_backoff[tp->t_rxtshift],
+           TCPTV_PERSMIN, TCPTV_PERSMAX);
+       if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
+               tp->t_rxtshift++;
 }