BSD 4_3_Tahoe release
[unix-history] / usr / src / sys / netinet / tcp_timer.c
index df68499..45f827f 100644 (file)
@@ -1,27 +1,48 @@
-/*     tcp_timer.c     4.22    82/06/12        */
-
-#include "../h/param.h"
-#include "../h/systm.h"
-#include "../h/mbuf.h"
-#include "../h/socket.h"
-#include "../h/socketvar.h"
-#include "../h/protosw.h"
-#include "../net/in.h"
-#include "../net/route.h"
-#include "../net/in_pcb.h"
-#include "../net/in_systm.h"
+/*
+ * Copyright (c) 1982, 1986 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms are permitted
+ * provided that the above copyright notice and this paragraph are
+ * duplicated in all such forms and that any documentation,
+ * advertising materials, and other materials related to such
+ * distribution and use acknowledge that the software was developed
+ * by the University of California, Berkeley.  The name of the
+ * University may not be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ *     @(#)tcp_timer.c 7.14 (Berkeley) 6/29/88
+ */
+
+#include "param.h"
+#include "systm.h"
+#include "mbuf.h"
+#include "socket.h"
+#include "socketvar.h"
+#include "protosw.h"
+#include "errno.h"
+
 #include "../net/if.h"
 #include "../net/if.h"
-#include "../net/ip.h"
-#include "../net/ip_var.h"
-#include "../net/tcp.h"
-#include "../net/tcp_fsm.h"
-#include "../net/tcp_seq.h"
-#include "../net/tcp_timer.h"
-#include "../net/tcp_var.h"
-#include "../net/tcpip.h"
-#include "../errno.h"
-
-int    tcpnodelack = 0;
+#include "../net/route.h"
+
+#include "in.h"
+#include "in_pcb.h"
+#include "in_systm.h"
+#include "ip.h"
+#include "ip_var.h"
+#include "tcp.h"
+#include "tcp_fsm.h"
+#include "tcp_seq.h"
+#include "tcp_timer.h"
+#include "tcp_var.h"
+#include "tcpip.h"
+
+int    tcp_keepidle = TCPTV_KEEP_IDLE;
+int    tcp_keepintvl = TCPTV_KEEPINTVL;
+int    tcp_maxidle;
 /*
  * Fast timeout routine for processing delayed acks
  */
 /*
  * Fast timeout routine for processing delayed acks
  */
@@ -30,7 +51,6 @@ tcp_fasttimo()
        register struct inpcb *inp;
        register struct tcpcb *tp;
        int s = splnet();
        register struct inpcb *inp;
        register struct tcpcb *tp;
        int s = splnet();
-COUNT(TCP_FASTTIMO);
 
        inp = tcb.inp_next;
        if (inp)
 
        inp = tcb.inp_next;
        if (inp)
@@ -39,6 +59,7 @@ COUNT(TCP_FASTTIMO);
                    (tp->t_flags & TF_DELACK)) {
                        tp->t_flags &= ~TF_DELACK;
                        tp->t_flags |= TF_ACKNOW;
                    (tp->t_flags & TF_DELACK)) {
                        tp->t_flags &= ~TF_DELACK;
                        tp->t_flags |= TF_ACKNOW;
+                       tcpstat.tcps_delack++;
                        (void) tcp_output(tp);
                }
        splx(s);
                        (void) tcp_output(tp);
                }
        splx(s);
@@ -55,8 +76,8 @@ tcp_slowtimo()
        register struct tcpcb *tp;
        int s = splnet();
        register int i;
        register struct tcpcb *tp;
        int s = splnet();
        register int i;
-COUNT(TCP_SLOWTIMO);
 
 
+       tcp_maxidle = TCPTV_KEEPCNT * tcp_keepintvl;
        /*
         * Search through tcb's and update active timers.
         */
        /*
         * Search through tcb's and update active timers.
         */
@@ -65,16 +86,16 @@ COUNT(TCP_SLOWTIMO);
                splx(s);
                return;
        }
                splx(s);
                return;
        }
-       while (ip != &tcb) {
+       for (; ip != &tcb; ip = ipnxt) {
+               ipnxt = ip->inp_next;
                tp = intotcpcb(ip);
                if (tp == 0)
                        continue;
                tp = intotcpcb(ip);
                if (tp == 0)
                        continue;
-               ipnxt = ip->inp_next;
                for (i = 0; i < TCPT_NTIMERS; i++) {
                        if (tp->t_timer[i] && --tp->t_timer[i] == 0) {
                                (void) tcp_usrreq(tp->t_inpcb->inp_socket,
                                    PRU_SLOWTIMO, (struct mbuf *)0,
                for (i = 0; i < TCPT_NTIMERS; i++) {
                        if (tp->t_timer[i] && --tp->t_timer[i] == 0) {
                                (void) tcp_usrreq(tp->t_inpcb->inp_socket,
                                    PRU_SLOWTIMO, (struct mbuf *)0,
-                                   (caddr_t)i);
+                                   (struct mbuf *)i, (struct mbuf *)0);
                                if (ipnxt->inp_prev != ip)
                                        goto tpgone;
                        }
                                if (ipnxt->inp_prev != ip)
                                        goto tpgone;
                        }
@@ -83,9 +104,13 @@ COUNT(TCP_SLOWTIMO);
                if (tp->t_rtt)
                        tp->t_rtt++;
 tpgone:
                if (tp->t_rtt)
                        tp->t_rtt++;
 tpgone:
-               ip = ipnxt;
+               ;
        }
        tcp_iss += TCP_ISSINCR/PR_SLOWHZ;               /* increment iss */
        }
        tcp_iss += TCP_ISSINCR/PR_SLOWHZ;               /* increment iss */
+#ifdef TCP_COMPAT_42
+       if ((int)tcp_iss < 0)
+               tcp_iss = 0;                            /* XXX */
+#endif
        splx(s);
 }
 
        splx(s);
 }
 
@@ -97,117 +122,164 @@ tcp_canceltimers(tp)
 {
        register int i;
 
 {
        register int i;
 
-COUNT(TCP_CANCELTIMERS);
        for (i = 0; i < TCPT_NTIMERS; i++)
                tp->t_timer[i] = 0;
 }
 
        for (i = 0; i < TCPT_NTIMERS; i++)
                tp->t_timer[i] = 0;
 }
 
-float  tcp_backoff[TCP_MAXRXTSHIFT] =
-    { 1.0, 1.2, 1.4, 1.7, 2.0, 3.0, 5.0, 8.0, 16.0, 32.0 };
-int    tcprexmtprint = 0;
-int    tcpexprexmtbackoff = 0;
+int    tcp_backoff[TCP_MAXRXTSHIFT + 1] =
+    { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };
+
 /*
  * TCP timer processing.
  */
 /*
  * TCP timer processing.
  */
+struct tcpcb *
 tcp_timers(tp, timer)
        register struct tcpcb *tp;
        int timer;
 {
 tcp_timers(tp, timer)
        register struct tcpcb *tp;
        int timer;
 {
+       register int rexmt;
 
 
-COUNT(TCP_TIMERS);
        switch (timer) {
 
        /*
        switch (timer) {
 
        /*
-        * 2 MSL timeout in shutdown went off.  Delete connection
-        * control block.
+        * 2 MSL timeout in shutdown went off.  If we're closed but
+        * still waiting for peer to close and connection has been idle
+        * too long, or if 2MSL time is up from TIME_WAIT, delete connection
+        * control block.  Otherwise, check again in a bit.
         */
        case TCPT_2MSL:
         */
        case TCPT_2MSL:
-               tcp_close(tp);
-               return;
+               if (tp->t_state != TCPS_TIME_WAIT &&
+                   tp->t_idle <= tcp_maxidle)
+                       tp->t_timer[TCPT_2MSL] = tcp_keepintvl;
+               else
+                       tp = tcp_close(tp);
+               break;
 
        /*
         * Retransmission timer went off.  Message has not
         * been acked within retransmit interval.  Back off
 
        /*
         * Retransmission timer went off.  Message has not
         * been acked within retransmit interval.  Back off
-        * to a longer retransmit interval and retransmit all
-        * unacknowledged messages in the window.
+        * to a longer retransmit interval and retransmit one segment.
         */
        case TCPT_REXMT:
         */
        case TCPT_REXMT:
-               tp->t_rxtshift++;
-               if (tp->t_rxtshift > TCP_MAXRXTSHIFT) {
-                       tcp_drop(tp, ETIMEDOUT);
-                       return;
+               if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
+                       tp->t_rxtshift = TCP_MAXRXTSHIFT;
+                       tcpstat.tcps_timeoutdrop++;
+                       tp = tcp_drop(tp, ETIMEDOUT);
+                       break;
                }
                }
-               TCPT_RANGESET(tp->t_timer[TCPT_REXMT],
-                   (int)tp->t_srtt, TCPTV_MIN, TCPTV_MAX);
-               if (tcpexprexmtbackoff) {
-                       TCPT_RANGESET(tp->t_timer[TCPT_REXMT],
-                           tp->t_timer[TCPT_REXMT] << tp->t_rxtshift,
-                           TCPTV_MIN, TCPTV_MAX);
-               } else {
-                       TCPT_RANGESET(tp->t_timer[TCPT_REXMT],
-                           tp->t_timer[TCPT_REXMT] *
-                               tcp_backoff[tp->t_rxtshift - 1],
-                           TCPTV_MIN, TCPTV_MAX);
+               tcpstat.tcps_rexmttimeo++;
+               rexmt = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1;
+               rexmt *= tcp_backoff[tp->t_rxtshift];
+               TCPT_RANGESET(tp->t_rxtcur, rexmt, TCPTV_MIN, TCPTV_REXMTMAX);
+               tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
+               /*
+                * If losing, let the lower level know and try for
+                * a better route.  Also, if we backed off this far,
+                * our srtt estimate is probably bogus.  Clobber it
+                * so we'll take the next rtt measurement as our srtt;
+                * move the current srtt into rttvar to keep the current
+                * retransmit times until then.
+                */
+               if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
+                       in_losing(tp->t_inpcb);
+                       tp->t_rttvar += (tp->t_srtt >> 2);
+                       tp->t_srtt = 0;
                }
                }
-if (tcprexmtprint)
-printf("rexmt set to %d\n", tp->t_timer[TCPT_REXMT]);
                tp->snd_nxt = tp->snd_una;
                tp->snd_nxt = tp->snd_una;
-               /* this only transmits one segment! */
+               /*
+                * If timing a segment in this window, stop the timer.
+                */
+               tp->t_rtt = 0;
+               /*
+                * Close the congestion window down to one segment
+                * (we'll open it by one segment for each ack we get).
+                * Since we probably have a window's worth of unacked
+                * data accumulated, this "slow start" keeps us from
+                * dumping all that data as back-to-back packets (which
+                * might overwhelm an intermediate gateway).
+                *
+                * There are two phases to the opening: Initially we
+                * open by one mss on each ack.  This makes the window
+                * size increase exponentially with time.  If the
+                * window is larger than the path can handle, this
+                * exponential growth results in dropped packet(s)
+                * almost immediately.  To get more time between 
+                * drops but still "push" the network to take advantage
+                * of improving conditions, we switch from exponential
+                * to linear window opening at some threshold size.
+                * For a threshold, we use half the current window
+                * size, truncated to a multiple of the mss.
+                *
+                * (the minimum cwnd that will give us exponential
+                * growth is 2 mss.  We don't allow the threshold
+                * to go below this.)
+                */
+               {
+               u_int win = MIN(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg;
+               if (win < 2)
+                       win = 2;
+               tp->snd_cwnd = tp->t_maxseg;
+               tp->snd_ssthresh = win * tp->t_maxseg;
+               }
                (void) tcp_output(tp);
                (void) tcp_output(tp);
-               return;
+               break;
 
        /*
         * Persistence timer into zero window.
         * Force a byte to be output, if possible.
         */
        case TCPT_PERSIST:
 
        /*
         * Persistence timer into zero window.
         * Force a byte to be output, if possible.
         */
        case TCPT_PERSIST:
+               tcpstat.tcps_persisttimeo++;
                tcp_setpersist(tp);
                tp->t_force = 1;
                (void) tcp_output(tp);
                tp->t_force = 0;
                tcp_setpersist(tp);
                tp->t_force = 1;
                (void) tcp_output(tp);
                tp->t_force = 0;
-               return;
+               break;
 
        /*
         * Keep-alive timer went off; send something
         * or drop connection if idle for too long.
         */
        case TCPT_KEEP:
 
        /*
         * Keep-alive timer went off; send something
         * or drop connection if idle for too long.
         */
        case TCPT_KEEP:
+               tcpstat.tcps_keeptimeo++;
                if (tp->t_state < TCPS_ESTABLISHED)
                        goto dropit;
                if (tp->t_state < TCPS_ESTABLISHED)
                        goto dropit;
-               if (tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE) {
-                       if (tp->t_idle >= TCPTV_MAXIDLE)
+               if (tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE &&
+                   tp->t_state <= TCPS_CLOSE_WAIT) {
+                       if (tp->t_idle >= tcp_keepidle + tcp_maxidle)
                                goto dropit;
                        /*
                                goto dropit;
                        /*
-                        * Saying tp->rcv_nxt-1 lies about what
-                        * we have received, and by the protocol spec
-                        * requires the correspondent TCP to respond.
-                        * Saying tp->snd_una-1 causes the transmitted
-                        * byte to lie outside the receive window; this
-                        * is important because we don't necessarily
-                        * have a byte in the window to send (consider
-                        * a one-way stream!)
+                        * Send a packet designed to force a response
+                        * if the peer is up and reachable:
+                        * either an ACK if the connection is still alive,
+                        * or an RST if the peer has closed the connection
+                        * due to timeout or reboot.
+                        * Using sequence number tp->snd_una-1
+                        * causes the transmitted zero-length segment
+                        * to lie outside the receive window;
+                        * by the protocol spec, this requires the
+                        * correspondent TCP to respond.
+                        */
+                       tcpstat.tcps_keepprobe++;
+#ifdef TCP_COMPAT_42
+                       /*
+                        * The keepalive packet must have nonzero length
+                        * to get a 4.2 host to respond.
                         */
                         */
-                       tcp_respond(tp,
-                           tp->t_template, tp->rcv_nxt-1, tp->snd_una-1, 0);
+                       tcp_respond(tp, tp->t_template,
+                           tp->rcv_nxt - 1, tp->snd_una - 1, 0);
+#else
+                       tcp_respond(tp, tp->t_template,
+                           tp->rcv_nxt, tp->snd_una - 1, 0);
+#endif
+                       tp->t_timer[TCPT_KEEP] = tcp_keepintvl;
                } else
                } else
-                       tp->t_idle = 0;
-               tp->t_timer[TCPT_KEEP] = TCPTV_KEEP;
-               return;
+                       tp->t_timer[TCPT_KEEP] = tcp_keepidle;
+               break;
        dropit:
        dropit:
-               tcp_drop(tp, ETIMEDOUT);
-               return;
-
-#ifdef TCPTRUEOOB
-       /*
-        * Out-of-band data retransmit timer.
-        */
-       case TCPT_OOBREXMT:
-               if (tp->t_flags & TF_NOOPT)
-                       return;
-               (void) tcp_output(tp);
-               TCPT_RANGESET(tp->t_timer[TCPT_OOBREXMT],
-                   2 * tp->t_srtt, TCPTV_MIN, TCPTV_MAX);
-               return;
-#endif
+               tcpstat.tcps_keepdrops++;
+               tp = tcp_drop(tp, ETIMEDOUT);
+               break;
        }
        }
+       return (tp);
 }
 }