BSD 4_3_Tahoe release
[unix-history] / usr / src / sys / netinet / tcp_timer.c
index dc40da3..45f827f 100644 (file)
@@ -1,26 +1,48 @@
-/* tcp_timer.c 4.11 82/01/13 */
-
-#include "../h/param.h"
-#include "../h/systm.h"
-#include "../h/mbuf.h"
-#include "../h/socket.h"
-#include "../h/socketvar.h"
-#include "../h/protosw.h"
-#include "../net/in.h"
-#include "../net/in_pcb.h"
-#include "../net/in_systm.h"
+/*
+ * Copyright (c) 1982, 1986 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms are permitted
+ * provided that the above copyright notice and this paragraph are
+ * duplicated in all such forms and that any documentation,
+ * advertising materials, and other materials related to such
+ * distribution and use acknowledge that the software was developed
+ * by the University of California, Berkeley.  The name of the
+ * University may not be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ *     @(#)tcp_timer.c 7.14 (Berkeley) 6/29/88
+ */
+
+#include "param.h"
+#include "systm.h"
+#include "mbuf.h"
+#include "socket.h"
+#include "socketvar.h"
+#include "protosw.h"
+#include "errno.h"
+
 #include "../net/if.h"
 #include "../net/if.h"
-#include "../net/ip.h"
-#include "../net/ip_var.h"
-#include "../net/tcp.h"
-#include "../net/tcp_fsm.h"
-#include "../net/tcp_seq.h"
-#include "../net/tcp_timer.h"
-#include "../net/tcp_var.h"
-#include "../net/tcpip.h"
-#include "../errno.h"
-
-int    tcpdelack = 0;
+#include "../net/route.h"
+
+#include "in.h"
+#include "in_pcb.h"
+#include "in_systm.h"
+#include "ip.h"
+#include "ip_var.h"
+#include "tcp.h"
+#include "tcp_fsm.h"
+#include "tcp_seq.h"
+#include "tcp_timer.h"
+#include "tcp_var.h"
+#include "tcpip.h"
+
+int    tcp_keepidle = TCPTV_KEEP_IDLE;
+int    tcp_keepintvl = TCPTV_KEEPINTVL;
+int    tcp_maxidle;
 /*
  * Fast timeout routine for processing delayed acks
  */
 /*
  * Fast timeout routine for processing delayed acks
  */
@@ -29,13 +51,15 @@ tcp_fasttimo()
        register struct inpcb *inp;
        register struct tcpcb *tp;
        int s = splnet();
        register struct inpcb *inp;
        register struct tcpcb *tp;
        int s = splnet();
-COUNT(TCP_FASTTIMO);
 
 
-       for (inp = tcb.inp_next; inp != &tcb; inp = inp->inp_next)
+       inp = tcb.inp_next;
+       if (inp)
+       for (; inp != &tcb; inp = inp->inp_next)
                if ((tp = (struct tcpcb *)inp->inp_ppcb) &&
                    (tp->t_flags & TF_DELACK)) {
                        tp->t_flags &= ~TF_DELACK;
                        tp->t_flags |= TF_ACKNOW;
                if ((tp = (struct tcpcb *)inp->inp_ppcb) &&
                    (tp->t_flags & TF_DELACK)) {
                        tp->t_flags &= ~TF_DELACK;
                        tp->t_flags |= TF_ACKNOW;
+                       tcpstat.tcps_delack++;
                        (void) tcp_output(tp);
                }
        splx(s);
                        (void) tcp_output(tp);
                }
        splx(s);
@@ -48,12 +72,12 @@ COUNT(TCP_FASTTIMO);
  */
 tcp_slowtimo()
 {
  */
 tcp_slowtimo()
 {
-       register struct inpcb *ip;
+       register struct inpcb *ip, *ipnxt;
        register struct tcpcb *tp;
        int s = splnet();
        register int i;
        register struct tcpcb *tp;
        int s = splnet();
        register int i;
-COUNT(TCP_SLOWTIMO);
 
 
+       tcp_maxidle = TCPTV_KEEPCNT * tcp_keepintvl;
        /*
         * Search through tcb's and update active timers.
         */
        /*
         * Search through tcb's and update active timers.
         */
@@ -62,21 +86,31 @@ COUNT(TCP_SLOWTIMO);
                splx(s);
                return;
        }
                splx(s);
                return;
        }
-       for (; ip != &tcb; ip = ip->inp_next) {
+       for (; ip != &tcb; ip = ipnxt) {
+               ipnxt = ip->inp_next;
                tp = intotcpcb(ip);
                if (tp == 0)
                        continue;
                for (i = 0; i < TCPT_NTIMERS; i++) {
                tp = intotcpcb(ip);
                if (tp == 0)
                        continue;
                for (i = 0; i < TCPT_NTIMERS; i++) {
-                       if (tp->t_timer[i] && --tp->t_timer[i] == 0)
+                       if (tp->t_timer[i] && --tp->t_timer[i] == 0) {
                                (void) tcp_usrreq(tp->t_inpcb->inp_socket,
                                    PRU_SLOWTIMO, (struct mbuf *)0,
                                (void) tcp_usrreq(tp->t_inpcb->inp_socket,
                                    PRU_SLOWTIMO, (struct mbuf *)0,
-                                   (caddr_t)i);
+                                   (struct mbuf *)i, (struct mbuf *)0);
+                               if (ipnxt->inp_prev != ip)
+                                       goto tpgone;
+                       }
                }
                tp->t_idle++;
                if (tp->t_rtt)
                        tp->t_rtt++;
                }
                tp->t_idle++;
                if (tp->t_rtt)
                        tp->t_rtt++;
+tpgone:
+               ;
        }
        tcp_iss += TCP_ISSINCR/PR_SLOWHZ;               /* increment iss */
        }
        tcp_iss += TCP_ISSINCR/PR_SLOWHZ;               /* increment iss */
+#ifdef TCP_COMPAT_42
+       if ((int)tcp_iss < 0)
+               tcp_iss = 0;                            /* XXX */
+#endif
        splx(s);
 }
 
        splx(s);
 }
 
@@ -88,82 +122,164 @@ tcp_canceltimers(tp)
 {
        register int i;
 
 {
        register int i;
 
-COUNT(TCP_CANCELTIMERS);
        for (i = 0; i < TCPT_NTIMERS; i++)
                tp->t_timer[i] = 0;
 }
 
        for (i = 0; i < TCPT_NTIMERS; i++)
                tp->t_timer[i] = 0;
 }
 
+int    tcp_backoff[TCP_MAXRXTSHIFT + 1] =
+    { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };
+
 /*
  * TCP timer processing.
  */
 /*
  * TCP timer processing.
  */
+struct tcpcb *
 tcp_timers(tp, timer)
        register struct tcpcb *tp;
        int timer;
 {
 tcp_timers(tp, timer)
        register struct tcpcb *tp;
        int timer;
 {
+       register int rexmt;
 
 
-COUNT(TCP_TIMERS);
        switch (timer) {
 
        /*
        switch (timer) {
 
        /*
-        * 2 MSL timeout in shutdown went off.  Delete connection
-        * control block.
+        * 2 MSL timeout in shutdown went off.  If we're closed but
+        * still waiting for peer to close and connection has been idle
+        * too long, or if 2MSL time is up from TIME_WAIT, delete connection
+        * control block.  Otherwise, check again in a bit.
         */
        case TCPT_2MSL:
         */
        case TCPT_2MSL:
-               tcp_close(tp);
-               return;
+               if (tp->t_state != TCPS_TIME_WAIT &&
+                   tp->t_idle <= tcp_maxidle)
+                       tp->t_timer[TCPT_2MSL] = tcp_keepintvl;
+               else
+                       tp = tcp_close(tp);
+               break;
 
        /*
         * Retransmission timer went off.  Message has not
         * been acked within retransmit interval.  Back off
 
        /*
         * Retransmission timer went off.  Message has not
         * been acked within retransmit interval.  Back off
-        * to a longer retransmit interval and retransmit all
-        * unacknowledged messages in the window.
+        * to a longer retransmit interval and retransmit one segment.
         */
        case TCPT_REXMT:
         */
        case TCPT_REXMT:
-               tp->t_rxtshift++;
-               TCPT_RANGESET(tp->t_timer[TCPT_REXMT],
-                   ((int)(2 * tp->t_srtt)),
-                   TCPTV_MIN, TCPTV_MAX);
-               TCPT_RANGESET(tp->t_timer[TCPT_REXMT],
-                   tp->t_timer[TCPT_REXMT] << tp->t_rxtshift,
-                   TCPTV_MIN, TCPTV_MAX);
-               if (tp->t_timer[TCPT_REXMT] > TCPTV_MAXIDLE / 2) {
-                       tcp_drop(tp, ETIMEDOUT);
-                       return;
+               if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
+                       tp->t_rxtshift = TCP_MAXRXTSHIFT;
+                       tcpstat.tcps_timeoutdrop++;
+                       tp = tcp_drop(tp, ETIMEDOUT);
+                       break;
+               }
+               tcpstat.tcps_rexmttimeo++;
+               rexmt = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1;
+               rexmt *= tcp_backoff[tp->t_rxtshift];
+               TCPT_RANGESET(tp->t_rxtcur, rexmt, TCPTV_MIN, TCPTV_REXMTMAX);
+               tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
+               /*
+                * If losing, let the lower level know and try for
+                * a better route.  Also, if we backed off this far,
+                * our srtt estimate is probably bogus.  Clobber it
+                * so we'll take the next rtt measurement as our srtt;
+                * move the current srtt into rttvar to keep the current
+                * retransmit times until then.
+                */
+               if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
+                       in_losing(tp->t_inpcb);
+                       tp->t_rttvar += (tp->t_srtt >> 2);
+                       tp->t_srtt = 0;
                }
                }
-printf("rexmt set to %d\n", tp->t_timer[TCPT_REXMT]);
                tp->snd_nxt = tp->snd_una;
                tp->snd_nxt = tp->snd_una;
-               /* this only transmits one segment! */
+               /*
+                * If timing a segment in this window, stop the timer.
+                */
+               tp->t_rtt = 0;
+               /*
+                * Close the congestion window down to one segment
+                * (we'll open it by one segment for each ack we get).
+                * Since we probably have a window's worth of unacked
+                * data accumulated, this "slow start" keeps us from
+                * dumping all that data as back-to-back packets (which
+                * might overwhelm an intermediate gateway).
+                *
+                * There are two phases to the opening: Initially we
+                * open by one mss on each ack.  This makes the window
+                * size increase exponentially with time.  If the
+                * window is larger than the path can handle, this
+                * exponential growth results in dropped packet(s)
+                * almost immediately.  To get more time between 
+                * drops but still "push" the network to take advantage
+                * of improving conditions, we switch from exponential
+                * to linear window opening at some threshold size.
+                * For a threshold, we use half the current window
+                * size, truncated to a multiple of the mss.
+                *
+                * (the minimum cwnd that will give us exponential
+                * growth is 2 mss.  We don't allow the threshold
+                * to go below this.)
+                */
+               {
+               u_int win = MIN(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg;
+               if (win < 2)
+                       win = 2;
+               tp->snd_cwnd = tp->t_maxseg;
+               tp->snd_ssthresh = win * tp->t_maxseg;
+               }
                (void) tcp_output(tp);
                (void) tcp_output(tp);
-               return;
+               break;
 
        /*
         * Persistence timer into zero window.
         * Force a byte to be output, if possible.
         */
        case TCPT_PERSIST:
 
        /*
         * Persistence timer into zero window.
         * Force a byte to be output, if possible.
         */
        case TCPT_PERSIST:
+               tcpstat.tcps_persisttimeo++;
+               tcp_setpersist(tp);
                tp->t_force = 1;
                (void) tcp_output(tp);
                tp->t_force = 0;
                tp->t_force = 1;
                (void) tcp_output(tp);
                tp->t_force = 0;
-               TCPT_RANGESET(tp->t_timer[TCPT_PERSIST],
-                   2 * tp->t_srtt, TCPTV_PERSMIN, TCPTV_MAX);
-               return;
+               break;
 
        /*
         * Keep-alive timer went off; send something
         * or drop connection if idle for too long.
         */
        case TCPT_KEEP:
 
        /*
         * Keep-alive timer went off; send something
         * or drop connection if idle for too long.
         */
        case TCPT_KEEP:
-               if (tp->t_state < TCPS_ESTABLISHED ||
-                   tp->t_idle >= TCPTV_MAXIDLE) {
-                       tcp_drop(tp, ETIMEDOUT);
-                       return;
-               }
-               if (tp->t_inpcb->inp_socket->so_options & SO_NOKEEPALIVE)
-                       tp->t_idle = 0;
-               else
-                       tcp_respond(tp,
-                           tp->t_template, tp->rcv_nxt, tp->snd_una-1, 0);
-               tp->t_timer[TCPT_KEEP] = TCPTV_KEEP;
-               return;
+               tcpstat.tcps_keeptimeo++;
+               if (tp->t_state < TCPS_ESTABLISHED)
+                       goto dropit;
+               if (tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE &&
+                   tp->t_state <= TCPS_CLOSE_WAIT) {
+                       if (tp->t_idle >= tcp_keepidle + tcp_maxidle)
+                               goto dropit;
+                       /*
+                        * Send a packet designed to force a response
+                        * if the peer is up and reachable:
+                        * either an ACK if the connection is still alive,
+                        * or an RST if the peer has closed the connection
+                        * due to timeout or reboot.
+                        * Using sequence number tp->snd_una-1
+                        * causes the transmitted zero-length segment
+                        * to lie outside the receive window;
+                        * by the protocol spec, this requires the
+                        * correspondent TCP to respond.
+                        */
+                       tcpstat.tcps_keepprobe++;
+#ifdef TCP_COMPAT_42
+                       /*
+                        * The keepalive packet must have nonzero length
+                        * to get a 4.2 host to respond.
+                        */
+                       tcp_respond(tp, tp->t_template,
+                           tp->rcv_nxt - 1, tp->snd_una - 1, 0);
+#else
+                       tcp_respond(tp, tp->t_template,
+                           tp->rcv_nxt, tp->snd_una - 1, 0);
+#endif
+                       tp->t_timer[TCPT_KEEP] = tcp_keepintvl;
+               } else
+                       tp->t_timer[TCPT_KEEP] = tcp_keepidle;
+               break;
+       dropit:
+               tcpstat.tcps_keepdrops++;
+               tp = tcp_drop(tp, ETIMEDOUT);
+               break;
        }
        }
+       return (tp);
 }
 }