Phil Karn's retransmit algorithm, using t_rxtcur for current rexmt value;
author Mike Karels <karels@ucbvax.Berkeley.EDU>
Tue, 11 Aug 1987 11:53:38 +0000 (03:53 -0800)
committer Mike Karels <karels@ucbvax.Berkeley.EDU>
Tue, 11 Aug 1987 11:53:38 +0000 (03:53 -0800)
slow start from beginning if connection not local; get rtt estimate on SYN

SCCS-vsn: sys/netinet/tcp_input.c 7.10
SCCS-vsn: sys/netinet/tcp_output.c 7.9
SCCS-vsn: sys/netinet/tcp_timer.c 7.9
SCCS-vsn: sys/netinet/tcp_var.h 7.4

usr/src/sys/netinet/tcp_input.c
usr/src/sys/netinet/tcp_output.c
usr/src/sys/netinet/tcp_timer.c
usr/src/sys/netinet/tcp_var.h

index c043ad0..f3d0cbd 100644 (file)
@@ -3,7 +3,7 @@
  * All rights reserved.  The Berkeley software License Agreement
  * specifies the terms and conditions for redistribution.
  *
  * All rights reserved.  The Berkeley software License Agreement
  * specifies the terms and conditions for redistribution.
  *
- *     @(#)tcp_input.c 7.9 (Berkeley) %G%
+ *     @(#)tcp_input.c 7.10 (Berkeley) %G%
  */
 
 #include "param.h"
  */
 
 #include "param.h"
@@ -443,6 +443,21 @@ findpcb:
                                tp->snd_nxt = tp->snd_una;
                }
                tp->t_timer[TCPT_REXMT] = 0;
                                tp->snd_nxt = tp->snd_una;
                }
                tp->t_timer[TCPT_REXMT] = 0;
+               /*
+                * If we didn't have to retransmit,
+                * set the initial estimate of srtt.
+                * Set the variance to half the rtt
+                * (so our first retransmit happens at 2*rtt).
+                */
+               if (tp->t_rtt) {
+                       tp->t_srtt = tp->t_rtt << 3;
+                       tp->t_rttvar = tp->t_rtt << 1;
+                       tp->t_rtt = 0;
+                       tp->t_rxtshift = 0;
+                       TCPT_RANGESET(tp->t_rxtcur, 
+                           ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
+                           TCPTV_MIN, TCPTV_REXMTMAX);
+               }
                tp->irs = ti->ti_seq;
                tcp_rcvseqinit(tp);
                tp->t_flags |= TF_ACKNOW;
                tp->irs = ti->ti_seq;
                tcp_rcvseqinit(tp);
                tp->t_flags |= TF_ACKNOW;
@@ -503,10 +518,12 @@ trimthenstep6:
 #endif
                        tcpstat.tcps_rcvduppack++;
                        tcpstat.tcps_rcvdupbyte += ti->ti_len;
 #endif
                        tcpstat.tcps_rcvduppack++;
                        tcpstat.tcps_rcvdupbyte += ti->ti_len;
-                       goto dropafterack;
+                       todrop = ti->ti_len;
+                       tp->t_flags |= TF_ACKNOW;
+               } else {
+                       tcpstat.tcps_rcvpartduppack++;
+                       tcpstat.tcps_rcvpartdupbyte += todrop;
                }
                }
-               tcpstat.tcps_rcvpartduppack++;
-               tcpstat.tcps_rcvpartdupbyte += todrop;
                m_adj(m, todrop);
                ti->ti_seq += todrop;
                ti->ti_len -= todrop;
                m_adj(m, todrop);
                ti->ti_seq += todrop;
                ti->ti_len -= todrop;
@@ -688,6 +705,9 @@ do_rst:
                /*
                 * If transmit timer is running and timed sequence
                 * number was acked, update smoothed round trip time.
                /*
                 * If transmit timer is running and timed sequence
                 * number was acked, update smoothed round trip time.
+                * Since we now have an rtt measurement, cancel the
+                * timer backoff (cf., Phil Karn's retransmit alg.).
+                * Recompute the initial retransmit timer.
                 */
                if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq)) {
                        tcpstat.tcps_rttupdated++;
                 */
                if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq)) {
                        tcpstat.tcps_rttupdated++;
@@ -706,7 +726,8 @@ do_rst:
                                if ((tp->t_srtt += delta) <= 0)
                                        tp->t_srtt = 1;
                                /*
                                if ((tp->t_srtt += delta) <= 0)
                                        tp->t_srtt = 1;
                                /*
-                                * We accumulate a smoothed rtt variance,
+                                * We accumulate a smoothed rtt variance
+                                * (actually, a smoothed mean difference),
                                 * then set the retransmit timer to smoothed
                                 * rtt + 2 times the smoothed variance.
                                 * rttvar is strored as fixed point
                                 * then set the retransmit timer to smoothed
                                 * rtt + 2 times the smoothed variance.
                                 * rttvar is strored as fixed point
@@ -732,26 +753,26 @@ do_rst:
                                tp->t_rttvar = tp->t_rtt << 1;
                        }
                        tp->t_rtt = 0;
                                tp->t_rttvar = tp->t_rtt << 1;
                        }
                        tp->t_rtt = 0;
+                       tp->t_rxtshift = 0;
+                       TCPT_RANGESET(tp->t_rxtcur, 
+                           ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
+                           TCPTV_MIN, TCPTV_REXMTMAX);
                }
 
                /*
                 * If all outstanding data is acked, stop retransmit
                 * timer and remember to restart (more output or persist).
                 * If there is more data to be acked, restart retransmit
                }
 
                /*
                 * If all outstanding data is acked, stop retransmit
                 * timer and remember to restart (more output or persist).
                 * If there is more data to be acked, restart retransmit
-                * timer; set to smoothed rtt + 2*rttvar.
+                * timer, using current (possibly backed-off) value.
                 */
                if (ti->ti_ack == tp->snd_max) {
                        tp->t_timer[TCPT_REXMT] = 0;
                        needoutput = 1;
                 */
                if (ti->ti_ack == tp->snd_max) {
                        tp->t_timer[TCPT_REXMT] = 0;
                        needoutput = 1;
-               } else if (tp->t_timer[TCPT_PERSIST] == 0) {
-                       TCPT_RANGESET(tp->t_timer[TCPT_REXMT],
-                           ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
-                           TCPTV_MIN, TCPTV_REXMTMAX);
-                       tp->t_rxtshift = 0;
-               }
+               } else if (tp->t_timer[TCPT_PERSIST] == 0)
+                       tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
                /*
                 * When new data is acked, open the congestion window
                /*
                 * When new data is acked, open the congestion window
-                * by one max sized segment.
+                * by one max-sized segment.
                 */
                tp->snd_cwnd = MIN(tp->snd_cwnd + tp->t_maxseg, 65535);
                if (acked > so->so_snd.sb_cc) {
                 */
                tp->snd_cwnd = MIN(tp->snd_cwnd + tp->t_maxseg, 65535);
                if (acked > so->so_snd.sb_cc) {
@@ -933,7 +954,7 @@ dodata:                                                     /* XXX */
                 * our window, in order to estimate the sender's
                 * buffer size.
                 */
                 * our window, in order to estimate the sender's
                 * buffer size.
                 */
-               len = so->so_rcv.sb_hiwat - (tp->rcv_nxt - tp->rcv_adv);
+               len = tp->rcv_nxt - tp->rcv_adv;
                if (len > tp->max_rcvd)
                        tp->max_rcvd = len;
        } else {
                if (len > tp->max_rcvd)
                        tp->max_rcvd = len;
        } else {
@@ -1135,6 +1156,9 @@ tcp_pulloutofband(so, ti)
  *  use a conservative size (512 or the default IP max size, but no more
  *  than the mtu of the interface through which we route),
  *  as we can't discover anything about intervening gateways or networks.
  *  use a conservative size (512 or the default IP max size, but no more
  *  than the mtu of the interface through which we route),
  *  as we can't discover anything about intervening gateways or networks.
+ *  We also initialize the congestion/slow start window to be a single
+ *  segment if the destination isn't local; this information should
+ *  probably all be saved with the routing entry at the transport level.
  *
  *  This is ugly, and doesn't belong at this level, but has to happen somehow.
  */
  *
  *  This is ugly, and doesn't belong at this level, but has to happen somehow.
  */
@@ -1171,5 +1195,7 @@ tcp_mss(tp)
 #endif
        if (in_localaddr(inp->inp_faddr))
                return (mss);
 #endif
        if (in_localaddr(inp->inp_faddr))
                return (mss);
-       return (MIN(mss, TCP_MSS));
+       mss = MIN(mss, TCP_MSS);
+       tp->snd_cwnd = mss;
+       return (mss);
 }
 }
index e21d195..32fdfd3 100644 (file)
@@ -3,7 +3,7 @@
  * All rights reserved.  The Berkeley software License Agreement
  * specifies the terms and conditions for redistribution.
  *
  * All rights reserved.  The Berkeley software License Agreement
  * specifies the terms and conditions for redistribution.
  *
- *     @(#)tcp_output.c        7.8 (Berkeley) %G%
+ *     @(#)tcp_output.c        7.9 (Berkeley) %G%
  */
 
 #include "param.h"
  */
 
 #include "param.h"
@@ -162,9 +162,14 @@ again:
         * next expected input.)  If the difference is 35% or more of the
         * maximum possible window, then want to send a window update to peer.
         */
         * next expected input.)  If the difference is 35% or more of the
         * maximum possible window, then want to send a window update to peer.
         */
-       if (win > 0 &&
-           ((100*(win-(tp->rcv_adv-tp->rcv_nxt))/so->so_rcv.sb_hiwat) >= 35))
-               goto send;
+       if (win > 0) {
+               int adv = win - (tp->rcv_adv - tp->rcv_nxt);
+
+               if (100 * adv / so->so_rcv.sb_hiwat >= 35)
+                       goto send;
+               if (adv >= 2 * tp->t_maxseg && so->so_rcv.sb_cc == 0)
+                       goto send;
+       }
 
        /*
         * TCP window updates are not reliable, rather a polling protocol
 
        /*
         * TCP window updates are not reliable, rather a polling protocol
@@ -261,9 +266,6 @@ send:
                        optlen = sizeof (tcp_initopt);
                        *(u_short *)(opt + 2) = htons(mss);
                }
                        optlen = sizeof (tcp_initopt);
                        *(u_short *)(opt + 2) = htons(mss);
                }
-       } else if (tp->t_tcpopt) {
-               opt = mtod(tp->t_tcpopt, u_char *);
-               optlen = tp->t_tcpopt->m_len;
        }
        if (opt) {
                m0 = m->m_next;
        }
        if (opt) {
                m0 = m->m_next;
@@ -363,11 +365,11 @@ send:
                 */
                if (tp->t_timer[TCPT_REXMT] == 0 &&
                    tp->snd_nxt != tp->snd_una) {
                 */
                if (tp->t_timer[TCPT_REXMT] == 0 &&
                    tp->snd_nxt != tp->snd_una) {
-                       TCPT_RANGESET(tp->t_timer[TCPT_REXMT],
-                         ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
-                         TCPTV_MIN, TCPTV_REXMTMAX);
-                       tp->t_rxtshift = 0;
-                       tp->t_timer[TCPT_PERSIST] = 0;
+                       tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
+                       if (tp->t_timer[TCPT_PERSIST]) {
+                               tp->t_timer[TCPT_PERSIST] = 0;
+                               tp->t_rxtshift = 0;
+                       }
                }
        } else
                if (SEQ_GT(tp->snd_nxt + len, tp->snd_max))
                }
        } else
                if (SEQ_GT(tp->snd_nxt + len, tp->snd_max))
index 6ee4c91..18d6a63 100644 (file)
@@ -3,7 +3,7 @@
  * All rights reserved.  The Berkeley software License Agreement
  * specifies the terms and conditions for redistribution.
  *
  * All rights reserved.  The Berkeley software License Agreement
  * specifies the terms and conditions for redistribution.
  *
- *     @(#)tcp_timer.c 7.8 (Berkeley) %G%
+ *     @(#)tcp_timer.c 7.9 (Berkeley) %G%
  */
 
 #include "param.h"
  */
 
 #include "param.h"
@@ -112,10 +112,8 @@ tcp_canceltimers(tp)
                tp->t_timer[i] = 0;
 }
 
                tp->t_timer[i] = 0;
 }
 
-int    tcp_backoff[TCP_MAXRXTSHIFT] =
-    { 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };
-
-int    tcp_keeplen = 1;        /* must be nonzero for 4.2 compat- XXX */
+int    tcp_backoff[TCP_MAXRXTSHIFT + 1] =
+    { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };
 
 /*
  * TCP timer processing.
 
 /*
  * TCP timer processing.
@@ -149,7 +147,8 @@ tcp_timers(tp, timer)
         * to a longer retransmit interval and retransmit one segment.
         */
        case TCPT_REXMT:
         * to a longer retransmit interval and retransmit one segment.
         */
        case TCPT_REXMT:
-               if (tp->t_rxtshift >= TCP_MAXRXTSHIFT) {
+               if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
+                       tp->t_rxtshift = TCP_MAXRXTSHIFT;
                        tcpstat.tcps_timeoutdrop++;
                        tp = tcp_drop(tp, ETIMEDOUT);
                        break;
                        tcpstat.tcps_timeoutdrop++;
                        tp = tcp_drop(tp, ETIMEDOUT);
                        break;
@@ -157,16 +156,21 @@ tcp_timers(tp, timer)
                tcpstat.tcps_rexmttimeo++;
                rexmt = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1;
                rexmt *= tcp_backoff[tp->t_rxtshift];
                tcpstat.tcps_rexmttimeo++;
                rexmt = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1;
                rexmt *= tcp_backoff[tp->t_rxtshift];
-               tp->t_rxtshift++;
-               TCPT_RANGESET(tp->t_timer[TCPT_REXMT], rexmt,
-                           TCPTV_MIN, TCPTV_REXMTMAX);
+               TCPT_RANGESET(tp->t_rxtcur, rexmt, TCPTV_MIN, TCPTV_REXMTMAX);
+               tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
                /*
                /*
-                * If losing, let the lower level know
-                * and try for a better route.
+                * If losing, let the lower level know and try for
+                * a better route.  Also, if we backed off this far,
+                * our srtt estimate is probably bogus.  Clobber it
+                * so we'll take the next rtt measurement as our srtt;
+                * move the current srtt into rttvar to keep the current
+                * retransmit times until then.
                 */
                 */
-               if (tp->t_rxtshift >= TCP_MAXRXTSHIFT / 4 ||
-                   rexmt >= 10 * PR_SLOWHZ)
+               if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
                        in_losing(tp->t_inpcb);
                        in_losing(tp->t_inpcb);
+                       tp->t_rttvar += (tp->t_srtt >> 2);
+                       tp->t_srtt = 0;
+               }
                tp->snd_nxt = tp->snd_una;
                /*
                 * If timing a segment in this window, stop the timer.
                tp->snd_nxt = tp->snd_una;
                /*
                 * If timing a segment in this window, stop the timer.
index 20dcd77..511b9a7 100644 (file)
@@ -3,7 +3,7 @@
  * All rights reserved.  The Berkeley software License Agreement
  * specifies the terms and conditions for redistribution.
  *
  * All rights reserved.  The Berkeley software License Agreement
  * specifies the terms and conditions for redistribution.
  *
- *     @(#)tcp_var.h   7.3 (Berkeley) %G%
+ *     @(#)tcp_var.h   7.4 (Berkeley) %G%
  */
 
 /*
  */
 
 /*
@@ -19,7 +19,8 @@ struct tcpcb {
        short   t_state;                /* state of this connection */
        short   t_timer[TCPT_NTIMERS];  /* tcp timers */
        short   t_rxtshift;             /* log(2) of rexmt exp. backoff */
        short   t_state;                /* state of this connection */
        short   t_timer[TCPT_NTIMERS];  /* tcp timers */
        short   t_rxtshift;             /* log(2) of rexmt exp. backoff */
-       struct  mbuf *t_tcpopt;         /* tcp options */
+       short   t_rxtcur;               /* current retransmit value */
+       short   t_unused;               /* XXX */
        u_short t_maxseg;               /* maximum segment size */
        char    t_force;                /* 1 if forcing out a byte */
        u_char  t_flags;
        u_short t_maxseg;               /* maximum segment size */
        char    t_force;                /* 1 if forcing out a byte */
        u_char  t_flags;