slow start from beginning if connection not local; get rtt estimate on SYN
SCCS-vsn: sys/netinet/tcp_input.c 7.10
SCCS-vsn: sys/netinet/tcp_output.c 7.9
SCCS-vsn: sys/netinet/tcp_timer.c 7.9
SCCS-vsn: sys/netinet/tcp_var.h 7.4
* All rights reserved. The Berkeley software License Agreement
* specifies the terms and conditions for redistribution.
*
* All rights reserved. The Berkeley software License Agreement
* specifies the terms and conditions for redistribution.
*
- * @(#)tcp_input.c 7.9 (Berkeley) %G%
+ * @(#)tcp_input.c 7.10 (Berkeley) %G%
tp->snd_nxt = tp->snd_una;
}
tp->t_timer[TCPT_REXMT] = 0;
tp->snd_nxt = tp->snd_una;
}
tp->t_timer[TCPT_REXMT] = 0;
+ /*
+ * If we didn't have to retransmit,
+ * set the initial estimate of srtt.
+ * Set the variance to half the rtt
+ * (so our first retransmit happens at 2*rtt).
+ */
+ if (tp->t_rtt) {
+ tp->t_srtt = tp->t_rtt << 3;
+ tp->t_rttvar = tp->t_rtt << 1;
+ tp->t_rtt = 0;
+ tp->t_rxtshift = 0;
+ TCPT_RANGESET(tp->t_rxtcur,
+ ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
+ TCPTV_MIN, TCPTV_REXMTMAX);
+ }
tp->irs = ti->ti_seq;
tcp_rcvseqinit(tp);
tp->t_flags |= TF_ACKNOW;
tp->irs = ti->ti_seq;
tcp_rcvseqinit(tp);
tp->t_flags |= TF_ACKNOW;
#endif
tcpstat.tcps_rcvduppack++;
tcpstat.tcps_rcvdupbyte += ti->ti_len;
#endif
tcpstat.tcps_rcvduppack++;
tcpstat.tcps_rcvdupbyte += ti->ti_len;
+ todrop = ti->ti_len;
+ tp->t_flags |= TF_ACKNOW;
+ } else {
+ tcpstat.tcps_rcvpartduppack++;
+ tcpstat.tcps_rcvpartdupbyte += todrop;
- tcpstat.tcps_rcvpartduppack++;
- tcpstat.tcps_rcvpartdupbyte += todrop;
m_adj(m, todrop);
ti->ti_seq += todrop;
ti->ti_len -= todrop;
m_adj(m, todrop);
ti->ti_seq += todrop;
ti->ti_len -= todrop;
/*
* If transmit timer is running and timed sequence
* number was acked, update smoothed round trip time.
/*
* If transmit timer is running and timed sequence
* number was acked, update smoothed round trip time.
+ * Since we now have an rtt measurement, cancel the
+ * timer backoff (cf., Phil Karn's retransmit alg.).
+ * Recompute the initial retransmit timer.
*/
if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq)) {
tcpstat.tcps_rttupdated++;
*/
if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq)) {
tcpstat.tcps_rttupdated++;
if ((tp->t_srtt += delta) <= 0)
tp->t_srtt = 1;
/*
if ((tp->t_srtt += delta) <= 0)
tp->t_srtt = 1;
/*
- * We accumulate a smoothed rtt variance,
+ * We accumulate a smoothed rtt variance
+ * (actually, a smoothed mean difference),
* then set the retransmit timer to smoothed
* rtt + 2 times the smoothed variance.
* rttvar is stored as fixed point
* then set the retransmit timer to smoothed
* rtt + 2 times the smoothed variance.
* rttvar is stored as fixed point
tp->t_rttvar = tp->t_rtt << 1;
}
tp->t_rtt = 0;
tp->t_rttvar = tp->t_rtt << 1;
}
tp->t_rtt = 0;
+ tp->t_rxtshift = 0;
+ TCPT_RANGESET(tp->t_rxtcur,
+ ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
+ TCPTV_MIN, TCPTV_REXMTMAX);
}
/*
* If all outstanding data is acked, stop retransmit
* timer and remember to restart (more output or persist).
* If there is more data to be acked, restart retransmit
}
/*
* If all outstanding data is acked, stop retransmit
* timer and remember to restart (more output or persist).
* If there is more data to be acked, restart retransmit
- * timer; set to smoothed rtt + 2*rttvar.
+ * timer, using current (possibly backed-off) value.
*/
if (ti->ti_ack == tp->snd_max) {
tp->t_timer[TCPT_REXMT] = 0;
needoutput = 1;
*/
if (ti->ti_ack == tp->snd_max) {
tp->t_timer[TCPT_REXMT] = 0;
needoutput = 1;
- } else if (tp->t_timer[TCPT_PERSIST] == 0) {
- TCPT_RANGESET(tp->t_timer[TCPT_REXMT],
- ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
- TCPTV_MIN, TCPTV_REXMTMAX);
- tp->t_rxtshift = 0;
- }
+ } else if (tp->t_timer[TCPT_PERSIST] == 0)
+ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
/*
* When new data is acked, open the congestion window
/*
* When new data is acked, open the congestion window
- * by one max sized segment.
+ * by one max-sized segment.
*/
tp->snd_cwnd = MIN(tp->snd_cwnd + tp->t_maxseg, 65535);
if (acked > so->so_snd.sb_cc) {
*/
tp->snd_cwnd = MIN(tp->snd_cwnd + tp->t_maxseg, 65535);
if (acked > so->so_snd.sb_cc) {
* our window, in order to estimate the sender's
* buffer size.
*/
* our window, in order to estimate the sender's
* buffer size.
*/
- len = so->so_rcv.sb_hiwat - (tp->rcv_nxt - tp->rcv_adv);
+ len = tp->rcv_nxt - tp->rcv_adv;
if (len > tp->max_rcvd)
tp->max_rcvd = len;
} else {
if (len > tp->max_rcvd)
tp->max_rcvd = len;
} else {
* use a conservative size (512 or the default IP max size, but no more
* than the mtu of the interface through which we route),
* as we can't discover anything about intervening gateways or networks.
* use a conservative size (512 or the default IP max size, but no more
* than the mtu of the interface through which we route),
* as we can't discover anything about intervening gateways or networks.
+ * We also initialize the congestion/slow start window to be a single
+ * segment if the destination isn't local; this information should
+ * probably all be saved with the routing entry at the transport level.
*
* This is ugly, and doesn't belong at this level, but has to happen somehow.
*/
*
* This is ugly, and doesn't belong at this level, but has to happen somehow.
*/
#endif
if (in_localaddr(inp->inp_faddr))
return (mss);
#endif
if (in_localaddr(inp->inp_faddr))
return (mss);
- return (MIN(mss, TCP_MSS));
+ mss = MIN(mss, TCP_MSS);
+ tp->snd_cwnd = mss;
+ return (mss);
* All rights reserved. The Berkeley software License Agreement
* specifies the terms and conditions for redistribution.
*
* All rights reserved. The Berkeley software License Agreement
* specifies the terms and conditions for redistribution.
*
- * @(#)tcp_output.c 7.8 (Berkeley) %G%
+ * @(#)tcp_output.c 7.9 (Berkeley) %G%
* next expected input.) If the difference is 35% or more of the
* maximum possible window, then want to send a window update to peer.
*/
* next expected input.) If the difference is 35% or more of the
* maximum possible window, then want to send a window update to peer.
*/
- if (win > 0 &&
- ((100*(win-(tp->rcv_adv-tp->rcv_nxt))/so->so_rcv.sb_hiwat) >= 35))
- goto send;
+ if (win > 0) {
+ int adv = win - (tp->rcv_adv - tp->rcv_nxt);
+
+ if (100 * adv / so->so_rcv.sb_hiwat >= 35)
+ goto send;
+ if (adv >= 2 * tp->t_maxseg && so->so_rcv.sb_cc == 0)
+ goto send;
+ }
/*
* TCP window updates are not reliable, rather a polling protocol
/*
* TCP window updates are not reliable, rather a polling protocol
optlen = sizeof (tcp_initopt);
*(u_short *)(opt + 2) = htons(mss);
}
optlen = sizeof (tcp_initopt);
*(u_short *)(opt + 2) = htons(mss);
}
- } else if (tp->t_tcpopt) {
- opt = mtod(tp->t_tcpopt, u_char *);
- optlen = tp->t_tcpopt->m_len;
}
if (opt) {
m0 = m->m_next;
}
if (opt) {
m0 = m->m_next;
*/
if (tp->t_timer[TCPT_REXMT] == 0 &&
tp->snd_nxt != tp->snd_una) {
*/
if (tp->t_timer[TCPT_REXMT] == 0 &&
tp->snd_nxt != tp->snd_una) {
- TCPT_RANGESET(tp->t_timer[TCPT_REXMT],
- ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
- TCPTV_MIN, TCPTV_REXMTMAX);
- tp->t_rxtshift = 0;
- tp->t_timer[TCPT_PERSIST] = 0;
+ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
+ if (tp->t_timer[TCPT_PERSIST]) {
+ tp->t_timer[TCPT_PERSIST] = 0;
+ tp->t_rxtshift = 0;
+ }
}
} else
if (SEQ_GT(tp->snd_nxt + len, tp->snd_max))
}
} else
if (SEQ_GT(tp->snd_nxt + len, tp->snd_max))
* All rights reserved. The Berkeley software License Agreement
* specifies the terms and conditions for redistribution.
*
* All rights reserved. The Berkeley software License Agreement
* specifies the terms and conditions for redistribution.
*
- * @(#)tcp_timer.c 7.8 (Berkeley) %G%
+ * @(#)tcp_timer.c 7.9 (Berkeley) %G%
-int tcp_backoff[TCP_MAXRXTSHIFT] =
- { 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };
-
-int tcp_keeplen = 1; /* must be nonzero for 4.2 compat- XXX */
+int tcp_backoff[TCP_MAXRXTSHIFT + 1] =
+ { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };
/*
* TCP timer processing.
/*
* TCP timer processing.
* to a longer retransmit interval and retransmit one segment.
*/
case TCPT_REXMT:
* to a longer retransmit interval and retransmit one segment.
*/
case TCPT_REXMT:
- if (tp->t_rxtshift >= TCP_MAXRXTSHIFT) {
+ if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
+ tp->t_rxtshift = TCP_MAXRXTSHIFT;
tcpstat.tcps_timeoutdrop++;
tp = tcp_drop(tp, ETIMEDOUT);
break;
tcpstat.tcps_timeoutdrop++;
tp = tcp_drop(tp, ETIMEDOUT);
break;
tcpstat.tcps_rexmttimeo++;
rexmt = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1;
rexmt *= tcp_backoff[tp->t_rxtshift];
tcpstat.tcps_rexmttimeo++;
rexmt = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1;
rexmt *= tcp_backoff[tp->t_rxtshift];
- tp->t_rxtshift++;
- TCPT_RANGESET(tp->t_timer[TCPT_REXMT], rexmt,
- TCPTV_MIN, TCPTV_REXMTMAX);
+ TCPT_RANGESET(tp->t_rxtcur, rexmt, TCPTV_MIN, TCPTV_REXMTMAX);
+ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
- * If losing, let the lower level know
- * and try for a better route.
+ * If losing, let the lower level know and try for
+ * a better route. Also, if we backed off this far,
+ * our srtt estimate is probably bogus. Clobber it
+ * so we'll take the next rtt measurement as our srtt;
+ * move the current srtt into rttvar to keep the current
+ * retransmit times until then.
- if (tp->t_rxtshift >= TCP_MAXRXTSHIFT / 4 ||
- rexmt >= 10 * PR_SLOWHZ)
+ if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
+ tp->t_rttvar += (tp->t_srtt >> 2);
+ tp->t_srtt = 0;
+ }
tp->snd_nxt = tp->snd_una;
/*
* If timing a segment in this window, stop the timer.
tp->snd_nxt = tp->snd_una;
/*
* If timing a segment in this window, stop the timer.
* All rights reserved. The Berkeley software License Agreement
* specifies the terms and conditions for redistribution.
*
* All rights reserved. The Berkeley software License Agreement
* specifies the terms and conditions for redistribution.
*
- * @(#)tcp_var.h 7.3 (Berkeley) %G%
+ * @(#)tcp_var.h 7.4 (Berkeley) %G%
short t_state; /* state of this connection */
short t_timer[TCPT_NTIMERS]; /* tcp timers */
short t_rxtshift; /* log(2) of rexmt exp. backoff */
short t_state; /* state of this connection */
short t_timer[TCPT_NTIMERS]; /* tcp timers */
short t_rxtshift; /* log(2) of rexmt exp. backoff */
- struct mbuf *t_tcpopt; /* tcp options */
+ short t_rxtcur; /* current retransmit value */
+ short t_unused; /* XXX */
u_short t_maxseg; /* maximum segment size */
char t_force; /* 1 if forcing out a byte */
u_char t_flags;
u_short t_maxseg; /* maximum segment size */
char t_force; /* 1 if forcing out a byte */
u_char t_flags;