BSD 4_3_Reno release
[unix-history] / usr / src / sys / netinet / tcp_timer.c
CommitLineData
8ae0e4b4 1/*
7d999c60 2 * Copyright (c) 1982, 1986, 1988, 1990 Regents of the University of California.
2b6b6284 3 * All rights reserved.
8ae0e4b4 4 *
1c15e888
C
5 * Redistribution is only permitted until one year after the first shipment
6 * of 4.4BSD by the Regents. Otherwise, redistribution and use in source and
7 * binary forms are permitted provided that: (1) source distributions retain
8 * this entire copyright notice and comment, and (2) distributions including
9 * binaries display the following acknowledgement: ``This product includes
10 * software developed by the University of California, Berkeley and its
11 * contributors'' in the documentation or other materials provided with the
12 * distribution and in all advertising materials mentioning features or use
13 * of this software. Neither the name of the University nor the names of
14 * its contributors may be used to endorse or promote products derived from
15 * this software without specific prior written permission.
16 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
17 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
18 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
2b6b6284 19 *
1c15e888 20 * @(#)tcp_timer.c 7.18 (Berkeley) 6/28/90
8ae0e4b4 21 */
f03530e6 22
20666ad3
JB
23#include "param.h"
24#include "systm.h"
cc072043 25#include "malloc.h"
20666ad3
JB
26#include "mbuf.h"
27#include "socket.h"
28#include "socketvar.h"
29#include "protosw.h"
30#include "errno.h"
6e7edb25
BJ
31
32#include "../net/if.h"
c124e997 33#include "../net/route.h"
f4d55810 34
20666ad3 35#include "in.h"
20666ad3
JB
36#include "in_systm.h"
37#include "ip.h"
2b25f79c 38#include "in_pcb.h"
20666ad3
JB
39#include "ip_var.h"
40#include "tcp.h"
41#include "tcp_fsm.h"
42#include "tcp_seq.h"
43#include "tcp_timer.h"
44#include "tcp_var.h"
45#include "tcpip.h"
f03530e6 46
8a36cf82
MK
47int tcp_keepidle = TCPTV_KEEP_IDLE;
48int tcp_keepintvl = TCPTV_KEEPINTVL;
49int tcp_maxidle;
f03530e6
BJ
50/*
51 * Fast timeout routine for processing delayed acks
52 */
53tcp_fasttimo()
54{
b8b9174f
BJ
55 register struct inpcb *inp;
56 register struct tcpcb *tp;
57 int s = splnet();
b8b9174f 58
fd5dc5f0
BJ
59 inp = tcb.inp_next;
60 if (inp)
61 for (; inp != &tcb; inp = inp->inp_next)
b8b9174f
BJ
62 if ((tp = (struct tcpcb *)inp->inp_ppcb) &&
63 (tp->t_flags & TF_DELACK)) {
64 tp->t_flags &= ~TF_DELACK;
65 tp->t_flags |= TF_ACKNOW;
35f3fc10 66 tcpstat.tcps_delack++;
b8b9174f
BJ
67 (void) tcp_output(tp);
68 }
69 splx(s);
f03530e6
BJ
70}
71
72/*
73 * Tcp protocol timeout routine called every 500 ms.
74 * Updates the timers in all active tcb's and
75 * causes finite state machine actions if timers expire.
76 */
77tcp_slowtimo()
78{
1e977657 79 register struct inpcb *ip, *ipnxt;
f03530e6
BJ
80 register struct tcpcb *tp;
81 int s = splnet();
f03530e6 82 register int i;
f03530e6 83
8a36cf82 84 tcp_maxidle = TCPTV_KEEPCNT * tcp_keepintvl;
f03530e6
BJ
85 /*
86 * Search through tcb's and update active timers.
87 */
4aed14e3
BJ
88 ip = tcb.inp_next;
89 if (ip == 0) {
90 splx(s);
91 return;
92 }
039b88d5
MK
93 for (; ip != &tcb; ip = ipnxt) {
94 ipnxt = ip->inp_next;
f03530e6 95 tp = intotcpcb(ip);
37de812c
BJ
96 if (tp == 0)
97 continue;
a6503abf 98 for (i = 0; i < TCPT_NTIMERS; i++) {
1e977657 99 if (tp->t_timer[i] && --tp->t_timer[i] == 0) {
f03530e6
BJ
100 (void) tcp_usrreq(tp->t_inpcb->inp_socket,
101 PRU_SLOWTIMO, (struct mbuf *)0,
755d8841 102 (struct mbuf *)i, (struct mbuf *)0);
1e977657
BJ
103 if (ipnxt->inp_prev != ip)
104 goto tpgone;
105 }
f03530e6 106 }
405c9168
BJ
107 tp->t_idle++;
108 if (tp->t_rtt)
109 tp->t_rtt++;
1e977657 110tpgone:
039b88d5 111 ;
f03530e6 112 }
a6503abf 113 tcp_iss += TCP_ISSINCR/PR_SLOWHZ; /* increment iss */
367bed15 114#ifdef TCP_COMPAT_42
c50542f3
MK
115 if ((int)tcp_iss < 0)
116 tcp_iss = 0; /* XXX */
117#endif
f03530e6
BJ
118 splx(s);
119}
120
121/*
a6503abf 122 * Cancel all timers for TCP tp.
f03530e6 123 */
0974b45c 124tcp_canceltimers(tp)
f03530e6
BJ
125 struct tcpcb *tp;
126{
f03530e6
BJ
127 register int i;
128
a6503abf
BJ
129 for (i = 0; i < TCPT_NTIMERS; i++)
130 tp->t_timer[i] = 0;
f03530e6
BJ
131}
132
a6bbda13
MK
133int tcp_backoff[TCP_MAXRXTSHIFT + 1] =
134 { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };
8b6ad229 135
f03530e6 136/*
405c9168 137 * TCP timer processing.
f03530e6 138 */
0e3936fa 139struct tcpcb *
a6503abf 140tcp_timers(tp, timer)
f03530e6 141 register struct tcpcb *tp;
a6503abf 142 int timer;
f03530e6 143{
2a89e5a6 144 register int rexmt;
f03530e6 145
0974b45c 146 switch (timer) {
f03530e6 147
405c9168 148 /*
6209c5c4
MK
149 * 2 MSL timeout in shutdown went off. If we're closed but
150 * still waiting for peer to close and connection has been idle
151 * too long, or if 2MSL time is up from TIME_WAIT, delete connection
152 * control block. Otherwise, check again in a bit.
405c9168 153 */
a6503abf 154 case TCPT_2MSL:
6209c5c4 155 if (tp->t_state != TCPS_TIME_WAIT &&
8a36cf82
MK
156 tp->t_idle <= tcp_maxidle)
157 tp->t_timer[TCPT_2MSL] = tcp_keepintvl;
6209c5c4
MK
158 else
159 tp = tcp_close(tp);
0e3936fa 160 break;
f03530e6 161
405c9168
BJ
162 /*
163 * Retransmission timer went off. Message has not
164 * been acked within retransmit interval. Back off
eeaf00e3 165 * to a longer retransmit interval and retransmit one segment.
405c9168 166 */
a6503abf 167 case TCPT_REXMT:
a6bbda13
MK
168 if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
169 tp->t_rxtshift = TCP_MAXRXTSHIFT;
35f3fc10 170 tcpstat.tcps_timeoutdrop++;
7d999c60
MK
171 tp = tcp_drop(tp, tp->t_softerror ?
172 tp->t_softerror : ETIMEDOUT);
0e3936fa 173 break;
fd5dc5f0 174 }
35f3fc10 175 tcpstat.tcps_rexmttimeo++;
7d999c60
MK
176 rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
177 TCPT_RANGESET(tp->t_rxtcur, rexmt,
178 tp->t_rttmin, TCPTV_REXMTMAX);
a6bbda13 179 tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
6a4fd140 180 /*
a6bbda13
MK
181 * If losing, let the lower level know and try for
182 * a better route. Also, if we backed off this far,
183 * our srtt estimate is probably bogus. Clobber it
184 * so we'll take the next rtt measurement as our srtt;
185 * move the current srtt into rttvar to keep the current
186 * retransmit times until then.
6a4fd140 187 */
a6bbda13 188 if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
6a4fd140 189 in_losing(tp->t_inpcb);
7d999c60 190 tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
a6bbda13
MK
191 tp->t_srtt = 0;
192 }
405c9168 193 tp->snd_nxt = tp->snd_una;
7872cc0a 194 /*
7cc62c26 195 * If timing a segment in this window, stop the timer.
7872cc0a 196 */
7cc62c26 197 tp->t_rtt = 0;
2e5a76f2
MK
198 /*
199 * Close the congestion window down to one segment
200 * (we'll open it by one segment for each ack we get).
201 * Since we probably have a window's worth of unacked
202 * data accumulated, this "slow start" keeps us from
203 * dumping all that data as back-to-back packets (which
204 * might overwhelm an intermediate gateway).
6f854ef4
MK
205 *
206 * There are two phases to the opening: Initially we
207 * open by one mss on each ack. This makes the window
208 * size increase exponentially with time. If the
209 * window is larger than the path can handle, this
210 * exponential growth results in dropped packet(s)
211 * almost immediately. To get more time between
212 * drops but still "push" the network to take advantage
213 * of improving conditions, we switch from exponential
214 * to linear window opening at some threshhold size.
215 * For a threshhold, we use half the current window
216 * size, truncated to a multiple of the mss.
217 *
218 * (the minimum cwnd that will give us exponential
219 * growth is 2 mss. We don't allow the threshhold
220 * to go below this.)
2e5a76f2 221 */
6f854ef4 222 {
7d999c60 223 u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg;
6f854ef4
MK
224 if (win < 2)
225 win = 2;
2e5a76f2 226 tp->snd_cwnd = tp->t_maxseg;
6f854ef4 227 tp->snd_ssthresh = win * tp->t_maxseg;
7d999c60 228 tp->t_dupacks = 0;
6f854ef4 229 }
405c9168 230 (void) tcp_output(tp);
0e3936fa 231 break;
f03530e6 232
405c9168
BJ
233 /*
234 * Persistance timer into zero window.
235 * Force a byte to be output, if possible.
236 */
a6503abf 237 case TCPT_PERSIST:
35f3fc10 238 tcpstat.tcps_persisttimeo++;
a13c006d 239 tcp_setpersist(tp);
405c9168
BJ
240 tp->t_force = 1;
241 (void) tcp_output(tp);
242 tp->t_force = 0;
0e3936fa 243 break;
f03530e6 244
405c9168
BJ
245 /*
246 * Keep-alive timer went off; send something
247 * or drop connection if idle for too long.
248 */
a6503abf 249 case TCPT_KEEP:
35f3fc10 250 tcpstat.tcps_keeptimeo++;
f3cdd721
BJ
251 if (tp->t_state < TCPS_ESTABLISHED)
252 goto dropit;
c0200c65
MK
253 if (tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE &&
254 tp->t_state <= TCPS_CLOSE_WAIT) {
8a36cf82 255 if (tp->t_idle >= tcp_keepidle + tcp_maxidle)
f3cdd721 256 goto dropit;
3bd14c98 257 /*
35f3fc10
MK
258 * Send a packet designed to force a response
259 * if the peer is up and reachable:
260 * either an ACK if the connection is still alive,
261 * or an RST if the peer has closed the connection
262 * due to timeout or reboot.
263 * Using sequence number tp->snd_una-1
264 * causes the transmitted zero-length segment
265 * to lie outside the receive window;
266 * by the protocol spec, this requires the
267 * correspondent TCP to respond.
3bd14c98 268 */
35f3fc10 269 tcpstat.tcps_keepprobe++;
eeef4ac3
MK
270#ifdef TCP_COMPAT_42
271 /*
272 * The keepalive packet must have nonzero length
273 * to get a 4.2 host to respond.
274 */
cc072043 275 tcp_respond(tp, tp->t_template, (struct mbuf *)NULL,
eeef4ac3
MK
276 tp->rcv_nxt - 1, tp->snd_una - 1, 0);
277#else
cc072043 278 tcp_respond(tp, tp->t_template, (struct mbuf *)NULL,
eeef4ac3
MK
279 tp->rcv_nxt, tp->snd_una - 1, 0);
280#endif
8a36cf82
MK
281 tp->t_timer[TCPT_KEEP] = tcp_keepintvl;
282 } else
283 tp->t_timer[TCPT_KEEP] = tcp_keepidle;
0e3936fa 284 break;
f3cdd721 285 dropit:
35f3fc10 286 tcpstat.tcps_keepdrops++;
0e3936fa
SL
287 tp = tcp_drop(tp, ETIMEDOUT);
288 break;
f03530e6 289 }
0e3936fa 290 return (tp);
f03530e6 291}