change to keyword search
[unix-history] / usr / src / sys / netinet / tcp_output.c
CommitLineData
8ae0e4b4 1/*
0880b18e 2 * Copyright (c) 1982, 1986 Regents of the University of California.
8ae0e4b4
KM
3 * All rights reserved. The Berkeley software License Agreement
4 * specifies the terms and conditions for redistribution.
5 *
a6bbda13 6 * @(#)tcp_output.c 7.9 (Berkeley) %G%
8ae0e4b4 7 */
76ee76df 8
20666ad3
JB
9#include "param.h"
10#include "systm.h"
11#include "mbuf.h"
12#include "protosw.h"
13#include "socket.h"
14#include "socketvar.h"
15#include "errno.h"
f4d55810 16
c124e997 17#include "../net/route.h"
f4d55810 18
20666ad3
JB
19#include "in.h"
20#include "in_pcb.h"
21#include "in_systm.h"
22#include "ip.h"
23#include "ip_var.h"
24#include "tcp.h"
0974b45c 25#define TCPOUTFLAGS
20666ad3
JB
26#include "tcp_fsm.h"
27#include "tcp_seq.h"
28#include "tcp_timer.h"
29#include "tcp_var.h"
30#include "tcpip.h"
31#include "tcp_debug.h"
76ee76df 32
8b5a83bb 33/*
77a4e3ca 34 * Initial options.
8b5a83bb 35 */
8b5a83bb 36u_char tcp_initopt[4] = { TCPOPT_MAXSEG, 4, 0x0, 0x0, };
8b5a83bb 37
ea727f86 38/*
4aed14e3 39 * Tcp output routine: figure out what should be sent and send it.
ea727f86 40 */
a6503abf 41tcp_output(tp)
53a5409e 42 register struct tcpcb *tp;
ea727f86 43{
53a5409e 44 register struct socket *so = tp->t_inpcb->inp_socket;
acad00cc 45 register int len, win;
a6503abf 46 struct mbuf *m0;
acad00cc 47 int off, flags, error;
a6503abf
BJ
48 register struct mbuf *m;
49 register struct tcpiphdr *ti;
8b5a83bb
BJ
50 u_char *opt;
51 unsigned optlen = 0;
acad00cc 52 int idle, sendalot;
76ee76df 53
a6503abf 54 /*
8ae6c089 55 * Determine length of data that should be transmitted,
0974b45c
BJ
56 * and flags that will be used.
57 * If there is some data or critical controls (SYN, RST)
58 * to send, then transmit; otherwise, investigate further.
a6503abf 59 */
acad00cc 60 idle = (tp->snd_max == tp->snd_una);
2266a466
BJ
61again:
62 sendalot = 0;
a6503abf 63 off = tp->snd_nxt - tp->snd_una;
eaf69575 64 win = MIN(tp->snd_wnd, tp->snd_cwnd);
e4af65f3 65
eaf69575
MK
66 /*
67 * If in persist timeout with window of 0, send 1 byte.
acad00cc
MK
68 * Otherwise, if window is small but nonzero
69 * and timer expired, we will send what we can
70 * and go to transmit state.
eaf69575
MK
71 */
72 if (tp->t_force) {
e4af65f3 73 if (win == 0)
eaf69575
MK
74 win = 1;
75 else {
76 tp->t_timer[TCPT_PERSIST] = 0;
77 tp->t_rxtshift = 0;
78 }
79 }
acad00cc 80
8278ae69 81 len = MIN(so->so_snd.sb_cc, win) - off;
0974b45c 82 flags = tcp_outflags[tp->t_state];
e4af65f3
MK
83
84 if (len < 0) {
85 /*
dd0ed0b1
MK
86 * If FIN has been sent but not acked,
87 * but we haven't been called to retransmit,
faa26a98 88 * len will be -1; transmit if acking, otherwise no need.
e4af65f3
MK
89 * Otherwise, window shrank after we sent into it.
90 * If window shrank to 0, cancel pending retransmit
91 * and pull snd_nxt back to (closed) window.
92 * We will enter persist state below.
dd0ed0b1
MK
93 * If the window didn't close completely,
94 * just wait for an ACK.
e4af65f3 95 */
faa26a98
MK
96 if (flags & TH_FIN) {
97 if (tp->t_flags & TF_ACKNOW)
98 len = 0;
99 else
100 return (0);
101 } else if (win == 0) {
102 tp->t_timer[TCPT_REXMT] = 0;
103 tp->snd_nxt = tp->snd_una;
104 len = 0;
105 } else
e4af65f3 106 return (0);
faa26a98
MK
107 }
108 if (len > tp->t_maxseg) {
109 len = tp->t_maxseg;
2e5a76f2 110 sendalot = 1;
e4af65f3 111 }
baf677ce
MK
112 if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc))
113 flags &= ~TH_FIN;
e4af65f3 114 win = sbspace(&so->so_rcv);
acad00cc 115
faa26a98
MK
116
117 /*
118 * If our state indicates that FIN should be sent
119 * and we have not yet done so, or we're retransmitting the FIN,
120 * then we need to send.
121 */
122 if (flags & TH_FIN &&
123 ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una))
124 goto send;
acad00cc
MK
125 /*
126 * Send if we owe peer an ACK.
127 */
e4af65f3
MK
128 if (tp->t_flags & TF_ACKNOW)
129 goto send;
faa26a98 130 if (flags & (TH_SYN|TH_RST))
acad00cc 131 goto send;
8ae6c089 132 if (SEQ_GT(tp->snd_up, tp->snd_una))
a6503abf
BJ
133 goto send;
134
8ae6c089 135 /*
7d304adf
MK
136 * Sender silly window avoidance. If connection is idle
137 * and can send all data, a maximum segment,
138 * at least a maximum default-size segment do it,
8ae6c089 139 * or are forced, do it; otherwise don't bother.
18a438b6
MK
140 * If peer's buffer is tiny, then send
141 * when window is at least half open.
eaf69575
MK
142 * If retransmitting (possibly after persist timer forced us
143 * to send into a small window), then must resend.
8ae6c089
BJ
144 */
145 if (len) {
3d92549d 146 if (len == tp->t_maxseg)
8ae6c089 147 goto send;
acad00cc
MK
148 if ((idle || tp->t_flags & TF_NODELAY) &&
149 len + off >= so->so_snd.sb_cc)
8ae6c089
BJ
150 goto send;
151 if (tp->t_force)
152 goto send;
18a438b6
MK
153 if (len >= tp->max_sndwnd / 2)
154 goto send;
eaf69575
MK
155 if (SEQ_LT(tp->snd_nxt, tp->snd_max))
156 goto send;
e4af65f3 157 }
76ee76df 158
a6503abf 159 /*
acad00cc
MK
160 * Compare available window to amount of window
161 * known to peer (as advertised window less
8278ae69
MK
162 * next expected input.) If the difference is 35% or more of the
163 * maximum possible window, then want to send a window update to peer.
a6503abf 164 */
a6bbda13
MK
165 if (win > 0) {
166 int adv = win - (tp->rcv_adv - tp->rcv_nxt);
167
168 if (100 * adv / so->so_rcv.sb_hiwat >= 35)
169 goto send;
170 if (adv >= 2 * tp->t_maxseg && so->so_rcv.sb_cc == 0)
171 goto send;
172 }
a6503abf 173
2266a466
BJ
174 /*
175 * TCP window updates are not reliable, rather a polling protocol
176 * using ``persist'' packets is used to insure receipt of window
177 * updates. The three ``states'' for the output side are:
178 * idle not doing retransmits or persists
acad00cc 179 * persisting to move a small or zero window
2266a466
BJ
180 * (re)transmitting and thereby not persisting
181 *
182 * tp->t_timer[TCPT_PERSIST]
183 * is set when we are in persist state.
184 * tp->t_force
185 * is set when we are called to send a persist packet.
186 * tp->t_timer[TCPT_REXMT]
187 * is set when we are retransmitting
188 * The output side is idle when both timers are zero.
189 *
eaf69575
MK
190 * If send window is too small, there is data to transmit, and no
191 * retransmit or persist is pending, then go to persist state.
192 * If nothing happens soon, send when timer expires:
193 * if window is nonzero, transmit what we can,
194 * otherwise force out a byte.
2266a466 195 */
eaf69575
MK
196 if (so->so_snd.sb_cc && tp->t_timer[TCPT_REXMT] == 0 &&
197 tp->t_timer[TCPT_PERSIST] == 0) {
2266a466
BJ
198 tp->t_rxtshift = 0;
199 tcp_setpersist(tp);
200 }
201
a6503abf
BJ
202 /*
203 * No reason to send a segment, just return.
204 */
f1b2fa5b 205 return (0);
a6503abf
BJ
206
207send:
208 /*
209 * Grab a header mbuf, attaching a copy of data to
210 * be transmitted, and initialize the header from
211 * the template for sends on this connection.
212 */
60d68e9e
SL
213 MGET(m, M_DONTWAIT, MT_HEADER);
214 if (m == NULL)
8a2f82db 215 return (ENOBUFS);
4aed14e3 216 m->m_off = MMAXOFF - sizeof (struct tcpiphdr);
53a5409e 217 m->m_len = sizeof (struct tcpiphdr);
a6503abf 218 if (len) {
3b52afc5
MK
219 if (tp->t_force && len == 1)
220 tcpstat.tcps_sndprobe++;
221 else if (SEQ_LT(tp->snd_nxt, tp->snd_max)) {
222 tcpstat.tcps_sndrexmitpack++;
223 tcpstat.tcps_sndrexmitbyte += len;
224 } else {
225 tcpstat.tcps_sndpack++;
226 tcpstat.tcps_sndbyte += len;
227 }
a6503abf
BJ
228 m->m_next = m_copy(so->so_snd.sb_mb, off, len);
229 if (m->m_next == 0)
230 len = 0;
3b52afc5
MK
231 } else if (tp->t_flags & TF_ACKNOW)
232 tcpstat.tcps_sndacks++;
233 else if (flags & (TH_SYN|TH_FIN|TH_RST))
234 tcpstat.tcps_sndctrl++;
235 else if (SEQ_GT(tp->snd_up, tp->snd_una))
236 tcpstat.tcps_sndurg++;
237 else
238 tcpstat.tcps_sndwinup++;
239
a6503abf
BJ
240 ti = mtod(m, struct tcpiphdr *);
241 if (tp->t_template == 0)
242 panic("tcp_output");
f1b2fa5b 243 bcopy((caddr_t)tp->t_template, (caddr_t)ti, sizeof (struct tcpiphdr));
a6503abf
BJ
244
245 /*
246 * Fill in fields, remembering maximum advertised
247 * window for use in delaying messages about window sizes.
faa26a98 248 * If resending a FIN, be sure not to use a new sequence number.
a6503abf 249 */
3d92549d
MK
250 if (flags & TH_FIN && tp->t_flags & TF_SENTFIN &&
251 tp->snd_nxt == tp->snd_max)
faa26a98 252 tp->snd_nxt--;
acad00cc
MK
253 ti->ti_seq = htonl(tp->snd_nxt);
254 ti->ti_ack = htonl(tp->rcv_nxt);
8b5a83bb
BJ
255 /*
256 * Before ESTABLISHED, force sending of initial options
257 * unless TCP set to not do any options.
258 */
acad00cc
MK
259 opt = NULL;
260 if (tp->t_state < TCPS_ESTABLISHED && (tp->t_flags & TF_NOOPT) == 0) {
8011f5df 261 u_short mss;
99578149 262
99578149 263 mss = MIN(so->so_rcv.sb_hiwat / 2, tcp_mss(tp));
acad00cc
MK
264 if (mss > IP_MSS - sizeof(struct tcpiphdr)) {
265 opt = tcp_initopt;
266 optlen = sizeof (tcp_initopt);
267 *(u_short *)(opt + 2) = htons(mss);
268 }
8b5a83bb 269 }
77a4e3ca 270 if (opt) {
f1b2fa5b 271 m0 = m->m_next;
cce93e4b 272 m->m_next = m_get(M_DONTWAIT, MT_DATA);
0974b45c
BJ
273 if (m->m_next == 0) {
274 (void) m_free(m);
8b5a83bb 275 m_freem(m0);
8a2f82db 276 return (ENOBUFS);
0974b45c
BJ
277 }
278 m->m_next->m_next = m0;
8b5a83bb 279 m0 = m->m_next;
8b5a83bb 280 m0->m_len = optlen;
668cc26d 281 bcopy((caddr_t)opt, mtod(m0, caddr_t), optlen);
8b5a83bb 282 opt = (u_char *)(mtod(m0, caddr_t) + optlen);
8b5a83bb
BJ
283 while (m0->m_len & 0x3) {
284 *opt++ = TCPOPT_EOL;
285 m0->m_len++;
286 }
287 optlen = m0->m_len;
288 ti->ti_off = (sizeof (struct tcphdr) + optlen) >> 2;
0974b45c
BJ
289 }
290 ti->ti_flags = flags;
acad00cc
MK
291 /*
292 * Calculate receive window. Don't shrink window,
293 * but avoid silly window syndrome.
294 */
295 if (win < so->so_rcv.sb_hiwat / 4 && win < tp->t_maxseg)
296 win = 0;
297 if (win < (int)(tp->rcv_adv - tp->rcv_nxt))
298 win = (int)(tp->rcv_adv - tp->rcv_nxt);
299 ti->ti_win = htons((u_short)win);
0974b45c 300 if (SEQ_GT(tp->snd_up, tp->snd_nxt)) {
8011f5df 301 ti->ti_urp = htons((u_short)(tp->snd_up - tp->snd_nxt));
a6503abf
BJ
302 ti->ti_flags |= TH_URG;
303 } else
304 /*
305 * If no urgent pointer to send, then we pull
306 * the urgent pointer to the left edge of the send window
307 * so that it doesn't drift into the send window on sequence
308 * number wraparound.
309 */
0974b45c 310 tp->snd_up = tp->snd_una; /* drag it along */
02c1608b
BJ
311 /*
312 * If anything to send and we can send it all, set PUSH.
313 * (This will keep happy those implementations which only
5cdc4d65 314 * give data to the user when a buffer fills or a PUSH comes in.)
02c1608b 315 */
02c1608b
BJ
316 if (len && off+len == so->so_snd.sb_cc)
317 ti->ti_flags |= TH_PUSH;
a6503abf
BJ
318
319 /*
320 * Put TCP length in extended header, and then
321 * checksum extended header and data.
322 */
acad00cc
MK
323 if (len + optlen)
324 ti->ti_len = htons((u_short)(sizeof(struct tcphdr) +
325 optlen + len));
668cc26d 326 ti->ti_sum = in_cksum(m, sizeof (struct tcpiphdr) + (int)optlen + len);
0974b45c
BJ
327
328 /*
2266a466 329 * In transmit state, time the transmission and arrange for
eaf69575 330 * the retransmit. In persist state, just set snd_max.
0974b45c 331 */
eaf69575 332 if (tp->t_force == 0 || tp->t_timer[TCPT_PERSIST] == 0) {
eb5508b2
MK
333 tcp_seq startseq = tp->snd_nxt;
334
2266a466 335 /*
8931cb5b 336 * Advance snd_nxt over sequence space of this segment.
2266a466 337 */
faa26a98
MK
338 if (flags & TH_SYN)
339 tp->snd_nxt++;
340 if (flags & TH_FIN) {
2266a466 341 tp->snd_nxt++;
faa26a98
MK
342 tp->t_flags |= TF_SENTFIN;
343 }
2266a466 344 tp->snd_nxt += len;
eb5508b2 345 if (SEQ_GT(tp->snd_nxt, tp->snd_max)) {
e45e6858 346 tp->snd_max = tp->snd_nxt;
eb5508b2
MK
347 /*
348 * Time this transmission if not a retransmission and
349 * not currently timing anything.
350 */
351 if (tp->t_rtt == 0) {
352 tp->t_rtt = 1;
353 tp->t_rtseq = startseq;
354 tcpstat.tcps_segstimed++;
355 }
356 }
405c9168 357
2266a466 358 /*
eaf69575 359 * Set retransmit timer if not currently set,
6be9a225 360 * and not doing an ack or a keep-alive probe.
7cc62c26
MK
361 * Initial value for retransmit timer is smoothed
362 * round-trip time + 2 * round-trip time variance.
e4af65f3
MK
363 * Initialize shift counter which is used for backoff
364 * of retransmit time.
2266a466
BJ
365 */
366 if (tp->t_timer[TCPT_REXMT] == 0 &&
367 tp->snd_nxt != tp->snd_una) {
a6bbda13
MK
368 tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
369 if (tp->t_timer[TCPT_PERSIST]) {
370 tp->t_timer[TCPT_PERSIST] = 0;
371 tp->t_rxtshift = 0;
372 }
2266a466 373 }
6be9a225 374 } else
acad00cc
MK
375 if (SEQ_GT(tp->snd_nxt + len, tp->snd_max))
376 tp->snd_max = tp->snd_nxt + len;
a6503abf 377
f1dd32da
BJ
378 /*
379 * Trace.
380 */
8931cb5b 381 if (so->so_options & SO_DEBUG)
f1dd32da
BJ
382 tcp_trace(TA_OUTPUT, tp->t_state, tp, ti, 0);
383
a6503abf
BJ
384 /*
385 * Fill in IP length and desired time to live and
386 * send to IP level.
387 */
8b5a83bb 388 ((struct ip *)ti)->ip_len = sizeof (struct tcpiphdr) + optlen + len;
a6503abf 389 ((struct ip *)ti)->ip_ttl = TCP_TTL;
d55475b1
MK
390 error = ip_output(m, tp->t_inpcb->inp_options, &tp->t_inpcb->inp_route,
391 so->so_options & SO_DONTROUTE);
0e3f761f 392 if (error)
8a2f82db 393 return (error);
3b52afc5 394 tcpstat.tcps_sndtotal++;
a6503abf
BJ
395
396 /*
397 * Data sent (as far as we can tell).
398 * If this advertises a larger window than any other segment,
4aed14e3 399 * then remember the size of the advertised window.
acad00cc 400 * Any pending ACK has now been sent.
a6503abf 401 */
be43ac7f 402 if (win > 0 && SEQ_GT(tp->rcv_nxt+win, tp->rcv_adv))
a6503abf 403 tp->rcv_adv = tp->rcv_nxt + win;
0974b45c 404 tp->t_flags &= ~(TF_ACKNOW|TF_DELACK);
acad00cc 405 if (sendalot)
2266a466 406 goto again;
8a2f82db 407 return (0);
76ee76df 408}
2266a466
BJ
409
410tcp_setpersist(tp)
411 register struct tcpcb *tp;
412{
7cc62c26 413 register t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1;
2266a466
BJ
414
415 if (tp->t_timer[TCPT_REXMT])
416 panic("tcp_output REXMT");
417 /*
418 * Start/restart persistance timer.
419 */
420 TCPT_RANGESET(tp->t_timer[TCPT_PERSIST],
7cc62c26 421 t * tcp_backoff[tp->t_rxtshift],
3d92549d
MK
422 TCPTV_PERSMIN, TCPTV_PERSMAX);
423 if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
424 tp->t_rxtshift++;
2266a466 425}