always set src address to one of ours on icmp errors, even if we don't
[unix-history] / usr / src / sys / netinet / tcp_output.c
CommitLineData
8ae0e4b4
KM
1/*
2 * Copyright (c) 1982 Regents of the University of California.
3 * All rights reserved. The Berkeley software License Agreement
4 * specifies the terms and conditions for redistribution.
5 *
faa26a98 6 * @(#)tcp_output.c 6.18 (Berkeley) %G%
8ae0e4b4 7 */
76ee76df 8
20666ad3
JB
9#include "param.h"
10#include "systm.h"
11#include "mbuf.h"
12#include "protosw.h"
13#include "socket.h"
14#include "socketvar.h"
15#include "errno.h"
f4d55810 16
c124e997 17#include "../net/route.h"
f4d55810 18
20666ad3
JB
19#include "in.h"
20#include "in_pcb.h"
21#include "in_systm.h"
22#include "ip.h"
23#include "ip_var.h"
24#include "tcp.h"
0974b45c 25#define TCPOUTFLAGS
20666ad3
JB
26#include "tcp_fsm.h"
27#include "tcp_seq.h"
28#include "tcp_timer.h"
29#include "tcp_var.h"
30#include "tcpip.h"
31#include "tcp_debug.h"
76ee76df 32
8b5a83bb 33/*
77a4e3ca 34 * Initial options.
8b5a83bb 35 */
8b5a83bb 36u_char tcp_initopt[4] = { TCPOPT_MAXSEG, 4, 0x0, 0x0, };
8b5a83bb 37
ea727f86 38/*
4aed14e3 39 * Tcp output routine: figure out what should be sent and send it.
ea727f86 40 */
a6503abf 41tcp_output(tp)
53a5409e 42 register struct tcpcb *tp;
ea727f86 43{
53a5409e 44 register struct socket *so = tp->t_inpcb->inp_socket;
acad00cc 45 register int len, win;
a6503abf 46 struct mbuf *m0;
acad00cc 47 int off, flags, error;
a6503abf
BJ
48 register struct mbuf *m;
49 register struct tcpiphdr *ti;
8b5a83bb
BJ
50 u_char *opt;
51 unsigned optlen = 0;
acad00cc 52 int idle, sendalot;
76ee76df 53
a6503abf 54 /*
8ae6c089 55 * Determine length of data that should be transmitted,
0974b45c
BJ
56 * and flags that will be used.
57 * If there is some data or critical controls (SYN, RST)
58 * to send, then transmit; otherwise, investigate further.
a6503abf 59 */
acad00cc 60 idle = (tp->snd_max == tp->snd_una);
2266a466
BJ
61again:
62 sendalot = 0;
a6503abf 63 off = tp->snd_nxt - tp->snd_una;
eaf69575 64 win = MIN(tp->snd_wnd, tp->snd_cwnd);
e4af65f3 65
eaf69575
MK
66 /*
67 * If in persist timeout with window of 0, send 1 byte.
acad00cc
MK
68 * Otherwise, if window is small but nonzero
69 * and timer expired, we will send what we can
70 * and go to transmit state.
eaf69575
MK
71 */
72 if (tp->t_force) {
e4af65f3 73 if (win == 0)
eaf69575
MK
74 win = 1;
75 else {
76 tp->t_timer[TCPT_PERSIST] = 0;
77 tp->t_rxtshift = 0;
78 }
79 }
acad00cc 80
8278ae69 81 len = MIN(so->so_snd.sb_cc, win) - off;
0974b45c 82 flags = tcp_outflags[tp->t_state];
e4af65f3 83 if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc))
405c9168 84 flags &= ~TH_FIN;
e4af65f3
MK
85
86 if (len < 0) {
87 /*
dd0ed0b1
MK
88 * If FIN has been sent but not acked,
89 * but we haven't been called to retransmit,
faa26a98 90 * len will be -1; transmit if acking, otherwise no need.
e4af65f3
MK
91 * Otherwise, window shrank after we sent into it.
92 * If window shrank to 0, cancel pending retransmit
93 * and pull snd_nxt back to (closed) window.
94 * We will enter persist state below.
dd0ed0b1
MK
95 * If the window didn't close completely,
96 * just wait for an ACK.
e4af65f3 97 */
faa26a98
MK
98 if (flags & TH_FIN) {
99 if (tp->t_flags & TF_ACKNOW)
100 len = 0;
101 else
102 return (0);
103 } else if (win == 0) {
104 tp->t_timer[TCPT_REXMT] = 0;
105 tp->snd_nxt = tp->snd_una;
106 len = 0;
107 } else
e4af65f3 108 return (0);
faa26a98
MK
109 }
110 if (len > tp->t_maxseg) {
111 len = tp->t_maxseg;
112 /*
113 * Don't send more than one segment if retransmitting
114 * (or persisting, but then we shouldn't be here).
115 */
116 if (tp->t_rxtshift == 0)
117 sendalot = 1;
e4af65f3
MK
118 }
119 win = sbspace(&so->so_rcv);
acad00cc 120
faa26a98
MK
121
122 /*
123 * If our state indicates that FIN should be sent
124 * and we have not yet done so, or we're retransmitting the FIN,
125 * then we need to send.
126 */
127 if (flags & TH_FIN &&
128 ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una))
129 goto send;
acad00cc
MK
130 /*
131 * Send if we owe peer an ACK.
132 */
e4af65f3
MK
133 if (tp->t_flags & TF_ACKNOW)
134 goto send;
faa26a98 135 if (flags & (TH_SYN|TH_RST))
acad00cc 136 goto send;
8ae6c089 137 if (SEQ_GT(tp->snd_up, tp->snd_una))
a6503abf
BJ
138 goto send;
139
8ae6c089 140 /*
7d304adf
MK
141 * Sender silly window avoidance. If connection is idle
142 * and can send all data, a maximum segment,
143 * at least a maximum default-size segment do it,
8ae6c089 144 * or are forced, do it; otherwise don't bother.
18a438b6
MK
145 * If peer's buffer is tiny, then send
146 * when window is at least half open.
eaf69575
MK
147 * If retransmitting (possibly after persist timer forced us
148 * to send into a small window), then must resend.
8ae6c089
BJ
149 */
150 if (len) {
acad00cc 151 if (len == tp->t_maxseg || len >= TCP_MSS) /* a lot */
8ae6c089 152 goto send;
acad00cc
MK
153 if ((idle || tp->t_flags & TF_NODELAY) &&
154 len + off >= so->so_snd.sb_cc)
8ae6c089
BJ
155 goto send;
156 if (tp->t_force)
157 goto send;
18a438b6
MK
158 if (len >= tp->max_sndwnd / 2)
159 goto send;
eaf69575
MK
160 if (SEQ_LT(tp->snd_nxt, tp->snd_max))
161 goto send;
e4af65f3 162 }
76ee76df 163
a6503abf 164 /*
acad00cc
MK
165 * Compare available window to amount of window
166 * known to peer (as advertised window less
8278ae69
MK
167 * next expected input.) If the difference is 35% or more of the
168 * maximum possible window, then want to send a window update to peer.
a6503abf 169 */
0974b45c
BJ
170 if (win > 0 &&
171 ((100*(win-(tp->rcv_adv-tp->rcv_nxt))/so->so_rcv.sb_hiwat) >= 35))
a6503abf
BJ
172 goto send;
173
2266a466
BJ
174 /*
175 * TCP window updates are not reliable, rather a polling protocol
176 * using ``persist'' packets is used to insure receipt of window
177 * updates. The three ``states'' for the output side are:
178 * idle not doing retransmits or persists
acad00cc 179 * persisting to move a small or zero window
2266a466
BJ
180 * (re)transmitting and thereby not persisting
181 *
182 * tp->t_timer[TCPT_PERSIST]
183 * is set when we are in persist state.
184 * tp->t_force
185 * is set when we are called to send a persist packet.
186 * tp->t_timer[TCPT_REXMT]
187 * is set when we are retransmitting
188 * The output side is idle when both timers are zero.
189 *
eaf69575
MK
190 * If send window is too small, there is data to transmit, and no
191 * retransmit or persist is pending, then go to persist state.
192 * If nothing happens soon, send when timer expires:
193 * if window is nonzero, transmit what we can,
194 * otherwise force out a byte.
2266a466 195 */
eaf69575
MK
196 if (so->so_snd.sb_cc && tp->t_timer[TCPT_REXMT] == 0 &&
197 tp->t_timer[TCPT_PERSIST] == 0) {
2266a466
BJ
198 tp->t_rxtshift = 0;
199 tcp_setpersist(tp);
200 }
201
a6503abf
BJ
202 /*
203 * No reason to send a segment, just return.
204 */
f1b2fa5b 205 return (0);
a6503abf
BJ
206
207send:
208 /*
209 * Grab a header mbuf, attaching a copy of data to
210 * be transmitted, and initialize the header from
211 * the template for sends on this connection.
212 */
60d68e9e
SL
213 MGET(m, M_DONTWAIT, MT_HEADER);
214 if (m == NULL)
8a2f82db 215 return (ENOBUFS);
4aed14e3 216 m->m_off = MMAXOFF - sizeof (struct tcpiphdr);
53a5409e 217 m->m_len = sizeof (struct tcpiphdr);
a6503abf
BJ
218 if (len) {
219 m->m_next = m_copy(so->so_snd.sb_mb, off, len);
220 if (m->m_next == 0)
221 len = 0;
222 }
223 ti = mtod(m, struct tcpiphdr *);
224 if (tp->t_template == 0)
225 panic("tcp_output");
f1b2fa5b 226 bcopy((caddr_t)tp->t_template, (caddr_t)ti, sizeof (struct tcpiphdr));
a6503abf
BJ
227
228 /*
229 * Fill in fields, remembering maximum advertised
230 * window for use in delaying messages about window sizes.
faa26a98 231 * If resending a FIN, be sure not to use a new sequence number.
a6503abf 232 */
faa26a98
MK
233 if (flags & TH_FIN && tp->t_flags & TF_SENTFIN &&
234 tp->snd_nxt != tp->snd_una)
235 tp->snd_nxt--;
acad00cc
MK
236 ti->ti_seq = htonl(tp->snd_nxt);
237 ti->ti_ack = htonl(tp->rcv_nxt);
8b5a83bb
BJ
238 /*
239 * Before ESTABLISHED, force sending of initial options
240 * unless TCP set to not do any options.
241 */
acad00cc
MK
242 opt = NULL;
243 if (tp->t_state < TCPS_ESTABLISHED && (tp->t_flags & TF_NOOPT) == 0) {
8011f5df 244 u_short mss;
99578149 245
99578149 246 mss = MIN(so->so_rcv.sb_hiwat / 2, tcp_mss(tp));
acad00cc
MK
247 if (mss > IP_MSS - sizeof(struct tcpiphdr)) {
248 opt = tcp_initopt;
249 optlen = sizeof (tcp_initopt);
250 *(u_short *)(opt + 2) = htons(mss);
251 }
252 } else if (tp->t_tcpopt) {
8b5a83bb
BJ
253 opt = mtod(tp->t_tcpopt, u_char *);
254 optlen = tp->t_tcpopt->m_len;
255 }
77a4e3ca 256 if (opt) {
f1b2fa5b 257 m0 = m->m_next;
cce93e4b 258 m->m_next = m_get(M_DONTWAIT, MT_DATA);
0974b45c
BJ
259 if (m->m_next == 0) {
260 (void) m_free(m);
8b5a83bb 261 m_freem(m0);
8a2f82db 262 return (ENOBUFS);
0974b45c
BJ
263 }
264 m->m_next->m_next = m0;
8b5a83bb 265 m0 = m->m_next;
8b5a83bb 266 m0->m_len = optlen;
668cc26d 267 bcopy((caddr_t)opt, mtod(m0, caddr_t), optlen);
8b5a83bb 268 opt = (u_char *)(mtod(m0, caddr_t) + optlen);
8b5a83bb
BJ
269 while (m0->m_len & 0x3) {
270 *opt++ = TCPOPT_EOL;
271 m0->m_len++;
272 }
273 optlen = m0->m_len;
274 ti->ti_off = (sizeof (struct tcphdr) + optlen) >> 2;
0974b45c
BJ
275 }
276 ti->ti_flags = flags;
acad00cc
MK
277 /*
278 * Calculate receive window. Don't shrink window,
279 * but avoid silly window syndrome.
280 */
281 if (win < so->so_rcv.sb_hiwat / 4 && win < tp->t_maxseg)
282 win = 0;
283 if (win < (int)(tp->rcv_adv - tp->rcv_nxt))
284 win = (int)(tp->rcv_adv - tp->rcv_nxt);
285 ti->ti_win = htons((u_short)win);
0974b45c 286 if (SEQ_GT(tp->snd_up, tp->snd_nxt)) {
8011f5df 287 ti->ti_urp = htons((u_short)(tp->snd_up - tp->snd_nxt));
a6503abf
BJ
288 ti->ti_flags |= TH_URG;
289 } else
290 /*
291 * If no urgent pointer to send, then we pull
292 * the urgent pointer to the left edge of the send window
293 * so that it doesn't drift into the send window on sequence
294 * number wraparound.
295 */
0974b45c 296 tp->snd_up = tp->snd_una; /* drag it along */
02c1608b
BJ
297 /*
298 * If anything to send and we can send it all, set PUSH.
299 * (This will keep happy those implementations which only
5cdc4d65 300 * give data to the user when a buffer fills or a PUSH comes in.)
02c1608b 301 */
02c1608b
BJ
302 if (len && off+len == so->so_snd.sb_cc)
303 ti->ti_flags |= TH_PUSH;
a6503abf
BJ
304
305 /*
306 * Put TCP length in extended header, and then
307 * checksum extended header and data.
308 */
acad00cc
MK
309 if (len + optlen)
310 ti->ti_len = htons((u_short)(sizeof(struct tcphdr) +
311 optlen + len));
668cc26d 312 ti->ti_sum = in_cksum(m, sizeof (struct tcpiphdr) + (int)optlen + len);
0974b45c
BJ
313
314 /*
2266a466 315 * In transmit state, time the transmission and arrange for
eaf69575 316 * the retransmit. In persist state, just set snd_max.
0974b45c 317 */
eaf69575 318 if (tp->t_force == 0 || tp->t_timer[TCPT_PERSIST] == 0) {
2266a466 319 /*
8931cb5b 320 * Advance snd_nxt over sequence space of this segment.
2266a466 321 */
faa26a98
MK
322 if (flags & TH_SYN)
323 tp->snd_nxt++;
324 if (flags & TH_FIN) {
2266a466 325 tp->snd_nxt++;
faa26a98
MK
326 tp->t_flags |= TF_SENTFIN;
327 }
2266a466 328 tp->snd_nxt += len;
21fc141f 329 if (SEQ_GT(tp->snd_nxt, tp->snd_max)) {
e45e6858 330 tp->snd_max = tp->snd_nxt;
21fc141f
SL
331 /*
332 * Time this transmission if not a retransmission and
333 * not currently timing anything.
334 */
335 if (tp->t_rtt == 0) {
336 tp->t_rtt = 1;
337 tp->t_rtseq = tp->snd_nxt - len;
338 }
2266a466 339 }
405c9168 340
2266a466 341 /*
eaf69575 342 * Set retransmit timer if not currently set,
6be9a225 343 * and not doing an ack or a keep-alive probe.
acad00cc 344 * Initial value for retransmit timer is tcp_beta*tp->t_srtt.
e4af65f3
MK
345 * Initialize shift counter which is used for backoff
346 * of retransmit time.
2266a466
BJ
347 */
348 if (tp->t_timer[TCPT_REXMT] == 0 &&
349 tp->snd_nxt != tp->snd_una) {
350 TCPT_RANGESET(tp->t_timer[TCPT_REXMT],
dd0ed0b1
MK
351 tcp_beta * (tp->t_srtt ? tp->t_srtt : TCPTV_SRTTDFLT),
352 TCPTV_MIN, TCPTV_MAX);
2266a466 353 tp->t_rxtshift = 0;
6be9a225 354 tp->t_timer[TCPT_PERSIST] = 0;
2266a466 355 }
6be9a225 356 } else
acad00cc
MK
357 if (SEQ_GT(tp->snd_nxt + len, tp->snd_max))
358 tp->snd_max = tp->snd_nxt + len;
a6503abf 359
f1dd32da
BJ
360 /*
361 * Trace.
362 */
8931cb5b 363 if (so->so_options & SO_DEBUG)
f1dd32da
BJ
364 tcp_trace(TA_OUTPUT, tp->t_state, tp, ti, 0);
365
a6503abf
BJ
366 /*
367 * Fill in IP length and desired time to live and
368 * send to IP level.
369 */
8b5a83bb 370 ((struct ip *)ti)->ip_len = sizeof (struct tcpiphdr) + optlen + len;
a6503abf 371 ((struct ip *)ti)->ip_ttl = TCP_TTL;
d55475b1
MK
372 error = ip_output(m, tp->t_inpcb->inp_options, &tp->t_inpcb->inp_route,
373 so->so_options & SO_DONTROUTE);
0e3f761f 374 if (error)
8a2f82db 375 return (error);
a6503abf
BJ
376
377 /*
378 * Data sent (as far as we can tell).
379 * If this advertises a larger window than any other segment,
4aed14e3 380 * then remember the size of the advertised window.
acad00cc 381 * Any pending ACK has now been sent.
a6503abf 382 */
be43ac7f 383 if (win > 0 && SEQ_GT(tp->rcv_nxt+win, tp->rcv_adv))
a6503abf 384 tp->rcv_adv = tp->rcv_nxt + win;
0974b45c 385 tp->t_flags &= ~(TF_ACKNOW|TF_DELACK);
acad00cc 386 if (sendalot)
2266a466 387 goto again;
8a2f82db 388 return (0);
76ee76df 389}
2266a466
BJ
390
391tcp_setpersist(tp)
392 register struct tcpcb *tp;
393{
394
395 if (tp->t_timer[TCPT_REXMT])
396 panic("tcp_output REXMT");
397 /*
398 * Start/restart persistance timer.
399 */
400 TCPT_RANGESET(tp->t_timer[TCPT_PERSIST],
401 ((int)(tcp_beta * tp->t_srtt)) << tp->t_rxtshift,
402 TCPTV_PERSMIN, TCPTV_MAX);
403 tp->t_rxtshift++;
404 if (tp->t_rxtshift >= TCP_MAXRXTSHIFT)
405 tp->t_rxtshift = 0;
406}