keep separate interface list so we just delete routes when they expire
[unix-history] / usr / src / sys / netinet / tcp_output.c
CommitLineData
2266a466 1/* tcp_output.c 4.39 82/06/08 */
76ee76df
BJ
2
3#include "../h/param.h"
4#include "../h/systm.h"
5#include "../h/mbuf.h"
405c9168 6#include "../h/protosw.h"
76ee76df 7#include "../h/socket.h"
d52566dd 8#include "../h/socketvar.h"
0974b45c 9#include "../net/in.h"
c124e997 10#include "../net/route.h"
0974b45c
BJ
11#include "../net/in_pcb.h"
12#include "../net/in_systm.h"
d52566dd 13#include "../net/ip.h"
eb44bfb2 14#include "../net/ip_var.h"
d52566dd 15#include "../net/tcp.h"
0974b45c 16#define TCPOUTFLAGS
d52566dd 17#include "../net/tcp_fsm.h"
0974b45c
BJ
18#include "../net/tcp_seq.h"
19#include "../net/tcp_timer.h"
20#include "../net/tcp_var.h"
21#include "../net/tcpip.h"
f1dd32da 22#include "../net/tcp_debug.h"
8a2f82db 23#include <errno.h>
76ee76df 24
/*
 * Table of TCP state names.  Declared extern so that this file only
 * references the table instead of tentatively defining an array of
 * unknown size.
 * NOTE(review): the defining file is not visible here -- presumably the
 * table is defined once elsewhere in the TCP code; confirm before merging.
 */
extern char *tcpstates[];		/* XXX */
8b5a83bb
BJ
26
27/*
28 * Initial options: indicate max segment length 1/2 of space
29 * allocated for receive; if TCPTRUEOOB is defined, indicate
30 * willingness to do true out-of-band.
31 */
32#ifndef TCPTRUEOOB
33u_char tcp_initopt[4] = { TCPOPT_MAXSEG, 4, 0x0, 0x0, };
34#else
35u_char tcp_initopt[6] = { TCPOPT_MAXSEG, 4, 0x0, 0x0, TCPOPT_WILLOOB, 2 };
36#endif
37
ea727f86 38/*
4aed14e3 39 * Tcp output routine: figure out what should be sent and send it.
ea727f86 40 */
a6503abf 41tcp_output(tp)
53a5409e 42 register struct tcpcb *tp;
ea727f86 43{
53a5409e 44 register struct socket *so = tp->t_inpcb->inp_socket;
a6503abf
BJ
45 register int len;
46 struct mbuf *m0;
8a2f82db 47 int off, flags, win, error;
a6503abf
BJ
48 register struct mbuf *m;
49 register struct tcpiphdr *ti;
8b5a83bb
BJ
50 u_char *opt;
51 unsigned optlen = 0;
2266a466 52 int sendalot;
76ee76df 53
a6503abf 54COUNT(TCP_OUTPUT);
76ee76df 55
a6503abf 56 /*
8ae6c089 57 * Determine length of data that should be transmitted,
0974b45c
BJ
58 * and flags that will be used.
59 * If there is some data or critical controls (SYN, RST)
60 * to send, then transmit; otherwise, investigate further.
a6503abf 61 */
2266a466
BJ
62again:
63 sendalot = 0;
a6503abf 64 off = tp->snd_nxt - tp->snd_una;
405c9168 65 len = MIN(so->so_snd.sb_cc, tp->snd_wnd+tp->t_force) - off;
3232ef09 66 if (len < 0)
8a2f82db 67 return (0); /* ??? */ /* past FIN */
2266a466 68 if (len > tp->t_maxseg) {
0974b45c 69 len = tp->t_maxseg;
2266a466
BJ
70 sendalot = 1;
71 }
8ae6c089 72
0974b45c 73 flags = tcp_outflags[tp->t_state];
275e05b7 74 if (tp->snd_nxt + len < tp->snd_una + so->so_snd.sb_cc)
405c9168 75 flags &= ~TH_FIN;
8ae6c089
BJ
76 if (flags & (TH_SYN|TH_RST|TH_FIN))
77 goto send;
78 if (SEQ_GT(tp->snd_up, tp->snd_una))
a6503abf
BJ
79 goto send;
80
8ae6c089
BJ
81 /*
82 * Sender silly window avoidance. If can send all data,
83 * a maximum segment, at least 1/4 of window do it,
84 * or are forced, do it; otherwise don't bother.
85 */
86 if (len) {
87 if (len == tp->t_maxseg || off+len >= so->so_snd.sb_cc)
88 goto send;
89 if (len * 4 >= tp->snd_wnd) /* a lot */
90 goto send;
91 if (tp->t_force)
92 goto send;
93 }
94
a6503abf 95 /*
3232ef09 96 * Send if we owe peer an ACK.
a6503abf 97 */
8b5a83bb
BJ
98 if (tp->t_flags&TF_ACKNOW)
99 goto send;
100
101#ifdef TCPTRUEOOB
102 /*
103 * Send if an out of band data or ack should be transmitted.
104 */
105 if (tp->t_oobflags&(TCPOOB_OWEACK|TCPOOB_NEEDACK)))
a6503abf 106 goto send;
8b5a83bb 107#endif
76ee76df 108
a6503abf
BJ
109 /*
110 * Calculate available window in i, and also amount
111 * of window known to peer (as advertised window less
112 * next expected input.) If this is 35% or more of the
113 * maximum possible window, then want to send a segment to peer.
114 */
0974b45c
BJ
115 win = sbspace(&so->so_rcv);
116 if (win > 0 &&
117 ((100*(win-(tp->rcv_adv-tp->rcv_nxt))/so->so_rcv.sb_hiwat) >= 35))
a6503abf
BJ
118 goto send;
119
2266a466
BJ
120 /*
121 * TCP window updates are not reliable, rather a polling protocol
122 * using ``persist'' packets is used to insure receipt of window
123 * updates. The three ``states'' for the output side are:
124 * idle not doing retransmits or persists
125 * persisting to move a zero window
126 * (re)transmitting and thereby not persisting
127 *
128 * tp->t_timer[TCPT_PERSIST]
129 * is set when we are in persist state.
130 * tp->t_force
131 * is set when we are called to send a persist packet.
132 * tp->t_timer[TCPT_REXMT]
133 * is set when we are retransmitting
134 * The output side is idle when both timers are zero.
135 *
136 * If send window is closed, there is data to transmit, and no
137 * retransmit or persist is pending, then go to persist state,
138 * arranging to force out a byte to get more current window information
139 * if nothing happens soon.
140 */
141 if (tp->snd_wnd == 0 && so->so_snd.sb_cc &&
142 tp->t_timer[TCPT_REXMT] == 0 && tp->t_timer[TCPT_PERSIST] == 0) {
143 tp->t_rxtshift = 0;
144 tcp_setpersist(tp);
145 }
146
a6503abf
BJ
147 /*
148 * No reason to send a segment, just return.
149 */
f1b2fa5b 150 return (0);
a6503abf
BJ
151
152send:
153 /*
154 * Grab a header mbuf, attaching a copy of data to
155 * be transmitted, and initialize the header from
156 * the template for sends on this connection.
157 */
76ee76df
BJ
158 MGET(m, 0);
159 if (m == 0)
8a2f82db 160 return (ENOBUFS);
4aed14e3 161 m->m_off = MMAXOFF - sizeof (struct tcpiphdr);
53a5409e 162 m->m_len = sizeof (struct tcpiphdr);
a6503abf
BJ
163 if (len) {
164 m->m_next = m_copy(so->so_snd.sb_mb, off, len);
165 if (m->m_next == 0)
166 len = 0;
167 }
168 ti = mtod(m, struct tcpiphdr *);
169 if (tp->t_template == 0)
170 panic("tcp_output");
f1b2fa5b 171 bcopy((caddr_t)tp->t_template, (caddr_t)ti, sizeof (struct tcpiphdr));
a6503abf
BJ
172
173 /*
174 * Fill in fields, remembering maximum advertised
175 * window for use in delaying messages about window sizes.
176 */
4aed14e3
BJ
177 ti->ti_seq = tp->snd_nxt;
178 ti->ti_ack = tp->rcv_nxt;
179#if vax
180 ti->ti_seq = htonl(ti->ti_seq);
181 ti->ti_ack = htonl(ti->ti_ack);
182#endif
8b5a83bb
BJ
183 /*
184 * Before ESTABLISHED, force sending of initial options
185 * unless TCP set to not do any options.
186 */
187 if (tp->t_state < TCPS_ESTABLISHED) {
188 if (tp->t_flags&TF_NOOPT)
189 goto noopt;
190 opt = tcp_initopt;
191 optlen = sizeof (tcp_initopt);
192 *(u_short *)(opt + 2) = so->so_rcv.sb_hiwat / 2;
193#if vax
194 *(u_short *)(opt + 2) = htons(*(u_short *)(opt + 2));
195#endif
196 } else {
197 if (tp->t_tcpopt == 0)
198 goto noopt;
199 opt = mtod(tp->t_tcpopt, u_char *);
200 optlen = tp->t_tcpopt->m_len;
201 }
202#ifndef TCPTRUEOOB
203 if (opt)
204#else
205 if (opt || (tp->t_oobflags&(TCPOOB_OWEACK|TCPOOB_NEEDACK)))
206#endif
207 {
f1b2fa5b 208 m0 = m->m_next;
ef9b4258 209 m->m_next = m_get(M_DONTWAIT);
0974b45c
BJ
210 if (m->m_next == 0) {
211 (void) m_free(m);
8b5a83bb 212 m_freem(m0);
8a2f82db 213 return (ENOBUFS);
0974b45c
BJ
214 }
215 m->m_next->m_next = m0;
8b5a83bb
BJ
216 m0 = m->m_next;
217 m0->m_off = MMINOFF;
218 m0->m_len = optlen;
668cc26d 219 bcopy((caddr_t)opt, mtod(m0, caddr_t), optlen);
8b5a83bb
BJ
220 opt = (u_char *)(mtod(m0, caddr_t) + optlen);
221#ifdef TCPTRUEOOB
222 if (tp->t_oobflags&TCPOOB_OWEACK) {
223printf("tp %x send OOBACK for %x\n", tp->t_iobseq);
224 *opt++ = TCPOPT_OOBACK;
225 *opt++ = 3;
226 *opt++ = tp->t_iobseq;
227 m0->m_len += 3;
228 tp->t_oobflags &= ~TCPOOB_OWEACK;
229 /* sender should rexmt oob to force ack repeat */
230 }
231 if (tp->t_oobflags&TCPOOB_NEEDACK) {
232printf("tp %x send OOBDATA seq %x data %x\n", tp->t_oobseq, tp->t_oobc);
233 *opt++ = TCPOPT_OOBDATA;
b2db9217 234 *opt++ = 8;
8b5a83bb
BJ
235 *opt++ = tp->t_oobseq;
236 *opt++ = tp->t_oobc;
b2db9217
BJ
237 *(tcp_seq *)opt = tp->t_oobmark - tp->snd_nxt;
238#ifdef vax
239 *(tcp_seq *)opt = htonl((unsigned)*(tcp_seq *)opt);
240#endif
241 m0->m_len += 8;
8b5a83bb
BJ
242 TCPT_RANGESET(tp->t_timer[TCPT_OOBREXMT],
243 tcp_beta * tp->t_srtt, TCPTV_MIN, TCPTV_MAX);
244 }
245#endif
246 while (m0->m_len & 0x3) {
247 *opt++ = TCPOPT_EOL;
248 m0->m_len++;
249 }
250 optlen = m0->m_len;
251 ti->ti_off = (sizeof (struct tcphdr) + optlen) >> 2;
0974b45c 252 }
8b5a83bb 253noopt:
0974b45c 254 ti->ti_flags = flags;
a6503abf 255 win = sbspace(&so->so_rcv);
8ae6c089
BJ
256 if (win < so->so_rcv.sb_hiwat / 4) /* avoid silly window */
257 win = 0;
a6503abf 258 if (win > 0)
8ae6c089 259#if vax
f1b2fa5b 260 ti->ti_win = htons((u_short)win);
8ae6c089
BJ
261#else
262 ti->ti_win = win;
263#endif
0974b45c 264 if (SEQ_GT(tp->snd_up, tp->snd_nxt)) {
5e74df82
BJ
265 ti->ti_urp = tp->snd_up - tp->snd_nxt;
266#if vax
267 ti->ti_urp = htons(ti->ti_urp);
268#endif
a6503abf
BJ
269 ti->ti_flags |= TH_URG;
270 } else
271 /*
272 * If no urgent pointer to send, then we pull
273 * the urgent pointer to the left edge of the send window
274 * so that it doesn't drift into the send window on sequence
275 * number wraparound.
276 */
0974b45c
BJ
277 tp->snd_up = tp->snd_una; /* drag it along */
278 /* PUSH */
a6503abf
BJ
279
280 /*
281 * Put TCP length in extended header, and then
282 * checksum extended header and data.
283 */
8b5a83bb
BJ
284 if (len + optlen) {
285 ti->ti_len = sizeof (struct tcphdr) + optlen + len;
286#if vax
287 ti->ti_len = htons((u_short)ti->ti_len);
288#endif
289 }
668cc26d 290 ti->ti_sum = in_cksum(m, sizeof (struct tcpiphdr) + (int)optlen + len);
0974b45c
BJ
291
292 /*
2266a466
BJ
293 * In transmit state, time the transmission and arrange for
294 * the retransmit. In persist state, reset persist time for
295 * next persist.
0974b45c 296 */
2266a466
BJ
297 if (tp->t_force == 0) {
298 /*
299 * Advance snd_nxt over sequence space of this segment
300 */
301 if (flags & (TH_SYN|TH_FIN))
302 tp->snd_nxt++;
303 tp->snd_nxt += len;
405c9168 304
2266a466
BJ
305 /*
306 * Time this transmission if not a retransmission and
307 * not currently timing anything.
308 */
309 if (SEQ_GT(tp->snd_nxt, tp->snd_max) && tp->t_rtt == 0) {
310 tp->t_rtt = 1;
311 tp->t_rtseq = tp->snd_nxt - len;
312 }
405c9168 313
2266a466
BJ
314 /*
315 * Set retransmit timer if not currently set.
316 * Initial value for retransmit timer to tcp_beta*tp->t_srtt.
317 * Initialize shift counter which is used for exponential
318 * backoff of retransmit time.
319 */
320 if (tp->t_timer[TCPT_REXMT] == 0 &&
321 tp->snd_nxt != tp->snd_una) {
322 TCPT_RANGESET(tp->t_timer[TCPT_REXMT],
323 tcp_beta * tp->t_srtt, TCPTV_MIN, TCPTV_MAX);
324 tp->t_rtt = 0;
325 tp->t_rxtshift = 0;
326 }
327 tp->t_timer[TCPT_PERSIST] = 0;
328 } else
329 tcp_setpersist(tp);
a6503abf 330
f1dd32da
BJ
331 /*
332 * Trace.
333 */
334 if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
335 tcp_trace(TA_OUTPUT, tp->t_state, tp, ti, 0);
336
a6503abf
BJ
337 /*
338 * Fill in IP length and desired time to live and
339 * send to IP level.
340 */
8b5a83bb 341 ((struct ip *)ti)->ip_len = sizeof (struct tcpiphdr) + optlen + len;
a6503abf 342 ((struct ip *)ti)->ip_ttl = TCP_TTL;
8a2f82db
SL
343 if (error = ip_output(m, tp->t_ipopt, &tp->t_inpcb->inp_route, 0))
344 return (error);
a6503abf
BJ
345
346 /*
347 * Data sent (as far as we can tell).
348 * If this advertises a larger window than any other segment,
4aed14e3 349 * then remember the size of the advertised window.
0974b45c 350 * Drop send for purpose of ACK requirements.
a6503abf 351 */
be43ac7f 352 if (win > 0 && SEQ_GT(tp->rcv_nxt+win, tp->rcv_adv))
a6503abf 353 tp->rcv_adv = tp->rcv_nxt + win;
0974b45c 354 tp->t_flags &= ~(TF_ACKNOW|TF_DELACK);
4aed14e3
BJ
355 if (SEQ_GT(tp->snd_nxt, tp->snd_max))
356 tp->snd_max = tp->snd_nxt;
2266a466
BJ
357 if (sendalot && tp->t_force == 0)
358 goto again;
8a2f82db 359 return (0);
76ee76df 360}
2266a466
BJ
361
362tcp_setpersist(tp)
363 register struct tcpcb *tp;
364{
365
366 if (tp->t_timer[TCPT_REXMT])
367 panic("tcp_output REXMT");
368 /*
369 * Start/restart persistance timer.
370 */
371 TCPT_RANGESET(tp->t_timer[TCPT_PERSIST],
372 ((int)(tcp_beta * tp->t_srtt)) << tp->t_rxtshift,
373 TCPTV_PERSMIN, TCPTV_MAX);
374 tp->t_rxtshift++;
375 if (tp->t_rxtshift >= TCP_MAXRXTSHIFT)
376 tp->t_rxtshift = 0;
377}