remove fix for acking window probes: don't ack acks!
[unix-history] / usr / src / sys / netinet / tcp_output.c
CommitLineData
8278ae69 1/* tcp_output.c 6.7 84/11/14 */
76ee76df 2
20666ad3
JB
3#include "param.h"
4#include "systm.h"
5#include "mbuf.h"
6#include "protosw.h"
7#include "socket.h"
8#include "socketvar.h"
9#include "errno.h"
f4d55810 10
c124e997 11#include "../net/route.h"
f4d55810 12
20666ad3
JB
13#include "in.h"
14#include "in_pcb.h"
15#include "in_systm.h"
16#include "ip.h"
17#include "ip_var.h"
18#include "tcp.h"
0974b45c 19#define TCPOUTFLAGS
20666ad3
JB
20#include "tcp_fsm.h"
21#include "tcp_seq.h"
22#include "tcp_timer.h"
23#include "tcp_var.h"
24#include "tcpip.h"
25#include "tcp_debug.h"
76ee76df 26
8b5a83bb 27/*
77a4e3ca 28 * Initial options.
8b5a83bb 29 */
8b5a83bb 30u_char tcp_initopt[4] = { TCPOPT_MAXSEG, 4, 0x0, 0x0, };
8b5a83bb 31
ea727f86 32/*
4aed14e3 33 * Tcp output routine: figure out what should be sent and send it.
ea727f86 34 */
a6503abf 35tcp_output(tp)
53a5409e 36 register struct tcpcb *tp;
ea727f86 37{
53a5409e 38 register struct socket *so = tp->t_inpcb->inp_socket;
a6503abf
BJ
39 register int len;
40 struct mbuf *m0;
8a2f82db 41 int off, flags, win, error;
a6503abf
BJ
42 register struct mbuf *m;
43 register struct tcpiphdr *ti;
8b5a83bb
BJ
44 u_char *opt;
45 unsigned optlen = 0;
2266a466 46 int sendalot;
76ee76df 47
a6503abf 48 /*
8ae6c089 49 * Determine length of data that should be transmitted,
0974b45c
BJ
50 * and flags that will be used.
51 * If there is some data or critical controls (SYN, RST)
52 * to send, then transmit; otherwise, investigate further.
a6503abf 53 */
2266a466
BJ
54again:
55 sendalot = 0;
a6503abf 56 off = tp->snd_nxt - tp->snd_una;
8278ae69
MK
57 win = MIN(tp->snd_wnd, tp->snd_cwnd) + tp->t_force;
58 len = MIN(so->so_snd.sb_cc, win) - off;
3232ef09 59 if (len < 0)
8a2f82db 60 return (0); /* ??? */ /* past FIN */
2266a466 61 if (len > tp->t_maxseg) {
0974b45c 62 len = tp->t_maxseg;
7d304adf
MK
63 /*
64 * Don't send more than one segment if retransmitting.
65 */
8278ae69 66 if (tp->t_rxtshift == 0)
7d304adf 67 sendalot = 1;
2266a466 68 }
8ae6c089 69
0974b45c 70 flags = tcp_outflags[tp->t_state];
275e05b7 71 if (tp->snd_nxt + len < tp->snd_una + so->so_snd.sb_cc)
405c9168 72 flags &= ~TH_FIN;
8ae6c089
BJ
73 if (flags & (TH_SYN|TH_RST|TH_FIN))
74 goto send;
75 if (SEQ_GT(tp->snd_up, tp->snd_una))
a6503abf
BJ
76 goto send;
77
8ae6c089 78 /*
7d304adf
MK
79 * Sender silly window avoidance. If connection is idle
80 * and can send all data, a maximum segment,
81 * at least a maximum default-size segment do it,
8ae6c089
BJ
82 * or are forced, do it; otherwise don't bother.
83 */
84 if (len) {
7d304adf 85 if (len == tp->t_maxseg || len >= so->so_snd.sb_cc) /* off = 0*/
8ae6c089 86 goto send;
7d304adf 87 if (len >= TCP_MSS) /* a lot */
8ae6c089
BJ
88 goto send;
89 if (tp->t_force)
90 goto send;
91 }
92
a6503abf 93 /*
3232ef09 94 * Send if we owe peer an ACK.
a6503abf 95 */
8b5a83bb
BJ
96 if (tp->t_flags&TF_ACKNOW)
97 goto send;
98
76ee76df 99
a6503abf 100 /*
8278ae69 101 * Calculate available window, and also amount
a6503abf 102 * of window known to peer (as advertised window less
8278ae69
MK
103 * next expected input.) If the difference is 35% or more of the
104 * maximum possible window, then want to send a window update to peer.
a6503abf 105 */
0974b45c
BJ
106 win = sbspace(&so->so_rcv);
107 if (win > 0 &&
108 ((100*(win-(tp->rcv_adv-tp->rcv_nxt))/so->so_rcv.sb_hiwat) >= 35))
a6503abf
BJ
109 goto send;
110
2266a466
BJ
111 /*
112 * TCP window updates are not reliable, rather a polling protocol
113 * using ``persist'' packets is used to insure receipt of window
114 * updates. The three ``states'' for the output side are:
115 * idle not doing retransmits or persists
116 * persisting to move a zero window
117 * (re)transmitting and thereby not persisting
118 *
119 * tp->t_timer[TCPT_PERSIST]
120 * is set when we are in persist state.
121 * tp->t_force
122 * is set when we are called to send a persist packet.
123 * tp->t_timer[TCPT_REXMT]
124 * is set when we are retransmitting
125 * The output side is idle when both timers are zero.
126 *
127 * If send window is closed, there is data to transmit, and no
128 * retransmit or persist is pending, then go to persist state,
129 * arranging to force out a byte to get more current window information
130 * if nothing happens soon.
131 */
132 if (tp->snd_wnd == 0 && so->so_snd.sb_cc &&
133 tp->t_timer[TCPT_REXMT] == 0 && tp->t_timer[TCPT_PERSIST] == 0) {
134 tp->t_rxtshift = 0;
135 tcp_setpersist(tp);
136 }
137
a6503abf
BJ
138 /*
139 * No reason to send a segment, just return.
140 */
f1b2fa5b 141 return (0);
a6503abf
BJ
142
143send:
144 /*
145 * Grab a header mbuf, attaching a copy of data to
146 * be transmitted, and initialize the header from
147 * the template for sends on this connection.
148 */
60d68e9e
SL
149 MGET(m, M_DONTWAIT, MT_HEADER);
150 if (m == NULL)
8a2f82db 151 return (ENOBUFS);
4aed14e3 152 m->m_off = MMAXOFF - sizeof (struct tcpiphdr);
53a5409e 153 m->m_len = sizeof (struct tcpiphdr);
a6503abf
BJ
154 if (len) {
155 m->m_next = m_copy(so->so_snd.sb_mb, off, len);
156 if (m->m_next == 0)
157 len = 0;
158 }
159 ti = mtod(m, struct tcpiphdr *);
160 if (tp->t_template == 0)
161 panic("tcp_output");
f1b2fa5b 162 bcopy((caddr_t)tp->t_template, (caddr_t)ti, sizeof (struct tcpiphdr));
a6503abf
BJ
163
164 /*
165 * Fill in fields, remembering maximum advertised
166 * window for use in delaying messages about window sizes.
167 */
4aed14e3
BJ
168 ti->ti_seq = tp->snd_nxt;
169 ti->ti_ack = tp->rcv_nxt;
4aed14e3
BJ
170 ti->ti_seq = htonl(ti->ti_seq);
171 ti->ti_ack = htonl(ti->ti_ack);
8b5a83bb
BJ
172 /*
173 * Before ESTABLISHED, force sending of initial options
174 * unless TCP set to not do any options.
175 */
176 if (tp->t_state < TCPS_ESTABLISHED) {
99578149
MK
177 int mss;
178
8b5a83bb
BJ
179 if (tp->t_flags&TF_NOOPT)
180 goto noopt;
99578149
MK
181 mss = MIN(so->so_rcv.sb_hiwat / 2, tcp_mss(tp));
182 if (mss <= IP_MSS - sizeof(struct tcpiphdr))
183 goto noopt;
8b5a83bb
BJ
184 opt = tcp_initopt;
185 optlen = sizeof (tcp_initopt);
99578149 186 *(u_short *)(opt + 2) = htons(mss);
8b5a83bb
BJ
187 } else {
188 if (tp->t_tcpopt == 0)
189 goto noopt;
190 opt = mtod(tp->t_tcpopt, u_char *);
191 optlen = tp->t_tcpopt->m_len;
192 }
77a4e3ca 193 if (opt) {
f1b2fa5b 194 m0 = m->m_next;
cce93e4b 195 m->m_next = m_get(M_DONTWAIT, MT_DATA);
0974b45c
BJ
196 if (m->m_next == 0) {
197 (void) m_free(m);
8b5a83bb 198 m_freem(m0);
8a2f82db 199 return (ENOBUFS);
0974b45c
BJ
200 }
201 m->m_next->m_next = m0;
8b5a83bb 202 m0 = m->m_next;
8b5a83bb 203 m0->m_len = optlen;
668cc26d 204 bcopy((caddr_t)opt, mtod(m0, caddr_t), optlen);
8b5a83bb 205 opt = (u_char *)(mtod(m0, caddr_t) + optlen);
8b5a83bb
BJ
206 while (m0->m_len & 0x3) {
207 *opt++ = TCPOPT_EOL;
208 m0->m_len++;
209 }
210 optlen = m0->m_len;
211 ti->ti_off = (sizeof (struct tcphdr) + optlen) >> 2;
0974b45c 212 }
8b5a83bb 213noopt:
0974b45c 214 ti->ti_flags = flags;
a6503abf 215 win = sbspace(&so->so_rcv);
8ae6c089
BJ
216 if (win < so->so_rcv.sb_hiwat / 4) /* avoid silly window */
217 win = 0;
a6503abf 218 if (win > 0)
f1b2fa5b 219 ti->ti_win = htons((u_short)win);
0974b45c 220 if (SEQ_GT(tp->snd_up, tp->snd_nxt)) {
5e74df82 221 ti->ti_urp = tp->snd_up - tp->snd_nxt;
5e74df82 222 ti->ti_urp = htons(ti->ti_urp);
a6503abf
BJ
223 ti->ti_flags |= TH_URG;
224 } else
225 /*
226 * If no urgent pointer to send, then we pull
227 * the urgent pointer to the left edge of the send window
228 * so that it doesn't drift into the send window on sequence
229 * number wraparound.
230 */
0974b45c 231 tp->snd_up = tp->snd_una; /* drag it along */
02c1608b
BJ
232 /*
233 * If anything to send and we can send it all, set PUSH.
234 * (This will keep happy those implementations which only
5cdc4d65 235 * give data to the user when a buffer fills or a PUSH comes in.)
02c1608b 236 */
02c1608b
BJ
237 if (len && off+len == so->so_snd.sb_cc)
238 ti->ti_flags |= TH_PUSH;
a6503abf
BJ
239
240 /*
241 * Put TCP length in extended header, and then
242 * checksum extended header and data.
243 */
8b5a83bb
BJ
244 if (len + optlen) {
245 ti->ti_len = sizeof (struct tcphdr) + optlen + len;
8b5a83bb 246 ti->ti_len = htons((u_short)ti->ti_len);
8b5a83bb 247 }
668cc26d 248 ti->ti_sum = in_cksum(m, sizeof (struct tcpiphdr) + (int)optlen + len);
0974b45c
BJ
249
250 /*
2266a466
BJ
251 * In transmit state, time the transmission and arrange for
252 * the retransmit. In persist state, reset persist time for
253 * next persist.
0974b45c 254 */
2266a466
BJ
255 if (tp->t_force == 0) {
256 /*
8931cb5b 257 * Advance snd_nxt over sequence space of this segment.
2266a466
BJ
258 */
259 if (flags & (TH_SYN|TH_FIN))
260 tp->snd_nxt++;
261 tp->snd_nxt += len;
21fc141f 262 if (SEQ_GT(tp->snd_nxt, tp->snd_max)) {
e45e6858 263 tp->snd_max = tp->snd_nxt;
21fc141f
SL
264 /*
265 * Time this transmission if not a retransmission and
266 * not currently timing anything.
267 */
268 if (tp->t_rtt == 0) {
269 tp->t_rtt = 1;
270 tp->t_rtseq = tp->snd_nxt - len;
271 }
2266a466 272 }
405c9168 273
2266a466
BJ
274 /*
275 * Set retransmit timer if not currently set.
276 * Initial value for retransmit timer to tcp_beta*tp->t_srtt.
277 * Initialize shift counter which is used for exponential
278 * backoff of retransmit time.
279 */
280 if (tp->t_timer[TCPT_REXMT] == 0 &&
281 tp->snd_nxt != tp->snd_una) {
282 TCPT_RANGESET(tp->t_timer[TCPT_REXMT],
283 tcp_beta * tp->t_srtt, TCPTV_MIN, TCPTV_MAX);
2266a466
BJ
284 tp->t_rxtshift = 0;
285 }
286 tp->t_timer[TCPT_PERSIST] = 0;
e45e6858
BJ
287 } else {
288 if (SEQ_GT(tp->snd_una+1, tp->snd_max))
289 tp->snd_max = tp->snd_una+1;
8931cb5b 290 }
a6503abf 291
f1dd32da
BJ
292 /*
293 * Trace.
294 */
8931cb5b 295 if (so->so_options & SO_DEBUG)
f1dd32da
BJ
296 tcp_trace(TA_OUTPUT, tp->t_state, tp, ti, 0);
297
a6503abf
BJ
298 /*
299 * Fill in IP length and desired time to live and
300 * send to IP level.
301 */
8b5a83bb 302 ((struct ip *)ti)->ip_len = sizeof (struct tcpiphdr) + optlen + len;
a6503abf 303 ((struct ip *)ti)->ip_ttl = TCP_TTL;
0e3f761f 304 if (so->so_options & SO_DONTROUTE)
755d8841
SL
305 error =
306 ip_output(m, tp->t_ipopt, (struct route *)0, IP_ROUTETOIF);
0e3f761f
SL
307 else
308 error = ip_output(m, tp->t_ipopt, &tp->t_inpcb->inp_route, 0);
309 if (error)
8a2f82db 310 return (error);
a6503abf
BJ
311
312 /*
313 * Data sent (as far as we can tell).
314 * If this advertises a larger window than any other segment,
4aed14e3 315 * then remember the size of the advertised window.
0974b45c 316 * Drop send for purpose of ACK requirements.
a6503abf 317 */
be43ac7f 318 if (win > 0 && SEQ_GT(tp->rcv_nxt+win, tp->rcv_adv))
a6503abf 319 tp->rcv_adv = tp->rcv_nxt + win;
0974b45c 320 tp->t_flags &= ~(TF_ACKNOW|TF_DELACK);
2266a466
BJ
321 if (sendalot && tp->t_force == 0)
322 goto again;
8a2f82db 323 return (0);
76ee76df 324}
2266a466
BJ
325
326tcp_setpersist(tp)
327 register struct tcpcb *tp;
328{
329
330 if (tp->t_timer[TCPT_REXMT])
331 panic("tcp_output REXMT");
332 /*
333 * Start/restart persistance timer.
334 */
335 TCPT_RANGESET(tp->t_timer[TCPT_PERSIST],
336 ((int)(tcp_beta * tp->t_srtt)) << tp->t_rxtshift,
337 TCPTV_PERSMIN, TCPTV_MAX);
338 tp->t_rxtshift++;
339 if (tp->t_rxtshift >= TCP_MAXRXTSHIFT)
340 tp->t_rxtshift = 0;
341}