Add copyright
[unix-history] / usr / src / sys / netinet / tcp_input.c
CommitLineData
8ae0e4b4
KM
1/*
2 * Copyright (c) 1982 Regents of the University of California.
3 * All rights reserved. The Berkeley software License Agreement
4 * specifies the terms and conditions for redistribution.
5 *
6 * @(#)tcp_input.c 6.11 (Berkeley) %G%
7 */
87e78f19 8
20666ad3
JB
9#include "param.h"
10#include "systm.h"
11#include "mbuf.h"
12#include "protosw.h"
13#include "socket.h"
14#include "socketvar.h"
15#include "errno.h"
f4d55810
SL
16
17#include "../net/if.h"
c124e997 18#include "../net/route.h"
f4d55810 19
20666ad3
JB
20#include "in.h"
21#include "in_pcb.h"
22#include "in_systm.h"
23#include "ip.h"
24#include "ip_var.h"
25#include "tcp.h"
26#include "tcp_fsm.h"
27#include "tcp_seq.h"
28#include "tcp_timer.h"
29#include "tcp_var.h"
30#include "tcpip.h"
31#include "tcp_debug.h"
87e78f19 32
22856bb8 33int tcpprintfs = 0;
60b16fa9 34int tcpcksum = 1;
4b935108 35struct tcpiphdr tcp_saveti;
8b5a83bb 36extern tcpnodelack;
87e78f19 37
4b935108 38struct tcpcb *tcp_newtcpcb();
2ff61f9d
BJ
39/*
40 * TCP input routine, follows pages 65-76 of the
41 * protocol specification dated September, 1981 very closely.
42 */
2b4b57cd
BJ
43tcp_input(m0)
44 struct mbuf *m0;
87e78f19 45{
2b4b57cd 46 register struct tcpiphdr *ti;
53a5409e 47 struct inpcb *inp;
2b4b57cd 48 register struct mbuf *m;
8b5a83bb 49 struct mbuf *om = 0;
2b4b57cd 50 int len, tlen, off;
8e65fd66 51 register struct tcpcb *tp = 0;
2b4b57cd 52 register int tiflags;
d52566dd 53 struct socket *so;
f1b2fa5b 54 int todrop, acked;
4b935108 55 short ostate;
ebcadd38 56 struct in_addr laddr;
7aa16f99 57 int dropsocket = 0;
87e78f19
BJ
58
59 /*
4aed14e3
BJ
60 * Get IP and TCP header together in first mbuf.
61 * Note: IP leaves IP header in first mbuf.
87e78f19 62 */
2b4b57cd 63 m = m0;
20790db4 64 ti = mtod(m, struct tcpiphdr *);
4aed14e3 65 if (((struct ip *)ti)->ip_hl > (sizeof (struct ip) >> 2))
d63599ac 66 ip_stripoptions((struct ip *)ti, (struct mbuf *)0);
6703c41f
BJ
67 if (m->m_off > MMAXOFF || m->m_len < sizeof (struct tcpiphdr)) {
68 if ((m = m_pullup(m, sizeof (struct tcpiphdr))) == 0) {
8a13b737 69 tcpstat.tcps_hdrops++;
6703c41f 70 return;
8a13b737
BJ
71 }
72 ti = mtod(m, struct tcpiphdr *);
73 }
87e78f19 74
2b4b57cd 75 /*
4aed14e3 76 * Checksum extended TCP header and data.
2b4b57cd
BJ
77 */
78 tlen = ((struct ip *)ti)->ip_len;
79 len = sizeof (struct ip) + tlen;
60b16fa9 80 if (tcpcksum) {
2b4b57cd
BJ
81 ti->ti_next = ti->ti_prev = 0;
82 ti->ti_x1 = 0;
ac83b17a 83 ti->ti_len = (u_short)tlen;
668cc26d 84 ti->ti_len = htons((u_short)ti->ti_len);
4b6b94ca 85 if (ti->ti_sum = in_cksum(m, len)) {
ee954ff1
SL
86 if (tcpprintfs)
87 printf("tcp sum: src %x\n", ti->ti_src);
2b4b57cd 88 tcpstat.tcps_badsum++;
8a13b737 89 goto drop;
87e78f19
BJ
90 }
91 }
92
93 /*
4aed14e3 94 * Check that TCP offset makes sense,
8b5a83bb 95 * pull out TCP options and adjust length.
87e78f19 96 */
2b4b57cd 97 off = ti->ti_off << 2;
4b6b94ca 98 if (off < sizeof (struct tcphdr) || off > tlen) {
ee954ff1
SL
99 if (tcpprintfs)
100 printf("tcp off: src %x off %d\n", ti->ti_src, off);
2b4b57cd 101 tcpstat.tcps_badoff++;
8a13b737 102 goto drop;
2b4b57cd 103 }
1e977657
BJ
104 tlen -= off;
105 ti->ti_len = tlen;
8b5a83bb
BJ
106 if (off > sizeof (struct tcphdr)) {
107 if ((m = m_pullup(m, sizeof (struct ip) + off)) == 0) {
108 tcpstat.tcps_hdrops++;
01b1394e 109 return;
8b5a83bb
BJ
110 }
111 ti = mtod(m, struct tcpiphdr *);
cce93e4b 112 om = m_get(M_DONTWAIT, MT_DATA);
8b5a83bb
BJ
113 if (om == 0)
114 goto drop;
8b5a83bb
BJ
115 om->m_len = off - sizeof (struct tcphdr);
116 { caddr_t op = mtod(m, caddr_t) + sizeof (struct tcpiphdr);
668cc26d 117 bcopy(op, mtod(om, caddr_t), (unsigned)om->m_len);
8b5a83bb 118 m->m_len -= om->m_len;
668cc26d
SL
119 bcopy(op+om->m_len, op,
120 (unsigned)(m->m_len-sizeof (struct tcpiphdr)));
8b5a83bb
BJ
121 }
122 }
2ff61f9d 123 tiflags = ti->ti_flags;
2b4b57cd 124
795e0416 125 /*
1e977657 126 * Drop TCP and IP headers.
795e0416
BJ
127 */
128 off += sizeof (struct ip);
129 m->m_off += off;
130 m->m_len -= off;
131
8a13b737 132 /*
4aed14e3 133 * Convert TCP protocol specific fields to host format.
8a13b737
BJ
134 */
135 ti->ti_seq = ntohl(ti->ti_seq);
136 ti->ti_ack = ntohl(ti->ti_ack);
137 ti->ti_win = ntohs(ti->ti_win);
138 ti->ti_urp = ntohs(ti->ti_urp);
139
2b4b57cd 140 /*
8075bb0e 141 * Locate pcb for segment.
2b4b57cd 142 */
2ff61f9d 143 inp = in_pcblookup
ebcadd38
BJ
144 (&tcb, ti->ti_src, ti->ti_sport, ti->ti_dst, ti->ti_dport,
145 INPLOOKUP_WILDCARD);
2ff61f9d
BJ
146
147 /*
148 * If the state is CLOSED (i.e., TCB does not exist) then
4aed14e3 149 * all data in the incoming segment is discarded.
2ff61f9d 150 */
22856bb8 151 if (inp == 0)
8a13b737 152 goto dropwithreset;
2ff61f9d 153 tp = intotcpcb(inp);
22856bb8 154 if (tp == 0)
8a13b737 155 goto dropwithreset;
f1b2fa5b 156 so = inp->inp_socket;
4b935108
BJ
157 if (so->so_options & SO_DEBUG) {
158 ostate = tp->t_state;
159 tcp_saveti = *ti;
160 }
ebf42a75
BJ
161 if (so->so_options & SO_ACCEPTCONN) {
162 so = sonewconn(so);
163 if (so == 0)
164 goto drop;
7aa16f99
SL
165 /*
166 * This is ugly, but ....
167 *
168 * Mark socket as temporary until we're
169 * committed to keeping it. The code at
170 * ``drop'' and ``dropwithreset'' check the
171 * flag dropsocket to see if the temporary
172 * socket created here should be discarded.
173 * We mark the socket as discardable until
174 * we're committed to it below in TCPS_LISTEN.
175 */
176 dropsocket++;
ebf42a75
BJ
177 inp = (struct inpcb *)so->so_pcb;
178 inp->inp_laddr = ti->ti_dst;
179 inp->inp_lport = ti->ti_dport;
180 tp = intotcpcb(inp);
181 tp->t_state = TCPS_LISTEN;
182 }
87e78f19 183
405c9168
BJ
184 /*
185 * Segment received on connection.
186 * Reset idle time and keep-alive timer.
187 */
188 tp->t_idle = 0;
189 tp->t_timer[TCPT_KEEP] = TCPTV_KEEP;
190
8b5a83bb 191 /*
99578149
MK
192 * Process options if not in LISTEN state,
193 * else do it below (after getting remote address).
8b5a83bb 194 */
99578149
MK
195 if (om && tp->t_state != TCPS_LISTEN) {
196 tcp_dooptions(tp, om, ti);
8b5a83bb
BJ
197 om = 0;
198 }
199
87e78f19 200 /*
8a13b737
BJ
201 * Calculate amount of space in receive window,
202 * and then do TCP input processing.
87e78f19 203 */
8a13b737 204 tp->rcv_wnd = sbspace(&so->so_rcv);
4b6b94ca
BJ
205 if (tp->rcv_wnd < 0)
206 tp->rcv_wnd = 0;
2ff61f9d 207
87e78f19
BJ
208 switch (tp->t_state) {
209
2ff61f9d
BJ
210 /*
211 * If the state is LISTEN then ignore segment if it contains an RST.
212 * If the segment contains an ACK then it is bad and send a RST.
213 * If it does not contain a SYN then it is not interesting; drop it.
8a13b737 214 * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial
2ff61f9d 215 * tp->iss, and send a segment:
8a13b737 216 * <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK>
2ff61f9d
BJ
217 * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss.
218 * Fill in remote peer address fields if not previously specified.
219 * Enter SYN_RECEIVED state, and process any other fields of this
4aed14e3 220 * segment in this state.
2ff61f9d 221 */
8075bb0e 222 case TCPS_LISTEN: {
789d2a39 223 struct mbuf *am;
8075bb0e
BJ
224 register struct sockaddr_in *sin;
225
2ff61f9d
BJ
226 if (tiflags & TH_RST)
227 goto drop;
22856bb8 228 if (tiflags & TH_ACK)
8a13b737 229 goto dropwithreset;
22856bb8 230 if ((tiflags & TH_SYN) == 0)
2ff61f9d 231 goto drop;
789d2a39
SL
232 am = m_get(M_DONTWAIT, MT_SONAME);
233 if (am == NULL)
234 goto drop;
235 am->m_len = sizeof (struct sockaddr_in);
a8d3bf7f 236 sin = mtod(am, struct sockaddr_in *);
8075bb0e
BJ
237 sin->sin_family = AF_INET;
238 sin->sin_addr = ti->ti_src;
239 sin->sin_port = ti->ti_sport;
ebcadd38 240 laddr = inp->inp_laddr;
789d2a39 241 if (inp->inp_laddr.s_addr == INADDR_ANY)
ebcadd38 242 inp->inp_laddr = ti->ti_dst;
a8d3bf7f 243 if (in_pcbconnect(inp, am)) {
ebcadd38 244 inp->inp_laddr = laddr;
5a1f132a 245 (void) m_free(am);
4aed14e3 246 goto drop;
ebcadd38 247 }
5a1f132a 248 (void) m_free(am);
4aed14e3
BJ
249 tp->t_template = tcp_template(tp);
250 if (tp->t_template == 0) {
251 in_pcbdisconnect(inp);
a4f7ea71 252 dropsocket = 0; /* socket is already gone */
ebcadd38 253 inp->inp_laddr = laddr;
93f92b1d 254 tp = 0;
4aed14e3
BJ
255 goto drop;
256 }
99578149
MK
257 if (om) {
258 tcp_dooptions(tp, om, ti);
259 om = 0;
260 }
8a13b737 261 tp->iss = tcp_iss; tcp_iss += TCP_ISSINCR/2;
2ff61f9d 262 tp->irs = ti->ti_seq;
8a13b737
BJ
263 tcp_sendseqinit(tp);
264 tcp_rcvseqinit(tp);
2ff61f9d 265 tp->t_state = TCPS_SYN_RECEIVED;
4aed14e3 266 tp->t_timer[TCPT_KEEP] = TCPTV_KEEP;
7aa16f99 267 dropsocket = 0; /* committed to socket */
8a13b737 268 goto trimthenstep6;
8075bb0e 269 }
87e78f19 270
2ff61f9d
BJ
271 /*
272 * If the state is SYN_SENT:
273 * if seg contains an ACK, but not for our SYN, drop the input.
274 * if seg contains a RST, then drop the connection.
275 * if seg does not contain SYN, then drop it.
276 * Otherwise this is an acceptable SYN segment
277 * initialize tp->rcv_nxt and tp->irs
278 * if seg contains ack then advance tp->snd_una
279 * if SYN has been acked change to ESTABLISHED else SYN_RCVD state
280 * arrange for segment to be acked (eventually)
281 * continue processing rest of data/controls, beginning with URG
282 */
283 case TCPS_SYN_SENT:
284 if ((tiflags & TH_ACK) &&
22856bb8
BJ
285/* this should be SEQ_LT; is SEQ_LEQ for BBN vax TCP only */
286 (SEQ_LT(ti->ti_ack, tp->iss) ||
4b6b94ca 287 SEQ_GT(ti->ti_ack, tp->snd_max)))
8a13b737 288 goto dropwithreset;
2ff61f9d 289 if (tiflags & TH_RST) {
0e3936fa
SL
290 if (tiflags & TH_ACK)
291 tp = tcp_drop(tp, ECONNREFUSED);
2ff61f9d 292 goto drop;
87e78f19 293 }
2ff61f9d
BJ
294 if ((tiflags & TH_SYN) == 0)
295 goto drop;
4b6b94ca 296 tp->snd_una = ti->ti_ack;
b8977237
BJ
297 if (SEQ_LT(tp->snd_nxt, tp->snd_una))
298 tp->snd_nxt = tp->snd_una;
4aed14e3 299 tp->t_timer[TCPT_REXMT] = 0;
2ff61f9d 300 tp->irs = ti->ti_seq;
8a13b737
BJ
301 tcp_rcvseqinit(tp);
302 tp->t_flags |= TF_ACKNOW;
405c9168 303 if (SEQ_GT(tp->snd_una, tp->iss)) {
4aed14e3 304 soisconnected(so);
2ff61f9d 305 tp->t_state = TCPS_ESTABLISHED;
99578149 306 tp->t_maxseg = MIN(tp->t_maxseg, tcp_mss(tp));
405c9168
BJ
307 (void) tcp_reass(tp, (struct tcpiphdr *)0);
308 } else
8a13b737
BJ
309 tp->t_state = TCPS_SYN_RECEIVED;
310 goto trimthenstep6;
311
312trimthenstep6:
313 /*
4b6b94ca 314 * Advance ti->ti_seq to correspond to first data byte.
8a13b737
BJ
315 * If data, trim to stay within window,
316 * dropping FIN if necessary.
317 */
4b6b94ca 318 ti->ti_seq++;
8a13b737
BJ
319 if (ti->ti_len > tp->rcv_wnd) {
320 todrop = ti->ti_len - tp->rcv_wnd;
321 m_adj(m, -todrop);
322 ti->ti_len = tp->rcv_wnd;
323 ti->ti_flags &= ~TH_FIN;
87e78f19 324 }
e832edbc 325 tp->snd_wl1 = ti->ti_seq - 1;
8a13b737 326 goto step6;
2ff61f9d 327 }
87e78f19 328
ed6315dd
MK
329 /*
330 * If data is received on a connection after the
331 * user processes are gone, then RST the other end.
332 */
333 if ((so->so_state & SS_NOFDREF) && tp->t_state > TCPS_CLOSE_WAIT &&
334 ti->ti_len) {
335 tp = tcp_close(tp);
336 goto dropwithreset;
337 }
338
2ff61f9d
BJ
339 /*
340 * States other than LISTEN or SYN_SENT.
341 * First check that at least some bytes of segment are within
342 * receive window.
343 */
344 if (tp->rcv_wnd == 0) {
345 /*
346 * If window is closed can only take segments at
4b6b94ca 347 * window edge, and have to drop data and PUSH from
2ff61f9d
BJ
348 * incoming segments.
349 */
22856bb8 350 if (tp->rcv_nxt != ti->ti_seq)
2ff61f9d 351 goto dropafterack;
8a13b737 352 if (ti->ti_len > 0) {
fd5dc5f0 353 m_adj(m, ti->ti_len);
8a13b737
BJ
354 ti->ti_len = 0;
355 ti->ti_flags &= ~(TH_PUSH|TH_FIN);
87e78f19 356 }
2ff61f9d
BJ
357 } else {
358 /*
4b6b94ca 359 * If segment begins before rcv_nxt, drop leading
2ff61f9d
BJ
360 * data (and SYN); if nothing left, just ack.
361 */
fd5dc5f0
BJ
362 todrop = tp->rcv_nxt - ti->ti_seq;
363 if (todrop > 0) {
8a13b737 364 if (tiflags & TH_SYN) {
22856bb8 365 tiflags &= ~TH_SYN;
fd5dc5f0 366 ti->ti_flags &= ~TH_SYN;
8a13b737
BJ
367 ti->ti_seq++;
368 if (ti->ti_urp > 1)
369 ti->ti_urp--;
370 else
371 tiflags &= ~TH_URG;
372 todrop--;
373 }
1e977657
BJ
374 if (todrop > ti->ti_len ||
375 todrop == ti->ti_len && (tiflags&TH_FIN) == 0)
2ff61f9d
BJ
376 goto dropafterack;
377 m_adj(m, todrop);
378 ti->ti_seq += todrop;
379 ti->ti_len -= todrop;
8a13b737
BJ
380 if (ti->ti_urp > todrop)
381 ti->ti_urp -= todrop;
382 else {
383 tiflags &= ~TH_URG;
fd5dc5f0
BJ
384 ti->ti_flags &= ~TH_URG;
385 ti->ti_urp = 0;
8a13b737 386 }
2ff61f9d
BJ
387 }
388 /*
389 * If segment ends after window, drop trailing data
8a13b737 390 * (and PUSH and FIN); if nothing left, just ACK.
2ff61f9d 391 */
fd5dc5f0
BJ
392 todrop = (ti->ti_seq+ti->ti_len) - (tp->rcv_nxt+tp->rcv_wnd);
393 if (todrop > 0) {
1e977657 394 if (todrop >= ti->ti_len)
2ff61f9d
BJ
395 goto dropafterack;
396 m_adj(m, -todrop);
397 ti->ti_len -= todrop;
8a13b737 398 ti->ti_flags &= ~(TH_PUSH|TH_FIN);
87e78f19 399 }
87e78f19 400 }
87e78f19 401
87e78f19 402 /*
2ff61f9d
BJ
403 * If the RST bit is set examine the state:
404 * SYN_RECEIVED STATE:
405 * If passive open, return to LISTEN state.
406 * If active open, inform user that connection was refused.
407 * ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES:
408 * Inform user that connection was reset, and close tcb.
409 * CLOSING, LAST_ACK, TIME_WAIT STATES
410 * Close the tcb.
87e78f19 411 */
2ff61f9d 412 if (tiflags&TH_RST) switch (tp->t_state) {
4b935108 413
2ff61f9d 414 case TCPS_SYN_RECEIVED:
0e3936fa 415 tp = tcp_drop(tp, ECONNREFUSED);
2ff61f9d
BJ
416 goto drop;
417
418 case TCPS_ESTABLISHED:
419 case TCPS_FIN_WAIT_1:
420 case TCPS_FIN_WAIT_2:
421 case TCPS_CLOSE_WAIT:
0e3936fa 422 tp = tcp_drop(tp, ECONNRESET);
2ff61f9d
BJ
423 goto drop;
424
425 case TCPS_CLOSING:
426 case TCPS_LAST_ACK:
427 case TCPS_TIME_WAIT:
0e3936fa 428 tp = tcp_close(tp);
2ff61f9d 429 goto drop;
87e78f19 430 }
87e78f19
BJ
431
432 /*
2ff61f9d
BJ
433 * If a SYN is in the window, then this is an
434 * error and we send an RST and drop the connection.
435 */
436 if (tiflags & TH_SYN) {
0e3936fa 437 tp = tcp_drop(tp, ECONNRESET);
8a13b737 438 goto dropwithreset;
2ff61f9d
BJ
439 }
440
441 /*
442 * If the ACK bit is off we drop the segment and return.
443 */
8a13b737 444 if ((tiflags & TH_ACK) == 0)
2ff61f9d
BJ
445 goto drop;
446
447 /*
448 * Ack processing.
87e78f19 449 */
87e78f19
BJ
450 switch (tp->t_state) {
451
2ff61f9d
BJ
452 /*
453 * In SYN_RECEIVED state if the ack ACKs our SYN then enter
454 * ESTABLISHED state and continue processing, othewise
455 * send an RST.
456 */
457 case TCPS_SYN_RECEIVED:
8a13b737 458 if (SEQ_GT(tp->snd_una, ti->ti_ack) ||
4b6b94ca 459 SEQ_GT(ti->ti_ack, tp->snd_max))
8a13b737 460 goto dropwithreset;
4aed14e3 461 tp->snd_una++; /* SYN acked */
b8977237
BJ
462 if (SEQ_LT(tp->snd_nxt, tp->snd_una))
463 tp->snd_nxt = tp->snd_una;
4aed14e3 464 tp->t_timer[TCPT_REXMT] = 0;
8a13b737
BJ
465 soisconnected(so);
466 tp->t_state = TCPS_ESTABLISHED;
99578149 467 tp->t_maxseg = MIN(tp->t_maxseg, tcp_mss(tp));
405c9168 468 (void) tcp_reass(tp, (struct tcpiphdr *)0);
4aed14e3 469 tp->snd_wl1 = ti->ti_seq - 1;
8a13b737 470 /* fall into ... */
87e78f19 471
2ff61f9d
BJ
472 /*
473 * In ESTABLISHED state: drop duplicate ACKs; ACK out of range
474 * ACKs. If the ack is in the range
4b6b94ca 475 * tp->snd_una < ti->ti_ack <= tp->snd_max
2ff61f9d
BJ
476 * then advance tp->snd_una to ti->ti_ack and drop
477 * data from the retransmission queue. If this ACK reflects
478 * more up to date window information we update our window information.
479 */
480 case TCPS_ESTABLISHED:
481 case TCPS_FIN_WAIT_1:
482 case TCPS_FIN_WAIT_2:
483 case TCPS_CLOSE_WAIT:
484 case TCPS_CLOSING:
4aed14e3
BJ
485 case TCPS_LAST_ACK:
486 case TCPS_TIME_WAIT:
8a13b737
BJ
487#define ourfinisacked (acked > 0)
488
4aed14e3 489 if (SEQ_LEQ(ti->ti_ack, tp->snd_una))
2ff61f9d 490 break;
22856bb8 491 if (SEQ_GT(ti->ti_ack, tp->snd_max))
2ff61f9d 492 goto dropafterack;
8a13b737 493 acked = ti->ti_ack - tp->snd_una;
dd020fc8
BJ
494
495 /*
496 * If transmit timer is running and timed sequence
497 * number was acked, update smoothed round trip time.
498 */
499 if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq)) {
500 if (tp->t_srtt == 0)
501 tp->t_srtt = tp->t_rtt;
502 else
503 tp->t_srtt =
504 tcp_alpha * tp->t_srtt +
505 (1 - tcp_alpha) * tp->t_rtt;
dd020fc8
BJ
506 tp->t_rtt = 0;
507 }
508
6703c41f 509 if (ti->ti_ack == tp->snd_max)
4aed14e3 510 tp->t_timer[TCPT_REXMT] = 0;
6703c41f 511 else {
4aed14e3
BJ
512 TCPT_RANGESET(tp->t_timer[TCPT_REXMT],
513 tcp_beta * tp->t_srtt, TCPTV_MIN, TCPTV_MAX);
22856bb8 514 tp->t_rxtshift = 0;
8a13b737 515 }
1e9621b8
MK
516 /*
517 * When new data is acked, open the congestion window a bit.
518 */
519 if (acked > 0)
520 tp->snd_cwnd = MIN(11 * tp->snd_cwnd / 10, 65535);
6703c41f 521 if (acked > so->so_snd.sb_cc) {
6703c41f 522 tp->snd_wnd -= so->so_snd.sb_cc;
26e96dd8 523 sbdrop(&so->so_snd, so->so_snd.sb_cc);
6703c41f 524 } else {
668cc26d 525 sbdrop(&so->so_snd, acked);
6703c41f
BJ
526 tp->snd_wnd -= acked;
527 acked = 0;
528 }
5744ed2b 529 if ((so->so_snd.sb_flags & SB_WAIT) || so->so_snd.sb_sel)
22856bb8 530 sowwakeup(so);
4b6b94ca 531 tp->snd_una = ti->ti_ack;
b8977237
BJ
532 if (SEQ_LT(tp->snd_nxt, tp->snd_una))
533 tp->snd_nxt = tp->snd_una;
405c9168 534
87e78f19
BJ
535 switch (tp->t_state) {
536
2ff61f9d
BJ
537 /*
538 * In FIN_WAIT_1 STATE in addition to the processing
539 * for the ESTABLISHED state if our FIN is now acknowledged
8a13b737 540 * then enter FIN_WAIT_2.
2ff61f9d
BJ
541 */
542 case TCPS_FIN_WAIT_1:
fdae4427
BJ
543 if (ourfinisacked) {
544 /*
545 * If we can't receive any more
546 * data, then closing user can proceed.
547 */
548 if (so->so_state & SS_CANTRCVMORE)
549 soisdisconnected(so);
8a13b737 550 tp->t_state = TCPS_FIN_WAIT_2;
a4f7ea71
MK
551 /*
552 * This is contrary to the specification,
553 * but if we haven't gotten our FIN in
554 * 5 minutes, it's not forthcoming.
0d115dcc 555 tp->t_timer[TCPT_2MSL] = 5 * 60 * PR_SLOWHZ;
07846d26
MK
556 * MUST WORRY ABOUT ONE-WAY CONNECTIONS.
557 */
fdae4427 558 }
87e78f19
BJ
559 break;
560
2ff61f9d
BJ
561 /*
562 * In CLOSING STATE in addition to the processing for
563 * the ESTABLISHED state if the ACK acknowledges our FIN
564 * then enter the TIME-WAIT state, otherwise ignore
565 * the segment.
566 */
567 case TCPS_CLOSING:
4aed14e3 568 if (ourfinisacked) {
2ff61f9d 569 tp->t_state = TCPS_TIME_WAIT;
4aed14e3
BJ
570 tcp_canceltimers(tp);
571 tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
572 soisdisconnected(so);
573 }
574 break;
87e78f19 575
2ff61f9d 576 /*
8a13b737
BJ
577 * The only thing that can arrive in LAST_ACK state
578 * is an acknowledgment of our FIN. If our FIN is now
579 * acknowledged, delete the TCB, enter the closed state
580 * and return.
2ff61f9d
BJ
581 */
582 case TCPS_LAST_ACK:
0e3936fa
SL
583 if (ourfinisacked)
584 tp = tcp_close(tp);
2ff61f9d 585 goto drop;
87e78f19 586
2ff61f9d
BJ
587 /*
588 * In TIME_WAIT state the only thing that should arrive
589 * is a retransmission of the remote FIN. Acknowledge
590 * it and restart the finack timer.
591 */
592 case TCPS_TIME_WAIT:
405c9168 593 tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
2ff61f9d 594 goto dropafterack;
87e78f19 595 }
8a13b737
BJ
596#undef ourfinisacked
597 }
87e78f19 598
2ff61f9d 599step6:
4aed14e3
BJ
600 /*
601 * Update window information.
602 */
22856bb8 603 if (SEQ_LT(tp->snd_wl1, ti->ti_seq) || tp->snd_wl1 == ti->ti_seq &&
8e65fd66 604 (SEQ_LT(tp->snd_wl2, ti->ti_ack) ||
22856bb8 605 tp->snd_wl2 == ti->ti_ack && ti->ti_win > tp->snd_wnd)) {
4aed14e3
BJ
606 tp->snd_wnd = ti->ti_win;
607 tp->snd_wl1 = ti->ti_seq;
608 tp->snd_wl2 = ti->ti_ack;
4aed14e3 609 }
4aed14e3 610
2ff61f9d 611 /*
b2db9217 612 * Process segments with URG.
2ff61f9d 613 */
9c811062
BJ
614 if ((tiflags & TH_URG) && ti->ti_urp &&
615 TCPS_HAVERCVDFIN(tp->t_state) == 0) {
f4be5024 616 /*
a5d9c993
SL
617 * This is a kludge, but if we receive accept
618 * random urgent pointers, we'll crash in
619 * soreceive. It's hard to imagine someone
620 * actually wanting to send this much urgent data.
f4be5024 621 */
1e9621b8 622 if (ti->ti_urp + (unsigned) so->so_rcv.sb_cc > 32767) {
f4be5024
SL
623 ti->ti_urp = 0; /* XXX */
624 tiflags &= ~TH_URG; /* XXX */
625 ti->ti_flags &= ~TH_URG; /* XXX */
a5d9c993 626 goto badurp; /* XXX */
f4be5024 627 }
b2db9217
BJ
628 /*
629 * If this segment advances the known urgent pointer,
630 * then mark the data stream. This should not happen
631 * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since
632 * a FIN has been received from the remote side.
633 * In these states we ignore the URG.
634 */
635 if (SEQ_GT(ti->ti_seq+ti->ti_urp, tp->rcv_up)) {
636 tp->rcv_up = ti->ti_seq + ti->ti_urp;
637 so->so_oobmark = so->so_rcv.sb_cc +
638 (tp->rcv_up - tp->rcv_nxt) - 1;
639 if (so->so_oobmark == 0)
640 so->so_state |= SS_RCVATMARK;
77a4e3ca 641 sohasoutofband(so);
b2db9217
BJ
642 tp->t_oobflags &= ~TCPOOB_HAVEDATA;
643 }
644 /*
645 * Remove out of band data so doesn't get presented to user.
646 * This can happen independent of advancing the URG pointer,
647 * but if two URG's are pending at once, some out-of-band
648 * data may creep in... ick.
649 */
ebf42a75 650 if (ti->ti_urp <= ti->ti_len)
b2db9217 651 tcp_pulloutofband(so, ti);
5e74df82 652 }
a5d9c993 653badurp: /* XXX */
87e78f19
BJ
654
655 /*
2ff61f9d
BJ
656 * Process the segment text, merging it into the TCP sequencing queue,
657 * and arranging for acknowledgment of receipt if necessary.
658 * This process logically involves adjusting tp->rcv_wnd as data
659 * is presented to the user (this happens in tcp_usrreq.c,
660 * case PRU_RCVD). If a FIN has already been received on this
661 * connection then we just ignore the text.
87e78f19 662 */
7984a662
MK
663 if ((ti->ti_len || (tiflags&TH_FIN)) &&
664 TCPS_HAVERCVDFIN(tp->t_state) == 0) {
665 tiflags = tcp_reass(tp, ti);
8b5a83bb
BJ
666 if (tcpnodelack == 0)
667 tp->t_flags |= TF_DELACK;
668 else
669 tp->t_flags |= TF_ACKNOW;
4aed14e3 670 } else {
2b4b57cd 671 m_freem(m);
e832edbc 672 tiflags &= ~TH_FIN;
4aed14e3 673 }
87e78f19
BJ
674
675 /*
e832edbc
BJ
676 * If FIN is received ACK the FIN and let the user know
677 * that the connection is closing.
87e78f19 678 */
e832edbc 679 if (tiflags & TH_FIN) {
4aed14e3
BJ
680 if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
681 socantrcvmore(so);
682 tp->t_flags |= TF_ACKNOW;
683 tp->rcv_nxt++;
684 }
2ff61f9d 685 switch (tp->t_state) {
87e78f19 686
2ff61f9d
BJ
687 /*
688 * In SYN_RECEIVED and ESTABLISHED STATES
689 * enter the CLOSE_WAIT state.
53a5409e 690 */
2ff61f9d
BJ
691 case TCPS_SYN_RECEIVED:
692 case TCPS_ESTABLISHED:
693 tp->t_state = TCPS_CLOSE_WAIT;
694 break;
53a5409e 695
2ff61f9d 696 /*
8a13b737
BJ
697 * If still in FIN_WAIT_1 STATE FIN has not been acked so
698 * enter the CLOSING state.
53a5409e 699 */
2ff61f9d 700 case TCPS_FIN_WAIT_1:
8a13b737 701 tp->t_state = TCPS_CLOSING;
2ff61f9d 702 break;
87e78f19 703
2ff61f9d
BJ
704 /*
705 * In FIN_WAIT_2 state enter the TIME_WAIT state,
706 * starting the time-wait timer, turning off the other
707 * standard timers.
708 */
709 case TCPS_FIN_WAIT_2:
4aed14e3 710 tp->t_state = TCPS_TIME_WAIT;
a6503abf 711 tcp_canceltimers(tp);
405c9168 712 tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
4aed14e3 713 soisdisconnected(so);
2ff61f9d
BJ
714 break;
715
53a5409e 716 /*
2ff61f9d 717 * In TIME_WAIT state restart the 2 MSL time_wait timer.
53a5409e 718 */
2ff61f9d 719 case TCPS_TIME_WAIT:
405c9168 720 tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
2ff61f9d 721 break;
8a13b737 722 }
87e78f19 723 }
4b935108
BJ
724 if (so->so_options & SO_DEBUG)
725 tcp_trace(TA_INPUT, ostate, tp, &tcp_saveti, 0);
8a13b737
BJ
726
727 /*
728 * Return any desired output.
729 */
668cc26d 730 (void) tcp_output(tp);
2ff61f9d 731 return;
8a13b737 732
2ff61f9d 733dropafterack:
8a13b737 734 /*
1e977657
BJ
735 * Generate an ACK dropping incoming segment if it occupies
736 * sequence space, where the ACK reflects our state.
8a13b737 737 */
1e977657
BJ
738 if ((tiflags&TH_RST) ||
739 tlen == 0 && (tiflags&(TH_SYN|TH_FIN)) == 0)
8a13b737 740 goto drop;
f3cdd721
BJ
741 if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
742 tcp_trace(TA_RESPOND, ostate, tp, &tcp_saveti, 0);
8e65fd66 743 tcp_respond(tp, ti, tp->rcv_nxt, tp->snd_nxt, TH_ACK);
4b6b94ca 744 return;
8a13b737
BJ
745
746dropwithreset:
f37c1c84 747 if (om) {
668cc26d 748 (void) m_free(om);
f37c1c84
SL
749 om = 0;
750 }
8a13b737 751 /*
4aed14e3 752 * Generate a RST, dropping incoming segment.
8a13b737
BJ
753 * Make ACK acceptable to originator of segment.
754 */
755 if (tiflags & TH_RST)
756 goto drop;
757 if (tiflags & TH_ACK)
8e65fd66 758 tcp_respond(tp, ti, (tcp_seq)0, ti->ti_ack, TH_RST);
8a13b737
BJ
759 else {
760 if (tiflags & TH_SYN)
761 ti->ti_len++;
1e977657
BJ
762 tcp_respond(tp, ti, ti->ti_seq+ti->ti_len, (tcp_seq)0,
763 TH_RST|TH_ACK);
8a13b737 764 }
7aa16f99
SL
765 /* destroy temporarily created socket */
766 if (dropsocket)
767 (void) soabort(so);
4b6b94ca 768 return;
8a13b737 769
2ff61f9d 770drop:
01b1394e
SL
771 if (om)
772 (void) m_free(om);
8a13b737
BJ
773 /*
774 * Drop space held by incoming segment and return.
775 */
f3cdd721
BJ
776 if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
777 tcp_trace(TA_DROP, ostate, tp, &tcp_saveti, 0);
2ff61f9d 778 m_freem(m);
7aa16f99
SL
779 /* destroy temporarily created socket */
780 if (dropsocket)
781 (void) soabort(so);
4b935108 782 return;
2ff61f9d
BJ
783}
784
99578149 785tcp_dooptions(tp, om, ti)
8b5a83bb
BJ
786 struct tcpcb *tp;
787 struct mbuf *om;
99578149 788 struct tcpiphdr *ti;
5e74df82 789{
8b5a83bb
BJ
790 register u_char *cp;
791 int opt, optlen, cnt;
792
793 cp = mtod(om, u_char *);
794 cnt = om->m_len;
795 for (; cnt > 0; cnt -= optlen, cp += optlen) {
796 opt = cp[0];
797 if (opt == TCPOPT_EOL)
798 break;
799 if (opt == TCPOPT_NOP)
800 optlen = 1;
357b20fc 801 else {
8b5a83bb 802 optlen = cp[1];
357b20fc
SL
803 if (optlen <= 0)
804 break;
805 }
8b5a83bb
BJ
806 switch (opt) {
807
808 default:
809 break;
810
811 case TCPOPT_MAXSEG:
812 if (optlen != 4)
813 continue;
99578149
MK
814 if (!(ti->ti_flags & TH_SYN))
815 continue;
8b5a83bb 816 tp->t_maxseg = *(u_short *)(cp + 2);
668cc26d 817 tp->t_maxseg = ntohs((u_short)tp->t_maxseg);
99578149 818 tp->t_maxseg = MIN(tp->t_maxseg, tcp_mss(tp));
8b5a83bb 819 break;
8b5a83bb 820 }
5e74df82 821 }
668cc26d 822 (void) m_free(om);
5e74df82
BJ
823}
824
b2db9217
BJ
825/*
826 * Pull out of band byte out of a segment so
827 * it doesn't appear in the user's data queue.
828 * It is still reflected in the segment length for
829 * sequencing purposes.
830 */
831tcp_pulloutofband(so, ti)
832 struct socket *so;
833 struct tcpiphdr *ti;
834{
835 register struct mbuf *m;
1acff8ec 836 int cnt = ti->ti_urp - 1;
b2db9217
BJ
837
838 m = dtom(ti);
839 while (cnt >= 0) {
840 if (m->m_len > cnt) {
841 char *cp = mtod(m, caddr_t) + cnt;
842 struct tcpcb *tp = sototcpcb(so);
843
844 tp->t_iobc = *cp;
845 tp->t_oobflags |= TCPOOB_HAVEDATA;
668cc26d 846 bcopy(cp+1, cp, (unsigned)(m->m_len - cnt - 1));
b2db9217
BJ
847 m->m_len--;
848 return;
849 }
850 cnt -= m->m_len;
851 m = m->m_next;
852 if (m == 0)
853 break;
854 }
855 panic("tcp_pulloutofband");
856}
857
2ff61f9d
BJ
858/*
859 * Insert segment ti into reassembly queue of tcp with
860 * control block tp. Return TH_FIN if reassembly now includes
861 * a segment with FIN.
862 */
f1b2fa5b 863tcp_reass(tp, ti)
2ff61f9d
BJ
864 register struct tcpcb *tp;
865 register struct tcpiphdr *ti;
2ff61f9d
BJ
866{
867 register struct tcpiphdr *q;
8a13b737 868 struct socket *so = tp->t_inpcb->inp_socket;
e832edbc
BJ
869 struct mbuf *m;
870 int flags;
2ff61f9d
BJ
871
872 /*
405c9168
BJ
873 * Call with ti==0 after become established to
874 * force pre-ESTABLISHED data up to user socket.
2ff61f9d 875 */
405c9168 876 if (ti == 0)
2ff61f9d 877 goto present;
87e78f19 878
2ff61f9d
BJ
879 /*
880 * Find a segment which begins after this one does.
881 */
882 for (q = tp->seg_next; q != (struct tcpiphdr *)tp;
883 q = (struct tcpiphdr *)q->ti_next)
884 if (SEQ_GT(q->ti_seq, ti->ti_seq))
885 break;
886
887 /*
888 * If there is a preceding segment, it may provide some of
889 * our data already. If so, drop the data from the incoming
890 * segment. If it provides all of our data, drop us.
891 */
892 if ((struct tcpiphdr *)q->ti_prev != (struct tcpiphdr *)tp) {
893 register int i;
fd5dc5f0 894 q = (struct tcpiphdr *)q->ti_prev;
2ff61f9d
BJ
895 /* conversion to int (in i) handles seq wraparound */
896 i = q->ti_seq + q->ti_len - ti->ti_seq;
897 if (i > 0) {
2b4b57cd 898 if (i >= ti->ti_len)
2ff61f9d 899 goto drop;
4ab1a5c3 900 m_adj(dtom(ti), i);
2b4b57cd 901 ti->ti_len -= i;
2ff61f9d 902 ti->ti_seq += i;
53a5409e 903 }
2ff61f9d
BJ
904 q = (struct tcpiphdr *)(q->ti_next);
905 }
87e78f19 906
2ff61f9d
BJ
907 /*
908 * While we overlap succeeding segments trim them or,
909 * if they are completely covered, dequeue them.
910 */
fd5dc5f0 911 while (q != (struct tcpiphdr *)tp) {
2ff61f9d 912 register int i = (ti->ti_seq + ti->ti_len) - q->ti_seq;
fd5dc5f0
BJ
913 if (i <= 0)
914 break;
2ff61f9d 915 if (i < q->ti_len) {
fd5dc5f0 916 q->ti_seq += i;
2ff61f9d
BJ
917 q->ti_len -= i;
918 m_adj(dtom(q), i);
919 break;
ac5e71a1 920 }
2ff61f9d 921 q = (struct tcpiphdr *)q->ti_next;
473a17a5 922 m = dtom(q->ti_prev);
2ff61f9d 923 remque(q->ti_prev);
473a17a5 924 m_freem(m);
2ff61f9d 925 }
87e78f19 926
2ff61f9d
BJ
927 /*
928 * Stick new segment in its place.
929 */
930 insque(ti, q->ti_prev);
2ff61f9d 931
2ff61f9d
BJ
932present:
933 /*
4aed14e3
BJ
934 * Present data to user, advancing rcv_nxt through
935 * completed sequence space.
2ff61f9d 936 */
e832edbc 937 if (TCPS_HAVERCVDSYN(tp->t_state) == 0)
4aed14e3 938 return (0);
2b4b57cd 939 ti = tp->seg_next;
e832edbc
BJ
940 if (ti == (struct tcpiphdr *)tp || ti->ti_seq != tp->rcv_nxt)
941 return (0);
942 if (tp->t_state == TCPS_SYN_RECEIVED && ti->ti_len)
943 return (0);
944 do {
4aed14e3
BJ
945 tp->rcv_nxt += ti->ti_len;
946 flags = ti->ti_flags & TH_FIN;
2b4b57cd 947 remque(ti);
e832edbc 948 m = dtom(ti);
2b4b57cd 949 ti = (struct tcpiphdr *)ti->ti_next;
e832edbc 950 if (so->so_state & SS_CANTRCVMORE)
668cc26d 951 m_freem(m);
789d2a39 952 else
e832edbc
BJ
953 sbappend(&so->so_rcv, m);
954 } while (ti != (struct tcpiphdr *)tp && ti->ti_seq == tp->rcv_nxt);
955 sorwakeup(so);
2ff61f9d
BJ
956 return (flags);
957drop:
958 m_freem(dtom(ti));
e832edbc 959 return (0);
d52566dd 960}
99578149
MK
961
962/*
963 * Determine a reasonable value for maxseg size.
964 * If the route is known, use one that can be handled
965 * on the given interface without forcing IP to fragment.
966 * If bigger than a page (CLSIZE), round down to nearest pagesize
967 * to utilize pagesize mbufs.
968 * If interface pointer is unavailable, or the destination isn't local,
969 * use a conservative size (512 or the default IP max size),
970 * as we can't discover anything about intervening gateways or networks.
971 *
972 * This is ugly, and doesn't belong at this level, but has to happen somehow.
973 */
974tcp_mss(tp)
975register struct tcpcb *tp;
976{
977 struct route *ro;
978 struct ifnet *ifp;
979 int mss;
980 struct inpcb *inp;
981
982 inp = tp->t_inpcb;
983 ro = &inp->inp_route;
984 if ((ro->ro_rt == (struct rtentry *)0) ||
985 (ifp = ro->ro_rt->rt_ifp) == (struct ifnet *)0) {
986 /* No route yet, so try to acquire one */
987 if (inp->inp_faddr.s_addr != INADDR_ANY) {
988 ro->ro_dst.sa_family = AF_INET;
989 ((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
990 inp->inp_faddr;
991 rtalloc(ro);
992 }
993 if ((ro->ro_rt == 0) || (ifp = ro->ro_rt->rt_ifp) == 0)
0d115dcc 994 return (TCP_MSS);
99578149
MK
995 }
996
997 mss = ifp->if_mtu - sizeof(struct tcpiphdr);
998#if (CLBYTES & (CLBYTES - 1)) == 0
999 if (mss > CLBYTES)
1000 mss &= ~(CLBYTES-1);
1001#else
1002 if (mss > CLBYTES)
1003 mss = mss / CLBYTES * CLBYTES;
1004#endif
1005 if (in_localaddr(tp->t_inpcb->inp_faddr))
1006 return(mss);
0d115dcc 1007 return (MIN(mss, TCP_MSS));
99578149 1008}