ack immediately when segment is received out of order;
[unix-history] / usr / src / sys / netinet / tcp_input.c
CommitLineData
8ae0e4b4 1/*
8a36cf82 2 * Copyright (c) 1982, 1986, 1988 Regents of the University of California.
2b6b6284 3 * All rights reserved.
8ae0e4b4 4 *
2b6b6284
KB
5 * Redistribution and use in source and binary forms are permitted
6 * provided that this notice is preserved and that due credit is given
7 * to the University of California at Berkeley. The name of the University
8 * may not be used to endorse or promote products derived from this
9 * software without specific prior written permission. This software
10 * is provided ``as is'' without express or implied warranty.
11 *
9e4788e4 12 * @(#)tcp_input.c 7.18 (Berkeley) %G%
8ae0e4b4 13 */
87e78f19 14
20666ad3
JB
15#include "param.h"
16#include "systm.h"
17#include "mbuf.h"
18#include "protosw.h"
19#include "socket.h"
20#include "socketvar.h"
21#include "errno.h"
f4d55810
SL
22
23#include "../net/if.h"
c124e997 24#include "../net/route.h"
f4d55810 25
20666ad3
JB
26#include "in.h"
27#include "in_pcb.h"
28#include "in_systm.h"
29#include "ip.h"
30#include "ip_var.h"
31#include "tcp.h"
32#include "tcp_fsm.h"
33#include "tcp_seq.h"
34#include "tcp_timer.h"
35#include "tcp_var.h"
36#include "tcpip.h"
37#include "tcp_debug.h"
87e78f19 38
/*
 * File-scope knobs and scratch state used by the TCP input path.
 */
/* When nonzero, tcp_input printf()s a diagnostic for bad checksums and bad data offsets. */
22856bb8 39int tcpprintfs = 0;
/* When nonzero, tcp_input verifies the checksum of each incoming segment. */
60b16fa9 40int tcpcksum = 1;
/* Duplicate-ACK count at which tcp_input retransmits the apparently lost segment. */
386369f5 41int tcprexmtthresh = 3;
/* Copy of the incoming TCP/IP header, saved by tcp_input when the socket has SO_DEBUG set. */
4b935108 42struct tcpiphdr tcp_saveti;
87e78f19 43
/* Defined elsewhere; declared here so its pointer return type is known in this file. */
4b935108 44struct tcpcb *tcp_newtcpcb();
a17510f3
MK
45
/*
 * Insert segment ti into reassembly queue of tcp with
 * control block tp.  On exit, flags holds TH_FIN if the data
 * handed to the socket ended with a segment carrying FIN,
 * otherwise 0.
 *
 * The macro form does the common case inline (segment is the next
 * to be received on an established connection, and the reassembly
 * queue is empty): it advances rcv_nxt, updates the receive
 * statistics, appends mbuf chain m to the receive buffer of socket so
 * and wakes up any reader, avoiding linkage into and removal from
 * the queue.  Every other case is handed to tcp_reass().
 *
 * Set TF_DELACK for segments received in order, but set TF_ACKNOW
 * when the segment went through tcp_reass() (so that the sender's
 * fast retransmit can work).
 *
 * Every parameter is parenthesized at each use so that arbitrary
 * expressions (e.g. &cb) may be passed.  Arguments are evaluated
 * more than once, so they must be free of side effects.
 */
#define	TCP_REASS(tp, ti, m, so, flags) { \
	if ((ti)->ti_seq == (tp)->rcv_nxt && \
	    (tp)->seg_next == (struct tcpiphdr *)(tp) && \
	    (tp)->t_state == TCPS_ESTABLISHED) { \
		(tp)->t_flags |= TF_DELACK; \
		(tp)->rcv_nxt += (ti)->ti_len; \
		(flags) = (ti)->ti_flags & TH_FIN; \
		tcpstat.tcps_rcvpack++; \
		tcpstat.tcps_rcvbyte += (ti)->ti_len; \
		sbappend(&(so)->so_rcv, (m)); \
		sorwakeup(so); \
	} else { \
		(flags) = tcp_reass((tp), (ti)); \
		(tp)->t_flags |= TF_ACKNOW; \
	} \
}
72
73tcp_reass(tp, ti)
74 register struct tcpcb *tp;
75 register struct tcpiphdr *ti;
76{
77 register struct tcpiphdr *q;
78 struct socket *so = tp->t_inpcb->inp_socket;
79 struct mbuf *m;
80 int flags;
81
82 /*
83 * Call with ti==0 after become established to
84 * force pre-ESTABLISHED data up to user socket.
85 */
86 if (ti == 0)
87 goto present;
88
89 /*
90 * Find a segment which begins after this one does.
91 */
92 for (q = tp->seg_next; q != (struct tcpiphdr *)tp;
93 q = (struct tcpiphdr *)q->ti_next)
94 if (SEQ_GT(q->ti_seq, ti->ti_seq))
95 break;
96
97 /*
98 * If there is a preceding segment, it may provide some of
99 * our data already. If so, drop the data from the incoming
100 * segment. If it provides all of our data, drop us.
101 */
102 if ((struct tcpiphdr *)q->ti_prev != (struct tcpiphdr *)tp) {
103 register int i;
104 q = (struct tcpiphdr *)q->ti_prev;
105 /* conversion to int (in i) handles seq wraparound */
106 i = q->ti_seq + q->ti_len - ti->ti_seq;
107 if (i > 0) {
96c50630
MK
108 if (i >= ti->ti_len) {
109 tcpstat.tcps_rcvduppack++;
110 tcpstat.tcps_rcvdupbyte += ti->ti_len;
a17510f3 111 goto drop;
96c50630 112 }
a17510f3
MK
113 m_adj(dtom(ti), i);
114 ti->ti_len -= i;
115 ti->ti_seq += i;
116 }
117 q = (struct tcpiphdr *)(q->ti_next);
118 }
96c50630
MK
119 tcpstat.tcps_rcvoopack++;
120 tcpstat.tcps_rcvoobyte += ti->ti_len;
a17510f3
MK
121
122 /*
123 * While we overlap succeeding segments trim them or,
124 * if they are completely covered, dequeue them.
125 */
126 while (q != (struct tcpiphdr *)tp) {
127 register int i = (ti->ti_seq + ti->ti_len) - q->ti_seq;
128 if (i <= 0)
129 break;
130 if (i < q->ti_len) {
131 q->ti_seq += i;
132 q->ti_len -= i;
133 m_adj(dtom(q), i);
134 break;
135 }
136 q = (struct tcpiphdr *)q->ti_next;
137 m = dtom(q->ti_prev);
138 remque(q->ti_prev);
139 m_freem(m);
140 }
141
142 /*
143 * Stick new segment in its place.
144 */
145 insque(ti, q->ti_prev);
146
147present:
148 /*
149 * Present data to user, advancing rcv_nxt through
150 * completed sequence space.
151 */
152 if (TCPS_HAVERCVDSYN(tp->t_state) == 0)
153 return (0);
154 ti = tp->seg_next;
155 if (ti == (struct tcpiphdr *)tp || ti->ti_seq != tp->rcv_nxt)
156 return (0);
157 if (tp->t_state == TCPS_SYN_RECEIVED && ti->ti_len)
158 return (0);
159 do {
160 tp->rcv_nxt += ti->ti_len;
161 flags = ti->ti_flags & TH_FIN;
162 remque(ti);
163 m = dtom(ti);
164 ti = (struct tcpiphdr *)ti->ti_next;
165 if (so->so_state & SS_CANTRCVMORE)
166 m_freem(m);
167 else
168 sbappend(&so->so_rcv, m);
169 } while (ti != (struct tcpiphdr *)tp && ti->ti_seq == tp->rcv_nxt);
170 sorwakeup(so);
171 return (flags);
172drop:
173 m_freem(dtom(ti));
174 return (0);
175}
176
2ff61f9d
BJ
177/*
178 * TCP input routine, follows pages 65-76 of the
179 * protocol specification dated September, 1981 very closely.
180 */
2b4b57cd
BJ
181tcp_input(m0)
182 struct mbuf *m0;
87e78f19 183{
2b4b57cd 184 register struct tcpiphdr *ti;
53a5409e 185 struct inpcb *inp;
2b4b57cd 186 register struct mbuf *m;
8b5a83bb 187 struct mbuf *om = 0;
2b4b57cd 188 int len, tlen, off;
8e65fd66 189 register struct tcpcb *tp = 0;
2b4b57cd 190 register int tiflags;
d52566dd 191 struct socket *so;
4859921b 192 int todrop, acked, ourfinisacked, needoutput = 0;
4b935108 193 short ostate;
ebcadd38 194 struct in_addr laddr;
7aa16f99 195 int dropsocket = 0;
96c50630 196 int iss = 0;
87e78f19 197
96c50630 198 tcpstat.tcps_rcvtotal++;
87e78f19 199 /*
4aed14e3
BJ
200 * Get IP and TCP header together in first mbuf.
201 * Note: IP leaves IP header in first mbuf.
87e78f19 202 */
2b4b57cd 203 m = m0;
20790db4 204 ti = mtod(m, struct tcpiphdr *);
4aed14e3 205 if (((struct ip *)ti)->ip_hl > (sizeof (struct ip) >> 2))
d63599ac 206 ip_stripoptions((struct ip *)ti, (struct mbuf *)0);
6703c41f
BJ
207 if (m->m_off > MMAXOFF || m->m_len < sizeof (struct tcpiphdr)) {
208 if ((m = m_pullup(m, sizeof (struct tcpiphdr))) == 0) {
96c50630 209 tcpstat.tcps_rcvshort++;
6703c41f 210 return;
8a13b737
BJ
211 }
212 ti = mtod(m, struct tcpiphdr *);
213 }
87e78f19 214
2b4b57cd 215 /*
4aed14e3 216 * Checksum extended TCP header and data.
2b4b57cd
BJ
217 */
218 tlen = ((struct ip *)ti)->ip_len;
219 len = sizeof (struct ip) + tlen;
60b16fa9 220 if (tcpcksum) {
2b4b57cd
BJ
221 ti->ti_next = ti->ti_prev = 0;
222 ti->ti_x1 = 0;
ac83b17a 223 ti->ti_len = (u_short)tlen;
668cc26d 224 ti->ti_len = htons((u_short)ti->ti_len);
4b6b94ca 225 if (ti->ti_sum = in_cksum(m, len)) {
ee954ff1
SL
226 if (tcpprintfs)
227 printf("tcp sum: src %x\n", ti->ti_src);
96c50630 228 tcpstat.tcps_rcvbadsum++;
8a13b737 229 goto drop;
87e78f19
BJ
230 }
231 }
232
233 /*
4aed14e3 234 * Check that TCP offset makes sense,
8b5a83bb 235 * pull out TCP options and adjust length.
87e78f19 236 */
2b4b57cd 237 off = ti->ti_off << 2;
4b6b94ca 238 if (off < sizeof (struct tcphdr) || off > tlen) {
ee954ff1
SL
239 if (tcpprintfs)
240 printf("tcp off: src %x off %d\n", ti->ti_src, off);
96c50630 241 tcpstat.tcps_rcvbadoff++;
8a13b737 242 goto drop;
2b4b57cd 243 }
1e977657
BJ
244 tlen -= off;
245 ti->ti_len = tlen;
8b5a83bb 246 if (off > sizeof (struct tcphdr)) {
a17510f3
MK
247 if (m->m_len < sizeof(struct ip) + off) {
248 if ((m = m_pullup(m, sizeof (struct ip) + off)) == 0) {
96c50630 249 tcpstat.tcps_rcvshort++;
a17510f3
MK
250 return;
251 }
252 ti = mtod(m, struct tcpiphdr *);
8b5a83bb 253 }
cce93e4b 254 om = m_get(M_DONTWAIT, MT_DATA);
8b5a83bb
BJ
255 if (om == 0)
256 goto drop;
8b5a83bb
BJ
257 om->m_len = off - sizeof (struct tcphdr);
258 { caddr_t op = mtod(m, caddr_t) + sizeof (struct tcpiphdr);
668cc26d 259 bcopy(op, mtod(om, caddr_t), (unsigned)om->m_len);
8b5a83bb 260 m->m_len -= om->m_len;
668cc26d
SL
261 bcopy(op+om->m_len, op,
262 (unsigned)(m->m_len-sizeof (struct tcpiphdr)));
8b5a83bb
BJ
263 }
264 }
2ff61f9d 265 tiflags = ti->ti_flags;
2b4b57cd 266
795e0416 267 /*
669abecf 268 * Drop TCP and IP headers; TCP options were dropped above.
795e0416 269 */
669abecf
KM
270 m->m_off += sizeof(struct tcpiphdr);
271 m->m_len -= sizeof(struct tcpiphdr);
795e0416 272
8a13b737 273 /*
4aed14e3 274 * Convert TCP protocol specific fields to host format.
8a13b737
BJ
275 */
276 ti->ti_seq = ntohl(ti->ti_seq);
277 ti->ti_ack = ntohl(ti->ti_ack);
278 ti->ti_win = ntohs(ti->ti_win);
279 ti->ti_urp = ntohs(ti->ti_urp);
280
2b4b57cd 281 /*
8075bb0e 282 * Locate pcb for segment.
2b4b57cd 283 */
96c50630 284findpcb:
2ff61f9d 285 inp = in_pcblookup
ebcadd38
BJ
286 (&tcb, ti->ti_src, ti->ti_sport, ti->ti_dst, ti->ti_dport,
287 INPLOOKUP_WILDCARD);
2ff61f9d
BJ
288
289 /*
290 * If the state is CLOSED (i.e., TCB does not exist) then
4aed14e3 291 * all data in the incoming segment is discarded.
386369f5
MK
292 * If the TCB exists but is in CLOSED state, it is embryonic,
293 * but should either do a listen or a connect soon.
2ff61f9d 294 */
22856bb8 295 if (inp == 0)
8a13b737 296 goto dropwithreset;
2ff61f9d 297 tp = intotcpcb(inp);
22856bb8 298 if (tp == 0)
8a13b737 299 goto dropwithreset;
386369f5
MK
300 if (tp->t_state == TCPS_CLOSED)
301 goto drop;
f1b2fa5b 302 so = inp->inp_socket;
4b935108
BJ
303 if (so->so_options & SO_DEBUG) {
304 ostate = tp->t_state;
305 tcp_saveti = *ti;
306 }
ebf42a75
BJ
307 if (so->so_options & SO_ACCEPTCONN) {
308 so = sonewconn(so);
309 if (so == 0)
310 goto drop;
7aa16f99
SL
311 /*
312 * This is ugly, but ....
313 *
314 * Mark socket as temporary until we're
315 * committed to keeping it. The code at
316 * ``drop'' and ``dropwithreset'' check the
317 * flag dropsocket to see if the temporary
318 * socket created here should be discarded.
319 * We mark the socket as discardable until
320 * we're committed to it below in TCPS_LISTEN.
321 */
322 dropsocket++;
ebf42a75
BJ
323 inp = (struct inpcb *)so->so_pcb;
324 inp->inp_laddr = ti->ti_dst;
325 inp->inp_lport = ti->ti_dport;
9d866d2f 326#if BSD>=43
a17510f3 327 inp->inp_options = ip_srcroute();
9d866d2f 328#endif
ebf42a75
BJ
329 tp = intotcpcb(inp);
330 tp->t_state = TCPS_LISTEN;
331 }
87e78f19 332
405c9168
BJ
333 /*
334 * Segment received on connection.
335 * Reset idle time and keep-alive timer.
336 */
337 tp->t_idle = 0;
8a36cf82 338 tp->t_timer[TCPT_KEEP] = tcp_keepidle;
405c9168 339
8b5a83bb 340 /*
99578149
MK
341 * Process options if not in LISTEN state,
342 * else do it below (after getting remote address).
8b5a83bb 343 */
99578149
MK
344 if (om && tp->t_state != TCPS_LISTEN) {
345 tcp_dooptions(tp, om, ti);
8b5a83bb
BJ
346 om = 0;
347 }
348
87e78f19 349 /*
8a13b737
BJ
350 * Calculate amount of space in receive window,
351 * and then do TCP input processing.
a17510f3
MK
352 * Receive window is amount of space in rcv queue,
353 * but not less than advertised window.
87e78f19 354 */
bbaaf0fd
MK
355 { int win;
356
357 win = sbspace(&so->so_rcv);
358 if (win < 0)
359 win = 0;
360 tp->rcv_wnd = MAX(win, (int)(tp->rcv_adv - tp->rcv_nxt));
361 }
2ff61f9d 362
87e78f19
BJ
363 switch (tp->t_state) {
364
2ff61f9d
BJ
365 /*
366 * If the state is LISTEN then ignore segment if it contains an RST.
367 * If the segment contains an ACK then it is bad and send a RST.
368 * If it does not contain a SYN then it is not interesting; drop it.
224f3a72 369 * Don't bother responding if the destination was a broadcast.
8a13b737 370 * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial
2ff61f9d 371 * tp->iss, and send a segment:
8a13b737 372 * <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK>
2ff61f9d
BJ
373 * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss.
374 * Fill in remote peer address fields if not previously specified.
375 * Enter SYN_RECEIVED state, and process any other fields of this
4aed14e3 376 * segment in this state.
2ff61f9d 377 */
8075bb0e 378 case TCPS_LISTEN: {
789d2a39 379 struct mbuf *am;
8075bb0e
BJ
380 register struct sockaddr_in *sin;
381
2ff61f9d
BJ
382 if (tiflags & TH_RST)
383 goto drop;
22856bb8 384 if (tiflags & TH_ACK)
8a13b737 385 goto dropwithreset;
22856bb8 386 if ((tiflags & TH_SYN) == 0)
2ff61f9d 387 goto drop;
224f3a72
MK
388 if (in_broadcast(ti->ti_dst))
389 goto drop;
789d2a39
SL
390 am = m_get(M_DONTWAIT, MT_SONAME);
391 if (am == NULL)
392 goto drop;
393 am->m_len = sizeof (struct sockaddr_in);
a8d3bf7f 394 sin = mtod(am, struct sockaddr_in *);
8075bb0e
BJ
395 sin->sin_family = AF_INET;
396 sin->sin_addr = ti->ti_src;
397 sin->sin_port = ti->ti_sport;
ebcadd38 398 laddr = inp->inp_laddr;
789d2a39 399 if (inp->inp_laddr.s_addr == INADDR_ANY)
ebcadd38 400 inp->inp_laddr = ti->ti_dst;
a8d3bf7f 401 if (in_pcbconnect(inp, am)) {
ebcadd38 402 inp->inp_laddr = laddr;
5a1f132a 403 (void) m_free(am);
4aed14e3 404 goto drop;
ebcadd38 405 }
5a1f132a 406 (void) m_free(am);
4aed14e3
BJ
407 tp->t_template = tcp_template(tp);
408 if (tp->t_template == 0) {
8011f5df 409 tp = tcp_drop(tp, ENOBUFS);
a4f7ea71 410 dropsocket = 0; /* socket is already gone */
4aed14e3
BJ
411 goto drop;
412 }
99578149
MK
413 if (om) {
414 tcp_dooptions(tp, om, ti);
415 om = 0;
416 }
96c50630
MK
417 if (iss)
418 tp->iss = iss;
419 else
420 tp->iss = tcp_iss;
421 tcp_iss += TCP_ISSINCR/2;
2ff61f9d 422 tp->irs = ti->ti_seq;
8a13b737
BJ
423 tcp_sendseqinit(tp);
424 tcp_rcvseqinit(tp);
bbaaf0fd 425 tp->t_flags |= TF_ACKNOW;
2ff61f9d 426 tp->t_state = TCPS_SYN_RECEIVED;
8a36cf82 427 tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
7aa16f99 428 dropsocket = 0; /* committed to socket */
96c50630 429 tcpstat.tcps_accepts++;
8a13b737 430 goto trimthenstep6;
8075bb0e 431 }
87e78f19 432
2ff61f9d
BJ
433 /*
434 * If the state is SYN_SENT:
435 * if seg contains an ACK, but not for our SYN, drop the input.
436 * if seg contains a RST, then drop the connection.
437 * if seg does not contain SYN, then drop it.
438 * Otherwise this is an acceptable SYN segment
439 * initialize tp->rcv_nxt and tp->irs
440 * if seg contains ack then advance tp->snd_una
441 * if SYN has been acked change to ESTABLISHED else SYN_RCVD state
442 * arrange for segment to be acked (eventually)
443 * continue processing rest of data/controls, beginning with URG
444 */
445 case TCPS_SYN_SENT:
446 if ((tiflags & TH_ACK) &&
a17510f3 447 (SEQ_LEQ(ti->ti_ack, tp->iss) ||
4b6b94ca 448 SEQ_GT(ti->ti_ack, tp->snd_max)))
8a13b737 449 goto dropwithreset;
2ff61f9d 450 if (tiflags & TH_RST) {
0e3936fa
SL
451 if (tiflags & TH_ACK)
452 tp = tcp_drop(tp, ECONNREFUSED);
2ff61f9d 453 goto drop;
87e78f19 454 }
2ff61f9d
BJ
455 if ((tiflags & TH_SYN) == 0)
456 goto drop;
b57e9490
MK
457 if (tiflags & TH_ACK) {
458 tp->snd_una = ti->ti_ack;
459 if (SEQ_LT(tp->snd_nxt, tp->snd_una))
460 tp->snd_nxt = tp->snd_una;
461 }
4aed14e3 462 tp->t_timer[TCPT_REXMT] = 0;
2ff61f9d 463 tp->irs = ti->ti_seq;
8a13b737
BJ
464 tcp_rcvseqinit(tp);
465 tp->t_flags |= TF_ACKNOW;
b57e9490 466 if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) {
96c50630 467 tcpstat.tcps_connects++;
4aed14e3 468 soisconnected(so);
2ff61f9d 469 tp->t_state = TCPS_ESTABLISHED;
99578149 470 tp->t_maxseg = MIN(tp->t_maxseg, tcp_mss(tp));
405c9168 471 (void) tcp_reass(tp, (struct tcpiphdr *)0);
386369f5
MK
472 /*
473 * if we didn't have to retransmit the SYN,
474 * use its rtt as our initial srtt & rtt var.
475 */
476 if (tp->t_rtt) {
477 tp->t_srtt = tp->t_rtt << 3;
478 tp->t_rttvar = tp->t_rtt << 1;
479 TCPT_RANGESET(tp->t_rxtcur,
480 ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
481 TCPTV_MIN, TCPTV_REXMTMAX);
482 tp->t_rtt = 0;
483 }
405c9168 484 } else
8a13b737 485 tp->t_state = TCPS_SYN_RECEIVED;
8a13b737
BJ
486
487trimthenstep6:
488 /*
4b6b94ca 489 * Advance ti->ti_seq to correspond to first data byte.
8a13b737
BJ
490 * If data, trim to stay within window,
491 * dropping FIN if necessary.
492 */
4b6b94ca 493 ti->ti_seq++;
8a13b737
BJ
494 if (ti->ti_len > tp->rcv_wnd) {
495 todrop = ti->ti_len - tp->rcv_wnd;
9d866d2f 496#if BSD>=43
8a13b737 497 m_adj(m, -todrop);
9d866d2f
MK
498#else
499 /* XXX work around 4.2 m_adj bug */
500 if (m->m_len) {
501 m_adj(m, -todrop);
502 } else {
503 /* skip tcp/ip header in first mbuf */
504 m_adj(m->m_next, -todrop);
505 }
506#endif
8a13b737 507 ti->ti_len = tp->rcv_wnd;
bbaaf0fd 508 tiflags &= ~TH_FIN;
96c50630
MK
509 tcpstat.tcps_rcvpackafterwin++;
510 tcpstat.tcps_rcvbyteafterwin += todrop;
87e78f19 511 }
e832edbc 512 tp->snd_wl1 = ti->ti_seq - 1;
bbaaf0fd 513 tp->rcv_up = ti->ti_seq;
8a13b737 514 goto step6;
2ff61f9d 515 }
87e78f19 516
2ff61f9d
BJ
517 /*
518 * States other than LISTEN or SYN_SENT.
519 * First check that at least some bytes of segment are within
96c50630
MK
520 * receive window. If segment begins before rcv_nxt,
521 * drop leading data (and SYN); if nothing left, just ack.
2ff61f9d 522 */
96c50630
MK
523 todrop = tp->rcv_nxt - ti->ti_seq;
524 if (todrop > 0) {
525 if (tiflags & TH_SYN) {
526 tiflags &= ~TH_SYN;
527 ti->ti_seq++;
528 if (ti->ti_urp > 1)
529 ti->ti_urp--;
530 else
531 tiflags &= ~TH_URG;
532 todrop--;
533 }
534 if (todrop > ti->ti_len ||
535 todrop == ti->ti_len && (tiflags&TH_FIN) == 0) {
8a36cf82
MK
536 tcpstat.tcps_rcvduppack++;
537 tcpstat.tcps_rcvdupbyte += ti->ti_len;
39b02f3c 538 /*
8a36cf82
MK
539 * If segment is just one to the left of the window,
540 * check two special cases:
541 * 1. Don't toss RST in response to 4.2-style keepalive.
542 * 2. If the only thing to drop is a FIN, we can drop
543 * it, but check the ACK or we will get into FIN
544 * wars if our FINs crossed (both CLOSING).
545 * In either case, send ACK to resynchronize,
546 * but keep on processing for RST or ACK.
39b02f3c 547 */
8a36cf82
MK
548 if ((tiflags & TH_FIN && todrop == ti->ti_len + 1)
549#ifdef TCP_COMPAT_42
550 || (tiflags & TH_RST && ti->ti_seq == tp->rcv_nxt - 1)
39b02f3c 551#endif
8a36cf82
MK
552 ) {
553 todrop = ti->ti_len;
554 tiflags &= ~TH_FIN;
555 tp->t_flags |= TF_ACKNOW;
556 } else
557 goto dropafterack;
a6bbda13
MK
558 } else {
559 tcpstat.tcps_rcvpartduppack++;
560 tcpstat.tcps_rcvpartdupbyte += todrop;
96c50630 561 }
96c50630
MK
562 m_adj(m, todrop);
563 ti->ti_seq += todrop;
564 ti->ti_len -= todrop;
565 if (ti->ti_urp > todrop)
566 ti->ti_urp -= todrop;
567 else {
568 tiflags &= ~TH_URG;
569 ti->ti_urp = 0;
570 }
571 }
572
b819e9ea 573 /*
8a36cf82 574 * If new data are received on a connection after the
b819e9ea
MK
575 * user processes are gone, then RST the other end.
576 */
577 if ((so->so_state & SS_NOFDREF) &&
578 tp->t_state > TCPS_CLOSE_WAIT && ti->ti_len) {
579 tp = tcp_close(tp);
580 tcpstat.tcps_rcvafterclose++;
581 goto dropwithreset;
582 }
583
4f182c3f
MK
584 /*
585 * If segment ends after window, drop trailing data
586 * (and PUSH and FIN); if nothing left, just ACK.
587 */
588 todrop = (ti->ti_seq+ti->ti_len) - (tp->rcv_nxt+tp->rcv_wnd);
589 if (todrop > 0) {
590 tcpstat.tcps_rcvpackafterwin++;
591 if (todrop >= ti->ti_len) {
96c50630 592 tcpstat.tcps_rcvbyteafterwin += ti->ti_len;
4f182c3f
MK
593 /*
594 * If a new connection request is received
595 * while in TIME_WAIT, drop the old connection
596 * and start over if the sequence numbers
597 * are above the previous ones.
598 */
599 if (tiflags & TH_SYN &&
600 tp->t_state == TCPS_TIME_WAIT &&
601 SEQ_GT(ti->ti_seq, tp->rcv_nxt)) {
602 iss = tp->rcv_nxt + TCP_ISSINCR;
603 (void) tcp_close(tp);
604 goto findpcb;
96c50630 605 }
4f182c3f
MK
606 /*
607 * If window is closed can only take segments at
608 * window edge, and have to drop data and PUSH from
609 * incoming segments. Continue processing, but
610 * remember to ack. Otherwise, drop segment
611 * and ack.
612 */
613 if (tp->rcv_wnd == 0 && ti->ti_seq == tp->rcv_nxt) {
614 tp->t_flags |= TF_ACKNOW;
615 tcpstat.tcps_rcvwinprobe++;
616 } else
2ff61f9d 617 goto dropafterack;
4f182c3f 618 } else
96c50630 619 tcpstat.tcps_rcvbyteafterwin += todrop;
9d866d2f 620#if BSD>=43
4f182c3f 621 m_adj(m, -todrop);
9d866d2f
MK
622#else
623 /* XXX work around m_adj bug */
624 if (m->m_len) {
625 m_adj(m, -todrop);
626 } else {
627 /* skip tcp/ip header in first mbuf */
628 m_adj(m->m_next, -todrop);
629 }
630#endif
4f182c3f
MK
631 ti->ti_len -= todrop;
632 tiflags &= ~(TH_PUSH|TH_FIN);
87e78f19 633 }
87e78f19 634
87e78f19 635 /*
2ff61f9d
BJ
636 * If the RST bit is set examine the state:
637 * SYN_RECEIVED STATE:
638 * If passive open, return to LISTEN state.
639 * If active open, inform user that connection was refused.
640 * ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES:
641 * Inform user that connection was reset, and close tcb.
642 * CLOSING, LAST_ACK, TIME_WAIT STATES
643 * Close the tcb.
87e78f19 644 */
2ff61f9d 645 if (tiflags&TH_RST) switch (tp->t_state) {
4b935108 646
2ff61f9d 647 case TCPS_SYN_RECEIVED:
8a36cf82
MK
648 so->so_error = ECONNREFUSED;
649 goto close;
2ff61f9d
BJ
650
651 case TCPS_ESTABLISHED:
652 case TCPS_FIN_WAIT_1:
653 case TCPS_FIN_WAIT_2:
654 case TCPS_CLOSE_WAIT:
8a36cf82
MK
655 so->so_error = ECONNRESET;
656 close:
657 tp->t_state = TCPS_CLOSED;
658 tcpstat.tcps_drops++;
659 tp = tcp_close(tp);
2ff61f9d
BJ
660 goto drop;
661
662 case TCPS_CLOSING:
663 case TCPS_LAST_ACK:
664 case TCPS_TIME_WAIT:
0e3936fa 665 tp = tcp_close(tp);
2ff61f9d 666 goto drop;
87e78f19 667 }
87e78f19
BJ
668
669 /*
2ff61f9d
BJ
670 * If a SYN is in the window, then this is an
671 * error and we send an RST and drop the connection.
672 */
673 if (tiflags & TH_SYN) {
0e3936fa 674 tp = tcp_drop(tp, ECONNRESET);
8a13b737 675 goto dropwithreset;
2ff61f9d
BJ
676 }
677
678 /*
679 * If the ACK bit is off we drop the segment and return.
680 */
8a13b737 681 if ((tiflags & TH_ACK) == 0)
2ff61f9d
BJ
682 goto drop;
683
684 /*
685 * Ack processing.
87e78f19 686 */
87e78f19
BJ
687 switch (tp->t_state) {
688
2ff61f9d
BJ
689 /*
690 * In SYN_RECEIVED state if the ack ACKs our SYN then enter
4859921b 691 * ESTABLISHED state and continue processing, otherwise
2ff61f9d
BJ
692 * send an RST.
693 */
694 case TCPS_SYN_RECEIVED:
8a13b737 695 if (SEQ_GT(tp->snd_una, ti->ti_ack) ||
4b6b94ca 696 SEQ_GT(ti->ti_ack, tp->snd_max))
8a13b737 697 goto dropwithreset;
96c50630 698 tcpstat.tcps_connects++;
8a13b737
BJ
699 soisconnected(so);
700 tp->t_state = TCPS_ESTABLISHED;
99578149 701 tp->t_maxseg = MIN(tp->t_maxseg, tcp_mss(tp));
405c9168 702 (void) tcp_reass(tp, (struct tcpiphdr *)0);
4aed14e3 703 tp->snd_wl1 = ti->ti_seq - 1;
8a13b737 704 /* fall into ... */
87e78f19 705
2ff61f9d
BJ
706 /*
707 * In ESTABLISHED state: drop duplicate ACKs; ACK out of range
708 * ACKs. If the ack is in the range
4b6b94ca 709 * tp->snd_una < ti->ti_ack <= tp->snd_max
2ff61f9d
BJ
710 * then advance tp->snd_una to ti->ti_ack and drop
711 * data from the retransmission queue. If this ACK reflects
712 * more up to date window information we update our window information.
713 */
714 case TCPS_ESTABLISHED:
715 case TCPS_FIN_WAIT_1:
716 case TCPS_FIN_WAIT_2:
717 case TCPS_CLOSE_WAIT:
718 case TCPS_CLOSING:
4aed14e3
BJ
719 case TCPS_LAST_ACK:
720 case TCPS_TIME_WAIT:
8a13b737 721
96c50630 722 if (SEQ_LEQ(ti->ti_ack, tp->snd_una)) {
386369f5 723 if (ti->ti_len == 0 && ti->ti_win == tp->snd_wnd) {
96c50630 724 tcpstat.tcps_rcvdupack++;
386369f5
MK
725 /*
726 * If we have outstanding data (not a
727 * window probe), this is a completely
728 * duplicate ack (ie, window info didn't
729 * change), the ack is the biggest we've
730 * seen and we've seen exactly our rexmt
731 * threshhold of them, assume a packet
732 * has been dropped and retransmit it.
733 * Kludge snd_nxt & the congestion
734 * window so we send only this one
3c317835
MK
735 * packet. If this packet fills the
736 * only hole in the receiver's seq.
737 * space, the next real ack will fully
738 * open our window. This means we
739 * have to do the usual slow-start to
740 * not overwhelm an intermediate gateway
741 * with a burst of packets. Leave
742 * here with the congestion window set
743 * to allow 2 packets on the next real
744 * ack and the exp-to-linear thresh
745 * set for half the current window
746 * size (since we know we're losing at
747 * the current window size).
386369f5
MK
748 */
749 if (tp->t_timer[TCPT_REXMT] == 0 ||
750 ti->ti_ack != tp->snd_una)
751 tp->t_dupacks = 0;
752 else if (++tp->t_dupacks == tcprexmtthresh) {
753 tcp_seq onxt = tp->snd_nxt;
3c317835
MK
754 u_int win =
755 MIN(tp->snd_wnd, tp->snd_cwnd) / 2 /
756 tp->t_maxseg;
757
758 if (win < 2)
759 win = 2;
760 tp->snd_ssthresh = win * tp->t_maxseg;
386369f5
MK
761
762 tp->t_timer[TCPT_REXMT] = 0;
763 tp->t_rtt = 0;
764 tp->snd_nxt = ti->ti_ack;
765 tp->snd_cwnd = tp->t_maxseg;
766 (void) tcp_output(tp);
767
386369f5
MK
768 if (SEQ_GT(onxt, tp->snd_nxt))
769 tp->snd_nxt = onxt;
770 goto drop;
771 }
772 } else
773 tp->t_dupacks = 0;
2ff61f9d 774 break;
96c50630 775 }
386369f5 776 tp->t_dupacks = 0;
96c50630
MK
777 if (SEQ_GT(ti->ti_ack, tp->snd_max)) {
778 tcpstat.tcps_rcvacktoomuch++;
2ff61f9d 779 goto dropafterack;
96c50630 780 }
8a13b737 781 acked = ti->ti_ack - tp->snd_una;
96c50630
MK
782 tcpstat.tcps_rcvackpack++;
783 tcpstat.tcps_rcvackbyte += acked;
dd020fc8
BJ
784
785 /*
786 * If transmit timer is running and timed sequence
787 * number was acked, update smoothed round trip time.
a6bbda13
MK
788 * Since we now have an rtt measurement, cancel the
789 * timer backoff (cf., Phil Karn's retransmit alg.).
790 * Recompute the initial retransmit timer.
dd020fc8
BJ
791 */
792 if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq)) {
96c50630 793 tcpstat.tcps_rttupdated++;
7cc62c26
MK
794 if (tp->t_srtt != 0) {
795 register short delta;
796
797 /*
798 * srtt is stored as fixed point with 3 bits
799 * after the binary point (i.e., scaled by 8).
800 * The following magic is equivalent
801 * to the smoothing algorithm in rfc793
802 * with an alpha of .875
803 * (srtt = rtt/8 + srtt*7/8 in fixed point).
386369f5 804 * Adjust t_rtt to origin 0.
7cc62c26 805 */
8a36cf82 806 delta = tp->t_rtt - 1 - (tp->t_srtt >> 3);
7cc62c26
MK
807 if ((tp->t_srtt += delta) <= 0)
808 tp->t_srtt = 1;
809 /*
a6bbda13
MK
810 * We accumulate a smoothed rtt variance
811 * (actually, a smoothed mean difference),
7cc62c26
MK
812 * then set the retransmit timer to smoothed
813 * rtt + 2 times the smoothed variance.
386369f5 814 * rttvar is stored as fixed point
7cc62c26
MK
815 * with 2 bits after the binary point
816 * (scaled by 4). The following is equivalent
817 * to rfc793 smoothing with an alpha of .75
818 * (rttvar = rttvar*3/4 + |delta| / 4).
819 * This replaces rfc793's wired-in beta.
820 */
821 if (delta < 0)
822 delta = -delta;
823 delta -= (tp->t_rttvar >> 2);
824 if ((tp->t_rttvar += delta) <= 0)
825 tp->t_rttvar = 1;
826 } else {
827 /*
828 * No rtt measurement yet - use the
829 * unsmoothed rtt. Set the variance
830 * to half the rtt (so our first
831 * retransmit happens at 2*rtt)
832 */
833 tp->t_srtt = tp->t_rtt << 3;
834 tp->t_rttvar = tp->t_rtt << 1;
835 }
dd020fc8 836 tp->t_rtt = 0;
a6bbda13
MK
837 tp->t_rxtshift = 0;
838 TCPT_RANGESET(tp->t_rxtcur,
839 ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
840 TCPTV_MIN, TCPTV_REXMTMAX);
dd020fc8
BJ
841 }
842
91039e49
MK
843 /*
844 * If all outstanding data is acked, stop retransmit
845 * timer and remember to restart (more output or persist).
846 * If there is more data to be acked, restart retransmit
a6bbda13 847 * timer, using current (possibly backed-off) value.
91039e49
MK
848 */
849 if (ti->ti_ack == tp->snd_max) {
4aed14e3 850 tp->t_timer[TCPT_REXMT] = 0;
91039e49 851 needoutput = 1;
a6bbda13
MK
852 } else if (tp->t_timer[TCPT_PERSIST] == 0)
853 tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
1e9621b8 854 /*
386369f5
MK
855 * When new data is acked, open the congestion window.
856 * If the window gives us less than ssthresh packets
857 * in flight, open exponentially (maxseg per packet).
858 * Otherwise open linearly (maxseg per window,
859 * or maxseg^2 / cwnd per packet).
1e9621b8 860 */
386369f5
MK
861 {
862 u_int incr = tp->t_maxseg;
863
864 if (tp->snd_cwnd > tp->snd_ssthresh)
865 incr = MAX(incr * incr / tp->snd_cwnd, 1);
866
5d1f4090 867 tp->snd_cwnd = MIN(tp->snd_cwnd + incr, IP_MAXPACKET); /* XXX */
386369f5 868 }
6703c41f 869 if (acked > so->so_snd.sb_cc) {
6703c41f 870 tp->snd_wnd -= so->so_snd.sb_cc;
8011f5df 871 sbdrop(&so->so_snd, (int)so->so_snd.sb_cc);
4859921b 872 ourfinisacked = 1;
6703c41f 873 } else {
668cc26d 874 sbdrop(&so->so_snd, acked);
6703c41f 875 tp->snd_wnd -= acked;
4859921b 876 ourfinisacked = 0;
6703c41f 877 }
5744ed2b 878 if ((so->so_snd.sb_flags & SB_WAIT) || so->so_snd.sb_sel)
22856bb8 879 sowwakeup(so);
4b6b94ca 880 tp->snd_una = ti->ti_ack;
b8977237
BJ
881 if (SEQ_LT(tp->snd_nxt, tp->snd_una))
882 tp->snd_nxt = tp->snd_una;
405c9168 883
87e78f19
BJ
884 switch (tp->t_state) {
885
2ff61f9d
BJ
886 /*
887 * In FIN_WAIT_1 STATE in addition to the processing
888 * for the ESTABLISHED state if our FIN is now acknowledged
8a13b737 889 * then enter FIN_WAIT_2.
2ff61f9d
BJ
890 */
891 case TCPS_FIN_WAIT_1:
fdae4427
BJ
892 if (ourfinisacked) {
893 /*
894 * If we can't receive any more
895 * data, then closing user can proceed.
a17510f3
MK
896 * Starting the timer is contrary to the
897 * specification, but if we don't get a FIN
898 * we'll hang forever.
fdae4427 899 */
a17510f3 900 if (so->so_state & SS_CANTRCVMORE) {
fdae4427 901 soisdisconnected(so);
8a36cf82 902 tp->t_timer[TCPT_2MSL] = tcp_maxidle;
a17510f3 903 }
8a13b737 904 tp->t_state = TCPS_FIN_WAIT_2;
fdae4427 905 }
87e78f19
BJ
906 break;
907
2ff61f9d
BJ
908 /*
909 * In CLOSING STATE in addition to the processing for
910 * the ESTABLISHED state if the ACK acknowledges our FIN
911 * then enter the TIME-WAIT state, otherwise ignore
912 * the segment.
913 */
914 case TCPS_CLOSING:
4aed14e3 915 if (ourfinisacked) {
2ff61f9d 916 tp->t_state = TCPS_TIME_WAIT;
4aed14e3
BJ
917 tcp_canceltimers(tp);
918 tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
919 soisdisconnected(so);
920 }
921 break;
87e78f19 922
2ff61f9d 923 /*
e20bac9c
MK
924 * In LAST_ACK, we may still be waiting for data to drain
925 * and/or to be acked, as well as for the ack of our FIN.
926 * If our FIN is now acknowledged, delete the TCB,
927 * enter the closed state and return.
2ff61f9d
BJ
928 */
929 case TCPS_LAST_ACK:
e20bac9c 930 if (ourfinisacked) {
0e3936fa 931 tp = tcp_close(tp);
e20bac9c
MK
932 goto drop;
933 }
934 break;
87e78f19 935
2ff61f9d
BJ
936 /*
937 * In TIME_WAIT state the only thing that should arrive
938 * is a retransmission of the remote FIN. Acknowledge
939 * it and restart the finack timer.
940 */
941 case TCPS_TIME_WAIT:
405c9168 942 tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
2ff61f9d 943 goto dropafterack;
87e78f19 944 }
8a13b737 945 }
87e78f19 946
2ff61f9d 947step6:
4aed14e3
BJ
948 /*
949 * Update window information.
bbaaf0fd 950 * Don't look at window if no ACK: TAC's send garbage on first SYN.
4aed14e3 951 */
bbaaf0fd
MK
952 if ((tiflags & TH_ACK) &&
953 (SEQ_LT(tp->snd_wl1, ti->ti_seq) || tp->snd_wl1 == ti->ti_seq &&
8e65fd66 954 (SEQ_LT(tp->snd_wl2, ti->ti_ack) ||
bbaaf0fd 955 tp->snd_wl2 == ti->ti_ack && ti->ti_win > tp->snd_wnd))) {
96c50630
MK
956 /* keep track of pure window updates */
957 if (ti->ti_len == 0 &&
386369f5 958 tp->snd_wl2 == ti->ti_ack && ti->ti_win > tp->snd_wnd)
96c50630 959 tcpstat.tcps_rcvwinupd++;
4aed14e3
BJ
960 tp->snd_wnd = ti->ti_win;
961 tp->snd_wl1 = ti->ti_seq;
962 tp->snd_wl2 = ti->ti_ack;
18a438b6
MK
963 if (tp->snd_wnd > tp->max_sndwnd)
964 tp->max_sndwnd = tp->snd_wnd;
91039e49
MK
965 needoutput = 1;
966 }
4aed14e3 967
2ff61f9d 968 /*
b2db9217 969 * Process segments with URG.
2ff61f9d 970 */
9c811062
BJ
971 if ((tiflags & TH_URG) && ti->ti_urp &&
972 TCPS_HAVERCVDFIN(tp->t_state) == 0) {
f4be5024 973 /*
bbaaf0fd 974 * This is a kludge, but if we receive and accept
a5d9c993
SL
975 * random urgent pointers, we'll crash in
976 * soreceive. It's hard to imagine someone
977 * actually wanting to send this much urgent data.
f4be5024 978 */
2f4f574f 979 if (ti->ti_urp + so->so_rcv.sb_cc > SB_MAX) {
f4be5024
SL
980 ti->ti_urp = 0; /* XXX */
981 tiflags &= ~TH_URG; /* XXX */
bbaaf0fd 982 goto dodata; /* XXX */
f4be5024 983 }
b2db9217
BJ
984 /*
985 * If this segment advances the known urgent pointer,
986 * then mark the data stream. This should not happen
987 * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since
988 * a FIN has been received from the remote side.
989 * In these states we ignore the URG.
ae6760c5
MK
990 *
991 * According to RFC961 (Assigned Protocols),
992 * the urgent pointer points to the last octet
993 * of urgent data. We continue, however,
994 * to consider it to indicate the first octet
995 * of data past the urgent section
996 * as the original spec states.
b2db9217
BJ
997 */
998 if (SEQ_GT(ti->ti_seq+ti->ti_urp, tp->rcv_up)) {
999 tp->rcv_up = ti->ti_seq + ti->ti_urp;
1000 so->so_oobmark = so->so_rcv.sb_cc +
1001 (tp->rcv_up - tp->rcv_nxt) - 1;
1002 if (so->so_oobmark == 0)
1003 so->so_state |= SS_RCVATMARK;
77a4e3ca 1004 sohasoutofband(so);
a17510f3 1005 tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA);
b2db9217
BJ
1006 }
1007 /*
1008 * Remove out of band data so doesn't get presented to user.
1009 * This can happen independent of advancing the URG pointer,
1010 * but if two URG's are pending at once, some out-of-band
1011 * data may creep in... ick.
1012 */
9d866d2f
MK
1013 if (ti->ti_urp <= ti->ti_len
1014#ifdef SO_OOBINLINE
1015 && (so->so_options & SO_OOBINLINE) == 0
1016#endif
1017 )
b2db9217 1018 tcp_pulloutofband(so, ti);
bbaaf0fd
MK
1019 } else
1020 /*
1021 * If no out of band data is expected,
1022 * pull receive urgent pointer along
1023 * with the receive window.
1024 */
1025 if (SEQ_GT(tp->rcv_nxt, tp->rcv_up))
1026 tp->rcv_up = tp->rcv_nxt;
1027dodata: /* XXX */
87e78f19
BJ
1028
1029 /*
2ff61f9d
BJ
1030 * Process the segment text, merging it into the TCP sequencing queue,
1031 * and arranging for acknowledgment of receipt if necessary.
1032 * This process logically involves adjusting tp->rcv_wnd as data
1033 * is presented to the user (this happens in tcp_usrreq.c,
1034 * case PRU_RCVD). If a FIN has already been received on this
1035 * connection then we just ignore the text.
87e78f19 1036 */
7984a662
MK
1037 if ((ti->ti_len || (tiflags&TH_FIN)) &&
1038 TCPS_HAVERCVDFIN(tp->t_state) == 0) {
a17510f3 1039 TCP_REASS(tp, ti, m, so, tiflags);
18a438b6
MK
1040 /*
1041 * Note the amount of data that peer has sent into
1042 * our window, in order to estimate the sender's
1043 * buffer size.
1044 */
386369f5 1045 len = so->so_rcv.sb_hiwat - (tp->rcv_adv - tp->rcv_nxt);
18a438b6
MK
1046 if (len > tp->max_rcvd)
1047 tp->max_rcvd = len;
4aed14e3 1048 } else {
2b4b57cd 1049 m_freem(m);
e832edbc 1050 tiflags &= ~TH_FIN;
4aed14e3 1051 }
87e78f19
BJ
1052
1053 /*
e832edbc
BJ
1054 * If FIN is received ACK the FIN and let the user know
1055 * that the connection is closing.
87e78f19 1056 */
e832edbc 1057 if (tiflags & TH_FIN) {
4aed14e3
BJ
1058 if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
1059 socantrcvmore(so);
1060 tp->t_flags |= TF_ACKNOW;
1061 tp->rcv_nxt++;
1062 }
2ff61f9d 1063 switch (tp->t_state) {
87e78f19 1064
2ff61f9d
BJ
1065 /*
1066 * In SYN_RECEIVED and ESTABLISHED STATES
1067 * enter the CLOSE_WAIT state.
53a5409e 1068 */
2ff61f9d
BJ
1069 case TCPS_SYN_RECEIVED:
1070 case TCPS_ESTABLISHED:
1071 tp->t_state = TCPS_CLOSE_WAIT;
1072 break;
53a5409e 1073
2ff61f9d 1074 /*
8a13b737
BJ
1075 * If still in FIN_WAIT_1 STATE FIN has not been acked so
1076 * enter the CLOSING state.
53a5409e 1077 */
2ff61f9d 1078 case TCPS_FIN_WAIT_1:
8a13b737 1079 tp->t_state = TCPS_CLOSING;
2ff61f9d 1080 break;
87e78f19 1081
2ff61f9d
BJ
1082 /*
1083 * In FIN_WAIT_2 state enter the TIME_WAIT state,
1084 * starting the time-wait timer, turning off the other
1085 * standard timers.
1086 */
1087 case TCPS_FIN_WAIT_2:
4aed14e3 1088 tp->t_state = TCPS_TIME_WAIT;
a6503abf 1089 tcp_canceltimers(tp);
405c9168 1090 tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
4aed14e3 1091 soisdisconnected(so);
2ff61f9d
BJ
1092 break;
1093
53a5409e 1094 /*
2ff61f9d 1095 * In TIME_WAIT state restart the 2 MSL time_wait timer.
53a5409e 1096 */
2ff61f9d 1097 case TCPS_TIME_WAIT:
405c9168 1098 tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
2ff61f9d 1099 break;
8a13b737 1100 }
87e78f19 1101 }
4b935108
BJ
1102 if (so->so_options & SO_DEBUG)
1103 tcp_trace(TA_INPUT, ostate, tp, &tcp_saveti, 0);
8a13b737
BJ
1104
1105 /*
1106 * Return any desired output.
1107 */
91039e49 1108 if (needoutput || (tp->t_flags & TF_ACKNOW))
bbaaf0fd 1109 (void) tcp_output(tp);
2ff61f9d 1110 return;
8a13b737 1111
2ff61f9d 1112dropafterack:
8a13b737 1113 /*
1e977657
BJ
1114 * Generate an ACK dropping incoming segment if it occupies
1115 * sequence space, where the ACK reflects our state.
8a13b737 1116 */
ad616704 1117 if (tiflags & TH_RST)
8a13b737 1118 goto drop;
5722bd39 1119 m_freem(m);
4859921b
MK
1120 tp->t_flags |= TF_ACKNOW;
1121 (void) tcp_output(tp);
4b6b94ca 1122 return;
8a13b737
BJ
1123
1124dropwithreset:
f37c1c84 1125 if (om) {
668cc26d 1126 (void) m_free(om);
f37c1c84
SL
1127 om = 0;
1128 }
8a13b737 1129 /*
4aed14e3 1130 * Generate a RST, dropping incoming segment.
8a13b737 1131 * Make ACK acceptable to originator of segment.
224f3a72 1132 * Don't bother to respond if destination was broadcast.
8a13b737 1133 */
224f3a72 1134 if ((tiflags & TH_RST) || in_broadcast(ti->ti_dst))
8a13b737
BJ
1135 goto drop;
1136 if (tiflags & TH_ACK)
8e65fd66 1137 tcp_respond(tp, ti, (tcp_seq)0, ti->ti_ack, TH_RST);
8a13b737
BJ
1138 else {
1139 if (tiflags & TH_SYN)
1140 ti->ti_len++;
1e977657
BJ
1141 tcp_respond(tp, ti, ti->ti_seq+ti->ti_len, (tcp_seq)0,
1142 TH_RST|TH_ACK);
8a13b737 1143 }
7aa16f99
SL
1144 /* destroy temporarily created socket */
1145 if (dropsocket)
1146 (void) soabort(so);
4b6b94ca 1147 return;
8a13b737 1148
2ff61f9d 1149drop:
01b1394e
SL
1150 if (om)
1151 (void) m_free(om);
8a13b737
BJ
1152 /*
1153 * Drop space held by incoming segment and return.
1154 */
f3cdd721
BJ
1155 if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
1156 tcp_trace(TA_DROP, ostate, tp, &tcp_saveti, 0);
2ff61f9d 1157 m_freem(m);
7aa16f99
SL
1158 /* destroy temporarily created socket */
1159 if (dropsocket)
1160 (void) soabort(so);
4b935108 1161 return;
2ff61f9d
BJ
1162}
1163
99578149 1164tcp_dooptions(tp, om, ti)
8b5a83bb
BJ
1165 struct tcpcb *tp;
1166 struct mbuf *om;
99578149 1167 struct tcpiphdr *ti;
5e74df82 1168{
8b5a83bb
BJ
1169 register u_char *cp;
1170 int opt, optlen, cnt;
1171
1172 cp = mtod(om, u_char *);
1173 cnt = om->m_len;
1174 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1175 opt = cp[0];
1176 if (opt == TCPOPT_EOL)
1177 break;
1178 if (opt == TCPOPT_NOP)
1179 optlen = 1;
357b20fc 1180 else {
8b5a83bb 1181 optlen = cp[1];
357b20fc
SL
1182 if (optlen <= 0)
1183 break;
1184 }
8b5a83bb
BJ
1185 switch (opt) {
1186
1187 default:
1188 break;
1189
1190 case TCPOPT_MAXSEG:
1191 if (optlen != 4)
1192 continue;
99578149
MK
1193 if (!(ti->ti_flags & TH_SYN))
1194 continue;
8b5a83bb 1195 tp->t_maxseg = *(u_short *)(cp + 2);
668cc26d 1196 tp->t_maxseg = ntohs((u_short)tp->t_maxseg);
99578149 1197 tp->t_maxseg = MIN(tp->t_maxseg, tcp_mss(tp));
8b5a83bb 1198 break;
8b5a83bb 1199 }
5e74df82 1200 }
668cc26d 1201 (void) m_free(om);
5e74df82
BJ
1202}
1203
b2db9217
BJ
1204/*
1205 * Pull out of band byte out of a segment so
1206 * it doesn't appear in the user's data queue.
1207 * It is still reflected in the segment length for
1208 * sequencing purposes.
1209 */
1210tcp_pulloutofband(so, ti)
1211 struct socket *so;
1212 struct tcpiphdr *ti;
1213{
1214 register struct mbuf *m;
1acff8ec 1215 int cnt = ti->ti_urp - 1;
b2db9217
BJ
1216
1217 m = dtom(ti);
1218 while (cnt >= 0) {
1219 if (m->m_len > cnt) {
1220 char *cp = mtod(m, caddr_t) + cnt;
1221 struct tcpcb *tp = sototcpcb(so);
1222
1223 tp->t_iobc = *cp;
1224 tp->t_oobflags |= TCPOOB_HAVEDATA;
668cc26d 1225 bcopy(cp+1, cp, (unsigned)(m->m_len - cnt - 1));
b2db9217
BJ
1226 m->m_len--;
1227 return;
1228 }
1229 cnt -= m->m_len;
1230 m = m->m_next;
1231 if (m == 0)
1232 break;
1233 }
1234 panic("tcp_pulloutofband");
1235}
1236
99578149
MK
1237/*
1238 * Determine a reasonable value for maxseg size.
1239 * If the route is known, use one that can be handled
1240 * on the given interface without forcing IP to fragment.
7cc62c26
MK
1241 * If bigger than an mbuf cluster (MCLBYTES), round down to nearest size
1242 * to utilize large mbufs.
99578149 1243 * If interface pointer is unavailable, or the destination isn't local,
c2a1cd2c
MK
1244 * use a conservative size (512 or the default IP max size, but no more
1245 * than the mtu of the interface through which we route),
99578149 1246 * as we can't discover anything about intervening gateways or networks.
a6bbda13
MK
1247 * We also initialize the congestion/slow start window to be a single
1248 * segment if the destination isn't local; this information should
1249 * probably all be saved with the routing entry at the transport level.
99578149
MK
1250 *
1251 * This is ugly, and doesn't belong at this level, but has to happen somehow.
1252 */
1253tcp_mss(tp)
c2a1cd2c 1254 register struct tcpcb *tp;
99578149
MK
1255{
1256 struct route *ro;
1257 struct ifnet *ifp;
1258 int mss;
1259 struct inpcb *inp;
1260
1261 inp = tp->t_inpcb;
1262 ro = &inp->inp_route;
1263 if ((ro->ro_rt == (struct rtentry *)0) ||
1264 (ifp = ro->ro_rt->rt_ifp) == (struct ifnet *)0) {
1265 /* No route yet, so try to acquire one */
1266 if (inp->inp_faddr.s_addr != INADDR_ANY) {
1267 ro->ro_dst.sa_family = AF_INET;
1268 ((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
1269 inp->inp_faddr;
1270 rtalloc(ro);
1271 }
1272 if ((ro->ro_rt == 0) || (ifp = ro->ro_rt->rt_ifp) == 0)
0d115dcc 1273 return (TCP_MSS);
99578149
MK
1274 }
1275
1276 mss = ifp->if_mtu - sizeof(struct tcpiphdr);
7cc62c26
MK
1277#if (MCLBYTES & (MCLBYTES - 1)) == 0
1278 if (mss > MCLBYTES)
1279 mss &= ~(MCLBYTES-1);
99578149 1280#else
7cc62c26
MK
1281 if (mss > MCLBYTES)
1282 mss = mss / MCLBYTES * MCLBYTES;
99578149 1283#endif
c2a1cd2c
MK
1284 if (in_localaddr(inp->inp_faddr))
1285 return (mss);
386369f5 1286
a6bbda13
MK
1287 mss = MIN(mss, TCP_MSS);
1288 tp->snd_cwnd = mss;
1289 return (mss);
99578149 1290}
9d866d2f
MK
1291
1292#if BSD<43
1293/* XXX this belongs in netinet/in.c */
1294in_localaddr(in)
1295 struct in_addr in;
1296{
1297 register u_long i = ntohl(in.s_addr);
1298 register struct ifnet *ifp;
1299 register struct sockaddr_in *sin;
1300 register u_long mask;
1301
1302 if (IN_CLASSA(i))
1303 mask = IN_CLASSA_NET;
1304 else if (IN_CLASSB(i))
1305 mask = IN_CLASSB_NET;
1306 else if (IN_CLASSC(i))
1307 mask = IN_CLASSC_NET;
1308 else
1309 return (0);
1310
1311 i &= mask;
1312 for (ifp = ifnet; ifp; ifp = ifp->if_next) {
1313 if (ifp->if_addr.sa_family != AF_INET)
1314 continue;
1315 sin = (struct sockaddr_in *)&ifp->if_addr;
1316 if ((sin->sin_addr.s_addr & mask) == i)
1317 return (1);
1318 }
1319 return (0);
1320}
1321#endif