Commit | Line | Data |
---|---|---|
8ae0e4b4 | 1 | /* |
e7a3707f KB |
2 | * Copyright (c) 1982, 1986, 1988, 1990, 1993 |
3 | * The Regents of the University of California. All rights reserved. | |
8ae0e4b4 | 4 | * |
dbf0c423 | 5 | * %sccs.include.redist.c% |
2b6b6284 | 6 | * |
6cbc4e69 | 7 | * @(#)tcp_input.c 8.2 (Berkeley) %G% |
8ae0e4b4 | 8 | */ |
87e78f19 | 9 | |
55ad7758 | 10 | #ifndef TUBA_INCLUDE |
5548a02f KB |
11 | #include <sys/param.h> |
12 | #include <sys/systm.h> | |
13 | #include <sys/malloc.h> | |
14 | #include <sys/mbuf.h> | |
15 | #include <sys/protosw.h> | |
16 | #include <sys/socket.h> | |
17 | #include <sys/socketvar.h> | |
18 | #include <sys/errno.h> | |
f4d55810 | 19 | |
5548a02f KB |
20 | #include <net/if.h> |
21 | #include <net/route.h> | |
f4d55810 | 22 | |
5548a02f KB |
23 | #include <netinet/in.h> |
24 | #include <netinet/in_systm.h> | |
25 | #include <netinet/ip.h> | |
26 | #include <netinet/in_pcb.h> | |
27 | #include <netinet/ip_var.h> | |
28 | #include <netinet/tcp.h> | |
29 | #include <netinet/tcp_fsm.h> | |
30 | #include <netinet/tcp_seq.h> | |
31 | #include <netinet/tcp_timer.h> | |
32 | #include <netinet/tcp_var.h> | |
33 | #include <netinet/tcpip.h> | |
34 | #include <netinet/tcp_debug.h> | |
87e78f19 | 35 | |
386369f5 | 36 | int tcprexmtthresh = 3; |
6cf1965b MK |
37 | int tcppredack; /* XXX debugging: times hdr predict ok for acks */ |
38 | int tcppreddat; /* XXX # times header prediction ok for data packets */ | |
39 | int tcppcbcachemiss; | |
4b935108 | 40 | struct tcpiphdr tcp_saveti; |
6cf1965b | 41 | struct inpcb *tcp_last_inpcb = &tcb; |
87e78f19 | 42 | |
55ad7758 KS |
43 | extern u_long sb_max; |
44 | ||
45 | #endif /* TUBA_INCLUDE */ | |
/*
 * Idle limit against which the age of ts_recent is compared by the PAWS
 * check: 24 days, expressed in PR_SLOWHZ units.
 * NOTE(review): presumably PR_SLOWHZ is slow-timer ticks per second -- confirm.
 */
#define	TCP_PAWS_IDLE	(24 * 24 * 60 * 60 * PR_SLOWHZ)

/*
 * Modulo comparisons of timestamps: the difference is converted to a
 * signed int so that values which have wrapped still order correctly.
 * Both arguments are evaluated exactly once.
 */
#define	TSTMP_LT(a, b)	((int)((a) - (b)) < 0)
#define	TSTMP_GEQ(a, b)	(!TSTMP_LT(a, b))
51 | ||
69d96ae2 | 52 | |
/*
 * Insert segment ti into the reassembly queue of the connection with
 * control block tp, leaving in `flags' TH_FIN if the data passed up now
 * includes a segment carrying FIN (0 otherwise).
 *
 * The macro handles the common case inline -- the segment is the next
 * one expected (ti_seq == rcv_nxt), the reassembly queue is empty and
 * the connection is ESTABLISHED -- which avoids linking the segment
 * into and out of the queue.  Everything else goes through tcp_reass().
 *
 * In-order segments only request a delayed ACK (TF_DELACK); anything
 * taking the slow path is acknowledged immediately (TF_ACKNOW) so that
 * the peer's fast retransmit can work.
 *
 * Arguments:
 *	tp	struct tcpcb * of the connection
 *	ti	struct tcpiphdr * of the arriving segment
 *	m	mbuf chain holding the segment's data
 *	so	socket whose receive buffer takes the data
 *	flags	lvalue that receives the result
 *
 * Every argument is parenthesized at each use so that expressions such
 * as `*tpp' expand correctly.  Arguments are evaluated more than once;
 * do not pass expressions with side effects.  The expansion is a plain
 * brace block (not do/while), as it has always been, so existing call
 * sites keep compiling unchanged.
 */
#define	TCP_REASS(tp, ti, m, so, flags) { \
	if ((ti)->ti_seq == (tp)->rcv_nxt && \
	    (tp)->seg_next == (struct tcpiphdr *)(tp) && \
	    (tp)->t_state == TCPS_ESTABLISHED) { \
		(tp)->t_flags |= TF_DELACK; \
		(tp)->rcv_nxt += (ti)->ti_len; \
		(flags) = (ti)->ti_flags & TH_FIN; \
		tcpstat.tcps_rcvpack++; \
		tcpstat.tcps_rcvbyte += (ti)->ti_len; \
		sbappend(&(so)->so_rcv, (m)); \
		sorwakeup(so); \
	} else { \
		(flags) = tcp_reass((tp), (ti), (m)); \
		(tp)->t_flags |= TF_ACKNOW; \
	} \
}
55ad7758 | 79 | #ifndef TUBA_INCLUDE |
a17510f3 | 80 | |
c46785cb | 81 | int |
6cf1965b | 82 | tcp_reass(tp, ti, m) |
a17510f3 MK |
83 | register struct tcpcb *tp; |
84 | register struct tcpiphdr *ti; | |
6cf1965b | 85 | struct mbuf *m; |
a17510f3 MK |
86 | { |
87 | register struct tcpiphdr *q; | |
88 | struct socket *so = tp->t_inpcb->inp_socket; | |
a17510f3 MK |
89 | int flags; |
90 | ||
91 | /* | |
92 | * Call with ti==0 after become established to | |
93 | * force pre-ESTABLISHED data up to user socket. | |
94 | */ | |
95 | if (ti == 0) | |
96 | goto present; | |
97 | ||
98 | /* | |
99 | * Find a segment which begins after this one does. | |
100 | */ | |
101 | for (q = tp->seg_next; q != (struct tcpiphdr *)tp; | |
102 | q = (struct tcpiphdr *)q->ti_next) | |
103 | if (SEQ_GT(q->ti_seq, ti->ti_seq)) | |
104 | break; | |
105 | ||
106 | /* | |
107 | * If there is a preceding segment, it may provide some of | |
108 | * our data already. If so, drop the data from the incoming | |
109 | * segment. If it provides all of our data, drop us. | |
110 | */ | |
111 | if ((struct tcpiphdr *)q->ti_prev != (struct tcpiphdr *)tp) { | |
112 | register int i; | |
113 | q = (struct tcpiphdr *)q->ti_prev; | |
114 | /* conversion to int (in i) handles seq wraparound */ | |
115 | i = q->ti_seq + q->ti_len - ti->ti_seq; | |
116 | if (i > 0) { | |
96c50630 MK |
117 | if (i >= ti->ti_len) { |
118 | tcpstat.tcps_rcvduppack++; | |
119 | tcpstat.tcps_rcvdupbyte += ti->ti_len; | |
6cf1965b MK |
120 | m_freem(m); |
121 | return (0); | |
96c50630 | 122 | } |
6cf1965b | 123 | m_adj(m, i); |
a17510f3 MK |
124 | ti->ti_len -= i; |
125 | ti->ti_seq += i; | |
126 | } | |
127 | q = (struct tcpiphdr *)(q->ti_next); | |
128 | } | |
96c50630 MK |
129 | tcpstat.tcps_rcvoopack++; |
130 | tcpstat.tcps_rcvoobyte += ti->ti_len; | |
6cf1965b | 131 | REASS_MBUF(ti) = m; /* XXX */ |
a17510f3 MK |
132 | |
133 | /* | |
134 | * While we overlap succeeding segments trim them or, | |
135 | * if they are completely covered, dequeue them. | |
136 | */ | |
137 | while (q != (struct tcpiphdr *)tp) { | |
138 | register int i = (ti->ti_seq + ti->ti_len) - q->ti_seq; | |
139 | if (i <= 0) | |
140 | break; | |
141 | if (i < q->ti_len) { | |
142 | q->ti_seq += i; | |
143 | q->ti_len -= i; | |
6cf1965b | 144 | m_adj(REASS_MBUF(q), i); |
a17510f3 MK |
145 | break; |
146 | } | |
147 | q = (struct tcpiphdr *)q->ti_next; | |
6cf1965b | 148 | m = REASS_MBUF((struct tcpiphdr *)q->ti_prev); |
a17510f3 MK |
149 | remque(q->ti_prev); |
150 | m_freem(m); | |
151 | } | |
152 | ||
153 | /* | |
154 | * Stick new segment in its place. | |
155 | */ | |
156 | insque(ti, q->ti_prev); | |
157 | ||
158 | present: | |
159 | /* | |
160 | * Present data to user, advancing rcv_nxt through | |
161 | * completed sequence space. | |
162 | */ | |
163 | if (TCPS_HAVERCVDSYN(tp->t_state) == 0) | |
164 | return (0); | |
165 | ti = tp->seg_next; | |
166 | if (ti == (struct tcpiphdr *)tp || ti->ti_seq != tp->rcv_nxt) | |
167 | return (0); | |
168 | if (tp->t_state == TCPS_SYN_RECEIVED && ti->ti_len) | |
169 | return (0); | |
170 | do { | |
171 | tp->rcv_nxt += ti->ti_len; | |
172 | flags = ti->ti_flags & TH_FIN; | |
173 | remque(ti); | |
6cf1965b | 174 | m = REASS_MBUF(ti); |
a17510f3 MK |
175 | ti = (struct tcpiphdr *)ti->ti_next; |
176 | if (so->so_state & SS_CANTRCVMORE) | |
177 | m_freem(m); | |
178 | else | |
179 | sbappend(&so->so_rcv, m); | |
180 | } while (ti != (struct tcpiphdr *)tp && ti->ti_seq == tp->rcv_nxt); | |
181 | sorwakeup(so); | |
182 | return (flags); | |
a17510f3 MK |
183 | } |
184 | ||
2ff61f9d BJ |
185 | /* |
186 | * TCP input routine, follows pages 65-76 of the | |
187 | * protocol specification dated September, 1981 very closely. | |
188 | */ | |
c46785cb | 189 | void |
9d91b170 MK |
190 | tcp_input(m, iphlen) |
191 | register struct mbuf *m; | |
192 | int iphlen; | |
87e78f19 | 193 | { |
2b4b57cd | 194 | register struct tcpiphdr *ti; |
6cf1965b | 195 | register struct inpcb *inp; |
69d96ae2 AC |
196 | caddr_t optp = NULL; |
197 | int optlen; | |
2b4b57cd | 198 | int len, tlen, off; |
8e65fd66 | 199 | register struct tcpcb *tp = 0; |
2b4b57cd | 200 | register int tiflags; |
d52566dd | 201 | struct socket *so; |
4859921b | 202 | int todrop, acked, ourfinisacked, needoutput = 0; |
4b935108 | 203 | short ostate; |
ebcadd38 | 204 | struct in_addr laddr; |
7aa16f99 | 205 | int dropsocket = 0; |
96c50630 | 206 | int iss = 0; |
69d96ae2 AC |
207 | u_long tiwin, ts_val, ts_ecr; |
208 | int ts_present = 0; | |
87e78f19 | 209 | |
96c50630 | 210 | tcpstat.tcps_rcvtotal++; |
87e78f19 | 211 | /* |
4aed14e3 BJ |
212 | * Get IP and TCP header together in first mbuf. |
213 | * Note: IP leaves IP header in first mbuf. | |
87e78f19 | 214 | */ |
20790db4 | 215 | ti = mtod(m, struct tcpiphdr *); |
9d91b170 MK |
216 | if (iphlen > sizeof (struct ip)) |
217 | ip_stripoptions(m, (struct mbuf *)0); | |
6cf1965b | 218 | if (m->m_len < sizeof (struct tcpiphdr)) { |
6703c41f | 219 | if ((m = m_pullup(m, sizeof (struct tcpiphdr))) == 0) { |
96c50630 | 220 | tcpstat.tcps_rcvshort++; |
6703c41f | 221 | return; |
8a13b737 BJ |
222 | } |
223 | ti = mtod(m, struct tcpiphdr *); | |
224 | } | |
87e78f19 | 225 | |
2b4b57cd | 226 | /* |
4aed14e3 | 227 | * Checksum extended TCP header and data. |
2b4b57cd BJ |
228 | */ |
229 | tlen = ((struct ip *)ti)->ip_len; | |
230 | len = sizeof (struct ip) + tlen; | |
9d91b170 MK |
231 | ti->ti_next = ti->ti_prev = 0; |
232 | ti->ti_x1 = 0; | |
233 | ti->ti_len = (u_short)tlen; | |
6cf1965b | 234 | HTONS(ti->ti_len); |
9d91b170 | 235 | if (ti->ti_sum = in_cksum(m, len)) { |
9d91b170 MK |
236 | tcpstat.tcps_rcvbadsum++; |
237 | goto drop; | |
87e78f19 | 238 | } |
55ad7758 | 239 | #endif /* TUBA_INCLUDE */ |
87e78f19 BJ |
240 | |
241 | /* | |
4aed14e3 | 242 | * Check that TCP offset makes sense, |
6cf1965b | 243 | * pull out TCP options and adjust length. XXX |
87e78f19 | 244 | */ |
2b4b57cd | 245 | off = ti->ti_off << 2; |
4b6b94ca | 246 | if (off < sizeof (struct tcphdr) || off > tlen) { |
96c50630 | 247 | tcpstat.tcps_rcvbadoff++; |
8a13b737 | 248 | goto drop; |
2b4b57cd | 249 | } |
1e977657 BJ |
250 | tlen -= off; |
251 | ti->ti_len = tlen; | |
8b5a83bb | 252 | if (off > sizeof (struct tcphdr)) { |
a17510f3 MK |
253 | if (m->m_len < sizeof(struct ip) + off) { |
254 | if ((m = m_pullup(m, sizeof (struct ip) + off)) == 0) { | |
96c50630 | 255 | tcpstat.tcps_rcvshort++; |
a17510f3 MK |
256 | return; |
257 | } | |
258 | ti = mtod(m, struct tcpiphdr *); | |
8b5a83bb | 259 | } |
69d96ae2 AC |
260 | optlen = off - sizeof (struct tcphdr); |
261 | optp = mtod(m, caddr_t) + sizeof (struct tcpiphdr); | |
262 | /* | |
263 | * Do quick retrieval of timestamp options ("options | |
264 | * prediction?"). If timestamp is the only option and it's | |
265 | * formatted as recommended in RFC 1323 appendix A, we | |
266 | * quickly get the values now and not bother calling | |
267 | * tcp_dooptions(), etc. | |
268 | */ | |
269 | if ((optlen == TCPOLEN_TSTAMP_APPA || | |
270 | (optlen > TCPOLEN_TSTAMP_APPA && | |
271 | optp[TCPOLEN_TSTAMP_APPA] == TCPOPT_EOL)) && | |
272 | *(u_long *)optp == htonl(TCPOPT_TSTAMP_HDR) && | |
273 | (ti->ti_flags & TH_SYN) == 0) { | |
274 | ts_present = 1; | |
275 | ts_val = ntohl(*(u_long *)(optp + 4)); | |
276 | ts_ecr = ntohl(*(u_long *)(optp + 8)); | |
277 | optp = NULL; /* we've parsed the options */ | |
8b5a83bb BJ |
278 | } |
279 | } | |
2ff61f9d | 280 | tiflags = ti->ti_flags; |
2b4b57cd | 281 | |
8a13b737 | 282 | /* |
4aed14e3 | 283 | * Convert TCP protocol specific fields to host format. |
8a13b737 | 284 | */ |
6cf1965b MK |
285 | NTOHL(ti->ti_seq); |
286 | NTOHL(ti->ti_ack); | |
287 | NTOHS(ti->ti_win); | |
288 | NTOHS(ti->ti_urp); | |
8a13b737 | 289 | |
2b4b57cd | 290 | /* |
8075bb0e | 291 | * Locate pcb for segment. |
2b4b57cd | 292 | */ |
96c50630 | 293 | findpcb: |
6cf1965b MK |
294 | inp = tcp_last_inpcb; |
295 | if (inp->inp_lport != ti->ti_dport || | |
296 | inp->inp_fport != ti->ti_sport || | |
297 | inp->inp_faddr.s_addr != ti->ti_src.s_addr || | |
298 | inp->inp_laddr.s_addr != ti->ti_dst.s_addr) { | |
299 | inp = in_pcblookup(&tcb, ti->ti_src, ti->ti_sport, | |
300 | ti->ti_dst, ti->ti_dport, INPLOOKUP_WILDCARD); | |
301 | if (inp) | |
302 | tcp_last_inpcb = inp; | |
303 | ++tcppcbcachemiss; | |
304 | } | |
2ff61f9d BJ |
305 | |
306 | /* | |
307 | * If the state is CLOSED (i.e., TCB does not exist) then | |
4aed14e3 | 308 | * all data in the incoming segment is discarded. |
386369f5 MK |
309 | * If the TCB exists but is in CLOSED state, it is embryonic, |
310 | * but should either do a listen or a connect soon. | |
2ff61f9d | 311 | */ |
22856bb8 | 312 | if (inp == 0) |
8a13b737 | 313 | goto dropwithreset; |
2ff61f9d | 314 | tp = intotcpcb(inp); |
22856bb8 | 315 | if (tp == 0) |
8a13b737 | 316 | goto dropwithreset; |
386369f5 MK |
317 | if (tp->t_state == TCPS_CLOSED) |
318 | goto drop; | |
69d96ae2 AC |
319 | |
320 | /* Unscale the window into a 32-bit value. */ | |
321 | if ((tiflags & TH_SYN) == 0) | |
322 | tiwin = ti->ti_win << tp->snd_scale; | |
323 | else | |
324 | tiwin = ti->ti_win; | |
325 | ||
f1b2fa5b | 326 | so = inp->inp_socket; |
6cf1965b MK |
327 | if (so->so_options & (SO_DEBUG|SO_ACCEPTCONN)) { |
328 | if (so->so_options & SO_DEBUG) { | |
329 | ostate = tp->t_state; | |
330 | tcp_saveti = *ti; | |
331 | } | |
332 | if (so->so_options & SO_ACCEPTCONN) { | |
333 | so = sonewconn(so, 0); | |
334 | if (so == 0) | |
335 | goto drop; | |
336 | /* | |
337 | * This is ugly, but .... | |
338 | * | |
339 | * Mark socket as temporary until we're | |
340 | * committed to keeping it. The code at | |
341 | * ``drop'' and ``dropwithreset'' check the | |
342 | * flag dropsocket to see if the temporary | |
343 | * socket created here should be discarded. | |
344 | * We mark the socket as discardable until | |
345 | * we're committed to it below in TCPS_LISTEN. | |
346 | */ | |
347 | dropsocket++; | |
348 | inp = (struct inpcb *)so->so_pcb; | |
349 | inp->inp_laddr = ti->ti_dst; | |
350 | inp->inp_lport = ti->ti_dport; | |
9d866d2f | 351 | #if BSD>=43 |
6cf1965b | 352 | inp->inp_options = ip_srcroute(); |
9d866d2f | 353 | #endif |
6cf1965b MK |
354 | tp = intotcpcb(inp); |
355 | tp->t_state = TCPS_LISTEN; | |
69d96ae2 AC |
356 | |
357 | /* Compute proper scaling value from buffer space | |
358 | */ | |
359 | while (tp->request_r_scale < TCP_MAX_WINSHIFT && | |
360 | TCP_MAXWIN << tp->request_r_scale < so->so_rcv.sb_hiwat) | |
361 | tp->request_r_scale++; | |
6cf1965b | 362 | } |
ebf42a75 | 363 | } |
87e78f19 | 364 | |
405c9168 BJ |
365 | /* |
366 | * Segment received on connection. | |
367 | * Reset idle time and keep-alive timer. | |
368 | */ | |
369 | tp->t_idle = 0; | |
8a36cf82 | 370 | tp->t_timer[TCPT_KEEP] = tcp_keepidle; |
405c9168 | 371 | |
8b5a83bb | 372 | /* |
99578149 MK |
373 | * Process options if not in LISTEN state, |
374 | * else do it below (after getting remote address). | |
8b5a83bb | 375 | */ |
69d96ae2 AC |
376 | if (optp && tp->t_state != TCPS_LISTEN) |
377 | tcp_dooptions(tp, optp, optlen, ti, | |
378 | &ts_present, &ts_val, &ts_ecr); | |
379 | ||
6cf1965b MK |
380 | /* |
381 | * Header prediction: check for the two common cases | |
382 | * of a uni-directional data xfer. If the packet has | |
383 | * no control flags, is in-sequence, the window didn't | |
384 | * change and we're not retransmitting, it's a | |
385 | * candidate. If the length is zero and the ack moved | |
386 | * forward, we're the sender side of the xfer. Just | |
387 | * free the data acked & wake any higher level process | |
388 | * that was blocked waiting for space. If the length | |
389 | * is non-zero and the ack didn't move, we're the | |
390 | * receiver side. If we're getting packets in-order | |
391 | * (the reassembly queue is empty), add the data to | |
392 | * the socket buffer and note that we need a delayed ack. | |
393 | */ | |
394 | if (tp->t_state == TCPS_ESTABLISHED && | |
395 | (tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK && | |
69d96ae2 | 396 | (!ts_present || TSTMP_GEQ(ts_val, tp->ts_recent)) && |
6cf1965b | 397 | ti->ti_seq == tp->rcv_nxt && |
69d96ae2 | 398 | tiwin && tiwin == tp->snd_wnd && |
6cf1965b | 399 | tp->snd_nxt == tp->snd_max) { |
69d96ae2 AC |
400 | |
401 | /* | |
402 | * If last ACK falls within this segment's sequence numbers, | |
403 | * record the timestamp. | |
404 | */ | |
405 | if (ts_present && SEQ_LEQ(ti->ti_seq, tp->last_ack_sent) && | |
406 | SEQ_LT(tp->last_ack_sent, ti->ti_seq + ti->ti_len)) { | |
407 | tp->ts_recent_age = tcp_now; | |
408 | tp->ts_recent = ts_val; | |
409 | } | |
410 | ||
6cf1965b MK |
411 | if (ti->ti_len == 0) { |
412 | if (SEQ_GT(ti->ti_ack, tp->snd_una) && | |
413 | SEQ_LEQ(ti->ti_ack, tp->snd_max) && | |
414 | tp->snd_cwnd >= tp->snd_wnd) { | |
415 | /* | |
416 | * this is a pure ack for outstanding data. | |
417 | */ | |
418 | ++tcppredack; | |
69d96ae2 AC |
419 | if (ts_present) |
420 | tcp_xmit_timer(tp, tcp_now-ts_ecr+1); | |
421 | else if (tp->t_rtt && | |
422 | SEQ_GT(ti->ti_ack, tp->t_rtseq)) | |
423 | tcp_xmit_timer(tp, tp->t_rtt); | |
6cf1965b MK |
424 | acked = ti->ti_ack - tp->snd_una; |
425 | tcpstat.tcps_rcvackpack++; | |
426 | tcpstat.tcps_rcvackbyte += acked; | |
427 | sbdrop(&so->so_snd, acked); | |
428 | tp->snd_una = ti->ti_ack; | |
429 | m_freem(m); | |
430 | ||
431 | /* | |
432 | * If all outstanding data are acked, stop | |
433 | * retransmit timer, otherwise restart timer | |
434 | * using current (possibly backed-off) value. | |
435 | * If process is waiting for space, | |
436 | * wakeup/selwakeup/signal. If data | |
437 | * are ready to send, let tcp_output | |
438 | * decide between more output or persist. | |
439 | */ | |
440 | if (tp->snd_una == tp->snd_max) | |
441 | tp->t_timer[TCPT_REXMT] = 0; | |
442 | else if (tp->t_timer[TCPT_PERSIST] == 0) | |
443 | tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; | |
444 | ||
445 | if (so->so_snd.sb_flags & SB_NOTIFY) | |
446 | sowwakeup(so); | |
447 | if (so->so_snd.sb_cc) | |
448 | (void) tcp_output(tp); | |
449 | return; | |
450 | } | |
451 | } else if (ti->ti_ack == tp->snd_una && | |
452 | tp->seg_next == (struct tcpiphdr *)tp && | |
453 | ti->ti_len <= sbspace(&so->so_rcv)) { | |
454 | /* | |
455 | * this is a pure, in-sequence data packet | |
456 | * with nothing on the reassembly queue and | |
457 | * we have enough buffer space to take it. | |
458 | */ | |
459 | ++tcppreddat; | |
460 | tp->rcv_nxt += ti->ti_len; | |
461 | tcpstat.tcps_rcvpack++; | |
462 | tcpstat.tcps_rcvbyte += ti->ti_len; | |
463 | /* | |
69d96ae2 AC |
464 | * Drop TCP, IP headers and TCP options then add data |
465 | * to socket buffer. | |
6cf1965b | 466 | */ |
69d96ae2 AC |
467 | m->m_data += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr); |
468 | m->m_len -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr); | |
6cf1965b MK |
469 | sbappend(&so->so_rcv, m); |
470 | sorwakeup(so); | |
471 | tp->t_flags |= TF_DELACK; | |
472 | return; | |
473 | } | |
474 | } | |
475 | ||
476 | /* | |
69d96ae2 | 477 | * Drop TCP, IP headers and TCP options. |
6cf1965b | 478 | */ |
69d96ae2 AC |
479 | m->m_data += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr); |
480 | m->m_len -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr); | |
8b5a83bb | 481 | |
87e78f19 | 482 | /* |
8a13b737 BJ |
483 | * Calculate amount of space in receive window, |
484 | * and then do TCP input processing. | |
a17510f3 MK |
485 | * Receive window is amount of space in rcv queue, |
486 | * but not less than advertised window. | |
87e78f19 | 487 | */ |
bbaaf0fd MK |
488 | { int win; |
489 | ||
490 | win = sbspace(&so->so_rcv); | |
491 | if (win < 0) | |
492 | win = 0; | |
9d91b170 | 493 | tp->rcv_wnd = max(win, (int)(tp->rcv_adv - tp->rcv_nxt)); |
bbaaf0fd | 494 | } |
2ff61f9d | 495 | |
87e78f19 BJ |
496 | switch (tp->t_state) { |
497 | ||
2ff61f9d BJ |
498 | /* |
499 | * If the state is LISTEN then ignore segment if it contains an RST. | |
500 | * If the segment contains an ACK then it is bad and send a RST. | |
501 | * If it does not contain a SYN then it is not interesting; drop it. | |
224f3a72 | 502 | * Don't bother responding if the destination was a broadcast. |
8a13b737 | 503 | * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial |
2ff61f9d | 504 | * tp->iss, and send a segment: |
8a13b737 | 505 | * <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK> |
2ff61f9d BJ |
506 | * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss. |
507 | * Fill in remote peer address fields if not previously specified. | |
508 | * Enter SYN_RECEIVED state, and process any other fields of this | |
4aed14e3 | 509 | * segment in this state. |
2ff61f9d | 510 | */ |
8075bb0e | 511 | case TCPS_LISTEN: { |
789d2a39 | 512 | struct mbuf *am; |
8075bb0e BJ |
513 | register struct sockaddr_in *sin; |
514 | ||
2ff61f9d BJ |
515 | if (tiflags & TH_RST) |
516 | goto drop; | |
22856bb8 | 517 | if (tiflags & TH_ACK) |
8a13b737 | 518 | goto dropwithreset; |
22856bb8 | 519 | if ((tiflags & TH_SYN) == 0) |
2ff61f9d | 520 | goto drop; |
09ed489e KS |
521 | /* |
522 | * RFC1122 4.2.3.10, p. 104: discard bcast/mcast SYN | |
523 | * in_broadcast() should never return true on a received | |
524 | * packet with M_BCAST not set. | |
525 | */ | |
69d96ae2 | 526 | if (m->m_flags & (M_BCAST|M_MCAST) || |
09ed489e | 527 | IN_MULTICAST(ti->ti_dst.s_addr)) |
224f3a72 | 528 | goto drop; |
6cf1965b | 529 | am = m_get(M_DONTWAIT, MT_SONAME); /* XXX */ |
789d2a39 SL |
530 | if (am == NULL) |
531 | goto drop; | |
532 | am->m_len = sizeof (struct sockaddr_in); | |
a8d3bf7f | 533 | sin = mtod(am, struct sockaddr_in *); |
8075bb0e | 534 | sin->sin_family = AF_INET; |
0af8f6fc | 535 | sin->sin_len = sizeof(*sin); |
8075bb0e BJ |
536 | sin->sin_addr = ti->ti_src; |
537 | sin->sin_port = ti->ti_sport; | |
54cc1b26 | 538 | bzero((caddr_t)sin->sin_zero, sizeof(sin->sin_zero)); |
ebcadd38 | 539 | laddr = inp->inp_laddr; |
789d2a39 | 540 | if (inp->inp_laddr.s_addr == INADDR_ANY) |
ebcadd38 | 541 | inp->inp_laddr = ti->ti_dst; |
a8d3bf7f | 542 | if (in_pcbconnect(inp, am)) { |
ebcadd38 | 543 | inp->inp_laddr = laddr; |
5a1f132a | 544 | (void) m_free(am); |
4aed14e3 | 545 | goto drop; |
ebcadd38 | 546 | } |
5a1f132a | 547 | (void) m_free(am); |
4aed14e3 BJ |
548 | tp->t_template = tcp_template(tp); |
549 | if (tp->t_template == 0) { | |
8011f5df | 550 | tp = tcp_drop(tp, ENOBUFS); |
a4f7ea71 | 551 | dropsocket = 0; /* socket is already gone */ |
4aed14e3 BJ |
552 | goto drop; |
553 | } | |
69d96ae2 AC |
554 | if (optp) |
555 | tcp_dooptions(tp, optp, optlen, ti, | |
556 | &ts_present, &ts_val, &ts_ecr); | |
96c50630 MK |
557 | if (iss) |
558 | tp->iss = iss; | |
559 | else | |
560 | tp->iss = tcp_iss; | |
561 | tcp_iss += TCP_ISSINCR/2; | |
2ff61f9d | 562 | tp->irs = ti->ti_seq; |
8a13b737 BJ |
563 | tcp_sendseqinit(tp); |
564 | tcp_rcvseqinit(tp); | |
bbaaf0fd | 565 | tp->t_flags |= TF_ACKNOW; |
2ff61f9d | 566 | tp->t_state = TCPS_SYN_RECEIVED; |
8a36cf82 | 567 | tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; |
7aa16f99 | 568 | dropsocket = 0; /* committed to socket */ |
96c50630 | 569 | tcpstat.tcps_accepts++; |
8a13b737 | 570 | goto trimthenstep6; |
8075bb0e | 571 | } |
87e78f19 | 572 | |
2ff61f9d BJ |
573 | /* |
574 | * If the state is SYN_SENT: | |
575 | * if seg contains an ACK, but not for our SYN, drop the input. | |
576 | * if seg contains a RST, then drop the connection. | |
577 | * if seg does not contain SYN, then drop it. | |
578 | * Otherwise this is an acceptable SYN segment | |
579 | * initialize tp->rcv_nxt and tp->irs | |
580 | * if seg contains ack then advance tp->snd_una | |
581 | * if SYN has been acked change to ESTABLISHED else SYN_RCVD state | |
582 | * arrange for segment to be acked (eventually) | |
583 | * continue processing rest of data/controls, beginning with URG | |
584 | */ | |
585 | case TCPS_SYN_SENT: | |
586 | if ((tiflags & TH_ACK) && | |
a17510f3 | 587 | (SEQ_LEQ(ti->ti_ack, tp->iss) || |
4b6b94ca | 588 | SEQ_GT(ti->ti_ack, tp->snd_max))) |
8a13b737 | 589 | goto dropwithreset; |
2ff61f9d | 590 | if (tiflags & TH_RST) { |
0e3936fa SL |
591 | if (tiflags & TH_ACK) |
592 | tp = tcp_drop(tp, ECONNREFUSED); | |
2ff61f9d | 593 | goto drop; |
87e78f19 | 594 | } |
2ff61f9d BJ |
595 | if ((tiflags & TH_SYN) == 0) |
596 | goto drop; | |
b57e9490 MK |
597 | if (tiflags & TH_ACK) { |
598 | tp->snd_una = ti->ti_ack; | |
599 | if (SEQ_LT(tp->snd_nxt, tp->snd_una)) | |
600 | tp->snd_nxt = tp->snd_una; | |
601 | } | |
4aed14e3 | 602 | tp->t_timer[TCPT_REXMT] = 0; |
2ff61f9d | 603 | tp->irs = ti->ti_seq; |
8a13b737 BJ |
604 | tcp_rcvseqinit(tp); |
605 | tp->t_flags |= TF_ACKNOW; | |
b57e9490 | 606 | if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) { |
96c50630 | 607 | tcpstat.tcps_connects++; |
4aed14e3 | 608 | soisconnected(so); |
2ff61f9d | 609 | tp->t_state = TCPS_ESTABLISHED; |
69d96ae2 AC |
610 | /* Do window scaling on this connection? */ |
611 | if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == | |
612 | (TF_RCVD_SCALE|TF_REQ_SCALE)) { | |
613 | tp->snd_scale = tp->requested_s_scale; | |
614 | tp->rcv_scale = tp->request_r_scale; | |
615 | } | |
6cf1965b MK |
616 | (void) tcp_reass(tp, (struct tcpiphdr *)0, |
617 | (struct mbuf *)0); | |
386369f5 MK |
618 | /* |
619 | * if we didn't have to retransmit the SYN, | |
620 | * use its rtt as our initial srtt & rtt var. | |
621 | */ | |
6cf1965b | 622 | if (tp->t_rtt) |
69d96ae2 | 623 | tcp_xmit_timer(tp, tp->t_rtt); |
405c9168 | 624 | } else |
8a13b737 | 625 | tp->t_state = TCPS_SYN_RECEIVED; |
8a13b737 BJ |
626 | |
627 | trimthenstep6: | |
628 | /* | |
4b6b94ca | 629 | * Advance ti->ti_seq to correspond to first data byte. |
8a13b737 BJ |
630 | * If data, trim to stay within window, |
631 | * dropping FIN if necessary. | |
632 | */ | |
4b6b94ca | 633 | ti->ti_seq++; |
8a13b737 BJ |
634 | if (ti->ti_len > tp->rcv_wnd) { |
635 | todrop = ti->ti_len - tp->rcv_wnd; | |
9d866d2f | 636 | #if BSD>=43 |
8a13b737 | 637 | m_adj(m, -todrop); |
9d866d2f MK |
638 | #else |
639 | /* XXX work around 4.2 m_adj bug */ | |
640 | if (m->m_len) { | |
641 | m_adj(m, -todrop); | |
642 | } else { | |
643 | /* skip tcp/ip header in first mbuf */ | |
644 | m_adj(m->m_next, -todrop); | |
645 | } | |
646 | #endif | |
8a13b737 | 647 | ti->ti_len = tp->rcv_wnd; |
bbaaf0fd | 648 | tiflags &= ~TH_FIN; |
96c50630 MK |
649 | tcpstat.tcps_rcvpackafterwin++; |
650 | tcpstat.tcps_rcvbyteafterwin += todrop; | |
87e78f19 | 651 | } |
e832edbc | 652 | tp->snd_wl1 = ti->ti_seq - 1; |
bbaaf0fd | 653 | tp->rcv_up = ti->ti_seq; |
8a13b737 | 654 | goto step6; |
2ff61f9d | 655 | } |
87e78f19 | 656 | |
2ff61f9d BJ |
657 | /* |
658 | * States other than LISTEN or SYN_SENT. | |
69d96ae2 AC |
659 | * First check timestamp, if present. |
660 | * Then check that at least some bytes of segment are within | |
96c50630 MK |
661 | * receive window. If segment begins before rcv_nxt, |
662 | * drop leading data (and SYN); if nothing left, just ack. | |
69d96ae2 AC |
663 | * |
664 | * RFC 1323 PAWS: If we have a timestamp reply on this segment | |
665 | * and it's less than ts_recent, drop it. | |
2ff61f9d | 666 | */ |
69d96ae2 AC |
667 | if (ts_present && (tiflags & TH_RST) == 0 && tp->ts_recent && |
668 | TSTMP_LT(ts_val, tp->ts_recent)) { | |
669 | ||
670 | /* Check to see if ts_recent is over 24 days old. */ | |
671 | if ((int)(tcp_now - tp->ts_recent_age) > TCP_PAWS_IDLE) { | |
672 | /* | |
673 | * Invalidate ts_recent. If this segment updates | |
674 | * ts_recent, the age will be reset later and ts_recent | |
675 | * will get a valid value. If it does not, setting | |
676 | * ts_recent to zero will at least satisfy the | |
677 | * requirement that zero be placed in the timestamp | |
678 | * echo reply when ts_recent isn't valid. The | |
679 | * age isn't reset until we get a valid ts_recent | |
680 | * because we don't want out-of-order segments to be | |
681 | * dropped when ts_recent is old. | |
682 | */ | |
683 | tp->ts_recent = 0; | |
684 | } else { | |
685 | tcpstat.tcps_rcvduppack++; | |
686 | tcpstat.tcps_rcvdupbyte += ti->ti_len; | |
687 | tcpstat.tcps_pawsdrop++; | |
688 | goto dropafterack; | |
689 | } | |
690 | } | |
691 | ||
96c50630 MK |
692 | todrop = tp->rcv_nxt - ti->ti_seq; |
693 | if (todrop > 0) { | |
694 | if (tiflags & TH_SYN) { | |
695 | tiflags &= ~TH_SYN; | |
696 | ti->ti_seq++; | |
697 | if (ti->ti_urp > 1) | |
698 | ti->ti_urp--; | |
699 | else | |
700 | tiflags &= ~TH_URG; | |
701 | todrop--; | |
702 | } | |
6cbc4e69 | 703 | if (todrop >= ti->ti_len) { |
8a36cf82 MK |
704 | tcpstat.tcps_rcvduppack++; |
705 | tcpstat.tcps_rcvdupbyte += ti->ti_len; | |
39b02f3c | 706 | /* |
8a36cf82 MK |
707 | * If segment is just one to the left of the window, |
708 | * check two special cases: | |
709 | * 1. Don't toss RST in response to 4.2-style keepalive. | |
710 | * 2. If the only thing to drop is a FIN, we can drop | |
711 | * it, but check the ACK or we will get into FIN | |
712 | * wars if our FINs crossed (both CLOSING). | |
713 | * In either case, send ACK to resynchronize, | |
714 | * but keep on processing for RST or ACK. | |
39b02f3c | 715 | */ |
8a36cf82 MK |
716 | if ((tiflags & TH_FIN && todrop == ti->ti_len + 1) |
717 | #ifdef TCP_COMPAT_42 | |
718 | || (tiflags & TH_RST && ti->ti_seq == tp->rcv_nxt - 1) | |
39b02f3c | 719 | #endif |
8a36cf82 MK |
720 | ) { |
721 | todrop = ti->ti_len; | |
722 | tiflags &= ~TH_FIN; | |
723 | tp->t_flags |= TF_ACKNOW; | |
69d96ae2 AC |
724 | } else { |
725 | /* | |
726 | * Handle the case when a bound socket connects | |
727 | * to itself. Allow packets with a SYN and | |
728 | * an ACK to continue with the processing. | |
729 | */ | |
730 | if (todrop != 0 || (tiflags & TH_ACK) == 0) | |
731 | goto dropafterack; | |
732 | } | |
a6bbda13 MK |
733 | } else { |
734 | tcpstat.tcps_rcvpartduppack++; | |
735 | tcpstat.tcps_rcvpartdupbyte += todrop; | |
96c50630 | 736 | } |
96c50630 MK |
737 | m_adj(m, todrop); |
738 | ti->ti_seq += todrop; | |
739 | ti->ti_len -= todrop; | |
740 | if (ti->ti_urp > todrop) | |
741 | ti->ti_urp -= todrop; | |
742 | else { | |
743 | tiflags &= ~TH_URG; | |
744 | ti->ti_urp = 0; | |
745 | } | |
746 | } | |
747 | ||
b819e9ea | 748 | /* |
8a36cf82 | 749 | * If new data are received on a connection after the |
b819e9ea MK |
750 | * user processes are gone, then RST the other end. |
751 | */ | |
752 | if ((so->so_state & SS_NOFDREF) && | |
753 | tp->t_state > TCPS_CLOSE_WAIT && ti->ti_len) { | |
754 | tp = tcp_close(tp); | |
755 | tcpstat.tcps_rcvafterclose++; | |
756 | goto dropwithreset; | |
757 | } | |
758 | ||
4f182c3f MK |
759 | /* |
760 | * If segment ends after window, drop trailing data | |
761 | * (and PUSH and FIN); if nothing left, just ACK. | |
762 | */ | |
763 | todrop = (ti->ti_seq+ti->ti_len) - (tp->rcv_nxt+tp->rcv_wnd); | |
764 | if (todrop > 0) { | |
765 | tcpstat.tcps_rcvpackafterwin++; | |
766 | if (todrop >= ti->ti_len) { | |
96c50630 | 767 | tcpstat.tcps_rcvbyteafterwin += ti->ti_len; |
4f182c3f MK |
768 | /* |
769 | * If a new connection request is received | |
770 | * while in TIME_WAIT, drop the old connection | |
771 | * and start over if the sequence numbers | |
772 | * are above the previous ones. | |
773 | */ | |
774 | if (tiflags & TH_SYN && | |
775 | tp->t_state == TCPS_TIME_WAIT && | |
776 | SEQ_GT(ti->ti_seq, tp->rcv_nxt)) { | |
777 | iss = tp->rcv_nxt + TCP_ISSINCR; | |
6cf1965b | 778 | tp = tcp_close(tp); |
4f182c3f | 779 | goto findpcb; |
96c50630 | 780 | } |
4f182c3f MK |
781 | /* |
782 | * If window is closed can only take segments at | |
783 | * window edge, and have to drop data and PUSH from | |
784 | * incoming segments. Continue processing, but | |
785 | * remember to ack. Otherwise, drop segment | |
786 | * and ack. | |
787 | */ | |
788 | if (tp->rcv_wnd == 0 && ti->ti_seq == tp->rcv_nxt) { | |
789 | tp->t_flags |= TF_ACKNOW; | |
790 | tcpstat.tcps_rcvwinprobe++; | |
791 | } else | |
2ff61f9d | 792 | goto dropafterack; |
4f182c3f | 793 | } else |
96c50630 | 794 | tcpstat.tcps_rcvbyteafterwin += todrop; |
9d866d2f | 795 | #if BSD>=43 |
4f182c3f | 796 | m_adj(m, -todrop); |
9d866d2f MK |
797 | #else |
798 | /* XXX work around m_adj bug */ | |
799 | if (m->m_len) { | |
800 | m_adj(m, -todrop); | |
801 | } else { | |
802 | /* skip tcp/ip header in first mbuf */ | |
803 | m_adj(m->m_next, -todrop); | |
804 | } | |
805 | #endif | |
4f182c3f MK |
806 | ti->ti_len -= todrop; |
807 | tiflags &= ~(TH_PUSH|TH_FIN); | |
87e78f19 | 808 | } |
87e78f19 | 809 | |
69d96ae2 AC |
810 | /* |
811 | * If last ACK falls within this segment's sequence numbers, | |
812 | * record its timestamp. | |
813 | */ | |
814 | if (ts_present && SEQ_LEQ(ti->ti_seq, tp->last_ack_sent) && | |
815 | SEQ_LT(tp->last_ack_sent, ti->ti_seq + ti->ti_len + | |
816 | ((tiflags & (TH_SYN|TH_FIN)) != 0))) { | |
817 | tp->ts_recent_age = tcp_now; | |
818 | tp->ts_recent = ts_val; | |
819 | } | |
820 | ||
87e78f19 | 821 | /* |
2ff61f9d BJ |
822 | * If the RST bit is set examine the state: |
823 | * SYN_RECEIVED STATE: | |
824 | * If passive open, return to LISTEN state. | |
825 | * If active open, inform user that connection was refused. | |
826 | * ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES: | |
827 | * Inform user that connection was reset, and close tcb. | |
828 | * CLOSING, LAST_ACK, TIME_WAIT STATES | |
829 | * Close the tcb. | |
87e78f19 | 830 | */ |
2ff61f9d | 831 | if (tiflags&TH_RST) switch (tp->t_state) { |
4b935108 | 832 | |
2ff61f9d | 833 | case TCPS_SYN_RECEIVED: |
8a36cf82 MK |
834 | so->so_error = ECONNREFUSED; |
835 | goto close; | |
2ff61f9d BJ |
836 | |
837 | case TCPS_ESTABLISHED: | |
838 | case TCPS_FIN_WAIT_1: | |
839 | case TCPS_FIN_WAIT_2: | |
840 | case TCPS_CLOSE_WAIT: | |
8a36cf82 MK |
841 | so->so_error = ECONNRESET; |
842 | close: | |
843 | tp->t_state = TCPS_CLOSED; | |
844 | tcpstat.tcps_drops++; | |
845 | tp = tcp_close(tp); | |
2ff61f9d BJ |
846 | goto drop; |
847 | ||
848 | case TCPS_CLOSING: | |
849 | case TCPS_LAST_ACK: | |
850 | case TCPS_TIME_WAIT: | |
0e3936fa | 851 | tp = tcp_close(tp); |
2ff61f9d | 852 | goto drop; |
87e78f19 | 853 | } |
87e78f19 BJ |
854 | |
855 | /* | |
2ff61f9d BJ |
856 | * If a SYN is in the window, then this is an |
857 | * error and we send an RST and drop the connection. | |
858 | */ | |
859 | if (tiflags & TH_SYN) { | |
0e3936fa | 860 | tp = tcp_drop(tp, ECONNRESET); |
8a13b737 | 861 | goto dropwithreset; |
2ff61f9d BJ |
862 | } |
863 | ||
864 | /* | |
865 | * If the ACK bit is off we drop the segment and return. | |
866 | */ | |
8a13b737 | 867 | if ((tiflags & TH_ACK) == 0) |
2ff61f9d BJ |
868 | goto drop; |
869 | ||
870 | /* | |
871 | * Ack processing. | |
87e78f19 | 872 | */ |
87e78f19 BJ |
873 | switch (tp->t_state) { |
874 | ||
2ff61f9d BJ |
875 | /* |
876 | * In SYN_RECEIVED state if the ack ACKs our SYN then enter | |
4859921b | 877 | * ESTABLISHED state and continue processing, otherwise |
2ff61f9d BJ |
878 | * send an RST. |
879 | */ | |
880 | case TCPS_SYN_RECEIVED: | |
8a13b737 | 881 | if (SEQ_GT(tp->snd_una, ti->ti_ack) || |
4b6b94ca | 882 | SEQ_GT(ti->ti_ack, tp->snd_max)) |
8a13b737 | 883 | goto dropwithreset; |
96c50630 | 884 | tcpstat.tcps_connects++; |
8a13b737 BJ |
885 | soisconnected(so); |
886 | tp->t_state = TCPS_ESTABLISHED; | |
69d96ae2 AC |
887 | /* Do window scaling? */ |
888 | if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == | |
889 | (TF_RCVD_SCALE|TF_REQ_SCALE)) { | |
890 | tp->snd_scale = tp->requested_s_scale; | |
891 | tp->rcv_scale = tp->request_r_scale; | |
892 | } | |
6cf1965b | 893 | (void) tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0); |
4aed14e3 | 894 | tp->snd_wl1 = ti->ti_seq - 1; |
8a13b737 | 895 | /* fall into ... */ |
87e78f19 | 896 | |
2ff61f9d BJ |
897 | /* |
898 | * In ESTABLISHED state: drop duplicate ACKs; ACK out of range | |
899 | * ACKs. If the ack is in the range | |
4b6b94ca | 900 | * tp->snd_una < ti->ti_ack <= tp->snd_max |
2ff61f9d BJ |
901 | * then advance tp->snd_una to ti->ti_ack and drop |
902 | * data from the retransmission queue. If this ACK reflects | |
903 | * more up to date window information we update our window information. | |
904 | */ | |
905 | case TCPS_ESTABLISHED: | |
906 | case TCPS_FIN_WAIT_1: | |
907 | case TCPS_FIN_WAIT_2: | |
908 | case TCPS_CLOSE_WAIT: | |
909 | case TCPS_CLOSING: | |
4aed14e3 BJ |
910 | case TCPS_LAST_ACK: |
911 | case TCPS_TIME_WAIT: | |
8a13b737 | 912 | |
96c50630 | 913 | if (SEQ_LEQ(ti->ti_ack, tp->snd_una)) { |
69d96ae2 | 914 | if (ti->ti_len == 0 && tiwin == tp->snd_wnd) { |
96c50630 | 915 | tcpstat.tcps_rcvdupack++; |
386369f5 | 916 | /* |
6cf1965b MK |
917 | * If we have outstanding data (other than |
918 | * a window probe), this is a completely | |
386369f5 MK |
919 | * duplicate ack (ie, window info didn't |
920 | * change), the ack is the biggest we've | |
921 | * seen and we've seen exactly our rexmt | |
922 | * threshhold of them, assume a packet | |
923 | * has been dropped and retransmit it. | |
924 | * Kludge snd_nxt & the congestion | |
925 | * window so we send only this one | |
6cf1965b MK |
926 | * packet. |
927 | * | |
928 | * We know we're losing at the current | |
929 | * window size so do congestion avoidance | |
930 | * (set ssthresh to half the current window | |
931 | * and pull our congestion window back to | |
932 | * the new ssthresh). | |
933 | * | |
934 | * Dup acks mean that packets have left the | |
935 | * network (they're now cached at the receiver) | |
936 | * so bump cwnd by the amount in the receiver | |
937 | * to keep a constant cwnd packets in the | |
938 | * network. | |
386369f5 MK |
939 | */ |
940 | if (tp->t_timer[TCPT_REXMT] == 0 || | |
941 | ti->ti_ack != tp->snd_una) | |
942 | tp->t_dupacks = 0; | |
943 | else if (++tp->t_dupacks == tcprexmtthresh) { | |
944 | tcp_seq onxt = tp->snd_nxt; | |
3c317835 | 945 | u_int win = |
9d91b170 | 946 | min(tp->snd_wnd, tp->snd_cwnd) / 2 / |
3c317835 MK |
947 | tp->t_maxseg; |
948 | ||
949 | if (win < 2) | |
950 | win = 2; | |
951 | tp->snd_ssthresh = win * tp->t_maxseg; | |
386369f5 MK |
952 | tp->t_timer[TCPT_REXMT] = 0; |
953 | tp->t_rtt = 0; | |
954 | tp->snd_nxt = ti->ti_ack; | |
955 | tp->snd_cwnd = tp->t_maxseg; | |
956 | (void) tcp_output(tp); | |
6cf1965b MK |
957 | tp->snd_cwnd = tp->snd_ssthresh + |
958 | tp->t_maxseg * tp->t_dupacks; | |
386369f5 MK |
959 | if (SEQ_GT(onxt, tp->snd_nxt)) |
960 | tp->snd_nxt = onxt; | |
961 | goto drop; | |
6cf1965b MK |
962 | } else if (tp->t_dupacks > tcprexmtthresh) { |
963 | tp->snd_cwnd += tp->t_maxseg; | |
964 | (void) tcp_output(tp); | |
965 | goto drop; | |
386369f5 MK |
966 | } |
967 | } else | |
968 | tp->t_dupacks = 0; | |
2ff61f9d | 969 | break; |
96c50630 | 970 | } |
6cf1965b MK |
971 | /* |
972 | * If the congestion window was inflated to account | |
973 | * for the other side's cached packets, retract it. | |
974 | */ | |
975 | if (tp->t_dupacks > tcprexmtthresh && | |
976 | tp->snd_cwnd > tp->snd_ssthresh) | |
977 | tp->snd_cwnd = tp->snd_ssthresh; | |
386369f5 | 978 | tp->t_dupacks = 0; |
96c50630 MK |
979 | if (SEQ_GT(ti->ti_ack, tp->snd_max)) { |
980 | tcpstat.tcps_rcvacktoomuch++; | |
2ff61f9d | 981 | goto dropafterack; |
96c50630 | 982 | } |
8a13b737 | 983 | acked = ti->ti_ack - tp->snd_una; |
96c50630 MK |
984 | tcpstat.tcps_rcvackpack++; |
985 | tcpstat.tcps_rcvackbyte += acked; | |
dd020fc8 BJ |
986 | |
987 | /* | |
69d96ae2 AC |
988 | * If we have a timestamp reply, update smoothed |
989 | * round trip time. If no timestamp is present but | |
990 | * transmit timer is running and timed sequence | |
dd020fc8 | 991 | * number was acked, update smoothed round trip time. |
a6bbda13 MK |
992 | * Since we now have an rtt measurement, cancel the |
993 | * timer backoff (cf., Phil Karn's retransmit alg.). | |
994 | * Recompute the initial retransmit timer. | |
dd020fc8 | 995 | */ |
69d96ae2 AC |
996 | if (ts_present) |
997 | tcp_xmit_timer(tp, tcp_now-ts_ecr+1); | |
998 | else if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq)) | |
999 | tcp_xmit_timer(tp,tp->t_rtt); | |
dd020fc8 | 1000 | |
91039e49 MK |
1001 | /* |
1002 | * If all outstanding data is acked, stop retransmit | |
1003 | * timer and remember to restart (more output or persist). | |
1004 | * If there is more data to be acked, restart retransmit | |
a6bbda13 | 1005 | * timer, using current (possibly backed-off) value. |
91039e49 MK |
1006 | */ |
1007 | if (ti->ti_ack == tp->snd_max) { | |
4aed14e3 | 1008 | tp->t_timer[TCPT_REXMT] = 0; |
91039e49 | 1009 | needoutput = 1; |
a6bbda13 MK |
1010 | } else if (tp->t_timer[TCPT_PERSIST] == 0) |
1011 | tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; | |
1e9621b8 | 1012 | /* |
386369f5 MK |
1013 | * When new data is acked, open the congestion window. |
1014 | * If the window gives us less than ssthresh packets | |
1015 | * in flight, open exponentially (maxseg per packet). | |
6cf1965b MK |
1016 | * Otherwise open linearly: maxseg per window |
1017 | * (maxseg^2 / cwnd per packet), plus a constant | |
1018 | * fraction of a packet (maxseg/8) to help larger windows | |
1019 | * open quickly enough. | |
1e9621b8 | 1020 | */ |
386369f5 | 1021 | { |
6cf1965b MK |
1022 | register u_int cw = tp->snd_cwnd; |
1023 | register u_int incr = tp->t_maxseg; | |
386369f5 | 1024 | |
6cf1965b MK |
1025 | if (cw > tp->snd_ssthresh) |
1026 | incr = incr * incr / cw + incr / 8; | |
69d96ae2 | 1027 | tp->snd_cwnd = min(cw + incr, TCP_MAXWIN<<tp->snd_scale); |
386369f5 | 1028 | } |
6703c41f | 1029 | if (acked > so->so_snd.sb_cc) { |
6703c41f | 1030 | tp->snd_wnd -= so->so_snd.sb_cc; |
8011f5df | 1031 | sbdrop(&so->so_snd, (int)so->so_snd.sb_cc); |
4859921b | 1032 | ourfinisacked = 1; |
6703c41f | 1033 | } else { |
668cc26d | 1034 | sbdrop(&so->so_snd, acked); |
6703c41f | 1035 | tp->snd_wnd -= acked; |
4859921b | 1036 | ourfinisacked = 0; |
6703c41f | 1037 | } |
6cf1965b MK |
1038 | if (so->so_snd.sb_flags & SB_NOTIFY) |
1039 | sowwakeup(so); | |
4b6b94ca | 1040 | tp->snd_una = ti->ti_ack; |
b8977237 BJ |
1041 | if (SEQ_LT(tp->snd_nxt, tp->snd_una)) |
1042 | tp->snd_nxt = tp->snd_una; | |
405c9168 | 1043 | |
87e78f19 BJ |
1044 | switch (tp->t_state) { |
1045 | ||
2ff61f9d BJ |
1046 | /* |
1047 | * In FIN_WAIT_1 STATE in addition to the processing | |
1048 | * for the ESTABLISHED state if our FIN is now acknowledged | |
8a13b737 | 1049 | * then enter FIN_WAIT_2. |
2ff61f9d BJ |
1050 | */ |
1051 | case TCPS_FIN_WAIT_1: | |
fdae4427 BJ |
1052 | if (ourfinisacked) { |
1053 | /* | |
1054 | * If we can't receive any more | |
1055 | * data, then closing user can proceed. | |
a17510f3 MK |
1056 | * Starting the timer is contrary to the |
1057 | * specification, but if we don't get a FIN | |
1058 | * we'll hang forever. | |
fdae4427 | 1059 | */ |
a17510f3 | 1060 | if (so->so_state & SS_CANTRCVMORE) { |
fdae4427 | 1061 | soisdisconnected(so); |
8a36cf82 | 1062 | tp->t_timer[TCPT_2MSL] = tcp_maxidle; |
a17510f3 | 1063 | } |
8a13b737 | 1064 | tp->t_state = TCPS_FIN_WAIT_2; |
fdae4427 | 1065 | } |
87e78f19 BJ |
1066 | break; |
1067 | ||
2ff61f9d BJ |
1068 | /* |
1069 | * In CLOSING STATE in addition to the processing for | |
1070 | * the ESTABLISHED state if the ACK acknowledges our FIN | |
1071 | * then enter the TIME-WAIT state, otherwise ignore | |
1072 | * the segment. | |
1073 | */ | |
1074 | case TCPS_CLOSING: | |
4aed14e3 | 1075 | if (ourfinisacked) { |
2ff61f9d | 1076 | tp->t_state = TCPS_TIME_WAIT; |
4aed14e3 BJ |
1077 | tcp_canceltimers(tp); |
1078 | tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; | |
1079 | soisdisconnected(so); | |
1080 | } | |
1081 | break; | |
87e78f19 | 1082 | |
2ff61f9d | 1083 | /* |
e20bac9c MK |
1084 | * In LAST_ACK, we may still be waiting for data to drain |
1085 | * and/or to be acked, as well as for the ack of our FIN. | |
1086 | * If our FIN is now acknowledged, delete the TCB, | |
1087 | * enter the closed state and return. | |
2ff61f9d BJ |
1088 | */ |
1089 | case TCPS_LAST_ACK: | |
e20bac9c | 1090 | if (ourfinisacked) { |
0e3936fa | 1091 | tp = tcp_close(tp); |
e20bac9c MK |
1092 | goto drop; |
1093 | } | |
1094 | break; | |
87e78f19 | 1095 | |
2ff61f9d BJ |
1096 | /* |
1097 | * In TIME_WAIT state the only thing that should arrive | |
1098 | * is a retransmission of the remote FIN. Acknowledge | |
1099 | * it and restart the finack timer. | |
1100 | */ | |
1101 | case TCPS_TIME_WAIT: | |
405c9168 | 1102 | tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; |
2ff61f9d | 1103 | goto dropafterack; |
87e78f19 | 1104 | } |
8a13b737 | 1105 | } |
87e78f19 | 1106 | |
2ff61f9d | 1107 | step6: |
4aed14e3 BJ |
1108 | /* |
1109 | * Update window information. | |
bbaaf0fd | 1110 | * Don't look at window if no ACK: TAC's send garbage on first SYN. |
4aed14e3 | 1111 | */ |
bbaaf0fd MK |
1112 | if ((tiflags & TH_ACK) && |
1113 | (SEQ_LT(tp->snd_wl1, ti->ti_seq) || tp->snd_wl1 == ti->ti_seq && | |
8e65fd66 | 1114 | (SEQ_LT(tp->snd_wl2, ti->ti_ack) || |
69d96ae2 | 1115 | tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd))) { |
96c50630 MK |
1116 | /* keep track of pure window updates */ |
1117 | if (ti->ti_len == 0 && | |
69d96ae2 | 1118 | tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd) |
96c50630 | 1119 | tcpstat.tcps_rcvwinupd++; |
69d96ae2 | 1120 | tp->snd_wnd = tiwin; |
4aed14e3 BJ |
1121 | tp->snd_wl1 = ti->ti_seq; |
1122 | tp->snd_wl2 = ti->ti_ack; | |
18a438b6 MK |
1123 | if (tp->snd_wnd > tp->max_sndwnd) |
1124 | tp->max_sndwnd = tp->snd_wnd; | |
91039e49 MK |
1125 | needoutput = 1; |
1126 | } | |
4aed14e3 | 1127 | |
2ff61f9d | 1128 | /* |
b2db9217 | 1129 | * Process segments with URG. |
2ff61f9d | 1130 | */ |
9c811062 BJ |
1131 | if ((tiflags & TH_URG) && ti->ti_urp && |
1132 | TCPS_HAVERCVDFIN(tp->t_state) == 0) { | |
f4be5024 | 1133 | /* |
bbaaf0fd | 1134 | * This is a kludge, but if we receive and accept |
a5d9c993 SL |
1135 | * random urgent pointers, we'll crash in |
1136 | * soreceive. It's hard to imagine someone | |
1137 | * actually wanting to send this much urgent data. | |
f4be5024 | 1138 | */ |
69d96ae2 | 1139 | if (ti->ti_urp + so->so_rcv.sb_cc > sb_max) { |
f4be5024 SL |
1140 | ti->ti_urp = 0; /* XXX */ |
1141 | tiflags &= ~TH_URG; /* XXX */ | |
bbaaf0fd | 1142 | goto dodata; /* XXX */ |
f4be5024 | 1143 | } |
b2db9217 BJ |
1144 | /* |
1145 | * If this segment advances the known urgent pointer, | |
1146 | * then mark the data stream. This should not happen | |
1147 | * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since | |
1148 | * a FIN has been received from the remote side. | |
1149 | * In these states we ignore the URG. | |
ae6760c5 MK |
1150 | * |
1151 | * According to RFC961 (Assigned Protocols), | |
1152 | * the urgent pointer points to the last octet | |
1153 | * of urgent data. We continue, however, | |
1154 | * to consider it to indicate the first octet | |
6cf1965b MK |
1155 | * of data past the urgent section as the original |
1156 | * spec states (in one of two places). | |
b2db9217 BJ |
1157 | */ |
1158 | if (SEQ_GT(ti->ti_seq+ti->ti_urp, tp->rcv_up)) { | |
1159 | tp->rcv_up = ti->ti_seq + ti->ti_urp; | |
1160 | so->so_oobmark = so->so_rcv.sb_cc + | |
1161 | (tp->rcv_up - tp->rcv_nxt) - 1; | |
1162 | if (so->so_oobmark == 0) | |
1163 | so->so_state |= SS_RCVATMARK; | |
77a4e3ca | 1164 | sohasoutofband(so); |
a17510f3 | 1165 | tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA); |
b2db9217 BJ |
1166 | } |
1167 | /* | |
1168 | * Remove out of band data so doesn't get presented to user. | |
1169 | * This can happen independent of advancing the URG pointer, | |
1170 | * but if two URG's are pending at once, some out-of-band | |
1171 | * data may creep in... ick. | |
1172 | */ | |
9d866d2f MK |
1173 | if (ti->ti_urp <= ti->ti_len |
1174 | #ifdef SO_OOBINLINE | |
1175 | && (so->so_options & SO_OOBINLINE) == 0 | |
1176 | #endif | |
6cf1965b MK |
1177 | ) |
1178 | tcp_pulloutofband(so, ti, m); | |
bbaaf0fd MK |
1179 | } else |
1180 | /* | |
1181 | * If no out of band data is expected, | |
1182 | * pull receive urgent pointer along | |
1183 | * with the receive window. | |
1184 | */ | |
1185 | if (SEQ_GT(tp->rcv_nxt, tp->rcv_up)) | |
1186 | tp->rcv_up = tp->rcv_nxt; | |
1187 | dodata: /* XXX */ | |
87e78f19 BJ |
1188 | |
1189 | /* | |
2ff61f9d BJ |
1190 | * Process the segment text, merging it into the TCP sequencing queue, |
1191 | * and arranging for acknowledgment of receipt if necessary. | |
1192 | * This process logically involves adjusting tp->rcv_wnd as data | |
1193 | * is presented to the user (this happens in tcp_usrreq.c, | |
1194 | * case PRU_RCVD). If a FIN has already been received on this | |
1195 | * connection then we just ignore the text. | |
87e78f19 | 1196 | */ |
7984a662 MK |
1197 | if ((ti->ti_len || (tiflags&TH_FIN)) && |
1198 | TCPS_HAVERCVDFIN(tp->t_state) == 0) { | |
a17510f3 | 1199 | TCP_REASS(tp, ti, m, so, tiflags); |
18a438b6 MK |
1200 | /* |
1201 | * Note the amount of data that peer has sent into | |
1202 | * our window, in order to estimate the sender's | |
1203 | * buffer size. | |
1204 | */ | |
386369f5 | 1205 | len = so->so_rcv.sb_hiwat - (tp->rcv_adv - tp->rcv_nxt); |
4aed14e3 | 1206 | } else { |
2b4b57cd | 1207 | m_freem(m); |
e832edbc | 1208 | tiflags &= ~TH_FIN; |
4aed14e3 | 1209 | } |
87e78f19 BJ |
1210 | |
1211 | /* | |
e832edbc BJ |
1212 | * If FIN is received ACK the FIN and let the user know |
1213 | * that the connection is closing. | |
87e78f19 | 1214 | */ |
e832edbc | 1215 | if (tiflags & TH_FIN) { |
4aed14e3 BJ |
1216 | if (TCPS_HAVERCVDFIN(tp->t_state) == 0) { |
1217 | socantrcvmore(so); | |
1218 | tp->t_flags |= TF_ACKNOW; | |
1219 | tp->rcv_nxt++; | |
1220 | } | |
2ff61f9d | 1221 | switch (tp->t_state) { |
87e78f19 | 1222 | |
2ff61f9d BJ |
1223 | /* |
1224 | * In SYN_RECEIVED and ESTABLISHED STATES | |
1225 | * enter the CLOSE_WAIT state. | |
53a5409e | 1226 | */ |
2ff61f9d BJ |
1227 | case TCPS_SYN_RECEIVED: |
1228 | case TCPS_ESTABLISHED: | |
1229 | tp->t_state = TCPS_CLOSE_WAIT; | |
1230 | break; | |
53a5409e | 1231 | |
2ff61f9d | 1232 | /* |
8a13b737 BJ |
1233 | * If still in FIN_WAIT_1 STATE FIN has not been acked so |
1234 | * enter the CLOSING state. | |
53a5409e | 1235 | */ |
2ff61f9d | 1236 | case TCPS_FIN_WAIT_1: |
8a13b737 | 1237 | tp->t_state = TCPS_CLOSING; |
2ff61f9d | 1238 | break; |
87e78f19 | 1239 | |
2ff61f9d BJ |
1240 | /* |
1241 | * In FIN_WAIT_2 state enter the TIME_WAIT state, | |
1242 | * starting the time-wait timer, turning off the other | |
1243 | * standard timers. | |
1244 | */ | |
1245 | case TCPS_FIN_WAIT_2: | |
4aed14e3 | 1246 | tp->t_state = TCPS_TIME_WAIT; |
a6503abf | 1247 | tcp_canceltimers(tp); |
405c9168 | 1248 | tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; |
4aed14e3 | 1249 | soisdisconnected(so); |
2ff61f9d BJ |
1250 | break; |
1251 | ||
53a5409e | 1252 | /* |
2ff61f9d | 1253 | * In TIME_WAIT state restart the 2 MSL time_wait timer. |
53a5409e | 1254 | */ |
2ff61f9d | 1255 | case TCPS_TIME_WAIT: |
405c9168 | 1256 | tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; |
2ff61f9d | 1257 | break; |
8a13b737 | 1258 | } |
87e78f19 | 1259 | } |
4b935108 BJ |
1260 | if (so->so_options & SO_DEBUG) |
1261 | tcp_trace(TA_INPUT, ostate, tp, &tcp_saveti, 0); | |
8a13b737 BJ |
1262 | |
1263 | /* | |
1264 | * Return any desired output. | |
1265 | */ | |
91039e49 | 1266 | if (needoutput || (tp->t_flags & TF_ACKNOW)) |
bbaaf0fd | 1267 | (void) tcp_output(tp); |
2ff61f9d | 1268 | return; |
8a13b737 | 1269 | |
2ff61f9d | 1270 | dropafterack: |
8a13b737 | 1271 | /* |
1e977657 BJ |
1272 | * Generate an ACK dropping incoming segment if it occupies |
1273 | * sequence space, where the ACK reflects our state. | |
8a13b737 | 1274 | */ |
ad616704 | 1275 | if (tiflags & TH_RST) |
8a13b737 | 1276 | goto drop; |
5722bd39 | 1277 | m_freem(m); |
4859921b MK |
1278 | tp->t_flags |= TF_ACKNOW; |
1279 | (void) tcp_output(tp); | |
4b6b94ca | 1280 | return; |
8a13b737 BJ |
1281 | |
1282 | dropwithreset: | |
1283 | /* | |
4aed14e3 | 1284 | * Generate a RST, dropping incoming segment. |
8a13b737 | 1285 | * Make ACK acceptable to originator of segment. |
69d96ae2 | 1286 | * Don't bother to respond if destination was broadcast/multicast. |
8a13b737 | 1287 | */ |
69d96ae2 | 1288 | if ((tiflags & TH_RST) || m->m_flags & (M_BCAST|M_MCAST) || |
09ed489e | 1289 | IN_MULTICAST(ti->ti_dst.s_addr)) |
8a13b737 BJ |
1290 | goto drop; |
1291 | if (tiflags & TH_ACK) | |
9d91b170 | 1292 | tcp_respond(tp, ti, m, (tcp_seq)0, ti->ti_ack, TH_RST); |
8a13b737 BJ |
1293 | else { |
1294 | if (tiflags & TH_SYN) | |
1295 | ti->ti_len++; | |
9d91b170 | 1296 | tcp_respond(tp, ti, m, ti->ti_seq+ti->ti_len, (tcp_seq)0, |
1e977657 | 1297 | TH_RST|TH_ACK); |
8a13b737 | 1298 | } |
7aa16f99 SL |
1299 | /* destroy temporarily created socket */ |
1300 | if (dropsocket) | |
1301 | (void) soabort(so); | |
4b6b94ca | 1302 | return; |
8a13b737 | 1303 | |
2ff61f9d | 1304 | drop: |
8a13b737 BJ |
1305 | /* |
1306 | * Drop space held by incoming segment and return. | |
1307 | */ | |
f3cdd721 BJ |
1308 | if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) |
1309 | tcp_trace(TA_DROP, ostate, tp, &tcp_saveti, 0); | |
2ff61f9d | 1310 | m_freem(m); |
7aa16f99 SL |
1311 | /* destroy temporarily created socket */ |
1312 | if (dropsocket) | |
1313 | (void) soabort(so); | |
4b935108 | 1314 | return; |
55ad7758 | 1315 | #ifndef TUBA_INCLUDE |
2ff61f9d BJ |
1316 | } |
1317 | ||
c46785cb | 1318 | void |
69d96ae2 | 1319 | tcp_dooptions(tp, cp, cnt, ti, ts_present, ts_val, ts_ecr) |
8b5a83bb | 1320 | struct tcpcb *tp; |
69d96ae2 AC |
1321 | u_char *cp; |
1322 | int cnt; | |
99578149 | 1323 | struct tcpiphdr *ti; |
69d96ae2 AC |
1324 | int *ts_present; |
1325 | u_long *ts_val, *ts_ecr; | |
5e74df82 | 1326 | { |
6cf1965b | 1327 | u_short mss; |
69d96ae2 | 1328 | int opt, optlen; |
8b5a83bb | 1329 | |
8b5a83bb BJ |
1330 | for (; cnt > 0; cnt -= optlen, cp += optlen) { |
1331 | opt = cp[0]; | |
1332 | if (opt == TCPOPT_EOL) | |
1333 | break; | |
1334 | if (opt == TCPOPT_NOP) | |
1335 | optlen = 1; | |
357b20fc | 1336 | else { |
8b5a83bb | 1337 | optlen = cp[1]; |
357b20fc SL |
1338 | if (optlen <= 0) |
1339 | break; | |
1340 | } | |
8b5a83bb BJ |
1341 | switch (opt) { |
1342 | ||
1343 | default: | |
6cf1965b | 1344 | continue; |
8b5a83bb BJ |
1345 | |
1346 | case TCPOPT_MAXSEG: | |
69d96ae2 | 1347 | if (optlen != TCPOLEN_MAXSEG) |
8b5a83bb | 1348 | continue; |
99578149 MK |
1349 | if (!(ti->ti_flags & TH_SYN)) |
1350 | continue; | |
6cf1965b MK |
1351 | bcopy((char *) cp + 2, (char *) &mss, sizeof(mss)); |
1352 | NTOHS(mss); | |
1353 | (void) tcp_mss(tp, mss); /* sets t_maxseg */ | |
8b5a83bb | 1354 | break; |
69d96ae2 AC |
1355 | |
1356 | case TCPOPT_WINDOW: | |
1357 | if (optlen != TCPOLEN_WINDOW) | |
1358 | continue; | |
1359 | if (!(ti->ti_flags & TH_SYN)) | |
1360 | continue; | |
1361 | tp->t_flags |= TF_RCVD_SCALE; | |
1362 | tp->requested_s_scale = min(cp[2], TCP_MAX_WINSHIFT); | |
1363 | break; | |
1364 | ||
1365 | case TCPOPT_TIMESTAMP: | |
1366 | if (optlen != TCPOLEN_TIMESTAMP) | |
1367 | continue; | |
1368 | *ts_present = 1; | |
1369 | bcopy((char *)cp + 2, (char *) ts_val, sizeof(*ts_val)); | |
1370 | NTOHL(*ts_val); | |
1371 | bcopy((char *)cp + 6, (char *) ts_ecr, sizeof(*ts_ecr)); | |
1372 | NTOHL(*ts_ecr); | |
1373 | ||
1374 | /* | |
1375 | * A timestamp received in a SYN makes | |
1376 | * it ok to send timestamp requests and replies. | |
1377 | */ | |
1378 | if (ti->ti_flags & TH_SYN) { | |
1379 | tp->t_flags |= TF_RCVD_TSTMP; | |
1380 | tp->ts_recent = *ts_val; | |
1381 | tp->ts_recent_age = tcp_now; | |
1382 | } | |
1383 | break; | |
8b5a83bb | 1384 | } |
5e74df82 | 1385 | } |
5e74df82 BJ |
1386 | } |
1387 | ||
b2db9217 BJ |
1388 | /* |
1389 | * Pull out of band byte out of a segment so | |
1390 | * it doesn't appear in the user's data queue. | |
1391 | * It is still reflected in the segment length for | |
1392 | * sequencing purposes. | |
1393 | */ | |
c46785cb | 1394 | void |
6cf1965b | 1395 | tcp_pulloutofband(so, ti, m) |
b2db9217 BJ |
1396 | struct socket *so; |
1397 | struct tcpiphdr *ti; | |
b2db9217 | 1398 | register struct mbuf *m; |
6cf1965b | 1399 | { |
1acff8ec | 1400 | int cnt = ti->ti_urp - 1; |
b2db9217 | 1401 | |
b2db9217 BJ |
1402 | while (cnt >= 0) { |
1403 | if (m->m_len > cnt) { | |
1404 | char *cp = mtod(m, caddr_t) + cnt; | |
1405 | struct tcpcb *tp = sototcpcb(so); | |
1406 | ||
1407 | tp->t_iobc = *cp; | |
1408 | tp->t_oobflags |= TCPOOB_HAVEDATA; | |
668cc26d | 1409 | bcopy(cp+1, cp, (unsigned)(m->m_len - cnt - 1)); |
b2db9217 BJ |
1410 | m->m_len--; |
1411 | return; | |
1412 | } | |
1413 | cnt -= m->m_len; | |
1414 | m = m->m_next; | |
1415 | if (m == 0) | |
1416 | break; | |
1417 | } | |
1418 | panic("tcp_pulloutofband"); | |
1419 | } | |
1420 | ||
99578149 | 1421 | /* |
6cf1965b MK |
1422 | * Collect new round-trip time estimate |
1423 | * and update averages and current timeout. | |
99578149 | 1424 | */ |
c46785cb | 1425 | void |
69d96ae2 | 1426 | tcp_xmit_timer(tp, rtt) |
c2a1cd2c | 1427 | register struct tcpcb *tp; |
c46785cb | 1428 | short rtt; |
6cf1965b MK |
1429 | { |
1430 | register short delta; | |
1431 | ||
1432 | tcpstat.tcps_rttupdated++; | |
1433 | if (tp->t_srtt != 0) { | |
1434 | /* | |
1435 | * srtt is stored as fixed point with 3 bits after the | |
1436 | * binary point (i.e., scaled by 8). The following magic | |
1437 | * is equivalent to the smoothing algorithm in rfc793 with | |
1438 | * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed | |
69d96ae2 | 1439 | * point). Adjust rtt to origin 0. |
6cf1965b | 1440 | */ |
69d96ae2 | 1441 | delta = rtt - 1 - (tp->t_srtt >> TCP_RTT_SHIFT); |
6cf1965b MK |
1442 | if ((tp->t_srtt += delta) <= 0) |
1443 | tp->t_srtt = 1; | |
1444 | /* | |
1445 | * We accumulate a smoothed rtt variance (actually, a | |
1446 | * smoothed mean difference), then set the retransmit | |
1447 | * timer to smoothed rtt + 4 times the smoothed variance. | |
1448 | * rttvar is stored as fixed point with 2 bits after the | |
1449 | * binary point (scaled by 4). The following is | |
1450 | * equivalent to rfc793 smoothing with an alpha of .75 | |
1451 | * (rttvar = rttvar*3/4 + |delta| / 4). This replaces | |
1452 | * rfc793's wired-in beta. | |
1453 | */ | |
1454 | if (delta < 0) | |
1455 | delta = -delta; | |
1456 | delta -= (tp->t_rttvar >> TCP_RTTVAR_SHIFT); | |
1457 | if ((tp->t_rttvar += delta) <= 0) | |
1458 | tp->t_rttvar = 1; | |
1459 | } else { | |
1460 | /* | |
1461 | * No rtt measurement yet - use the unsmoothed rtt. | |
1462 | * Set the variance to half the rtt (so our first | |
22b91e81 | 1463 | * retransmit happens at 3*rtt). |
6cf1965b | 1464 | */ |
69d96ae2 AC |
1465 | tp->t_srtt = rtt << TCP_RTT_SHIFT; |
1466 | tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1); | |
6cf1965b MK |
1467 | } |
1468 | tp->t_rtt = 0; | |
1469 | tp->t_rxtshift = 0; | |
1470 | ||
1471 | /* | |
1472 | * the retransmit should happen at rtt + 4 * rttvar. | |
1473 | * Because of the way we do the smoothing, srtt and rttvar | |
1474 | * will each average +1/2 tick of bias. When we compute | |
1475 | * the retransmit timer, we want 1/2 tick of rounding and | |
1476 | * 1 extra tick because of +-1/2 tick uncertainty in the | |
1477 | * firing of the timer. The bias will give us exactly the | |
1478 | * 1.5 tick we need. But, because the bias is | |
1479 | * statistical, we have to test that we don't drop below | |
1480 | * the minimum feasible timer (which is 2 ticks). | |
1481 | */ | |
1482 | TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp), | |
1483 | tp->t_rttmin, TCPTV_REXMTMAX); | |
1484 | ||
1485 | /* | |
1486 | * We received an ack for a packet that wasn't retransmitted; | |
1487 | * it is probably safe to discard any error indications we've | |
1488 | * received recently. This isn't quite right, but close enough | |
1489 | * for now (a route might have failed after we sent a segment, | |
1490 | * and the return path might not be symmetrical). | |
1491 | */ | |
1492 | tp->t_softerror = 0; | |
1493 | } | |
1494 | ||
1495 | /* | |
1496 | * Determine a reasonable value for maxseg size. | |
1497 | * If the route is known, check route for mtu. | |
1498 | * If none, use an mss that can be handled on the outgoing | |
1499 | * interface without forcing IP to fragment; if bigger than | |
1500 | * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES | |
1501 | * to utilize large mbufs. If no route is found, route has no mtu, | |
1502 | * or the destination isn't local, use a default, hopefully conservative | |
1503 | * size (usually 512 or the default IP max size, but no more than the mtu | |
1504 | * of the interface), as we can't discover anything about intervening | |
1505 | * gateways or networks. We also initialize the congestion/slow start | |
1506 | * window to be a single segment if the destination isn't local. | |
1507 | * While looking at the routing entry, we also initialize other path-dependent | |
1508 | * parameters from pre-set or cached values in the routing entry. | |
1509 | */ | |
c46785cb | 1510 | int |
6cf1965b MK |
1511 | tcp_mss(tp, offer) |
1512 | register struct tcpcb *tp; | |
1513 | u_short offer; | |
99578149 MK |
1514 | { |
1515 | struct route *ro; | |
6cf1965b | 1516 | register struct rtentry *rt; |
99578149 | 1517 | struct ifnet *ifp; |
6cf1965b MK |
1518 | register int rtt, mss; |
1519 | u_long bufsize; | |
99578149 | 1520 | struct inpcb *inp; |
6cf1965b MK |
1521 | struct socket *so; |
1522 | extern int tcp_mssdflt, tcp_rttdflt; | |
99578149 MK |
1523 | |
1524 | inp = tp->t_inpcb; | |
1525 | ro = &inp->inp_route; | |
6cf1965b MK |
1526 | |
1527 | if ((rt = ro->ro_rt) == (struct rtentry *)0) { | |
99578149 MK |
1528 | /* No route yet, so try to acquire one */ |
1529 | if (inp->inp_faddr.s_addr != INADDR_ANY) { | |
1530 | ro->ro_dst.sa_family = AF_INET; | |
0af8f6fc | 1531 | ro->ro_dst.sa_len = sizeof(ro->ro_dst); |
99578149 MK |
1532 | ((struct sockaddr_in *) &ro->ro_dst)->sin_addr = |
1533 | inp->inp_faddr; | |
1534 | rtalloc(ro); | |
1535 | } | |
6cf1965b | 1536 | if ((rt = ro->ro_rt) == (struct rtentry *)0) |
22e026ed | 1537 | return (tcp_mssdflt); |
99578149 | 1538 | } |
6cf1965b MK |
1539 | ifp = rt->rt_ifp; |
1540 | so = inp->inp_socket; | |
99578149 | 1541 | |
6cf1965b MK |
1542 | #ifdef RTV_MTU /* if route characteristics exist ... */ |
1543 | /* | |
1544 | * While we're here, check if there's an initial rtt | |
1545 | * or rttvar. Convert from the route-table units | |
1546 | * to scaled multiples of the slow timeout timer. | |
1547 | */ | |
1548 | if (tp->t_srtt == 0 && (rtt = rt->rt_rmx.rmx_rtt)) { | |
22b91e81 MK |
1549 | /* |
1550 | * XXX the lock bit for MTU indicates that the value | |
1551 | * is also a minimum value; this is subject to time. | |
1552 | */ | |
1553 | if (rt->rt_rmx.rmx_locks & RTV_RTT) | |
6cf1965b MK |
1554 | tp->t_rttmin = rtt / (RTM_RTTUNIT / PR_SLOWHZ); |
1555 | tp->t_srtt = rtt / (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTT_SCALE)); | |
1556 | if (rt->rt_rmx.rmx_rttvar) | |
1557 | tp->t_rttvar = rt->rt_rmx.rmx_rttvar / | |
1558 | (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTTVAR_SCALE)); | |
1559 | else | |
1560 | /* default variation is +- 1 rtt */ | |
1561 | tp->t_rttvar = | |
1562 | tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE; | |
1563 | TCPT_RANGESET(tp->t_rxtcur, | |
1564 | ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1, | |
1565 | tp->t_rttmin, TCPTV_REXMTMAX); | |
1566 | } | |
1567 | /* | |
1568 | * if there's an mtu associated with the route, use it | |
1569 | */ | |
1570 | if (rt->rt_rmx.rmx_mtu) | |
22e026ed | 1571 | mss = rt->rt_rmx.rmx_mtu - sizeof(struct tcpiphdr); |
6cf1965b MK |
1572 | else |
1573 | #endif /* RTV_MTU */ | |
1574 | { | |
22e026ed | 1575 | mss = ifp->if_mtu - sizeof(struct tcpiphdr); |
7cc62c26 | 1576 | #if (MCLBYTES & (MCLBYTES - 1)) == 0 |
6cf1965b MK |
1577 | if (mss > MCLBYTES) |
1578 | mss &= ~(MCLBYTES-1); | |
99578149 | 1579 | #else |
6cf1965b MK |
1580 | if (mss > MCLBYTES) |
1581 | mss = mss / MCLBYTES * MCLBYTES; | |
99578149 | 1582 | #endif |
6cf1965b | 1583 | if (!in_localaddr(inp->inp_faddr)) |
22e026ed | 1584 | mss = min(mss, tcp_mssdflt); |
6cf1965b MK |
1585 | } |
1586 | /* | |
1587 | * The current mss, t_maxseg, is initialized to the default value. | |
1588 | * If we compute a smaller value, reduce the current mss. | |
1589 | * If we compute a larger value, return it for use in sending | |
1590 | * a max seg size option, but don't store it for use | |
1591 | * unless we received an offer at least that large from peer. | |
1592 | * However, do not accept offers under 32 bytes. | |
1593 | */ | |
1594 | if (offer) | |
1595 | mss = min(mss, offer); | |
1596 | mss = max(mss, 32); /* sanity */ | |
1597 | if (mss < tp->t_maxseg || offer != 0) { | |
1598 | /* | |
1599 | * If there's a pipesize, change the socket buffer | |
1600 | * to that size. Make the socket buffers an integral | |
1601 | * number of mss units; if the mss is larger than | |
1602 | * the socket buffer, decrease the mss. | |
1603 | */ | |
1604 | #ifdef RTV_SPIPE | |
1605 | if ((bufsize = rt->rt_rmx.rmx_sendpipe) == 0) | |
1606 | #endif | |
1607 | bufsize = so->so_snd.sb_hiwat; | |
1608 | if (bufsize < mss) | |
1609 | mss = bufsize; | |
1610 | else { | |
78bce952 | 1611 | bufsize = roundup(bufsize, mss); |
69d96ae2 AC |
1612 | if (bufsize > sb_max) |
1613 | bufsize = sb_max; | |
78bce952 | 1614 | (void)sbreserve(&so->so_snd, bufsize); |
6cf1965b MK |
1615 | } |
1616 | tp->t_maxseg = mss; | |
386369f5 | 1617 | |
6cf1965b MK |
1618 | #ifdef RTV_RPIPE |
1619 | if ((bufsize = rt->rt_rmx.rmx_recvpipe) == 0) | |
1620 | #endif | |
1621 | bufsize = so->so_rcv.sb_hiwat; | |
1622 | if (bufsize > mss) { | |
78bce952 | 1623 | bufsize = roundup(bufsize, mss); |
69d96ae2 AC |
1624 | if (bufsize > sb_max) |
1625 | bufsize = sb_max; | |
78bce952 | 1626 | (void)sbreserve(&so->so_rcv, bufsize); |
6cf1965b MK |
1627 | } |
1628 | } | |
a6bbda13 | 1629 | tp->snd_cwnd = mss; |
6cf1965b MK |
1630 | |
1631 | #ifdef RTV_SSTHRESH | |
1632 | if (rt->rt_rmx.rmx_ssthresh) { | |
1633 | /* | |
1634 | * There's some sort of gateway or interface | |
1635 | * buffer limit on the path. Use this to set | |
1636 | * the slow start threshold, but set the | |
1637 | * threshold to no less than 2*mss. | |
1638 | */ | |
1639 | tp->snd_ssthresh = max(2 * mss, rt->rt_rmx.rmx_ssthresh); | |
1640 | } | |
1641 | #endif /* RTV_SSTHRESH */ | |
a6bbda13 | 1642 | return (mss); |
99578149 | 1643 | } |
55ad7758 | 1644 | #endif /* TUBA_INCLUDE */ |
9d866d2f MK |
1645 | |
1646 | #if BSD<43 | |
1647 | /* XXX this belongs in netinet/in.c */ | |
1648 | in_localaddr(in) | |
1649 | struct in_addr in; | |
1650 | { | |
1651 | register u_long i = ntohl(in.s_addr); | |
1652 | register struct ifnet *ifp; | |
1653 | register struct sockaddr_in *sin; | |
1654 | register u_long mask; | |
1655 | ||
1656 | if (IN_CLASSA(i)) | |
1657 | mask = IN_CLASSA_NET; | |
1658 | else if (IN_CLASSB(i)) | |
1659 | mask = IN_CLASSB_NET; | |
1660 | else if (IN_CLASSC(i)) | |
1661 | mask = IN_CLASSC_NET; | |
1662 | else | |
1663 | return (0); | |
1664 | ||
1665 | i &= mask; | |
1666 | for (ifp = ifnet; ifp; ifp = ifp->if_next) { | |
1667 | if (ifp->if_addr.sa_family != AF_INET) | |
1668 | continue; | |
1669 | sin = (struct sockaddr_in *)&ifp->if_addr; | |
1670 | if ((sin->sin_addr.s_addr & mask) == i) | |
1671 | return (1); | |
1672 | } | |
1673 | return (0); | |
1674 | } | |
1675 | #endif |