Commit | Line | Data |
---|---|---|
8ae0e4b4 | 1 | /* |
6cf1965b | 2 | * Copyright (c) 1982, 1986, 1988, 1990 Regents of the University of California. |
2b6b6284 | 3 | * All rights reserved. |
8ae0e4b4 | 4 | * |
dbf0c423 | 5 | * %sccs.include.redist.c% |
2b6b6284 | 6 | * |
29314731 | 7 | * @(#)tcp_input.c 7.25 (Berkeley) %G% |
8ae0e4b4 | 8 | */ |
87e78f19 | 9 | |
20666ad3 JB |
10 | #include "param.h" |
11 | #include "systm.h" | |
9d91b170 | 12 | #include "malloc.h" |
20666ad3 JB |
13 | #include "mbuf.h" |
14 | #include "protosw.h" | |
15 | #include "socket.h" | |
16 | #include "socketvar.h" | |
17 | #include "errno.h" | |
f4d55810 SL |
18 | |
19 | #include "../net/if.h" | |
c124e997 | 20 | #include "../net/route.h" |
f4d55810 | 21 | |
20666ad3 | 22 | #include "in.h" |
20666ad3 JB |
23 | #include "in_systm.h" |
24 | #include "ip.h" | |
2b1c4192 | 25 | #include "in_pcb.h" |
20666ad3 JB |
26 | #include "ip_var.h" |
27 | #include "tcp.h" | |
28 | #include "tcp_fsm.h" | |
29 | #include "tcp_seq.h" | |
30 | #include "tcp_timer.h" | |
31 | #include "tcp_var.h" | |
32 | #include "tcpip.h" | |
33 | #include "tcp_debug.h" | |
87e78f19 | 34 | |
386369f5 | 35 | int tcprexmtthresh = 3; |
6cf1965b MK |
36 | int tcppredack; /* XXX debugging: times hdr predict ok for acks */ |
37 | int tcppreddat; /* XXX # times header prediction ok for data packets */ | |
38 | int tcppcbcachemiss; | |
4b935108 | 39 | struct tcpiphdr tcp_saveti; |
6cf1965b | 40 | struct inpcb *tcp_last_inpcb = &tcb; |
87e78f19 | 41 | |
4b935108 | 42 | struct tcpcb *tcp_newtcpcb(); |
a17510f3 MK |
43 | |
/*
 * Insert segment ti into reassembly queue of tcp with
 * control block tp.  Stores TH_FIN in flags if reassembly now includes
 * a segment with FIN, otherwise whatever tcp_reass() returned.
 * The macro form does the common case inline
 * (segment is the next to be received on an established connection,
 * and the queue is empty), avoiding linkage into and removal
 * from the queue and repetition of various conversions.
 * Set DELACK for segments received in order, but ack immediately
 * when segments are out of order (so fast retransmit can work).
 *
 * Every macro argument is parenthesized in the expansion so that
 * expressions such as &cb may be passed safely.  The arguments are
 * still evaluated more than once, so they must be free of side effects.
 * The expansion is a brace-enclosed block rather than do { } while (0);
 * do not use it as the unbraced body of an if that has an else.
 */
#define	TCP_REASS(tp, ti, m, so, flags) { \
	if ((ti)->ti_seq == (tp)->rcv_nxt && \
	    (tp)->seg_next == (struct tcpiphdr *)(tp) && \
	    (tp)->t_state == TCPS_ESTABLISHED) { \
		(tp)->t_flags |= TF_DELACK; \
		(tp)->rcv_nxt += (ti)->ti_len; \
		(flags) = (ti)->ti_flags & TH_FIN; \
		tcpstat.tcps_rcvpack++; \
		tcpstat.tcps_rcvbyte += (ti)->ti_len; \
		sbappend(&(so)->so_rcv, (m)); \
		sorwakeup(so); \
	} else { \
		(flags) = tcp_reass((tp), (ti), (m)); \
		(tp)->t_flags |= TF_ACKNOW; \
	} \
}
70 | ||
6cf1965b | 71 | tcp_reass(tp, ti, m) |
a17510f3 MK |
72 | register struct tcpcb *tp; |
73 | register struct tcpiphdr *ti; | |
6cf1965b | 74 | struct mbuf *m; |
a17510f3 MK |
75 | { |
76 | register struct tcpiphdr *q; | |
77 | struct socket *so = tp->t_inpcb->inp_socket; | |
a17510f3 MK |
78 | int flags; |
79 | ||
80 | /* | |
81 | * Call with ti==0 after become established to | |
82 | * force pre-ESTABLISHED data up to user socket. | |
83 | */ | |
84 | if (ti == 0) | |
85 | goto present; | |
86 | ||
87 | /* | |
88 | * Find a segment which begins after this one does. | |
89 | */ | |
90 | for (q = tp->seg_next; q != (struct tcpiphdr *)tp; | |
91 | q = (struct tcpiphdr *)q->ti_next) | |
92 | if (SEQ_GT(q->ti_seq, ti->ti_seq)) | |
93 | break; | |
94 | ||
95 | /* | |
96 | * If there is a preceding segment, it may provide some of | |
97 | * our data already. If so, drop the data from the incoming | |
98 | * segment. If it provides all of our data, drop us. | |
99 | */ | |
100 | if ((struct tcpiphdr *)q->ti_prev != (struct tcpiphdr *)tp) { | |
101 | register int i; | |
102 | q = (struct tcpiphdr *)q->ti_prev; | |
103 | /* conversion to int (in i) handles seq wraparound */ | |
104 | i = q->ti_seq + q->ti_len - ti->ti_seq; | |
105 | if (i > 0) { | |
96c50630 MK |
106 | if (i >= ti->ti_len) { |
107 | tcpstat.tcps_rcvduppack++; | |
108 | tcpstat.tcps_rcvdupbyte += ti->ti_len; | |
6cf1965b MK |
109 | m_freem(m); |
110 | return (0); | |
96c50630 | 111 | } |
6cf1965b | 112 | m_adj(m, i); |
a17510f3 MK |
113 | ti->ti_len -= i; |
114 | ti->ti_seq += i; | |
115 | } | |
116 | q = (struct tcpiphdr *)(q->ti_next); | |
117 | } | |
96c50630 MK |
118 | tcpstat.tcps_rcvoopack++; |
119 | tcpstat.tcps_rcvoobyte += ti->ti_len; | |
6cf1965b | 120 | REASS_MBUF(ti) = m; /* XXX */ |
a17510f3 MK |
121 | |
122 | /* | |
123 | * While we overlap succeeding segments trim them or, | |
124 | * if they are completely covered, dequeue them. | |
125 | */ | |
126 | while (q != (struct tcpiphdr *)tp) { | |
127 | register int i = (ti->ti_seq + ti->ti_len) - q->ti_seq; | |
128 | if (i <= 0) | |
129 | break; | |
130 | if (i < q->ti_len) { | |
131 | q->ti_seq += i; | |
132 | q->ti_len -= i; | |
6cf1965b | 133 | m_adj(REASS_MBUF(q), i); |
a17510f3 MK |
134 | break; |
135 | } | |
136 | q = (struct tcpiphdr *)q->ti_next; | |
6cf1965b | 137 | m = REASS_MBUF((struct tcpiphdr *)q->ti_prev); |
a17510f3 MK |
138 | remque(q->ti_prev); |
139 | m_freem(m); | |
140 | } | |
141 | ||
142 | /* | |
143 | * Stick new segment in its place. | |
144 | */ | |
145 | insque(ti, q->ti_prev); | |
146 | ||
147 | present: | |
148 | /* | |
149 | * Present data to user, advancing rcv_nxt through | |
150 | * completed sequence space. | |
151 | */ | |
152 | if (TCPS_HAVERCVDSYN(tp->t_state) == 0) | |
153 | return (0); | |
154 | ti = tp->seg_next; | |
155 | if (ti == (struct tcpiphdr *)tp || ti->ti_seq != tp->rcv_nxt) | |
156 | return (0); | |
157 | if (tp->t_state == TCPS_SYN_RECEIVED && ti->ti_len) | |
158 | return (0); | |
159 | do { | |
160 | tp->rcv_nxt += ti->ti_len; | |
161 | flags = ti->ti_flags & TH_FIN; | |
162 | remque(ti); | |
6cf1965b | 163 | m = REASS_MBUF(ti); |
a17510f3 MK |
164 | ti = (struct tcpiphdr *)ti->ti_next; |
165 | if (so->so_state & SS_CANTRCVMORE) | |
166 | m_freem(m); | |
167 | else | |
168 | sbappend(&so->so_rcv, m); | |
169 | } while (ti != (struct tcpiphdr *)tp && ti->ti_seq == tp->rcv_nxt); | |
170 | sorwakeup(so); | |
171 | return (flags); | |
a17510f3 MK |
172 | } |
173 | ||
2ff61f9d BJ |
174 | /* |
175 | * TCP input routine, follows pages 65-76 of the | |
176 | * protocol specification dated September, 1981 very closely. | |
177 | */ | |
9d91b170 MK |
178 | tcp_input(m, iphlen) |
179 | register struct mbuf *m; | |
180 | int iphlen; | |
87e78f19 | 181 | { |
2b4b57cd | 182 | register struct tcpiphdr *ti; |
6cf1965b | 183 | register struct inpcb *inp; |
8b5a83bb | 184 | struct mbuf *om = 0; |
2b4b57cd | 185 | int len, tlen, off; |
8e65fd66 | 186 | register struct tcpcb *tp = 0; |
2b4b57cd | 187 | register int tiflags; |
d52566dd | 188 | struct socket *so; |
4859921b | 189 | int todrop, acked, ourfinisacked, needoutput = 0; |
4b935108 | 190 | short ostate; |
ebcadd38 | 191 | struct in_addr laddr; |
7aa16f99 | 192 | int dropsocket = 0; |
96c50630 | 193 | int iss = 0; |
87e78f19 | 194 | |
96c50630 | 195 | tcpstat.tcps_rcvtotal++; |
87e78f19 | 196 | /* |
4aed14e3 BJ |
197 | * Get IP and TCP header together in first mbuf. |
198 | * Note: IP leaves IP header in first mbuf. | |
87e78f19 | 199 | */ |
20790db4 | 200 | ti = mtod(m, struct tcpiphdr *); |
9d91b170 MK |
201 | if (iphlen > sizeof (struct ip)) |
202 | ip_stripoptions(m, (struct mbuf *)0); | |
6cf1965b | 203 | if (m->m_len < sizeof (struct tcpiphdr)) { |
6703c41f | 204 | if ((m = m_pullup(m, sizeof (struct tcpiphdr))) == 0) { |
96c50630 | 205 | tcpstat.tcps_rcvshort++; |
6703c41f | 206 | return; |
8a13b737 BJ |
207 | } |
208 | ti = mtod(m, struct tcpiphdr *); | |
209 | } | |
87e78f19 | 210 | |
2b4b57cd | 211 | /* |
4aed14e3 | 212 | * Checksum extended TCP header and data. |
2b4b57cd BJ |
213 | */ |
214 | tlen = ((struct ip *)ti)->ip_len; | |
215 | len = sizeof (struct ip) + tlen; | |
9d91b170 MK |
216 | ti->ti_next = ti->ti_prev = 0; |
217 | ti->ti_x1 = 0; | |
218 | ti->ti_len = (u_short)tlen; | |
6cf1965b | 219 | HTONS(ti->ti_len); |
9d91b170 | 220 | if (ti->ti_sum = in_cksum(m, len)) { |
9d91b170 MK |
221 | tcpstat.tcps_rcvbadsum++; |
222 | goto drop; | |
87e78f19 BJ |
223 | } |
224 | ||
225 | /* | |
4aed14e3 | 226 | * Check that TCP offset makes sense, |
6cf1965b | 227 | * pull out TCP options and adjust length. XXX |
87e78f19 | 228 | */ |
2b4b57cd | 229 | off = ti->ti_off << 2; |
4b6b94ca | 230 | if (off < sizeof (struct tcphdr) || off > tlen) { |
96c50630 | 231 | tcpstat.tcps_rcvbadoff++; |
8a13b737 | 232 | goto drop; |
2b4b57cd | 233 | } |
1e977657 BJ |
234 | tlen -= off; |
235 | ti->ti_len = tlen; | |
8b5a83bb | 236 | if (off > sizeof (struct tcphdr)) { |
a17510f3 MK |
237 | if (m->m_len < sizeof(struct ip) + off) { |
238 | if ((m = m_pullup(m, sizeof (struct ip) + off)) == 0) { | |
96c50630 | 239 | tcpstat.tcps_rcvshort++; |
a17510f3 MK |
240 | return; |
241 | } | |
242 | ti = mtod(m, struct tcpiphdr *); | |
8b5a83bb | 243 | } |
cce93e4b | 244 | om = m_get(M_DONTWAIT, MT_DATA); |
8b5a83bb BJ |
245 | if (om == 0) |
246 | goto drop; | |
8b5a83bb BJ |
247 | om->m_len = off - sizeof (struct tcphdr); |
248 | { caddr_t op = mtod(m, caddr_t) + sizeof (struct tcpiphdr); | |
668cc26d | 249 | bcopy(op, mtod(om, caddr_t), (unsigned)om->m_len); |
8b5a83bb | 250 | m->m_len -= om->m_len; |
9d91b170 | 251 | m->m_pkthdr.len -= om->m_len; |
668cc26d SL |
252 | bcopy(op+om->m_len, op, |
253 | (unsigned)(m->m_len-sizeof (struct tcpiphdr))); | |
8b5a83bb BJ |
254 | } |
255 | } | |
2ff61f9d | 256 | tiflags = ti->ti_flags; |
2b4b57cd | 257 | |
8a13b737 | 258 | /* |
4aed14e3 | 259 | * Convert TCP protocol specific fields to host format. |
8a13b737 | 260 | */ |
6cf1965b MK |
261 | NTOHL(ti->ti_seq); |
262 | NTOHL(ti->ti_ack); | |
263 | NTOHS(ti->ti_win); | |
264 | NTOHS(ti->ti_urp); | |
8a13b737 | 265 | |
2b4b57cd | 266 | /* |
8075bb0e | 267 | * Locate pcb for segment. |
2b4b57cd | 268 | */ |
96c50630 | 269 | findpcb: |
6cf1965b MK |
270 | inp = tcp_last_inpcb; |
271 | if (inp->inp_lport != ti->ti_dport || | |
272 | inp->inp_fport != ti->ti_sport || | |
273 | inp->inp_faddr.s_addr != ti->ti_src.s_addr || | |
274 | inp->inp_laddr.s_addr != ti->ti_dst.s_addr) { | |
275 | inp = in_pcblookup(&tcb, ti->ti_src, ti->ti_sport, | |
276 | ti->ti_dst, ti->ti_dport, INPLOOKUP_WILDCARD); | |
277 | if (inp) | |
278 | tcp_last_inpcb = inp; | |
279 | ++tcppcbcachemiss; | |
280 | } | |
2ff61f9d BJ |
281 | |
282 | /* | |
283 | * If the state is CLOSED (i.e., TCB does not exist) then | |
4aed14e3 | 284 | * all data in the incoming segment is discarded. |
386369f5 MK |
285 | * If the TCB exists but is in CLOSED state, it is embryonic, |
286 | * but should either do a listen or a connect soon. | |
2ff61f9d | 287 | */ |
22856bb8 | 288 | if (inp == 0) |
8a13b737 | 289 | goto dropwithreset; |
2ff61f9d | 290 | tp = intotcpcb(inp); |
22856bb8 | 291 | if (tp == 0) |
8a13b737 | 292 | goto dropwithreset; |
386369f5 MK |
293 | if (tp->t_state == TCPS_CLOSED) |
294 | goto drop; | |
f1b2fa5b | 295 | so = inp->inp_socket; |
6cf1965b MK |
296 | if (so->so_options & (SO_DEBUG|SO_ACCEPTCONN)) { |
297 | if (so->so_options & SO_DEBUG) { | |
298 | ostate = tp->t_state; | |
299 | tcp_saveti = *ti; | |
300 | } | |
301 | if (so->so_options & SO_ACCEPTCONN) { | |
302 | so = sonewconn(so, 0); | |
303 | if (so == 0) | |
304 | goto drop; | |
305 | /* | |
306 | * This is ugly, but .... | |
307 | * | |
308 | * Mark socket as temporary until we're | |
309 | * committed to keeping it. The code at | |
310 | * ``drop'' and ``dropwithreset'' checks the |
311 | * flag dropsocket to see if the temporary | |
312 | * socket created here should be discarded. | |
313 | * We mark the socket as discardable until | |
314 | * we're committed to it below in TCPS_LISTEN. | |
315 | */ | |
316 | dropsocket++; | |
317 | inp = (struct inpcb *)so->so_pcb; | |
318 | inp->inp_laddr = ti->ti_dst; | |
319 | inp->inp_lport = ti->ti_dport; | |
9d866d2f | 320 | #if BSD>=43 |
6cf1965b | 321 | inp->inp_options = ip_srcroute(); |
9d866d2f | 322 | #endif |
6cf1965b MK |
323 | tp = intotcpcb(inp); |
324 | tp->t_state = TCPS_LISTEN; | |
325 | } | |
ebf42a75 | 326 | } |
87e78f19 | 327 | |
405c9168 BJ |
328 | /* |
329 | * Segment received on connection. | |
330 | * Reset idle time and keep-alive timer. | |
331 | */ | |
332 | tp->t_idle = 0; | |
8a36cf82 | 333 | tp->t_timer[TCPT_KEEP] = tcp_keepidle; |
405c9168 | 334 | |
8b5a83bb | 335 | /* |
99578149 MK |
336 | * Process options if not in LISTEN state, |
337 | * else do it below (after getting remote address). | |
8b5a83bb | 338 | */ |
99578149 MK |
339 | if (om && tp->t_state != TCPS_LISTEN) { |
340 | tcp_dooptions(tp, om, ti); | |
8b5a83bb BJ |
341 | om = 0; |
342 | } | |
6cf1965b MK |
343 | /* |
344 | * Header prediction: check for the two common cases | |
345 | * of a uni-directional data xfer. If the packet has | |
346 | * no control flags, is in-sequence, the window didn't | |
347 | * change and we're not retransmitting, it's a | |
348 | * candidate. If the length is zero and the ack moved | |
349 | * forward, we're the sender side of the xfer. Just | |
350 | * free the data acked & wake any higher level process | |
351 | * that was blocked waiting for space. If the length | |
352 | * is non-zero and the ack didn't move, we're the | |
353 | * receiver side. If we're getting packets in-order | |
354 | * (the reassembly queue is empty), add the data to | |
355 | * the socket buffer and note that we need a delayed ack. | |
356 | */ | |
357 | if (tp->t_state == TCPS_ESTABLISHED && | |
358 | (tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK && | |
359 | ti->ti_seq == tp->rcv_nxt && | |
360 | ti->ti_win && ti->ti_win == tp->snd_wnd && | |
361 | tp->snd_nxt == tp->snd_max) { | |
362 | if (ti->ti_len == 0) { | |
363 | if (SEQ_GT(ti->ti_ack, tp->snd_una) && | |
364 | SEQ_LEQ(ti->ti_ack, tp->snd_max) && | |
365 | tp->snd_cwnd >= tp->snd_wnd) { | |
366 | /* | |
367 | * this is a pure ack for outstanding data. | |
368 | */ | |
369 | ++tcppredack; | |
370 | if (tp->t_rtt && SEQ_GT(ti->ti_ack,tp->t_rtseq)) | |
371 | tcp_xmit_timer(tp); | |
372 | acked = ti->ti_ack - tp->snd_una; | |
373 | tcpstat.tcps_rcvackpack++; | |
374 | tcpstat.tcps_rcvackbyte += acked; | |
375 | sbdrop(&so->so_snd, acked); | |
376 | tp->snd_una = ti->ti_ack; | |
377 | m_freem(m); | |
378 | ||
379 | /* | |
380 | * If all outstanding data are acked, stop | |
381 | * retransmit timer, otherwise restart timer | |
382 | * using current (possibly backed-off) value. | |
383 | * If process is waiting for space, | |
384 | * wakeup/selwakeup/signal. If data | |
385 | * are ready to send, let tcp_output | |
386 | * decide between more output or persist. | |
387 | */ | |
388 | if (tp->snd_una == tp->snd_max) | |
389 | tp->t_timer[TCPT_REXMT] = 0; | |
390 | else if (tp->t_timer[TCPT_PERSIST] == 0) | |
391 | tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; | |
392 | ||
393 | if (so->so_snd.sb_flags & SB_NOTIFY) | |
394 | sowwakeup(so); | |
395 | if (so->so_snd.sb_cc) | |
396 | (void) tcp_output(tp); | |
397 | return; | |
398 | } | |
399 | } else if (ti->ti_ack == tp->snd_una && | |
400 | tp->seg_next == (struct tcpiphdr *)tp && | |
401 | ti->ti_len <= sbspace(&so->so_rcv)) { | |
402 | /* | |
403 | * this is a pure, in-sequence data packet | |
404 | * with nothing on the reassembly queue and | |
405 | * we have enough buffer space to take it. | |
406 | */ | |
407 | ++tcppreddat; | |
408 | tp->rcv_nxt += ti->ti_len; | |
409 | tcpstat.tcps_rcvpack++; | |
410 | tcpstat.tcps_rcvbyte += ti->ti_len; | |
411 | /* | |
412 | * Drop TCP and IP headers then add data | |
413 | * to socket buffer | |
414 | */ | |
415 | m->m_data += sizeof(struct tcpiphdr); | |
416 | m->m_len -= sizeof(struct tcpiphdr); | |
417 | sbappend(&so->so_rcv, m); | |
418 | sorwakeup(so); | |
419 | tp->t_flags |= TF_DELACK; | |
420 | return; | |
421 | } | |
422 | } | |
423 | ||
424 | /* | |
425 | * Drop TCP and IP headers; TCP options were dropped above. | |
426 | */ | |
427 | m->m_data += sizeof(struct tcpiphdr); | |
428 | m->m_len -= sizeof(struct tcpiphdr); | |
8b5a83bb | 429 | |
87e78f19 | 430 | /* |
8a13b737 BJ |
431 | * Calculate amount of space in receive window, |
432 | * and then do TCP input processing. | |
a17510f3 MK |
433 | * Receive window is amount of space in rcv queue, |
434 | * but not less than advertised window. | |
87e78f19 | 435 | */ |
bbaaf0fd MK |
436 | { int win; |
437 | ||
438 | win = sbspace(&so->so_rcv); | |
439 | if (win < 0) | |
440 | win = 0; | |
9d91b170 | 441 | tp->rcv_wnd = max(win, (int)(tp->rcv_adv - tp->rcv_nxt)); |
bbaaf0fd | 442 | } |
2ff61f9d | 443 | |
87e78f19 BJ |
444 | switch (tp->t_state) { |
445 | ||
2ff61f9d BJ |
446 | /* |
447 | * If the state is LISTEN then ignore segment if it contains an RST. | |
448 | * If the segment contains an ACK then it is bad and send a RST. | |
449 | * If it does not contain a SYN then it is not interesting; drop it. | |
224f3a72 | 450 | * Don't bother responding if the destination was a broadcast. |
8a13b737 | 451 | * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial |
2ff61f9d | 452 | * tp->iss, and send a segment: |
8a13b737 | 453 | * <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK> |
2ff61f9d BJ |
454 | * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss. |
455 | * Fill in remote peer address fields if not previously specified. | |
456 | * Enter SYN_RECEIVED state, and process any other fields of this | |
4aed14e3 | 457 | * segment in this state. |
2ff61f9d | 458 | */ |
8075bb0e | 459 | case TCPS_LISTEN: { |
789d2a39 | 460 | struct mbuf *am; |
8075bb0e BJ |
461 | register struct sockaddr_in *sin; |
462 | ||
2ff61f9d BJ |
463 | if (tiflags & TH_RST) |
464 | goto drop; | |
22856bb8 | 465 | if (tiflags & TH_ACK) |
8a13b737 | 466 | goto dropwithreset; |
22856bb8 | 467 | if ((tiflags & TH_SYN) == 0) |
2ff61f9d | 468 | goto drop; |
9d91b170 | 469 | if (m->m_flags & M_BCAST) |
224f3a72 | 470 | goto drop; |
6cf1965b | 471 | am = m_get(M_DONTWAIT, MT_SONAME); /* XXX */ |
789d2a39 SL |
472 | if (am == NULL) |
473 | goto drop; | |
474 | am->m_len = sizeof (struct sockaddr_in); | |
a8d3bf7f | 475 | sin = mtod(am, struct sockaddr_in *); |
8075bb0e | 476 | sin->sin_family = AF_INET; |
0af8f6fc | 477 | sin->sin_len = sizeof(*sin); |
8075bb0e BJ |
478 | sin->sin_addr = ti->ti_src; |
479 | sin->sin_port = ti->ti_sport; | |
ebcadd38 | 480 | laddr = inp->inp_laddr; |
789d2a39 | 481 | if (inp->inp_laddr.s_addr == INADDR_ANY) |
ebcadd38 | 482 | inp->inp_laddr = ti->ti_dst; |
a8d3bf7f | 483 | if (in_pcbconnect(inp, am)) { |
ebcadd38 | 484 | inp->inp_laddr = laddr; |
5a1f132a | 485 | (void) m_free(am); |
4aed14e3 | 486 | goto drop; |
ebcadd38 | 487 | } |
5a1f132a | 488 | (void) m_free(am); |
4aed14e3 BJ |
489 | tp->t_template = tcp_template(tp); |
490 | if (tp->t_template == 0) { | |
8011f5df | 491 | tp = tcp_drop(tp, ENOBUFS); |
a4f7ea71 | 492 | dropsocket = 0; /* socket is already gone */ |
4aed14e3 BJ |
493 | goto drop; |
494 | } | |
99578149 MK |
495 | if (om) { |
496 | tcp_dooptions(tp, om, ti); | |
497 | om = 0; | |
498 | } | |
96c50630 MK |
499 | if (iss) |
500 | tp->iss = iss; | |
501 | else | |
502 | tp->iss = tcp_iss; | |
503 | tcp_iss += TCP_ISSINCR/2; | |
2ff61f9d | 504 | tp->irs = ti->ti_seq; |
8a13b737 BJ |
505 | tcp_sendseqinit(tp); |
506 | tcp_rcvseqinit(tp); | |
bbaaf0fd | 507 | tp->t_flags |= TF_ACKNOW; |
2ff61f9d | 508 | tp->t_state = TCPS_SYN_RECEIVED; |
8a36cf82 | 509 | tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; |
7aa16f99 | 510 | dropsocket = 0; /* committed to socket */ |
96c50630 | 511 | tcpstat.tcps_accepts++; |
8a13b737 | 512 | goto trimthenstep6; |
8075bb0e | 513 | } |
87e78f19 | 514 | |
2ff61f9d BJ |
515 | /* |
516 | * If the state is SYN_SENT: | |
517 | * if seg contains an ACK, but not for our SYN, drop the input. | |
518 | * if seg contains a RST, then drop the connection. | |
519 | * if seg does not contain SYN, then drop it. | |
520 | * Otherwise this is an acceptable SYN segment | |
521 | * initialize tp->rcv_nxt and tp->irs | |
522 | * if seg contains ack then advance tp->snd_una | |
523 | * if SYN has been acked change to ESTABLISHED else SYN_RCVD state | |
524 | * arrange for segment to be acked (eventually) | |
525 | * continue processing rest of data/controls, beginning with URG | |
526 | */ | |
527 | case TCPS_SYN_SENT: | |
528 | if ((tiflags & TH_ACK) && | |
a17510f3 | 529 | (SEQ_LEQ(ti->ti_ack, tp->iss) || |
4b6b94ca | 530 | SEQ_GT(ti->ti_ack, tp->snd_max))) |
8a13b737 | 531 | goto dropwithreset; |
2ff61f9d | 532 | if (tiflags & TH_RST) { |
0e3936fa SL |
533 | if (tiflags & TH_ACK) |
534 | tp = tcp_drop(tp, ECONNREFUSED); | |
2ff61f9d | 535 | goto drop; |
87e78f19 | 536 | } |
2ff61f9d BJ |
537 | if ((tiflags & TH_SYN) == 0) |
538 | goto drop; | |
b57e9490 MK |
539 | if (tiflags & TH_ACK) { |
540 | tp->snd_una = ti->ti_ack; | |
541 | if (SEQ_LT(tp->snd_nxt, tp->snd_una)) | |
542 | tp->snd_nxt = tp->snd_una; | |
543 | } | |
4aed14e3 | 544 | tp->t_timer[TCPT_REXMT] = 0; |
2ff61f9d | 545 | tp->irs = ti->ti_seq; |
8a13b737 BJ |
546 | tcp_rcvseqinit(tp); |
547 | tp->t_flags |= TF_ACKNOW; | |
b57e9490 | 548 | if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) { |
96c50630 | 549 | tcpstat.tcps_connects++; |
4aed14e3 | 550 | soisconnected(so); |
2ff61f9d | 551 | tp->t_state = TCPS_ESTABLISHED; |
6cf1965b MK |
552 | (void) tcp_reass(tp, (struct tcpiphdr *)0, |
553 | (struct mbuf *)0); | |
386369f5 MK |
554 | /* |
555 | * if we didn't have to retransmit the SYN, | |
556 | * use its rtt as our initial srtt & rtt var. | |
557 | */ | |
6cf1965b MK |
558 | if (tp->t_rtt) |
559 | tcp_xmit_timer(tp); | |
405c9168 | 560 | } else |
8a13b737 | 561 | tp->t_state = TCPS_SYN_RECEIVED; |
8a13b737 BJ |
562 | |
563 | trimthenstep6: | |
564 | /* | |
4b6b94ca | 565 | * Advance ti->ti_seq to correspond to first data byte. |
8a13b737 BJ |
566 | * If data, trim to stay within window, |
567 | * dropping FIN if necessary. | |
568 | */ | |
4b6b94ca | 569 | ti->ti_seq++; |
8a13b737 BJ |
570 | if (ti->ti_len > tp->rcv_wnd) { |
571 | todrop = ti->ti_len - tp->rcv_wnd; | |
9d866d2f | 572 | #if BSD>=43 |
8a13b737 | 573 | m_adj(m, -todrop); |
9d866d2f MK |
574 | #else |
575 | /* XXX work around 4.2 m_adj bug */ | |
576 | if (m->m_len) { | |
577 | m_adj(m, -todrop); | |
578 | } else { | |
579 | /* skip tcp/ip header in first mbuf */ | |
580 | m_adj(m->m_next, -todrop); | |
581 | } | |
582 | #endif | |
8a13b737 | 583 | ti->ti_len = tp->rcv_wnd; |
bbaaf0fd | 584 | tiflags &= ~TH_FIN; |
96c50630 MK |
585 | tcpstat.tcps_rcvpackafterwin++; |
586 | tcpstat.tcps_rcvbyteafterwin += todrop; | |
87e78f19 | 587 | } |
e832edbc | 588 | tp->snd_wl1 = ti->ti_seq - 1; |
bbaaf0fd | 589 | tp->rcv_up = ti->ti_seq; |
8a13b737 | 590 | goto step6; |
2ff61f9d | 591 | } |
87e78f19 | 592 | |
2ff61f9d BJ |
593 | /* |
594 | * States other than LISTEN or SYN_SENT. | |
595 | * First check that at least some bytes of segment are within | |
96c50630 MK |
596 | * receive window. If segment begins before rcv_nxt, |
597 | * drop leading data (and SYN); if nothing left, just ack. | |
2ff61f9d | 598 | */ |
96c50630 MK |
599 | todrop = tp->rcv_nxt - ti->ti_seq; |
600 | if (todrop > 0) { | |
601 | if (tiflags & TH_SYN) { | |
602 | tiflags &= ~TH_SYN; | |
603 | ti->ti_seq++; | |
604 | if (ti->ti_urp > 1) | |
605 | ti->ti_urp--; | |
606 | else | |
607 | tiflags &= ~TH_URG; | |
608 | todrop--; | |
609 | } | |
610 | if (todrop > ti->ti_len || | |
611 | todrop == ti->ti_len && (tiflags&TH_FIN) == 0) { | |
8a36cf82 MK |
612 | tcpstat.tcps_rcvduppack++; |
613 | tcpstat.tcps_rcvdupbyte += ti->ti_len; | |
39b02f3c | 614 | /* |
8a36cf82 MK |
615 | * If segment is just one to the left of the window, |
616 | * check two special cases: | |
617 | * 1. Don't toss RST in response to 4.2-style keepalive. | |
618 | * 2. If the only thing to drop is a FIN, we can drop | |
619 | * it, but check the ACK or we will get into FIN | |
620 | * wars if our FINs crossed (both CLOSING). | |
621 | * In either case, send ACK to resynchronize, | |
622 | * but keep on processing for RST or ACK. | |
39b02f3c | 623 | */ |
8a36cf82 MK |
624 | if ((tiflags & TH_FIN && todrop == ti->ti_len + 1) |
625 | #ifdef TCP_COMPAT_42 | |
626 | || (tiflags & TH_RST && ti->ti_seq == tp->rcv_nxt - 1) | |
39b02f3c | 627 | #endif |
8a36cf82 MK |
628 | ) { |
629 | todrop = ti->ti_len; | |
630 | tiflags &= ~TH_FIN; | |
631 | tp->t_flags |= TF_ACKNOW; | |
632 | } else | |
633 | goto dropafterack; | |
a6bbda13 MK |
634 | } else { |
635 | tcpstat.tcps_rcvpartduppack++; | |
636 | tcpstat.tcps_rcvpartdupbyte += todrop; | |
96c50630 | 637 | } |
96c50630 MK |
638 | m_adj(m, todrop); |
639 | ti->ti_seq += todrop; | |
640 | ti->ti_len -= todrop; | |
641 | if (ti->ti_urp > todrop) | |
642 | ti->ti_urp -= todrop; | |
643 | else { | |
644 | tiflags &= ~TH_URG; | |
645 | ti->ti_urp = 0; | |
646 | } | |
647 | } | |
648 | ||
b819e9ea | 649 | /* |
8a36cf82 | 650 | * If new data are received on a connection after the |
b819e9ea MK |
651 | * user processes are gone, then RST the other end. |
652 | */ | |
653 | if ((so->so_state & SS_NOFDREF) && | |
654 | tp->t_state > TCPS_CLOSE_WAIT && ti->ti_len) { | |
655 | tp = tcp_close(tp); | |
656 | tcpstat.tcps_rcvafterclose++; | |
657 | goto dropwithreset; | |
658 | } | |
659 | ||
4f182c3f MK |
660 | /* |
661 | * If segment ends after window, drop trailing data | |
662 | * (and PUSH and FIN); if nothing left, just ACK. | |
663 | */ | |
664 | todrop = (ti->ti_seq+ti->ti_len) - (tp->rcv_nxt+tp->rcv_wnd); | |
665 | if (todrop > 0) { | |
666 | tcpstat.tcps_rcvpackafterwin++; | |
667 | if (todrop >= ti->ti_len) { | |
96c50630 | 668 | tcpstat.tcps_rcvbyteafterwin += ti->ti_len; |
4f182c3f MK |
669 | /* |
670 | * If a new connection request is received | |
671 | * while in TIME_WAIT, drop the old connection | |
672 | * and start over if the sequence numbers | |
673 | * are above the previous ones. | |
674 | */ | |
675 | if (tiflags & TH_SYN && | |
676 | tp->t_state == TCPS_TIME_WAIT && | |
677 | SEQ_GT(ti->ti_seq, tp->rcv_nxt)) { | |
678 | iss = tp->rcv_nxt + TCP_ISSINCR; | |
6cf1965b | 679 | tp = tcp_close(tp); |
4f182c3f | 680 | goto findpcb; |
96c50630 | 681 | } |
4f182c3f MK |
682 | /* |
683 | * If window is closed can only take segments at | |
684 | * window edge, and have to drop data and PUSH from | |
685 | * incoming segments. Continue processing, but | |
686 | * remember to ack. Otherwise, drop segment | |
687 | * and ack. | |
688 | */ | |
689 | if (tp->rcv_wnd == 0 && ti->ti_seq == tp->rcv_nxt) { | |
690 | tp->t_flags |= TF_ACKNOW; | |
691 | tcpstat.tcps_rcvwinprobe++; | |
692 | } else | |
2ff61f9d | 693 | goto dropafterack; |
4f182c3f | 694 | } else |
96c50630 | 695 | tcpstat.tcps_rcvbyteafterwin += todrop; |
9d866d2f | 696 | #if BSD>=43 |
4f182c3f | 697 | m_adj(m, -todrop); |
9d866d2f MK |
698 | #else |
699 | /* XXX work around m_adj bug */ | |
700 | if (m->m_len) { | |
701 | m_adj(m, -todrop); | |
702 | } else { | |
703 | /* skip tcp/ip header in first mbuf */ | |
704 | m_adj(m->m_next, -todrop); | |
705 | } | |
706 | #endif | |
4f182c3f MK |
707 | ti->ti_len -= todrop; |
708 | tiflags &= ~(TH_PUSH|TH_FIN); | |
87e78f19 | 709 | } |
87e78f19 | 710 | |
87e78f19 | 711 | /* |
2ff61f9d BJ |
712 | * If the RST bit is set examine the state: |
713 | * SYN_RECEIVED STATE: | |
714 | * If passive open, return to LISTEN state. | |
715 | * If active open, inform user that connection was refused. | |
716 | * ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES: | |
717 | * Inform user that connection was reset, and close tcb. | |
718 | * CLOSING, LAST_ACK, TIME_WAIT STATES | |
719 | * Close the tcb. | |
87e78f19 | 720 | */ |
2ff61f9d | 721 | if (tiflags&TH_RST) switch (tp->t_state) { |
4b935108 | 722 | |
2ff61f9d | 723 | case TCPS_SYN_RECEIVED: |
8a36cf82 MK |
724 | so->so_error = ECONNREFUSED; |
725 | goto close; | |
2ff61f9d BJ |
726 | |
727 | case TCPS_ESTABLISHED: | |
728 | case TCPS_FIN_WAIT_1: | |
729 | case TCPS_FIN_WAIT_2: | |
730 | case TCPS_CLOSE_WAIT: | |
8a36cf82 MK |
731 | so->so_error = ECONNRESET; |
732 | close: | |
733 | tp->t_state = TCPS_CLOSED; | |
734 | tcpstat.tcps_drops++; | |
735 | tp = tcp_close(tp); | |
2ff61f9d BJ |
736 | goto drop; |
737 | ||
738 | case TCPS_CLOSING: | |
739 | case TCPS_LAST_ACK: | |
740 | case TCPS_TIME_WAIT: | |
0e3936fa | 741 | tp = tcp_close(tp); |
2ff61f9d | 742 | goto drop; |
87e78f19 | 743 | } |
87e78f19 BJ |
744 | |
745 | /* | |
2ff61f9d BJ |
746 | * If a SYN is in the window, then this is an |
747 | * error and we send an RST and drop the connection. | |
748 | */ | |
749 | if (tiflags & TH_SYN) { | |
0e3936fa | 750 | tp = tcp_drop(tp, ECONNRESET); |
8a13b737 | 751 | goto dropwithreset; |
2ff61f9d BJ |
752 | } |
753 | ||
754 | /* | |
755 | * If the ACK bit is off we drop the segment and return. | |
756 | */ | |
8a13b737 | 757 | if ((tiflags & TH_ACK) == 0) |
2ff61f9d BJ |
758 | goto drop; |
759 | ||
760 | /* | |
761 | * Ack processing. | |
87e78f19 | 762 | */ |
87e78f19 BJ |
763 | switch (tp->t_state) { |
764 | ||
2ff61f9d BJ |
765 | /* |
766 | * In SYN_RECEIVED state if the ack ACKs our SYN then enter | |
4859921b | 767 | * ESTABLISHED state and continue processing, otherwise |
2ff61f9d BJ |
768 | * send an RST. |
769 | */ | |
770 | case TCPS_SYN_RECEIVED: | |
8a13b737 | 771 | if (SEQ_GT(tp->snd_una, ti->ti_ack) || |
4b6b94ca | 772 | SEQ_GT(ti->ti_ack, tp->snd_max)) |
8a13b737 | 773 | goto dropwithreset; |
96c50630 | 774 | tcpstat.tcps_connects++; |
8a13b737 BJ |
775 | soisconnected(so); |
776 | tp->t_state = TCPS_ESTABLISHED; | |
6cf1965b | 777 | (void) tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0); |
4aed14e3 | 778 | tp->snd_wl1 = ti->ti_seq - 1; |
8a13b737 | 779 | /* fall into ... */ |
87e78f19 | 780 | |
2ff61f9d BJ |
781 | /* |
782 | * In ESTABLISHED state: drop duplicate ACKs; ACK out of range | |
783 | * ACKs. If the ack is in the range | |
4b6b94ca | 784 | * tp->snd_una < ti->ti_ack <= tp->snd_max |
2ff61f9d BJ |
785 | * then advance tp->snd_una to ti->ti_ack and drop |
786 | * data from the retransmission queue. If this ACK reflects | |
787 | * more up to date window information we update our window information. | |
788 | */ | |
789 | case TCPS_ESTABLISHED: | |
790 | case TCPS_FIN_WAIT_1: | |
791 | case TCPS_FIN_WAIT_2: | |
792 | case TCPS_CLOSE_WAIT: | |
793 | case TCPS_CLOSING: | |
4aed14e3 BJ |
794 | case TCPS_LAST_ACK: |
795 | case TCPS_TIME_WAIT: | |
8a13b737 | 796 | |
96c50630 | 797 | if (SEQ_LEQ(ti->ti_ack, tp->snd_una)) { |
386369f5 | 798 | if (ti->ti_len == 0 && ti->ti_win == tp->snd_wnd) { |
96c50630 | 799 | tcpstat.tcps_rcvdupack++; |
386369f5 | 800 | /* |
6cf1965b MK |
801 | * If we have outstanding data (other than |
802 | * a window probe), this is a completely | |
386369f5 MK |
803 | * duplicate ack (ie, window info didn't |
804 | * change), the ack is the biggest we've | |
805 | * seen and we've seen exactly our rexmt | |
806 | * threshold of them, assume a packet |
807 | * has been dropped and retransmit it. | |
808 | * Kludge snd_nxt & the congestion | |
809 | * window so we send only this one | |
6cf1965b MK |
810 | * packet. |
811 | * | |
812 | * We know we're losing at the current | |
813 | * window size so do congestion avoidance | |
814 | * (set ssthresh to half the current window | |
815 | * and pull our congestion window back to | |
816 | * the new ssthresh). | |
817 | * | |
818 | * Dup acks mean that packets have left the | |
819 | * network (they're now cached at the receiver) | |
820 | * so bump cwnd by the amount in the receiver | |
821 | * to keep a constant cwnd packets in the | |
822 | * network. | |
386369f5 MK |
823 | */ |
824 | if (tp->t_timer[TCPT_REXMT] == 0 || | |
825 | ti->ti_ack != tp->snd_una) | |
826 | tp->t_dupacks = 0; | |
827 | else if (++tp->t_dupacks == tcprexmtthresh) { | |
828 | tcp_seq onxt = tp->snd_nxt; | |
3c317835 | 829 | u_int win = |
9d91b170 | 830 | min(tp->snd_wnd, tp->snd_cwnd) / 2 / |
3c317835 MK |
831 | tp->t_maxseg; |
832 | ||
833 | if (win < 2) | |
834 | win = 2; | |
835 | tp->snd_ssthresh = win * tp->t_maxseg; | |
386369f5 MK |
836 | tp->t_timer[TCPT_REXMT] = 0; |
837 | tp->t_rtt = 0; | |
838 | tp->snd_nxt = ti->ti_ack; | |
839 | tp->snd_cwnd = tp->t_maxseg; | |
840 | (void) tcp_output(tp); | |
6cf1965b MK |
841 | tp->snd_cwnd = tp->snd_ssthresh + |
842 | tp->t_maxseg * tp->t_dupacks; | |
386369f5 MK |
843 | if (SEQ_GT(onxt, tp->snd_nxt)) |
844 | tp->snd_nxt = onxt; | |
845 | goto drop; | |
6cf1965b MK |
846 | } else if (tp->t_dupacks > tcprexmtthresh) { |
847 | tp->snd_cwnd += tp->t_maxseg; | |
848 | (void) tcp_output(tp); | |
849 | goto drop; | |
386369f5 MK |
850 | } |
851 | } else | |
852 | tp->t_dupacks = 0; | |
2ff61f9d | 853 | break; |
96c50630 | 854 | } |
6cf1965b MK |
855 | /* |
856 | * If the congestion window was inflated to account | |
857 | * for the other side's cached packets, retract it. | |
858 | */ | |
859 | if (tp->t_dupacks > tcprexmtthresh && | |
860 | tp->snd_cwnd > tp->snd_ssthresh) | |
861 | tp->snd_cwnd = tp->snd_ssthresh; | |
386369f5 | 862 | tp->t_dupacks = 0; |
96c50630 MK |
863 | if (SEQ_GT(ti->ti_ack, tp->snd_max)) { |
864 | tcpstat.tcps_rcvacktoomuch++; | |
2ff61f9d | 865 | goto dropafterack; |
96c50630 | 866 | } |
8a13b737 | 867 | acked = ti->ti_ack - tp->snd_una; |
96c50630 MK |
868 | tcpstat.tcps_rcvackpack++; |
869 | tcpstat.tcps_rcvackbyte += acked; | |
dd020fc8 BJ |
870 | |
871 | /* | |
872 | * If transmit timer is running and timed sequence | |
873 | * number was acked, update smoothed round trip time. | |
a6bbda13 MK |
874 | * Since we now have an rtt measurement, cancel the |
875 | * timer backoff (cf., Phil Karn's retransmit alg.). | |
876 | * Recompute the initial retransmit timer. | |
dd020fc8 | 877 | */ |
6cf1965b MK |
878 | if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq)) |
879 | tcp_xmit_timer(tp); | |
dd020fc8 | 880 | |
91039e49 MK |
881 | /* |
882 | * If all outstanding data is acked, stop retransmit | |
883 | * timer and remember to restart (more output or persist). | |
884 | * If there is more data to be acked, restart retransmit | |
a6bbda13 | 885 | * timer, using current (possibly backed-off) value. |
91039e49 MK |
886 | */ |
887 | if (ti->ti_ack == tp->snd_max) { | |
4aed14e3 | 888 | tp->t_timer[TCPT_REXMT] = 0; |
91039e49 | 889 | needoutput = 1; |
a6bbda13 MK |
890 | } else if (tp->t_timer[TCPT_PERSIST] == 0) |
891 | tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; | |
1e9621b8 | 892 | /* |
386369f5 MK |
893 | * When new data is acked, open the congestion window. |
894 | * If the window gives us less than ssthresh packets | |
895 | * in flight, open exponentially (maxseg per packet). | |
6cf1965b MK |
896 | * Otherwise open linearly: maxseg per window |
897 | * (maxseg^2 / cwnd per packet), plus a constant | |
898 | * fraction of a packet (maxseg/8) to help larger windows | |
899 | * open quickly enough. | |
1e9621b8 | 900 | */ |
386369f5 | 901 | { |
6cf1965b MK |
902 | register u_int cw = tp->snd_cwnd; |
903 | register u_int incr = tp->t_maxseg; | |
386369f5 | 904 | |
6cf1965b MK |
905 | if (cw > tp->snd_ssthresh) |
906 | incr = incr * incr / cw + incr / 8; | |
907 | tp->snd_cwnd = min(cw + incr, TCP_MAXWIN); | |
386369f5 | 908 | } |
6703c41f | 909 | if (acked > so->so_snd.sb_cc) { |
6703c41f | 910 | tp->snd_wnd -= so->so_snd.sb_cc; |
8011f5df | 911 | sbdrop(&so->so_snd, (int)so->so_snd.sb_cc); |
4859921b | 912 | ourfinisacked = 1; |
6703c41f | 913 | } else { |
668cc26d | 914 | sbdrop(&so->so_snd, acked); |
6703c41f | 915 | tp->snd_wnd -= acked; |
4859921b | 916 | ourfinisacked = 0; |
6703c41f | 917 | } |
6cf1965b MK |
918 | if (so->so_snd.sb_flags & SB_NOTIFY) |
919 | sowwakeup(so); | |
4b6b94ca | 920 | tp->snd_una = ti->ti_ack; |
b8977237 BJ |
921 | if (SEQ_LT(tp->snd_nxt, tp->snd_una)) |
922 | tp->snd_nxt = tp->snd_una; | |
405c9168 | 923 | |
87e78f19 BJ |
924 | switch (tp->t_state) { |
925 | ||
2ff61f9d BJ |
926 | /* |
927 | * In FIN_WAIT_1 STATE in addition to the processing | |
928 | * for the ESTABLISHED state if our FIN is now acknowledged | |
8a13b737 | 929 | * then enter FIN_WAIT_2. |
2ff61f9d BJ |
930 | */ |
931 | case TCPS_FIN_WAIT_1: | |
fdae4427 BJ |
932 | if (ourfinisacked) { |
933 | /* | |
934 | * If we can't receive any more | |
935 | * data, then closing user can proceed. | |
a17510f3 MK |
936 | * Starting the timer is contrary to the |
937 | * specification, but if we don't get a FIN | |
938 | * we'll hang forever. | |
fdae4427 | 939 | */ |
a17510f3 | 940 | if (so->so_state & SS_CANTRCVMORE) { |
fdae4427 | 941 | soisdisconnected(so); |
8a36cf82 | 942 | tp->t_timer[TCPT_2MSL] = tcp_maxidle; |
a17510f3 | 943 | } |
8a13b737 | 944 | tp->t_state = TCPS_FIN_WAIT_2; |
fdae4427 | 945 | } |
87e78f19 BJ |
946 | break; |
947 | ||
2ff61f9d BJ |
948 | /* |
949 | * In CLOSING STATE in addition to the processing for | |
950 | * the ESTABLISHED state if the ACK acknowledges our FIN | |
951 | * then enter the TIME-WAIT state, otherwise ignore | |
952 | * the segment. | |
953 | */ | |
954 | case TCPS_CLOSING: | |
4aed14e3 | 955 | if (ourfinisacked) { |
2ff61f9d | 956 | tp->t_state = TCPS_TIME_WAIT; |
4aed14e3 BJ |
957 | tcp_canceltimers(tp); |
958 | tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; | |
959 | soisdisconnected(so); | |
960 | } | |
961 | break; | |
87e78f19 | 962 | |
2ff61f9d | 963 | /* |
e20bac9c MK |
964 | * In LAST_ACK, we may still be waiting for data to drain |
965 | * and/or to be acked, as well as for the ack of our FIN. | |
966 | * If our FIN is now acknowledged, delete the TCB, | |
967 | * enter the closed state and return. | |
2ff61f9d BJ |
968 | */ |
969 | case TCPS_LAST_ACK: | |
e20bac9c | 970 | if (ourfinisacked) { |
0e3936fa | 971 | tp = tcp_close(tp); |
e20bac9c MK |
972 | goto drop; |
973 | } | |
974 | break; | |
87e78f19 | 975 | |
2ff61f9d BJ |
976 | /* |
977 | * In TIME_WAIT state the only thing that should arrive | |
978 | * is a retransmission of the remote FIN. Acknowledge | |
979 | * it and restart the finack timer. | |
980 | */ | |
981 | case TCPS_TIME_WAIT: | |
405c9168 | 982 | tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; |
2ff61f9d | 983 | goto dropafterack; |
87e78f19 | 984 | } |
8a13b737 | 985 | } |
87e78f19 | 986 | |
2ff61f9d | 987 | step6: |
4aed14e3 BJ |
988 | /* |
989 | * Update window information. | |
bbaaf0fd | 990 | * Don't look at window if no ACK: TAC's send garbage on first SYN. |
4aed14e3 | 991 | */ |
bbaaf0fd MK |
992 | if ((tiflags & TH_ACK) && |
993 | (SEQ_LT(tp->snd_wl1, ti->ti_seq) || tp->snd_wl1 == ti->ti_seq && | |
8e65fd66 | 994 | (SEQ_LT(tp->snd_wl2, ti->ti_ack) || |
bbaaf0fd | 995 | tp->snd_wl2 == ti->ti_ack && ti->ti_win > tp->snd_wnd))) { |
96c50630 MK |
996 | /* keep track of pure window updates */ |
997 | if (ti->ti_len == 0 && | |
386369f5 | 998 | tp->snd_wl2 == ti->ti_ack && ti->ti_win > tp->snd_wnd) |
96c50630 | 999 | tcpstat.tcps_rcvwinupd++; |
4aed14e3 BJ |
1000 | tp->snd_wnd = ti->ti_win; |
1001 | tp->snd_wl1 = ti->ti_seq; | |
1002 | tp->snd_wl2 = ti->ti_ack; | |
18a438b6 MK |
1003 | if (tp->snd_wnd > tp->max_sndwnd) |
1004 | tp->max_sndwnd = tp->snd_wnd; | |
91039e49 MK |
1005 | needoutput = 1; |
1006 | } | |
4aed14e3 | 1007 | |
2ff61f9d | 1008 | /* |
b2db9217 | 1009 | * Process segments with URG. |
2ff61f9d | 1010 | */ |
9c811062 BJ |
1011 | if ((tiflags & TH_URG) && ti->ti_urp && |
1012 | TCPS_HAVERCVDFIN(tp->t_state) == 0) { | |
f4be5024 | 1013 | /* |
bbaaf0fd | 1014 | * This is a kludge, but if we receive and accept |
a5d9c993 SL |
1015 | * random urgent pointers, we'll crash in |
1016 | * soreceive. It's hard to imagine someone | |
1017 | * actually wanting to send this much urgent data. | |
f4be5024 | 1018 | */ |
2f4f574f | 1019 | if (ti->ti_urp + so->so_rcv.sb_cc > SB_MAX) { |
f4be5024 SL |
1020 | ti->ti_urp = 0; /* XXX */ |
1021 | tiflags &= ~TH_URG; /* XXX */ | |
bbaaf0fd | 1022 | goto dodata; /* XXX */ |
f4be5024 | 1023 | } |
b2db9217 BJ |
1024 | /* |
1025 | * If this segment advances the known urgent pointer, | |
1026 | * then mark the data stream. This should not happen | |
1027 | * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since | |
1028 | * a FIN has been received from the remote side. | |
1029 | * In these states we ignore the URG. | |
ae6760c5 MK |
1030 | * |
1031 | * According to RFC961 (Assigned Protocols), | |
1032 | * the urgent pointer points to the last octet | |
1033 | * of urgent data. We continue, however, | |
1034 | * to consider it to indicate the first octet | |
6cf1965b MK |
1035 | * of data past the urgent section as the original |
1036 | * spec states (in one of two places). | |
b2db9217 BJ |
1037 | */ |
1038 | if (SEQ_GT(ti->ti_seq+ti->ti_urp, tp->rcv_up)) { | |
1039 | tp->rcv_up = ti->ti_seq + ti->ti_urp; | |
1040 | so->so_oobmark = so->so_rcv.sb_cc + | |
1041 | (tp->rcv_up - tp->rcv_nxt) - 1; | |
1042 | if (so->so_oobmark == 0) | |
1043 | so->so_state |= SS_RCVATMARK; | |
77a4e3ca | 1044 | sohasoutofband(so); |
a17510f3 | 1045 | tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA); |
b2db9217 BJ |
1046 | } |
1047 | /* | |
1048 | * Remove out of band data so doesn't get presented to user. | |
1049 | * This can happen independent of advancing the URG pointer, | |
1050 | * but if two URG's are pending at once, some out-of-band | |
1051 | * data may creep in... ick. | |
1052 | */ | |
9d866d2f MK |
1053 | if (ti->ti_urp <= ti->ti_len |
1054 | #ifdef SO_OOBINLINE | |
1055 | && (so->so_options & SO_OOBINLINE) == 0 | |
1056 | #endif | |
6cf1965b MK |
1057 | ) |
1058 | tcp_pulloutofband(so, ti, m); | |
bbaaf0fd MK |
1059 | } else |
1060 | /* | |
1061 | * If no out of band data is expected, | |
1062 | * pull receive urgent pointer along | |
1063 | * with the receive window. | |
1064 | */ | |
1065 | if (SEQ_GT(tp->rcv_nxt, tp->rcv_up)) | |
1066 | tp->rcv_up = tp->rcv_nxt; | |
1067 | dodata: /* XXX */ | |
87e78f19 BJ |
1068 | |
1069 | /* | |
2ff61f9d BJ |
1070 | * Process the segment text, merging it into the TCP sequencing queue, |
1071 | * and arranging for acknowledgment of receipt if necessary. | |
1072 | * This process logically involves adjusting tp->rcv_wnd as data | |
1073 | * is presented to the user (this happens in tcp_usrreq.c, | |
1074 | * case PRU_RCVD). If a FIN has already been received on this | |
1075 | * connection then we just ignore the text. | |
87e78f19 | 1076 | */ |
7984a662 MK |
1077 | if ((ti->ti_len || (tiflags&TH_FIN)) && |
1078 | TCPS_HAVERCVDFIN(tp->t_state) == 0) { | |
a17510f3 | 1079 | TCP_REASS(tp, ti, m, so, tiflags); |
18a438b6 MK |
1080 | /* |
1081 | * Note the amount of data that peer has sent into | |
1082 | * our window, in order to estimate the sender's | |
1083 | * buffer size. | |
1084 | */ | |
386369f5 | 1085 | len = so->so_rcv.sb_hiwat - (tp->rcv_adv - tp->rcv_nxt); |
4aed14e3 | 1086 | } else { |
2b4b57cd | 1087 | m_freem(m); |
e832edbc | 1088 | tiflags &= ~TH_FIN; |
4aed14e3 | 1089 | } |
87e78f19 BJ |
1090 | |
1091 | /* | |
e832edbc BJ |
1092 | * If FIN is received ACK the FIN and let the user know |
1093 | * that the connection is closing. | |
87e78f19 | 1094 | */ |
e832edbc | 1095 | if (tiflags & TH_FIN) { |
4aed14e3 BJ |
1096 | if (TCPS_HAVERCVDFIN(tp->t_state) == 0) { |
1097 | socantrcvmore(so); | |
1098 | tp->t_flags |= TF_ACKNOW; | |
1099 | tp->rcv_nxt++; | |
1100 | } | |
2ff61f9d | 1101 | switch (tp->t_state) { |
87e78f19 | 1102 | |
2ff61f9d BJ |
1103 | /* |
1104 | * In SYN_RECEIVED and ESTABLISHED STATES | |
1105 | * enter the CLOSE_WAIT state. | |
53a5409e | 1106 | */ |
2ff61f9d BJ |
1107 | case TCPS_SYN_RECEIVED: |
1108 | case TCPS_ESTABLISHED: | |
1109 | tp->t_state = TCPS_CLOSE_WAIT; | |
1110 | break; | |
53a5409e | 1111 | |
2ff61f9d | 1112 | /* |
8a13b737 BJ |
1113 | * If still in FIN_WAIT_1 STATE FIN has not been acked so |
1114 | * enter the CLOSING state. | |
53a5409e | 1115 | */ |
2ff61f9d | 1116 | case TCPS_FIN_WAIT_1: |
8a13b737 | 1117 | tp->t_state = TCPS_CLOSING; |
2ff61f9d | 1118 | break; |
87e78f19 | 1119 | |
2ff61f9d BJ |
1120 | /* |
1121 | * In FIN_WAIT_2 state enter the TIME_WAIT state, | |
1122 | * starting the time-wait timer, turning off the other | |
1123 | * standard timers. | |
1124 | */ | |
1125 | case TCPS_FIN_WAIT_2: | |
4aed14e3 | 1126 | tp->t_state = TCPS_TIME_WAIT; |
a6503abf | 1127 | tcp_canceltimers(tp); |
405c9168 | 1128 | tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; |
4aed14e3 | 1129 | soisdisconnected(so); |
2ff61f9d BJ |
1130 | break; |
1131 | ||
53a5409e | 1132 | /* |
2ff61f9d | 1133 | * In TIME_WAIT state restart the 2 MSL time_wait timer. |
53a5409e | 1134 | */ |
2ff61f9d | 1135 | case TCPS_TIME_WAIT: |
405c9168 | 1136 | tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; |
2ff61f9d | 1137 | break; |
8a13b737 | 1138 | } |
87e78f19 | 1139 | } |
4b935108 BJ |
1140 | if (so->so_options & SO_DEBUG) |
1141 | tcp_trace(TA_INPUT, ostate, tp, &tcp_saveti, 0); | |
8a13b737 BJ |
1142 | |
1143 | /* | |
1144 | * Return any desired output. | |
1145 | */ | |
91039e49 | 1146 | if (needoutput || (tp->t_flags & TF_ACKNOW)) |
bbaaf0fd | 1147 | (void) tcp_output(tp); |
2ff61f9d | 1148 | return; |
8a13b737 | 1149 | |
2ff61f9d | 1150 | dropafterack: |
8a13b737 | 1151 | /* |
1e977657 BJ |
1152 | * Generate an ACK dropping incoming segment if it occupies |
1153 | * sequence space, where the ACK reflects our state. | |
8a13b737 | 1154 | */ |
ad616704 | 1155 | if (tiflags & TH_RST) |
8a13b737 | 1156 | goto drop; |
5722bd39 | 1157 | m_freem(m); |
4859921b MK |
1158 | tp->t_flags |= TF_ACKNOW; |
1159 | (void) tcp_output(tp); | |
4b6b94ca | 1160 | return; |
8a13b737 BJ |
1161 | |
1162 | dropwithreset: | |
f37c1c84 | 1163 | if (om) { |
668cc26d | 1164 | (void) m_free(om); |
f37c1c84 SL |
1165 | om = 0; |
1166 | } | |
8a13b737 | 1167 | /* |
4aed14e3 | 1168 | * Generate a RST, dropping incoming segment. |
8a13b737 | 1169 | * Make ACK acceptable to originator of segment. |
224f3a72 | 1170 | * Don't bother to respond if destination was broadcast. |
8a13b737 | 1171 | */ |
9d91b170 | 1172 | if ((tiflags & TH_RST) || m->m_flags & M_BCAST) |
8a13b737 BJ |
1173 | goto drop; |
1174 | if (tiflags & TH_ACK) | |
9d91b170 | 1175 | tcp_respond(tp, ti, m, (tcp_seq)0, ti->ti_ack, TH_RST); |
8a13b737 BJ |
1176 | else { |
1177 | if (tiflags & TH_SYN) | |
1178 | ti->ti_len++; | |
9d91b170 | 1179 | tcp_respond(tp, ti, m, ti->ti_seq+ti->ti_len, (tcp_seq)0, |
1e977657 | 1180 | TH_RST|TH_ACK); |
8a13b737 | 1181 | } |
7aa16f99 SL |
1182 | /* destroy temporarily created socket */ |
1183 | if (dropsocket) | |
1184 | (void) soabort(so); | |
4b6b94ca | 1185 | return; |
8a13b737 | 1186 | |
2ff61f9d | 1187 | drop: |
01b1394e SL |
1188 | if (om) |
1189 | (void) m_free(om); | |
8a13b737 BJ |
1190 | /* |
1191 | * Drop space held by incoming segment and return. | |
1192 | */ | |
f3cdd721 BJ |
1193 | if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) |
1194 | tcp_trace(TA_DROP, ostate, tp, &tcp_saveti, 0); | |
2ff61f9d | 1195 | m_freem(m); |
7aa16f99 SL |
1196 | /* destroy temporarily created socket */ |
1197 | if (dropsocket) | |
1198 | (void) soabort(so); | |
4b935108 | 1199 | return; |
2ff61f9d BJ |
1200 | } |
1201 | ||
99578149 | 1202 | tcp_dooptions(tp, om, ti) |
8b5a83bb BJ |
1203 | struct tcpcb *tp; |
1204 | struct mbuf *om; | |
99578149 | 1205 | struct tcpiphdr *ti; |
5e74df82 | 1206 | { |
8b5a83bb | 1207 | register u_char *cp; |
6cf1965b | 1208 | u_short mss; |
8b5a83bb BJ |
1209 | int opt, optlen, cnt; |
1210 | ||
1211 | cp = mtod(om, u_char *); | |
1212 | cnt = om->m_len; | |
1213 | for (; cnt > 0; cnt -= optlen, cp += optlen) { | |
1214 | opt = cp[0]; | |
1215 | if (opt == TCPOPT_EOL) | |
1216 | break; | |
1217 | if (opt == TCPOPT_NOP) | |
1218 | optlen = 1; | |
357b20fc | 1219 | else { |
8b5a83bb | 1220 | optlen = cp[1]; |
357b20fc SL |
1221 | if (optlen <= 0) |
1222 | break; | |
1223 | } | |
8b5a83bb BJ |
1224 | switch (opt) { |
1225 | ||
1226 | default: | |
6cf1965b | 1227 | continue; |
8b5a83bb BJ |
1228 | |
1229 | case TCPOPT_MAXSEG: | |
1230 | if (optlen != 4) | |
1231 | continue; | |
99578149 MK |
1232 | if (!(ti->ti_flags & TH_SYN)) |
1233 | continue; | |
6cf1965b MK |
1234 | bcopy((char *) cp + 2, (char *) &mss, sizeof(mss)); |
1235 | NTOHS(mss); | |
1236 | (void) tcp_mss(tp, mss); /* sets t_maxseg */ | |
8b5a83bb | 1237 | break; |
8b5a83bb | 1238 | } |
5e74df82 | 1239 | } |
668cc26d | 1240 | (void) m_free(om); |
5e74df82 BJ |
1241 | } |
1242 | ||
b2db9217 BJ |
1243 | /* |
1244 | * Pull out of band byte out of a segment so | |
1245 | * it doesn't appear in the user's data queue. | |
1246 | * It is still reflected in the segment length for | |
1247 | * sequencing purposes. | |
1248 | */ | |
6cf1965b | 1249 | tcp_pulloutofband(so, ti, m) |
b2db9217 BJ |
1250 | struct socket *so; |
1251 | struct tcpiphdr *ti; | |
b2db9217 | 1252 | register struct mbuf *m; |
6cf1965b | 1253 | { |
1acff8ec | 1254 | int cnt = ti->ti_urp - 1; |
b2db9217 | 1255 | |
b2db9217 BJ |
1256 | while (cnt >= 0) { |
1257 | if (m->m_len > cnt) { | |
1258 | char *cp = mtod(m, caddr_t) + cnt; | |
1259 | struct tcpcb *tp = sototcpcb(so); | |
1260 | ||
1261 | tp->t_iobc = *cp; | |
1262 | tp->t_oobflags |= TCPOOB_HAVEDATA; | |
668cc26d | 1263 | bcopy(cp+1, cp, (unsigned)(m->m_len - cnt - 1)); |
b2db9217 BJ |
1264 | m->m_len--; |
1265 | return; | |
1266 | } | |
1267 | cnt -= m->m_len; | |
1268 | m = m->m_next; | |
1269 | if (m == 0) | |
1270 | break; | |
1271 | } | |
1272 | panic("tcp_pulloutofband"); | |
1273 | } | |
1274 | ||
99578149 | 1275 | /* |
6cf1965b MK |
1276 | * Collect new round-trip time estimate |
1277 | * and update averages and current timeout. | |
99578149 | 1278 | */ |
6cf1965b | 1279 | tcp_xmit_timer(tp) |
c2a1cd2c | 1280 | register struct tcpcb *tp; |
6cf1965b MK |
1281 | { |
1282 | register short delta; | |
1283 | ||
1284 | tcpstat.tcps_rttupdated++; | |
1285 | if (tp->t_srtt != 0) { | |
1286 | /* | |
1287 | * srtt is stored as fixed point with 3 bits after the | |
1288 | * binary point (i.e., scaled by 8). The following magic | |
1289 | * is equivalent to the smoothing algorithm in rfc793 with | |
1290 | * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed | |
1291 | * point). Adjust t_rtt to origin 0. | |
1292 | */ | |
1293 | delta = tp->t_rtt - 1 - (tp->t_srtt >> TCP_RTT_SHIFT); | |
1294 | if ((tp->t_srtt += delta) <= 0) | |
1295 | tp->t_srtt = 1; | |
1296 | /* | |
1297 | * We accumulate a smoothed rtt variance (actually, a | |
1298 | * smoothed mean difference), then set the retransmit | |
1299 | * timer to smoothed rtt + 4 times the smoothed variance. | |
1300 | * rttvar is stored as fixed point with 2 bits after the | |
1301 | * binary point (scaled by 4). The following is | |
1302 | * equivalent to rfc793 smoothing with an alpha of .75 | |
1303 | * (rttvar = rttvar*3/4 + |delta| / 4). This replaces | |
1304 | * rfc793's wired-in beta. | |
1305 | */ | |
1306 | if (delta < 0) | |
1307 | delta = -delta; | |
1308 | delta -= (tp->t_rttvar >> TCP_RTTVAR_SHIFT); | |
1309 | if ((tp->t_rttvar += delta) <= 0) | |
1310 | tp->t_rttvar = 1; | |
1311 | } else { | |
1312 | /* | |
1313 | * No rtt measurement yet - use the unsmoothed rtt. | |
1314 | * Set the variance to half the rtt (so our first | |
1315 | * retransmit happens at 2*rtt) | |
1316 | */ | |
1317 | tp->t_srtt = tp->t_rtt << TCP_RTT_SHIFT; | |
1318 | tp->t_rttvar = tp->t_rtt << (TCP_RTTVAR_SHIFT - 1); | |
1319 | } | |
1320 | tp->t_rtt = 0; | |
1321 | tp->t_rxtshift = 0; | |
1322 | ||
1323 | /* | |
1324 | * the retransmit should happen at rtt + 4 * rttvar. | |
1325 | * Because of the way we do the smoothing, srtt and rttvar | |
1326 | * will each average +1/2 tick of bias. When we compute | |
1327 | * the retransmit timer, we want 1/2 tick of rounding and | |
1328 | * 1 extra tick because of +-1/2 tick uncertainty in the | |
1329 | * firing of the timer. The bias will give us exactly the | |
1330 | * 1.5 tick we need. But, because the bias is | |
1331 | * statistical, we have to test that we don't drop below | |
1332 | * the minimum feasible timer (which is 2 ticks). | |
1333 | */ | |
1334 | TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp), | |
1335 | tp->t_rttmin, TCPTV_REXMTMAX); | |
1336 | ||
1337 | /* | |
1338 | * We received an ack for a packet that wasn't retransmitted; | |
1339 | * it is probably safe to discard any error indications we've | |
1340 | * received recently. This isn't quite right, but close enough | |
1341 | * for now (a route might have failed after we sent a segment, | |
1342 | * and the return path might not be symmetrical). | |
1343 | */ | |
1344 | tp->t_softerror = 0; | |
1345 | } | |
1346 | ||
1347 | /* | |
1348 | * Determine a reasonable value for maxseg size. | |
1349 | * If the route is known, check route for mtu. | |
1350 | * If none, use an mss that can be handled on the outgoing | |
1351 | * interface without forcing IP to fragment; if bigger than | |
1352 | * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES | |
1353 | * to utilize large mbufs. If no route is found, route has no mtu, | |
1354 | * or the destination isn't local, use a default, hopefully conservative | |
1355 | * size (usually 512 or the default IP max size, but no more than the mtu | |
1356 | * of the interface), as we can't discover anything about intervening | |
1357 | * gateways or networks. We also initialize the congestion/slow start | |
1358 | * window to be a single segment if the destination isn't local. | |
1359 | * While looking at the routing entry, we also initialize other path-dependent | |
1360 | * parameters from pre-set or cached values in the routing entry. | |
1361 | */ | |
1362 | ||
1363 | tcp_mss(tp, offer) | |
1364 | register struct tcpcb *tp; | |
1365 | u_short offer; | |
99578149 MK |
1366 | { |
1367 | struct route *ro; | |
6cf1965b | 1368 | register struct rtentry *rt; |
99578149 | 1369 | struct ifnet *ifp; |
6cf1965b MK |
1370 | register int rtt, mss; |
1371 | u_long bufsize; | |
99578149 | 1372 | struct inpcb *inp; |
6cf1965b MK |
1373 | struct socket *so; |
1374 | extern int tcp_mssdflt, tcp_rttdflt; | |
99578149 MK |
1375 | |
1376 | inp = tp->t_inpcb; | |
1377 | ro = &inp->inp_route; | |
6cf1965b MK |
1378 | |
1379 | if ((rt = ro->ro_rt) == (struct rtentry *)0) { | |
99578149 MK |
1380 | /* No route yet, so try to acquire one */ |
1381 | if (inp->inp_faddr.s_addr != INADDR_ANY) { | |
1382 | ro->ro_dst.sa_family = AF_INET; | |
0af8f6fc | 1383 | ro->ro_dst.sa_len = sizeof(ro->ro_dst); |
99578149 MK |
1384 | ((struct sockaddr_in *) &ro->ro_dst)->sin_addr = |
1385 | inp->inp_faddr; | |
1386 | rtalloc(ro); | |
1387 | } | |
6cf1965b MK |
1388 | if ((rt = ro->ro_rt) == (struct rtentry *)0) |
1389 | return (tcp_mssdflt); | |
99578149 | 1390 | } |
6cf1965b MK |
1391 | ifp = rt->rt_ifp; |
1392 | so = inp->inp_socket; | |
99578149 | 1393 | |
6cf1965b MK |
1394 | #ifdef RTV_MTU /* if route characteristics exist ... */ |
1395 | /* | |
1396 | * While we're here, check if there's an initial rtt | |
1397 | * or rttvar. Convert from the route-table units | |
1398 | * to scaled multiples of the slow timeout timer. | |
1399 | */ | |
1400 | if (tp->t_srtt == 0 && (rtt = rt->rt_rmx.rmx_rtt)) { | |
1401 | if (rt->rt_rmx.rmx_locks & RTV_MTU) | |
1402 | tp->t_rttmin = rtt / (RTM_RTTUNIT / PR_SLOWHZ); | |
1403 | tp->t_srtt = rtt / (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTT_SCALE)); | |
1404 | if (rt->rt_rmx.rmx_rttvar) | |
1405 | tp->t_rttvar = rt->rt_rmx.rmx_rttvar / | |
1406 | (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTTVAR_SCALE)); | |
1407 | else | |
1408 | /* default variation is +- 1 rtt */ | |
1409 | tp->t_rttvar = | |
1410 | tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE; | |
1411 | TCPT_RANGESET(tp->t_rxtcur, | |
1412 | ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1, | |
1413 | tp->t_rttmin, TCPTV_REXMTMAX); | |
1414 | } | |
1415 | /* | |
1416 | * if there's an mtu associated with the route, use it | |
1417 | */ | |
1418 | if (rt->rt_rmx.rmx_mtu) | |
1419 | mss = rt->rt_rmx.rmx_mtu - sizeof(struct tcpiphdr); | |
1420 | else | |
1421 | #endif /* RTV_MTU */ | |
1422 | { | |
1423 | mss = ifp->if_mtu - sizeof(struct tcpiphdr); | |
7cc62c26 | 1424 | #if (MCLBYTES & (MCLBYTES - 1)) == 0 |
6cf1965b MK |
1425 | if (mss > MCLBYTES) |
1426 | mss &= ~(MCLBYTES-1); | |
99578149 | 1427 | #else |
6cf1965b MK |
1428 | if (mss > MCLBYTES) |
1429 | mss = mss / MCLBYTES * MCLBYTES; | |
99578149 | 1430 | #endif |
6cf1965b MK |
1431 | if (!in_localaddr(inp->inp_faddr)) |
1432 | mss = min(mss, tcp_mssdflt); | |
1433 | } | |
1434 | /* | |
1435 | * The current mss, t_maxseg, is initialized to the default value. | |
1436 | * If we compute a smaller value, reduce the current mss. | |
1437 | * If we compute a larger value, return it for use in sending | |
1438 | * a max seg size option, but don't store it for use | |
1439 | * unless we received an offer at least that large from peer. | |
1440 | * However, do not accept offers under 32 bytes. | |
1441 | */ | |
1442 | if (offer) | |
1443 | mss = min(mss, offer); | |
1444 | mss = max(mss, 32); /* sanity */ | |
1445 | if (mss < tp->t_maxseg || offer != 0) { | |
1446 | /* | |
1447 | * If there's a pipesize, change the socket buffer | |
1448 | * to that size. Make the socket buffers an integral | |
1449 | * number of mss units; if the mss is larger than | |
1450 | * the socket buffer, decrease the mss. | |
1451 | */ | |
1452 | #ifdef RTV_SPIPE | |
1453 | if ((bufsize = rt->rt_rmx.rmx_sendpipe) == 0) | |
1454 | #endif | |
1455 | bufsize = so->so_snd.sb_hiwat; | |
1456 | if (bufsize < mss) | |
1457 | mss = bufsize; | |
1458 | else { | |
1459 | bufsize = min(bufsize, SB_MAX) / mss * mss; | |
1460 | (void) sbreserve(&so->so_snd, bufsize); | |
1461 | } | |
1462 | tp->t_maxseg = mss; | |
386369f5 | 1463 | |
6cf1965b MK |
1464 | #ifdef RTV_RPIPE |
1465 | if ((bufsize = rt->rt_rmx.rmx_recvpipe) == 0) | |
1466 | #endif | |
1467 | bufsize = so->so_rcv.sb_hiwat; | |
1468 | if (bufsize > mss) { | |
1469 | bufsize = min(bufsize, SB_MAX) / mss * mss; | |
1470 | (void) sbreserve(&so->so_rcv, bufsize); | |
1471 | } | |
1472 | } | |
a6bbda13 | 1473 | tp->snd_cwnd = mss; |
6cf1965b MK |
1474 | |
1475 | #ifdef RTV_SSTHRESH | |
1476 | if (rt->rt_rmx.rmx_ssthresh) { | |
1477 | /* | |
1478 | * There's some sort of gateway or interface | |
1479 | * buffer limit on the path. Use this to set | |
1480 | * the slow start threshhold, but set the | |
1481 | * threshold to no less than 2*mss. | |
1482 | */ | |
1483 | tp->snd_ssthresh = max(2 * mss, rt->rt_rmx.rmx_ssthresh); | |
1484 | } | |
1485 | #endif /* RTV_MTU */ | |
a6bbda13 | 1486 | return (mss); |
99578149 | 1487 | } |
9d866d2f MK |
1488 | |
1489 | #if BSD<43 | |
1490 | /* XXX this belongs in netinet/in.c */ | |
1491 | in_localaddr(in) | |
1492 | struct in_addr in; | |
1493 | { | |
1494 | register u_long i = ntohl(in.s_addr); | |
1495 | register struct ifnet *ifp; | |
1496 | register struct sockaddr_in *sin; | |
1497 | register u_long mask; | |
1498 | ||
1499 | if (IN_CLASSA(i)) | |
1500 | mask = IN_CLASSA_NET; | |
1501 | else if (IN_CLASSB(i)) | |
1502 | mask = IN_CLASSB_NET; | |
1503 | else if (IN_CLASSC(i)) | |
1504 | mask = IN_CLASSC_NET; | |
1505 | else | |
1506 | return (0); | |
1507 | ||
1508 | i &= mask; | |
1509 | for (ifp = ifnet; ifp; ifp = ifp->if_next) { | |
1510 | if (ifp->if_addr.sa_family != AF_INET) | |
1511 | continue; | |
1512 | sin = (struct sockaddr_in *)&ifp->if_addr; | |
1513 | if ((sin->sin_addr.s_addr & mask) == i) | |
1514 | return (1); | |
1515 | } | |
1516 | return (0); | |
1517 | } | |
1518 | #endif |