Commit | Line | Data |
---|---|---|
15637ed4 RG |
1 | /* |
2 | * Copyright (c) 1982, 1986, 1988, 1990 Regents of the University of California. | |
3 | * All rights reserved. | |
4 | * | |
5 | * Redistribution and use in source and binary forms, with or without | |
6 | * modification, are permitted provided that the following conditions | |
7 | * are met: | |
8 | * 1. Redistributions of source code must retain the above copyright | |
9 | * notice, this list of conditions and the following disclaimer. | |
10 | * 2. Redistributions in binary form must reproduce the above copyright | |
11 | * notice, this list of conditions and the following disclaimer in the | |
12 | * documentation and/or other materials provided with the distribution. | |
13 | * 3. All advertising materials mentioning features or use of this software | |
14 | * must display the following acknowledgement: | |
15 | * This product includes software developed by the University of | |
16 | * California, Berkeley and its contributors. | |
17 | * 4. Neither the name of the University nor the names of its contributors | |
18 | * may be used to endorse or promote products derived from this software | |
19 | * without specific prior written permission. | |
20 | * | |
21 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
23 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
24 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
25 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
26 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
27 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
28 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
29 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
30 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
31 | * SUCH DAMAGE. | |
32 | * | |
38e82238 | 33 | * from: @(#)tcp_subr.c 7.20 (Berkeley) 12/1/90 |
fde1aeb2 | 34 | * $Id: tcp_subr.c,v 1.5 1993/11/25 01:35:16 wollman Exp $ |
15637ed4 RG |
35 | */ |
36 | ||
37 | #include "param.h" | |
38 | #include "systm.h" | |
39 | #include "malloc.h" | |
40 | #include "mbuf.h" | |
41 | #include "socket.h" | |
42 | #include "socketvar.h" | |
43 | #include "protosw.h" | |
44 | #include "errno.h" | |
45 | ||
46 | #include "../net/route.h" | |
47 | #include "../net/if.h" | |
48 | ||
49 | #include "in.h" | |
50 | #include "in_systm.h" | |
51 | #include "ip.h" | |
52 | #include "in_pcb.h" | |
2cb63509 | 53 | #include "in_var.h" |
15637ed4 RG |
54 | #include "ip_var.h" |
55 | #include "ip_icmp.h" | |
56 | #include "tcp.h" | |
57 | #include "tcp_fsm.h" | |
58 | #include "tcp_seq.h" | |
59 | #include "tcp_timer.h" | |
60 | #include "tcp_var.h" | |
61 | #include "tcpip.h" | |
62 | ||
8ace4366 GW |
63 | tcp_seq tcp_iss; |
64 | struct inpcb tcb; | |
65 | struct tcpstat tcpstat; | |
66 | ||
2cb63509 GW |
67 | #ifdef MTUDISC |
68 | int tcp_mtuchanged(struct inpcb *, int); | |
69 | #endif | |
8ace4366 | 70 | |
15637ed4 RG |
71 | /* |
72 | * Tcp initialization | |
73 | */ | |
4c45483e | 74 | void |
15637ed4 RG |
75 | tcp_init() |
76 | { | |
77 | ||
78 | tcp_iss = 1; /* wrong */ | |
79 | tcb.inp_next = tcb.inp_prev = &tcb; | |
80 | if (max_protohdr < sizeof(struct tcpiphdr)) | |
81 | max_protohdr = sizeof(struct tcpiphdr); | |
82 | if (max_linkhdr + sizeof(struct tcpiphdr) > MHLEN) | |
83 | panic("tcp_init"); | |
84 | } | |
85 | ||
86 | /* | |
87 | * Create template to be used to send tcp packets on a connection. | |
88 | * Call after host entry created, allocates an mbuf and fills | |
89 | * in a skeletal tcp/ip header, minimizing the amount of work | |
90 | * necessary when the connection is used. | |
91 | */ | |
92 | struct tcpiphdr * | |
93 | tcp_template(tp) | |
94 | struct tcpcb *tp; | |
95 | { | |
96 | register struct inpcb *inp = tp->t_inpcb; | |
97 | register struct mbuf *m; | |
98 | register struct tcpiphdr *n; | |
99 | ||
100 | if ((n = tp->t_template) == 0) { | |
101 | m = m_get(M_DONTWAIT, MT_HEADER); | |
102 | if (m == NULL) | |
103 | return (0); | |
104 | m->m_len = sizeof (struct tcpiphdr); | |
105 | n = mtod(m, struct tcpiphdr *); | |
106 | } | |
107 | n->ti_next = n->ti_prev = 0; | |
108 | n->ti_x1 = 0; | |
109 | n->ti_pr = IPPROTO_TCP; | |
110 | n->ti_len = htons(sizeof (struct tcpiphdr) - sizeof (struct ip)); | |
111 | n->ti_src = inp->inp_laddr; | |
112 | n->ti_dst = inp->inp_faddr; | |
113 | n->ti_sport = inp->inp_lport; | |
114 | n->ti_dport = inp->inp_fport; | |
115 | n->ti_seq = 0; | |
116 | n->ti_ack = 0; | |
117 | n->ti_x2 = 0; | |
118 | n->ti_off = 5; | |
119 | n->ti_flags = 0; | |
120 | n->ti_win = 0; | |
121 | n->ti_sum = 0; | |
122 | n->ti_urp = 0; | |
123 | return (n); | |
124 | } | |
125 | ||
126 | /* | |
127 | * Send a single message to the TCP at address specified by | |
128 | * the given TCP/IP header. If m == 0, then we make a copy | |
129 | * of the tcpiphdr at ti and send directly to the addressed host. | |
130 | * This is used to force keep alive messages out using the TCP | |
131 | * template for a connection tp->t_template. If flags are given | |
132 | * then we send a message back to the TCP which originated the | |
133 | * segment ti, and discard the mbuf containing it and any other | |
134 | * attached mbufs. | |
135 | * | |
136 | * In any case the ack and sequence number of the transmitted | |
137 | * segment are as specified by the parameters. | |
138 | */ | |
4c45483e | 139 | void |
15637ed4 RG |
140 | tcp_respond(tp, ti, m, ack, seq, flags) |
141 | struct tcpcb *tp; | |
142 | register struct tcpiphdr *ti; | |
143 | register struct mbuf *m; | |
144 | tcp_seq ack, seq; | |
145 | int flags; | |
146 | { | |
147 | register int tlen; | |
148 | int win = 0; | |
149 | struct route *ro = 0; | |
150 | ||
151 | if (tp) { | |
152 | win = sbspace(&tp->t_inpcb->inp_socket->so_rcv); | |
153 | ro = &tp->t_inpcb->inp_route; | |
154 | } | |
155 | if (m == 0) { | |
156 | m = m_gethdr(M_DONTWAIT, MT_HEADER); | |
157 | if (m == NULL) | |
158 | return; | |
159 | #ifdef TCP_COMPAT_42 | |
160 | tlen = 1; | |
161 | #else | |
162 | tlen = 0; | |
163 | #endif | |
164 | m->m_data += max_linkhdr; | |
165 | *mtod(m, struct tcpiphdr *) = *ti; | |
166 | ti = mtod(m, struct tcpiphdr *); | |
167 | flags = TH_ACK; | |
168 | } else { | |
169 | m_freem(m->m_next); | |
170 | m->m_next = 0; | |
171 | m->m_data = (caddr_t)ti; | |
172 | m->m_len = sizeof (struct tcpiphdr); | |
173 | tlen = 0; | |
174 | #define xchg(a,b,type) { type t; t=a; a=b; b=t; } | |
175 | xchg(ti->ti_dst.s_addr, ti->ti_src.s_addr, u_long); | |
176 | xchg(ti->ti_dport, ti->ti_sport, u_short); | |
177 | #undef xchg | |
178 | } | |
179 | ti->ti_len = htons((u_short)(sizeof (struct tcphdr) + tlen)); | |
180 | tlen += sizeof (struct tcpiphdr); | |
181 | m->m_len = tlen; | |
182 | m->m_pkthdr.len = tlen; | |
183 | m->m_pkthdr.rcvif = (struct ifnet *) 0; | |
184 | ti->ti_next = ti->ti_prev = 0; | |
185 | ti->ti_x1 = 0; | |
186 | ti->ti_seq = htonl(seq); | |
187 | ti->ti_ack = htonl(ack); | |
188 | ti->ti_x2 = 0; | |
189 | ti->ti_off = sizeof (struct tcphdr) >> 2; | |
190 | ti->ti_flags = flags; | |
191 | ti->ti_win = htons((u_short)win); | |
192 | ti->ti_urp = 0; | |
193 | ti->ti_sum = in_cksum(m, tlen); | |
194 | ((struct ip *)ti)->ip_len = tlen; | |
195 | ((struct ip *)ti)->ip_ttl = tcp_ttl; | |
196 | (void) ip_output(m, (struct mbuf *)0, ro, 0); | |
197 | } | |
198 | ||
199 | /* | |
200 | * Create a new TCP control block, making an | |
201 | * empty reassembly queue and hooking it to the argument | |
202 | * protocol control block. | |
203 | */ | |
204 | struct tcpcb * | |
205 | tcp_newtcpcb(inp) | |
206 | struct inpcb *inp; | |
207 | { | |
208 | struct mbuf *m = m_getclr(M_DONTWAIT, MT_PCB); | |
209 | register struct tcpcb *tp; | |
210 | ||
211 | if (m == NULL) | |
212 | return ((struct tcpcb *)0); | |
213 | tp = mtod(m, struct tcpcb *); | |
214 | tp->seg_next = tp->seg_prev = (struct tcpiphdr *)tp; | |
215 | tp->t_maxseg = tcp_mssdflt; | |
216 | ||
217 | tp->t_flags = 0; /* sends options! */ | |
218 | tp->t_inpcb = inp; | |
219 | /* | |
220 | * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no | |
221 | * rtt estimate. Set rttvar so that srtt + 2 * rttvar gives | |
222 | * reasonable initial retransmit time. | |
223 | */ | |
224 | tp->t_srtt = TCPTV_SRTTBASE; | |
225 | tp->t_rttvar = tcp_rttdflt * PR_SLOWHZ << 2; | |
226 | tp->t_rttmin = TCPTV_MIN; | |
227 | TCPT_RANGESET(tp->t_rxtcur, | |
228 | ((TCPTV_SRTTBASE >> 2) + (TCPTV_SRTTDFLT << 2)) >> 1, | |
229 | TCPTV_MIN, TCPTV_REXMTMAX); | |
230 | tp->snd_cwnd = TCP_MAXWIN; | |
231 | tp->snd_ssthresh = TCP_MAXWIN; | |
232 | inp->inp_ip.ip_ttl = tcp_ttl; | |
233 | inp->inp_ppcb = (caddr_t)tp; | |
2cb63509 GW |
234 | #ifdef MTUDISC |
235 | /* | |
236 | * Enable Path MTU Discovery on this PCB. | |
237 | */ | |
238 | inp->inp_mtunotify = tcp_mtuchanged; | |
239 | inp->inp_flags |= INP_DISCOVERMTU; | |
240 | #endif /* MTUDISC */ | |
15637ed4 RG |
241 | return (tp); |
242 | } | |
243 | ||
244 | /* | |
245 | * Drop a TCP connection, reporting | |
246 | * the specified error. If connection is synchronized, | |
247 | * then send a RST to peer. | |
248 | */ | |
249 | struct tcpcb * | |
250 | tcp_drop(tp, errno) | |
251 | register struct tcpcb *tp; | |
252 | int errno; | |
253 | { | |
254 | struct socket *so = tp->t_inpcb->inp_socket; | |
255 | ||
256 | if (TCPS_HAVERCVDSYN(tp->t_state)) { | |
257 | tp->t_state = TCPS_CLOSED; | |
258 | (void) tcp_output(tp); | |
259 | tcpstat.tcps_drops++; | |
260 | } else | |
261 | tcpstat.tcps_conndrops++; | |
262 | if (errno == ETIMEDOUT && tp->t_softerror) | |
263 | errno = tp->t_softerror; | |
264 | so->so_error = errno; | |
265 | return (tcp_close(tp)); | |
266 | } | |
267 | ||
268 | /* | |
269 | * Close a TCP control block: | |
270 | * discard all space held by the tcp | |
271 | * discard internet protocol block | |
272 | * wake up any sleepers | |
273 | */ | |
274 | struct tcpcb * | |
275 | tcp_close(tp) | |
276 | register struct tcpcb *tp; | |
277 | { | |
278 | register struct tcpiphdr *t; | |
279 | struct inpcb *inp = tp->t_inpcb; | |
280 | struct socket *so = inp->inp_socket; | |
281 | register struct mbuf *m; | |
282 | #ifdef RTV_RTT | |
283 | register struct rtentry *rt; | |
284 | ||
285 | /* | |
286 | * If we sent enough data to get some meaningful characteristics, | |
287 | * save them in the routing entry. 'Enough' is arbitrarily | |
288 | * defined as the sendpipesize (default 4K) * 16. This would | |
289 | * give us 16 rtt samples assuming we only get one sample per | |
290 | * window (the usual case on a long haul net). 16 samples is | |
291 | * enough for the srtt filter to converge to within 5% of the correct | |
292 | * value; fewer samples and we could save a very bogus rtt. | |
293 | * | |
294 | * Don't update the default route's characteristics and don't | |
295 | * update anything that the user "locked". | |
296 | */ | |
297 | if (SEQ_LT(tp->iss + so->so_snd.sb_hiwat * 16, tp->snd_max) && | |
298 | (rt = inp->inp_route.ro_rt) && | |
299 | ((struct sockaddr_in *)rt_key(rt))->sin_addr.s_addr != INADDR_ANY) { | |
4c45483e | 300 | register u_long i = 0; |
15637ed4 RG |
301 | |
302 | if ((rt->rt_rmx.rmx_locks & RTV_RTT) == 0) { | |
303 | i = tp->t_srtt * | |
304 | (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTT_SCALE)); | |
305 | if (rt->rt_rmx.rmx_rtt && i) | |
306 | /* | |
307 | * filter this update to half the old & half | |
308 | * the new values, converting scale. | |
309 | * See route.h and tcp_var.h for a | |
310 | * description of the scaling constants. | |
311 | */ | |
312 | rt->rt_rmx.rmx_rtt = | |
313 | (rt->rt_rmx.rmx_rtt + i) / 2; | |
314 | else | |
315 | rt->rt_rmx.rmx_rtt = i; | |
316 | } | |
317 | if ((rt->rt_rmx.rmx_locks & RTV_RTTVAR) == 0) { | |
318 | i = tp->t_rttvar * | |
319 | (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTTVAR_SCALE)); | |
320 | if (rt->rt_rmx.rmx_rttvar && i) | |
321 | rt->rt_rmx.rmx_rttvar = | |
322 | (rt->rt_rmx.rmx_rttvar + i) / 2; | |
323 | else | |
324 | rt->rt_rmx.rmx_rttvar = i; | |
325 | } | |
326 | /* | |
327 | * update the pipelimit (ssthresh) if it has been updated | |
328 | * already or if a pipesize was specified & the threshhold | |
329 | * got below half the pipesize. I.e., wait for bad news | |
330 | * before we start updating, then update on both good | |
331 | * and bad news. | |
332 | */ | |
333 | if ((rt->rt_rmx.rmx_locks & RTV_SSTHRESH) == 0 && | |
334 | (i = tp->snd_ssthresh) && rt->rt_rmx.rmx_ssthresh || | |
335 | i < (rt->rt_rmx.rmx_sendpipe / 2)) { | |
336 | /* | |
337 | * convert the limit from user data bytes to | |
338 | * packets then to packet data bytes. | |
339 | */ | |
340 | i = (i + tp->t_maxseg / 2) / tp->t_maxseg; | |
341 | if (i < 2) | |
342 | i = 2; | |
343 | i *= (u_long)(tp->t_maxseg + sizeof (struct tcpiphdr)); | |
344 | if (rt->rt_rmx.rmx_ssthresh) | |
345 | rt->rt_rmx.rmx_ssthresh = | |
346 | (rt->rt_rmx.rmx_ssthresh + i) / 2; | |
347 | else | |
348 | rt->rt_rmx.rmx_ssthresh = i; | |
349 | } | |
350 | } | |
2cb63509 | 351 | #endif /* RTV_RTT */ |
15637ed4 RG |
352 | /* free the reassembly queue, if any */ |
353 | t = tp->seg_next; | |
354 | while (t != (struct tcpiphdr *)tp) { | |
355 | t = (struct tcpiphdr *)t->ti_next; | |
356 | m = REASS_MBUF((struct tcpiphdr *)t->ti_prev); | |
357 | remque(t->ti_prev); | |
358 | m_freem(m); | |
359 | } | |
360 | if (tp->t_template) | |
361 | (void) m_free(dtom(tp->t_template)); | |
362 | (void) m_free(dtom(tp)); | |
363 | inp->inp_ppcb = 0; | |
364 | soisdisconnected(so); | |
365 | /* clobber input pcb cache if we're closing the cached connection */ | |
366 | if (inp == tcp_last_inpcb) | |
367 | tcp_last_inpcb = &tcb; | |
368 | in_pcbdetach(inp); | |
369 | tcpstat.tcps_closed++; | |
370 | return ((struct tcpcb *)0); | |
371 | } | |
372 | ||
/*
 * TCP drain routine.  Currently a no-op: nothing is released here.
 */
void
tcp_drain()
{
	/* intentionally empty */
}
378 | ||
379 | /* | |
380 | * Notify a tcp user of an asynchronous error; | |
381 | * store error as soft error, but wake up user | |
382 | * (for now, won't do anything until can select for soft error). | |
383 | */ | |
4c45483e | 384 | void |
15637ed4 RG |
385 | tcp_notify(inp, error) |
386 | register struct inpcb *inp; | |
387 | int error; | |
388 | { | |
389 | ||
390 | ((struct tcpcb *)inp->inp_ppcb)->t_softerror = error; | |
391 | wakeup((caddr_t) &inp->inp_socket->so_timeo); | |
392 | sorwakeup(inp->inp_socket); | |
393 | sowwakeup(inp->inp_socket); | |
394 | } | |
395 | ||
2cb63509 GW |
/*
 * When we get a PRC_MSGSIZE error (generated by the ICMP layer upon
 * receipt of an ICMP_UNREACH_NEEDFRAG message), we need to get the
 * IP layer to check the cached MTU data that it has in its PCBs.
 * If things have changed, this will cause us to receive a
 * PRC_MTUCHANGED message for /every/ connection to the same
 * destination; that is handled by the tcp_mtuchanged() function,
 * below.
 *
 * In the immortal words of Ken and Dennis, ``You are not expected to
 * understand this.''
 *
 * Without MTUDISC this function does nothing.
 */
void
tcp_checkmtu(struct inpcb *inp, int error) {
#ifdef MTUDISC
	/*
	 * XXX - this should also cause an immediate retransmission and
	 * slow start, since we know for a fact that the message we just sent
	 * got dropped on the floor.  For now, just do what tcp_quench does.
	 */
	tcp_quench(inp, error);	/* tcp_quench() takes (inp, errno) */
	in_pcbmtu(inp);
#endif /* MTUDISC */
}
420 | ||
#ifdef MTUDISC
/*
 * Per-PCB callback for a change in path MTU.  Nothing is done with
 * the notification yet.  Always returns 0 so that the declared int
 * return type yields a defined value.
 */
int	/* grrr... should be void... */
tcp_mtuchanged(struct inpcb *inp, int error) {
	/* don't do anything just yet... */
	return (0);
}
#endif /* MTUDISC */
427 | ||
4c45483e | 428 | void |
15637ed4 RG |
429 | tcp_ctlinput(cmd, sa, ip) |
430 | int cmd; | |
431 | struct sockaddr *sa; | |
432 | register struct ip *ip; | |
433 | { | |
434 | register struct tcphdr *th; | |
4c45483e | 435 | void (*notify)(struct inpcb *, int) = tcp_notify; |
15637ed4 RG |
436 | |
437 | if (cmd == PRC_QUENCH) | |
438 | notify = tcp_quench; | |
2cb63509 GW |
439 | #ifdef MTUDISC |
440 | else if (cmd == PRC_MSGSIZE) | |
441 | notify = tcp_checkmtu; | |
442 | else if (cmd == PRC_MTUCHANGED) /* just in case */ | |
443 | notify = tcp_mtuchanged; | |
444 | #endif /* MTUDISC */ | |
15637ed4 RG |
445 | else if ((unsigned)cmd > PRC_NCMDS || inetctlerrmap[cmd] == 0) |
446 | return; | |
447 | if (ip) { | |
448 | th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); | |
449 | in_pcbnotify(&tcb, sa, th->th_dport, ip->ip_src, th->th_sport, | |
450 | cmd, notify); | |
451 | } else | |
452 | in_pcbnotify(&tcb, sa, 0, zeroin_addr, 0, cmd, notify); | |
453 | } | |
454 | ||
455 | /* | |
456 | * When a source quench is received, close congestion window | |
457 | * to one segment. We will gradually open it again as we proceed. | |
458 | */ | |
4c45483e GW |
459 | void |
460 | tcp_quench(inp, errno) | |
15637ed4 | 461 | struct inpcb *inp; |
4c45483e | 462 | int errno; |
15637ed4 RG |
463 | { |
464 | struct tcpcb *tp = intotcpcb(inp); | |
465 | ||
466 | if (tp) | |
467 | tp->snd_cwnd = tp->t_maxseg; | |
468 | } |