Commit | Line | Data |
---|---|---|
15637ed4 RG |
1 | /* |
2 | * Copyright (c) 1982, 1986, 1988, 1990 Regents of the University of California. | |
3 | * All rights reserved. | |
4 | * | |
5 | * Redistribution and use in source and binary forms, with or without | |
6 | * modification, are permitted provided that the following conditions | |
7 | * are met: | |
8 | * 1. Redistributions of source code must retain the above copyright | |
9 | * notice, this list of conditions and the following disclaimer. | |
10 | * 2. Redistributions in binary form must reproduce the above copyright | |
11 | * notice, this list of conditions and the following disclaimer in the | |
12 | * documentation and/or other materials provided with the distribution. | |
13 | * 3. All advertising materials mentioning features or use of this software | |
14 | * must display the following acknowledgement: | |
15 | * This product includes software developed by the University of | |
16 | * California, Berkeley and its contributors. | |
17 | * 4. Neither the name of the University nor the names of its contributors | |
18 | * may be used to endorse or promote products derived from this software | |
19 | * without specific prior written permission. | |
20 | * | |
21 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
23 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
24 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
25 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
26 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
27 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
28 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
29 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
30 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
31 | * SUCH DAMAGE. | |
32 | * | |
33 | * @(#)tcp_subr.c 7.20 (Berkeley) 12/1/90 | |
34 | */ | |
35 | ||
36 | #include "param.h" | |
37 | #include "systm.h" | |
38 | #include "malloc.h" | |
39 | #include "mbuf.h" | |
40 | #include "socket.h" | |
41 | #include "socketvar.h" | |
42 | #include "protosw.h" | |
43 | #include "errno.h" | |
44 | ||
45 | #include "../net/route.h" | |
46 | #include "../net/if.h" | |
47 | ||
48 | #include "in.h" | |
49 | #include "in_systm.h" | |
50 | #include "ip.h" | |
51 | #include "in_pcb.h" | |
52 | #include "ip_var.h" | |
53 | #include "ip_icmp.h" | |
54 | #include "tcp.h" | |
55 | #include "tcp_fsm.h" | |
56 | #include "tcp_seq.h" | |
57 | #include "tcp_timer.h" | |
58 | #include "tcp_var.h" | |
59 | #include "tcpip.h" | |
60 | ||
61 | /* patchable/settable parameters for tcp */ | |
62 | int tcp_ttl = TCP_TTL; | |
63 | int tcp_mssdflt = TCP_MSS; | |
64 | int tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ; | |
65 | ||
66 | extern struct inpcb *tcp_last_inpcb; | |
67 | ||
68 | /* | |
69 | * Tcp initialization | |
70 | */ | |
71 | tcp_init() | |
72 | { | |
73 | ||
74 | tcp_iss = 1; /* wrong */ | |
75 | tcb.inp_next = tcb.inp_prev = &tcb; | |
76 | if (max_protohdr < sizeof(struct tcpiphdr)) | |
77 | max_protohdr = sizeof(struct tcpiphdr); | |
78 | if (max_linkhdr + sizeof(struct tcpiphdr) > MHLEN) | |
79 | panic("tcp_init"); | |
80 | } | |
81 | ||
82 | /* | |
83 | * Create template to be used to send tcp packets on a connection. | |
84 | * Call after host entry created, allocates an mbuf and fills | |
85 | * in a skeletal tcp/ip header, minimizing the amount of work | |
86 | * necessary when the connection is used. | |
87 | */ | |
88 | struct tcpiphdr * | |
89 | tcp_template(tp) | |
90 | struct tcpcb *tp; | |
91 | { | |
92 | register struct inpcb *inp = tp->t_inpcb; | |
93 | register struct mbuf *m; | |
94 | register struct tcpiphdr *n; | |
95 | ||
96 | if ((n = tp->t_template) == 0) { | |
97 | m = m_get(M_DONTWAIT, MT_HEADER); | |
98 | if (m == NULL) | |
99 | return (0); | |
100 | m->m_len = sizeof (struct tcpiphdr); | |
101 | n = mtod(m, struct tcpiphdr *); | |
102 | } | |
103 | n->ti_next = n->ti_prev = 0; | |
104 | n->ti_x1 = 0; | |
105 | n->ti_pr = IPPROTO_TCP; | |
106 | n->ti_len = htons(sizeof (struct tcpiphdr) - sizeof (struct ip)); | |
107 | n->ti_src = inp->inp_laddr; | |
108 | n->ti_dst = inp->inp_faddr; | |
109 | n->ti_sport = inp->inp_lport; | |
110 | n->ti_dport = inp->inp_fport; | |
111 | n->ti_seq = 0; | |
112 | n->ti_ack = 0; | |
113 | n->ti_x2 = 0; | |
114 | n->ti_off = 5; | |
115 | n->ti_flags = 0; | |
116 | n->ti_win = 0; | |
117 | n->ti_sum = 0; | |
118 | n->ti_urp = 0; | |
119 | return (n); | |
120 | } | |
121 | ||
122 | /* | |
123 | * Send a single message to the TCP at address specified by | |
124 | * the given TCP/IP header. If m == 0, then we make a copy | |
125 | * of the tcpiphdr at ti and send directly to the addressed host. | |
126 | * This is used to force keep alive messages out using the TCP | |
127 | * template for a connection tp->t_template. If flags are given | |
128 | * then we send a message back to the TCP which originated the | |
129 | * segment ti, and discard the mbuf containing it and any other | |
130 | * attached mbufs. | |
131 | * | |
132 | * In any case the ack and sequence number of the transmitted | |
133 | * segment are as specified by the parameters. | |
134 | */ | |
135 | tcp_respond(tp, ti, m, ack, seq, flags) | |
136 | struct tcpcb *tp; | |
137 | register struct tcpiphdr *ti; | |
138 | register struct mbuf *m; | |
139 | tcp_seq ack, seq; | |
140 | int flags; | |
141 | { | |
142 | register int tlen; | |
143 | int win = 0; | |
144 | struct route *ro = 0; | |
145 | ||
146 | if (tp) { | |
147 | win = sbspace(&tp->t_inpcb->inp_socket->so_rcv); | |
148 | ro = &tp->t_inpcb->inp_route; | |
149 | } | |
150 | if (m == 0) { | |
151 | m = m_gethdr(M_DONTWAIT, MT_HEADER); | |
152 | if (m == NULL) | |
153 | return; | |
154 | #ifdef TCP_COMPAT_42 | |
155 | tlen = 1; | |
156 | #else | |
157 | tlen = 0; | |
158 | #endif | |
159 | m->m_data += max_linkhdr; | |
160 | *mtod(m, struct tcpiphdr *) = *ti; | |
161 | ti = mtod(m, struct tcpiphdr *); | |
162 | flags = TH_ACK; | |
163 | } else { | |
164 | m_freem(m->m_next); | |
165 | m->m_next = 0; | |
166 | m->m_data = (caddr_t)ti; | |
167 | m->m_len = sizeof (struct tcpiphdr); | |
168 | tlen = 0; | |
169 | #define xchg(a,b,type) { type t; t=a; a=b; b=t; } | |
170 | xchg(ti->ti_dst.s_addr, ti->ti_src.s_addr, u_long); | |
171 | xchg(ti->ti_dport, ti->ti_sport, u_short); | |
172 | #undef xchg | |
173 | } | |
174 | ti->ti_len = htons((u_short)(sizeof (struct tcphdr) + tlen)); | |
175 | tlen += sizeof (struct tcpiphdr); | |
176 | m->m_len = tlen; | |
177 | m->m_pkthdr.len = tlen; | |
178 | m->m_pkthdr.rcvif = (struct ifnet *) 0; | |
179 | ti->ti_next = ti->ti_prev = 0; | |
180 | ti->ti_x1 = 0; | |
181 | ti->ti_seq = htonl(seq); | |
182 | ti->ti_ack = htonl(ack); | |
183 | ti->ti_x2 = 0; | |
184 | ti->ti_off = sizeof (struct tcphdr) >> 2; | |
185 | ti->ti_flags = flags; | |
186 | ti->ti_win = htons((u_short)win); | |
187 | ti->ti_urp = 0; | |
188 | ti->ti_sum = in_cksum(m, tlen); | |
189 | ((struct ip *)ti)->ip_len = tlen; | |
190 | ((struct ip *)ti)->ip_ttl = tcp_ttl; | |
191 | (void) ip_output(m, (struct mbuf *)0, ro, 0); | |
192 | } | |
193 | ||
194 | /* | |
195 | * Create a new TCP control block, making an | |
196 | * empty reassembly queue and hooking it to the argument | |
197 | * protocol control block. | |
198 | */ | |
199 | struct tcpcb * | |
200 | tcp_newtcpcb(inp) | |
201 | struct inpcb *inp; | |
202 | { | |
203 | struct mbuf *m = m_getclr(M_DONTWAIT, MT_PCB); | |
204 | register struct tcpcb *tp; | |
205 | ||
206 | if (m == NULL) | |
207 | return ((struct tcpcb *)0); | |
208 | tp = mtod(m, struct tcpcb *); | |
209 | tp->seg_next = tp->seg_prev = (struct tcpiphdr *)tp; | |
210 | tp->t_maxseg = tcp_mssdflt; | |
211 | ||
212 | tp->t_flags = 0; /* sends options! */ | |
213 | tp->t_inpcb = inp; | |
214 | /* | |
215 | * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no | |
216 | * rtt estimate. Set rttvar so that srtt + 2 * rttvar gives | |
217 | * reasonable initial retransmit time. | |
218 | */ | |
219 | tp->t_srtt = TCPTV_SRTTBASE; | |
220 | tp->t_rttvar = tcp_rttdflt * PR_SLOWHZ << 2; | |
221 | tp->t_rttmin = TCPTV_MIN; | |
222 | TCPT_RANGESET(tp->t_rxtcur, | |
223 | ((TCPTV_SRTTBASE >> 2) + (TCPTV_SRTTDFLT << 2)) >> 1, | |
224 | TCPTV_MIN, TCPTV_REXMTMAX); | |
225 | tp->snd_cwnd = TCP_MAXWIN; | |
226 | tp->snd_ssthresh = TCP_MAXWIN; | |
227 | inp->inp_ip.ip_ttl = tcp_ttl; | |
228 | inp->inp_ppcb = (caddr_t)tp; | |
229 | return (tp); | |
230 | } | |
231 | ||
232 | /* | |
233 | * Drop a TCP connection, reporting | |
234 | * the specified error. If connection is synchronized, | |
235 | * then send a RST to peer. | |
236 | */ | |
237 | struct tcpcb * | |
238 | tcp_drop(tp, errno) | |
239 | register struct tcpcb *tp; | |
240 | int errno; | |
241 | { | |
242 | struct socket *so = tp->t_inpcb->inp_socket; | |
243 | ||
244 | if (TCPS_HAVERCVDSYN(tp->t_state)) { | |
245 | tp->t_state = TCPS_CLOSED; | |
246 | (void) tcp_output(tp); | |
247 | tcpstat.tcps_drops++; | |
248 | } else | |
249 | tcpstat.tcps_conndrops++; | |
250 | if (errno == ETIMEDOUT && tp->t_softerror) | |
251 | errno = tp->t_softerror; | |
252 | so->so_error = errno; | |
253 | return (tcp_close(tp)); | |
254 | } | |
255 | ||
256 | /* | |
257 | * Close a TCP control block: | |
258 | * discard all space held by the tcp | |
259 | * discard internet protocol block | |
260 | * wake up any sleepers | |
261 | */ | |
262 | struct tcpcb * | |
263 | tcp_close(tp) | |
264 | register struct tcpcb *tp; | |
265 | { | |
266 | register struct tcpiphdr *t; | |
267 | struct inpcb *inp = tp->t_inpcb; | |
268 | struct socket *so = inp->inp_socket; | |
269 | register struct mbuf *m; | |
270 | #ifdef RTV_RTT | |
271 | register struct rtentry *rt; | |
272 | ||
273 | /* | |
274 | * If we sent enough data to get some meaningful characteristics, | |
275 | * save them in the routing entry. 'Enough' is arbitrarily | |
276 | * defined as the sendpipesize (default 4K) * 16. This would | |
277 | * give us 16 rtt samples assuming we only get one sample per | |
278 | * window (the usual case on a long haul net). 16 samples is | |
279 | * enough for the srtt filter to converge to within 5% of the correct | |
280 | * value; fewer samples and we could save a very bogus rtt. | |
281 | * | |
282 | * Don't update the default route's characteristics and don't | |
283 | * update anything that the user "locked". | |
284 | */ | |
285 | if (SEQ_LT(tp->iss + so->so_snd.sb_hiwat * 16, tp->snd_max) && | |
286 | (rt = inp->inp_route.ro_rt) && | |
287 | ((struct sockaddr_in *)rt_key(rt))->sin_addr.s_addr != INADDR_ANY) { | |
288 | register u_long i; | |
289 | ||
290 | if ((rt->rt_rmx.rmx_locks & RTV_RTT) == 0) { | |
291 | i = tp->t_srtt * | |
292 | (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTT_SCALE)); | |
293 | if (rt->rt_rmx.rmx_rtt && i) | |
294 | /* | |
295 | * filter this update to half the old & half | |
296 | * the new values, converting scale. | |
297 | * See route.h and tcp_var.h for a | |
298 | * description of the scaling constants. | |
299 | */ | |
300 | rt->rt_rmx.rmx_rtt = | |
301 | (rt->rt_rmx.rmx_rtt + i) / 2; | |
302 | else | |
303 | rt->rt_rmx.rmx_rtt = i; | |
304 | } | |
305 | if ((rt->rt_rmx.rmx_locks & RTV_RTTVAR) == 0) { | |
306 | i = tp->t_rttvar * | |
307 | (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTTVAR_SCALE)); | |
308 | if (rt->rt_rmx.rmx_rttvar && i) | |
309 | rt->rt_rmx.rmx_rttvar = | |
310 | (rt->rt_rmx.rmx_rttvar + i) / 2; | |
311 | else | |
312 | rt->rt_rmx.rmx_rttvar = i; | |
313 | } | |
314 | /* | |
315 | * update the pipelimit (ssthresh) if it has been updated | |
316 | * already or if a pipesize was specified & the threshhold | |
317 | * got below half the pipesize. I.e., wait for bad news | |
318 | * before we start updating, then update on both good | |
319 | * and bad news. | |
320 | */ | |
321 | if ((rt->rt_rmx.rmx_locks & RTV_SSTHRESH) == 0 && | |
322 | (i = tp->snd_ssthresh) && rt->rt_rmx.rmx_ssthresh || | |
323 | i < (rt->rt_rmx.rmx_sendpipe / 2)) { | |
324 | /* | |
325 | * convert the limit from user data bytes to | |
326 | * packets then to packet data bytes. | |
327 | */ | |
328 | i = (i + tp->t_maxseg / 2) / tp->t_maxseg; | |
329 | if (i < 2) | |
330 | i = 2; | |
331 | i *= (u_long)(tp->t_maxseg + sizeof (struct tcpiphdr)); | |
332 | if (rt->rt_rmx.rmx_ssthresh) | |
333 | rt->rt_rmx.rmx_ssthresh = | |
334 | (rt->rt_rmx.rmx_ssthresh + i) / 2; | |
335 | else | |
336 | rt->rt_rmx.rmx_ssthresh = i; | |
337 | } | |
338 | } | |
339 | #endif RTV_RTT | |
340 | /* free the reassembly queue, if any */ | |
341 | t = tp->seg_next; | |
342 | while (t != (struct tcpiphdr *)tp) { | |
343 | t = (struct tcpiphdr *)t->ti_next; | |
344 | m = REASS_MBUF((struct tcpiphdr *)t->ti_prev); | |
345 | remque(t->ti_prev); | |
346 | m_freem(m); | |
347 | } | |
348 | if (tp->t_template) | |
349 | (void) m_free(dtom(tp->t_template)); | |
350 | (void) m_free(dtom(tp)); | |
351 | inp->inp_ppcb = 0; | |
352 | soisdisconnected(so); | |
353 | /* clobber input pcb cache if we're closing the cached connection */ | |
354 | if (inp == tcp_last_inpcb) | |
355 | tcp_last_inpcb = &tcb; | |
356 | in_pcbdetach(inp); | |
357 | tcpstat.tcps_closed++; | |
358 | return ((struct tcpcb *)0); | |
359 | } | |
360 | ||
/*
 * TCP drain routine.  Currently an empty placeholder: it takes no
 * arguments and performs no work.
 */
tcp_drain()
{
	/* nothing to do */
}
365 | ||
366 | /* | |
367 | * Notify a tcp user of an asynchronous error; | |
368 | * store error as soft error, but wake up user | |
369 | * (for now, won't do anything until can select for soft error). | |
370 | */ | |
371 | tcp_notify(inp, error) | |
372 | register struct inpcb *inp; | |
373 | int error; | |
374 | { | |
375 | ||
376 | ((struct tcpcb *)inp->inp_ppcb)->t_softerror = error; | |
377 | wakeup((caddr_t) &inp->inp_socket->so_timeo); | |
378 | sorwakeup(inp->inp_socket); | |
379 | sowwakeup(inp->inp_socket); | |
380 | } | |
381 | ||
382 | tcp_ctlinput(cmd, sa, ip) | |
383 | int cmd; | |
384 | struct sockaddr *sa; | |
385 | register struct ip *ip; | |
386 | { | |
387 | register struct tcphdr *th; | |
388 | extern struct in_addr zeroin_addr; | |
389 | extern u_char inetctlerrmap[]; | |
390 | int (*notify)() = tcp_notify, tcp_quench(); | |
391 | ||
392 | if (cmd == PRC_QUENCH) | |
393 | notify = tcp_quench; | |
394 | else if ((unsigned)cmd > PRC_NCMDS || inetctlerrmap[cmd] == 0) | |
395 | return; | |
396 | if (ip) { | |
397 | th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); | |
398 | in_pcbnotify(&tcb, sa, th->th_dport, ip->ip_src, th->th_sport, | |
399 | cmd, notify); | |
400 | } else | |
401 | in_pcbnotify(&tcb, sa, 0, zeroin_addr, 0, cmd, notify); | |
402 | } | |
403 | ||
404 | /* | |
405 | * When a source quench is received, close congestion window | |
406 | * to one segment. We will gradually open it again as we proceed. | |
407 | */ | |
408 | tcp_quench(inp) | |
409 | struct inpcb *inp; | |
410 | { | |
411 | struct tcpcb *tp = intotcpcb(inp); | |
412 | ||
413 | if (tp) | |
414 | tp->snd_cwnd = tp->t_maxseg; | |
415 | } |