Commit | Line | Data |
---|---|---|
8ae0e4b4 | 1 | /* |
33042259 | 2 | * Copyright (c) 1982, 1986, 1988, 1990 Regents of the University of California. |
2b6b6284 | 3 | * All rights reserved. |
8ae0e4b4 | 4 | * |
dbf0c423 | 5 | * %sccs.include.redist.c% |
2b6b6284 | 6 | * |
69d96ae2 | 7 | * @(#)tcp_subr.c 7.25 (Berkeley) %G% |
8ae0e4b4 | 8 | */ |
ecaa4e6f | 9 | |
85ee91bb KS |
10 | #include <sys/param.h> |
11 | #include <sys/proc.h> | |
12 | #include <sys/systm.h> | |
13 | #include <sys/malloc.h> | |
14 | #include <sys/mbuf.h> | |
15 | #include <sys/socket.h> | |
16 | #include <sys/socketvar.h> | |
17 | #include <sys/protosw.h> | |
18 | #include <sys/errno.h> | |
5548a02f KB |
19 | |
20 | #include <net/route.h> | |
21 | #include <net/if.h> | |
22 | ||
23 | #include <netinet/in.h> | |
24 | #include <netinet/in_systm.h> | |
25 | #include <netinet/ip.h> | |
26 | #include <netinet/in_pcb.h> | |
27 | #include <netinet/ip_var.h> | |
28 | #include <netinet/ip_icmp.h> | |
29 | #include <netinet/tcp.h> | |
30 | #include <netinet/tcp_fsm.h> | |
31 | #include <netinet/tcp_seq.h> | |
32 | #include <netinet/tcp_timer.h> | |
33 | #include <netinet/tcp_var.h> | |
34 | #include <netinet/tcpip.h> | |
ecaa4e6f | 35 | |
33042259 | 36 | /* patchable/settable parameters for tcp */ |
10604dba | 37 | int tcp_ttl = TCP_TTL; |
33042259 MK |
38 | int tcp_mssdflt = TCP_MSS; |
39 | int tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ; | |
69d96ae2 | 40 | int tcp_do_rfc1323 = 1; |
33042259 MK |
41 | |
42 | extern struct inpcb *tcp_last_inpcb; | |
10604dba | 43 | |
ecaa4e6f BJ |
44 | /* |
45 | * Tcp initialization | |
46 | */ | |
47 | tcp_init() | |
48 | { | |
49 | ||
50 | tcp_iss = 1; /* wrong */ | |
51 | tcb.inp_next = tcb.inp_prev = &tcb; | |
9d91b170 MK |
52 | if (max_protohdr < sizeof(struct tcpiphdr)) |
53 | max_protohdr = sizeof(struct tcpiphdr); | |
54 | if (max_linkhdr + sizeof(struct tcpiphdr) > MHLEN) | |
55 | panic("tcp_init"); | |
ecaa4e6f BJ |
56 | } |
57 | ||
58 | /* | |
59 | * Create template to be used to send tcp packets on a connection. | |
60 | * Call after host entry created, allocates an mbuf and fills | |
61 | * in a skeletal tcp/ip header, minimizing the amount of work | |
62 | * necessary when the connection is used. | |
63 | */ | |
64 | struct tcpiphdr * | |
65 | tcp_template(tp) | |
66 | struct tcpcb *tp; | |
67 | { | |
68 | register struct inpcb *inp = tp->t_inpcb; | |
69 | register struct mbuf *m; | |
70 | register struct tcpiphdr *n; | |
71 | ||
ece01391 | 72 | if ((n = tp->t_template) == 0) { |
9f5105e3 | 73 | m = m_get(M_DONTWAIT, MT_HEADER); |
ece01391 MK |
74 | if (m == NULL) |
75 | return (0); | |
ece01391 MK |
76 | m->m_len = sizeof (struct tcpiphdr); |
77 | n = mtod(m, struct tcpiphdr *); | |
78 | } | |
ecaa4e6f BJ |
79 | n->ti_next = n->ti_prev = 0; |
80 | n->ti_x1 = 0; | |
81 | n->ti_pr = IPPROTO_TCP; | |
82 | n->ti_len = htons(sizeof (struct tcpiphdr) - sizeof (struct ip)); | |
83 | n->ti_src = inp->inp_laddr; | |
84 | n->ti_dst = inp->inp_faddr; | |
85 | n->ti_sport = inp->inp_lport; | |
86 | n->ti_dport = inp->inp_fport; | |
87 | n->ti_seq = 0; | |
0974b45c | 88 | n->ti_ack = 0; |
ecaa4e6f BJ |
89 | n->ti_x2 = 0; |
90 | n->ti_off = 5; | |
91 | n->ti_flags = 0; | |
92 | n->ti_win = 0; | |
93 | n->ti_sum = 0; | |
94 | n->ti_urp = 0; | |
95 | return (n); | |
96 | } | |
97 | ||
98 | /* | |
405c9168 | 99 | * Send a single message to the TCP at address specified by |
33042259 | 100 | * the given TCP/IP header. If m == 0, then we make a copy |
405c9168 BJ |
101 | * of the tcpiphdr at ti and send directly to the addressed host. |
102 | * This is used to force keep alive messages out using the TCP | |
103 | * template for a connection tp->t_template. If flags are given | |
104 | * then we send a message back to the TCP which originated the | |
105 | * segment ti, and discard the mbuf containing it and any other | |
106 | * attached mbufs. | |
107 | * | |
108 | * In any case the ack and sequence number of the transmitted | |
109 | * segment are as specified by the parameters. | |
ecaa4e6f | 110 | */ |
9d91b170 | 111 | tcp_respond(tp, ti, m, ack, seq, flags) |
8e65fd66 | 112 | struct tcpcb *tp; |
ecaa4e6f | 113 | register struct tcpiphdr *ti; |
9d91b170 | 114 | register struct mbuf *m; |
0974b45c | 115 | tcp_seq ack, seq; |
ecaa4e6f BJ |
116 | int flags; |
117 | { | |
37a28d38 MK |
118 | register int tlen; |
119 | int win = 0; | |
c124e997 | 120 | struct route *ro = 0; |
ecaa4e6f | 121 | |
c124e997 | 122 | if (tp) { |
8e65fd66 | 123 | win = sbspace(&tp->t_inpcb->inp_socket->so_rcv); |
c124e997 SL |
124 | ro = &tp->t_inpcb->inp_route; |
125 | } | |
9d91b170 MK |
126 | if (m == 0) { |
127 | m = m_gethdr(M_DONTWAIT, MT_HEADER); | |
5cdc4d65 | 128 | if (m == NULL) |
405c9168 | 129 | return; |
eeef4ac3 MK |
130 | #ifdef TCP_COMPAT_42 |
131 | tlen = 1; | |
132 | #else | |
133 | tlen = 0; | |
134 | #endif | |
9d91b170 | 135 | m->m_data += max_linkhdr; |
405c9168 BJ |
136 | *mtod(m, struct tcpiphdr *) = *ti; |
137 | ti = mtod(m, struct tcpiphdr *); | |
138 | flags = TH_ACK; | |
139 | } else { | |
140 | m_freem(m->m_next); | |
141 | m->m_next = 0; | |
9d91b170 | 142 | m->m_data = (caddr_t)ti; |
405c9168 | 143 | m->m_len = sizeof (struct tcpiphdr); |
33042259 | 144 | tlen = 0; |
0974b45c | 145 | #define xchg(a,b,type) { type t; t=a; a=b; b=t; } |
405c9168 BJ |
146 | xchg(ti->ti_dst.s_addr, ti->ti_src.s_addr, u_long); |
147 | xchg(ti->ti_dport, ti->ti_sport, u_short); | |
ecaa4e6f | 148 | #undef xchg |
405c9168 | 149 | } |
37a28d38 MK |
150 | ti->ti_len = htons((u_short)(sizeof (struct tcphdr) + tlen)); |
151 | tlen += sizeof (struct tcpiphdr); | |
152 | m->m_len = tlen; | |
153 | m->m_pkthdr.len = tlen; | |
154 | m->m_pkthdr.rcvif = (struct ifnet *) 0; | |
0974b45c BJ |
155 | ti->ti_next = ti->ti_prev = 0; |
156 | ti->ti_x1 = 0; | |
2c48b3f8 BJ |
157 | ti->ti_seq = htonl(seq); |
158 | ti->ti_ack = htonl(ack); | |
0974b45c BJ |
159 | ti->ti_x2 = 0; |
160 | ti->ti_off = sizeof (struct tcphdr) >> 2; | |
ecaa4e6f | 161 | ti->ti_flags = flags; |
69d96ae2 AC |
162 | if (tp) |
163 | ti->ti_win = htons((u_short) (win >> tp->rcv_scale)); | |
164 | else | |
165 | ti->ti_win = htons((u_short)win); | |
8e65fd66 | 166 | ti->ti_urp = 0; |
69d96ae2 | 167 | ti->ti_sum = 0; |
37a28d38 MK |
168 | ti->ti_sum = in_cksum(m, tlen); |
169 | ((struct ip *)ti)->ip_len = tlen; | |
10604dba | 170 | ((struct ip *)ti)->ip_ttl = tcp_ttl; |
c124e997 | 171 | (void) ip_output(m, (struct mbuf *)0, ro, 0); |
ecaa4e6f | 172 | } |
a6503abf | 173 | |
0974b45c BJ |
174 | /* |
175 | * Create a new TCP control block, making an | |
176 | * empty reassembly queue and hooking it to the argument | |
177 | * protocol control block. | |
178 | */ | |
a6503abf BJ |
179 | struct tcpcb * |
180 | tcp_newtcpcb(inp) | |
181 | struct inpcb *inp; | |
182 | { | |
a6503abf | 183 | register struct tcpcb *tp; |
a6503abf | 184 | |
69d96ae2 AC |
185 | tp = malloc(sizeof(*tp), M_PCB, M_NOWAIT); |
186 | if (tp == NULL) | |
5cdc4d65 | 187 | return ((struct tcpcb *)0); |
69d96ae2 | 188 | bzero((char *) tp, sizeof(struct tcpcb)); |
a6503abf | 189 | tp->seg_next = tp->seg_prev = (struct tcpiphdr *)tp; |
33042259 MK |
190 | tp->t_maxseg = tcp_mssdflt; |
191 | ||
69d96ae2 | 192 | tp->t_flags = tcp_do_rfc1323 ? (TF_REQ_SCALE|TF_REQ_TSTMP) : 0; |
a6503abf | 193 | tp->t_inpcb = inp; |
7cc62c26 | 194 | /* |
5ca0b868 MK |
195 | * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no |
196 | * rtt estimate. Set rttvar so that srtt + 2 * rttvar gives | |
197 | * reasonable initial retransmit time. | |
7cc62c26 | 198 | */ |
5ca0b868 | 199 | tp->t_srtt = TCPTV_SRTTBASE; |
33042259 MK |
200 | tp->t_rttvar = tcp_rttdflt * PR_SLOWHZ << 2; |
201 | tp->t_rttmin = TCPTV_MIN; | |
dabb0e53 MK |
202 | TCPT_RANGESET(tp->t_rxtcur, |
203 | ((TCPTV_SRTTBASE >> 2) + (TCPTV_SRTTDFLT << 2)) >> 1, | |
204 | TCPTV_MIN, TCPTV_REXMTMAX); | |
69d96ae2 AC |
205 | tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT; |
206 | tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT; | |
33042259 | 207 | inp->inp_ip.ip_ttl = tcp_ttl; |
a6503abf BJ |
208 | inp->inp_ppcb = (caddr_t)tp; |
209 | return (tp); | |
210 | } | |
211 | ||
0974b45c BJ |
212 | /* |
213 | * Drop a TCP connection, reporting | |
214 | * the specified error. If connection is synchronized, | |
215 | * then send a RST to peer. | |
216 | */ | |
0e3936fa | 217 | struct tcpcb * |
a6503abf | 218 | tcp_drop(tp, errno) |
0e3936fa | 219 | register struct tcpcb *tp; |
a6503abf BJ |
220 | int errno; |
221 | { | |
222 | struct socket *so = tp->t_inpcb->inp_socket; | |
223 | ||
d3504cc0 | 224 | if (TCPS_HAVERCVDSYN(tp->t_state)) { |
a6503abf | 225 | tp->t_state = TCPS_CLOSED; |
39d536e6 | 226 | (void) tcp_output(tp); |
35f3fc10 MK |
227 | tcpstat.tcps_drops++; |
228 | } else | |
229 | tcpstat.tcps_conndrops++; | |
33042259 MK |
230 | if (errno == ETIMEDOUT && tp->t_softerror) |
231 | errno = tp->t_softerror; | |
a6503abf | 232 | so->so_error = errno; |
0e3936fa | 233 | return (tcp_close(tp)); |
a6503abf BJ |
234 | } |
235 | ||
0974b45c BJ |
236 | /* |
237 | * Close a TCP control block: | |
238 | * discard all space held by the tcp | |
239 | * discard internet protocol block | |
240 | * wake up any sleepers | |
241 | */ | |
0e3936fa | 242 | struct tcpcb * |
a6503abf BJ |
243 | tcp_close(tp) |
244 | register struct tcpcb *tp; | |
245 | { | |
246 | register struct tcpiphdr *t; | |
364801f5 BJ |
247 | struct inpcb *inp = tp->t_inpcb; |
248 | struct socket *so = inp->inp_socket; | |
13e2480b | 249 | register struct mbuf *m; |
33042259 MK |
250 | #ifdef RTV_RTT |
251 | register struct rtentry *rt; | |
a6503abf | 252 | |
33042259 MK |
253 | /* |
254 | * If we sent enough data to get some meaningful characteristics, | |
255 | * save them in the routing entry. 'Enough' is arbitrarily | |
1ac2096c | 256 | * defined as the sendpipesize (default 4K) * 16. This would |
33042259 MK |
257 | * give us 16 rtt samples assuming we only get one sample per |
258 | * window (the usual case on a long haul net). 16 samples is | |
259 | * enough for the srtt filter to converge to within 5% of the correct | |
260 | * value; fewer samples and we could save a very bogus rtt. | |
261 | * | |
262 | * Don't update the default route's characteristics and don't | |
263 | * update anything that the user "locked". | |
264 | */ | |
1ac2096c | 265 | if (SEQ_LT(tp->iss + so->so_snd.sb_hiwat * 16, tp->snd_max) && |
33042259 | 266 | (rt = inp->inp_route.ro_rt) && |
1ac2096c | 267 | ((struct sockaddr_in *)rt_key(rt))->sin_addr.s_addr != INADDR_ANY) { |
33042259 MK |
268 | register u_long i; |
269 | ||
270 | if ((rt->rt_rmx.rmx_locks & RTV_RTT) == 0) { | |
271 | i = tp->t_srtt * | |
272 | (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTT_SCALE)); | |
273 | if (rt->rt_rmx.rmx_rtt && i) | |
274 | /* | |
275 | * filter this update to half the old & half | |
276 | * the new values, converting scale. | |
277 | * See route.h and tcp_var.h for a | |
278 | * description of the scaling constants. | |
279 | */ | |
280 | rt->rt_rmx.rmx_rtt = | |
281 | (rt->rt_rmx.rmx_rtt + i) / 2; | |
282 | else | |
283 | rt->rt_rmx.rmx_rtt = i; | |
284 | } | |
285 | if ((rt->rt_rmx.rmx_locks & RTV_RTTVAR) == 0) { | |
286 | i = tp->t_rttvar * | |
287 | (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTTVAR_SCALE)); | |
288 | if (rt->rt_rmx.rmx_rttvar && i) | |
289 | rt->rt_rmx.rmx_rttvar = | |
290 | (rt->rt_rmx.rmx_rttvar + i) / 2; | |
291 | else | |
292 | rt->rt_rmx.rmx_rttvar = i; | |
293 | } | |
294 | /* | |
295 | * update the pipelimit (ssthresh) if it has been updated | |
296 | * already or if a pipesize was specified & the threshhold | |
297 | * got below half the pipesize. I.e., wait for bad news | |
298 | * before we start updating, then update on both good | |
299 | * and bad news. | |
300 | */ | |
301 | if ((rt->rt_rmx.rmx_locks & RTV_SSTHRESH) == 0 && | |
302 | (i = tp->snd_ssthresh) && rt->rt_rmx.rmx_ssthresh || | |
303 | i < (rt->rt_rmx.rmx_sendpipe / 2)) { | |
304 | /* | |
305 | * convert the limit from user data bytes to | |
306 | * packets then to packet data bytes. | |
307 | */ | |
308 | i = (i + tp->t_maxseg / 2) / tp->t_maxseg; | |
309 | if (i < 2) | |
310 | i = 2; | |
311 | i *= (u_long)(tp->t_maxseg + sizeof (struct tcpiphdr)); | |
312 | if (rt->rt_rmx.rmx_ssthresh) | |
313 | rt->rt_rmx.rmx_ssthresh = | |
314 | (rt->rt_rmx.rmx_ssthresh + i) / 2; | |
315 | else | |
316 | rt->rt_rmx.rmx_ssthresh = i; | |
317 | } | |
318 | } | |
319 | #endif RTV_RTT | |
320 | /* free the reassembly queue, if any */ | |
a6503abf | 321 | t = tp->seg_next; |
13e2480b SL |
322 | while (t != (struct tcpiphdr *)tp) { |
323 | t = (struct tcpiphdr *)t->ti_next; | |
33042259 | 324 | m = REASS_MBUF((struct tcpiphdr *)t->ti_prev); |
13e2480b SL |
325 | remque(t->ti_prev); |
326 | m_freem(m); | |
327 | } | |
0974b45c | 328 | if (tp->t_template) |
a6503abf | 329 | (void) m_free(dtom(tp->t_template)); |
69d96ae2 | 330 | free(tp, M_PCB); |
364801f5 | 331 | inp->inp_ppcb = 0; |
4aed14e3 | 332 | soisdisconnected(so); |
33042259 MK |
333 | /* clobber input pcb cache if we're closing the cached connection */ |
334 | if (inp == tcp_last_inpcb) | |
335 | tcp_last_inpcb = &tcb; | |
86676257 | 336 | in_pcbdetach(inp); |
35f3fc10 | 337 | tcpstat.tcps_closed++; |
0e3936fa | 338 | return ((struct tcpcb *)0); |
a6503abf BJ |
339 | } |
340 | ||
/*
 * Drain routine for TCP.  The body is empty: nothing is released.
 */
tcp_drain()
{
}
345 | ||
be841dc3 MK |
346 | /* |
347 | * Notify a tcp user of an asynchronous error; | |
33042259 MK |
348 | * store error as soft error, but wake up user |
349 | * (for now, won't do anything until can select for soft error). | |
be841dc3 | 350 | */ |
33042259 | 351 | tcp_notify(inp, error) |
ba200b9a | 352 | struct inpcb *inp; |
33042259 | 353 | int error; |
be841dc3 | 354 | { |
ba200b9a MK |
355 | register struct tcpcb *tp = (struct tcpcb *)inp->inp_ppcb; |
356 | register struct socket *so = inp->inp_socket; | |
be841dc3 | 357 | |
ba200b9a | 358 | /* |
69d96ae2 | 359 | * Ignore some errors if we are hooked up. |
ba200b9a MK |
360 | * If connection hasn't completed, has retransmitted several times, |
361 | * and receives a second error, give up now. This is better | |
362 | * than waiting a long time to establish a connection that | |
363 | * can never complete. | |
364 | */ | |
69d96ae2 AC |
365 | if (tp->t_state == TCPS_ESTABLISHED && |
366 | (error == EHOSTUNREACH || error == ENETUNREACH || | |
367 | error == EHOSTDOWN)) { | |
368 | return; | |
369 | } else if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift > 3 && | |
ba200b9a MK |
370 | tp->t_softerror) |
371 | so->so_error = error; | |
69d96ae2 | 372 | else |
ba200b9a MK |
373 | tp->t_softerror = error; |
374 | wakeup((caddr_t) &so->so_timeo); | |
375 | sorwakeup(so); | |
376 | sowwakeup(so); | |
be841dc3 | 377 | } |
b1dd4cca MK |
378 | |
379 | tcp_ctlinput(cmd, sa, ip) | |
72e4f44e | 380 | int cmd; |
7c626d4d | 381 | struct sockaddr *sa; |
b1dd4cca | 382 | register struct ip *ip; |
a6503abf | 383 | { |
b1dd4cca MK |
384 | register struct tcphdr *th; |
385 | extern struct in_addr zeroin_addr; | |
39674d5f | 386 | extern u_char inetctlerrmap[]; |
b1dd4cca | 387 | int (*notify)() = tcp_notify, tcp_quench(); |
39674d5f | 388 | |
b1dd4cca MK |
389 | if (cmd == PRC_QUENCH) |
390 | notify = tcp_quench; | |
69d96ae2 AC |
391 | else if (!PRC_IS_REDIRECT(cmd) && |
392 | ((unsigned)cmd > PRC_NCMDS || inetctlerrmap[cmd] == 0)) | |
7c626d4d | 393 | return; |
b1dd4cca MK |
394 | if (ip) { |
395 | th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); | |
396 | in_pcbnotify(&tcb, sa, th->th_dport, ip->ip_src, th->th_sport, | |
397 | cmd, notify); | |
398 | } else | |
399 | in_pcbnotify(&tcb, sa, 0, zeroin_addr, 0, cmd, notify); | |
a6503abf | 400 | } |
05586739 | 401 | |
#if BSD<43
/*
 * XXX fake routine: compiled only when BSD < 43.  It ignores inp and
 * does nothing.
 */
tcp_abort(inp)
	struct inpcb *inp;
{
}
#endif
410 | ||
05586739 MK |
411 | /* |
412 | * When a source quench is received, close congestion window | |
2e5a76f2 | 413 | * to one segment. We will gradually open it again as we proceed. |
05586739 MK |
414 | */ |
415 | tcp_quench(inp) | |
416 | struct inpcb *inp; | |
417 | { | |
418 | struct tcpcb *tp = intotcpcb(inp); | |
419 | ||
7c626d4d | 420 | if (tp) |
2e5a76f2 | 421 | tp->snd_cwnd = tp->t_maxseg; |
05586739 | 422 | } |