Commit | Line | Data |
---|---|---|
8ae0e4b4 | 1 | /* |
22cc6d10 | 2 | * Copyright (c) 1982, 1986, 1988, 1993, 1995 |
e7a3707f | 3 | * The Regents of the University of California. All rights reserved. |
8ae0e4b4 | 4 | * |
dbf0c423 | 5 | * %sccs.include.redist.c% |
2b6b6284 | 6 | * |
9916278a | 7 | * @(#)tcp_usrreq.c 8.5 (Berkeley) %G% |
8ae0e4b4 | 8 | */ |
72f24d7d | 9 | |
5548a02f KB |
10 | #include <sys/param.h> |
11 | #include <sys/systm.h> | |
12 | #include <sys/malloc.h> | |
13 | #include <sys/mbuf.h> | |
14 | #include <sys/socket.h> | |
15 | #include <sys/socketvar.h> | |
16 | #include <sys/protosw.h> | |
17 | #include <sys/errno.h> | |
18 | #include <sys/stat.h> | |
6e7edb25 | 19 | |
5548a02f KB |
20 | #include <net/if.h> |
21 | #include <net/route.h> | |
f4d55810 | 22 | |
5548a02f KB |
23 | #include <netinet/in.h> |
24 | #include <netinet/in_systm.h> | |
25 | #include <netinet/ip.h> | |
26 | #include <netinet/in_pcb.h> | |
27 | #include <netinet/ip_var.h> | |
28 | #include <netinet/tcp.h> | |
29 | #include <netinet/tcp_fsm.h> | |
30 | #include <netinet/tcp_seq.h> | |
31 | #include <netinet/tcp_timer.h> | |
32 | #include <netinet/tcp_var.h> | |
33 | #include <netinet/tcpip.h> | |
34 | #include <netinet/tcp_debug.h> | |
eee3ab16 | 35 | |
/*
 * TCP protocol interface to socket abstraction.
 */
/*
 * Table declared here but defined in another translation unit.
 * NOTE(review): presumably the printable names of the TCPS_* states;
 * nothing in the visible part of this file references it -- confirm
 * whether the declaration is still needed.
 */
extern char *tcpstates[];
290e0b0a | 40 | |
9c5022e3 | 41 | /* |
290e0b0a | 42 | * Process a TCP user request for TCP tb. If this is a send request |
9c5022e3 BJ |
43 | * then m is the mbuf chain of send data. If this is a timer expiration |
44 | * (called from the software clock routine), then timertype tells which timer. | |
45 | */ | |
a8d3bf7f | 46 | /*ARGSUSED*/ |
c46785cb | 47 | int |
a942fe02 | 48 | tcp_usrreq(so, req, m, nam, control) |
eee3ab16 BJ |
49 | struct socket *so; |
50 | int req; | |
a942fe02 | 51 | struct mbuf *m, *nam, *control; |
4eb5d593 | 52 | { |
5db7dd0d | 53 | register struct inpcb *inp; |
cdad2eb1 | 54 | register struct tcpcb *tp; |
5db7dd0d | 55 | int s; |
eee3ab16 | 56 | int error = 0; |
17b82ed4 | 57 | int ostate; |
72f24d7d | 58 | |
9d866d2f | 59 | #if BSD>=43 |
5db7dd0d | 60 | if (req == PRU_CONTROL) |
b47fe08a | 61 | return (in_control(so, (u_long)m, (caddr_t)nam, |
a942fe02 MK |
62 | (struct ifnet *)control)); |
63 | if (control && control->m_len) { | |
64 | m_freem(control); | |
65 | if (m) | |
66 | m_freem(m); | |
ab85b059 | 67 | return (EINVAL); |
a942fe02 | 68 | } |
5db7dd0d MK |
69 | |
70 | s = splnet(); | |
71 | inp = sotoinpcb(so); | |
53a5409e | 72 | /* |
290e0b0a BJ |
73 | * When a TCP is attached to a socket, then there will be |
74 | * a (struct inpcb) pointed at by the socket, and this | |
75 | * structure will point at a subsidary (struct tcpcb). | |
53a5409e | 76 | */ |
0974b45c | 77 | if (inp == 0 && req != PRU_ATTACH) { |
a6503abf | 78 | splx(s); |
22cc6d10 MK |
79 | #if 0 |
80 | /* | |
81 | * The following corrects an mbuf leak under rare | |
82 | * circumstances, but has not been fully tested. | |
83 | */ | |
84 | if (m && req != PRU_SENSE) | |
85 | m_freem(m); | |
86 | #else | |
87 | /* safer version of fix for mbuf leak */ | |
88 | if (m && (req == PRU_SEND || req == PRU_SENDOOB)) | |
89 | m_freem(m); | |
90 | #endif | |
290e0b0a | 91 | return (EINVAL); /* XXX */ |
a6503abf BJ |
92 | } |
93 | if (inp) { | |
cdad2eb1 | 94 | tp = intotcpcb(inp); |
8075bb0e | 95 | /* WHAT IF TP IS 0? */ |
9c5022e3 | 96 | #ifdef KPROF |
a6503abf | 97 | tcp_acounts[tp->t_state][req]++; |
9c5022e3 | 98 | #endif |
17b82ed4 | 99 | ostate = tp->t_state; |
ebf42a75 BJ |
100 | } else |
101 | ostate = 0; | |
eee3ab16 | 102 | switch (req) { |
4eb5d593 | 103 | |
290e0b0a BJ |
104 | /* |
105 | * TCP attaches to socket via PRU_ATTACH, reserving space, | |
8075bb0e | 106 | * and an internet control block. |
290e0b0a | 107 | */ |
eee3ab16 | 108 | case PRU_ATTACH: |
4ad99bae | 109 | if (inp) { |
eee3ab16 | 110 | error = EISCONN; |
cdad2eb1 | 111 | break; |
53a5409e | 112 | } |
a1edc12b | 113 | error = tcp_attach(so); |
a6503abf | 114 | if (error) |
4ad99bae | 115 | break; |
0e3936fa | 116 | if ((so->so_options & SO_LINGER) && so->so_linger == 0) |
8e65fd66 | 117 | so->so_linger = TCP_LINGERTIME; |
290e0b0a | 118 | tp = sototcpcb(so); |
72f24d7d | 119 | break; |
4eb5d593 | 120 | |
290e0b0a BJ |
121 | /* |
122 | * PRU_DETACH detaches the TCP protocol from the socket. | |
123 | * If the protocol state is non-embryonic, then can't | |
124 | * do this directly: have to initiate a PRU_DISCONNECT, | |
125 | * which may finish later; embryonic TCB's can just | |
126 | * be discarded here. | |
127 | */ | |
eee3ab16 | 128 | case PRU_DETACH: |
290e0b0a | 129 | if (tp->t_state > TCPS_LISTEN) |
0e3936fa SL |
130 | tp = tcp_disconnect(tp); |
131 | else | |
132 | tp = tcp_close(tp); | |
eee3ab16 BJ |
133 | break; |
134 | ||
8075bb0e BJ |
135 | /* |
136 | * Give the socket an address. | |
137 | */ | |
138 | case PRU_BIND: | |
139 | error = in_pcbbind(inp, nam); | |
140 | if (error) | |
141 | break; | |
142 | break; | |
143 | ||
144 | /* | |
145 | * Prepare to accept connections. | |
146 | */ | |
147 | case PRU_LISTEN: | |
148 | if (inp->inp_lport == 0) | |
149 | error = in_pcbbind(inp, (struct mbuf *)0); | |
150 | if (error == 0) | |
151 | tp->t_state = TCPS_LISTEN; | |
152 | break; | |
153 | ||
290e0b0a BJ |
154 | /* |
155 | * Initiate connection to peer. | |
156 | * Create a template for use in transmissions on this connection. | |
157 | * Enter SYN_SENT state, and mark socket as connecting. | |
158 | * Start keep-alive timer, and seed output sequence space. | |
159 | * Send initial segment on connection. | |
160 | */ | |
eee3ab16 | 161 | case PRU_CONNECT: |
8075bb0e BJ |
162 | if (inp->inp_lport == 0) { |
163 | error = in_pcbbind(inp, (struct mbuf *)0); | |
164 | if (error) | |
165 | break; | |
166 | } | |
167 | error = in_pcbconnect(inp, nam); | |
4ad99bae | 168 | if (error) |
53a5409e | 169 | break; |
b454c3ea | 170 | tp->t_template = tcp_template(tp); |
290e0b0a BJ |
171 | if (tp->t_template == 0) { |
172 | in_pcbdisconnect(inp); | |
173 | error = ENOBUFS; | |
174 | break; | |
175 | } | |
69d96ae2 AC |
176 | /* Compute window scaling to request. */ |
177 | while (tp->request_r_scale < TCP_MAX_WINSHIFT && | |
178 | (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat) | |
179 | tp->request_r_scale++; | |
53a5409e | 180 | soisconnecting(so); |
3b52afc5 | 181 | tcpstat.tcps_connattempt++; |
a6503abf | 182 | tp->t_state = TCPS_SYN_SENT; |
8a36cf82 | 183 | tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; |
9916278a | 184 | tp->iss = tcp_iss; tcp_iss += TCP_ISSINCR/4; |
4aed14e3 | 185 | tcp_sendseqinit(tp); |
8a2f82db | 186 | error = tcp_output(tp); |
72f24d7d | 187 | break; |
4eb5d593 | 188 | |
4945768c SL |
189 | /* |
190 | * Create a TCP connection between two sockets. | |
191 | */ | |
192 | case PRU_CONNECT2: | |
193 | error = EOPNOTSUPP; | |
194 | break; | |
195 | ||
290e0b0a BJ |
196 | /* |
197 | * Initiate disconnect from peer. | |
198 | * If connection never passed embryonic stage, just drop; | |
199 | * else if don't need to let data drain, then can just drop anyways, | |
200 | * else have to begin TCP shutdown process: mark socket disconnecting, | |
201 | * drain unread data, state switch to reflect user close, and | |
202 | * send segment (e.g. FIN) to peer. Socket will be really disconnected | |
203 | * when peer sends FIN and acks ours. | |
204 | * | |
205 | * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. | |
206 | */ | |
207 | case PRU_DISCONNECT: | |
0e3936fa | 208 | tp = tcp_disconnect(tp); |
4aed14e3 BJ |
209 | break; |
210 | ||
290e0b0a BJ |
211 | /* |
212 | * Accept a connection. Essentially all the work is | |
213 | * done at higher levels; just return the address | |
214 | * of the peer, storing through addr. | |
215 | */ | |
65df0627 CT |
216 | case PRU_ACCEPT: |
217 | in_setpeeraddr(inp, nam); | |
eee3ab16 BJ |
218 | break; |
219 | ||
290e0b0a BJ |
220 | /* |
221 | * Mark the connection as being incapable of further output. | |
222 | */ | |
eee3ab16 | 223 | case PRU_SHUTDOWN: |
0974b45c | 224 | socantsendmore(so); |
0e3936fa SL |
225 | tp = tcp_usrclosed(tp); |
226 | if (tp) | |
227 | error = tcp_output(tp); | |
72f24d7d BJ |
228 | break; |
229 | ||
290e0b0a BJ |
230 | /* |
231 | * After a receive, possibly send window update to peer. | |
232 | */ | |
eee3ab16 | 233 | case PRU_RCVD: |
f1b2fa5b | 234 | (void) tcp_output(tp); |
72f24d7d BJ |
235 | break; |
236 | ||
290e0b0a BJ |
237 | /* |
238 | * Do a send by putting data in output queue and updating urgent | |
239 | * marker if URG set. Possibly send more data. | |
240 | */ | |
eee3ab16 | 241 | case PRU_SEND: |
a6503abf | 242 | sbappend(&so->so_snd, m); |
8a2f82db | 243 | error = tcp_output(tp); |
72f24d7d BJ |
244 | break; |
245 | ||
290e0b0a BJ |
246 | /* |
247 | * Abort the TCP. | |
248 | */ | |
eee3ab16 | 249 | case PRU_ABORT: |
0e3936fa | 250 | tp = tcp_drop(tp, ECONNABORTED); |
72f24d7d BJ |
251 | break; |
252 | ||
f1b2fa5b | 253 | case PRU_SENSE: |
74040e68 | 254 | ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat; |
ded7a1df | 255 | (void) splx(s); |
74040e68 | 256 | return (0); |
f1b2fa5b BJ |
257 | |
258 | case PRU_RCVOOB: | |
01234a7d MK |
259 | if ((so->so_oobmark == 0 && |
260 | (so->so_state & SS_RCVATMARK) == 0) || | |
9d866d2f | 261 | #ifdef SO_OOBINLINE |
f6a4d6a4 | 262 | so->so_options & SO_OOBINLINE || |
9d866d2f | 263 | #endif |
01234a7d | 264 | tp->t_oobflags & TCPOOB_HADDATA) { |
0244dbc7 BJ |
265 | error = EINVAL; |
266 | break; | |
267 | } | |
b2db9217 | 268 | if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { |
8b5a83bb | 269 | error = EWOULDBLOCK; |
b2db9217 | 270 | break; |
8b5a83bb | 271 | } |
283ea225 | 272 | m->m_len = 1; |
b2db9217 | 273 | *mtod(m, caddr_t) = tp->t_iobc; |
01234a7d MK |
274 | if (((int)nam & MSG_PEEK) == 0) |
275 | tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); | |
f1b2fa5b BJ |
276 | break; |
277 | ||
278 | case PRU_SENDOOB: | |
8b5a83bb | 279 | if (sbspace(&so->so_snd) < -512) { |
37279c1b | 280 | m_freem(m); |
8b5a83bb BJ |
281 | error = ENOBUFS; |
282 | break; | |
283 | } | |
f6a4d6a4 MK |
284 | /* |
285 | * According to RFC961 (Assigned Protocols), | |
286 | * the urgent pointer points to the last octet | |
287 | * of urgent data. We continue, however, | |
288 | * to consider it to indicate the first octet | |
289 | * of data past the urgent section. | |
290 | * Otherwise, snd_up should be one lower. | |
291 | */ | |
0244dbc7 | 292 | sbappend(&so->so_snd, m); |
f6a4d6a4 | 293 | tp->snd_up = tp->snd_una + so->so_snd.sb_cc; |
b2db9217 | 294 | tp->t_force = 1; |
8a2f82db | 295 | error = tcp_output(tp); |
b2db9217 | 296 | tp->t_force = 0; |
f1b2fa5b BJ |
297 | break; |
298 | ||
126472ab | 299 | case PRU_SOCKADDR: |
8075bb0e | 300 | in_setsockaddr(inp, nam); |
126472ab SL |
301 | break; |
302 | ||
a7343092 SL |
303 | case PRU_PEERADDR: |
304 | in_setpeeraddr(inp, nam); | |
305 | break; | |
306 | ||
290e0b0a BJ |
307 | /* |
308 | * TCP slow timer went off; going through this | |
309 | * routine for tracing's sake. | |
310 | */ | |
eee3ab16 | 311 | case PRU_SLOWTIMO: |
0e3936fa | 312 | tp = tcp_timers(tp, (int)nam); |
8075bb0e | 313 | req |= (int)nam << 8; /* for debug's sake */ |
eee3ab16 BJ |
314 | break; |
315 | ||
9c5022e3 BJ |
316 | default: |
317 | panic("tcp_usrreq"); | |
72f24d7d | 318 | } |
17b82ed4 BJ |
319 | if (tp && (so->so_options & SO_DEBUG)) |
320 | tcp_trace(TA_USER, ostate, tp, (struct tcpiphdr *)0, req); | |
72f24d7d | 321 | splx(s); |
53a5409e | 322 | return (error); |
4eb5d593 | 323 | } |
4aed14e3 | 324 | |
#if BSD>=43
/*
 * Get or set a socket option on behalf of the socket layer.
 *
 *	op	PRCO_SETOPT or PRCO_GETOPT.
 *	level	options not at IPPROTO_TCP are passed to ip_ctloutput().
 *	optname	TCP_NODELAY or TCP_MAXSEG; anything else yields ENOPROTOOPT.
 *	mp	for PRCO_SETOPT, *mp holds the option value and is freed
 *		here; for PRCO_GETOPT, *mp is set to a newly allocated mbuf
 *		holding an int.  On ENOPROTOOPT from PRCO_GETOPT the mbuf
 *		is left in *mp.
 *
 * Returns 0 or an errno value; ECONNRESET if the socket has no inpcb.
 *
 * The return type is kept inside the conditional so that no stray
 * "int" token is left behind when the function is compiled out.
 */
int
tcp_ctloutput(op, so, level, optname, mp)
	int op;
	struct socket *so;
	int level, optname;
	struct mbuf **mp;
{
	int error = 0, s;
	struct inpcb *inp;
	register struct tcpcb *tp;
	register struct mbuf *m;
	register int i;

	s = splnet();
	inp = sotoinpcb(so);
	if (inp == NULL) {
		splx(s);
		if (op == PRCO_SETOPT && *mp)
			(void) m_free(*mp);
		return (ECONNRESET);
	}
	if (level != IPPROTO_TCP) {
		error = ip_ctloutput(op, so, level, optname, mp);
		splx(s);
		return (error);
	}
	tp = intotcpcb(inp);

	switch (op) {

	case PRCO_SETOPT:
		m = *mp;
		switch (optname) {

		case TCP_NODELAY:
			if (m == NULL || m->m_len < sizeof (int))
				error = EINVAL;
			else if (*mtod(m, int *))
				tp->t_flags |= TF_NODELAY;
			else
				tp->t_flags &= ~TF_NODELAY;
			break;

		case TCP_MAXSEG:
			/*
			 * Require a full int of option data, as TCP_NODELAY
			 * does, before reading it.  The segment size may
			 * only be lowered, never raised or set to zero.
			 */
			if (m && m->m_len >= sizeof (int) &&
			    (i = *mtod(m, int *)) > 0 && i <= tp->t_maxseg)
				tp->t_maxseg = i;
			else
				error = EINVAL;
			break;

		default:
			error = ENOPROTOOPT;
			break;
		}
		/* The option mbuf is consumed here on success and failure. */
		if (m)
			(void) m_free(m);
		break;

	case PRCO_GETOPT:
		*mp = m = m_get(M_WAIT, MT_SOOPTS);
		m->m_len = sizeof(int);

		switch (optname) {
		case TCP_NODELAY:
			/* Yields the raw flag bit, not a normalized 0/1. */
			*mtod(m, int *) = tp->t_flags & TF_NODELAY;
			break;
		case TCP_MAXSEG:
			*mtod(m, int *) = tp->t_maxseg;
			break;
		default:
			error = ENOPROTOOPT;
			break;
		}
		break;
	}
	splx(s);
	return (error);
}
#endif
01234a7d | 405 | |
/*
 * Send and receive buffer sizes, in bytes, that tcp_attach() hands to
 * soreserve() when a socket arrives with a zero send or receive
 * high-water mark.
 */
u_long tcp_sendspace = 8 * 1024;
u_long tcp_recvspace = 8 * 1024;
9d91b170 | 408 | |
290e0b0a BJ |
409 | /* |
410 | * Attach TCP protocol to socket, allocating | |
411 | * internet protocol control block, tcp control block, | |
412 | * bufer space, and entering LISTEN state if to accept connections. | |
413 | */ | |
c46785cb | 414 | int |
8075bb0e | 415 | tcp_attach(so) |
290e0b0a | 416 | struct socket *so; |
290e0b0a BJ |
417 | { |
418 | register struct tcpcb *tp; | |
419 | struct inpcb *inp; | |
420 | int error; | |
421 | ||
4f5156ea MK |
422 | if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { |
423 | error = soreserve(so, tcp_sendspace, tcp_recvspace); | |
424 | if (error) | |
425 | return (error); | |
426 | } | |
ebf42a75 | 427 | error = in_pcballoc(so, &tcb); |
290e0b0a | 428 | if (error) |
054054fd | 429 | return (error); |
8075bb0e | 430 | inp = sotoinpcb(so); |
290e0b0a | 431 | tp = tcp_newtcpcb(inp); |
ebf42a75 | 432 | if (tp == 0) { |
054054fd MK |
433 | int nofd = so->so_state & SS_NOFDREF; /* XXX */ |
434 | ||
435 | so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ | |
436 | in_pcbdetach(inp); | |
437 | so->so_state |= nofd; | |
438 | return (ENOBUFS); | |
ebf42a75 | 439 | } |
8075bb0e | 440 | tp->t_state = TCPS_CLOSED; |
290e0b0a BJ |
441 | return (0); |
442 | } | |
443 | ||
444 | /* | |
445 | * Initiate (or continue) disconnect. | |
446 | * If embryonic state, just send reset (once). | |
f9e4ec68 | 447 | * If in ``let data drain'' option and linger null, just drop. |
290e0b0a BJ |
448 | * Otherwise (hard), mark socket disconnecting and drop |
449 | * current input data; switch states based on user close, and | |
450 | * send segment to peer (with FIN). | |
451 | */ | |
0e3936fa | 452 | struct tcpcb * |
290e0b0a | 453 | tcp_disconnect(tp) |
0e3936fa | 454 | register struct tcpcb *tp; |
290e0b0a BJ |
455 | { |
456 | struct socket *so = tp->t_inpcb->inp_socket; | |
457 | ||
458 | if (tp->t_state < TCPS_ESTABLISHED) | |
0e3936fa | 459 | tp = tcp_close(tp); |
f9e4ec68 | 460 | else if ((so->so_options & SO_LINGER) && so->so_linger == 0) |
0e3936fa | 461 | tp = tcp_drop(tp, 0); |
290e0b0a BJ |
462 | else { |
463 | soisdisconnecting(so); | |
464 | sbflush(&so->so_rcv); | |
0e3936fa SL |
465 | tp = tcp_usrclosed(tp); |
466 | if (tp) | |
467 | (void) tcp_output(tp); | |
290e0b0a | 468 | } |
0e3936fa | 469 | return (tp); |
290e0b0a BJ |
470 | } |
471 | ||
472 | /* | |
473 | * User issued close, and wish to trail through shutdown states: | |
474 | * if never received SYN, just forget it. If got a SYN from peer, | |
475 | * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. | |
476 | * If already got a FIN from peer, then almost done; go to LAST_ACK | |
477 | * state. In all other cases, have already sent FIN to peer (e.g. | |
478 | * after PRU_SHUTDOWN), and just have to play tedious game waiting | |
479 | * for peer to send FIN or not respond to keep-alives, etc. | |
085a0b90 | 480 | * We can let the user exit from the close as soon as the FIN is acked. |
290e0b0a | 481 | */ |
0e3936fa | 482 | struct tcpcb * |
4aed14e3 | 483 | tcp_usrclosed(tp) |
0e3936fa | 484 | register struct tcpcb *tp; |
4aed14e3 BJ |
485 | { |
486 | ||
4aed14e3 BJ |
487 | switch (tp->t_state) { |
488 | ||
815b24e1 | 489 | case TCPS_CLOSED: |
4aed14e3 BJ |
490 | case TCPS_LISTEN: |
491 | case TCPS_SYN_SENT: | |
492 | tp->t_state = TCPS_CLOSED; | |
0e3936fa | 493 | tp = tcp_close(tp); |
4aed14e3 BJ |
494 | break; |
495 | ||
496 | case TCPS_SYN_RECEIVED: | |
497 | case TCPS_ESTABLISHED: | |
498 | tp->t_state = TCPS_FIN_WAIT_1; | |
499 | break; | |
500 | ||
501 | case TCPS_CLOSE_WAIT: | |
502 | tp->t_state = TCPS_LAST_ACK; | |
503 | break; | |
504 | } | |
0e3936fa | 505 | if (tp && tp->t_state >= TCPS_FIN_WAIT_2) |
085a0b90 | 506 | soisdisconnected(tp->t_inpcb->inp_socket); |
0e3936fa | 507 | return (tp); |
4aed14e3 | 508 | } |