Commit | Line | Data |
---|---|---|
8ae0e4b4 | 1 | /* |
0880b18e | 2 | * Copyright (c) 1982, 1986 Regents of the University of California. |
2b6b6284 | 3 | * All rights reserved. |
8ae0e4b4 | 4 | * |
2b6b6284 | 5 | * Redistribution and use in source and binary forms are permitted |
616d42db KB |
6 | * provided that the above copyright notice and this paragraph are |
7 | * duplicated in all such forms and that any documentation, | |
8 | * advertising materials, and other materials related to such | |
9 | * distribution and use acknowledge that the software was developed | |
10 | * by the University of California, Berkeley. The name of the | |
11 | * University may not be used to endorse or promote products derived | |
12 | * from this software without specific prior written permission. | |
13 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR | |
14 | * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED | |
15 | * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. | |
2b6b6284 | 16 | * |
616d42db | 17 | * @(#)tcp_usrreq.c 7.10 (Berkeley) %G% |
8ae0e4b4 | 18 | */ |
72f24d7d | 19 | |
20666ad3 JB |
20 | #include "param.h" |
21 | #include "systm.h" | |
22 | #include "mbuf.h" | |
23 | #include "socket.h" | |
24 | #include "socketvar.h" | |
25 | #include "protosw.h" | |
26 | #include "errno.h" | |
27 | #include "stat.h" | |
6e7edb25 BJ |
28 | |
29 | #include "../net/if.h" | |
c124e997 | 30 | #include "../net/route.h" |
f4d55810 | 31 | |
20666ad3 JB |
32 | #include "in.h" |
33 | #include "in_pcb.h" | |
34 | #include "in_systm.h" | |
35 | #include "ip.h" | |
36 | #include "ip_var.h" | |
37 | #include "tcp.h" | |
38 | #include "tcp_fsm.h" | |
39 | #include "tcp_seq.h" | |
40 | #include "tcp_timer.h" | |
41 | #include "tcp_var.h" | |
42 | #include "tcpip.h" | |
43 | #include "tcp_debug.h" | |
eee3ab16 | 44 | |
290e0b0a BJ |
45 | /* |
46 | * TCP protocol interface to socket abstraction. | |
47 | */ | |
48 | extern char *tcpstates[]; | |
4ad99bae | 49 | struct tcpcb *tcp_newtcpcb(); |
290e0b0a | 50 | |
9c5022e3 | 51 | /* |
290e0b0a | 52 | * Process a TCP user request for TCP tb. If this is a send request |
9c5022e3 BJ |
53 | * then m is the mbuf chain of send data. If this is a timer expiration |
54 | * (called from the software clock routine), then timertype tells which timer. | |
55 | */ | |
a8d3bf7f | 56 | /*ARGSUSED*/ |
ab85b059 | 57 | tcp_usrreq(so, req, m, nam, rights) |
eee3ab16 BJ |
58 | struct socket *so; |
59 | int req; | |
ab85b059 | 60 | struct mbuf *m, *nam, *rights; |
4eb5d593 | 61 | { |
5db7dd0d | 62 | register struct inpcb *inp; |
cdad2eb1 | 63 | register struct tcpcb *tp; |
5db7dd0d | 64 | int s; |
eee3ab16 | 65 | int error = 0; |
17b82ed4 | 66 | int ostate; |
72f24d7d | 67 | |
9d866d2f | 68 | #if BSD>=43 |
5db7dd0d MK |
69 | if (req == PRU_CONTROL) |
70 | return (in_control(so, (int)m, (caddr_t)nam, | |
71 | (struct ifnet *)rights)); | |
9d866d2f MK |
72 | #else |
73 | if (req == PRU_CONTROL) | |
74 | return(EOPNOTSUPP); | |
75 | #endif | |
5db7dd0d | 76 | if (rights && rights->m_len) |
ab85b059 | 77 | return (EINVAL); |
5db7dd0d MK |
78 | |
79 | s = splnet(); | |
80 | inp = sotoinpcb(so); | |
53a5409e | 81 | /* |
290e0b0a BJ |
82 | * When a TCP is attached to a socket, then there will be |
83 | * a (struct inpcb) pointed at by the socket, and this | |
84 | * structure will point at a subsidary (struct tcpcb). | |
53a5409e | 85 | */ |
0974b45c | 86 | if (inp == 0 && req != PRU_ATTACH) { |
a6503abf | 87 | splx(s); |
290e0b0a | 88 | return (EINVAL); /* XXX */ |
a6503abf BJ |
89 | } |
90 | if (inp) { | |
cdad2eb1 | 91 | tp = intotcpcb(inp); |
8075bb0e | 92 | /* WHAT IF TP IS 0? */ |
9c5022e3 | 93 | #ifdef KPROF |
a6503abf | 94 | tcp_acounts[tp->t_state][req]++; |
9c5022e3 | 95 | #endif |
17b82ed4 | 96 | ostate = tp->t_state; |
ebf42a75 BJ |
97 | } else |
98 | ostate = 0; | |
eee3ab16 | 99 | switch (req) { |
4eb5d593 | 100 | |
290e0b0a BJ |
101 | /* |
102 | * TCP attaches to socket via PRU_ATTACH, reserving space, | |
8075bb0e | 103 | * and an internet control block. |
290e0b0a | 104 | */ |
eee3ab16 | 105 | case PRU_ATTACH: |
4ad99bae | 106 | if (inp) { |
eee3ab16 | 107 | error = EISCONN; |
cdad2eb1 | 108 | break; |
53a5409e | 109 | } |
a1edc12b | 110 | error = tcp_attach(so); |
a6503abf | 111 | if (error) |
4ad99bae | 112 | break; |
0e3936fa | 113 | if ((so->so_options & SO_LINGER) && so->so_linger == 0) |
8e65fd66 | 114 | so->so_linger = TCP_LINGERTIME; |
290e0b0a | 115 | tp = sototcpcb(so); |
72f24d7d | 116 | break; |
4eb5d593 | 117 | |
290e0b0a BJ |
118 | /* |
119 | * PRU_DETACH detaches the TCP protocol from the socket. | |
120 | * If the protocol state is non-embryonic, then can't | |
121 | * do this directly: have to initiate a PRU_DISCONNECT, | |
122 | * which may finish later; embryonic TCB's can just | |
123 | * be discarded here. | |
124 | */ | |
eee3ab16 | 125 | case PRU_DETACH: |
290e0b0a | 126 | if (tp->t_state > TCPS_LISTEN) |
0e3936fa SL |
127 | tp = tcp_disconnect(tp); |
128 | else | |
129 | tp = tcp_close(tp); | |
eee3ab16 BJ |
130 | break; |
131 | ||
8075bb0e BJ |
132 | /* |
133 | * Give the socket an address. | |
134 | */ | |
135 | case PRU_BIND: | |
136 | error = in_pcbbind(inp, nam); | |
137 | if (error) | |
138 | break; | |
139 | break; | |
140 | ||
141 | /* | |
142 | * Prepare to accept connections. | |
143 | */ | |
144 | case PRU_LISTEN: | |
145 | if (inp->inp_lport == 0) | |
146 | error = in_pcbbind(inp, (struct mbuf *)0); | |
147 | if (error == 0) | |
148 | tp->t_state = TCPS_LISTEN; | |
149 | break; | |
150 | ||
290e0b0a BJ |
151 | /* |
152 | * Initiate connection to peer. | |
153 | * Create a template for use in transmissions on this connection. | |
154 | * Enter SYN_SENT state, and mark socket as connecting. | |
155 | * Start keep-alive timer, and seed output sequence space. | |
156 | * Send initial segment on connection. | |
157 | */ | |
eee3ab16 | 158 | case PRU_CONNECT: |
8075bb0e BJ |
159 | if (inp->inp_lport == 0) { |
160 | error = in_pcbbind(inp, (struct mbuf *)0); | |
161 | if (error) | |
162 | break; | |
163 | } | |
164 | error = in_pcbconnect(inp, nam); | |
4ad99bae | 165 | if (error) |
53a5409e | 166 | break; |
b454c3ea | 167 | tp->t_template = tcp_template(tp); |
290e0b0a BJ |
168 | if (tp->t_template == 0) { |
169 | in_pcbdisconnect(inp); | |
170 | error = ENOBUFS; | |
171 | break; | |
172 | } | |
53a5409e | 173 | soisconnecting(so); |
3b52afc5 | 174 | tcpstat.tcps_connattempt++; |
a6503abf | 175 | tp->t_state = TCPS_SYN_SENT; |
8a36cf82 | 176 | tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; |
4aed14e3 BJ |
177 | tp->iss = tcp_iss; tcp_iss += TCP_ISSINCR/2; |
178 | tcp_sendseqinit(tp); | |
8a2f82db | 179 | error = tcp_output(tp); |
72f24d7d | 180 | break; |
4eb5d593 | 181 | |
4945768c SL |
182 | /* |
183 | * Create a TCP connection between two sockets. | |
184 | */ | |
185 | case PRU_CONNECT2: | |
186 | error = EOPNOTSUPP; | |
187 | break; | |
188 | ||
290e0b0a BJ |
189 | /* |
190 | * Initiate disconnect from peer. | |
191 | * If connection never passed embryonic stage, just drop; | |
192 | * else if don't need to let data drain, then can just drop anyways, | |
193 | * else have to begin TCP shutdown process: mark socket disconnecting, | |
194 | * drain unread data, state switch to reflect user close, and | |
195 | * send segment (e.g. FIN) to peer. Socket will be really disconnected | |
196 | * when peer sends FIN and acks ours. | |
197 | * | |
198 | * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. | |
199 | */ | |
200 | case PRU_DISCONNECT: | |
0e3936fa | 201 | tp = tcp_disconnect(tp); |
4aed14e3 BJ |
202 | break; |
203 | ||
290e0b0a BJ |
204 | /* |
205 | * Accept a connection. Essentially all the work is | |
206 | * done at higher levels; just return the address | |
207 | * of the peer, storing through addr. | |
208 | */ | |
1acff8ec | 209 | case PRU_ACCEPT: { |
8075bb0e | 210 | struct sockaddr_in *sin = mtod(nam, struct sockaddr_in *); |
1acff8ec | 211 | |
8075bb0e BJ |
212 | nam->m_len = sizeof (struct sockaddr_in); |
213 | sin->sin_family = AF_INET; | |
214 | sin->sin_port = inp->inp_fport; | |
215 | sin->sin_addr = inp->inp_faddr; | |
eee3ab16 | 216 | break; |
8075bb0e | 217 | } |
eee3ab16 | 218 | |
290e0b0a BJ |
219 | /* |
220 | * Mark the connection as being incapable of further output. | |
221 | */ | |
eee3ab16 | 222 | case PRU_SHUTDOWN: |
0974b45c | 223 | socantsendmore(so); |
0e3936fa SL |
224 | tp = tcp_usrclosed(tp); |
225 | if (tp) | |
226 | error = tcp_output(tp); | |
72f24d7d BJ |
227 | break; |
228 | ||
290e0b0a BJ |
229 | /* |
230 | * After a receive, possibly send window update to peer. | |
231 | */ | |
eee3ab16 | 232 | case PRU_RCVD: |
f1b2fa5b | 233 | (void) tcp_output(tp); |
72f24d7d BJ |
234 | break; |
235 | ||
290e0b0a BJ |
236 | /* |
237 | * Do a send by putting data in output queue and updating urgent | |
238 | * marker if URG set. Possibly send more data. | |
239 | */ | |
eee3ab16 | 240 | case PRU_SEND: |
a6503abf | 241 | sbappend(&so->so_snd, m); |
8a2f82db | 242 | error = tcp_output(tp); |
72f24d7d BJ |
243 | break; |
244 | ||
290e0b0a BJ |
245 | /* |
246 | * Abort the TCP. | |
247 | */ | |
eee3ab16 | 248 | case PRU_ABORT: |
0e3936fa | 249 | tp = tcp_drop(tp, ECONNABORTED); |
72f24d7d BJ |
250 | break; |
251 | ||
f1b2fa5b | 252 | case PRU_SENSE: |
74040e68 | 253 | ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat; |
ded7a1df | 254 | (void) splx(s); |
74040e68 | 255 | return (0); |
f1b2fa5b BJ |
256 | |
257 | case PRU_RCVOOB: | |
01234a7d MK |
258 | if ((so->so_oobmark == 0 && |
259 | (so->so_state & SS_RCVATMARK) == 0) || | |
9d866d2f | 260 | #ifdef SO_OOBINLINE |
f6a4d6a4 | 261 | so->so_options & SO_OOBINLINE || |
9d866d2f | 262 | #endif |
01234a7d | 263 | tp->t_oobflags & TCPOOB_HADDATA) { |
0244dbc7 BJ |
264 | error = EINVAL; |
265 | break; | |
266 | } | |
b2db9217 | 267 | if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { |
8b5a83bb | 268 | error = EWOULDBLOCK; |
b2db9217 | 269 | break; |
8b5a83bb | 270 | } |
283ea225 | 271 | m->m_len = 1; |
b2db9217 | 272 | *mtod(m, caddr_t) = tp->t_iobc; |
01234a7d MK |
273 | if (((int)nam & MSG_PEEK) == 0) |
274 | tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); | |
f1b2fa5b BJ |
275 | break; |
276 | ||
277 | case PRU_SENDOOB: | |
8b5a83bb | 278 | if (sbspace(&so->so_snd) < -512) { |
37279c1b | 279 | m_freem(m); |
8b5a83bb BJ |
280 | error = ENOBUFS; |
281 | break; | |
282 | } | |
f6a4d6a4 MK |
283 | /* |
284 | * According to RFC961 (Assigned Protocols), | |
285 | * the urgent pointer points to the last octet | |
286 | * of urgent data. We continue, however, | |
287 | * to consider it to indicate the first octet | |
288 | * of data past the urgent section. | |
289 | * Otherwise, snd_up should be one lower. | |
290 | */ | |
0244dbc7 | 291 | sbappend(&so->so_snd, m); |
f6a4d6a4 | 292 | tp->snd_up = tp->snd_una + so->so_snd.sb_cc; |
b2db9217 | 293 | tp->t_force = 1; |
8a2f82db | 294 | error = tcp_output(tp); |
b2db9217 | 295 | tp->t_force = 0; |
f1b2fa5b BJ |
296 | break; |
297 | ||
126472ab | 298 | case PRU_SOCKADDR: |
8075bb0e | 299 | in_setsockaddr(inp, nam); |
126472ab SL |
300 | break; |
301 | ||
a7343092 SL |
302 | case PRU_PEERADDR: |
303 | in_setpeeraddr(inp, nam); | |
304 | break; | |
305 | ||
290e0b0a BJ |
306 | /* |
307 | * TCP slow timer went off; going through this | |
308 | * routine for tracing's sake. | |
309 | */ | |
eee3ab16 | 310 | case PRU_SLOWTIMO: |
0e3936fa | 311 | tp = tcp_timers(tp, (int)nam); |
8075bb0e | 312 | req |= (int)nam << 8; /* for debug's sake */ |
eee3ab16 BJ |
313 | break; |
314 | ||
9c5022e3 BJ |
315 | default: |
316 | panic("tcp_usrreq"); | |
72f24d7d | 317 | } |
17b82ed4 BJ |
318 | if (tp && (so->so_options & SO_DEBUG)) |
319 | tcp_trace(TA_USER, ostate, tp, (struct tcpiphdr *)0, req); | |
72f24d7d | 320 | splx(s); |
53a5409e | 321 | return (error); |
4eb5d593 | 322 | } |
4aed14e3 | 323 | |
9d866d2f | 324 | #if BSD>=43 |
54c84456 | 325 | tcp_ctloutput(op, so, level, optname, mp) |
01234a7d MK |
326 | int op; |
327 | struct socket *so; | |
328 | int level, optname; | |
54c84456 | 329 | struct mbuf **mp; |
01234a7d | 330 | { |
54c84456 MK |
331 | int error = 0; |
332 | struct inpcb *inp = sotoinpcb(so); | |
333 | register struct tcpcb *tp = intotcpcb(inp); | |
334 | register struct mbuf *m; | |
335 | ||
01234a7d | 336 | if (level != IPPROTO_TCP) |
b2a3d559 | 337 | return (ip_ctloutput(op, so, level, optname, mp)); |
54c84456 MK |
338 | |
339 | switch (op) { | |
340 | ||
341 | case PRCO_SETOPT: | |
342 | m = *mp; | |
343 | switch (optname) { | |
344 | ||
345 | case TCP_NODELAY: | |
346 | if (m == NULL || m->m_len < sizeof (int)) | |
347 | error = EINVAL; | |
348 | else if (*mtod(m, int *)) | |
349 | tp->t_flags |= TF_NODELAY; | |
350 | else | |
351 | tp->t_flags &= ~TF_NODELAY; | |
352 | break; | |
353 | ||
354 | case TCP_MAXSEG: /* not yet */ | |
355 | default: | |
356 | error = EINVAL; | |
357 | break; | |
358 | } | |
53af7510 SL |
359 | if (m) |
360 | (void) m_free(m); | |
54c84456 MK |
361 | break; |
362 | ||
363 | case PRCO_GETOPT: | |
364 | *mp = m = m_get(M_WAIT, MT_SOOPTS); | |
365 | m->m_len = sizeof(int); | |
366 | ||
367 | switch (optname) { | |
368 | case TCP_NODELAY: | |
369 | *mtod(m, int *) = tp->t_flags & TF_NODELAY; | |
370 | break; | |
371 | case TCP_MAXSEG: | |
372 | *mtod(m, int *) = tp->t_maxseg; | |
373 | break; | |
374 | default: | |
375 | error = EINVAL; | |
376 | break; | |
377 | } | |
378 | break; | |
379 | } | |
380 | return (error); | |
01234a7d | 381 | } |
9d866d2f | 382 | #endif |
01234a7d | 383 | |
4f5156ea MK |
384 | u_long tcp_sendspace = 1024*4; |
385 | u_long tcp_recvspace = 1024*4; | |
290e0b0a BJ |
386 | /* |
387 | * Attach TCP protocol to socket, allocating | |
388 | * internet protocol control block, tcp control block, | |
389 | * bufer space, and entering LISTEN state if to accept connections. | |
390 | */ | |
8075bb0e | 391 | tcp_attach(so) |
290e0b0a | 392 | struct socket *so; |
290e0b0a BJ |
393 | { |
394 | register struct tcpcb *tp; | |
395 | struct inpcb *inp; | |
396 | int error; | |
397 | ||
4f5156ea MK |
398 | if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { |
399 | error = soreserve(so, tcp_sendspace, tcp_recvspace); | |
400 | if (error) | |
401 | return (error); | |
402 | } | |
ebf42a75 | 403 | error = in_pcballoc(so, &tcb); |
290e0b0a | 404 | if (error) |
054054fd | 405 | return (error); |
8075bb0e | 406 | inp = sotoinpcb(so); |
290e0b0a | 407 | tp = tcp_newtcpcb(inp); |
ebf42a75 | 408 | if (tp == 0) { |
054054fd MK |
409 | int nofd = so->so_state & SS_NOFDREF; /* XXX */ |
410 | ||
411 | so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ | |
412 | in_pcbdetach(inp); | |
413 | so->so_state |= nofd; | |
414 | return (ENOBUFS); | |
ebf42a75 | 415 | } |
8075bb0e | 416 | tp->t_state = TCPS_CLOSED; |
290e0b0a BJ |
417 | return (0); |
418 | } | |
419 | ||
420 | /* | |
421 | * Initiate (or continue) disconnect. | |
422 | * If embryonic state, just send reset (once). | |
f9e4ec68 | 423 | * If in ``let data drain'' option and linger null, just drop. |
290e0b0a BJ |
424 | * Otherwise (hard), mark socket disconnecting and drop |
425 | * current input data; switch states based on user close, and | |
426 | * send segment to peer (with FIN). | |
427 | */ | |
0e3936fa | 428 | struct tcpcb * |
290e0b0a | 429 | tcp_disconnect(tp) |
0e3936fa | 430 | register struct tcpcb *tp; |
290e0b0a BJ |
431 | { |
432 | struct socket *so = tp->t_inpcb->inp_socket; | |
433 | ||
434 | if (tp->t_state < TCPS_ESTABLISHED) | |
0e3936fa | 435 | tp = tcp_close(tp); |
f9e4ec68 | 436 | else if ((so->so_options & SO_LINGER) && so->so_linger == 0) |
0e3936fa | 437 | tp = tcp_drop(tp, 0); |
290e0b0a BJ |
438 | else { |
439 | soisdisconnecting(so); | |
440 | sbflush(&so->so_rcv); | |
0e3936fa SL |
441 | tp = tcp_usrclosed(tp); |
442 | if (tp) | |
443 | (void) tcp_output(tp); | |
290e0b0a | 444 | } |
0e3936fa | 445 | return (tp); |
290e0b0a BJ |
446 | } |
447 | ||
448 | /* | |
449 | * User issued close, and wish to trail through shutdown states: | |
450 | * if never received SYN, just forget it. If got a SYN from peer, | |
451 | * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. | |
452 | * If already got a FIN from peer, then almost done; go to LAST_ACK | |
453 | * state. In all other cases, have already sent FIN to peer (e.g. | |
454 | * after PRU_SHUTDOWN), and just have to play tedious game waiting | |
455 | * for peer to send FIN or not respond to keep-alives, etc. | |
085a0b90 | 456 | * We can let the user exit from the close as soon as the FIN is acked. |
290e0b0a | 457 | */ |
0e3936fa | 458 | struct tcpcb * |
4aed14e3 | 459 | tcp_usrclosed(tp) |
0e3936fa | 460 | register struct tcpcb *tp; |
4aed14e3 BJ |
461 | { |
462 | ||
4aed14e3 BJ |
463 | switch (tp->t_state) { |
464 | ||
815b24e1 | 465 | case TCPS_CLOSED: |
4aed14e3 BJ |
466 | case TCPS_LISTEN: |
467 | case TCPS_SYN_SENT: | |
468 | tp->t_state = TCPS_CLOSED; | |
0e3936fa | 469 | tp = tcp_close(tp); |
4aed14e3 BJ |
470 | break; |
471 | ||
472 | case TCPS_SYN_RECEIVED: | |
473 | case TCPS_ESTABLISHED: | |
474 | tp->t_state = TCPS_FIN_WAIT_1; | |
475 | break; | |
476 | ||
477 | case TCPS_CLOSE_WAIT: | |
478 | tp->t_state = TCPS_LAST_ACK; | |
479 | break; | |
480 | } | |
0e3936fa | 481 | if (tp && tp->t_state >= TCPS_FIN_WAIT_2) |
085a0b90 | 482 | soisdisconnected(tp->t_inpcb->inp_socket); |
0e3936fa | 483 | return (tp); |
4aed14e3 | 484 | } |