Commit | Line | Data |
---|---|---|
15637ed4 RG |
1 | /* |
2 | * Copyright (c) 1982, 1986, 1988 Regents of the University of California. | |
3 | * All rights reserved. | |
4 | * | |
5 | * Redistribution and use in source and binary forms, with or without | |
6 | * modification, are permitted provided that the following conditions | |
7 | * are met: | |
8 | * 1. Redistributions of source code must retain the above copyright | |
9 | * notice, this list of conditions and the following disclaimer. | |
10 | * 2. Redistributions in binary form must reproduce the above copyright | |
11 | * notice, this list of conditions and the following disclaimer in the | |
12 | * documentation and/or other materials provided with the distribution. | |
13 | * 3. All advertising materials mentioning features or use of this software | |
14 | * must display the following acknowledgement: | |
15 | * This product includes software developed by the University of | |
16 | * California, Berkeley and its contributors. | |
17 | * 4. Neither the name of the University nor the names of its contributors | |
18 | * may be used to endorse or promote products derived from this software | |
19 | * without specific prior written permission. | |
20 | * | |
21 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
23 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
24 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
25 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
26 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
27 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
28 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
29 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
30 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
31 | * SUCH DAMAGE. | |
32 | * | |
38e82238 | 33 | * from: @(#)tcp_usrreq.c 7.15 (Berkeley) 6/28/90 |
ff1a3ad7 | 34 | * $Id: tcp_usrreq.c,v 1.4 1993/12/19 00:52:53 wollman Exp $ |
15637ed4 RG |
35 | */ |
36 | ||
37 | #include "param.h" | |
38 | #include "systm.h" | |
39 | #include "malloc.h" | |
40 | #include "mbuf.h" | |
41 | #include "socket.h" | |
42 | #include "socketvar.h" | |
43 | #include "protosw.h" | |
44 | #include "errno.h" | |
45 | #include "stat.h" | |
46 | ||
47 | #include "../net/if.h" | |
48 | #include "../net/route.h" | |
49 | ||
50 | #include "in.h" | |
51 | #include "in_systm.h" | |
52 | #include "ip.h" | |
53 | #include "in_pcb.h" | |
54 | #include "ip_var.h" | |
55 | #include "tcp.h" | |
56 | #include "tcp_fsm.h" | |
57 | #include "tcp_seq.h" | |
58 | #include "tcp_timer.h" | |
59 | #include "tcp_var.h" | |
60 | #include "tcpip.h" | |
ff1a3ad7 | 61 | #ifdef TCPDEBUG |
15637ed4 | 62 | #include "tcp_debug.h" |
ff1a3ad7 | 63 | #endif |
15637ed4 RG |
64 | |
65 | /* | |
66 | * TCP protocol interface to socket abstraction. | |
67 | */ | |
68 | extern char *tcpstates[]; | |
15637ed4 RG |
69 | |
70 | /* | |
71 | * Process a TCP user request for TCP tb. If this is a send request | |
72 | * then m is the mbuf chain of send data. If this is a timer expiration | |
73 | * (called from the software clock routine), then timertype tells which timer. | |
74 | */ | |
75 | /*ARGSUSED*/ | |
4c45483e | 76 | int |
fde1aeb2 | 77 | tcp_usrreq(so, req, m, nam, control, dummy) |
15637ed4 RG |
78 | struct socket *so; |
79 | int req; | |
80 | struct mbuf *m, *nam, *control; | |
fde1aeb2 | 81 | struct mbuf *dummy; |
15637ed4 RG |
82 | { |
83 | register struct inpcb *inp; | |
4c45483e | 84 | register struct tcpcb *tp = 0; |
15637ed4 RG |
85 | int s; |
86 | int error = 0; | |
87 | int ostate; | |
88 | ||
89 | if (req == PRU_CONTROL) | |
90 | return (in_control(so, (int)m, (caddr_t)nam, | |
91 | (struct ifnet *)control)); | |
92 | if (control && control->m_len) { | |
93 | m_freem(control); | |
94 | if (m) | |
95 | m_freem(m); | |
96 | return (EINVAL); | |
97 | } | |
98 | ||
99 | s = splnet(); | |
100 | inp = sotoinpcb(so); | |
101 | /* | |
102 | * When a TCP is attached to a socket, then there will be | |
103 | * a (struct inpcb) pointed at by the socket, and this | |
104 | * structure will point at a subsidary (struct tcpcb). | |
105 | */ | |
106 | if (inp == 0 && req != PRU_ATTACH) { | |
107 | splx(s); | |
108 | return (EINVAL); /* XXX */ | |
109 | } | |
110 | if (inp) { | |
111 | tp = intotcpcb(inp); | |
112 | /* WHAT IF TP IS 0? */ | |
113 | #ifdef KPROF | |
114 | tcp_acounts[tp->t_state][req]++; | |
115 | #endif | |
116 | ostate = tp->t_state; | |
117 | } else | |
118 | ostate = 0; | |
119 | switch (req) { | |
120 | ||
121 | /* | |
122 | * TCP attaches to socket via PRU_ATTACH, reserving space, | |
123 | * and an internet control block. | |
124 | */ | |
125 | case PRU_ATTACH: | |
126 | if (inp) { | |
127 | error = EISCONN; | |
128 | break; | |
129 | } | |
130 | error = tcp_attach(so); | |
131 | if (error) | |
132 | break; | |
133 | if ((so->so_options & SO_LINGER) && so->so_linger == 0) | |
134 | so->so_linger = TCP_LINGERTIME; | |
135 | tp = sototcpcb(so); | |
136 | break; | |
137 | ||
138 | /* | |
139 | * PRU_DETACH detaches the TCP protocol from the socket. | |
140 | * If the protocol state is non-embryonic, then can't | |
141 | * do this directly: have to initiate a PRU_DISCONNECT, | |
142 | * which may finish later; embryonic TCB's can just | |
143 | * be discarded here. | |
144 | */ | |
145 | case PRU_DETACH: | |
146 | if (tp->t_state > TCPS_LISTEN) | |
147 | tp = tcp_disconnect(tp); | |
148 | else | |
149 | tp = tcp_close(tp); | |
150 | break; | |
151 | ||
152 | /* | |
153 | * Give the socket an address. | |
154 | */ | |
155 | case PRU_BIND: | |
156 | error = in_pcbbind(inp, nam); | |
157 | if (error) | |
158 | break; | |
159 | break; | |
160 | ||
161 | /* | |
162 | * Prepare to accept connections. | |
163 | */ | |
164 | case PRU_LISTEN: | |
165 | if (inp->inp_lport == 0) | |
166 | error = in_pcbbind(inp, (struct mbuf *)0); | |
167 | if (error == 0) | |
168 | tp->t_state = TCPS_LISTEN; | |
169 | break; | |
170 | ||
171 | /* | |
172 | * Initiate connection to peer. | |
173 | * Create a template for use in transmissions on this connection. | |
174 | * Enter SYN_SENT state, and mark socket as connecting. | |
175 | * Start keep-alive timer, and seed output sequence space. | |
176 | * Send initial segment on connection. | |
177 | */ | |
178 | case PRU_CONNECT: | |
179 | if (inp->inp_lport == 0) { | |
180 | error = in_pcbbind(inp, (struct mbuf *)0); | |
181 | if (error) | |
182 | break; | |
183 | } | |
184 | error = in_pcbconnect(inp, nam); | |
185 | if (error) | |
186 | break; | |
187 | tp->t_template = tcp_template(tp); | |
188 | if (tp->t_template == 0) { | |
189 | in_pcbdisconnect(inp); | |
190 | error = ENOBUFS; | |
191 | break; | |
192 | } | |
193 | soisconnecting(so); | |
194 | tcpstat.tcps_connattempt++; | |
195 | tp->t_state = TCPS_SYN_SENT; | |
196 | tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; | |
197 | tp->iss = tcp_iss; tcp_iss += TCP_ISSINCR/2; | |
198 | tcp_sendseqinit(tp); | |
199 | error = tcp_output(tp); | |
200 | break; | |
201 | ||
202 | /* | |
203 | * Create a TCP connection between two sockets. | |
204 | */ | |
205 | case PRU_CONNECT2: | |
206 | error = EOPNOTSUPP; | |
207 | break; | |
208 | ||
209 | /* | |
210 | * Initiate disconnect from peer. | |
211 | * If connection never passed embryonic stage, just drop; | |
212 | * else if don't need to let data drain, then can just drop anyways, | |
213 | * else have to begin TCP shutdown process: mark socket disconnecting, | |
214 | * drain unread data, state switch to reflect user close, and | |
215 | * send segment (e.g. FIN) to peer. Socket will be really disconnected | |
216 | * when peer sends FIN and acks ours. | |
217 | * | |
218 | * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. | |
219 | */ | |
220 | case PRU_DISCONNECT: | |
221 | tp = tcp_disconnect(tp); | |
222 | break; | |
223 | ||
224 | /* | |
225 | * Accept a connection. Essentially all the work is | |
226 | * done at higher levels; just return the address | |
227 | * of the peer, storing through addr. | |
228 | */ | |
229 | case PRU_ACCEPT: { | |
230 | struct sockaddr_in *sin = mtod(nam, struct sockaddr_in *); | |
231 | ||
232 | nam->m_len = sizeof (struct sockaddr_in); | |
233 | sin->sin_family = AF_INET; | |
234 | sin->sin_len = sizeof(*sin); | |
235 | sin->sin_port = inp->inp_fport; | |
236 | sin->sin_addr = inp->inp_faddr; | |
237 | break; | |
238 | } | |
239 | ||
240 | /* | |
241 | * Mark the connection as being incapable of further output. | |
242 | */ | |
243 | case PRU_SHUTDOWN: | |
244 | socantsendmore(so); | |
245 | tp = tcp_usrclosed(tp); | |
246 | if (tp) | |
247 | error = tcp_output(tp); | |
248 | break; | |
249 | ||
250 | /* | |
251 | * After a receive, possibly send window update to peer. | |
252 | */ | |
253 | case PRU_RCVD: | |
254 | (void) tcp_output(tp); | |
255 | break; | |
256 | ||
257 | /* | |
258 | * Do a send by putting data in output queue and updating urgent | |
259 | * marker if URG set. Possibly send more data. | |
260 | */ | |
261 | case PRU_SEND: | |
262 | sbappend(&so->so_snd, m); | |
263 | error = tcp_output(tp); | |
264 | break; | |
265 | ||
266 | /* | |
267 | * Abort the TCP. | |
268 | */ | |
269 | case PRU_ABORT: | |
270 | tp = tcp_drop(tp, ECONNABORTED); | |
271 | break; | |
272 | ||
273 | case PRU_SENSE: | |
274 | ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat; | |
275 | (void) splx(s); | |
276 | return (0); | |
277 | ||
278 | case PRU_RCVOOB: | |
279 | if ((so->so_oobmark == 0 && | |
280 | (so->so_state & SS_RCVATMARK) == 0) || | |
281 | so->so_options & SO_OOBINLINE || | |
282 | tp->t_oobflags & TCPOOB_HADDATA) { | |
283 | error = EINVAL; | |
284 | break; | |
285 | } | |
286 | if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { | |
287 | error = EWOULDBLOCK; | |
288 | break; | |
289 | } | |
290 | m->m_len = 1; | |
291 | *mtod(m, caddr_t) = tp->t_iobc; | |
292 | if (((int)nam & MSG_PEEK) == 0) | |
293 | tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); | |
294 | break; | |
295 | ||
296 | case PRU_SENDOOB: | |
297 | if (sbspace(&so->so_snd) < -512) { | |
298 | m_freem(m); | |
299 | error = ENOBUFS; | |
300 | break; | |
301 | } | |
302 | /* | |
303 | * According to RFC961 (Assigned Protocols), | |
304 | * the urgent pointer points to the last octet | |
305 | * of urgent data. We continue, however, | |
306 | * to consider it to indicate the first octet | |
307 | * of data past the urgent section. | |
308 | * Otherwise, snd_up should be one lower. | |
309 | */ | |
310 | sbappend(&so->so_snd, m); | |
311 | tp->snd_up = tp->snd_una + so->so_snd.sb_cc; | |
312 | tp->t_force = 1; | |
313 | error = tcp_output(tp); | |
314 | tp->t_force = 0; | |
315 | break; | |
316 | ||
317 | case PRU_SOCKADDR: | |
318 | in_setsockaddr(inp, nam); | |
319 | break; | |
320 | ||
321 | case PRU_PEERADDR: | |
322 | in_setpeeraddr(inp, nam); | |
323 | break; | |
324 | ||
325 | /* | |
326 | * TCP slow timer went off; going through this | |
327 | * routine for tracing's sake. | |
328 | */ | |
329 | case PRU_SLOWTIMO: | |
330 | tp = tcp_timers(tp, (int)nam); | |
331 | req |= (int)nam << 8; /* for debug's sake */ | |
332 | break; | |
333 | ||
334 | default: | |
335 | panic("tcp_usrreq"); | |
336 | } | |
ff1a3ad7 | 337 | #ifdef TCPDEBUG |
15637ed4 RG |
338 | if (tp && (so->so_options & SO_DEBUG)) |
339 | tcp_trace(TA_USER, ostate, tp, (struct tcpiphdr *)0, req); | |
ff1a3ad7 | 340 | #endif |
15637ed4 RG |
341 | splx(s); |
342 | return (error); | |
343 | } | |
344 | ||
4c45483e | 345 | int |
15637ed4 RG |
346 | tcp_ctloutput(op, so, level, optname, mp) |
347 | int op; | |
348 | struct socket *so; | |
349 | int level, optname; | |
350 | struct mbuf **mp; | |
351 | { | |
352 | int error = 0; | |
353 | struct inpcb *inp = sotoinpcb(so); | |
354 | register struct tcpcb *tp = intotcpcb(inp); | |
355 | register struct mbuf *m; | |
356 | ||
357 | if (level != IPPROTO_TCP) | |
358 | return (ip_ctloutput(op, so, level, optname, mp)); | |
359 | ||
360 | switch (op) { | |
361 | ||
362 | case PRCO_SETOPT: | |
363 | m = *mp; | |
364 | switch (optname) { | |
365 | ||
366 | case TCP_NODELAY: | |
367 | if (m == NULL || m->m_len < sizeof (int)) | |
368 | error = EINVAL; | |
369 | else if (*mtod(m, int *)) | |
370 | tp->t_flags |= TF_NODELAY; | |
371 | else | |
372 | tp->t_flags &= ~TF_NODELAY; | |
373 | break; | |
374 | ||
375 | case TCP_MAXSEG: /* not yet */ | |
376 | default: | |
377 | error = EINVAL; | |
378 | break; | |
379 | } | |
380 | if (m) | |
381 | (void) m_free(m); | |
382 | break; | |
383 | ||
384 | case PRCO_GETOPT: | |
385 | *mp = m = m_get(M_WAIT, MT_SOOPTS); | |
386 | m->m_len = sizeof(int); | |
387 | ||
388 | switch (optname) { | |
389 | case TCP_NODELAY: | |
390 | *mtod(m, int *) = tp->t_flags & TF_NODELAY; | |
391 | break; | |
392 | case TCP_MAXSEG: | |
393 | *mtod(m, int *) = tp->t_maxseg; | |
394 | break; | |
395 | default: | |
396 | error = EINVAL; | |
397 | break; | |
398 | } | |
399 | break; | |
400 | } | |
401 | return (error); | |
402 | } | |
403 | ||
15637ed4 RG |
404 | /* |
405 | * Attach TCP protocol to socket, allocating | |
406 | * internet protocol control block, tcp control block, | |
407 | * bufer space, and entering LISTEN state if to accept connections. | |
408 | */ | |
4c45483e | 409 | int |
15637ed4 RG |
410 | tcp_attach(so) |
411 | struct socket *so; | |
412 | { | |
413 | register struct tcpcb *tp; | |
414 | struct inpcb *inp; | |
415 | int error; | |
416 | ||
417 | if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { | |
418 | error = soreserve(so, tcp_sendspace, tcp_recvspace); | |
419 | if (error) | |
420 | return (error); | |
421 | } | |
422 | error = in_pcballoc(so, &tcb); | |
423 | if (error) | |
424 | return (error); | |
425 | inp = sotoinpcb(so); | |
426 | tp = tcp_newtcpcb(inp); | |
427 | if (tp == 0) { | |
428 | int nofd = so->so_state & SS_NOFDREF; /* XXX */ | |
429 | ||
430 | so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ | |
431 | in_pcbdetach(inp); | |
432 | so->so_state |= nofd; | |
433 | return (ENOBUFS); | |
434 | } | |
435 | tp->t_state = TCPS_CLOSED; | |
436 | return (0); | |
437 | } | |
438 | ||
439 | /* | |
440 | * Initiate (or continue) disconnect. | |
441 | * If embryonic state, just send reset (once). | |
442 | * If in ``let data drain'' option and linger null, just drop. | |
443 | * Otherwise (hard), mark socket disconnecting and drop | |
444 | * current input data; switch states based on user close, and | |
445 | * send segment to peer (with FIN). | |
446 | */ | |
447 | struct tcpcb * | |
448 | tcp_disconnect(tp) | |
449 | register struct tcpcb *tp; | |
450 | { | |
451 | struct socket *so = tp->t_inpcb->inp_socket; | |
452 | ||
453 | if (tp->t_state < TCPS_ESTABLISHED) | |
454 | tp = tcp_close(tp); | |
455 | else if ((so->so_options & SO_LINGER) && so->so_linger == 0) | |
456 | tp = tcp_drop(tp, 0); | |
457 | else { | |
458 | soisdisconnecting(so); | |
459 | sbflush(&so->so_rcv); | |
460 | tp = tcp_usrclosed(tp); | |
461 | if (tp) | |
462 | (void) tcp_output(tp); | |
463 | } | |
464 | return (tp); | |
465 | } | |
466 | ||
467 | /* | |
468 | * User issued close, and wish to trail through shutdown states: | |
469 | * if never received SYN, just forget it. If got a SYN from peer, | |
470 | * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. | |
471 | * If already got a FIN from peer, then almost done; go to LAST_ACK | |
472 | * state. In all other cases, have already sent FIN to peer (e.g. | |
473 | * after PRU_SHUTDOWN), and just have to play tedious game waiting | |
474 | * for peer to send FIN or not respond to keep-alives, etc. | |
475 | * We can let the user exit from the close as soon as the FIN is acked. | |
476 | */ | |
477 | struct tcpcb * | |
478 | tcp_usrclosed(tp) | |
479 | register struct tcpcb *tp; | |
480 | { | |
481 | ||
482 | switch (tp->t_state) { | |
483 | ||
484 | case TCPS_CLOSED: | |
485 | case TCPS_LISTEN: | |
486 | case TCPS_SYN_SENT: | |
487 | tp->t_state = TCPS_CLOSED; | |
488 | tp = tcp_close(tp); | |
489 | break; | |
490 | ||
491 | case TCPS_SYN_RECEIVED: | |
492 | case TCPS_ESTABLISHED: | |
493 | tp->t_state = TCPS_FIN_WAIT_1; | |
494 | break; | |
495 | ||
496 | case TCPS_CLOSE_WAIT: | |
497 | tp->t_state = TCPS_LAST_ACK; | |
498 | break; | |
499 | } | |
500 | if (tp && tp->t_state >= TCPS_FIN_WAIT_2) | |
501 | soisdisconnected(tp->t_inpcb->inp_socket); | |
502 | return (tp); | |
503 | } |