change iss increment to agree with tcp_input (and tcp_seq.h comments)
[unix-history] / usr / src / sys / netinet / tcp_usrreq.c
CommitLineData
8ae0e4b4 1/*
22cc6d10 2 * Copyright (c) 1982, 1986, 1988, 1993, 1995
e7a3707f 3 * The Regents of the University of California. All rights reserved.
8ae0e4b4 4 *
dbf0c423 5 * %sccs.include.redist.c%
2b6b6284 6 *
9916278a 7 * @(#)tcp_usrreq.c 8.5 (Berkeley) %G%
8ae0e4b4 8 */
72f24d7d 9
5548a02f
KB
10#include <sys/param.h>
11#include <sys/systm.h>
12#include <sys/malloc.h>
13#include <sys/mbuf.h>
14#include <sys/socket.h>
15#include <sys/socketvar.h>
16#include <sys/protosw.h>
17#include <sys/errno.h>
18#include <sys/stat.h>
6e7edb25 19
5548a02f
KB
20#include <net/if.h>
21#include <net/route.h>
f4d55810 22
5548a02f
KB
23#include <netinet/in.h>
24#include <netinet/in_systm.h>
25#include <netinet/ip.h>
26#include <netinet/in_pcb.h>
27#include <netinet/ip_var.h>
28#include <netinet/tcp.h>
29#include <netinet/tcp_fsm.h>
30#include <netinet/tcp_seq.h>
31#include <netinet/tcp_timer.h>
32#include <netinet/tcp_var.h>
33#include <netinet/tcpip.h>
34#include <netinet/tcp_debug.h>
eee3ab16 35
290e0b0a
BJ
36/*
37 * TCP protocol interface to socket abstraction.
38 */
39extern char *tcpstates[];
290e0b0a 40
9c5022e3 41/*
290e0b0a 42 * Process a TCP user request for TCP tb. If this is a send request
9c5022e3
BJ
43 * then m is the mbuf chain of send data. If this is a timer expiration
44 * (called from the software clock routine), then timertype tells which timer.
45 */
a8d3bf7f 46/*ARGSUSED*/
c46785cb 47int
a942fe02 48tcp_usrreq(so, req, m, nam, control)
eee3ab16
BJ
49 struct socket *so;
50 int req;
a942fe02 51 struct mbuf *m, *nam, *control;
4eb5d593 52{
5db7dd0d 53 register struct inpcb *inp;
cdad2eb1 54 register struct tcpcb *tp;
5db7dd0d 55 int s;
eee3ab16 56 int error = 0;
17b82ed4 57 int ostate;
72f24d7d 58
9d866d2f 59#if BSD>=43
5db7dd0d 60 if (req == PRU_CONTROL)
b47fe08a 61 return (in_control(so, (u_long)m, (caddr_t)nam,
a942fe02
MK
62 (struct ifnet *)control));
63 if (control && control->m_len) {
64 m_freem(control);
65 if (m)
66 m_freem(m);
ab85b059 67 return (EINVAL);
a942fe02 68 }
5db7dd0d
MK
69
70 s = splnet();
71 inp = sotoinpcb(so);
53a5409e 72 /*
290e0b0a
BJ
73 * When a TCP is attached to a socket, then there will be
74 * a (struct inpcb) pointed at by the socket, and this
75 * structure will point at a subsidary (struct tcpcb).
53a5409e 76 */
0974b45c 77 if (inp == 0 && req != PRU_ATTACH) {
a6503abf 78 splx(s);
22cc6d10
MK
79#if 0
80 /*
81 * The following corrects an mbuf leak under rare
82 * circumstances, but has not been fully tested.
83 */
84 if (m && req != PRU_SENSE)
85 m_freem(m);
86#else
87 /* safer version of fix for mbuf leak */
88 if (m && (req == PRU_SEND || req == PRU_SENDOOB))
89 m_freem(m);
90#endif
290e0b0a 91 return (EINVAL); /* XXX */
a6503abf
BJ
92 }
93 if (inp) {
cdad2eb1 94 tp = intotcpcb(inp);
8075bb0e 95 /* WHAT IF TP IS 0? */
9c5022e3 96#ifdef KPROF
a6503abf 97 tcp_acounts[tp->t_state][req]++;
9c5022e3 98#endif
17b82ed4 99 ostate = tp->t_state;
ebf42a75
BJ
100 } else
101 ostate = 0;
eee3ab16 102 switch (req) {
4eb5d593 103
290e0b0a
BJ
104 /*
105 * TCP attaches to socket via PRU_ATTACH, reserving space,
8075bb0e 106 * and an internet control block.
290e0b0a 107 */
eee3ab16 108 case PRU_ATTACH:
4ad99bae 109 if (inp) {
eee3ab16 110 error = EISCONN;
cdad2eb1 111 break;
53a5409e 112 }
a1edc12b 113 error = tcp_attach(so);
a6503abf 114 if (error)
4ad99bae 115 break;
0e3936fa 116 if ((so->so_options & SO_LINGER) && so->so_linger == 0)
8e65fd66 117 so->so_linger = TCP_LINGERTIME;
290e0b0a 118 tp = sototcpcb(so);
72f24d7d 119 break;
4eb5d593 120
290e0b0a
BJ
121 /*
122 * PRU_DETACH detaches the TCP protocol from the socket.
123 * If the protocol state is non-embryonic, then can't
124 * do this directly: have to initiate a PRU_DISCONNECT,
125 * which may finish later; embryonic TCB's can just
126 * be discarded here.
127 */
eee3ab16 128 case PRU_DETACH:
290e0b0a 129 if (tp->t_state > TCPS_LISTEN)
0e3936fa
SL
130 tp = tcp_disconnect(tp);
131 else
132 tp = tcp_close(tp);
eee3ab16
BJ
133 break;
134
8075bb0e
BJ
135 /*
136 * Give the socket an address.
137 */
138 case PRU_BIND:
139 error = in_pcbbind(inp, nam);
140 if (error)
141 break;
142 break;
143
144 /*
145 * Prepare to accept connections.
146 */
147 case PRU_LISTEN:
148 if (inp->inp_lport == 0)
149 error = in_pcbbind(inp, (struct mbuf *)0);
150 if (error == 0)
151 tp->t_state = TCPS_LISTEN;
152 break;
153
290e0b0a
BJ
154 /*
155 * Initiate connection to peer.
156 * Create a template for use in transmissions on this connection.
157 * Enter SYN_SENT state, and mark socket as connecting.
158 * Start keep-alive timer, and seed output sequence space.
159 * Send initial segment on connection.
160 */
eee3ab16 161 case PRU_CONNECT:
8075bb0e
BJ
162 if (inp->inp_lport == 0) {
163 error = in_pcbbind(inp, (struct mbuf *)0);
164 if (error)
165 break;
166 }
167 error = in_pcbconnect(inp, nam);
4ad99bae 168 if (error)
53a5409e 169 break;
b454c3ea 170 tp->t_template = tcp_template(tp);
290e0b0a
BJ
171 if (tp->t_template == 0) {
172 in_pcbdisconnect(inp);
173 error = ENOBUFS;
174 break;
175 }
69d96ae2
AC
176 /* Compute window scaling to request. */
177 while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
178 (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat)
179 tp->request_r_scale++;
53a5409e 180 soisconnecting(so);
3b52afc5 181 tcpstat.tcps_connattempt++;
a6503abf 182 tp->t_state = TCPS_SYN_SENT;
8a36cf82 183 tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
9916278a 184 tp->iss = tcp_iss; tcp_iss += TCP_ISSINCR/4;
4aed14e3 185 tcp_sendseqinit(tp);
8a2f82db 186 error = tcp_output(tp);
72f24d7d 187 break;
4eb5d593 188
4945768c
SL
189 /*
190 * Create a TCP connection between two sockets.
191 */
192 case PRU_CONNECT2:
193 error = EOPNOTSUPP;
194 break;
195
290e0b0a
BJ
196 /*
197 * Initiate disconnect from peer.
198 * If connection never passed embryonic stage, just drop;
199 * else if don't need to let data drain, then can just drop anyways,
200 * else have to begin TCP shutdown process: mark socket disconnecting,
201 * drain unread data, state switch to reflect user close, and
202 * send segment (e.g. FIN) to peer. Socket will be really disconnected
203 * when peer sends FIN and acks ours.
204 *
205 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
206 */
207 case PRU_DISCONNECT:
0e3936fa 208 tp = tcp_disconnect(tp);
4aed14e3
BJ
209 break;
210
290e0b0a
BJ
211 /*
212 * Accept a connection. Essentially all the work is
213 * done at higher levels; just return the address
214 * of the peer, storing through addr.
215 */
65df0627
CT
216 case PRU_ACCEPT:
217 in_setpeeraddr(inp, nam);
eee3ab16
BJ
218 break;
219
290e0b0a
BJ
220 /*
221 * Mark the connection as being incapable of further output.
222 */
eee3ab16 223 case PRU_SHUTDOWN:
0974b45c 224 socantsendmore(so);
0e3936fa
SL
225 tp = tcp_usrclosed(tp);
226 if (tp)
227 error = tcp_output(tp);
72f24d7d
BJ
228 break;
229
290e0b0a
BJ
230 /*
231 * After a receive, possibly send window update to peer.
232 */
eee3ab16 233 case PRU_RCVD:
f1b2fa5b 234 (void) tcp_output(tp);
72f24d7d
BJ
235 break;
236
290e0b0a
BJ
237 /*
238 * Do a send by putting data in output queue and updating urgent
239 * marker if URG set. Possibly send more data.
240 */
eee3ab16 241 case PRU_SEND:
a6503abf 242 sbappend(&so->so_snd, m);
8a2f82db 243 error = tcp_output(tp);
72f24d7d
BJ
244 break;
245
290e0b0a
BJ
246 /*
247 * Abort the TCP.
248 */
eee3ab16 249 case PRU_ABORT:
0e3936fa 250 tp = tcp_drop(tp, ECONNABORTED);
72f24d7d
BJ
251 break;
252
f1b2fa5b 253 case PRU_SENSE:
74040e68 254 ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
ded7a1df 255 (void) splx(s);
74040e68 256 return (0);
f1b2fa5b
BJ
257
258 case PRU_RCVOOB:
01234a7d
MK
259 if ((so->so_oobmark == 0 &&
260 (so->so_state & SS_RCVATMARK) == 0) ||
9d866d2f 261#ifdef SO_OOBINLINE
f6a4d6a4 262 so->so_options & SO_OOBINLINE ||
9d866d2f 263#endif
01234a7d 264 tp->t_oobflags & TCPOOB_HADDATA) {
0244dbc7
BJ
265 error = EINVAL;
266 break;
267 }
b2db9217 268 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
8b5a83bb 269 error = EWOULDBLOCK;
b2db9217 270 break;
8b5a83bb 271 }
283ea225 272 m->m_len = 1;
b2db9217 273 *mtod(m, caddr_t) = tp->t_iobc;
01234a7d
MK
274 if (((int)nam & MSG_PEEK) == 0)
275 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
f1b2fa5b
BJ
276 break;
277
278 case PRU_SENDOOB:
8b5a83bb 279 if (sbspace(&so->so_snd) < -512) {
37279c1b 280 m_freem(m);
8b5a83bb
BJ
281 error = ENOBUFS;
282 break;
283 }
f6a4d6a4
MK
284 /*
285 * According to RFC961 (Assigned Protocols),
286 * the urgent pointer points to the last octet
287 * of urgent data. We continue, however,
288 * to consider it to indicate the first octet
289 * of data past the urgent section.
290 * Otherwise, snd_up should be one lower.
291 */
0244dbc7 292 sbappend(&so->so_snd, m);
f6a4d6a4 293 tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
b2db9217 294 tp->t_force = 1;
8a2f82db 295 error = tcp_output(tp);
b2db9217 296 tp->t_force = 0;
f1b2fa5b
BJ
297 break;
298
126472ab 299 case PRU_SOCKADDR:
8075bb0e 300 in_setsockaddr(inp, nam);
126472ab
SL
301 break;
302
a7343092
SL
303 case PRU_PEERADDR:
304 in_setpeeraddr(inp, nam);
305 break;
306
290e0b0a
BJ
307 /*
308 * TCP slow timer went off; going through this
309 * routine for tracing's sake.
310 */
eee3ab16 311 case PRU_SLOWTIMO:
0e3936fa 312 tp = tcp_timers(tp, (int)nam);
8075bb0e 313 req |= (int)nam << 8; /* for debug's sake */
eee3ab16
BJ
314 break;
315
9c5022e3
BJ
316 default:
317 panic("tcp_usrreq");
72f24d7d 318 }
17b82ed4
BJ
319 if (tp && (so->so_options & SO_DEBUG))
320 tcp_trace(TA_USER, ostate, tp, (struct tcpiphdr *)0, req);
72f24d7d 321 splx(s);
53a5409e 322 return (error);
4eb5d593 323}
4aed14e3 324
#if BSD>=43
/*
 * Get or set a socket option on a TCP socket.
 *
 * op is PRCO_SETOPT or PRCO_GETOPT.  Requests whose level is not
 * IPPROTO_TCP are passed on to ip_ctloutput().  For PRCO_SETOPT the
 * option value arrives in *mp and that mbuf is freed here; for
 * PRCO_GETOPT a fresh mbuf holding one int is allocated and returned
 * through mp.
 *
 * Returns 0 on success, ECONNRESET if the socket has no inpcb,
 * EINVAL for a missing, short or out-of-range value, and
 * ENOPROTOOPT for an option name not handled here.
 *
 * The return type lives inside the conditional so that nothing is
 * left behind when this function is compiled out.
 */
int
tcp_ctloutput(op, so, level, optname, mp)
	int op;
	struct socket *so;
	int level, optname;
	struct mbuf **mp;
{
	int error = 0, s;
	struct inpcb *inp;
	register struct tcpcb *tp;
	register struct mbuf *m;
	register int i;

	s = splnet();
	inp = sotoinpcb(so);
	if (inp == NULL) {
		splx(s);
		/* The value mbuf of a set request still has to be freed. */
		if (op == PRCO_SETOPT && *mp)
			(void) m_free(*mp);
		return (ECONNRESET);
	}
	if (level != IPPROTO_TCP) {
		error = ip_ctloutput(op, so, level, optname, mp);
		splx(s);
		return (error);
	}
	tp = intotcpcb(inp);

	switch (op) {

	case PRCO_SETOPT:
		m = *mp;
		switch (optname) {

		case TCP_NODELAY:
			if (m == NULL || m->m_len < sizeof (int))
				error = EINVAL;
			else if (*mtod(m, int *))
				tp->t_flags |= TF_NODELAY;
			else
				tp->t_flags &= ~TF_NODELAY;
			break;

		case TCP_MAXSEG:
			/*
			 * Same length check as TCP_NODELAY, so a short
			 * value is rejected instead of being read as an
			 * int.  The segment size can only be lowered,
			 * never raised above its current value.
			 */
			if (m == NULL || m->m_len < sizeof (int))
				error = EINVAL;
			else if ((i = *mtod(m, int *)) > 0 &&
			    i <= tp->t_maxseg)
				tp->t_maxseg = i;
			else
				error = EINVAL;
			break;

		default:
			error = ENOPROTOOPT;
			break;
		}
		if (m)
			(void) m_free(m);
		break;

	case PRCO_GETOPT:
		*mp = m = m_get(M_WAIT, MT_SOOPTS);
		m->m_len = sizeof(int);

		switch (optname) {
		case TCP_NODELAY:
			*mtod(m, int *) = tp->t_flags & TF_NODELAY;
			break;
		case TCP_MAXSEG:
			*mtod(m, int *) = tp->t_maxseg;
			break;
		default:
			/*
			 * The mbuf is still handed back through mp.
			 * NOTE(review): presumably the caller frees it
			 * on error -- confirm.
			 */
			error = ENOPROTOOPT;
			break;
		}
		break;
	}
	splx(s);
	return (error);
}
#endif
01234a7d 405
cb02140a
KM
406u_long tcp_sendspace = 1024*8;
407u_long tcp_recvspace = 1024*8;
9d91b170 408
290e0b0a
BJ
409/*
410 * Attach TCP protocol to socket, allocating
411 * internet protocol control block, tcp control block,
412 * bufer space, and entering LISTEN state if to accept connections.
413 */
c46785cb 414int
8075bb0e 415tcp_attach(so)
290e0b0a 416 struct socket *so;
290e0b0a
BJ
417{
418 register struct tcpcb *tp;
419 struct inpcb *inp;
420 int error;
421
4f5156ea
MK
422 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
423 error = soreserve(so, tcp_sendspace, tcp_recvspace);
424 if (error)
425 return (error);
426 }
ebf42a75 427 error = in_pcballoc(so, &tcb);
290e0b0a 428 if (error)
054054fd 429 return (error);
8075bb0e 430 inp = sotoinpcb(so);
290e0b0a 431 tp = tcp_newtcpcb(inp);
ebf42a75 432 if (tp == 0) {
054054fd
MK
433 int nofd = so->so_state & SS_NOFDREF; /* XXX */
434
435 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */
436 in_pcbdetach(inp);
437 so->so_state |= nofd;
438 return (ENOBUFS);
ebf42a75 439 }
8075bb0e 440 tp->t_state = TCPS_CLOSED;
290e0b0a
BJ
441 return (0);
442}
443
444/*
445 * Initiate (or continue) disconnect.
446 * If embryonic state, just send reset (once).
f9e4ec68 447 * If in ``let data drain'' option and linger null, just drop.
290e0b0a
BJ
448 * Otherwise (hard), mark socket disconnecting and drop
449 * current input data; switch states based on user close, and
450 * send segment to peer (with FIN).
451 */
0e3936fa 452struct tcpcb *
290e0b0a 453tcp_disconnect(tp)
0e3936fa 454 register struct tcpcb *tp;
290e0b0a
BJ
455{
456 struct socket *so = tp->t_inpcb->inp_socket;
457
458 if (tp->t_state < TCPS_ESTABLISHED)
0e3936fa 459 tp = tcp_close(tp);
f9e4ec68 460 else if ((so->so_options & SO_LINGER) && so->so_linger == 0)
0e3936fa 461 tp = tcp_drop(tp, 0);
290e0b0a
BJ
462 else {
463 soisdisconnecting(so);
464 sbflush(&so->so_rcv);
0e3936fa
SL
465 tp = tcp_usrclosed(tp);
466 if (tp)
467 (void) tcp_output(tp);
290e0b0a 468 }
0e3936fa 469 return (tp);
290e0b0a
BJ
470}
471
472/*
473 * User issued close, and wish to trail through shutdown states:
474 * if never received SYN, just forget it. If got a SYN from peer,
475 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
476 * If already got a FIN from peer, then almost done; go to LAST_ACK
477 * state. In all other cases, have already sent FIN to peer (e.g.
478 * after PRU_SHUTDOWN), and just have to play tedious game waiting
479 * for peer to send FIN or not respond to keep-alives, etc.
085a0b90 480 * We can let the user exit from the close as soon as the FIN is acked.
290e0b0a 481 */
0e3936fa 482struct tcpcb *
4aed14e3 483tcp_usrclosed(tp)
0e3936fa 484 register struct tcpcb *tp;
4aed14e3
BJ
485{
486
4aed14e3
BJ
487 switch (tp->t_state) {
488
815b24e1 489 case TCPS_CLOSED:
4aed14e3
BJ
490 case TCPS_LISTEN:
491 case TCPS_SYN_SENT:
492 tp->t_state = TCPS_CLOSED;
0e3936fa 493 tp = tcp_close(tp);
4aed14e3
BJ
494 break;
495
496 case TCPS_SYN_RECEIVED:
497 case TCPS_ESTABLISHED:
498 tp->t_state = TCPS_FIN_WAIT_1;
499 break;
500
501 case TCPS_CLOSE_WAIT:
502 tp->t_state = TCPS_LAST_ACK;
503 break;
504 }
0e3936fa 505 if (tp && tp->t_state >= TCPS_FIN_WAIT_2)
085a0b90 506 soisdisconnected(tp->t_inpcb->inp_socket);
0e3936fa 507 return (tp);
4aed14e3 508}