have to protect acct_process from acctwatch closing the vnode
[unix-history] / usr / src / sys / kern / uipc_socket.c
CommitLineData
da7c5cc6 1/*
ec54f0cc
KB
2 * Copyright (c) 1982, 1986, 1988, 1990, 1993
3 * The Regents of the University of California. All rights reserved.
da7c5cc6 4 *
dbf0c423 5 * %sccs.include.redist.c%
5b519e94 6 *
1fd74a86 7 * @(#)uipc_socket.c 8.3 (Berkeley) %G%
da7c5cc6 8 */
ce9d8eb4 9
38a01dbe
KB
10#include <sys/param.h>
11#include <sys/systm.h>
12#include <sys/proc.h>
13#include <sys/file.h>
14#include <sys/malloc.h>
15#include <sys/mbuf.h>
16#include <sys/domain.h>
17#include <sys/kernel.h>
18#include <sys/protosw.h>
19#include <sys/socket.h>
20#include <sys/socketvar.h>
21#include <sys/resourcevar.h>
ce9d8eb4 22
ce9d8eb4 23/*
cf012934
BJ
24 * Socket operation routines.
25 * These routines are called by the routines in
26 * sys_socket.c or from a system process, and
27 * implement the semantics of socket operations by
28 * switching out to the protocol specific routines.
ce9d8eb4 29 */
a8d3bf7f 30/*ARGSUSED*/
98422daa 31socreate(dom, aso, type, proto)
39f2b629 32 int dom;
ce9d8eb4 33 struct socket **aso;
88a7a62a
SL
34 register int type;
35 int proto;
ce9d8eb4 36{
dff5c020 37 struct proc *p = curproc; /* XXX */
ce9d8eb4
BJ
38 register struct protosw *prp;
39 register struct socket *so;
88a7a62a 40 register int error;
cc15ab5d 41
cc15ab5d 42 if (proto)
8c0650b0 43 prp = pffindproto(dom, proto, type);
cc15ab5d 44 else
4f083fd7 45 prp = pffindtype(dom, type);
2b6d14b8 46 if (prp == 0 || prp->pr_usrreq == 0)
cc15ab5d 47 return (EPROTONOSUPPORT);
cf012934
BJ
48 if (prp->pr_type != type)
49 return (EPROTOTYPE);
a2aebb63
KS
50 MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_WAIT);
51 bzero((caddr_t)so, sizeof(*so));
4f083fd7 52 so->so_type = type;
dff5c020 53 if (p->p_ucred->cr_uid == 0)
62364f0e 54 so->so_state = SS_PRIV;
ce9d8eb4 55 so->so_proto = prp;
88a7a62a
SL
56 error =
57 (*prp->pr_usrreq)(so, PRU_ATTACH,
8c0650b0 58 (struct mbuf *)0, (struct mbuf *)proto, (struct mbuf *)0);
b91acce4 59 if (error) {
90aaea96 60 so->so_state |= SS_NOFDREF;
de48daf3 61 sofree(so);
cc15ab5d 62 return (error);
ce9d8eb4
BJ
63 }
64 *aso = so;
65 return (0);
66}
67
98422daa 68sobind(so, nam)
cf012934
BJ
69 struct socket *so;
70 struct mbuf *nam;
cf012934
BJ
71{
72 int s = splnet();
73 int error;
74
7c4ec3aa
MK
75 error =
76 (*so->so_proto->pr_usrreq)(so, PRU_BIND,
88a7a62a 77 (struct mbuf *)0, nam, (struct mbuf *)0);
cf012934
BJ
78 splx(s);
79 return (error);
80}
81
82solisten(so, backlog)
88a7a62a 83 register struct socket *so;
cf012934
BJ
84 int backlog;
85{
88a7a62a 86 int s = splnet(), error;
cf012934 87
88a7a62a
SL
88 error =
89 (*so->so_proto->pr_usrreq)(so, PRU_LISTEN,
90 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0);
cf012934
BJ
91 if (error) {
92 splx(s);
93 return (error);
94 }
629e51da 95 if (so->so_q == 0)
cf012934 96 so->so_options |= SO_ACCEPTCONN;
cf012934
BJ
97 if (backlog < 0)
98 backlog = 0;
2557c1fc 99 so->so_qlimit = min(backlog, SOMAXCONN);
9e87be97 100 splx(s);
cf012934
BJ
101 return (0);
102}
103
ae921915 104sofree(so)
88a7a62a 105 register struct socket *so;
ae921915
BJ
106{
107
bb73a14e
MK
108 if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
109 return;
90aaea96
BJ
110 if (so->so_head) {
111 if (!soqremque(so, 0) && !soqremque(so, 1))
112 panic("sofree dq");
113 so->so_head = 0;
114 }
4ad99bae 115 sbrelease(&so->so_snd);
88a7a62a 116 sorflush(so);
a2aebb63 117 FREE(so, M_SOCKET);
ae921915
BJ
118}
119
92a533e6 120/*
cc15ab5d
BJ
121 * Close a socket on last file table reference removal.
122 * Initiate disconnect if connected.
123 * Free socket when disconnect complete.
92a533e6 124 */
88a7a62a 125soclose(so)
92a533e6 126 register struct socket *so;
92a533e6 127{
cc15ab5d 128 int s = splnet(); /* conservative */
e58562f2 129 int error = 0;
cc15ab5d 130
90aaea96 131 if (so->so_options & SO_ACCEPTCONN) {
629e51da 132 while (so->so_q0)
26225f25 133 (void) soabort(so->so_q0);
629e51da 134 while (so->so_q)
26225f25 135 (void) soabort(so->so_q);
90aaea96 136 }
cc15ab5d
BJ
137 if (so->so_pcb == 0)
138 goto discard;
139 if (so->so_state & SS_ISCONNECTED) {
140 if ((so->so_state & SS_ISDISCONNECTING) == 0) {
dedd6629 141 error = sodisconnect(so);
88a7a62a
SL
142 if (error)
143 goto drop;
cc15ab5d 144 }
98422daa 145 if (so->so_options & SO_LINGER) {
b8acc34d 146 if ((so->so_state & SS_ISDISCONNECTING) &&
88a7a62a
SL
147 (so->so_state & SS_NBIO))
148 goto drop;
b8acc34d 149 while (so->so_state & SS_ISCONNECTED)
83866636
MK
150 if (error = tsleep((caddr_t)&so->so_timeo,
151 PSOCK | PCATCH, netcls, so->so_linger))
152 break;
72857acf 153 }
cc15ab5d 154 }
89900a09 155drop:
37c0974a 156 if (so->so_pcb) {
88a7a62a
SL
157 int error2 =
158 (*so->so_proto->pr_usrreq)(so, PRU_DETACH,
159 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0);
160 if (error == 0)
161 error = error2;
37c0974a 162 }
cc15ab5d 163discard:
26225f25
SL
164 if (so->so_state & SS_NOFDREF)
165 panic("soclose: NOFDREF");
90aaea96 166 so->so_state |= SS_NOFDREF;
4ad99bae 167 sofree(so);
cc15ab5d 168 splx(s);
88a7a62a 169 return (error);
92a533e6
BJ
170}
171
26225f25
SL
172/*
173 * Must be called at splnet...
174 */
175soabort(so)
176 struct socket *so;
177{
26225f25 178
88a7a62a
SL
179 return (
180 (*so->so_proto->pr_usrreq)(so, PRU_ABORT,
181 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0));
92a533e6
BJ
182}
183
98422daa 184soaccept(so, nam)
88a7a62a 185 register struct socket *so;
cf012934 186 struct mbuf *nam;
2b4b57cd
BJ
187{
188 int s = splnet();
189 int error;
190
26225f25
SL
191 if ((so->so_state & SS_NOFDREF) == 0)
192 panic("soaccept: !NOFDREF");
98422daa 193 so->so_state &= ~SS_NOFDREF;
cf012934 194 error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT,
88a7a62a 195 (struct mbuf *)0, nam, (struct mbuf *)0);
2b4b57cd
BJ
196 splx(s);
197 return (error);
198}
199
98422daa 200soconnect(so, nam)
88a7a62a 201 register struct socket *so;
cf012934 202 struct mbuf *nam;
ce9d8eb4 203{
7bcf9d13 204 int s;
cc15ab5d 205 int error;
ce9d8eb4 206
7bcf9d13
MK
207 if (so->so_options & SO_ACCEPTCONN)
208 return (EOPNOTSUPP);
209 s = splnet();
de2c74a5
MK
210 /*
211 * If protocol is connection-based, can only connect once.
212 * Otherwise, if connected, try to disconnect first.
213 * This allows user to disconnect by connecting to, e.g.,
214 * a null address.
215 */
216 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
217 ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
218 (error = sodisconnect(so))))
cc15ab5d 219 error = EISCONN;
de2c74a5
MK
220 else
221 error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
222 (struct mbuf *)0, nam, (struct mbuf *)0);
cc15ab5d
BJ
223 splx(s);
224 return (error);
ce9d8eb4
BJ
225}
226
88a7a62a
SL
227soconnect2(so1, so2)
228 register struct socket *so1;
229 struct socket *so2;
230{
231 int s = splnet();
232 int error;
233
5a48956d
SL
234 error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2,
235 (struct mbuf *)0, (struct mbuf *)so2, (struct mbuf *)0);
88a7a62a
SL
236 splx(s);
237 return (error);
238}
88a7a62a 239
dedd6629 240sodisconnect(so)
88a7a62a 241 register struct socket *so;
ce9d8eb4 242{
cc15ab5d
BJ
243 int s = splnet();
244 int error;
ce9d8eb4 245
cc15ab5d
BJ
246 if ((so->so_state & SS_ISCONNECTED) == 0) {
247 error = ENOTCONN;
248 goto bad;
ce9d8eb4 249 }
cc15ab5d
BJ
250 if (so->so_state & SS_ISDISCONNECTING) {
251 error = EALREADY;
252 goto bad;
ce9d8eb4 253 }
cf012934 254 error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT,
dedd6629 255 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0);
cc15ab5d
BJ
256bad:
257 splx(s);
258 return (error);
ce9d8eb4
BJ
259}
260
440c48bf 261#define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
cc15ab5d
BJ
262/*
263 * Send on a socket.
264 * If send must go all at once and message is larger than
265 * send buffering, then hard error.
266 * Lock against other senders.
267 * If must go all at once and not enough room now, then
268 * inform user that this would block and do nothing.
8250a099 269 * Otherwise, if nonblocking, send as much as possible.
7c4ec3aa
MK
270 * The data to be sent is described by "uio" if nonzero,
271 * otherwise by the mbuf chain "top" (which must be null
272 * if uio is not). Data provided in mbuf chain must be small
273 * enough to send all at once.
274 *
275 * Returns nonzero on error, timeout or signal; callers
276 * must check for short counts if EINTR/ERESTART are returned.
277 * Data and control buffers are freed on return.
cc15ab5d 278 */
4b9db1f5 279sosend(so, addr, uio, top, control, flags)
ce9d8eb4 280 register struct socket *so;
7c4ec3aa
MK
281 struct mbuf *addr;
282 struct uio *uio;
283 struct mbuf *top;
2967f28e 284 struct mbuf *control;
7c4ec3aa 285 int flags;
ce9d8eb4 286{
dff5c020 287 struct proc *p = curproc; /* XXX */
7c4ec3aa 288 struct mbuf **mp;
2557c1fc 289 register struct mbuf *m;
7c4ec3aa
MK
290 register long space, len, resid;
291 int clen = 0, error, s, dontroute, mlen;
292 int atomic = sosendallatonce(so) || top;
ce9d8eb4 293
7c4ec3aa
MK
294 if (uio)
295 resid = uio->uio_resid;
296 else
297 resid = top->m_pkthdr.len;
1fd74a86
KM
298 /*
299 * In theory resid should be unsigned.
300 * However, space must be signed, as it might be less than 0
301 * if we over-committed, and we must use a signed comparison
302 * of space and resid. On the other hand, a negative resid
303 * causes us to loop sending 0-length segments to the protocol.
304 */
305 if (resid < 0)
306 return (EINVAL);
88a7a62a
SL
307 dontroute =
308 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
309 (so->so_proto->pr_flags & PR_ATOMIC);
dff5c020 310 p->p_stats->p_ru.ru_msgsnd++;
2967f28e 311 if (control)
7c4ec3aa 312 clen = control->m_len;
cc15ab5d
BJ
313#define snderr(errno) { error = errno; splx(s); goto release; }
314
8250a099 315restart:
440c48bf 316 if (error = sblock(&so->so_snd, SBLOCKWAIT(flags)))
7c4ec3aa 317 goto out;
8250a099
MK
318 do {
319 s = splnet();
af9c562f 320 if (so->so_state & SS_CANTSENDMORE)
8250a099 321 snderr(EPIPE);
a2aebb63
KS
322 if (so->so_error)
323 snderr(so->so_error);
8250a099 324 if ((so->so_state & SS_ISCONNECTED) == 0) {
a2aebb63 325 if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
f3bf27ac
KS
326 if ((so->so_state & SS_ISCONFIRMING) == 0 &&
327 !(resid == 0 && clen != 0))
a2aebb63 328 snderr(ENOTCONN);
7c4ec3aa 329 } else if (addr == 0)
8250a099
MK
330 snderr(EDESTADDRREQ);
331 }
7c4ec3aa 332 space = sbspace(&so->so_snd);
8250a099 333 if (flags & MSG_OOB)
7c4ec3aa 334 space += 1024;
440c48bf
KM
335 if (atomic && resid > so->so_snd.sb_hiwat ||
336 clen > so->so_snd.sb_hiwat)
337 snderr(EMSGSIZE);
338 if (space < resid + clen && uio &&
7c4ec3aa 339 (atomic || space < so->so_snd.sb_lowat || space < clen)) {
7c4ec3aa
MK
340 if (so->so_state & SS_NBIO)
341 snderr(EWOULDBLOCK);
342 sbunlock(&so->so_snd);
4b9db1f5 343 error = sbwait(&so->so_snd);
7c4ec3aa
MK
344 splx(s);
345 if (error)
346 goto out;
347 goto restart;
8250a099 348 }
4c078bb2 349 splx(s);
8250a099 350 mp = &top;
7c4ec3aa 351 space -= clen;
4b9db1f5
MK
352 do {
353 if (uio == NULL) {
7c4ec3aa
MK
354 /*
355 * Data is prepackaged in "top".
356 */
357 resid = 0;
358 if (flags & MSG_EOR)
359 top->m_flags |= M_EOR;
4b9db1f5 360 } else do {
2557c1fc
MK
361 if (top == 0) {
362 MGETHDR(m, M_WAIT, MT_DATA);
363 mlen = MHLEN;
364 m->m_pkthdr.len = 0;
365 m->m_pkthdr.rcvif = (struct ifnet *)0;
366 } else {
367 MGET(m, M_WAIT, MT_DATA);
368 mlen = MLEN;
369 }
7c4ec3aa 370 if (resid >= MINCLSIZE && space >= MCLBYTES) {
2557c1fc
MK
371 MCLGET(m, M_WAIT);
372 if ((m->m_flags & M_EXT) == 0)
8250a099 373 goto nopages;
2557c1fc
MK
374 mlen = MCLBYTES;
375#ifdef MAPPED_MBUFS
7c4ec3aa 376 len = min(MCLBYTES, resid);
2557c1fc 377#else
9859687a 378 if (atomic && top == 0) {
7c4ec3aa
MK
379 len = min(MCLBYTES - max_hdr, resid);
380 m->m_data += max_hdr;
415a9324
KS
381 } else
382 len = min(MCLBYTES, resid);
2557c1fc
MK
383#endif
384 space -= MCLBYTES;
8250a099
MK
385 } else {
386nopages:
7c4ec3aa 387 len = min(min(mlen, resid), space);
8c0650b0 388 space -= len;
2557c1fc
MK
389 /*
390 * For datagram protocols, leave room
391 * for protocol headers in first mbuf.
392 */
84efcd38 393 if (atomic && top == 0 && len < mlen)
2557c1fc 394 MH_ALIGN(m, len);
8250a099 395 }
179cd11f 396 error = uiomove(mtod(m, caddr_t), (int)len, uio);
7c4ec3aa 397 resid = uio->uio_resid;
8250a099
MK
398 m->m_len = len;
399 *mp = m;
2557c1fc 400 top->m_pkthdr.len += len;
8250a099
MK
401 if (error)
402 goto release;
403 mp = &m->m_next;
7c4ec3aa
MK
404 if (resid <= 0) {
405 if (flags & MSG_EOR)
2557c1fc 406 top->m_flags |= M_EOR;
af9c562f 407 break;
2557c1fc
MK
408 }
409 } while (space > 0 && atomic);
410 if (dontroute)
411 so->so_options |= SO_DONTROUTE;
412 s = splnet(); /* XXX */
413 error = (*so->so_proto->pr_usrreq)(so,
414 (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
7c4ec3aa 415 top, addr, control);
2557c1fc
MK
416 splx(s);
417 if (dontroute)
418 so->so_options &= ~SO_DONTROUTE;
7c4ec3aa
MK
419 clen = 0;
420 control = 0;
2557c1fc
MK
421 top = 0;
422 mp = &top;
2557c1fc
MK
423 if (error)
424 goto release;
7c4ec3aa
MK
425 } while (resid && space > 0);
426 } while (resid);
cc15ab5d 427
ce9d8eb4 428release:
cc15ab5d 429 sbunlock(&so->so_snd);
7c4ec3aa 430out:
0f90f987
BJ
431 if (top)
432 m_freem(top);
7c4ec3aa
MK
433 if (control)
434 m_freem(control);
ce9d8eb4
BJ
435 return (error);
436}
437
c34d38f4
MK
438/*
439 * Implement receive operations on a socket.
440 * We depend on the way that records are added to the sockbuf
441 * by sbappend*. In particular, each record (mbufs linked through m_next)
442 * must begin with an address if the protocol so specifies,
7c4ec3aa
MK
443 * followed by an optional mbuf or mbufs containing ancillary data,
444 * and then zero or more mbufs of data.
c34d38f4
MK
445 * In order to avoid blocking network interrupts for the entire time here,
446 * we splx() while doing the actual copy to user space.
447 * Although the sockbuf is locked, new data may still be appended,
448 * and thus we must maintain consistency of the sockbuf during that time.
179cd11f 449 *
7c4ec3aa 450 * The caller may receive the data as a single mbuf chain by supplying
4b9db1f5 451 * an mbuf **mp0 for use in returning the chain. The uio is then used
7c4ec3aa 452 * only for the count in uio_resid.
c34d38f4 453 */
4b9db1f5 454soreceive(so, paddr, uio, mp0, controlp, flagsp)
ce9d8eb4 455 register struct socket *so;
7c4ec3aa
MK
456 struct mbuf **paddr;
457 struct uio *uio;
4b9db1f5 458 struct mbuf **mp0;
7c4ec3aa 459 struct mbuf **controlp;
2557c1fc 460 int *flagsp;
ce9d8eb4 461{
4b9db1f5
MK
462 register struct mbuf *m, **mp;
463 register int flags, len, error, s, offset;
88a7a62a 464 struct protosw *pr = so->so_proto;
7c4ec3aa
MK
465 struct mbuf *nextrecord;
466 int moff, type;
34827683 467 int orig_resid = uio->uio_resid;
88a7a62a 468
4b9db1f5 469 mp = mp0;
7c4ec3aa
MK
470 if (paddr)
471 *paddr = 0;
2557c1fc
MK
472 if (controlp)
473 *controlp = 0;
474 if (flagsp)
475 flags = *flagsp &~ MSG_EOR;
179cd11f 476 else
2557c1fc 477 flags = 0;
88a7a62a 478 if (flags & MSG_OOB) {
cce93e4b 479 m = m_get(M_WAIT, MT_DATA);
88a7a62a 480 error = (*pr->pr_usrreq)(so, PRU_RCVOOB,
de2c74a5 481 m, (struct mbuf *)(flags & MSG_PEEK), (struct mbuf *)0);
a8d3bf7f 482 if (error)
5fe6f9d1 483 goto bad;
970108c7 484 do {
7c4ec3aa
MK
485 error = uiomove(mtod(m, caddr_t),
486 (int) min(uio->uio_resid, m->m_len), uio);
970108c7 487 m = m_free(m);
a8d3bf7f 488 } while (uio->uio_resid && error == 0 && m);
5fe6f9d1 489bad:
970108c7 490 if (m)
39d536e6 491 m_freem(m);
a8d3bf7f 492 return (error);
970108c7 493 }
7c4ec3aa
MK
494 if (mp)
495 *mp = (struct mbuf *)0;
4b9db1f5 496 if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
2557c1fc
MK
497 (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
498 (struct mbuf *)0, (struct mbuf *)0);
ce9d8eb4 499
cc15ab5d 500restart:
440c48bf 501 if (error = sblock(&so->so_rcv, SBLOCKWAIT(flags)))
83866636 502 return (error);
cc15ab5d
BJ
503 s = splnet();
504
a2aebb63 505 m = so->so_rcv.sb_mb;
ba4350f5
MK
506 /*
507 * If we have less data than requested, block awaiting more
508 * (subject to any timeout) if:
509 * 1. the current count is less than the low water mark, or
510 * 2. MSG_WAITALL is set, and it is possible to do the entire
511 * receive operation at once if we block (resid <= hiwat).
440c48bf 512 * 3. MSG_DONTWAIT is not set
ba4350f5
MK
513 * If MSG_WAITALL is set but resid is larger than the receive buffer,
514 * we have to do the receive in sections, and thus risk returning
515 * a short count if a timeout or signal occurs after we start.
516 */
440c48bf
KM
517 if (m == 0 || ((flags & MSG_DONTWAIT) == 0 &&
518 so->so_rcv.sb_cc < uio->uio_resid) &&
ba4350f5 519 (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
c05ef6cd
KS
520 ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)))
521 if (m && (m->m_nextpkt || (m->m_flags & M_EOR) ||
522 m->m_type == MT_OOBDATA || m->m_type == MT_CONTROL))
523 break;
7c4ec3aa
MK
524#ifdef DIAGNOSTIC
525 if (m == 0 && so->so_rcv.sb_cc)
a2aebb63 526 panic("receive 1");
7c4ec3aa 527#endif
4c078bb2 528 if (so->so_error) {
95c435b0 529 if (m)
d4c3a4dd 530 goto dontblock;
4c078bb2 531 error = so->so_error;
95c435b0
MK
532 if ((flags & MSG_PEEK) == 0)
533 so->so_error = 0;
4c078bb2
BJ
534 goto release;
535 }
95c435b0
MK
536 if (so->so_state & SS_CANTRCVMORE) {
537 if (m)
d4c3a4dd 538 goto dontblock;
95c435b0
MK
539 else
540 goto release;
541 }
629e51da 542 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
f02d4eaa
KB
543 (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
544 error = ENOTCONN;
545 goto release;
546 }
4b9db1f5 547 if (uio->uio_resid == 0)
c34d38f4 548 goto release;
440c48bf 549 if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
f02d4eaa
KB
550 error = EWOULDBLOCK;
551 goto release;
552 }
cc15ab5d 553 sbunlock(&so->so_rcv);
4b9db1f5 554 error = sbwait(&so->so_rcv);
a4f6d93d 555 splx(s);
7c4ec3aa
MK
556 if (error)
557 return (error);
cc15ab5d 558 goto restart;
ce9d8eb4 559 }
0904b863
KM
560 if (uio->uio_procp)
561 uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
2557c1fc 562 nextrecord = m->m_nextpkt;
c05ef6cd 563 record_eor = m->m_flags & M_EOR;
88a7a62a 564 if (pr->pr_flags & PR_ADDR) {
7c4ec3aa 565#ifdef DIAGNOSTIC
c34d38f4 566 if (m->m_type != MT_SONAME)
261a8548 567 panic("receive 1a");
7c4ec3aa 568#endif
34827683 569 orig_resid = 0;
261a8548 570 if (flags & MSG_PEEK) {
7c4ec3aa
MK
571 if (paddr)
572 *paddr = m_copy(m, 0, m->m_len);
c34d38f4 573 m = m->m_next;
261a8548 574 } else {
c34d38f4 575 sbfree(&so->so_rcv, m);
7c4ec3aa
MK
576 if (paddr) {
577 *paddr = m;
6ff43975 578 so->so_rcv.sb_mb = m->m_next;
c34d38f4 579 m->m_next = 0;
6ff43975 580 m = so->so_rcv.sb_mb;
c34d38f4 581 } else {
6ff43975
MK
582 MFREE(m, so->so_rcv.sb_mb);
583 m = so->so_rcv.sb_mb;
c34d38f4 584 }
88a7a62a 585 }
cc15ab5d 586 }
7c4ec3aa 587 while (m && m->m_type == MT_CONTROL && error == 0) {
2557c1fc
MK
588 if (flags & MSG_PEEK) {
589 if (controlp)
590 *controlp = m_copy(m, 0, m->m_len);
591 m = m->m_next;
592 } else {
593 sbfree(&so->so_rcv, m);
594 if (controlp) {
e8f8de91
KS
595 if (pr->pr_domain->dom_externalize &&
596 mtod(m, struct cmsghdr *)->cmsg_type ==
597 SCM_RIGHTS)
7c4ec3aa 598 error = (*pr->pr_domain->dom_externalize)(m);
2557c1fc
MK
599 *controlp = m;
600 so->so_rcv.sb_mb = m->m_next;
601 m->m_next = 0;
602 m = so->so_rcv.sb_mb;
603 } else {
604 MFREE(m, so->so_rcv.sb_mb);
605 m = so->so_rcv.sb_mb;
606 }
607 }
34827683
KM
608 if (controlp) {
609 orig_resid = 0;
7c4ec3aa 610 controlp = &(*controlp)->m_next;
34827683 611 }
2557c1fc 612 }
7c4ec3aa 613 if (m) {
ba4350f5
MK
614 if ((flags & MSG_PEEK) == 0)
615 m->m_nextpkt = nextrecord;
7c4ec3aa 616 type = m->m_type;
415a9324
KS
617 if (type == MT_OOBDATA)
618 flags |= MSG_OOB;
7c4ec3aa 619 }
970108c7 620 moff = 0;
dd1ca18d 621 offset = 0;
415a9324
KS
622 while (m && uio->uio_resid > 0 && error == 0) {
623 if (m->m_type == MT_OOBDATA) {
624 if (type != MT_OOBDATA)
625 break;
626 } else if (type == MT_OOBDATA)
627 break;
7c4ec3aa 628#ifdef DIAGNOSTIC
2557c1fc 629 else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
c34d38f4 630 panic("receive 3");
7c4ec3aa 631#endif
32a43ee2 632 so->so_state &= ~SS_RCVATMARK;
4b9db1f5 633 len = uio->uio_resid;
dd1ca18d
MK
634 if (so->so_oobmark && len > so->so_oobmark - offset)
635 len = so->so_oobmark - offset;
8c0650b0 636 if (len > m->m_len - moff)
970108c7 637 len = m->m_len - moff;
7c4ec3aa
MK
638 /*
639 * If mp is set, just pass back the mbufs.
640 * Otherwise copy them out via the uio, then free.
641 * Sockbuf must be consistent here (points to current mbuf,
642 * it points to next record) when we drop priority;
643 * we must note any additions to the sockbuf when we
644 * block interrupts again.
645 */
646 if (mp == 0) {
647 splx(s);
648 error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
7c4ec3aa 649 s = splnet();
4b9db1f5
MK
650 } else
651 uio->uio_resid -= len;
8c0650b0 652 if (len == m->m_len - moff) {
c34d38f4
MK
653 if (flags & MSG_PEEK) {
654 m = m->m_next;
655 moff = 0;
656 } else {
2557c1fc 657 nextrecord = m->m_nextpkt;
6ff43975 658 sbfree(&so->so_rcv, m);
7c4ec3aa
MK
659 if (mp) {
660 *mp = m;
661 mp = &m->m_next;
4b9db1f5
MK
662 so->so_rcv.sb_mb = m = m->m_next;
663 *mp = (struct mbuf *)0;
7c4ec3aa
MK
664 } else {
665 MFREE(m, so->so_rcv.sb_mb);
666 m = so->so_rcv.sb_mb;
667 }
6ff43975 668 if (m)
2557c1fc 669 m->m_nextpkt = nextrecord;
c34d38f4 670 }
ce9d8eb4 671 } else {
88a7a62a 672 if (flags & MSG_PEEK)
970108c7
BJ
673 moff += len;
674 else {
4b9db1f5
MK
675 if (mp)
676 *mp = m_copym(m, 0, len, M_WAIT);
2557c1fc 677 m->m_data += len;
970108c7
BJ
678 m->m_len -= len;
679 so->so_rcv.sb_cc -= len;
680 }
ce9d8eb4 681 }
dd1ca18d
MK
682 if (so->so_oobmark) {
683 if ((flags & MSG_PEEK) == 0) {
684 so->so_oobmark -= len;
685 if (so->so_oobmark == 0) {
686 so->so_state |= SS_RCVATMARK;
687 break;
688 }
a660cd8d 689 } else {
dd1ca18d 690 offset += len;
a660cd8d
CT
691 if (offset == so->so_oobmark)
692 break;
693 }
970108c7 694 }
c05ef6cd
KS
695 if (m == 0 && record_eor) {
696 flags |= record_eor;
2967f28e 697 break;
c05ef6cd 698 }
7c4ec3aa
MK
699 /*
700 * If the MSG_WAITALL flag is set (for non-atomic socket),
4b9db1f5 701 * we must not quit until "uio->uio_resid == 0" or an error
7c4ec3aa 702 * termination. If a signal/timeout occurs, return
4b9db1f5 703 * with a short count but without error.
7c4ec3aa
MK
704 * Keep sockbuf locked against other readers.
705 */
4b9db1f5 706 while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
c05ef6cd 707 !(flags & MSG_OOB) && !sosendallatonce(so)) {
95c435b0
MK
708 if (so->so_error || so->so_state & SS_CANTRCVMORE)
709 break;
7c4ec3aa
MK
710 error = sbwait(&so->so_rcv);
711 if (error) {
712 sbunlock(&so->so_rcv);
713 splx(s);
7c4ec3aa
MK
714 return (0);
715 }
c05ef6cd 716 if (m = so->so_rcv.sb_mb) {
7c4ec3aa 717 nextrecord = m->m_nextpkt;
c05ef6cd
KS
718 record_eor |= m->m_flags & M_EOR;
719 }
7c4ec3aa 720 }
261a8548 721 }
34827683
KM
722
723 if (m && pr->pr_flags & PR_ATOMIC) {
724 flags |= MSG_TRUNC;
725 if ((flags & MSG_PEEK) == 0)
726 (void) sbdroprecord(&so->so_rcv);
727 }
261a8548 728 if ((flags & MSG_PEEK) == 0) {
491e9020 729 if (m == 0)
261a8548 730 so->so_rcv.sb_mb = nextrecord;
261a8548
MK
731 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
732 (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
a2aebb63
KS
733 (struct mbuf *)flags, (struct mbuf *)0,
734 (struct mbuf *)0);
261a8548 735 }
34827683
KM
736 if (orig_resid == uio->uio_resid && orig_resid &&
737 (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
738 sbunlock(&so->so_rcv);
739 splx(s);
740 goto restart;
741 }
742
2557c1fc
MK
743 if (flagsp)
744 *flagsp |= flags;
cc15ab5d 745release:
ae921915 746 sbunlock(&so->so_rcv);
cc15ab5d 747 splx(s);
ae921915 748 return (error);
92a533e6
BJ
749}
750
98422daa 751soshutdown(so, how)
88a7a62a
SL
752 register struct socket *so;
753 register int how;
98422daa 754{
88a7a62a 755 register struct protosw *pr = so->so_proto;
98422daa
SL
756
757 how++;
88a7a62a
SL
758 if (how & FREAD)
759 sorflush(so);
98422daa 760 if (how & FWRITE)
88a7a62a
SL
761 return ((*pr->pr_usrreq)(so, PRU_SHUTDOWN,
762 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0));
98422daa
SL
763 return (0);
764}
765
88a7a62a
SL
766sorflush(so)
767 register struct socket *so;
768{
769 register struct sockbuf *sb = &so->so_rcv;
770 register struct protosw *pr = so->so_proto;
771 register int s;
772 struct sockbuf asb;
773
83866636 774 sb->sb_flags |= SB_NOINTR;
440c48bf 775 (void) sblock(sb, M_WAITOK);
88a7a62a
SL
776 s = splimp();
777 socantrcvmore(so);
778 sbunlock(sb);
779 asb = *sb;
780 bzero((caddr_t)sb, sizeof (*sb));
781 splx(s);
261a8548
MK
782 if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
783 (*pr->pr_domain->dom_dispose)(asb.sb_mb);
88a7a62a
SL
784 sbrelease(&asb);
785}
786
bc2f5859 787sosetopt(so, level, optname, m0)
88a7a62a 788 register struct socket *so;
98422daa 789 int level, optname;
bc2f5859 790 struct mbuf *m0;
98422daa 791{
61ec2127 792 int error = 0;
bc2f5859 793 register struct mbuf *m = m0;
98422daa 794
61ec2127 795 if (level != SOL_SOCKET) {
cbe54390
MK
796 if (so->so_proto && so->so_proto->pr_ctloutput)
797 return ((*so->so_proto->pr_ctloutput)
bc2f5859 798 (PRCO_SETOPT, so, level, optname, &m0));
cbe54390
MK
799 error = ENOPROTOOPT;
800 } else {
801 switch (optname) {
98422daa 802
cbe54390
MK
803 case SO_LINGER:
804 if (m == NULL || m->m_len != sizeof (struct linger)) {
805 error = EINVAL;
806 goto bad;
807 }
808 so->so_linger = mtod(m, struct linger *)->l_linger;
809 /* fall thru... */
810
811 case SO_DEBUG:
812 case SO_KEEPALIVE:
813 case SO_DONTROUTE:
814 case SO_USELOOPBACK:
815 case SO_BROADCAST:
816 case SO_REUSEADDR:
f1242a73 817 case SO_REUSEPORT:
97c8f6a8 818 case SO_OOBINLINE:
cbe54390
MK
819 if (m == NULL || m->m_len < sizeof (int)) {
820 error = EINVAL;
821 goto bad;
822 }
823 if (*mtod(m, int *))
824 so->so_options |= optname;
825 else
826 so->so_options &= ~optname;
827 break;
98422daa 828
cbe54390 829 case SO_SNDBUF:
83866636 830 case SO_RCVBUF:
7c4ec3aa 831 case SO_SNDLOWAT:
83866636 832 case SO_RCVLOWAT:
cbe54390
MK
833 if (m == NULL || m->m_len < sizeof (int)) {
834 error = EINVAL;
835 goto bad;
836 }
837 switch (optname) {
838
839 case SO_SNDBUF:
840 case SO_RCVBUF:
7c4ec3aa
MK
841 if (sbreserve(optname == SO_SNDBUF ?
842 &so->so_snd : &so->so_rcv,
843 (u_long) *mtod(m, int *)) == 0) {
cbe54390
MK
844 error = ENOBUFS;
845 goto bad;
846 }
847 break;
848
849 case SO_SNDLOWAT:
7c4ec3aa
MK
850 so->so_snd.sb_lowat = *mtod(m, int *);
851 break;
cbe54390 852 case SO_RCVLOWAT:
7c4ec3aa 853 so->so_rcv.sb_lowat = *mtod(m, int *);
cbe54390 854 break;
fc2cae0b
MK
855 }
856 break;
857
858 case SO_SNDTIMEO:
859 case SO_RCVTIMEO:
860 {
861 struct timeval *tv;
862 short val;
863
864 if (m == NULL || m->m_len < sizeof (*tv)) {
865 error = EINVAL;
866 goto bad;
867 }
868 tv = mtod(m, struct timeval *);
869 if (tv->tv_sec > SHRT_MAX / hz - hz) {
870 error = EDOM;
871 goto bad;
872 }
873 val = tv->tv_sec * hz + tv->tv_usec / tick;
874
875 switch (optname) {
876
cbe54390 877 case SO_SNDTIMEO:
fc2cae0b 878 so->so_snd.sb_timeo = val;
7c4ec3aa 879 break;
cbe54390 880 case SO_RCVTIMEO:
fc2cae0b 881 so->so_rcv.sb_timeo = val;
cbe54390
MK
882 break;
883 }
884 break;
fc2cae0b 885 }
cbe54390
MK
886
887 default:
888 error = ENOPROTOOPT;
889 break;
890 }
f8bddee1 891 if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
bfedcc73
KS
892 (void) ((*so->so_proto->pr_ctloutput)
893 (PRCO_SETOPT, so, level, optname, &m0));
f8bddee1
KB
894 m = NULL; /* freed by protocol */
895 }
98422daa 896 }
61ec2127
SL
897bad:
898 if (m)
899 (void) m_free(m);
900 return (error);
98422daa
SL
901}
902
61ec2127 903sogetopt(so, level, optname, mp)
88a7a62a 904 register struct socket *so;
98422daa 905 int level, optname;
61ec2127 906 struct mbuf **mp;
98422daa 907{
61ec2127 908 register struct mbuf *m;
98422daa 909
cbe54390
MK
910 if (level != SOL_SOCKET) {
911 if (so->so_proto && so->so_proto->pr_ctloutput) {
912 return ((*so->so_proto->pr_ctloutput)
913 (PRCO_GETOPT, so, level, optname, mp));
179cd11f 914 } else
cbe54390
MK
915 return (ENOPROTOOPT);
916 } else {
61ec2127 917 m = m_get(M_WAIT, MT_SOOPTS);
d6e6eea8
MK
918 m->m_len = sizeof (int);
919
cbe54390
MK
920 switch (optname) {
921
922 case SO_LINGER:
923 m->m_len = sizeof (struct linger);
924 mtod(m, struct linger *)->l_onoff =
925 so->so_options & SO_LINGER;
926 mtod(m, struct linger *)->l_linger = so->so_linger;
927 break;
928
929 case SO_USELOOPBACK:
930 case SO_DONTROUTE:
931 case SO_DEBUG:
932 case SO_KEEPALIVE:
933 case SO_REUSEADDR:
f1242a73 934 case SO_REUSEPORT:
cbe54390 935 case SO_BROADCAST:
97c8f6a8 936 case SO_OOBINLINE:
cbe54390
MK
937 *mtod(m, int *) = so->so_options & optname;
938 break;
939
d6e6eea8
MK
940 case SO_TYPE:
941 *mtod(m, int *) = so->so_type;
942 break;
943
de2c74a5
MK
944 case SO_ERROR:
945 *mtod(m, int *) = so->so_error;
946 so->so_error = 0;
947 break;
948
cbe54390
MK
949 case SO_SNDBUF:
950 *mtod(m, int *) = so->so_snd.sb_hiwat;
951 break;
98422daa 952
cbe54390
MK
953 case SO_RCVBUF:
954 *mtod(m, int *) = so->so_rcv.sb_hiwat;
955 break;
956
957 case SO_SNDLOWAT:
958 *mtod(m, int *) = so->so_snd.sb_lowat;
959 break;
960
961 case SO_RCVLOWAT:
962 *mtod(m, int *) = so->so_rcv.sb_lowat;
963 break;
964
965 case SO_SNDTIMEO:
cbe54390 966 case SO_RCVTIMEO:
fc2cae0b
MK
967 {
968 int val = (optname == SO_SNDTIMEO ?
969 so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
970
971 m->m_len = sizeof(struct timeval);
972 mtod(m, struct timeval *)->tv_sec = val / hz;
973 mtod(m, struct timeval *)->tv_usec =
974 (val % hz) / tick;
cbe54390 975 break;
fc2cae0b 976 }
cbe54390
MK
977
978 default:
8011f5df 979 (void)m_free(m);
cbe54390
MK
980 return (ENOPROTOOPT);
981 }
982 *mp = m;
983 return (0);
98422daa 984 }
98422daa
SL
985}
986
edebca28 987sohasoutofband(so)
88a7a62a 988 register struct socket *so;
edebca28 989{
3d190e86 990 struct proc *p;
edebca28 991
a2aebb63
KS
992 if (so->so_pgid < 0)
993 gsignal(-so->so_pgid, SIGURG);
994 else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
3d190e86 995 psignal(p, SIGURG);
b4f32e56 996 selwakeup(&so->so_rcv.sb_sel);
edebca28 997}