4.4BSD snapshot (revision 8.1); add 1993 to copyright
[unix-history] / usr / src / sys / kern / uipc_socket.c
CommitLineData
da7c5cc6 1/*
ec54f0cc
KB
2 * Copyright (c) 1982, 1986, 1988, 1990, 1993
3 * The Regents of the University of California. All rights reserved.
da7c5cc6 4 *
dbf0c423 5 * %sccs.include.redist.c%
5b519e94 6 *
ec54f0cc 7 * @(#)uipc_socket.c 8.1 (Berkeley) %G%
da7c5cc6 8 */
ce9d8eb4 9
38a01dbe
KB
10#include <sys/param.h>
11#include <sys/systm.h>
12#include <sys/proc.h>
13#include <sys/file.h>
14#include <sys/malloc.h>
15#include <sys/mbuf.h>
16#include <sys/domain.h>
17#include <sys/kernel.h>
18#include <sys/protosw.h>
19#include <sys/socket.h>
20#include <sys/socketvar.h>
21#include <sys/resourcevar.h>
ce9d8eb4 22
ce9d8eb4 23/*
cf012934
BJ
24 * Socket operation routines.
25 * These routines are called by the routines in
26 * sys_socket.c or from a system process, and
27 * implement the semantics of socket operations by
28 * switching out to the protocol specific routines.
ce9d8eb4 29 */
a8d3bf7f 30/*ARGSUSED*/
98422daa 31socreate(dom, aso, type, proto)
39f2b629 32 int dom;
ce9d8eb4 33 struct socket **aso;
88a7a62a
SL
34 register int type;
35 int proto;
ce9d8eb4 36{
dff5c020 37 struct proc *p = curproc; /* XXX */
ce9d8eb4
BJ
38 register struct protosw *prp;
39 register struct socket *so;
88a7a62a 40 register int error;
cc15ab5d 41
cc15ab5d 42 if (proto)
8c0650b0 43 prp = pffindproto(dom, proto, type);
cc15ab5d 44 else
4f083fd7 45 prp = pffindtype(dom, type);
2b6d14b8 46 if (prp == 0 || prp->pr_usrreq == 0)
cc15ab5d 47 return (EPROTONOSUPPORT);
cf012934
BJ
48 if (prp->pr_type != type)
49 return (EPROTOTYPE);
a2aebb63
KS
50 MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_WAIT);
51 bzero((caddr_t)so, sizeof(*so));
4f083fd7 52 so->so_type = type;
dff5c020 53 if (p->p_ucred->cr_uid == 0)
62364f0e 54 so->so_state = SS_PRIV;
ce9d8eb4 55 so->so_proto = prp;
88a7a62a
SL
56 error =
57 (*prp->pr_usrreq)(so, PRU_ATTACH,
8c0650b0 58 (struct mbuf *)0, (struct mbuf *)proto, (struct mbuf *)0);
b91acce4 59 if (error) {
90aaea96 60 so->so_state |= SS_NOFDREF;
de48daf3 61 sofree(so);
cc15ab5d 62 return (error);
ce9d8eb4
BJ
63 }
64 *aso = so;
65 return (0);
66}
67
98422daa 68sobind(so, nam)
cf012934
BJ
69 struct socket *so;
70 struct mbuf *nam;
cf012934
BJ
71{
72 int s = splnet();
73 int error;
74
7c4ec3aa
MK
75 error =
76 (*so->so_proto->pr_usrreq)(so, PRU_BIND,
88a7a62a 77 (struct mbuf *)0, nam, (struct mbuf *)0);
cf012934
BJ
78 splx(s);
79 return (error);
80}
81
82solisten(so, backlog)
88a7a62a 83 register struct socket *so;
cf012934
BJ
84 int backlog;
85{
88a7a62a 86 int s = splnet(), error;
cf012934 87
88a7a62a
SL
88 error =
89 (*so->so_proto->pr_usrreq)(so, PRU_LISTEN,
90 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0);
cf012934
BJ
91 if (error) {
92 splx(s);
93 return (error);
94 }
629e51da 95 if (so->so_q == 0)
cf012934 96 so->so_options |= SO_ACCEPTCONN;
cf012934
BJ
97 if (backlog < 0)
98 backlog = 0;
2557c1fc 99 so->so_qlimit = min(backlog, SOMAXCONN);
9e87be97 100 splx(s);
cf012934
BJ
101 return (0);
102}
103
ae921915 104sofree(so)
88a7a62a 105 register struct socket *so;
ae921915
BJ
106{
107
bb73a14e
MK
108 if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
109 return;
90aaea96
BJ
110 if (so->so_head) {
111 if (!soqremque(so, 0) && !soqremque(so, 1))
112 panic("sofree dq");
113 so->so_head = 0;
114 }
4ad99bae 115 sbrelease(&so->so_snd);
88a7a62a 116 sorflush(so);
a2aebb63 117 FREE(so, M_SOCKET);
ae921915
BJ
118}
119
92a533e6 120/*
cc15ab5d
BJ
121 * Close a socket on last file table reference removal.
122 * Initiate disconnect if connected.
123 * Free socket when disconnect complete.
92a533e6 124 */
88a7a62a 125soclose(so)
92a533e6 126 register struct socket *so;
92a533e6 127{
cc15ab5d 128 int s = splnet(); /* conservative */
e58562f2 129 int error = 0;
cc15ab5d 130
90aaea96 131 if (so->so_options & SO_ACCEPTCONN) {
629e51da 132 while (so->so_q0)
26225f25 133 (void) soabort(so->so_q0);
629e51da 134 while (so->so_q)
26225f25 135 (void) soabort(so->so_q);
90aaea96 136 }
cc15ab5d
BJ
137 if (so->so_pcb == 0)
138 goto discard;
139 if (so->so_state & SS_ISCONNECTED) {
140 if ((so->so_state & SS_ISDISCONNECTING) == 0) {
dedd6629 141 error = sodisconnect(so);
88a7a62a
SL
142 if (error)
143 goto drop;
cc15ab5d 144 }
98422daa 145 if (so->so_options & SO_LINGER) {
b8acc34d 146 if ((so->so_state & SS_ISDISCONNECTING) &&
88a7a62a
SL
147 (so->so_state & SS_NBIO))
148 goto drop;
b8acc34d 149 while (so->so_state & SS_ISCONNECTED)
83866636
MK
150 if (error = tsleep((caddr_t)&so->so_timeo,
151 PSOCK | PCATCH, netcls, so->so_linger))
152 break;
72857acf 153 }
cc15ab5d 154 }
89900a09 155drop:
37c0974a 156 if (so->so_pcb) {
88a7a62a
SL
157 int error2 =
158 (*so->so_proto->pr_usrreq)(so, PRU_DETACH,
159 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0);
160 if (error == 0)
161 error = error2;
37c0974a 162 }
cc15ab5d 163discard:
26225f25
SL
164 if (so->so_state & SS_NOFDREF)
165 panic("soclose: NOFDREF");
90aaea96 166 so->so_state |= SS_NOFDREF;
4ad99bae 167 sofree(so);
cc15ab5d 168 splx(s);
88a7a62a 169 return (error);
92a533e6
BJ
170}
171
26225f25
SL
172/*
173 * Must be called at splnet...
174 */
175soabort(so)
176 struct socket *so;
177{
26225f25 178
88a7a62a
SL
179 return (
180 (*so->so_proto->pr_usrreq)(so, PRU_ABORT,
181 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0));
92a533e6
BJ
182}
183
98422daa 184soaccept(so, nam)
88a7a62a 185 register struct socket *so;
cf012934 186 struct mbuf *nam;
2b4b57cd
BJ
187{
188 int s = splnet();
189 int error;
190
26225f25
SL
191 if ((so->so_state & SS_NOFDREF) == 0)
192 panic("soaccept: !NOFDREF");
98422daa 193 so->so_state &= ~SS_NOFDREF;
cf012934 194 error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT,
88a7a62a 195 (struct mbuf *)0, nam, (struct mbuf *)0);
2b4b57cd
BJ
196 splx(s);
197 return (error);
198}
199
98422daa 200soconnect(so, nam)
88a7a62a 201 register struct socket *so;
cf012934 202 struct mbuf *nam;
ce9d8eb4 203{
7bcf9d13 204 int s;
cc15ab5d 205 int error;
ce9d8eb4 206
7bcf9d13
MK
207 if (so->so_options & SO_ACCEPTCONN)
208 return (EOPNOTSUPP);
209 s = splnet();
de2c74a5
MK
210 /*
211 * If protocol is connection-based, can only connect once.
212 * Otherwise, if connected, try to disconnect first.
213 * This allows user to disconnect by connecting to, e.g.,
214 * a null address.
215 */
216 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
217 ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
218 (error = sodisconnect(so))))
cc15ab5d 219 error = EISCONN;
de2c74a5
MK
220 else
221 error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
222 (struct mbuf *)0, nam, (struct mbuf *)0);
cc15ab5d
BJ
223 splx(s);
224 return (error);
ce9d8eb4
BJ
225}
226
88a7a62a
SL
227soconnect2(so1, so2)
228 register struct socket *so1;
229 struct socket *so2;
230{
231 int s = splnet();
232 int error;
233
5a48956d
SL
234 error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2,
235 (struct mbuf *)0, (struct mbuf *)so2, (struct mbuf *)0);
88a7a62a
SL
236 splx(s);
237 return (error);
238}
88a7a62a 239
dedd6629 240sodisconnect(so)
88a7a62a 241 register struct socket *so;
ce9d8eb4 242{
cc15ab5d
BJ
243 int s = splnet();
244 int error;
ce9d8eb4 245
cc15ab5d
BJ
246 if ((so->so_state & SS_ISCONNECTED) == 0) {
247 error = ENOTCONN;
248 goto bad;
ce9d8eb4 249 }
cc15ab5d
BJ
250 if (so->so_state & SS_ISDISCONNECTING) {
251 error = EALREADY;
252 goto bad;
ce9d8eb4 253 }
cf012934 254 error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT,
dedd6629 255 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0);
cc15ab5d
BJ
256bad:
257 splx(s);
258 return (error);
ce9d8eb4
BJ
259}
260
440c48bf 261#define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
cc15ab5d
BJ
262/*
263 * Send on a socket.
264 * If send must go all at once and message is larger than
265 * send buffering, then hard error.
266 * Lock against other senders.
267 * If must go all at once and not enough room now, then
268 * inform user that this would block and do nothing.
8250a099 269 * Otherwise, if nonblocking, send as much as possible.
7c4ec3aa
MK
270 * The data to be sent is described by "uio" if nonzero,
271 * otherwise by the mbuf chain "top" (which must be null
272 * if uio is not). Data provided in mbuf chain must be small
273 * enough to send all at once.
274 *
275 * Returns nonzero on error, timeout or signal; callers
276 * must check for short counts if EINTR/ERESTART are returned.
277 * Data and control buffers are freed on return.
cc15ab5d 278 */
4b9db1f5 279sosend(so, addr, uio, top, control, flags)
ce9d8eb4 280 register struct socket *so;
7c4ec3aa
MK
281 struct mbuf *addr;
282 struct uio *uio;
283 struct mbuf *top;
2967f28e 284 struct mbuf *control;
7c4ec3aa 285 int flags;
ce9d8eb4 286{
dff5c020 287 struct proc *p = curproc; /* XXX */
7c4ec3aa 288 struct mbuf **mp;
2557c1fc 289 register struct mbuf *m;
7c4ec3aa
MK
290 register long space, len, resid;
291 int clen = 0, error, s, dontroute, mlen;
292 int atomic = sosendallatonce(so) || top;
ce9d8eb4 293
7c4ec3aa
MK
294 if (uio)
295 resid = uio->uio_resid;
296 else
297 resid = top->m_pkthdr.len;
88a7a62a
SL
298 dontroute =
299 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
300 (so->so_proto->pr_flags & PR_ATOMIC);
dff5c020 301 p->p_stats->p_ru.ru_msgsnd++;
2967f28e 302 if (control)
7c4ec3aa 303 clen = control->m_len;
cc15ab5d
BJ
304#define snderr(errno) { error = errno; splx(s); goto release; }
305
8250a099 306restart:
440c48bf 307 if (error = sblock(&so->so_snd, SBLOCKWAIT(flags)))
7c4ec3aa 308 goto out;
8250a099
MK
309 do {
310 s = splnet();
af9c562f 311 if (so->so_state & SS_CANTSENDMORE)
8250a099 312 snderr(EPIPE);
a2aebb63
KS
313 if (so->so_error)
314 snderr(so->so_error);
8250a099 315 if ((so->so_state & SS_ISCONNECTED) == 0) {
a2aebb63 316 if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
f3bf27ac
KS
317 if ((so->so_state & SS_ISCONFIRMING) == 0 &&
318 !(resid == 0 && clen != 0))
a2aebb63 319 snderr(ENOTCONN);
7c4ec3aa 320 } else if (addr == 0)
8250a099
MK
321 snderr(EDESTADDRREQ);
322 }
7c4ec3aa 323 space = sbspace(&so->so_snd);
8250a099 324 if (flags & MSG_OOB)
7c4ec3aa 325 space += 1024;
440c48bf
KM
326 if (atomic && resid > so->so_snd.sb_hiwat ||
327 clen > so->so_snd.sb_hiwat)
328 snderr(EMSGSIZE);
329 if (space < resid + clen && uio &&
7c4ec3aa 330 (atomic || space < so->so_snd.sb_lowat || space < clen)) {
7c4ec3aa
MK
331 if (so->so_state & SS_NBIO)
332 snderr(EWOULDBLOCK);
333 sbunlock(&so->so_snd);
4b9db1f5 334 error = sbwait(&so->so_snd);
7c4ec3aa
MK
335 splx(s);
336 if (error)
337 goto out;
338 goto restart;
8250a099 339 }
4c078bb2 340 splx(s);
8250a099 341 mp = &top;
7c4ec3aa 342 space -= clen;
4b9db1f5
MK
343 do {
344 if (uio == NULL) {
7c4ec3aa
MK
345 /*
346 * Data is prepackaged in "top".
347 */
348 resid = 0;
349 if (flags & MSG_EOR)
350 top->m_flags |= M_EOR;
4b9db1f5 351 } else do {
2557c1fc
MK
352 if (top == 0) {
353 MGETHDR(m, M_WAIT, MT_DATA);
354 mlen = MHLEN;
355 m->m_pkthdr.len = 0;
356 m->m_pkthdr.rcvif = (struct ifnet *)0;
357 } else {
358 MGET(m, M_WAIT, MT_DATA);
359 mlen = MLEN;
360 }
7c4ec3aa 361 if (resid >= MINCLSIZE && space >= MCLBYTES) {
2557c1fc
MK
362 MCLGET(m, M_WAIT);
363 if ((m->m_flags & M_EXT) == 0)
8250a099 364 goto nopages;
2557c1fc
MK
365 mlen = MCLBYTES;
366#ifdef MAPPED_MBUFS
7c4ec3aa 367 len = min(MCLBYTES, resid);
2557c1fc 368#else
9859687a 369 if (atomic && top == 0) {
7c4ec3aa
MK
370 len = min(MCLBYTES - max_hdr, resid);
371 m->m_data += max_hdr;
415a9324
KS
372 } else
373 len = min(MCLBYTES, resid);
2557c1fc
MK
374#endif
375 space -= MCLBYTES;
8250a099
MK
376 } else {
377nopages:
7c4ec3aa 378 len = min(min(mlen, resid), space);
8c0650b0 379 space -= len;
2557c1fc
MK
380 /*
381 * For datagram protocols, leave room
382 * for protocol headers in first mbuf.
383 */
84efcd38 384 if (atomic && top == 0 && len < mlen)
2557c1fc 385 MH_ALIGN(m, len);
8250a099 386 }
179cd11f 387 error = uiomove(mtod(m, caddr_t), (int)len, uio);
7c4ec3aa 388 resid = uio->uio_resid;
8250a099
MK
389 m->m_len = len;
390 *mp = m;
2557c1fc 391 top->m_pkthdr.len += len;
8250a099
MK
392 if (error)
393 goto release;
394 mp = &m->m_next;
7c4ec3aa
MK
395 if (resid <= 0) {
396 if (flags & MSG_EOR)
2557c1fc 397 top->m_flags |= M_EOR;
af9c562f 398 break;
2557c1fc
MK
399 }
400 } while (space > 0 && atomic);
401 if (dontroute)
402 so->so_options |= SO_DONTROUTE;
403 s = splnet(); /* XXX */
404 error = (*so->so_proto->pr_usrreq)(so,
405 (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
7c4ec3aa 406 top, addr, control);
2557c1fc
MK
407 splx(s);
408 if (dontroute)
409 so->so_options &= ~SO_DONTROUTE;
7c4ec3aa
MK
410 clen = 0;
411 control = 0;
2557c1fc
MK
412 top = 0;
413 mp = &top;
2557c1fc
MK
414 if (error)
415 goto release;
7c4ec3aa
MK
416 } while (resid && space > 0);
417 } while (resid);
cc15ab5d 418
ce9d8eb4 419release:
cc15ab5d 420 sbunlock(&so->so_snd);
7c4ec3aa 421out:
0f90f987
BJ
422 if (top)
423 m_freem(top);
7c4ec3aa
MK
424 if (control)
425 m_freem(control);
ce9d8eb4
BJ
426 return (error);
427}
428
c34d38f4
MK
429/*
430 * Implement receive operations on a socket.
431 * We depend on the way that records are added to the sockbuf
432 * by sbappend*. In particular, each record (mbufs linked through m_next)
433 * must begin with an address if the protocol so specifies,
7c4ec3aa
MK
434 * followed by an optional mbuf or mbufs containing ancillary data,
435 * and then zero or more mbufs of data.
c34d38f4
MK
436 * In order to avoid blocking network interrupts for the entire time here,
437 * we splx() while doing the actual copy to user space.
438 * Although the sockbuf is locked, new data may still be appended,
439 * and thus we must maintain consistency of the sockbuf during that time.
179cd11f 440 *
7c4ec3aa 441 * The caller may receive the data as a single mbuf chain by supplying
4b9db1f5 442 * an mbuf **mp0 for use in returning the chain. The uio is then used
7c4ec3aa 443 * only for the count in uio_resid.
c34d38f4 444 */
/*
 * Arguments, as used by the code below:
 *	paddr	 if non-null, receives the record's leading MT_SONAME mbuf
 *		 (a copy of it under MSG_PEEK) for PR_ADDR protocols;
 *		 it is cleared on entry
 *	uio	 destination for copied-out data; uio_resid bounds the
 *		 amount transferred
 *	mp0	 if non-null, data mbufs are handed back through it instead
 *		 of being copied out through uio
 *	controlp if non-null, receives the MT_CONTROL mbufs found ahead of
 *		 the data; it is cleared on entry
 *	flagsp	 if non-null, supplies the MSG_* flags on entry (MSG_EOR is
 *		 masked off) and has the resulting flags or'ed into it when
 *		 the receive runs to its normal completion
 * Returns 0 or an errno value.
 *
 * NOTE(review): as it appears here the function is not self-consistent:
 * "record_eor" is used but never declared, "goto dontblock" has no
 * visible label, the "break" after the blocking test is not inside any
 * loop (and the brace that closes that section has no opener), and the
 * final PRU_RCVD call passes one more argument than every other
 * pr_usrreq call.  Confirm against the authoritative revision of this
 * file before relying on or modifying this text.
 */
4b9db1f5 445soreceive(so, paddr, uio, mp0, controlp, flagsp)
ce9d8eb4 446 register struct socket *so;
7c4ec3aa
MK
447 struct mbuf **paddr;
448 struct uio *uio;
4b9db1f5 449 struct mbuf **mp0;
7c4ec3aa 450 struct mbuf **controlp;
2557c1fc 451 int *flagsp;
ce9d8eb4 452{
4b9db1f5
MK
453 register struct mbuf *m, **mp;
454 register int flags, len, error, s, offset;
88a7a62a 455 struct protosw *pr = so->so_proto;
7c4ec3aa
MK
456 struct mbuf *nextrecord;
457 int moff, type;
34827683 458 int orig_resid = uio->uio_resid;
88a7a62a 459
4b9db1f5 460 mp = mp0;
7c4ec3aa
MK
461 if (paddr)
462 *paddr = 0;
2557c1fc
MK
463 if (controlp)
464 *controlp = 0;
465 if (flagsp)
466 flags = *flagsp &~ MSG_EOR;
179cd11f 467 else
2557c1fc 468 flags = 0;
/*
 * Out-of-band receive: the protocol fills an mbuf chain via PRU_RCVOOB,
 * which is copied out and freed; the socket buffer is not touched.
 */
88a7a62a 469 if (flags & MSG_OOB) {
cce93e4b 470 m = m_get(M_WAIT, MT_DATA);
88a7a62a 471 error = (*pr->pr_usrreq)(so, PRU_RCVOOB,
de2c74a5 472 m, (struct mbuf *)(flags & MSG_PEEK), (struct mbuf *)0);
a8d3bf7f 473 if (error)
5fe6f9d1 474 goto bad;
970108c7 475 do {
7c4ec3aa
MK
476 error = uiomove(mtod(m, caddr_t),
477 (int) min(uio->uio_resid, m->m_len), uio);
970108c7 478 m = m_free(m);
a8d3bf7f 479 } while (uio->uio_resid && error == 0 && m);
5fe6f9d1 480bad:
970108c7 481 if (m)
39d536e6 482 m_freem(m);
a8d3bf7f 483 return (error);
970108c7 484 }
7c4ec3aa
MK
485 if (mp)
486 *mp = (struct mbuf *)0;
4b9db1f5 487 if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
2557c1fc
MK
488 (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
489 (struct mbuf *)0, (struct mbuf *)0);
ce9d8eb4 490
cc15ab5d 491restart:
440c48bf 492 if (error = sblock(&so->so_rcv, SBLOCKWAIT(flags)))
83866636 493 return (error);
cc15ab5d
BJ
494 s = splnet();
495
a2aebb63 496 m = so->so_rcv.sb_mb;
ba4350f5
MK
497 /*
498 * If we have less data than requested, block awaiting more
499 * (subject to any timeout) if:
500 * 1. the current count is less than the low water mark, or
501 * 2. MSG_WAITALL is set, and it is possible to do the entire
502 * receive operation at once if we block (resid <= hiwat).
440c48bf 503 * 3. MSG_DONTWAIT is not set
ba4350f5
MK
504 * If MSG_WAITALL is set but resid is larger than the receive buffer,
505 * we have to do the receive in sections, and thus risk returning
506 * a short count if a timeout or signal occurs after we start.
507 */
/*
 * NOTE(review): the condition below ends without an opening brace, so
 * its body is only the following "if ... break;", and that "break" is
 * not enclosed by a loop or switch.  The "}" after "goto restart"
 * further down has no matching "{".  Looks like lines from different
 * revisions were combined -- confirm.
 */
440c48bf
KM
508 if (m == 0 || ((flags & MSG_DONTWAIT) == 0 &&
509 so->so_rcv.sb_cc < uio->uio_resid) &&
ba4350f5 510 (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
c05ef6cd
KS
511 ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)))
512 if (m && (m->m_nextpkt || (m->m_flags & M_EOR) ||
513 m->m_type == MT_OOBDATA || m->m_type == MT_CONTROL))
514 break;
7c4ec3aa
MK
515#ifdef DIAGNOSTIC
516 if (m == 0 && so->so_rcv.sb_cc)
a2aebb63 517 panic("receive 1");
7c4ec3aa 518#endif
4c078bb2 519 if (so->so_error) {
95c435b0 520 if (m)
d4c3a4dd 521 goto dontblock;
4c078bb2 522 error = so->so_error;
95c435b0
MK
523 if ((flags & MSG_PEEK) == 0)
524 so->so_error = 0;
4c078bb2
BJ
525 goto release;
526 }
95c435b0
MK
527 if (so->so_state & SS_CANTRCVMORE) {
528 if (m)
d4c3a4dd 529 goto dontblock;
95c435b0
MK
530 else
531 goto release;
532 }
629e51da 533 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
f02d4eaa
KB
534 (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
535 error = ENOTCONN;
536 goto release;
537 }
4b9db1f5 538 if (uio->uio_resid == 0)
c34d38f4 539 goto release;
440c48bf 540 if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
f02d4eaa
KB
541 error = EWOULDBLOCK;
542 goto release;
543 }
/* Drop the lock, wait for the receive buffer to change, then retry. */
cc15ab5d 544 sbunlock(&so->so_rcv);
4b9db1f5 545 error = sbwait(&so->so_rcv);
a4f6d93d 546 splx(s);
7c4ec3aa
MK
547 if (error)
548 return (error);
cc15ab5d 549 goto restart;
ce9d8eb4 550 }
0904b863
KM
551 if (uio->uio_procp)
552 uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
2557c1fc 553 nextrecord = m->m_nextpkt;
/* NOTE(review): record_eor is not among the locals declared above. */
c05ef6cd 554 record_eor = m->m_flags & M_EOR;
/*
 * For PR_ADDR protocols the first mbuf of the record is the sender's
 * address: copy it when peeking, otherwise unlink it and either hand
 * it to the caller or free it.
 */
88a7a62a 555 if (pr->pr_flags & PR_ADDR) {
7c4ec3aa 556#ifdef DIAGNOSTIC
c34d38f4 557 if (m->m_type != MT_SONAME)
261a8548 558 panic("receive 1a");
7c4ec3aa 559#endif
34827683 560 orig_resid = 0;
261a8548 561 if (flags & MSG_PEEK) {
7c4ec3aa
MK
562 if (paddr)
563 *paddr = m_copy(m, 0, m->m_len);
c34d38f4 564 m = m->m_next;
261a8548 565 } else {
c34d38f4 566 sbfree(&so->so_rcv, m);
7c4ec3aa
MK
567 if (paddr) {
568 *paddr = m;
6ff43975 569 so->so_rcv.sb_mb = m->m_next;
c34d38f4 570 m->m_next = 0;
6ff43975 571 m = so->so_rcv.sb_mb;
c34d38f4 572 } else {
6ff43975
MK
573 MFREE(m, so->so_rcv.sb_mb);
574 m = so->so_rcv.sb_mb;
c34d38f4 575 }
88a7a62a 576 }
cc15ab5d 577 }
/*
 * Next come any control mbufs, handled the same way; SCM_RIGHTS
 * messages are passed through the domain's externalize hook before
 * being handed to the caller.
 */
7c4ec3aa 578 while (m && m->m_type == MT_CONTROL && error == 0) {
2557c1fc
MK
579 if (flags & MSG_PEEK) {
580 if (controlp)
581 *controlp = m_copy(m, 0, m->m_len);
582 m = m->m_next;
583 } else {
584 sbfree(&so->so_rcv, m);
585 if (controlp) {
e8f8de91
KS
586 if (pr->pr_domain->dom_externalize &&
587 mtod(m, struct cmsghdr *)->cmsg_type ==
588 SCM_RIGHTS)
7c4ec3aa 589 error = (*pr->pr_domain->dom_externalize)(m);
2557c1fc
MK
590 *controlp = m;
591 so->so_rcv.sb_mb = m->m_next;
592 m->m_next = 0;
593 m = so->so_rcv.sb_mb;
594 } else {
595 MFREE(m, so->so_rcv.sb_mb);
596 m = so->so_rcv.sb_mb;
597 }
598 }
34827683
KM
599 if (controlp) {
600 orig_resid = 0;
7c4ec3aa 601 controlp = &(*controlp)->m_next;
34827683 602 }
2557c1fc 603 }
7c4ec3aa 604 if (m) {
ba4350f5
MK
605 if ((flags & MSG_PEEK) == 0)
606 m->m_nextpkt = nextrecord;
7c4ec3aa 607 type = m->m_type;
415a9324
KS
608 if (type == MT_OOBDATA)
609 flags |= MSG_OOB;
7c4ec3aa 610 }
/*
 * Main data loop.  "moff" is the offset into the current mbuf and
 * "offset" the distance covered so far; both only advance when
 * peeking, since otherwise consumed data is removed from the buffer.
 */
970108c7 611 moff = 0;
dd1ca18d 612 offset = 0;
415a9324
KS
613 while (m && uio->uio_resid > 0 && error == 0) {
614 if (m->m_type == MT_OOBDATA) {
615 if (type != MT_OOBDATA)
616 break;
617 } else if (type == MT_OOBDATA)
618 break;
7c4ec3aa 619#ifdef DIAGNOSTIC
2557c1fc 620 else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
c34d38f4 621 panic("receive 3");
7c4ec3aa 622#endif
32a43ee2 623 so->so_state &= ~SS_RCVATMARK;
4b9db1f5 624 len = uio->uio_resid;
dd1ca18d
MK
625 if (so->so_oobmark && len > so->so_oobmark - offset)
626 len = so->so_oobmark - offset;
8c0650b0 627 if (len > m->m_len - moff)
970108c7 628 len = m->m_len - moff;
7c4ec3aa
MK
629 /*
630 * If mp is set, just pass back the mbufs.
631 * Otherwise copy them out via the uio, then free.
632 * Sockbuf must be consistent here (points to current mbuf,
633 * it points to next record) when we drop priority;
634 * we must note any additions to the sockbuf when we
635 * block interrupts again.
636 */
637 if (mp == 0) {
638 splx(s);
639 error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
7c4ec3aa 640 s = splnet();
4b9db1f5
MK
641 } else
642 uio->uio_resid -= len;
8c0650b0 643 if (len == m->m_len - moff) {
c34d38f4
MK
644 if (flags & MSG_PEEK) {
645 m = m->m_next;
646 moff = 0;
647 } else {
2557c1fc 648 nextrecord = m->m_nextpkt;
6ff43975 649 sbfree(&so->so_rcv, m);
7c4ec3aa
MK
650 if (mp) {
651 *mp = m;
652 mp = &m->m_next;
4b9db1f5
MK
653 so->so_rcv.sb_mb = m = m->m_next;
654 *mp = (struct mbuf *)0;
7c4ec3aa
MK
655 } else {
656 MFREE(m, so->so_rcv.sb_mb);
657 m = so->so_rcv.sb_mb;
658 }
6ff43975 659 if (m)
2557c1fc 660 m->m_nextpkt = nextrecord;
c34d38f4 661 }
ce9d8eb4 662 } else {
88a7a62a 663 if (flags & MSG_PEEK)
970108c7
BJ
664 moff += len;
665 else {
4b9db1f5
MK
666 if (mp)
667 *mp = m_copym(m, 0, len, M_WAIT);
2557c1fc 668 m->m_data += len;
970108c7
BJ
669 m->m_len -= len;
670 so->so_rcv.sb_cc -= len;
671 }
ce9d8eb4 672 }
dd1ca18d
MK
673 if (so->so_oobmark) {
674 if ((flags & MSG_PEEK) == 0) {
675 so->so_oobmark -= len;
676 if (so->so_oobmark == 0) {
677 so->so_state |= SS_RCVATMARK;
678 break;
679 }
a660cd8d 680 } else {
dd1ca18d 681 offset += len;
a660cd8d
CT
682 if (offset == so->so_oobmark)
683 break;
684 }
970108c7 685 }
c05ef6cd
KS
686 if (m == 0 && record_eor) {
687 flags |= record_eor;
2967f28e 688 break;
c05ef6cd 689 }
7c4ec3aa
MK
690 /*
691 * If the MSG_WAITALL flag is set (for non-atomic socket),
4b9db1f5 692 * we must not quit until "uio->uio_resid == 0" or an error
7c4ec3aa 693 * termination. If a signal/timeout occurs, return
4b9db1f5 694 * with a short count but without error.
7c4ec3aa
MK
695 * Keep sockbuf locked against other readers.
696 */
4b9db1f5 697 while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
c05ef6cd 698 !(flags & MSG_OOB) && !sosendallatonce(so)) {
95c435b0
MK
699 if (so->so_error || so->so_state & SS_CANTRCVMORE)
700 break;
7c4ec3aa
MK
701 error = sbwait(&so->so_rcv);
702 if (error) {
703 sbunlock(&so->so_rcv);
704 splx(s);
7c4ec3aa
MK
705 return (0);
706 }
c05ef6cd 707 if (m = so->so_rcv.sb_mb) {
7c4ec3aa 708 nextrecord = m->m_nextpkt;
c05ef6cd
KS
709 record_eor |= m->m_flags & M_EOR;
710 }
7c4ec3aa 711 }
261a8548 712 }
34827683
KM
713
/*
 * Data left over in the record of an atomic protocol is reported as
 * truncation and, unless peeking, the rest of the record is dropped.
 */
714 if (m && pr->pr_flags & PR_ATOMIC) {
715 flags |= MSG_TRUNC;
716 if ((flags & MSG_PEEK) == 0)
717 (void) sbdroprecord(&so->so_rcv);
718 }
261a8548 719 if ((flags & MSG_PEEK) == 0) {
491e9020 720 if (m == 0)
261a8548 721 so->so_rcv.sb_mb = nextrecord;
/*
 * NOTE(review): this pr_usrreq call passes six arguments; every other
 * call in this file passes five.  Confirm which is intended.
 */
261a8548
MK
722 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
723 (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
a2aebb63
KS
724 (struct mbuf *)flags, (struct mbuf *)0,
725 (struct mbuf *)0);
261a8548 726 }
/*
 * Nothing was transferred although something was requested, no record
 * boundary was seen and the peer can still send: go around again.
 */
34827683
KM
727 if (orig_resid == uio->uio_resid && orig_resid &&
728 (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
729 sbunlock(&so->so_rcv);
730 splx(s);
731 goto restart;
732 }
733
2557c1fc
MK
734 if (flagsp)
735 *flagsp |= flags;
cc15ab5d 736release:
ae921915 737 sbunlock(&so->so_rcv);
cc15ab5d 738 splx(s);
ae921915 739 return (error);
92a533e6
BJ
740}
741
98422daa 742soshutdown(so, how)
88a7a62a
SL
743 register struct socket *so;
744 register int how;
98422daa 745{
88a7a62a 746 register struct protosw *pr = so->so_proto;
98422daa
SL
747
748 how++;
88a7a62a
SL
749 if (how & FREAD)
750 sorflush(so);
98422daa 751 if (how & FWRITE)
88a7a62a
SL
752 return ((*pr->pr_usrreq)(so, PRU_SHUTDOWN,
753 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0));
98422daa
SL
754 return (0);
755}
756
88a7a62a
SL
757sorflush(so)
758 register struct socket *so;
759{
760 register struct sockbuf *sb = &so->so_rcv;
761 register struct protosw *pr = so->so_proto;
762 register int s;
763 struct sockbuf asb;
764
83866636 765 sb->sb_flags |= SB_NOINTR;
440c48bf 766 (void) sblock(sb, M_WAITOK);
88a7a62a
SL
767 s = splimp();
768 socantrcvmore(so);
769 sbunlock(sb);
770 asb = *sb;
771 bzero((caddr_t)sb, sizeof (*sb));
772 splx(s);
261a8548
MK
773 if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
774 (*pr->pr_domain->dom_dispose)(asb.sb_mb);
88a7a62a
SL
775 sbrelease(&asb);
776}
777
bc2f5859 778sosetopt(so, level, optname, m0)
88a7a62a 779 register struct socket *so;
98422daa 780 int level, optname;
bc2f5859 781 struct mbuf *m0;
98422daa 782{
61ec2127 783 int error = 0;
bc2f5859 784 register struct mbuf *m = m0;
98422daa 785
61ec2127 786 if (level != SOL_SOCKET) {
cbe54390
MK
787 if (so->so_proto && so->so_proto->pr_ctloutput)
788 return ((*so->so_proto->pr_ctloutput)
bc2f5859 789 (PRCO_SETOPT, so, level, optname, &m0));
cbe54390
MK
790 error = ENOPROTOOPT;
791 } else {
792 switch (optname) {
98422daa 793
cbe54390
MK
794 case SO_LINGER:
795 if (m == NULL || m->m_len != sizeof (struct linger)) {
796 error = EINVAL;
797 goto bad;
798 }
799 so->so_linger = mtod(m, struct linger *)->l_linger;
800 /* fall thru... */
801
802 case SO_DEBUG:
803 case SO_KEEPALIVE:
804 case SO_DONTROUTE:
805 case SO_USELOOPBACK:
806 case SO_BROADCAST:
807 case SO_REUSEADDR:
f1242a73 808 case SO_REUSEPORT:
97c8f6a8 809 case SO_OOBINLINE:
cbe54390
MK
810 if (m == NULL || m->m_len < sizeof (int)) {
811 error = EINVAL;
812 goto bad;
813 }
814 if (*mtod(m, int *))
815 so->so_options |= optname;
816 else
817 so->so_options &= ~optname;
818 break;
98422daa 819
cbe54390 820 case SO_SNDBUF:
83866636 821 case SO_RCVBUF:
7c4ec3aa 822 case SO_SNDLOWAT:
83866636 823 case SO_RCVLOWAT:
cbe54390
MK
824 if (m == NULL || m->m_len < sizeof (int)) {
825 error = EINVAL;
826 goto bad;
827 }
828 switch (optname) {
829
830 case SO_SNDBUF:
831 case SO_RCVBUF:
7c4ec3aa
MK
832 if (sbreserve(optname == SO_SNDBUF ?
833 &so->so_snd : &so->so_rcv,
834 (u_long) *mtod(m, int *)) == 0) {
cbe54390
MK
835 error = ENOBUFS;
836 goto bad;
837 }
838 break;
839
840 case SO_SNDLOWAT:
7c4ec3aa
MK
841 so->so_snd.sb_lowat = *mtod(m, int *);
842 break;
cbe54390 843 case SO_RCVLOWAT:
7c4ec3aa 844 so->so_rcv.sb_lowat = *mtod(m, int *);
cbe54390 845 break;
fc2cae0b
MK
846 }
847 break;
848
849 case SO_SNDTIMEO:
850 case SO_RCVTIMEO:
851 {
852 struct timeval *tv;
853 short val;
854
855 if (m == NULL || m->m_len < sizeof (*tv)) {
856 error = EINVAL;
857 goto bad;
858 }
859 tv = mtod(m, struct timeval *);
860 if (tv->tv_sec > SHRT_MAX / hz - hz) {
861 error = EDOM;
862 goto bad;
863 }
864 val = tv->tv_sec * hz + tv->tv_usec / tick;
865
866 switch (optname) {
867
cbe54390 868 case SO_SNDTIMEO:
fc2cae0b 869 so->so_snd.sb_timeo = val;
7c4ec3aa 870 break;
cbe54390 871 case SO_RCVTIMEO:
fc2cae0b 872 so->so_rcv.sb_timeo = val;
cbe54390
MK
873 break;
874 }
875 break;
fc2cae0b 876 }
cbe54390
MK
877
878 default:
879 error = ENOPROTOOPT;
880 break;
881 }
d4c3a4dd 882 m = 0;
bfedcc73
KS
883 if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput)
884 (void) ((*so->so_proto->pr_ctloutput)
885 (PRCO_SETOPT, so, level, optname, &m0));
98422daa 886 }
61ec2127
SL
887bad:
888 if (m)
889 (void) m_free(m);
890 return (error);
98422daa
SL
891}
892
61ec2127 893sogetopt(so, level, optname, mp)
88a7a62a 894 register struct socket *so;
98422daa 895 int level, optname;
61ec2127 896 struct mbuf **mp;
98422daa 897{
61ec2127 898 register struct mbuf *m;
98422daa 899
cbe54390
MK
900 if (level != SOL_SOCKET) {
901 if (so->so_proto && so->so_proto->pr_ctloutput) {
902 return ((*so->so_proto->pr_ctloutput)
903 (PRCO_GETOPT, so, level, optname, mp));
179cd11f 904 } else
cbe54390
MK
905 return (ENOPROTOOPT);
906 } else {
61ec2127 907 m = m_get(M_WAIT, MT_SOOPTS);
d6e6eea8
MK
908 m->m_len = sizeof (int);
909
cbe54390
MK
910 switch (optname) {
911
912 case SO_LINGER:
913 m->m_len = sizeof (struct linger);
914 mtod(m, struct linger *)->l_onoff =
915 so->so_options & SO_LINGER;
916 mtod(m, struct linger *)->l_linger = so->so_linger;
917 break;
918
919 case SO_USELOOPBACK:
920 case SO_DONTROUTE:
921 case SO_DEBUG:
922 case SO_KEEPALIVE:
923 case SO_REUSEADDR:
f1242a73 924 case SO_REUSEPORT:
cbe54390 925 case SO_BROADCAST:
97c8f6a8 926 case SO_OOBINLINE:
cbe54390
MK
927 *mtod(m, int *) = so->so_options & optname;
928 break;
929
d6e6eea8
MK
930 case SO_TYPE:
931 *mtod(m, int *) = so->so_type;
932 break;
933
de2c74a5
MK
934 case SO_ERROR:
935 *mtod(m, int *) = so->so_error;
936 so->so_error = 0;
937 break;
938
cbe54390
MK
939 case SO_SNDBUF:
940 *mtod(m, int *) = so->so_snd.sb_hiwat;
941 break;
98422daa 942
cbe54390
MK
943 case SO_RCVBUF:
944 *mtod(m, int *) = so->so_rcv.sb_hiwat;
945 break;
946
947 case SO_SNDLOWAT:
948 *mtod(m, int *) = so->so_snd.sb_lowat;
949 break;
950
951 case SO_RCVLOWAT:
952 *mtod(m, int *) = so->so_rcv.sb_lowat;
953 break;
954
955 case SO_SNDTIMEO:
cbe54390 956 case SO_RCVTIMEO:
fc2cae0b
MK
957 {
958 int val = (optname == SO_SNDTIMEO ?
959 so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
960
961 m->m_len = sizeof(struct timeval);
962 mtod(m, struct timeval *)->tv_sec = val / hz;
963 mtod(m, struct timeval *)->tv_usec =
964 (val % hz) / tick;
cbe54390 965 break;
fc2cae0b 966 }
cbe54390
MK
967
968 default:
8011f5df 969 (void)m_free(m);
cbe54390
MK
970 return (ENOPROTOOPT);
971 }
972 *mp = m;
973 return (0);
98422daa 974 }
98422daa
SL
975}
976
edebca28 977sohasoutofband(so)
88a7a62a 978 register struct socket *so;
edebca28 979{
3d190e86 980 struct proc *p;
edebca28 981
a2aebb63
KS
982 if (so->so_pgid < 0)
983 gsignal(-so->so_pgid, SIGURG);
984 else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
3d190e86 985 psignal(p, SIGURG);
b4f32e56 986 selwakeup(&so->so_rcv.sb_sel);
edebca28 987}