BSD 4_4_Lite2 release
[unix-history] / usr / src / sys / kern / uipc_socket.c
index 73332eb..a9c5453 100644 (file)
-/*     uipc_socket.c   4.33    82/03/12        */
-
-#include "../h/param.h"
-#include "../h/systm.h"
-#include "../h/dir.h"
-#include "../h/user.h"
-#include "../h/proc.h"
-#include "../h/file.h"
-#include "../h/inode.h"
-#include "../h/buf.h"
-#include "../h/mbuf.h"
-#include "../h/protosw.h"
-#include "../h/socket.h"
-#include "../h/socketvar.h"
-#include "../h/stat.h"
-#include "../h/ioctl.h"
-#include "../net/in.h"
-#include "../net/in_systm.h"
-
 /*
 /*
- * Socket support routines.
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the University of
+ *     California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
  *
  *
- * DEAL WITH INTERRUPT NOTIFICATION.
+ *     @(#)uipc_socket.c       8.6 (Berkeley) 5/2/95
  */
 
  */
 
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/kernel.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/resourcevar.h>
+
 /*
 /*
- * Create a socket.
+ * Socket operation routines.
+ * These routines are called by the routines in
+ * sys_socket.c or from a system process, and
+ * implement the semantics of socket operations by
+ * switching out to the protocol specific routines.
  */
  */
-socreate(aso, type, asp, asa, options)
+/*ARGSUSED*/
+int                                    /* 0 on success, otherwise an errno value */
+socreate(dom, aso, type, proto)        /* allocate a socket and attach it to a protocol */
+       int dom;                        /* handed to pffindproto()/pffindtype() for the lookup */
         struct socket **aso;           /* out: receives the new socket, on success only */
         struct socket **aso;
-       int type;
-       struct sockproto *asp;
-       struct sockaddr *asa;
-       int options;
+       register int type;              /* must equal the selected entry's pr_type */
+       int proto;                      /* nonzero: look up by protocol; also passed to PRU_ATTACH */
 {
 {
+       struct proc *p = curproc;               /* XXX */
         register struct protosw *prp;
         register struct socket *so;
         register struct protosw *prp;
         register struct socket *so;
-       struct mbuf *m;
-       int pf, proto, error;
-COUNT(SOCREATE);
+       register int error;
 
 
-       /*
-        * Use process standard protocol/protocol family if none
-        * specified by address argument.
-        */
-       if (asp == 0) {
-               pf = PF_INET;           /* should be u.u_protof */
-               proto = 0;
-       } else {
-               pf = asp->sp_family;
-               proto = asp->sp_protocol;
-       }
-
-       /*
-        * If protocol specified, look for it, otherwise
-        * for a protocol of the correct type in the right family.
-        */
         if (proto)
         if (proto)
-               prp = pffindproto(pf, proto);
+               prp = pffindproto(dom, proto, type);
         else
         else
-               prp = pffindtype(pf, type);
-       if (prp == 0)
+               prp = pffindtype(dom, type);
+       if (prp == 0 || prp->pr_usrreq == 0)    /* no entry, or entry has no request handler */
                 return (EPROTONOSUPPORT);
                 return (EPROTONOSUPPORT);
-
-       /*
-        * Get a socket structure.
-        */
-       m = m_getclr(M_WAIT);
-       if (m == 0)
-               return (ENOBUFS);
-       so = mtod(m, struct socket *);
-       so->so_options = options;
-
-       /*
-        * Attach protocol to socket, initializing
-        * and reserving resources.
-        */
+       if (prp->pr_type != type)
+               return (EPROTOTYPE);
+       MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_WAIT);     /* NOTE(review): result unchecked; presumably M_WAIT cannot fail - confirm */
+       bzero((caddr_t)so, sizeof(*so));
+       so->so_type = type;
+       if (p->p_ucred->cr_uid == 0)    /* creating process has uid 0 */
+               so->so_state = SS_PRIV;
         so->so_proto = prp;
         so->so_proto = prp;
-       error = (*prp->pr_usrreq)(so, PRU_ATTACH, 0, asa);
+       error = (*prp->pr_usrreq)(so, PRU_ATTACH, (struct mbuf *)0,
+           (struct mbuf *)(long)proto, (struct mbuf *)0);      /* proto travels as an integer disguised as a pointer */
         if (error) {
         if (error) {
-               (void) m_free(dtom(so));
+               so->so_state |= SS_NOFDREF;     /* sofree() does nothing unless SS_NOFDREF is set */
+               sofree(so);
                 return (error);
         }
         *aso = so;
         return (0);
 }
 
                 return (error);
         }
         *aso = so;
         return (0);
 }
 
-sofree(so)
+int                            /* returns the result of the PRU_BIND request */
+sobind(so, nam)                /* ask the socket's protocol to bind "nam" to "so" */
         struct socket *so;
         struct socket *so;
+       struct mbuf *nam;       /* forwarded unchanged as the request's second mbuf argument */
+{
+       int s = splnet();       /* value saved here is handed back to splx() before returning */
+       int error;
+
+       error =
+           (*so->so_proto->pr_usrreq)(so, PRU_BIND,
+               (struct mbuf *)0, nam, (struct mbuf *)0);
+       splx(s);
+       return (error);
+}
+
+int                            /* 0, or the error from the PRU_LISTEN request */
+solisten(so, backlog)          /* issue PRU_LISTEN, then record the accept-queue limit */
+       register struct socket *so;
+       int backlog;            /* clamped below to the range [0, SOMAXCONN] */
+{
+       int s = splnet(), error;
+
+       error =
+           (*so->so_proto->pr_usrreq)(so, PRU_LISTEN,
+               (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0);
+       if (error) {
+               splx(s);        /* every exit path restores the saved level */
+               return (error);
+       }
+       if (so->so_q == 0)      /* SO_ACCEPTCONN is set only while so_q is empty */
+               so->so_options |= SO_ACCEPTCONN;
+       if (backlog < 0)
+               backlog = 0;
+       so->so_qlimit = min(backlog, SOMAXCONN);
+       splx(s);
+       return (0);
+}
+
+int                            /* NOTE(review): declared int, yet no path returns a value (bare return / falls off the end) */
+sofree(so)                     /* release a socket once nothing refers to it any more */
+       register struct socket *so;
 {
 
 {
 
-COUNT(SOFREE);
-       if (so->so_pcb || (so->so_state & SS_USERGONE) == 0)
+       if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)     /* so_pcb still set, or SS_NOFDREF clear: leave the socket alone */
                 return;
                 return;
+       if (so->so_head) {
+               if (!soqremque(so, 0) && !soqremque(so, 1))     /* presumably 0/1 select the two queues of so_head - confirm */
+                       panic("sofree dq");     /* so_head was set but neither removal succeeded */
+               so->so_head = 0;
+       }
         sbrelease(&so->so_snd);
         sbrelease(&so->so_snd);
-       sbrelease(&so->so_rcv);
-       (void) m_free(dtom(so));
+       sorflush(so);           /* takes the place of the old sbrelease(&so->so_rcv) */
+       FREE(so, M_SOCKET);     /* so must not be touched after this */
 }
 
 /*
  * Close a socket on last file table reference removal.
  * Initiate disconnect if connected.
  * Free socket when disconnect complete.
 }
 
 /*
  * Close a socket on last file table reference removal.
  * Initiate disconnect if connected.
  * Free socket when disconnect complete.
- *
- * THIS IS REALLY A UNIX INTERFACE ROUTINE
  */
  */
-soclose(so, exiting)
+int                            /* first error seen (disconnect, linger sleep, detach), else 0 */
+soclose(so)
         register struct socket *so;
         register struct socket *so;
-       int exiting;
 {
         int s = splnet();               /* conservative */
 {
         int s = splnet();               /* conservative */
+       int error = 0;
 
 
-COUNT(SOCLOSE);
+       if (so->so_options & SO_ACCEPTCONN) {   /* accepting socket: abort everything on so_q0 and so_q */
+               while (so->so_q0)       /* presumably soabort() unlinks the entry so the loop ends - confirm */
+                       (void) soabort(so->so_q0);
+               while (so->so_q)
+                       (void) soabort(so->so_q);
+       }
         if (so->so_pcb == 0)           /* so_pcb already clear: skip disconnect and detach */
                 goto discard;
         if (so->so_state & SS_ISCONNECTED) {
                 if ((so->so_state & SS_ISDISCONNECTING) == 0) {
         if (so->so_pcb == 0)
                 goto discard;
         if (so->so_state & SS_ISCONNECTED) {
                 if ((so->so_state & SS_ISDISCONNECTING) == 0) {
-                       u.u_error = sodisconnect(so, (struct sockaddr *)0);
-                       if (u.u_error) {
-                               if (exiting)
-                                       goto drop;
-                               splx(s);
-                               return;
-                       }
+                       error = sodisconnect(so);
+                       if (error)      /* disconnect failed: detach anyway and report this error */
+                               goto drop;
                 }
                 }
-               if ((so->so_options & SO_DONTLINGER) == 0) {
+               if (so->so_options & SO_LINGER) {
                         if ((so->so_state & SS_ISDISCONNECTING) &&
                         if ((so->so_state & SS_ISDISCONNECTING) &&
-                           (so->so_options & SO_NONBLOCKING) &&
-                           exiting == 0) {
-                               u.u_error = EINPROGRESS;
-                               splx(s);
-                               return;
-                       }
-                       /* should use tsleep here, for at most linger */
+                           (so->so_state & SS_NBIO))   /* SS_NBIO set: do not wait for the disconnect */
+                               goto drop;
                         while (so->so_state & SS_ISCONNECTED)   /* sleep on so_timeo until no longer connected */
                         while (so->so_state & SS_ISCONNECTED)
-                               sleep((caddr_t)&so->so_timeo, PZERO+1);
+                               if (error = tsleep((caddr_t)&so->so_timeo,
+                                   PSOCK | PCATCH, netcls, so->so_linger * hz))        /* timeout argument is so_linger scaled by hz */
+                                       break;  /* nonzero tsleep() result ends the wait and becomes the return value */
                 }
         }
 drop:
                 }
         }
 drop:
-       u.u_error = (*so->so_proto->pr_usrreq)(so, PRU_DETACH, 0, 0);
+       if (so->so_pcb) {
+               int error2 =
+                   (*so->so_proto->pr_usrreq)(so, PRU_DETACH,
+                       (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0);
+               if (error == 0)         /* an earlier error takes precedence over the detach result */
+                       error = error2;
+       }
 discard:
 discard:
-       so->so_state |= SS_USERGONE;
+       if (so->so_state & SS_NOFDREF)  /* flag already set on entry: treated as fatal */
+               panic("soclose: NOFDREF");
+       so->so_state |= SS_NOFDREF;     /* sofree() acts only when this flag is set and so_pcb is clear */
         sofree(so);
         splx(s);
         sofree(so);
         splx(s);
+       return (error);
 }
 
 }
 
-sosplice(pso, so)
-       struct socket *pso, *so;
+/*
+ * Abort a socket: issue PRU_ABORT to its protocol with no mbuf
+ * arguments and return that request's result.
+ * Must be called at splnet...
+ */
+int
+soabort(so)
+       struct socket *so;
 {
 
 {
 
-COUNT(SOSPLICE);
-       if (pso->so_proto->pr_family != PF_UNIX) {
-               struct socket *tso;
-               tso = pso; pso = so; so = tso;
-       }
-       if (pso->so_proto->pr_family != PF_UNIX)
-               return (EOPNOTSUPP);
-       /* check types and buffer space */
-       /* merge buffers */
-       return (0);
+       return (
+           (*so->so_proto->pr_usrreq)(so, PRU_ABORT,
+               (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0));
 }
 
 }
 
-/*ARGSUSED*/
-sostat(so, sb)
-       struct socket *so;
-       struct stat *sb;
+int                            /* returns the result of the PRU_ACCEPT request */
+soaccept(so, nam)              /* clear SS_NOFDREF on "so" and issue PRU_ACCEPT */
+       register struct socket *so;     /* must arrive with SS_NOFDREF set, else panic */
+       struct mbuf *nam;       /* forwarded as the request's second mbuf argument */
 {
 {
+       int s = splnet();
+       int error;
 
 
-COUNT(SOSTAT);
-       bzero((caddr_t)sb, sizeof (*sb));               /* XXX */
-       return (0);                                     /* XXX */
+       if ((so->so_state & SS_NOFDREF) == 0)
+               panic("soaccept: !NOFDREF");
+       so->so_state &= ~SS_NOFDREF;    /* flag is cleared before the protocol is called and is not restored on error */
+       error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT,
+           (struct mbuf *)0, nam, (struct mbuf *)0);
+       splx(s);
+       return (error);
 }
 
 }
 
-/*
- * Accept connection on a socket.
- */
-soaccept(so, asa)
-       struct socket *so;
-       struct sockaddr *asa;
+int                            /* EOPNOTSUPP, EISCONN, or the PRU_CONNECT result */
+soconnect(so, nam)             /* issue PRU_CONNECT, disconnecting first where allowed */
+       register struct socket *so;
+       struct mbuf *nam;       /* forwarded as the request's second mbuf argument */
 {
 {
-       int s = splnet();
+       int s;
         int error;
 
         int error;
 
-COUNT(SOACCEPT);
-       if ((so->so_options & SO_ACCEPTCONN) == 0) {
-               error = EINVAL;                 /* XXX */
-               goto bad;
-       }
-       if ((so->so_state & SS_CONNAWAITING) == 0) {
-               error = ENOTCONN;
-               goto bad;
-       }
-       so->so_state &= ~SS_CONNAWAITING;
-       error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT, 0, (caddr_t)asa);
-bad:
+       if (so->so_options & SO_ACCEPTCONN)     /* rejected before splnet() is taken */
+               return (EOPNOTSUPP);
+       s = splnet();
+       /*
+        * If protocol is connection-based, can only connect once.
+        * Otherwise, if connected, try to disconnect first.
+        * This allows user to disconnect by connecting to, e.g.,
+        * a null address.
+        * A failed sodisconnect() is also reported as EISCONN; its
+        * own error value is overwritten.
+        */
+       if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
+           ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
+           (error = sodisconnect(so))))
+               error = EISCONN;
+       else
+               error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
+                   (struct mbuf *)0, nam, (struct mbuf *)0);
         splx(s);
         return (error);
 }
 
        splx(s);
        return (error);
 }
 
-/*
- * Connect socket to a specified address.
- * If already connected or connecting, then avoid
- * the protocol entry, to keep its job simpler.
- */
-soconnect(so, asa)
-       struct socket *so;
-       struct sockaddr *asa;
+int                            /* returns the result of the PRU_CONNECT2 request */
+soconnect2(so1, so2)           /* issue PRU_CONNECT2 through so1's protocol */
+       register struct socket *so1;    /* socket whose protocol handles the request */
+       struct socket *so2;     /* passed in the mbuf-pointer slot via a cast; it is a socket, not an mbuf */
 {
         int s = splnet();
         int error;
 
 {
         int s = splnet();
         int error;
 
-COUNT(SOCONNECT);
-       if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) {
-               error = EISCONN;
-               goto bad;
-       }
-       error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT, 0, (caddr_t)asa);
-bad:
+       error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2,
+           (struct mbuf *)0, (struct mbuf *)so2, (struct mbuf *)0);
         splx(s);
         return (error);
 }
 
        splx(s);
        return (error);
 }
 
-/*
- * Disconnect from a socket.
- * Address parameter is from system call for later multicast
- * protocols.  Check to make sure that connected and no disconnect
- * in progress (for protocol's sake), and then invoke protocol.
- */
-sodisconnect(so, asa)
-       struct socket *so;
-       struct sockaddr *asa;
+int
+sodisconnect(so)
+       register struct socket *so;
 {
        int s = splnet();
        int error;
 
 {
        int s = splnet();
        int error;
 
-COUNT(SODISCONNECT);
        if ((so->so_state & SS_ISCONNECTED) == 0) {
                error = ENOTCONN;
                goto bad;
        if ((so->so_state & SS_ISCONNECTED) == 0) {
                error = ENOTCONN;
                goto bad;
@@ -242,12 +286,14 @@ COUNT(SODISCONNECT);
                error = EALREADY;
                goto bad;
        }
                error = EALREADY;
                goto bad;
        }
-       error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT, 0, asa);
+       error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT,
+           (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0);
 bad:
        splx(s);
        return (error);
 }
 
 bad:
        splx(s);
        return (error);
 }
 
+#define        SBLOCKWAIT(f)   (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)    /* second argument for sblock(): M_NOWAIT iff MSG_DONTWAIT is set in f */
 /*
  * Send on a socket.
  * If send must go all at once and message is larger than
 /*
  * Send on a socket.
  * If send must go all at once and message is larger than
@@ -255,366 +301,740 @@ bad:
  * Lock against other senders.
  * If must go all at once and not enough room now, then
  * inform user that this would block and do nothing.
  * Lock against other senders.
  * If must go all at once and not enough room now, then
  * inform user that this would block and do nothing.
+ * Otherwise, if nonblocking, send as much as possible.
+ * The data to be sent is described by "uio" if nonzero,
+ * otherwise by the mbuf chain "top" (which must be null
+ * if uio is not).  Data provided in mbuf chain must be small
+ * enough to send all at once.
+ *
+ * Returns nonzero on error, timeout or signal; callers
+ * must check for short counts if EINTR/ERESTART are returned.
+ * Data and control buffers are freed on return.
  */
  */
-sosend(so, asa)
+int                            /* 0, or an errno value */
+sosend(so, addr, uio, top, control, flags)
         register struct socket *so;
         register struct socket *so;
-       struct sockaddr *asa;
+       struct mbuf *addr;      /* handed to the protocol; 0 gives EDESTADDRREQ on an unconnected non-PR_CONNREQUIRED socket */
+       struct uio *uio;        /* describes the data to copy in, or 0 when "top" carries the data */
+       struct mbuf *top;       /* prepackaged data chain, used when uio is 0 */
+       struct mbuf *control;   /* optional; freed at out: unless already handed to the protocol */
+       int flags;              /* MSG_DONTWAIT, MSG_DONTROUTE, MSG_OOB and MSG_EOR are examined here */
 {
 {
-       struct mbuf *top = 0;
-       register struct mbuf *m, **mp = &top;
-       register u_int len;
-       int error = 0, space, s;
-
-COUNT(SOSEND);
-       if (so->so_state & SS_CANTSENDMORE) {
-               psignal(u.u_procp, SIGPIPE);
-               return (EPIPE);
-       }
-       if (sosendallatonce(so) && u.u_count > so->so_snd.sb_hiwat)
-               return (EMSGSIZE);
-       if ((so->so_snd.sb_flags & SB_LOCK) && (so->so_options & SO_NONBLOCKING))
-               return (EWOULDBLOCK);
-       sblock(&so->so_snd);
+       struct proc *p = curproc;               /* XXX */
+       struct mbuf **mp;       /* where the next mbuf is linked into the chain being built */
+       register struct mbuf *m;
+       register long space, len, resid;        /* resid: bytes still to be sent */
+       int clen = 0, error, s, dontroute, mlen;
+       int atomic = sosendallatonce(so) || top;        /* nonzero: the message goes to the protocol in a single request */
+
+       if (uio)
+               resid = uio->uio_resid;
+       else
+               resid = top->m_pkthdr.len;      /* NOTE(review): dereferences top; assumes uio == 0 is never paired with top == 0 */
+       /*
+        * In theory resid should be unsigned.
+        * However, space must be signed, as it might be less than 0
+        * if we over-committed, and we must use a signed comparison
+        * of space and resid.  On the other hand, a negative resid
+        * causes us to loop sending 0-length segments to the protocol.
+        */
+       if (resid < 0)
+               return (EINVAL);        /* NOTE(review): returns without the out: cleanup, so top/control are not freed here */
+       dontroute =
+           (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
+           (so->so_proto->pr_flags & PR_ATOMIC);       /* nonzero: SO_DONTROUTE is set only around the protocol call */
+       p->p_stats->p_ru.ru_msgsnd++;
+       if (control)
+               clen = control->m_len;  /* m_len of the head control mbuf only */
 #define        snderr(errno)   { error = errno; splx(s); goto release; }       /* record error, restore spl, leave via release: */
 
 #define        snderr(errno)   { error = errno; splx(s); goto release; }
 
-       s = splnet();
-again:
-       if (so->so_error) {
-               error = so->so_error;
-               so->so_error = 0;
-               splx(s);
-               goto release;
-       }
-       if ((so->so_state & SS_ISCONNECTED) == 0) {
-               if (so->so_proto->pr_flags & PR_CONNREQUIRED)
-                       snderr(ENOTCONN);
-               if (asa == 0)
-                       snderr(EDESTADDRREQ);
-       }
-       if (top) {
-               error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, top, asa);
-               if (error) {
+restart:
+       if (error = sblock(&so->so_snd, SBLOCKWAIT(flags)))     /* lock not obtained: skip the sbunlock() at release: */
+               goto out;
+       do {
+               s = splnet();
+               if (so->so_state & SS_CANTSENDMORE)
+                       snderr(EPIPE);
+               if (so->so_error)
+                       snderr(so->so_error);   /* NOTE(review): so_error is reported but not cleared here */
+               if ((so->so_state & SS_ISCONNECTED) == 0) {
+                       if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
+                               if ((so->so_state & SS_ISCONFIRMING) == 0 &&
+                                   !(resid == 0 && clen != 0)) /* a control-only send (no data) is let through unconnected */
+                                       snderr(ENOTCONN);
+                       } else if (addr == 0)
+                               snderr(EDESTADDRREQ);
+               }
+               space = sbspace(&so->so_snd);
+               if (flags & MSG_OOB)
+                       space += 1024;  /* MSG_OOB sends are allowed 1024 bytes beyond the reported space */
+               if (atomic && resid > so->so_snd.sb_hiwat ||
+                   clen > so->so_snd.sb_hiwat)
+                       snderr(EMSGSIZE);
+               if (space < resid + clen && uio &&
+                   (atomic || space < so->so_snd.sb_lowat || space < clen)) {
+                       if (so->so_state & SS_NBIO)
+                               snderr(EWOULDBLOCK);
+                       sbunlock(&so->so_snd);  /* lock is released across the wait and retaken at restart: */
+                       error = sbwait(&so->so_snd);
                         splx(s);
                         splx(s);
-                       goto release;
+                       if (error)
+                               goto out;
+                       goto restart;
                 }
                 }
-               top = 0;
-               mp = &top;
-       }
-       if (u.u_count == 0) {
-               splx(s);
-               goto release;
-       }
-       space = sbspace(&so->so_snd);
-       if (space <= 0 || sosendallatonce(so) && space < u.u_count) {
-               if (so->so_options & SO_NONBLOCKING)
-                       snderr(EWOULDBLOCK);
-               sbunlock(&so->so_snd);
-               sbwait(&so->so_snd);
                 splx(s);
                 splx(s);
-               goto again;
-       }
-       splx(s);
-       while (u.u_count && space > 0) {
-               MGET(m, 1);
-               if (m == NULL) {
-                       error = ENOBUFS;
-                       m_freem(top);
-                       goto release;
-               }
-               if (u.u_count >= CLBYTES && space >= CLBYTES) {
-                       register struct mbuf *p;
-                       MCLGET(p, 1);
-                       if (p == 0)
-                               goto nopages;
-                       m->m_off = (int)p - (int)m;
-                       len = CLBYTES;
-               } else {
+               mp = &top;
+               space -= clen;
+               do {
+                   if (uio == NULL) {
+                       /*
+                        * Data is prepackaged in "top".
+                        */
+                       resid = 0;
+                       if (flags & MSG_EOR)
+                               top->m_flags |= M_EOR;
+                   } else do {
+                       if (top == 0) { /* first mbuf of a chain gets a packet header */
+                               MGETHDR(m, M_WAIT, MT_DATA);
+                               mlen = MHLEN;
+                               m->m_pkthdr.len = 0;
+                               m->m_pkthdr.rcvif = (struct ifnet *)0;
+                       } else {
+                               MGET(m, M_WAIT, MT_DATA);
+                               mlen = MLEN;
+                       }
+                       if (resid >= MINCLSIZE && space >= MCLBYTES) {
+                               MCLGET(m, M_WAIT);
+                               if ((m->m_flags & M_EXT) == 0)  /* no cluster attached: fill the plain mbuf instead */
+                                       goto nopages;
+                               mlen = MCLBYTES;
+#ifdef MAPPED_MBUFS
+                               len = min(MCLBYTES, resid);
+#else
+                               if (atomic && top == 0) {
+                                       len = min(MCLBYTES - max_hdr, resid);
+                                       m->m_data += max_hdr;   /* skip max_hdr bytes at the front of the first cluster */
+                               } else
+                                       len = min(MCLBYTES, resid);
+#endif
+                               space -= MCLBYTES;      /* a whole cluster is charged regardless of len */
+                       } else {
 nopages:
 nopages:
-                       m->m_off = MMINOFF;
-                       len = MIN(MLEN, u.u_count);
-               }
-               iomove(mtod(m, caddr_t), len, B_WRITE);
-               m->m_len = len;
-               *mp = m;
-               mp = &m->m_next;
-               space = sbspace(&so->so_snd);
-       }
-       s = splnet();
-       goto again;
+                               len = min(min(mlen, resid), space);
+                               space -= len;
+                               /*
+                                * For datagram protocols, leave room
+                                * for protocol headers in first mbuf.
+                                */
+                               if (atomic && top == 0 && len < mlen)
+                                       MH_ALIGN(m, len);
+                       }
+                       error = uiomove(mtod(m, caddr_t), (int)len, uio);
+                       resid = uio->uio_resid;
+                       m->m_len = len;
+                       *mp = m;        /* m is linked into top before the error check, so out: frees it too */
+                       top->m_pkthdr.len += len;
+                       if (error)
+                               goto release;
+                       mp = &m->m_next;
+                       if (resid <= 0) {
+                               if (flags & MSG_EOR)
+                                       top->m_flags |= M_EOR;
+                               break;
+                       }
+                   } while (space > 0 && atomic);      /* non-atomic sends pass each mbuf to the protocol separately */
+                   if (dontroute)
+                           so->so_options |= SO_DONTROUTE;
+                   s = splnet();                               /* XXX */
+                   error = (*so->so_proto->pr_usrreq)(so,
+                       (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
+                       top, addr, control);
+                   splx(s);
+                   if (dontroute)
+                           so->so_options &= ~SO_DONTROUTE;
+                   clen = 0;
+                   control = 0;        /* cleared so out: does not free what was handed to the protocol */
+                   top = 0;
+                   mp = &top;
+                   if (error)
+                       goto release;
+               } while (resid && space > 0);
+       } while (resid);
 
 release:
         sbunlock(&so->so_snd);
 
 release:
         sbunlock(&so->so_snd);
+out:
+       if (top)        /* whatever was never handed to the protocol is freed here */
+               m_freem(top);
+       if (control)
+               m_freem(control);
         return (error);
 }
 
        return (error);
 }
 
-soreceive(so, asa)
+/*
+ * Implement receive operations on a socket.
+ * We depend on the way that records are added to the sockbuf
+ * by sbappend*.  In particular, each record (mbufs linked through m_next)
+ * must begin with an address if the protocol so specifies,
+ * followed by an optional mbuf or mbufs containing ancillary data,
+ * and then zero or more mbufs of data.
+ * In order to avoid blocking network interrupts for the entire time here,
+ * we splx() while doing the actual copy to user space.
+ * Although the sockbuf is locked, new data may still be appended,
+ * and thus we must maintain consistency of the sockbuf during that time.
+ *
+ * The caller may receive the data as a single mbuf chain by supplying
+ * an mbuf **mp0 for use in returning the chain.  The uio is then used
+ * only for the count in uio_resid.
+ */
+int
+soreceive(so, paddr, uio, mp0, controlp, flagsp)
        register struct socket *so;
        register struct socket *so;
-       struct sockaddr *asa;
+       struct mbuf **paddr;
+       struct uio *uio;
+       struct mbuf **mp0;
+       struct mbuf **controlp;
+       int *flagsp;
 {
 {
-       register struct mbuf *m, *n;
-       u_int len;
-       int eor, s, error = 0, cnt = u.u_count;
-       caddr_t base = u.u_base;
+       register struct mbuf *m, **mp;
+       register int flags, len, error, s, offset;
+       struct protosw *pr = so->so_proto;
+       struct mbuf *nextrecord;
+       int moff, type;
+       int orig_resid = uio->uio_resid;
+
+       mp = mp0;
+       if (paddr)
+               *paddr = 0;
+       if (controlp)
+               *controlp = 0;
+       if (flagsp)
+               flags = *flagsp &~ MSG_EOR;
+       else
+               flags = 0;
+       if (flags & MSG_OOB) {
+               m = m_get(M_WAIT, MT_DATA);
+               error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m,
+                   (struct mbuf *)(long)(flags & MSG_PEEK), (struct mbuf *)0);
+               if (error)
+                       goto bad;
+               do {
+                       error = uiomove(mtod(m, caddr_t),
+                           (int) min(uio->uio_resid, m->m_len), uio);
+                       m = m_free(m);
+               } while (uio->uio_resid && error == 0 && m);
+bad:
+               if (m)
+                       m_freem(m);
+               return (error);
+       }
+       if (mp)
+               *mp = (struct mbuf *)0;
+       if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
+               (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
+                   (struct mbuf *)0, (struct mbuf *)0);
 
 
-COUNT(SORECEIVE);
 restart:
 restart:
-       sblock(&so->so_rcv);
+       if (error = sblock(&so->so_rcv, SBLOCKWAIT(flags)))
+               return (error);
        s = splnet();
 
        s = splnet();
 
-#define        rcverr(errno)   { error = errno; splx(s); goto release; }
-       if (so->so_rcv.sb_cc == 0) {
+       m = so->so_rcv.sb_mb;
+       /*
+        * If we have less data than requested, block awaiting more
+        * (subject to any timeout) if:
+        *   1. the current count is less than the low water mark, or
+        *   2. MSG_WAITALL is set, and it is possible to do the entire
+        *      receive operation at once if we block (resid <= hiwat), or
+        *   3. MSG_DONTWAIT is not set.
+        * If MSG_WAITALL is set but resid is larger than the receive buffer,
+        * we have to do the receive in sections, and thus risk returning
+        * a short count if a timeout or signal occurs after we start.
+        */
+       if (m == 0 || ((flags & MSG_DONTWAIT) == 0 &&
+           so->so_rcv.sb_cc < uio->uio_resid) &&
+           (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
+           ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
+           m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0) {
+#ifdef DIAGNOSTIC
+               if (m == 0 && so->so_rcv.sb_cc)
+                       panic("receive 1");
+#endif
                if (so->so_error) {
                if (so->so_error) {
+                       if (m)
+                               goto dontblock;
                        error = so->so_error;
                        error = so->so_error;
-                       so->so_error = 0;
-                       splx(s);
+                       if ((flags & MSG_PEEK) == 0)
+                               so->so_error = 0;
                        goto release;
                }
                if (so->so_state & SS_CANTRCVMORE) {
                        goto release;
                }
                if (so->so_state & SS_CANTRCVMORE) {
-                       splx(s);
+                       if (m)
+                               goto dontblock;
+                       else
+                               goto release;
+               }
+               for (; m; m = m->m_next)
+                       if (m->m_type == MT_OOBDATA  || (m->m_flags & M_EOR)) {
+                               m = so->so_rcv.sb_mb;
+                               goto dontblock;
+                       }
+               if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
+                   (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
+                       error = ENOTCONN;
+                       goto release;
+               }
+               if (uio->uio_resid == 0)
+                       goto release;
+               if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
+                       error = EWOULDBLOCK;
                        goto release;
                }
                        goto release;
                }
-               if ((so->so_state & SS_ISCONNECTED) == 0 &&
-                   (so->so_proto->pr_flags & PR_CONNREQUIRED))
-                       rcverr(ENOTCONN);
-               if (so->so_options & SO_NONBLOCKING)
-                       rcverr(EWOULDBLOCK);
                sbunlock(&so->so_rcv);
                sbunlock(&so->so_rcv);
-               sbwait(&so->so_rcv);
+               error = sbwait(&so->so_rcv);
                splx(s);
                splx(s);
+               if (error)
+                       return (error);
                goto restart;
        }
                goto restart;
        }
-       m = so->so_rcv.sb_mb;
-       if (m == 0)
-               panic("receive");
-       if (so->so_proto->pr_flags & PR_ADDR) {
-               if (m->m_len != sizeof (struct sockaddr))
-                       panic("soreceive addr");
-               if (asa)
-                       bcopy(mtod(m, caddr_t), (caddr_t)asa, sizeof (*asa));
-               so->so_rcv.sb_cc -= m->m_len;
-               so->so_rcv.sb_mbcnt -= MSIZE;
-               m = m_free(m);
-               if (m == 0)
-                       panic("receive 2");
-               so->so_rcv.sb_mb = m;
+dontblock:
+       if (uio->uio_procp)
+               uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
+       nextrecord = m->m_nextpkt;
+       if (pr->pr_flags & PR_ADDR) {
+#ifdef DIAGNOSTIC
+               if (m->m_type != MT_SONAME)
+                       panic("receive 1a");
+#endif
+               orig_resid = 0;
+               if (flags & MSG_PEEK) {
+                       if (paddr)
+                               *paddr = m_copy(m, 0, m->m_len);
+                       m = m->m_next;
+               } else {
+                       sbfree(&so->so_rcv, m);
+                       if (paddr) {
+                               *paddr = m;
+                               so->so_rcv.sb_mb = m->m_next;
+                               m->m_next = 0;
+                               m = so->so_rcv.sb_mb;
+                       } else {
+                               MFREE(m, so->so_rcv.sb_mb);
+                               m = so->so_rcv.sb_mb;
+                       }
+               }
        }
        }
-       so->so_state &= ~SS_RCVATMARK;
-       if (so->so_oobmark && cnt > so->so_oobmark)
-               cnt = so->so_oobmark;
-       eor = 0;
-       do {
-               len = MIN(m->m_len, cnt);
-               splx(s);
-               iomove(mtod(m, caddr_t), len, B_READ);
-               cnt -= len;
-               s = splnet();
-               if (len == m->m_len) {
-                       eor = (int)m->m_act;
+       while (m && m->m_type == MT_CONTROL && error == 0) {
+               if (flags & MSG_PEEK) {
+                       if (controlp)
+                               *controlp = m_copy(m, 0, m->m_len);
+                       m = m->m_next;
+               } else {
                        sbfree(&so->so_rcv, m);
                        sbfree(&so->so_rcv, m);
-                       so->so_rcv.sb_mb = m->m_next;
-                       MFREE(m, n);
+                       if (controlp) {
+                               if (pr->pr_domain->dom_externalize &&
+                                   mtod(m, struct cmsghdr *)->cmsg_type ==
+                                   SCM_RIGHTS)
+                                  error = (*pr->pr_domain->dom_externalize)(m);
+                               *controlp = m;
+                               so->so_rcv.sb_mb = m->m_next;
+                               m->m_next = 0;
+                               m = so->so_rcv.sb_mb;
+                       } else {
+                               MFREE(m, so->so_rcv.sb_mb);
+                               m = so->so_rcv.sb_mb;
+                       }
+               }
+               if (controlp) {
+                       orig_resid = 0;
+                       controlp = &(*controlp)->m_next;
+               }
+       }
+       if (m) {
+               if ((flags & MSG_PEEK) == 0)
+                       m->m_nextpkt = nextrecord;
+               type = m->m_type;
+               if (type == MT_OOBDATA)
+                       flags |= MSG_OOB;
+       }
+       moff = 0;
+       offset = 0;
+       while (m && uio->uio_resid > 0 && error == 0) {
+               if (m->m_type == MT_OOBDATA) {
+                       if (type != MT_OOBDATA)
+                               break;
+               } else if (type == MT_OOBDATA)
+                       break;
+#ifdef DIAGNOSTIC
+               else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
+                       panic("receive 3");
+#endif
+               so->so_state &= ~SS_RCVATMARK;
+               len = uio->uio_resid;
+               if (so->so_oobmark && len > so->so_oobmark - offset)
+                       len = so->so_oobmark - offset;
+               if (len > m->m_len - moff)
+                       len = m->m_len - moff;
+               /*
+                * If mp is set, just pass back the mbufs.
+                * Otherwise copy them out via the uio, then free.
+                * Sockbuf must be consistent here (points to current mbuf,
+                * it points to next record) when we drop priority;
+                * we must note any additions to the sockbuf when we
+                * block interrupts again.
+                */
+               if (mp == 0) {
+                       splx(s);
+                       error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
+                       s = splnet();
+               } else
+                       uio->uio_resid -= len;
+               if (len == m->m_len - moff) {
+                       if (m->m_flags & M_EOR)
+                               flags |= MSG_EOR;
+                       if (flags & MSG_PEEK) {
+                               m = m->m_next;
+                               moff = 0;
+                       } else {
+                               nextrecord = m->m_nextpkt;
+                               sbfree(&so->so_rcv, m);
+                               if (mp) {
+                                       *mp = m;
+                                       mp = &m->m_next;
+                                       so->so_rcv.sb_mb = m = m->m_next;
+                                       *mp = (struct mbuf *)0;
+                               } else {
+                                       MFREE(m, so->so_rcv.sb_mb);
+                                       m = so->so_rcv.sb_mb;
+                               }
+                               if (m)
+                                       m->m_nextpkt = nextrecord;
+                       }
                } else {
                } else {
-                       m->m_off += len;
-                       m->m_len -= len;
-                       so->so_rcv.sb_cc -= len;
+                       if (flags & MSG_PEEK)
+                               moff += len;
+                       else {
+                               if (mp)
+                                       *mp = m_copym(m, 0, len, M_WAIT);
+                               m->m_data += len;
+                               m->m_len -= len;
+                               so->so_rcv.sb_cc -= len;
+                       }
                }
                }
-       } while ((m = so->so_rcv.sb_mb) && cnt && !eor);
-       if ((so->so_proto->pr_flags & PR_ATOMIC) && eor == 0)
-               do {
-                       if (m == 0)
-                               panic("receive 3");
-                       sbfree(&so->so_rcv, m);
-                       eor = (int)m->m_act;
-                       so->so_rcv.sb_mb = m->m_next;
-                       MFREE(m, n);
-                       m = n;
-               } while (eor == 0);
-       if ((so->so_proto->pr_flags & PR_WANTRCVD) && so->so_pcb)
-               (*so->so_proto->pr_usrreq)(so, PRU_RCVD, 0, 0);
-       if (so->so_oobmark) {
-               so->so_oobmark -= u.u_base - base;
-               if (so->so_oobmark == 0)
-                       so->so_state |= SS_RCVATMARK;
+               if (so->so_oobmark) {
+                       if ((flags & MSG_PEEK) == 0) {
+                               so->so_oobmark -= len;
+                               if (so->so_oobmark == 0) {
+                                       so->so_state |= SS_RCVATMARK;
+                                       break;
+                               }
+                       } else {
+                               offset += len;
+                               if (offset == so->so_oobmark)
+                                       break;
+                       }
+               }
+               if (flags & MSG_EOR)
+                       break;
+               /*
+                * If the MSG_WAITALL flag is set (for non-atomic socket),
+                * we must not quit until "uio->uio_resid == 0" or an error
+                * termination.  If a signal/timeout occurs, return
+                * with a short count but without error.
+                * Keep sockbuf locked against other readers.
+                */
+               while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
+                   !sosendallatonce(so) && !nextrecord) {
+                       if (so->so_error || so->so_state & SS_CANTRCVMORE)
+                               break;
+                       error = sbwait(&so->so_rcv);
+                       if (error) {
+                               sbunlock(&so->so_rcv);
+                               splx(s);
+                               return (0);
+                       }
+                       if (m = so->so_rcv.sb_mb)
+                               nextrecord = m->m_nextpkt;
+               }
+       }
+
+       if (m && pr->pr_flags & PR_ATOMIC) {
+               flags |= MSG_TRUNC;
+               if ((flags & MSG_PEEK) == 0)
+                       (void) sbdroprecord(&so->so_rcv);
+       }
+       if ((flags & MSG_PEEK) == 0) {
+               if (m == 0)
+                       so->so_rcv.sb_mb = nextrecord;
+               if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
+                       (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
+                           (struct mbuf *)(long)flags, (struct mbuf *)0,
+                           (struct mbuf *)0);
+       }
+       if (orig_resid == uio->uio_resid && orig_resid &&
+           (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
+               sbunlock(&so->so_rcv);
+               splx(s);
+               goto restart;
        }
        }
+               
+       if (flagsp)
+               *flagsp |= flags;
 release:
        sbunlock(&so->so_rcv);
        splx(s);
        return (error);
 }
 
 release:
        sbunlock(&so->so_rcv);
        splx(s);
        return (error);
 }
 
-sohasoutofband(so)
-       struct socket *so;
+int
+soshutdown(so, how)
+       register struct socket *so;
+       register int how;
 {
 {
+       register struct protosw *pr = so->so_proto;
 
 
-       if (so->so_pgrp == 0)
-               return;
-       if (so->so_pgrp > 0)
-               gsignal(so->so_pgrp, SIGURG);
-       else {
-               struct proc *p = pfind(-so->so_pgrp);
+       how++;
+       if (how & FREAD)
+               sorflush(so);
+       if (how & FWRITE)
+               return ((*pr->pr_usrreq)(so, PRU_SHUTDOWN,
+                   (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0));
+       return (0);
+}
 
 
-               if (p)
-                       psignal(p, SIGURG);
-       }
+void
+sorflush(so)
+       register struct socket *so;
+{
+       register struct sockbuf *sb = &so->so_rcv;
+       register struct protosw *pr = so->so_proto;
+       register int s;
+       struct sockbuf asb;
+
+       sb->sb_flags |= SB_NOINTR;
+       (void) sblock(sb, M_WAITOK);
+       s = splimp();
+       socantrcvmore(so);
+       sbunlock(sb);
+       asb = *sb;
+       bzero((caddr_t)sb, sizeof (*sb));
+       splx(s);
+       if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
+               (*pr->pr_domain->dom_dispose)(asb.sb_mb);
+       sbrelease(&asb);
 }
 
 }
 
-/*ARGSUSED*/
-soioctl(so, cmd, cmdp)
+int
+sosetopt(so, level, optname, m0)
        register struct socket *so;
        register struct socket *so;
-       int cmd;
-       register caddr_t cmdp;
+       int level, optname;
+       struct mbuf *m0;
 {
 {
+       int error = 0;
+       register struct mbuf *m = m0;
 
 
-COUNT(SOIOCTL);
-       switch (cmd) {
+       if (level != SOL_SOCKET) {
+               if (so->so_proto && so->so_proto->pr_ctloutput)
+                       return ((*so->so_proto->pr_ctloutput)
+                                 (PRCO_SETOPT, so, level, optname, &m0));
+               error = ENOPROTOOPT;
+       } else {
+               switch (optname) {
 
 
-       case FIONBIO: {
-               int nbio;
-               if (copyin(cmdp, (caddr_t)&nbio, sizeof (nbio))) {
-                       u.u_error = EFAULT;
-                       return;
-               }
-               if (nbio)
-                       so->so_options |= SO_NONBLOCKING;
-               else
-                       so->so_options &= ~SO_NONBLOCKING;
-               return;
-       }
+               case SO_LINGER:
+                       if (m == NULL || m->m_len != sizeof (struct linger)) {
+                               error = EINVAL;
+                               goto bad;
+                       }
+                       so->so_linger = mtod(m, struct linger *)->l_linger;
+                       /* fall thru... */
 
 
-       case FIOASYNC: {
-               int async;
-               if (copyin(cmdp, (caddr_t)&async, sizeof (async))) {
-                       u.u_error = EFAULT;
-                       return;
-               }
-               if (async)
-                       ;
-               else
-                       ;
-               return;
-       }
+               case SO_DEBUG:
+               case SO_KEEPALIVE:
+               case SO_DONTROUTE:
+               case SO_USELOOPBACK:
+               case SO_BROADCAST:
+               case SO_REUSEADDR:
+               case SO_REUSEPORT:
+               case SO_OOBINLINE:
+                       if (m == NULL || m->m_len < sizeof (int)) {
+                               error = EINVAL;
+                               goto bad;
+                       }
+                       if (*mtod(m, int *))
+                               so->so_options |= optname;
+                       else
+                               so->so_options &= ~optname;
+                       break;
 
 
-       case SIOCSKEEP: {
-               int keep;
-               if (copyin(cmdp, (caddr_t)&keep, sizeof (keep))) {
-                       u.u_error = EFAULT;
-                       return;
-               }
-               if (keep)
-                       so->so_options &= ~SO_NOKEEPALIVE;
-               else
-                       so->so_options |= SO_NOKEEPALIVE;
-               return;
-       }
+               case SO_SNDBUF:
+               case SO_RCVBUF:
+               case SO_SNDLOWAT:
+               case SO_RCVLOWAT:
+                       if (m == NULL || m->m_len < sizeof (int)) {
+                               error = EINVAL;
+                               goto bad;
+                       }
+                       switch (optname) {
 
 
-       case SIOCGKEEP: {
-               int keep = (so->so_options & SO_NOKEEPALIVE) == 0;
-               if (copyout((caddr_t)&keep, cmdp, sizeof (keep)))
-                       u.u_error = EFAULT;
-               return;
-       }
+                       case SO_SNDBUF:
+                       case SO_RCVBUF:
+                               if (sbreserve(optname == SO_SNDBUF ?
+                                   &so->so_snd : &so->so_rcv,
+                                   (u_long) *mtod(m, int *)) == 0) {
+                                       error = ENOBUFS;
+                                       goto bad;
+                               }
+                               break;
 
 
-       case SIOCSLINGER: {
-               int linger;
-               if (copyin(cmdp, (caddr_t)&linger, sizeof (linger))) {
-                       u.u_error = EFAULT;
-                       return;
-               }
-               so->so_linger = linger;
-               if (so->so_linger)
-                       so->so_options &= ~SO_DONTLINGER;
-               else
-                       so->so_options |= SO_DONTLINGER;
-               return;
-       }
+                       case SO_SNDLOWAT:
+                               so->so_snd.sb_lowat = *mtod(m, int *);
+                               break;
+                       case SO_RCVLOWAT:
+                               so->so_rcv.sb_lowat = *mtod(m, int *);
+                               break;
+                       }
+                       break;
 
 
-       case SIOCGLINGER: {
-               int linger = so->so_linger;
-               if (copyout((caddr_t)&linger, cmdp, sizeof (linger))) {
-                       u.u_error = EFAULT;
-                       return;
-               }
-       }
-       case SIOCSPGRP: {
-               int pgrp;
-               if (copyin(cmdp, (caddr_t)&pgrp, sizeof (pgrp))) {
-                       u.u_error = EFAULT;
-                       return;
-               }
-               so->so_pgrp = pgrp;
-               return;
-       }
+               case SO_SNDTIMEO:
+               case SO_RCVTIMEO:
+                   {
+                       struct timeval *tv;
+                       short val;
 
 
-       case SIOCGPGRP: {
-               int pgrp = so->so_pgrp;
-               if (copyout((caddr_t)&pgrp, cmdp, sizeof (pgrp))) {
-                       u.u_error = EFAULT;
-                       return;
-               }
-       }
+                       if (m == NULL || m->m_len < sizeof (*tv)) {
+                               error = EINVAL;
+                               goto bad;
+                       }
+                       tv = mtod(m, struct timeval *);
+                       if (tv->tv_sec * hz + tv->tv_usec / tick > SHRT_MAX) {
+                               error = EDOM;
+                               goto bad;
+                       }
+                       val = tv->tv_sec * hz + tv->tv_usec / tick;
 
 
-       case SIOCDONE: {
-               int flags;
-               if (copyin(cmdp, (caddr_t)&flags, sizeof (flags))) {
-                       u.u_error = EFAULT;
-                       return;
-               }
-               flags++;
-               if (flags & FREAD) {
-                       int s = splimp();
-                       socantrcvmore(so);
-                       sbflush(&so->so_rcv);
-                       splx(s);
-               }
-               if (flags & FWRITE)
-                       u.u_error = (*so->so_proto->pr_usrreq)(so, PRU_SHUTDOWN, (struct mbuf *)0, 0);
-               return;
-       }
+                       switch (optname) {
 
 
-       case SIOCSENDOOB: {
-               char oob;
-               struct mbuf *m;
-               if (copyin(cmdp, (caddr_t)&oob, sizeof (oob))) {
-                       u.u_error = EFAULT;
-                       return;
-               }
-               m = m_get(M_DONTWAIT);
-               if (m == 0) {
-                       u.u_error = ENOBUFS;
-                       return;
-               }
-               m->m_off = MMINOFF;
-               m->m_len = 1;
-               *mtod(m, caddr_t) = oob;
-               (*so->so_proto->pr_usrreq)(so, PRU_SENDOOB, m, 0);
-               return;
-       }
+                       case SO_SNDTIMEO:
+                               so->so_snd.sb_timeo = val;
+                               break;
+                       case SO_RCVTIMEO:
+                               so->so_rcv.sb_timeo = val;
+                               break;
+                       }
+                       break;
+                   }
 
 
-       case SIOCRCVOOB: {
-               struct mbuf *m = m_get(M_DONTWAIT);
-               if (m == 0) {
-                       u.u_error = ENOBUFS;
-                       return;
+               default:
+                       error = ENOPROTOOPT;
+                       break;
                }
                }
-               m->m_off = MMINOFF; *mtod(m, caddr_t) = 0;
-               (*so->so_proto->pr_usrreq)(so, PRU_RCVOOB, m, 0);
-               if (copyout(mtod(m, caddr_t), cmdp, sizeof (char))) {
-                       u.u_error = EFAULT;
-                       return;
+               if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
+                       (void) ((*so->so_proto->pr_ctloutput)
+                                 (PRCO_SETOPT, so, level, optname, &m0));
+                       m = NULL;       /* freed by protocol */
                }
                }
-               m_free(m);
-               return;
        }
        }
+bad:
+       if (m)
+               (void) m_free(m);
+       return (error);
+}
+
+int
+sogetopt(so, level, optname, mp)
+       register struct socket *so;
+       int level, optname;
+       struct mbuf **mp;
+{
+       register struct mbuf *m;
+
+       if (level != SOL_SOCKET) {
+               if (so->so_proto && so->so_proto->pr_ctloutput) {
+                       return ((*so->so_proto->pr_ctloutput)
+                                 (PRCO_GETOPT, so, level, optname, mp));
+               } else
+                       return (ENOPROTOOPT);
+       } else {
+               m = m_get(M_WAIT, MT_SOOPTS);
+               m->m_len = sizeof (int);
+
+               switch (optname) {
+
+               case SO_LINGER:
+                       m->m_len = sizeof (struct linger);
+                       mtod(m, struct linger *)->l_onoff =
+                               so->so_options & SO_LINGER;
+                       mtod(m, struct linger *)->l_linger = so->so_linger;
+                       break;
+
+               case SO_USELOOPBACK:
+               case SO_DONTROUTE:
+               case SO_DEBUG:
+               case SO_KEEPALIVE:
+               case SO_REUSEADDR:
+               case SO_REUSEPORT:
+               case SO_BROADCAST:
+               case SO_OOBINLINE:
+                       *mtod(m, int *) = so->so_options & optname;
+                       break;
+
+               case SO_TYPE:
+                       *mtod(m, int *) = so->so_type;
+                       break;
+
+               case SO_ERROR:
+                       *mtod(m, int *) = so->so_error;
+                       so->so_error = 0;
+                       break;
+
+               case SO_SNDBUF:
+                       *mtod(m, int *) = so->so_snd.sb_hiwat;
+                       break;
+
+               case SO_RCVBUF:
+                       *mtod(m, int *) = so->so_rcv.sb_hiwat;
+                       break;
+
+               case SO_SNDLOWAT:
+                       *mtod(m, int *) = so->so_snd.sb_lowat;
+                       break;
 
 
-       case SIOCATMARK: {
-               int atmark = (so->so_state&SS_RCVATMARK) != 0;
-               if (copyout((caddr_t)&atmark, cmdp, sizeof (atmark))) {
-                       u.u_error = EFAULT;
-                       return;
+               case SO_RCVLOWAT:
+                       *mtod(m, int *) = so->so_rcv.sb_lowat;
+                       break;
+
+               case SO_SNDTIMEO:
+               case SO_RCVTIMEO:
+                   {
+                       int val = (optname == SO_SNDTIMEO ?
+                            so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
+
+                       m->m_len = sizeof(struct timeval);
+                       mtod(m, struct timeval *)->tv_sec = val / hz;
+                       mtod(m, struct timeval *)->tv_usec =
+                           (val % hz) * tick;
+                       break;
+                   }
+
+               default:
+                       (void)m_free(m);
+                       return (ENOPROTOOPT);
                }
                }
-               return;
-       }
-       /* type/protocol specific ioctls */
+               *mp = m;
+               return (0);
        }
        }
-       u.u_error = EOPNOTSUPP;
+}
+
+void
+sohasoutofband(so)
+       register struct socket *so;
+{
+       struct proc *p;
+
+       if (so->so_pgid < 0)
+               gsignal(-so->so_pgid, SIGURG);
+       else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
+               psignal(p, SIGURG);
+       selwakeup(&so->so_rcv.sb_sel);
 }
 }