add Berkeley header
[unix-history] / usr / src / sys / kern / uipc_socket.c
index a8f121c..33b6274 100644 (file)
@@ -1,21 +1,27 @@
-/*     uipc_socket.c   4.56    82/10/17        */
-
-#include "../h/param.h"
-#include "../h/systm.h"
-#include "../h/dir.h"
-#include "../h/user.h"
-#include "../h/proc.h"
-#include "../h/file.h"
-#include "../h/inode.h"
-#include "../h/buf.h"
-#include "../h/mbuf.h"
-#include "../h/protosw.h"
-#include "../h/socket.h"
-#include "../h/socketvar.h"
-#include "../h/stat.h"
-#include "../h/ioctl.h"
-#include "../h/uio.h"
-#include "../net/route.h"
+/*
+ * Copyright (c) 1982, 1986 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms are permitted
+ * provided that this notice is preserved and that due credit is given
+ * to the University of California at Berkeley. The name of the University
+ * may not be used to endorse or promote products derived from this
+ * software without specific prior written permission. This software
+ * is provided ``as is'' without express or implied warranty.
+ *
+ *     @(#)uipc_socket.c       7.7 (Berkeley) %G%
+ */
+
+#include "param.h"
+#include "dir.h"
+#include "user.h"
+#include "proc.h"
+#include "file.h"
+#include "mbuf.h"
+#include "domain.h"
+#include "protosw.h"
+#include "socket.h"
+#include "socketvar.h"
 
 /*
  * Socket operation routines.
 
 /*
  * Socket operation routines.
  * sys_socket.c or from a system process, and
  * implement the semantics of socket operations by
  * switching out to the protocol specific routines.
  * sys_socket.c or from a system process, and
  * implement the semantics of socket operations by
  * switching out to the protocol specific routines.
+ *
+ * TODO:
+ *     test socketpair
+ *     clean up async
+ *     out-of-band is a kludge
  */
  */
-
 /*ARGSUSED*/
 /*ARGSUSED*/
-socreate(dom, aso, type, proto, opt)
+socreate(dom, aso, type, proto)
        struct socket **aso;
        struct socket **aso;
-       int type, proto;
-       struct socketopt *opt;
+       register int type;
+       int proto;
 {
        register struct protosw *prp;
        register struct socket *so;
 {
        register struct protosw *prp;
        register struct socket *so;
-       struct mbuf *m;
-       int pf, error;
+       register struct mbuf *m;
+       register int error;
 
 
-       pf = dom ? PF_UNIX : PF_INET;           /* should be u.u_protof */
        if (proto)
        if (proto)
-               prp = pffindproto(pf, proto);
+               prp = pffindproto(dom, proto, type);
        else
        else
-               prp = pffindtype(pf, type);
+               prp = pffindtype(dom, type);
        if (prp == 0)
                return (EPROTONOSUPPORT);
        if (prp->pr_type != type)
                return (EPROTOTYPE);
        if (prp == 0)
                return (EPROTONOSUPPORT);
        if (prp->pr_type != type)
                return (EPROTOTYPE);
-       m = m_getclr(M_WAIT);
-       if (m == 0)
-               return (ENOBUFS);
+       m = m_getclr(M_WAIT, MT_SOCKET);
        so = mtod(m, struct socket *);
        so->so_options = 0;
        so->so_state = 0;
        so = mtod(m, struct socket *);
        so->so_options = 0;
        so->so_state = 0;
+       so->so_type = type;
        if (u.u_uid == 0)
                so->so_state = SS_PRIV;
        so->so_proto = prp;
        if (u.u_uid == 0)
                so->so_state = SS_PRIV;
        so->so_proto = prp;
-       error = (*prp->pr_usrreq)(so, PRU_ATTACH,
-           (struct mbuf *)0, (struct mbuf *)0, (struct socketopt *)0);
+       error =
+           (*prp->pr_usrreq)(so, PRU_ATTACH,
+               (struct mbuf *)0, (struct mbuf *)proto, (struct mbuf *)0);
        if (error) {
                so->so_state |= SS_NOFDREF;
                sofree(so);
        if (error) {
                so->so_state |= SS_NOFDREF;
                sofree(so);
@@ -65,30 +74,29 @@ socreate(dom, aso, type, proto, opt)
        return (0);
 }
 
        return (0);
 }
 
-sobind(so, nam, opt)
+sobind(so, nam)
        struct socket *so;
        struct mbuf *nam;
        struct socket *so;
        struct mbuf *nam;
-       struct socketopt *opt;
 {
        int s = splnet();
        int error;
 
        error =
            (*so->so_proto->pr_usrreq)(so, PRU_BIND,
 {
        int s = splnet();
        int error;
 
        error =
            (*so->so_proto->pr_usrreq)(so, PRU_BIND,
-               (struct mbuf *)0, nam, opt);
+               (struct mbuf *)0, nam, (struct mbuf *)0);
        splx(s);
        return (error);
 }
 
 solisten(so, backlog)
        splx(s);
        return (error);
 }
 
 solisten(so, backlog)
-       struct socket *so;
+       register struct socket *so;
        int backlog;
 {
        int backlog;
 {
-       int s = splnet();
-       int error;
+       int s = splnet(), error;
 
 
-       error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN,
-           (struct mbuf *)0, (struct mbuf *)0, (struct socketopt *)0);
+       error =
+           (*so->so_proto->pr_usrreq)(so, PRU_LISTEN,
+               (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0);
        if (error) {
                splx(s);
                return (error);
        if (error) {
                splx(s);
                return (error);
@@ -100,24 +108,24 @@ solisten(so, backlog)
        }
        if (backlog < 0)
                backlog = 0;
        }
        if (backlog < 0)
                backlog = 0;
-       so->so_qlimit = backlog < 5 ? backlog : 5;
-       so->so_options |= SO_NEWFDONCONN;
+       so->so_qlimit = MIN(backlog, SOMAXCONN);
+       splx(s);
        return (0);
 }
 
 sofree(so)
        return (0);
 }
 
 sofree(so)
-       struct socket *so;
+       register struct socket *so;
 {
 
 {
 
+       if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
+               return;
        if (so->so_head) {
                if (!soqremque(so, 0) && !soqremque(so, 1))
                        panic("sofree dq");
                so->so_head = 0;
        }
        if (so->so_head) {
                if (!soqremque(so, 0) && !soqremque(so, 1))
                        panic("sofree dq");
                so->so_head = 0;
        }
-       if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
-               return;
        sbrelease(&so->so_snd);
        sbrelease(&so->so_snd);
-       sbrelease(&so->so_rcv);
+       sorflush(so);
        (void) m_free(dtom(so));
 }
 
        (void) m_free(dtom(so));
 }
 
@@ -126,106 +134,121 @@ sofree(so)
  * Initiate disconnect if connected.
  * Free socket when disconnect complete.
  */
  * Initiate disconnect if connected.
  * Free socket when disconnect complete.
  */
-soclose(so, exiting)
+soclose(so)
        register struct socket *so;
        register struct socket *so;
-       int exiting;
 {
        int s = splnet();               /* conservative */
 {
        int s = splnet();               /* conservative */
+       int error;
 
        if (so->so_options & SO_ACCEPTCONN) {
                while (so->so_q0 != so)
 
        if (so->so_options & SO_ACCEPTCONN) {
                while (so->so_q0 != so)
-                       soclose(so->so_q0, 1);
+                       (void) soabort(so->so_q0);
                while (so->so_q != so)
                while (so->so_q != so)
-                       soclose(so->so_q, 1);
+                       (void) soabort(so->so_q);
        }
        if (so->so_pcb == 0)
                goto discard;
        }
        if (so->so_pcb == 0)
                goto discard;
-       if (exiting)
-               so->so_options |= SO_KEEPALIVE;
        if (so->so_state & SS_ISCONNECTED) {
                if ((so->so_state & SS_ISDISCONNECTING) == 0) {
        if (so->so_state & SS_ISCONNECTED) {
                if ((so->so_state & SS_ISDISCONNECTING) == 0) {
-                       u.u_error = sodisconnect(so, (struct sockaddr *)0);
-                       if (u.u_error) {
-                               if (exiting)
-                                       goto drop;
-                               splx(s);
-                               return;
-                       }
+                       error = sodisconnect(so);
+                       if (error)
+                               goto drop;
                }
                }
-               if ((so->so_options & SO_DONTLINGER) == 0) {
+               if (so->so_options & SO_LINGER) {
                        if ((so->so_state & SS_ISDISCONNECTING) &&
                        if ((so->so_state & SS_ISDISCONNECTING) &&
-                           (so->so_state & SS_NBIO) &&
-                           exiting == 0) {
-                               u.u_error = EINPROGRESS;
-                               splx(s);
-                               return;
-                       }
-                       /* should use tsleep here, for at most linger */
+                           (so->so_state & SS_NBIO))
+                               goto drop;
                        while (so->so_state & SS_ISCONNECTED)
                                sleep((caddr_t)&so->so_timeo, PZERO+1);
                }
        }
 drop:
        if (so->so_pcb) {
                        while (so->so_state & SS_ISCONNECTED)
                                sleep((caddr_t)&so->so_timeo, PZERO+1);
                }
        }
 drop:
        if (so->so_pcb) {
-               u.u_error = (*so->so_proto->pr_usrreq)(so, PRU_DETACH,
-                   (struct mbuf *)0, (struct mbuf *)0, (struct socketopt *)0);
-               if (exiting == 0 && u.u_error) {
-                       splx(s);
-                       return;
-               }
+               int error2 =
+                   (*so->so_proto->pr_usrreq)(so, PRU_DETACH,
+                       (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0);
+               if (error == 0)
+                       error = error2;
        }
 discard:
        }
 discard:
+       if (so->so_state & SS_NOFDREF)
+               panic("soclose: NOFDREF");
        so->so_state |= SS_NOFDREF;
        sofree(so);
        splx(s);
        so->so_state |= SS_NOFDREF;
        sofree(so);
        splx(s);
+       return (error);
 }
 
 }
 
-/*ARGSUSED*/
-sostat(so, sb)
+/*
+ * Must be called at splnet...
+ */
+soabort(so)
        struct socket *so;
        struct socket *so;
-       struct stat *sb;
 {
 
 {
 
-       bzero((caddr_t)sb, sizeof (*sb));               /* XXX */
-       return (0);                                     /* XXX */
+       return (
+           (*so->so_proto->pr_usrreq)(so, PRU_ABORT,
+               (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0));
 }
 
 }
 
-soaccept(so, nam, opt)
-       struct socket *so;
+soaccept(so, nam)
+       register struct socket *so;
        struct mbuf *nam;
        struct mbuf *nam;
-       struct socketopt *opt;
 {
        int s = splnet();
        int error;
 
 {
        int s = splnet();
        int error;
 
+       if ((so->so_state & SS_NOFDREF) == 0)
+               panic("soaccept: !NOFDREF");
+       so->so_state &= ~SS_NOFDREF;
        error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT,
        error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT,
-           (struct mbuf *)0, nam, opt);
+           (struct mbuf *)0, nam, (struct mbuf *)0);
        splx(s);
        return (error);
 }
 
        splx(s);
        return (error);
 }
 
-soconnect(so, nam, opt)
-       struct socket *so;
+soconnect(so, nam)
+       register struct socket *so;
        struct mbuf *nam;
        struct mbuf *nam;
-       struct socketopt *opt;
 {
 {
-       int s = splnet();
+       int s;
        int error;
 
        int error;
 
-       if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) {
+       if (so->so_options & SO_ACCEPTCONN)
+               return (EOPNOTSUPP);
+       s = splnet();
+       /*
+        * If protocol is connection-based, can only connect once.
+        * Otherwise, if connected, try to disconnect first.
+        * This allows user to disconnect by connecting to, e.g.,
+        * a null address.
+        */
+       if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
+           ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
+           (error = sodisconnect(so))))
                error = EISCONN;
                error = EISCONN;
-               goto bad;
-       }
-       error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
-           (struct mbuf *)0, nam, opt);
-bad:
+       else
+               error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
+                   (struct mbuf *)0, nam, (struct mbuf *)0);
        splx(s);
        return (error);
 }
 
        splx(s);
        return (error);
 }
 
-sodisconnect(so, nam)
-       struct socket *so;
-       struct mbuf *nam;
+soconnect2(so1, so2)
+       register struct socket *so1;
+       struct socket *so2;
+{
+       int s = splnet();
+       int error;
+
+       error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2,
+           (struct mbuf *)0, (struct mbuf *)so2, (struct mbuf *)0);
+       splx(s);
+       return (error);
+}
+
+sodisconnect(so)
+       register struct socket *so;
 {
        int s = splnet();
        int error;
 {
        int s = splnet();
        int error;
@@ -239,7 +262,7 @@ sodisconnect(so, nam)
                goto bad;
        }
        error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT,
                goto bad;
        }
        error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT,
-           (struct mbuf *)0, nam, (struct socketopt *)0);
+           (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0);
 bad:
        splx(s);
        return (error);
 bad:
        splx(s);
        return (error);
@@ -252,168 +275,193 @@ bad:
  * Lock against other senders.
  * If must go all at once and not enough room now, then
  * inform user that this would block and do nothing.
  * Lock against other senders.
  * If must go all at once and not enough room now, then
  * inform user that this would block and do nothing.
+ * Otherwise, if nonblocking, send as much as possible.
  */
  */
-sosend(so, nam, uio, flags)
+sosend(so, nam, uio, flags, rights)
        register struct socket *so;
        struct mbuf *nam;
        register struct socket *so;
        struct mbuf *nam;
-       struct uio *uio;
+       register struct uio *uio;
        int flags;
        int flags;
+       struct mbuf *rights;
 {
        struct mbuf *top = 0;
 {
        struct mbuf *top = 0;
-       register struct mbuf *m, **mp = &top;
-       register u_int len;
-       int error = 0, space, s;
+       register struct mbuf *m, **mp;
+       register int space;
+       int len, rlen = 0, error = 0, s, dontroute, first = 1;
 
        if (sosendallatonce(so) && uio->uio_resid > so->so_snd.sb_hiwat)
                return (EMSGSIZE);
 
        if (sosendallatonce(so) && uio->uio_resid > so->so_snd.sb_hiwat)
                return (EMSGSIZE);
-restart:
-       sblock(&so->so_snd);
+       dontroute =
+           (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
+           (so->so_proto->pr_flags & PR_ATOMIC);
+       u.u_ru.ru_msgsnd++;
+       if (rights)
+               rlen = rights->m_len;
 #define        snderr(errno)   { error = errno; splx(s); goto release; }
 
 #define        snderr(errno)   { error = errno; splx(s); goto release; }
 
-       u.u_ru.ru_msgsnd++;
-again:
-       s = splnet();
-       if (so->so_state & SS_CANTSENDMORE) {
-               psignal(u.u_procp, SIGPIPE);
-               snderr(EPIPE);
-       }
-       if (so->so_error) {
-               error = so->so_error;
-               so->so_error = 0;                               /* ??? */
-               splx(s);
-               goto release;
-       }
-       if ((so->so_state & SS_ISCONNECTED) == 0) {
-               if (so->so_proto->pr_flags & PR_CONNREQUIRED)
-                       snderr(ENOTCONN);
-               if (nam == 0)
-                       snderr(EDESTADDRREQ);
-       }
-       if (top) {
-               error = (*so->so_proto->pr_usrreq)(so,
-                   (flags & SOF_OOB) ? PRU_SENDOOB : PRU_SEND,
-                   top, (caddr_t)nam, (struct socketopt *)0);
-               top = 0;
-               if (error) {
+restart:
+       sblock(&so->so_snd);
+       do {
+               s = splnet();
+               if (so->so_state & SS_CANTSENDMORE)
+                       snderr(EPIPE);
+               if (so->so_error) {
+                       error = so->so_error;
+                       so->so_error = 0;                       /* ??? */
                        splx(s);
                        goto release;
                }
                        splx(s);
                        goto release;
                }
-               mp = &top;
-       }
-       if (uio->uio_resid == 0) {
-               splx(s);
-               goto release;
-       }
-       if (flags & SOF_OOB)
-               space = 1024;
-       else {
-               space = sbspace(&so->so_snd);
-               if (space <= 0 ||
-                   sosendallatonce(so) && space < uio->uio_resid) {
-                       if (so->so_state & SS_NBIO)
-                               snderr(EWOULDBLOCK);
-                       sbunlock(&so->so_snd);
-                       sbwait(&so->so_snd);
-                       splx(s);
-                       goto restart;
-               }
-       }
-       splx(s);
-       while (uio->uio_resid > 0 && space > 0) {
-               register struct iovec *iov = uio->uio_iov;
-
-               if (iov->iov_len == 0) {
-                       uio->uio_iov++;
-                       uio->uio_iovcnt--;
-                       if (uio->uio_iovcnt < 0)
-                               panic("sosend");
-                       continue;
+               if ((so->so_state & SS_ISCONNECTED) == 0) {
+                       if (so->so_proto->pr_flags & PR_CONNREQUIRED)
+                               snderr(ENOTCONN);
+                       if (nam == 0)
+                               snderr(EDESTADDRREQ);
                }
                }
-               MGET(m, 1);
-               if (m == NULL) {
-                       error = ENOBUFS;                        /* SIGPIPE? */
-                       goto release;
+               if (flags & MSG_OOB)
+                       space = 1024;
+               else {
+                       space = sbspace(&so->so_snd);
+                       if (space <= rlen ||
+                          (sosendallatonce(so) &&
+                               space < uio->uio_resid + rlen) ||
+                          (uio->uio_resid >= CLBYTES && space < CLBYTES &&
+                          so->so_snd.sb_cc >= CLBYTES &&
+                          (so->so_state & SS_NBIO) == 0)) {
+                               if (so->so_state & SS_NBIO) {
+                                       if (first)
+                                               error = EWOULDBLOCK;
+                                       splx(s);
+                                       goto release;
+                               }
+                               sbunlock(&so->so_snd);
+                               sbwait(&so->so_snd);
+                               splx(s);
+                               goto restart;
+                       }
                }
                }
-               if (iov->iov_len >= CLBYTES && space >= CLBYTES) {
-                       register struct mbuf *p;
-                       MCLGET(p, 1);
-                       if (p == 0)
-                               goto nopages;
-                       m->m_off = (int)p - (int)m;
-                       len = CLBYTES;
-               } else {
+               splx(s);
+               mp = &top;
+               space -= rlen;
+               while (space > 0) {
+                       MGET(m, M_WAIT, MT_DATA);
+                       if (uio->uio_resid >= CLBYTES / 2 && space >= CLBYTES) {
+                               MCLGET(m);
+                               if (m->m_len != CLBYTES)
+                                       goto nopages;
+                               len = MIN(CLBYTES, uio->uio_resid);
+                               space -= CLBYTES;
+                       } else {
 nopages:
 nopages:
-                       len = MIN(MLEN, iov->iov_len);
+                               len = MIN(MIN(MLEN, uio->uio_resid), space);
+                               space -= len;
+                       }
+                       error = uiomove(mtod(m, caddr_t), len, UIO_WRITE, uio);
+                       m->m_len = len;
+                       *mp = m;
+                       if (error)
+                               goto release;
+                       mp = &m->m_next;
+                       if (uio->uio_resid <= 0)
+                               break;
                }
                }
-               uiomove(mtod(m, caddr_t), len, UIO_WRITE, uio);
-               m->m_len = len;
-               *mp = m;
-               mp = &m->m_next;
-               if (flags & SOF_OOB)
-                       space -= len;
-               else
-                       space = sbspace(&so->so_snd);
-       }
-       goto again;
+               if (dontroute)
+                       so->so_options |= SO_DONTROUTE;
+               s = splnet();                                   /* XXX */
+               error = (*so->so_proto->pr_usrreq)(so,
+                   (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
+                   top, (caddr_t)nam, rights);
+               splx(s);
+               if (dontroute)
+                       so->so_options &= ~SO_DONTROUTE;
+               rights = 0;
+               rlen = 0;
+               top = 0;
+               first = 0;
+               if (error)
+                       break;
+       } while (uio->uio_resid);
 
 release:
        sbunlock(&so->so_snd);
        if (top)
                m_freem(top);
 
 release:
        sbunlock(&so->so_snd);
        if (top)
                m_freem(top);
+       if (error == EPIPE)
+               psignal(u.u_procp, SIGPIPE);
        return (error);
 }
 
        return (error);
 }
 
-soreceive(so, aname, uio, flags)
+/*
+ * Implement receive operations on a socket.
+ * We depend on the way that records are added to the sockbuf
+ * by sbappend*.  In particular, each record (mbufs linked through m_next)
+ * must begin with an address if the protocol so specifies,
+ * followed by an optional mbuf containing access rights if supported
+ * by the protocol, and then zero or more mbufs of data.
+ * In order to avoid blocking network interrupts for the entire time here,
+ * we splx() while doing the actual copy to user space.
+ * Although the sockbuf is locked, new data may still be appended,
+ * and thus we must maintain consistency of the sockbuf during that time.
+ */
+soreceive(so, aname, uio, flags, rightsp)
        register struct socket *so;
        struct mbuf **aname;
        register struct socket *so;
        struct mbuf **aname;
-       struct uio *uio;
+       register struct uio *uio;
        int flags;
        int flags;
+       struct mbuf **rightsp;
 {
 {
-       register struct mbuf *m, *n;
-       u_int len;
-       int eor, s, error = 0, moff, tomark;
-
-       if (flags & SOF_OOB) {
-               m = m_get(M_WAIT);
-               error = (*so->so_proto->pr_usrreq)(so, PRU_RCVOOB,
-                   m, (struct mbuf *)0, (struct socketopt *)0);
+       register struct mbuf *m;
+       register int len, error = 0, s, offset;
+       struct protosw *pr = so->so_proto;
+       struct mbuf *nextrecord;
+       int moff;
+
+       if (rightsp)
+               *rightsp = 0;
+       if (aname)
+               *aname = 0;
+       if (flags & MSG_OOB) {
+               m = m_get(M_WAIT, MT_DATA);
+               error = (*pr->pr_usrreq)(so, PRU_RCVOOB,
+                   m, (struct mbuf *)(flags & MSG_PEEK), (struct mbuf *)0);
                if (error)
                if (error)
-                       return;
-               len = uio->uio_resid;
+                       goto bad;
                do {
                do {
+                       len = uio->uio_resid;
                        if (len > m->m_len)
                                len = m->m_len;
                        error =
                            uiomove(mtod(m, caddr_t), (int)len, UIO_READ, uio);
                        m = m_free(m);
                } while (uio->uio_resid && error == 0 && m);
                        if (len > m->m_len)
                                len = m->m_len;
                        error =
                            uiomove(mtod(m, caddr_t), (int)len, UIO_READ, uio);
                        m = m_free(m);
                } while (uio->uio_resid && error == 0 && m);
+bad:
                if (m)
                if (m)
-                       (void) m_freem(m);
+                       m_freem(m);
                return (error);
        }
 
 restart:
        sblock(&so->so_rcv);
                return (error);
        }
 
 restart:
        sblock(&so->so_rcv);
-SBCHECK(&so->so_rcv, "soreceive restart");
        s = splnet();
 
        s = splnet();
 
-#define        rcverr(errno)   { error = errno; splx(s); goto release; }
        if (so->so_rcv.sb_cc == 0) {
                if (so->so_error) {
                        error = so->so_error;
                        so->so_error = 0;
        if (so->so_rcv.sb_cc == 0) {
                if (so->so_error) {
                        error = so->so_error;
                        so->so_error = 0;
-                       splx(s);
                        goto release;
                }
                        goto release;
                }
-               if (so->so_state & SS_CANTRCVMORE) {
-                       splx(s);
+               if (so->so_state & SS_CANTRCVMORE)
                        goto release;
                        goto release;
-               }
                if ((so->so_state & SS_ISCONNECTED) == 0 &&
                if ((so->so_state & SS_ISCONNECTED) == 0 &&
-                   (so->so_proto->pr_flags & PR_CONNREQUIRED))
-                       rcverr(ENOTCONN);
-               if (so->so_state & SS_NBIO)
-                       rcverr(EWOULDBLOCK);
+                   (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
+                       error = ENOTCONN;
+                       goto release;
+               }
+               if (uio->uio_resid == 0)
+                       goto release;
+               if (so->so_state & SS_NBIO) {
+                       error = EWOULDBLOCK;
+                       goto release;
+               }
                sbunlock(&so->so_rcv);
                sbwait(&so->so_rcv);
                splx(s);
                sbunlock(&so->so_rcv);
                sbwait(&so->so_rcv);
                splx(s);
@@ -422,60 +470,81 @@ SBCHECK(&so->so_rcv, "soreceive restart");
        u.u_ru.ru_msgrcv++;
        m = so->so_rcv.sb_mb;
        if (m == 0)
        u.u_ru.ru_msgrcv++;
        m = so->so_rcv.sb_mb;
        if (m == 0)
-               panic("receive");
-SBCHECK(&so->so_snd, "soreceive havecc");
-       if (so->so_proto->pr_flags & PR_ADDR) {
-               if ((flags & SOF_PREVIEW) == 0) {
-                       so->so_rcv.sb_cc -= m->m_len;
-                       so->so_rcv.sb_mbcnt -= MSIZE;
-               }
-               if (aname) {
-                       if (flags & SOF_PREVIEW)
+               panic("receive 1");
+       nextrecord = m->m_act;
+       if (pr->pr_flags & PR_ADDR) {
+               if (m->m_type != MT_SONAME)
+                       panic("receive 1a");
+               if (flags & MSG_PEEK) {
+                       if (aname)
                                *aname = m_copy(m, 0, m->m_len);
                                *aname = m_copy(m, 0, m->m_len);
-                       else
-                               *aname = m;
                        m = m->m_next;
                        m = m->m_next;
-                       (*aname)->m_next = 0;
-               } else
-                       if (flags & SOF_PREVIEW)
+               } else {
+                       sbfree(&so->so_rcv, m);
+                       if (aname) {
+                               *aname = m;
                                m = m->m_next;
                                m = m->m_next;
-                       else
-                               m = m_free(m);
-               if (m == 0)
+                               (*aname)->m_next = 0;
+                               so->so_rcv.sb_mb = m;
+                       } else {
+                               MFREE(m, so->so_rcv.sb_mb);
+                               m = so->so_rcv.sb_mb;
+                       }
+                       if (m)
+                               m->m_act = nextrecord;
+               }
+       }
+       if (m && m->m_type == MT_RIGHTS) {
+               if ((pr->pr_flags & PR_RIGHTS) == 0)
                        panic("receive 2");
                        panic("receive 2");
-               if ((flags & SOF_PREVIEW) == 0)
-                       so->so_rcv.sb_mb = m;
-SBCHECK(&so->so_snd, "soreceive afteraddr");
+               if (flags & MSG_PEEK) {
+                       if (rightsp)
+                               *rightsp = m_copy(m, 0, m->m_len);
+                       m = m->m_next;
+               } else {
+                       sbfree(&so->so_rcv, m);
+                       if (rightsp) {
+                               *rightsp = m;
+                               so->so_rcv.sb_mb = m->m_next;
+                               m->m_next = 0;
+                               m = so->so_rcv.sb_mb;
+                       } else {
+                               MFREE(m, so->so_rcv.sb_mb);
+                               m = so->so_rcv.sb_mb;
+                       }
+                       if (m)
+                               m->m_act = nextrecord;
+               }
        }
        }
-       eor = 0;
        moff = 0;
        moff = 0;
-       tomark = so->so_oobmark;
-       do {
-               if (uio->uio_resid <= 0)
-                       break;
+       offset = 0;
+       while (m && uio->uio_resid > 0 && error == 0) {
+               if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
+                       panic("receive 3");
                len = uio->uio_resid;
                so->so_state &= ~SS_RCVATMARK;
                len = uio->uio_resid;
                so->so_state &= ~SS_RCVATMARK;
-               if (tomark && len > tomark)
-                       len = tomark;
-               if (moff+len > m->m_len - moff)
+               if (so->so_oobmark && len > so->so_oobmark - offset)
+                       len = so->so_oobmark - offset;
+               if (len > m->m_len - moff)
                        len = m->m_len - moff;
                splx(s);
                error =
                    uiomove(mtod(m, caddr_t) + moff, (int)len, UIO_READ, uio);
                s = splnet();
                        len = m->m_len - moff;
                splx(s);
                error =
                    uiomove(mtod(m, caddr_t) + moff, (int)len, UIO_READ, uio);
                s = splnet();
-               if (len == m->m_len) {
-                       eor = (int)m->m_act;
-                       if (flags & SOF_PREVIEW)
+               if (len == m->m_len - moff) {
+                       if (flags & MSG_PEEK) {
                                m = m->m_next;
                                m = m->m_next;
-                       else {
+                               moff = 0;
+                       } else {
+                               nextrecord = m->m_act;
                                sbfree(&so->so_rcv, m);
                                sbfree(&so->so_rcv, m);
-                               MFREE(m, n);
-                               m = n;
-                               so->so_rcv.sb_mb = m;
+                               MFREE(m, so->so_rcv.sb_mb);
+                               m = so->so_rcv.sb_mb;
+                               if (m)
+                                       m->m_act = nextrecord;
                        }
                        }
-                       moff = 0;
                } else {
                } else {
-                       if (flags & SOF_PREVIEW)
+                       if (flags & MSG_PEEK)
                                moff += len;
                        else {
                                m->m_off += len;
                                moff += len;
                        else {
                                m->m_off += len;
@@ -483,168 +552,248 @@ SBCHECK(&so->so_snd, "soreceive afteraddr");
                                so->so_rcv.sb_cc -= len;
                        }
                }
                                so->so_rcv.sb_cc -= len;
                        }
                }
-               if ((flags & SOF_PREVIEW) == 0 && so->so_oobmark) {
-                       so->so_oobmark -= len;
-                       if (so->so_oobmark == 0) {
-                               so->so_state |= SS_RCVATMARK;
-                               break;
-                       }
-               }
-               if (tomark) {
-                       tomark -= len;
-                       if (tomark == 0)
-                               break;
+               if (so->so_oobmark) {
+                       if ((flags & MSG_PEEK) == 0) {
+                               so->so_oobmark -= len;
+                               if (so->so_oobmark == 0) {
+                                       so->so_state |= SS_RCVATMARK;
+                                       break;
+                               }
+                       } else
+                               offset += len;
                }
                }
-SBCHECK(&so->so_snd, "soreceive rcvloop");
-       } while (m && error == 0 && !eor);
-       if (flags & SOF_PREVIEW)
-               goto release;
-       if ((so->so_proto->pr_flags & PR_ATOMIC) && eor == 0)
-               do {
-                       if (m == 0)
-                               panic("receive 3");
-                       sbfree(&so->so_rcv, m);
-                       eor = (int)m->m_act;
-                       so->so_rcv.sb_mb = m->m_next;
-                       MFREE(m, n);
-                       m = n;
-SBCHECK(&so->so_snd, "soreceive atomicloop");
-               } while (eor == 0);
-       if ((so->so_proto->pr_flags & PR_WANTRCVD) && so->so_pcb)
-               (*so->so_proto->pr_usrreq)(so, PRU_RCVD,
-                   (struct mbuf *)0, (struct mbuf *)0, (struct socketopt *)0);
+       }
+       if ((flags & MSG_PEEK) == 0) {
+               if (m == 0)
+                       so->so_rcv.sb_mb = nextrecord;
+               else if (pr->pr_flags & PR_ATOMIC)
+                       (void) sbdroprecord(&so->so_rcv);
+               if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
+                       (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
+                           (struct mbuf *)0, (struct mbuf *)0);
+               if (error == 0 && rightsp && *rightsp &&
+                   pr->pr_domain->dom_externalize)
+                       error = (*pr->pr_domain->dom_externalize)(*rightsp);
+       }
 release:
        sbunlock(&so->so_rcv);
        splx(s);
        return (error);
 }
 
 release:
        sbunlock(&so->so_rcv);
        splx(s);
        return (error);
 }
 
-sohasoutofband(so)
-       struct socket *so;
+soshutdown(so, how)
+       register struct socket *so;
+       register int how;
 {
 {
+       register struct protosw *pr = so->so_proto;
+
+       how++;
+       if (how & FREAD)
+               sorflush(so);
+       if (how & FWRITE)
+               return ((*pr->pr_usrreq)(so, PRU_SHUTDOWN,
+                   (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0));
+       return (0);
+}
 
 
-       if (so->so_pgrp == 0)
-               return;
-       if (so->so_pgrp > 0)
-               gsignal(so->so_pgrp, SIGURG);
-       else {
-               struct proc *p = pfind(-so->so_pgrp);
-
-               if (p)
-                       psignal(p, SIGURG);
-       }
+sorflush(so)
+       register struct socket *so;
+{
+       register struct sockbuf *sb = &so->so_rcv;
+       register struct protosw *pr = so->so_proto;
+       register int s;
+       struct sockbuf asb;
+
+       sblock(sb);
+       s = splimp();
+       socantrcvmore(so);
+       sbunlock(sb);
+       asb = *sb;
+       bzero((caddr_t)sb, sizeof (*sb));
+       splx(s);
+       if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
+               (*pr->pr_domain->dom_dispose)(asb.sb_mb);
+       sbrelease(&asb);
 }
 
 }
 
-/*ARGSUSED*/
-soioctl(so, cmd, data)
+sosetopt(so, level, optname, m0)
        register struct socket *so;
        register struct socket *so;
-       int cmd;
-       register char *data;
+       int level, optname;
+       struct mbuf *m0;
 {
 {
+       int error = 0;
+       register struct mbuf *m = m0;
+
+       if (level != SOL_SOCKET) {
+               if (so->so_proto && so->so_proto->pr_ctloutput)
+                       return ((*so->so_proto->pr_ctloutput)
+                                 (PRCO_SETOPT, so, level, optname, &m0));
+               error = ENOPROTOOPT;
+       } else {
+               switch (optname) {
+
+               case SO_LINGER:
+                       if (m == NULL || m->m_len != sizeof (struct linger)) {
+                               error = EINVAL;
+                               goto bad;
+                       }
+                       so->so_linger = mtod(m, struct linger *)->l_linger;
+                       /* fall thru... */
+
+               case SO_DEBUG:
+               case SO_KEEPALIVE:
+               case SO_DONTROUTE:
+               case SO_USELOOPBACK:
+               case SO_BROADCAST:
+               case SO_REUSEADDR:
+               case SO_OOBINLINE:
+                       if (m == NULL || m->m_len < sizeof (int)) {
+                               error = EINVAL;
+                               goto bad;
+                       }
+                       if (*mtod(m, int *))
+                               so->so_options |= optname;
+                       else
+                               so->so_options &= ~optname;
+                       break;
 
 
-       switch (cmd) {
-
-       case FIONBIO:
-               if (*(int *)data)
-                       so->so_state |= SS_NBIO;
-               else
-                       so->so_state &= ~SS_NBIO;
-               break;
-
-       case FIOASYNC:
-               if (*(int *)data)
-                       so->so_state |= SS_ASYNC;
-               else
-                       so->so_state &= ~SS_ASYNC;
-               break;
-
-       case SIOCSKEEP:
-               if (*(int *)data)
-                       so->so_options &= ~SO_KEEPALIVE;
-               else
-                       so->so_options |= SO_KEEPALIVE;
-               break;
-
-       case SIOCGKEEP:
-               *(int *)data = (so->so_options & SO_KEEPALIVE) != 0;
-               break;
-
-       case SIOCSLINGER:
-               so->so_linger = *(int *)data;
-               if (so->so_linger)
-                       so->so_options &= ~SO_DONTLINGER;
-               else
-                       so->so_options |= SO_DONTLINGER;
-               break;
-
-       case SIOCGLINGER:
-               *(int *)data = so->so_linger;
-               break;
-
-       case SIOCSPGRP:
-               so->so_pgrp = *(int *)data;
-               break;
-
-       case SIOCGPGRP:
-               *(int *)data = so->so_pgrp;
-               break;
-
-       case SIOCDONE: {
-               int flags = *(int *)data;
-
-               flags++;
-               if (flags & FREAD) {
-                       int s = splimp();
-                       socantrcvmore(so);
-                       sbflush(&so->so_rcv);
-                       splx(s);
+               case SO_SNDBUF:
+               case SO_RCVBUF:
+               case SO_SNDLOWAT:
+               case SO_RCVLOWAT:
+               case SO_SNDTIMEO:
+               case SO_RCVTIMEO:
+                       if (m == NULL || m->m_len < sizeof (int)) {
+                               error = EINVAL;
+                               goto bad;
+                       }
+                       switch (optname) {
+
+                       case SO_SNDBUF:
+                       case SO_RCVBUF:
+                               if (sbreserve(optname == SO_SNDBUF ? &so->so_snd :
+                                   &so->so_rcv, *mtod(m, int *)) == 0) {
+                                       error = ENOBUFS;
+                                       goto bad;
+                               }
+                               break;
+
+                       case SO_SNDLOWAT:
+                               so->so_snd.sb_lowat = *mtod(m, int *);
+                               break;
+                       case SO_RCVLOWAT:
+                               so->so_rcv.sb_lowat = *mtod(m, int *);
+                               break;
+                       case SO_SNDTIMEO:
+                               so->so_snd.sb_timeo = *mtod(m, int *);
+                               break;
+                       case SO_RCVTIMEO:
+                               so->so_rcv.sb_timeo = *mtod(m, int *);
+                               break;
+                       }
+                       break;
+
+               default:
+                       error = ENOPROTOOPT;
+                       break;
                }
                }
-               if (flags & FWRITE)
-                       return ((*so->so_proto->pr_usrreq)(so, PRU_SHUTDOWN,
-                           (struct mbuf *)0, (struct mbuf *)0,
-                           (struct socketopt *)0));
-               break;
        }
        }
+bad:
+       if (m)
+               (void) m_free(m);
+       return (error);
+}
 
 
-       case SIOCSENDOOB: {
-               char oob = *(char *)data;
-               struct mbuf *m = m_get(M_DONTWAIT);
+sogetopt(so, level, optname, mp)
+       register struct socket *so;
+       int level, optname;
+       struct mbuf **mp;
+{
+       register struct mbuf *m;
+
+       if (level != SOL_SOCKET) {
+               if (so->so_proto && so->so_proto->pr_ctloutput) {
+                       return ((*so->so_proto->pr_ctloutput)
+                                 (PRCO_GETOPT, so, level, optname, mp));
+               } else 
+                       return (ENOPROTOOPT);
+       } else {
+               m = m_get(M_WAIT, MT_SOOPTS);
+               m->m_len = sizeof (int);
+
+               switch (optname) {
+
+               case SO_LINGER:
+                       m->m_len = sizeof (struct linger);
+                       mtod(m, struct linger *)->l_onoff =
+                               so->so_options & SO_LINGER;
+                       mtod(m, struct linger *)->l_linger = so->so_linger;
+                       break;
 
 
-               if (m == 0)
-                       return (ENOBUFS);
-               m->m_len = 1;
-               *mtod(m, char *) = oob;
-               return ((*so->so_proto->pr_usrreq)(so, PRU_SENDOOB,
-                   m, (struct mbuf *)0, (struct socketopt *)0));
-       }
+               case SO_USELOOPBACK:
+               case SO_DONTROUTE:
+               case SO_DEBUG:
+               case SO_KEEPALIVE:
+               case SO_REUSEADDR:
+               case SO_BROADCAST:
+               case SO_OOBINLINE:
+                       *mtod(m, int *) = so->so_options & optname;
+                       break;
 
 
-       case SIOCRCVOOB: {
-               struct mbuf *m = m_get(M_WAIT);
+               case SO_TYPE:
+                       *mtod(m, int *) = so->so_type;
+                       break;
 
 
-               if (m == 0)
-                       return (ENOBUFS);
-               *mtod(m, caddr_t) = 0;
-               (*so->so_proto->pr_usrreq)(so, PRU_RCVOOB,
-                   m, (struct mbuf *)0, (struct socketopt *)0);
-               *(char *)data = *mtod(m, char *);
-               (void) m_free(m);
-               break;
-       }
+               case SO_ERROR:
+                       *mtod(m, int *) = so->so_error;
+                       so->so_error = 0;
+                       break;
 
 
-       case SIOCATMARK:
-               *(int *)data = (so->so_state&SS_RCVATMARK) != 0;
-               break;
+               case SO_SNDBUF:
+                       *mtod(m, int *) = so->so_snd.sb_hiwat;
+                       break;
 
 
-       /* routing table update calls */
-       case SIOCADDRT:
-       case SIOCDELRT:
-               if (!suser())
-                       return (u.u_error);             /* XXX */
-               return (rtrequest(cmd, (struct rtentry *)data));
+               case SO_RCVBUF:
+                       *mtod(m, int *) = so->so_rcv.sb_hiwat;
+                       break;
+
+               case SO_SNDLOWAT:
+                       *mtod(m, int *) = so->so_snd.sb_lowat;
+                       break;
 
 
-       /* type/protocol specific ioctls */
-       default:
-               return (ENOTTY);
+               case SO_RCVLOWAT:
+                       *mtod(m, int *) = so->so_rcv.sb_lowat;
+                       break;
+
+               case SO_SNDTIMEO:
+                       *mtod(m, int *) = so->so_snd.sb_timeo;
+                       break;
+
+               case SO_RCVTIMEO:
+                       *mtod(m, int *) = so->so_rcv.sb_timeo;
+                       break;
+
+               default:
+                       (void)m_free(m);
+                       return (ENOPROTOOPT);
+               }
+               *mp = m;
+               return (0);
+       }
+}
+
+sohasoutofband(so)
+       register struct socket *so;
+{
+       struct proc *p;
+
+       if (so->so_pgrp < 0)
+               gsignal(-so->so_pgrp, SIGURG);
+       else if (so->so_pgrp > 0 && (p = pfind(so->so_pgrp)) != 0)
+               psignal(p, SIGURG);
+       if (so->so_rcv.sb_sel) {
+               selwakeup(so->so_rcv.sb_sel, so->so_rcv.sb_flags & SB_COLL);
+               so->so_rcv.sb_sel = 0;
+               so->so_rcv.sb_flags &= ~SB_COLL;
        }
        }
-       return (0);
 }
 }