update to current hashing techniques
[unix-history] / usr / src / sys / nfs / nfs_socket.c
index 40bb423..fd7ccc4 100644 (file)
@@ -7,53 +7,41 @@
  *
  * %sccs.include.redist.c%
  *
  *
  * %sccs.include.redist.c%
  *
- *     @(#)nfs_socket.c        7.25 (Berkeley) %G%
+ *     @(#)nfs_socket.c        7.36 (Berkeley) %G%
  */
 
 /*
  * Socket operations for use by nfs
  */
 
  */
 
 /*
  * Socket operations for use by nfs
  */
 
-#include "types.h"
-#include "param.h"
-#include "uio.h"
-#include "proc.h"
-#include "signal.h"
-#include "mount.h"
-#include "kernel.h"
-#include "malloc.h"
-#include "mbuf.h"
-#include "vnode.h"
-#include "domain.h"
-#include "protosw.h"
-#include "socket.h"
-#include "socketvar.h"
-#include "syslog.h"
-#include "tprintf.h"
-#include "machine/endian.h"
-#include "netinet/in.h"
-#include "netinet/tcp.h"
-#ifdef ISO
-#include "netiso/iso.h"
-#endif
-#include "ufs/ufs/quota.h"
-#include "ufs/ufs/ufsmount.h"
-#include "rpcv2.h"
-#include "nfsv2.h"
-#include "nfs.h"
-#include "xdr_subs.h"
-#include "nfsm_subs.h"
-#include "nfsmount.h"
-#include "nfsnode.h"
-#include "nfsrtt.h"
-#include "nqnfs.h"
-
-#include "syslog.h"
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/mount.h>
+#include <sys/kernel.h>
+#include <sys/mbuf.h>
+#include <sys/vnode.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/syslog.h>
+#include <sys/tprintf.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfs.h>
+#include <nfs/xdr_subs.h>
+#include <nfs/nfsm_subs.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nfsrtt.h>
+#include <nfs/nqnfs.h>
 
 #define        TRUE    1
 #define        FALSE   0
 
 
 #define        TRUE    1
 #define        FALSE   0
 
-int netnetnet = sizeof (struct netaddrhash);
 /*
  * Estimate rto for an nfs rpc sent via. an unreliable datagram.
  * Use the mean and mean deviation of rtt for the appropriate type of rpc
 /*
  * Estimate rto for an nfs rpc sent via. an unreliable datagram.
  * Use the mean and mean deviation of rtt for the appropriate type of rpc
@@ -201,15 +189,38 @@ nfs_connect(nmp, rep)
 {
        register struct socket *so;
        int s, error, rcvreserve, sndreserve;
 {
        register struct socket *so;
        int s, error, rcvreserve, sndreserve;
+       struct sockaddr *saddr;
+       struct sockaddr_in *sin;
        struct mbuf *m;
        struct mbuf *m;
+       u_short tport;
 
        nmp->nm_so = (struct socket *)0;
 
        nmp->nm_so = (struct socket *)0;
-       if (error = socreate(mtod(nmp->nm_nam, struct sockaddr *)->sa_family,
+       saddr = mtod(nmp->nm_nam, struct sockaddr *);
+       if (error = socreate(saddr->sa_family,
                &nmp->nm_so, nmp->nm_sotype, nmp->nm_soproto))
                goto bad;
        so = nmp->nm_so;
        nmp->nm_soflags = so->so_proto->pr_flags;
 
                &nmp->nm_so, nmp->nm_sotype, nmp->nm_soproto))
                goto bad;
        so = nmp->nm_so;
        nmp->nm_soflags = so->so_proto->pr_flags;
 
+       /*
+        * Some servers require that the client port be a reserved port number.
+        */
+       if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) {
+               MGET(m, M_WAIT, MT_SONAME);
+               sin = mtod(m, struct sockaddr_in *);
+               sin->sin_len = m->m_len = sizeof (struct sockaddr_in);
+               sin->sin_family = AF_INET;
+               sin->sin_addr.s_addr = INADDR_ANY;
+               tport = IPPORT_RESERVED - 1;
+               sin->sin_port = htons(tport);
+               while ((error = sobind(so, m)) == EADDRINUSE &&
+                      --tport > IPPORT_RESERVED / 2)
+                       sin->sin_port = htons(tport);
+               m_freem(m);
+               if (error)
+                       goto bad;
+       }
+
        /*
         * Protocols that do not require connections may be optionally left
         * unconnected for servers that reply from a port other than NFS_PORT.
        /*
         * Protocols that do not require connections may be optionally left
         * unconnected for servers that reply from a port other than NFS_PORT.
@@ -401,9 +412,10 @@ nfs_send(so, nam, top, rep)
 
        error = sosend(so, sendnam, (struct uio *)0, top,
                (struct mbuf *)0, flags);
 
        error = sosend(so, sendnam, (struct uio *)0, top,
                (struct mbuf *)0, flags);
-if(error) printf("nfssnd err=%d\n",error);
        if (error) {
                if (rep) {
        if (error) {
                if (rep) {
+                       log(LOG_INFO, "nfs send error %d for server %s\n",error,
+                           rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
                        /*
                         * Deal with errors for the client side.
                         */
                        /*
                         * Deal with errors for the client side.
                         */
@@ -411,7 +423,8 @@ if(error) printf("nfssnd err=%d\n",error);
                                error = EINTR;
                        else
                                rep->r_flags |= R_MUSTRESEND;
                                error = EINTR;
                        else
                                rep->r_flags |= R_MUSTRESEND;
-               }
+               } else
+                       log(LOG_INFO, "nfsd send error %d\n", error);
 
                /*
                 * Handle any recoverable (soft) socket errors here. (???)
 
                /*
                 * Handle any recoverable (soft) socket errors here. (???)
@@ -445,6 +458,7 @@ nfs_receive(rep, aname, mp)
        u_long len;
        struct mbuf **getnam;
        int error, sotype, rcvflg;
        u_long len;
        struct mbuf **getnam;
        int error, sotype, rcvflg;
+       struct proc *p = curproc;       /* XXX */
 
        /*
         * Set up arguments for soreceive()
 
        /*
         * Set up arguments for soreceive()
@@ -507,6 +521,7 @@ tryagain:
                        auio.uio_rw = UIO_READ;
                        auio.uio_offset = 0;
                        auio.uio_resid = sizeof(u_long);
                        auio.uio_rw = UIO_READ;
                        auio.uio_offset = 0;
                        auio.uio_resid = sizeof(u_long);
+                       auio.uio_procp = p;
                        do {
                           rcvflg = MSG_WAITALL;
                           error = soreceive(so, (struct mbuf **)0, &auio,
                        do {
                           rcvflg = MSG_WAITALL;
                           error = soreceive(so, (struct mbuf **)0, &auio,
@@ -517,11 +532,10 @@ tryagain:
                           }
                        } while (error == EWOULDBLOCK);
                        if (!error && auio.uio_resid > 0) {
                           }
                        } while (error == EWOULDBLOCK);
                        if (!error && auio.uio_resid > 0) {
-                           if (rep)
-                               log(LOG_INFO,
-                                  "short receive (%d/%d) from nfs server %s\n",
-                                  sizeof(u_long) - auio.uio_resid,
-                                  sizeof(u_long),
+                           log(LOG_INFO,
+                                "short receive (%d/%d) from nfs server %s\n",
+                                sizeof(u_long) - auio.uio_resid,
+                                sizeof(u_long),
                                 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
                            error = EPIPE;
                        }
                                 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
                            error = EPIPE;
                        }
@@ -533,11 +547,10 @@ tryagain:
                         * and forcing a disconnect/reconnect is all I can do.
                         */
                        if (len > NFS_MAXPACKET) {
                         * and forcing a disconnect/reconnect is all I can do.
                         */
                        if (len > NFS_MAXPACKET) {
-                           if (rep)
-                               log(LOG_ERR, "%s (%d) from nfs server %s\n",
-                                   "impossible packet length",
-                                   len,
-                                rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
+                           log(LOG_ERR, "%s (%d) from nfs server %s\n",
+                               "impossible packet length",
+                               len,
+                               rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
                            error = EFBIG;
                            goto errout;
                        }
                            error = EFBIG;
                            goto errout;
                        }
@@ -549,11 +562,10 @@ tryagain:
                        } while (error == EWOULDBLOCK || error == EINTR ||
                                 error == ERESTART);
                        if (!error && auio.uio_resid > 0) {
                        } while (error == EWOULDBLOCK || error == EINTR ||
                                 error == ERESTART);
                        if (!error && auio.uio_resid > 0) {
-                           if (rep)
-                               log(LOG_INFO,
-                                  "short receive (%d/%d) from nfs server %s\n",
-                                  len - auio.uio_resid, len,
-                                rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
+                           log(LOG_INFO,
+                               "short receive (%d/%d) from nfs server %s\n",
+                               len - auio.uio_resid, len,
+                               rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
                            error = EPIPE;
                        }
                } else {
                            error = EPIPE;
                        }
                } else {
@@ -566,6 +578,7 @@ tryagain:
                         * on.
                         */
                        auio.uio_resid = len = 100000000; /* Anything Big */
                         * on.
                         */
                        auio.uio_resid = len = 100000000; /* Anything Big */
+                       auio.uio_procp = p;
                        do {
                            rcvflg = 0;
                            error =  soreceive(so, (struct mbuf **)0,
                        do {
                            rcvflg = 0;
                            error =  soreceive(so, (struct mbuf **)0,
@@ -588,7 +601,7 @@ errout:
                if (error && error != EINTR && error != ERESTART) {
                        m_freem(*mp);
                        *mp = (struct mbuf *)0;
                if (error && error != EINTR && error != ERESTART) {
                        m_freem(*mp);
                        *mp = (struct mbuf *)0;
-                       if (error != EPIPE && rep)
+                       if (error != EPIPE)
                                log(LOG_INFO,
                                    "receive error %d from nfs server %s\n",
                                    error,
                                log(LOG_INFO,
                                    "receive error %d from nfs server %s\n",
                                    error,
@@ -607,6 +620,7 @@ errout:
                else
                        getnam = aname;
                auio.uio_resid = len = 1000000;
                else
                        getnam = aname;
                auio.uio_resid = len = 1000000;
+               auio.uio_procp = p;
                do {
                        rcvflg = 0;
                        error =  soreceive(so, getnam, &auio, mp,
                do {
                        rcvflg = 0;
                        error =  soreceive(so, getnam, &auio, mp,
@@ -779,8 +793,11 @@ nfsmout:
                if (rep == &nfsreqh) {
                        nfsstats.rpcunexpected++;
                        m_freem(mrep);
                if (rep == &nfsreqh) {
                        nfsstats.rpcunexpected++;
                        m_freem(mrep);
-               } else if (rep == myrep)
+               } else if (rep == myrep) {
+                       if (rep->r_mrep == NULL)
+                               panic("nfsreply nil");
                        return (0);
                        return (0);
+               }
        }
 }
 
        }
 }
 
@@ -853,6 +870,8 @@ kerbauth:
                }
        } else {
                auth_type = RPCAUTH_UNIX;
                }
        } else {
                auth_type = RPCAUTH_UNIX;
+               if (cred->cr_ngroups < 1)
+                       panic("nfsreq nogrps");
                auth_len = ((((cred->cr_ngroups - 1) > nmp->nm_numgrps) ?
                        nmp->nm_numgrps : (cred->cr_ngroups - 1)) << 2) +
                        5 * NFSX_UNSIGNED;
                auth_len = ((((cred->cr_ngroups - 1) > nmp->nm_numgrps) ?
                        nmp->nm_numgrps : (cred->cr_ngroups - 1)) << 2) +
                        5 * NFSX_UNSIGNED;
@@ -931,7 +950,7 @@ tryagain:
        /*
         * Wait for the reply from our send or the timer's.
         */
        /*
         * Wait for the reply from our send or the timer's.
         */
-       if (!error)
+       if (!error || error == EPIPE)
                error = nfs_reply(rep);
 
        /*
                error = nfs_reply(rep);
 
        /*
@@ -1189,7 +1208,9 @@ nfs_rephead(siz, nd, err, cache, frev, mrq, mbp, bposp)
  * To avoid retransmission attempts on STREAM sockets (in the future) make
  * sure to set the r_retry field to 0 (implies nm_retry == 0).
  */
  * To avoid retransmission attempts on STREAM sockets (in the future) make
  * sure to set the r_retry field to 0 (implies nm_retry == 0).
  */
-nfs_timer()
+void
+nfs_timer(arg)
+       void *arg;
 {
        register struct nfsreq *rep;
        register struct mbuf *m;
 {
        register struct nfsreq *rep;
        register struct mbuf *m;
@@ -1393,220 +1414,6 @@ nfs_rcvunlock(flagp)
        }
 }
 
        }
 }
 
-/*
- * This function compares two net addresses by family and returns TRUE
- * if they are the same host.
- * If there is any doubt, return FALSE.
- * The AF_INET family is handled as a special case so that address mbufs
- * don't need to be saved to store "struct in_addr", which is only 4 bytes.
- */
-nfs_netaddr_match(family, haddr, hmask, nam)
-       int family;
-       union nethostaddr *haddr;
-       union nethostaddr *hmask;
-       struct mbuf *nam;
-{
-       register struct sockaddr_in *inetaddr;
-#ifdef ISO
-       register struct sockaddr_iso *isoaddr1, *isoaddr2;
-#endif
-
-
-       switch (family) {
-       case AF_INET:
-               inetaddr = mtod(nam, struct sockaddr_in *);
-               if (inetaddr->sin_family != AF_INET)
-                       return (0);
-               if (hmask) {
-                       if ((inetaddr->sin_addr.s_addr & hmask->had_inetaddr) ==
-                           (haddr->had_inetaddr & hmask->had_inetaddr))
-                               return (1);
-               } else if (inetaddr->sin_addr.s_addr == haddr->had_inetaddr)
-                       return (1);
-               break;
-#ifdef ISO
-       case AF_ISO:
-               isoaddr1 = mtod(nam, struct sockaddr_iso *);
-               if (isoaddr1->siso_family != AF_ISO)
-                       return (0);
-               isoaddr2 = mtod(haddr->had_nam, struct sockaddr_iso *);
-               if (isoaddr1->siso_nlen > 0 &&
-                   isoaddr1->siso_nlen == isoaddr2->siso_nlen &&
-                   SAME_ISOADDR(isoaddr1, isoaddr2))
-                       return (1);
-               break;
-#endif /* ISO */
-       default:
-               break;
-       };
-       return (0);
-}
-
-/*
- * Build hash lists of net addresses and hang them off the mount point.
- * Called by ufs_mount() to set up the lists of export addresses.
- */
-hang_addrlist(mp, argp)
-       struct mount *mp;
-       struct ufs_args *argp;
-{
-       register struct netaddrhash *np, **hnp;
-       register int i;
-       struct ufsmount *ump;
-       struct sockaddr *saddr;
-       struct mbuf *nam, *msk = (struct mbuf *)0;
-       union nethostaddr netmsk;
-       int error;
-
-       if (error = sockargs(&nam, (caddr_t)argp->saddr, argp->slen,
-           MT_SONAME))
-           return (error);
-       saddr = mtod(nam, struct sockaddr *);
-       ump = VFSTOUFS(mp);
-       if (saddr->sa_family == AF_INET &&
-           ((struct sockaddr_in *)saddr)->sin_addr.s_addr == INADDR_ANY) {
-           m_freem(nam);
-           if (mp->mnt_flag & MNT_DEFEXPORTED)
-               return (EPERM);
-           np = &ump->um_defexported;
-           np->neth_exflags = argp->exflags;
-           np->neth_anon = argp->anon;
-           np->neth_anon.cr_ref = 1;
-           mp->mnt_flag |= MNT_DEFEXPORTED;
-           return (0);
-       }
-       if (argp->msklen > 0) {
-           if (error = sockargs(&msk, (caddr_t)argp->smask, argp->msklen,
-               MT_SONAME)) {
-               m_freem(nam);
-               return (error);
-           }
-
-           /*
-            * Scan all the hash lists to check against duplications.
-            * For the net list, try both masks to catch a subnet
-            * of another network.
-            */
-           hnp = &ump->um_netaddr[NETMASK_HASH];
-           np = *hnp;
-           if (saddr->sa_family == AF_INET)
-               netmsk.had_inetaddr =
-                   mtod(msk, struct sockaddr_in *)->sin_addr.s_addr;
-           else
-               netmsk.had_nam = msk;
-           while (np) {
-               if (nfs_netaddr_match(np->neth_family, &np->neth_haddr,
-                   &np->neth_hmask, nam) ||
-                   nfs_netaddr_match(np->neth_family, &np->neth_haddr,
-                   &netmsk, nam)) {
-                       m_freem(nam);
-                       m_freem(msk);
-                       return (EPERM);
-               }
-               np = np->neth_next;
-           }
-           for (i = 0; i < NETHASHSZ; i++) {
-               np = ump->um_netaddr[i];
-               while (np) {
-                   if (nfs_netaddr_match(np->neth_family, &np->neth_haddr,
-                       &netmsk, nam)) {
-                       m_freem(nam);
-                       m_freem(msk);
-                       return (EPERM);
-                   }
-                   np = np->neth_next;
-               }
-           }
-       } else {
-           hnp = &ump->um_netaddr[NETADDRHASH(saddr)];
-           np = ump->um_netaddr[NETMASK_HASH];
-           while (np) {
-               if (nfs_netaddr_match(np->neth_family, &np->neth_haddr,
-                   &np->neth_hmask, nam)) {
-                   m_freem(nam);
-                   return (EPERM);
-               }
-               np = np->neth_next;
-           }
-           np = *hnp;
-           while (np) {
-               if (nfs_netaddr_match(np->neth_family, &np->neth_haddr,
-                   (union nethostaddr *)0, nam)) {
-                   m_freem(nam);
-                   return (EPERM);
-               }
-               np = np->neth_next;
-           }
-       }
-       np = (struct netaddrhash *) malloc(sizeof(struct netaddrhash), M_NETADDR,
-           M_WAITOK);
-       np->neth_family = saddr->sa_family;
-       if (saddr->sa_family == AF_INET) {
-               np->neth_inetaddr = ((struct sockaddr_in *)saddr)->sin_addr.s_addr;
-               m_freem(nam);
-               if (msk) {
-                       np->neth_inetmask = netmsk.had_inetaddr;
-                       m_freem(msk);
-                       if (np->neth_inetaddr &~ np->neth_inetmask)
-                               return (EPERM);
-               } else
-                       np->neth_inetmask = 0xffffffff;
-       } else {
-               np->neth_nam = nam;
-               np->neth_msk = msk;
-       }
-       np->neth_exflags = argp->exflags;
-       np->neth_anon = argp->anon;
-       np->neth_anon.cr_ref = 1;
-       np->neth_next = *hnp;
-       *hnp = np;
-       return (0);
-}
-
-/*
- * Free the net address hash lists that are hanging off the mount points.
- */
-free_addrlist(ump)
-       struct ufsmount *ump;
-{
-       register struct netaddrhash *np, *onp;
-       register int i;
-
-       for (i = 0; i <= NETHASHSZ; i++) {
-               np = ump->um_netaddr[i];
-               ump->um_netaddr[i] = (struct netaddrhash *)0;
-               while (np) {
-                       onp = np;
-                       np = np->neth_next;
-                       if (onp->neth_family != AF_INET) {
-                               m_freem(onp->neth_nam);
-                               m_freem(onp->neth_msk);
-                       }
-                       free((caddr_t)onp, M_NETADDR);
-               }
-       }
-}
-
-/*
- * Generate a hash code for an iso host address. Used by NETADDRHASH() for
- * iso addresses.
- */
-iso_addrhash(saddr)
-       struct sockaddr *saddr;
-{
-#ifdef ISO
-       register struct sockaddr_iso *siso;
-       register int i, sum;
-
-       sum = 0;
-       for (i = 0; i < siso->siso_nlen; i++)
-               sum += siso->siso_data[i];
-       return (sum & (NETHASHSZ - 1));
-#else
-       return (0);
-#endif /* ISO */
-}
-
 /*
  * Check for badly aligned mbuf data areas and
  * realign data in an mbuf list by copying the data areas up, as required.
 /*
  * Check for badly aligned mbuf data areas and
  * realign data in an mbuf list by copying the data areas up, as required.
@@ -1665,7 +1472,7 @@ nfs_realign(m, hsiz)
                                        mnew = m2;
                                        m2 = m2->m_next;
                                }
                                        mnew = m2;
                                        m2 = m2->m_next;
                                }
-                               siz = MIN(mlen, olen);
+                               siz = min(mlen, olen);
                                if (tcp != fcp)
                                        bcopy(fcp, tcp, siz);
                                mnew->m_len += siz;
                                if (tcp != fcp)
                                        bcopy(fcp, tcp, siz);
                                mnew->m_len += siz;
@@ -1723,6 +1530,7 @@ nfsrv_rcv(so, arg, waitflag)
                slp->ns_flag |= SLP_NEEDQ; goto dorecs;
        }
 #endif
                slp->ns_flag |= SLP_NEEDQ; goto dorecs;
        }
 #endif
+       auio.uio_procp = NULL;
        if (so->so_type == SOCK_STREAM) {
                /*
                 * If there are already records on the queue, defer soreceive()
        if (so->so_type == SOCK_STREAM) {
                /*
                 * If there are already records on the queue, defer soreceive()
@@ -2116,6 +1924,7 @@ nfsrv_wakenfsd(slp)
                        nd->nd_flag &= ~NFSD_WAITING;
                        if (nd->nd_slp)
                                panic("nfsd wakeup");
                        nd->nd_flag &= ~NFSD_WAITING;
                        if (nd->nd_slp)
                                panic("nfsd wakeup");
+                       slp->ns_sref++;
                        nd->nd_slp = slp;
                        wakeup((caddr_t)nd);
                        return;
                        nd->nd_slp = slp;
                        wakeup((caddr_t)nd);
                        return;