BSD 4_4_Lite2 release
[unix-history] / usr / src / sys / kern / vfs_lookup.c
index b5a025e..826fbfe 100644 (file)
-/*     vfs_lookup.c    6.13    84/07/08        */
-
-#include "../h/param.h"
-#include "../h/systm.h"
-#include "../h/inode.h"
-#include "../h/fs.h"
-#include "../h/mount.h"
-#include "../h/dir.h"
-#include "../h/user.h"
-#include "../h/buf.h"
-#include "../h/conf.h"
-#include "../h/uio.h"
-#include "../h/kernel.h"
-
-struct buf *blkatoff();
-struct buf *freenamebuf;
-int    dirchk = 0;
-
 /*
 /*
- * Structures associated with name cacheing.
+ * Copyright (c) 1982, 1986, 1989, 1993
+ *     The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the University of
+ *     California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *     @(#)vfs_lookup.c        8.10 (Berkeley) 5/27/95
  */
  */
-#define        NCHHASH         32      /* size of hash table */
 
 
-#if    ((NCHHASH)&((NCHHASH)-1)) != 0
-#define        NHASH(h, i, d)  ((unsigned)((h) + (i) + 13 * (int)(d)) % (NCHHASH))
-#else
-#define        NHASH(h, i, d)  ((unsigned)((h) + (i) + 13 * (int)(d)) & ((NCHHASH)-1))
+#include <sys/param.h>
+#include <sys/syslimits.h>
+#include <sys/time.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/errno.h>
+#include <sys/malloc.h>
+#include <sys/filedesc.h>
+#include <sys/proc.h>
+
+#ifdef KTRACE
+#include <sys/ktrace.h>
 #endif
 
 #endif
 
-union  nchash  {
-       union   nchash  *nch_head[2];
-       struct  nch     *nch_chain[2];
-} nchash[NCHHASH];
-#define        nch_forw        nch_chain[0]
-#define        nch_back        nch_chain[1]
-
-struct nch     *nchhead, **nchtail;    /* LRU chain pointers */
-struct nchstats nchstats;              /* cache effectiveness statistics */
-
 /*
 /*
- * Convert a pathname into a pointer to a locked inode,
- * with side effects usable in creating and removing files.
- * This is a very central and rather complicated routine.
- *
- * The segflg defines whether the name is to be copied from user
- * space or kernel space.
- *
- * The flag argument is (LOOKUP, CREATE, DELETE) depending on whether
- * the name is to be (looked up, created, deleted).  If flag has
- * LOCKPARENT or'ed into it and the target of the pathname exists,
- * namei returns both the target and its parent directory locked. 
- * If the file system is not maintained in a strict tree hierarchy,
- * this can result in a deadlock situation.  When creating and
- * LOCKPARENT is specified, the target may not be ".".  When deleting
- * and LOCKPARENT is specified, the target may be ".", but the caller
- * must check to insure it does an irele and iput instead of two iputs.
+ * Convert a pathname into a pointer to a locked inode.
  *
  * The FOLLOW flag is set when symbolic links are to be followed
  * when they occur at the end of the name translation process.
  *
  * The FOLLOW flag is set when symbolic links are to be followed
  * when they occur at the end of the name translation process.
+ * Symbolic links are always followed for all other pathname
+ * components other than the last.
  *
  *
- * Name caching works as follows:
- *
- *     names found by directory scans are retained in a cache
- *     for future reference.  It is managed LRU, so frequently
- *     used names will hang around.  Cache is indexed by hash value
- *     obtained from (ino,dev,name) where ino & dev refer to the
- *     directory containing name.
- *
- *     For simplicity (and economy of storage), names longer than
- *     some (small) maximum length are not cached, they occur
- *     infrequently in any case, and are almost never of interest.
- *
- *     Upon reaching the last segment of a path, if the reference
- *     is for DELETE, or NOCACHE is set (rewrite), and the
- *     name is located in the cache, it will be dropped.
- *
- *     We must be sure never to enter the name ".." into the cache
- *     because of the extremely kludgey way that rename() alters
- *     ".." in a situation like
- *             mv a/x b/x
- *     where x is a directory, and x/.. is the ".." in question.
+ * The segflg defines whether the name is to be copied from user
+ * space or kernel space.
  *
  * Overall outline of namei:
  *
  *     copy in name
  *     get starting directory
  *
  * Overall outline of namei:
  *
  *     copy in name
  *     get starting directory
- * dirloop:
- *     check accessibility of directory
- * dirloop2:
- *     copy next component of name to ndp->ni_dent
- *     handle degenerate case where name is null string
- *     look for name in cache, if found, then if at end of path
- *       and deleting or creating, drop it, else to haveino
- *     search for name in directory, to found or notfound
- * notfound:
- *     if creating, return locked directory, leaving info on avail. slots
- *     else return error
- * found:
- *     if at end of path and deleting, return information to allow delete
- *     if at end of path and rewriting (create and LOCKPARENT), lock target
- *       inode and return info to allow rewrite
- *     if .. and on mounted filesys, look in mount table for parent
- *     if not at end, if neither creating nor deleting, add name to cache
- * haveino:
- *     if symbolic link, massage name in buffer and continue at dirloop
- *     if more components of name, do next level at dirloop
- *     return the answer as locked inode
- *
- * NOTE: (LOOKUP | LOCKPARENT) currently returns the parent inode,
- *      but unlocked.
+ *     while (!done && !error) {
+ *             call lookup to search path.
+ *             if symbolic link, massage name in buffer and continue
+ *     }
  */
  */
-struct inode *
+int
 namei(ndp)
        register struct nameidata *ndp;
 {
 namei(ndp)
        register struct nameidata *ndp;
 {
+       register struct filedesc *fdp;  /* pointer to file descriptor state */
        register char *cp;              /* pointer into pathname argument */
        register char *cp;              /* pointer into pathname argument */
-/* these variables refer to things which must be freed or unlocked */
-       register struct inode *dp = 0;  /* the directory we are searching */
-       register struct nch *ncp;       /* cache slot for entry */
-       register struct fs *fs;         /* file system that directory is in */
-       register struct buf *bp = 0;    /* a buffer of directory entries */
-       register struct direct *ep;     /* the current directory entry */
-       int entryoffsetinblock;         /* offset of ep in bp's buffer */
-       register struct buf *nbp;       /* buffer storing path name argument */
-/* these variables hold information about the search for a slot */
-       enum {NONE, COMPACT, FOUND} slotstatus;
-       int slotoffset = -1;            /* offset of area with free space */
-       int slotsize;                   /* size of area at slotoffset */
-       int slotfreespace;              /* amount of space free in slot */
-       int slotneeded;                 /* size of the entry we're seeking */
-/* */
-       int numdirpasses;               /* strategy for directory search */
-       int endsearch;                  /* offset to end directory search */
-       int prevoff;                    /* ndp->ni_offset of previous entry */
-       int nlink = 0;                  /* number of symbolic links taken */
-       struct inode *pdp;              /* saved dp during symlink work */
-       int error, i;
-       int lockparent;
-       int docache;
-       unsigned hash;                  /* value of name hash for entry */
-       union nchash *nhp;              /* cache chain head for entry */
-       int isdotdot;                   /* != 0 if current name is ".." */
-       int flag;                       /* op ie, LOOKUP, CREATE, or DELETE */
+       register struct vnode *dp;      /* the directory we are searching */
+       struct iovec aiov;              /* uio for reading symbolic links */
+       struct uio auio;
+       int error, linklen;
+       struct componentname *cnp = &ndp->ni_cnd;
+       struct proc *p = cnp->cn_proc;
+
+       ndp->ni_cnd.cn_cred = ndp->ni_cnd.cn_proc->p_ucred;
+#ifdef DIAGNOSTIC
+       if (!cnp->cn_cred || !cnp->cn_proc)
+               panic ("namei: bad cred/proc");
+       if (cnp->cn_nameiop & (~OPMASK))
+               panic ("namei: nameiop contaminated with flags");
+       if (cnp->cn_flags & OPMASK)
+               panic ("namei: flags contaminated with nameiops");
+#endif
+       fdp = cnp->cn_proc->p_fd;
 
 
-       lockparent = ndp->ni_nameiop & LOCKPARENT;
-       docache = (ndp->ni_nameiop & NOCACHE) ^ NOCACHE;
-       flag = ndp->ni_nameiop &~ (LOCKPARENT|NOCACHE|FOLLOW);
-       if (flag == DELETE)
-               docache = 0;
        /*
         * Get a buffer for the name to be translated, and copy the
         * name into the buffer.
         */
        /*
         * Get a buffer for the name to be translated, and copy the
         * name into the buffer.
         */
-       nbp = freenamebuf;
-       if (nbp == NULL)
-               nbp = geteblk(MAXPATHLEN);
-       else
-               freenamebuf = nbp->av_forw;
+       if ((cnp->cn_flags & HASBUF) == 0)
+               MALLOC(cnp->cn_pnbuf, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
        if (ndp->ni_segflg == UIO_SYSSPACE)
        if (ndp->ni_segflg == UIO_SYSSPACE)
-               error = copystr(ndp->ni_dirp, nbp->b_un.b_addr, MAXPATHLEN,
-                   (u_int *)0);
+               error = copystr(ndp->ni_dirp, cnp->cn_pnbuf,
+                           MAXPATHLEN, &ndp->ni_pathlen);
        else
        else
-               error = copyinstr(ndp->ni_dirp, nbp->b_un.b_addr, MAXPATHLEN,
-                   (u_int *)0);
+               error = copyinstr(ndp->ni_dirp, cnp->cn_pnbuf,
+                           MAXPATHLEN, &ndp->ni_pathlen);
        if (error) {
        if (error) {
-               u.u_error = error;
-               goto bad;
-       }
-
-       /*
-        * Get starting directory.
-        */
-       cp = nbp->b_un.b_addr;
-       if (*cp == '/') {
-               while (*cp == '/')
-                       cp++;
-               if ((dp = u.u_rdir) == NULL)
-                       dp = rootdir;
-       } else
-               dp = u.u_cdir;
-       fs = dp->i_fs;
-       ILOCK(dp);
-       dp->i_count++;
-       ndp->ni_pdir = (struct inode *)0xc0000000;              /* illegal */
-
-       /*
-        * We come to dirloop to search a new directory.
-        * The directory must be locked so that it can be
-        * iput, and fs must be already set to dp->i_fs.
-        */
-dirloop:
-       /*
-        * Check accessiblity of directory.
-        */
-       if ((dp->i_mode&IFMT) != IFDIR) {
-               u.u_error = ENOTDIR;
-               goto bad;
-       }
-       if (access(dp, IEXEC))
-               goto bad;
-
-dirloop2:
-       /*
-        * Copy next component of name to ndp->ni_dent.
-        */
-       hash = 0;
-       for (i = 0; *cp != 0 && *cp != '/'; cp++) {
-               if (i >= MAXNAMLEN) {
-                       u.u_error = ENOENT;
-                       goto bad;
-               }
-               if ((*cp&0377) == ('/'|0200) || (*cp&0200) && flag != DELETE) {
-                       u.u_error = EPERM;
-                       goto bad;
-               }
-               ndp->ni_dent.d_name[i++] = *cp;
-               hash += (unsigned char)*cp * i;
-       }
-       ndp->ni_dent.d_namlen = i;
-       ndp->ni_dent.d_name[i] = '\0';
-       isdotdot = (i == 2 &&
-               ndp->ni_dent.d_name[0] == '.' && ndp->ni_dent.d_name[1] == '.');
-
-       /*
-        * Check for degenerate name (e.g. / or "")
-        * which is a way of talking about a directory,
-        * e.g. like "/." or ".".
-        */
-       if (ndp->ni_dent.d_name[0] == '\0') {
-               if (flag != LOOKUP || lockparent) {
-                       u.u_error = EISDIR;
-                       goto bad;
-               }
-               nbp->av_forw = freenamebuf;
-               freenamebuf = nbp;
-               return (dp);
-       }
-
-       /*
-        * We now have a segment name to search for, and a directory to search.
-        *
-        * Before tediously performing a linear scan of the directory,
-        * check the name cache to see if the directory/name pair
-        * we are looking for is known already.  We don't do this
-        * if the segment name is long, simply so the cache can avoid
-        * holding long names (which would either waste space, or
-        * add greatly to the complexity).
-        */
-       if (ndp->ni_dent.d_namlen > NCHNAMLEN) {
-               nchstats.ncs_long++;
-               docache = 0;
-       } else {
-               nhp = &nchash[NHASH(hash, dp->i_number, dp->i_dev)];
-               for (ncp = nhp->nch_forw; ncp != (struct nch *)nhp;
-                   ncp = ncp->nc_forw) {
-                       if (ncp->nc_ino == dp->i_number &&
-                           ncp->nc_dev == dp->i_dev &&
-                           ncp->nc_nlen == ndp->ni_dent.d_namlen &&
-                           !bcmp(ncp->nc_name, ndp->ni_dent.d_name,
-                               ncp->nc_nlen))
-                               break;
-               }
-
-               if (ncp == (struct nch *)nhp) {
-                       nchstats.ncs_miss++;
-                       ncp = NULL;
-               } else {
-                       if (ncp->nc_id != ncp->nc_ip->i_id) {
-                               nchstats.ncs_falsehits++;
-                       } else if (*cp == '\0' && !docache) {
-                               nchstats.ncs_badhits++;
-                       } else {
-
-                                       /*
-                                        * move this slot to end of LRU
-                                        * chain, if not already there
-                                        */
-                               if (ncp->nc_nxt) {
-                                               /* remove from LRU chain */
-                                       *ncp->nc_prev = ncp->nc_nxt;
-                                       ncp->nc_nxt->nc_prev = ncp->nc_prev;
-
-                                               /* and replace at end of it */
-                                       ncp->nc_nxt = NULL;
-                                       ncp->nc_prev = nchtail;
-                                       *nchtail = ncp;
-                                       nchtail = &ncp->nc_nxt;
-                               }
-
-                               /*
-                                * Get the next inode in the path.
-                                * See comment above other `IUNLOCK' code for
-                                * an explaination of the locking protocol.
-                                */
-                               pdp = dp;
-                               dp = ncp->nc_ip;
-                               if (dp == NULL)
-                                       panic("nami: null cache ino");
-                               if (pdp == dp)
-                                       dp->i_count++;
-                               else {
-                                       if (isdotdot) {
-                                               IUNLOCK(pdp);
-                                               igrab(dp);
-                                       } else {
-                                               igrab(dp);
-                                               IUNLOCK(pdp);
-                                       }
-                               }
-
-                               /*
-                                * Verify that the inode that we got
-                                * did not change while we were waiting
-                                * for it to be locked.
-                                */
-                               if (ncp->nc_id != ncp->nc_ip->i_id) {
-                                       iput(dp);
-                                       ILOCK(pdp);
-                                       dp = pdp;
-                                       nchstats.ncs_falsehits++;
-                               } else {
-                                       ndp->ni_dent.d_ino = dp->i_number;
-                                       /* ni_dent.d_reclen is garbage ... */
-                                       nchstats.ncs_goodhits++;
-                                       goto haveino;
-                               }
-                       }
-
-                       /*
-                        * Last component and we are renaming or deleting,
-                        * the cache entry is invalid, or otherwise don't
-                        * want cache entry to exist.
-                        */
-
-                               /* remove from LRU chain */
-                       *ncp->nc_prev = ncp->nc_nxt;
-                       if (ncp->nc_nxt)
-                               ncp->nc_nxt->nc_prev = ncp->nc_prev;
-                       else
-                               nchtail = ncp->nc_prev;
-
-                               /* remove from hash chain */
-                       remque(ncp);
-
-                               /* insert at head of LRU list (first to grab) */
-                       ncp->nc_nxt = nchhead;
-                       ncp->nc_prev = &nchhead;
-                       nchhead->nc_prev = &ncp->nc_nxt;
-                       nchhead = ncp;
-
-                               /* and make a dummy hash chain */
-                       ncp->nc_forw = ncp;
-                       ncp->nc_back = ncp;
-
-                       ncp = NULL;
-               }
+               free(cnp->cn_pnbuf, M_NAMEI);
+               ndp->ni_vp = NULL;
+               return (error);
        }
        }
+       ndp->ni_loopcnt = 0;
+#ifdef KTRACE
+       if (KTRPOINT(cnp->cn_proc, KTR_NAMEI))
+               ktrnamei(cnp->cn_proc->p_tracep, cnp->cn_pnbuf);
+#endif
 
        /*
 
        /*
-        * Suppress search for slots unless creating
-        * file and at end of pathname, in which case
-        * we watch for a place to put the new file in
-        * case it doesn't already exist.
+        * Get starting point for the translation.
         */
         */
-       slotstatus = FOUND;
-       if (flag == CREATE && *cp == 0) {
-               slotstatus = NONE;
-               slotfreespace = 0;
-               slotneeded = DIRSIZ(&ndp->ni_dent);
-       }
-       /*
-        * If this is the same directory that this process
-        * previously searched, pick up where we last left off.
-        * We cache only lookups as these are the most common
-        * and have the greatest payoff. Caching CREATE has little
-        * benefit as it usually must search the entire directory
-        * to determine that the entry does not exist. Caching the
-        * location of the last DELETE has not reduced profiling time
-        * and hence has been removed in the interest of simplicity.
-        */
-       if (flag != LOOKUP || dp->i_number != u.u_ncache.nc_inumber ||
-           dp->i_dev != u.u_ncache.nc_dev) {
-               ndp->ni_offset = 0;
-               numdirpasses = 1;
-       } else {
-               if ((dp->i_flag & ICHG) || dp->i_ctime >= u.u_ncache.nc_time) {
-                       u.u_ncache.nc_prevoffset &= ~(DIRBLKSIZ - 1);
-                       u.u_ncache.nc_time = time.tv_sec;
-               }
-               ndp->ni_offset = u.u_ncache.nc_prevoffset;
-               entryoffsetinblock = blkoff(fs, ndp->ni_offset);
-               if (entryoffsetinblock != 0) {
-                       bp = blkatoff(dp, ndp->ni_offset, (char **)0);
-                       if (bp == 0)
-                               goto bad;
-               }
-               numdirpasses = 2;
-               nchstats.ncs_2passes++;
-       }
-       endsearch = roundup(dp->i_size, DIRBLKSIZ);
-
-searchloop:
-       while (ndp->ni_offset < endsearch) {
+       if ((ndp->ni_rootdir = fdp->fd_rdir) == NULL)
+               ndp->ni_rootdir = rootvnode;
+       dp = fdp->fd_cdir;
+       VREF(dp);
+       for (;;) {
                /*
                /*
-                * If offset is on a block boundary,
-                * read the next directory block.
-                * Release previous if it exists.
+                * Check if root directory should replace current directory.
+                * Done at start of translation and after symbolic link.
                 */
                 */
-               if (blkoff(fs, ndp->ni_offset) == 0) {
-                       if (bp != NULL)
-                               brelse(bp);
-                       bp = blkatoff(dp, ndp->ni_offset, (char **)0);
-                       if (bp == 0)
-                               goto bad;
-                       entryoffsetinblock = 0;
+               cnp->cn_nameptr = cnp->cn_pnbuf;
+               if (*(cnp->cn_nameptr) == '/') {
+                       vrele(dp);
+                       while (*(cnp->cn_nameptr) == '/') {
+                               cnp->cn_nameptr++;
+                               ndp->ni_pathlen--;
+                       }
+                       dp = ndp->ni_rootdir;
+                       VREF(dp);
                }
                }
-
-               /*
-                * If still looking for a slot, and at a DIRBLKSIZE
-                * boundary, have to start looking for free space again.
-                */
-               if (slotstatus == NONE &&
-                   (entryoffsetinblock&(DIRBLKSIZ-1)) == 0) {
-                       slotoffset = -1;
-                       slotfreespace = 0;
+               ndp->ni_startdir = dp;
+               if (error = lookup(ndp)) {
+                       FREE(cnp->cn_pnbuf, M_NAMEI);
+                       return (error);
                }
                }
-
                /*
                /*
-                * Get pointer to next entry.
-                * Full validation checks are slow, so we only check
-                * enough to insure forward progress through the
-                * directory. Complete checks can be run by patching
-                * "dirchk" to be true.
+                * Check for symbolic link
                 */
                 */
-               ep = (struct direct *)(bp->b_un.b_addr + entryoffsetinblock);
-               if (ep->d_reclen <= 0 ||
-                   dirchk && dirbadentry(ep, entryoffsetinblock)) {
-                       dirbad(dp, ndp->ni_offset, "mangled entry");
-                       i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1));
-                       ndp->ni_offset += i;
-                       entryoffsetinblock += i;
-                       continue;
+               if ((cnp->cn_flags & ISSYMLINK) == 0) {
+                       if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0)
+                               FREE(cnp->cn_pnbuf, M_NAMEI);
+                       else
+                               cnp->cn_flags |= HASBUF;
+                       return (0);
                }
                }
-
-               /*
-                * If an appropriate sized slot has not yet been found,
-                * check to see if one is available. Also accumulate space
-                * in the current block so that we can determine if
-                * compaction is viable.
-                */
-               if (slotstatus != FOUND) {
-                       int size = ep->d_reclen;
-
-                       if (ep->d_ino != 0)
-                               size -= DIRSIZ(ep);
-                       if (size > 0) {
-                               if (size >= slotneeded) {
-                                       slotstatus = FOUND;
-                                       slotoffset = ndp->ni_offset;
-                                       slotsize = ep->d_reclen;
-                               } else if (slotstatus == NONE) {
-                                       slotfreespace += size;
-                                       if (slotoffset == -1)
-                                               slotoffset = ndp->ni_offset;
-                                       if (slotfreespace >= slotneeded) {
-                                               slotstatus = COMPACT;
-                                               slotsize = ndp->ni_offset +
-                                                     ep->d_reclen - slotoffset;
-                                       }
-                               }
-                       }
+               if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1)
+                       VOP_UNLOCK(ndp->ni_dvp, 0, p);
+               if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
+                       error = ELOOP;
+                       break;
                }
                }
-
-               /*
-                * Check for a name match.
-                */
-               if (ep->d_ino) {
-                       if (ep->d_namlen == ndp->ni_dent.d_namlen &&
-                           !bcmp(ndp->ni_dent.d_name, ep->d_name,
-                               ep->d_namlen))
-                               goto found;
+               if (ndp->ni_pathlen > 1)
+                       MALLOC(cp, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
+               else
+                       cp = cnp->cn_pnbuf;
+               aiov.iov_base = cp;
+               aiov.iov_len = MAXPATHLEN;
+               auio.uio_iov = &aiov;
+               auio.uio_iovcnt = 1;
+               auio.uio_offset = 0;
+               auio.uio_rw = UIO_READ;
+               auio.uio_segflg = UIO_SYSSPACE;
+               auio.uio_procp = (struct proc *)0;
+               auio.uio_resid = MAXPATHLEN;
+               if (error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred)) {
+                       if (ndp->ni_pathlen > 1)
+                               free(cp, M_NAMEI);
+                       break;
                }
                }
-               prevoff = ndp->ni_offset;
-               ndp->ni_offset += ep->d_reclen;
-               entryoffsetinblock += ep->d_reclen;
-       }
-/* notfound: */
-       /*
-        * If we started in the middle of the directory and failed
-        * to find our target, we must check the beginning as well.
-        */
-       if (numdirpasses == 2) {
-               numdirpasses--;
-               ndp->ni_offset = 0;
-               endsearch = u.u_ncache.nc_prevoffset;
-               goto searchloop;
-       }
-       /*
-        * If creating, and at end of pathname and current
-        * directory has not been removed, then can consider
-        * allowing file to be created.
-        */
-       if (flag == CREATE && *cp == 0 && dp->i_nlink != 0) {
-               /*
-                * Access for write is interpreted as allowing
-                * creation of files in the directory.
-                */
-               if (access(dp, IWRITE))
-                       goto bad;
-               /*
-                * Return an indication of where the new directory
-                * entry should be put.  If we didn't find a slot,
-                * then set ndp->ni_count to 0 indicating that the new
-                * slot belongs at the end of the directory. If we found
-                * a slot, then the new entry can be put in the range
-                * [ndp->ni_offset .. ndp->ni_offset + ndp->ni_count)
-                */
-               if (slotstatus == NONE) {
-                       ndp->ni_offset = roundup(dp->i_size, DIRBLKSIZ);
-                       ndp->ni_count = 0;
-               } else {
-                       ndp->ni_offset = slotoffset;
-                       ndp->ni_count = slotsize;
+               linklen = MAXPATHLEN - auio.uio_resid;
+               if (linklen + ndp->ni_pathlen >= MAXPATHLEN) {
+                       if (ndp->ni_pathlen > 1)
+                               free(cp, M_NAMEI);
+                       error = ENAMETOOLONG;
+                       break;
                }
                }
-               dp->i_flag |= IUPD|ICHG;
-               if (bp)
-                       brelse(bp);
-               nbp->av_forw = freenamebuf;
-               freenamebuf = nbp;
-               /*
-                * We return with the directory locked, so that
-                * the parameters we set up above will still be
-                * valid if we actually decide to do a direnter().
-                * We return NULL to indicate that the entry doesn't
-                * currently exist, leaving a pointer to the (locked)
-                * directory inode in ndp->ni_pdir.
-                */
-               ndp->ni_pdir = dp;
-               return (NULL);
+               if (ndp->ni_pathlen > 1) {
+                       bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen);
+                       FREE(cnp->cn_pnbuf, M_NAMEI);
+                       cnp->cn_pnbuf = cp;
+               } else
+                       cnp->cn_pnbuf[linklen] = '\0';
+               ndp->ni_pathlen += linklen;
+               vput(ndp->ni_vp);
+               dp = ndp->ni_dvp;
        }
        }
-       u.u_error = ENOENT;
-       goto bad;
-found:
-       if (numdirpasses == 2)
-               nchstats.ncs_pass2++;
+       FREE(cnp->cn_pnbuf, M_NAMEI);
+       vrele(ndp->ni_dvp);
+       vput(ndp->ni_vp);
+       ndp->ni_vp = NULL;
+       return (error);
+}
+
+/*
+ * Search a pathname.
+ * This is a very central and rather complicated routine.
+ *
+ * The pathname is pointed to by ni_ptr and is of length ni_pathlen.
+ * The starting directory is taken from ni_startdir. The pathname is
+ * descended until done, or a symbolic link is encountered. The variable
+ * ni_more is clear if the path is completed; it is set to one if a
+ * symbolic link needing interpretation is encountered.
+ *
+ * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on
+ * whether the name is to be looked up, created, renamed, or deleted.
+ * When CREATE, RENAME, or DELETE is specified, information usable in
+ * creating, renaming, or deleting a directory entry may be calculated.
+ * If flag has LOCKPARENT or'ed into it, the parent directory is returned
+ * locked. If flag has WANTPARENT or'ed into it, the parent directory is
+ * returned unlocked. Otherwise the parent directory is not returned. If
+ * the target of the pathname exists and LOCKLEAF is or'ed into the flag
+ * the target is returned locked, otherwise it is returned unlocked.
+ * When creating or renaming and LOCKPARENT is specified, the target may not
+ * be ".".  When deleting and LOCKPARENT is specified, the target may be ".".
+ * 
+ * Overall outline of lookup:
+ *
+ * dirloop:
+ *     identify next component of name at ndp->ni_ptr
+ *     handle degenerate case where name is null string
+ *     if .. and crossing mount points and on mounted filesys, find parent
+ *     call VOP_LOOKUP routine for next component name
+ *         directory vnode returned in ni_dvp, unlocked unless LOCKPARENT set
+ *         component vnode returned in ni_vp (if it exists), locked.
+ *     if result vnode is mounted on and crossing mount points,
+ *         find mounted on vnode
+ *     if more components of name, do next level at dirloop
+ *     return the answer in ni_vp, locked if LOCKLEAF set
+ *         if LOCKPARENT set, return locked parent in ni_dvp
+ *         if WANTPARENT set, return unlocked parent in ni_dvp
+ */
+int
+lookup(ndp)
+       register struct nameidata *ndp;
+{
+       register char *cp;              /* pointer into pathname argument */
+       register struct vnode *dp = 0;  /* the directory we are searching */
+       struct vnode *tdp;              /* saved dp */
+       struct mount *mp;               /* mount table entry */
+       int docache;                    /* == 0 do not cache last component */
+       int wantparent;                 /* 1 => wantparent or lockparent flag */
+       int rdonly;                     /* lookup read-only flag bit */
+       int error = 0;
+       struct componentname *cnp = &ndp->ni_cnd;
+       struct proc *p = cnp->cn_proc;
+
        /*
        /*
-        * Check that directory length properly reflects presence
-        * of this entry.
+        * Setup: break out flag bits into variables.
         */
         */
-       if (entryoffsetinblock + DIRSIZ(ep) > dp->i_size) {
-               dirbad(dp, ndp->ni_offset, "i_size too small");
-               dp->i_size = entryoffsetinblock + DIRSIZ(ep);
-               dp->i_flag |= IUPD|ICHG;
-       }
+       wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT);
+       docache = (cnp->cn_flags & NOCACHE) ^ NOCACHE;
+       if (cnp->cn_nameiop == DELETE ||
+           (wantparent && cnp->cn_nameiop != CREATE))
+               docache = 0;
+       rdonly = cnp->cn_flags & RDONLY;
+       ndp->ni_dvp = NULL;
+       cnp->cn_flags &= ~ISSYMLINK;
+       dp = ndp->ni_startdir;
+       ndp->ni_startdir = NULLVP;
+       vn_lock(dp, LK_EXCLUSIVE | LK_RETRY, p);
 
 
+dirloop:
        /*
        /*
-        * Found component in pathname.
-        * If the final component of path name, save information
-        * in the cache as to where the entry was found.
+        * Search a new directory.
+        *
+        * The cn_hash value is for use by vfs_cache.
+        * The last component of the filename is left accessible via
+        * cnp->cn_nameptr for callers that need the name. Callers needing
+        * the name set the SAVENAME flag. When done, they assume
+        * responsibility for freeing the pathname buffer.
         */
         */
-       if (*cp == '\0' && flag == LOOKUP) {
-               u.u_ncache.nc_prevoffset = ndp->ni_offset;
-               u.u_ncache.nc_inumber = dp->i_number;
-               u.u_ncache.nc_dev = dp->i_dev;
-               u.u_ncache.nc_time = time.tv_sec;
+       cnp->cn_consume = 0;
+       cnp->cn_hash = 0;
+       for (cp = cnp->cn_nameptr; *cp != 0 && *cp != '/'; cp++)
+               cnp->cn_hash += (unsigned char)*cp;
+       cnp->cn_namelen = cp - cnp->cn_nameptr;
+       if (cnp->cn_namelen > NAME_MAX) {
+               error = ENAMETOOLONG;
+               goto bad;
        }
        }
-       /*
-        * Save directory entry in ndp->ni_dent,
-        * and release directory buffer.
-        */
-       bcopy((caddr_t)ep, (caddr_t)&ndp->ni_dent, (u_int)DIRSIZ(ep));
-       brelse(bp);
-       bp = NULL;
+#ifdef NAMEI_DIAGNOSTIC
+       { char c = *cp;
+       *cp = '\0';
+       printf("{%s}: ", cnp->cn_nameptr);
+       *cp = c; }
+#endif
+       ndp->ni_pathlen -= cnp->cn_namelen;
+       ndp->ni_next = cp;
+       cnp->cn_flags |= MAKEENTRY;
+       if (*cp == '\0' && docache == 0)
+               cnp->cn_flags &= ~MAKEENTRY;
+       if (cnp->cn_namelen == 2 &&
+           cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.')
+               cnp->cn_flags |= ISDOTDOT;
+       else
+               cnp->cn_flags &= ~ISDOTDOT;
+       if (*ndp->ni_next == 0)
+               cnp->cn_flags |= ISLASTCN;
+       else
+               cnp->cn_flags &= ~ISLASTCN;
+
 
        /*
 
        /*
-        * If deleting, and at end of pathname, return
-        * parameters which can be used to remove file.
-        * If the lockparent flag isn't set, we return only
-        * the directory (in ndp->ni_pdir), otherwise we go
-        * on and lock the inode, being careful with ".".
+        * Check for degenerate name (e.g. / or "")
+        * which is a way of talking about a directory,
+        * e.g. like "/." or ".".
         */
         */
-       if (flag == DELETE && *cp == 0) {
-               /*
-                * Write access to directory required to delete files.
-                */
-               if (access(dp, IWRITE))
+       if (cnp->cn_nameptr[0] == '\0') {
+               if (dp->v_type != VDIR) {
+                       error = ENOTDIR;
                        goto bad;
                        goto bad;
-               ndp->ni_pdir = dp;              /* for dirremove() */
-               /*
-                * Return pointer to current entry in ndp->ni_offset,
-                * and distance past previous entry (if there
-                * is a previous entry in this block) in ndp->ni_count.
-                * Save directory inode pointer in ndp->ni_pdir for dirremove().
-                */
-               if ((ndp->ni_offset&(DIRBLKSIZ-1)) == 0)
-                       ndp->ni_count = 0;
-               else
-                       ndp->ni_count = ndp->ni_offset - prevoff;
-               if (lockparent) {
-                       if (dp->i_number == ndp->ni_dent.d_ino)
-                               dp->i_count++;
-                       else {
-                               dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino);
-                               if (dp == NULL) {
-                                       iput(ndp->ni_pdir);
-                                       goto bad;
-                               }
-                               /*
-                                * If directory is "sticky", then user must own
-                                * the directory, or the file in it, else he
-                                * may not delete it (unless he's root). This
-                                * implements append-only directories.
-                                */
-                               if ((ndp->ni_pdir->i_mode & ISVTX) &&
-                                   u.u_uid != 0 &&
-                                   u.u_uid != ndp->ni_pdir->i_uid &&
-                                   dp->i_uid != u.u_uid) {
-                                       iput(ndp->ni_pdir);
-                                       u.u_error = EPERM;
-                                       goto bad;
-                               }
-                       }
                }
                }
-               nbp->av_forw = freenamebuf;
-               freenamebuf = nbp;
-               return (dp);
+               if (cnp->cn_nameiop != LOOKUP) {
+                       error = EISDIR;
+                       goto bad;
+               }
+               if (wantparent) {
+                       ndp->ni_dvp = dp;
+                       VREF(dp);
+               }
+               ndp->ni_vp = dp;
+               if (!(cnp->cn_flags & (LOCKPARENT | LOCKLEAF)))
+                       VOP_UNLOCK(dp, 0, p);
+               if (cnp->cn_flags & SAVESTART)
+                       panic("lookup: SAVESTART");
+               return (0);
        }
 
        /*
        }
 
        /*
-        * Special handling for ".." allowing chdir out of mounted
-        * file system: indirect .. in root inode to reevaluate
-        * in directory file system was mounted on.
+        * Handle "..": two special cases.
+        * 1. If at root directory (e.g. after chroot)
+        *    or at absolute root directory
+        *    then ignore it so can't get out.
+        * 2. If this vnode is the root of a mounted
+        *    filesystem, then replace it with the
+        *    vnode which was mounted on so we take the
+        *    .. in the other file system.
         */
         */
-       if (isdotdot) {
-               if (dp == u.u_rdir)
-                       ndp->ni_dent.d_ino = dp->i_number;
-               else if (ndp->ni_dent.d_ino == ROOTINO &&
-                  dp->i_number == ROOTINO) {
-                       for (i = 1; i < NMOUNT; i++)
-                       if (mount[i].m_bufp != NULL &&
-                          mount[i].m_dev == dp->i_dev) {
-                               iput(dp);
-                               dp = mount[i].m_inodp;
-                               ILOCK(dp);
-                               dp->i_count++;
-                               fs = dp->i_fs;
-                               cp -= 2;     /* back over .. */
-                               goto dirloop2;
+       if (cnp->cn_flags & ISDOTDOT) {
+               for (;;) {
+                       if (dp == ndp->ni_rootdir || dp == rootvnode) {
+                               ndp->ni_dvp = dp;
+                               ndp->ni_vp = dp;
+                               VREF(dp);
+                               goto nextname;
                        }
                        }
+                       if ((dp->v_flag & VROOT) == 0 ||
+                           (cnp->cn_flags & NOCROSSMOUNT))
+                               break;
+                       tdp = dp;
+                       dp = dp->v_mount->mnt_vnodecovered;
+                       vput(tdp);
+                       VREF(dp);
+                       vn_lock(dp, LK_EXCLUSIVE | LK_RETRY, p);
                }
        }
 
        /*
                }
        }
 
        /*
-        * If rewriting (rename), return the inode and the
-        * information required to rewrite the present directory
-        * Must get inode of directory entry to verify it's a
-        * regular file, or empty directory.  
+        * We now have a segment name to search for, and a directory to search.
         */
         */
-       if ((flag == CREATE && lockparent) && *cp == 0) {
-               if (access(dp, IWRITE))
+unionlookup:
+       ndp->ni_dvp = dp;
+       ndp->ni_vp = NULL;
+       if (error = VOP_LOOKUP(dp, &ndp->ni_vp, cnp)) {
+#ifdef DIAGNOSTIC
+               if (ndp->ni_vp != NULL)
+                       panic("leaf should be empty");
+#endif
+#ifdef NAMEI_DIAGNOSTIC
+               printf("not found\n");
+#endif
+               if ((error == ENOENT) &&
+                   (dp->v_flag & VROOT) &&
+                   (dp->v_mount->mnt_flag & MNT_UNION)) {
+                       tdp = dp;
+                       dp = dp->v_mount->mnt_vnodecovered;
+                       vput(tdp);
+                       VREF(dp);
+                       vn_lock(dp, LK_EXCLUSIVE | LK_RETRY, p);
+                       goto unionlookup;
+               }
+
+               if (error != EJUSTRETURN)
                        goto bad;
                        goto bad;
-               ndp->ni_pdir = dp;              /* for dirrewrite() */
                /*
                /*
-                * Careful about locking second inode. 
-                * This can only occur if the target is ".". 
+                * If creating and at end of pathname, then can consider
+                * allowing file to be created.
                 */
                 */
-               if (dp->i_number == ndp->ni_dent.d_ino) {
-                       u.u_error = EISDIR;             /* XXX */
+               if (rdonly) {
+                       error = EROFS;
                        goto bad;
                }
                        goto bad;
                }
-               dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino);
-               if (dp == NULL) {
-                       iput(ndp->ni_pdir);
-                       goto bad;
+               /*
+                * We return with ni_vp NULL to indicate that the entry
+                * doesn't currently exist, leaving a pointer to the
+                * (possibly locked) directory inode in ndp->ni_dvp.
+                */
+               if (cnp->cn_flags & SAVESTART) {
+                       ndp->ni_startdir = ndp->ni_dvp;
+                       VREF(ndp->ni_startdir);
                }
                }
-               nbp->av_forw = freenamebuf;
-               freenamebuf = nbp;
-               return (dp);
+               return (0);
        }
        }
+#ifdef NAMEI_DIAGNOSTIC
+       printf("found\n");
+#endif
 
        /*
 
        /*
-        * Check for symbolic link, which may require us to massage the
-        * name before we continue translation.  We do not `iput' the
-        * directory because we may need it again if the symbolic link
-        * is relative to the current directory.  Instead we save it
-        * unlocked as "pdp".  We must get the target inode before unlocking
-        * the directory to insure that the inode will not be removed
-        * before we get it.  We prevent deadlock by always fetching
-        * inodes from the root, moving down the directory tree. Thus
-        * when following backward pointers ".." we must unlock the
-        * parent directory before getting the requested directory.
-        * There is a potential race condition here if both the current
-        * and parent directories are removed before the `iget' for the
-        * inode associated with ".." returns.  We hope that this occurs
-        * infrequently since we cannot avoid this race condition without
-        * implementing a sophisticated deadlock detection algorithm.
-        * Note also that this simple deadlock detection scheme will not
-        * work if the file system has any hard links other than ".."
-        * that point backwards in the directory structure.
+        * Take into account any additional components consumed by
+        * the underlying filesystem.
         */
         */
-       pdp = dp;
-       if (isdotdot) {
-               IUNLOCK(pdp);   /* race to get the inode */
-               dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino);
-               if (dp == NULL)
-                       goto bad2;
-       } else if (dp->i_number == ndp->ni_dent.d_ino) {
-               dp->i_count++;  /* we want ourself, ie "." */
-       } else {
-               dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino);
-               IUNLOCK(pdp);
-               if (dp == NULL)
-                       goto bad2;
+       if (cnp->cn_consume > 0) {
+               cnp->cn_nameptr += cnp->cn_consume;
+               ndp->ni_next += cnp->cn_consume;
+               ndp->ni_pathlen -= cnp->cn_consume;
+               cnp->cn_consume = 0;
        }
 
        }
 
+       dp = ndp->ni_vp;
        /*
        /*
-        * insert name into cache (if we want it, and it isn't "." or "..")
-        *
-        * all other cases where making a cache entry would be wrong
-        * have already departed from the code sequence somewhere above.
+        * Check to see if the vnode has been mounted on;
+        * if so find the root of the mounted file system.
         */
         */
-       if (docache) {
-               if (ncp != NULL)
-                       panic("nami: duplicating cache");
-
-                       /*
-                        * free the cache slot at head of lru chain
-                        */
-               if (ncp = nchhead) {
-                               /* remove from lru chain */
-                       *ncp->nc_prev = ncp->nc_nxt;
-                       if (ncp->nc_nxt)
-                               ncp->nc_nxt->nc_prev = ncp->nc_prev;
-                       else
-                               nchtail = ncp->nc_prev;
-
-                               /* remove from old hash chain */
-                       remque(ncp);
-
-                               /* grab the inode we just found */
-                       ncp->nc_ip = dp;
-
-                               /* fill in cache info */
-                       ncp->nc_ino = pdp->i_number;    /* parents inum */
-                       ncp->nc_dev = pdp->i_dev;       /* & device */
-                       ncp->nc_idev = dp->i_dev;       /* our device */
-                       ncp->nc_id = dp->i_id;          /* identifier */
-                       ncp->nc_nlen = ndp->ni_dent.d_namlen;
-                       bcopy(ndp->ni_dent.d_name, ncp->nc_name, ncp->nc_nlen);
-
-                               /* link at end of lru chain */
-                       ncp->nc_nxt = NULL;
-                       ncp->nc_prev = nchtail;
-                       *nchtail = ncp;
-                       nchtail = &ncp->nc_nxt;
-
-                               /* and insert on hash chain */
-                       insque(ncp, nhp);
-               }
+       while (dp->v_type == VDIR && (mp = dp->v_mountedhere) &&
+              (cnp->cn_flags & NOCROSSMOUNT) == 0) {
+               if (vfs_busy(mp, 0, 0, p))
+                       continue;
+               error = VFS_ROOT(mp, &tdp);
+               vfs_unbusy(mp, p);
+               if (error)
+                       goto bad2;
+               vput(dp);
+               ndp->ni_vp = dp = tdp;
        }
 
        }
 
-haveino:
-       fs = dp->i_fs;
-
        /*
         * Check for symbolic link
         */
        /*
         * Check for symbolic link
         */
-       if ((dp->i_mode & IFMT) == IFLNK &&
-           ((ndp->ni_nameiop & FOLLOW) || *cp == '/')) {
-               u_int pathlen = strlen(cp) + 1;
-
-               if (dp->i_size + pathlen >= MAXPATHLEN - 1 ||
-                   ++nlink > MAXSYMLINKS) {
-                       u.u_error = ELOOP;
-                       goto bad2;
-               }
-               ovbcopy(cp, nbp->b_un.b_addr + dp->i_size, pathlen);
-               u.u_error =
-                   rdwri(UIO_READ, dp, nbp->b_un.b_addr, (int)dp->i_size,
-                       0, 1, (int *)0);
-               if (u.u_error)
-                       goto bad2;
-               cp = nbp->b_un.b_addr;
-               iput(dp);
-               if (*cp == '/') {
-                       irele(pdp);
-                       while (*cp == '/')
-                               cp++;
-                       if ((dp = u.u_rdir) == NULL)
-                               dp = rootdir;
-                       ILOCK(dp);
-                       dp->i_count++;
-               } else {
-                       dp = pdp;
-                       ILOCK(dp);
-               }
-               fs = dp->i_fs;
-               goto dirloop;
+       if ((dp->v_type == VLNK) &&
+           ((cnp->cn_flags & FOLLOW) || *ndp->ni_next == '/')) {
+               cnp->cn_flags |= ISSYMLINK;
+               return (0);
        }
 
        }
 
+nextname:
        /*
         * Not a symbolic link.  If more pathname,
         * continue at next component, else return.
         */
        /*
         * Not a symbolic link.  If more pathname,
         * continue at next component, else return.
         */
-       if (*cp == '/') {
-               while (*cp == '/')
-                       cp++;
-               irele(pdp);
+       if (*ndp->ni_next == '/') {
+               cnp->cn_nameptr = ndp->ni_next;
+               while (*cnp->cn_nameptr == '/') {
+                       cnp->cn_nameptr++;
+                       ndp->ni_pathlen--;
+               }
+               vrele(ndp->ni_dvp);
                goto dirloop;
        }
                goto dirloop;
        }
-       nbp->av_forw = freenamebuf;
-       freenamebuf = nbp;
-       if (lockparent)
-               ndp->ni_pdir = pdp;
-       else
-               irele(pdp);
-       return (dp);
+       /*
+        * Disallow directory write attempts on read-only file systems.
+        */
+       if (rdonly &&
+           (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
+               error = EROFS;
+               goto bad2;
+       }
+       if (cnp->cn_flags & SAVESTART) {
+               ndp->ni_startdir = ndp->ni_dvp;
+               VREF(ndp->ni_startdir);
+       }
+       if (!wantparent)
+               vrele(ndp->ni_dvp);
+       if ((cnp->cn_flags & LOCKLEAF) == 0)
+               VOP_UNLOCK(dp, 0, p);
+       return (0);
+
 bad2:
 bad2:
-       irele(pdp);
+       if ((cnp->cn_flags & LOCKPARENT) && *ndp->ni_next == '\0')
+               VOP_UNLOCK(ndp->ni_dvp, 0, p);
+       vrele(ndp->ni_dvp);
 bad:
 bad:
-       if (bp)
-               brelse(bp);
-       if (dp)
-               iput(dp);
-       nbp->av_forw = freenamebuf;
-       freenamebuf = nbp;
-       return (NULL);
-}
-
-
-dirbad(ip, offset, how)
-       struct inode *ip;
-       off_t offset;
-       char *how;
-{
-
-       printf("%s: bad dir ino %d at offset %d: %s\n",
-           ip->i_fs->fs_fsmnt, ip->i_number, offset, how);
-}
-
-/*
- * Do consistency checking on a directory entry:
- *     record length must be multiple of 4
- *     record length must not be non-negative
- *     entry must fit in rest of its DIRBLKSIZ block
- *     record must be large enough to contain entry
- *     name is not longer than MAXNAMLEN
- *     name must be as long as advertised, and null terminated
- */
-dirbadentry(ep, entryoffsetinblock)
-       register struct direct *ep;
-       int entryoffsetinblock;
-{
-       register int i;
-
-       if ((ep->d_reclen & 0x3) != 0 || ep->d_reclen <= 0 ||
-           ep->d_reclen > DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) ||
-           ep->d_reclen < DIRSIZ(ep) || ep->d_namlen > MAXNAMLEN)
-               return (1);
-       for (i = 0; i < ep->d_namlen; i++)
-               if (ep->d_name[i] == '\0')
-                       return (1);
-       return (ep->d_name[i]);
+       vput(dp);
+       ndp->ni_vp = NULL;
+       return (error);
 }
 
 /*
 }
 
 /*
- * Write a directory entry after a call to namei, using the parameters
- * which it left in the u. area.  The argument ip is the inode which
- * the new directory entry will refer to.  The u. area field ndp->ni_pdir is
- * a pointer to the directory to be written, which was left locked by
- * namei.  Remaining parameters (ndp->ni_offset, ndp->ni_count) indicate
- * how the space for the new entry is to be gotten.
+ * relookup - lookup a path name component
+ *    Used by lookup to re-aquire things.
  */
  */
-direnter(ip, ndp)
-       struct inode *ip;
-       register struct nameidata *ndp;
+int
+relookup(dvp, vpp, cnp)
+       struct vnode *dvp, **vpp;
+       struct componentname *cnp;
 {
 {
-       register struct direct *ep, *nep;
-       struct buf *bp;
-       int loc, spacefree, error = 0;
-       u_int dsize;
-       int newentrysize;
-       char *dirbuf;
-
-       ndp->ni_dent.d_ino = ip->i_number;
-       newentrysize = DIRSIZ(&ndp->ni_dent);
-       if (ndp->ni_count == 0) {
-               /*
-                * If ndp->ni_count is 0, then namei could find no space in the
-                * directory. In this case ndp->ni_offset will be on a directory
-                * block boundary and we will write the new entry into a fresh
-                * block.
-                */
-               if (ndp->ni_offset&(DIRBLKSIZ-1))
-                       panic("wdir: newblk");
-               ndp->ni_dent.d_reclen = DIRBLKSIZ;
-               error = rdwri(UIO_WRITE, ndp->ni_pdir, (caddr_t)&ndp->ni_dent,
-                   newentrysize, ndp->ni_offset, 1, (int *)0);
-               iput(ndp->ni_pdir);
-               return (error);
-       }
-
-       /*
-        * If ndp->ni_count is non-zero, then namei found space for the new
-        * entry in the range ndp->ni_offset to ndp->ni_offset + ndp->ni_count.
-        * in the directory.  To use this space, we may have to compact
-        * the entries located there, by copying them together towards
-        * the beginning of the block, leaving the free space in
-        * one usable chunk at the end.
-        */
+       struct proc *p = cnp->cn_proc;
+       struct vnode *dp = 0;           /* the directory we are searching */
+       int docache;                    /* == 0 do not cache last component */
+       int wantparent;                 /* 1 => wantparent or lockparent flag */
+       int rdonly;                     /* lookup read-only flag bit */
+       int error = 0;
+#ifdef NAMEI_DIAGNOSTIC
+       int newhash;                    /* DEBUG: check name hash */
+       char *cp;                       /* DEBUG: check name ptr/len */
+#endif
 
        /*
 
        /*
-        * Increase size of directory if entry eats into new space.
-        * This should never push the size past a new multiple of
-        * DIRBLKSIZE.
+        * Setup: break out flag bits into variables.
         */
         */
-       if (ndp->ni_offset + ndp->ni_count > ndp->ni_pdir->i_size)
-               ndp->ni_pdir->i_size = ndp->ni_offset + ndp->ni_count;
+       wantparent = cnp->cn_flags & (LOCKPARENT|WANTPARENT);
+       docache = (cnp->cn_flags & NOCACHE) ^ NOCACHE;
+       if (cnp->cn_nameiop == DELETE ||
+           (wantparent && cnp->cn_nameiop != CREATE))
+               docache = 0;
+       rdonly = cnp->cn_flags & RDONLY;
+       cnp->cn_flags &= ~ISSYMLINK;
+       dp = dvp;
+       vn_lock(dp, LK_EXCLUSIVE | LK_RETRY, p);
 
 
+/* dirloop: */
        /*
        /*
-        * Get the block containing the space for the new directory
-        * entry.  Should return error by result instead of u.u_error.
+        * Search a new directory.
+        *
+        * The cn_hash value is for use by vfs_cache.
+        * The last component of the filename is left accessible via
+        * cnp->cn_nameptr for callers that need the name. Callers needing
+        * the name set the SAVENAME flag. When done, they assume
+        * responsibility for freeing the pathname buffer.
         */
         */
-       bp = blkatoff(ndp->ni_pdir, ndp->ni_offset, (char **)&dirbuf);
-       if (bp == 0) {
-               iput(ndp->ni_pdir);
-               return (u.u_error);
-       }
+#ifdef NAMEI_DIAGNOSTIC
+       for (newhash = 0, cp = cnp->cn_nameptr; *cp != 0 && *cp != '/'; cp++)
+               newhash += (unsigned char)*cp;
+       if (newhash != cnp->cn_hash)
+               panic("relookup: bad hash");
+       if (cnp->cn_namelen != cp - cnp->cn_nameptr)
+               panic ("relookup: bad len");
+       if (*cp != 0)
+               panic("relookup: not last component");
+       printf("{%s}: ", cnp->cn_nameptr);
+#endif
 
        /*
 
        /*
-        * Find space for the new entry.  In the simple case, the
-        * entry at offset base will have the space.  If it does
-        * not, then namei arranged that compacting the region
-        * ndp->ni_offset to ndp->ni_offset+ndp->ni_count would yield the space.
+        * Check for degenerate name (e.g. / or "")
+        * which is a way of talking about a directory,
+        * e.g. like "/." or ".".
         */
         */
-       ep = (struct direct *)dirbuf;
-       dsize = DIRSIZ(ep);
-       spacefree = ep->d_reclen - dsize;
-       for (loc = ep->d_reclen; loc < ndp->ni_count; ) {
-               nep = (struct direct *)(dirbuf + loc);
-               if (ep->d_ino) {
-                       /* trim the existing slot */
-                       ep->d_reclen = dsize;
-                       ep = (struct direct *)((char *)ep + dsize);
-               } else {
-                       /* overwrite; nothing there; header is ours */
-                       spacefree += dsize;     
+       if (cnp->cn_nameptr[0] == '\0') {
+               if (cnp->cn_nameiop != LOOKUP || wantparent) {
+                       error = EISDIR;
+                       goto bad;
                }
                }
-               dsize = DIRSIZ(nep);
-               spacefree += nep->d_reclen - dsize;
-               loc += nep->d_reclen;
-               bcopy((caddr_t)nep, (caddr_t)ep, dsize);
-       }
-       /*
-        * Update the pointer fields in the previous entry (if any),
-        * copy in the new entry, and write out the block.
-        */
-       if (ep->d_ino == 0) {
-               if (spacefree + dsize < newentrysize)
-                       panic("wdir: compact1");
-               ndp->ni_dent.d_reclen = spacefree + dsize;
-       } else {
-               if (spacefree < newentrysize)
-                       panic("wdir: compact2");
-               ndp->ni_dent.d_reclen = spacefree;
-               ep->d_reclen = dsize;
-               ep = (struct direct *)((char *)ep + dsize);
-       }
-       bcopy((caddr_t)&ndp->ni_dent, (caddr_t)ep, (u_int)newentrysize);
-       bwrite(bp);
-       ndp->ni_pdir->i_flag |= IUPD|ICHG;
-       iput(ndp->ni_pdir);
-       return (error);
-}
-
-/*
- * Remove a directory entry after a call to namei, using the
- * parameters which it left in the u. area.  The u. entry
- * ni_offset contains the offset into the directory of the
- * entry to be eliminated.  The ni_count field contains the
- * size of the previous record in the directory.  If this
- * is 0, the first entry is being deleted, so we need only
- * zero the inode number to mark the entry as free.  If the
- * entry isn't the first in the directory, we must reclaim
- * the space of the now empty record by adding the record size
- * to the size of the previous entry.
- */
-dirremove(ndp)
-       register struct nameidata *ndp;
-{
-       register struct inode *dp = ndp->ni_pdir;
-       register struct buf *bp;
-       struct direct *ep;
-
-       if (ndp->ni_count == 0) {
-               /*
-                * First entry in block: set d_ino to zero.
-                */
-               ndp->ni_dent.d_ino = 0;
-               (void) rdwri(UIO_WRITE, dp, (caddr_t)&ndp->ni_dent,
-                   (int)DIRSIZ(&ndp->ni_dent), ndp->ni_offset, 1, (int *)0);
-       } else {
-               /*
-                * Collapse new free space into previous entry.
-                */
-               bp = blkatoff(dp, (int)(ndp->ni_offset - ndp->ni_count),
-                       (char **)&ep);
-               if (bp == 0)
-                       return (0);
-               ep->d_reclen += ndp->ni_dent.d_reclen;
-               bwrite(bp);
-               dp->i_flag |= IUPD|ICHG;
-       }
-       return (1);
-}
-
-/*
- * Rewrite an existing directory entry to point at the inode
- * supplied.  The parameters describing the directory entry are
- * set up by a call to namei.
- */
-dirrewrite(dp, ip, ndp)
-       struct inode *dp, *ip;
-       struct nameidata *ndp;
-{
-
-       ndp->ni_dent.d_ino = ip->i_number;
-       u.u_error = rdwri(UIO_WRITE, dp, (caddr_t)&ndp->ni_dent,
-               (int)DIRSIZ(&ndp->ni_dent), ndp->ni_offset, 1, (int *)0);
-       iput(dp);
-}
-
-/*
- * Return buffer with contents of block "offset"
- * from the beginning of directory "ip".  If "res"
- * is non-zero, fill it in with a pointer to the
- * remaining space in the directory.
- */
-struct buf *
-blkatoff(ip, offset, res)
-       struct inode *ip;
-       off_t offset;
-       char **res;
-{
-       register struct fs *fs = ip->i_fs;
-       daddr_t lbn = lblkno(fs, offset);
-       int base = blkoff(fs, offset);
-       int bsize = blksize(fs, ip, lbn);
-       daddr_t bn = fsbtodb(fs, bmap(ip, lbn, B_WRITE, base, bsize));
-       register struct buf *bp;
-
-       if (u.u_error)
-               return (0);
-       bp = bread(ip->i_dev, bn, bsize);
-       if (bp->b_flags & B_ERROR) {
-               brelse(bp);
+               if (dp->v_type != VDIR) {
+                       error = ENOTDIR;
+                       goto bad;
+               }
+               if (!(cnp->cn_flags & LOCKLEAF))
+                       VOP_UNLOCK(dp, 0, p);
+               *vpp = dp;
+               if (cnp->cn_flags & SAVESTART)
+                       panic("lookup: SAVESTART");
                return (0);
        }
                return (0);
        }
-       if (res)
-               *res = bp->b_un.b_addr + base;
-       return (bp);
-}
 
 
-/*
- * Check if a directory is empty or not.
- * Inode supplied must be locked.
- *
- * Using a struct dirtemplate here is not precisely
- * what we want, but better than using a struct direct.
- *
- * NB: does not handle corrupted directories.
- */
-dirempty(ip)
-       register struct inode *ip;
-{
-       register off_t off;
-       struct dirtemplate dbuf;
-       register struct direct *dp = (struct direct *)&dbuf;
-       int error, count;
-#define        MINDIRSIZ (sizeof (struct dirtemplate) / 2)
+       if (cnp->cn_flags & ISDOTDOT)
+               panic ("relookup: lookup on dot-dot");
 
 
-       for (off = 0; off < ip->i_size; off += dp->d_reclen) {
-               error = rdwri(UIO_READ, ip, (caddr_t)dp, MINDIRSIZ,
-                   off, 1, &count);
+       /*
+        * We now have a segment name to search for, and a directory to search.
+        */
+       if (error = VOP_LOOKUP(dp, vpp, cnp)) {
+#ifdef DIAGNOSTIC
+               if (*vpp != NULL)
+                       panic("leaf should be empty");
+#endif
+               if (error != EJUSTRETURN)
+                       goto bad;
                /*
                /*
-                * Since we read MINDIRSIZ, residual must
-                * be 0 unless we're at end of file.
+                * If creating and at end of pathname, then can consider
+                * allowing file to be created.
                 */
                 */
-               if (error || count != 0)
-                       return (0);
-               /* skip empty entries */
-               if (dp->d_ino == 0)
-                       continue;
-               /* accept only "." and ".." */
-               if (dp->d_namlen > 2)
-                       return (0);
-               if (dp->d_name[0] != '.')
-                       return (0);
+               if (rdonly) {
+                       error = EROFS;
+                       goto bad;
+               }
+               /* ASSERT(dvp == ndp->ni_startdir) */
+               if (cnp->cn_flags & SAVESTART)
+                       VREF(dvp);
                /*
                /*
-                * At this point d_namlen must be 1 or 2.
-                * 1 implies ".", 2 implies ".." if second
-                * char is also "."
+                * We return with ni_vp NULL to indicate that the entry
+                * doesn't currently exist, leaving a pointer to the
+                * (possibly locked) directory inode in ndp->ni_dvp.
                 */
                 */
-               if (dp->d_namlen == 1 || dp->d_name[1] == '.')
-                       continue;
                return (0);
        }
                return (0);
        }
-       return (1);
-}
+       dp = *vpp;
 
 
-/*
- * Check if source directory is in the path of the target directory.
- * Target is supplied locked, source is unlocked.
- * The target is always iput() before returning.
- */
-checkpath(source, target)
-       struct inode *source, *target;
-{
-       struct dirtemplate dirbuf;
-       register struct inode *ip;
-       int error = 0;
-
-       ip = target;
-       if (ip->i_number == source->i_number) {
-               error = EEXIST;
-               goto out;
-       }
-       if (ip->i_number == ROOTINO)
-               goto out;
+#ifdef DIAGNOSTIC
+       /*
+        * Check for symbolic link
+        */
+       if (dp->v_type == VLNK && (cnp->cn_flags & FOLLOW))
+               panic ("relookup: symlink found.\n");
+#endif
 
 
-       for (;;) {
-               if ((ip->i_mode&IFMT) != IFDIR) {
-                       error = ENOTDIR;
-                       break;
-               }
-               error = rdwri(UIO_READ, ip, (caddr_t)&dirbuf,
-                       sizeof (struct dirtemplate), (off_t)0, 1, (int *)0);
-               if (error != 0)
-                       break;
-               if (dirbuf.dotdot_namlen != 2 ||
-                   dirbuf.dotdot_name[0] != '.' ||
-                   dirbuf.dotdot_name[1] != '.') {
-                       error = ENOTDIR;
-                       break;
-               }
-               if (dirbuf.dotdot_ino == source->i_number) {
-                       error = EINVAL;
-                       break;
-               }
-               if (dirbuf.dotdot_ino == ROOTINO)
-                       break;
-               iput(ip);
-               ip = iget(ip->i_dev, ip->i_fs, dirbuf.dotdot_ino);
-               if (ip == NULL) {
-                       error = u.u_error;
-                       break;
-               }
+       /*
+        * Disallow directory write attempts on read-only file systems.
+        */
+       if (rdonly &&
+           (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
+               error = EROFS;
+               goto bad2;
        }
        }
+       /* ASSERT(dvp == ndp->ni_startdir) */
+       if (cnp->cn_flags & SAVESTART)
+               VREF(dvp);
+       
+       if (!wantparent)
+               vrele(dvp);
+       if ((cnp->cn_flags & LOCKLEAF) == 0)
+               VOP_UNLOCK(dp, 0, p);
+       return (0);
 
 
-out:
-       if (error == ENOTDIR)
-               printf("checkpath: .. not a directory\n");
-       if (ip != NULL)
-               iput(ip);
+bad2:
+       if ((cnp->cn_flags & LOCKPARENT) && (cnp->cn_flags & ISLASTCN))
+               VOP_UNLOCK(dvp, 0, p);
+       vrele(dvp);
+bad:
+       vput(dp);
+       *vpp = NULL;
        return (error);
 }
        return (error);
 }
-
-/*
- * Name cache initialization, from main() when we are booting
- */
-nchinit()
-{
-       register union nchash *nchp;
-       register struct nch *ncp;
-
-       nchhead = 0;
-       nchtail = &nchhead;
-
-       for (ncp = nch; ncp < &nch[nchsize]; ncp++) {
-               ncp->nc_forw = ncp;                     /* hash chain */
-               ncp->nc_back = ncp;
-
-               ncp->nc_nxt = NULL;                     /* lru chain */
-               *nchtail = ncp;
-               ncp->nc_prev = nchtail;
-               nchtail = &ncp->nc_nxt;
-
-               /* all else is zero already */
-       }
-
-       for (nchp = nchash; nchp < &nchash[NCHHASH]; nchp++) {
-               nchp->nch_head[0] = nchp;
-               nchp->nch_head[1] = nchp;
-       }
-}
-
-/*
- * Cache flush, called when filesys is umounted to
- * remove entries that would now be invalid
- *
- * The line "nxtcp = nchhead" near the end is to avoid potential problems
- * if the cache lru chain is modified while we are dumping the
- * inode.  This makes the algorithm O(n^2), but do you think I care?
- */
-nchinval(dev)
-       register dev_t dev;
-{
-       register struct nch *ncp, *nxtcp;
-
-       for (ncp = nchhead; ncp; ncp = nxtcp) {
-               nxtcp = ncp->nc_nxt;
-
-               if (ncp->nc_ip == NULL ||
-                   (ncp->nc_idev != dev && ncp->nc_dev != dev))
-                       continue;
-
-                       /* free the resources we had */
-               ncp->nc_idev = NODEV;
-               ncp->nc_dev = NODEV;
-               ncp->nc_id = NULL;
-               ncp->nc_ino = 0;
-               ncp->nc_ip = NULL;
-
-
-                       /* remove the entry from its hash chain */
-               remque(ncp);
-                       /* and make a dummy one */
-               ncp->nc_forw = ncp;
-               ncp->nc_back = ncp;
-
-                       /* delete this entry from LRU chain */
-               *ncp->nc_prev = nxtcp;
-               if (nxtcp)
-                       nxtcp->nc_prev = ncp->nc_prev;
-               else
-                       nchtail = ncp->nc_prev;
-
-                       /* cause rescan of list, it may have altered */
-               nxtcp = nchhead;
-                       /* put the now-free entry at head of LRU */
-               ncp->nc_nxt = nxtcp;
-               ncp->nc_prev = &nchhead;
-               nxtcp->nc_prev = &ncp->nc_nxt;
-               nchhead = ncp;
-       }
-}