X-Git-Url: https://git.subgeniuskitty.com/unix-history/.git/blobdiff_plain/f2a5ad78e1ece492aea02f3096e9d49416fd29c4..cc1730771008ddc8f1bccc5ed3aa8a987856137e:/usr/src/sys/ufs/ffs/ufs_lookup.c diff --git a/usr/src/sys/ufs/ffs/ufs_lookup.c b/usr/src/sys/ufs/ffs/ufs_lookup.c index 015dc16e76..124a0ca411 100644 --- a/usr/src/sys/ufs/ffs/ufs_lookup.c +++ b/usr/src/sys/ufs/ffs/ufs_lookup.c @@ -1,170 +1,164 @@ -/* ufs_lookup.c 4.35 83/02/10 */ - -#include "../h/param.h" -#include "../h/systm.h" -#include "../h/inode.h" -#include "../h/fs.h" -#include "../h/mount.h" -#include "../h/dir.h" -#include "../h/user.h" -#include "../h/buf.h" -#include "../h/conf.h" -#include "../h/uio.h" -#include "../h/nami.h" - -struct buf *blkatoff(); -int dirchk = 0; /* - * Convert a pathname into a pointer to a locked inode, - * with side effects usable in creating and removing files. - * This is a very central and rather complicated routine. + * Copyright (c) 1989 The Regents of the University of California. + * All rights reserved. * - * The func argument gives the routine which returns successive - * characters of the name to be translated. + * %sccs.include.redist.c% * - * The flag argument is (LOOKUP, CREATE, DELETE) depending on whether - * the name is to be (looked up, created, deleted). If flag has - * LOCKPARENT or'ed into it and the target of the pathname exists, - * namei returns both the target and its parent directory locked. + * @(#)ufs_lookup.c 7.36 (Berkeley) %G% + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +struct nchstats nchstats; +#ifdef DIAGNOSTIC +int dirchk = 1; +#else +int dirchk = 0; +#endif + +/* + * Convert a component of a pathname into a pointer to a locked inode. + * This is a very central and rather complicated routine. * If the file system is not maintained in a strict tree hierarchy, - * this can result in a deadlock situation. When creating and - * LOCKPARENT is specified, the target may not be ".". When deleting - * and LOCKPARENT is specified, the target may be ".", but the caller - * must check to insure it does an irele and iput instead of two iputs. + * this can result in a deadlock situation (see comments in code below). * - * The follow argument is 1 when symbolic links are to be followed - * when they occur at the end of the name translation process. + * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on + * whether the name is to be looked up, created, renamed, or deleted. + * When CREATE, RENAME, or DELETE is specified, information usable in + * creating, renaming, or deleting a directory entry may be calculated. + * If flag has LOCKPARENT or'ed into it and the target of the pathname + * exists, lookup returns both the target and its parent directory locked. + * When creating or renaming and LOCKPARENT is specified, the target may + * not be ".". When deleting and LOCKPARENT is specified, the target may + * be "."., but the caller must check to ensure it does an vrele and iput + * instead of two iputs. * - * Overall outline: + * Overall outline of ufs_lookup: * - * copy in name - * get starting directory - * dirloop: * check accessibility of directory - * dirloop2: - * copy next component of name to u.u_dent - * handle degenerate case where name is null string + * look for name in cache, if found, then if at end of path + * and deleting or creating, drop it, else return name * search for name in directory, to found or notfound * notfound: - * if creating, return locked directory, leaving info on avail. slots + * if creating, return locked directory, leaving info on available slots * else return error * found: * if at end of path and deleting, return information to allow delete - * if at end of path and rewriting (create and LOCKPARENT), lock targe + * if at end of path and rewriting (RENAME and LOCKPARENT), lock target * inode and return info to allow rewrite - * if .. and on mounted filesys, look in mount table for parent - * if symbolic link, massage name in buffer and continue at dirloop - * if more components of name, do next level at dirloop - * return the answer as locked inode + * if not at end, add name to cache; if at end and neither creating + * nor deleting, add name to cache * - * NOTE: (LOOKUP | LOCKPARENT) currently returns the parent inode, - * but unlocked. + * NOTE: (LOOKUP | LOCKPARENT) currently returns the parent inode unlocked. */ -struct inode * -namei(func, flag, follow) - int (*func)(), flag, follow; +int +ufs_lookup(vdp, ndp, p) + register struct vnode *vdp; + register struct nameidata *ndp; + struct proc *p; { - register char *cp; /* pointer into pathname argument */ -/* these variables refer to things which must be freed or unlocked */ - register struct inode *dp = 0; /* the directory we are searching */ - register struct fs *fs; /* file system that directory is in */ - register struct buf *bp = 0; /* a buffer of directory entries */ + register struct inode *dp; /* the directory we are searching */ + struct buf *bp; /* a buffer of directory entries */ register struct direct *ep; /* the current directory entry */ int entryoffsetinblock; /* offset of ep in bp's buffer */ - register struct buf *nbp; /* buffer storing path name argument */ -/* these variables hold information about the search for a slot */ enum {NONE, COMPACT, FOUND} slotstatus; - int slotoffset = -1; /* offset of area with free space */ + int slotoffset; /* offset of area with free space */ int slotsize; /* size of area at slotoffset */ int slotfreespace; /* amount of space free in slot */ int slotneeded; /* size of the entry we're seeking */ -/* */ - int dirsize; - int prevoff; /* u.u_offset of previous entry */ - int nlink = 0; /* number of symbolic links taken */ + int numdirpasses; /* strategy for directory search */ + int endsearch; /* offset to end directory search */ + int prevoff; /* prev entry ndp->ni_ufs.ufs_offset */ struct inode *pdp; /* saved dp during symlink work */ - int i; - int lockparent; + struct vnode *tdp; /* returned by VOP_VGET */ + off_t enduseful; /* pointer past last used dir slot */ + u_long bmask; /* block offset mask */ + int flag; /* LOOKUP, CREATE, RENAME, or DELETE */ + int lockparent; /* 1 => lockparent flag is set */ + int wantparent; /* 1 => wantparent or lockparent flag */ + int error; - lockparent = flag & LOCKPARENT; - flag &= ~LOCKPARENT; - /* - * Get a buffer for the name to be translated, and copy the - * name into the buffer. - */ - nbp = geteblk(MAXPATHLEN); - for (cp = nbp->b_un.b_addr; *cp = (*func)(); ) { - if ((*cp&0377) == ('/'|0200) || (*cp&0200) && flag != 2) { - u.u_error = EPERM; - goto bad; - } - cp++; - if (cp >= nbp->b_un.b_addr + MAXPATHLEN) { - u.u_error = ENOENT; - goto bad; - } - } - if (u.u_error) - goto bad; - - /* - * Get starting directory. - */ - cp = nbp->b_un.b_addr; - if (*cp == '/') { - while (*cp == '/') - cp++; - if ((dp = u.u_rdir) == NULL) - dp = rootdir; - } else - dp = u.u_cdir; - fs = dp->i_fs; - ilock(dp); - dp->i_count++; - u.u_pdir = (struct inode *)0xc0000000; /* illegal */ + bp = NULL; + slotoffset = -1; + ndp->ni_dvp = vdp; + ndp->ni_vp = NULL; + dp = VTOI(vdp); + lockparent = ndp->ni_nameiop & LOCKPARENT; + flag = ndp->ni_nameiop & OPMASK; + wantparent = ndp->ni_nameiop & (LOCKPARENT|WANTPARENT); - /* - * We come to dirloop to search a new directory. - * The directory must be locked so that it can be - * iput, and fs must be already set to dp->i_fs. - */ -dirloop: /* * Check accessiblity of directory. */ - if ((dp->i_mode&IFMT) != IFDIR) { - u.u_error = ENOTDIR; - goto bad; - } - if (access(dp, IEXEC)) - goto bad; + if ((dp->i_mode&IFMT) != IFDIR) + return (ENOTDIR); + if (error = ufs_access(vdp, VEXEC, ndp->ni_cred, p)) + return (error); -dirloop2: /* - * Copy next component of name to u.u_dent. + * We now have a segment name to search for, and a directory to search. + * + * Before tediously performing a linear scan of the directory, + * check the name cache to see if the directory/name pair + * we are looking for is known already. */ - for (i = 0; *cp != 0 && *cp != '/'; cp++) { - if (i >= MAXNAMLEN) { - u.u_error = ENOENT; - goto bad; + if (error = cache_lookup(ndp)) { + int vpid; /* capability number of vnode */ + + if (error == ENOENT) + return (error); +#ifdef PARANOID + if (vdp == ndp->ni_rdir && ndp->ni_isdotdot) + panic("ufs_lookup: .. through root"); +#endif + /* + * Get the next vnode in the path. + * See comment below starting `Step through' for + * an explaination of the locking protocol. + */ + pdp = dp; + dp = VTOI(ndp->ni_vp); + vdp = ndp->ni_vp; + vpid = vdp->v_id; + if (pdp == dp) { + VREF(vdp); + error = 0; + } else if (ndp->ni_isdotdot) { + IUNLOCK(pdp); + error = vget(vdp); + if (!error && lockparent && *ndp->ni_next == '\0') + ILOCK(pdp); + } else { + error = vget(vdp); + if (!lockparent || error || *ndp->ni_next != '\0') + IUNLOCK(pdp); } - u.u_dent.d_name[i++] = *cp; - } - u.u_dent.d_namlen = i; - u.u_dent.d_name[i] = 0; - - /* - * Check for degenerate name (e.g. / or "") - * which is a way of talking about a directory, - * e.g. like "/." or ".". - */ - if (u.u_dent.d_name[0] == 0) { - if (flag || lockparent) { - u.u_error = ENOENT; - goto bad; + /* + * Check that the capability number did not change + * while we were waiting for the lock. + */ + if (!error) { + if (vpid == vdp->v_id) + return (0); + ufs_iput(dp); + if (lockparent && pdp != dp && *ndp->ni_next == '\0') + IUNLOCK(pdp); } - brelse(nbp); - return (dp); + ILOCK(pdp); + dp = pdp; + vdp = ITOV(dp); + ndp->ni_vp = NULL; } /* @@ -174,59 +168,78 @@ dirloop2: * case it doesn't already exist. */ slotstatus = FOUND; - if (flag == CREATE && *cp == 0) { + if ((flag == CREATE || flag == RENAME) && *ndp->ni_next == 0) { slotstatus = NONE; slotfreespace = 0; - slotneeded = DIRSIZ(&u.u_dent); + slotneeded = ((sizeof (struct direct) - (MAXNAMLEN + 1)) + + ((ndp->ni_namelen + 1 + 3) &~ 3)); } - dirsize = roundup(dp->i_size, DIRBLKSIZ); - u.u_offset = 0; - while (u.u_offset < dirsize) { + /* + * If there is cached information on a previous search of + * this directory, pick up where we last left off. + * We cache only lookups as these are the most common + * and have the greatest payoff. Caching CREATE has little + * benefit as it usually must search the entire directory + * to determine that the entry does not exist. Caching the + * location of the last DELETE or RENAME has not reduced + * profiling time and hence has been removed in the interest + * of simplicity. + */ + bmask = VFSTOUFS(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1; + if (flag != LOOKUP || dp->i_diroff == 0 || dp->i_diroff > dp->i_size) { + ndp->ni_ufs.ufs_offset = 0; + numdirpasses = 1; + } else { + ndp->ni_ufs.ufs_offset = dp->i_diroff; + if ((entryoffsetinblock = ndp->ni_ufs.ufs_offset & bmask) && + (error = VOP_BLKATOFF(vdp, ndp->ni_ufs.ufs_offset, NULL, + &bp))) + return (error); + numdirpasses = 2; + nchstats.ncs_2passes++; + } + endsearch = roundup(dp->i_size, DIRBLKSIZ); + enduseful = 0; + +searchloop: + while (ndp->ni_ufs.ufs_offset < endsearch) { /* - * If offset is on a block boundary, - * read the next directory block. - * Release previous if it exists. + * If offset is on a block boundary, read the next directory + * block. Release previous if it exists. */ - if (blkoff(fs, u.u_offset) == 0) { + if ((ndp->ni_ufs.ufs_offset & bmask) == 0) { if (bp != NULL) brelse(bp); - bp = blkatoff(dp, u.u_offset, (char **)0); - if (bp == 0) - goto bad; + if (error = VOP_BLKATOFF(vdp, ndp->ni_ufs.ufs_offset, + NULL, &bp)) + return (error); entryoffsetinblock = 0; } - /* * If still looking for a slot, and at a DIRBLKSIZE - * boundary, have to start looking for free space - * again. + * boundary, have to start looking for free space again. */ if (slotstatus == NONE && - (entryoffsetinblock&(DIRBLKSIZ-1)) == 0) { + (entryoffsetinblock & (DIRBLKSIZ - 1)) == 0) { slotoffset = -1; slotfreespace = 0; } - /* - * Get pointer to next entry, and do consistency checking: - * record length must be multiple of 4 - * record length must not be zero - * entry must fit in rest of this DIRBLKSIZ block - * record must be large enough to contain name - * When dirchk is set we also check: - * name is not longer than MAXNAMLEN - * name must be as long as advertised, and null terminated - * Checking last two conditions is done only when dirchk is - * set, to save time. + * Get pointer to next entry. + * Full validation checks are slow, so we only check + * enough to insure forward progress through the + * directory. Complete checks can be run by patching + * "dirchk" to be true. */ ep = (struct direct *)(bp->b_un.b_addr + entryoffsetinblock); - i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)); - if ((ep->d_reclen & 0x3) || ep->d_reclen == 0 || - ep->d_reclen > i || DIRSIZ(ep) > ep->d_reclen || - dirchk && (ep->d_namlen > MAXNAMLEN || dirbadname(ep))) { - dirbad(dp, "mangled entry"); - u.u_offset += i; + if (ep->d_reclen == 0 || + dirchk && ufs_dirbadentry(ep, entryoffsetinblock)) { + int i; + + ufs_dirbad(dp, ndp->ni_ufs.ufs_offset, "mangled entry"); + i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)); + ndp->ni_ufs.ufs_offset += i; entryoffsetinblock += i; continue; } @@ -245,17 +258,18 @@ dirloop2: if (size > 0) { if (size >= slotneeded) { slotstatus = FOUND; - slotoffset = u.u_offset; + slotoffset = ndp->ni_ufs.ufs_offset; slotsize = ep->d_reclen; } else if (slotstatus == NONE) { slotfreespace += size; if (slotoffset == -1) - slotoffset = u.u_offset; + slotoffset = + ndp->ni_ufs.ufs_offset; if (slotfreespace >= slotneeded) { slotstatus = COMPACT; slotsize = - u.u_offset+ep->d_reclen - - slotoffset; + ndp->ni_ufs.ufs_offset + + ep->d_reclen - slotoffset; } } } @@ -265,339 +279,391 @@ dirloop2: * Check for a name match. */ if (ep->d_ino) { - if (ep->d_namlen == u.u_dent.d_namlen && - !bcmp(u.u_dent.d_name, ep->d_name, ep->d_namlen)) + if (ep->d_namlen == ndp->ni_namelen && + !bcmp(ndp->ni_ptr, ep->d_name, + (unsigned)ep->d_namlen)) { + /* + * Save directory entry's inode number and + * reclen in ndp->ni_ufs area, and release + * directory buffer. + */ + ndp->ni_ufs.ufs_ino = ep->d_ino; + ndp->ni_ufs.ufs_reclen = ep->d_reclen; + brelse(bp); goto found; + } } - prevoff = u.u_offset; - u.u_offset += ep->d_reclen; + prevoff = ndp->ni_ufs.ufs_offset; + ndp->ni_ufs.ufs_offset += ep->d_reclen; entryoffsetinblock += ep->d_reclen; + if (ep->d_ino) + enduseful = ndp->ni_ufs.ufs_offset; } /* notfound: */ + /* + * If we started in the middle of the directory and failed + * to find our target, we must check the beginning as well. + */ + if (numdirpasses == 2) { + numdirpasses--; + ndp->ni_ufs.ufs_offset = 0; + endsearch = dp->i_diroff; + goto searchloop; + } + if (bp != NULL) + brelse(bp); /* * If creating, and at end of pathname and current * directory has not been removed, then can consider * allowing file to be created. */ - if (flag == CREATE && *cp == 0 && dp->i_nlink != 0) { + if ((flag == CREATE || flag == RENAME) && + *ndp->ni_next == 0 && dp->i_nlink != 0) { /* * Access for write is interpreted as allowing * creation of files in the directory. */ - if (access(dp, IWRITE)) - goto bad; + if (error = ufs_access(vdp, VWRITE, ndp->ni_cred, p)) + return (error); /* * Return an indication of where the new directory * entry should be put. If we didn't find a slot, - * then set u.u_count to 0 indicating that the - * new slot belongs at the end of the directory. - * If we found a slot, then the new entry can be - * put in the range [u.u_offset..u.u_offset+u.u_count) + * then set ndp->ni_ufs.ufs_count to 0 indicating + * that the new slot belongs at the end of the + * directory. If we found a slot, then the new entry + * can be put in the range from ndp->ni_ufs.ufs_offset + * to ndp->ni_ufs.ufs_offset + ndp->ni_ufs.ufs_count. */ - if (slotstatus == NONE) - u.u_count = 0; - else { - u.u_offset = slotoffset; - u.u_count = slotsize; + if (slotstatus == NONE) { + ndp->ni_ufs.ufs_offset = roundup(dp->i_size, DIRBLKSIZ); + ndp->ni_ufs.ufs_count = 0; + enduseful = ndp->ni_ufs.ufs_offset; + } else { + ndp->ni_ufs.ufs_offset = slotoffset; + ndp->ni_ufs.ufs_count = slotsize; + if (enduseful < slotoffset + slotsize) + enduseful = slotoffset + slotsize; } + ndp->ni_ufs.ufs_endoff = roundup(enduseful, DIRBLKSIZ); dp->i_flag |= IUPD|ICHG; - if (bp) - brelse(bp); - brelse(nbp); /* * We return with the directory locked, so that * the parameters we set up above will still be * valid if we actually decide to do a direnter(). - * We return NULL to indicate that the entry doesn't - * currently exist, leaving a pointer to the (locked) - * directory inode in u.u_pdir. + * We return ni_vp == NULL to indicate that the entry + * does not currently exist; we leave a pointer to + * the (locked) directory inode in ndp->ni_dvp. + * The pathname buffer is saved so that the name + * can be obtained later. + * + * NB - if the directory is unlocked, then this + * information cannot be used. */ - u.u_pdir = dp; - return (NULL); + ndp->ni_nameiop |= SAVENAME; + if (!lockparent) + IUNLOCK(dp); } - u.u_error = ENOENT; - goto bad; + /* + * Insert name into cache (as non-existent) if appropriate. + */ + if (ndp->ni_makeentry && flag != CREATE) + cache_enter(ndp); + return (ENOENT); + found: + if (numdirpasses == 2) + nchstats.ncs_pass2++; /* * Check that directory length properly reflects presence * of this entry. */ if (entryoffsetinblock + DIRSIZ(ep) > dp->i_size) { - dirbad(dp, "i_size too small"); + ufs_dirbad(dp, ndp->ni_ufs.ufs_offset, "i_size too small"); dp->i_size = entryoffsetinblock + DIRSIZ(ep); dp->i_flag |= IUPD|ICHG; } /* - * Found component in pathname; save directory - * entry in u.u_dent, and release directory buffer. + * Found component in pathname. + * If the final component of path name, save information + * in the cache as to where the entry was found. */ - bcopy((caddr_t)ep, (caddr_t)&u.u_dent, (u_int)DIRSIZ(ep)); - brelse(bp); - bp = NULL; + if (*ndp->ni_next == '\0' && flag == LOOKUP) + dp->i_diroff = ndp->ni_ufs.ufs_offset &~ (DIRBLKSIZ - 1); /* * If deleting, and at end of pathname, return * parameters which can be used to remove file. - * If the lockparent flag isn't set, we return only - * the directory (in u.u_pdir), otherwise we go + * If the wantparent flag isn't set, we return only + * the directory (in ndp->ni_dvp), otherwise we go * on and lock the inode, being careful with ".". */ - if (flag == DELETE && *cp == 0) { + if (flag == DELETE && *ndp->ni_next == 0) { /* * Write access to directory required to delete files. */ - if (access(dp, IWRITE)) - goto bad; - u.u_pdir = dp; /* for dirremove() */ + if (error = ufs_access(vdp, VWRITE, ndp->ni_cred, p)) + return (error); /* - * Return pointer to current entry in u.u_offset, + * Return pointer to current entry in ndp->ni_ufs.ufs_offset, * and distance past previous entry (if there - * is a previous entry in this block) in u.u_count. - * Save directory inode pointer in u.u_pdir for dirremove(). + * is a previous entry in this block) in ndp->ni_ufs.ufs_count. + * Save directory inode pointer in ndp->ni_dvp for dirremove(). */ - if ((u.u_offset&(DIRBLKSIZ-1)) == 0) - u.u_count = 0; + if ((ndp->ni_ufs.ufs_offset&(DIRBLKSIZ-1)) == 0) + ndp->ni_ufs.ufs_count = 0; else - u.u_count = u.u_offset - prevoff; - if (lockparent) { - if (dp->i_number == u.u_dent.d_ino) - dp->i_count++; - else { - dp = iget(dp->i_dev, fs, u.u_dent.d_ino); - if (dp == NULL) { - iput(u.u_pdir); - goto bad; - } - } + ndp->ni_ufs.ufs_count = + ndp->ni_ufs.ufs_offset - prevoff; + if (dp->i_number == ndp->ni_ufs.ufs_ino) { + VREF(vdp); + ndp->ni_vp = vdp; + return (0); } - brelse(nbp); - return (dp); - } - - /* - * Special handling for ".." allowing chdir out of mounted - * file system: indirect .. in root inode to reevaluate - * in directory file system was mounted on. - */ - if (u.u_dent.d_name[0] == '.' && u.u_dent.d_name[1] == '.' && - u.u_dent.d_name[2] == '\0') { - if (dp == u.u_rdir) - u.u_dent.d_ino = dp->i_number; - else if (u.u_dent.d_ino == ROOTINO && - dp->i_number == ROOTINO) { - for (i = 1; i < NMOUNT; i++) - if (mount[i].m_bufp != NULL && - mount[i].m_dev == dp->i_dev) { - iput(dp); - dp = mount[i].m_inodp; - ilock(dp); - dp->i_count++; - fs = dp->i_fs; - cp -= 2; /* back over .. */ - goto dirloop2; - } + if (error = VOP_VGET(vdp, ndp->ni_ufs.ufs_ino, &tdp)) + return (error); + /* + * If directory is "sticky", then user must own + * the directory, or the file in it, else she + * may not delete it (unless she's root). This + * implements append-only directories. + */ + if ((dp->i_mode & ISVTX) && + ndp->ni_cred->cr_uid != 0 && + ndp->ni_cred->cr_uid != dp->i_uid && + VTOI(tdp)->i_uid != ndp->ni_cred->cr_uid) { + vput(tdp); + return (EPERM); } + ndp->ni_vp = tdp; + if (!lockparent) + IUNLOCK(dp); + return (0); } /* - * If rewriting (rename), return the inode and the + * If rewriting (RENAME), return the inode and the * information required to rewrite the present directory * Must get inode of directory entry to verify it's a - * regular file, or empty directory. + * regular file, or empty directory. */ - if ((flag == CREATE && lockparent) && *cp == 0) { - if (access(dp, IWRITE)) - goto bad; - u.u_pdir = dp; /* for dirrewrite() */ + if (flag == RENAME && wantparent && *ndp->ni_next == 0) { + if (error = ufs_access(vdp, VWRITE, ndp->ni_cred, p)) + return (error); /* - * Careful about locking second inode. - * This can only occur if the target is ".". + * Careful about locking second inode. + * This can only occur if the target is ".". */ - if (dp->i_number == u.u_dent.d_ino) { - u.u_error = EISDIR; /* XXX */ - goto bad; - } - dp = iget(dp->i_dev, fs, u.u_dent.d_ino); - if (dp == NULL) { - iput(u.u_pdir); - goto bad; - } - brelse(nbp); - return (dp); + if (dp->i_number == ndp->ni_ufs.ufs_ino) + return (EISDIR); + if (error = VOP_VGET(vdp, ndp->ni_ufs.ufs_ino, &tdp)) + return (error); + ndp->ni_vp = tdp; + ndp->ni_nameiop |= SAVENAME; + if (!lockparent) + IUNLOCK(dp); + return (0); } /* - * Check for symbolic link, which may require us - * to massage the name before we continue translation. - * To avoid deadlock have to unlock the current directory, - * but don't iput it because we may need it again (if - * the symbolic link is relative to .). Instead save - * it (unlocked) as pdp. + * Step through the translation in the name. We do not `iput' the + * directory because we may need it again if a symbolic link + * is relative to the current directory. Instead we save it + * unlocked as "pdp". We must get the target inode before unlocking + * the directory to insure that the inode will not be removed + * before we get it. We prevent deadlock by always fetching + * inodes from the root, moving down the directory tree. Thus + * when following backward pointers ".." we must unlock the + * parent directory before getting the requested directory. + * There is a potential race condition here if both the current + * and parent directories are removed before the `iget' for the + * inode associated with ".." returns. We hope that this occurs + * infrequently since we cannot avoid this race condition without + * implementing a sophisticated deadlock detection algorithm. + * Note also that this simple deadlock detection scheme will not + * work if the file system has any hard links other than ".." + * that point backwards in the directory structure. */ pdp = dp; - iunlock(pdp); - dp = iget(dp->i_dev, fs, u.u_dent.d_ino); - if (dp == NULL) - goto bad2; - fs = dp->i_fs; - - /* - * Check for symbolic link - */ - if ((dp->i_mode & IFMT) == IFLNK && (follow || *cp == '/')) { - u_int pathlen = strlen(cp) + 1; - - if (dp->i_size + pathlen >= MAXPATHLEN - 1 || - ++nlink > MAXSYMLINKS) { - u.u_error = ELOOP; - goto bad2; + if (ndp->ni_isdotdot) { + IUNLOCK(pdp); /* race to get the inode */ + if (error = VOP_VGET(vdp, ndp->ni_ufs.ufs_ino, &tdp)) { + ILOCK(pdp); + return (error); } - ovbcopy(cp, nbp->b_un.b_addr + dp->i_size, pathlen); - u.u_error = - rdwri(UIO_READ, dp, nbp->b_un.b_addr, (int)dp->i_size, - 0, 1, (int *)0); - if (u.u_error) - goto bad2; - cp = nbp->b_un.b_addr; - iput(dp); - if (*cp == '/') { - irele(pdp); - while (*cp == '/') - cp++; - if ((dp = u.u_rdir) == NULL) - dp = rootdir; - ilock(dp); - dp->i_count++; - } else { - dp = pdp; - ilock(dp); - } - fs = dp->i_fs; - goto dirloop; + if (lockparent && *ndp->ni_next == '\0') + ILOCK(pdp); + ndp->ni_vp = tdp; + } else if (dp->i_number == ndp->ni_ufs.ufs_ino) { + VREF(vdp); /* we want ourself, ie "." */ + ndp->ni_vp = vdp; + } else { + if (error = VOP_VGET(vdp, ndp->ni_ufs.ufs_ino, &tdp)) + return (error); + if (!lockparent || *ndp->ni_next != '\0') + IUNLOCK(pdp); + ndp->ni_vp = tdp; } /* - * Not a symbolic link. If more pathname, - * continue at next component, else return. + * Insert name into cache if appropriate. */ - if (*cp == '/') { - while (*cp == '/') - cp++; - irele(pdp); - goto dirloop; - } - brelse(nbp); - if (lockparent) - u.u_pdir = pdp; - else - irele(pdp); - return (dp); -bad2: - irele(pdp); -bad: - if (bp) - brelse(bp); - if (dp) - iput(dp); - brelse(nbp); - return (NULL); + if (ndp->ni_makeentry) + cache_enter(ndp); + return (0); } -dirbad(ip, how) +void +ufs_dirbad(ip, offset, how) struct inode *ip; + off_t offset; char *how; { + struct mount *mp; - printf("%s: bad dir ino %d at offset %d: %s\n", - ip->i_fs->fs_fsmnt, ip->i_number, u.u_offset, how); + mp = ITOV(ip)->v_mount; + (void)printf("%s: bad dir ino %d at offset %d: %s\n", + mp->mnt_stat.f_mntonname, ip->i_number, offset, how); + if ((mp->mnt_stat.f_flags & MNT_RDONLY) == 0) + panic("bad dir"); } -dirbadname(ep) +/* + * Do consistency checking on a directory entry: + * record length must be multiple of 4 + * entry must fit in rest of its DIRBLKSIZ block + * record must be large enough to contain entry + * name is not longer than MAXNAMLEN + * name must be as long as advertised, and null terminated + */ +int +ufs_dirbadentry(ep, entryoffsetinblock) register struct direct *ep; + int entryoffsetinblock; { register int i; + if ((ep->d_reclen & 0x3) != 0 || + ep->d_reclen > DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) || + ep->d_reclen < DIRSIZ(ep) || ep->d_namlen > MAXNAMLEN) { + /*return (1); */ + printf("First bad\n"); + goto bad; + } for (i = 0; i < ep->d_namlen; i++) - if (ep->d_name[i] == 0) - return (1); + if (ep->d_name[i] == '\0') { + /*return (1); */ + printf("Second bad\n"); + goto bad; + } + if (ep->d_name[i]) + goto bad; return (ep->d_name[i]); +bad: +printf("ufs_dirbadentry: jumping out: reclen: %d namlen %d ino %d name %s\n", + ep->d_reclen, ep->d_namlen, ep->d_ino, ep->d_name ); + return(1); } /* * Write a directory entry after a call to namei, using the parameters - * which it left in the u. area. The argument ip is the inode which - * the new directory entry will refer to. The u. area field u.u_pdir is - * a pointer to the directory to be written, which was left locked by - * namei. Remaining parameters (u.u_offset, u.u_count) indicate - * how the space for the new entry is to be gotten. + * that it left in nameidata. The argument ip is the inode which the new + * directory entry will refer to. The nameidata field ndp->ni_dvp is a + * pointer to the directory to be written, which was left locked by namei. + * Remaining parameters (ndp->ni_ufs.ufs_offset, ndp->ni_ufs.ufs_count) + * indicate how the space for the new entry is to be obtained. */ -direnter(ip) +int +ufs_direnter(ip, ndp) struct inode *ip; + register struct nameidata *ndp; { register struct direct *ep, *nep; + register struct inode *dp; + register struct vnode *dvp; struct buf *bp; - int loc, freespace, error = 0; + struct direct newdir; + struct iovec aiov; + struct uio auio; u_int dsize; - int newentrysize; + int error, loc, newentrysize, spacefree; char *dirbuf; - u.u_dent.d_ino = ip->i_number; - u.u_segflg = 1; - newentrysize = DIRSIZ(&u.u_dent); - if (u.u_count == 0) { +#ifdef DIAGNOSTIC + if ((ndp->ni_nameiop & SAVENAME) == 0) + panic("direnter: missing name"); +#endif + dvp = ndp->ni_dvp; + dp = VTOI(dvp); + newdir.d_ino = ip->i_number; + newdir.d_namlen = ndp->ni_namelen; + bcopy(ndp->ni_ptr, newdir.d_name, (unsigned)ndp->ni_namelen + 1); + newentrysize = DIRSIZ(&newdir); + if (ndp->ni_ufs.ufs_count == 0) { /* - * If u.u_count is 0, then namei could find no space in the - * directory. In this case u.u_offset will be on a directory - * block boundary and we will write the new entry into a fresh - * block. + * If ndp->ni_ufs.ufs_count is 0, then namei could find no + * space in the directory. Here, ndp->ni_ufs.ufs_offset will + * be on a directory block boundary and we will write the + * new entry into a fresh block. */ - if (u.u_offset&(DIRBLKSIZ-1)) + if (ndp->ni_ufs.ufs_offset & (DIRBLKSIZ - 1)) panic("wdir: newblk"); - u.u_dent.d_reclen = DIRBLKSIZ; - error = rdwri(UIO_WRITE, u.u_pdir, (caddr_t)&u.u_dent, - newentrysize, u.u_offset, 1, (int *)0); - iput(u.u_pdir); + auio.uio_offset = ndp->ni_ufs.ufs_offset; + newdir.d_reclen = DIRBLKSIZ; + auio.uio_resid = newentrysize; + aiov.iov_len = newentrysize; + aiov.iov_base = (caddr_t)&newdir; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_rw = UIO_WRITE; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_procp = (struct proc *)0; + error = VOP_WRITE(dvp, &auio, IO_SYNC, ndp->ni_cred); + if (DIRBLKSIZ > + VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_bsize) + /* XXX should grow with balloc() */ + panic("ufs_direnter: frag size"); + else if (!error) { + dp->i_size = roundup(dp->i_size, DIRBLKSIZ); + dp->i_flag |= ICHG; + } return (error); } /* - * If u.u_count is non-zero, then namei found space for the - * new entry in the range u.u_offset to u.u_offset+u.u_count. - * in the directory. To use this space, we may have to compact - * the entries located there, by copying them together towards - * the beginning of the block, leaving the free space in - * one usable chunk at the end. + * If ndp->ni_ufs.ufs_count is non-zero, then namei found space + * for the new entry in the range ndp->ni_ufs.ufs_offset to + * ndp->ni_ufs.ufs_offset + ndp->ni_ufs.ufs_count in the directory. + * To use this space, we may have to compact the entries located + * there, by copying them together towards the beginning of the + * block, leaving the free space in one usable chunk at the end. */ /* * Increase size of directory if entry eats into new space. * This should never push the size past a new multiple of * DIRBLKSIZE. + * + * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN. */ - if (u.u_offset + u.u_count > u.u_pdir->i_size) - u.u_pdir->i_size = u.u_offset + u.u_count; - + if (ndp->ni_ufs.ufs_offset + ndp->ni_ufs.ufs_count > dp->i_size) + dp->i_size = ndp->ni_ufs.ufs_offset + ndp->ni_ufs.ufs_count; /* - * Get the block containing the space for the new directory - * entry. Should return error by result instead of u.u_error. + * Get the block containing the space for the new directory entry. */ - bp = blkatoff(u.u_pdir, u.u_offset, (char **)&dirbuf); - if (bp == 0) { - iput(u.u_pdir); - return (u.u_error); - } - + if (error = VOP_BLKATOFF(dvp, ndp->ni_ufs.ufs_offset, &dirbuf, &bp)) + return (error); /* - * Find space for the new entry. In the simple case, the - * entry at offset base will have the space. If it does - * not, then namei arranged that compacting the region - * u.u_offset to u.u_offset+u.u_count would yield the space. + * Find space for the new entry. In the simple case, the entry at + * offset base will have the space. If it does not, then namei + * arranged that compacting the region ndp->ni_ufs.ufs_offset to + * ndp->ni_ufs.ufs_offset + ndp->ni_ufs.ufs_count would yield the + * space. */ ep = (struct direct *)dirbuf; dsize = DIRSIZ(ep); - freespace = ep->d_reclen - dsize; - for (loc = ep->d_reclen; loc < u.u_count; ) { + spacefree = ep->d_reclen - dsize; + for (loc = ep->d_reclen; loc < ndp->ni_ufs.ufs_count; ) { nep = (struct direct *)(dirbuf + loc); if (ep->d_ino) { /* trim the existing slot */ @@ -605,10 +671,10 @@ direnter(ip) ep = (struct direct *)((char *)ep + dsize); } else { /* overwrite; nothing there; header is ours */ - freespace += dsize; + spacefree += dsize; } dsize = DIRSIZ(nep); - freespace += nep->d_reclen - dsize; + spacefree += nep->d_reclen - dsize; loc += nep->d_reclen; bcopy((caddr_t)nep, (caddr_t)ep, dsize); } @@ -617,60 +683,70 @@ direnter(ip) * copy in the new entry, and write out the block. */ if (ep->d_ino == 0) { - if (freespace + dsize < newentrysize) + if (spacefree + dsize < newentrysize) panic("wdir: compact1"); - u.u_dent.d_reclen = freespace + dsize; + newdir.d_reclen = spacefree + dsize; } else { - if (freespace < newentrysize) + if (spacefree < newentrysize) panic("wdir: compact2"); - u.u_dent.d_reclen = freespace; + newdir.d_reclen = spacefree; ep->d_reclen = dsize; ep = (struct direct *)((char *)ep + dsize); } - bcopy((caddr_t)&u.u_dent, (caddr_t)ep, (u_int)newentrysize); - bwrite(bp); - u.u_pdir->i_flag |= IUPD|ICHG; - iput(u.u_pdir); + bcopy((caddr_t)&newdir, (caddr_t)ep, (u_int)newentrysize); + error = VOP_BWRITE(bp); + dp->i_flag |= IUPD|ICHG; + if (!error && ndp->ni_ufs.ufs_endoff && + ndp->ni_ufs.ufs_endoff < dp->i_size) + error = VOP_TRUNCATE(dvp, (u_long)ndp->ni_ufs.ufs_endoff, + IO_SYNC); return (error); } /* - * Remove a directory entry after a call to namei, using the - * parameters which it left in the u. area. The u. entry - * u_offset contains the offset into the directory of the - * entry to be eliminated. The u_count field contains the + * Remove a directory entry after a call to namei, using + * the parameters which it left in nameidata. The entry + * ni_ufs.ufs_offset contains the offset into the directory of the + * entry to be eliminated. The ni_ufs.ufs_count field contains the * size of the previous record in the directory. If this * is 0, the first entry is being deleted, so we need only * zero the inode number to mark the entry as free. If the - * entry isn't the first in the directory, we must reclaim + * entry is not the first in the directory, we must reclaim * the space of the now empty record by adding the record size * to the size of the previous entry. */ -dirremove() +int +ufs_dirremove(ndp) + register struct nameidata *ndp; { - register struct inode *dp = u.u_pdir; - register struct buf *bp; + register struct inode *dp; struct direct *ep; + struct buf *bp; + int error; - if (u.u_count == 0) { + dp = VTOI(ndp->ni_dvp); + if (ndp->ni_ufs.ufs_count == 0) { /* * First entry in block: set d_ino to zero. */ - u.u_dent.d_ino = 0; - (void) rdwri(UIO_WRITE, dp, (caddr_t)&u.u_dent, - (int)DIRSIZ(&u.u_dent), u.u_offset, 1, (int *)0); - } else { - /* - * Collapse new free space into previous entry. - */ - bp = blkatoff(dp, (int)(u.u_offset - u.u_count), (char **)&ep); - if (bp == 0) - return (0); - ep->d_reclen += u.u_dent.d_reclen; - bwrite(bp); + if (error = VOP_BLKATOFF(ndp->ni_dvp, ndp->ni_ufs.ufs_offset, + (char **)&ep, &bp)) + return (error); + ep->d_ino = 0; + error = VOP_BWRITE(bp); dp->i_flag |= IUPD|ICHG; + return (error); } - return (1); + /* + * Collapse new free space into previous entry. + */ + if (error = VOP_BLKATOFF(ndp->ni_dvp, + ndp->ni_ufs.ufs_offset - ndp->ni_ufs.ufs_count, (char **)&ep, &bp)) + return (error); + ep->d_reclen += ndp->ni_ufs.ufs_reclen; + error = VOP_BWRITE(bp); + dp->i_flag |= IUPD|ICHG; + return (error); } /* @@ -678,74 +754,137 @@ dirremove() * supplied. The parameters describing the directory entry are * set up by a call to namei. */ -dirrewrite(dp, ip) +int +ufs_dirrewrite(dp, ip, ndp) struct inode *dp, *ip; + struct nameidata *ndp; { + struct buf *bp; + struct direct *ep; + int error; - u.u_dent.d_ino = ip->i_number; - u.u_error = rdwri(UIO_WRITE, dp, (caddr_t)&u.u_dent, - (int)DIRSIZ(&u.u_dent), u.u_offset, 1, (int *)0); - iput(dp); -} - -/* - * Return buffer with contents of block "offset" - * from the beginning of directory "ip". If "res" - * is non-zero, fill it in with a pointer to the - * remaining space in the directory. - */ -struct buf * -blkatoff(ip, offset, res) - struct inode *ip; - off_t offset; - char **res; -{ - register struct fs *fs = ip->i_fs; - daddr_t lbn = lblkno(fs, offset); - int base = blkoff(fs, offset); - int bsize = blksize(fs, ip, lbn); - daddr_t bn = fsbtodb(fs, bmap(ip, lbn, B_WRITE, base, bsize)); - register struct buf *bp; - - if (u.u_error) - return (0); - bp = bread(ip->i_dev, bn, bsize); - if (bp->b_flags & B_ERROR) { - brelse(bp); - return (0); - } - if (res) - *res = bp->b_un.b_addr + base; - return (bp); + if (error = VOP_BLKATOFF(ITOV(dp), ndp->ni_ufs.ufs_offset, + (char **)&ep, &bp)) + return (error); + ep->d_ino = ip->i_number; + error = VOP_BWRITE(bp); + dp->i_flag |= IUPD|ICHG; + return (error); } /* * Check if a directory is empty or not. * Inode supplied must be locked. + * + * Using a struct dirtemplate here is not precisely + * what we want, but better than using a struct direct. + * + * NB: does not handle corrupted directories. */ -dirempty(ip) +int +ufs_dirempty(ip, parentino, cred) register struct inode *ip; + ino_t parentino; + struct ucred *cred; { register off_t off; - struct direct dbuf; - register struct direct *dp = &dbuf; + struct dirtemplate dbuf; + register struct direct *dp = (struct direct *)&dbuf; int error, count; +#define MINDIRSIZ (sizeof (struct dirtemplate) / 2) for (off = 0; off < ip->i_size; off += dp->d_reclen) { - error = rdwri(UIO_READ, ip, (caddr_t)dp, - sizeof (struct direct), off, 1, &count); - count = sizeof (struct direct) - count; -#define MINDIRSIZ (sizeof (struct direct) - (MAXNAMLEN + 1)) - if (error || count < MINDIRSIZ || count < DIRSIZ(dp)) + error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)dp, MINDIRSIZ, off, + UIO_SYSSPACE, IO_NODELOCKED, cred, &count, (struct proc *)0); + /* + * Since we read MINDIRSIZ, residual must + * be 0 unless we're at end of file. + */ + if (error || count != 0) + return (0); + /* avoid infinite loops */ + if (dp->d_reclen == 0) return (0); + /* skip empty entries */ if (dp->d_ino == 0) continue; + /* accept only "." and ".." */ + if (dp->d_namlen > 2) + return (0); if (dp->d_name[0] != '.') return (0); - if (dp->d_namlen == 1 || - (dp->d_namlen == 2 && dp->d_name[1] == '.')) + /* + * At this point d_namlen must be 1 or 2. + * 1 implies ".", 2 implies ".." if second + * char is also "." + */ + if (dp->d_namlen == 1) + continue; + if (dp->d_name[1] == '.' && dp->d_ino == parentino) continue; return (0); } return (1); } + +/* + * Check if source directory is in the path of the target directory. + * Target is supplied locked, source is unlocked. + * The target is always iput before returning. + */ +int +ufs_checkpath(source, target, cred) + struct inode *source, *target; + struct ucred *cred; +{ + struct dirtemplate dirbuf; + register struct inode *ip; + struct vnode *vp; + int error, rootino; + + ip = target; + if (ip->i_number == source->i_number) { + error = EEXIST; + goto out; + } + rootino = ROOTINO; + error = 0; + if (ip->i_number == rootino) + goto out; + + for (;;) { + if ((ip->i_mode&IFMT) != IFDIR) { + error = ENOTDIR; + break; + } + vp = ITOV(ip); + error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirbuf, + sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE, + IO_NODELOCKED, cred, (int *)0, (struct proc *)0); + if (error != 0) + break; + if (dirbuf.dotdot_namlen != 2 || + dirbuf.dotdot_name[0] != '.' || + dirbuf.dotdot_name[1] != '.') { + error = ENOTDIR; + break; + } + if (dirbuf.dotdot_ino == source->i_number) { + error = EINVAL; + break; + } + if (dirbuf.dotdot_ino == rootino) + break; + ufs_iput(ip); + if (error = VOP_VGET(vp, dirbuf.dotdot_ino, &vp)) + break; + ip = VTOI(vp); + } + +out: + if (error == ENOTDIR) + printf("checkpath: .. not a directory\n"); + if (ip != NULL) + ufs_iput(ip); + return (error); +}