if entry is gone, do not try to release the parent
[unix-history] / usr / src / sys / ufs / ffs / ufs_vnops.c
index d70847a..87abfe7 100644 (file)
@@ -2,19 +2,9 @@
  * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
  * All rights reserved.
  *
  * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
  * All rights reserved.
  *
- * Redistribution and use in source and binary forms are permitted
- * provided that the above copyright notice and this paragraph are
- * duplicated in all such forms and that any documentation,
- * advertising materials, and other materials related to such
- * distribution and use acknowledge that the software was developed
- * by the University of California, Berkeley.  The name of the
- * University may not be used to endorse or promote products derived
- * from this software without specific prior written permission.
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ * %sccs.include.redist.c%
  *
  *
- *     @(#)ufs_vnops.c 7.41 (Berkeley) %G%
+ *     @(#)ufs_vnops.c 7.51 (Berkeley) %G%
  */
 
 #include "param.h"
  */
 
 #include "param.h"
@@ -31,6 +21,9 @@
 #include "mount.h"
 #include "vnode.h"
 #include "specdev.h"
 #include "mount.h"
 #include "vnode.h"
 #include "specdev.h"
+#include "fcntl.h"
+#include "malloc.h"
+#include "../ufs/lockf.h"
 #include "../ufs/quota.h"
 #include "../ufs/inode.h"
 #include "../ufs/fs.h"
 #include "../ufs/quota.h"
 #include "../ufs/inode.h"
 #include "../ufs/fs.h"
@@ -70,7 +63,8 @@ int   ufs_lookup(),
        ufs_bmap(),
        ufs_strategy(),
        ufs_print(),
        ufs_bmap(),
        ufs_strategy(),
        ufs_print(),
-       ufs_islocked();
+       ufs_islocked(),
+       ufs_advlock();
 
 struct vnodeops ufs_vnodeops = {
        ufs_lookup,             /* lookup */
 
 struct vnodeops ufs_vnodeops = {
        ufs_lookup,             /* lookup */
@@ -105,6 +99,7 @@ struct vnodeops ufs_vnodeops = {
        ufs_strategy,           /* strategy */
        ufs_print,              /* print */
        ufs_islocked,           /* islocked */
        ufs_strategy,           /* strategy */
        ufs_print,              /* print */
        ufs_islocked,           /* islocked */
+       ufs_advlock,            /* advlock */
 };
 
 int    spec_lookup(),
 };
 
 int    spec_lookup(),
@@ -116,6 +111,7 @@ int spec_lookup(),
        spec_ioctl(),
        spec_select(),
        ufsspec_close(),
        spec_ioctl(),
        spec_select(),
        ufsspec_close(),
+       spec_advlock(),
        spec_badop(),
        spec_nullop();
 
        spec_badop(),
        spec_nullop();
 
@@ -152,6 +148,7 @@ struct vnodeops spec_inodeops = {
        spec_strategy,          /* strategy */
        ufs_print,              /* print */
        ufs_islocked,           /* islocked */
        spec_strategy,          /* strategy */
        ufs_print,              /* print */
        ufs_islocked,           /* islocked */
+       spec_advlock,           /* advlock */
 };
 
 #ifdef FIFO
 };
 
 #ifdef FIFO
@@ -164,6 +161,7 @@ int fifo_lookup(),
        fifo_select(),
        ufsfifo_close(),
        fifo_print(),
        fifo_select(),
        ufsfifo_close(),
        fifo_print(),
+       fifo_advlock(),
        fifo_badop(),
        fifo_nullop();
 
        fifo_badop(),
        fifo_nullop();
 
@@ -200,6 +198,7 @@ struct vnodeops fifo_inodeops = {
        fifo_badop,             /* strategy */
        ufs_print,              /* print */
        ufs_islocked,           /* islocked */
        fifo_badop,             /* strategy */
        ufs_print,              /* print */
        ufs_islocked,           /* islocked */
+       fifo_advlock,           /* advlock */
 };
 #endif /* FIFO */
 
 };
 #endif /* FIFO */
 
@@ -471,17 +470,17 @@ chmod1(vp, mode, cred)
        if (cred->cr_uid != ip->i_uid &&
            (error = suser(cred, &u.u_acflag)))
                return (error);
        if (cred->cr_uid != ip->i_uid &&
            (error = suser(cred, &u.u_acflag)))
                return (error);
-       ip->i_mode &= ~07777;
        if (cred->cr_uid) {
        if (cred->cr_uid) {
-               if (vp->v_type != VDIR)
-                       mode &= ~ISVTX;
-               if (!groupmember(ip->i_gid, cred))
-                       mode &= ~ISGID;
+               if (vp->v_type != VDIR && (mode & ISVTX))
+                       return (EFTYPE);
+               if (!groupmember(ip->i_gid, cred) && (mode & ISGID))
+                       return (EPERM);
        }
        }
+       ip->i_mode &= ~07777;
        ip->i_mode |= mode & 07777;
        ip->i_flag |= ICHG;
        if ((vp->v_flag & VTEXT) && (ip->i_mode & ISVTX) == 0)
        ip->i_mode |= mode & 07777;
        ip->i_flag |= ICHG;
        if ((vp->v_flag & VTEXT) && (ip->i_mode & ISVTX) == 0)
-               xrele(vp);
+               (void) vnode_pager_uncache(vp);
        return (0);
 }
 
        return (0);
 }
 
@@ -663,8 +662,8 @@ ufs_write(vp, uio, ioflag, cred)
        struct buf *bp;
        daddr_t lbn, bn;
        u_long osize;
        struct buf *bp;
        daddr_t lbn, bn;
        u_long osize;
-       int i, n, on, flags;
-       int count, size, resid, error = 0;
+       int n, on, flags;
+       int size, resid, error = 0;
 
        if (uio->uio_rw != UIO_WRITE)
                panic("ufs_write mode");
 
        if (uio->uio_rw != UIO_WRITE)
                panic("ufs_write mode");
@@ -715,12 +714,12 @@ ufs_write(vp, uio, ioflag, cred)
                if (error = balloc(ip, lbn, (int)(on + n), &bp, flags))
                        break;
                bn = bp->b_blkno;
                if (error = balloc(ip, lbn, (int)(on + n), &bp, flags))
                        break;
                bn = bp->b_blkno;
-               if (uio->uio_offset + n > ip->i_size)
+               if (uio->uio_offset + n > ip->i_size) {
                        ip->i_size = uio->uio_offset + n;
                        ip->i_size = uio->uio_offset + n;
+                       vnode_pager_setsize(vp, ip->i_size);
+               }
                size = blksize(fs, ip, lbn);
                size = blksize(fs, ip, lbn);
-               count = howmany(size, CLBYTES);
-               for (i = 0; i < count; i++)
-                       munhash(ip->i_devvp, bn + i * CLBYTES / DEV_BSIZE);
+               (void) vnode_pager_uncache(vp);
                n = MIN(n, size - bp->b_resid);
                error = uiomove(bp->b_un.b_addr + on, n, uio);
                if (ioflag & IO_SYNC)
                n = MIN(n, size - bp->b_resid);
                error = uiomove(bp->b_un.b_addr + on, n, uio);
                if (ioflag & IO_SYNC)
@@ -739,6 +738,8 @@ ufs_write(vp, uio, ioflag, cred)
                uio->uio_offset -= resid - uio->uio_resid;
                uio->uio_resid = resid;
        }
                uio->uio_offset -= resid - uio->uio_resid;
                uio->uio_resid = resid;
        }
+       if (!error && (ioflag & IO_SYNC))
+               error = iupdat(ip, &time, &time, 1);
        return (error);
 }
 
        return (error);
 }
 
@@ -912,9 +913,13 @@ ufs_rename(fndp, tndp)
                 */
                if ((d->d_namlen == 1 && d->d_name[0] == '.') || dp == ip ||
                    fndp->ni_isdotdot || (ip->i_flag & IRENAME)) {
                 */
                if ((d->d_namlen == 1 && d->d_name[0] == '.') || dp == ip ||
                    fndp->ni_isdotdot || (ip->i_flag & IRENAME)) {
-                       IUNLOCK(ip);
-                       ufs_abortop(fndp);
-                       ufs_abortop(tndp);
+                       VOP_ABORTOP(tndp);
+                       vput(tndp->ni_dvp);
+                       if (tndp->ni_vp)
+                               vput(tndp->ni_vp);
+                       VOP_ABORTOP(fndp);
+                       vrele(fndp->ni_dvp);
+                       vput(fndp->ni_vp);
                        return (EINVAL);
                }
                ip->i_flag |= IRENAME;
                        return (EINVAL);
                }
                ip->i_flag |= IRENAME;
@@ -1040,6 +1045,16 @@ ufs_rename(fndp, tndp)
                }
                if (error = dirrewrite(dp, ip, tndp))
                        goto bad;
                }
                if (error = dirrewrite(dp, ip, tndp))
                        goto bad;
+               /*
+                * If the target directory is in the same
+                * directory as the source directory,
+                * decrement the link count on the parent
+                * of the target directory.
+                */
+                if (doingdirectory && !newparent) {
+                       dp->i_nlink--;
+                       dp->i_flag |= ICHG;
+               }
                vput(ITOV(dp));
                /*
                 * Adjust the link count of the target to
                vput(ITOV(dp));
                /*
                 * Adjust the link count of the target to
@@ -1071,10 +1086,13 @@ ufs_rename(fndp, tndp)
                xp = VTOI(fndp->ni_vp);
                dp = VTOI(fndp->ni_dvp);
        } else {
                xp = VTOI(fndp->ni_vp);
                dp = VTOI(fndp->ni_dvp);
        } else {
-               if (fndp->ni_dvp != NULL)
-                       vput(fndp->ni_dvp);
-               xp = NULL;
-               dp = NULL;
+               /*
+                * From name has disappeared.
+                */
+               if (doingdirectory)
+                       panic("rename: lost dir entry");
+               vrele(ITOV(ip));
+               return (0);
        }
        /*
         * Ensure that the directory entry still exists and has not
        }
        /*
         * Ensure that the directory entry still exists and has not
@@ -1225,10 +1243,12 @@ ufs_mkdir(ndp, vap)
                dp->i_flag |= ICHG;
                goto bad;
        }
                dp->i_flag |= ICHG;
                goto bad;
        }
-       if (DIRBLKSIZ > dp->i_fs->fs_fsize)
+       if (DIRBLKSIZ > dp->i_fs->fs_fsize) {
                panic("mkdir: blksize");     /* XXX - should grow w/balloc() */
                panic("mkdir: blksize");     /* XXX - should grow w/balloc() */
-       else
+       } else {
                ip->i_size = DIRBLKSIZ;
                ip->i_size = DIRBLKSIZ;
+               ip->i_flag |= ICHG;
+       }
        /*
         * Directory all set up, now
         * install the entry for it in
        /*
         * Directory all set up, now
         * install the entry for it in
@@ -1386,23 +1406,14 @@ ufs_readlink(vp, uiop, cred)
 
 /*
  * Ufs abort op, called after namei() when a CREATE/DELETE isn't actually
 
 /*
  * Ufs abort op, called after namei() when a CREATE/DELETE isn't actually
- * done. Iff ni_vp/ni_dvp not null and locked, unlock.
+ * done. Nothing to do at the moment.
  */
  */
+/* ARGSUSED */
 ufs_abortop(ndp)
 ufs_abortop(ndp)
-       register struct nameidata *ndp;
+       struct nameidata *ndp;
 {
 
 {
 
-       if (ndp->ni_dvp) {
-               if (VOP_ISLOCKED(ndp->ni_dvp))
-                       VOP_UNLOCK(ndp->ni_dvp);
-               vrele(ndp->ni_dvp);
-       }
-       if (ndp->ni_vp) {
-               if (VOP_ISLOCKED(ndp->ni_vp))
-                       VOP_UNLOCK(ndp->ni_vp);
-               vrele(ndp->ni_vp);
-       }
-       return;
+       return (0);
 }
 
 /*
 }
 
 /*
@@ -1717,3 +1728,246 @@ bad:
        iput(ip);
        return (error);
 }
        iput(ip);
        return (error);
 }
+
+/*
+ * Advisory record locking support
+ *
+ * Entry point for advisory byte-range locks on a UFS inode.  Converts
+ * the caller's flock description into a start/end pair, allocates a
+ * lockf record describing the request and dispatches on op to
+ * ufs_setlock(), ufs_advunlock() or ufs_advgetlock().
+ *
+ *     vp      vnode to operate on; its inode carries the lock list
+ *             (i_lockf)
+ *     id      opaque owner handle, stored in lf_id
+ *     op      F_SETLK, F_UNLCK or F_GETLK
+ *     fl      caller's range description (l_whence, l_start, l_len,
+ *             l_type); l_type is overwritten on the no-locks shortcut
+ *     flags   stored verbatim in lf_flags
+ *
+ * Returns 0 or an errno value: EINVAL for an unknown l_whence, a
+ * negative starting offset or an unknown op, otherwise whatever the
+ * selected helper returns.
+ */
+ufs_advlock(vp, id, op, fl, flags)
+       struct vnode *vp;
+       caddr_t id;
+       int op;
+       register struct flock *fl;
+       int flags;
+{
+       register struct inode *ip = VTOI(vp);
+       register struct lockf *lock;
+       off_t start, end;
+       int error;      /* NOTE(review): never referenced in this function */
+
+       /*
+        * Avoid the common case of unlocking when inode has no locks.
+        * Any op other than F_SETLK takes this shortcut: l_type is set
+        * to F_UNLCK and 0 is returned before anything is allocated.
+        * NOTE(review): that includes unrecognized op values, which
+        * would otherwise get EINVAL from the switch at the bottom.
+        */
+       if (ip->i_lockf == (struct lockf *)0) {
+               if (op != F_SETLK) {
+                       fl->l_type = F_UNLCK;
+                       return (0);
+               }
+       }
+       /*
+        * Convert the flock structure into a start and end.
+        */
+       switch (fl->l_whence) {
+
+       case SEEK_SET:
+       case SEEK_CUR:
+               /*
+                * Caller is responsible for adding any necessary offset
+                * when SEEK_CUR is used.
+                */
+               start = fl->l_start;
+               break;
+
+       case SEEK_END:
+               start = ip->i_size + fl->l_start;
+               break;
+
+       default:
+               return (EINVAL);
+       }
+       if (start < 0)
+               return (EINVAL);
+       if (fl->l_len == 0)
+               end = -1;       /* sentinel; ufs_advgetlock() maps it back to l_len 0 */
+       else
+               end = start + fl->l_len;        /* NOTE(review): l_len < 0 is not rejected */
+       /*
+        * Create the lockf structure
+        * The record is handed to the helper selected below, which is
+        * expected to either free it or keep it; only the unknown-op
+        * case releases it here.
+        */
+       MALLOC(lock, struct lockf *, sizeof *lock, M_LOCKF, M_WAITOK);
+       lock->lf_start = start;
+       lock->lf_end = end;
+       lock->lf_id = id;
+       lock->lf_inode = ip;
+       lock->lf_type = fl->l_type;
+       lock->lf_next = (struct lockf *)0;
+       lock->lf_block = (struct lockf *)0;
+       lock->lf_flags = flags;
+       /*
+        * Do the requested operation.
+        */
+       switch(op) {
+       case F_SETLK:
+               return (ufs_setlock(lock));
+
+       case F_UNLCK:
+               return (ufs_advunlock(lock));
+
+       case F_GETLK:
+               return (ufs_advgetlock(lock, fl));
+       
+       default:
+               free(lock, M_LOCKF);
+               return (EINVAL);
+       }
+       /* NOTREACHED */
+}
+
+/*
+ * This variable controls the maximum number of processes that will
+ * be checked in doing deadlock detection.
+ * It is initialized from MAXDEPTH and read by ufs_setlock(), which
+ * stops following a chain of waiting processes once it has taken
+ * this many steps.
+ */
+int maxlockdepth = MAXDEPTH;
+
+/*
+ * Set a byte-range lock.
+ *
+ * lock is the request record built by ufs_advlock().  While
+ * lf_getblock() keeps reporting a conflicting lock, the request either
+ * fails at once (F_WAIT clear), fails with EDEADLK (the deadlock walk
+ * reaches the requester), or sleeps on the record and rescans after
+ * waking.  When nothing blocks, the record is passed to lf_addlock().
+ *
+ * Returns 0 on success, EAGAIN for a blocked request without F_WAIT,
+ * EDEADLK, or the error returned by tsleep().  The record is freed on
+ * every error return.
+ */
+ufs_setlock(lock)
+       register struct lockf *lock;
+{
+       register struct inode *ip = lock->lf_inode;     /* NOTE(review): not used below */
+       register struct lockf *block;
+       static char lockstr[] = "lockf";        /* wait message; its address also identifies our sleepers */
+       int priority, error;
+
+#ifdef LOCKF_DEBUG
+       if (lockf_debug & 4)
+               lf_print("ufs_setlock", lock);
+#endif /* LOCKF_DEBUG */
+
+       /*
+        * Set the priority
+        * Starts at PLOCK, gains 4 when none of the F_WRLCK bits are
+        * present in lf_type, and always has PCATCH or'ed in.
+        * NOTE(review): the lf_type test is a bit mask, not an equality
+        * check, so its outcome depends on the numeric values of the
+        * F_RDLCK/F_WRLCK constants -- confirm it picks out the
+        * intended requests.
+        */
+       priority = PLOCK;
+       if ((lock->lf_type & F_WRLCK) == 0)
+               priority += 4;
+       priority |= PCATCH;
+       /*
+        * Scan lock list for this file looking for locks that would block us.
+        * The scan restarts from scratch after every successful sleep.
+        */
+       while (block = lf_getblock(lock)) {
+               /*
+                * Free the structure and return if nonblocking.
+                */
+               if ((lock->lf_flags & F_WAIT) == 0) {
+                       free(lock, M_LOCKF);
+                       return (EAGAIN);
+               }
+               /*
+                * We are blocked. Since flock style locks cover
+                * the whole file, there is no chance for deadlock.
+                * For byte-range locks we must check for deadlock.
+                *
+                * Deadlock detection is done by looking through the
+                * wait channels to see if there are any cycles that
+                * involve us. MAXDEPTH is set just to make sure we
+                * do not go off into neverland.
+                *
+                * The walk starts at the owner of the blocking lock.
+                * While that process is asleep with our wait message,
+                * its wait channel is taken to be the lockf it sleeps
+                * on, and that record's lf_next (set below, before
+                * sleeping) names the lock that blocked it.  The walk
+                * then moves to that lock's owner; meeting ourselves
+                * means granting the wait would close a cycle.  The
+                * wait message is compared by address, so only sleeps
+                * started from this function match.  A non-F_POSIX
+                * lock in the chain ends the walk.
+                */
+               if ((lock->lf_flags & F_POSIX) &&
+                   (block->lf_flags & F_POSIX)) {
+                       register struct proc *wproc;
+                       register struct lockf *waitblock;
+                       int i = 0;
+
+                       /* The block is waiting on something */
+                       wproc = (struct proc *)block->lf_id;
+                       while (wproc->p_wchan &&
+                              (wproc->p_wmesg == lockstr) &&
+                              (i++ < maxlockdepth)) {
+                               waitblock = (struct lockf *)wproc->p_wchan;
+                               /* Get the owner of the blocking lock */
+                               waitblock = waitblock->lf_next;
+                               if ((waitblock->lf_flags & F_POSIX) == 0)
+                                       break;
+                               wproc = (struct proc *)waitblock->lf_id;
+                               if (wproc == (struct proc *)lock->lf_id) {
+                                       free(lock, M_LOCKF);
+                                       return (EDEADLK);
+                               }
+                       }
+               }
+               /*
+                * Add our lock to the blocked
+                * list and sleep until we're free.
+                */
+#ifdef LOCKF_DEBUG
+               if (lockf_debug & 4)
+                       lf_print("ufs_advlock: blocking on", block);
+#endif /* LOCKF_DEBUG */
+               /*
+                * Remember who blocked us (for deadlock detection)
+                *
+                * A nonzero return from tsleep() abandons the request
+                * and frees the record.
+                * NOTE(review): the record was just handed to
+                * lf_addblock(); nothing visible here takes it back off
+                * block's waiter list before the free -- confirm this
+                * cannot leave a stale pointer behind.
+                * NOTE(review): the sleep address is cast to
+                * (caddr_t *), while the deadlock walk above treats
+                * p_wchan as the lockf pointer itself.
+                */
+               lock->lf_next = block;
+               lf_addblock(block, lock);
+               if (error = tsleep((caddr_t *)lock, priority, lockstr, 0)) {
+                       free(lock, M_LOCKF);
+                       return (error);
+               }
+       }
+       /*
+        * No blocks!!  Add the lock.  Note that addlock will
+        * downgrade or upgrade any overlapping locks this
+        * process already owns.
+        */
+#ifdef LOCKF_DEBUG
+       if (lockf_debug & 4)
+               lf_print("ufs_advlock: got the lock", lock);
+#endif /* LOCKF_DEBUG */
+       lf_addlock(lock);
+       return (0);
+}
+
+/*
+ * Remove a byte-range lock on an inode.
+ *
+ * lock is the request record built by ufs_advlock(); it describes the
+ * range and owner to release.  The record is consumed here: it is
+ * freed on every return path, including the case where the inode no
+ * longer has any locks by the time we get here.
+ *
+ * Always returns 0.
+ */
+ufs_advunlock(lock)
+       struct lockf *lock;
+{
+       struct lockf *blocklist;
+
+       /*
+        * Nothing to remove.  The request record still belongs to us,
+        * so release it instead of leaking it.
+        */
+       if (lock->lf_inode->i_lockf == (struct lockf *)0) {
+               FREE(lock, M_LOCKF);
+               return (0);
+       }
+#ifdef LOCKF_DEBUG
+       if (lockf_debug & 4)
+               lf_print("ufs_advunlock", lock);
+#endif /* LOCKF_DEBUG */
+       /*
+        * Generally, find the lock (or an overlap to that lock)
+        * and remove it (or shrink it), then wakeup anyone we can.
+        * The request record is freed before the wakeups are issued;
+        * only the list returned by lf_remove() is passed on.
+        */
+       blocklist = lf_remove(lock);
+       FREE(lock, M_LOCKF);
+       lf_wakelock(blocklist);
+       return (0);
+}
+
+/*
+ * Return the blocking pid
+ *
+ * lock is the request record built by ufs_advlock(); fl is the
+ * caller's flock structure, which receives the answer.
+ *
+ * If lf_getblock() finds a lock that conflicts with the request, fl is
+ * overwritten with that lock's type, its range expressed relative to
+ * SEEK_SET (an lf_end of -1 is reported as l_len 0), and its owner:
+ * the pid of the owning process for F_POSIX locks, -1 otherwise.
+ * If nothing conflicts, only l_type is changed, to F_UNLCK, which
+ * matches what ufs_advlock() reports when the inode has no locks.
+ *
+ * The request record is always freed.  Always returns 0.
+ */
+ufs_advgetlock(lock, fl)
+       register struct lockf *lock;
+       register struct flock *fl;
+{
+       register struct lockf *block;
+
+#ifdef LOCKF_DEBUG
+       if (lockf_debug & 4)
+               lf_print("ufs_advgetlock", lock);
+#endif /* LOCKF_DEBUG */
+
+       if (block = lf_getblock(lock)) {
+               fl->l_type = block->lf_type;
+               fl->l_whence = SEEK_SET;
+               fl->l_start = block->lf_start;
+               if (block->lf_end == -1)
+                       fl->l_len = 0;
+               else
+                       fl->l_len = block->lf_end - block->lf_start;
+               if (block->lf_flags & F_POSIX)
+                       fl->l_pid = ((struct proc *)(block->lf_id))->p_pid;
+               else
+                       fl->l_pid = -1;
+       } else {
+               /*
+                * No conflicting lock: say so explicitly instead of
+                * handing the caller's own request type back unchanged.
+                */
+               fl->l_type = F_UNLCK;
+       }
+       FREE(lock, M_LOCKF);
+       return (0);
+}