add POSIX-style byte-level record locking
[unix-history] / usr / src / sys / ufs / ffs / ufs_vnops.c
index 6569cf1..1521d43 100644 (file)
@@ -4,7 +4,7 @@
  *
  * %sccs.include.redist.c%
  *
  *
  * %sccs.include.redist.c%
  *
- *     @(#)ufs_vnops.c 7.49 (Berkeley) %G%
+ *     @(#)ufs_vnops.c 7.50 (Berkeley) %G%
  */
 
 #include "param.h"
  */
 
 #include "param.h"
@@ -21,6 +21,9 @@
 #include "mount.h"
 #include "vnode.h"
 #include "specdev.h"
 #include "mount.h"
 #include "vnode.h"
 #include "specdev.h"
+#include "fcntl.h"
+#include "malloc.h"
+#include "../ufs/lockf.h"
 #include "../ufs/quota.h"
 #include "../ufs/inode.h"
 #include "../ufs/fs.h"
 #include "../ufs/quota.h"
 #include "../ufs/inode.h"
 #include "../ufs/fs.h"
@@ -60,7 +63,8 @@ int   ufs_lookup(),
        ufs_bmap(),
        ufs_strategy(),
        ufs_print(),
        ufs_bmap(),
        ufs_strategy(),
        ufs_print(),
-       ufs_islocked();
+       ufs_islocked(),
+       ufs_advlock();
 
 struct vnodeops ufs_vnodeops = {
        ufs_lookup,             /* lookup */
 
 struct vnodeops ufs_vnodeops = {
        ufs_lookup,             /* lookup */
@@ -95,6 +99,7 @@ struct vnodeops ufs_vnodeops = {
        ufs_strategy,           /* strategy */
        ufs_print,              /* print */
        ufs_islocked,           /* islocked */
        ufs_strategy,           /* strategy */
        ufs_print,              /* print */
        ufs_islocked,           /* islocked */
+       ufs_advlock,            /* advlock */
 };
 
 int    spec_lookup(),
 };
 
 int    spec_lookup(),
@@ -106,6 +111,7 @@ int spec_lookup(),
        spec_ioctl(),
        spec_select(),
        ufsspec_close(),
        spec_ioctl(),
        spec_select(),
        ufsspec_close(),
+       spec_advlock(),
        spec_badop(),
        spec_nullop();
 
        spec_badop(),
        spec_nullop();
 
@@ -142,6 +148,7 @@ struct vnodeops spec_inodeops = {
        spec_strategy,          /* strategy */
        ufs_print,              /* print */
        ufs_islocked,           /* islocked */
        spec_strategy,          /* strategy */
        ufs_print,              /* print */
        ufs_islocked,           /* islocked */
+       spec_advlock,           /* advlock */
 };
 
 #ifdef FIFO
 };
 
 #ifdef FIFO
@@ -154,6 +161,7 @@ int fifo_lookup(),
        fifo_select(),
        ufsfifo_close(),
        fifo_print(),
        fifo_select(),
        ufsfifo_close(),
        fifo_print(),
+       fifo_advlock(),
        fifo_badop(),
        fifo_nullop();
 
        fifo_badop(),
        fifo_nullop();
 
@@ -190,6 +198,7 @@ struct vnodeops fifo_inodeops = {
        fifo_badop,             /* strategy */
        ufs_print,              /* print */
        ufs_islocked,           /* islocked */
        fifo_badop,             /* strategy */
        ufs_print,              /* print */
        ufs_islocked,           /* islocked */
+       fifo_advlock,           /* advlock */
 };
 #endif /* FIFO */
 
 };
 #endif /* FIFO */
 
@@ -1716,3 +1725,246 @@ bad:
        iput(ip);
        return (error);
 }
        iput(ip);
        return (error);
 }
+
+/*
+ * Advisory record locking support.
+ *
+ * vp is the vnode being locked; id identifies the lock owner (a proc
+ * pointer for POSIX-style locks -- see the F_POSIX handling in
+ * ufs_setlock); op is F_SETLK, F_UNLCK or F_GETLK; fl describes the
+ * byte range; flags carries the F_WAIT/F_POSIX style bits that are
+ * copied into lf_flags.
+ */
+ufs_advlock(vp, id, op, fl, flags)
+       struct vnode *vp;
+       caddr_t id;
+       int op;
+       register struct flock *fl;
+       int flags;
+{
+       register struct inode *ip = VTOI(vp);
+       register struct lockf *lock;
+       off_t start, end;
+       int error;
+
+       /*
+        * Avoid the common case of unlocking when inode has no locks.
+        * For F_UNLCK and F_GETLK there is nothing to do, so report
+        * the range as unlocked; F_SETLK falls through to create the
+        * first lock on the inode.
+        */
+       if (ip->i_lockf == (struct lockf *)0) {
+               if (op != F_SETLK) {
+                       fl->l_type = F_UNLCK;
+                       return (0);
+               }
+       }
+       /*
+        * Convert the flock structure into a start and end.
+        */
+       switch (fl->l_whence) {
+
+       case SEEK_SET:
+       case SEEK_CUR:
+               /*
+                * Caller is responsible for adding any necessary offset
+                * when SEEK_CUR is used.
+                */
+               start = fl->l_start;
+               break;
+
+       case SEEK_END:
+               start = ip->i_size + fl->l_start;
+               break;
+
+       default:
+               return (EINVAL);
+       }
+       if (start < 0)
+               return (EINVAL);
+       /*
+        * A length of zero means "to end of file", represented
+        * internally by an end of -1.
+        * NOTE(review): a negative l_len is not rejected here and
+        * would yield end < start -- confirm callers never pass one.
+        */
+       if (fl->l_len == 0)
+               end = -1;
+       else
+               end = start + fl->l_len;
+       /*
+        * Create the lockf structure
+        */
+       MALLOC(lock, struct lockf *, sizeof *lock, M_LOCKF, M_WAITOK);
+       lock->lf_start = start;
+       lock->lf_end = end;
+       lock->lf_id = id;
+       lock->lf_inode = ip;
+       lock->lf_type = fl->l_type;
+       lock->lf_next = (struct lockf *)0;
+       lock->lf_block = (struct lockf *)0;
+       lock->lf_flags = flags;
+       /*
+        * Do the requested operation.  Ownership of the lockf
+        * structure passes to the called routine, which either
+        * queues it or frees it.
+        */
+       switch(op) {
+       case F_SETLK:
+               return (ufs_setlock(lock));
+
+       case F_UNLCK:
+               return (ufs_advunlock(lock));
+
+       case F_GETLK:
+               return (ufs_advgetlock(lock, fl));
+       
+       default:
+               free(lock, M_LOCKF);
+               return (EINVAL);
+       }
+       /* NOTREACHED */
+}
+
+/*
+ * This variable controls the maximum number of processes that will
+ * be checked in doing deadlock detection.
+ */
+int maxlockdepth = MAXDEPTH;
+
+/*
+ * Set a byte-range lock.
+ */
+ufs_setlock(lock)
+       register struct lockf *lock;
+{
+       register struct inode *ip = lock->lf_inode;
+       register struct lockf *block;
+       static char lockstr[] = "lockf";
+       int priority, error;
+
+#ifdef LOCKF_DEBUG
+       if (lockf_debug & 4)
+               lf_print("ufs_setlock", lock);
+#endif /* LOCKF_DEBUG */
+
+       /*
+        * Set the priority
+        */
+       priority = PLOCK;
+       if ((lock->lf_type & F_WRLCK) == 0)
+               priority += 4;
+       priority |= PCATCH;
+       /*
+        * Scan lock list for this file looking for locks that would block us.
+        */
+       while (block = lf_getblock(lock)) {
+               /*
+                * Free the structure and return if nonblocking.
+                */
+               if ((lock->lf_flags & F_WAIT) == 0) {
+                       free(lock, M_LOCKF);
+                       return (EAGAIN);
+               }
+               /*
+                * We are blocked. Since flock style locks cover
+                * the whole file, there is no chance for deadlock.
+                * For byte-range locks we must check for deadlock.
+                *
+                * Deadlock detection is done by looking through the
+                * wait channels to see if there are any cycles that
+                * involve us. MAXDEPTH is set just to make sure we
+                * do not go off into neverland.
+                */
+               if ((lock->lf_flags & F_POSIX) &&
+                   (block->lf_flags & F_POSIX)) {
+                       register struct proc *wproc;
+                       register struct lockf *waitblock;
+                       int i = 0;
+
+                       /* The block is waiting on something */
+                       wproc = (struct proc *)block->lf_id;
+                       while (wproc->p_wchan &&
+                              (wproc->p_wmesg == lockstr) &&
+                              (i++ < maxlockdepth)) {
+                               waitblock = (struct lockf *)wproc->p_wchan;
+                               /* Get the owner of the blocking lock */
+                               waitblock = waitblock->lf_next;
+                               if ((waitblock->lf_flags & F_POSIX) == 0)
+                                       break;
+                               wproc = (struct proc *)waitblock->lf_id;
+                               if (wproc == (struct proc *)lock->lf_id) {
+                                       free(lock, M_LOCKF);
+                                       return (EDEADLK);
+                               }
+                       }
+               }
+               /*
+                * Add our lock to the blocked
+                * list and sleep until we're free.
+                */
+#ifdef LOCKF_DEBUG
+               if (lockf_debug & 4)
+                       lf_print("ufs_advlock: blocking on", block);
+#endif /* LOCKF_DEBUG */
+               /*
+                * Remember who blocked us (for deadlock detection)
+                */
+               lock->lf_next = block;
+               lf_addblock(block, lock);
+               if (error = tsleep((caddr_t *)lock, priority, lockstr, 0)) {
+                       free(lock, M_LOCKF);
+                       return (error);
+               }
+       }
+       /*
+        * No blocks!!  Add the lock.  Note that addlock will
+        * downgrade or upgrade any overlapping locks this
+        * process already owns.
+        */
+#ifdef LOCKF_DEBUG
+       if (lockf_debug & 4)
+               lf_print("ufs_advlock: got the lock", lock);
+#endif /* LOCKF_DEBUG */
+       lf_addlock(lock);
+       return (0);
+}
+
<br>
+/*
+ * Remove a byte-range lock on an inode.
+ *
+ * Here "lock" is only a range descriptor built by ufs_advlock, not a
+ * queued lock; it is always freed before returning.  Any lockers that
+ * were blocked by the removed range are woken via lf_wakelock().
+ */
+ufs_advunlock(lock)
+       struct lockf *lock;
+{
+       struct lockf *blocklist;
+
+       /* Nothing to do if the inode holds no locks at all. */
+       if (lock->lf_inode->i_lockf == (struct lockf *)0)
+               return (0);
+#ifdef LOCKF_DEBUG
+       if (lockf_debug & 4)
+               lf_print("ufs_advunlock", lock);
+#endif /* LOCKF_DEBUG */
+       /*
+        * Generally, find the lock (or an overlap to that lock)
+        * and remove it (or shrink it), then wakeup anyone we can.
+        */
+       blocklist = lf_remove(lock);
+       FREE(lock, M_LOCKF);
+       lf_wakelock(blocklist);
+       return (0);
+}
+
+/*
+ * Return the blocking pid (F_GETLK).
+ *
+ * If some existing lock would block the range described by "lock",
+ * describe it in *fl: type, SEEK_SET-relative start, length (0 when
+ * the blocker runs to end of file, i.e. lf_end == -1), and the owning
+ * pid for POSIX-style locks, or -1 for non-process (flock-style)
+ * owners.  The lockf descriptor is always freed before returning.
+ *
+ * NOTE(review): when locks exist but none conflict, *fl is left
+ * untouched; POSIX F_GETLK expects l_type to be set to F_UNLCK in
+ * that case.  The empty-list shortcut in ufs_advlock handles it,
+ * this path does not -- verify against the fcntl caller.
+ */
+ufs_advgetlock(lock, fl)
+       register struct lockf *lock;
+       register struct flock *fl;
+{
+       register struct lockf *block;
+       off_t start, end;
+
+#ifdef LOCKF_DEBUG
+       if (lockf_debug & 4)
+               lf_print("ufs_advgetlock", lock);
+#endif /* LOCKF_DEBUG */
+
+       if (block = lf_getblock(lock)) {
+               fl->l_type = block->lf_type;
+               fl->l_whence = SEEK_SET;
+               fl->l_start = block->lf_start;
+               if (block->lf_end == -1)
+                       fl->l_len = 0;
+               else
+                       fl->l_len = block->lf_end - block->lf_start;
+               if (block->lf_flags & F_POSIX)
+                       fl->l_pid = ((struct proc *)(block->lf_id))->p_pid;
+               else
+                       fl->l_pid = -1;
+       }
+       FREE(lock, M_LOCKF);
+       return (0);
+}