new locking scheme
[unix-history] / usr / src / sys / miscfs / union / union_subr.c
index 4532682..3319a34 100644 (file)
@@ -8,7 +8,7 @@
  *
  * %sccs.include.redist.c%
  *
  *
  * %sccs.include.redist.c%
  *
- *     @(#)union_subr.c        1.6 (Berkeley) %G%
+ *     @(#)union_subr.c        2.1 (Berkeley) %G%
  */
 
 #include <sys/param.h>
  */
 
 #include <sys/param.h>
@@ -37,6 +37,20 @@ union_init()
        unvplock = 0;
 }
 
        unvplock = 0;
 }
 
+/*
+ * unlink (un) from the list of union_nodes rooted at unhead.
+ * the list is walked through the un_next links; if (un) is
+ * not on the list nothing is changed.
+ */
+static void
+union_remlist(un)
+       struct union_node *un;
+{
+       struct union_node **linkp = &unhead;
+
+       /* find the link which points at (un), or the end of the list */
+       while (*linkp != 0 && *linkp != un)
+               linkp = &(*linkp)->un_next;
+
+       /* splice (un) out if it was found */
+       if (*linkp != 0)
+               *linkp = un->un_next;
+}
+
 /*
  * allocate a union_node/vnode pair.  the vnode is
  * referenced and locked.  the new vnode is returned
 /*
  * allocate a union_node/vnode pair.  the vnode is
  * referenced and locked.  the new vnode is returned
@@ -47,6 +61,7 @@ union_init()
  * layer object to be created at a later time.  (uppervp)
  * and (lowervp) reference the upper and lower layer objects
  * being mapped.  either, but not both, can be nil.
  * layer object to be created at a later time.  (uppervp)
  * and (lowervp) reference the upper and lower layer objects
  * being mapped.  either, but not both, can be nil.
+ * if supplied, (uppervp) is locked.
  * the reference is either maintained in the new union_node
  * object which is allocated, or they are vrele'd.
  *
  * the reference is either maintained in the new union_node
  * object which is allocated, or they are vrele'd.
  *
@@ -99,25 +114,102 @@ loop:
                    (UNIONTOV(un)->v_mount == mp)) {
                        if (vget(UNIONTOV(un), 0))
                                goto loop;
                    (UNIONTOV(un)->v_mount == mp)) {
                        if (vget(UNIONTOV(un), 0))
                                goto loop;
-                       if (UNIONTOV(un) != undvp)
-                               VOP_LOCK(UNIONTOV(un));
-                       if (uppervp != un->un_uppervp) {
-                               if (un->un_uppervp)
-                                       vrele(un->un_uppervp);
-                               un->un_uppervp = uppervp;
-                       } else if (uppervp) {
-                               vrele(uppervp);
+                       break;
+               }
+       }
+
+       if (un) {
+               /*
+                * Obtain a lock on the union_node.
+                * uppervp is locked, though un->un_uppervp
+                * may not be.  this doesn't break the locking
+                * hierarchy since in the case that un->un_uppervp
+                * is not yet locked it will be vrele'd and replaced
+                * with uppervp.
+                */
+
+               if ((dvp != NULLVP) && (uppervp == dvp)) {
+                       /*
+                        * Access ``.'', so (un) will already
+                        * be locked.  Since this process has
+                        * the lock on (uppervp) no other
+                        * process can hold the lock on (un).
+                        */
+#ifdef DIAGNOSTIC
+                       if ((un->un_flags & UN_LOCKED) == 0)
+                               panic("union: . not locked");
+                       else if (curproc && un->un_pid != curproc->p_pid &&
+                                   un->un_pid > -1 && curproc->p_pid > -1)
+                               panic("union: allocvp not lock owner");
+#endif
+               } else {
+                       if (un->un_flags & UN_LOCKED) {
+                               vrele(UNIONTOV(un));
+                               un->un_flags |= UN_WANT;
+                               sleep((caddr_t) &un->un_flags, PINOD);
+                               goto loop;
+                       }
+                       un->un_flags |= UN_LOCKED;
+
+#ifdef DIAGNOSTIC
+                       if (curproc)
+                               un->un_pid = curproc->p_pid;
+                       else
+                               un->un_pid = -1;
+#endif
+               }
+
+               /*
+                * At this point, the union_node is locked,
+                * un->un_uppervp may not be locked, and uppervp
+                * is locked or nil.
+                */
+
+               /*
+                * Save information about the upper layer.
+                */
+               if (uppervp != un->un_uppervp) {
+                       if (un->un_uppervp)
+                               vrele(un->un_uppervp);
+                       un->un_uppervp = uppervp;
+               } else if (uppervp) {
+                       vrele(uppervp);
+               }
+
+               if (un->un_uppervp) {
+                       un->un_flags |= UN_ULOCK;
+                       un->un_flags &= ~UN_KLOCK;
+               }
+
+               /*
+                * Save information about the lower layer.
+                * This needs to keep track of pathname
+                * and directory information which union_vn_create
+                * might need.
+                */
+               if (lowervp != un->un_lowervp) {
+                       if (un->un_lowervp) {
+                               vrele(un->un_lowervp);
+                               free(un->un_path, M_TEMP);
+                               vrele(un->un_dirvp);
                        }
                        }
-                       if (lowervp != un->un_lowervp) {
-                               if (un->un_lowervp)
-                                       vrele(un->un_lowervp);
-                               un->un_lowervp = lowervp;
-                       } else if (lowervp) {
-                               vrele(lowervp);
+                       un->un_lowervp = lowervp;
+                       if (cnp && (lowervp != NULLVP) &&
+                           (lowervp->v_type == VREG)) {
+                               un->un_hash = cnp->cn_hash;
+                               un->un_path = malloc(cnp->cn_namelen+1,
+                                               M_TEMP, M_WAITOK);
+                               bcopy(cnp->cn_nameptr, un->un_path,
+                                               cnp->cn_namelen);
+                               un->un_path[cnp->cn_namelen] = '\0';
+                               VREF(dvp);
+                               un->un_dirvp = dvp;
                        }
                        }
-                       *vpp = UNIONTOV(un);
-                       return (0);
+               } else if (lowervp) {
+                       vrele(lowervp);
                }
                }
+               *vpp = UNIONTOV(un);
+               return (0);
        }
 
        /*
        }
 
        /*
@@ -132,8 +224,18 @@ loop:
        unvplock |= UN_LOCKED;
 
        error = getnewvnode(VT_UNION, mp, union_vnodeop_p, vpp);
        unvplock |= UN_LOCKED;
 
        error = getnewvnode(VT_UNION, mp, union_vnodeop_p, vpp);
-       if (error)
+       if (error) {
+               if (uppervp) {
+                       if (dvp == uppervp)
+                               vrele(uppervp);
+                       else
+                               vput(uppervp);
+               }
+               if (lowervp)
+                       vrele(lowervp);
+
                goto out;
                goto out;
+       }
 
        MALLOC((*vpp)->v_data, void *, sizeof(struct union_node),
                M_TEMP, M_WAITOK);
 
        MALLOC((*vpp)->v_data, void *, sizeof(struct union_node),
                M_TEMP, M_WAITOK);
@@ -147,15 +249,25 @@ loop:
        un->un_next = 0;
        un->un_uppervp = uppervp;
        un->un_lowervp = lowervp;
        un->un_next = 0;
        un->un_uppervp = uppervp;
        un->un_lowervp = lowervp;
-       un->un_open = 0;
-       un->un_flags = 0;
-       if (uppervp == 0 && cnp) {
+       un->un_openl = 0;
+       un->un_flags = UN_LOCKED;
+       if (un->un_uppervp)
+               un->un_flags |= UN_ULOCK;
+#ifdef DIAGNOSTIC
+       if (curproc)
+               un->un_pid = curproc->p_pid;
+       else
+               un->un_pid = -1;
+#endif
+       if (cnp && (lowervp != NULLVP) && (lowervp->v_type == VREG)) {
+               un->un_hash = cnp->cn_hash;
                un->un_path = malloc(cnp->cn_namelen+1, M_TEMP, M_WAITOK);
                bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen);
                un->un_path[cnp->cn_namelen] = '\0';
                VREF(dvp);
                un->un_dirvp = dvp;
        } else {
                un->un_path = malloc(cnp->cn_namelen+1, M_TEMP, M_WAITOK);
                bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen);
                un->un_path[cnp->cn_namelen] = '\0';
                VREF(dvp);
                un->un_dirvp = dvp;
        } else {
+               un->un_hash = 0;
                un->un_path = 0;
                un->un_dirvp = 0;
        }
                un->un_path = 0;
                un->un_dirvp = 0;
        }
@@ -165,12 +277,6 @@ loop:
                continue;
        *pp = un;
 
                continue;
        *pp = un;
 
-       un->un_flags |= UN_LOCKED;
-
-#ifdef DIAGNOSTIC
-       un->un_pid = curproc->p_pid;
-#endif
-
        if (xlowervp)
                vrele(xlowervp);
 
        if (xlowervp)
                vrele(xlowervp);
 
@@ -189,15 +295,9 @@ int
 union_freevp(vp)
        struct vnode *vp;
 {
 union_freevp(vp)
        struct vnode *vp;
 {
-       struct union_node **unpp;
        struct union_node *un = VTOUNION(vp);
 
        struct union_node *un = VTOUNION(vp);
 
-       for (unpp = &unhead; *unpp != 0; unpp = &(*unpp)->un_next) {
-               if (*unpp == un) {
-                       *unpp = un->un_next;
-                       break;
-               }
-       }
+       union_remlist(un);
 
        FREE(vp->v_data, M_TEMP);
        vp->v_data = 0;
 
        FREE(vp->v_data, M_TEMP);
        vp->v_data = 0;
@@ -303,12 +403,10 @@ union_mkshadow(um, dvp, cnp, vpp)
 
        /*
         * policy: when creating the shadow directory in the
 
        /*
         * policy: when creating the shadow directory in the
-        * upper layer, create it owned by the current user,
-        * group from parent directory, and mode 777 modified
-        * by umask (ie mostly identical to the mkdir syscall).
-        * (jsp, kb)
-        * TODO: create the directory owned by the user who
-        * did the mount (um->um_cred).
+        * upper layer, create it owned by the user who did
+        * the mount, group from parent directory, and mode
+        * 777 modified by umask (ie mostly identical to the
+        * mkdir syscall).  (jsp, kb)
         */
 
        /*
         */
 
        /*
@@ -323,19 +421,22 @@ union_mkshadow(um, dvp, cnp, vpp)
         * The pathname buffer will be FREEed by VOP_MKDIR.
         */
        cn.cn_pnbuf = malloc(cnp->cn_namelen+1, M_NAMEI, M_WAITOK);
         * The pathname buffer will be FREEed by VOP_MKDIR.
         */
        cn.cn_pnbuf = malloc(cnp->cn_namelen+1, M_NAMEI, M_WAITOK);
-       bcopy(cnp->cn_nameptr, cn.cn_pnbuf, cnp->cn_namelen+1);
+       bcopy(cnp->cn_nameptr, cn.cn_pnbuf, cnp->cn_namelen);
+       cn.cn_pnbuf[cnp->cn_namelen] = '\0';
 
        cn.cn_nameiop = CREATE;
        cn.cn_flags = (LOCKPARENT|HASBUF|SAVENAME|ISLASTCN);
        cn.cn_proc = cnp->cn_proc;
 
        cn.cn_nameiop = CREATE;
        cn.cn_flags = (LOCKPARENT|HASBUF|SAVENAME|ISLASTCN);
        cn.cn_proc = cnp->cn_proc;
-       cn.cn_cred = cnp->cn_cred;
+       cn.cn_cred = um->um_cred;
        cn.cn_nameptr = cn.cn_pnbuf;
        cn.cn_namelen = cnp->cn_namelen;
        cn.cn_hash = cnp->cn_hash;
        cn.cn_consume = cnp->cn_consume;
 
        cn.cn_nameptr = cn.cn_pnbuf;
        cn.cn_namelen = cnp->cn_namelen;
        cn.cn_hash = cnp->cn_hash;
        cn.cn_consume = cnp->cn_consume;
 
+       VREF(dvp);
        if (error = relookup(dvp, vpp, &cn))
                return (error);
        if (error = relookup(dvp, vpp, &cn))
                return (error);
+       vrele(dvp);
 
        if (*vpp) {
                VOP_ABORTOP(dvp, &cn);
 
        if (*vpp) {
                VOP_ABORTOP(dvp, &cn);
@@ -347,7 +448,7 @@ union_mkshadow(um, dvp, cnp, vpp)
 
        VATTR_NULL(&va);
        va.va_type = VDIR;
 
        VATTR_NULL(&va);
        va.va_type = VDIR;
-       va.va_mode = UN_DIRMODE &~ p->p_fd->fd_cmask;
+       va.va_mode = um->um_cmode;
 
        /* LEASE_CHECK: dvp is locked */
        LEASE_CHECK(dvp, p, p->p_ucred, LEASE_WRITE);
 
        /* LEASE_CHECK: dvp is locked */
        LEASE_CHECK(dvp, p, p->p_ucred, LEASE_WRITE);
@@ -377,13 +478,21 @@ union_vn_create(vpp, un, p)
        struct vattr *vap = &vat;
        int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL);
        int error;
        struct vattr *vap = &vat;
        int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL);
        int error;
-       int hash;
-       int cmode = UN_FILEMODE &~ p->p_fd->fd_cmask;
+       int cmode = UN_FILEMODE & ~p->p_fd->fd_cmask;
        char *cp;
        struct componentname cn;
 
        *vpp = NULLVP;
 
        char *cp;
        struct componentname cn;
 
        *vpp = NULLVP;
 
+       /*
+        * Build a new componentname structure (for the same
+        * reasons outlined in union_mkshadow).
+        * The difference here is that the file is owned by
+        * the current user, rather than by the person who
+        * did the mount, since the current user needs to be
+        * able to write the file (that's why it is being
+        * copied in the first place).
+        */
        cn.cn_namelen = strlen(un->un_path);
        cn.cn_pnbuf = (caddr_t) malloc(cn.cn_namelen, M_NAMEI, M_WAITOK);
        bcopy(un->un_path, cn.cn_pnbuf, cn.cn_namelen+1);
        cn.cn_namelen = strlen(un->un_path);
        cn.cn_pnbuf = (caddr_t) malloc(cn.cn_namelen, M_NAMEI, M_WAITOK);
        bcopy(un->un_path, cn.cn_pnbuf, cn.cn_namelen+1);
@@ -392,14 +501,25 @@ union_vn_create(vpp, un, p)
        cn.cn_proc = p;
        cn.cn_cred = p->p_ucred;
        cn.cn_nameptr = cn.cn_pnbuf;
        cn.cn_proc = p;
        cn.cn_cred = p->p_ucred;
        cn.cn_nameptr = cn.cn_pnbuf;
-       for (hash = 0, cp = cn.cn_nameptr; *cp != 0 && *cp != '/'; cp++)
-               hash += (unsigned char)*cp;
-       cn.cn_hash = hash;
+       cn.cn_hash = un->un_hash;
        cn.cn_consume = 0;
 
        cn.cn_consume = 0;
 
+       VREF(un->un_dirvp);
        if (error = relookup(un->un_dirvp, &vp, &cn))
                return (error);
        if (error = relookup(un->un_dirvp, &vp, &cn))
                return (error);
+       vrele(un->un_dirvp);
+
        if (vp == NULLVP) {
        if (vp == NULLVP) {
+               /*
+                * Good - there was no race to create the file
+                * so go ahead and create it.  The permissions
+                * on the file will be 0666 modified by the
+                * current user's umask.  Access to the file, while
+                * it is unioned, will require access to the top *and*
+                * bottom files.  Access when not unioned will simply
+                * require access to the top-level file.
+                * TODO: confirm choice of access permissions.
+                */
                VATTR_NULL(vap);
                vap->va_type = VREG;
                vap->va_mode = cmode;
                VATTR_NULL(vap);
                vap->va_type = VREG;
                vap->va_mode = cmode;
@@ -440,3 +560,42 @@ bad:
        vput(vp);
        return (error);
 }
        vput(vp);
        return (error);
 }
+
+/*
+ * close (vp) on behalf of the union layer.
+ * if the file was open for writing (FWRITE set in fmode)
+ * the vnode's write count is dropped first.  the credentials
+ * (cred) and process (p) supplied by the caller are handed on
+ * to VOP_CLOSE, whose result is returned.
+ */
+int
+union_vn_close(vp, fmode, cred, p)
+       struct vnode *vp;
+       int fmode;
+       struct ucred *cred;
+       struct proc *p;
+{
+       if (fmode & FWRITE)
+               --vp->v_writecount;
+       /* the close operation needs the caller's cred and proc as well */
+       return (VOP_CLOSE(vp, fmode, cred, p));
+}
+
+/*
+ * the upper layer object of (un) has gone away: give up the
+ * hold on un->un_uppervp and forget it.  if UN_ULOCK is set
+ * the flag is cleared and the vnode is released with vput,
+ * otherwise it is released with vrele.
+ */
+void
+union_removed_upper(un)
+       struct union_node *un;
+{
+       struct vnode *uppervp = un->un_uppervp;
+
+       if ((un->un_flags & UN_ULOCK) == 0) {
+               vrele(uppervp);
+       } else {
+               un->un_flags &= ~UN_ULOCK;
+               vput(uppervp);
+       }
+       un->un_uppervp = NULLVP;
+}
+
+/*
+ * return the lower layer vnode of the union vnode (vp).
+ * when a lower vnode exists and has the same v_type as (vp)
+ * it is first obtained with vget; if that fails NULLVP is
+ * returned.  in every other case un_lowervp is returned
+ * as it stands (possibly nil).
+ * NOTE(review): when the types differ the lower vnode is
+ * returned without going through vget -- confirm callers
+ * expect that.
+ */
+struct vnode *
+union_lowervp(vp)
+       struct vnode *vp;
+{
+       struct union_node *un = VTOUNION(vp);
+
+       /* nothing below, or a different kind of object: no vget */
+       if (un->un_lowervp == NULLVP ||
+           vp->v_type != un->un_lowervp->v_type)
+               return (un->un_lowervp);
+
+       if (vget(un->un_lowervp, 0) != 0)
+               return (NULLVP);
+
+       return (un->un_lowervp);
+}