fixup fsid correctly in union_getattr
[unix-history] / usr / src / sys / miscfs / union / union_vnops.c
index 81be3ab..14d124b 100644 (file)
@@ -4,18 +4,17 @@
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
- * Jan-Simon Pendry and by John Heidemann of the UCLA Ficus project.
+ * Jan-Simon Pendry.
  *
  * %sccs.include.redist.c%
  *
  *
  * %sccs.include.redist.c%
  *
- *     @(#)union_vnops.c       1.1 (Berkeley) %G%
+ *     @(#)union_vnops.c       8.16 (Berkeley) %G%
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/proc.h>
 #include <sys/file.h>
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/proc.h>
 #include <sys/file.h>
-#include <sys/filedesc.h>
 #include <sys/time.h>
 #include <sys/types.h>
 #include <sys/vnode.h>
 #include <sys/time.h>
 #include <sys/types.h>
 #include <sys/vnode.h>
 #include <sys/namei.h>
 #include <sys/malloc.h>
 #include <sys/buf.h>
 #include <sys/namei.h>
 #include <sys/malloc.h>
 #include <sys/buf.h>
-#include "union.h"
+#include <sys/queue.h>
+#include <miscfs/union/union.h>
 
 
-
-int union_bug_bypass = 0;   /* for debugging: enables bypass printf'ing */
-
-/*
- * This is the 10-Apr-92 bypass routine.
- *    This version has been optimized for speed, throwing away some
- * safety checks.  It should still always work, but it's not as
- * robust to programmer errors.
- *    Define SAFETY to include some error checking code.
- *
- * In general, we map all vnodes going down and unmap them on the way back.
- * As an exception to this, vnodes can be marked "unmapped" by setting
- * the Nth bit in operation's vdesc_flags.
- *
- * Also, some BSD vnode operations have the side effect of vrele'ing
- * their arguments.  With stacking, the reference counts are held
- * by the upper node, not the lower one, so we must handle these
- * side-effects here.  This is not of concern in Sun-derived systems
- * since there are no such side-effects.
- *
- * This makes the following assumptions:
- * - only one returned vpp
- * - no INOUT vpp's (Sun's vop_open has one of these)
- * - the vnode operation vector of the first vnode should be used
- *   to determine what implementation of the op should be invoked
- * - all mapped vnodes are of our vnode-type (NEEDSWORK:
- *   problems on rmdir'ing mount points and renaming?)
- */ 
-int
-union_bypass(ap)
-       struct vop_generic_args /* {
-               struct vnodeop_desc *a_desc;
-               <other random data follows, presumably>
-       } */ *ap;
-{
-       struct vnode **this_vp_p;
-       int error;
-       struct vnode *old_vps[VDESC_MAX_VPS];
-       struct vnode **vps_p[VDESC_MAX_VPS];
-       struct vnode ***vppp;
-       struct vnodeop_desc *descp = ap->a_desc;
-       int reles, i;
-
-       if (union_bug_bypass)
-               printf ("union_bypass: %s\n", descp->vdesc_name);
-
-#ifdef SAFETY
-       /*
-        * We require at least one vp.
-        */
-       if (descp->vdesc_vp_offsets == NULL ||
-           descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET)
-               panic ("union_bypass: no vp's in map.\n");
-#endif
-
-       /*
-        * Map the vnodes going in.
-        * Later, we'll invoke the operation based on
-        * the first mapped vnode's operation vector.
-        */
-       reles = descp->vdesc_flags;
-       for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
-               if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
-                       break;   /* bail out at end of list */
-               vps_p[i] = this_vp_p = 
-                       VOPARG_OFFSETTO(struct vnode **, descp->vdesc_vp_offsets[i],ap);
-               /*
-                * We're not guaranteed that any but the first vnode
-                * are of our type.  Check for and don't map any
-                * that aren't.  (We must always map first vp or vclean fails.)
-                */
-               if (i && (*this_vp_p)->v_op != union_vnodeop_p) {
-                       old_vps[i] = NULL;
-               } else {
-                       old_vps[i] = *this_vp_p;
-                       *(vps_p[i]) = OTHERVP(*this_vp_p);
-                       /*
-                        * XXX - Several operations have the side effect
-                        * of vrele'ing their vp's.  We must account for
-                        * that.  (This should go away in the future.)
-                        */
-                       if (reles & 1)
-                               VREF(*this_vp_p);
-               }
-                       
-       }
-
-       /*
-        * Call the operation on the lower layer
-        * with the modified argument structure.
-        */
-       error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap);
-
-       /*
-        * Maintain the illusion of call-by-value
-        * by restoring vnodes in the argument structure
-        * to their original value.
-        */
-       reles = descp->vdesc_flags;
-       for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
-               if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
-                       break;   /* bail out at end of list */
-               if (old_vps[i]) {
-                       *(vps_p[i]) = old_vps[i];
-                       if (reles & 1)
-                               vrele(*(vps_p[i]));
-               }
-       }
-
-       /*
-        * Map the possible out-going vpp
-        * (Assumes that the lower layer always returns
-        * a VREF'ed vpp unless it gets an error.)
-        */
-       if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET &&
-           !(descp->vdesc_flags & VDESC_NOMAP_VPP) &&
-           !error) {
-               /*
-                * XXX - even though some ops have vpp returned vp's,
-                * several ops actually vrele this before returning.
-                * We must avoid these ops.
-                * (This should go away when these ops are regularized.)
-                */
-               if (descp->vdesc_flags & VDESC_VPP_WILLRELE)
-                       goto out;
-               vppp = VOPARG_OFFSETTO(struct vnode***,
-                                descp->vdesc_vpp_offset,ap);
-               panic("union: failed to handled returned vnode");
-               error = union_allocvp(0, 0, 0, 0, 0, 0);
-       }
-
-out:
-       return (error);
-}
-
-/*
- * Check access permission on the union vnode.
- * The access check being enforced is to check
- * against both the underlying vnode, and any
- * copied vnode.  This ensures that no additional
- * file permissions are given away simply because
- * the user caused an implicit file copy.
- */
-int
-union_access(ap)
-       struct vop_access_args /* {
-               struct vnodeop_desc *a_desc;
-               struct vnode *a_vp;
-               int a_mode;
-               struct ucred *a_cred;
-               struct proc *a_p;
-       } */ *ap;
-{
-       struct union_node *un = VTOUNION(ap->a_vp);
-       struct vnode *vp;
-
-       if (vp = un->un_lowervp) {
-               int error;
-
-               error = VOP_ACCESS(vp, ap->a_mode, ap->a_cred, ap->a_p);
-               if (error)
-                       return (error);
-       }
-
-       if (vp = un->un_uppervp)
-               return (VOP_ACCESS(vp, ap->a_mode, ap->a_cred, ap->a_p));
-       
-       return (0);
+#define FIXUP(un) { \
+       if (((un)->un_flags & UN_ULOCK) == 0) { \
+               union_fixup(un); \
+       } \
 }
 
 }
 
-static int
-union_mkshadow(dvp, cnp, vpp)
-       struct vnode *dvp;
-       struct componentname *cnp;
-       struct vnode *vpp;
+static void
+union_fixup(un)
+       struct union_node *un;
 {
 {
-       int error;
-       struct vattr va;
-       struct proc *p = cnp->cn_proc;
-       int lockparent = (cnp->cn_flags & LOCKPARENT);
-
-       /*
-        * policy: when creating the shadow directory in the
-        * upper layer, create it owned by the current user,
-        * group from parent directory, and mode 777 modified
-        * by umask (ie mostly identical to the mkdir syscall).
-        * (jsp, kb)
-        * TODO: create the directory owned by the user who
-        * did the mount (um->um_cred).
-        */
 
 
-       VATTR_NULL(&va);
-       va.va_type = VDIR;
-       va.va_mode = UN_DIRMODE &~ p->p_fd->fd_cmask;
-       if (lockparent)
-               VOP_UNLOCK(dvp);
-       LEASE_CHECK(dvp, p, p->p_ucred, LEASE_WRITE);
-       VOP_LOCK(dvp);
-       error = VOP_MKDIR(dvp, vpp, cnp, &va);
-       if (lockparent)
-               VOP_LOCK(dvp);
-       return (error);
+       VOP_LOCK(un->un_uppervp);
+       un->un_flags |= UN_ULOCK;
 }
 
 static int
 }
 
 static int
-union_lookup1(dvp, vpp, cnp)
-       struct vnode *dvp;
+union_lookup1(udvp, dvpp, vpp, cnp)
+       struct vnode *udvp;
+       struct vnode **dvpp;
        struct vnode **vpp;
        struct componentname *cnp;
 {
        int error;
        struct vnode *tdvp;
        struct vnode **vpp;
        struct componentname *cnp;
 {
        int error;
        struct vnode *tdvp;
+       struct vnode *dvp;
        struct mount *mp;
 
        struct mount *mp;
 
-       if (cnp->cn_flags & ISDOTDOT) {
-               for (;;) {
-                       if ((dvp->v_flag & VROOT) == 0 ||
-                           (cnp->cn_flags & NOCROSSMOUNT))
-                               break;
+       dvp = *dvpp;
 
 
+       /*
+        * If stepping up the directory tree, check for going
+        * back across the mount point, in which case do what
+        * lookup would do by stepping back down the mount
+        * hierarchy.
+        */
+       if (cnp->cn_flags & ISDOTDOT) {
+               while ((dvp != udvp) && (dvp->v_flag & VROOT)) {
+                       /*
+                        * Don't do the NOCROSSMOUNT check
+                        * at this level.  By definition,
+                        * union fs deals with namespaces, not
+                        * filesystems.
+                        */
                        tdvp = dvp;
                        tdvp = dvp;
-                       dvp = dvp->v_mount->mnt_vnodecovered;
+                       *dvpp = dvp = dvp->v_mount->mnt_vnodecovered;
                        vput(tdvp);
                        VREF(dvp);
                        VOP_LOCK(dvp);
                }
        }
                        vput(tdvp);
                        VREF(dvp);
                        VOP_LOCK(dvp);
                }
        }
-       
+
         error = VOP_LOOKUP(dvp, &tdvp, cnp);
        if (error)
                return (error);
 
         error = VOP_LOOKUP(dvp, &tdvp, cnp);
        if (error)
                return (error);
 
+       /*
+        * The parent directory will have been unlocked, unless lookup
+        * found the last component.  If it was unlocked (not the last
+        * component, and the result is not the directory itself),
+        * re-lock the node here to allow it to be unlocked again
+        * (phew) in union_lookup.
+        */
+       if (dvp != tdvp && !(cnp->cn_flags & ISLASTCN))
+               VOP_LOCK(dvp);
+
        dvp = tdvp;
        dvp = tdvp;
-       while ((dvp->v_type == VDIR) && (mp = dvp->v_mountedhere) &&
-              (cnp->cn_flags & NOCROSSMOUNT) == 0) {
+
+       /*
+        * Lastly, check if the current node is a mount point, in
+        * which case walk up the mount hierarchy, making sure not to
+        * bump into the root of the mount tree (i.e. dvp != udvp).
+        */
+       while (dvp != udvp && (dvp->v_type == VDIR) &&
+              (mp = dvp->v_mountedhere)) {
 
                if (mp->mnt_flag & MNT_MLOCK) {
                        mp->mnt_flag |= MNT_MWAIT;
 
                if (mp->mnt_flag & MNT_MLOCK) {
                        mp->mnt_flag |= MNT_MWAIT;
@@ -272,7 +109,7 @@ union_lookup1(dvp, vpp, cnp)
                        return (error);
                }
 
                        return (error);
                }
 
-               vput(tdvp);
+               vput(dvp);
                dvp = tdvp;
        }
 
                dvp = tdvp;
        }
 
@@ -289,16 +126,24 @@ union_lookup(ap)
                struct componentname *a_cnp;
        } */ *ap;
 {
                struct componentname *a_cnp;
        } */ *ap;
 {
+       int error;
        int uerror, lerror;
        struct vnode *uppervp, *lowervp;
        struct vnode *upperdvp, *lowerdvp;
        struct vnode *dvp = ap->a_dvp;
        int uerror, lerror;
        struct vnode *uppervp, *lowervp;
        struct vnode *upperdvp, *lowerdvp;
        struct vnode *dvp = ap->a_dvp;
-       struct union_node *dun = VTOUNION(ap->a_dvp);
+       struct union_node *dun = VTOUNION(dvp);
        struct componentname *cnp = ap->a_cnp;
        int lockparent = cnp->cn_flags & LOCKPARENT;
        struct componentname *cnp = ap->a_cnp;
        int lockparent = cnp->cn_flags & LOCKPARENT;
+       int rdonly = cnp->cn_flags & RDONLY;
+       struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount);
+       struct ucred *saved_cred;
+
+       cnp->cn_flags |= LOCKPARENT;
 
        upperdvp = dun->un_uppervp;
        lowerdvp = dun->un_lowervp;
 
        upperdvp = dun->un_uppervp;
        lowerdvp = dun->un_lowervp;
+       uppervp = NULLVP;
+       lowervp = NULLVP;
 
        /*
         * do the lookup in the upper level.
 
        /*
         * do the lookup in the upper level.
@@ -306,15 +151,19 @@ union_lookup(ap)
         * then assume that something special is going
         * on and just return that vnode.
         */
         * then assume that something special is going
         * on and just return that vnode.
         */
-       uppervp = 0;
-       if (upperdvp) {
-               uerror = union_lookup1(upperdvp, &uppervp, cnp);
+       if (upperdvp != NULLVP) {
+               FIXUP(dun);
+               uerror = union_lookup1(um->um_uppervp, &upperdvp,
+                                       &uppervp, cnp);
+               /*if (uppervp == upperdvp)
+                       dun->un_flags |= UN_KLOCK;*/
+
                if (cnp->cn_consume != 0) {
                        *ap->a_vpp = uppervp;
                if (cnp->cn_consume != 0) {
                        *ap->a_vpp = uppervp;
+                       if (!lockparent)
+                               cnp->cn_flags &= ~LOCKPARENT;
                        return (uerror);
                }
                        return (uerror);
                }
-               if (!lockparent)
-                       VOP_LOCK(upperdvp);
        } else {
                uerror = ENOENT;
        }
        } else {
                uerror = ENOENT;
        }
@@ -326,23 +175,50 @@ union_lookup(ap)
         * back from the upper layer and return the lower vnode
         * instead.
         */
         * back from the upper layer and return the lower vnode
         * instead.
         */
-       lowervp = 0;
-       if (lowerdvp) {
-               lerror = union_lookup1(lowerdvp, &lowervp, cnp);
+       if (lowerdvp != NULLVP) {
+               int nameiop;
+
+               VOP_LOCK(lowerdvp);
+
+               /*
+                * Only do a LOOKUP on the bottom node, since
+                * we won't be making changes to it anyway.
+                */
+               nameiop = cnp->cn_nameiop;
+               cnp->cn_nameiop = LOOKUP;
+               if (um->um_op == UNMNT_BELOW) {
+                       saved_cred = cnp->cn_cred;
+                       cnp->cn_cred = um->um_cred;
+               }
+               lerror = union_lookup1(um->um_lowervp, &lowerdvp,
+                               &lowervp, cnp);
+               if (um->um_op == UNMNT_BELOW)
+                       cnp->cn_cred = saved_cred;
+               cnp->cn_nameiop = nameiop;
+
+               if (lowervp != lowerdvp)
+                       VOP_UNLOCK(lowerdvp);
+
                if (cnp->cn_consume != 0) {
                if (cnp->cn_consume != 0) {
-                       if (uppervp) {
-                               vput(uppervp);
-                               uppervp = 0;
+                       if (uppervp != NULLVP) {
+                               if (uppervp == upperdvp)
+                                       vrele(uppervp);
+                               else
+                                       vput(uppervp);
+                               uppervp = NULLVP;
                        }
                        *ap->a_vpp = lowervp;
                        }
                        *ap->a_vpp = lowervp;
+                       if (!lockparent)
+                               cnp->cn_flags &= ~LOCKPARENT;
                        return (lerror);
                }
                        return (lerror);
                }
-               if (!lockparent)
-                       VOP_LOCK(lowerdvp);
        } else {
                lerror = ENOENT;
        }
 
        } else {
                lerror = ENOENT;
        }
 
+       if (!lockparent)
+               cnp->cn_flags &= ~LOCKPARENT;
+
        /*
         * at this point, we have uerror and lerror indicating
         * possible errors with the lookups in the upper and lower
        /*
         * at this point, we have uerror and lerror indicating
         * possible errors with the lookups in the upper and lower
@@ -367,96 +243,136 @@ union_lookup(ap)
         *    whatever the bottom layer returned.
         */
 
         *    whatever the bottom layer returned.
         */
 
+       *ap->a_vpp = NULLVP;
+
        /* case 1. */
        if ((uerror != 0) && (lerror != 0)) {
        /* case 1. */
        if ((uerror != 0) && (lerror != 0)) {
-               *ap->a_vpp = 0;
                return (uerror);
        }
 
        /* case 2. */
        if (uerror != 0 /* && (lerror == 0) */ ) {
                if (lowervp->v_type == VDIR) { /* case 2b. */
                return (uerror);
        }
 
        /* case 2. */
        if (uerror != 0 /* && (lerror == 0) */ ) {
                if (lowervp->v_type == VDIR) { /* case 2b. */
-                       uerror = union_mkshadow(upperdvp, cnp, &uppervp);
+                       dun->un_flags &= ~UN_ULOCK;
+                       VOP_UNLOCK(upperdvp);
+                       uerror = union_mkshadow(um, upperdvp, cnp, &uppervp);
+                       VOP_LOCK(upperdvp);
+                       dun->un_flags |= UN_ULOCK;
+
                        if (uerror) {
                        if (uerror) {
-                               if (lowervp) {
+                               if (lowervp != NULLVP) {
                                        vput(lowervp);
                                        vput(lowervp);
-                                       lowervp = 0;
+                                       lowervp = NULLVP;
                                }
                                return (uerror);
                        }
                }
        }
 
                                }
                                return (uerror);
                        }
                }
        }
 
-       return (union_allocvp(ap->a_vpp, dvp->v_mount, dvp, cnp,
-                             uppervp, lowervp));
+       if (lowervp != NULLVP)
+               VOP_UNLOCK(lowervp);
+
+       error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp,
+                             uppervp, lowervp);
+
+       if (error) {
+               if (uppervp != NULLVP)
+                       vput(uppervp);
+               if (lowervp != NULLVP)
+                       vrele(lowervp);
+       } else {
+               if (*ap->a_vpp != dvp)
+                       if (!lockparent || !(cnp->cn_flags & ISLASTCN))
+                               VOP_UNLOCK(dvp);
+       }
+
+       return (error);
 }
 
 }
 
-/*
- * copyfile.  copy the vnode (fvp) to the vnode (tvp)
- * using a sequence of reads and writes.
- */
-static int
-union_copyfile(p, cred, fvp, tvp)
-       struct proc *p;
-       struct ucred *cred;
-       struct vnode *fvp;
-       struct vnode *tvp;
+int
+union_create(ap)
+       struct vop_create_args /* {
+               struct vnode *a_dvp;
+               struct vnode **a_vpp;
+               struct componentname *a_cnp;
+               struct vattr *a_vap;
+       } */ *ap;
 {
 {
-       char *buf;
-       struct uio uio;
-       struct iovec iov;
-       int error = 0;
-       off_t offset;
+       struct union_node *un = VTOUNION(ap->a_dvp);
+       struct vnode *dvp = un->un_uppervp;
 
 
-       /*
-        * strategy:
-        * allocate a buffer of size MAXBSIZE.
-        * loop doing reads and writes, keeping track
-        * of the current uio offset.
-        * give up at the first sign of trouble.
-        */
+       if (dvp != NULLVP) {
+               int error;
+               struct vnode *vp;
 
 
-       uio.uio_procp = p;
-       uio.uio_segflg = UIO_SYSSPACE;
-       offset = 0;
-
-       VOP_UNLOCK(fvp);                                /* XXX */
-       LEASE_CHECK(fvp, p, cred, LEASE_READ);
-       VOP_LOCK(fvp);                                  /* XXX */
-       VOP_UNLOCK(tvp);                                /* XXX */
-       LEASE_CHECK(tvp, p, cred, LEASE_WRITE);
-       VOP_LOCK(tvp);                                  /* XXX */
-
-       buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);
-       do {
-               uio.uio_iov = &iov;
-               uio.uio_iovcnt = 1;
-               iov.iov_base = buf;
-               iov.iov_len = MAXBSIZE;
-               uio.uio_resid = iov.iov_len;
-               uio.uio_offset = offset;
-               uio.uio_rw = UIO_READ;
-               error = VOP_READ(fvp, &uio, 0, cred);
+               FIXUP(un);
 
 
-               if (error == 0) {
-                       uio.uio_iov = &iov;
-                       uio.uio_iovcnt = 1;
-                       iov.iov_base = buf;
-                       iov.iov_len = MAXBSIZE - uio.uio_resid;
-                       uio.uio_rw = UIO_WRITE;
-                       uio.uio_resid = iov.iov_len;
-                       uio.uio_offset = offset;
-
-                       do {
-                               error = VOP_WRITE(tvp, &uio, 0, cred);
-                       } while (error == 0 && uio.uio_resid > 0);
-                       if (error == 0)
-                               offset = uio.uio_offset;
+               VREF(dvp);
+               un->un_flags |= UN_KLOCK;
+               vput(ap->a_dvp);
+               error = VOP_CREATE(dvp, &vp, ap->a_cnp, ap->a_vap);
+               if (error)
+                       return (error);
+
+               error = union_allocvp(
+                               ap->a_vpp,
+                               ap->a_dvp->v_mount,
+                               ap->a_dvp,
+                               NULLVP,
+                               ap->a_cnp,
+                               vp,
+                               NULLVP);
+               if (error)
+                       vput(vp);
+               return (error);
+       }
+
+       vput(ap->a_dvp);
+       return (EROFS);
+}
+
+int
+union_mknod(ap)
+       struct vop_mknod_args /* {
+               struct vnode *a_dvp;
+               struct vnode **a_vpp;
+               struct componentname *a_cnp;
+               struct vattr *a_vap;
+       } */ *ap;
+{
+       struct union_node *un = VTOUNION(ap->a_dvp);
+       struct vnode *dvp = un->un_uppervp;
+
+       if (dvp != NULLVP) {
+               int error;
+               struct vnode *vp;
+
+               FIXUP(un);
+
+               VREF(dvp);
+               un->un_flags |= UN_KLOCK;
+               vput(ap->a_dvp);
+               error = VOP_MKNOD(dvp, &vp, ap->a_cnp, ap->a_vap);
+               if (error)
+                       return (error);
+
+               if (vp != NULLVP) {
+                       error = union_allocvp(
+                                       ap->a_vpp,
+                                       ap->a_dvp->v_mount,
+                                       ap->a_dvp,
+                                       NULLVP,
+                                       ap->a_cnp,
+                                       vp,
+                                       NULLVP);
+                       if (error)
+                               vput(vp);
                }
                }
-       } while ((uio.uio_resid == 0) && (error == 0));
+               return (error);
+       }
 
 
-       free(buf, M_TEMP);
-       return (error);
+       vput(ap->a_dvp);
+       return (EROFS);
 }
 
 int
 }
 
 int
@@ -470,272 +386,887 @@ union_open(ap)
        } */ *ap;
 {
        struct union_node *un = VTOUNION(ap->a_vp);
        } */ *ap;
 {
        struct union_node *un = VTOUNION(ap->a_vp);
+       struct vnode *tvp;
        int mode = ap->a_mode;
        struct ucred *cred = ap->a_cred;
        struct proc *p = ap->a_p;
        int mode = ap->a_mode;
        struct ucred *cred = ap->a_cred;
        struct proc *p = ap->a_p;
+       int error;
 
        /*
         * If there is an existing upper vp then simply open that.
         */
 
        /*
         * If there is an existing upper vp then simply open that.
         */
-       if (un->un_uppervp)
-               return (VOP_OPEN(un->un_uppervp, mode, cred, p));
-
-       /*
-        * If the lower vnode is being opened for writing, then
-        * copy the file contents to the upper vnode and open that,
-        * otherwise can simply open the lower vnode.
-        */
-       if ((ap->a_mode & FWRITE) && (un->un_lowervp->v_type == VREG)) {
-               int error;
-               struct nameidata nd;
-               struct filedesc *fdp = p->p_fd;
-               int fmode;
-               int cmode;
-
+       tvp = un->un_uppervp;
+       if (tvp == NULLVP) {
                /*
                /*
-                * Open the named file in the upper layer.  Note that
-                * the file may have come into existence *since* the lookup
-                * was done, since the upper layer may really be a
-                * loopback mount of some other filesystem... so open
-                * the file with exclusive create and barf if it already
-                * exists.
-                * XXX - perhaps should re-lookup the node (once more with
-                * feeling) and simply open that.  Who knows.
+                * If the lower vnode is being opened for writing, then
+                * copy the file contents to the upper vnode and open that,
+                * otherwise can simply open the lower vnode.
                 */
                 */
-               NDINIT(&nd, CREATE, 0, UIO_SYSSPACE, un->un_path, p);
-               fmode = (O_CREAT|O_TRUNC|O_EXCL);
-               cmode = UN_FILEMODE & ~fdp->fd_cmask;
-               error = vn_open(&nd, fmode, cmode);
-               if (error)
+               tvp = un->un_lowervp;
+               if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) {
+                       error = union_copyup(un, (mode&O_TRUNC) == 0, cred, p);
+                       if (error == 0)
+                               error = VOP_OPEN(un->un_uppervp, mode, cred, p);
                        return (error);
                        return (error);
-               un->un_uppervp = nd.ni_vp;
+               }
+
                /*
                /*
-                * Now, if the file is being opened with truncation, then
-                * the (new) upper vnode is ready to fly, otherwise the
-                * data from the lower vnode must be copied to the upper
-                * layer first.  This only works for regular files (check
-                * is made above).
+                * Just open the lower vnode
                 */
                 */
-               if ((mode & O_TRUNC) == 0) {
-                       /* XXX - should not ignore errors from VOP_CLOSE */
-                       error = VOP_OPEN(un->un_lowervp, FREAD, cred, p);
-                       if (error == 0) {
-                               error = union_copyfile(p, cred,
-                                              un->un_lowervp, un->un_uppervp);
-                               (void) VOP_CLOSE(un->un_lowervp, FREAD);
-                       }
-                       (void) VOP_CLOSE(un->un_uppervp, FWRITE);
-               }
-               if (error == 0)
-                       error = VOP_OPEN(un->un_uppervp, FREAD, cred, p);
+               un->un_openl++;
+               VOP_LOCK(tvp);
+               error = VOP_OPEN(tvp, mode, cred, p);
+               VOP_UNLOCK(tvp);
+
                return (error);
        }
 
                return (error);
        }
 
-       return (VOP_OPEN(un->un_lowervp, mode, cred, p));
+       FIXUP(un);
+
+       error = VOP_OPEN(tvp, mode, cred, p);
+
+       return (error);
 }
 
 }
 
-/*
- *  We handle getattr only to change the fsid.
- */
 int
 int
-union_getattr(ap)
-       struct vop_getattr_args /* {
+union_close(ap)
+       struct vop_close_args /* {
                struct vnode *a_vp;
                struct vnode *a_vp;
-               struct vattr *a_vap;
+               int  a_fflag;
                struct ucred *a_cred;
                struct proc *a_p;
        } */ *ap;
 {
                struct ucred *a_cred;
                struct proc *a_p;
        } */ *ap;
 {
-       int error;
+       struct union_node *un = VTOUNION(ap->a_vp);
+       struct vnode *vp;
 
 
-       if (error = union_bypass(ap))
-               return (error);
-       /* Requires that arguments be restored. */
-       ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
-       return (0);
+       if (un->un_uppervp != NULLVP) {
+               vp = un->un_uppervp;
+       } else {
+#ifdef UNION_DIAGNOSTIC
+               if (un->un_openl <= 0)
+                       panic("union: un_openl cnt");
+#endif
+               --un->un_openl;
+               vp = un->un_lowervp;
+       }
+
+       return (VOP_CLOSE(vp, ap->a_fflag, ap->a_cred, ap->a_p));
 }
 
 /*
 }
 
 /*
- * union_readdir works in concert with getdirentries and
- * readdir(3) to provide a list of entries in the unioned
- * directories.  getdirentries is responsible for walking
- * down the union stack.  readdir(3) is responsible for
- * eliminating duplicate names from the returned data stream.
+ * Check access permission on the union vnode.
+ * The access check being enforced is to check
+ * against both the underlying vnode, and any
+ * copied vnode.  This ensures that no additional
+ * file permissions are given away simply because
+ * the user caused an implicit file copy.
  */
 int
  */
 int
-union_readdir(ap)
-       struct vop_readdir_args /* {
+union_access(ap)
+       struct vop_access_args /* {
                struct vnodeop_desc *a_desc;
                struct vnode *a_vp;
                struct vnodeop_desc *a_desc;
                struct vnode *a_vp;
-               struct uio *a_uio;
+               int a_mode;
                struct ucred *a_cred;
                struct ucred *a_cred;
+               struct proc *a_p;
        } */ *ap;
 {
        struct union_node *un = VTOUNION(ap->a_vp);
        } */ *ap;
 {
        struct union_node *un = VTOUNION(ap->a_vp);
+       int error = EACCES;
+       struct vnode *vp;
+
+       if ((vp = un->un_uppervp) != NULLVP) {
+               FIXUP(un);
+               return (VOP_ACCESS(vp, ap->a_mode, ap->a_cred, ap->a_p));
+       }
 
 
-       if (un->un_uppervp)
-               return (union_bypass(ap));
+       if ((vp = un->un_lowervp) != NULLVP) {
+               VOP_LOCK(vp);
+               error = VOP_ACCESS(vp, ap->a_mode, ap->a_cred, ap->a_p);
+               if (error == 0) {
+                       struct union_mount *um = MOUNTTOUNIONMOUNT(vp->v_mount);
 
 
-       return (0);
+                       if (um->um_op == UNMNT_BELOW)
+                               error = VOP_ACCESS(vp, ap->a_mode,
+                                               um->um_cred, ap->a_p);
+               }
+               VOP_UNLOCK(vp);
+               if (error)
+                       return (error);
+       }
+
+       return (error);
 }
 
+/*
+ * We handle getattr only to change the fsid and
+ * track object sizes
+ */
 int
-union_inactive(ap)
-       struct vop_inactive_args /* {
+union_getattr(ap)
+       struct vop_getattr_args /* {
                struct vnode *a_vp;
+               struct vattr *a_vap;
+               struct ucred *a_cred;
+               struct proc *a_p;
        } */ *ap;
 {
+       int error;
+       struct union_node *un = VTOUNION(ap->a_vp);
+       struct vnode *vp = un->un_uppervp;
+       struct vattr *vap;
+       struct vattr va;
+
 
        /*
-        * Do nothing (and _don't_ bypass).
-        * Wait to vrele lowervp until reclaim,
-        * so that until then our union_node is in the
-        * cache and reusable.
-        *
-        * NEEDSWORK: Someday, consider inactive'ing
-        * the lowervp and then trying to reactivate it
-        * with capabilities (v_id)
-        * like they do in the name lookup cache code.
-        * That's too much work for now.
+        * Some programs walk the filesystem hierarchy by counting
+        * links to directories to avoid stat'ing all the time.
+        * This means the link count on directories needs to be "correct".
+        * The only way to do that is to call getattr on both layers
+        * and fix up the link count.  The link count will not necessarily
+        * be accurate but will be large enough to defeat the tree walkers.
         */
-       return (0);
-}
 
-int
-union_reclaim(ap)
-       struct vop_reclaim_args /* {
-               struct vnode *a_vp;
-       } */ *ap;
+       vap = ap->a_vap;
+
+       vp = un->un_uppervp;
+       if (vp != NULLVP) {
+               /*
+                * It's not clear whether VOP_GETATTR is to be
+                * called with the vnode locked or not.  stat() calls
+                * it with (vp) locked, and fstat calls it with
+                * (vp) unlocked.
+                * In the mean time, compensate here by checking
+                * the union_node's lock flag.
+                */
+               if (un->un_flags & UN_LOCKED)
+                       FIXUP(un);
+
+               error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_p);
+               if (error)
+                       return (error);
+               union_newsize(ap->a_vp, vap->va_size, VNOVAL);
+       }
+
+       if (vp == NULLVP) {
+               vp = un->un_lowervp;
+       } else if (vp->v_type == VDIR) {
+               vp = un->un_lowervp;
+               vap = &va;
+       } else {
+               vp = NULLVP;
+       }
+
+       if (vp != NULLVP) {
+               VOP_LOCK(vp);
+               error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_p);
+               VOP_UNLOCK(vp);
+               if (error)
+                       return (error);
+               union_newsize(ap->a_vp, VNOVAL, vap->va_size);
+       }
+
+       if ((vap != ap->a_vap) && (vap->va_type == VDIR))
+               ap->a_vap->va_nlink += vap->va_nlink;
+
+       ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
+       return (0);
+}
+
+int
+union_setattr(ap)
+       struct vop_setattr_args /* {
+               struct vnode *a_vp;
+               struct vattr *a_vap;
+               struct ucred *a_cred;
+               struct proc *a_p;
+       } */ *ap;
 {
-       struct vnode *vp = ap->a_vp;
-       struct union_node *un = VTOUNION(vp);
-       struct vnode *uppervp = un->un_uppervp;
-       struct vnode *lowervp = un->un_lowervp;
-       struct vnode *dirvp = un->un_dirvp;
-       char *path = un->un_path;
+       struct union_node *un = VTOUNION(ap->a_vp);
+       int error;
 
        /*
-        * Note: in vop_reclaim, vp->v_op == dead_vnodeop_p,
-        * so we can't call VOPs on ourself.
+        * Handle case of truncating lower object to zero size,
+        * by creating a zero length upper object.  This is to
+        * handle the case of open with O_TRUNC and O_CREAT.
         */
-       /* After this assignment, this node will not be re-used. */
-       un->un_uppervp = 0;
-       un->un_lowervp = 0;
-       un->un_dirvp = 0;
-       un->un_path = NULL;
-       union_freevp(vp);
-       if (uppervp)
-               vrele(uppervp);
-       if (lowervp)
-               vrele(lowervp);
-       if (dirvp)
-               vrele(dirvp);
-       if (path)
-               free(path, M_TEMP);
-       return (0);
+       if ((un->un_uppervp == NULLVP) &&
+           /* assert(un->un_lowervp != NULLVP) */
+           (un->un_lowervp->v_type == VREG) &&
+           (ap->a_vap->va_size == 0)) {
+               struct vnode *vp;
+
+               error = union_vn_create(&vp, un, ap->a_p);
+               if (error)
+                       return (error);
+
+               /* at this point, uppervp is locked */
+               union_newupper(un, vp);
+
+               VOP_UNLOCK(vp);
+               union_vn_close(un->un_uppervp, FWRITE, ap->a_cred, ap->a_p);
+               VOP_LOCK(vp);
+               un->un_flags |= UN_ULOCK;
+       }
+
+       /*
+        * Try to set attributes in upper layer,
+        * otherwise return read-only filesystem error.
+        */
+       if (un->un_uppervp != NULLVP) {
+               FIXUP(un);
+               error = VOP_SETATTR(un->un_uppervp, ap->a_vap,
+                                       ap->a_cred, ap->a_p);
+               if ((error == 0) && (ap->a_vap->va_size != VNOVAL))
+                       union_newsize(ap->a_vp, ap->a_vap->va_size, VNOVAL);
+       } else {
+               error = EROFS;
+       }
+
+       return (error);
+}
+
+int
+union_read(ap)
+       struct vop_read_args /* {
+               struct vnode *a_vp;
+               struct uio *a_uio;
+               int  a_ioflag;
+               struct ucred *a_cred;
+       } */ *ap;
+{
+       int error;
+       struct vnode *vp = OTHERVP(ap->a_vp);
+       int dolock = (vp == LOWERVP(ap->a_vp));
+
+       if (dolock)
+               VOP_LOCK(vp);
+       else
+               FIXUP(VTOUNION(ap->a_vp));
+       error = VOP_READ(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
+       if (dolock)
+               VOP_UNLOCK(vp);
+
+       /*
+        * XXX
+        * perhaps the size of the underlying object has changed under
+        * our feet.  take advantage of the offset information present
+        * in the uio structure.
+        */
+       if (error == 0) {
+               struct union_node *un = VTOUNION(ap->a_vp);
+               off_t cur = ap->a_uio->uio_offset;
+
+               if (vp == un->un_uppervp) {
+                       if (cur > un->un_uppersz)
+                               union_newsize(ap->a_vp, cur, VNOVAL);
+               } else {
+                       if (cur > un->un_lowersz)
+                               union_newsize(ap->a_vp, VNOVAL, cur);
+               }
+       }
+
+       return (error);
 }
 
+int
+union_write(ap)
+       struct vop_read_args /* {
+               struct vnode *a_vp;
+               struct uio *a_uio;
+               int  a_ioflag;
+               struct ucred *a_cred;
+       } */ *ap;
+{
+       int error;
+       struct vnode *vp = OTHERVP(ap->a_vp);
+       int dolock = (vp == LOWERVP(ap->a_vp));
+
+       if (dolock)
+               VOP_LOCK(vp);
+       else
+               FIXUP(VTOUNION(ap->a_vp));
+       error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
+       if (dolock)
+               VOP_UNLOCK(vp);
+
+       /*
+        * the size of the underlying object may be changed by the
+        * write.
+        */
+       if (error == 0) {
+               struct union_node *un = VTOUNION(ap->a_vp);
+               off_t cur = ap->a_uio->uio_offset;
+
+               if (vp == un->un_uppervp) {
+                       if (cur > un->un_uppersz)
+                               union_newsize(ap->a_vp, cur, VNOVAL);
+               } else {
+                       if (cur > un->un_lowersz)
+                               union_newsize(ap->a_vp, VNOVAL, cur);
+               }
+       }
+
+       return (error);
+}
 
 int
-union_print(ap)
-       struct vop_print_args /* {
+union_ioctl(ap)
+       struct vop_ioctl_args /* {
                struct vnode *a_vp;
+               int  a_command;
+               caddr_t  a_data;
+               int  a_fflag;
+               struct ucred *a_cred;
+               struct proc *a_p;
        } */ *ap;
 {
-       struct vnode *vp = ap->a_vp;
 
-       printf("\ttag VT_UNION, vp=%x, uppervp=%x, lowervp=%x\n",
-                       vp, UPPERVP(vp), LOWERVP(vp));
-       return (0);
+       return (VOP_IOCTL(OTHERVP(ap->a_vp), ap->a_command, ap->a_data,
+                               ap->a_fflag, ap->a_cred, ap->a_p));
 }
 
+int
+union_select(ap)
+       struct vop_select_args /* {
+               struct vnode *a_vp;
+               int  a_which;
+               int  a_fflags;
+               struct ucred *a_cred;
+               struct proc *a_p;
+       } */ *ap;
+{
+
+       return (VOP_SELECT(OTHERVP(ap->a_vp), ap->a_which, ap->a_fflags,
+                               ap->a_cred, ap->a_p));
+}
 
-/*
- * XXX - vop_strategy must be hand coded because it has no
- * vnode in its arguments.
- * This goes away with a merged VM/buffer cache.
- */
 int
-union_strategy(ap)
-       struct vop_strategy_args /* {
-               struct buf *a_bp;
+union_mmap(ap)
+       struct vop_mmap_args /* {
+               struct vnode *a_vp;
+               int  a_fflags;
+               struct ucred *a_cred;
+               struct proc *a_p;
+       } */ *ap;
+{
+
+       return (VOP_MMAP(OTHERVP(ap->a_vp), ap->a_fflags,
+                               ap->a_cred, ap->a_p));
+}
+
+int
+union_fsync(ap)
+       struct vop_fsync_args /* {
+               struct vnode *a_vp;
+               struct ucred *a_cred;
+               int  a_waitfor;
+               struct proc *a_p;
+       } */ *ap;
+{
+       int error = 0;
+       struct vnode *targetvp = OTHERVP(ap->a_vp);
+
+       if (targetvp != NULLVP) {
+               int dolock = (targetvp == LOWERVP(ap->a_vp));
+
+               if (dolock)
+                       VOP_LOCK(targetvp);
+               else
+                       FIXUP(VTOUNION(ap->a_vp));
+               error = VOP_FSYNC(targetvp, ap->a_cred,
+                                       ap->a_waitfor, ap->a_p);
+               if (dolock)
+                       VOP_UNLOCK(targetvp);
+       }
+
+       return (error);
+}
+
+int
+union_seek(ap)
+       struct vop_seek_args /* {
+               struct vnode *a_vp;
+               off_t  a_oldoff;
+               off_t  a_newoff;
+               struct ucred *a_cred;
+       } */ *ap;
+{
+
+       return (VOP_SEEK(OTHERVP(ap->a_vp), ap->a_oldoff, ap->a_newoff, ap->a_cred));
+}
+
+int
+union_remove(ap)
+       struct vop_remove_args /* {
+               struct vnode *a_dvp;
+               struct vnode *a_vp;
+               struct componentname *a_cnp;
        } */ *ap;
 {
-       struct buf *bp = ap->a_bp;
        int error;
-       struct vnode *savedvp;
+       struct union_node *dun = VTOUNION(ap->a_dvp);
+       struct union_node *un = VTOUNION(ap->a_vp);
 
-       savedvp = bp->b_vp;
-       bp->b_vp = OTHERVP(bp->b_vp);
+       if (dun->un_uppervp != NULLVP && un->un_uppervp != NULLVP) {
+               struct vnode *dvp = dun->un_uppervp;
+               struct vnode *vp = un->un_uppervp;
 
-#ifdef DIAGNOSTIC
-       if (bp->b_vp == 0)
-               panic("union_strategy: nil vp");
-       if (((bp->b_flags & B_READ) == 0) &&
-           (bp->b_vp == LOWERVP(savedvp)))
-               panic("union_strategy: writing to lowervp");
-#endif
+               FIXUP(dun);
+               VREF(dvp);
+               dun->un_flags |= UN_KLOCK;
+               vput(ap->a_dvp);
+               FIXUP(un);
+               VREF(vp);
+               un->un_flags |= UN_KLOCK;
+               vput(ap->a_vp);
 
-       error = VOP_STRATEGY(bp);
-       bp->b_vp = savedvp;
+               error = VOP_REMOVE(dvp, vp, ap->a_cnp);
+               if (!error)
+                       union_removed_upper(un);
+
+               /*
+                * XXX: should create a whiteout here
+                */
+       } else {
+               /*
+                * XXX: should create a whiteout here
+                */
+               vput(ap->a_dvp);
+               vput(ap->a_vp);
+               error = EROFS;
+       }
 
        return (error);
 }
 
+int
+union_link(ap)
+       struct vop_link_args /* {
+               struct vnode *a_vp;
+               struct vnode *a_tdvp;
+               struct componentname *a_cnp;
+       } */ *ap;
+{
+       int error = 0;
+       struct union_node *un;
+       struct vnode *vp;
+       struct vnode *tdvp;
+
+       un = VTOUNION(ap->a_vp);
+
+       if (ap->a_vp->v_op != ap->a_tdvp->v_op) {
+               tdvp = ap->a_tdvp;
+       } else {
+               struct union_node *tdun = VTOUNION(ap->a_tdvp);
+               if (tdun->un_uppervp == NULLVP) {
+                       VOP_LOCK(ap->a_tdvp);
+                       if (un->un_uppervp == tdun->un_dirvp) {
+                               un->un_flags &= ~UN_ULOCK;
+                               VOP_UNLOCK(un->un_uppervp);
+                       }
+                       error = union_copyup(tdun, 1, ap->a_cnp->cn_cred,
+                                               ap->a_cnp->cn_proc);
+                       if (un->un_uppervp == tdun->un_dirvp) {
+                               VOP_LOCK(un->un_uppervp);
+                               un->un_flags |= UN_ULOCK;
+                       }
+                       VOP_UNLOCK(ap->a_tdvp);
+               }
+               tdvp = tdun->un_uppervp;
+       }
+
+       vp = un->un_uppervp;
+       if (vp == NULLVP)
+               error = EROFS;
+
+       if (error) {
+               vput(ap->a_vp);
+               return (error);
+       }
+
+       FIXUP(un);
+       VREF(vp);
+       un->un_flags |= UN_KLOCK;
+       vput(ap->a_vp);
+
+       return (VOP_LINK(vp, tdvp, ap->a_cnp));
+}
+
+int
+union_rename(ap)
+       struct vop_rename_args  /* {
+               struct vnode *a_fdvp;
+               struct vnode *a_fvp;
+               struct componentname *a_fcnp;
+               struct vnode *a_tdvp;
+               struct vnode *a_tvp;
+               struct componentname *a_tcnp;
+       } */ *ap;
+{
+       int error;
+
+       struct vnode *fdvp = ap->a_fdvp;
+       struct vnode *fvp = ap->a_fvp;
+       struct vnode *tdvp = ap->a_tdvp;
+       struct vnode *tvp = ap->a_tvp;
+
+       if (fdvp->v_op == union_vnodeop_p) {    /* always true */
+               struct union_node *un = VTOUNION(fdvp);
+               if (un->un_uppervp == NULLVP) {
+                       error = EROFS;
+                       goto bad;
+               }
+
+               fdvp = un->un_uppervp;
+               VREF(fdvp);
+               vrele(ap->a_fdvp);
+       }
+
+       if (fvp->v_op == union_vnodeop_p) {     /* always true */
+               struct union_node *un = VTOUNION(fvp);
+               if (un->un_uppervp == NULLVP) {
+                       error = EROFS;
+                       goto bad;
+               }
+
+               fvp = un->un_uppervp;
+               VREF(fvp);
+               vrele(ap->a_fvp);
+       }
+
+       if (tdvp->v_op == union_vnodeop_p) {
+               struct union_node *un = VTOUNION(tdvp);
+               if (un->un_uppervp == NULLVP) {
+                       /*
+                        * this should never happen in normal
+                        * operation but might if there was
+                        * a problem creating the top-level shadow
+                        * directory.
+                        */
+                       error = EROFS;
+                       goto bad;
+               }
+
+               tdvp = un->un_uppervp;
+               VREF(tdvp);
+               un->un_flags |= UN_KLOCK;
+               vput(ap->a_tdvp);
+       }
+
+       if (tvp != NULLVP && tvp->v_op == union_vnodeop_p) {
+               struct union_node *un = VTOUNION(tvp);
+
+               tvp = un->un_uppervp;
+               if (tvp != NULLVP) {
+                       VREF(tvp);
+                       un->un_flags |= UN_KLOCK;
+               }
+               vput(ap->a_tvp);
+       }
+
+       return (VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp));
+
+bad:
+       vrele(fdvp);
+       vrele(fvp);
+       vput(tdvp);
+       if (tvp != NULLVP)
+               vput(tvp);
+
+       return (error);
+}
+
+int
+union_mkdir(ap)
+       struct vop_mkdir_args /* {
+               struct vnode *a_dvp;
+               struct vnode **a_vpp;
+               struct componentname *a_cnp;
+               struct vattr *a_vap;
+       } */ *ap;
+{
+       struct union_node *un = VTOUNION(ap->a_dvp);
+       struct vnode *dvp = un->un_uppervp;
+
+       if (dvp != NULLVP) {
+               int error;
+               struct vnode *vp;
+
+               FIXUP(un);
+               VREF(dvp);
+               un->un_flags |= UN_KLOCK;
+               vput(ap->a_dvp);
+               error = VOP_MKDIR(dvp, &vp, ap->a_cnp, ap->a_vap);
+               if (error)
+                       return (error);
+
+               error = union_allocvp(
+                               ap->a_vpp,
+                               ap->a_dvp->v_mount,
+                               ap->a_dvp,
+                               NULLVP,
+                               ap->a_cnp,
+                               vp,
+                               NULLVP);
+               if (error)
+                       vput(vp);
+               return (error);
+       }
+
+       vput(ap->a_dvp);
+       return (EROFS);
+}
+
+int
+union_rmdir(ap)
+       struct vop_rmdir_args /* {
+               struct vnode *a_dvp;
+               struct vnode *a_vp;
+               struct componentname *a_cnp;
+       } */ *ap;
+{
+       int error;
+       struct union_node *dun = VTOUNION(ap->a_dvp);
+       struct union_node *un = VTOUNION(ap->a_vp);
+
+       if (dun->un_uppervp != NULLVP && un->un_uppervp != NULLVP) {
+               struct vnode *dvp = dun->un_uppervp;
+               struct vnode *vp = un->un_uppervp;
+
+               FIXUP(dun);
+               VREF(dvp);
+               dun->un_flags |= UN_KLOCK;
+               vput(ap->a_dvp);
+               FIXUP(un);
+               VREF(vp);
+               un->un_flags |= UN_KLOCK;
+               vput(ap->a_vp);
+
+               error = VOP_RMDIR(dvp, vp, ap->a_cnp);
+               if (!error)
+                       union_removed_upper(un);
+
+               /*
+                * XXX: should create a whiteout here
+                */
+       } else {
+               /*
+                * XXX: should create a whiteout here
+                */
+               vput(ap->a_dvp);
+               vput(ap->a_vp);
+               error = EROFS;
+       }
+
+       return (error);
+}
+
+int
+union_symlink(ap)
+       struct vop_symlink_args /* {
+               struct vnode *a_dvp;
+               struct vnode **a_vpp;
+               struct componentname *a_cnp;
+               struct vattr *a_vap;
+               char *a_target;
+       } */ *ap;
+{
+       struct union_node *un = VTOUNION(ap->a_dvp);
+       struct vnode *dvp = un->un_uppervp;
+
+       if (dvp != NULLVP) {
+               int error;
+               struct vnode *vp;
+               struct mount *mp = ap->a_dvp->v_mount;
+
+               FIXUP(un);
+               VREF(dvp);
+               un->un_flags |= UN_KLOCK;
+               vput(ap->a_dvp);
+               error = VOP_SYMLINK(dvp, &vp, ap->a_cnp,
+                                       ap->a_vap, ap->a_target);
+               *ap->a_vpp = NULLVP;
+               return (error);
+       }
+
+       vput(ap->a_dvp);
+       return (EROFS);
+}
 
 /*
- * XXX - like vop_strategy, vop_bwrite must be hand coded because it has no
- * vnode in its arguments.
- * This goes away with a merged VM/buffer cache.
+ * union_readdir works in concert with getdirentries and
+ * readdir(3) to provide a list of entries in the unioned
+ * directories.  getdirentries is responsible for walking
+ * down the union stack.  readdir(3) is responsible for
+ * eliminating duplicate names from the returned data stream.
  */
 int
-union_bwrite(ap)
-       struct vop_bwrite_args /* {
-               struct buf *a_bp;
+union_readdir(ap)
+       struct vop_readdir_args /* {
+               struct vnodeop_desc *a_desc;
+               struct vnode *a_vp;
+               struct uio *a_uio;
+               struct ucred *a_cred;
+               int *a_eofflag;
+               u_long *a_cookies;
+               int a_ncookies;
+       } */ *ap;
+{
+       register struct union_node *un = VTOUNION(ap->a_vp);
+       register struct vnode *uvp = un->un_uppervp;
+
+       if (uvp == NULLVP)
+               return (0);
+
+       FIXUP(un);
+       ap->a_vp = uvp;
+       return (VOCALL(uvp->v_op, VOFFSET(vop_readdir), ap));
+}
+
+int
+union_readlink(ap)
+       struct vop_readlink_args /* {
+               struct vnode *a_vp;
+               struct uio *a_uio;
+               struct ucred *a_cred;
        } */ *ap;
 {
-       struct buf *bp = ap->a_bp;
        int error;
-       struct vnode *savedvp;
+       struct vnode *vp = OTHERVP(ap->a_vp);
+       int dolock = (vp == LOWERVP(ap->a_vp));
 
-       savedvp = bp->b_vp;
-       bp->b_vp = UPPERVP(bp->b_vp);
+       if (dolock)
+               VOP_LOCK(vp);
+       else
+               FIXUP(VTOUNION(ap->a_vp));
+       error = VOP_READLINK(vp, ap->a_uio, ap->a_cred);
+       if (dolock)
+               VOP_UNLOCK(vp);
 
-#ifdef DIAGNOSTIC
-       if (bp->b_vp == 0)
-               panic("union_bwrite: no upper vp");
+       return (error);
+}
+
+int
+union_abortop(ap)
+       struct vop_abortop_args /* {
+               struct vnode *a_dvp;
+               struct componentname *a_cnp;
+       } */ *ap;
+{
+       int error;
+       struct vnode *vp = OTHERVP(ap->a_dvp);
+       struct union_node *un = VTOUNION(ap->a_dvp);
+       int islocked = un->un_flags & UN_LOCKED;
+       int dolock = (vp == LOWERVP(ap->a_dvp));
+
+       if (islocked) {
+               if (dolock)
+                       VOP_LOCK(vp);
+               else
+                       FIXUP(VTOUNION(ap->a_dvp));
+       }
+       error = VOP_ABORTOP(vp, ap->a_cnp);
+       if (islocked && dolock)
+               VOP_UNLOCK(vp);
+
+       return (error);
+}
+
+int
+union_inactive(ap)
+       struct vop_inactive_args /* {
+               struct vnode *a_vp;
+       } */ *ap;
+{
+       struct union_node *un = VTOUNION(ap->a_vp);
+
+       /*
+        * Do nothing (and _don't_ bypass).
+        * Wait to vrele lowervp until reclaim,
+        * so that until then our union_node is in the
+        * cache and reusable.
+        *
+        * NEEDSWORK: Someday, consider inactive'ing
+        * the lowervp and then trying to reactivate it
+        * with capabilities (v_id)
+        * like they do in the name lookup cache code.
+        * That's too much work for now.
+        */
+
+#ifdef UNION_DIAGNOSTIC
+       if (un->un_flags & UN_LOCKED)
+               panic("union: inactivating locked node");
+       if (un->un_flags & UN_ULOCK)
+               panic("union: inactivating w/locked upper node");
 #endif
 
-       error = VOP_BWRITE(bp);
+       if ((un->un_flags & UN_CACHED) == 0)
+               vgone(ap->a_vp);
 
-       bp->b_vp = savedvp;
+       return (0);
+}
 
-       return (error);
+int
+union_reclaim(ap)
+       struct vop_reclaim_args /* {
+               struct vnode *a_vp;
+       } */ *ap;
+{
+
+       union_freevp(ap->a_vp);
+
+       return (0);
 }
 
 int
 union_lock(ap)
        struct vop_lock_args *ap;
 {
-       struct union_node *un = VTOUNION(ap->a_vp);
+       struct vnode *vp = ap->a_vp;
+       struct union_node *un;
+
+start:
+       while (vp->v_flag & VXLOCK) {
+               vp->v_flag |= VXWANT;
+               sleep((caddr_t)vp, PINOD);
+       }
+
+       un = VTOUNION(vp);
+
+       if (un->un_uppervp != NULLVP) {
+               if (((un->un_flags & UN_ULOCK) == 0) &&
+                   (vp->v_usecount != 0)) {
+                       un->un_flags |= UN_ULOCK;
+                       VOP_LOCK(un->un_uppervp);
+               }
+#ifdef DIAGNOSTIC
+               if (un->un_flags & UN_KLOCK)
+                       panic("union: dangling upper lock");
+#endif
+       }
 
+       if (un->un_flags & UN_LOCKED) {
 #ifdef DIAGNOSTIC
-       if (un->un_pid == curproc->p_pid)
-               panic("union: locking agsinst myself");
+               if (curproc && un->un_pid == curproc->p_pid &&
+                           un->un_pid > -1 && curproc->p_pid > -1)
+                       panic("union: locking against myself");
 #endif
-       while (un->un_flags & UN_LOCKED) {
                un->un_flags |= UN_WANT;
                sleep((caddr_t) &un->un_flags, PINOD);
+               goto start;
        }
-       un->un_flags |= UN_LOCKED;
+
 #ifdef DIAGNOSTIC
-       un->un_pid = curproc->p_pid;
+       if (curproc)
+               un->un_pid = curproc->p_pid;
+       else
+               un->un_pid = -1;
 #endif
 
-       if (un->un_lowervp && !VOP_ISLOCKED(un->un_lowervp))
-               VOP_LOCK(un->un_lowervp);
-       if (un->un_uppervp && !VOP_ISLOCKED(un->un_uppervp))
-               VOP_LOCK(un->un_uppervp);
+       un->un_flags |= UN_LOCKED;
+       return (0);
 }
 
 int
@@ -745,18 +1276,20 @@ union_unlock(ap)
        struct union_node *un = VTOUNION(ap->a_vp);
 
 #ifdef DIAGNOSTIC
-       if (un->un_pid != curproc->p_pid)
-               panic("union: unlocking other process's union node");
        if ((un->un_flags & UN_LOCKED) == 0)
                panic("union: unlock unlocked node");
+       if (curproc && un->un_pid != curproc->p_pid &&
+                       curproc->p_pid > -1 && un->un_pid > -1)
+               panic("union: unlocking other process's union node");
 #endif
 
-       if (un->un_uppervp && VOP_ISLOCKED(un->un_uppervp))
+       un->un_flags &= ~UN_LOCKED;
+
+       if ((un->un_flags & (UN_ULOCK|UN_KLOCK)) == UN_ULOCK)
                VOP_UNLOCK(un->un_uppervp);
-       if (un->un_lowervp && VOP_ISLOCKED(un->un_lowervp))
-               VOP_UNLOCK(un->un_lowervp);
 
-       un->un_flags &= ~UN_LOCKED;
+       un->un_flags &= ~(UN_ULOCK|UN_KLOCK);
+
        if (un->un_flags & UN_WANT) {
                un->un_flags &= ~UN_WANT;
                wakeup((caddr_t) &un->un_flags);
@@ -765,26 +1298,177 @@ union_unlock(ap)
 #ifdef DIAGNOSTIC
        un->un_pid = 0;
 #endif
 #ifdef DIAGNOSTIC
        un->un_pid = 0;
 #endif
+
+       return (0);
 }
 
 }
 
+int
+union_bmap(ap)
+       struct vop_bmap_args /* {
+               struct vnode *a_vp;
+               daddr_t  a_bn;
+               struct vnode **a_vpp;
+               daddr_t *a_bnp;
+               int *a_runp;
+       } */ *ap;
+{      /*
+        * Pass a bmap request through to the vnode that OTHERVP()
+        * selects for this union vnode and return that call's result.
+        * The bn/vpp/bnp/runp arguments are handed on unchanged.
+        */
+       int error;
+       struct vnode *vp = OTHERVP(ap->a_vp);
+       int dolock = (vp == LOWERVP(ap->a_vp)); /* set when vp is the lower vnode */
+
+       if (dolock)
+               VOP_LOCK(vp);           /* lower vnode is locked just for this call */
+       else
+               FIXUP(VTOUNION(ap->a_vp)); /* FIXUP is defined elsewhere; presumably syncs the upper vnode's lock state -- confirm */
+       error = VOP_BMAP(vp, ap->a_bn, ap->a_vpp, ap->a_bnp, ap->a_runp);
+       if (dolock)
+               VOP_UNLOCK(vp);         /* drop the lock taken above */
+
+       return (error);
+}
+
+int
+union_print(ap)
+       struct vop_print_args /* {
+               struct vnode *a_vp;
+       } */ *ap;
+{      /*
+        * Print one line identifying a union vnode: the vnode pointer
+        * plus the values of UPPERVP() and LOWERVP() for it, in hex.
+        * Always returns 0.
+        */
+       struct vnode *vp = ap->a_vp;
+
+       printf("\ttag VT_UNION, vp=%x, uppervp=%x, lowervp=%x\n",
+                       vp, UPPERVP(vp), LOWERVP(vp));
+       return (0);
+}
+
+int
+union_islocked(ap)
+       struct vop_islocked_args /* {
+               struct vnode *a_vp;
+       } */ *ap;
+{      /*
+        * Report the union node's own lock flag: returns 1 when
+        * UN_LOCKED is set in un_flags, 0 otherwise.  Only the union
+        * node's flag word is examined here.
+        */
+
+       return ((VTOUNION(ap->a_vp)->un_flags & UN_LOCKED) ? 1 : 0);
+}
+
+int
+union_pathconf(ap)
+       struct vop_pathconf_args /* {
+               struct vnode *a_vp;
+               int a_name;
+               int *a_retval;
+       } */ *ap;
+{      /*
+        * Pass a pathconf query through to the vnode that OTHERVP()
+        * selects for this union vnode and return that call's result;
+        * a_name and a_retval are handed on unchanged.
+        */
+       int error;
+       struct vnode *vp = OTHERVP(ap->a_vp);
+       int dolock = (vp == LOWERVP(ap->a_vp)); /* set when vp is the lower vnode */
+
+       if (dolock)
+               VOP_LOCK(vp);           /* lower vnode is locked just for this call */
+       else
+               FIXUP(VTOUNION(ap->a_vp)); /* FIXUP is defined elsewhere; presumably syncs the upper vnode's lock state -- confirm */
+       error = VOP_PATHCONF(vp, ap->a_name, ap->a_retval);
+       if (dolock)
+               VOP_UNLOCK(vp);         /* drop the lock taken above */
+
+       return (error);
+}
+
+int
+union_advlock(ap)
+       struct vop_advlock_args /* {
+               struct vnode *a_vp;
+               caddr_t  a_id;
+               int  a_op;
+               struct flock *a_fl;
+               int  a_flags;
+       } */ *ap;
+{      /*
+        * Pass an advisory-lock request straight through to the vnode
+        * that OTHERVP() selects, with all other arguments unchanged,
+        * and return its result.  Unlike union_bmap/union_pathconf,
+        * no VOP_LOCK or FIXUP is done around the call.
+        */
+
+       return (VOP_ADVLOCK(OTHERVP(ap->a_vp), ap->a_id, ap->a_op,
+                               ap->a_fl, ap->a_flags));
+}
+
+
 /*
 /*
- * Global vfs data structures
+ * XXX - vop_strategy must be hand coded because it has no
+ * vnode in its arguments.
+ * This goes away with a merged VM/buffer cache.
  */
  */
-int (**union_vnodeop_p)();
-struct vnodeopv_entry_desc union_vnodeop_entries[] = {
-       { &vop_default_desc, union_bypass },
+int
+union_strategy(ap)
+       struct vop_strategy_args /* {
+               struct buf *a_bp;
+       } */ *ap;
+{      /*
+        * Run the strategy operation for a buffer whose b_vp is a union
+        * vnode: point the buffer at the vnode OTHERVP() selects, call
+        * VOP_STRATEGY on it, then restore the original b_vp.
+        * Returns whatever VOP_STRATEGY returned.
+        */
+       struct buf *bp = ap->a_bp;
+       int error;
+       struct vnode *savedvp;
 
 
-       { &vop_getattr_desc, union_getattr },
-       { &vop_inactive_desc, union_inactive },
-       { &vop_reclaim_desc, union_reclaim },
-       { &vop_print_desc, union_print },
+       savedvp = bp->b_vp;             /* remember the union vnode for the restore below */
+       bp->b_vp = OTHERVP(bp->b_vp);   /* temporarily retarget the buffer */
 
 
-       { &vop_strategy_desc, union_strategy },
-       { &vop_bwrite_desc, union_bwrite },
+#ifdef DIAGNOSTIC              /* sanity checks, compiled only with DIAGNOSTIC */
+       if (bp->b_vp == NULLVP)
+               panic("union_strategy: nil vp");
+       if (((bp->b_flags & B_READ) == 0) &&    /* B_READ clear: not a read */
+           (bp->b_vp == LOWERVP(savedvp)))
+               panic("union_strategy: writing to lowervp");
+#endif
 
 
-       { &vop_lock_desc, union_lock }, 
-       { &vop_unlock_desc, union_unlock }, 
+       error = VOP_STRATEGY(bp);
+       bp->b_vp = savedvp;             /* put the union vnode back before returning */
+
+       return (error);
+}
 
 
+/*
+ * Global vfs data structures
+ */
+int (**union_vnodeop_p)();
+struct vnodeopv_entry_desc union_vnodeop_entries[] = {  /* pairs of (operation descriptor, union handler) */
+       { &vop_default_desc, vn_default_error },        /* default; presumably used for ops not listed here -- confirm */
+       { &vop_lookup_desc, union_lookup },             /* lookup */
+       { &vop_create_desc, union_create },             /* create */
+       { &vop_mknod_desc, union_mknod },               /* mknod */
+       { &vop_open_desc, union_open },                 /* open */
+       { &vop_close_desc, union_close },               /* close */
+       { &vop_access_desc, union_access },             /* access */
+       { &vop_getattr_desc, union_getattr },           /* getattr */
+       { &vop_setattr_desc, union_setattr },           /* setattr */
+       { &vop_read_desc, union_read },                 /* read */
+       { &vop_write_desc, union_write },               /* write */
+       { &vop_ioctl_desc, union_ioctl },               /* ioctl */
+       { &vop_select_desc, union_select },             /* select */
+       { &vop_mmap_desc, union_mmap },                 /* mmap */
+       { &vop_fsync_desc, union_fsync },               /* fsync */
+       { &vop_seek_desc, union_seek },                 /* seek */
+       { &vop_remove_desc, union_remove },             /* remove */
+       { &vop_link_desc, union_link },                 /* link */
+       { &vop_rename_desc, union_rename },             /* rename */
+       { &vop_mkdir_desc, union_mkdir },               /* mkdir */
+       { &vop_rmdir_desc, union_rmdir },               /* rmdir */
+       { &vop_symlink_desc, union_symlink },           /* symlink */
+       { &vop_readdir_desc, union_readdir },           /* readdir */
+       { &vop_readlink_desc, union_readlink },         /* readlink */
+       { &vop_abortop_desc, union_abortop },           /* abortop */
+       { &vop_inactive_desc, union_inactive },         /* inactive */
+       { &vop_reclaim_desc, union_reclaim },           /* reclaim */
+       { &vop_lock_desc, union_lock },                 /* lock */
+       { &vop_unlock_desc, union_unlock },             /* unlock */
+       { &vop_bmap_desc, union_bmap },                 /* bmap */
+       { &vop_strategy_desc, union_strategy },         /* strategy */
+       { &vop_print_desc, union_print },               /* print */
+       { &vop_islocked_desc, union_islocked },         /* islocked */
+       { &vop_pathconf_desc, union_pathconf },         /* pathconf */
+       { &vop_advlock_desc, union_advlock },           /* advlock */
+#ifdef notdef          /* the entries below are compiled only if notdef is defined */
+       { &vop_blkatoff_desc, union_blkatoff },         /* blkatoff */
+       { &vop_valloc_desc, union_valloc },             /* valloc */
+       { &vop_vfree_desc, union_vfree },               /* vfree */
+       { &vop_truncate_desc, union_truncate },         /* truncate */
+       { &vop_update_desc, union_update },             /* update */
+       { &vop_bwrite_desc, union_bwrite },             /* bwrite */
+#endif
        { (struct vnodeop_desc*)NULL, (int(*)())NULL }  /* final NULL/NULL entry; presumably the end-of-table sentinel */
 };
 struct vnodeopv_desc union_vnodeop_opv_desc =
        { (struct vnodeop_desc*)NULL, (int(*)())NULL }
 };
 struct vnodeopv_desc union_vnodeop_opv_desc =