*
* %sccs.include.redist.c%
*
- * @(#)union_vfsops.c 1.1 (Berkeley) %G%
+ * @(#)union_vfsops.c 8.14 (Berkeley) %G%
*/
/*
- * Null Layer
- * (See union_vnops.c for a description of what this does.)
+ * Union Layer
*/
#include <sys/param.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/malloc.h>
-#include "union.h"
+#include <sys/filedesc.h>
+#include <sys/queue.h>
+#include <miscfs/union/union.h>
/*
* Mount union filesystem
{
int error = 0;
struct union_args args;
- struct vnode *lowerrootvp;
- struct vnode *upperrootvp;
- struct union_mount *um;
+ struct vnode *lowerrootvp = NULLVP;
+ struct vnode *upperrootvp = NULLVP;
+ struct union_mount *um = 0;
+ struct ucred *cred = 0;
+ struct ucred *scred;
+ struct vattr va;
+ char *cp;
+ int len;
u_int size;
#ifdef UNION_DIAGNOSTIC
/*
* Update is a no-op
*/
- if (mp->mnt_flag & MNT_UPDATE)
- return (EOPNOTSUPP);
+ if (mp->mnt_flag & MNT_UPDATE) {
+ /*
+	 * Need to provide:
+ * 1. a way to convert between rdonly and rdwr mounts.
+ * 2. support for nfs exports.
+ */
+ error = EOPNOTSUPP;
+ goto bad;
+ }
/*
* Get argument
*/
if (error = copyin(data, (caddr_t)&args, sizeof(struct union_args)))
- return (error);
+ goto bad;
lowerrootvp = mp->mnt_vnodecovered;
VREF(lowerrootvp);
/*
- * Find upper node
+ * Find upper node.
*/
NDINIT(ndp, LOOKUP, FOLLOW|WANTPARENT,
UIO_USERSPACE, args.target, p);
- if (error = namei(ndp)) {
- vrele(lowerrootvp);
- return (error);
- }
+
+ if (error = namei(ndp))
+ goto bad;
+
upperrootvp = ndp->ni_vp;
vrele(ndp->ni_dvp);
ndp->ni_dvp = NULL;
if (upperrootvp->v_type != VDIR) {
- vrele(upperrootvp);
- return (EINVAL);
+ error = EINVAL;
+ goto bad;
}
um = (struct union_mount *) malloc(sizeof(struct union_mount),
/*
* Keep a held reference to the target vnodes.
* They are vrele'd in union_unmount.
+ *
+ * Depending on the _BELOW flag, the filesystems are
+ * viewed in a different order. In effect, this is the
+ * same as providing a mount under option to the mount syscall.
*/
- um->um_lowervp = lowerrootvp;
- um->um_uppervp = upperrootvp;
+
+ um->um_op = args.mntflags & UNMNT_OPMASK;
+ switch (um->um_op) {
+ case UNMNT_ABOVE:
+ um->um_lowervp = lowerrootvp;
+ um->um_uppervp = upperrootvp;
+ break;
+
+ case UNMNT_BELOW:
+ um->um_lowervp = upperrootvp;
+ um->um_uppervp = lowerrootvp;
+ break;
+
+ case UNMNT_REPLACE:
+ vrele(lowerrootvp);
+ lowerrootvp = NULLVP;
+ um->um_uppervp = upperrootvp;
+ um->um_lowervp = lowerrootvp;
+ break;
+
+ default:
+ error = EINVAL;
+ goto bad;
+ }
+
/*
- * Take a copy of the process's credentials. This isn't
- * quite right since the euid will always be zero and we
- * want to get the "real" users credentials. So fix up
- * the uid field after taking the copy.
+ * Unless the mount is readonly, ensure that the top layer
+ * supports whiteout operations
*/
- um->um_cred = crdup(p->p_ucred);
- um->um_cred->cr_uid = p->p_cred->p_ruid;
+ if ((mp->mnt_flag & MNT_RDONLY) == 0) {
+ error = VOP_WHITEOUT(um->um_uppervp, (struct componentname *) 0, LOOKUP);
+ if (error)
+ goto bad;
+ }
+
+ um->um_cred = p->p_ucred;
+ crhold(um->um_cred);
+ um->um_cmode = UN_DIRMODE &~ p->p_fd->fd_cmask;
+
+ /*
+ * Depending on what you think the MNT_LOCAL flag might mean,
+ * you may want the && to be || on the conditional below.
+ * At the moment it has been defined that the filesystem is
+ * only local if it is all local, ie the MNT_LOCAL flag implies
+ * that the entire namespace is local. If you think the MNT_LOCAL
+ * flag implies that some of the files might be stored locally
+ * then you will want to change the conditional.
+ */
+ if (um->um_op == UNMNT_ABOVE) {
+ if (((um->um_lowervp == NULLVP) ||
+ (um->um_lowervp->v_mount->mnt_flag & MNT_LOCAL)) &&
+ (um->um_uppervp->v_mount->mnt_flag & MNT_LOCAL))
+ mp->mnt_flag |= MNT_LOCAL;
+ }
+
+ /*
+ * Copy in the upper layer's RDONLY flag. This is for the benefit
+ * of lookup() which explicitly checks the flag, rather than asking
+	 * the filesystem for its own opinion. This means that an update
+ * mount of the underlying filesystem to go from rdonly to rdwr
+ * will leave the unioned view as read-only.
+ */
+ mp->mnt_flag |= (um->um_uppervp->v_mount->mnt_flag & MNT_RDONLY);
- if ((lowerrootvp->v_mount->mnt_flag & MNT_LOCAL) ||
- (upperrootvp->v_mount->mnt_flag & MNT_LOCAL))
- mp->mnt_flag |= MNT_LOCAL;
mp->mnt_data = (qaddr_t) um;
getnewfsid(mp, MOUNT_UNION);
(void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size);
bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
- (void) copyinstr(args.target, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
- &size);
- bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+
+ switch (um->um_op) {
+ case UNMNT_ABOVE:
+ cp = "<above>:";
+ break;
+ case UNMNT_BELOW:
+ cp = "<below>:";
+ break;
+ case UNMNT_REPLACE:
+ cp = "";
+ break;
+ }
+ len = strlen(cp);
+ bcopy(cp, mp->mnt_stat.f_mntfromname, len);
+
+ cp = mp->mnt_stat.f_mntfromname + len;
+ len = MNAMELEN - len;
+
+ (void) copyinstr(args.target, cp, len - 1, &size);
+ bzero(cp + size, len - size);
+
#ifdef UNION_DIAGNOSTIC
- printf("union_mount: upper %s, lower at %s\n",
+ printf("union_mount: from %s, on %s\n",
mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname);
#endif
return (0);
+
+bad:
+ if (um)
+ free(um, M_UFSMNT);
+ if (cred)
+ crfree(cred);
+ if (upperrootvp)
+ vrele(upperrootvp);
+ if (lowerrootvp)
+ vrele(lowerrootvp);
+ return (error);
}
/*
struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
struct vnode *um_rootvp;
int error;
+ int freeing;
int flags = 0;
extern int doforce;
if (error = union_root(mp, &um_rootvp))
return (error);
- if (um_rootvp->v_usecount > 1)
+
+ /*
+ * Keep flushing vnodes from the mount list.
+ * This is needed because of the un_pvp held
+ * reference to the parent vnode.
+ * If more vnodes have been freed on a given pass,
+ * the try again. The loop will iterate at most
+ * (d) times, where (d) is the maximum tree depth
+ * in the filesystem.
+ */
+ for (freeing = 0; vflush(mp, um_rootvp, flags) != 0;) {
+ struct vnode *vp;
+ int n;
+
+ /* count #vnodes held on mount list */
+ for (n = 0, vp = mp->mnt_vnodelist.lh_first;
+ vp != NULLVP;
+ vp = vp->v_mntvnodes.le_next)
+ n++;
+
+ /* if this is unchanged then stop */
+ if (n == freeing)
+ break;
+
+		/* otherwise try one more time */
+ freeing = n;
+ }
+
+ /* At this point the root vnode should have a single reference */
+ if (um_rootvp->v_usecount > 1) {
+ vput(um_rootvp);
return (EBUSY);
- if (error = vflush(mp, um_rootvp, flags))
- return (error);
+ }
#ifdef UNION_DIAGNOSTIC
- vprint("alias root of lower", um_rootvp);
+ vprint("union root", um_rootvp);
#endif
/*
* Discard references to upper and lower target vnodes.
*/
- vrele(um->um_lowervp);
+ if (um->um_lowervp)
+ vrele(um->um_lowervp);
vrele(um->um_uppervp);
crfree(um->um_cred);
/*
* Release reference on underlying root vnode
*/
- vrele(um_rootvp);
+ vput(um_rootvp);
/*
* And blow it away for future re-use
*/
*/
free(mp->mnt_data, M_UFSMNT); /* XXX */
mp->mnt_data = 0;
- return 0;
+ return (0);
}
int
{
struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
int error;
-
-#ifdef UNION_DIAGNOSTIC
- printf("union_root(mp = %x, lvp = %x, uvp = %x)\n", mp,
- um->um_lowervp,
- um->um_uppervp);
-#endif
+ int loselock;
/*
* Return locked reference to root.
*/
- error = union_allocvp(vpp, mp, (struct vnode *) 0,
+ VREF(um->um_uppervp);
+ if ((um->um_op == UNMNT_BELOW) &&
+ VOP_ISLOCKED(um->um_uppervp)) {
+ loselock = 1;
+ } else {
+ VOP_LOCK(um->um_uppervp);
+ loselock = 0;
+ }
+ if (um->um_lowervp)
+ VREF(um->um_lowervp);
+ error = union_allocvp(vpp, mp,
+ (struct vnode *) 0,
+ (struct vnode *) 0,
(struct componentname *) 0,
um->um_uppervp,
- um->um_lowervp);
- if (error == 0)
- (*vpp)->v_flag |= VROOT;
+ um->um_lowervp,
+ 1);
+
+ if (error) {
+ if (!loselock)
+ VOP_UNLOCK(um->um_uppervp);
+ vrele(um->um_uppervp);
+ if (um->um_lowervp)
+ vrele(um->um_lowervp);
+ } else {
+ if (loselock)
+ VTOUNION(*vpp)->un_flags &= ~UN_ULOCK;
+ }
return (error);
}
bzero(&mstat, sizeof(mstat));
- error = VFS_STATFS(um->um_lowervp->v_mount, &mstat, p);
- if (error)
- return (error);
+ if (um->um_lowervp) {
+ error = VFS_STATFS(um->um_lowervp->v_mount, &mstat, p);
+ if (error)
+ return (error);
+ }
/* now copy across the "interesting" information and fake the rest */
#if 0
if (error)
return (error);
- sbp->f_type = mstat.f_type;
+ sbp->f_type = MOUNT_UNION;
sbp->f_flags = mstat.f_flags;
sbp->f_bsize = mstat.f_bsize;
sbp->f_iosize = mstat.f_iosize;
* kind of sense. none of this makes sense though.
*/
- if (mstat.f_bsize != lbsize) {
+ if (mstat.f_bsize != lbsize)
sbp->f_blocks = sbp->f_blocks * lbsize / mstat.f_bsize;
- sbp->f_bfree = sbp->f_bfree * lbsize / mstat.f_bsize;
- sbp->f_bavail = sbp->f_bavail * lbsize / mstat.f_bsize;
- }
+
+ /*
+ * The "total" fields count total resources in all layers,
+ * the "free" fields count only those resources which are
+ * free in the upper layer (since only the upper layer
+ * is writeable).
+ */
sbp->f_blocks += mstat.f_blocks;
- sbp->f_bfree += mstat.f_bfree;
- sbp->f_bavail += mstat.f_bavail;
+ sbp->f_bfree = mstat.f_bfree;
+ sbp->f_bavail = mstat.f_bavail;
sbp->f_files += mstat.f_files;
- sbp->f_ffree += mstat.f_ffree;
+ sbp->f_ffree = mstat.f_ffree;
if (sbp != &mp->mnt_stat) {
bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid));