changes for whiteouts and union filesystem
[unix-history] / usr / src / sys / ufs / ffs / ffs_vnops.c
index 3fbd51d..f49458c 100644 (file)
@@ -1,10 +1,10 @@
 /*
 /*
- * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
- * All rights reserved.
+ * Copyright (c) 1982, 1986, 1989, 1993
+ *     The Regents of the University of California.  All rights reserved.
  *
  * %sccs.include.redist.c%
  *
  *
  * %sccs.include.redist.c%
  *
- *     @(#)ffs_vnops.c 7.82 (Berkeley) %G%
+ *     @(#)ffs_vnops.c 8.9 (Berkeley) %G%
  */
 
 #include <sys/param.h>
  */
 
 #include <sys/param.h>
 #include <sys/conf.h>
 #include <sys/mount.h>
 #include <sys/vnode.h>
 #include <sys/conf.h>
 #include <sys/mount.h>
 #include <sys/vnode.h>
-#include <sys/specdev.h>
-#include <sys/fifo.h>
 #include <sys/malloc.h>
 
 #include <vm/vm.h>
 
 #include <sys/malloc.h>
 
 #include <vm/vm.h>
 
+#include <miscfs/specfs/specdev.h>
+#include <miscfs/fifofs/fifo.h>
+
 #include <ufs/ufs/lockf.h>
 #include <ufs/ufs/quota.h>
 #include <ufs/ufs/inode.h>
 #include <ufs/ufs/lockf.h>
 #include <ufs/ufs/quota.h>
 #include <ufs/ufs/inode.h>
@@ -39,6 +40,7 @@ struct vnodeopv_entry_desc ffs_vnodeop_entries[] = {
        { &vop_default_desc, vn_default_error },
        { &vop_lookup_desc, ufs_lookup },               /* lookup */
        { &vop_create_desc, ufs_create },               /* create */
        { &vop_default_desc, vn_default_error },
        { &vop_lookup_desc, ufs_lookup },               /* lookup */
        { &vop_create_desc, ufs_create },               /* create */
+       { &vop_whiteout_desc, ufs_whiteout },           /* whiteout */
        { &vop_mknod_desc, ufs_mknod },                 /* mknod */
        { &vop_open_desc, ufs_open },                   /* open */
        { &vop_close_desc, ufs_close },                 /* close */
        { &vop_mknod_desc, ufs_mknod },                 /* mknod */
        { &vop_open_desc, ufs_open },                   /* open */
        { &vop_close_desc, ufs_close },                 /* close */
@@ -61,18 +63,19 @@ struct vnodeopv_entry_desc ffs_vnodeop_entries[] = {
        { &vop_readdir_desc, ufs_readdir },             /* readdir */
        { &vop_readlink_desc, ufs_readlink },           /* readlink */
        { &vop_abortop_desc, ufs_abortop },             /* abortop */
        { &vop_readdir_desc, ufs_readdir },             /* readdir */
        { &vop_readlink_desc, ufs_readlink },           /* readlink */
        { &vop_abortop_desc, ufs_abortop },             /* abortop */
-       { &vop_inactive_desc, ffs_inactive },           /* inactive */
-       { &vop_reclaim_desc, ufs_reclaim },             /* reclaim */
+       { &vop_inactive_desc, ufs_inactive },           /* inactive */
+       { &vop_reclaim_desc, ffs_reclaim },             /* reclaim */
        { &vop_lock_desc, ufs_lock },                   /* lock */
        { &vop_unlock_desc, ufs_unlock },               /* unlock */
        { &vop_lock_desc, ufs_lock },                   /* lock */
        { &vop_unlock_desc, ufs_unlock },               /* unlock */
-       { &vop_bmap_desc, ffs_bmap },                   /* bmap */
+       { &vop_bmap_desc, ufs_bmap },                   /* bmap */
        { &vop_strategy_desc, ufs_strategy },           /* strategy */
        { &vop_print_desc, ufs_print },                 /* print */
        { &vop_islocked_desc, ufs_islocked },           /* islocked */
        { &vop_strategy_desc, ufs_strategy },           /* strategy */
        { &vop_print_desc, ufs_print },                 /* print */
        { &vop_islocked_desc, ufs_islocked },           /* islocked */
+       { &vop_pathconf_desc, ufs_pathconf },           /* pathconf */
        { &vop_advlock_desc, ufs_advlock },             /* advlock */
        { &vop_blkatoff_desc, ffs_blkatoff },           /* blkatoff */
        { &vop_advlock_desc, ufs_advlock },             /* advlock */
        { &vop_blkatoff_desc, ffs_blkatoff },           /* blkatoff */
-       { &vop_vget_desc, ffs_vget },                   /* vget */
        { &vop_valloc_desc, ffs_valloc },               /* valloc */
        { &vop_valloc_desc, ffs_valloc },               /* valloc */
+       { &vop_reallocblks_desc, ffs_reallocblks },     /* reallocblks */
        { &vop_vfree_desc, ffs_vfree },                 /* vfree */
        { &vop_truncate_desc, ffs_truncate },           /* truncate */
        { &vop_update_desc, ffs_update },               /* update */
        { &vop_vfree_desc, ffs_vfree },                 /* vfree */
        { &vop_truncate_desc, ffs_truncate },           /* truncate */
        { &vop_update_desc, ffs_update },               /* update */
@@ -98,7 +101,7 @@ struct vnodeopv_entry_desc ffs_specop_entries[] = {
        { &vop_ioctl_desc, spec_ioctl },                /* ioctl */
        { &vop_select_desc, spec_select },              /* select */
        { &vop_mmap_desc, spec_mmap },                  /* mmap */
        { &vop_ioctl_desc, spec_ioctl },                /* ioctl */
        { &vop_select_desc, spec_select },              /* select */
        { &vop_mmap_desc, spec_mmap },                  /* mmap */
-       { &vop_fsync_desc, spec_fsync },                /* fsync */
+       { &vop_fsync_desc, ffs_fsync },                 /* fsync */
        { &vop_seek_desc, spec_seek },                  /* seek */
        { &vop_remove_desc, spec_remove },              /* remove */
        { &vop_link_desc, spec_link },                  /* link */
        { &vop_seek_desc, spec_seek },                  /* seek */
        { &vop_remove_desc, spec_remove },              /* remove */
        { &vop_link_desc, spec_link },                  /* link */
@@ -109,19 +112,20 @@ struct vnodeopv_entry_desc ffs_specop_entries[] = {
        { &vop_readdir_desc, spec_readdir },            /* readdir */
        { &vop_readlink_desc, spec_readlink },          /* readlink */
        { &vop_abortop_desc, spec_abortop },            /* abortop */
        { &vop_readdir_desc, spec_readdir },            /* readdir */
        { &vop_readlink_desc, spec_readlink },          /* readlink */
        { &vop_abortop_desc, spec_abortop },            /* abortop */
-       { &vop_inactive_desc, ffs_inactive },           /* inactive */
-       { &vop_reclaim_desc, ufs_reclaim },             /* reclaim */
+       { &vop_inactive_desc, ufs_inactive },           /* inactive */
+       { &vop_reclaim_desc, ffs_reclaim },             /* reclaim */
        { &vop_lock_desc, ufs_lock },                   /* lock */
        { &vop_unlock_desc, ufs_unlock },               /* unlock */
        { &vop_bmap_desc, spec_bmap },                  /* bmap */
        { &vop_strategy_desc, spec_strategy },          /* strategy */
        { &vop_print_desc, ufs_print },                 /* print */
        { &vop_islocked_desc, ufs_islocked },           /* islocked */
        { &vop_lock_desc, ufs_lock },                   /* lock */
        { &vop_unlock_desc, ufs_unlock },               /* unlock */
        { &vop_bmap_desc, spec_bmap },                  /* bmap */
        { &vop_strategy_desc, spec_strategy },          /* strategy */
        { &vop_print_desc, ufs_print },                 /* print */
        { &vop_islocked_desc, ufs_islocked },           /* islocked */
+       { &vop_pathconf_desc, spec_pathconf },          /* pathconf */
        { &vop_advlock_desc, spec_advlock },            /* advlock */
        { &vop_blkatoff_desc, spec_blkatoff },          /* blkatoff */
        { &vop_advlock_desc, spec_advlock },            /* advlock */
        { &vop_blkatoff_desc, spec_blkatoff },          /* blkatoff */
-       { &vop_vget_desc, spec_vget },                  /* vget */
        { &vop_valloc_desc, spec_valloc },              /* valloc */
        { &vop_valloc_desc, spec_valloc },              /* valloc */
-       { &vop_vfree_desc, spec_vfree },                /* vfree */
+       { &vop_reallocblks_desc, spec_reallocblks },    /* reallocblks */
+       { &vop_vfree_desc, ffs_vfree },                 /* vfree */
        { &vop_truncate_desc, spec_truncate },          /* truncate */
        { &vop_update_desc, ffs_update },               /* update */
        { &vop_bwrite_desc, vn_bwrite },
        { &vop_truncate_desc, spec_truncate },          /* truncate */
        { &vop_update_desc, ffs_update },               /* update */
        { &vop_bwrite_desc, vn_bwrite },
@@ -147,7 +151,7 @@ struct vnodeopv_entry_desc ffs_fifoop_entries[] = {
        { &vop_ioctl_desc, fifo_ioctl },                /* ioctl */
        { &vop_select_desc, fifo_select },              /* select */
        { &vop_mmap_desc, fifo_mmap },                  /* mmap */
        { &vop_ioctl_desc, fifo_ioctl },                /* ioctl */
        { &vop_select_desc, fifo_select },              /* select */
        { &vop_mmap_desc, fifo_mmap },                  /* mmap */
-       { &vop_fsync_desc, fifo_fsync },                /* fsync */
+       { &vop_fsync_desc, ffs_fsync },                 /* fsync */
        { &vop_seek_desc, fifo_seek },                  /* seek */
        { &vop_remove_desc, fifo_remove },              /* remove */
        { &vop_link_desc, fifo_link },                  /* link */
        { &vop_seek_desc, fifo_seek },                  /* seek */
        { &vop_remove_desc, fifo_remove },              /* remove */
        { &vop_link_desc, fifo_link },                  /* link */
@@ -158,19 +162,20 @@ struct vnodeopv_entry_desc ffs_fifoop_entries[] = {
        { &vop_readdir_desc, fifo_readdir },            /* readdir */
        { &vop_readlink_desc, fifo_readlink },          /* readlink */
        { &vop_abortop_desc, fifo_abortop },            /* abortop */
        { &vop_readdir_desc, fifo_readdir },            /* readdir */
        { &vop_readlink_desc, fifo_readlink },          /* readlink */
        { &vop_abortop_desc, fifo_abortop },            /* abortop */
-       { &vop_inactive_desc, ffs_inactive },           /* inactive */
-       { &vop_reclaim_desc, ufs_reclaim },             /* reclaim */
+       { &vop_inactive_desc, ufs_inactive },           /* inactive */
+       { &vop_reclaim_desc, ffs_reclaim },             /* reclaim */
        { &vop_lock_desc, ufs_lock },                   /* lock */
        { &vop_unlock_desc, ufs_unlock },               /* unlock */
        { &vop_bmap_desc, fifo_bmap },                  /* bmap */
        { &vop_strategy_desc, fifo_strategy },          /* strategy */
        { &vop_print_desc, ufs_print },                 /* print */
        { &vop_islocked_desc, ufs_islocked },           /* islocked */
        { &vop_lock_desc, ufs_lock },                   /* lock */
        { &vop_unlock_desc, ufs_unlock },               /* unlock */
        { &vop_bmap_desc, fifo_bmap },                  /* bmap */
        { &vop_strategy_desc, fifo_strategy },          /* strategy */
        { &vop_print_desc, ufs_print },                 /* print */
        { &vop_islocked_desc, ufs_islocked },           /* islocked */
+       { &vop_pathconf_desc, fifo_pathconf },          /* pathconf */
        { &vop_advlock_desc, fifo_advlock },            /* advlock */
        { &vop_blkatoff_desc, fifo_blkatoff },          /* blkatoff */
        { &vop_advlock_desc, fifo_advlock },            /* advlock */
        { &vop_blkatoff_desc, fifo_blkatoff },          /* blkatoff */
-       { &vop_vget_desc, fifo_vget },                  /* vget */
        { &vop_valloc_desc, fifo_valloc },              /* valloc */
        { &vop_valloc_desc, fifo_valloc },              /* valloc */
-       { &vop_vfree_desc, fifo_vfree },                /* vfree */
+       { &vop_reallocblks_desc, fifo_reallocblks },    /* reallocblks */
+       { &vop_vfree_desc, ffs_vfree },                 /* vfree */
        { &vop_truncate_desc, fifo_truncate },          /* truncate */
        { &vop_update_desc, ffs_update },               /* update */
        { &vop_bwrite_desc, vn_bwrite },
        { &vop_truncate_desc, fifo_truncate },          /* truncate */
        { &vop_update_desc, ffs_update },               /* update */
        { &vop_bwrite_desc, vn_bwrite },
@@ -180,236 +185,98 @@ struct vnodeopv_desc ffs_fifoop_opv_desc =
        { &ffs_fifoop_p, ffs_fifoop_entries };
 #endif /* FIFO */
 
        { &ffs_fifoop_p, ffs_fifoop_entries };
 #endif /* FIFO */
 
-
+#ifdef DEBUG
 /*
 /*
- * Vnode op for reading.
+ * Enabling cluster read/write operations.
  */
  */
-/* ARGSUSED */
-ffs_read(ap)
-       struct vop_read_args *ap;
-{
-       register struct vnode *vp = ap->a_vp;
-       register struct inode *ip = VTOI(vp);
-       register struct uio *uio = ap->a_uio;
-       register struct fs *fs;
-       struct buf *bp;
-       daddr_t lbn, bn, rablock;
-       off_t diff;
-       int rasize, error = 0;
-       long size, n, on;
-
-#ifdef DIAGNOSTIC
-       int type;
-       if (uio->uio_rw != UIO_READ)
-               panic("ffs_read mode");
-       type = ip->i_mode & IFMT;
-       if (type != IFDIR && type != IFREG && type != IFLNK)
-               panic("ffs_read type");
+#include <sys/sysctl.h>
+int doclusterread = 1;
+struct ctldebug debug11 = { "doclusterread", &doclusterread };
+int doclusterwrite = 1;
+struct ctldebug debug12 = { "doclusterwrite", &doclusterwrite };
+#else
+/* XXX for ufs_readwrite */
+#define doclusterread 1
+#define doclusterwrite 1
 #endif
 #endif
-       if (uio->uio_resid == 0)
-               return (0);
-       fs = ip->i_fs;
-       if (uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize)
-               return (EFBIG);
-       ip->i_flag |= IACC;
-       do {
-               lbn = lblkno(fs, uio->uio_offset);
-               on = blkoff(fs, uio->uio_offset);
-               n = MIN((unsigned)(fs->fs_bsize - on), uio->uio_resid);
-               diff = ip->i_size - uio->uio_offset;
-               if (diff <= 0)
-                       return (0);
-               if (diff < n)
-                       n = diff;
-               size = blksize(fs, ip, lbn);
-               rablock = lbn + 1;
-               if (vp->v_lastr + 1 == lbn &&
-                   lblktosize(fs, rablock) < ip->i_size) {
-                       rasize = blksize(fs, ip, rablock);
-                       error = breadn(vp, lbn, size, &rablock,
-                               &rasize, 1, NOCRED, &bp);
-               } else
-                       error = bread(vp, lbn, size, NOCRED, &bp);
-               vp->v_lastr = lbn;
-               n = MIN(n, size - bp->b_resid);
-               if (error) {
-                       brelse(bp);
-                       return (error);
-               }
-               error = uiomove(bp->b_un.b_addr + on, (int)n, uio);
-               if (n + on == fs->fs_bsize || uio->uio_offset == ip->i_size)
-                       bp->b_flags |= B_AGE;
-               brelse(bp);
-       } while (error == 0 && uio->uio_resid > 0 && n != 0);
-       return (error);
-}
+
+#include <ufs/ufs/ufs_readwrite.c>
 
 /*
 
 /*
- * Vnode op for writing.
+ * Synch an open file.
  */
  */
-ffs_write(ap)
-       struct vop_write_args *ap;
+/* ARGSUSED */
+int
+ffs_fsync(ap)
+       struct vop_fsync_args /* {
+               struct vnode *a_vp;
+               struct ucred *a_cred;
+               int a_waitfor;
+               struct proc *a_p;
+       } */ *ap;
 {
 {
-       USES_VOP_TRUNCATE;
-       USES_VOP_UPDATE;
        register struct vnode *vp = ap->a_vp;
        register struct vnode *vp = ap->a_vp;
-       register struct uio *uio = ap->a_uio;
-       register struct inode *ip = VTOI(vp);
-       register struct fs *fs;
-       struct proc *p = uio->uio_procp;
-       int ioflag = ap->a_ioflag;
-       struct buf *bp;
-       daddr_t lbn, bn;
-       off_t osize;
-       int n, on, flags;
-       int size, resid, error = 0;
+       register struct buf *bp;
+       struct timeval tv;
+       struct buf *nbp;
+       int s;
 
 
-#ifdef DIAGNOSTIC
-       if (uio->uio_rw != UIO_WRITE)
-               panic("ffs_write mode");
-#endif
-       switch (vp->v_type) {
-       case VREG:
-               if (ioflag & IO_APPEND)
-                       uio->uio_offset = ip->i_size;
-               /* fall through */
-       case VLNK:
-               break;
-
-       case VDIR:
-               if ((ioflag & IO_SYNC) == 0)
-                       panic("ffs_write nonsync dir write");
-               break;
-
-       default:
-               panic("ffs_write type");
-       }
-       if (uio->uio_resid == 0)
-               return (0);
-       fs = ip->i_fs;
-       if (uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize)
-               return (EFBIG);
        /*
        /*
-        * Maybe this should be above the vnode op call, but so long as
-        * file servers have no limits, i don't think it matters
+        * Flush all dirty buffers associated with a vnode.
         */
         */
-       if (vp->v_type == VREG && p &&
-           uio->uio_offset + uio->uio_resid >
-             p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
-               psignal(p, SIGXFSZ);
-               return (EFBIG);
-       }
-       resid = uio->uio_resid;
-       osize = ip->i_size;
-       flags = 0;
-       if (ioflag & IO_SYNC)
-               flags = B_SYNC;
-       do {
-               lbn = lblkno(fs, uio->uio_offset);
-               on = blkoff(fs, uio->uio_offset);
-               n = MIN((unsigned)(fs->fs_bsize - on), uio->uio_resid);
-               if (n < fs->fs_bsize)
-                       flags |= B_CLRBUF;
+loop:
+       s = splbio();
+       for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
+               nbp = bp->b_vnbufs.le_next;
+               if ((bp->b_flags & B_BUSY))
+                       continue;
+               if ((bp->b_flags & B_DELWRI) == 0)
+                       panic("ffs_fsync: not dirty");
+               bremfree(bp);
+               bp->b_flags |= B_BUSY;
+               splx(s);
+               /*
+                * Wait for I/O associated with indirect blocks to complete,
+                * since there is no way to quickly wait for them below.
+                */
+               if (bp->b_vp == vp || ap->a_waitfor == MNT_NOWAIT)
+                       (void) bawrite(bp);
                else
                else
-                       flags &= ~B_CLRBUF;
-               if (error = ffs_balloc(ip, lbn, on + n, ap->a_cred, &bp, flags))
-                       break;
-               bn = bp->b_blkno;
-               if (uio->uio_offset + n > ip->i_size) {
-                       ip->i_size = uio->uio_offset + n;
-                       vnode_pager_setsize(vp, (u_long)ip->i_size);
-               }
-               size = blksize(fs, ip, lbn);
-               (void) vnode_pager_uncache(vp);
-               n = MIN(n, size - bp->b_resid);
-               error = uiomove(bp->b_un.b_addr + on, n, uio);
-               if (ioflag & IO_SYNC)
                        (void) bwrite(bp);
                        (void) bwrite(bp);
-               else if (n + on == fs->fs_bsize) {
-                       bp->b_flags |= B_AGE;
-                       bawrite(bp);
-               } else
-                       bdwrite(bp);
-               ip->i_flag |= IUPD|ICHG;
-               if (ap->a_cred->cr_uid != 0)
-                       ip->i_mode &= ~(ISUID|ISGID);
-       } while (error == 0 && uio->uio_resid > 0 && n != 0);
-       if (error && (ioflag & IO_UNIT)) {
-               (void)VOP_TRUNCATE(vp, osize, ioflag & IO_SYNC, ap->a_cred);
-               uio->uio_offset -= resid - uio->uio_resid;
-               uio->uio_resid = resid;
+               goto loop;
        }
        }
-       if (!error && (ioflag & IO_SYNC))
-               error = VOP_UPDATE(vp, &time, &time, 1);
-       return (error);
-}
-
-/*
- * Synch an open file.
- */
-/* ARGSUSED */
-int
-ffs_fsync(ap)
-       struct vop_fsync_args *ap;
-{
-       USES_VOP_UPDATE;
-       struct inode *ip = VTOI(ap->a_vp);
-
-       if (ap->a_fflags & FWRITE)
-               ip->i_flag |= ICHG;
-       vflushbuf(ap->a_vp, ap->a_waitfor == MNT_WAIT ? B_SYNC : 0);
-       return (VOP_UPDATE(ap->a_vp, &time, &time, ap->a_waitfor == MNT_WAIT));
+       if (ap->a_waitfor == MNT_WAIT) {
+               while (vp->v_numoutput) {
+                       vp->v_flag |= VBWAIT;
+                       sleep((caddr_t)&vp->v_numoutput, PRIBIO + 1);
+               }
+#ifdef DIAGNOSTIC
+               if (vp->v_dirtyblkhd.lh_first) {
+                       vprint("ffs_fsync: dirty", vp);
+                       goto loop;
+               }
+#endif
+       }
+       splx(s);
+       tv = time;
+       return (VOP_UPDATE(ap->a_vp, &tv, &tv, ap->a_waitfor == MNT_WAIT));
 }
 
 /*
 }
 
 /*
- * Last reference to an inode, write the inode out and if necessary,
- * truncate and deallocate the file.
+ * Reclaim an inode so that it can be used for other purposes.
  */
 int
  */
 int
-ffs_inactive(ap)
-       struct vop_inactive_args *ap;
+ffs_reclaim(ap)
+       struct vop_reclaim_args /* {
+               struct vnode *a_vp;
+       } */ *ap;
 {
 {
-       USES_VOP_TRUNCATE;
-       USES_VOP_UPDATE;
-       USES_VOP_VFREE;
        register struct vnode *vp = ap->a_vp;
        register struct vnode *vp = ap->a_vp;
-       register struct inode *ip = VTOI(vp);
-       int mode, error;
-       extern int prtactive;
-
-       if (prtactive && vp->v_usecount != 0)
-               vprint("ffs_inactive: pushing active", vp);
+       int error;
 
 
-       /* Get rid of inodes related to stale file handles. */
-       if (ip->i_mode == 0) {
-               if ((vp->v_flag & VXLOCK) == 0)
-                       vgone(vp);
-               return (0);
-       }
-
-       error = 0;
-       ILOCK(ip);
-       if (ip->i_nlink <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
-#ifdef QUOTA
-               if (!getinoquota(ip))
-                       (void)chkiq(ip, -1, NOCRED, 0);
-#endif
-               error = VOP_TRUNCATE(vp, (off_t)0, 0, NOCRED);
-               mode = ip->i_mode;
-               ip->i_mode = 0;
-               ip->i_rdev = 0;
-               ip->i_flag |= IUPD|ICHG;
-               VOP_VFREE(vp, ip->i_number, mode);
-       }
-       if (ip->i_flag&(IUPD|IACC|ICHG|IMOD))
-               VOP_UPDATE(vp, &time, &time, 0);
-       IUNLOCK(ip);
-       ip->i_flag = 0;
-       /*
-        * If we are done with the inode, reclaim it
-        * so that it can be reused immediately.
-        */
-       if (vp->v_usecount == 0 && ip->i_mode == 0)
-               vgone(vp);
-       return (error);
+       if (error = ufs_reclaim(vp))
+               return (error);
+       FREE(vp->v_data, M_FFSNODE);
+       vp->v_data = NULL;
+       return (0);
 }
 }