Do not lock inode during entire read/write operations (because if
[unix-history] / usr / src / sys / ufs / lfs / lfs_vnops.c
index eb2375d..94e5c03 100644 (file)
@@ -4,7 +4,7 @@
  *
  * %sccs.include.redist.c%
  *
  *
  * %sccs.include.redist.c%
  *
- *     @(#)lfs_vnops.c 7.75 (Berkeley) %G%
+ *     @(#)lfs_vnops.c 7.94 (Berkeley) %G%
  */
 
 #include <sys/param.h>
  */
 
 #include <sys/param.h>
 #include <sys/conf.h>
 #include <sys/mount.h>
 #include <sys/vnode.h>
 #include <sys/conf.h>
 #include <sys/mount.h>
 #include <sys/vnode.h>
-#include <sys/specdev.h>
-#include <sys/fifo.h>
 #include <sys/malloc.h>
 
 #include <sys/malloc.h>
 
+#include <vm/vm.h>
+
+#include <miscfs/specfs/specdev.h>
+#include <miscfs/fifofs/fifo.h>
+
 #include <ufs/ufs/quota.h>
 #include <ufs/ufs/inode.h>
 #include <ufs/ufs/dir.h>
 #include <ufs/ufs/quota.h>
 #include <ufs/ufs/inode.h>
 #include <ufs/ufs/dir.h>
+#include <ufs/ufs/ufsmount.h>
 #include <ufs/ufs/ufs_extern.h>
 
 #include <ufs/lfs/lfs.h>
 #include <ufs/lfs/lfs_extern.h>
 
 /* Global vfs data structures for lfs. */
 #include <ufs/ufs/ufs_extern.h>
 
 #include <ufs/lfs/lfs.h>
 #include <ufs/lfs/lfs_extern.h>
 
 /* Global vfs data structures for lfs. */
-struct vnodeops lfs_vnodeops = {
-       ufs_lookup,             /* lookup */
-       ufs_create,             /* create */
-       ufs_mknod,              /* mknod */
-       ufs_open,               /* open */
-       ufs_close,              /* close */
-       ufs_access,             /* access */
-       ufs_getattr,            /* getattr */
-       ufs_setattr,            /* setattr */
-       lfs_read,               /* read */
-       lfs_write,              /* write */
-       ufs_ioctl,              /* ioctl */
-       ufs_select,             /* select */
-       ufs_mmap,               /* mmap */
-       lfs_fsync,              /* fsync */
-       ufs_seek,               /* seek */
-       ufs_remove,             /* remove */
-       ufs_link,               /* link */
-       ufs_rename,             /* rename */
-       ufs_mkdir,              /* mkdir */
-       ufs_rmdir,              /* rmdir */
-       ufs_symlink,            /* symlink */
-       ufs_readdir,            /* readdir */
-       ufs_readlink,           /* readlink */
-       ufs_abortop,            /* abortop */
-       lfs_inactive,           /* inactive */
-       ufs_reclaim,            /* reclaim */
-       ufs_lock,               /* lock */
-       ufs_unlock,             /* unlock */
-       lfs_bmap,               /* bmap */
-       ufs_strategy,           /* strategy */
-       ufs_print,              /* print */
-       ufs_islocked,           /* islocked */
-       ufs_advlock,            /* advlock */
-       lfs_blkatoff,           /* blkatoff */
-       lfs_vget,               /* vget */
-       lfs_valloc,             /* valloc */
-       lfs_vfree,              /* vfree */
-       lfs_truncate,           /* truncate */
-       lfs_update,             /* update */
-       lfs_bwrite,             /* bwrite */
+int (**lfs_vnodeop_p)();       /* op vector pointer; its address is stored in lfs_vnodeop_opv_desc below */
+struct vnodeopv_entry_desc lfs_vnodeop_entries[] = {    /* pairs each vop descriptor with the function registered for it */
+       { &vop_default_desc, vn_default_error },        /* entry for vop_default_desc */
+       { &vop_lookup_desc, ufs_lookup },               /* lookup */
+       { &vop_create_desc, lfs_create },               /* create (the replaced table used ufs_create) */
+       { &vop_mknod_desc, lfs_mknod },                 /* mknod (lfs_mknod, this file; replaced table used ufs_mknod) */
+       { &vop_open_desc, ufs_open },                   /* open */
+       { &vop_close_desc, lfs_close },                 /* close (the replaced table used ufs_close) */
+       { &vop_access_desc, ufs_access },               /* access */
+       { &vop_getattr_desc, lfs_getattr },             /* getattr (the replaced table used ufs_getattr) */
+       { &vop_setattr_desc, ufs_setattr },             /* setattr */
+       { &vop_read_desc, lfs_read },                   /* read (lfs_read, this file) */
+       { &vop_write_desc, lfs_write },                 /* write (lfs_write, this file) */
+       { &vop_ioctl_desc, ufs_ioctl },                 /* ioctl */
+       { &vop_select_desc, ufs_select },               /* select */
+       { &vop_mmap_desc, ufs_mmap },                   /* mmap */
+       { &vop_fsync_desc, lfs_fsync },                 /* fsync (lfs_fsync, this file) */
+       { &vop_seek_desc, ufs_seek },                   /* seek */
+       { &vop_remove_desc, lfs_remove },               /* remove (the replaced table used ufs_remove) */
+       { &vop_link_desc, lfs_link },                   /* link (the replaced table used ufs_link) */
+       { &vop_rename_desc, lfs_rename },               /* rename (the replaced table used ufs_rename) */
+       { &vop_mkdir_desc, lfs_mkdir },                 /* mkdir (the replaced table used ufs_mkdir) */
+       { &vop_rmdir_desc, lfs_rmdir },                 /* rmdir (the replaced table used ufs_rmdir) */
+       { &vop_symlink_desc, lfs_symlink },             /* symlink (lfs_symlink, this file: ufs_symlink bracketed by SET_DIROP and SET_ENDOP) */
+       { &vop_readdir_desc, ufs_readdir },             /* readdir */
+       { &vop_readlink_desc, ufs_readlink },           /* readlink */
+       { &vop_abortop_desc, ufs_abortop },             /* abortop */
+       { &vop_inactive_desc, lfs_inactive },           /* inactive (lfs_inactive, this file) */
+       { &vop_reclaim_desc, ufs_reclaim },             /* reclaim */
+       { &vop_lock_desc, ufs_lock },                   /* lock */
+       { &vop_unlock_desc, ufs_unlock },               /* unlock */
+       { &vop_bmap_desc, lfs_bmap },                   /* bmap */
+       { &vop_strategy_desc, ufs_strategy },           /* strategy */
+       { &vop_print_desc, ufs_print },                 /* print */
+       { &vop_islocked_desc, ufs_islocked },           /* islocked */
+       { &vop_advlock_desc, ufs_advlock },             /* advlock */
+       { &vop_blkatoff_desc, lfs_blkatoff },           /* blkatoff */
+       { &vop_valloc_desc, lfs_valloc },               /* valloc */
+       { &vop_vfree_desc, lfs_vfree },                 /* vfree */
+       { &vop_truncate_desc, lfs_truncate },           /* truncate */
+       { &vop_update_desc, lfs_update },               /* update */
+       { &vop_bwrite_desc, lfs_bwrite },               /* bwrite */
+       { (struct vnodeop_desc*)NULL, (int(*)())NULL }  /* NULL pair closes the table; no vget entry is carried over from the replaced table */
 };
 };
+struct vnodeopv_desc lfs_vnodeop_opv_desc =            /* ties the vector pointer to its entry table */
+       { &lfs_vnodeop_p, lfs_vnodeop_entries };
 
 
-struct vnodeops lfs_specops = {
-       spec_lookup,            /* lookup */
-       spec_create,            /* create */
-       spec_mknod,             /* mknod */
-       spec_open,              /* open */
-       ufsspec_close,          /* close */
-       ufs_access,             /* access */
-       ufs_getattr,            /* getattr */
-       ufs_setattr,            /* setattr */
-       ufsspec_read,           /* read */
-       ufsspec_write,          /* write */
-       spec_ioctl,             /* ioctl */
-       spec_select,            /* select */
-       spec_mmap,              /* mmap */
-       spec_fsync,             /* fsync */
-       spec_seek,              /* seek */
-       spec_remove,            /* remove */
-       spec_link,              /* link */
-       spec_rename,            /* rename */
-       spec_mkdir,             /* mkdir */
-       spec_rmdir,             /* rmdir */
-       spec_symlink,           /* symlink */
-       spec_readdir,           /* readdir */
-       spec_readlink,          /* readlink */
-       spec_abortop,           /* abortop */
-       lfs_inactive,           /* inactive */
-       ufs_reclaim,            /* reclaim */
-       ufs_lock,               /* lock */
-       ufs_unlock,             /* unlock */
-       spec_bmap,              /* bmap */
-       spec_strategy,          /* strategy */
-       ufs_print,              /* print */
-       ufs_islocked,           /* islocked */
-       spec_advlock,           /* advlock */
-       spec_blkatoff,          /* blkatoff */
-       spec_vget,              /* vget */
-       spec_valloc,            /* valloc */
-       spec_vfree,             /* vfree */
-       spec_truncate,          /* truncate */
-       lfs_update,             /* update */
-       lfs_bwrite,             /* bwrite */
+int (**lfs_specop_p)();        /* op vector pointer; its address is stored in lfs_specop_opv_desc below */
+struct vnodeopv_entry_desc lfs_specop_entries[] = {     /* pairs each vop descriptor with the function registered for it */
+       { &vop_default_desc, vn_default_error },        /* entry for vop_default_desc */
+       { &vop_lookup_desc, spec_lookup },              /* lookup */
+       { &vop_create_desc, spec_create },              /* create */
+       { &vop_mknod_desc, spec_mknod },                /* mknod */
+       { &vop_open_desc, spec_open },                  /* open */
+       { &vop_close_desc, ufsspec_close },             /* close */
+       { &vop_access_desc, ufs_access },               /* access */
+       { &vop_getattr_desc, lfs_getattr },             /* getattr (the replaced table used ufs_getattr) */
+       { &vop_setattr_desc, ufs_setattr },             /* setattr */
+       { &vop_read_desc, ufsspec_read },               /* read */
+       { &vop_write_desc, ufsspec_write },             /* write */
+       { &vop_ioctl_desc, spec_ioctl },                /* ioctl */
+       { &vop_select_desc, spec_select },              /* select */
+       { &vop_mmap_desc, spec_mmap },                  /* mmap */
+       { &vop_fsync_desc, spec_fsync },                /* fsync */
+       { &vop_seek_desc, spec_seek },                  /* seek */
+       { &vop_remove_desc, spec_remove },              /* remove */
+       { &vop_link_desc, spec_link },                  /* link */
+       { &vop_rename_desc, spec_rename },              /* rename */
+       { &vop_mkdir_desc, spec_mkdir },                /* mkdir */
+       { &vop_rmdir_desc, spec_rmdir },                /* rmdir */
+       { &vop_symlink_desc, spec_symlink },            /* symlink */
+       { &vop_readdir_desc, spec_readdir },            /* readdir */
+       { &vop_readlink_desc, spec_readlink },          /* readlink */
+       { &vop_abortop_desc, spec_abortop },            /* abortop */
+       { &vop_inactive_desc, lfs_inactive },           /* inactive (lfs_inactive, this file) */
+       { &vop_reclaim_desc, ufs_reclaim },             /* reclaim */
+       { &vop_lock_desc, ufs_lock },                   /* lock */
+       { &vop_unlock_desc, ufs_unlock },               /* unlock */
+       { &vop_bmap_desc, spec_bmap },                  /* bmap */
+       { &vop_strategy_desc, spec_strategy },          /* strategy */
+       { &vop_print_desc, ufs_print },                 /* print */
+       { &vop_islocked_desc, ufs_islocked },           /* islocked */
+       { &vop_advlock_desc, spec_advlock },            /* advlock */
+       { &vop_blkatoff_desc, spec_blkatoff },          /* blkatoff */
+       { &vop_valloc_desc, spec_valloc },              /* valloc */
+       { &vop_vfree_desc, lfs_vfree },                 /* vfree (the replaced table used spec_vfree) */
+       { &vop_truncate_desc, spec_truncate },          /* truncate */
+       { &vop_update_desc, lfs_update },               /* update */
+       { &vop_bwrite_desc, lfs_bwrite },               /* bwrite */
+       { (struct vnodeop_desc*)NULL, (int(*)())NULL }  /* NULL pair closes the table; no vget entry is carried over from the replaced table */
 };
 };
+struct vnodeopv_desc lfs_specop_opv_desc =             /* ties the vector pointer to its entry table */
+       { &lfs_specop_p, lfs_specop_entries };
 
 #ifdef FIFO
 
 #ifdef FIFO
-struct vnodeops lfs_fifoops = {
-       fifo_lookup,            /* lookup */
-       fifo_create,            /* create */
-       fifo_mknod,             /* mknod */
-       fifo_open,              /* open */
-       ufsfifo_close,          /* close */
-       ufs_access,             /* access */
-       ufs_getattr,            /* getattr */
-       ufs_setattr,            /* setattr */
-       ufsfifo_read,           /* read */
-       ufsfifo_write,          /* write */
-       fifo_ioctl,             /* ioctl */
-       fifo_select,            /* select */
-       fifo_mmap,              /* mmap */
-       fifo_fsync,             /* fsync */
-       fifo_seek,              /* seek */
-       fifo_remove,            /* remove */
-       fifo_link,              /* link */
-       fifo_rename,            /* rename */
-       fifo_mkdir,             /* mkdir */
-       fifo_rmdir,             /* rmdir */
-       fifo_symlink,           /* symlink */
-       fifo_readdir,           /* readdir */
-       fifo_readlink,          /* readlink */
-       fifo_abortop,           /* abortop */
-       lfs_inactive,           /* inactive */
-       ufs_reclaim,            /* reclaim */
-       ufs_lock,               /* lock */
-       ufs_unlock,             /* unlock */
-       fifo_bmap,              /* bmap */
-       fifo_strategy,          /* strategy */
-       ufs_print,              /* print */
-       ufs_islocked,           /* islocked */
-       fifo_advlock,           /* advlock */
-       fifo_blkatoff,          /* blkatoff */
-       fifo_vget,              /* vget */
-       fifo_valloc,            /* valloc */
-       fifo_vfree,             /* vfree */
-       fifo_truncate,          /* truncate */
-       lfs_update,             /* update */
-       lfs_bwrite,             /* bwrite */
+int (**lfs_fifoop_p)();        /* op vector pointer; its address is stored in lfs_fifoop_opv_desc below */
+struct vnodeopv_entry_desc lfs_fifoop_entries[] = {     /* pairs each vop descriptor with the function registered for it */
+       { &vop_default_desc, vn_default_error },        /* entry for vop_default_desc */
+       { &vop_lookup_desc, fifo_lookup },              /* lookup */
+       { &vop_create_desc, fifo_create },              /* create */
+       { &vop_mknod_desc, fifo_mknod },                /* mknod */
+       { &vop_open_desc, fifo_open },                  /* open */
+       { &vop_close_desc, ufsfifo_close },             /* close */
+       { &vop_access_desc, ufs_access },               /* access */
+       { &vop_getattr_desc, lfs_getattr },             /* getattr (the replaced table used ufs_getattr) */
+       { &vop_setattr_desc, ufs_setattr },             /* setattr */
+       { &vop_read_desc, ufsfifo_read },               /* read */
+       { &vop_write_desc, ufsfifo_write },             /* write */
+       { &vop_ioctl_desc, fifo_ioctl },                /* ioctl */
+       { &vop_select_desc, fifo_select },              /* select */
+       { &vop_mmap_desc, fifo_mmap },                  /* mmap */
+       { &vop_fsync_desc, fifo_fsync },                /* fsync */
+       { &vop_seek_desc, fifo_seek },                  /* seek */
+       { &vop_remove_desc, fifo_remove },              /* remove */
+       { &vop_link_desc, fifo_link },                  /* link */
+       { &vop_rename_desc, fifo_rename },              /* rename */
+       { &vop_mkdir_desc, fifo_mkdir },                /* mkdir */
+       { &vop_rmdir_desc, fifo_rmdir },                /* rmdir */
+       { &vop_symlink_desc, fifo_symlink },            /* symlink */
+       { &vop_readdir_desc, fifo_readdir },            /* readdir */
+       { &vop_readlink_desc, fifo_readlink },          /* readlink */
+       { &vop_abortop_desc, fifo_abortop },            /* abortop */
+       { &vop_inactive_desc, lfs_inactive },           /* inactive (lfs_inactive, this file) */
+       { &vop_reclaim_desc, ufs_reclaim },             /* reclaim */
+       { &vop_lock_desc, ufs_lock },                   /* lock */
+       { &vop_unlock_desc, ufs_unlock },               /* unlock */
+       { &vop_bmap_desc, fifo_bmap },                  /* bmap */
+       { &vop_strategy_desc, fifo_strategy },          /* strategy */
+       { &vop_print_desc, ufs_print },                 /* print */
+       { &vop_islocked_desc, ufs_islocked },           /* islocked */
+       { &vop_advlock_desc, fifo_advlock },            /* advlock */
+       { &vop_blkatoff_desc, fifo_blkatoff },          /* blkatoff */
+       { &vop_valloc_desc, fifo_valloc },              /* valloc */
+       { &vop_vfree_desc, lfs_vfree },                 /* vfree (the replaced table used fifo_vfree) */
+       { &vop_truncate_desc, fifo_truncate },          /* truncate */
+       { &vop_update_desc, lfs_update },               /* update */
+       { &vop_bwrite_desc, lfs_bwrite },               /* bwrite */
+       { (struct vnodeop_desc*)NULL, (int(*)())NULL }  /* NULL pair closes the table; no vget entry is carried over from the replaced table */
 };
 };
+struct vnodeopv_desc lfs_fifoop_opv_desc =             /* ties the vector pointer to its entry table */
+       { &lfs_fifoop_p, lfs_fifoop_entries };
 #endif /* FIFO */
 
 /*
  * Vnode op for reading.
  */
 /* ARGSUSED */
 #endif /* FIFO */
 
 /*
  * Vnode op for reading.
  */
 /* ARGSUSED */
-lfs_read(vp, uio, ioflag, cred)
-       struct vnode *vp;
-       register struct uio *uio;
-       int ioflag;
-       struct ucred *cred;
+lfs_read(ap)           /* copies file data block by block into ap->a_uio; returns 0 or an error number */
+       struct vop_read_args /* {
+               struct vnode *a_vp;
+               struct uio *a_uio;
+               int a_ioflag;
+               struct ucred *a_cred;
+       } */ *ap;
 {
 {
+       register struct vnode *vp = ap->a_vp;
        register struct inode *ip = VTOI(vp);
        register struct inode *ip = VTOI(vp);
-       register struct lfs *fs;                                /* LFS */
-       struct buf *bp;
+       register struct uio *uio = ap->a_uio;
+       register struct lfs *fs;
+       struct buf *bp1, *bp2;  /* bp1: buffer just requested; bp2: buffer currently held */
        daddr_t lbn, bn, rablock;
        daddr_t lbn, bn, rablock;
-       int size, diff, error = 0;
-       long n, on, type;
+       off_t diff;             /* bytes between the current offset and i_size */
+       int error = 0, size;
+       long n, on;
 
 
-#ifdef VERBOSE
-       printf("lfs_read: ino %d\n", ip->i_number);
-#endif
 #ifdef DIAGNOSTIC
 #ifdef DIAGNOSTIC
+       int type;               /* only declared and used under DIAGNOSTIC */
        if (uio->uio_rw != UIO_READ)
        if (uio->uio_rw != UIO_READ)
-               panic("ufs_read mode");
+               panic("lfs_read mode");
        type = ip->i_mode & IFMT;
        if (type != IFDIR && type != IFREG && type != IFLNK)
        type = ip->i_mode & IFMT;
        if (type != IFDIR && type != IFREG && type != IFLNK)
-               panic("ufs_read type");
+               panic("lfs_read type");
+       if (type == IFLNK && (int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)  /* symlink shorter than mnt_maxsymlinklen must not reach here */
+               panic("read short symlink");
 #endif
        if (uio->uio_resid == 0)
                return (0);
 #endif
        if (uio->uio_resid == 0)
                return (0);
-       if (uio->uio_offset < 0)
-               return (EINVAL);
+       fs = ip->i_lfs;
+       if (uio->uio_offset < 0 ||      /* negative offset now yields EFBIG; the removed code returned EINVAL */
+           (u_quad_t)uio->uio_offset + uio->uio_resid > fs->lfs_maxfilesize)
+               return (EFBIG);         /* returns before IUNLOCK, so the inode lock state is untouched */
        ip->i_flag |= IACC;
        ip->i_flag |= IACC;
-
-       fs = ip->i_lfs;                                         /* LFS */
+       bp1 = bp2 = NULL;
+       IUNLOCK(ip);            /* inode stays unlocked for the whole loop; ILOCK is taken again before the final return */
        do {
                lbn = lblkno(fs, uio->uio_offset);
                on = blkoff(fs, uio->uio_offset);
        do {
                lbn = lblkno(fs, uio->uio_offset);
                on = blkoff(fs, uio->uio_offset);
-               n = MIN((unsigned)(fs->lfs_bsize - on), uio->uio_resid);
+               n = min((unsigned)(fs->lfs_bsize - on), uio->uio_resid);        /* at most the rest of this block */
                diff = ip->i_size - uio->uio_offset;
                if (diff <= 0)
                diff = ip->i_size - uio->uio_offset;
                if (diff <= 0)
-                       return (0);
+                       break;          /* at or past i_size: leave via the common exit so bp2 is released and ILOCK retaken */
                if (diff < n)
                        n = diff;
                if (diff < n)
                        n = diff;
-               size = blksize(fs);                             /* LFS */
+               size = blksize(fs);
                rablock = lbn + 1;
                rablock = lbn + 1;
+               lfs_check(vp, lbn);     /* NOTE(review): lfs_check is defined outside this view; its effect is not visible here */
                if (vp->v_lastr + 1 == lbn &&
                    lblktosize(fs, rablock) < ip->i_size)
                if (vp->v_lastr + 1 == lbn &&
                    lblktosize(fs, rablock) < ip->i_size)
-                       error = breada(ITOV(ip), lbn, size, rablock,
-                               blksize(fs), NOCRED, &bp);
+                       error = breadn(ITOV(ip), lbn, size, &rablock,   /* sequential access: also pass one read-ahead block (count 1) */
+                               &size, 1, NOCRED, &bp1);
                else
                else
-                       error = bread(ITOV(ip), lbn, size, NOCRED, &bp);
+                       error = bread(ITOV(ip), lbn, size, NOCRED, &bp1);
+               if (bp2)                /* previous buffer is released only after the next one has been requested */
+                       brelse(bp2);
+               bp2 = bp1;
                vp->v_lastr = lbn;
                vp->v_lastr = lbn;
-               n = MIN(n, size - bp->b_resid);
-               if (error) {
-                       brelse(bp);
-                       return (error);
-               }
-               error = uiomove(bp->b_un.b_addr + on, (int)n, uio);
+               n = min(n, size - bp2->b_resid);        /* NOTE(review): bp2 is dereferenced before error is tested; assumes bread and breadn always hand back a buffer -- confirm */
+               if (error)
+                       break;          /* bp2 is released by the common exit below */
+               error = uiomove(bp2->b_un.b_addr + on, (int)n, uio);
                if (n + on == fs->lfs_bsize || uio->uio_offset == ip->i_size)
                if (n + on == fs->lfs_bsize || uio->uio_offset == ip->i_size)
-                       bp->b_flags |= B_AGE;
-               brelse(bp);
+                       bp2->b_flags |= B_AGE;  /* buffer is kept, not released here; brelse happens on the next pass or after the loop */
        } while (error == 0 && uio->uio_resid > 0 && n != 0);
        } while (error == 0 && uio->uio_resid > 0 && n != 0);
+       if (bp2)                /* release the last buffer still held */
+               brelse(bp2);
+       ILOCK(ip);              /* pairs with the IUNLOCK ahead of the loop */
        return (error);
 }
 
 /*
  * Vnode op for writing.
  */
        return (error);
 }
 
 /*
  * Vnode op for writing.
  */
-lfs_write(vp, uio, ioflag, cred)
-       register struct vnode *vp;
-       struct uio *uio;
-       int ioflag;
-       struct ucred *cred;
+lfs_write(ap)
+       struct vop_write_args /* {
+               struct vnode *a_vp;
+               struct uio *a_uio;
+               int a_ioflag;
+               struct ucred *a_cred;
+       } */ *ap;
 {
 {
+       register struct vnode *vp = ap->a_vp;
+       register struct uio *uio = ap->a_uio;
        struct proc *p = uio->uio_procp;
        register struct inode *ip = VTOI(vp);
        register struct lfs *fs;
        struct proc *p = uio->uio_procp;
        register struct inode *ip = VTOI(vp);
        register struct lfs *fs;
-       struct buf *bp;
+       register ioflag = ap->a_ioflag;
+       struct timeval tv;
+       struct buf *bp1, *bp2;
        daddr_t lbn;
        daddr_t lbn;
-       u_long osize;
+       off_t osize;
        int n, on, flags, newblock;
        int size, resid, error = 0;
 
        int n, on, flags, newblock;
        int size, resid, error = 0;
 
-#ifdef VERBOSE
-       printf("lfs_write ino %d\n", ip->i_number);
-#endif
 #ifdef DIAGNOSTIC
        if (uio->uio_rw != UIO_WRITE)
                panic("lfs_write mode");
 #ifdef DIAGNOSTIC
        if (uio->uio_rw != UIO_WRITE)
                panic("lfs_write mode");
@@ -287,16 +317,31 @@ lfs_write(vp, uio, ioflag, cred)
        resid = uio->uio_resid;
        osize = ip->i_size;
        fs = ip->i_lfs;                                         /* LFS */
        resid = uio->uio_resid;
        osize = ip->i_size;
        fs = ip->i_lfs;                                         /* LFS */
-       flags = 0;
-#ifdef NOTLFS
-       if (ioflag & IO_SYNC)
-               flags = B_SYNC;
-#endif
+       if (uio->uio_offset < 0 ||
+           (u_quad_t)uio->uio_offset + uio->uio_resid > fs->lfs_maxfilesize)
+               return (EFBIG);
+
+       /*
+        * XXX
+        * FFS uses the VOP_LOCK to provide serializability of multi-block
+        * reads and writes.  Since the cleaner may need to interrupt and
+        * clean a vnode, this isn't such a good idea for us.  We use 
+        * ordered locking instead.  Hold buffer N busy until buffer N+1
+        * has been obtained.  We get much better concurrency that way.
+        */
+       bp1 = bp2 = NULL;
+       IUNLOCK(ip);
        do {
                lbn = lblkno(fs, uio->uio_offset);
                on = blkoff(fs, uio->uio_offset);
        do {
                lbn = lblkno(fs, uio->uio_offset);
                on = blkoff(fs, uio->uio_offset);
-               n = MIN((unsigned)(fs->lfs_bsize - on), uio->uio_resid);
-               if (error = lfs_balloc(vp, n, lbn, &bp))
+               n = min((unsigned)(fs->lfs_bsize - on), uio->uio_resid);
+               lfs_check(vp, lbn);
+               if (error = lfs_balloc(vp, n, lbn, &bp1))
+                       break;
+               if (bp2)
+                       error = VOP_BWRITE(bp2);
+               bp2 = NULL;
+               if (error)
                        break;
                if (uio->uio_offset + n > ip->i_size) {
                        ip->i_size = uio->uio_offset + n;
                        break;
                if (uio->uio_offset + n > ip->i_size) {
                        ip->i_size = uio->uio_offset + n;
@@ -304,31 +349,35 @@ lfs_write(vp, uio, ioflag, cred)
                }
                size = blksize(fs);
                (void) vnode_pager_uncache(vp);
                }
                size = blksize(fs);
                (void) vnode_pager_uncache(vp);
-               n = MIN(n, size - bp->b_resid);
-               error = uiomove(bp->b_un.b_addr + on, n, uio);
-#ifdef NOTLFS                                                  /* LFS */
-               if (ioflag & IO_SYNC)
-                       (void) bwrite(bp);
-               else if (n + on == fs->fs_bsize) {
-                       bp->b_flags |= B_AGE;
-                       bawrite(bp);
-               } else
-                       bdwrite(bp);
-               ip->i_flag |= IUPD|ICHG;
-#else
-               /* XXX This doesn't handle IO_SYNC. */
-               LFS_UBWRITE(bp);
-#endif
-               if (cred->cr_uid != 0)
+               n = min(n, size - bp1->b_resid);
+               error = uiomove(bp1->b_un.b_addr + on, n, uio);
+               /* XXX Why is this in the loop? */
+               if (ap->a_cred->cr_uid != 0)
                        ip->i_mode &= ~(ISUID|ISGID);
                        ip->i_mode &= ~(ISUID|ISGID);
+               bp2 = bp1;
+               bp1 = NULL;
        } while (error == 0 && uio->uio_resid > 0 && n != 0);
        } while (error == 0 && uio->uio_resid > 0 && n != 0);
-       if (error && (ioflag & IO_UNIT)) {
-               (void)lfs_truncate(vp, osize, ioflag & IO_SYNC);
-               uio->uio_offset -= resid - uio->uio_resid;
-               uio->uio_resid = resid;
+       if (bp1)
+               brelse(bp1);
+       if (bp2)
+               error = VOP_BWRITE(bp2);
+
+       if (error) {
+               if (ioflag & IO_UNIT) {
+                       (void)VOP_TRUNCATE(vp, osize, ioflag & IO_SYNC,
+                           ap->a_cred, uio->uio_procp);
+                       uio->uio_offset -= resid - uio->uio_resid;
+                       uio->uio_resid = resid;
+               }
+       } 
+
+       if (!error && (ioflag & IO_SYNC)) {
+               tv = time;
+               if (!(error = VOP_UPDATE(vp, &tv, &tv, 1)))
+                       error = VOP_FSYNC(vp, ap->a_cred, MNT_WAIT,
+                           uio->uio_procp);
        }
        }
-       if (!error && (ioflag & IO_SYNC))
-               error = lfs_update(vp, &time, &time, 1);
+       ILOCK(ip);
        return (error);
 }
 
        return (error);
 }
 
@@ -336,28 +385,19 @@ lfs_write(vp, uio, ioflag, cred)
  * Synch an open file.
  */
 /* ARGSUSED */
  * Synch an open file.
  */
 /* ARGSUSED */
-lfs_fsync(vp, fflags, cred, waitfor, p)
-       struct vnode *vp;
-       int fflags;
-       struct ucred *cred;
-       int waitfor;
-       struct proc *p;
+lfs_fsync(ap)          /* returns whatever VOP_UPDATE on ap->a_vp returns; a_cred and a_p are not used */
+       struct vop_fsync_args /* {
+               struct vnode *a_vp;
+               struct ucred *a_cred;
+               int a_waitfor;
+               struct proc *a_p;
+       } */ *ap;
 {
 {
-       struct inode *ip;
+       struct timeval tv;      /* local copy of time, passed as both time arguments */
 
 
-#ifdef VERBOSE
-       printf("lfs_fsync\n");
-#endif
-       ip = VTOI(vp);
-       if (fflags & FWRITE)
-               ip->i_flag |= ICHG;
-       /*
-        * XXX
-        * Sync the mounted file system associated with the file
-        * descriptor.
-        */
-       ITIMES(ip, &time, &time);                               /* LFS */
-       return (0);
+       tv = time;
+       return (VOP_UPDATE(ap->a_vp, &tv, &tv,          /* the removed code always returned 0 */
+           ap->a_waitfor == MNT_WAIT ? LFS_SYNC : 0)); /* last argument is LFS_SYNC only when a_waitfor is MNT_WAIT, else 0 */
 }
 
 /*
 }
 
 /*
@@ -365,17 +405,17 @@ lfs_fsync(vp, fflags, cred, waitfor, p)
  * truncate and deallocate the file.
  */
 int
  * truncate and deallocate the file.
  */
 int
-lfs_inactive(vp, p)
-       struct vnode *vp;
-       struct proc *p;
+lfs_inactive(ap)
+       struct vop_inactive_args /* {
+               struct vnode *a_vp;
+       } */ *ap;
 {
        extern int prtactive;
 {
        extern int prtactive;
+       register struct vnode *vp = ap->a_vp;
        register struct inode *ip;
        register struct inode *ip;
+       struct timeval tv;
        int mode, error;
 
        int mode, error;
 
-#ifdef VERBOSE
-       printf("lfs_inactive\n");
-#endif
        if (prtactive && vp->v_usecount != 0)
                vprint("lfs_inactive: pushing active", vp);
 
        if (prtactive && vp->v_usecount != 0)
                vprint("lfs_inactive: pushing active", vp);
 
@@ -394,17 +434,18 @@ lfs_inactive(vp, p)
                if (!getinoquota(ip))
                        (void)chkiq(ip, -1, NOCRED, 0);
 #endif
                if (!getinoquota(ip))
                        (void)chkiq(ip, -1, NOCRED, 0);
 #endif
-               error = lfs_truncate(vp, (u_long)0, 0);
+               error = VOP_TRUNCATE(vp, (off_t)0, 0, NOCRED, NULL);
                mode = ip->i_mode;
                ip->i_mode = 0;
                ip->i_rdev = 0;
                ip->i_flag |= IUPD|ICHG;
                mode = ip->i_mode;
                ip->i_mode = 0;
                ip->i_rdev = 0;
                ip->i_flag |= IUPD|ICHG;
-               lfs_vfree(vp, ip->i_number, mode);
+               VOP_VFREE(vp, ip->i_number, mode);
+       }
+       if (ip->i_flag&(IUPD|IACC|ICHG|IMOD)) {
+               tv = time;
+               VOP_UPDATE(vp, &tv, &tv, 0);
        }
        }
-       if (ip->i_flag&(IUPD|IACC|ICHG|IMOD))
-               lfs_update(vp, &time, &time, 0);
        IUNLOCK(ip);
        IUNLOCK(ip);
-       ip->i_flag = 0;
        /*
         * If we are done with the inode, reclaim it
         * so that it can be reused immediately.
        /*
         * If we are done with the inode, reclaim it
         * so that it can be reused immediately.
@@ -413,3 +454,242 @@ lfs_inactive(vp, p)
                vgone(vp);
        return (error);
 }
                vgone(vp);
        return (error);
 }
+
+/*
+ * These macros are used to bracket UFS directory ops, so that we can
+ * identify all the pages touched during directory ops which need to
+ * be ordered and flushed atomically, so that they may be recovered.
+ */
+/*
+ * SET_DIROP(fs): enter a directory operation on file system `fs'.
+ * If fs->lfs_writer is non-zero, sleep on the address of fs->lfs_dirops
+ * (priority PRIBIO + 1, wait message "lfs_dirop", timeout argument 0).
+ * Afterwards increment fs->lfs_dirops and set fs->lfs_doifile to 1.
+ *
+ * The lfs_writer test is an `if', so it is made only once and is not
+ * repeated after the sleep returns.  The macro expands to a bare brace
+ * block and evaluates `fs' several times; pass an expression without
+ * side effects and do not use it as the body of an unbraced if/else.
+ */
+#define        SET_DIROP(fs) {                                                 \
+       if ((fs)->lfs_writer)                                           \
+               tsleep(&(fs)->lfs_dirops, PRIBIO + 1, "lfs_dirop", 0);  \
+       ++(fs)->lfs_dirops;                                             \
+       (fs)->lfs_doifile = 1;                                          \
+}
+
+/*
+ * SET_ENDOP(fs): leave a directory operation on file system `fs'.
+ * Decrement fs->lfs_dirops and, when the count reaches zero, call
+ * wakeup() on the address of fs->lfs_writer.
+ *
+ * NOTE(review): the wakeup channel (&lfs_writer) is not the channel
+ * SET_DIROP sleeps on (&lfs_dirops); presumably the writer sleeps on
+ * lfs_writer and wakes lfs_dirops itself -- confirm in the writer code.
+ * Same expansion caveats as SET_DIROP.
+ */
+#define        SET_ENDOP(fs) {                                                 \
+       --(fs)->lfs_dirops;                                             \
+       if (!(fs)->lfs_dirops)                                          \
+               wakeup(&(fs)->lfs_writer);                              \
+}
+
+/*
+ * MARK_VNODE(dvp): set the VDIROP bit in the vnode's v_flag.
+ * The expansion is an unparenthesized assignment expression, so use it
+ * only as a complete statement.
+ */
+#define        MARK_VNODE(dvp) (dvp)->v_flag |= VDIROP
+
+/*
+ * LFS wrapper around ufs_symlink().
+ *
+ * Brackets the call as a directory operation: SET_DIROP on the file
+ * system reached through the inode of a_dvp (i_lfs), MARK_VNODE on
+ * a_dvp, then ufs_symlink() with the unmodified argument structure,
+ * then SET_ENDOP on the same file system.
+ *
+ * Returns the value returned by ufs_symlink().
+ */
+int
+lfs_symlink(ap)
+       struct vop_symlink_args /* {
+               struct vnode *a_dvp;
+               struct vnode **a_vpp;
+               struct componentname *a_cnp;
+               struct vattr *a_vap;
+               char *a_target;
+       } */ *ap;
+{
+       int ret;
+
+       SET_DIROP(VTOI(ap->a_dvp)->i_lfs);
+       MARK_VNODE(ap->a_dvp);
+       ret = ufs_symlink(ap);
+       SET_ENDOP(VTOI(ap->a_dvp)->i_lfs);
+       return (ret);
+}
+
+/*
+ * LFS wrapper around ufs_mknod().
+ *
+ * Brackets the call as a directory operation: SET_DIROP on the file
+ * system reached through the inode of a_dvp (i_lfs), MARK_VNODE on
+ * a_dvp, then ufs_mknod() with the unmodified argument structure,
+ * then SET_ENDOP on the same file system.
+ *
+ * Returns the value returned by ufs_mknod().
+ */
+int
+lfs_mknod(ap)
+       struct vop_mknod_args /* {
+               struct vnode *a_dvp;
+               struct vnode **a_vpp;
+               struct componentname *a_cnp;
+               struct vattr *a_vap;
+       } */ *ap;
+{
+       int ret;
+
+       SET_DIROP(VTOI(ap->a_dvp)->i_lfs);
+       MARK_VNODE(ap->a_dvp);
+       ret = ufs_mknod(ap);
+       SET_ENDOP(VTOI(ap->a_dvp)->i_lfs);
+       return (ret);
+}
+
+/*
+ * LFS wrapper around ufs_create().
+ *
+ * Brackets the call as a directory operation: SET_DIROP on the file
+ * system reached through the inode of a_dvp (i_lfs), MARK_VNODE on
+ * a_dvp, then ufs_create() with the unmodified argument structure,
+ * then SET_ENDOP on the same file system.
+ *
+ * Returns the value returned by ufs_create().
+ */
+int
+lfs_create(ap)
+       struct vop_create_args /* {
+               struct vnode *a_dvp;
+               struct vnode **a_vpp;
+               struct componentname *a_cnp;
+               struct vattr *a_vap;
+       } */ *ap;
+{
+       int ret;
+
+       SET_DIROP(VTOI(ap->a_dvp)->i_lfs);
+       MARK_VNODE(ap->a_dvp);
+       ret = ufs_create(ap);
+       SET_ENDOP(VTOI(ap->a_dvp)->i_lfs);
+       return (ret);
+}
+
+/*
+ * LFS wrapper around ufs_mkdir().
+ *
+ * Brackets the call as a directory operation: SET_DIROP on the file
+ * system reached through the inode of a_dvp (i_lfs), MARK_VNODE on
+ * a_dvp, then ufs_mkdir() with the unmodified argument structure,
+ * then SET_ENDOP on the same file system.
+ *
+ * Returns the value returned by ufs_mkdir().
+ */
+int
+lfs_mkdir(ap)
+       struct vop_mkdir_args /* {
+               struct vnode *a_dvp;
+               struct vnode **a_vpp;
+               struct componentname *a_cnp;
+               struct vattr *a_vap;
+       } */ *ap;
+{
+       int ret;
+
+       SET_DIROP(VTOI(ap->a_dvp)->i_lfs);
+       MARK_VNODE(ap->a_dvp);
+       ret = ufs_mkdir(ap);
+       SET_ENDOP(VTOI(ap->a_dvp)->i_lfs);
+       return (ret);
+}
+
+/*
+ * LFS wrapper around ufs_remove().
+ *
+ * Brackets the call as a directory operation: SET_DIROP on the file
+ * system reached through the inode of a_dvp (i_lfs), MARK_VNODE on
+ * both a_dvp and a_vp, then ufs_remove() with the unmodified argument
+ * structure, then SET_ENDOP on the same file system.
+ *
+ * Returns the value returned by ufs_remove().
+ */
+int
+lfs_remove(ap)
+       struct vop_remove_args /* {
+               struct vnode *a_dvp;
+               struct vnode *a_vp;
+               struct componentname *a_cnp;
+       } */ *ap;
+{
+       int ret;
+
+       SET_DIROP(VTOI(ap->a_dvp)->i_lfs);
+       MARK_VNODE(ap->a_dvp);
+       MARK_VNODE(ap->a_vp);
+       ret = ufs_remove(ap);
+       SET_ENDOP(VTOI(ap->a_dvp)->i_lfs);
+       return (ret);
+}
+
+/*
+ * LFS wrapper around ufs_rmdir().
+ *
+ * Brackets the call as a directory operation: SET_DIROP on the file
+ * system reached through the inode of a_dvp (i_lfs), MARK_VNODE on
+ * both a_dvp and a_vp, then ufs_rmdir() with the unmodified argument
+ * structure, then SET_ENDOP on the same file system.
+ *
+ * Returns the value returned by ufs_rmdir().
+ */
+int
+lfs_rmdir(ap)
+       struct vop_rmdir_args /* {
+               struct vnodeop_desc *a_desc;
+               struct vnode *a_dvp;
+               struct vnode *a_vp;
+               struct componentname *a_cnp;
+       } */ *ap;
+{
+       int ret;
+
+       SET_DIROP(VTOI(ap->a_dvp)->i_lfs);
+       MARK_VNODE(ap->a_dvp);
+       MARK_VNODE(ap->a_vp);
+       ret = ufs_rmdir(ap);
+       SET_ENDOP(VTOI(ap->a_dvp)->i_lfs);
+       return (ret);
+}
+
+/*
+ * LFS wrapper around ufs_link().
+ *
+ * Brackets the call as a directory operation: SET_DIROP on the file
+ * system reached through the inode of a_vp (i_lfs), MARK_VNODE on
+ * a_vp, then ufs_link() with the unmodified argument structure, then
+ * SET_ENDOP on the same file system.
+ *
+ * NOTE(review): only a_vp is marked; a_tdvp is left untouched.  Which
+ * of the two is the directory being modified is not visible here --
+ * confirm against ufs_link().
+ *
+ * Returns the value returned by ufs_link().
+ */
+int
+lfs_link(ap)
+       struct vop_link_args /* {
+               struct vnode *a_vp;
+               struct vnode *a_tdvp;
+               struct componentname *a_cnp;
+       } */ *ap;
+{
+       int ret;
+
+       SET_DIROP(VTOI(ap->a_vp)->i_lfs);
+       MARK_VNODE(ap->a_vp);
+       ret = ufs_link(ap);
+       SET_ENDOP(VTOI(ap->a_vp)->i_lfs);
+       return (ret);
+}
+
+/*
+ * LFS wrapper around ufs_rename().
+ *
+ * Brackets the call as a directory operation: SET_DIROP on the file
+ * system reached through the inode of a_fdvp (i_lfs), MARK_VNODE on
+ * the two directory vnodes a_fdvp and a_tdvp, then ufs_rename() with
+ * the unmodified argument structure, then SET_ENDOP on the file system
+ * reached through a_fdvp again.  a_fvp and a_tvp are not marked.
+ *
+ * NOTE(review): a_fdvp is dereferenced a second time after ufs_rename()
+ * has returned; confirm that ufs_rename() cannot have released the
+ * last reference to that vnode by then.
+ *
+ * Returns the value returned by ufs_rename().
+ */
+int
+lfs_rename(ap)
+       struct vop_rename_args  /* {
+               struct vnode *a_fdvp;
+               struct vnode *a_fvp;
+               struct componentname *a_fcnp;
+               struct vnode *a_tdvp;
+               struct vnode *a_tvp;
+               struct componentname *a_tcnp;
+       } */ *ap;
+{
+       int ret;
+
+       SET_DIROP(VTOI(ap->a_fdvp)->i_lfs);
+       MARK_VNODE(ap->a_fdvp);
+       MARK_VNODE(ap->a_tdvp);
+       ret = ufs_rename(ap);
+       SET_ENDOP(VTOI(ap->a_fdvp)->i_lfs);
+       return (ret);
+}
+/* XXX hack to avoid calling ITIMES in getattr */
+/*
+ * Fill in the vattr structure a_vap from the in-core inode of a_vp.
+ *
+ * Every field is copied or derived directly from the inode, the vnode
+ * or the mount point; the inode itself is not modified.  a_cred and
+ * a_p are not used.
+ *
+ * Always returns 0.
+ */
+int
+lfs_getattr(ap)
+       struct vop_getattr_args /* {
+               struct vnode *a_vp;
+               struct vattr *a_vap;
+               struct ucred *a_cred;
+               struct proc *a_p;
+       } */ *ap;
+{
+       register struct vnode *vp = ap->a_vp;
+       register struct inode *ip = VTOI(vp);
+       register struct vattr *vap = ap->a_vap;
+       /*
+        * Copy from inode table
+        */
+       vap->va_fsid = ip->i_dev;
+       vap->va_fileid = ip->i_number;
+       /* Report the mode with the IFMT bits masked off. */
+       vap->va_mode = ip->i_mode & ~IFMT;
+       vap->va_nlink = ip->i_nlink;
+       vap->va_uid = ip->i_uid;
+       vap->va_gid = ip->i_gid;
+       vap->va_rdev = (dev_t)ip->i_rdev;
+       vap->va_size = ip->i_din.di_size;
+       vap->va_atime = ip->i_atime;
+       vap->va_mtime = ip->i_mtime;
+       vap->va_ctime = ip->i_ctime;
+       vap->va_flags = ip->i_flags;
+       vap->va_gen = ip->i_gen;
+       /* this doesn't belong here */
+       /*
+        * Block size by vnode type: BLKDEV_IOSIZE for VBLK, MAXBSIZE for
+        * VCHR, otherwise the f_iosize recorded for the mount point.
+        */
+       if (vp->v_type == VBLK)
+               vap->va_blocksize = BLKDEV_IOSIZE;
+       else if (vp->v_type == VCHR)
+               vap->va_blocksize = MAXBSIZE;
+       else
+               vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize;
+       /* Byte count is i_blocks converted with dbtob(). */
+       vap->va_bytes = dbtob(ip->i_blocks);
+       vap->va_type = vp->v_type;
+       vap->va_filerev = ip->i_modrev;
+       return (0);
+}
+/*
+ * Close called
+ *
+ * XXX -- we were using ufs_close, but since it updates the
+ * times on the inode, we might need to bump the uinodes
+ * count.
+ *
+ * When the vnode has more than one use (v_usecount > 1) and the inode
+ * does not have ILOCKED set, ITIMES is applied to the inode with the
+ * global `time' for both time arguments.  If IMOD was clear before
+ * ITIMES and is set afterwards, the file system's lfs_uinodes counter
+ * is incremented.  Otherwise nothing is done.
+ *
+ * a_fflag, a_cred and a_p are not used.  Always returns 0.
+ */
+/* ARGSUSED */
+int
+lfs_close(ap)
+       struct vop_close_args /* {
+               struct vnode *a_vp;
+               int  a_fflag;
+               struct ucred *a_cred;
+               struct proc *a_p;
+       } */ *ap;
+{
+       register struct vnode *vp = ap->a_vp;
+       register struct inode *ip = VTOI(vp);
+       int mod;
+
+       if (vp->v_usecount > 1 && !(ip->i_flag & ILOCKED)) {
+               /* Remember whether IMOD was already set before ITIMES. */
+               mod = ip->i_flag & IMOD;
+               ITIMES(ip, &time, &time);
+               /* Count the inode only on the clear -> set transition. */
+               if (!mod && ip->i_flag & IMOD)
+                       ip->i_lfs->lfs_uinodes++;
+       }
+       return (0);
+}
+