Do not lock inode during entire read/write operations (because if ... [commit message truncated in this listing])
[unix-history] / usr / src / sys / ufs / lfs / lfs_vnops.c
index 9095bce..94e5c03 100644 (file)
 /*
 /*
- * Copyright (c) 1982 Regents of the University of California.
- * All rights reserved.  The Berkeley software License Agreement
- * specifies the terms and conditions for redistribution.
+ * Copyright (c) 1986, 1989, 1991 Regents of the University of California.
+ * All rights reserved.
  *
  *
- *     @(#)lfs_vnops.c 6.21 (Berkeley) %G%
- */
-
-#include "param.h"
-#include "systm.h"
-#include "dir.h"
-#include "user.h"
-#include "kernel.h"
-#include "file.h"
-#include "stat.h"
-#include "inode.h"
-#include "fs.h"
-#include "buf.h"
-#include "proc.h"
-#include "quota.h"
-#include "uio.h"
-#include "socket.h"
-#include "socketvar.h"
-#include "mount.h"
-
-extern struct fileops inodeops;
-struct file *getinode();
-
-/*
- * Change current working directory (``.'').
- */
-chdir()
-{
-
-       chdirec(&u.u_cdir);
-}
-
-/*
- * Change notion of root (``/'') directory.
- */
-chroot()
-{
-
-       if (suser())
-               chdirec(&u.u_rdir);
-}
-
-/*
- * Common routine for chroot and chdir.
- */
-chdirec(ipp)
-       register struct inode **ipp;
-{
-       register struct inode *ip;
-       struct a {
-               char    *fname;
-       } *uap = (struct a *)u.u_ap;
-       register struct nameidata *ndp = &u.u_nd;
-
-       ndp->ni_nameiop = LOOKUP | FOLLOW;
-       ndp->ni_segflg = UIO_USERSPACE;
-       ndp->ni_dirp = uap->fname;
-       ip = namei(ndp);
-       if (ip == NULL)
-               return;
-       if ((ip->i_mode&IFMT) != IFDIR) {
-               u.u_error = ENOTDIR;
-               goto bad;
-       }
-       if (access(ip, IEXEC))
-               goto bad;
+ * %sccs.include.redist.c%
+ *
+ *     @(#)lfs_vnops.c 7.94 (Berkeley) %G%
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/resourcevar.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/conf.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+
+#include <vm/vm.h>
+
+#include <miscfs/specfs/specdev.h>
+#include <miscfs/fifofs/fifo.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/dir.h>
+#include <ufs/ufs/ufsmount.h>
+#include <ufs/ufs/ufs_extern.h>
+
+#include <ufs/lfs/lfs.h>
+#include <ufs/lfs/lfs_extern.h>
+
+/*
+ * Global vfs data structures for lfs.
+ *
+ * Each operations table in this file is described by three objects:
+ * a pointer to function pointers (here lfs_vnodeop_p), an array that
+ * pairs vnode operation descriptors with the routine handling them
+ * (lfs_vnodeop_entries), and a descriptor that names both of them
+ * (lfs_vnodeop_opv_desc).  The array starts with the vop_default_desc
+ * entry, handled by vn_default_error, and its last entry has both
+ * members set to NULL.
+ *
+ * This first table is the one whose read, write and fsync entries are
+ * lfs_read, lfs_write and lfs_fsync; it combines lfs_* routines with
+ * ufs_* routines.
+ *
+ * NOTE(review): lfs_vnodeop_p is presumably filled in from the array by
+ * VFS initialisation code that is not part of this file -- confirm.
+ */
+int (**lfs_vnodeop_p)();
+struct vnodeopv_entry_desc lfs_vnodeop_entries[] = {
+       { &vop_default_desc, vn_default_error },
+       { &vop_lookup_desc, ufs_lookup },               /* lookup */
+       { &vop_create_desc, lfs_create },               /* create */
+       { &vop_mknod_desc, lfs_mknod },                 /* mknod */
+       { &vop_open_desc, ufs_open },                   /* open */
+       { &vop_close_desc, lfs_close },                 /* close */
+       { &vop_access_desc, ufs_access },               /* access */
+       { &vop_getattr_desc, lfs_getattr },             /* getattr */
+       { &vop_setattr_desc, ufs_setattr },             /* setattr */
+       { &vop_read_desc, lfs_read },                   /* read */
+       { &vop_write_desc, lfs_write },                 /* write */
+       { &vop_ioctl_desc, ufs_ioctl },                 /* ioctl */
+       { &vop_select_desc, ufs_select },               /* select */
+       { &vop_mmap_desc, ufs_mmap },                   /* mmap */
+       { &vop_fsync_desc, lfs_fsync },                 /* fsync */
+       { &vop_seek_desc, ufs_seek },                   /* seek */
+       { &vop_remove_desc, lfs_remove },               /* remove */
+       { &vop_link_desc, lfs_link },                   /* link */
+       { &vop_rename_desc, lfs_rename },               /* rename */
+       { &vop_mkdir_desc, lfs_mkdir },                 /* mkdir */
+       { &vop_rmdir_desc, lfs_rmdir },                 /* rmdir */
+       { &vop_symlink_desc, lfs_symlink },             /* symlink */
+       { &vop_readdir_desc, ufs_readdir },             /* readdir */
+       { &vop_readlink_desc, ufs_readlink },           /* readlink */
+       { &vop_abortop_desc, ufs_abortop },             /* abortop */
+       { &vop_inactive_desc, lfs_inactive },           /* inactive */
+       { &vop_reclaim_desc, ufs_reclaim },             /* reclaim */
+       { &vop_lock_desc, ufs_lock },                   /* lock */
+       { &vop_unlock_desc, ufs_unlock },               /* unlock */
+       { &vop_bmap_desc, lfs_bmap },                   /* bmap */
+       { &vop_strategy_desc, ufs_strategy },           /* strategy */
+       { &vop_print_desc, ufs_print },                 /* print */
+       { &vop_islocked_desc, ufs_islocked },           /* islocked */
+       { &vop_advlock_desc, ufs_advlock },             /* advlock */
+       { &vop_blkatoff_desc, lfs_blkatoff },           /* blkatoff */
+       { &vop_valloc_desc, lfs_valloc },               /* valloc */
+       { &vop_vfree_desc, lfs_vfree },                 /* vfree */
+       { &vop_truncate_desc, lfs_truncate },           /* truncate */
+       { &vop_update_desc, lfs_update },               /* update */
+       { &vop_bwrite_desc, lfs_bwrite },               /* bwrite */
+       { (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+struct vnodeopv_desc lfs_vnodeop_opv_desc =
+       { &lfs_vnodeop_p, lfs_vnodeop_entries };
+
+int (**lfs_specop_p)();                /* named, with the table below, by lfs_specop_opv_desc */
+struct vnodeopv_entry_desc lfs_specop_entries[] = {    /* mostly spec_* routines; close/read/write use ufsspec_* */
+       { &vop_default_desc, vn_default_error },
+       { &vop_lookup_desc, spec_lookup },              /* lookup */
+       { &vop_create_desc, spec_create },              /* create */
+       { &vop_mknod_desc, spec_mknod },                /* mknod */
+       { &vop_open_desc, spec_open },                  /* open */
+       { &vop_close_desc, ufsspec_close },             /* close */
+       { &vop_access_desc, ufs_access },               /* access */
+       { &vop_getattr_desc, lfs_getattr },             /* getattr */
+       { &vop_setattr_desc, ufs_setattr },             /* setattr */
+       { &vop_read_desc, ufsspec_read },               /* read */
+       { &vop_write_desc, ufsspec_write },             /* write */
+       { &vop_ioctl_desc, spec_ioctl },                /* ioctl */
+       { &vop_select_desc, spec_select },              /* select */
+       { &vop_mmap_desc, spec_mmap },                  /* mmap */
+       { &vop_fsync_desc, spec_fsync },                /* fsync */
+       { &vop_seek_desc, spec_seek },                  /* seek */
+       { &vop_remove_desc, spec_remove },              /* remove */
+       { &vop_link_desc, spec_link },                  /* link */
+       { &vop_rename_desc, spec_rename },              /* rename */
+       { &vop_mkdir_desc, spec_mkdir },                /* mkdir */
+       { &vop_rmdir_desc, spec_rmdir },                /* rmdir */
+       { &vop_symlink_desc, spec_symlink },            /* symlink */
+       { &vop_readdir_desc, spec_readdir },            /* readdir */
+       { &vop_readlink_desc, spec_readlink },          /* readlink */
+       { &vop_abortop_desc, spec_abortop },            /* abortop */
+       { &vop_inactive_desc, lfs_inactive },           /* inactive */
+       { &vop_reclaim_desc, ufs_reclaim },             /* reclaim */
+       { &vop_lock_desc, ufs_lock },                   /* lock */
+       { &vop_unlock_desc, ufs_unlock },               /* unlock */
+       { &vop_bmap_desc, spec_bmap },                  /* bmap */
+       { &vop_strategy_desc, spec_strategy },          /* strategy */
+       { &vop_print_desc, ufs_print },                 /* print */
+       { &vop_islocked_desc, ufs_islocked },           /* islocked */
+       { &vop_advlock_desc, spec_advlock },            /* advlock */
+       { &vop_blkatoff_desc, spec_blkatoff },          /* blkatoff */
+       { &vop_valloc_desc, spec_valloc },              /* valloc */
+       { &vop_vfree_desc, lfs_vfree },                 /* vfree */
+       { &vop_truncate_desc, spec_truncate },          /* truncate */
+       { &vop_update_desc, lfs_update },               /* update */
+       { &vop_bwrite_desc, lfs_bwrite },               /* bwrite */
+       { (struct vnodeop_desc*)NULL, (int(*)())NULL }  /* last entry: both members NULL */
+};
+struct vnodeopv_desc lfs_specop_opv_desc =
+       { &lfs_specop_p, lfs_specop_entries };
+
+#ifdef FIFO    /* the table below exists only when FIFO is defined */
+int (**lfs_fifoop_p)();                /* named, with the table below, by lfs_fifoop_opv_desc */
+struct vnodeopv_entry_desc lfs_fifoop_entries[] = {    /* mostly fifo_* routines; close/read/write use ufsfifo_* */
+       { &vop_default_desc, vn_default_error },
+       { &vop_lookup_desc, fifo_lookup },              /* lookup */
+       { &vop_create_desc, fifo_create },              /* create */
+       { &vop_mknod_desc, fifo_mknod },                /* mknod */
+       { &vop_open_desc, fifo_open },                  /* open */
+       { &vop_close_desc, ufsfifo_close },             /* close */
+       { &vop_access_desc, ufs_access },               /* access */
+       { &vop_getattr_desc, lfs_getattr },             /* getattr */
+       { &vop_setattr_desc, ufs_setattr },             /* setattr */
+       { &vop_read_desc, ufsfifo_read },               /* read */
+       { &vop_write_desc, ufsfifo_write },             /* write */
+       { &vop_ioctl_desc, fifo_ioctl },                /* ioctl */
+       { &vop_select_desc, fifo_select },              /* select */
+       { &vop_mmap_desc, fifo_mmap },                  /* mmap */
+       { &vop_fsync_desc, fifo_fsync },                /* fsync */
+       { &vop_seek_desc, fifo_seek },                  /* seek */
+       { &vop_remove_desc, fifo_remove },              /* remove */
+       { &vop_link_desc, fifo_link },                  /* link */
+       { &vop_rename_desc, fifo_rename },              /* rename */
+       { &vop_mkdir_desc, fifo_mkdir },                /* mkdir */
+       { &vop_rmdir_desc, fifo_rmdir },                /* rmdir */
+       { &vop_symlink_desc, fifo_symlink },            /* symlink */
+       { &vop_readdir_desc, fifo_readdir },            /* readdir */
+       { &vop_readlink_desc, fifo_readlink },          /* readlink */
+       { &vop_abortop_desc, fifo_abortop },            /* abortop */
+       { &vop_inactive_desc, lfs_inactive },           /* inactive */
+       { &vop_reclaim_desc, ufs_reclaim },             /* reclaim */
+       { &vop_lock_desc, ufs_lock },                   /* lock */
+       { &vop_unlock_desc, ufs_unlock },               /* unlock */
+       { &vop_bmap_desc, fifo_bmap },                  /* bmap */
+       { &vop_strategy_desc, fifo_strategy },          /* strategy */
+       { &vop_print_desc, ufs_print },                 /* print */
+       { &vop_islocked_desc, ufs_islocked },           /* islocked */
+       { &vop_advlock_desc, fifo_advlock },            /* advlock */
+       { &vop_blkatoff_desc, fifo_blkatoff },          /* blkatoff */
+       { &vop_valloc_desc, fifo_valloc },              /* valloc */
+       { &vop_vfree_desc, lfs_vfree },                 /* vfree */
+       { &vop_truncate_desc, fifo_truncate },          /* truncate */
+       { &vop_update_desc, lfs_update },               /* update */
+       { &vop_bwrite_desc, lfs_bwrite },               /* bwrite */
+       { (struct vnodeop_desc*)NULL, (int(*)())NULL }  /* last entry: both members NULL */
+};
+struct vnodeopv_desc lfs_fifoop_opv_desc =
+       { &lfs_fifoop_p, lfs_fifoop_entries };
+#endif /* FIFO */
+
+/*
+ * Vnode op for reading.
+ *
+ * Copies data from the file behind ap->a_vp to the destination described
+ * by ap->a_uio, one file system block per loop iteration, starting at
+ * uio_offset.  The loop ends at end of file (ip->i_size), when uio_resid
+ * reaches zero, when nothing was transferred, or on the first error.
+ *
+ * Returns 0 at once when uio_resid is 0, EFBIG when uio_offset is
+ * negative or uio_offset + uio_resid exceeds fs->lfs_maxfilesize, and
+ * otherwise the error value (0 on success) left by bread/breadn or
+ * uiomove.
+ *
+ * A block is fetched with breadn, naming the following block as well,
+ * when it directly follows the block recorded in vp->v_lastr and the
+ * following block starts before end of file; otherwise plain bread is
+ * used.
+ *
+ * The inode is unlocked (IUNLOCK) before the copy loop and locked again
+ * (ILOCK) after it, so it is not held locked for the whole transfer.
+ * The buffer of the previous block is released only after the next block
+ * has been read.
+ *
+ * With DIAGNOSTIC defined the routine panics when the uio is not a read
+ * request, when the inode is not a directory, regular file or symbolic
+ * link, or when a symbolic link is shorter than mnt_maxsymlinklen.
+ */
+/* ARGSUSED */
+lfs_read(ap)
+       struct vop_read_args /* {
+               struct vnode *a_vp;
+               struct uio *a_uio;
+               int a_ioflag;
+               struct ucred *a_cred;
+       } */ *ap;
+{
+       register struct vnode *vp = ap->a_vp;
+       register struct inode *ip = VTOI(vp);
+       register struct uio *uio = ap->a_uio;
+       register struct lfs *fs;
+       struct buf *bp1, *bp2;         /* bp1: block just read, bp2: block being copied out */
+       daddr_t lbn, bn, rablock;      /* bn is not referenced anywhere in this routine */
+       off_t diff;
+       int error = 0, size;
+       long n, on;
+
+#ifdef DIAGNOSTIC
+       int type;
+       if (uio->uio_rw != UIO_READ)
+               panic("lfs_read mode");
+       type = ip->i_mode & IFMT;
+       if (type != IFDIR && type != IFREG && type != IFLNK)
+               panic("lfs_read type");
+       if (type == IFLNK && (int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
+               panic("read short symlink");
+#endif
+       if (uio->uio_resid == 0)
+               return (0);
+       fs = ip->i_lfs;
+       if (uio->uio_offset < 0 ||
+           (u_quad_t)uio->uio_offset + uio->uio_resid > fs->lfs_maxfilesize)
+               return (EFBIG);
+       ip->i_flag |= IACC;
+       bp1 = bp2 = NULL;
        IUNLOCK(ip);
        IUNLOCK(ip);
-       if (*ipp)
-               irele(*ipp);
-       *ipp = ip;
-       return;
-
-bad:
-       iput(ip);
-}
-
-/*
- * Open system call.
- */
-open()
-{
-       struct a {
-               char    *fname;
-               int     mode;
-               int     crtmode;
-       } *uap = (struct a *) u.u_ap;
-
-       copen(uap->mode-FOPEN, uap->crtmode, uap->fname);
-}
-
-/*
- * Creat system call.
- */
-creat()
-{
-       struct a {
-               char    *fname;
-               int     fmode;
-       } *uap = (struct a *)u.u_ap;
-
-       copen(FWRITE|FCREAT|FTRUNC, uap->fmode, uap->fname);
-}
-
-/*
- * Common code for open and creat.
- * Check permissions, allocate an open file structure,
- * and call the device open routine if any.
- */
-copen(mode, arg, fname)
-       register int mode;
-       int arg;
-       caddr_t fname;
-{
-       register struct inode *ip;
-       register struct file *fp;
-       register struct nameidata *ndp = &u.u_nd;
-       int indx;
-
-       fp = falloc();
-       if (fp == NULL)
-               return;
-       indx = u.u_r.r_val1;
-       ndp->ni_segflg = UIO_USERSPACE;
-       ndp->ni_dirp = fname;
-       if (mode&FCREAT) {
-               if (mode & FEXCL)
-                       ndp->ni_nameiop = CREATE;
+       do {
+               lbn = lblkno(fs, uio->uio_offset);
+               on = blkoff(fs, uio->uio_offset);
+               n = min((unsigned)(fs->lfs_bsize - on), uio->uio_resid);
+               diff = ip->i_size - uio->uio_offset;
+               if (diff <= 0)
+                       break;
+               if (diff < n)
+                       n = diff;
+               size = blksize(fs);
+               rablock = lbn + 1;
+               lfs_check(vp, lbn);
+               if (vp->v_lastr + 1 == lbn &&
+                   lblktosize(fs, rablock) < ip->i_size)
+                       error = breadn(ITOV(ip), lbn, size, &rablock,
+                               &size, 1, NOCRED, &bp1);
                else
                else
-                       ndp->ni_nameiop = CREATE | FOLLOW;
-               ip = namei(ndp);
-               if (ip == NULL) {
-                       if (u.u_error)
-                               goto bad1;
-                       ip = maknode(arg&07777&(~ISVTX), ndp);
-                       if (ip == NULL)
-                               goto bad1;
-                       mode &= ~FTRUNC;
-               } else {
-                       if (mode&FEXCL) {
-                               u.u_error = EEXIST;
-                               goto bad;
-                       }
-                       mode &= ~FCREAT;
-               }
-       } else {
-               ndp->ni_nameiop = LOOKUP | FOLLOW;
-               ip = namei(ndp);
-               if (ip == NULL)
-                       goto bad1;
-       }
-       if ((ip->i_mode & IFMT) == IFSOCK) {
-               u.u_error = EOPNOTSUPP;
-               goto bad;
-       }
-       if ((mode&FCREAT) == 0) {
-               if (mode&FREAD)
-                       if (access(ip, IREAD))
-                               goto bad;
-               if (mode&(FWRITE|FTRUNC)) {
-                       if (access(ip, IWRITE))
-                               goto bad;
-                       if ((ip->i_mode&IFMT) == IFDIR) {
-                               u.u_error = EISDIR;
-                               goto bad;
-                       }
-               }
-       }
-       if (mode&FTRUNC)
-               itrunc(ip, (u_long)0);
-       IUNLOCK(ip);
-       fp->f_flag = mode&FMASK;
-       fp->f_type = DTYPE_INODE;
-       fp->f_ops = &inodeops;
-       fp->f_data = (caddr_t)ip;
-       if (setjmp(&u.u_qsave)) {
-               if (u.u_error == 0)
-                       u.u_error = EINTR;
-               u.u_ofile[indx] = NULL;
-               closef(fp);
-               return;
-       }
-       u.u_error = openi(ip, mode);
-       if (u.u_error == 0)
-               return;
+                       error = bread(ITOV(ip), lbn, size, NOCRED, &bp1);
+               if (bp2)
+                       brelse(bp2);
+               bp2 = bp1;
+               vp->v_lastr = lbn;
+               n = min(n, size - bp2->b_resid);
+               if (error)
+                       break;
+               error = uiomove(bp2->b_un.b_addr + on, (int)n, uio);
+               if (n + on == fs->lfs_bsize || uio->uio_offset == ip->i_size)
+                       bp2->b_flags |= B_AGE;
+       } while (error == 0 && uio->uio_resid > 0 && n != 0);
+       if (bp2)
+               brelse(bp2);
        ILOCK(ip);
        ILOCK(ip);
-bad:
-       iput(ip);
-bad1:
-       u.u_ofile[indx] = NULL;
-       fp->f_count--;
-}
-
-/*
- * Mknod system call
- */
-mknod()
-{
-       register struct inode *ip;
-       register struct a {
-               char    *fname;
-               int     fmode;
-               int     dev;
-       } *uap = (struct a *)u.u_ap;
-       register struct nameidata *ndp = &u.u_nd;
-
-       if (!suser())
-               return;
-       ndp->ni_nameiop = CREATE;
-       ndp->ni_segflg = UIO_USERSPACE;
-       ndp->ni_dirp = uap->fname;
-       ip = namei(ndp);
-       if (ip != NULL) {
-               u.u_error = EEXIST;
-               goto out;
-       }
-       if (u.u_error)
-               return;
-       ip = maknode(uap->fmode, ndp);
-       if (ip == NULL)
-               return;
-       switch (ip->i_mode & IFMT) {
-
-       case IFMT:      /* used by badsect to flag bad sectors */
-       case IFCHR:
-       case IFBLK:
-               if (uap->dev) {
-                       /*
-                        * Want to be able to use this to make badblock
-                        * inodes, so don't truncate the dev number.
-                        */
-                       ip->i_rdev = uap->dev;
-                       ip->i_flag |= IACC|IUPD|ICHG;
-               }
-       }
-
-out:
-       iput(ip);
-}
-
-/*
- * link system call
- */
-link()
-{
-       register struct inode *ip, *xp;
-       register struct a {
-               char    *target;
-               char    *linkname;
-       } *uap = (struct a *)u.u_ap;
-       register struct nameidata *ndp = &u.u_nd;
-
-       ndp->ni_nameiop = LOOKUP | FOLLOW;
-       ndp->ni_segflg = UIO_USERSPACE;
-       ndp->ni_dirp = uap->target;
-       ip = namei(ndp);        /* well, this routine is doomed anyhow */
-       if (ip == NULL)
-               return;
-       if ((ip->i_mode&IFMT) == IFDIR && !suser()) {
-               iput(ip);
-               return;
-       }
-       ip->i_nlink++;
-       ip->i_flag |= ICHG;
-       iupdat(ip, &time, &time, 1);
-       IUNLOCK(ip);
-       ndp->ni_nameiop = CREATE;
-       ndp->ni_segflg = UIO_USERSPACE;
-       ndp->ni_dirp = (caddr_t)uap->linkname;
-       xp = namei(ndp);
-       if (xp != NULL) {
-               u.u_error = EEXIST;
-               iput(xp);
-               goto out;
-       }
-       if (u.u_error)
-               goto out;
-       if (ndp->ni_pdir->i_dev != ip->i_dev) {
-               iput(ndp->ni_pdir);
-               u.u_error = EXDEV;
-               goto out;
-       }
-       u.u_error = direnter(ip, ndp);
-out:
-       if (u.u_error) {
-               ip->i_nlink--;
-               ip->i_flag |= ICHG;
-       }
-       irele(ip);
-}
-
-/*
- * symlink -- make a symbolic link
- */
-symlink()
-{
-       register struct a {
-               char    *target;
-               char    *linkname;
-       } *uap = (struct a *)u.u_ap;
-       register struct inode *ip;
-       register char *tp;
-       register c, nc;
-       register struct nameidata *ndp = &u.u_nd;
-
-       tp = uap->target;
-       nc = 0;
-       while (c = fubyte(tp)) {
-               if (c < 0) {
-                       u.u_error = EFAULT;
-                       return;
-               }
-               tp++;
-               nc++;
-       }
-       ndp->ni_nameiop = CREATE;
-       ndp->ni_segflg = UIO_USERSPACE;
-       ndp->ni_dirp = uap->linkname;
-       ip = namei(ndp);
-       if (ip) {
-               iput(ip);
-               u.u_error = EEXIST;
-               return;
-       }
-       if (u.u_error)
-               return;
-       ip = maknode(IFLNK | 0777, ndp);
-       if (ip == NULL)
-               return;
-       u.u_error = rdwri(UIO_WRITE, ip, uap->target, nc, 0, 0, (int *)0);
-       /* handle u.u_error != 0 */
-       iput(ip);
-}
-
-/*
- * Unlink system call.
- * Hard to avoid races here, especially
- * in unlinking directories.
- */
-unlink()
-{
-       struct a {
-               char    *fname;
-       } *uap = (struct a *)u.u_ap;
-       register struct inode *ip, *dp;
-       register struct nameidata *ndp = &u.u_nd;
-
-       ndp->ni_nameiop = DELETE | LOCKPARENT;
-       ndp->ni_segflg = UIO_USERSPACE;
-       ndp->ni_dirp = uap->fname;
-       ip = namei(ndp);
-       if (ip == NULL)
-               return;
-       dp = ndp->ni_pdir;
-       if ((ip->i_mode&IFMT) == IFDIR && !suser())
-               goto out;
-       /*
-        * Don't unlink a mounted file.
-        */
-       if (ip->i_dev != dp->i_dev) {
-               u.u_error = EBUSY;
-               goto out;
-       }
-       if (ip->i_flag&ITEXT)
-               xrele(ip);      /* try once to free text */
-       if (dirremove(ndp)) {
-               ip->i_nlink--;
-               ip->i_flag |= ICHG;
-       }
-out:
-       if (dp == ip)
-               irele(ip);
-       else
-               iput(ip);
-       iput(dp);
-}
-
-/*
- * Seek system call
- */
-lseek()
-{
-       register struct file *fp;
-       register struct a {
-               int     fd;
-               off_t   off;
-               int     sbase;
-       } *uap = (struct a *)u.u_ap;
-
-       GETF(fp, uap->fd);
-       if (fp->f_type != DTYPE_INODE) {
-               u.u_error = ESPIPE;
-               return;
-       }
-       switch (uap->sbase) {
-
-       case L_INCR:
-               fp->f_offset += uap->off;
-               break;
-
-       case L_XTND:
-               fp->f_offset = uap->off + ((struct inode *)fp->f_data)->i_size;
+       return (error);
+}
+
+/*
+ * Vnode op for writing.
+ *
+ * Copies data from the source described by ap->a_uio into the file
+ * behind ap->a_vp, one file system block per loop iteration.
+ *
+ * Vnode types: for VREG with IO_APPEND set the offset is first moved to
+ * ip->i_size; VLNK is accepted as is; VDIR panics unless IO_SYNC is set;
+ * any other type panics.
+ *
+ * Early returns, all taken while the inode is still locked:
+ *     EINVAL  uio_offset is negative;
+ *     0       uio_resid is 0;
+ *     EFBIG   regular file, uio_procp is set and uio_offset + uio_resid
+ *             exceeds that process's RLIMIT_FSIZE (SIGXFSZ is posted);
+ *     EFBIG   uio_offset + uio_resid exceeds fs->lfs_maxfilesize.
+ *
+ * Copy loop: the inode is unlocked (IUNLOCK), then for every block
+ * lfs_balloc supplies a buffer, the previously filled buffer is passed
+ * to VOP_BWRITE, ip->i_size is grown when the write extends the file,
+ * uiomove fills the buffer, and ISUID/ISGID are cleared from ip->i_mode
+ * when the caller's cr_uid is not 0.  The inode is locked again (ILOCK)
+ * before the final return.
+ *
+ * After the loop: on error with IO_UNIT set the file is truncated back
+ * to its original size and uio_offset/uio_resid are restored; without
+ * error and with IO_SYNC set, VOP_UPDATE and then VOP_FSYNC are called.
+ *
+ * NOTE(review): after the loop the last filled buffer is written with
+ * "error = VOP_BWRITE(bp2)", which replaces an error left by lfs_balloc
+ * or uiomove; such an error is then neither returned nor seen by the
+ * IO_UNIT recovery.
+ */
+lfs_write(ap)
+       struct vop_write_args /* {
+               struct vnode *a_vp;
+               struct uio *a_uio;
+               int a_ioflag;
+               struct ucred *a_cred;
+       } */ *ap;
+{
+       register struct vnode *vp = ap->a_vp;
+       register struct uio *uio = ap->a_uio;
+       struct proc *p = uio->uio_procp;
+       register struct inode *ip = VTOI(vp);
+       register struct lfs *fs;
+       register ioflag = ap->a_ioflag;
+       struct timeval tv;
+       struct buf *bp1, *bp2;         /* bp1: block being filled, bp2: filled block not yet written */
+       daddr_t lbn;
+       off_t osize;                   /* file size on entry, used for the IO_UNIT truncate */
+       int n, on, flags, newblock;    /* flags and newblock are not referenced in this routine */
+       int size, resid, error = 0;    /* resid: uio_resid on entry, restored on IO_UNIT failure */
+
+#ifdef DIAGNOSTIC
+       if (uio->uio_rw != UIO_WRITE)
+               panic("lfs_write mode");
+#endif
+       switch (vp->v_type) {
+       case VREG:
+               if (ioflag & IO_APPEND)
+                       uio->uio_offset = ip->i_size;
+               /* fall through */
+       case VLNK:
                break;
 
                break;
 
-       case L_SET:
-               fp->f_offset = uap->off;
+       case VDIR:
+               /* XXX This may not be correct for LFS. */
+               if ((ioflag & IO_SYNC) == 0)
+                       panic("lfs_write nonsync dir write");
                break;
 
        default:
                break;
 
        default:
-               u.u_error = EINVAL;
-               return;
+               panic("lfs_write type");
        }
        }
-       u.u_r.r_off = fp->f_offset;
-}
+       if (uio->uio_offset < 0)
+               return (EINVAL);
+       if (uio->uio_resid == 0)
+               return (0);
+       /*
+        * Maybe this should be above the vnode op call, but so long as
+        * file servers have no limits, i don't think it matters
+        */
+       if (vp->v_type == VREG && p &&
+           uio->uio_offset + uio->uio_resid >
+             p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
+               psignal(p, SIGXFSZ);
+               return (EFBIG);
+       }
+       resid = uio->uio_resid;
+       osize = ip->i_size;
+       fs = ip->i_lfs;                                         /* LFS */
+       if (uio->uio_offset < 0 ||
+           (u_quad_t)uio->uio_offset + uio->uio_resid > fs->lfs_maxfilesize)
+               return (EFBIG);
 
 
-/*
- * Access system call
- */
-saccess()
-{
-       register svuid, svgid;
-       register struct inode *ip;
-       register struct a {
-               char    *fname;
-               int     fmode;
-       } *uap = (struct a *)u.u_ap;
-       register struct nameidata *ndp = &u.u_nd;
+       /*
+        * XXX
+        * FFS uses the VOP_LOCK to provide serializability of multi-block
+        * reads and writes.  Since the cleaner may need to interrupt and
+        * clean a vnode, this isn't such a good idea for us.  We use 
+        * ordered locking instead.  Hold buffer N busy until buffer N+1
+        * has been obtained.  We get much better concurrency that way.
+        *
+        * In the loop below bp1 is the buffer obtained for the current
+        * block and bp2 the buffer filled in the previous iteration; bp2
+        * is passed to VOP_BWRITE only after lfs_balloc has returned bp1.
+        */
+       bp1 = bp2 = NULL;
+       IUNLOCK(ip);
+       do {
+               lbn = lblkno(fs, uio->uio_offset);
+               on = blkoff(fs, uio->uio_offset);
+               n = min((unsigned)(fs->lfs_bsize - on), uio->uio_resid);
+               lfs_check(vp, lbn);
+               if (error = lfs_balloc(vp, n, lbn, &bp1))       /* assignment intended */
+                       break;
+               if (bp2)
+                       error = VOP_BWRITE(bp2);
+               bp2 = NULL;
+               if (error)
+                       break;
+               if (uio->uio_offset + n > ip->i_size) {
+                       ip->i_size = uio->uio_offset + n;
+                       vnode_pager_setsize(vp, (u_long)ip->i_size);
+               }
+               size = blksize(fs);
+               (void) vnode_pager_uncache(vp);
+               n = min(n, size - bp1->b_resid);
+               error = uiomove(bp1->b_un.b_addr + on, n, uio);
+               /* XXX Why is this in the loop? */
+               if (ap->a_cred->cr_uid != 0)
+                       ip->i_mode &= ~(ISUID|ISGID);
+               bp2 = bp1;
+               bp1 = NULL;
+       } while (error == 0 && uio->uio_resid > 0 && n != 0);
+       if (bp1)
+               brelse(bp1);
+       if (bp2)
+               error = VOP_BWRITE(bp2);        /* NOTE(review): this replaces
+                                                * an error left in the loop by
+                                                * lfs_balloc or uiomove, so
+                                                * that error is lost */
+
+       if (error) {
+               if (ioflag & IO_UNIT) {
+                       (void)VOP_TRUNCATE(vp, osize, ioflag & IO_SYNC,
+                           ap->a_cred, uio->uio_procp);
+                       uio->uio_offset -= resid - uio->uio_resid;      /* undo the partial transfer */
+                       uio->uio_resid = resid;
+               }
+       } 
 
 
-       svuid = u.u_uid;
-       svgid = u.u_gid;
-       u.u_uid = u.u_ruid;
-       u.u_gid = u.u_rgid;
-       ndp->ni_nameiop = LOOKUP | FOLLOW;
-       ndp->ni_segflg = UIO_USERSPACE;
-       ndp->ni_dirp = uap->fname;
-       ip = namei(ndp);
-       if (ip != NULL) {
-               if ((uap->fmode&R_OK) && access(ip, IREAD))
-                       goto done;
-               if ((uap->fmode&W_OK) && access(ip, IWRITE))
-                       goto done;
-               if ((uap->fmode&X_OK) && access(ip, IEXEC))
-                       goto done;
-done:
-               iput(ip);
+       if (!error && (ioflag & IO_SYNC)) {
+               tv = time;
+               if (!(error = VOP_UPDATE(vp, &tv, &tv, 1)))
+                       error = VOP_FSYNC(vp, ap->a_cred, MNT_WAIT,
+                           uio->uio_procp);
        }
        }
-       u.u_uid = svuid;
-       u.u_gid = svgid;
+       ILOCK(ip);
+       return (error);
 }
 
 /*
 }
 
 /*
- * Stat system call.  This version follows links.
+ * Synch an open file.
+ *
+ * Copies the global `time' into a local timeval and passes it to
+ * VOP_UPDATE for ap->a_vp as both time arguments.  The last argument
+ * is LFS_SYNC when ap->a_waitfor equals MNT_WAIT and 0 otherwise.
+ * The value returned by VOP_UPDATE is returned unchanged; ap->a_cred
+ * and ap->a_p are not used.
  */
  */
-stat()
+/* ARGSUSED */
+lfs_fsync(ap)
+       struct vop_fsync_args /* {
+               struct vnode *a_vp;
+               struct ucred *a_cred;
+               int a_waitfor;
+               struct proc *a_p;
+       } */ *ap;
 {
 {
+       struct timeval tv;
 
 
-       stat1(FOLLOW);
+       tv = time;
+       return (VOP_UPDATE(ap->a_vp, &tv, &tv,
+           ap->a_waitfor == MNT_WAIT ? LFS_SYNC : 0));
 }
 
 /*
 }
 
 /*
- * Lstat system call.  This version does not follow links.
+ * Last reference to an inode, write the inode out and if necessary,
+ * truncate and deallocate the file.
  */
  */
-lstat()
-{
-
-       stat1(NOFOLLOW);
-}
-
-stat1(follow)
-       int follow;
+int
+lfs_inactive(ap)
+       struct vop_inactive_args /* {
+               struct vnode *a_vp;
+       } */ *ap;
 {
 {
+       extern int prtactive;
+       register struct vnode *vp = ap->a_vp;
        register struct inode *ip;
        register struct inode *ip;
-       register struct a {
-               char    *fname;
-               struct stat *ub;
-       } *uap = (struct a *)u.u_ap;
-       struct stat sb;
-       register struct nameidata *ndp = &u.u_nd;
+       struct timeval tv;
+       int mode, error;
 
 
-       ndp->ni_nameiop = LOOKUP | follow;
-       ndp->ni_segflg = UIO_USERSPACE;
-       ndp->ni_dirp = uap->fname;
-       ip = namei(ndp);
-       if (ip == NULL)
-               return;
-       (void) ino_stat(ip, &sb);
-       iput(ip);
-       u.u_error = copyout((caddr_t)&sb, (caddr_t)uap->ub, sizeof (sb));
-}
+       if (prtactive && vp->v_usecount != 0)
+               vprint("lfs_inactive: pushing active", vp);
 
 
-/*
- * Return target name of a symbolic link
- */
-readlink()
-{
-       register struct inode *ip;
-       register struct a {
-               char    *name;
-               char    *buf;
-               int     count;
-       } *uap = (struct a *)u.u_ap;
-       register struct nameidata *ndp = &u.u_nd;
-       int resid;
-
-       ndp->ni_nameiop = LOOKUP;
-       ndp->ni_segflg = UIO_USERSPACE;
-       ndp->ni_dirp = uap->name;
-       ip = namei(ndp);
-       if (ip == NULL)
-               return;
-       if ((ip->i_mode&IFMT) != IFLNK) {
-               u.u_error = EINVAL;
-               goto out;
+       /* Get rid of inodes related to stale file handles. */
+       ip = VTOI(vp);
+       if (ip->i_mode == 0) {
+               if ((vp->v_flag & VXLOCK) == 0)
+                       vgone(vp);
+               return (0);
        }
        }
-       u.u_error = rdwri(UIO_READ, ip, uap->buf, uap->count, 0, 0, &resid);
-out:
-       iput(ip);
-       u.u_r.r_val1 = uap->count - resid;
-}
-
-/*
- * Change mode of a file given path name.
- */
-chmod()
-{
-       struct inode *ip;
-       struct a {
-               char    *fname;
-               int     fmode;
-       } *uap = (struct a *)u.u_ap;
-
-       if ((ip = owner(uap->fname, FOLLOW)) == NULL)
-               return;
-       u.u_error = chmod1(ip, uap->fmode);
-       iput(ip);
-}
-
-/*
- * Change mode of a file given a file descriptor.
- */
-fchmod()
-{
-       struct a {
-               int     fd;
-               int     fmode;
-       } *uap = (struct a *)u.u_ap;
-       register struct inode *ip;
-       register struct file *fp;
 
 
-       fp = getinode(uap->fd);
-       if (fp == NULL)
-               return;
-       ip = (struct inode *)fp->f_data;
-       if (u.u_uid != ip->i_uid && !suser())
-               return;
+       error = 0;
        ILOCK(ip);
        ILOCK(ip);
-       u.u_error = chmod1(ip, uap->fmode);
-       IUNLOCK(ip);
-}
-
-/*
- * Change the mode on a file.
- * Inode must be locked before calling.
- */
-chmod1(ip, mode)
-       register struct inode *ip;
-       register int mode;
-{
-
-       if (ip->i_fs->fs_ronly)
-               return (EROFS);
-       ip->i_mode &= ~07777;
-       if (u.u_uid) {
-               if ((ip->i_mode & IFMT) != IFDIR)
-                       mode &= ~ISVTX;
-               if (!groupmember(ip->i_gid))
-                       mode &= ~ISGID;
-       }
-       ip->i_mode |= mode&07777;
-       ip->i_flag |= ICHG;
-       if (ip->i_flag&ITEXT && (ip->i_mode&ISVTX)==0)
-               xrele(ip);
-       return (0);
-}
-
-/*
- * Set ownership given a path name.
- */
-chown()
-{
-       struct inode *ip;
-       struct a {
-               char    *fname;
-               int     uid;
-               int     gid;
-       } *uap = (struct a *)u.u_ap;
-
-       if (!suser() || (ip = owner(uap->fname, NOFOLLOW)) == NULL)
-               return;
-       u.u_error = chown1(ip, uap->uid, uap->gid);
-       iput(ip);
-}
-
-/*
- * Set ownership given a file descriptor.
- */
-fchown()
-{
-       struct a {
-               int     fd;
-               int     uid;
-               int     gid;
-       } *uap = (struct a *)u.u_ap;
-       register struct inode *ip;
-       register struct file *fp;
-
-       fp = getinode(uap->fd);
-       if (fp == NULL)
-               return;
-       ip = (struct inode *)fp->f_data;
-       if (!suser())
-               return;
-       ILOCK(ip);
-       u.u_error = chown1(ip, uap->uid, uap->gid);
-       IUNLOCK(ip);
-}
-
-/*
- * Perform chown operation on inode ip;
- * inode must be locked prior to call.
- */
-chown1(ip, uid, gid)
-       register struct inode *ip;
-       int uid, gid;
-{
+       if (ip->i_nlink <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
 #ifdef QUOTA
 #ifdef QUOTA
-       register long change;
+               if (!getinoquota(ip))
+                       (void)chkiq(ip, -1, NOCRED, 0);
 #endif
 #endif
-
-       if (ip->i_fs->fs_ronly)
-               return (EROFS);
-       if (uid == -1)
-               uid = ip->i_uid;
-       if (gid == -1)
-               gid = ip->i_gid;
-#ifdef QUOTA
-       if (ip->i_uid == uid)           /* this just speeds things a little */
-               change = 0;
-       else
-               change = ip->i_blocks;
-       (void) chkdq(ip, -change, 1);
-       (void) chkiq(ip->i_dev, ip, ip->i_uid, 1);
-       dqrele(ip->i_dquot);
-#endif
-       ip->i_uid = uid;
-       ip->i_gid = gid;
-       ip->i_flag |= ICHG;
-       if (u.u_ruid != 0)
-               ip->i_mode &= ~(ISUID|ISGID);
-#ifdef QUOTA
-       ip->i_dquot = inoquota(ip);
-       (void) chkdq(ip, change, 1);
-       (void) chkiq(ip->i_dev, (struct inode *)NULL, uid, 1);
-       return (u.u_error);             /* should == 0 ALWAYS !! */
-#else
-       return (0);
-#endif
-}
-
-utimes()
-{
-       register struct a {
-               char    *fname;
-               struct  timeval *tptr;
-       } *uap = (struct a *)u.u_ap;
-       register struct inode *ip;
-       struct timeval tv[2];
-
-       if ((ip = owner(uap->fname, FOLLOW)) == NULL)
-               return;
-       if (ip->i_fs->fs_ronly) {
-               u.u_error = EROFS;
-               iput(ip);
-               return;
-       }
-       u.u_error = copyin((caddr_t)uap->tptr, (caddr_t)tv, sizeof (tv));
-       if (u.u_error == 0) {
-               ip->i_flag |= IACC|IUPD|ICHG;
-               iupdat(ip, &tv[0], &tv[1], 0);
-       }
-       iput(ip);
-}
-
-/*
- * Flush any pending I/O.
- */
-sync()
-{
-
-       update();
-}
-
-/*
- * Truncate a file given its path name.
- */
-truncate()
-{
-       struct a {
-               char    *fname;
-               u_long  length;
-       } *uap = (struct a *)u.u_ap;
-       struct inode *ip;
-       register struct nameidata *ndp = &u.u_nd;
-
-       ndp->ni_nameiop = LOOKUP | FOLLOW;
-       ndp->ni_segflg = UIO_USERSPACE;
-       ndp->ni_dirp = uap->fname;
-       ip = namei(ndp);
-       if (ip == NULL)
-               return;
-       if (access(ip, IWRITE))
-               goto bad;
-       if ((ip->i_mode&IFMT) == IFDIR) {
-               u.u_error = EISDIR;
-               goto bad;
-       }
-       itrunc(ip, uap->length);
-bad:
-       iput(ip);
-}
-
-/*
- * Truncate a file given a file descriptor.
- */
-ftruncate()
-{
-       struct a {
-               int     fd;
-               u_long  length;
-       } *uap = (struct a *)u.u_ap;
-       struct inode *ip;
-       struct file *fp;
-
-       fp = getinode(uap->fd);
-       if (fp == NULL)
-               return;
-       if ((fp->f_flag&FWRITE) == 0) {
-               u.u_error = EINVAL;
-               return;
+               error = VOP_TRUNCATE(vp, (off_t)0, 0, NOCRED, NULL);
+               mode = ip->i_mode;
+               ip->i_mode = 0;
+               ip->i_rdev = 0;
+               ip->i_flag |= IUPD|ICHG;
+               VOP_VFREE(vp, ip->i_number, mode);
        }
        }
-       ip = (struct inode *)fp->f_data;
-       ILOCK(ip);
-       itrunc(ip, uap->length);
-       IUNLOCK(ip);
-}
-
-/*
- * Synch an open file.
- */
-fsync()
-{
-       struct a {
-               int     fd;
-       } *uap = (struct a *)u.u_ap;
-       struct inode *ip;
-       struct file *fp;
-
-       fp = getinode(uap->fd);
-       if (fp == NULL)
-               return;
-       ip = (struct inode *)fp->f_data;
-       ILOCK(ip);
-       syncip(ip);
-       IUNLOCK(ip);
-}
-
-/*
- * Rename system call.
- *     rename("foo", "bar");
- * is essentially
- *     unlink("bar");
- *     link("foo", "bar");
- *     unlink("foo");
- * but ``atomically''.  Can't do full commit without saving state in the
- * inode on disk which isn't feasible at this time.  Best we can do is
- * always guarantee the target exists.
- *
- * Basic algorithm is:
- *
- * 1) Bump link count on source while we're linking it to the
- *    target.  This also insure the inode won't be deleted out
- *    from underneath us while we work (it may be truncated by
- *    a concurrent `trunc' or `open' for creation).
- * 2) Link source to destination.  If destination already exists,
- *    delete it first.
- * 3) Unlink source reference to inode if still around. If a
- *    directory was moved and the parent of the destination
- *    is different from the source, patch the ".." entry in the
- *    directory.
- *
- * Source and destination must either both be directories, or both
- * not be directories.  If target is a directory, it must be empty.
- */
-rename()
-{
-       struct a {
-               char    *from;
-               char    *to;
-       } *uap = (struct a *)u.u_ap;
-       register struct inode *ip, *xp, *dp;
-       struct dirtemplate dirbuf;
-       int doingdirectory = 0, oldparent = 0, newparent = 0;
-       register struct nameidata *ndp = &u.u_nd;
-       int error = 0;
-
-       ndp->ni_nameiop = DELETE | LOCKPARENT;
-       ndp->ni_segflg = UIO_USERSPACE;
-       ndp->ni_dirp = uap->from;
-       ip = namei(ndp);
-       if (ip == NULL)
-               return;
-       dp = ndp->ni_pdir;
-       if ((ip->i_mode&IFMT) == IFDIR) {
-               register struct direct *d;
-
-               d = &ndp->ni_dent;
-               /*
-                * Avoid ".", "..", and aliases of "." for obvious reasons.
-                */
-               if ((d->d_namlen == 1 && d->d_name[0] == '.') ||
-                   (d->d_namlen == 2 && bcmp(d->d_name, "..", 2) == 0) ||
-                   (dp == ip) || (ip->i_flag & IRENAME)) {
-                       iput(dp);
-                       if (dp == ip)
-                               irele(ip);
-                       else
-                               iput(ip);
-                       u.u_error = EINVAL;
-                       return;
-               }
-               ip->i_flag |= IRENAME;
-               oldparent = dp->i_number;
-               doingdirectory++;
+       if (ip->i_flag&(IUPD|IACC|ICHG|IMOD)) {
+               tv = time;
+               VOP_UPDATE(vp, &tv, &tv, 0);
        }
        }
-       iput(dp);
-
-       /*
-        * 1) Bump link count while we're moving stuff
-        *    around.  If we crash somewhere before
-        *    completing our work, the link count
-        *    may be wrong, but correctable.
-        */
-       ip->i_nlink++;
-       ip->i_flag |= ICHG;
-       iupdat(ip, &time, &time, 1);
        IUNLOCK(ip);
        IUNLOCK(ip);
-
        /*
        /*
-        * When the target exists, both the directory
-        * and target inodes are returned locked.
+        * If we are done with the inode, reclaim it
+        * so that it can be reused immediately.
         */
         */
-       ndp->ni_nameiop = CREATE | LOCKPARENT | NOCACHE;
-       ndp->ni_dirp = (caddr_t)uap->to;
-       xp = namei(ndp);
-       if (u.u_error) {
-               error = u.u_error;
-               goto out;
-       }
-       dp = ndp->ni_pdir;
-       /*
-        * If ".." must be changed (ie the directory gets a new
-        * parent) then the source directory must not be in the
-        * directory heirarchy above the target, as this would
-        * orphan everything below the source directory. Also
-        * the user must have write permission in the source so
-        * as to be able to change "..". We must repeat the call 
-        * to namei, as the parent directory is unlocked by the
-        * call to checkpath().
-        */
-       if (oldparent != dp->i_number)
-               newparent = dp->i_number;
-       if (doingdirectory && newparent) {
-               if (access(ip, IWRITE))
-                       goto bad;
-               do {
-                       dp = ndp->ni_pdir;
-                       if (xp != NULL)
-                               iput(xp);
-                       u.u_error = checkpath(ip, dp);
-                       if (u.u_error)
-                               goto out;
-                       xp = namei(ndp);
-                       if (u.u_error) {
-                               error = u.u_error;
-                               goto out;
-                       }
-               } while (dp != ndp->ni_pdir);
-       }
-       /*
-        * 2) If target doesn't exist, link the target
-        *    to the source and unlink the source. 
-        *    Otherwise, rewrite the target directory
-        *    entry to reference the source inode and
-        *    expunge the original entry's existence.
-        */
-       if (xp == NULL) {
-               if (dp->i_dev != ip->i_dev) {
-                       error = EXDEV;
-                       goto bad;
-               }
-               /*
-                * Account for ".." in new directory.
-                * When source and destination have the same
-                * parent we don't fool with the link count.
-                */
-               if (doingdirectory && newparent) {
-                       dp->i_nlink++;
-                       dp->i_flag |= ICHG;
-                       iupdat(dp, &time, &time, 1);
-               }
-               error = direnter(ip, ndp);
-               if (error)
-                       goto out;
-       } else {
-               if (xp->i_dev != dp->i_dev || xp->i_dev != ip->i_dev) {
-                       error = EXDEV;
-                       goto bad;
-               }
-               /*
-                * Short circuit rename(foo, foo).
-                */
-               if (xp->i_number == ip->i_number)
-                       goto bad;
-               /*
-                * If the parent directory is "sticky", then the user must
-                * own the parent directory, or the destination of the rename,
-                * otherwise the destination may not be changed (except by
-                * root). This implements append-only directories.
-                */
-               if ((dp->i_mode & ISVTX) && u.u_uid != 0 &&
-                   u.u_uid != dp->i_uid && xp->i_uid != u.u_uid) {
-                       error = EPERM;
-                       goto bad;
-               }
-               /*
-                * Target must be empty if a directory
-                * and have no links to it.
-                * Also, insure source and target are
-                * compatible (both directories, or both
-                * not directories).
-                */
-               if ((xp->i_mode&IFMT) == IFDIR) {
-                       if (!dirempty(xp, dp->i_number) || xp->i_nlink > 2) {
-                               error = ENOTEMPTY;
-                               goto bad;
-                       }
-                       if (!doingdirectory) {
-                               error = ENOTDIR;
-                               goto bad;
-                       }
-                       cacheinval(dp);
-               } else if (doingdirectory) {
-                       error = EISDIR;
-                       goto bad;
-               }
-               dirrewrite(dp, ip, ndp);
-               if (u.u_error) {
-                       error = u.u_error;
-                       goto bad1;
-               }
-               /*
-                * Adjust the link count of the target to
-                * reflect the dirrewrite above.  If this is
-                * a directory it is empty and there are
-                * no links to it, so we can squash the inode and
-                * any space associated with it.  We disallowed
-                * renaming over top of a directory with links to
-                * it above, as the remaining link would point to
-                * a directory without "." or ".." entries.
-                */
-               xp->i_nlink--;
-               if (doingdirectory) {
-                       if (--xp->i_nlink != 0)
-                               panic("rename: linked directory");
-                       itrunc(xp, (u_long)0);
-               }
-               xp->i_flag |= ICHG;
-               iput(xp);
-               xp = NULL;
-       }
-
-       /*
-        * 3) Unlink the source.
-        */
-       ndp->ni_nameiop = DELETE | LOCKPARENT;
-       ndp->ni_segflg = UIO_USERSPACE;
-       ndp->ni_dirp = uap->from;
-       xp = namei(ndp);
-       if (xp != NULL)
-               dp = ndp->ni_pdir;
-       else
-               dp = NULL;
-       /*
-        * Insure that the directory entry still exists and has not
-        * changed while the new name has been entered. If the source is
-        * a file then the entry may have been unlinked or renamed. In
-        * either case there is no further work to be done. If the source
-        * is a directory then it cannot have been rmdir'ed; its link
-        * count of three would cause a rmdir to fail with ENOTEMPTY.
-        * The IRENAME flag insures that it cannot be moved by another
-        * rename.
-        */
-       if (xp != ip) {
-               if (doingdirectory)
-                       panic("rename: lost dir entry");
-       } else {
-               /*
-                * If the source is a directory with a
-                * new parent, the link count of the old
-                * parent directory must be decremented
-                * and ".." set to point to the new parent.
-                */
-               if (doingdirectory && newparent) {
-                       dp->i_nlink--;
-                       dp->i_flag |= ICHG;
-                       error = rdwri(UIO_READ, xp, (caddr_t)&dirbuf,
-                               sizeof (struct dirtemplate), (off_t)0, 1,
-                               (int *)0);
-                       if (error == 0) {
-                               if (dirbuf.dotdot_namlen != 2 ||
-                                   dirbuf.dotdot_name[0] != '.' ||
-                                   dirbuf.dotdot_name[1] != '.') {
-                                       printf("rename: mangled dir\n");
-                               } else {
-                                       dirbuf.dotdot_ino = newparent;
-                                       (void) rdwri(UIO_WRITE, xp,
-                                           (caddr_t)&dirbuf,
-                                           sizeof (struct dirtemplate),
-                                           (off_t)0, 1, (int *)0);
-                                       cacheinval(dp);
-                               }
-                       }
-               }
-               if (dirremove(ndp)) {
-                       xp->i_nlink--;
-                       xp->i_flag |= ICHG;
-               }
-               xp->i_flag &= ~IRENAME;
-               if (error == 0)         /* XXX conservative */
-                       error = u.u_error;
-       }
-       if (dp)
-               iput(dp);
-       if (xp)
-               iput(xp);
-       irele(ip);
-       if (error)
-               u.u_error = error;
-       return;
-
-bad:
-       iput(dp);
-bad1:
-       if (xp)
-               iput(xp);
-out:
-       ip->i_nlink--;
-       ip->i_flag |= ICHG;
-       irele(ip);
-       if (error)
-               u.u_error = error;
-}
-
+       if (vp->v_usecount == 0 && ip->i_mode == 0)
+               vgone(vp);
+       return (error);
+}
+
 /*
 /*
- * Make a new file.
+ * These macros are used to bracket UFS directory ops, so that we can
+ * identify all the pages touched during directory ops which need to
+ * be ordered and flushed atomically, so that they may be recovered.
  */
  */
-struct inode *
-maknode(mode, ndp)
-       int mode;
-       register struct nameidata *ndp;
-{
-       register struct inode *ip;
-       register struct inode *pdir = ndp->ni_pdir;
-       ino_t ipref;
-
-       if ((mode & IFMT) == IFDIR)
-               ipref = dirpref(pdir->i_fs);
-       else
-               ipref = pdir->i_number;
-       ip = ialloc(pdir, ipref, mode);
-       if (ip == NULL) {
-               iput(pdir);
-               return (NULL);
-       }
-#ifdef QUOTA
-       if (ip->i_dquot != NODQUOT)
-               panic("maknode: dquot");
-#endif
-       ip->i_flag |= IACC|IUPD|ICHG;
-       if ((mode & IFMT) == 0)
-               mode |= IFREG;
-       ip->i_mode = mode & ~u.u_cmask;
-       ip->i_nlink = 1;
-       ip->i_uid = u.u_uid;
-       ip->i_gid = pdir->i_gid;
-       if (ip->i_mode & ISGID && !groupmember(ip->i_gid))
-               ip->i_mode &= ~ISGID;
-#ifdef QUOTA
-       ip->i_dquot = inoquota(ip);
-#endif
-
-       /*
-        * Make sure inode goes to disk before directory entry.
-        */
-       iupdat(ip, &time, &time, 1);
-       u.u_error = direnter(ip, ndp);
-       if (u.u_error) {
-               /*
-                * Write error occurred trying to update directory
-                * so must deallocate the inode.
-                */
-               ip->i_nlink = 0;
-               ip->i_flag |= ICHG;
-               iput(ip);
-               return (NULL);
-       }
-       return (ip);
+/*
+ * SET_DIROP(fs): enter a directory operation on fs.
+ * If fs->lfs_writer is non-zero, sleep on &fs->lfs_dirops first; then
+ * count this operation in fs->lfs_dirops and set fs->lfs_doifile.
+ *
+ * NOTE(review): lfs_writer is tested once with "if" and is not re-checked
+ * after tsleep() returns -- confirm against the code that issues the
+ * wakeup on &fs->lfs_dirops that a single test is sufficient.
+ * NOTE(review): this expands to a bare { } block, so "SET_DIROP(x);" is
+ * not safe as the unbraced body of an if that has an else.
+ */
+#define        SET_DIROP(fs) {                                                 \
+       if ((fs)->lfs_writer)                                           \
+               tsleep(&(fs)->lfs_dirops, PRIBIO + 1, "lfs_dirop", 0);  \
+       ++(fs)->lfs_dirops;                                             \
+       (fs)->lfs_doifile = 1;                                          \
 }
 
 }
 
-/*
- * A virgin directory (no blushing please).
- */
-struct dirtemplate mastertemplate = {
-       0, 12, 1, ".",
-       0, DIRBLKSIZ - 12, 2, ".."
-};
-
-/*
- * Mkdir system call
- */
-mkdir()
+/*
+ * SET_ENDOP(fs): leave a directory operation on fs.
+ * Decrements fs->lfs_dirops and, when the count reaches zero, issues a
+ * wakeup on &fs->lfs_writer.  Intended to pair with SET_DIROP(), which
+ * performs the matching increment.
+ */
+#define        SET_ENDOP(fs) {                                                 \
+       --(fs)->lfs_dirops;                                             \
+       if (!(fs)->lfs_dirops)                                          \
+               wakeup(&(fs)->lfs_writer);                              \
+}
+
+/*
+ * MARK_VNODE(dvp): set the VDIROP flag in dvp->v_flag.
+ * The whole expansion is parenthesized so the macro behaves as a single
+ * expression wherever it is used.
+ */
+#define        MARK_VNODE(dvp) ((dvp)->v_flag |= VDIROP)
+
+/*
+ * lfs_symlink: LFS wrapper around ufs_symlink().
+ *
+ * Brackets the UFS call with SET_DIROP/SET_ENDOP on the lfs reached
+ * through the directory vnode a_dvp, and flags a_dvp with MARK_VNODE
+ * before the call.  Returns whatever ufs_symlink() returns.
+ */
+int
+lfs_symlink(ap)
+       struct vop_symlink_args /* {
+               struct vnode *a_dvp;
+               struct vnode **a_vpp;
+               struct componentname *a_cnp;
+               struct vattr *a_vap;
+               char *a_target;
+       } */ *ap;
 {
 {
-       struct a {
-               char    *name;
-               int     dmode;
-       } *uap = (struct a *)u.u_ap;
-       register struct inode *ip, *dp;
-       struct dirtemplate dirtemplate;
-       register struct nameidata *ndp = &u.u_nd;
-
-       ndp->ni_nameiop = CREATE;
-       ndp->ni_segflg = UIO_USERSPACE;
-       ndp->ni_dirp = uap->name;
-       ip = namei(ndp);
-       if (u.u_error)
-               return;
-       if (ip != NULL) {
-               iput(ip);
-               u.u_error = EEXIST;
-               return;
-       }
-       dp = ndp->ni_pdir;
-       uap->dmode &= 0777;
-       uap->dmode |= IFDIR;
-       /*
-        * Must simulate part of maknode here
-        * in order to acquire the inode, but
-        * not have it entered in the parent
-        * directory.  The entry is made later
-        * after writing "." and ".." entries out.
-        */
-       ip = ialloc(dp, dirpref(dp->i_fs), uap->dmode);
-       if (ip == NULL) {
-               iput(dp);
-               return;
-       }
-#ifdef QUOTA
-       if (ip->i_dquot != NODQUOT)
-               panic("mkdir: dquot");
-#endif
-       ip->i_flag |= IACC|IUPD|ICHG;
-       ip->i_mode = uap->dmode & ~u.u_cmask;
-       ip->i_nlink = 2;
-       ip->i_uid = u.u_uid;
-       ip->i_gid = dp->i_gid;
-#ifdef QUOTA
-       ip->i_dquot = inoquota(ip);
-#endif
-       iupdat(ip, &time, &time, 1);
-
+       int ret;
+
+       SET_DIROP(VTOI(ap->a_dvp)->i_lfs);
+       MARK_VNODE(ap->a_dvp);
+       ret = ufs_symlink(ap);
+       /*
+        * NOTE(review): a_dvp is dereferenced again after ufs_symlink()
+        * returns -- confirm the vnode is still valid at this point.
+        */
+       SET_ENDOP(VTOI(ap->a_dvp)->i_lfs);
+       return (ret);
+}
+
+/*
+ * lfs_mknod: LFS wrapper around ufs_mknod().
+ *
+ * Brackets the UFS call with SET_DIROP/SET_ENDOP on the lfs reached
+ * through the directory vnode a_dvp, and flags a_dvp with MARK_VNODE
+ * before the call.  Returns whatever ufs_mknod() returns.
+ */
+int
+lfs_mknod(ap)
+       struct vop_mknod_args /* {
+               struct vnode *a_dvp;
+               struct vnode **a_vpp;
+               struct componentname *a_cnp;
+               struct vattr *a_vap;
+       } */ *ap;
+{
+       int ret;
+
+       SET_DIROP(VTOI(ap->a_dvp)->i_lfs);
+       MARK_VNODE(ap->a_dvp);
+       ret = ufs_mknod(ap);
+       /*
+        * NOTE(review): a_dvp is dereferenced again after ufs_mknod()
+        * returns -- confirm the vnode is still valid at this point.
+        */
+       SET_ENDOP(VTOI(ap->a_dvp)->i_lfs);
+       return (ret);
+}
+
+/*
+ * lfs_create: LFS wrapper around ufs_create().
+ *
+ * Brackets the UFS call with SET_DIROP/SET_ENDOP on the lfs reached
+ * through the directory vnode a_dvp, and flags a_dvp with MARK_VNODE
+ * before the call.  Returns whatever ufs_create() returns.
+ */
+int
+lfs_create(ap)
+       struct vop_create_args /* {
+               struct vnode *a_dvp;
+               struct vnode **a_vpp;
+               struct componentname *a_cnp;
+               struct vattr *a_vap;
+       } */ *ap;
+{
+       int ret;
+
+       SET_DIROP(VTOI(ap->a_dvp)->i_lfs);
+       MARK_VNODE(ap->a_dvp);
+       ret = ufs_create(ap);
+       /*
+        * NOTE(review): a_dvp is dereferenced again after ufs_create()
+        * returns -- confirm the vnode is still valid at this point.
+        */
+       SET_ENDOP(VTOI(ap->a_dvp)->i_lfs);
+       return (ret);
+}
+
+/*
+ * lfs_mkdir: LFS wrapper around ufs_mkdir().
+ *
+ * Brackets the UFS call with SET_DIROP/SET_ENDOP on the lfs reached
+ * through the directory vnode a_dvp, and flags a_dvp with MARK_VNODE
+ * before the call.  Returns whatever ufs_mkdir() returns.
+ */
+int
+lfs_mkdir(ap)
+       struct vop_mkdir_args /* {
+               struct vnode *a_dvp;
+               struct vnode **a_vpp;
+               struct componentname *a_cnp;
+               struct vattr *a_vap;
+       } */ *ap;
+{
+       int ret;
+
+       SET_DIROP(VTOI(ap->a_dvp)->i_lfs);
+       MARK_VNODE(ap->a_dvp);
+       ret = ufs_mkdir(ap);
+       /*
+        * NOTE(review): a_dvp is dereferenced again after ufs_mkdir()
+        * returns -- confirm the vnode is still valid at this point.
+        */
+       SET_ENDOP(VTOI(ap->a_dvp)->i_lfs);
+       return (ret);
+}
+
+/*
+ * lfs_remove: LFS wrapper around ufs_remove().
+ *
+ * Brackets the UFS call with SET_DIROP/SET_ENDOP on the lfs reached
+ * through the directory vnode a_dvp.  Both a_dvp and the vnode being
+ * removed (a_vp) are flagged with MARK_VNODE before the call.
+ * Returns whatever ufs_remove() returns.
+ */
+int
+lfs_remove(ap)
+       struct vop_remove_args /* {
+               struct vnode *a_dvp;
+               struct vnode *a_vp;
+               struct componentname *a_cnp;
+       } */ *ap;
+{
+       int ret;
+
+       SET_DIROP(VTOI(ap->a_dvp)->i_lfs);
+       MARK_VNODE(ap->a_dvp);
+       MARK_VNODE(ap->a_vp);
+       ret = ufs_remove(ap);
+       /*
+        * NOTE(review): a_dvp is dereferenced again after ufs_remove()
+        * returns -- confirm the vnode is still valid at this point.
+        */
+       SET_ENDOP(VTOI(ap->a_dvp)->i_lfs);
+       return (ret);
+}
+
+/*
+ * lfs_rmdir: LFS wrapper around ufs_rmdir().
+ *
+ * Brackets the UFS call with SET_DIROP/SET_ENDOP on the lfs reached
+ * through the directory vnode a_dvp.  Both a_dvp and a_vp are flagged
+ * with MARK_VNODE before the call.  Returns whatever ufs_rmdir() returns.
+ */
+int
+lfs_rmdir(ap)
+       struct vop_rmdir_args /* {
+               struct vnodeop_desc *a_desc;
+               struct vnode *a_dvp;
+               struct vnode *a_vp;
+               struct componentname *a_cnp;
+       } */ *ap;
+{
+       int ret;
+
+       SET_DIROP(VTOI(ap->a_dvp)->i_lfs);
+       MARK_VNODE(ap->a_dvp);
+       MARK_VNODE(ap->a_vp);
+       ret = ufs_rmdir(ap);
+       /*
+        * NOTE(review): a_dvp is dereferenced again after ufs_rmdir()
+        * returns -- confirm the vnode is still valid at this point.
+        */
+       SET_ENDOP(VTOI(ap->a_dvp)->i_lfs);
+       return (ret);
+}
+
+/*
+ * lfs_link: LFS wrapper around ufs_link().
+ *
+ * Brackets the UFS call with SET_DIROP/SET_ENDOP on the lfs reached
+ * through a_vp, and flags a_vp with MARK_VNODE before the call.
+ * Returns whatever ufs_link() returns.
+ *
+ * NOTE(review): unlike the other directory-op wrappers, only a_vp is
+ * used here and a_tdvp is neither marked nor consulted -- confirm which
+ * of the two arguments is the directory being modified.
+ */
+int
+lfs_link(ap)
+       struct vop_link_args /* {
+               struct vnode *a_vp;
+               struct vnode *a_tdvp;
+               struct componentname *a_cnp;
+       } */ *ap;
+{
+       int ret;
+
+       SET_DIROP(VTOI(ap->a_vp)->i_lfs);
+       MARK_VNODE(ap->a_vp);
+       ret = ufs_link(ap);
+       SET_ENDOP(VTOI(ap->a_vp)->i_lfs);
+       return (ret);
+}
+
+/*
+ * lfs_rename: LFS wrapper around ufs_rename().
+ *
+ * Brackets the UFS call with SET_DIROP/SET_ENDOP on the lfs reached
+ * through the source directory vnode a_fdvp.  Both directory vnodes
+ * (a_fdvp and a_tdvp) are flagged with MARK_VNODE before the call; the
+ * file vnodes a_fvp and a_tvp are not touched here.
+ * Returns whatever ufs_rename() returns.
+ */
+int
+lfs_rename(ap)
+       struct vop_rename_args  /* {
+               struct vnode *a_fdvp;
+               struct vnode *a_fvp;
+               struct componentname *a_fcnp;
+               struct vnode *a_tdvp;
+               struct vnode *a_tvp;
+               struct componentname *a_tcnp;
+       } */ *ap;
+{
+       int ret;
+
+       SET_DIROP(VTOI(ap->a_fdvp)->i_lfs);
+       MARK_VNODE(ap->a_fdvp);
+       MARK_VNODE(ap->a_tdvp);
+       ret = ufs_rename(ap);
+       /*
+        * NOTE(review): a_fdvp is dereferenced again after ufs_rename()
+        * returns -- confirm the vnode is still valid at this point.
+        */
+       SET_ENDOP(VTOI(ap->a_fdvp)->i_lfs);
+       return (ret);
+}
+/*
+ * lfs_getattr: fill *a_vap with the attributes of vnode a_vp.
+ *
+ * XXX hack to avoid calling ITIMES in getattr: the values are copied
+ * straight from the in-core inode without updating its times first.
+ * a_cred and a_p are not used.  Always returns 0.
+ */
+int
+lfs_getattr(ap)
+       struct vop_getattr_args /* {
+               struct vnode *a_vp;
+               struct vattr *a_vap;
+               struct ucred *a_cred;
+               struct proc *a_p;
+       } */ *ap;
+{
+       register struct vnode *vp = ap->a_vp;
+       register struct inode *ip = VTOI(vp);
+       register struct vattr *vap = ap->a_vap;
        /*
        /*
-        * Bump link count in parent directory
-        * to reflect work done below.  Should
-        * be done before reference is created
-        * so reparation is possible if we crash.
+        * Copy from inode table
         */
         */
-       dp->i_nlink++;
-       dp->i_flag |= ICHG;
-       iupdat(dp, &time, &time, 1);
-
-       /*
-        * Initialize directory with "."
-        * and ".." from static template.
-        */
-       dirtemplate = mastertemplate;
-       dirtemplate.dot_ino = ip->i_number;
-       dirtemplate.dotdot_ino = dp->i_number;
-       u.u_error = rdwri(UIO_WRITE, ip, (caddr_t)&dirtemplate,
-               sizeof (dirtemplate), (off_t)0, 1, (int *)0);
-       if (u.u_error) {
-               dp->i_nlink--;
-               dp->i_flag |= ICHG;
-               goto bad;
-       }
-       if (DIRBLKSIZ > ip->i_fs->fs_fsize)
-               panic("mkdir: blksize");     /* XXX - should grow with bmap() */
+       vap->va_fsid = ip->i_dev;
+       vap->va_fileid = ip->i_number;
+       /* mode without the IFMT bits; the type is reported in va_type below */
+       vap->va_mode = ip->i_mode & ~IFMT;
+       vap->va_nlink = ip->i_nlink;
+       vap->va_uid = ip->i_uid;
+       vap->va_gid = ip->i_gid;
+       vap->va_rdev = (dev_t)ip->i_rdev;
+       vap->va_size = ip->i_din.di_size;
+       vap->va_atime = ip->i_atime;
+       vap->va_mtime = ip->i_mtime;
+       vap->va_ctime = ip->i_ctime;
+       vap->va_flags = ip->i_flags;
+       vap->va_gen = ip->i_gen;
+       /* this doesn't belong here */
+       /* block size depends on vnode type: block dev, char dev, or other */
+       if (vp->v_type == VBLK)
+               vap->va_blocksize = BLKDEV_IOSIZE;
+       else if (vp->v_type == VCHR)
+               vap->va_blocksize = MAXBSIZE;
        else
        else
-               ip->i_size = DIRBLKSIZ;
-       /*
-        * Directory all set up, now
-        * install the entry for it in
-        * the parent directory.
-        */
-       u.u_error = direnter(ip, ndp);
-       dp = NULL;
-       if (u.u_error) {
-               ndp->ni_nameiop = LOOKUP | NOCACHE;
-               ndp->ni_segflg = UIO_USERSPACE;
-               ndp->ni_dirp = uap->name;
-               dp = namei(ndp);
-               if (dp) {
-                       dp->i_nlink--;
-                       dp->i_flag |= ICHG;
-               }
-       }
-bad:
-       /*
-        * No need to do an explicit itrunc here,
-        * irele will do this for us because we set
-        * the link count to 0.
-        */
-       if (u.u_error) {
-               ip->i_nlink = 0;
-               ip->i_flag |= ICHG;
-       }
-       if (dp)
-               iput(dp);
-       iput(ip);
+               vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize;
+       /* i_blocks converted with dbtob() */
+       vap->va_bytes = dbtob(ip->i_blocks);
+       vap->va_type = vp->v_type;
+       vap->va_filerev = ip->i_modrev;
+       return (0);
 }
 }
-
 /*
 /*
- * Rmdir system call.
- */
-rmdir()
-{
-       struct a {
-               char    *name;
-       } *uap = (struct a *)u.u_ap;
-       register struct inode *ip, *dp;
-       register struct nameidata *ndp = &u.u_nd;
-
-       ndp->ni_nameiop = DELETE | LOCKPARENT;
-       ndp->ni_segflg = UIO_USERSPACE;
-       ndp->ni_dirp = uap->name;
-       ip = namei(ndp);
-       if (ip == NULL)
-               return;
-       dp = ndp->ni_pdir;
-       /*
-        * No rmdir "." please.
-        */
-       if (dp == ip) {
-               irele(dp);
-               iput(ip);
-               u.u_error = EINVAL;
-               return;
-       }
-       if ((ip->i_mode&IFMT) != IFDIR) {
-               u.u_error = ENOTDIR;
-               goto out;
-       }
-       /*
-        * Don't remove a mounted on directory.
-        */
-       if (ip->i_dev != dp->i_dev) {
-               u.u_error = EBUSY;
-               goto out;
-       }
-       /*
-        * Verify the directory is empty (and valid).
-        * (Rmdir ".." won't be valid since
-        *  ".." will contain a reference to
-        *  the current directory and thus be
-        *  non-empty.)
-        */
-       if (ip->i_nlink != 2 || !dirempty(ip, dp->i_number)) {
-               u.u_error = ENOTEMPTY;
-               goto out;
-       }
-       /*
-        * Delete reference to directory before purging
-        * inode.  If we crash in between, the directory
-        * will be reattached to lost+found,
-        */
-       if (dirremove(ndp) == 0)
-               goto out;
-       dp->i_nlink--;
-       dp->i_flag |= ICHG;
-       cacheinval(dp);
-       iput(dp);
-       dp = NULL;
-       /*
-        * Truncate inode.  The only stuff left
-        * in the directory is "." and "..".  The
-        * "." reference is inconsequential since
-        * we're quashing it.  The ".." reference
-        * has already been adjusted above.  We've
-        * removed the "." reference and the reference
-        * in the parent directory, but there may be
-        * other hard links so decrement by 2 and
-        * worry about them later.
-        */
-       ip->i_nlink -= 2;
-       itrunc(ip, (u_long)0);
-       cacheinval(ip);
-out:
-       if (dp)
-               iput(dp);
-       iput(ip);
-}
-
-struct file *
-getinode(fdes)
-       int fdes;
-{
-       struct file *fp;
-
-       if ((unsigned)fdes >= NOFILE || (fp = u.u_ofile[fdes]) == NULL) {
-               u.u_error = EBADF;
-               return ((struct file *)0);
-       }
-       if (fp->f_type != DTYPE_INODE) {
-               u.u_error = EINVAL;
-               return ((struct file *)0);
+ * Close called
+ *
+ * XXX -- we were using ufs_close, but since it updates the
+ * times on the inode, we might need to bump the uinodes
+ * count.
+ */
+/* ARGSUSED */
+int
+lfs_close(ap)
+       struct vop_close_args /* {
+               struct vnode *a_vp;
+               int  a_fflag;
+               struct ucred *a_cred;
+               struct proc *a_p;
+       } */ *ap;
+{
+       register struct vnode *vp = ap->a_vp;
+       register struct inode *ip = VTOI(vp);
+       int mod;
+
+       /*
+        * Only update the inode times when the vnode has more than one
+        * use and the inode is not locked.  a_fflag, a_cred and a_p are
+        * unused.  Always returns 0.
+        */
+       if (vp->v_usecount > 1 && !(ip->i_flag & ILOCKED)) {
+               /* remember whether IMOD was already set before ITIMES */
+               mod = ip->i_flag & IMOD;
+               ITIMES(ip, &time, &time);
+               /* IMOD newly set by ITIMES: bump the fs-wide uinodes count */
+               if (!mod && ip->i_flag & IMOD)
+                       ip->i_lfs->lfs_uinodes++;
        }
        }
-       return (fp);
+       return (0);
 }
 
 }
 
-/*
- * mode mask for creation of files
- */
-umask()
-{
-       register struct a {
-               int     mask;
-       } *uap = (struct a *)u.u_ap;
-
-       u.u_r.r_val1 = u.u_cmask;
-       u.u_cmask = uap->mask & 07777;
-}