Do not lock inode during entire read/write operations (because if
[unix-history] / usr / src / sys / ufs / lfs / lfs_vnops.c
index 1bfc12e..94e5c03 100644 (file)
@@ -4,7 +4,7 @@
  *
  * %sccs.include.redist.c%
  *
  *
  * %sccs.include.redist.c%
  *
- *     @(#)lfs_vnops.c 7.88 (Berkeley) %G%
+ *     @(#)lfs_vnops.c 7.94 (Berkeley) %G%
  */
 
 #include <sys/param.h>
  */
 
 #include <sys/param.h>
@@ -29,6 +29,7 @@
 #include <ufs/ufs/quota.h>
 #include <ufs/ufs/inode.h>
 #include <ufs/ufs/dir.h>
 #include <ufs/ufs/quota.h>
 #include <ufs/ufs/inode.h>
 #include <ufs/ufs/dir.h>
+#include <ufs/ufs/ufsmount.h>
 #include <ufs/ufs/ufs_extern.h>
 
 #include <ufs/lfs/lfs.h>
 #include <ufs/ufs/ufs_extern.h>
 
 #include <ufs/lfs/lfs.h>
@@ -42,9 +43,9 @@ struct vnodeopv_entry_desc lfs_vnodeop_entries[] = {
        { &vop_create_desc, lfs_create },               /* create */
        { &vop_mknod_desc, lfs_mknod },                 /* mknod */
        { &vop_open_desc, ufs_open },                   /* open */
        { &vop_create_desc, lfs_create },               /* create */
        { &vop_mknod_desc, lfs_mknod },                 /* mknod */
        { &vop_open_desc, ufs_open },                   /* open */
-       { &vop_close_desc, ufs_close },                 /* close */
+       { &vop_close_desc, lfs_close },                 /* close */
        { &vop_access_desc, ufs_access },               /* access */
        { &vop_access_desc, ufs_access },               /* access */
-       { &vop_getattr_desc, ufs_getattr },             /* getattr */
+       { &vop_getattr_desc, lfs_getattr },             /* getattr */
        { &vop_setattr_desc, ufs_setattr },             /* setattr */
        { &vop_read_desc, lfs_read },                   /* read */
        { &vop_write_desc, lfs_write },                 /* write */
        { &vop_setattr_desc, ufs_setattr },             /* setattr */
        { &vop_read_desc, lfs_read },                   /* read */
        { &vop_write_desc, lfs_write },                 /* write */
@@ -91,7 +92,7 @@ struct vnodeopv_entry_desc lfs_specop_entries[] = {
        { &vop_open_desc, spec_open },                  /* open */
        { &vop_close_desc, ufsspec_close },             /* close */
        { &vop_access_desc, ufs_access },               /* access */
        { &vop_open_desc, spec_open },                  /* open */
        { &vop_close_desc, ufsspec_close },             /* close */
        { &vop_access_desc, ufs_access },               /* access */
-       { &vop_getattr_desc, ufs_getattr },             /* getattr */
+       { &vop_getattr_desc, lfs_getattr },             /* getattr */
        { &vop_setattr_desc, ufs_setattr },             /* setattr */
        { &vop_read_desc, ufsspec_read },               /* read */
        { &vop_write_desc, ufsspec_write },             /* write */
        { &vop_setattr_desc, ufs_setattr },             /* setattr */
        { &vop_read_desc, ufsspec_read },               /* read */
        { &vop_write_desc, ufsspec_write },             /* write */
@@ -139,7 +140,7 @@ struct vnodeopv_entry_desc lfs_fifoop_entries[] = {
        { &vop_open_desc, fifo_open },                  /* open */
        { &vop_close_desc, ufsfifo_close },             /* close */
        { &vop_access_desc, ufs_access },               /* access */
        { &vop_open_desc, fifo_open },                  /* open */
        { &vop_close_desc, ufsfifo_close },             /* close */
        { &vop_access_desc, ufs_access },               /* access */
-       { &vop_getattr_desc, ufs_getattr },             /* getattr */
+       { &vop_getattr_desc, lfs_getattr },             /* getattr */
        { &vop_setattr_desc, ufs_setattr },             /* setattr */
        { &vop_read_desc, ufsfifo_read },               /* read */
        { &vop_write_desc, ufsfifo_write },             /* write */
        { &vop_setattr_desc, ufs_setattr },             /* setattr */
        { &vop_read_desc, ufsfifo_read },               /* read */
        { &vop_write_desc, ufsfifo_write },             /* write */
@@ -194,7 +195,7 @@ lfs_read(ap)
        register struct inode *ip = VTOI(vp);
        register struct uio *uio = ap->a_uio;
        register struct lfs *fs;
        register struct inode *ip = VTOI(vp);
        register struct uio *uio = ap->a_uio;
        register struct lfs *fs;
-       struct buf *bp;
+       struct buf *bp1, *bp2;
        daddr_t lbn, bn, rablock;
        off_t diff;
        int error = 0, size;
        daddr_t lbn, bn, rablock;
        off_t diff;
        int error = 0, size;
@@ -217,34 +218,40 @@ lfs_read(ap)
            (u_quad_t)uio->uio_offset + uio->uio_resid > fs->lfs_maxfilesize)
                return (EFBIG);
        ip->i_flag |= IACC;
            (u_quad_t)uio->uio_offset + uio->uio_resid > fs->lfs_maxfilesize)
                return (EFBIG);
        ip->i_flag |= IACC;
+       bp1 = bp2 = NULL;
+       IUNLOCK(ip);
        do {
                lbn = lblkno(fs, uio->uio_offset);
                on = blkoff(fs, uio->uio_offset);
                n = min((unsigned)(fs->lfs_bsize - on), uio->uio_resid);
                diff = ip->i_size - uio->uio_offset;
                if (diff <= 0)
        do {
                lbn = lblkno(fs, uio->uio_offset);
                on = blkoff(fs, uio->uio_offset);
                n = min((unsigned)(fs->lfs_bsize - on), uio->uio_resid);
                diff = ip->i_size - uio->uio_offset;
                if (diff <= 0)
-                       return (0);
+                       break;
                if (diff < n)
                        n = diff;
                size = blksize(fs);
                rablock = lbn + 1;
                if (diff < n)
                        n = diff;
                size = blksize(fs);
                rablock = lbn + 1;
+               lfs_check(vp, lbn);
                if (vp->v_lastr + 1 == lbn &&
                    lblktosize(fs, rablock) < ip->i_size)
                        error = breadn(ITOV(ip), lbn, size, &rablock,
                if (vp->v_lastr + 1 == lbn &&
                    lblktosize(fs, rablock) < ip->i_size)
                        error = breadn(ITOV(ip), lbn, size, &rablock,
-                               &size, 1, NOCRED, &bp);
+                               &size, 1, NOCRED, &bp1);
                else
                else
-                       error = bread(ITOV(ip), lbn, size, NOCRED, &bp);
+                       error = bread(ITOV(ip), lbn, size, NOCRED, &bp1);
+               if (bp2)
+                       brelse(bp2);
+               bp2 = bp1;
                vp->v_lastr = lbn;
                vp->v_lastr = lbn;
-               n = min(n, size - bp->b_resid);
-               if (error) {
-                       brelse(bp);
-                       return (error);
-               }
-               error = uiomove(bp->b_un.b_addr + on, (int)n, uio);
+               n = min(n, size - bp2->b_resid);
+               if (error)
+                       break;
+               error = uiomove(bp2->b_un.b_addr + on, (int)n, uio);
                if (n + on == fs->lfs_bsize || uio->uio_offset == ip->i_size)
                if (n + on == fs->lfs_bsize || uio->uio_offset == ip->i_size)
-                       bp->b_flags |= B_AGE;
-               brelse(bp);
+                       bp2->b_flags |= B_AGE;
        } while (error == 0 && uio->uio_resid > 0 && n != 0);
        } while (error == 0 && uio->uio_resid > 0 && n != 0);
+       if (bp2)
+               brelse(bp2);
+       ILOCK(ip);
        return (error);
 }
 
        return (error);
 }
 
@@ -266,7 +273,7 @@ lfs_write(ap)
        register struct lfs *fs;
        register ioflag = ap->a_ioflag;
        struct timeval tv;
        register struct lfs *fs;
        register ioflag = ap->a_ioflag;
        struct timeval tv;
-       struct buf *bp;
+       struct buf *bp1, *bp2;
        daddr_t lbn;
        off_t osize;
        int n, on, flags, newblock;
        daddr_t lbn;
        off_t osize;
        int n, on, flags, newblock;
@@ -313,16 +320,28 @@ lfs_write(ap)
        if (uio->uio_offset < 0 ||
            (u_quad_t)uio->uio_offset + uio->uio_resid > fs->lfs_maxfilesize)
                return (EFBIG);
        if (uio->uio_offset < 0 ||
            (u_quad_t)uio->uio_offset + uio->uio_resid > fs->lfs_maxfilesize)
                return (EFBIG);
-       flags = 0;
-#ifdef NOTLFS
-       if (ioflag & IO_SYNC)
-               flags = B_SYNC;
-#endif
+
+       /*
+        * XXX
+        * FFS uses the VOP_LOCK to provide serializability of multi-block
+        * reads and writes.  Since the cleaner may need to interrupt and
+        * clean a vnode, this isn't such a good idea for us.  We use 
+        * ordered locking instead.  Hold buffer N busy until buffer N+1
+        * has been obtained.  We get much better concurrency that way.
+        */
+       bp1 = bp2 = NULL;
+       IUNLOCK(ip);
        do {
                lbn = lblkno(fs, uio->uio_offset);
                on = blkoff(fs, uio->uio_offset);
                n = min((unsigned)(fs->lfs_bsize - on), uio->uio_resid);
        do {
                lbn = lblkno(fs, uio->uio_offset);
                on = blkoff(fs, uio->uio_offset);
                n = min((unsigned)(fs->lfs_bsize - on), uio->uio_resid);
-               if (error = lfs_balloc(vp, n, lbn, &bp))
+               lfs_check(vp, lbn);
+               if (error = lfs_balloc(vp, n, lbn, &bp1))
+                       break;
+               if (bp2)
+                       error = VOP_BWRITE(bp2);
+               bp2 = NULL;
+               if (error)
                        break;
                if (uio->uio_offset + n > ip->i_size) {
                        ip->i_size = uio->uio_offset + n;
                        break;
                if (uio->uio_offset + n > ip->i_size) {
                        ip->i_size = uio->uio_offset + n;
@@ -330,34 +349,35 @@ lfs_write(ap)
                }
                size = blksize(fs);
                (void) vnode_pager_uncache(vp);
                }
                size = blksize(fs);
                (void) vnode_pager_uncache(vp);
-               n = min(n, size - bp->b_resid);
-               error = uiomove(bp->b_un.b_addr + on, n, uio);
-#ifdef NOTLFS                                                  /* LFS */
-               if (ioflag & IO_SYNC)
-                       (void) bwrite(bp);
-               else if (n + on == fs->fs_bsize) {
-                       bp->b_flags |= B_AGE;
-                       bawrite(bp);
-               } else
-                       bdwrite(bp);
-               ip->i_flag |= IUPD|ICHG;
-#else
-               /* XXX This doesn't handle IO_SYNC. */
-               LFS_UBWRITE(bp);
-#endif
+               n = min(n, size - bp1->b_resid);
+               error = uiomove(bp1->b_un.b_addr + on, n, uio);
+               /* XXX Why is this in the loop? */
                if (ap->a_cred->cr_uid != 0)
                        ip->i_mode &= ~(ISUID|ISGID);
                if (ap->a_cred->cr_uid != 0)
                        ip->i_mode &= ~(ISUID|ISGID);
+               bp2 = bp1;
+               bp1 = NULL;
        } while (error == 0 && uio->uio_resid > 0 && n != 0);
        } while (error == 0 && uio->uio_resid > 0 && n != 0);
-       if (error && (ioflag & IO_UNIT)) {
-               (void)VOP_TRUNCATE(vp, osize, ioflag & IO_SYNC, ap->a_cred,
-                   uio->uio_procp);
-               uio->uio_offset -= resid - uio->uio_resid;
-               uio->uio_resid = resid;
-       }
+       if (bp1)
+               brelse(bp1);
+       if (bp2)
+               error = VOP_BWRITE(bp2);
+
+       if (error) {
+               if (ioflag & IO_UNIT) {
+                       (void)VOP_TRUNCATE(vp, osize, ioflag & IO_SYNC,
+                           ap->a_cred, uio->uio_procp);
+                       uio->uio_offset -= resid - uio->uio_resid;
+                       uio->uio_resid = resid;
+               }
+       } 
+
        if (!error && (ioflag & IO_SYNC)) {
                tv = time;
        if (!error && (ioflag & IO_SYNC)) {
                tv = time;
-               error = VOP_UPDATE(vp, &tv, &tv, 1);
+               if (!(error = VOP_UPDATE(vp, &tv, &tv, 1)))
+                       error = VOP_FSYNC(vp, ap->a_cred, MNT_WAIT,
+                           uio->uio_procp);
        }
        }
+       ILOCK(ip);
        return (error);
 }
 
        return (error);
 }
 
@@ -375,11 +395,9 @@ lfs_fsync(ap)
 {
        struct timeval tv;
 
 {
        struct timeval tv;
 
-#ifdef VERBOSE
-       printf("lfs_fsync\n");
-#endif
        tv = time;
        tv = time;
-       return (VOP_UPDATE(ap->a_vp, &tv, &tv, ap->a_waitfor == MNT_WAIT));
+       return (VOP_UPDATE(ap->a_vp, &tv, &tv,
+           ap->a_waitfor == MNT_WAIT ? LFS_SYNC : 0));
 }
 
 /*
 }
 
 /*
@@ -398,9 +416,6 @@ lfs_inactive(ap)
        struct timeval tv;
        int mode, error;
 
        struct timeval tv;
        int mode, error;
 
-#ifdef VERBOSE
-       printf("lfs_inactive\n");
-#endif
        if (prtactive && vp->v_usecount != 0)
                vprint("lfs_inactive: pushing active", vp);
 
        if (prtactive && vp->v_usecount != 0)
                vprint("lfs_inactive: pushing active", vp);
 
@@ -431,7 +446,6 @@ lfs_inactive(ap)
                VOP_UPDATE(vp, &tv, &tv, 0);
        }
        IUNLOCK(ip);
                VOP_UPDATE(vp, &tv, &tv, 0);
        }
        IUNLOCK(ip);
-       ip->i_flag = 0;
        /*
         * If we are done with the inode, reclaim it
         * so that it can be reused immediately.
        /*
         * If we are done with the inode, reclaim it
         * so that it can be reused immediately.
@@ -447,22 +461,16 @@ lfs_inactive(ap)
  * be ordered and flushed atomically, so that they may be recovered.
  */
 #define        SET_DIROP(fs) {                                                 \
  * be ordered and flushed atomically, so that they may be recovered.
  */
 #define        SET_DIROP(fs) {                                                 \
-       int __s;                                                        \
-       __s = splbio();                                                 \
        if ((fs)->lfs_writer)                                           \
        if ((fs)->lfs_writer)                                           \
-               tsleep(&(fs)->lfs_dirops, PRIBIO + 1, "lfs dirop", 0);  \
+               tsleep(&(fs)->lfs_dirops, PRIBIO + 1, "lfs_dirop", 0);  \
        ++(fs)->lfs_dirops;                                             \
        (fs)->lfs_doifile = 1;                                          \
        ++(fs)->lfs_dirops;                                             \
        (fs)->lfs_doifile = 1;                                          \
-       splx(__s);                                                      \
 }
 
 #define        SET_ENDOP(fs) {                                                 \
 }
 
 #define        SET_ENDOP(fs) {                                                 \
-       int __s;                                                        \
-       __s = splbio();                                                 \
        --(fs)->lfs_dirops;                                             \
        if (!(fs)->lfs_dirops)                                          \
                wakeup(&(fs)->lfs_writer);                              \
        --(fs)->lfs_dirops;                                             \
        if (!(fs)->lfs_dirops)                                          \
                wakeup(&(fs)->lfs_writer);                              \
-       splx(__s);                                                      \
 }
 
 #define        MARK_VNODE(dvp) (dvp)->v_flag |= VDIROP
 }
 
 #define        MARK_VNODE(dvp) (dvp)->v_flag |= VDIROP
@@ -614,3 +622,74 @@ lfs_rename(ap)
        SET_ENDOP(VTOI(ap->a_fdvp)->i_lfs);
        return (ret);
 }
        SET_ENDOP(VTOI(ap->a_fdvp)->i_lfs);
        return (ret);
 }
+/*
+ * XXX hack to avoid calling ITIMES in getattr.
+ *
+ * Fill in the vattr structure ap->a_vap from the in-core inode behind
+ * ap->a_vp.  Every statement below only assigns into the vattr; the
+ * inode itself is not modified.  a_cred and a_p are not referenced.
+ * Always returns 0.
+ */
+int
+lfs_getattr(ap)
+       struct vop_getattr_args /* {
+               struct vnode *a_vp;
+               struct vattr *a_vap;
+               struct ucred *a_cred;
+               struct proc *a_p;
+       } */ *ap;
+{
+       register struct vnode *vp = ap->a_vp;
+       register struct inode *ip = VTOI(vp);
+       register struct vattr *vap = ap->a_vap;
+       /*
+        * Copy from inode table
+        */
+       vap->va_fsid = ip->i_dev;
+       vap->va_fileid = ip->i_number;
+       vap->va_mode = ip->i_mode & ~IFMT;      /* mode with the IFMT bits cleared */
+       vap->va_nlink = ip->i_nlink;
+       vap->va_uid = ip->i_uid;
+       vap->va_gid = ip->i_gid;
+       vap->va_rdev = (dev_t)ip->i_rdev;
+       vap->va_size = ip->i_din.di_size;
+       vap->va_atime = ip->i_atime;
+       vap->va_mtime = ip->i_mtime;
+       vap->va_ctime = ip->i_ctime;
+       vap->va_flags = ip->i_flags;
+       vap->va_gen = ip->i_gen;
+       /*
+        * this doesn't belong here
+        *
+        * va_blocksize depends on the vnode type: BLKDEV_IOSIZE for VBLK,
+        * MAXBSIZE for VCHR, otherwise the f_iosize recorded in the
+        * vnode's mount point.
+        */
+       if (vp->v_type == VBLK)
+               vap->va_blocksize = BLKDEV_IOSIZE;
+       else if (vp->v_type == VCHR)
+               vap->va_blocksize = MAXBSIZE;
+       else
+               vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize;
+       vap->va_bytes = dbtob(ip->i_blocks);    /* presumably disk blocks -> bytes; confirm dbtob */
+       vap->va_type = vp->v_type;
+       vap->va_filerev = ip->i_modrev;
+       return (0);
+}
+/*
+ * Close called
+ *
+ * XXX -- we were using ufs_close, but since it updates the
+ * times on the inode, we might need to bump the uinodes
+ * count.
+ *
+ * Only a_vp is used; a_fflag, a_cred and a_p are not referenced.
+ * When the vnode's use count is greater than one and the inode is not
+ * flagged ILOCKED, ITIMES is applied with the current time for both
+ * time arguments.  If IMOD is set afterwards but was clear beforehand,
+ * the file system's lfs_uinodes counter is incremented.
+ * Always returns 0.
+ */
+/* ARGSUSED */
+int
+lfs_close(ap)
+       struct vop_close_args /* {
+               struct vnode *a_vp;
+               int  a_fflag;
+               struct ucred *a_cred;
+               struct proc *a_p;
+       } */ *ap;
+{
+       register struct vnode *vp = ap->a_vp;
+       register struct inode *ip = VTOI(vp);
+       int mod;
+
+       if (vp->v_usecount > 1 && !(ip->i_flag & ILOCKED)) {
+               mod = ip->i_flag & IMOD;        /* remember whether IMOD was already set */
+               ITIMES(ip, &time, &time);
+               /* IMOD newly set -- presumably by ITIMES; confirm against the macro */
+               if (!mod && ip->i_flag & IMOD)
+                       ip->i_lfs->lfs_uinodes++;
+       }
+       return (0);
+}
+