Check that the vnode is still associated with the mount point before using it.
[unix-history] / usr / src / sys / kern / vfs_bio.c
index a702845..205724a 100644 (file)
  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
  *
  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
  *
- *     @(#)vfs_bio.c   7.17 (Berkeley) %G%
+ *     @(#)vfs_bio.c   7.28 (Berkeley) %G%
  */
 
 #include "param.h"
 #include "user.h"
 #include "buf.h"
 #include "vnode.h"
  */
 
 #include "param.h"
 #include "user.h"
 #include "buf.h"
 #include "vnode.h"
+#include "specdev.h"
 #include "mount.h"
 #include "trace.h"
 #include "ucred.h"
 #include "mount.h"
 #include "trace.h"
 #include "ucred.h"
@@ -48,7 +49,7 @@ bread(vp, blkno, size, cred, bpp)
        *bpp = bp = getblk(vp, blkno, size);
 #endif SECSIZE
        if (bp->b_flags&(B_DONE|B_DELWRI)) {
        *bpp = bp = getblk(vp, blkno, size);
 #endif SECSIZE
        if (bp->b_flags&(B_DONE|B_DELWRI)) {
-               trace(TR_BREADHIT, pack(vp->v_mount->m_fsid[0], size), blkno);
+               trace(TR_BREADHIT, pack(vp, size), blkno);
                return (0);
        }
        bp->b_flags |= B_READ;
                return (0);
        }
        bp->b_flags |= B_READ;
@@ -59,7 +60,7 @@ bread(vp, blkno, size, cred, bpp)
                bp->b_rcred = cred;
        }
        VOP_STRATEGY(bp);
                bp->b_rcred = cred;
        }
        VOP_STRATEGY(bp);
-       trace(TR_BREADMISS, pack(vp->v_mount->m_fsid[0], size), blkno);
+       trace(TR_BREADMISS, pack(vp, size), blkno);
        u.u_ru.ru_inblock++;            /* pay for read */
        return (biowait(bp));
 }
        u.u_ru.ru_inblock++;            /* pay for read */
        return (biowait(bp));
 }
@@ -98,25 +99,22 @@ breada(vp, blkno, size, rablkno, rabsize, cred, bpp)
                                bp->b_rcred = cred;
                        }
                        VOP_STRATEGY(bp);
                                bp->b_rcred = cred;
                        }
                        VOP_STRATEGY(bp);
-                       trace(TR_BREADMISS, pack(vp->v_mount->m_fsid[0], size),
-                           blkno);
+                       trace(TR_BREADMISS, pack(vp, size), blkno);
                        u.u_ru.ru_inblock++;            /* pay for read */
                } else
                        u.u_ru.ru_inblock++;            /* pay for read */
                } else
-                       trace(TR_BREADHIT, pack(vp->v_mount->m_fsid[0], size),
-                           blkno);
+                       trace(TR_BREADHIT, pack(vp, size), blkno);
        }
 
        /*
         * If there's a read-ahead block, start i/o
         * on it also (as above).
         */
        }
 
        /*
         * If there's a read-ahead block, start i/o
         * on it also (as above).
         */
-       if (rablkno && !incore(vp, rablkno)) {
+       if (!incore(vp, rablkno)) {
                rabp = getblk(vp, rablkno, rabsize);
 #endif SECSIZE
                if (rabp->b_flags & (B_DONE|B_DELWRI)) {
                        brelse(rabp);
                rabp = getblk(vp, rablkno, rabsize);
 #endif SECSIZE
                if (rabp->b_flags & (B_DONE|B_DELWRI)) {
                        brelse(rabp);
-                       trace(TR_BREADHITRA,
-                           pack(vp->v_mount->m_fsid[0], rabsize), rablkno);
+                       trace(TR_BREADHITRA, pack(vp, rabsize), rablkno);
                } else {
                        rabp->b_flags |= B_READ|B_ASYNC;
                        if (rabp->b_bcount > rabp->b_bufsize)
                } else {
                        rabp->b_flags |= B_READ|B_ASYNC;
                        if (rabp->b_bcount > rabp->b_bufsize)
@@ -126,8 +124,7 @@ breada(vp, blkno, size, rablkno, rabsize, cred, bpp)
                                rabp->b_rcred = cred;
                        }
                        VOP_STRATEGY(rabp);
                                rabp->b_rcred = cred;
                        }
                        VOP_STRATEGY(rabp);
-                       trace(TR_BREADMISSRA,
-                           pack(vp->v_mount->m_fsid[0], rabsize), rablkno);
+                       trace(TR_BREADMISSRA, pack(vp, rabsize), rablkno);
                        u.u_ru.ru_inblock++;            /* pay in advance */
                }
        }
                        u.u_ru.ru_inblock++;            /* pay in advance */
                }
        }
@@ -153,16 +150,20 @@ bwrite(bp)
        register struct buf *bp;
 {
        register int flag;
        register struct buf *bp;
 {
        register int flag;
-       int error;
+       int s, error;
 
        flag = bp->b_flags;
        bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
        if ((flag&B_DELWRI) == 0)
                u.u_ru.ru_oublock++;            /* noone paid yet */
 
        flag = bp->b_flags;
        bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
        if ((flag&B_DELWRI) == 0)
                u.u_ru.ru_oublock++;            /* noone paid yet */
-       trace(TR_BWRITE,
-           pack(bp->b_vp->v_mount->m_fsid[0], bp->b_bcount), bp->b_lblkno);
+       else
+               reassignbuf(bp, bp->b_vp);
+       trace(TR_BWRITE, pack(bp->b_vp, bp->b_bcount), bp->b_lblkno);
        if (bp->b_bcount > bp->b_bufsize)
                panic("bwrite");
        if (bp->b_bcount > bp->b_bufsize)
                panic("bwrite");
+       s = splbio();
+       bp->b_vp->v_numoutput++;
+       splx(s);
        VOP_STRATEGY(bp);
 
        /*
        VOP_STRATEGY(bp);
 
        /*
@@ -192,8 +193,11 @@ bdwrite(bp)
        register struct buf *bp;
 {
 
        register struct buf *bp;
 {
 
-       if ((bp->b_flags&B_DELWRI) == 0)
+       if ((bp->b_flags & B_DELWRI) == 0) {
+               bp->b_flags |= B_DELWRI;
+               reassignbuf(bp, bp->b_vp);
                u.u_ru.ru_oublock++;            /* noone paid yet */
                u.u_ru.ru_oublock++;            /* noone paid yet */
+       }
        /*
         * If this is a tape drive, the write must be initiated.
         */
        /*
         * If this is a tape drive, the write must be initiated.
         */
@@ -225,8 +229,7 @@ brelse(bp)
        register struct buf *flist;
        register s;
 
        register struct buf *flist;
        register s;
 
-       trace(TR_BRELSE,
-           pack(bp->b_vp->v_mount->m_fsid[0], bp->b_bufsize), bp->b_lblkno);
+       trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
        /*
         * If a process is waiting for the buffer, or
         * is waiting for a free buffer, awaken it.
        /*
         * If a process is waiting for the buffer, or
         * is waiting for a free buffer, awaken it.
@@ -376,8 +379,9 @@ loop:
                        splx(s);
                        goto loop;
                }
                        splx(s);
                        goto loop;
                }
+               bremfree(bp);
+               bp->b_flags |= B_BUSY;
                splx(s);
                splx(s);
-               notavail(bp);
                if (bp->b_bcount != size) {
                        printf("getblk: stray size");
                        bp->b_flags |= B_INVAL;
                if (bp->b_bcount != size) {
                        printf("getblk: stray size");
                        bp->b_flags |= B_INVAL;
@@ -470,15 +474,15 @@ loop:
                splx(s);
                goto loop;
        }
                splx(s);
                goto loop;
        }
-       splx(s);
        bp = dp->av_forw;
        bp = dp->av_forw;
-       notavail(bp);
+       bremfree(bp);
+       bp->b_flags |= B_BUSY;
+       splx(s);
        if (bp->b_flags & B_DELWRI) {
                (void) bawrite(bp);
                goto loop;
        }
        if (bp->b_flags & B_DELWRI) {
                (void) bawrite(bp);
                goto loop;
        }
-       trace(TR_BRELSE,
-           pack(bp->b_vp->v_mount->m_fsid[0], bp->b_bufsize), bp->b_lblkno);
+       trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
        if (bp->b_vp)
                brelvp(bp);
        if (bp->b_rcred != NOCRED) {
        if (bp->b_vp)
                brelvp(bp);
        if (bp->b_rcred != NOCRED) {
@@ -528,12 +532,23 @@ biowait(bp)
 biodone(bp)
        register struct buf *bp;
 {
 biodone(bp)
        register struct buf *bp;
 {
+       register struct vnode *vp;
 
        if (bp->b_flags & B_DONE)
                panic("dup biodone");
        bp->b_flags |= B_DONE;
 
        if (bp->b_flags & B_DONE)
                panic("dup biodone");
        bp->b_flags |= B_DONE;
-       if ((bp->b_flags & B_READ) == 0)
+       if ((bp->b_flags & B_READ) == 0) {
                bp->b_dirtyoff = bp->b_dirtyend = 0;
                bp->b_dirtyoff = bp->b_dirtyend = 0;
+               if (vp = bp->b_vp) {
+                       vp->v_numoutput--;
+                       if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
+                               if (vp->v_numoutput < 0)
+                                       panic("biodone: neg numoutput");
+                               vp->v_flag &= ~VBWAIT;
+                               wakeup((caddr_t)&vp->v_numoutput);
+                       }
+               }
+       }
        if (bp->b_flags & B_CALL) {
                bp->b_flags &= ~B_CALL;
                (*bp->b_iodone)(bp);
        if (bp->b_flags & B_CALL) {
                bp->b_flags &= ~B_CALL;
                (*bp->b_iodone)(bp);
@@ -547,69 +562,6 @@ biodone(bp)
        }
 }
 
        }
 }
 
-/*
- * Ensure that no part of a specified block is in an incore buffer.
-#ifdef SECSIZE
- * "size" is given in device blocks (the units of b_lblkno).
-#endif SECSIZE
-#ifdef SECSIZE
- * "size" is given in device blocks (the units of b_blkno).
-#endif SECSIZE
- */
-blkflush(vp, blkno, size)
-       struct vnode *vp;
-       daddr_t blkno;
-#ifdef SECSIZE
-       int size;
-#else SECSIZE
-       long size;
-#endif SECSIZE
-{
-       register struct buf *ep;
-       struct buf *dp;
-       daddr_t curblk, nextblk, ecurblk, lastblk;
-       int s, error, allerrors = 0;
-
-       /*
-        * Iterate through each possible hash chain.
-        */
-       lastblk = blkno + btodb(size) - 1;
-       for (curblk = blkno; curblk <= lastblk; curblk = nextblk) {
-#if RND & (RND-1)
-               nextblk = ((curblk / RND) + 1) * RND;
-#else
-               nextblk = ((curblk & ~(RND-1)) + RND);
-#endif
-               ecurblk = nextblk > lastblk ? lastblk : nextblk - 1;
-               dp = BUFHASH(vp, curblk);
-loop:
-               for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
-                       if (ep->b_vp != vp || (ep->b_flags & B_INVAL))
-                               continue;
-                       /* look for overlap */
-                       if (ep->b_bcount == 0 || ep->b_lblkno > ecurblk ||
-                           ep->b_lblkno + btodb(ep->b_bcount) <= curblk)
-                               continue;
-                       s = splbio();
-                       if (ep->b_flags&B_BUSY) {
-                               ep->b_flags |= B_WANTED;
-                               sleep((caddr_t)ep, PRIBIO+1);
-                               splx(s);
-                               goto loop;
-                       }
-                       if (ep->b_flags & B_DELWRI) {
-                               splx(s);
-                               notavail(ep);
-                               if (error = bwrite(ep))
-                                       allerrors = error;
-                               goto loop;
-                       }
-                       splx(s);
-               }
-       }
-       return (allerrors);
-}
-
 /*
  * Make sure all write-behind blocks associated
  * with mount point are flushed out (from sync).
 /*
  * Make sure all write-behind blocks associated
  * with mount point are flushed out (from sync).
@@ -619,15 +571,17 @@ mntflushbuf(mountp, flags)
        int flags;
 {
        register struct vnode *vp;
        int flags;
 {
        register struct vnode *vp;
-       struct vnode *nvp;
 
 
+       if ((mountp->mnt_flag & MNT_MPBUSY) == 0)
+               panic("mntflushbuf: not busy");
 loop:
 loop:
-       for (vp = mountp->m_mounth; vp; vp = nvp) {
-               nvp = vp->v_mountf;
+       for (vp = mountp->mnt_mounth; vp; vp = vp->v_mountf) {
                if (vget(vp))
                        goto loop;
                vflushbuf(vp, flags);
                vput(vp);
                if (vget(vp))
                        goto loop;
                vflushbuf(vp, flags);
                vput(vp);
+               if (vp->v_mount != mountp)
+                       goto loop;
        }
 }
 
        }
 }
 
@@ -644,36 +598,42 @@ vflushbuf(vp, flags)
 
 loop:
        s = splbio();
 
 loop:
        s = splbio();
-       for (bp = vp->v_blockh; bp; bp = nbp) {
+       for (bp = vp->v_dirtyblkhd; bp; bp = nbp) {
                nbp = bp->b_blockf;
                if ((bp->b_flags & B_BUSY))
                        continue;
                if ((bp->b_flags & B_DELWRI) == 0)
                nbp = bp->b_blockf;
                if ((bp->b_flags & B_BUSY))
                        continue;
                if ((bp->b_flags & B_DELWRI) == 0)
-                       continue;
+                       panic("vflushbuf: not dirty");
+               bremfree(bp);
+               bp->b_flags |= B_BUSY;
                splx(s);
                splx(s);
-               notavail(bp);
-               (void) bawrite(bp);
-               goto loop;
+               /*
+                * Wait for I/O associated with indirect blocks to complete,
+                * since there is no way to quickly wait for them below.
+                * NB - This is really specific to ufs, but is done here
+                * as it is easier and quicker.
+                */
+               if (bp->b_vp == vp || (flags & B_SYNC) == 0) {
+                       (void) bawrite(bp);
+                       s = splbio();
+               } else {
+                       (void) bwrite(bp);
+                       goto loop;
+               }
        }
        splx(s);
        if ((flags & B_SYNC) == 0)
                return;
        }
        splx(s);
        if ((flags & B_SYNC) == 0)
                return;
-wloop:
        s = splbio();
        s = splbio();
-       for (bp = vp->v_blockh; bp; bp = nbp) {
-               nbp = bp->b_blockf;
-               if (bp->b_flags & B_BUSY) {
-                       bp->b_flags |= B_WANTED;
-                       sleep((caddr_t)bp, PRIBIO+1);
-                       splx(s);
-                       goto wloop;
-               }
-               if ((bp->b_flags & B_DELWRI)) {
-                       splx(s);
-                       goto loop;
-               }
+       while (vp->v_numoutput) {
+               vp->v_flag |= VBWAIT;
+               sleep((caddr_t)&vp->v_numoutput, PRIBIO+1);
        }
        splx(s);
        }
        splx(s);
+       if (vp->v_dirtyblkhd) {
+               vprint("vflushbuf: dirty", vp);
+               goto loop;
+       }
 }
 
 /*
 }
 
 /*
@@ -689,16 +649,18 @@ mntinvalbuf(mountp)
        struct mount *mountp;
 {
        register struct vnode *vp;
        struct mount *mountp;
 {
        register struct vnode *vp;
-       struct vnode *nvp;
        int dirty = 0;
 
        int dirty = 0;
 
+       if ((mountp->mnt_flag & MNT_MPBUSY) == 0)
+               panic("mntinvalbuf: not busy");
 loop:
 loop:
-       for (vp = mountp->m_mounth; vp; vp = nvp) {
-               nvp = vp->v_mountf;
+       for (vp = mountp->mnt_mounth; vp; vp = vp->v_mountf) {
                if (vget(vp))
                        goto loop;
                dirty += vinvalbuf(vp, 1);
                vput(vp);
                if (vget(vp))
                        goto loop;
                dirty += vinvalbuf(vp, 1);
                vput(vp);
+               if (vp->v_mount != mountp)
+                       goto loop;
        }
        return (dirty);
 }
        }
        return (dirty);
 }
@@ -712,32 +674,41 @@ vinvalbuf(vp, save)
        int save;
 {
        register struct buf *bp;
        int save;
 {
        register struct buf *bp;
-       struct buf *nbp;
+       struct buf *nbp, *blist;
        int s, dirty = 0;
 
        int s, dirty = 0;
 
-loop:
-       for (bp = vp->v_blockh; bp; bp = nbp) {
-               nbp = bp->b_blockf;
-               s = splbio();
-               if (bp->b_flags & B_BUSY) {
-                       bp->b_flags |= B_WANTED;
-                       sleep((caddr_t)bp, PRIBIO+1);
+       for (;;) {
+               if (blist = vp->v_dirtyblkhd)
+                       /* void */;
+               else if (blist = vp->v_cleanblkhd)
+                       /* void */;
+               else
+                       break;
+               for (bp = blist; bp; bp = nbp) {
+                       nbp = bp->b_blockf;
+                       s = splbio();
+                       if (bp->b_flags & B_BUSY) {
+                               bp->b_flags |= B_WANTED;
+                               sleep((caddr_t)bp, PRIBIO+1);
+                               splx(s);
+                               break;
+                       }
+                       bremfree(bp);
+                       bp->b_flags |= B_BUSY;
                        splx(s);
                        splx(s);
-                       goto loop;
-               }
-               splx(s);
-               notavail(bp);
-               if (save) {
-                       if (bp->b_flags & B_DELWRI) {
+                       if (save && (bp->b_flags & B_DELWRI)) {
                                dirty++;
                                (void) bwrite(bp);
                                dirty++;
                                (void) bwrite(bp);
-                               goto loop;
+                               break;
                        }
                        }
+                       if (bp->b_vp != vp)
+                               reassignbuf(bp, bp->b_vp);
+                       else
+                               bp->b_flags |= B_INVAL;
+                       brelse(bp);
                }
                }
-               bp->b_flags |= B_INVAL;
-               brelse(bp);
        }
        }
-       if (vp->v_blockh != 0)
+       if (vp->v_dirtyblkhd || vp->v_cleanblkhd)
                panic("vinvalbuf: flush failed");
        return (dirty);
 }
                panic("vinvalbuf: flush failed");
        return (dirty);
 }
@@ -752,7 +723,7 @@ bgetvp(vp, bp)
 
        if (bp->b_vp)
                panic("bgetvp: not free");
 
        if (bp->b_vp)
                panic("bgetvp: not free");
-       VREF(vp);
+       VHOLD(vp);
        bp->b_vp = vp;
        if (vp->v_type == VBLK || vp->v_type == VCHR)
                bp->b_dev = vp->v_rdev;
        bp->b_vp = vp;
        if (vp->v_type == VBLK || vp->v_type == VCHR)
                bp->b_dev = vp->v_rdev;
@@ -761,14 +732,14 @@ bgetvp(vp, bp)
        /*
         * Insert onto list for new vnode.
         */
        /*
         * Insert onto list for new vnode.
         */
-       if (vp->v_blockh) {
-               bp->b_blockf = vp->v_blockh;
-               bp->b_blockb = &vp->v_blockh;
-               vp->v_blockh->b_blockb = &bp->b_blockf;
-               vp->v_blockh = bp;
+       if (vp->v_cleanblkhd) {
+               bp->b_blockf = vp->v_cleanblkhd;
+               bp->b_blockb = &vp->v_cleanblkhd;
+               vp->v_cleanblkhd->b_blockb = &bp->b_blockf;
+               vp->v_cleanblkhd = bp;
        } else {
        } else {
-               vp->v_blockh = bp;
-               bp->b_blockb = &vp->v_blockh;
+               vp->v_cleanblkhd = bp;
+               bp->b_blockb = &vp->v_cleanblkhd;
                bp->b_blockf = NULL;
        }
 }
                bp->b_blockf = NULL;
        }
 }
@@ -796,7 +767,7 @@ brelvp(bp)
        }
        vp = bp->b_vp;
        bp->b_vp = (struct vnode *) 0;
        }
        vp = bp->b_vp;
        bp->b_vp = (struct vnode *) 0;
-       vrele(vp);
+       HOLDRELE(vp);
 }
 
 /*
 }
 
 /*
@@ -808,8 +779,10 @@ reassignbuf(bp, newvp)
        register struct buf *bp;
        register struct vnode *newvp;
 {
        register struct buf *bp;
        register struct vnode *newvp;
 {
-       register struct buf *bq;
+       register struct buf *bq, **listheadp;
 
 
+       if (newvp == NULL)
+               panic("reassignbuf: NULL");
        /*
         * Delete from old vnode list, if on one.
         */
        /*
         * Delete from old vnode list, if on one.
         */
@@ -819,16 +792,21 @@ reassignbuf(bp, newvp)
                *bp->b_blockb = bq;
        }
        /*
                *bp->b_blockb = bq;
        }
        /*
-        * Insert onto list for new vnode.
+        * If dirty, put on list of dirty buffers;
+        * otherwise insert onto list of clean buffers.
         */
         */
-       if (newvp->v_blockh) {
-               bp->b_blockf = newvp->v_blockh;
-               bp->b_blockb = &newvp->v_blockh;
-               newvp->v_blockh->b_blockb = &bp->b_blockf;
-               newvp->v_blockh = bp;
+       if (bp->b_flags & B_DELWRI)
+               listheadp = &newvp->v_dirtyblkhd;
+       else
+               listheadp = &newvp->v_cleanblkhd;
+       if (*listheadp) {
+               bp->b_blockf = *listheadp;
+               bp->b_blockb = listheadp;
+               bp->b_blockf->b_blockb = &bp->b_blockf;
+               *listheadp = bp;
        } else {
        } else {
-               newvp->v_blockh = bp;
-               bp->b_blockb = &newvp->v_blockh;
+               *listheadp = bp;
+               bp->b_blockb = listheadp;
                bp->b_blockf = NULL;
        }
 }
                bp->b_blockf = NULL;
        }
 }