generalize the buffer pool so that NFS can become a client
[unix-history] / usr / src / sys / kern / vfs_cluster.c
index 4f74f8f..362ad50 100644 (file)
@@ -1,36 +1,38 @@
 /*
 /*
- * Copyright (c) 1982, 1986 Regents of the University of California.
- * All rights reserved.  The Berkeley software License Agreement
- * specifies the terms and conditions for redistribution.
+ * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
+ * All rights reserved.
  *
  *
- *     @(#)vfs_cluster.c       7.3 (Berkeley) %G%
+ * Redistribution and use in source and binary forms are permitted
+ * provided that the above copyright notice and this paragraph are
+ * duplicated in all such forms and that any documentation,
+ * advertising materials, and other materials related to such
+ * distribution and use acknowledge that the software was developed
+ * by the University of California, Berkeley.  The name of the
+ * University may not be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ *     @(#)vfs_cluster.c       7.10 (Berkeley) %G%
  */
 
  */
 
-#include "../machine/pte.h"
-
 #include "param.h"
 #include "param.h"
-#include "systm.h"
-#include "dir.h"
 #include "user.h"
 #include "buf.h"
 #include "user.h"
 #include "buf.h"
-#include "conf.h"
-#include "proc.h"
-#include "seg.h"
-#include "vm.h"
+#include "vnode.h"
 #include "trace.h"
 #include "trace.h"
+#include "ucred.h"
 
 /*
  * Read in (if necessary) the block and return a buffer pointer.
  */
 
 /*
  * Read in (if necessary) the block and return a buffer pointer.
  */
-struct buf *
-#ifdef SECSIZE
-bread(dev, blkno, size, secsize)
-#else SECSIZE
-bread(dev, blkno, size)
-#endif SECSIZE
-       dev_t dev;
+bread(vp, blkno, size, cred, bpp)
+       struct vnode *vp;
        daddr_t blkno;
        int size;
        daddr_t blkno;
        int size;
+       struct ucred *cred;
+       struct buf **bpp;
 #ifdef SECSIZE
        long secsize;
 #endif SECSIZE
 #ifdef SECSIZE
        long secsize;
 #endif SECSIZE
@@ -42,38 +44,38 @@ bread(dev, blkno, size)
 #ifdef SECSIZE
        bp = getblk(dev, blkno, size, secsize);
 #else SECSIZE
 #ifdef SECSIZE
        bp = getblk(dev, blkno, size, secsize);
 #else SECSIZE
-       bp = getblk(dev, blkno, size);
+       *bpp = bp = getblk(vp, blkno, size);
 #endif SECSIZE
        if (bp->b_flags&(B_DONE|B_DELWRI)) {
 #endif SECSIZE
        if (bp->b_flags&(B_DONE|B_DELWRI)) {
-               trace(TR_BREADHIT, pack(dev, size), blkno);
-               return (bp);
+               trace(TR_BREADHIT, pack(vp->v_mount->m_fsid[0], size), blkno);
+               return (0);
        }
        bp->b_flags |= B_READ;
        if (bp->b_bcount > bp->b_bufsize)
                panic("bread");
        }
        bp->b_flags |= B_READ;
        if (bp->b_bcount > bp->b_bufsize)
                panic("bread");
-       (*bdevsw[major(dev)].d_strategy)(bp);
-       trace(TR_BREADMISS, pack(dev, size), blkno);
+       if (bp->b_rcred == NOCRED && cred != NOCRED) {
+               crhold(cred);
+               bp->b_rcred = cred;
+       }
+       VOP_STRATEGY(bp);
+       trace(TR_BREADMISS, pack(vp->v_mount->m_fsid[0], size), blkno);
        u.u_ru.ru_inblock++;            /* pay for read */
        u.u_ru.ru_inblock++;            /* pay for read */
-       biowait(bp);
-       return (bp);
+       return (biowait(bp));
 }
 
 /*
  * Read in the block, like bread, but also start I/O on the
  * read-ahead block (which is not allocated to the caller)
  */
 }
 
 /*
  * Read in the block, like bread, but also start I/O on the
  * read-ahead block (which is not allocated to the caller)
  */
-struct buf *
-#ifdef SECSIZE
-breada(dev, blkno, size, secsize, rablkno, rabsize)
-#else SECSIZE
-breada(dev, blkno, size, rablkno, rabsize)
-#endif SECSIZE
-       dev_t dev;
+breada(vp, blkno, size, rablkno, rabsize, cred, bpp)
+       struct vnode *vp;
        daddr_t blkno; int size;
 #ifdef SECSIZE
        long secsize;
 #endif SECSIZE
        daddr_t rablkno; int rabsize;
        daddr_t blkno; int size;
 #ifdef SECSIZE
        long secsize;
 #endif SECSIZE
        daddr_t rablkno; int rabsize;
+       struct ucred *cred;
+       struct buf **bpp;
 {
        register struct buf *bp, *rabp;
 
 {
        register struct buf *bp, *rabp;
 
@@ -83,42 +85,48 @@ breada(dev, blkno, size, rablkno, rabsize)
         * a buffer and initiate i/o (getblk checks
         * for a cache hit).
         */
         * a buffer and initiate i/o (getblk checks
         * for a cache hit).
         */
-       if (!incore(dev, blkno)) {
-#ifdef SECSIZE
-               bp = getblk(dev, blkno, size, secsize);
-#else SECSIZE
-               bp = getblk(dev, blkno, size);
+       if (!incore(vp, blkno)) {
+               *bpp = bp = getblk(vp, blkno, size);
 #endif SECSIZE
                if ((bp->b_flags&(B_DONE|B_DELWRI)) == 0) {
                        bp->b_flags |= B_READ;
                        if (bp->b_bcount > bp->b_bufsize)
                                panic("breada");
 #endif SECSIZE
                if ((bp->b_flags&(B_DONE|B_DELWRI)) == 0) {
                        bp->b_flags |= B_READ;
                        if (bp->b_bcount > bp->b_bufsize)
                                panic("breada");
-                       (*bdevsw[major(dev)].d_strategy)(bp);
-                       trace(TR_BREADMISS, pack(dev, size), blkno);
+                       if (bp->b_rcred == NOCRED && cred != NOCRED) {
+                               crhold(cred);
+                               bp->b_rcred = cred;
+                       }
+                       VOP_STRATEGY(bp);
+                       trace(TR_BREADMISS, pack(vp->v_mount->m_fsid[0], size),
+                           blkno);
                        u.u_ru.ru_inblock++;            /* pay for read */
                } else
                        u.u_ru.ru_inblock++;            /* pay for read */
                } else
-                       trace(TR_BREADHIT, pack(dev, size), blkno);
+                       trace(TR_BREADHIT, pack(vp->v_mount->m_fsid[0], size),
+                           blkno);
        }
 
        /*
         * If there's a read-ahead block, start i/o
         * on it also (as above).
         */
        }
 
        /*
         * If there's a read-ahead block, start i/o
         * on it also (as above).
         */
-       if (rablkno && !incore(dev, rablkno)) {
-#ifdef SECSIZE
-               rabp = getblk(dev, rablkno, rabsize, secsize);
-#else SECSIZE
-               rabp = getblk(dev, rablkno, rabsize);
+       if (rablkno && !incore(vp, rablkno)) {
+               rabp = getblk(vp, rablkno, rabsize);
 #endif SECSIZE
                if (rabp->b_flags & (B_DONE|B_DELWRI)) {
                        brelse(rabp);
 #endif SECSIZE
                if (rabp->b_flags & (B_DONE|B_DELWRI)) {
                        brelse(rabp);
-                       trace(TR_BREADHITRA, pack(dev, rabsize), blkno);
+                       trace(TR_BREADHITRA,
+                           pack(vp->v_mount->m_fsid[0], rabsize), blkno);
                } else {
                        rabp->b_flags |= B_READ|B_ASYNC;
                        if (rabp->b_bcount > rabp->b_bufsize)
                                panic("breadrabp");
                } else {
                        rabp->b_flags |= B_READ|B_ASYNC;
                        if (rabp->b_bcount > rabp->b_bufsize)
                                panic("breadrabp");
-                       (*bdevsw[major(dev)].d_strategy)(rabp);
-                       trace(TR_BREADMISSRA, pack(dev, rabsize), rablock);
+                       if (bp->b_rcred == NOCRED && cred != NOCRED) {
+                               crhold(cred);
+                               bp->b_rcred = cred;
+                       }
+                       VOP_STRATEGY(rabp);
+                       trace(TR_BREADMISSRA,
+                           pack(vp->v_mount->m_fsid[0], rabsize), rablock);
                        u.u_ru.ru_inblock++;            /* pay in advance */
                }
        }
                        u.u_ru.ru_inblock++;            /* pay in advance */
                }
        }
@@ -132,10 +140,8 @@ breada(dev, blkno, size, rablkno, rabsize)
 #ifdef SECSIZE
                return (bread(dev, blkno, size, secsize));
 #else SECSIZE
 #ifdef SECSIZE
                return (bread(dev, blkno, size, secsize));
 #else SECSIZE
-               return (bread(dev, blkno, size));
-#endif SECSIZE
-       biowait(bp);
-       return (bp);
+               return (bread(vp, blkno, size, cred, bpp));
+       return (biowait(bp));
 }
 
 /*
 }
 
 /*
@@ -145,16 +151,18 @@ breada(dev, blkno, size, rablkno, rabsize)
 bwrite(bp)
        register struct buf *bp;
 {
 bwrite(bp)
        register struct buf *bp;
 {
-       register flag;
+       register int flag;
+       int error;
 
        flag = bp->b_flags;
        bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
        if ((flag&B_DELWRI) == 0)
                u.u_ru.ru_oublock++;            /* noone paid yet */
 
        flag = bp->b_flags;
        bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
        if ((flag&B_DELWRI) == 0)
                u.u_ru.ru_oublock++;            /* noone paid yet */
-       trace(TR_BWRITE, pack(bp->b_dev, bp->b_bcount), bp->b_blkno);
+       trace(TR_BWRITE,
+           pack(bp->b_vp->v_mount->m_fsid[0], bp->b_bcount), bp->b_blkno);
        if (bp->b_bcount > bp->b_bufsize)
                panic("bwrite");
        if (bp->b_bcount > bp->b_bufsize)
                panic("bwrite");
-       (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
+       VOP_STRATEGY(bp);
 
        /*
         * If the write was synchronous, then await i/o completion.
 
        /*
         * If the write was synchronous, then await i/o completion.
@@ -162,10 +170,13 @@ bwrite(bp)
         * the q of blocks awaiting i/o completion status.
         */
        if ((flag&B_ASYNC) == 0) {
         * the q of blocks awaiting i/o completion status.
         */
        if ((flag&B_ASYNC) == 0) {
-               biowait(bp);
+               error = biowait(bp);
                brelse(bp);
                brelse(bp);
-       } else if (flag & B_DELWRI)
+       } else if (flag & B_DELWRI) {
                bp->b_flags |= B_AGE;
                bp->b_flags |= B_AGE;
+               error = 0;
+       }
+       return (error);
 }
 
 /*
 }
 
 /*
@@ -182,12 +193,22 @@ bdwrite(bp)
 
        if ((bp->b_flags&B_DELWRI) == 0)
                u.u_ru.ru_oublock++;            /* noone paid yet */
 
        if ((bp->b_flags&B_DELWRI) == 0)
                u.u_ru.ru_oublock++;            /* noone paid yet */
+#ifdef notdef
+       /*
+        * This does not work for buffers associated with
+        * vnodes that are remote - they have no dev.
+        * Besides, we don't use bio with tapes, so rather
+        * than develop a fix, we just ifdef this out for now.
+        */
        if (bdevsw[major(bp->b_dev)].d_flags & B_TAPE)
                bawrite(bp);
        else {
                bp->b_flags |= B_DELWRI | B_DONE;
                brelse(bp);
        }
        if (bdevsw[major(bp->b_dev)].d_flags & B_TAPE)
                bawrite(bp);
        else {
                bp->b_flags |= B_DELWRI | B_DONE;
                brelse(bp);
        }
+#endif
+       bp->b_flags |= B_DELWRI | B_DONE;
+       brelse(bp);
 }
 
 /*
 }
 
 /*
@@ -198,7 +219,7 @@ bawrite(bp)
 {
 
        bp->b_flags |= B_ASYNC;
 {
 
        bp->b_flags |= B_ASYNC;
-       bwrite(bp);
+       (void) bwrite(bp);
 }
 
 /*
 }
 
 /*
@@ -210,7 +231,8 @@ brelse(bp)
        register struct buf *flist;
        register s;
 
        register struct buf *flist;
        register s;
 
-       trace(TR_BRELSE, pack(bp->b_dev, bp->b_bufsize), bp->b_blkno);
+       trace(TR_BRELSE,
+           pack(bp->b_vp->v_mount->m_fsid[0], bp->b_bufsize), bp->b_blkno);
        /*
         * If someone's waiting for the buffer, or
         * is waiting for a buffer wake 'em up.
        /*
         * If someone's waiting for the buffer, or
         * is waiting for a buffer wake 'em up.
@@ -221,11 +243,14 @@ brelse(bp)
                bfreelist[0].b_flags &= ~B_WANTED;
                wakeup((caddr_t)bfreelist);
        }
                bfreelist[0].b_flags &= ~B_WANTED;
                wakeup((caddr_t)bfreelist);
        }
+       if (bp->b_flags & B_NOCACHE) {
+               bp->b_flags |= B_INVAL;
+       }
        if (bp->b_flags&B_ERROR)
                if (bp->b_flags & B_LOCKED)
                        bp->b_flags &= ~B_ERROR;        /* try again later */
                else
        if (bp->b_flags&B_ERROR)
                if (bp->b_flags & B_LOCKED)
                        bp->b_flags &= ~B_ERROR;        /* try again later */
                else
-                       bp->b_dev = NODEV;              /* no assoc */
+                       brelvp(bp);                     /* no assoc */
 
        /*
         * Stick the buffer back on a free list.
 
        /*
         * Stick the buffer back on a free list.
@@ -248,7 +273,7 @@ brelse(bp)
                        flist = &bfreelist[BQ_LRU];
                binstailfree(bp, flist);
        }
                        flist = &bfreelist[BQ_LRU];
                binstailfree(bp, flist);
        }
-       bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE);
+       bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE|B_NOCACHE);
        splx(s);
 }
 
        splx(s);
 }
 
@@ -256,40 +281,35 @@ brelse(bp)
  * See if the block is associated with some buffer
  * (mainly to avoid getting hung up on a wait in breada)
  */
  * See if the block is associated with some buffer
  * (mainly to avoid getting hung up on a wait in breada)
  */
-incore(dev, blkno)
-       dev_t dev;
+incore(vp, blkno)
+       struct vnode *vp;               /* vnode the block belongs to */
        daddr_t blkno;
 {
        register struct buf *bp;
        register struct buf *dp;
 
        daddr_t blkno;
 {
        register struct buf *bp;
        register struct buf *dp;
 
-       dp = BUFHASH(dev, blkno);
+       dp = BUFHASH(vp, blkno);        /* head of the hash chain for (vp, blkno) */
        for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
        for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
-               if (bp->b_blkno == blkno && bp->b_dev == dev &&
+               if (bp->b_blkno == blkno && bp->b_vp == vp &&   /* same block, same vnode ... */
                    (bp->b_flags & B_INVAL) == 0)
                        return (1);     /* ... and not marked B_INVAL: block is in core */
        return (0);                     /* chain exhausted without a valid match */
 }
 
                    (bp->b_flags & B_INVAL) == 0)
                        return (1);
        return (0);
 }
 
-struct buf *
-#ifdef SECSIZE
-baddr(dev, blkno, size, secsize)
-#else SECSIZE
-baddr(dev, blkno, size)
-#endif SECSIZE
-       dev_t dev;
+baddr(vp, blkno, size, cred, bpp)
+       struct vnode *vp;               /* vnode the block belongs to */
        daddr_t blkno;
        int size;
        daddr_t blkno;
        int size;
+       struct ucred *cred;             /* passed straight through to bread() */
+       struct buf **bpp;               /* out: filled in by bread(), or set to 0 below */
 #ifdef SECSIZE
        long secsize;
 #endif SECSIZE
 {
 
 #ifdef SECSIZE
        long secsize;
 #endif SECSIZE
 {
 
-       if (incore(dev, blkno))
-#ifdef SECSIZE
-               return (bread(dev, blkno, size, secsize));
-#else SECSIZE
-               return (bread(dev, blkno, size));
+       if (incore(vp, blkno))          /* bread() is called only when incore() finds the block */
+               return (bread(vp, blkno, size, cred, bpp));     /* bread()'s return value is passed back */
+       *bpp = 0;                       /* not in core: hand back no buffer */
 #endif SECSIZE
        return (0);
 }
 #endif SECSIZE
        return (0);
 }
@@ -314,9 +334,8 @@ struct buf *
 #ifdef SECSIZE
 getblk(dev, blkno, size, secsize)
 #else SECSIZE
 #ifdef SECSIZE
 getblk(dev, blkno, size, secsize)
 #else SECSIZE
-getblk(dev, blkno, size)
-#endif SECSIZE
-       dev_t dev;
+getblk(vp, blkno, size)
+       register struct vnode *vp;
        daddr_t blkno;
        int size;
 #ifdef SECSIZE
        daddr_t blkno;
        int size;
 #ifdef SECSIZE
@@ -343,10 +362,10 @@ getblk(dev, blkno, size)
         * the buffer is in use for i/o, then we wait until
         * the i/o has completed.
         */
         * the buffer is in use for i/o, then we wait until
         * the i/o has completed.
         */
-       dp = BUFHASH(dev, blkno);
+       dp = BUFHASH(vp, blkno);
 loop:
        for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
 loop:
        for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
-               if (bp->b_blkno != blkno || bp->b_dev != dev ||
+               if (bp->b_blkno != blkno || bp->b_vp != vp ||
                    bp->b_flags&B_INVAL)
                        continue;
                s = splbio();
                    bp->b_flags&B_INVAL)
                        continue;
                s = splbio();
@@ -361,7 +380,7 @@ loop:
                if (bp->b_bcount != size) {
                        if (bp->b_bcount < size && (bp->b_flags&B_DELWRI)) {
                                bp->b_flags &= ~B_ASYNC;
                if (bp->b_bcount != size) {
                        if (bp->b_bcount < size && (bp->b_flags&B_DELWRI)) {
                                bp->b_flags &= ~B_ASYNC;
-                               bwrite(bp);
+                               (void) bwrite(bp);
                                goto loop;
                        }
                        if (brealloc(bp, size) == 0)
                                goto loop;
                        }
                        if (brealloc(bp, size) == 0)
@@ -372,18 +391,21 @@ loop:
                bp->b_flags |= B_CACHE;
                return (bp);
        }
                bp->b_flags |= B_CACHE;
                return (bp);
        }
-       if (major(dev) >= nblkdev)
-               panic("blkdev");
        bp = getnewbuf();
        bfree(bp);
        bremhash(bp);
        bp = getnewbuf();
        bfree(bp);
        bremhash(bp);
-       binshash(bp, dp);
-       bp->b_dev = dev;
+       if (bp->b_vp)
+               brelvp(bp);
+       VREF(vp);
+       bp->b_vp = vp;
+       bp->b_dev = vp->v_rdev;
 #ifdef SECSIZE
        bp->b_blksize = secsize;
 #endif SECSIZE
        bp->b_blkno = blkno;
        bp->b_error = 0;
 #ifdef SECSIZE
        bp->b_blksize = secsize;
 #endif SECSIZE
        bp->b_blkno = blkno;
        bp->b_error = 0;
+       bp->b_resid = 0;
+       binshash(bp, dp);
        if (brealloc(bp, size) == 0)
                goto loop;
        return (bp);
        if (brealloc(bp, size) == 0)
                goto loop;
        return (bp);
@@ -407,12 +429,13 @@ loop:
        bfree(bp);
        bremhash(bp);
        flist = &bfreelist[BQ_AGE];
        bfree(bp);
        bremhash(bp);
        flist = &bfreelist[BQ_AGE];
-       binshash(bp, flist);
-       bp->b_dev = (dev_t)NODEV;
+       brelvp(bp);
 #ifdef SECSIZE
        bp->b_blksize = DEV_BSIZE;
 #endif SECSIZE
        bp->b_error = 0;
 #ifdef SECSIZE
        bp->b_blksize = DEV_BSIZE;
 #endif SECSIZE
        bp->b_error = 0;
+       bp->b_resid = 0;
+       binshash(bp, flist);
        if (brealloc(bp, size) == 0)
                goto loop;
        return (bp);
        if (brealloc(bp, size) == 0)
                goto loop;
        return (bp);
@@ -439,7 +462,7 @@ brealloc(bp, size)
                return (1);
        if (size < bp->b_bcount) { 
                if (bp->b_flags & B_DELWRI) {
                return (1);
        if (size < bp->b_bcount) { 
                if (bp->b_flags & B_DELWRI) {
-                       bwrite(bp);
+                       (void) bwrite(bp);
                        return (0);
                }
                if (bp->b_flags & B_LOCKED)
                        return (0);
                }
                if (bp->b_flags & B_LOCKED)
@@ -447,10 +470,11 @@ brealloc(bp, size)
                return (allocbuf(bp, size));
        }
        bp->b_flags &= ~B_DONE;
                return (allocbuf(bp, size));
        }
        bp->b_flags &= ~B_DONE;
-       if (bp->b_dev == NODEV)
+       if (bp->b_vp == (struct vnode *)0)
                return (allocbuf(bp, size));
 
                return (allocbuf(bp, size));
 
-       trace(TR_BREALLOC, pack(bp->b_dev, size), bp->b_blkno);
+       trace(TR_BREALLOC,
+           pack(bp->b_vp->v_mount->m_fsid[0], size), bp->b_blkno);
        /*
         * Search cache for any buffers that overlap the one that we
         * are trying to allocate. Overlapping buffers must be marked
        /*
         * Search cache for any buffers that overlap the one that we
         * are trying to allocate. Overlapping buffers must be marked
@@ -465,10 +489,11 @@ brealloc(bp, size)
 #else SECSIZE
        last = start + btodb(size) - 1;
 #endif SECSIZE
 #else SECSIZE
        last = start + btodb(size) - 1;
 #endif SECSIZE
-       dp = BUFHASH(bp->b_dev, bp->b_blkno);
+       dp = BUFHASH(bp->b_vp, bp->b_blkno);
 loop:
        for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
 loop:
        for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
-               if (ep == bp || ep->b_dev != bp->b_dev || (ep->b_flags&B_INVAL))
+               if (ep == bp || ep->b_vp != bp->b_vp ||
+                   (ep->b_flags & B_INVAL))
                        continue;
                /* look for overlap */
                if (ep->b_bcount == 0 || ep->b_blkno > last ||
                        continue;
                /* look for overlap */
                if (ep->b_bcount == 0 || ep->b_blkno > last ||
@@ -488,7 +513,7 @@ loop:
                splx(s);
                notavail(ep);
                if (ep->b_flags & B_DELWRI) {
                splx(s);
                notavail(ep);
                if (ep->b_flags & B_DELWRI) {
-                       bwrite(ep);
+                       (void) bwrite(ep);
                        goto loop;
                }
                ep->b_flags |= B_INVAL;
                        goto loop;
                }
                ep->b_flags |= B_INVAL;
@@ -506,6 +531,7 @@ struct buf *
 getnewbuf()
 {
        register struct buf *bp, *dp;
 getnewbuf()
 {
        register struct buf *bp, *dp;
+       register struct ucred *cred;
        int s;
 
 loop:
        int s;
 
 loop:
@@ -523,11 +549,22 @@ loop:
        bp = dp->av_forw;
        notavail(bp);
        if (bp->b_flags & B_DELWRI) {
        bp = dp->av_forw;
        notavail(bp);
        if (bp->b_flags & B_DELWRI) {
-               bp->b_flags |= B_ASYNC;
-               bwrite(bp);
+               (void) bawrite(bp);
                goto loop;
        }
                goto loop;
        }
-       trace(TR_BRELSE, pack(bp->b_dev, bp->b_bufsize), bp->b_blkno);
+       trace(TR_BRELSE,
+           pack(bp->b_vp->v_mount->m_fsid[0], bp->b_bufsize), bp->b_blkno);
+       brelvp(bp);
+       if (bp->b_rcred != NOCRED) {
+               cred = bp->b_rcred;
+               bp->b_rcred = NOCRED;
+               crfree(cred);
+       }
+       if (bp->b_wcred != NOCRED) {
+               cred = bp->b_wcred;
+               bp->b_wcred = NOCRED;
+               crfree(cred);
+       }
        bp->b_flags = B_BUSY;
        return (bp);
 }
        bp->b_flags = B_BUSY;
        return (bp);
 }
@@ -542,11 +579,18 @@ biowait(bp)
        int s;
 
        s = splbio();
        int s;
 
        s = splbio();
-       while ((bp->b_flags&B_DONE)==0)
+       while ((bp->b_flags & B_DONE) == 0)
                sleep((caddr_t)bp, PRIBIO);
        splx(s);
                sleep((caddr_t)bp, PRIBIO);
        splx(s);
-       if (u.u_error == 0)                     /* XXX */
-               u.u_error = geterror(bp);
+       /*
+        * Pick up the device's error number and pass it to the user;
+        * if there is an error but the number is 0 set a generalized code.
+        */
+       if ((bp->b_flags & B_ERROR) == 0)
+               return (0);
+       if (bp->b_error)
+               return (bp->b_error);
+       return (EIO);
 }
 
 /*
 }
 
 /*
@@ -562,6 +606,8 @@ biodone(bp)
        if (bp->b_flags & B_DONE)
                panic("dup biodone");
        bp->b_flags |= B_DONE;
        if (bp->b_flags & B_DONE)
                panic("dup biodone");
        bp->b_flags |= B_DONE;
+       if ((bp->b_flags & B_READ) == 0)
+               bp->b_dirtyoff = bp->b_dirtyend = 0;
        if (bp->b_flags & B_CALL) {
                bp->b_flags &= ~B_CALL;
                (*bp->b_iodone)(bp);
        if (bp->b_flags & B_CALL) {
                bp->b_flags &= ~B_CALL;
                (*bp->b_iodone)(bp);
@@ -576,7 +622,7 @@ biodone(bp)
 }
 
 /*
 }
 
 /*
- * Insure that no part of a specified block is in an incore buffer.
+ * Ensure that no part of a specified block is in an incore buffer.
 #ifdef SECSIZE
  * "size" is given in device blocks (the units of b_blkno).
 #endif SECSIZE
 #ifdef SECSIZE
  * "size" is given in device blocks (the units of b_blkno).
 #endif SECSIZE
@@ -584,8 +630,8 @@ biodone(bp)
  * "size" is given in device blocks (the units of b_blkno).
 #endif SECSIZE
  */
  * "size" is given in device blocks (the units of b_blkno).
 #endif SECSIZE
  */
-blkflush(dev, blkno, size)
-       dev_t dev;
+blkflush(vp, blkno, size)
+       struct vnode *vp;
        daddr_t blkno;
 #ifdef SECSIZE
        int size;
        daddr_t blkno;
 #ifdef SECSIZE
        int size;
@@ -596,7 +642,7 @@ blkflush(dev, blkno, size)
        register struct buf *ep;
        struct buf *dp;
        daddr_t start, last;
        register struct buf *ep;
        struct buf *dp;
        daddr_t start, last;
-       int s;
+       int s, error, allerrors = 0;
 
        start = blkno;
 #ifdef SECSIZE
 
        start = blkno;
 #ifdef SECSIZE
@@ -604,10 +650,10 @@ blkflush(dev, blkno, size)
 #else SECSIZE
        last = start + btodb(size) - 1;
 #endif SECSIZE
 #else SECSIZE
        last = start + btodb(size) - 1;
 #endif SECSIZE
-       dp = BUFHASH(dev, blkno);
+       dp = BUFHASH(vp, blkno);
 loop:
        for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
 loop:
        for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
-               if (ep->b_dev != dev || (ep->b_flags&B_INVAL))
+               if (ep->b_vp != vp || (ep->b_flags & B_INVAL))
                        continue;
                /* look for overlap */
                if (ep->b_bcount == 0 || ep->b_blkno > last ||
                        continue;
                /* look for overlap */
                if (ep->b_bcount == 0 || ep->b_blkno > last ||
@@ -627,21 +673,21 @@ loop:
                if (ep->b_flags & B_DELWRI) {
                        splx(s);
                        notavail(ep);
                if (ep->b_flags & B_DELWRI) {
                        splx(s);
                        notavail(ep);
-                       bwrite(ep);
+                       if (error = bwrite(ep))
+                               allerrors = error;
                        goto loop;
                }
                splx(s);
        }
                        goto loop;
                }
                splx(s);
        }
+       return (allerrors);
 }
 
 /*
 }
 
 /*
- * Make sure all write-behind blocks
- * on dev (or NODEV for all)
- * are flushed out.
- * (from umount and update)
+ * Make sure all write-behind blocks associated
+ * with mount point are flushed out (from sync).
  */
  */
-bflush(dev)
-       dev_t dev;
+bflush(mountp)
+       struct mount *mountp;
 {
        register struct buf *bp;
        register struct buf *flist;
 {
        register struct buf *bp;
        register struct buf *flist;
@@ -649,56 +695,74 @@ bflush(dev)
 
 loop:
        s = splbio();
 
 loop:
        s = splbio();
-       for (flist = bfreelist; flist < &bfreelist[BQ_EMPTY]; flist++)
-       for (bp = flist->av_forw; bp != flist; bp = bp->av_forw) {
-               if ((bp->b_flags & B_DELWRI) == 0)
-                       continue;
-               if (dev == NODEV || dev == bp->b_dev) {
-                       bp->b_flags |= B_ASYNC;
-                       notavail(bp);
-                       bwrite(bp);
-                       splx(s);
-                       goto loop;
+       for (flist = bfreelist; flist < &bfreelist[BQ_EMPTY]; flist++) {
+               for (bp = flist->av_forw; bp != flist; bp = bp->av_forw) {
+                       if ((bp->b_flags & B_BUSY))
+                               continue;
+                       if ((bp->b_flags & B_DELWRI) == 0)
+                               continue;
+                       if (bp->b_vp && bp->b_vp->v_mount == mountp) {
+                               notavail(bp);
+                               (void) bawrite(bp);
+                               splx(s);
+                               goto loop;
+                       }
                }
        }
        splx(s);
 }
 
                }
        }
        splx(s);
 }
 
-/*
- * Pick up the device's error number and pass it to the user;
- * if there is an error but the number is 0 set a generalized code.
- */
-geterror(bp)
-       register struct buf *bp;
-{
-       int error = 0;
-
-       if (bp->b_flags&B_ERROR)
-               if ((error = bp->b_error)==0)
-                       return (EIO);
-       return (error);
-}
-
 /*
  * Invalidate in core blocks belonging to closed or umounted filesystem
  *
 /*
  * Invalidate in core blocks belonging to closed or umounted filesystem
  *
- * This is not nicely done at all - the buffer ought to be removed from the
- * hash chains & have its dev/blkno fields clobbered, but unfortunately we
- * can't do that here, as it is quite possible that the block is still
- * being used for i/o. Eventually, all disc drivers should be forced to
- * have a close routine, which ought ensure that the queue is empty, then
- * properly flush the queues. Until that happy day, this suffices for
- * correctness.                                                ... kre
+ * We walk through the buffer pool and invalidate any buffers for the
+ * indicated mount point. Normally this routine is preceeded by a bflush
+ * call, so that on a quiescent filesystem there will be no dirty
+ * buffers when we are done. We return the count of dirty buffers when
+ * we are finished.
  */
  */
-binval(dev)
-       dev_t dev;
+binval(mountp)
+       struct mount *mountp;
 {
        register struct buf *bp;
        register struct bufhd *hp;
 {
        register struct buf *bp;
        register struct bufhd *hp;
+       int s, dirty = 0;
 #define dp ((struct buf *)hp)
 
 #define dp ((struct buf *)hp)
 
-       for (hp = bufhash; hp < &bufhash[BUFHSZ]; hp++)
-               for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
-                       if (bp->b_dev == dev)
-                               bp->b_flags |= B_INVAL;
+loop:
+       s = splbio();
+       for (hp = bufhash; hp < &bufhash[BUFHSZ]; hp++) {
+               for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
+                       if (bp->b_vp == NULL || bp->b_vp->v_mount != mountp)
+                               continue;
+                       if (bp->b_flags & B_BUSY) {
+                               bp->b_flags |= B_WANTED;
+                               sleep((caddr_t)bp, PRIBIO+1);
+                               splx(s);
+                               goto loop;
+                       }
+                       notavail(bp);
+                       if (bp->b_flags & B_DELWRI) {
+                               (void) bawrite(bp);
+                               dirty++;
+                               continue;
+                       }
+                       bp->b_flags |= B_INVAL;
+                       brelvp(bp);
+                       brelse(bp);
+               }
+       }
+       return (dirty);
+}
+
+/*
+ * Break the association between a buffer and its vnode.
+ * If bp->b_vp is set it is cleared and the old vnode is passed to
+ * vrele(); this is the counterpart of the VREF() done in getblk()
+ * when a vnode is attached to a buffer.  A buffer with no vnode is
+ * left untouched.
+ */
+brelvp(bp)
+       struct buf *bp;
+{
+       struct vnode *oldvp = bp->b_vp;
+
+       if (oldvp != (struct vnode *) 0) {
+               /* clear the link first, then hand the vnode to vrele() */
+               bp->b_vp = (struct vnode *) 0;
+               vrele(oldvp);
+       }
+}
 }