alloc tables at boot time version
[unix-history] / usr / src / sys / kern / vfs_cluster.c
index 4b8cb8f..37ad963 100644 (file)
@@ -1,4 +1,4 @@
-/*     vfs_cluster.c   3.11    %G%     */
+/*     vfs_cluster.c   4.14    %G%     */
 
 #include "../h/param.h"
 #include "../h/systm.h"
 
 #include "../h/param.h"
 #include "../h/systm.h"
@@ -10,6 +10,7 @@
 #include "../h/seg.h"
 #include "../h/pte.h"
 #include "../h/vm.h"
 #include "../h/seg.h"
 #include "../h/pte.h"
 #include "../h/vm.h"
+#include "../h/trace.h"
 
 /*
  * The following several routines allocate and free
 
 /*
  * The following several routines allocate and free
@@ -34,8 +35,9 @@
  */
 
 #define        BUFHSZ  63
  */
 
 #define        BUFHSZ  63
-#define        BUFHASH(blkno)  (blkno % BUFHSZ)
-short  bufhash[BUFHSZ];
+struct bufhd bufhash[BUFHSZ];
+#define        BUFHASH(dev, dblkno)    \
+               ((struct buf *)&bufhash[((int)(dev)+(int)(dblkno)) % BUFHSZ])
 
 /*
  * Initialize hash links for buffers.
 
 /*
  * Initialize hash links for buffers.
@@ -43,9 +45,10 @@ short        bufhash[BUFHSZ];
 bhinit()
 {
        register int i;
 bhinit()
 {
        register int i;
+       register struct bufhd *bp;      /* walks the bufhash[] chain heads */
 
 
-       for (i = 0; i < BUFHSZ; i++)
-               bufhash[i] = -1;
+       for (bp = bufhash, i = 0; i < BUFHSZ; i++, bp++)        /* all BUFHSZ heads */
+               bp->b_forw = bp->b_back = (struct buf *)bp;     /* head linked to itself: empty chain */
 }
 
 /* #define     DISKMON 1 */
 }
 
 /* #define     DISKMON 1 */
@@ -57,7 +60,7 @@ struct {
        long    nreada;
        long    ncache;
        long    nwrite;
        long    nreada;
        long    ncache;
        long    nwrite;
-       long    bufcount[NBUF];
+       long    bufcount[64];
 } io_info;
 #endif
 
 } io_info;
 #endif
 
@@ -71,12 +74,12 @@ struct {
  * page push, when the I/O completes, it is inserted 
  * in a list of cleaned pages to be processed by the pageout daemon.
  */
  * page push, when the I/O completes, it is inserted 
  * in a list of cleaned pages to be processed by the pageout daemon.
  */
-struct buf swbuf[NSWBUF];
-short  swsize[NSWBUF];         /* CAN WE JUST USE B_BCOUNT? */
-int    swpf[NSWBUF];
+struct buf *swbuf;
+short  *swsize;                /* CAN WE JUST USE B_BCOUNT? */
+int    *swpf;
 
 
 
 
-#ifdef FASTVAX
+#ifndef        UNFAST
 #define        notavail(bp) \
 { \
        int s = spl6(); \
 #define        notavail(bp) \
 { \
        int s = spl6(); \
@@ -99,6 +102,9 @@ daddr_t blkno;
 
        bp = getblk(dev, blkno);
        if (bp->b_flags&B_DONE) {
 
        bp = getblk(dev, blkno);
        if (bp->b_flags&B_DONE) {
+#ifdef EPAWNJ
+               trace(TR_BREAD|TR_HIT, dev, blkno);
+#endif
 #ifdef DISKMON
                io_info.ncache++;
 #endif
 #ifdef DISKMON
                io_info.ncache++;
 #endif
@@ -107,6 +113,9 @@ daddr_t blkno;
        bp->b_flags |= B_READ;
        bp->b_bcount = BSIZE;
        (*bdevsw[major(dev)].d_strategy)(bp);
        bp->b_flags |= B_READ;
        bp->b_bcount = BSIZE;
        (*bdevsw[major(dev)].d_strategy)(bp);
+#ifdef EPAWNJ
+       trace(TR_BREAD|TR_MISS, dev, blkno);
+#endif
 #ifdef DISKMON
        io_info.nread++;
 #endif
 #ifdef DISKMON
        io_info.nread++;
 #endif
@@ -133,20 +142,33 @@ daddr_t blkno, rablkno;
                        bp->b_flags |= B_READ;
                        bp->b_bcount = BSIZE;
                        (*bdevsw[major(dev)].d_strategy)(bp);
                        bp->b_flags |= B_READ;
                        bp->b_bcount = BSIZE;
                        (*bdevsw[major(dev)].d_strategy)(bp);
+#ifdef EPAWNJ
+                       trace(TR_BREAD|TR_MISS, dev, blkno);
+#endif
 #ifdef DISKMON
                        io_info.nread++;
 #endif
                        u.u_vm.vm_inblk++;              /* pay for read */
                }
 #ifdef DISKMON
                        io_info.nread++;
 #endif
                        u.u_vm.vm_inblk++;              /* pay for read */
                }
+#ifdef EPAWNJ
+               else
+                       trace(TR_BREAD|TR_HIT, dev, blkno);
+#endif
        }
        if (rablkno && !incore(dev, rablkno)) {
                rabp = getblk(dev, rablkno);
        }
        if (rablkno && !incore(dev, rablkno)) {
                rabp = getblk(dev, rablkno);
-               if (rabp->b_flags & B_DONE)
+               if (rabp->b_flags & B_DONE) {
                        brelse(rabp);
                        brelse(rabp);
-               else {
+#ifdef EPAWNJ
+                       trace(TR_BREAD|TR_HIT|TR_RA, dev, blkno);
+#endif
+               } else {
                        rabp->b_flags |= B_READ|B_ASYNC;
                        rabp->b_bcount = BSIZE;
                        (*bdevsw[major(dev)].d_strategy)(rabp);
                        rabp->b_flags |= B_READ|B_ASYNC;
                        rabp->b_bcount = BSIZE;
                        (*bdevsw[major(dev)].d_strategy)(rabp);
+#ifdef EPAWNJ
+                       trace(TR_BREAD|TR_MISS|TR_RA, dev, rablock);
+#endif
 #ifdef DISKMON
                        io_info.nreada++;
 #endif
 #ifdef DISKMON
                        io_info.nreada++;
 #endif
@@ -176,6 +198,9 @@ register struct buf *bp;
 #endif
        if ((flag&B_DELWRI) == 0)
                u.u_vm.vm_oublk++;              /* noone paid yet */
 #endif
        if ((flag&B_DELWRI) == 0)
                u.u_vm.vm_oublk++;              /* noone paid yet */
+#ifdef EPAWNJ
+       trace(TR_BWRITE, bp->b_dev, dbtofsb(bp->b_blkno));
+#endif
        (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
        if ((flag&B_ASYNC) == 0) {
                iowait(bp);
        (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
        if ((flag&B_ASYNC) == 0) {
                iowait(bp);
@@ -197,12 +222,12 @@ register struct buf *bp;
 bdwrite(bp)
 register struct buf *bp;
 {
 bdwrite(bp)
 register struct buf *bp;
 {
-       register struct buf *dp;
+       register int flags;
 
        if ((bp->b_flags&B_DELWRI) == 0)
                u.u_vm.vm_oublk++;              /* noone paid yet */
 
        if ((bp->b_flags&B_DELWRI) == 0)
                u.u_vm.vm_oublk++;              /* noone paid yet */
-       dp = bdevsw[major(bp->b_dev)].d_tab;
-       if(dp->b_flags & B_TAPE)
+       flags = bdevsw[major(bp->b_dev)].d_flags;
+       if(flags & B_TAPE)
                bawrite(bp);
        else {
                bp->b_flags |= B_DELWRI | B_DONE;
                bawrite(bp);
        else {
                bp->b_flags |= B_DELWRI | B_DONE;
@@ -227,32 +252,39 @@ register struct buf *bp;
 brelse(bp)
 register struct buf *bp;
 {
 brelse(bp)
 register struct buf *bp;
 {
-       register struct buf **backp;
+       register struct buf *flist;
        register s;
 
        if (bp->b_flags&B_WANTED)
                wakeup((caddr_t)bp);
        register s;
 
        if (bp->b_flags&B_WANTED)
                wakeup((caddr_t)bp);
-       if (bfreelist.b_flags&B_WANTED) {
-               bfreelist.b_flags &= ~B_WANTED;
-               wakeup((caddr_t)&bfreelist);
-       }
-       if ((bp->b_flags&B_ERROR) && bp->b_dev != NODEV) {
-               bunhash(bp);
-               bp->b_dev = NODEV;  /* no assoc. on error */
+       if (bfreelist[0].b_flags&B_WANTED) {
+               bfreelist[0].b_flags &= ~B_WANTED;
+               wakeup((caddr_t)bfreelist);
        }
        }
+       if (bp->b_flags&B_ERROR)
+               if (bp->b_flags & B_LOCKED)
+                       bp->b_flags &= ~B_ERROR;        /* try again later */
+               else
+                       bp->b_dev = NODEV;              /* no assoc */
        s = spl6();
        s = spl6();
-       if(bp->b_flags & (B_AGE|B_ERROR)) {
-               backp = &bfreelist.av_forw;
-               (*backp)->av_back = bp;
-               bp->av_forw = *backp;
-               *backp = bp;
-               bp->av_back = &bfreelist;
+       if (bp->b_flags & (B_ERROR|B_INVAL)) {
+               /* block has no info ... put at front of most free list */
+               flist = &bfreelist[BQUEUES-1];
+               flist->av_forw->av_back = bp;
+               bp->av_forw = flist->av_forw;
+               flist->av_forw = bp;
+               bp->av_back = flist;
        } else {
        } else {
-               backp = &bfreelist.av_back;
-               (*backp)->av_forw = bp;
-               bp->av_back = *backp;
-               *backp = bp;
-               bp->av_forw = &bfreelist;
+               if (bp->b_flags & B_LOCKED)
+                       flist = &bfreelist[BQ_LOCKED];
+               else if (bp->b_flags & B_AGE)
+                       flist = &bfreelist[BQ_AGE];
+               else
+                       flist = &bfreelist[BQ_LRU];
+               flist->av_back->av_forw = bp;
+               bp->av_back = flist->av_back;
+               flist->av_back = bp;
+               bp->av_forw = flist;
        }
        bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE);
        splx(s);
        }
        bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE);
        splx(s);
@@ -267,11 +299,13 @@ dev_t dev;
 daddr_t blkno;
 {
        register struct buf *bp;
 daddr_t blkno;
 {
        register struct buf *bp;
+       register struct buf *dp;
        register int dblkno = fsbtodb(blkno);
 
        register int dblkno = fsbtodb(blkno);
 
-       for (bp = &buf[bufhash[BUFHASH(blkno)]]; bp != &buf[-1];
-           bp = &buf[bp->b_hlink])
-               if (bp->b_blkno == dblkno && bp->b_dev == dev)
+       dp = BUFHASH(dev, dblkno);
+       for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
+               if (bp->b_blkno == dblkno && bp->b_dev == dev &&
+                   !(bp->b_flags & B_INVAL))
                        return (1);
        return (0);
 }
                        return (1);
        return (0);
 }
@@ -298,14 +332,20 @@ dev_t dev;
 daddr_t blkno;
 {
        register struct buf *bp, *dp, *ep;
 daddr_t blkno;
 {
        register struct buf *bp, *dp, *ep;
-       register int i, x;
        register int dblkno = fsbtodb(blkno);
        register int dblkno = fsbtodb(blkno);
+#ifdef DISKMON
+       register int i;
+#endif
 
 
+       if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-PGSHIFT))
+               blkno = 1 << ((sizeof(int)*NBBY-PGSHIFT) + 1);
+       dblkno = fsbtodb(blkno);
+       dp = BUFHASH(dev, dblkno);
     loop:
        (void) spl0();
     loop:
        (void) spl0();
-       for (bp = &buf[bufhash[BUFHASH(blkno)]]; bp != &buf[-1];
-           bp = &buf[bp->b_hlink]) {
-               if (bp->b_blkno != dblkno || bp->b_dev != dev)
+       for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
+               if (bp->b_blkno != dblkno || bp->b_dev != dev ||
+                   bp->b_flags&B_INVAL)
                        continue;
                (void) spl6();
                if (bp->b_flags&B_BUSY) {
                        continue;
                (void) spl6();
                if (bp->b_flags&B_BUSY) {
@@ -317,11 +357,11 @@ daddr_t blkno;
 #ifdef DISKMON
                i = 0;
                dp = bp->av_forw;
 #ifdef DISKMON
                i = 0;
                dp = bp->av_forw;
-               while (dp != &bfreelist) {
+               while ((dp->b_flags & B_HEAD) == 0) {
                        i++;
                        dp = dp->av_forw;
                }
                        i++;
                        dp = dp->av_forw;
                }
-               if (i<NBUF)
+               if (i<64)
                        io_info.bufcount[i]++;
 #endif
                notavail(bp);
                        io_info.bufcount[i]++;
 #endif
                notavail(bp);
@@ -330,41 +370,26 @@ daddr_t blkno;
        }
        if (major(dev) >= nblkdev)
                panic("blkdev");
        }
        if (major(dev) >= nblkdev)
                panic("blkdev");
-       dp = bdevsw[major(dev)].d_tab;
-       if (dp == NULL)
-               panic("devtab");
        (void) spl6();
        (void) spl6();
-       if (bfreelist.av_forw == &bfreelist) {
-               bfreelist.b_flags |= B_WANTED;
-               sleep((caddr_t)&bfreelist, PRIBIO+1);
+       for (ep = &bfreelist[BQUEUES-1]; ep > bfreelist; ep--)
+               if (ep->av_forw != ep)
+                       break;
+       if (ep == bfreelist) {          /* no free blocks at all */
+               ep->b_flags |= B_WANTED;
+               sleep((caddr_t)ep, PRIBIO+1);
                goto loop;
        }
                goto loop;
        }
-       spl0();
-       bp = bfreelist.av_forw;
+       (void) spl0();
+       bp = ep->av_forw;
        notavail(bp);
        if (bp->b_flags & B_DELWRI) {
                bp->b_flags |= B_ASYNC;
                bwrite(bp);
                goto loop;
        }
        notavail(bp);
        if (bp->b_flags & B_DELWRI) {
                bp->b_flags |= B_ASYNC;
                bwrite(bp);
                goto loop;
        }
-       if (bp->b_dev == NODEV)
-               goto done;
-       /* INLINE EXPANSION OF bunhash(bp) */
-       i = BUFHASH(dbtofsb(bp->b_blkno));
-       x = bp - buf;
-       if (bufhash[i] == x) {
-               bufhash[i] = bp->b_hlink;
-       } else {
-               for (ep = &buf[bufhash[i]]; ep != &buf[-1];
-                   ep = &buf[ep->b_hlink])
-                       if (ep->b_hlink == x) {
-                               ep->b_hlink = bp->b_hlink;
-                               goto done;
-                       }
-               panic("getblk");
-       }
-done:
-       /* END INLINE EXPANSION */
+#ifdef EPAWNJ
+       trace(TR_BRELSE, bp->b_dev, dbtofsb(bp->b_blkno));
+#endif
        bp->b_flags = B_BUSY;
        bp->b_back->b_forw = bp->b_forw;
        bp->b_forw->b_back = bp->b_back;
        bp->b_flags = B_BUSY;
        bp->b_back->b_forw = bp->b_forw;
        bp->b_forw->b_back = bp->b_back;
@@ -374,9 +399,6 @@ done:
        dp->b_forw = bp;
        bp->b_dev = dev;
        bp->b_blkno = dblkno;
        dp->b_forw = bp;
        bp->b_dev = dev;
        bp->b_blkno = dblkno;
-       i = BUFHASH(blkno);
-       bp->b_hlink = bufhash[i];
-       bufhash[i] = bp - buf;
        return(bp);
 }
 
        return(bp);
 }
 
@@ -391,22 +413,26 @@ geteblk()
 
 loop:
        (void) spl6();
 
 loop:
        (void) spl6();
-       while (bfreelist.av_forw == &bfreelist) {
-               bfreelist.b_flags |= B_WANTED;
-               sleep((caddr_t)&bfreelist, PRIBIO+1);
+       for (dp = &bfreelist[BQUEUES-1]; dp > bfreelist; dp--)
+               if (dp->av_forw != dp)
+                       break;
+       if (dp == bfreelist) {          /* no free blocks */
+               dp->b_flags |= B_WANTED;
+               sleep((caddr_t)dp, PRIBIO+1);
+               goto loop;
        }
        (void) spl0();
        }
        (void) spl0();
-       dp = &bfreelist;
-       bp = bfreelist.av_forw;
+       bp = dp->av_forw;
        notavail(bp);
        if (bp->b_flags & B_DELWRI) {
                bp->b_flags |= B_ASYNC;
                bwrite(bp);
                goto loop;
        }
        notavail(bp);
        if (bp->b_flags & B_DELWRI) {
                bp->b_flags |= B_ASYNC;
                bwrite(bp);
                goto loop;
        }
-       if (bp->b_dev != NODEV)
-               bunhash(bp);
-       bp->b_flags = B_BUSY;
+#ifdef EPAWNJ
+       trace(TR_BRELSE, bp->b_dev, dbtofsb(bp->b_blkno));
+#endif
+       bp->b_flags = B_BUSY|B_INVAL;
        bp->b_back->b_forw = bp->b_forw;
        bp->b_forw->b_back = bp->b_back;
        bp->b_forw = dp->b_forw;
        bp->b_back->b_forw = bp->b_forw;
        bp->b_forw->b_back = bp->b_back;
        bp->b_forw = dp->b_forw;
@@ -414,33 +440,9 @@ loop:
        dp->b_forw->b_back = bp;
        dp->b_forw = bp;
        bp->b_dev = (dev_t)NODEV;
        dp->b_forw->b_back = bp;
        dp->b_forw = bp;
        bp->b_dev = (dev_t)NODEV;
-       bp->b_hlink = -1;
        return(bp);
 }
 
        return(bp);
 }
 
-bunhash(bp)
-       register struct buf *bp;
-{
-       register struct buf *ep;
-       register int i, x;
-
-       if (bp->b_dev == NODEV)
-               return;
-       i = BUFHASH(dbtofsb(bp->b_blkno));
-       x = bp - buf;
-       if (bufhash[i] == x) {
-               bufhash[i] = bp->b_hlink;
-               return;
-       }
-       for (ep = &buf[bufhash[i]]; ep != &buf[-1];
-           ep = &buf[ep->b_hlink])
-               if (ep->b_hlink == x) {
-                       ep->b_hlink = bp->b_hlink;
-                       return;
-               }
-       panic("bunhash");
-}
-
 /*
  * Wait for I/O completion on the buffer; return errors
  * to the user.
 /*
  * Wait for I/O completion on the buffer; return errors
  * to the user.
@@ -456,7 +458,7 @@ register struct buf *bp;
        geterror(bp);
 }
 
        geterror(bp);
 }
 
-#ifndef FASTVAX
+#ifdef UNFAST
 /*
  * Unlink a buffer from the available list and mark it busy.
  * (internal interface)
 /*
  * Unlink a buffer from the available list and mark it busy.
  * (internal interface)
@@ -658,10 +660,12 @@ bflush(dev)
 dev_t dev;
 {
        register struct buf *bp;
 dev_t dev;
 {
        register struct buf *bp;
+       register struct buf *flist;
 
 loop:
        (void) spl6();
 
 loop:
        (void) spl6();
-       for (bp = bfreelist.av_forw; bp != &bfreelist; bp = bp->av_forw) {
+       for (flist = bfreelist; flist < &bfreelist[BQUEUES]; flist++)
+       for (bp = flist->av_forw; bp != flist; bp = bp->av_forw) {
                if (bp->b_flags&B_DELWRI && (dev == NODEV||dev==bp->b_dev)) {
                        bp->b_flags |= B_ASYNC;
                        notavail(bp);
                if (bp->b_flags&B_DELWRI && (dev == NODEV||dev==bp->b_dev)) {
                        bp->b_flags |= B_ASYNC;
                        notavail(bp);
@@ -757,3 +761,27 @@ register struct buf *bp;
                if ((u.u_error = bp->b_error)==0)
                        u.u_error = EIO;
 }
                if ((u.u_error = bp->b_error)==0)
                        u.u_error = EIO;
 }
+
+/*
+ * Invalidate in-core blocks belonging to closed or unmounted filesystem
+ *
+ * This is not nicely done at all - the buffer ought to be removed from the
+ * hash chains & have its dev/blkno fields clobbered, but unfortunately we
+ * can't do that here, as it is quite possible that the block is still
+ * being used for i/o. Eventually, all disc drivers should be forced to
+ * have a close routine, which ought to ensure that the queue is empty, then
+ * properly flush the queues. Until that happy day, this suffices for
+ * correctness.                                                ... kre
+ */
+binval(dev)
+dev_t dev;
+{
+       register struct buf *bp;
+       register struct bufhd *hp;
+#define dp ((struct buf *)hp)  /* current chain head, viewed as a struct buf */
+
+       for (hp = bufhash; hp < &bufhash[BUFHSZ]; hp++) /* every hash chain head */
+               for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)        /* chain is circular; stop back at head */
+                       if (bp->b_dev == dev)
+                               bp->b_flags |= B_INVAL; /* flag only; buffer stays on its chain */
+}