X-Git-Url: https://git.subgeniuskitty.com/unix-history/.git/blobdiff_plain/663dbc72afbbac791a450b0d86fb0bfca7f63bc5..7b8b5a017ec860062c1f8626302492b3a164b77e:/usr/src/sys/kern/vfs_cluster.c diff --git a/usr/src/sys/kern/vfs_cluster.c b/usr/src/sys/kern/vfs_cluster.c index 1951dc35ec..9be3827e7d 100644 --- a/usr/src/sys/kern/vfs_cluster.c +++ b/usr/src/sys/kern/vfs_cluster.c @@ -1,4 +1,4 @@ -/* vfs_cluster.c 3.1 %H% */ +/* vfs_cluster.c 4.5 %G% */ #include "../h/param.h" #include "../h/systm.h" @@ -10,6 +10,44 @@ #include "../h/seg.h" #include "../h/pte.h" #include "../h/vm.h" +#include "../h/trace.h" + +/* + * The following several routines allocate and free + * buffers with various side effects. In general the + * arguments to an allocate routine are a device and + * a block number, and the value is a pointer + * to the buffer header; the buffer is marked "busy" + * so that no one else can touch it. If the block was + * already in core, no I/O need be done; if it is + * already busy, the process waits until it becomes free. + * The following routines allocate a buffer: + * getblk + * bread + * breada + * baddr (if it is incore) + * Eventually the buffer must be released, possibly with the + * side effect of writing it out, by using one of + * bwrite + * bdwrite + * bawrite + * brelse + */ + +#define BUFHSZ 63 +#define BUFHASH(blkno) (blkno % BUFHSZ) +short bufhash[BUFHSZ]; + +/* + * Initialize hash links for buffers: every bufhash chain head is set to -1 (empty chain). + */ +bhinit() +{ + register int i; + + for (i = 0; i < BUFHSZ; i++) + bufhash[i] = -1; +} /* #define DISKMON 1 */ @@ -38,27 +76,6 @@ struct buf swbuf[NSWBUF]; short swsize[NSWBUF]; /* CAN WE JUST USE B_BCOUNT? */ int swpf[NSWBUF]; -/* - * The following several routines allocate and free - * buffers with various side effects. In general the - * arguments to an allocate routine are a device and - * a block number, and the value is a pointer to - * to the buffer header; the buffer is marked "busy" - * so that no one else can touch it. 
If the block was - * already in core, no I/O need be done; if it is - * already busy, the process waits until it becomes free. - * The following routines allocate a buffer: - * getblk - * bread - * breada - * baddr (if it is incore) - * Eventually the buffer must be released, possibly with the - * side effect of writing it out, by using one of - * bwrite - * bdwrite - * bawrite - * brelse - */ #ifdef FASTVAX #define notavail(bp) \ @@ -83,6 +100,9 @@ daddr_t blkno; bp = getblk(dev, blkno); if (bp->b_flags&B_DONE) { +#ifdef EPAWNJ + trace(TR_BREAD|TR_HIT, dev, blkno); +#endif #ifdef DISKMON io_info.ncache++; #endif @@ -91,6 +111,9 @@ daddr_t blkno; bp->b_flags |= B_READ; bp->b_bcount = BSIZE; (*bdevsw[major(dev)].d_strategy)(bp); +#ifdef EPAWNJ + trace(TR_BREAD|TR_MISS, dev, blkno); +#endif #ifdef DISKMON io_info.nread++; #endif @@ -117,20 +140,33 @@ daddr_t blkno, rablkno; bp->b_flags |= B_READ; bp->b_bcount = BSIZE; (*bdevsw[major(dev)].d_strategy)(bp); +#ifdef EPAWNJ + trace(TR_BREAD|TR_MISS, dev, blkno); +#endif #ifdef DISKMON io_info.nread++; #endif u.u_vm.vm_inblk++; /* pay for read */ } +#ifdef EPAWNJ + else + trace(TR_BREAD|TR_HIT, dev, blkno); +#endif } if (rablkno && !incore(dev, rablkno)) { rabp = getblk(dev, rablkno); - if (rabp->b_flags & B_DONE) + if (rabp->b_flags & B_DONE) { brelse(rabp); - else { +#ifdef EPAWNJ + trace(TR_BREAD|TR_HIT|TR_RA, dev, rablkno); /* the buffer found is the read-ahead block */ +#endif + } else { rabp->b_flags |= B_READ|B_ASYNC; rabp->b_bcount = BSIZE; (*bdevsw[major(dev)].d_strategy)(rabp); +#ifdef EPAWNJ + trace(TR_BREAD|TR_MISS|TR_RA, dev, rablkno); /* the read-ahead block just queued */ +#endif #ifdef DISKMON io_info.nreada++; #endif @@ -160,6 +196,9 @@ register struct buf *bp; #endif if ((flag&B_DELWRI) == 0) u.u_vm.vm_oublk++; /* noone paid yet */ +#ifdef EPAWNJ + trace(TR_BWRITE, bp->b_dev, dbtofsb(bp->b_blkno)); +#endif (*bdevsw[major(bp->b_dev)].d_strategy)(bp); if ((flag&B_ASYNC) == 0) { iowait(bp); @@ -220,8 +259,10 @@ register struct buf *bp; bfreelist.b_flags &= ~B_WANTED; 
wakeup((caddr_t)&bfreelist); } - if (bp->b_flags&B_ERROR) + if ((bp->b_flags&B_ERROR) && bp->b_dev != NODEV) { + bunhash(bp); bp->b_dev = NODEV; /* no assoc. on error */ + } s = spl6(); if(bp->b_flags & (B_AGE|B_ERROR)) { backp = &bfreelist.av_forw; @@ -249,14 +290,14 @@ dev_t dev; daddr_t blkno; { register struct buf *bp; - register struct buf *dp; register int dblkno = fsbtodb(blkno); - dp = bdevsw[major(dev)].d_tab; - for (bp=dp->b_forw; bp != dp; bp = bp->b_forw) - if (bp->b_blkno==dblkno && bp->b_dev==dev) - return(1); - return(0); + for (bp = &buf[bufhash[BUFHASH(blkno)]]; bp != &buf[-1]; + bp = &buf[bp->b_hlink]) + if (bp->b_blkno == dblkno && bp->b_dev == dev + && !(bp->b_flags & B_INVAL)) + return (1); + return (0); } struct buf * @@ -280,31 +321,26 @@ getblk(dev, blkno) dev_t dev; daddr_t blkno; { - register struct buf *bp; - register struct buf *dp; -#ifdef DISKMON - register i; -#endif - register int dblkno = fsbtodb(blkno); - - if(major(dev) >= nblkdev) - panic("blkdev"); + register struct buf *bp, *dp, *ep; + register int i, x, dblkno; + if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-PGSHIFT)) + blkno = 1 << ((sizeof(int)*NBBY-PGSHIFT) + 1); + dblkno = fsbtodb(blkno); loop: - VOID spl0(); - dp = bdevsw[major(dev)].d_tab; - if(dp == NULL) - panic("devtab"); - for (bp=dp->b_forw; bp != dp; bp = bp->b_forw) { - if (bp->b_blkno!=dblkno || bp->b_dev!=dev) + (void) spl0(); + for (bp = &buf[bufhash[BUFHASH(blkno)]]; bp != &buf[-1]; + bp = &buf[bp->b_hlink]) { + if (bp->b_blkno != dblkno || bp->b_dev != dev + || bp->b_flags & B_INVAL) continue; - VOID spl6(); + (void) spl6(); if (bp->b_flags&B_BUSY) { bp->b_flags |= B_WANTED; sleep((caddr_t)bp, PRIBIO+1); goto loop; } - VOID spl0(); + (void) spl0(); #ifdef DISKMON i = 0; dp = bp->av_forw; @@ -319,13 +355,18 @@ daddr_t blkno; bp->b_flags |= B_CACHE; return(bp); } - VOID spl6(); + if (major(dev) >= nblkdev) + panic("blkdev"); + dp = bdevsw[major(dev)].d_tab; + if (dp == NULL) + panic("devtab"); + (void) spl6(); 
if (bfreelist.av_forw == &bfreelist) { bfreelist.b_flags |= B_WANTED; sleep((caddr_t)&bfreelist, PRIBIO+1); goto loop; } - spl0(); + (void) spl0(); bp = bfreelist.av_forw; notavail(bp); if (bp->b_flags & B_DELWRI) { @@ -333,6 +374,29 @@ daddr_t blkno; bwrite(bp); goto loop; } + if (bp->b_dev == NODEV) + goto done; + /* INLINE EXPANSION OF bunhash(bp) */ +#ifdef EPAWNJ + trace(TR_BRELSE, bp->b_dev, dbtofsb(bp->b_blkno)); +#endif + (void) spl6(); + i = BUFHASH(dbtofsb(bp->b_blkno)); + x = bp - buf; + if (bufhash[i] == x) { + bufhash[i] = bp->b_hlink; + } else { + for (ep = &buf[bufhash[i]]; ep != &buf[-1]; + ep = &buf[ep->b_hlink]) + if (ep->b_hlink == x) { + ep->b_hlink = bp->b_hlink; + goto done; + } + panic("getblk"); + } +done: + (void) spl0(); + /* END INLINE EXPANSION */ bp->b_flags = B_BUSY; bp->b_back->b_forw = bp->b_forw; bp->b_forw->b_back = bp->b_back; @@ -342,6 +406,9 @@ daddr_t blkno; dp->b_forw = bp; bp->b_dev = dev; bp->b_blkno = dblkno; + i = BUFHASH(blkno); + bp->b_hlink = bufhash[i]; + bufhash[i] = bp - buf; return(bp); } @@ -352,16 +419,15 @@ daddr_t blkno; struct buf * geteblk() { - register struct buf *bp; - register struct buf *dp; + register struct buf *bp, *dp; loop: - VOID spl6(); + (void) spl6(); while (bfreelist.av_forw == &bfreelist) { bfreelist.b_flags |= B_WANTED; sleep((caddr_t)&bfreelist, PRIBIO+1); } - VOID spl0(); + (void) spl0(); dp = &bfreelist; bp = bfreelist.av_forw; notavail(bp); @@ -370,6 +436,12 @@ loop: bwrite(bp); goto loop; } + if (bp->b_dev != NODEV) { +#ifdef EPAWNJ + trace(TR_BRELSE, bp->b_dev, dbtofsb(bp->b_blkno)); +#endif + bunhash(bp); + } bp->b_flags = B_BUSY; bp->b_back->b_forw = bp->b_forw; bp->b_forw->b_back = bp->b_back; @@ -378,9 +450,36 @@ loop: dp->b_forw->b_back = bp; dp->b_forw = bp; bp->b_dev = (dev_t)NODEV; + bp->b_hlink = -1; return(bp); } +bunhash(bp) + register struct buf *bp; +{ + register struct buf *ep; + register int i, x, s; + + if (bp->b_dev == NODEV) + return; + s = spl6(); + i = 
BUFHASH(dbtofsb(bp->b_blkno)); + x = bp - buf; + if (bufhash[i] == x) { + bufhash[i] = bp->b_hlink; + goto ret; + } + for (ep = &buf[bufhash[i]]; ep != &buf[-1]; + ep = &buf[ep->b_hlink]) + if (ep->b_hlink == x) { + ep->b_hlink = bp->b_hlink; + goto ret; + } + panic("bunhash"); +ret: + splx(s); +} + /* * Wait for I/O completion on the buffer; return errors * to the user. @@ -389,10 +488,10 @@ iowait(bp) register struct buf *bp; { - VOID spl6(); + (void) spl6(); while ((bp->b_flags&B_DONE)==0) sleep((caddr_t)bp, PRIBIO); - VOID spl0(); + (void) spl0(); geterror(bp); } @@ -427,6 +526,8 @@ register struct buf *bp; { register int s; + if (bp->b_flags & B_DONE) + panic("dup iodone"); bp->b_flags |= B_DONE; if (bp->b_flags & B_DIRTY) { if (bp->b_flags & B_ERROR) @@ -440,6 +541,7 @@ register struct buf *bp; if (bswlist.b_flags & B_WANTED) wakeup((caddr_t)&proc[2]); splx(s); + return; } if (bp->b_flags&B_ASYNC) brelse(bp); @@ -491,14 +593,14 @@ swap(p, dblkno, addr, nbytes, rdflg, flag, dev, pfcent) int p2dp; register struct pte *dpte, *vpte; - VOID spl6(); + (void) spl6(); while (bswlist.av_forw == NULL) { bswlist.b_flags |= B_WANTED; sleep((caddr_t)&bswlist, PSWP+1); } bp = bswlist.av_forw; bswlist.av_forw = bp->av_forw; - VOID spl0(); + (void) spl0(); bp->b_flags = B_BUSY | B_PHYS | rdflg | flag; if ((bp->b_flags & (B_DIRTY|B_PGIN)) == 0) @@ -524,20 +626,20 @@ swap(p, dblkno, addr, nbytes, rdflg, flag, dev, pfcent) bp->b_bcount = c; bp->b_blkno = dblkno; bp->b_dev = dev; - if (dev == swapdev) - bp->b_blkno += swplo; + if (flag & B_DIRTY) { + swpf[bp - swbuf] = pfcent; + swsize[bp - swbuf] = nbytes; + } (*bdevsw[major(dev)].d_strategy)(bp); if (flag & B_DIRTY) { if (c < nbytes) panic("big push"); - swsize[bp - swbuf] = nbytes; - swpf[bp - swbuf] = pfcent; return; } - VOID spl6(); + (void) spl6(); while((bp->b_flags&B_DONE)==0) sleep((caddr_t)bp, PSWP); - VOID spl0(); + (void) spl0(); bp->b_un.b_addr += c; bp->b_flags &= ~B_DONE; if (bp->b_flags & B_ERROR) { @@ -548,7 
+650,7 @@ swap(p, dblkno, addr, nbytes, rdflg, flag, dev, pfcent) nbytes -= c; dblkno += btoc(c); } - VOID spl6(); + (void) spl6(); bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY); bp->av_forw = bswlist.av_forw; bswlist.av_forw = bp; @@ -557,7 +659,7 @@ swap(p, dblkno, addr, nbytes, rdflg, flag, dev, pfcent) wakeup((caddr_t)&bswlist); wakeup((caddr_t)&proc[2]); } - VOID spl0(); + (void) spl0(); } /* @@ -581,7 +683,7 @@ swkill(p, rout) * done by user) after killing it so noone will try * to swap it out. */ - psignal(p, SIGKIL); + psignal(p, SIGKILL); p->p_flag |= SULOCK; } @@ -597,7 +699,7 @@ dev_t dev; register struct buf *bp; loop: - VOID spl6(); + (void) spl6(); for (bp = bfreelist.av_forw; bp != &bfreelist; bp = bp->av_forw) { if (bp->b_flags&B_DELWRI && (dev == NODEV||dev==bp->b_dev)) { bp->b_flags |= B_ASYNC; @@ -606,7 +708,7 @@ loop: goto loop; } } - VOID spl0(); + (void) spl0(); } /* @@ -635,7 +737,7 @@ unsigned (*mincnt)(); u.u_error = EFAULT; return; } - VOID spl6(); + (void) spl6(); while (bp->b_flags&B_BUSY) { bp->b_flags |= B_WANTED; sleep((caddr_t)bp, PRIBIO+1); @@ -653,14 +755,14 @@ unsigned (*mincnt)(); u.u_procp->p_flag |= SPHYSIO; vslock(a = bp->b_un.b_addr, c); (*strat)(bp); - VOID spl6(); + (void) spl6(); while ((bp->b_flags&B_DONE) == 0) sleep((caddr_t)bp, PRIBIO); vsunlock(a, c, rw); u.u_procp->p_flag &= ~SPHYSIO; if (bp->b_flags&B_WANTED) wakeup((caddr_t)bp); - VOID spl0(); + (void) spl0(); bp->b_un.b_addr += c; u.u_count -= c; u.u_offset += c; @@ -694,3 +796,26 @@ register struct buf *bp; if ((u.u_error = bp->b_error)==0) u.u_error = EIO; } + +/* + * Invalidate in core blocks belonging to closed or umounted filesystem + * + * This is not nicely done at all - the buffer ought to be removed from the + * hash chains & have its dev/blkno fields clobbered, but unfortunately we + * can't do that here, as it is quite possible that the block is still + * being used for i/o. 
Eventually, all disc drivers should be forced to + * have a close routine, which ought to ensure that the queue is empty, then + * properly flush the queues. Until that happy day, this suffices for + * correctness. ... kre + */ +binval(dev) +dev_t dev; +{ + register struct buf *bp, *dp; + + dp = bdevsw[major(dev)].d_tab; + + for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) + if (bp->b_dev == dev) + bp->b_flags |= B_INVAL; +}