X-Git-Url: https://git.subgeniuskitty.com/unix-history/.git/blobdiff_plain/84baaab36073dd5d55807ec0c2f16fbd545b7219..1eebc1a70802d2bd87f4a95eba439778ff6803ae:/usr/src/sys/kern/vfs_cluster.c diff --git a/usr/src/sys/kern/vfs_cluster.c b/usr/src/sys/kern/vfs_cluster.c index 02c6db1047..4f74f8f9e1 100644 --- a/usr/src/sys/kern/vfs_cluster.c +++ b/usr/src/sys/kern/vfs_cluster.c @@ -1,39 +1,61 @@ -/* vfs_cluster.c 4.33 82/06/07 */ - -#include "../h/param.h" -#include "../h/systm.h" -#include "../h/dir.h" -#include "../h/user.h" -#include "../h/buf.h" -#include "../h/conf.h" -#include "../h/proc.h" -#include "../h/seg.h" -#include "../h/pte.h" -#include "../h/vm.h" -#include "../h/trace.h" +/* + * Copyright (c) 1982, 1986 Regents of the University of California. + * All rights reserved. The Berkeley software License Agreement + * specifies the terms and conditions for redistribution. + * + * @(#)vfs_cluster.c 7.3 (Berkeley) %G% + */ + +#include "../machine/pte.h" + +#include "param.h" +#include "systm.h" +#include "dir.h" +#include "user.h" +#include "buf.h" +#include "conf.h" +#include "proc.h" +#include "seg.h" +#include "vm.h" +#include "trace.h" /* * Read in (if necessary) the block and return a buffer pointer. */ struct buf * +#ifdef SECSIZE +bread(dev, blkno, size, secsize) +#else SECSIZE bread(dev, blkno, size) +#endif SECSIZE dev_t dev; daddr_t blkno; int size; +#ifdef SECSIZE + long secsize; +#endif SECSIZE { register struct buf *bp; + if (size == 0) + panic("bread: size 0"); +#ifdef SECSIZE + bp = getblk(dev, blkno, size, secsize); +#else SECSIZE bp = getblk(dev, blkno, size); - if (bp->b_flags&B_DONE) { - trace(TR_BREADHIT, dev, blkno); - return(bp); +#endif SECSIZE + if (bp->b_flags&(B_DONE|B_DELWRI)) { + trace(TR_BREADHIT, pack(dev, size), blkno); + return (bp); } bp->b_flags |= B_READ; + if (bp->b_bcount > bp->b_bufsize) + panic("bread"); (*bdevsw[major(dev)].d_strategy)(bp); - trace(TR_BREADMISS, dev, blkno); - u.u_vm.vm_inblk++; /* pay for read */ + trace(TR_BREADMISS, pack(dev, size), blkno); + u.u_ru.ru_inblock++; /* pay for read */ biowait(bp); - return(bp); + return (bp); } /* @@ -41,10 +63,17 @@ bread(dev, blkno, size) * read-ahead block (which is not allocated to the caller) */ struct buf * -breada(dev, blkno, size, rablkno, rasize) +#ifdef SECSIZE +breada(dev, blkno, size, secsize, rablkno, rabsize) +#else SECSIZE +breada(dev, blkno, size, rablkno, rabsize) +#endif SECSIZE dev_t dev; daddr_t blkno; int size; - daddr_t rablkno; int rasize; +#ifdef SECSIZE + long secsize; +#endif SECSIZE + daddr_t rablkno; int rabsize; { register struct buf *bp, *rabp; @@ -55,14 +84,20 @@ breada(dev, blkno, size, rablkno, rasize) * for a cache hit). */ if (!incore(dev, blkno)) { +#ifdef SECSIZE + bp = getblk(dev, blkno, size, secsize); +#else SECSIZE bp = getblk(dev, blkno, size); - if ((bp->b_flags&B_DONE) == 0) { +#endif SECSIZE + if ((bp->b_flags&(B_DONE|B_DELWRI)) == 0) { bp->b_flags |= B_READ; + if (bp->b_bcount > bp->b_bufsize) + panic("breada"); (*bdevsw[major(dev)].d_strategy)(bp); - trace(TR_BREADMISS, dev, blkno); - u.u_vm.vm_inblk++; /* pay for read */ + trace(TR_BREADMISS, pack(dev, size), blkno); + u.u_ru.ru_inblock++; /* pay for read */ } else - trace(TR_BREADHIT, dev, blkno); + trace(TR_BREADHIT, pack(dev, size), blkno); } /* @@ -70,15 +105,21 @@ breada(dev, blkno, size, rablkno, rasize) * on it also (as above). */ if (rablkno && !incore(dev, rablkno)) { - rabp = getblk(dev, rablkno, rasize); - if (rabp->b_flags & B_DONE) { +#ifdef SECSIZE + rabp = getblk(dev, rablkno, rabsize, secsize); +#else SECSIZE + rabp = getblk(dev, rablkno, rabsize); +#endif SECSIZE + if (rabp->b_flags & (B_DONE|B_DELWRI)) { brelse(rabp); - trace(TR_BREADHITRA, dev, blkno); + trace(TR_BREADHITRA, pack(dev, rabsize), blkno); } else { rabp->b_flags |= B_READ|B_ASYNC; + if (rabp->b_bcount > rabp->b_bufsize) + panic("breadrabp"); (*bdevsw[major(dev)].d_strategy)(rabp); - trace(TR_BREADMISSRA, dev, rablock); - u.u_vm.vm_inblk++; /* pay in advance */ + trace(TR_BREADMISSRA, pack(dev, rabsize), rablock); + u.u_ru.ru_inblock++; /* pay in advance */ } } @@ -88,7 +129,11 @@ breada(dev, blkno, size, rablkno, rasize) * above, and just wait for it. */ if (bp == NULL) +#ifdef SECSIZE + return (bread(dev, blkno, size, secsize)); +#else SECSIZE return (bread(dev, blkno, size)); +#endif SECSIZE biowait(bp); return (bp); } @@ -103,26 +148,24 @@ bwrite(bp) register flag; flag = bp->b_flags; - bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI | B_AGE); + bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI); if ((flag&B_DELWRI) == 0) - u.u_vm.vm_oublk++; /* noone paid yet */ - trace(TR_BWRITE, bp->b_dev, bp->b_blkno); + u.u_ru.ru_oublock++; /* noone paid yet */ + trace(TR_BWRITE, pack(bp->b_dev, bp->b_bcount), bp->b_blkno); + if (bp->b_bcount > bp->b_bufsize) + panic("bwrite"); (*bdevsw[major(bp->b_dev)].d_strategy)(bp); /* * If the write was synchronous, then await i/o completion. * If the write was "delayed", then we put the buffer on * the q of blocks awaiting i/o completion status. - * Otherwise, the i/o must be finished and we check for - * an error. */ if ((flag&B_ASYNC) == 0) { biowait(bp); brelse(bp); } else if (flag & B_DELWRI) bp->b_flags |= B_AGE; - else - geterror(bp); } /* @@ -136,12 +179,10 @@ bwrite(bp) bdwrite(bp) register struct buf *bp; { - register int flags; if ((bp->b_flags&B_DELWRI) == 0) - u.u_vm.vm_oublk++; /* noone paid yet */ - flags = bdevsw[major(bp->b_dev)].d_flags; - if(flags & B_TAPE) + u.u_ru.ru_oublock++; /* noone paid yet */ + if (bdevsw[major(bp->b_dev)].d_flags & B_TAPE) bawrite(bp); else { bp->b_flags |= B_DELWRI | B_DONE; @@ -169,6 +210,7 @@ brelse(bp) register struct buf *flist; register s; + trace(TR_BRELSE, pack(bp->b_dev, bp->b_bufsize), bp->b_blkno); /* * If someone's waiting for the buffer, or * is waiting for a buffer wake 'em up. @@ -188,10 +230,14 @@ brelse(bp) /* * Stick the buffer back on a free list. */ - s = spl6(); - if (bp->b_flags & (B_ERROR|B_INVAL)) { + s = splbio(); + if (bp->b_bufsize <= 0) { + /* block has no buffer ... put at front of unused buffer list */ + flist = &bfreelist[BQ_EMPTY]; + binsheadfree(bp, flist); + } else if (bp->b_flags & (B_ERROR|B_INVAL)) { /* block has no info ... put at front of most free list */ - flist = &bfreelist[BQUEUES-1]; + flist = &bfreelist[BQ_AGE]; binsheadfree(bp, flist); } else { if (bp->b_flags & B_LOCKED) @@ -226,14 +272,25 @@ incore(dev, blkno) } struct buf * +#ifdef SECSIZE +baddr(dev, blkno, size, secsize) +#else SECSIZE baddr(dev, blkno, size) +#endif SECSIZE dev_t dev; daddr_t blkno; int size; +#ifdef SECSIZE + long secsize; +#endif SECSIZE { if (incore(dev, blkno)) +#ifdef SECSIZE + return (bread(dev, blkno, size, secsize)); +#else SECSIZE return (bread(dev, blkno, size)); +#endif SECSIZE return (0); } @@ -242,21 +299,45 @@ baddr(dev, blkno, size) * block is already associated, return it; otherwise search * for the oldest non-busy buffer and reassign it. * + * If we find the buffer, but it is dirty (marked DELWRI) and + * its size is changing, we must write it out first. When the + * buffer is shrinking, the write is done by brealloc to avoid + * losing the unwritten data. When the buffer is growing, the + * write is done by getblk, so that bread will not read stale + * disk data over the modified data in the buffer. + * * We use splx here because this routine may be called * on the interrupt stack during a dump, and we don't * want to lower the ipl back to 0. */ struct buf * +#ifdef SECSIZE +getblk(dev, blkno, size, secsize) +#else SECSIZE getblk(dev, blkno, size) +#endif SECSIZE dev_t dev; daddr_t blkno; int size; +#ifdef SECSIZE + long secsize; +#endif SECSIZE { - register struct buf *bp, *dp, *ep; + register struct buf *bp, *dp; int s; - if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-PGSHIFT)) - blkno = 1 << ((sizeof(int)*NBBY-PGSHIFT) + 1); + if (size > MAXBSIZE) + panic("getblk: size too big"); + /* + * To prevent overflow of 32-bit ints when converting block + * numbers to byte offsets, blknos > 2^32 / DEV_BSIZE are set + * to the maximum number that can be converted to a byte offset + * without overflow. This is historic code; what bug it fixed, + * or whether it is still a reasonable thing to do is open to + * dispute. mkm 9/85 + */ + if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-DEV_BSHIFT)) + blkno = 1 << ((sizeof(int)*NBBY-DEV_BSHIFT) + 1); /* * Search the cache for the block. If we hit, but * the buffer is in use for i/o, then we wait until @@ -268,7 +349,7 @@ loop: if (bp->b_blkno != blkno || bp->b_dev != dev || bp->b_flags&B_INVAL) continue; - s = spl6(); + s = splbio(); if (bp->b_flags&B_BUSY) { bp->b_flags |= B_WANTED; sleep((caddr_t)bp, PRIBIO+1); @@ -277,43 +358,35 @@ loop: } splx(s); notavail(bp); - brealloc(bp, size); + if (bp->b_bcount != size) { + if (bp->b_bcount < size && (bp->b_flags&B_DELWRI)) { + bp->b_flags &= ~B_ASYNC; + bwrite(bp); + goto loop; + } + if (brealloc(bp, size) == 0) + goto loop; + } + if (bp->b_bcount != size && brealloc(bp, size) == 0) + goto loop; bp->b_flags |= B_CACHE; - return(bp); + return (bp); } if (major(dev) >= nblkdev) panic("blkdev"); - /* - * Not found in the cache, select something from - * a free list. Preference is to LRU list, then AGE list. - */ - s = spl6(); - for (ep = &bfreelist[BQUEUES-1]; ep > bfreelist; ep--) - if (ep->av_forw != ep) - break; - if (ep == bfreelist) { /* no free blocks at all */ - ep->b_flags |= B_WANTED; - sleep((caddr_t)ep, PRIBIO+1); - splx(s); - goto loop; - } - splx(s); - bp = ep->av_forw; - notavail(bp); - if (bp->b_flags & B_DELWRI) { - bp->b_flags |= B_ASYNC; - bwrite(bp); - goto loop; - } - trace(TR_BRELSE, bp->b_dev, bp->b_blkno); - bp->b_flags = B_BUSY; + bp = getnewbuf(); bfree(bp); bremhash(bp); binshash(bp, dp); bp->b_dev = dev; +#ifdef SECSIZE + bp->b_blksize = secsize; +#endif SECSIZE bp->b_blkno = blkno; - brealloc(bp, size); - return(bp); + bp->b_error = 0; + if (brealloc(bp, size) == 0) + goto loop; + return (bp); } /* @@ -324,39 +397,30 @@ struct buf * geteblk(size) int size; { - register struct buf *bp, *dp; - int s; + register struct buf *bp, *flist; + if (size > MAXBSIZE) + panic("geteblk: size too big"); loop: - s = spl6(); - for (dp = &bfreelist[BQUEUES-1]; dp > bfreelist; dp--) - if (dp->av_forw != dp) - break; - if (dp == bfreelist) { /* no free blocks */ - dp->b_flags |= B_WANTED; - sleep((caddr_t)dp, PRIBIO+1); - goto loop; - } - splx(s); - bp = dp->av_forw; - notavail(bp); - if (bp->b_flags & B_DELWRI) { - bp->b_flags |= B_ASYNC; - bwrite(bp); - goto loop; - } - trace(TR_BRELSE, bp->b_dev, bp->b_blkno); - bp->b_flags = B_BUSY|B_INVAL; + bp = getnewbuf(); + bp->b_flags |= B_INVAL; bfree(bp); bremhash(bp); - binshash(bp, dp); + flist = &bfreelist[BQ_AGE]; + binshash(bp, flist); bp->b_dev = (dev_t)NODEV; - brealloc(bp, size); - return(bp); +#ifdef SECSIZE + bp->b_blksize = DEV_BSIZE; +#endif SECSIZE + bp->b_error = 0; + if (brealloc(bp, size) == 0) + goto loop; + return (bp); } /* * Allocate space associated with a buffer. + * If can't get space, buffer is released */ brealloc(bp, size) register struct buf *bp; @@ -368,69 +432,104 @@ brealloc(bp, size) int s; /* - * First need to make sure that all overlaping previous I/O + * First need to make sure that all overlapping previous I/O * is dispatched with. */ if (size == bp->b_bcount) - return; - if (size < bp->b_bcount || bp->b_dev == NODEV) - goto allocit; - - start = bp->b_blkno + (bp->b_bcount / DEV_BSIZE); - last = bp->b_blkno + (size / DEV_BSIZE) - 1; - if (bp->b_bcount == 0) { - start++; - if (start == last) - goto allocit; + return (1); + if (size < bp->b_bcount) { + if (bp->b_flags & B_DELWRI) { + bwrite(bp); + return (0); + } + if (bp->b_flags & B_LOCKED) + panic("brealloc"); + return (allocbuf(bp, size)); } + bp->b_flags &= ~B_DONE; + if (bp->b_dev == NODEV) + return (allocbuf(bp, size)); + + trace(TR_BREALLOC, pack(bp->b_dev, size), bp->b_blkno); + /* + * Search cache for any buffers that overlap the one that we + * are trying to allocate. Overlapping buffers must be marked + * invalid, after being written out if they are dirty. (indicated + * by B_DELWRI) A disk block must be mapped by at most one buffer + * at any point in time. Care must be taken to avoid deadlocking + * when two buffer are trying to get the same set of disk blocks. + */ + start = bp->b_blkno; +#ifdef SECSIZE + last = start + size/bp->b_blksize - 1; +#else SECSIZE + last = start + btodb(size) - 1; +#endif SECSIZE dp = BUFHASH(bp->b_dev, bp->b_blkno); loop: for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) { - if (ep->b_blkno < start || ep->b_blkno > last || - ep->b_dev != bp->b_dev || ep->b_flags&B_INVAL) + if (ep == bp || ep->b_dev != bp->b_dev || (ep->b_flags&B_INVAL)) continue; - s = spl6(); + /* look for overlap */ + if (ep->b_bcount == 0 || ep->b_blkno > last || +#ifdef SECSIZE + ep->b_blkno + ep->b_bcount/ep->b_blksize <= start) +#else SECSIZE + ep->b_blkno + btodb(ep->b_bcount) <= start) +#endif SECSIZE + continue; + s = splbio(); if (ep->b_flags&B_BUSY) { ep->b_flags |= B_WANTED; sleep((caddr_t)ep, PRIBIO+1); - (void) splx(s); + splx(s); goto loop; } - (void) splx(s); - /* - * What we would really like to do is kill this - * I/O since it is now useless. We cannot do that - * so we force it to complete, so that it cannot - * over-write our useful data later. - */ + splx(s); + notavail(ep); if (ep->b_flags & B_DELWRI) { - notavail(ep); - ep->b_flags |= B_ASYNC; bwrite(ep); goto loop; } + ep->b_flags |= B_INVAL; + brelse(ep); } -allocit: - /* - * Here the buffer is already available, so all we - * need to do is set the size. Someday a better memory - * management scheme will be implemented. - */ - bp->b_bcount = size; + return (allocbuf(bp, size)); } /* - * Release space associated with a buffer. + * Find a buffer which is available for use. + * Select something from a free list. + * Preference is to AGE list, then LRU list. */ -bfree(bp) - struct buf *bp; +struct buf * +getnewbuf() { - /* - * Here the buffer does not change, so all we - * need to do is set the size. Someday a better memory - * management scheme will be implemented. - */ - bp->b_bcount = 0; + register struct buf *bp, *dp; + int s; + +loop: + s = splbio(); + for (dp = &bfreelist[BQ_AGE]; dp > bfreelist; dp--) + if (dp->av_forw != dp) + break; + if (dp == bfreelist) { /* no free blocks */ + dp->b_flags |= B_WANTED; + sleep((caddr_t)dp, PRIBIO+1); + splx(s); + goto loop; + } + splx(s); + bp = dp->av_forw; + notavail(bp); + if (bp->b_flags & B_DELWRI) { + bp->b_flags |= B_ASYNC; + bwrite(bp); + goto loop; + } + trace(TR_BRELSE, pack(bp->b_dev, bp->b_bufsize), bp->b_blkno); + bp->b_flags = B_BUSY; + return (bp); } /* @@ -442,42 +541,30 @@ biowait(bp) { int s; - s = spl6(); + s = splbio(); while ((bp->b_flags&B_DONE)==0) sleep((caddr_t)bp, PRIBIO); splx(s); - geterror(bp); + if (u.u_error == 0) /* XXX */ + u.u_error = geterror(bp); } /* - * Mark I/O complete on a buffer. If the header - * indicates a dirty page push completion, the - * header is inserted into the ``cleaned'' list - * to be processed by the pageout daemon. Otherwise - * release it if I/O is asynchronous, and wake - * up anyone waiting for it. + * Mark I/O complete on a buffer. + * If someone should be called, e.g. the pageout + * daemon, do so. Otherwise, wake up anyone + * waiting for it. */ biodone(bp) register struct buf *bp; { - register int s; if (bp->b_flags & B_DONE) panic("dup biodone"); bp->b_flags |= B_DONE; - if (bp->b_flags & B_DIRTY) { - if (bp->b_flags & B_ERROR) - panic("IO err in push"); - s = spl6(); - bp->av_forw = bclnlist; - bp->b_bcount = swsize[bp - swbuf]; - bp->b_pfcent = swpf[bp - swbuf]; - cnt.v_pgout++; - cnt.v_pgpgout += bp->b_bcount / NBPG; - bclnlist = bp; - if (bswlist.b_flags & B_WANTED) - wakeup((caddr_t)&proc[2]); - splx(s); + if (bp->b_flags & B_CALL) { + bp->b_flags &= ~B_CALL; + (*bp->b_iodone)(bp); return; } if (bp->b_flags&B_ASYNC) @@ -489,11 +576,69 @@ biodone(bp) } /* - * make sure all write-behind blocks + * Insure that no part of a specified block is in an incore buffer. +#ifdef SECSIZE + * "size" is given in device blocks (the units of b_blkno). +#endif SECSIZE +#ifdef SECSIZE + * "size" is given in device blocks (the units of b_blkno). +#endif SECSIZE + */ +blkflush(dev, blkno, size) + dev_t dev; + daddr_t blkno; +#ifdef SECSIZE + int size; +#else SECSIZE + long size; +#endif SECSIZE +{ + register struct buf *ep; + struct buf *dp; + daddr_t start, last; + int s; + + start = blkno; +#ifdef SECSIZE + last = start + size - 1; +#else SECSIZE + last = start + btodb(size) - 1; +#endif SECSIZE + dp = BUFHASH(dev, blkno); +loop: + for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) { + if (ep->b_dev != dev || (ep->b_flags&B_INVAL)) + continue; + /* look for overlap */ + if (ep->b_bcount == 0 || ep->b_blkno > last || +#ifdef SECSIZE + ep->b_blkno + ep->b_bcount / ep->b_blksize <= start) +#else SECSIZE + ep->b_blkno + btodb(ep->b_bcount) <= start) +#endif SECSIZE + continue; + s = splbio(); + if (ep->b_flags&B_BUSY) { + ep->b_flags |= B_WANTED; + sleep((caddr_t)ep, PRIBIO+1); + splx(s); + goto loop; + } + if (ep->b_flags & B_DELWRI) { + splx(s); + notavail(ep); + bwrite(ep); + goto loop; + } + splx(s); + } +} + +/* + * Make sure all write-behind blocks * on dev (or NODEV for all) * are flushed out. * (from umount and update) - * (and temporarily pagein) */ bflush(dev) dev_t dev; @@ -503,8 +648,8 @@ bflush(dev) int s; loop: - s = spl6(); - for (flist = bfreelist; flist < &bfreelist[BQUEUES]; flist++) + s = splbio(); + for (flist = bfreelist; flist < &bfreelist[BQ_EMPTY]; flist++) for (bp = flist->av_forw; bp != flist; bp = bp->av_forw) { if ((bp->b_flags & B_DELWRI) == 0) continue; @@ -512,6 +657,7 @@ loop: bp->b_flags |= B_ASYNC; notavail(bp); bwrite(bp); + splx(s); goto loop; } } @@ -520,17 +666,17 @@ loop: /* * Pick up the device's error number and pass it to the user; - * if there is an error but the number is 0 set a generalized - * code. Actually the latter is always true because devices - * don't yet return specific errors. + * if there is an error but the number is 0 set a generalized code. */ geterror(bp) register struct buf *bp; { + int error = 0; if (bp->b_flags&B_ERROR) - if ((u.u_error = bp->b_error)==0) - u.u_error = EIO; + if ((error = bp->b_error)==0) + return (EIO); + return (error); } /*