X-Git-Url: https://git.subgeniuskitty.com/unix-history/.git/blobdiff_plain/60a71525b34d4b9e3cfd368765a0c4878da3410e..d301d1501bc31355d0b202371a7c87f097d9ed8f:/usr/src/sys/kern/kern_physio.c?ds=inline diff --git a/usr/src/sys/kern/kern_physio.c b/usr/src/sys/kern/kern_physio.c index 2809b94942..79e2e03d24 100644 --- a/usr/src/sys/kern/kern_physio.c +++ b/usr/src/sys/kern/kern_physio.c @@ -1,68 +1,30 @@ -/* kern_physio.c 4.12 %G% */ - -#include "../h/param.h" -#include "../h/systm.h" -#include "../h/dir.h" -#include "../h/user.h" -#include "../h/buf.h" -#include "../h/conf.h" -#include "../h/proc.h" -#include "../h/seg.h" -#include "../h/pte.h" -#include "../h/vm.h" -#include "../h/trace.h" - /* - * The following several routines allocate and free - * buffers with various side effects. In general the - * arguments to an allocate routine are a device and - * a block number, and the value is a pointer to - * to the buffer header; the buffer is marked "busy" - * so that no one else can touch it. If the block was - * already in core, no I/O need be done; if it is - * already busy, the process waits until it becomes free. - * The following routines allocate a buffer: - * getblk - * bread - * breada - * baddr (if it is incore) - * Eventually the buffer must be released, possibly with the - * side effect of writing it out, by using one of - * bwrite - * bdwrite - * bawrite - * brelse + * Copyright (c) 1982, 1986 Regents of the University of California. + * All rights reserved. The Berkeley software License Agreement + * specifies the terms and conditions for redistribution. + * + * @(#)kern_physio.c 7.5 (Berkeley) %G% */ -#define BUFHSZ 63 -struct bufhd bufhash[BUFHSZ]; -#define BUFHASH(dev, dblkno) \ - ((struct buf *)&bufhash[((int)(dev)+(int)(dblkno)) % BUFHSZ]) - -/* - * Initialize hash links for buffers. - */ -bhinit() -{ - register int i; - register struct bufhd *bp; - - for (bp = bufhash, i = 0; i < BUFHSZ; i++, bp++) - bp->b_forw = bp->b_back = (struct buf *)bp; -} - -/* #define DISKMON 1 */ - -#ifdef DISKMON -struct { - int nbuf; - long nread; - long nreada; - long ncache; - long nwrite; - long bufcount[NBUF]; -} io_info; -#endif +#include "param.h" +#include "systm.h" +#include "dir.h" +#include "user.h" +#include "buf.h" +#include "conf.h" +#include "proc.h" +#include "seg.h" +#include "vm.h" +#include "trace.h" +#include "map.h" +#include "uio.h" + +#include "machine/pte.h" +#ifdef SECSIZE +#include "file.h" +#include "ioctl.h" +#include "disklabel.h" +#endif SECSIZE /* * Swap IO headers - @@ -74,462 +36,7 @@ struct { * page push, when the I/O completes, it is inserted * in a list of cleaned pages to be processed by the pageout daemon. */ -struct buf swbuf[NSWBUF]; -short swsize[NSWBUF]; /* CAN WE JUST USE B_BCOUNT? */ -int swpf[NSWBUF]; - - -#ifdef FASTVAX -#define notavail(bp) \ -{ \ - int s = spl6(); \ - (bp)->av_back->av_forw = (bp)->av_forw; \ - (bp)->av_forw->av_back = (bp)->av_back; \ - (bp)->b_flags |= B_BUSY; \ - splx(s); \ -} -#endif - -/* - * Read in (if necessary) the block and return a buffer pointer. - */ -struct buf * -bread(dev, blkno) -dev_t dev; -daddr_t blkno; -{ - register struct buf *bp; - - bp = getblk(dev, blkno); - if (bp->b_flags&B_DONE) { -#ifdef EPAWNJ - trace(TR_BREAD|TR_HIT, dev, blkno); -#endif -#ifdef DISKMON - io_info.ncache++; -#endif - return(bp); - } - bp->b_flags |= B_READ; - bp->b_bcount = BSIZE; - (*bdevsw[major(dev)].d_strategy)(bp); -#ifdef EPAWNJ - trace(TR_BREAD|TR_MISS, dev, blkno); -#endif -#ifdef DISKMON - io_info.nread++; -#endif - u.u_vm.vm_inblk++; /* pay for read */ - iowait(bp); - return(bp); -} - -/* - * Read in the block, like bread, but also start I/O on the - * read-ahead block (which is not allocated to the caller) - */ -struct buf * -breada(dev, blkno, rablkno) -dev_t dev; -daddr_t blkno, rablkno; -{ - register struct buf *bp, *rabp; - - bp = NULL; - if (!incore(dev, blkno)) { - bp = getblk(dev, blkno); - if ((bp->b_flags&B_DONE) == 0) { - bp->b_flags |= B_READ; - bp->b_bcount = BSIZE; - (*bdevsw[major(dev)].d_strategy)(bp); -#ifdef EPAWNJ - trace(TR_BREAD|TR_MISS, dev, blkno); -#endif -#ifdef DISKMON - io_info.nread++; -#endif - u.u_vm.vm_inblk++; /* pay for read */ - } -#ifdef EPAWNJ - else - trace(TR_BREAD|TR_HIT, dev, blkno); -#endif - } - if (rablkno && !incore(dev, rablkno)) { - rabp = getblk(dev, rablkno); - if (rabp->b_flags & B_DONE) { - brelse(rabp); -#ifdef EPAWNJ - trace(TR_BREAD|TR_HIT|TR_RA, dev, blkno); -#endif - } else { - rabp->b_flags |= B_READ|B_ASYNC; - rabp->b_bcount = BSIZE; - (*bdevsw[major(dev)].d_strategy)(rabp); -#ifdef EPAWNJ - trace(TR_BREAD|TR_MISS|TR_RA, dev, rablock); -#endif -#ifdef DISKMON - io_info.nreada++; -#endif - u.u_vm.vm_inblk++; /* pay in advance */ - } - } - if(bp == NULL) - return(bread(dev, blkno)); - iowait(bp); - return(bp); -} - -/* - * Write the buffer, waiting for completion. - * Then release the buffer. - */ -bwrite(bp) -register struct buf *bp; -{ - register flag; - - flag = bp->b_flags; - bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI | B_AGE); - bp->b_bcount = BSIZE; -#ifdef DISKMON - io_info.nwrite++; -#endif - if ((flag&B_DELWRI) == 0) - u.u_vm.vm_oublk++; /* noone paid yet */ -#ifdef EPAWNJ - trace(TR_BWRITE, bp->b_dev, dbtofsb(bp->b_blkno)); -#endif - (*bdevsw[major(bp->b_dev)].d_strategy)(bp); - if ((flag&B_ASYNC) == 0) { - iowait(bp); - brelse(bp); - } else if (flag & B_DELWRI) - bp->b_flags |= B_AGE; - else - geterror(bp); -} - -/* - * Release the buffer, marking it so that if it is grabbed - * for another purpose it will be written out before being - * given up (e.g. when writing a partial block where it is - * assumed that another write for the same block will soon follow). - * This can't be done for magtape, since writes must be done - * in the same order as requested. - */ -bdwrite(bp) -register struct buf *bp; -{ - register int flags; - - if ((bp->b_flags&B_DELWRI) == 0) - u.u_vm.vm_oublk++; /* noone paid yet */ - flags = bdevsw[major(bp->b_dev)].d_flags; - if(flags & B_TAPE) - bawrite(bp); - else { - bp->b_flags |= B_DELWRI | B_DONE; - brelse(bp); - } -} - -/* - * Release the buffer, start I/O on it, but don't wait for completion. - */ -bawrite(bp) -register struct buf *bp; -{ - - bp->b_flags |= B_ASYNC; - bwrite(bp); -} - -/* - * release the buffer, with no I/O implied. - */ -brelse(bp) -register struct buf *bp; -{ - register struct buf *flist; - register s; - - if (bp->b_flags&B_WANTED) - wakeup((caddr_t)bp); - if (bfreelist[0].b_flags&B_WANTED) { - bfreelist[0].b_flags &= ~B_WANTED; - wakeup((caddr_t)bfreelist); - } - if (bp->b_flags&B_ERROR) - if (bp->b_flags & B_LOCKED) - bp->b_flags &= ~B_ERROR; /* try again later */ - else - bp->b_dev = NODEV; /* no assoc */ - s = spl6(); - if (bp->b_flags & (B_ERROR|B_INVAL)) { - /* block has no info ... put at front of most free list */ - flist = &bfreelist[BQUEUES-1]; - flist->av_forw->av_back = bp; - bp->av_forw = flist->av_forw; - flist->av_forw = bp; - bp->av_back = flist; - } else { - if (bp->b_flags & B_LOCKED) - flist = &bfreelist[BQ_LOCKED]; - else if (bp->b_flags & B_AGE) - flist = &bfreelist[BQ_AGE]; - else - flist = &bfreelist[BQ_LRU]; - flist->av_back->av_forw = bp; - bp->av_back = flist->av_back; - flist->av_back = bp; - bp->av_forw = flist; - } - bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE); - splx(s); -} - -/* - * See if the block is associated with some buffer - * (mainly to avoid getting hung up on a wait in breada) - */ -incore(dev, blkno) -dev_t dev; -daddr_t blkno; -{ - register struct buf *bp; - register struct buf *dp; - register int dblkno = fsbtodb(blkno); - - dp = BUFHASH(dev, dblkno); - for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) - if (bp->b_blkno == dblkno && bp->b_dev == dev && - !(bp->b_flags & B_INVAL)) - return (1); - return (0); -} - -struct buf * -baddr(dev, blkno) -dev_t dev; -daddr_t blkno; -{ - - if (incore(dev, blkno)) - return (bread(dev, blkno)); - return (0); -} - -/* - * Assign a buffer for the given block. If the appropriate - * block is already associated, return it; otherwise search - * for the oldest non-busy buffer and reassign it. - */ -struct buf * -getblk(dev, blkno) -dev_t dev; -daddr_t blkno; -{ - register struct buf *bp, *dp, *ep; - register int dblkno = fsbtodb(blkno); -#ifdef DISKMON - register int i; -#endif - - if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-PGSHIFT)) - blkno = 1 << ((sizeof(int)*NBBY-PGSHIFT) + 1); - dblkno = fsbtodb(blkno); - dp = BUFHASH(dev, dblkno); - loop: - (void) spl0(); - for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) { - if (bp->b_blkno != dblkno || bp->b_dev != dev || - bp->b_flags&B_INVAL) - continue; - (void) spl6(); - if (bp->b_flags&B_BUSY) { - bp->b_flags |= B_WANTED; - sleep((caddr_t)bp, PRIBIO+1); - goto loop; - } - (void) spl0(); -#ifdef DISKMON - i = 0; - dp = bp->av_forw; - while ((dp->b_flags & B_HEAD) == 0) { - i++; - dp = dp->av_forw; - } - if (ib_flags |= B_CACHE; - return(bp); - } - if (major(dev) >= nblkdev) - panic("blkdev"); - (void) spl6(); - for (ep = &bfreelist[BQUEUES-1]; ep > bfreelist; ep--) - if (ep->av_forw != ep) - break; - if (ep == bfreelist) { /* no free blocks at all */ - ep->b_flags |= B_WANTED; - sleep((caddr_t)ep, PRIBIO+1); - goto loop; - } - (void) spl0(); - bp = ep->av_forw; - notavail(bp); - if (bp->b_flags & B_DELWRI) { - bp->b_flags |= B_ASYNC; - bwrite(bp); - goto loop; - } -#ifdef EPAWNJ - trace(TR_BRELSE, bp->b_dev, dbtofsb(bp->b_blkno)); -#endif - bp->b_flags = B_BUSY; - bp->b_back->b_forw = bp->b_forw; - bp->b_forw->b_back = bp->b_back; - bp->b_forw = dp->b_forw; - bp->b_back = dp; - dp->b_forw->b_back = bp; - dp->b_forw = bp; - bp->b_dev = dev; - bp->b_blkno = dblkno; - return(bp); -} - -/* - * get an empty block, - * not assigned to any particular device - */ -struct buf * -geteblk() -{ - register struct buf *bp, *dp; - -loop: - (void) spl6(); - for (dp = &bfreelist[BQUEUES-1]; dp > bfreelist; dp--) - if (dp->av_forw != dp) - break; - if (dp == bfreelist) { /* no free blocks */ - dp->b_flags |= B_WANTED; - sleep((caddr_t)dp, PRIBIO+1); - goto loop; - } - (void) spl0(); - bp = dp->av_forw; - notavail(bp); - if (bp->b_flags & B_DELWRI) { - bp->b_flags |= B_ASYNC; - bwrite(bp); - goto loop; - } -#ifdef EPAWNJ - trace(TR_BRELSE, bp->b_dev, dbtofsb(bp->b_blkno)); -#endif - bp->b_flags = B_BUSY|B_INVAL; - bp->b_back->b_forw = bp->b_forw; - bp->b_forw->b_back = bp->b_back; - bp->b_forw = dp->b_forw; - bp->b_back = dp; - dp->b_forw->b_back = bp; - dp->b_forw = bp; - bp->b_dev = (dev_t)NODEV; - return(bp); -} - -/* - * Wait for I/O completion on the buffer; return errors - * to the user. - */ -iowait(bp) -register struct buf *bp; -{ - - (void) spl6(); - while ((bp->b_flags&B_DONE)==0) - sleep((caddr_t)bp, PRIBIO); - (void) spl0(); - geterror(bp); -} - -#ifndef FASTVAX -/* - * Unlink a buffer from the available list and mark it busy. - * (internal interface) - */ -notavail(bp) -register struct buf *bp; -{ - register s; - - s = spl6(); - bp->av_back->av_forw = bp->av_forw; - bp->av_forw->av_back = bp->av_back; - bp->b_flags |= B_BUSY; - splx(s); -} -#endif - -/* - * Mark I/O complete on a buffer. If the header - * indicates a dirty page push completion, the - * header is inserted into the ``cleaned'' list - * to be processed by the pageout daemon. Otherwise - * release it if I/O is asynchronous, and wake - * up anyone waiting for it. - */ -iodone(bp) -register struct buf *bp; -{ - register int s; - - if (bp->b_flags & B_DONE) - panic("dup iodone"); - bp->b_flags |= B_DONE; - if (bp->b_flags & B_DIRTY) { - if (bp->b_flags & B_ERROR) - panic("IO err in push"); - s = spl6(); - cnt.v_pgout++; - bp->av_forw = bclnlist; - bp->b_bcount = swsize[bp - swbuf]; - bp->b_pfcent = swpf[bp - swbuf]; - bclnlist = bp; - if (bswlist.b_flags & B_WANTED) - wakeup((caddr_t)&proc[2]); - splx(s); - return; - } - if (bp->b_flags&B_ASYNC) - brelse(bp); - else { - bp->b_flags &= ~B_WANTED; - wakeup((caddr_t)bp); - } -} - -/* - * Zero the core associated with a buffer. - */ -clrbuf(bp) -struct buf *bp; -{ - register *p; - register c; - - p = bp->b_un.b_words; - c = BSIZE/sizeof(int); - do - *p++ = 0; - while (--c); - bp->b_resid = 0; -} +struct buf *swbuf; /* * swap I/O - @@ -540,32 +47,29 @@ struct buf *bp; * the index of the swap header that has been allocated. * We simply initialize the header and queue the I/O but * do not wait for completion. When the I/O completes, - * iodone() will link the header to a list of cleaned + * biodone() will link the header to a list of cleaned * pages to be processed by the pageout daemon. */ swap(p, dblkno, addr, nbytes, rdflg, flag, dev, pfcent) struct proc *p; swblk_t dblkno; caddr_t addr; - int flag, nbytes; + int nbytes, rdflg, flag; dev_t dev; - unsigned pfcent; + u_int pfcent; { register struct buf *bp; - register int c; - int p2dp; register struct pte *dpte, *vpte; + register u_int c; + int p2dp, s, error = 0; + struct buf *getswbuf(); + int swdone(); - (void) spl6(); - while (bswlist.av_forw == NULL) { - bswlist.b_flags |= B_WANTED; - sleep((caddr_t)&bswlist, PSWP+1); - } - bp = bswlist.av_forw; - bswlist.av_forw = bp->av_forw; - (void) spl0(); - + bp = getswbuf(PSWP+1); bp->b_flags = B_BUSY | B_PHYS | rdflg | flag; +#ifdef SECSIZE + bp->b_blksize = DEV_BSIZE; +#endif SECSIZE if ((bp->b_flags & (B_DIRTY|B_PGIN)) == 0) if (rdflg == B_READ) sum.v_pswpin += btoc(nbytes); @@ -581,48 +85,67 @@ swap(p, dblkno, addr, nbytes, rdflg, flag, dev, pfcent) panic("swap bad pte"); *dpte++ = *vpte++; } - bp->b_un.b_addr = (caddr_t)ctob(p2dp); + bp->b_un.b_addr = (caddr_t)ctob(dptov(&proc[2], p2dp)); + bp->b_flags |= B_CALL; + bp->b_iodone = swdone; + bp->b_pfcent = pfcent; } else bp->b_un.b_addr = addr; while (nbytes > 0) { - c = imin(ctob(120), nbytes); - bp->b_bcount = c; + bp->b_bcount = nbytes; + minphys(bp); + c = bp->b_bcount; bp->b_blkno = dblkno; bp->b_dev = dev; - if (flag & B_DIRTY) { - swpf[bp - swbuf] = pfcent; - swsize[bp - swbuf] = nbytes; - } +#ifdef TRACE + trace(TR_SWAPIO, dev, bp->b_blkno); +#endif (*bdevsw[major(dev)].d_strategy)(bp); + /* pageout daemon doesn't wait for pushed pages */ if (flag & B_DIRTY) { if (c < nbytes) panic("big push"); - return; + return (0); } - (void) spl6(); - while((bp->b_flags&B_DONE)==0) - sleep((caddr_t)bp, PSWP); - (void) spl0(); bp->b_un.b_addr += c; bp->b_flags &= ~B_DONE; if (bp->b_flags & B_ERROR) { if ((flag & (B_UAREA|B_PAGET)) || rdflg == B_WRITE) panic("hard IO err in swap"); - swkill(p, (char *)0); + swkill(p, "swap: read error from swap device"); + error = EIO; } nbytes -= c; - dblkno += btoc(c); +#ifdef SECSIZE + if (flag & B_PGIN && nbytes > 0) + panic("big pgin"); +#endif SECSIZE + dblkno += btodb(c); } - (void) spl6(); bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY); - bp->av_forw = bswlist.av_forw; - bswlist.av_forw = bp; - if (bswlist.b_flags & B_WANTED) { - bswlist.b_flags &= ~B_WANTED; - wakeup((caddr_t)&bswlist); + freeswbuf(bp); + return (error); +} + +/* + * Put a buffer on the clean list after I/O is done. + * Called from biodone. + */ +swdone(bp) + register struct buf *bp; +{ + register int s; + + if (bp->b_flags & B_ERROR) + panic("IO err in push"); + s = splbio(); + bp->av_forw = bclnlist; + cnt.v_pgout++; + cnt.v_pgpgout += bp->b_bcount / NBPG; + bclnlist = bp; + if (bswlist.b_flags & B_WANTED) wakeup((caddr_t)&proc[2]); - } - (void) spl0(); + splx(s); } /* @@ -635,11 +158,8 @@ swkill(p, rout) char *rout; { - printf("%d: ", p->p_pid); - if (rout) - printf("out of swap space in %s\n", rout); - else - printf("killed on swap error\n"); + printf("pid %d: %s\n", p->p_pid, rout); + uprintf("sorry, pid %d was killed in %s\n", p->p_pid, rout); /* * To be sure no looping (e.g. in vmsched trying to * swap out) mark process locked in core (as though @@ -650,37 +170,12 @@ swkill(p, rout) p->p_flag |= SULOCK; } -/* - * make sure all write-behind blocks - * on dev (or NODEV for all) - * are flushed out. - * (from umount and update) - */ -bflush(dev) -dev_t dev; -{ - register struct buf *bp; - register struct buf *flist; - -loop: - (void) spl6(); - for (flist = bfreelist; flist < &bfreelist[BQUEUES]; flist++) - for (bp = flist->av_forw; bp != flist; bp = bp->av_forw) { - if (bp->b_flags&B_DELWRI && (dev == NODEV||dev==bp->b_dev)) { - bp->b_flags |= B_ASYNC; - notavail(bp); - bwrite(bp); - goto loop; - } - } - (void) spl0(); -} - /* * Raw I/O. The arguments are * The strategy routine for the device - * A buffer, which will always be a special buffer - * header owned exclusively by the device for this purpose + * A buffer, which will either be a special buffer header owned + * exclusively by the device for this purpose, or NULL, + * indicating that we should use a swap buffer * The device number * Read/write flag * Essentially all the work is computing physical addresses and @@ -690,98 +185,156 @@ loop: * faulted and locked. After the completion of the I/O, the above pages * are unlocked. */ -physio(strat, bp, dev, rw, mincnt) -int (*strat)(); -register struct buf *bp; -unsigned (*mincnt)(); +physio(strat, bp, dev, rw, mincnt, uio) + int (*strat)(); + register struct buf *bp; + dev_t dev; + int rw; + u_int (*mincnt)(); + struct uio *uio; { + register struct iovec *iov; register int c; char *a; - - if (useracc(u.u_base,u.u_count,rw==B_READ?B_WRITE:B_READ) == NULL) { - u.u_error = EFAULT; - return; - } - (void) spl6(); - while (bp->b_flags&B_BUSY) { - bp->b_flags |= B_WANTED; - sleep((caddr_t)bp, PRIBIO+1); - } - bp->b_error = 0; - bp->b_proc = u.u_procp; - bp->b_un.b_addr = u.u_base; - while (u.u_count != 0 && bp->b_error==0) { - bp->b_flags = B_BUSY | B_PHYS | rw; - bp->b_dev = dev; - bp->b_blkno = u.u_offset >> PGSHIFT; - bp->b_bcount = u.u_count; - (*mincnt)(bp); - c = bp->b_bcount; - u.u_procp->p_flag |= SPHYSIO; - vslock(a = bp->b_un.b_addr, c); - (*strat)(bp); - (void) spl6(); - while ((bp->b_flags&B_DONE) == 0) - sleep((caddr_t)bp, PRIBIO); - vsunlock(a, c, rw); - u.u_procp->p_flag &= ~SPHYSIO; - if (bp->b_flags&B_WANTED) - wakeup((caddr_t)bp); - (void) spl0(); - bp->b_un.b_addr += c; - u.u_count -= c; - u.u_offset += c; + int s, allocbuf = 0, error = 0; + struct buf *getswbuf(); +#ifdef SECSIZE + int bsize; + struct partinfo dpart; +#endif SECSIZE + +#ifdef SECSIZE + if ((unsigned)major(dev) < nchrdev && + (*cdevsw[major(dev)].d_ioctl)(dev, DIOCGPART, (caddr_t)&dpart, + FREAD) == 0) + bsize = dpart.disklab->d_secsize; + else + bsize = DEV_BSIZE; +#endif SECSIZE + for (;;) { + if (uio->uio_iovcnt == 0) + return (0); + iov = uio->uio_iov; + if (useracc(iov->iov_base, (u_int)iov->iov_len, + rw==B_READ? B_WRITE : B_READ) == NULL) + return (EFAULT); + s = splbio(); + while (bp->b_flags&B_BUSY) { + bp->b_flags |= B_WANTED; + sleep((caddr_t)bp, PRIBIO+1); + } + if (!allocbuf) { /* only if sharing caller's buffer */ + s = splbio(); + while (bp->b_flags&B_BUSY) { + bp->b_flags |= B_WANTED; + sleep((caddr_t)bp, PRIBIO+1); + } + splx(s); + } + bp->b_error = 0; + bp->b_proc = u.u_procp; +#ifdef SECSIZE + bp->b_blksize = bsize; +#endif SECSIZE + bp->b_un.b_addr = iov->iov_base; + while (iov->iov_len > 0) { + bp->b_flags = B_BUSY | B_PHYS | rw; + bp->b_dev = dev; +#ifdef SECSIZE + bp->b_blkno = uio->uio_offset / bsize; +#else SECSIZE + bp->b_blkno = btodb(uio->uio_offset); +#endif SECSIZE + bp->b_bcount = iov->iov_len; + (*mincnt)(bp); + c = bp->b_bcount; + u.u_procp->p_flag |= SPHYSIO; + vslock(a = bp->b_un.b_addr, c); + physstrat(bp, strat, PRIBIO); + (void) splbio(); + vsunlock(a, c, rw); + u.u_procp->p_flag &= ~SPHYSIO; + if (bp->b_flags&B_WANTED) + wakeup((caddr_t)bp); + splx(s); + c -= bp->b_resid; + bp->b_un.b_addr += c; + iov->iov_len -= c; + uio->uio_resid -= c; + uio->uio_offset += c; + /* temp kludge for tape drives */ + if (bp->b_resid || (bp->b_flags&B_ERROR)) + break; + } + bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS); + error = geterror(bp); + /* temp kludge for tape drives */ + if (bp->b_resid || error) + return (error); + uio->uio_iov++; + uio->uio_iovcnt--; } - bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS); - u.u_count = bp->b_resid; - geterror(bp); + if (allocbuf) + freeswbuf(bp); + return (error); } -/*ARGSUSED*/ -unsigned +u_int minphys(bp) -struct buf *bp; + struct buf *bp; { + if (bp->b_bcount > MAXPHYS) + bp->b_bcount = MAXPHYS; +} + +static +struct buf * +getswbuf(prio) + int prio; +{ + int s; + struct buf *bp; - if (bp->b_bcount > 60 * 1024) - bp->b_bcount = 60 * 1024; + s = splbio(); + while (bswlist.av_forw == NULL) { + bswlist.b_flags |= B_WANTED; + sleep((caddr_t)&bswlist, prio); + } + bp = bswlist.av_forw; + bswlist.av_forw = bp->av_forw; + splx(s); + return (bp); } -/* - * Pick up the device's error number and pass it to the user; - * if there is an error but the number is 0 set a generalized - * code. Actually the latter is always true because devices - * don't yet return specific errors. - */ -geterror(bp) -register struct buf *bp; +static +freeswbuf(bp) + struct buf *bp; { + int s; - if (bp->b_flags&B_ERROR) - if ((u.u_error = bp->b_error)==0) - u.u_error = EIO; + s = splbio(); + bp->av_forw = bswlist.av_forw; + bswlist.av_forw = bp; + if (bswlist.b_flags & B_WANTED) { + bswlist.b_flags &= ~B_WANTED; + wakeup((caddr_t)&bswlist); + wakeup((caddr_t)&proc[2]); + } + splx(s); } -/* - * Invalidate in core blocks belonging to closed or umounted filesystem - * - * This is not nicely done at all - the buffer ought to be removed from the - * hash chains & have its dev/blkno fields clobbered, but unfortunately we - * can't do that here, as it is quite possible that the block is still - * being used for i/o. Eventually, all disc drivers should be forced to - * have a close routine, which ought ensure that the queue is empty, then - * properly flush the queues. Until that happy day, this suffices for - * correctness. ... kre - */ -binval(dev) -dev_t dev; +rawread(dev, uio) + dev_t dev; + struct uio *uio; { - register struct buf *bp; - register struct bufhd *hp; -#define dp ((struct buf *)hp) + return (physio(cdevsw[major(dev)].d_strategy, (struct buf *)NULL, + dev, B_READ, minphys, uio)); +} - for (hp = bufhash; hp < &bufhash[BUFHSZ]; hp++) - for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) - if (bp->b_dev == dev) - bp->b_flags |= B_INVAL; +rawwrite(dev, uio) + dev_t dev; + struct uio *uio; +{ + return (physio(cdevsw[major(dev)].d_strategy, (struct buf *)NULL, + dev, B_WRITE, minphys, uio)); }