/*
* Copyright (c) 1982, 1986 Regents of the University of California.
* All rights reserved. The Berkeley software License Agreement
* specifies the terms and conditions for redistribution.
*
* @(#)kern_physio.c 7.6 (Berkeley) %G%
*/
#include "param.h"
#include "systm.h"
#include "user.h"
#include "buf.h"
#include "conf.h"
#include "proc.h"
#include "seg.h"
#include "vm.h"
#include "trace.h"
#include "map.h"
#include "vnode.h"
#include "machine/pte.h"
#ifdef SECSIZE
#include "file.h"
#include "ioctl.h"
#include "disklabel.h"
#endif /* SECSIZE */
/*
 * Swap I/O headers -
 * They contain the information necessary to perform swap I/O.
 * At any given time, a swap header is on one of three
 * lists: when free it is on the free list; when allocated
 * and the I/O queued, it is on the swap device list; and
 * finally, if the operation was a dirty page push, when the
 * I/O completes it is moved to a list of cleaned pages to be
 * processed by the pageout daemon.
 */
struct buf *swbuf;
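
/*
 * The headers themselves are the array swbuf[]; in this system's
 * lineage they are allocated at boot by machine-dependent startup
 * code and threaded onto the free list headed by bswlist, from
 * which getswbuf() and freeswbuf() below draw and return them.
 */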
/*
* swap I/O -
*
* If the flag indicates a dirty page push initiated
 * by the pageout daemon, we map the page into the i'th
* virtual page of process 2 (the daemon itself) where i is
* the index of the swap header that has been allocated.
* We simply initialize the header and queue the I/O but
* do not wait for completion. When the I/O completes,
* biodone() will link the header to a list of cleaned
* pages to be processed by the pageout daemon.
*/
swap(p, dblkno, addr, nbytes, rdflg, flag, vp, pfcent)
struct proc *p;
swblk_t dblkno;
caddr_t addr;
int nbytes, rdflg, flag;
struct vnode *vp;
u_int pfcent;
{
register struct buf *bp;
register struct pte *dpte, *vpte;
register u_int c;
int p2dp, s, error = 0;
struct buf *getswbuf();
int swdone();
bp = getswbuf(PSWP+1);
bp->b_flags = B_BUSY | B_PHYS | rdflg | flag;
#ifdef SECSIZE
bp->b_blksize = DEV_BSIZE;
#endif /* SECSIZE */
if ((bp->b_flags & (B_DIRTY|B_PGIN)) == 0)
if (rdflg == B_READ)
sum.v_pswpin += btoc(nbytes);
else
sum.v_pswpout += btoc(nbytes);
bp->b_proc = p;
if (flag & B_DIRTY) {
p2dp = ((bp - swbuf) * CLSIZE) * KLMAX;
dpte = dptopte(&proc[2], p2dp);
vpte = vtopte(p, btop(addr));
for (c = 0; c < nbytes; c += NBPG) {
if (vpte->pg_pfnum == 0 || vpte->pg_fod)
panic("swap bad pte");
*dpte++ = *vpte++;
}
bp->b_un.b_addr = (caddr_t)ctob(dptov(&proc[2], p2dp));
bp->b_flags |= B_CALL;
bp->b_iodone = swdone;
bp->b_pfcent = pfcent;
} else
bp->b_un.b_addr = addr;
while (nbytes > 0) {
bp->b_blkno = dblkno;
		bp->b_dev = vp->v_rdev;
		if (bp->b_vp)
			brelvp(bp);
		vp->v_count++;			/* hold vnode for the transfer */
		bp->b_vp = vp;
bp->b_bcount = nbytes;
minphys(bp);
c = bp->b_bcount;
#ifdef TRACE
trace(TR_SWAPIO, vp, bp->b_blkno);
#endif
		VOP_STRATEGY(bp);
		/* pageout daemon doesn't wait for pushed pages */
		if (flag & B_DIRTY) {
			if (c < nbytes)
				panic("big push");
			return (0);
		}
		/* everyone else waits here for the transfer to finish */
		s = splbio();
		while ((bp->b_flags & B_DONE) == 0)
			sleep((caddr_t)bp, PSWP);
		splx(s);
		bp->b_un.b_addr += c;
		bp->b_flags &= ~B_DONE;
if (bp->b_flags & B_ERROR) {
if ((flag & (B_UAREA|B_PAGET)) || rdflg == B_WRITE)
panic("hard IO err in swap");
swkill(p, "swap: read error from swap device");
error = EIO;
}
nbytes -= c;
#ifdef SECSIZE
if (flag & B_PGIN && nbytes > 0)
panic("big pgin");
#endif /* SECSIZE */
dblkno += btodb(c);
}
bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
freeswbuf(bp);
return (error);
}
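
/*
 * For illustration only: a dirty-page push from the pageout daemon
 * would be issued along these lines (a sketch, not a call made in
 * this file; "rp" is the process owning the page and "swapdev_vp"
 * stands for whatever vnode the caller uses for the swap device):
 *
 *	(void) swap(rp, dblkno, addr, ctob(CLSIZE), B_WRITE,
 *	    B_DIRTY, swapdev_vp, pfcent);
 */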
/*
* Put a buffer on the clean list after I/O is done.
* Called from biodone.
*/
swdone(bp)
register struct buf *bp;
{
register int s;
if (bp->b_flags & B_ERROR)
panic("IO err in push");
s = splbio();
bp->av_forw = bclnlist;
cnt.v_pgout++;
cnt.v_pgpgout += bp->b_bcount / NBPG;
bclnlist = bp;
	if (bswlist.b_flags & B_WANTED)
		wakeup((caddr_t)&proc[2]);	/* pageout daemon sleeps on &proc[2] */
splx(s);
}
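
/*
 * The pageout daemon consumes the clean list; its side of the
 * handshake looks roughly like this (a sketch under the usual
 * splbio protection; the real loop lives in the VM system, not
 * in this file):
 *
 *	s = splbio();
 *	while ((bp = bclnlist) != NULL) {
 *		bclnlist = bp->av_forw;
 *		... account for the cleaned pages, release the header ...
 *	}
 *	splx(s);
 */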
/*
 * Kill a process that took an unrecoverable swap error or for
 * which we ran out of swap space; "rout" describes the reason
 * (or names the routine that ran out of swap space) in the
 * messages below.
 */
swkill(p, rout)
struct proc *p;
char *rout;
{
printf("pid %d: %s\n", p->p_pid, rout);
uprintf("sorry, pid %d was killed in %s\n", p->p_pid, rout);
/*
* To be sure no looping (e.g. in vmsched trying to
* swap out) mark process locked in core (as though
 * done by user) after killing it so no one will try
* to swap it out.
*/
psignal(p, SIGKILL);
p->p_flag |= SULOCK;
}
/*
* Raw I/O. The arguments are
* The strategy routine for the device
* A buffer, which will either be a special buffer header owned
* exclusively by the device for this purpose, or NULL,
* indicating that we should use a swap buffer
* The device number
* Read/write flag
* Essentially all the work is computing physical addresses and
* validating them.
 * If the user has the proper access privileges, the process is
* marked 'delayed unlock' and the pages involved in the I/O are
* faulted and locked. After the completion of the I/O, the above pages
* are unlocked.
*/
physio(strat, bp, dev, rw, mincnt, uio)
int (*strat)();
register struct buf *bp;
dev_t dev;
int rw;
u_int (*mincnt)();
struct uio *uio;
{
register struct iovec *iov;
register int c;
char *a;
int s, allocbuf = 0, error = 0;
struct buf *getswbuf();
#ifdef SECSIZE
int bsize;
struct partinfo dpart;
#endif /* SECSIZE */
#ifdef SECSIZE
if ((unsigned)major(dev) < nchrdev &&
(*cdevsw[major(dev)].d_ioctl)(dev, DIOCGPART, (caddr_t)&dpart,
FREAD) == 0)
bsize = dpart.disklab->d_secsize;
else
bsize = DEV_BSIZE;
#endif /* SECSIZE */
	if (bp == NULL) {		/* borrow a swap buffer header */
		allocbuf = 1;
		bp = getswbuf(PRIBIO+1);
	}
	for (;;) {
		if (uio->uio_iovcnt == 0)
			break;
		iov = uio->uio_iov;
		if (useracc(iov->iov_base, (u_int)iov->iov_len,
		    rw == B_READ ? B_WRITE : B_READ) == NULL) {
			error = EFAULT;
			break;
		}
		if (!allocbuf) {	/* only if sharing caller's buffer */
			s = splbio();
			while (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				sleep((caddr_t)bp, PRIBIO+1);
			}
			splx(s);
		}
bp->b_error = 0;
bp->b_proc = u.u_procp;
#ifdef SECSIZE
bp->b_blksize = bsize;
#endif /* SECSIZE */
bp->b_un.b_addr = iov->iov_base;
while (iov->iov_len > 0) {
bp->b_flags = B_BUSY | B_PHYS | rw;
bp->b_dev = dev;
#ifdef SECSIZE
			bp->b_blkno = uio->uio_offset / bsize;
#else /* SECSIZE */
			bp->b_blkno = btodb(uio->uio_offset);
#endif /* SECSIZE */
bp->b_bcount = iov->iov_len;
(*mincnt)(bp);
c = bp->b_bcount;
			u.u_procp->p_flag |= SPHYSIO;
			vslock(a = bp->b_un.b_addr, c);	/* wire user pages */
			physstrat(bp, strat, PRIBIO);	/* start I/O, wait for completion */
			s = splbio();
			vsunlock(a, c, rw);		/* unwire user pages */
			u.u_procp->p_flag &= ~SPHYSIO;
			if (bp->b_flags & B_WANTED)
				wakeup((caddr_t)bp);
			splx(s);
c -= bp->b_resid;
bp->b_un.b_addr += c;
iov->iov_len -= c;
uio->uio_resid -= c;
uio->uio_offset += c;
/* temp kludge for tape drives */
if (bp->b_resid || (bp->b_flags&B_ERROR))
break;
}
bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS);
		error = geterror(bp);
		/* temp kludge for tape drives */
		if (bp->b_resid || error)
			break;
uio->uio_iov++;
uio->uio_iovcnt--;
}
if (allocbuf)
freeswbuf(bp);
return (error);
}
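
/*
 * For example, a character device driver that owns a private buffer
 * header calls physio from its read entry roughly as follows ("xx"
 * is a hypothetical driver; rawread/rawwrite below show the NULL-
 * buffer form, which borrows a swap buffer header instead):
 *
 *	xxread(dev, uio)
 *		dev_t dev;
 *		struct uio *uio;
 *	{
 *
 *		return (physio(xxstrategy, &xxbuf[minor(dev)], dev,
 *		    B_READ, minphys, uio));
 *	}
 */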
/*
 * Trim the request to the largest transfer the system will do in
 * one operation; this is the default "mincnt" routine for physio.
 */
u_int
minphys(bp)
	struct buf *bp;
{

	if (bp->b_bcount > MAXPHYS)
		bp->b_bcount = MAXPHYS;
}
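
/*
 * A device with a tighter limit supplies its own "mincnt" routine
 * to physio in place of minphys; a hypothetical sketch (XX_MAXXFER
 * is not a constant defined in this system):
 *
 *	u_int
 *	xxminphys(bp)
 *		struct buf *bp;
 *	{
 *
 *		if (bp->b_bcount > XX_MAXXFER)
 *			bp->b_bcount = XX_MAXXFER;
 *	}
 */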
/*
 * Allocate a swap I/O header from the free list,
 * sleeping at "prio" until one is available.
 */
static
struct buf *
getswbuf(prio)
int prio;
{
int s;
struct buf *bp;
s = splbio();
while (bswlist.av_forw == NULL) {
bswlist.b_flags |= B_WANTED;
sleep((caddr_t)&bswlist, prio);
}
bp = bswlist.av_forw;
bswlist.av_forw = bp->av_forw;
splx(s);
return (bp);
}
/*
 * Return a swap I/O header to the free list, waking anyone
 * waiting for a header or for the list of cleaned pages.
 */
static
freeswbuf(bp)
struct buf *bp;
{
int s;
s = splbio();
bp->av_forw = bswlist.av_forw;
bswlist.av_forw = bp;
if (bswlist.b_flags & B_WANTED) {
bswlist.b_flags &= ~B_WANTED;
wakeup((caddr_t)&bswlist);
wakeup((caddr_t)&proc[2]);
}
splx(s);
}
/*
 * Raw character device read and write entries; these use the
 * NULL-buffer form of physio, borrowing a swap buffer header
 * for the duration of the transfer.
 */
rawread(dev, uio)
dev_t dev;
struct uio *uio;
{
return (physio(cdevsw[major(dev)].d_strategy, (struct buf *)NULL,
dev, B_READ, minphys, uio));
}
rawwrite(dev, uio)
dev_t dev;
struct uio *uio;
{
return (physio(cdevsw[major(dev)].d_strategy, (struct buf *)NULL,
dev, B_WRITE, minphys, uio));
}
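
/*
 * These entries are wired into the character device switch for raw
 * devices; a conf.c entry might read, in sketch form with the later
 * fields elided:
 *
 *	{ xxopen, xxclose, rawread, rawwrite, xxioctl, ... },
 */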