/*
 * Copyright (c) 1982, 1986 Regents of the University of California.
 * All rights reserved. The Berkeley software License Agreement
 * specifies the terms and conditions for redistribution.
 *
 * @(#)kern_physio.c 7.10 (Berkeley) %G%
 */
/*
 * Swap buffer headers.
 * They contain the necessary information for the swap I/O.
 * At any given time, a swap header can be in three
 * different lists. When free it is in the free list,
 * when allocated and the I/O queued, it is on the swap
 * device list, and finally, if the operation was a dirty
 * page push, when the I/O completes, it is inserted
 * in a list of cleaned pages to be processed by the pageout daemon.
 * If the flag indicates a dirty page push initiated
 * by the pageout daemon, we map the page into the i th
 * virtual page of process 2 (the daemon itself) where i is
 * the index of the swap header that has been allocated.
 * We simply initialize the header and queue the I/O but
 * do not wait for completion. When the I/O completes,
 * biodone() will link the header to a list of cleaned
 * pages to be processed by the pageout daemon.
 */
swap(p
, dblkno
, addr
, nbytes
, rdflg
, flag
, vp
, pfcent
)
register struct pte
*dpte
, *vpte
;
bp
->b_flags
= B_BUSY
| B_PHYS
| rdflg
| flag
;
bp
->b_blksize
= DEV_BSIZE
;
if ((bp
->b_flags
& (B_DIRTY
|B_PGIN
)) == 0)
sum
.v_pswpin
+= btoc(nbytes
);
sum
.v_pswpout
+= btoc(nbytes
);
p2dp
= ((bp
- swbuf
) * CLSIZE
) * KLMAX
;
dpte
= dptopte(&proc
[2], p2dp
);
vpte
= vtopte(p
, btop(addr
));
for (c
= 0; c
< nbytes
; c
+= NBPG
) {
if (vpte
->pg_pfnum
== 0 || vpte
->pg_fod
)
bp
->b_un
.b_addr
= (caddr_t
)ctob(dptov(&proc
[2], p2dp
));
trace(TR_SWAPIO
, vp
, bp
->b_blkno
);
/* pageout daemon doesn't wait for pushed pages */
if (bp
->b_flags
& B_ERROR
) {
if ((flag
& (B_UAREA
|B_PAGET
)) || rdflg
== B_WRITE
)
panic("hard IO err in swap");
swkill(p
, "swap: read error from swap device");
if (flag
& B_PGIN
&& nbytes
> 0)
bp
->b_flags
&= ~(B_BUSY
|B_WANTED
|B_PHYS
|B_PAGET
|B_UAREA
|B_DIRTY
);
/*
 * Put a buffer on the clean list after I/O is done.
 */
if (bp
->b_flags
& B_ERROR
)
cnt
.v_pgpgout
+= bp
->b_bcount
/ NBPG
;
if (bswlist
.b_flags
& B_WANTED
)
wakeup((caddr_t
)&proc
[2]);
/*
 * If rout == 0 then killed on swap error, else
 * rout is the name of the routine where we ran out of
 * swap space.
 */
printf("pid %d: %s\n", p
->p_pid
, rout
);
uprintf("sorry, pid %d was killed in %s\n", p
->p_pid
, rout
);
/*
 * To be sure no looping (e.g. in vmsched trying to
 * swap out) mark process locked in core (as though
 * done by user) after killing it so no one will try
 * to swap it out.
 */
/*
 * Raw I/O. The arguments are
 *	The strategy routine for the device
 *	A buffer, which will either be a special buffer header owned
 *	    exclusively by the device for this purpose, or NULL,
 *	    indicating that we should use a swap buffer
 * Essentially all the work is computing physical addresses and
 * validating them.
 * If the user has the proper access privileges, the process is
 * marked 'delayed unlock' and the pages involved in the I/O are
 * faulted and locked. After the completion of the I/O, the above pages
 * are unlocked.
 */
physio(strat
, bp
, dev
, rw
, mincnt
, uio
)
register struct iovec
*iov
;
register int requested
, done
;
int s
, allocbuf
= 0, error
= 0;
if ((unsigned)major(dev
) < nchrdev
&&
(*cdevsw
[major(dev
)].d_ioctl
)(dev
, DIOCGPART
, (caddr_t
)&dpart
,
bsize
= dpart
.disklab
->d_secsize
;
if (uio
->uio_iovcnt
== 0)
if (useracc(iov
->iov_base
, (u_int
)iov
->iov_len
,
rw
==B_READ
? B_WRITE
: B_READ
) == NULL
)
while (bp
->b_flags
&B_BUSY
) {
sleep((caddr_t
)bp
, PRIBIO
+1);
if (!allocbuf
) { /* only if sharing caller's buffer */
while (bp
->b_flags
&B_BUSY
) {
sleep((caddr_t
)bp
, PRIBIO
+1);
bp
->b_un
.b_addr
= iov
->iov_base
;
while (iov
->iov_len
> 0) {
bp
->b_flags
= B_BUSY
| B_PHYS
| rw
;
bp
->b_blkno
= uio
->uio_offset
/ bsize
;
bp
->b_blkno
= btodb(uio
->uio_offset
);
bp
->b_bcount
= iov
->iov_len
;
u
.u_procp
->p_flag
|= SPHYSIO
;
vslock(a
= bp
->b_un
.b_addr
, c
);
physstrat(bp
, strat
, PRIBIO
);
u
.u_procp
->p_flag
&= ~SPHYSIO
;
if (bp
->b_flags
&B_WANTED
)
/* temp kludge for tape drives */
if (bp
->b_resid
|| (bp
->b_flags
&B_ERROR
))
bp
->b_flags
&= ~(B_BUSY
|B_WANTED
|B_PHYS
);
if (bp
->b_resid
|| error
)
if (bp
->b_bcount
> MAXPHYS
)
while (bswlist
.av_forw
== NULL
) {
bswlist
.b_flags
|= B_WANTED
;
sleep((caddr_t
)&bswlist
, prio
);
bswlist
.av_forw
= bp
->av_forw
;
bp
->av_forw
= bswlist
.av_forw
;
if (bswlist
.b_flags
& B_WANTED
) {
bswlist
.b_flags
&= ~B_WANTED
;
wakeup((caddr_t
)&bswlist
);
wakeup((caddr_t
)&proc
[2]);
return (physio(cdevsw
[major(dev
)].d_strategy
, (struct buf
*)NULL
,
dev
, B_READ
, minphys
, uio
));
return (physio(cdevsw
[major(dev
)].d_strategy
, (struct buf
*)NULL
,
dev
, B_WRITE
, minphys
, uio
));