* Copyright (c) 1990 University of Utah.
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
* This code is derived from software contributed to Berkeley by
* the Systems Programming Group of the University of Utah Computer
* Science Department.
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
* @(#)swap_pager.c 8.1 (Berkeley) 6/11/93
* Quick hack to page to dedicated partition(s).
* Add multiprocessor locks
* Deal with async writes in a better fashion
#include <miscfs/specfs/specdev.h>
#include <vm/vm_pageout.h>
#include <vm/swap_pager.h>
#define NSWSIZES	16	/* size of swtab */
#define NPENDINGIO	64	/* max # of pending cleans */
#define MAXDADDRS	64	/* max # of disk addrs for fixed allocations */

/*
 * Debug bitmask: each SDB_xxx bit enables the printf tracing (or the
 * panic-time early-outs) guarded by "swpagerdebug & SDB_xxx" tests
 * throughout this file.  Default of 0x100 enables one trace class
 * (bit name not visible in this fragment -- TODO confirm).
 */
int swpagerdebug = 0x100;

#define SDB_ALLOCBLK	0x040	/* trace swap-block allocation/lookup paths */
#define SDB_ANOMPANIC	0x200	/* checked with panicstr: bail out early to
				   preserve state at panic time */
} swcleanlist
[NPENDINGIO
];
typedef struct swpagerclean
*swp_clean_t
;
vm_size_t st_osize
; /* size of object (bytes) */
int st_bsize
; /* vs. size of swap block (DEV_BSIZE units) */
u_long st_inuse
; /* number in this range in use */
u_long st_usecnt
; /* total used of this size */
/*
 * File-global swap pager state: I/O-in-progress counters and the
 * queues of clean records and named pagers manipulated below.
 */
int		swap_pager_pendingio;	/* max pending async "clean" ops */
int		swap_pager_poip;	/* pageouts in progress */
int		swap_pager_piip;	/* pageins in progress */

queue_head_t	swap_pager_inuse;	/* list of pending page cleans */
queue_head_t	swap_pager_free;	/* list of free pager clean structs */
queue_head_t	swap_pager_list;	/* list of "named" anon regions */
/*
 * Forward declarations, all private to this file.  The alloc/dealloc/
 * getpage/putpage/haspage/init entries implement the pagerops
 * interface (see swappagerops); finish/clean/io/iodone are internal
 * helpers for the async pageout machinery.
 */
static int		swap_pager_finish __P((swp_clean_t));
static void		swap_pager_init __P((void));
static vm_pager_t	swap_pager_alloc __P((caddr_t, vm_size_t, vm_prot_t));
static boolean_t	swap_pager_clean __P((vm_page_t, int));
static void		swap_pager_dealloc __P((vm_pager_t));
static int		swap_pager_getpage __P((vm_pager_t, vm_page_t, boolean_t));
static boolean_t	swap_pager_haspage __P((vm_pager_t, vm_offset_t));
static int		swap_pager_io __P((sw_pager_t, vm_page_t, int));
static void		swap_pager_iodone __P((struct buf *));
static int		swap_pager_putpage __P((vm_pager_t, vm_page_t, boolean_t));
struct pagerops swappagerops
= {
register swp_clean_t spc
;
if (swpagerdebug
& (SDB_FOLLOW
|SDB_INIT
))
dfltpagerops
= &swappagerops
;
queue_init(&swap_pager_list
);
queue_init(&swap_pager_inuse
);
queue_init(&swap_pager_free
);
for (i
= 0, spc
= swcleanlist
; i
< NPENDINGIO
; i
++, spc
++) {
queue_enter(&swap_pager_free
, spc
, swp_clean_t
, spc_list
);
spc
->spc_flags
= SPC_FREE
;
* Calculate the swap allocation constants.
if (dmmin
< CLBYTES
/DEV_BSIZE
)
dmmin
= CLBYTES
/DEV_BSIZE
;
* Fill in our table of object size vs. allocation size
bsize
= btodb(PAGE_SIZE
);
maxbsize
= btodb(sizeof(sw_bm_t
) * NBBY
* PAGE_SIZE
);
for (i
= 0; i
< NSWSIZES
; i
++) {
swtab
[i
].st_osize
= (vm_size_t
) (MAXDADDRS
* dbtob(bsize
));
swtab
[i
].st_bsize
= bsize
;
if (swpagerdebug
& SDB_INIT
)
printf("swpg_init: ix %d, size %x, bsize %x\n",
i
, swtab
[i
].st_osize
, swtab
[i
].st_bsize
);
swtab
[i
].st_bsize
= bsize
;
* Allocate a pager structure and associated resources.
* Note that if we are called from the pageout daemon (handle == NULL)
* we should not wait for memory as it could resulting in deadlock.
swap_pager_alloc(handle
, size
, prot
)
register vm_pager_t pager
;
if (swpagerdebug
& (SDB_FOLLOW
|SDB_ALLOC
))
printf("swpg_alloc(%x, %x, %x)\n", handle
, size
, prot
);
* If this is a "named" anonymous region, look it up and
* return the appropriate pager if it exists.
pager
= vm_pager_lookup(&swap_pager_list
, handle
);
* Use vm_object_lookup to gain a reference
* to the object and also to remove from the
if (vm_object_lookup(pager
) == NULL
)
panic("swap_pager_alloc: bad object");
* Pager doesn't exist, allocate swap management resources
waitok
= handle
? M_WAITOK
: M_NOWAIT
;
pager
= (vm_pager_t
)malloc(sizeof *pager
, M_VMPAGER
, waitok
);
swp
= (sw_pager_t
)malloc(sizeof *swp
, M_VMPGDATA
, waitok
);
if (swpagerdebug
& SDB_FAIL
)
printf("swpg_alloc: swpager malloc failed\n");
free((caddr_t
)pager
, M_VMPAGER
);
for (swt
= swtab
; swt
->st_osize
; swt
++)
if (size
<= swt
->st_osize
)
swp
->sw_bsize
= swt
->st_bsize
;
swp
->sw_nblocks
= (btodb(size
) + swp
->sw_bsize
- 1) / swp
->sw_bsize
;
swp
->sw_blocks
= (sw_blk_t
)
malloc(swp
->sw_nblocks
*sizeof(*swp
->sw_blocks
),
if (swp
->sw_blocks
== NULL
) {
free((caddr_t
)swp
, M_VMPGDATA
);
free((caddr_t
)pager
, M_VMPAGER
);
if (swpagerdebug
& SDB_FAIL
)
printf("swpg_alloc: sw_blocks malloc failed\n");
bzero((caddr_t
)swp
->sw_blocks
,
swp
->sw_nblocks
* sizeof(*swp
->sw_blocks
));
swp
->sw_flags
= SW_NAMED
;
queue_enter(&swap_pager_list
, pager
, vm_pager_t
, pg_list
);
* Consistant with other pagers: return with object
* referenced. Can't do this with handle == NULL
* since it might be the pageout daemon calling.
object
= vm_object_allocate(size
);
vm_object_enter(object
, pager
);
vm_object_setpager(object
, pager
, 0, FALSE
);
queue_init(&pager
->pg_list
);
pager
->pg_handle
= handle
;
pager
->pg_ops
= &swappagerops
;
pager
->pg_type
= PG_SWAP
;
pager
->pg_data
= (caddr_t
)swp
;
if (swpagerdebug
& SDB_ALLOC
)
printf("swpg_alloc: pg_data %x, %x of %x at %x\n",
swp
, swp
->sw_nblocks
, swp
->sw_bsize
, swp
->sw_blocks
);
swap_pager_dealloc(pager
)
/* save panic time state */
if ((swpagerdebug
& SDB_ANOMPANIC
) && panicstr
)
if (swpagerdebug
& (SDB_FOLLOW
|SDB_ALLOC
))
printf("swpg_dealloc(%x)\n", pager
);
* Remove from list right away so lookups will fail if we
* block for pageout completion.
swp
= (sw_pager_t
) pager
->pg_data
;
if (swp
->sw_flags
& SW_NAMED
) {
queue_remove(&swap_pager_list
, pager
, vm_pager_t
, pg_list
);
swp
->sw_flags
&= ~SW_NAMED
;
for (swt
= swtab
; swt
->st_osize
; swt
++)
if (swp
->sw_osize
<= swt
->st_osize
)
* Wait for all pageouts to finish and remove
* all entries from cleaning list.
swp
->sw_flags
|= SW_WANTED
;
assert_wait((int)swp
, 0);
(void) swap_pager_clean(NULL
, B_WRITE
);
* Free left over swap blocks
for (i
= 0, bp
= swp
->sw_blocks
; i
< swp
->sw_nblocks
; i
++, bp
++)
if (swpagerdebug
& (SDB_ALLOCBLK
|SDB_FULL
))
printf("swpg_dealloc: blk %x\n",
rmfree(swapmap
, swp
->sw_bsize
, bp
->swb_block
);
* Free swap management resources
free((caddr_t
)swp
->sw_blocks
, M_VMPGDATA
);
free((caddr_t
)swp
, M_VMPGDATA
);
free((caddr_t
)pager
, M_VMPAGER
);
swap_pager_getpage(pager
, m
, sync
)
if (swpagerdebug
& SDB_FOLLOW
)
printf("swpg_getpage(%x, %x, %d)\n", pager
, m
, sync
);
return(swap_pager_io((sw_pager_t
)pager
->pg_data
, m
, B_READ
));
swap_pager_putpage(pager
, m
, sync
)
if (swpagerdebug
& SDB_FOLLOW
)
printf("swpg_putpage(%x, %x, %d)\n", pager
, m
, sync
);
(void) swap_pager_clean(NULL
, B_WRITE
);
return (VM_PAGER_OK
); /* ??? */
return(swap_pager_io((sw_pager_t
)pager
->pg_data
, m
, flags
));
swap_pager_haspage(pager
, offset
)
if (swpagerdebug
& (SDB_FOLLOW
|SDB_ALLOCBLK
))
printf("swpg_haspage(%x, %x) ", pager
, offset
);
swp
= (sw_pager_t
) pager
->pg_data
;
ix
= offset
/ dbtob(swp
->sw_bsize
);
if (swp
->sw_blocks
== NULL
|| ix
>= swp
->sw_nblocks
) {
if (swpagerdebug
& (SDB_FAIL
|SDB_FOLLOW
|SDB_ALLOCBLK
))
printf("swpg_haspage: %x bad offset %x, ix %x\n",
swp
->sw_blocks
, offset
, ix
);
swb
= &swp
->sw_blocks
[ix
];
ix
= atop(offset
% dbtob(swp
->sw_bsize
));
if (swpagerdebug
& SDB_ALLOCBLK
)
printf("%x blk %x+%x ", swp
->sw_blocks
, swb
->swb_block
, ix
);
if (swpagerdebug
& (SDB_FOLLOW
|SDB_ALLOCBLK
))
"FT"[swb
->swb_block
&& (swb
->swb_mask
& (1 << ix
))]);
if (swb
->swb_block
&& (swb
->swb_mask
& (1 << ix
)))
* Scaled down version of swap().
* Assumes that PAGE_SIZE < MAXPHYS; i.e. only one operation needed.
* BOGUS: lower level IO routines expect a KVA so we have to map our
* provided physical page into the KVA to keep them happy.
swap_pager_io(swp
, m
, flags
)
/* save panic time state */
if ((swpagerdebug
& SDB_ANOMPANIC
) && panicstr
)
return (VM_PAGER_FAIL
); /* XXX: correct return? */
if (swpagerdebug
& (SDB_FOLLOW
|SDB_IO
))
printf("swpg_io(%x, %x, %x)\n", swp
, m
, flags
);
* For reads (pageins) and synchronous writes, we clean up
* all completed async pageouts.
if ((flags
& B_ASYNC
) == 0) {
* Check to see if this page is currently being cleaned.
* If it is, we just wait til the operation is done before
while (swap_pager_clean(m
, flags
&B_READ
)) {
if (swpagerdebug
& SDB_ANOM
)
printf("swap_pager_io: page %x cleaning\n", m
);
swp
->sw_flags
|= SW_WANTED
;
assert_wait((int)swp
, 0);
(void) swap_pager_clean(m
, flags
&B_READ
);
* For async writes (pageouts), we cleanup completed pageouts so
* that all available resources are freed. Also tells us if this
* page is already being cleaned. If it is, or no resources
* are available, we try again later.
else if (swap_pager_clean(m
, B_WRITE
) ||
queue_empty(&swap_pager_free
)) {
if ((swpagerdebug
& SDB_ANOM
) &&
!queue_empty(&swap_pager_free
))
printf("swap_pager_io: page %x already cleaning\n", m
);
* Determine swap block and allocate as necessary.
off
= m
->offset
+ m
->object
->paging_offset
;
ix
= off
/ dbtob(swp
->sw_bsize
);
if (swp
->sw_blocks
== NULL
|| ix
>= swp
->sw_nblocks
) {
if (swpagerdebug
& SDB_FAIL
)
printf("swpg_io: bad offset %x+%x(%d) in %x\n",
m
->offset
, m
->object
->paging_offset
,
swb
= &swp
->sw_blocks
[ix
];
off
= off
% dbtob(swp
->sw_bsize
);
if (swb
->swb_block
== 0 ||
(swb
->swb_mask
& (1 << atop(off
))) == 0) {
if (swpagerdebug
& (SDB_ALLOCBLK
|SDB_FAIL
))
printf("swpg_io: %x bad read: blk %x+%x, mask %x, off %x+%x\n",
swb
->swb_block
, atop(off
),
m
->offset
, m
->object
->paging_offset
);
/* XXX: should we zero page here?? */
} else if (swb
->swb_block
== 0) {
swb
->swb_block
= rmalloc(swapmap
, swp
->sw_bsize
);
if (swb
->swb_block
== 0) {
if (swpagerdebug
& SDB_FAIL
)
printf("swpg_io: rmalloc of %x failed\n",
if (swpagerdebug
& (SDB_FULL
|SDB_ALLOCBLK
))
printf("swpg_io: %x alloc blk %x at ix %x\n",
swp
->sw_blocks
, swb
->swb_block
, ix
);
* Allocate a kernel virtual address and initialize so that PTE
* is available for lower level IO drivers.
kva
= vm_pager_map_page(m
);
* Get a swap buffer header and perform the IO
while (bswlist
.b_actf
== NULL
) {
if (swpagerdebug
& SDB_ANOM
)
printf("swap_pager_io: wait on swbuf for %x (%d)\n",
bswlist
.b_flags
|= B_WANTED
;
sleep((caddr_t
)&bswlist
, PSWP
+1);
bswlist
.b_actf
= bp
->b_actf
;
bp
->b_flags
= B_BUSY
| (flags
& B_READ
);
bp
->b_proc
= &proc0
; /* XXX (but without B_PHYS set this is ok) */
bp
->b_un
.b_addr
= (caddr_t
)kva
;
bp
->b_blkno
= swb
->swb_block
+ btodb(off
);
if (swapdev_vp
->v_type
== VBLK
)
bp
->b_dev
= swapdev_vp
->v_rdev
;
bp
->b_bcount
= PAGE_SIZE
;
if ((bp
->b_flags
& B_READ
) == 0) {
bp
->b_dirtyend
= PAGE_SIZE
;
swapdev_vp
->v_numoutput
++;
* If this is an async write we set up additional buffer fields
* and place a "cleaning" entry on the inuse queue.
if ((flags
& (B_READ
|B_ASYNC
)) == B_ASYNC
) {
if (queue_empty(&swap_pager_free
))
panic("swpg_io: lost spc");
queue_remove_first(&swap_pager_free
,
spc
, swp_clean_t
, spc_list
);
if (spc
->spc_flags
!= SPC_FREE
)
panic("swpg_io: bad free spc");
spc
->spc_flags
= SPC_BUSY
;
bp
->b_iodone
= swap_pager_iodone
;
queue_enter(&swap_pager_inuse
, spc
, swp_clean_t
, spc_list
);
if (swpagerdebug
& SDB_WRITE
)
printf("swpg_io: write: bp=%x swp=%x spc=%x poip=%d\n",
bp
, swp
, spc
, swp
->sw_poip
);
if ((swpagerdebug
& SDB_ALLOCBLK
) &&
(swb
->swb_mask
& (1 << atop(off
))) == 0)
printf("swpg_io: %x write blk %x+%x\n",
swp
->sw_blocks
, swb
->swb_block
, atop(off
));
swb
->swb_mask
|= (1 << atop(off
));
if (swpagerdebug
& SDB_IO
)
printf("swpg_io: IO start: bp %x, db %x, va %x, pa %x\n",
bp
, swb
->swb_block
+btodb(off
), kva
, VM_PAGE_TO_PHYS(m
));
if ((flags
& (B_READ
|B_ASYNC
)) == B_ASYNC
) {
if (swpagerdebug
& SDB_IO
)
printf("swpg_io: IO started: bp %x\n", bp
);
while ((bp
->b_flags
& B_DONE
) == 0) {
rv
= (bp
->b_flags
& B_ERROR
) ? VM_PAGER_ERROR
: VM_PAGER_OK
;
bp
->b_flags
&= ~(B_BUSY
|B_WANTED
|B_PHYS
|B_PAGET
|B_UAREA
|B_DIRTY
);
bp
->b_actf
= bswlist
.b_actf
;
if (bswlist
.b_flags
& B_WANTED
) {
bswlist
.b_flags
&= ~B_WANTED
;
thread_wakeup((int)&bswlist
);
if ((flags
& B_READ
) == 0 && rv
== VM_PAGER_OK
) {
pmap_clear_modify(VM_PAGE_TO_PHYS(m
));
if (swpagerdebug
& SDB_IO
)
printf("swpg_io: IO done: bp %x, rv %d\n", bp
, rv
);
if ((swpagerdebug
& SDB_FAIL
) && rv
== VM_PAGER_ERROR
)
printf("swpg_io: IO error\n");
vm_pager_unmap_page(kva
);
register swp_clean_t spc
, tspc
;
/* save panic time state */
if ((swpagerdebug
& SDB_ANOMPANIC
) && panicstr
)
return (FALSE
); /* ??? */
if (swpagerdebug
& SDB_FOLLOW
)
printf("swpg_clean(%x, %d)\n", m
, rw
);
* Look up and removal from inuse list must be done
* at splbio() to avoid conflicts with swap_pager_iodone.
spc
= (swp_clean_t
) queue_first(&swap_pager_inuse
);
while (!queue_end(&swap_pager_inuse
, (queue_entry_t
)spc
)) {
if ((spc
->spc_flags
& SPC_DONE
) &&
swap_pager_finish(spc
)) {
queue_remove(&swap_pager_inuse
, spc
,
if (m
&& m
== spc
->spc_m
) {
if (swpagerdebug
& SDB_ANOM
)
printf("swap_pager_clean: page %x on list, flags %x\n",
spc
= (swp_clean_t
) queue_next(&spc
->spc_list
);
* No operations done, thats all we can do for now.
if (queue_end(&swap_pager_inuse
, (queue_entry_t
)spc
))
* The desired page was found to be busy earlier in
* the scan but has since completed.
if (tspc
&& tspc
== spc
) {
if (swpagerdebug
& SDB_ANOM
)
printf("swap_pager_clean: page %x done while looking\n",
spc
->spc_flags
= SPC_FREE
;
vm_pager_unmap_page(spc
->spc_kva
);
queue_enter(&swap_pager_free
, spc
, swp_clean_t
, spc_list
);
if (swpagerdebug
& SDB_WRITE
)
printf("swpg_clean: free spc %x\n", spc
);
* If we found that the desired page is already being cleaned
* mark it so that swap_pager_iodone() will not set the clean
* flag before the pageout daemon has another chance to clean it.
if (tspc
&& rw
== B_WRITE
) {
if (swpagerdebug
& SDB_ANOM
)
printf("swap_pager_clean: page %x on clean list\n",
tspc
->spc_flags
|= SPC_DIRTY
;
if (swpagerdebug
& SDB_WRITE
)
printf("swpg_clean: return %d\n", tspc
? TRUE
: FALSE
);
if ((swpagerdebug
& SDB_ANOM
) && tspc
)
printf("swpg_clean: %s of cleaning page %x\n",
rw
== B_READ
? "get" : "put", m
);
return(tspc
? TRUE
: FALSE
);
register swp_clean_t spc
;
vm_object_t object
= spc
->spc_m
->object
;
* Mark the paging operation as done.
* (XXX) If we cannot get the lock, leave it til later.
* (XXX) Also we are assuming that an async write is a
* pageout operation that has incremented the counter.
if (!vm_object_lock_try(object
))
if (--object
->paging_in_progress
== 0)
thread_wakeup((int) object
);
* XXX: this isn't even close to the right thing to do,
* introduces a variety of race conditions.
* If dirty, vm_pageout() has attempted to clean the page
* again. In this case we do not do anything as we will
* see the page again shortly.
if (spc
->spc_flags
& SPC_DIRTY
) {
if (swpagerdebug
& SDB_ANOM
)
printf("swap_pager_finish: page %x dirty again\n",
spc
->spc_m
->flags
&= ~PG_BUSY
;
vm_object_unlock(object
);
* If no error mark as clean and inform the pmap system.
* If error, mark as dirty so we will try again.
* (XXX could get stuck doing this, should give up after awhile)
if (spc
->spc_flags
& SPC_ERROR
) {
printf("swap_pager_finish: clean of page %x failed\n",
VM_PAGE_TO_PHYS(spc
->spc_m
));
spc
->spc_m
->flags
|= PG_LAUNDRY
;
spc
->spc_m
->flags
|= PG_CLEAN
;
pmap_clear_modify(VM_PAGE_TO_PHYS(spc
->spc_m
));
spc
->spc_m
->flags
&= ~PG_BUSY
;
vm_object_unlock(object
);
register swp_clean_t spc
;
/* save panic time state */
if ((swpagerdebug
& SDB_ANOMPANIC
) && panicstr
)
if (swpagerdebug
& SDB_FOLLOW
)
printf("swpg_iodone(%x)\n", bp
);
spc
= (swp_clean_t
) queue_first(&swap_pager_inuse
);
while (!queue_end(&swap_pager_inuse
, (queue_entry_t
)spc
)) {
spc
= (swp_clean_t
) queue_next(&spc
->spc_list
);
if (queue_end(&swap_pager_inuse
, (queue_entry_t
)spc
))
panic("swap_pager_iodone: bp not found");
spc
->spc_flags
&= ~SPC_BUSY
;
spc
->spc_flags
|= SPC_DONE
;
if (bp
->b_flags
& B_ERROR
)
spc
->spc_flags
|= SPC_ERROR
;
if (swpagerdebug
& SDB_WRITE
)
printf("swpg_iodone: bp=%x swp=%x flags=%x spc=%x poip=%x\n",
bp
, spc
->spc_swp
, spc
->spc_swp
->sw_flags
,
spc
, spc
->spc_swp
->sw_poip
);
if (spc
->spc_swp
->sw_flags
& SW_WANTED
) {
spc
->spc_swp
->sw_flags
&= ~SW_WANTED
;
thread_wakeup((int)spc
->spc_swp
);
bp
->b_flags
&= ~(B_BUSY
|B_WANTED
|B_PHYS
|B_PAGET
|B_UAREA
|B_DIRTY
);
bp
->b_actf
= bswlist
.b_actf
;
if (bswlist
.b_flags
& B_WANTED
) {
bswlist
.b_flags
&= ~B_WANTED
;
thread_wakeup((int)&bswlist
);
* Only kick the pageout daemon if we are really hurting
* for pages, otherwise this page will be picked up later.
if (cnt
.v_free_count
< cnt
.v_free_min
)
thread_wakeup((int) &vm_pages_needed
);