From 736d20f2339f35ca779856cf50dba7b890b8be30 Mon Sep 17 00:00:00 2001 From: David Greenman Date: Wed, 22 Dec 1993 12:52:00 +0000 Subject: [PATCH] First round of changes from John Dyson and myself to the VM system. This set improves performance and fixes the following problems (description from John Dyson): 1. Growing swap space problem in both static usage and in situations with lots of fork/execs in heavy paging situations. 2. Sparse swap space allocation (internal fragmentation.) 3. General swap_pager slowness. Additionally, the new swap_pager also provides hooks for multi-page I/O that is currently being developed (in early testing phases.) Problem #1 is a result of a problem where objects cannot be collapsed once a pager has been allocated for them. This problem has been solved by relaxing the restriction by allowing the pages contained in a shadow object's pager be copied to the parent object's pager. The copy is afforded by manipulating pointers to the disk blocks on the swap space. Since an improved swap_pager has already been developed with the data structures to support the copy operation, this new swap_pager has been introduced. Also, shadow object bypass in the collapse code has been enhanced to support checking for pages on disk. The vm_pageout daemon has also been modified to defer creation of an object's pager when the object's shadow is paging. This allows efficient immediate collapsing of a shadow into a parent object under many circumstances without the creation of an intermediate pager. Problem #2 is solved by the allocation data structures and algorithms in the new swap_pager. Additionally, a newer version of this new swap_pager is being tested that permits multiple page I/O and mitigation of the fragmentation problems associated with allocation of large contiguous blocks of swap space. Problem #3 is addressed by better algorithms and a fix of a couple of bugs in the swap_pager. 
Additionally, this new pager has a growth path allowing multi-page inputs from disk. Approximately 50% performance improvement can be expected under certain circumstances when using this pager in the standard single page mode. (Actually, I've seen more like twice the speed in my tests. -DLG) --- sys/i386/doc/Changes | 5 +- sys/kern/subr_rlist.c | 112 +++- sys/sys/buf.h | 3 +- sys/vm/swap_pager.c | 1127 +++++++++++++++++++++++------------------ sys/vm/swap_pager.h | 42 +- sys/vm/vm_object.c | 133 ++--- sys/vm/vm_pageout.c | 39 +- 7 files changed, 842 insertions(+), 619 deletions(-) diff --git a/sys/i386/doc/Changes b/sys/i386/doc/Changes index 9ac54a5282..cd74f0aba7 100644 --- a/sys/i386/doc/Changes +++ b/sys/i386/doc/Changes @@ -1,10 +1,13 @@ -$Id: Changes,v 1.1 1993/12/21 21:41:45 wollman Exp $ +$Id: Changes,v 1.2 1993/12/21 21:47:26 wollman Exp $ This file is intended to keep track of important kernel and user changes in FreeBSD between releases. Since 1.0.2: +- Fixed various swap 'leaks' and significantly improved paging + performance. (davidg/dyson) + - Add XNTPD to contrib section, and (un-compilable) kernel support for same to /sys/kern. (wollman) diff --git a/sys/kern/subr_rlist.c b/sys/kern/subr_rlist.c index a29fece003..4dd156cd34 100644 --- a/sys/kern/subr_rlist.c +++ b/sys/kern/subr_rlist.c @@ -45,18 +45,66 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: subr_rlist.c,v 1.3 1993/11/25 01:33:18 wollman Exp $ + * $Id$ */ #include "param.h" #include "systm.h" +#include "cdefs.h" #include "malloc.h" #include "rlist.h" +#include "vm/vm.h" +#include "vm/vm_map.h" + +extern vm_map_t kernel_map; /* * Resource lists. 
*/ +#define RLIST_MIN 128 +static int rlist_count=0; +static struct rlist *rlfree; +int rlist_active; + +static struct rlist * +rlist_malloc() +{ + struct rlist *rl; + int i; + while( rlist_count < RLIST_MIN) { + extern vm_map_t kmem_map; + int s = splhigh(); + rl = (struct rlist *)kmem_malloc(kmem_map, NBPG, 0); + splx(s); + if( !rl) + break; + + for(i=0;i<(NBPG/(sizeof *rl));i++) { + rl->rl_next = rlfree; + rlfree = rl; + rlist_count++; + rl++; + } + } + + if( (rl = rlfree) == 0 ) + panic("Cannot get an rlist entry"); + + --rlist_count; + rlfree = rl->rl_next; + return rl; +} + +inline static void +rlist_mfree( struct rlist *rl) +{ + rl->rl_next = rlfree; + rlfree = rl; + ++rlist_count; +} + + /* * Add space to a resource list. Used to either * initialize a list or return free space to it. @@ -67,16 +115,26 @@ rlist_free (rlp, start, end) unsigned start, end; { struct rlist *head; + register struct rlist *olp = 0; + int s; + + s = splhigh(); + while( rlist_active) + tsleep((caddr_t)&rlist_active, PSWP, "rlistf", 0); + rlist_active = 1; + splx(s); head = *rlp; loop: /* if nothing here, insert (tail of list) */ if (*rlp == 0) { - *rlp = (struct rlist *)malloc(sizeof(**rlp), M_TEMP, M_NOWAIT); + *rlp = rlist_malloc(); (*rlp)->rl_start = start; (*rlp)->rl_end = end; (*rlp)->rl_next = 0; + rlist_active = 0; + wakeup((caddr_t)&rlist_active); return; } @@ -103,11 +161,21 @@ loop: if (end < (*rlp)->rl_start) { register struct rlist *nlp; - nlp = (struct rlist *)malloc(sizeof(*nlp), M_TEMP, M_NOWAIT); + nlp = rlist_malloc(); nlp->rl_start = start; nlp->rl_end = end; nlp->rl_next = *rlp; - *rlp = nlp; + /* + * If the new element is in front of the list, + * adjust *rlp, else don't. 
+ */ + if( olp) { + olp->rl_next = nlp; + } else { + *rlp = nlp; + } + rlist_active = 0; + wakeup((caddr_t)&rlist_active); return; } @@ -120,6 +188,7 @@ loop: /* are we after this element */ if (start > (*rlp)->rl_end) { + olp = *rlp; rlp = &((*rlp)->rl_next); goto loop; } else @@ -137,11 +206,13 @@ scan: if (lp->rl_end + 1 == lpn->rl_start) { lp->rl_end = lpn->rl_end; lp->rl_next = lpn->rl_next; - free(lpn, M_TEMP); + rlist_mfree(lpn); } else lp = lp->rl_next; } } + rlist_active = 0; + wakeup((caddr_t)&rlist_active); } /* @@ -151,9 +222,18 @@ scan: * "*loc". (Note: loc can be zero if we don't wish the value) */ int rlist_alloc (rlp, size, loc) -struct rlist **rlp; unsigned size, *loc; { + struct rlist **rlp; + unsigned size, *loc; +{ register struct rlist *lp; + int s; + register struct rlist *olp = 0; + s = splhigh(); + while( rlist_active) + tsleep((caddr_t)&rlist_active, PSWP, "rlista", 0); + rlist_active = 1; + splx(s); /* walk list, allocating first thing that's big enough (first fit) */ for (; *rlp; rlp = &((*rlp)->rl_next)) @@ -166,13 +246,27 @@ struct rlist **rlp; unsigned size, *loc; { /* did we eat this element entirely? */ if ((*rlp)->rl_start > (*rlp)->rl_end) { lp = (*rlp)->rl_next; - free (*rlp, M_TEMP); - *rlp = lp; + rlist_mfree(*rlp); + /* + * if the deleted element was in fromt + * of the list, adjust *rlp, else don't. + */ + if (olp) { + olp->rl_next = lp; + } else { + *rlp = lp; + } } + rlist_active = 0; + wakeup((caddr_t)&rlist_active); return (1); + } else { + olp = *rlp; } + rlist_active = 0; + wakeup((caddr_t)&rlist_active); /* nothing in list that's big enough */ return (0); } @@ -191,6 +285,6 @@ rlist_destroy (rlp) *rlp = 0; for (; lp; lp = nlp) { nlp = lp->rl_next; - free (lp, M_TEMP); + rlist_mfree(lp); } } diff --git a/sys/sys/buf.h b/sys/sys/buf.h index 2c9f2a4ee6..6c2d7ece2e 100644 --- a/sys/sys/buf.h +++ b/sys/sys/buf.h @@ -31,7 +31,7 @@ * SUCH DAMAGE. 
* * from: @(#)buf.h 7.11 (Berkeley) 5/9/90 - * $Id: buf.h,v 1.5 1993/11/25 01:37:50 wollman Exp $ + * $Id: buf.h,v 1.6 1993/12/19 00:55:11 wollman Exp $ */ #ifndef _SYS_BUF_H_ @@ -110,6 +110,7 @@ struct buf caddr_t b_saveaddr; /* original b_addr for PHYSIO */ void * b_driver1; /* for private use by the driver */ void * b_driver2; /* for private use by the driver */ + void * b_spc; /* swap pager info */ }; #define BQUEUES 4 /* number of free buffer queues */ diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c index 136f5f883a..52c1a83b76 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -37,7 +37,7 @@ * * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$ * from: @(#)swap_pager.c 7.4 (Berkeley) 5/7/91 - * $Id: swap_pager.c,v 1.8 1993/12/19 23:24:15 wollman Exp $ + * $Id: swap_pager.c,v 1.9 1993/12/21 05:50:59 davidg Exp $ */ /* @@ -47,6 +47,24 @@ * Deal with async writes in a better fashion */ +/* + * From John Dyson: + * + * Enhancements to page in multiple pages + * Efficiency improvements in pageout code + * Changes to allocation algorithm to allow more + * dense allocation of swap space. + * Addition of a routine to allow disk-based copying + * to allow vm_object_collapse to work better. + * + * TODO: + * Make allocation more intelligent re: text space + * because of rounding problems with the allocation of + * blocks of swap space, it is possible to allocate swap + * space for text. Add some hooks to find out if portions of an object + * will ever need swap space??????? 
+ */ + #include "param.h" #include "proc.h" #include "buf.h" @@ -57,32 +75,18 @@ #include "rlist.h" #include "kernel.h" +#include "vm_param.h" +#include "queue.h" +#include "lock.h" #include "vm.h" +#include "vm_prot.h" +#include "vm_object.h" #include "vm_page.h" #include "vm_pageout.h" #include "swap_pager.h" +#include "vm_map.h" -#include "dmap.h" -struct dmap zdmap; /* not used */ -int dmmin, dmmax, dmtext; /* dmtext not used */ - -#define NSWSIZES 16 /* size of swtab */ -#define NPENDINGIO 64 /* max # of pending cleans */ -#define MAXDADDRS 64 /* max # of disk addrs for fixed allocations */ - -#ifdef DEBUG -int swpagerdebug = 0 /*0x100*/; -#define SDB_FOLLOW 0x001 -#define SDB_INIT 0x002 -#define SDB_ALLOC 0x004 -#define SDB_IO 0x008 -#define SDB_WRITE 0x010 -#define SDB_FAIL 0x020 -#define SDB_ALLOCBLK 0x040 -#define SDB_FULL 0x080 -#define SDB_ANOM 0x100 -#define SDB_ANOMPANIC 0x200 -#endif +#define NPENDINGIO 64 struct pagerops swappagerops = { swap_pager_init, @@ -93,6 +97,14 @@ struct pagerops swappagerops = { swap_pager_haspage }; +extern int nswbuf; +int nswiodone; +extern int vm_pageout_rate_limit; +static int cleandone; +int swap_pager_full; +extern vm_map_t pager_map; +void swap_pager_finish(); + struct swpagerclean { queue_head_t spc_list; int spc_flags; @@ -100,48 +112,34 @@ struct swpagerclean { sw_pager_t spc_swp; vm_offset_t spc_kva; vm_page_t spc_m; -} swcleanlist[NPENDINGIO]; +} swcleanlist [NPENDINGIO] ; + typedef struct swpagerclean *swp_clean_t; +extern vm_map_t kernel_map; /* spc_flags values */ -#define SPC_FREE 0x00 -#define SPC_BUSY 0x01 -#define SPC_DONE 0x02 -#define SPC_ERROR 0x04 -#define SPC_DIRTY 0x08 - -struct swtab { - vm_size_t st_osize; /* size of object (bytes) */ - int st_bsize; /* vs. 
size of swap block (DEV_BSIZE units) */ -#ifdef DEBUG - u_long st_inuse; /* number in this range in use */ - u_long st_usecnt; /* total used of this size */ -#endif -} swtab[NSWSIZES+1]; +#define SPC_ERROR 0x01 -static int swap_pager_finish(swp_clean_t); - -#ifdef DEBUG -int swap_pager_pendingio; /* max pending async "clean" ops */ -int swap_pager_poip; /* pageouts in progress */ -int swap_pager_piip; /* pageins in progress */ -#endif +#define SWB_EMPTY (-1) +queue_head_t swap_pager_done; /* list of compileted page cleans */ queue_head_t swap_pager_inuse; /* list of pending page cleans */ queue_head_t swap_pager_free; /* list of free pager clean structs */ queue_head_t swap_pager_list; /* list of "named" anon regions */ +int npendingio = NPENDINGIO; +int swiopend; +int pendingiowait; +int require_swap_init; + +int swap_wakeup; +int dmmin, dmmax; + void swap_pager_init() { - register swp_clean_t spc; - register int i, bsize; - int maxbsize; + register int i; -#ifdef DEBUG - if (swpagerdebug & (SDB_FOLLOW|SDB_INIT)) - printf("swpg_init()\n"); -#endif dfltpagerops = &swappagerops; queue_init(&swap_pager_list); @@ -149,46 +147,20 @@ swap_pager_init() * Initialize clean lists */ queue_init(&swap_pager_inuse); + queue_init(&swap_pager_done); queue_init(&swap_pager_free); - for (i = 0, spc = swcleanlist; i < NPENDINGIO; i++, spc++) { - queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list); - spc->spc_flags = SPC_FREE; - } + + require_swap_init = 1; + /* * Calculate the swap allocation constants. */ - if (dmmin == 0) { - dmmin = DMMIN; - if (dmmin < CLBYTES/DEV_BSIZE) - dmmin = CLBYTES/DEV_BSIZE; - } - if (dmmax == 0) - dmmax = DMMAX; - /* - * Fill in our table of object size vs. 
allocation size - */ - bsize = btodb(PAGE_SIZE); - if (bsize < dmmin) - bsize = dmmin; - maxbsize = btodb(sizeof(sw_bm_t) * NBBY * PAGE_SIZE); - if (maxbsize > dmmax) - maxbsize = dmmax; - for (i = 0; i < NSWSIZES; i++) { - swtab[i].st_osize = (vm_size_t) (MAXDADDRS * dbtob(bsize)); - swtab[i].st_bsize = bsize; -#ifdef DEBUG - if (swpagerdebug & SDB_INIT) - printf("swpg_init: ix %d, size %x, bsize %x\n", - i, swtab[i].st_osize, swtab[i].st_bsize); -#endif - if (bsize >= maxbsize) - break; - bsize *= 2; - } - swtab[i].st_osize = 0; - swtab[i].st_bsize = bsize; + dmmin = CLBYTES/DEV_BSIZE; + + dmmax = btodb( SWB_NPAGES*NBPG) * 8; + } /* @@ -204,13 +176,23 @@ swap_pager_alloc(handle, size, prot) { register vm_pager_t pager; register sw_pager_t swp; - struct swtab *swt; int waitok; - -#ifdef DEBUG - if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC)) - printf("swpg_alloc(%x, %x, %x)\n", handle, size, prot); -#endif + int i,j; + + if( require_swap_init) { + register swp_clean_t spc; + if( npendingio > nswbuf) + npendingio = nswbuf; + for (i = 0, spc = swcleanlist; i < npendingio ; i++, spc++) { + spc->spc_kva = kmem_alloc_pageable( pager_map, NBPG); + if( !spc->spc_kva) + break; + spc->spc_flags = 0; + queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list); + } + require_swap_init = 0; + } + /* * If this is a "named" anonymous region, look it up and * return the appropriate pager if it exists. @@ -228,50 +210,39 @@ swap_pager_alloc(handle, size, prot) return(pager); } } + /* * Pager doesn't exist, allocate swap management resources * and initialize. */ - waitok = handle ? M_WAITOK : M_NOWAIT; + waitok = handle ? 
M_WAITOK : M_NOWAIT; pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, waitok); if (pager == NULL) return(NULL); swp = (sw_pager_t)malloc(sizeof *swp, M_VMPGDATA, waitok); if (swp == NULL) { -#ifdef DEBUG - if (swpagerdebug & SDB_FAIL) - printf("swpg_alloc: swpager malloc failed\n"); -#endif free((caddr_t)pager, M_VMPAGER); return(NULL); } size = round_page(size); - for (swt = swtab; swt->st_osize; swt++) - if (size <= swt->st_osize) - break; -#ifdef DEBUG - swt->st_inuse++; - swt->st_usecnt++; -#endif swp->sw_osize = size; - swp->sw_bsize = swt->st_bsize; - swp->sw_nblocks = (btodb(size) + swp->sw_bsize - 1) / swp->sw_bsize; + swp->sw_nblocks = (btodb(size) + btodb(SWB_NPAGES * NBPG) - 1) / btodb(SWB_NPAGES*NBPG); swp->sw_blocks = (sw_blk_t) malloc(swp->sw_nblocks*sizeof(*swp->sw_blocks), - M_VMPGDATA, M_NOWAIT); + M_VMPGDATA, waitok); if (swp->sw_blocks == NULL) { free((caddr_t)swp, M_VMPGDATA); free((caddr_t)pager, M_VMPAGER); -#ifdef DEBUG - if (swpagerdebug & SDB_FAIL) - printf("swpg_alloc: sw_blocks malloc failed\n"); - swt->st_inuse--; - swt->st_usecnt--; -#endif return(FALSE); } bzero((caddr_t)swp->sw_blocks, swp->sw_nblocks * sizeof(*swp->sw_blocks)); + + for(i=0;isw_nblocks;i++) { + for(j=0;jsw_blocks[i].swb_block[j] = SWB_EMPTY; + } + swp->sw_poip = 0; if (handle) { vm_object_t object; @@ -295,74 +266,203 @@ swap_pager_alloc(handle, size, prot) pager->pg_type = PG_SWAP; pager->pg_data = (caddr_t)swp; -#ifdef DEBUG - if (swpagerdebug & SDB_ALLOC) - printf("swpg_alloc: pg_data %x, %x of %x at %x\n", - swp, swp->sw_nblocks, swp->sw_bsize, swp->sw_blocks); -#endif return(pager); } +/* + * return the address on disk and the validity of the + * data on disk. 
+ */ +static int * +swap_pager_diskaddr(swp, offset, valid) + sw_pager_t swp; + vm_offset_t offset; + int *valid; +{ + register sw_blk_t swb; + int ix; + + if( valid) + *valid = 0; + ix = offset / (SWB_NPAGES*NBPG); + if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) { + return(FALSE); + } + swb = &swp->sw_blocks[ix]; + ix = (offset % (SWB_NPAGES*NBPG)) / NBPG; + if( valid) + *valid = swb->swb_valid & (1<swb_block[ix]; +} + +static void +swap_pager_setvalid(swp, offset) + sw_pager_t swp; + vm_offset_t offset; +{ + register sw_blk_t swb; + int ix; + + ix = offset / (SWB_NPAGES*NBPG); + if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) + return; + + swb = &swp->sw_blocks[ix]; + ix = (offset % (SWB_NPAGES*NBPG)) / NBPG; + swb->swb_valid |= (1 << ix); + return; +} + +/* + * this routine frees swap blocks from a specified pager + */ +void +swap_pager_freespace(vm_pager_t pager, vm_offset_t start, vm_offset_t size) { + + sw_pager_t swp = (sw_pager_t) pager->pg_data; + vm_offset_t i; + int s; + + s = splbio(); + for(i=start;ipg_data; + dstswp = (sw_pager_t) dstpager->pg_data; + + s = splbio(); + + if (srcswp->sw_flags & SW_NAMED) { + queue_remove(&swap_pager_list, srcpager, vm_pager_t, pg_list); + srcswp->sw_flags &= ~SW_NAMED; + } + + while( srcswp->sw_poip) { + tsleep((caddr_t)&swap_wakeup, PVM, "wpgout", 0); + } + splx(s); + + (void) swap_pager_clean(NULL, B_WRITE); + + s = splbio(); +/* + * clear source block before destination object + */ + for(i=0;isw_osize;i+=NBPG) { + int srcvalid, dstvalid; + int *srcaddrp = swap_pager_diskaddr( srcswp, i+offset+srcoffset, + &srcvalid); + int *dstaddrp; + if( srcaddrp && *srcaddrp != SWB_EMPTY) { + if( srcvalid) { + dstaddrp = swap_pager_diskaddr( dstswp, i+dstoffset, &dstvalid); + if( !dstvalid && dstaddrp && *dstaddrp != SWB_EMPTY) { + rlist_free(&swapmap, *dstaddrp, *dstaddrp + btodb(NBPG) - 1); + *dstaddrp = SWB_EMPTY; + } + if( dstaddrp && *dstaddrp == SWB_EMPTY) { + *dstaddrp = *srcaddrp; + *srcaddrp = SWB_EMPTY; + 
swap_pager_setvalid( dstswp, i + dstoffset); + } + } + if( *srcaddrp != SWB_EMPTY) + rlist_free(&swapmap, *srcaddrp, *srcaddrp + btodb(NBPG) - 1); + } + } + +/* + * deallocate the rest of the source object + */ + for(i=dstswp->sw_osize + offset + srcoffset;isw_osize;i+=NBPG) { + int *srcaddrp = swap_pager_diskaddr( srcswp, i, 0); + if( srcaddrp && *srcaddrp != SWB_EMPTY) + rlist_free(&swapmap, *srcaddrp, *srcaddrp + btodb(NBPG) - 1); + } + + splx(s); + + free((caddr_t)srcswp->sw_blocks, M_VMPGDATA); + free((caddr_t)srcswp, M_VMPGDATA); + free((caddr_t)srcpager, M_VMPAGER); + + return 1; +} + + void swap_pager_dealloc(pager) vm_pager_t pager; { - register int i; + register int i,j; register sw_blk_t bp; register sw_pager_t swp; - struct swtab *swt; int s; -#ifdef DEBUG - /* save panic time state */ - if ((swpagerdebug & SDB_ANOMPANIC) && panicstr) - return; - if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC)) - printf("swpg_dealloc(%x)\n", pager); -#endif /* * Remove from list right away so lookups will fail if we * block for pageout completion. */ + s = splbio(); swp = (sw_pager_t) pager->pg_data; if (swp->sw_flags & SW_NAMED) { queue_remove(&swap_pager_list, pager, vm_pager_t, pg_list); swp->sw_flags &= ~SW_NAMED; } -#ifdef DEBUG - for (swt = swtab; swt->st_osize; swt++) - if (swp->sw_osize <= swt->st_osize) - break; - swt->st_inuse--; -#endif - /* * Wait for all pageouts to finish and remove * all entries from cleaning list. 
*/ - s = splbio(); - while (swp->sw_poip) { - swp->sw_flags |= SW_WANTED; - assert_wait((int)swp, 0); - thread_block("swpgde"); + + while( swp->sw_poip) { + tsleep((caddr_t)&swap_wakeup, PVM, "wpgout", 0); } splx(s); + + (void) swap_pager_clean(NULL, B_WRITE); /* * Free left over swap blocks */ s = splbio(); - for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++) - if (bp->swb_block) { -#ifdef DEBUG - if (swpagerdebug & (SDB_ALLOCBLK|SDB_FULL)) - printf("swpg_dealloc: blk %x\n", - bp->swb_block); -#endif - rlist_free(&swapmap, (unsigned)bp->swb_block, - (unsigned)bp->swb_block + swp->sw_bsize - 1); + for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++) { + for(j=0;jswb_block[j] != SWB_EMPTY) { + rlist_free(&swapmap, (unsigned)bp->swb_block[j], + (unsigned)bp->swb_block[j] + btodb(NBPG) - 1); } + } splx(s); /* * Free swap management resources @@ -370,6 +470,19 @@ swap_pager_dealloc(pager) free((caddr_t)swp->sw_blocks, M_VMPGDATA); free((caddr_t)swp, M_VMPGDATA); free((caddr_t)pager, M_VMPAGER); + swap_pager_full = 0; +} + +int +swap_pager_getmulti(pager, m, count, reqpage, sync) + vm_pager_t pager; + vm_page_t *m; + int count; + int reqpage; + boolean_t sync; +{ + + return swap_pager_io( (sw_pager_t) pager->pg_data, m, count, reqpage, B_READ); } int @@ -378,11 +491,10 @@ swap_pager_getpage(pager, m, sync) vm_page_t m; boolean_t sync; { -#ifdef DEBUG - if (swpagerdebug & SDB_FOLLOW) - printf("swpg_getpage(%x, %x, %d)\n", pager, m, sync); -#endif - return(swap_pager_io((sw_pager_t)pager->pg_data, m, B_READ)); + vm_page_t marray[1]; + + marray[0] = m; + return swap_pager_io((sw_pager_t)pager->pg_data, marray, 1, 0, B_READ); } int @@ -392,111 +504,217 @@ swap_pager_putpage(pager, m, sync) boolean_t sync; { int flags; + vm_page_t marray[1]; + -#ifdef DEBUG - if (swpagerdebug & SDB_FOLLOW) - printf("swpg_putpage(%x, %x, %d)\n", pager, m, sync); -#endif if (pager == NULL) { (void) swap_pager_clean(NULL, B_WRITE); - return 0; + return VM_PAGER_OK; } + 
+ marray[0] = m; flags = B_WRITE; if (!sync) flags |= B_ASYNC; - return(swap_pager_io((sw_pager_t)pager->pg_data, m, flags)); + return(swap_pager_io((sw_pager_t)pager->pg_data, marray, 1, 0, flags)); } -boolean_t -swap_pager_haspage(pager, offset) - vm_pager_t pager; +static inline int +swap_pager_block_index( swp, offset) + sw_pager_t swp; + vm_offset_t offset; +{ + return offset / (SWB_NPAGES*NBPG); +} + +static inline int +swap_pager_block_offset( swp, offset) + sw_pager_t swp; + vm_offset_t offset; +{ + return offset % (SWB_NPAGES*NBPG); +} + +static boolean_t +_swap_pager_haspage(swp, offset) + sw_pager_t swp; vm_offset_t offset; { - register sw_pager_t swp; register sw_blk_t swb; int ix; -#ifdef DEBUG - if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK)) - printf("swpg_haspage(%x, %x) ", pager, offset); -#endif - swp = (sw_pager_t) pager->pg_data; - ix = offset / dbtob(swp->sw_bsize); + ix = offset / (SWB_NPAGES*NBPG); if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) { -#ifdef DEBUG - if (swpagerdebug & (SDB_FAIL|SDB_FOLLOW|SDB_ALLOCBLK)) - printf("swpg_haspage: %x bad offset %x, ix %x\n", - swp->sw_blocks, offset, ix); -#endif return(FALSE); } swb = &swp->sw_blocks[ix]; - if (swb->swb_block) - ix = atop(offset % dbtob(swp->sw_bsize)); -#ifdef DEBUG - if (swpagerdebug & SDB_ALLOCBLK) - printf("%x blk %x+%x ", swp->sw_blocks, swb->swb_block, ix); - if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK)) - printf("-> %c\n", - "FT"[swb->swb_block && (swb->swb_mask & (1 << ix))]); -#endif - if (swb->swb_block && (swb->swb_mask & (1 << ix))) - return(TRUE); + ix = (offset % (SWB_NPAGES*NBPG)) / NBPG; + if( swb->swb_block[ix] != SWB_EMPTY) { + if( swb->swb_valid & (1<pg_data, offset); +} + +static void +swap_pager_freepage( vm_page_t m) { + PAGE_WAKEUP(m); + vm_page_free(m); +} + +static void +swap_pager_ridpages( vm_page_t *m, int count, int reqpage) { + int i; + for(i=0;ib_flags |= B_DONE; + bp->b_flags &= ~B_ASYNC; + wakeup((caddr_t)bp); + if( (bp->b_flags & B_READ) == 0) + 
vwakeup(bp); +} /* * Scaled down version of swap(). - * Assumes that PAGE_SIZE < MAXPHYS; i.e. only one operation needed. * BOGUS: lower level IO routines expect a KVA so we have to map our * provided physical page into the KVA to keep them happy. + * + * This routine substantially enhanced by John Dyson, 18 Dec 93. + * */ int -swap_pager_io(swp, m, flags) +swap_pager_io(swp, m, count, reqpage, flags) register sw_pager_t swp; - vm_page_t m; + vm_page_t *m; + int count, reqpage; int flags; { register struct buf *bp; register sw_blk_t swb; register int s; + int i; int ix; boolean_t rv; vm_offset_t kva, off; swp_clean_t spc; + int cluster; + vm_offset_t paging_offset; + vm_object_t object; + int reqaddr; + int mydskregion; + extern int dmmin, dmmax; -#ifdef DEBUG - /* save panic time state */ - if ((swpagerdebug & SDB_ANOMPANIC) && panicstr) - return; - if (swpagerdebug & (SDB_FOLLOW|SDB_IO)) - printf("swpg_io(%x, %x, %x)\n", swp, m, flags); -#endif + + spc = NULL; + + + object = m[reqpage]->object; + paging_offset = object->paging_offset; + /* + * First determine if the page exists in the pager if this is + * a sync read. This quickly handles cases where we are + * following shadow chains looking for the top level object + * with the page. 
+ */ + off = m[reqpage]->offset + paging_offset; + ix = swap_pager_block_index( swp, off); + if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) { + /* printf("swap pager: out of range\n"); */ + swap_pager_ridpages( m, count, reqpage); + return(VM_PAGER_FAIL); + } + + + swb = &swp->sw_blocks[ix]; + off = swap_pager_block_offset(swp, off) / NBPG; + if ((flags & B_READ) && + ((swb->swb_block[off] == SWB_EMPTY) || + (swb->swb_valid & (1 << off)) == 0)) { + swap_pager_ridpages( m, count, reqpage); + return(VM_PAGER_FAIL); + } + + reqaddr = swb->swb_block[off]; + + /* make sure that our I/O request is contiguous */ + if ( flags & B_READ) { + int first=0, last=count; + int failed = 0; + int reqdskregion = reqaddr / dmmax; + for(i=reqpage-1;i>=0;--i) { + int *tmpaddr = swap_pager_diskaddr(swp, m[i]->offset + paging_offset,0); + if( tmpaddr == 0 || failed || + *tmpaddr != reqaddr + btodb((i-reqpage)*NBPG) ) { + failed = 1; + swap_pager_freepage(m[i]); + m[i] = 0; + if( first == 0) + first = i + 1; + } else { + mydskregion = *tmpaddr / dmmax; + if( mydskregion != reqdskregion) { + failed = 1; + swap_pager_freepage(m[i]); + m[i] = 0; + first = i + 1; + } + } + } + failed = 0; + for(i=reqpage+1;ioffset + paging_offset,0); + if( tmpaddr == 0 || failed || + *tmpaddr != reqaddr + btodb((i-reqpage)*NBPG) ) { + failed = 1; + swap_pager_freepage(m[i]); + m[i] = 0; + if( last == count) + last = i; + } else { + mydskregion = *tmpaddr / dmmax; + if( mydskregion != reqdskregion) { + failed = 1; + swap_pager_freepage(m[i]); + m[i] = 0; + if( last == count) + last = i; + } + } + } + count = last; + if( first != 0) { + for(i=first;isw_flags |= SW_WANTED; - assert_wait((int)swp); - thread_block("swpgio"); - } -#else - (void) swap_pager_clean(m, flags&B_READ); -#endif - splx(s); + swap_pager_clean(NULL, flags); } /* * For async writes (pageouts), we cleanup completed pageouts so @@ -504,103 +722,99 @@ swap_pager_io(swp, m, flags) * page is already being cleaned. 
If it is, or no resources * are available, we try again later. */ - else if (swap_pager_clean(m, B_WRITE) || - queue_empty(&swap_pager_free)) { -#ifdef DEBUG - if ((swpagerdebug & SDB_ANOM) && - !queue_empty(&swap_pager_free)) - printf("swap_pager_io: page %x already cleaning\n", m); -#endif - return(VM_PAGER_FAIL); + else if (swap_pager_clean(m[reqpage], B_WRITE)) { + swap_pager_ridpages( m, count, reqpage); + return VM_PAGER_FAIL; } + spc = NULL; /* we might not use an spc data structure */ + kva = 0; + + if( (flags & B_READ) && count > 1) { + kva = kmem_alloc_pageable( pager_map, count*NBPG); + } + + if( !kva) { /* - * Determine swap block and allocate as necessary. + * get a swap pager clean data structure, block until we get it */ - off = m->offset + m->object->paging_offset; - ix = off / dbtob(swp->sw_bsize); - if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) { -#ifdef DEBUG - if (swpagerdebug & SDB_FAIL) - printf("swpg_io: bad offset %x+%x(%d) in %x\n", - m->offset, m->object->paging_offset, - ix, swp->sw_blocks); -#endif - return(VM_PAGER_FAIL); - } - s = splbio(); - swb = &swp->sw_blocks[ix]; - off = off % dbtob(swp->sw_bsize); - if (flags & B_READ) { - if (swb->swb_block == 0 || - (swb->swb_mask & (1 << atop(off))) == 0) { -#ifdef DEBUG - if (swpagerdebug & (SDB_ALLOCBLK|SDB_FAIL)) - printf("swpg_io: %x bad read: blk %x+%x, mask %x, off %x+%x\n", - swp->sw_blocks, - swb->swb_block, atop(off), - swb->swb_mask, - m->offset, m->object->paging_offset); -#endif - /* XXX: should we zero page here?? 
*/ - splx(s); - return(VM_PAGER_FAIL); + if( queue_empty(&swap_pager_free)) { + (void) swap_pager_clean(NULL, B_WRITE); + while ( queue_empty(&swap_pager_free)) { + tsleep((caddr_t)&swap_wakeup, PVM, "swpfre", 0); + (void) swap_pager_clean(NULL, B_WRITE); + } } - } else if (swb->swb_block == 0) { -#ifdef old - swb->swb_block = rmalloc(swapmap, swp->sw_bsize); - if (swb->swb_block == 0) { -#else - if (!rlist_alloc(&swapmap, (unsigned)swp->sw_bsize, - (unsigned *)&swb->swb_block)) { -#endif -#ifdef DEBUG - if (swpagerdebug & SDB_FAIL) - printf("swpg_io: rmalloc of %x failed\n", - swp->sw_bsize); -#endif - splx(s); - return(VM_PAGER_FAIL); + queue_remove_first(&swap_pager_free, spc, swp_clean_t, spc_list); + for(i=0;isw_blocks, swb->swb_block, ix); -#endif + count = 1; + m[0] = m[reqpage]; + reqpage = 0; + kva = spc->spc_kva; } - splx(s); + /* - * Allocate a kernel virtual address and initialize so that PTE - * is available for lower level IO drivers. + * Determine swap block and allocate as necessary. 
*/ - kva = vm_pager_map_page(m); + if (reqaddr == SWB_EMPTY) { + int blk; + for(i=0;iswb_block[i] != SWB_EMPTY) + break; + if( i == SWB_NPAGES && + rlist_alloc(&swapmap, btodb( SWB_NPAGES*NBPG),&blk)) { + for(i=0;iswb_block[i] = blk + btodb(NBPG)*i; + } else if( !rlist_alloc(&swapmap, btodb( NBPG), &swb->swb_block[off])) { + if( spc) + queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list); + if( swap_pager_full == 0) + printf("swap_pager: out of swap space !!!\n"); + swap_pager_full = 1; + swap_pager_ridpages( m, count, reqpage); + return(VM_PAGER_FAIL); + } + swap_pager_full = 0; + } + for(i=0;ioffset+paging_offset) / NBPG; + +/* + if( flags & B_READ) + printf("obj: 0x%x off: 0x%x poff: 0x%x off: 0x%x, sz: %d blk: %d op: %s\n", + object, m[0]->offset, paging_offset, off, count, swb->swb_block[off], flags&B_READ?"r":"w"); +*/ + + s = splbio(); /* * Get a swap buffer header and perform the IO */ - s = splbio(); while (bswlist.av_forw == NULL) { -#ifdef DEBUG - if (swpagerdebug & SDB_ANOM) - printf("swap_pager_io: wait on swbuf for %x (%d)\n", - m, flags); -#endif bswlist.b_flags |= B_WANTED; - tsleep((caddr_t)&bswlist, PSWP+1, "pagerio", 0); + tsleep((caddr_t)&bswlist, PSWP+1, "wswbuf", 0); } bp = bswlist.av_forw; bswlist.av_forw = bp->av_forw; - splx(s); bp->b_flags = B_BUSY | (flags & B_READ); bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */ - bp->b_un.b_addr = (caddr_t)kva; - bp->b_blkno = swb->swb_block + btodb(off); + bp->b_un.b_addr = (caddr_t) kva; + bp->b_blkno = swb->swb_block[off]; VHOLD(swapdev_vp); bp->b_vp = swapdev_vp; if (swapdev_vp->v_type == VBLK) bp->b_dev = swapdev_vp->v_rdev; - bp->b_bcount = PAGE_SIZE; + bp->b_bcount = NBPG*count; if ((bp->b_flags & B_READ) == 0) swapdev_vp->v_numoutput++; @@ -609,92 +823,91 @@ swap_pager_io(swp, m, flags) * and place a "cleaning" entry on the inuse queue. 
*/ if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) { -#ifdef DEBUG - if (queue_empty(&swap_pager_free)) - panic("swpg_io: lost spc"); -#endif - queue_remove_first(&swap_pager_free, - spc, swp_clean_t, spc_list); -#ifdef DEBUG - if (spc->spc_flags != SPC_FREE) - panic("swpg_io: bad free spc"); -#endif - spc->spc_flags = SPC_BUSY; + spc->spc_flags = 0; spc->spc_bp = bp; spc->spc_swp = swp; - spc->spc_kva = kva; - spc->spc_m = m; + spc->spc_m = m[reqpage]; bp->b_flags |= B_CALL; bp->b_iodone = swap_pager_iodone; - s = splbio(); + bp->b_spc = (void *) spc; swp->sw_poip++; queue_enter(&swap_pager_inuse, spc, swp_clean_t, spc_list); - -#ifdef DEBUG - swap_pager_poip++; - if (swpagerdebug & SDB_WRITE) - printf("swpg_io: write: bp=%x swp=%x spc=%x poip=%d\n", - bp, swp, spc, swp->sw_poip); - if ((swpagerdebug & SDB_ALLOCBLK) && - (swb->swb_mask & (1 << atop(off))) == 0) - printf("swpg_io: %x write blk %x+%x\n", - swp->sw_blocks, swb->swb_block, atop(off)); -#endif - swb->swb_mask |= (1 << atop(off)); - splx(s); + swb->swb_valid |= (1 << off); + } else { + if( (flags & B_READ) == 0) + swb->swb_valid |= (1 << off); + bp->b_flags |= B_CALL; + bp->b_iodone = swap_pager_iodone1; } -#ifdef DEBUG - if (swpagerdebug & SDB_IO) - printf("swpg_io: IO start: bp %x, db %x, va %x, pa %x\n", - bp, swb->swb_block+btodb(off), kva, VM_PAGE_TO_PHYS(m)); -#endif VOP_STRATEGY(bp); - if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) { -#ifdef DEBUG - if (swpagerdebug & SDB_IO) - printf("swpg_io: IO started: bp %x\n", bp); -#endif + if ((flags & (B_READ|B_ASYNC)) == B_ASYNC ) { + if( (bp->b_flags & B_DONE) == B_DONE) { + swap_pager_clean(NULL, flags); + } + splx(s); return(VM_PAGER_PEND); } - s = splbio(); -#ifdef DEBUG - if (flags & B_READ) - swap_pager_piip++; - else - swap_pager_poip++; -#endif while ((bp->b_flags & B_DONE) == 0) { - assert_wait((int)bp, 0); - thread_block("swpgio"); - } -#ifdef DEBUG - if (flags & B_READ) - --swap_pager_piip; - else - --swap_pager_poip; -#endif + tsleep((caddr_t)bp, 
PVM, (flags & B_READ)?"swread":"swwrt", 0); + } rv = (bp->b_flags & B_ERROR) ? VM_PAGER_FAIL : VM_PAGER_OK; - bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY); + bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY|B_CALL|B_DONE); bp->av_forw = bswlist.av_forw; bswlist.av_forw = bp; - if (bp->b_vp) - brelvp(bp); - if (bswlist.b_flags & B_WANTED) { + if( bswlist.b_flags & B_WANTED) { bswlist.b_flags &= ~B_WANTED; - thread_wakeup((int)&bswlist); + wakeup((caddr_t)&bswlist); } + + if (bp->b_vp) + brelvp(bp); + + splx(s); + + pmap_remove(vm_map_pmap( pager_map), kva, kva+count*NBPG); + if ((flags & B_READ) == 0 && rv == VM_PAGER_OK) { - m->flags |= PG_CLEAN; - pmap_clear_modify(VM_PAGE_TO_PHYS(m)); + m[reqpage]->flags |= PG_CLEAN; + pmap_clear_modify(VM_PAGE_TO_PHYS(m[reqpage])); } - splx(s); -#ifdef DEBUG - if (swpagerdebug & SDB_IO) - printf("swpg_io: IO done: bp %x, rv %d\n", bp, rv); - if ((swpagerdebug & SDB_FAIL) && rv == VM_PAGER_FAIL) - printf("swpg_io: IO error\n"); + + if( spc) { + queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list); + } else { + for(i=0;iflags |= PG_CLEAN; + m[i]->flags &= ~PG_LAUNDRY; + if( i != reqpage) { + /* + * whether or not to leave the page activated + * is up in the air, but we should put the page + * on a page queue somewhere. (it already is in + * the object). 
+ */ + if( i < count/2 && i > reqpage) + vm_page_activate(m[i]); + else + vm_page_deactivate(m[i]); + /* + * just in case someone was asking for this + * page we now tell them that it is ok to use + */ + PAGE_WAKEUP(m[i]); + } + } +/* + * and free the kernel virtual addresses + */ + kmem_free( pager_map, kva, count*NBPG); + } return(rv); } @@ -706,128 +919,61 @@ swap_pager_clean(m, rw) register swp_clean_t spc, tspc; register int s; -#ifdef DEBUG - /* save panic time state */ - if ((swpagerdebug & SDB_ANOMPANIC) && panicstr) - return; - if (swpagerdebug & SDB_FOLLOW) - printf("swpg_clean(%x, %d)\n", m, rw); -#endif tspc = NULL; + if( queue_empty( &swap_pager_done)) + return FALSE; for (;;) { + s = splbio(); /* - * Look up and removal from inuse list must be done + * Look up and removal from done list must be done * at splbio() to avoid conflicts with swap_pager_iodone. */ - s = splbio(); - spc = (swp_clean_t) queue_first(&swap_pager_inuse); - while (!queue_end(&swap_pager_inuse, (queue_entry_t)spc)) { - if ((spc->spc_flags & SPC_DONE) && - swap_pager_finish(spc)) { - queue_remove(&swap_pager_inuse, spc, - swp_clean_t, spc_list); - break; - } - if (m && m == spc->spc_m) { -#ifdef DEBUG - if (swpagerdebug & SDB_ANOM) - printf("swap_pager_clean: page %x on list, flags %x\n", - m, spc->spc_flags); -#endif - tspc = spc; - } - spc = (swp_clean_t) queue_next(&spc->spc_list); + spc = (swp_clean_t) queue_first(&swap_pager_done); + while (!queue_end(&swap_pager_done, (queue_entry_t)spc)) { + swap_pager_finish(spc); + queue_remove(&swap_pager_done, spc, + swp_clean_t, spc_list); + goto doclean; } /* * No operations done, thats all we can do for now. */ - if (queue_end(&swap_pager_inuse, (queue_entry_t)spc)) - break; + splx(s); + break; + /* * The desired page was found to be busy earlier in * the scan but has since completed. 
*/ +doclean: if (tspc && tspc == spc) { -#ifdef DEBUG - if (swpagerdebug & SDB_ANOM) - printf("swap_pager_clean: page %x done while looking\n", - m); -#endif tspc = NULL; } - spc->spc_flags = SPC_FREE; - vm_pager_unmap_page(spc->spc_kva); + spc->spc_flags = 0; + pmap_remove(vm_map_pmap( pager_map), spc->spc_kva, ((vm_offset_t) spc->spc_kva)+NBPG); queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list); -#ifdef DEBUG - if (swpagerdebug & SDB_WRITE) - printf("swpg_clean: free spc %x\n", spc); -#endif - } -#ifdef DEBUG - /* - * If we found that the desired page is already being cleaned - * mark it so that swap_pager_iodone() will not set the clean - * flag before the pageout daemon has another chance to clean it. - */ - if (tspc && rw == B_WRITE) { - if (swpagerdebug & SDB_ANOM) - printf("swap_pager_clean: page %x on clean list\n", - tspc); - tspc->spc_flags |= SPC_DIRTY; + ++cleandone; + splx(s); } -#endif - splx(s); -#ifdef DEBUG - if (swpagerdebug & SDB_WRITE) - printf("swpg_clean: return %d\n", tspc ? TRUE : FALSE); - if ((swpagerdebug & SDB_ANOM) && tspc) - printf("swpg_clean: %s of cleaning page %x\n", - rw == B_READ ? "get" : "put", m); -#endif return(tspc ? TRUE : FALSE); } -int +void swap_pager_finish(spc) register swp_clean_t spc; { - vm_object_t object = spc->spc_m->object; - - /* - * Mark the paging operation as done. - * (XXX) If we cannot get the lock, leave it til later. - * (XXX) Also we are assuming that an async write is a - * pageout operation that has incremented the counter. - */ - if (!vm_object_lock_try(object)) - return(0); + vm_page_t m = spc->spc_m; + vm_object_t object = m->object; + extern int vm_pageout_free_min; - if (--object->paging_in_progress == 0) + if (--object->paging_in_progress == 0) thread_wakeup((int) object); -#ifdef DEBUG - /* - * XXX: this isn't even close to the right thing to do, - * introduces a variety of race conditions. - * - * If dirty, vm_pageout() has attempted to clean the page - * again. 
In this case we do not do anything as we will - * see the page again shortly. - */ - if (spc->spc_flags & SPC_DIRTY) { - if (swpagerdebug & SDB_ANOM) - printf("swap_pager_finish: page %x dirty again\n", - spc->spc_m); - spc->spc_m->flags &= ~PG_BUSY; - PAGE_WAKEUP(spc->spc_m); - vm_object_unlock(object); - return(1); - } -#endif + /* * If no error mark as clean and inform the pmap system. * If error, mark as dirty so we will try again. @@ -835,17 +981,26 @@ swap_pager_finish(spc) */ if (spc->spc_flags & SPC_ERROR) { printf("swap_pager_finish: clean of page %x failed\n", - VM_PAGE_TO_PHYS(spc->spc_m)); - spc->spc_m->flags |= PG_LAUNDRY; + VM_PAGE_TO_PHYS(m)); + m->flags |= PG_LAUNDRY; } else { - spc->spc_m->flags |= PG_CLEAN; - pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m)); + pmap_clear_modify(VM_PAGE_TO_PHYS(m)); + m->flags |= PG_CLEAN; + } + pmap_clear_reference(VM_PAGE_TO_PHYS(m)); + PAGE_WAKEUP(m); + /* + * if we need memory desperately, then free it now + */ + if((m->flags & PG_CLEAN) && + vm_page_free_count <= vm_pageout_free_min) { + pmap_page_protect( VM_PAGE_TO_PHYS(m), VM_PROT_NONE); + vm_page_free(m); + thread_wakeup((int) &vm_pages_needed); } - spc->spc_m->flags &= ~PG_BUSY; - PAGE_WAKEUP(spc->spc_m); + --nswiodone; - vm_object_unlock(object); - return(1); + return; } void @@ -854,59 +1009,39 @@ swap_pager_iodone(bp) { register swp_clean_t spc; daddr_t blk; - int s; - -#ifdef DEBUG - /* save panic time state */ - if ((swpagerdebug & SDB_ANOMPANIC) && panicstr) - return; - if (swpagerdebug & SDB_FOLLOW) - printf("swpg_iodone(%x)\n", bp); -#endif - s = splbio(); - spc = (swp_clean_t) queue_first(&swap_pager_inuse); - while (!queue_end(&swap_pager_inuse, (queue_entry_t)spc)) { - if (spc->spc_bp == bp) - break; - spc = (swp_clean_t) queue_next(&spc->spc_list); - } -#ifdef DEBUG - if (queue_end(&swap_pager_inuse, (queue_entry_t)spc)) - panic("swap_pager_iodone: bp not found"); -#endif - - spc->spc_flags &= ~SPC_BUSY; - spc->spc_flags |= SPC_DONE; + spc = 
(swp_clean_t) bp->b_spc; + queue_remove(&swap_pager_inuse, spc, swp_clean_t, spc_list); + queue_enter(&swap_pager_done, spc, swp_clean_t, spc_list); if (bp->b_flags & B_ERROR) { spc->spc_flags |= SPC_ERROR; -printf("error %d blkno %d sz %d ", bp->b_error, bp->b_blkno, bp->b_bcount); + printf("error %d blkno %d sz %d ", + bp->b_error, bp->b_blkno, bp->b_bcount); } spc->spc_bp = NULL; - blk = bp->b_blkno; - -#ifdef DEBUG - --swap_pager_poip; - if (swpagerdebug & SDB_WRITE) - printf("swpg_iodone: bp=%x swp=%x flags=%x spc=%x poip=%x\n", - bp, spc->spc_swp, spc->spc_swp->sw_flags, - spc, spc->spc_swp->sw_poip); -#endif - spc->spc_swp->sw_poip--; - if (spc->spc_swp->sw_flags & SW_WANTED) { - spc->spc_swp->sw_flags &= ~SW_WANTED; - thread_wakeup((int)spc->spc_swp); - } + if( (bp->b_flags & B_READ) == 0) + vwakeup(bp); - bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY); + bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY|B_ASYNC); + if (bp->b_vp) { + brelvp(bp); + } bp->av_forw = bswlist.av_forw; bswlist.av_forw = bp; - if (bp->b_vp) - brelvp(bp); - if (bswlist.b_flags & B_WANTED) { + if( bswlist.b_flags & B_WANTED) { bswlist.b_flags &= ~B_WANTED; - thread_wakeup((int)&bswlist); + wakeup((caddr_t)&bswlist); + } + nswiodone++; + + if( (--spc->spc_swp->sw_poip == 0) || + queue_empty( &swap_pager_inuse)) { + wakeup( (caddr_t)&swap_wakeup); + } + + if( queue_empty( &swap_pager_inuse) || + queue_empty( &swap_pager_free) || + nswiodone >= npendingio / 2 ) { + thread_wakeup((int) &vm_pages_needed); } - thread_wakeup((int) &vm_pages_needed); - splx(s); } diff --git a/sys/vm/swap_pager.h b/sys/vm/swap_pager.h index aa0d80d054..310319a790 100644 --- a/sys/vm/swap_pager.h +++ b/sys/vm/swap_pager.h @@ -36,39 +36,27 @@ * SUCH DAMAGE. * * from: @(#)swap_pager.h 7.1 (Berkeley) 12/5/90 - * $Id: swap_pager.h,v 1.3 1993/11/07 17:54:06 wollman Exp $ + * $Id: swap_pager.h,v 1.2 1993/10/16 16:20:21 rgrimes Exp $ + */ + +/* + * Modifications to the block allocation data structure by John S. 
Dyson + * 18 Dec 93. */ #ifndef _SWAP_PAGER_ #define _SWAP_PAGER_ 1 /* - * In the swap pager, the backing store for an object is organized as an - * array of some number of "swap blocks". A swap block consists of a bitmask - * and some number of contiguous DEV_BSIZE disk blocks. The minimum size - * of a swap block is: - * - * max(PAGE_SIZE, dmmin*DEV_BSIZE) [ 32k currently ] - * - * bytes (since the pager interface is page oriented), the maximum size is: - * - * min(#bits(swb_mask)*PAGE_SIZE, dmmax*DEV_BSIZE) [ 128k currently ] - * - * where dmmin and dmmax are left over from the old VM interface. The bitmask - * (swb_mask) is used by swap_pager_haspage() to determine if a particular - * page has actually been written; i.e. the pager copy of the page is valid. - * All swap blocks in the backing store of an object will be the same size. - * - * The reason for variable sized swap blocks is to reduce fragmentation of - * swap resources. Whenever possible we allocate smaller swap blocks to - * smaller objects. The swap block size is determined from a table of - * object-size vs. swap-block-size computed at boot time. + * SWB_NPAGES can be set to any value from 1 to 32 pages per allocation, + * however, due to the allocation spilling into non-swap pager backed memory, + * suggest keeping SWB_NPAGES small (1-4). If high performance is mandatory + * perhaps up to 8 pages might be in order???? 
*/ -typedef int sw_bm_t; /* pager bitmask */ - +#define SWB_NPAGES 1 struct swblock { - sw_bm_t swb_mask; /* bitmask of valid pages in this block */ - daddr_t swb_block; /* starting disk block for this block */ + unsigned int swb_valid; /* bitmask for valid pages */ + int swb_block[SWB_NPAGES]; /* unfortunately int instead of daddr_t */ }; typedef struct swblock *sw_blk_t; @@ -77,7 +65,6 @@ typedef struct swblock *sw_blk_t; */ struct swpager { vm_size_t sw_osize; /* size of object we are backing (bytes) */ - int sw_bsize; /* size of swap blocks (DEV_BSIZE units) */ int sw_nblocks;/* number of blocks in list (sw_blk_t units) */ sw_blk_t sw_blocks; /* pointer to list of swap blocks */ short sw_flags; /* flags */ @@ -95,8 +82,9 @@ vm_pager_t swap_pager_alloc(caddr_t, vm_size_t, vm_prot_t); void swap_pager_dealloc(vm_pager_t); boolean_t swap_pager_getpage(vm_pager_t, vm_page_t, boolean_t); boolean_t swap_pager_putpage(vm_pager_t, vm_page_t, boolean_t); +boolean_t swap_pager_getmulti(vm_pager_t, vm_page_t *, int, int, boolean_t); boolean_t swap_pager_haspage(vm_pager_t, vm_offset_t); -extern int swap_pager_io(sw_pager_t, vm_page_t, int); +int swap_pager_io(sw_pager_t, vm_page_t *, int, int, int); void swap_pager_iodone(struct buf *); boolean_t swap_pager_clean(vm_page_t, int); diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index cb258a92f5..166495b057 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -34,7 +34,7 @@ * SUCH DAMAGE. * * from: @(#)vm_object.c 7.4 (Berkeley) 5/7/91 - * $Id: vm_object.c,v 1.11 1993/12/19 00:56:07 wollman Exp $ + * $Id: vm_object.c,v 1.12 1993/12/21 05:51:02 davidg Exp $ */ /* @@ -64,6 +64,11 @@ * rights to redistribute these changes. */ +/* + * Significant modifications to vm_object_collapse fixing the growing + * swap space allocation by John S. Dyson, 18 Dec 93. + */ + /* * Virtual memory object module. 
*/ @@ -76,7 +81,6 @@ #include "vm_page.h" #include "ddb.h" - static void _vm_object_allocate(vm_size_t, vm_object_t); static void vm_object_deactivate_pages(vm_object_t); static void vm_object_cache_trim(void); @@ -695,6 +699,12 @@ void vm_object_copy(src_object, src_offset, size, vm_object_lock(src_object); + /* + * Try to collapse the object before copying it. + */ + vm_object_collapse(src_object); + + /* * we can simply add a reference to the object if we have no * pager or are using the swap pager or we have an internal object @@ -732,11 +742,6 @@ void vm_object_copy(src_object, src_offset, size, return; } - /* - * Try to collapse the object before copying it. - */ - vm_object_collapse(src_object); - /* * If the object has a pager, the pager wants to * see all of the changes. We need a copy-object @@ -1084,7 +1089,17 @@ boolean_t vm_object_collapse_allowed = TRUE; * Requires that the object be locked and the page * queues be unlocked. * + * This routine substantially modified by John S. Dyson + * Dec 18,1993. Many restrictions have been removed from + * this routine including: + * + * 1) The object CAN now have a pager + * 2) Backing object pager space is now removed if not needed + * 3) Bypasses now check for existance of pages on paging space + * */ + + void vm_object_collapse(object) register vm_object_t object; @@ -1103,11 +1118,10 @@ void vm_object_collapse(object) * Verify that the conditions are right for collapse: * * The object exists and no pages in it are currently - * being paged out (or have ever been paged out). + * being paged out. 
*/ if (object == NULL || - object->paging_in_progress != 0 || - object->pager != NULL) + object->paging_in_progress != 0) return; /* @@ -1127,7 +1141,7 @@ void vm_object_collapse(object) */ if (!backing_object->internal || - backing_object->paging_in_progress != 0) { + backing_object->paging_in_progress != 0 ) { vm_object_unlock(backing_object); return; } @@ -1143,10 +1157,22 @@ void vm_object_collapse(object) * parent object. */ if (backing_object->shadow != NULL && - backing_object->shadow->copy != NULL) { + backing_object->shadow->copy == backing_object) { + vm_object_unlock(backing_object); + return; + } + + /* + * we can deal only with the swap pager + */ + if( (object->pager && + object->pager->pg_type != PG_SWAP) || + (backing_object->pager && + backing_object->pager->pg_type != PG_SWAP)) { vm_object_unlock(backing_object); return; } + /* * We know that we can either collapse the backing @@ -1203,7 +1229,6 @@ void vm_object_collapse(object) } else { if (pp) { -#if 1 /* * This should never happen -- the * parent cannot have ever had an @@ -1213,13 +1238,6 @@ void vm_object_collapse(object) panic("vm_object_collapse: bad case"); /* andrew@werple.apana.org.au - from mach 3.0 VM */ -#else - /* may be someone waiting for it */ - PAGE_WAKEUP(pp); - vm_page_lock_queues(); - vm_page_free(pp); - vm_page_unlock_queues(); -#endif } /* * Parent now has no page. @@ -1233,44 +1251,39 @@ void vm_object_collapse(object) /* * Move the pager from backing_object to object. - * - * XXX We're only using part of the paging space - * for keeps now... we ought to discard the - * unused portion. - */ - - /* - * Remove backing_object from the object hashtable now. - * This is necessary since its pager is going away - * and therefore it is not going to be removed from - * hashtable in vm_object_deallocate(). - * - * NOTE - backing_object can only get at this stage if - * it has an internal pager. It is not normally on the - * hashtable unless it was put there by eg. 
vm_mmap() - * - * XXX - Need I worry here about *named* ANON pagers ? */ - if (backing_object->pager) { - vm_object_remove(backing_object->pager); + if( backing_object->pager) { + if( object->pager) { + object->paging_in_progress++; + backing_object->paging_in_progress++; + /* + * copy shadow object pages into ours + * and destroy unneeded pages in shadow object. + */ + swap_pager_copy( + backing_object->pager, backing_object->paging_offset, + object->pager, object->paging_offset, + object->shadow_offset); + object->paging_in_progress--; + if( object->paging_in_progress == 0) + wakeup((caddr_t)object); + backing_object->paging_in_progress--; + if( backing_object->paging_in_progress == 0) + wakeup((caddr_t)backing_object); + } else { + /* + * grab the shadow objects pager + */ + object->pager = backing_object->pager; + object->paging_offset = backing_object->paging_offset + backing_offset; + /* + * free unnecessary blocks + */ + swap_pager_freespace( object->pager, 0, object->paging_offset); + } + vm_object_remove( backing_object->pager); } - object->pager = backing_object->pager; -#if 1 - /* Mach 3.0 code */ - /* andrew@werple.apana.org.au, 12 Feb 1993 */ - - /* - * If there is no pager, leave paging-offset alone. - */ - if (object->pager) - object->paging_offset = - backing_object->paging_offset + - backing_offset; -#else - /* old VM 2.5 version */ - object->paging_offset += backing_offset; -#endif backing_object->pager = NULL; @@ -1346,9 +1359,10 @@ void vm_object_collapse(object) if (p->offset >= backing_offset && new_offset <= size && - ((pp = vm_page_lookup(object, new_offset)) - == NULL || - (pp->flags & PG_FAKE))) { + ((pp = vm_page_lookup(object, new_offset)) == NULL || + (pp->flags & PG_FAKE)) && + (!object->pager || + !vm_pager_has_page( object->pager, object->paging_offset+new_offset))) { /* * Page still needed. * Can't go any further. 
@@ -1386,6 +1400,8 @@ void vm_object_collapse(object) * will not vanish; so we don't need to call * vm_object_deallocate. */ + if( backing_object->ref_count == 1) + printf("should have called obj deallocate\n"); backing_object->ref_count--; vm_object_unlock(backing_object); @@ -1399,6 +1415,7 @@ void vm_object_collapse(object) } } + /* * vm_object_page_remove: [internal] * diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c index e0620db8dd..91d8f0d9c5 100644 --- a/sys/vm/vm_pageout.c +++ b/sys/vm/vm_pageout.c @@ -34,7 +34,7 @@ * SUCH DAMAGE. * * from: @(#)vm_pageout.c 7.4 (Berkeley) 5/7/91 - * $Id: vm_pageout.c,v 1.7 1993/12/19 00:56:12 wollman Exp $ + * $Id: vm_pageout.c,v 1.8 1993/12/21 05:51:06 davidg Exp $ */ /* @@ -220,37 +220,22 @@ vm_pageout_scan() * until all shadows are not paging. This * allows vm_object_collapse to work better and * helps control swap space size. - * (J. Dyson 11 Nov 93) + * (J. Dyson 18 Dec 93) */ vm_page_unlock_queues(); - if( !object->pager) { - /* - * If our shadow is active, then wait until paging is done - * this will allow vm_object_collapse to work well. - * The collapse is necessary to keep swap space down. - */ - if( ((object->shadow && object->shadow->paging_in_progress) || - (vm_page_free_count < vm_pageout_free_min))) { - vm_object_unlock(object); - m = (vm_page_t) queue_next(&m->pageq); - continue; - } else { - vm_object_collapse(object); - /* - * If we still have a shadow active, then defer the - * creation of the pager further... 
- */ - if( object->shadow) { - if( object->shadow->paging_in_progress) { - vm_object_unlock(object); - m = (vm_page_t) queue_next(&m->pageq); - continue; - } - } - } + + if( !object->pager && + (object->shadow && object->shadow->paging_in_progress ) || + (vm_page_free_count < vm_pageout_free_min)) { + vm_object_unlock(object); + m = (vm_page_t) queue_next(&m->pageq); + continue; } + + vm_object_collapse(object); + pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE); m->flags |= PG_BUSY; -- 2.20.1