From 619edcce9981988d183374adf3f5abcfcc261c09 Mon Sep 17 00:00:00 2001 From: Kirk McKusick Date: Thu, 6 Dec 1990 02:13:49 -0800 Subject: [PATCH] adopted from Mike Hibler at Utah SCCS-vsn: sys/vm/device_pager.c 7.1 SCCS-vsn: sys/vm/device_pager.h 7.1 SCCS-vsn: sys/vm/swap_pager.c 7.1 SCCS-vsn: sys/vm/swap_pager.h 7.1 SCCS-vsn: sys/vm/vm_mmap.c 7.1 SCCS-vsn: sys/vm/vm_pager.h 7.1 SCCS-vsn: sys/vm/vm_unix.c 7.1 SCCS-vsn: sys/vm/vnode_pager.c 7.1 SCCS-vsn: sys/vm/vnode_pager.h 7.1 --- usr/src/sys/vm/device_pager.c | 237 +++++++++ usr/src/sys/vm/device_pager.h | 52 ++ usr/src/sys/vm/swap_pager.c | 874 ++++++++++++++++++++++++++++++++++ usr/src/sys/vm/swap_pager.h | 86 ++++ usr/src/sys/vm/vm_mmap.c | 818 +++++++++++++++++++++++++++++++ usr/src/sys/vm/vm_pager.h | 92 ++++ usr/src/sys/vm/vm_unix.c | 103 ++++ usr/src/sys/vm/vnode_pager.c | 450 +++++++++++++++++ usr/src/sys/vm/vnode_pager.h | 52 ++ 9 files changed, 2764 insertions(+) create mode 100644 usr/src/sys/vm/device_pager.c create mode 100644 usr/src/sys/vm/device_pager.h create mode 100644 usr/src/sys/vm/swap_pager.c create mode 100644 usr/src/sys/vm/swap_pager.h create mode 100644 usr/src/sys/vm/vm_mmap.c create mode 100644 usr/src/sys/vm/vm_pager.h create mode 100644 usr/src/sys/vm/vm_unix.c create mode 100644 usr/src/sys/vm/vnode_pager.c create mode 100644 usr/src/sys/vm/vnode_pager.h diff --git a/usr/src/sys/vm/device_pager.c b/usr/src/sys/vm/device_pager.c new file mode 100644 index 0000000000..50b5bc7ab2 --- /dev/null +++ b/usr/src/sys/vm/device_pager.c @@ -0,0 +1,237 @@ +/* + * Copyright (c) 1990 University of Utah. + * Copyright (c) 1991 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * the Systems Programming Group of the University of Utah Computer + * Science Department. + * + * %sccs.include.redist.c% + * + * @(#)device_pager.c 7.1 (Berkeley) %G% + */ + +/* + * Page to/from special files. + */ + +#include "devpager.h" +#if NDEVPAGER > 0 + +#include "param.h" +#include "queue.h" +#include "conf.h" +#include "mman.h" +#include "malloc.h" +#include "uio.h" + +#include "../vm/vm_param.h" +#include "../vm/vm_map.h" +#include "../vm/vm_pager.h" +#include "../vm/vm_page.h" +#include "../vm/vm_kern.h" +#include "../vm/device_pager.h" + +queue_head_t dev_pager_list; /* list of managed devices */ + +#ifdef DEBUG +int dpagerdebug = 0; +#define DDB_FOLLOW 0x01 +#define DDB_INIT 0x02 +#define DDB_ALLOC 0x04 +#define DDB_FAIL 0x08 +#endif + +void +dev_pager_init() +{ +#ifdef DEBUG + if (dpagerdebug & DDB_FOLLOW) + printf("dev_pager_init()\n"); +#endif + queue_init(&dev_pager_list); +} + +vm_pager_t +dev_pager_alloc(handle, size, prot) + caddr_t handle; + vm_size_t size; + vm_prot_t prot; +{ + dev_t dev; + vm_pager_t pager; + int (*mapfunc)(), nprot; + register vm_object_t object; + register vm_page_t page; + register dev_pager_t devp; + register int npages, off; + extern int nulldev(), nodev(); + + +#ifdef DEBUG + if (dpagerdebug & DDB_FOLLOW) + printf("dev_pager_alloc(%x, %x, %x)\n", handle, size, prot); +#endif + /* + * Pageout to device, should never happen. + */ + if (handle == NULL) + panic("dev_pager_alloc called"); + + /* + * Look it up, creating as necessary + */ + pager = vm_pager_lookup(&dev_pager_list, handle); + if (pager == VM_PAGER_NULL) { + /* + * Validation. Make sure this device can be mapped + * and that range to map is acceptible to device. 
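+		 *
+		 * The d_mmap entry in cdevsw is called once per page below
+		 * with the device, a byte offset and a protection; it returns
+		 * -1 if that offset cannot be mapped, otherwise a machine
+		 * dependent frame value that pmap_phys_address() can convert
+		 * back into a physical address.  A hypothetical frame buffer
+		 * driver might supply something like this (fb_physbase and
+		 * fb_size are made up names for the example):
+		 *
+		 *	fbmmap(dev, off, prot)
+		 *		dev_t dev;
+		 *		int off, prot;
+		 *	{
+		 *		if ((unsigned)off >= fb_size)
+		 *			return(-1);
+		 *		return(atop(fb_physbase + off));
+		 *	}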
+ */ + dev = (dev_t)handle; + mapfunc = cdevsw[major(dev)].d_mmap; + if (!mapfunc || mapfunc == nodev || mapfunc == nulldev) + return(VM_PAGER_NULL); + nprot = 0; + if (prot & VM_PROT_READ) + nprot |= PROT_READ; + if (prot & VM_PROT_WRITE) + nprot |= PROT_WRITE; + if (prot & VM_PROT_EXECUTE) + nprot |= PROT_EXEC; + npages = atop(round_page(size)); + for (off = 0; npages--; off += PAGE_SIZE) + if ((*mapfunc)(dev, off, nprot) == -1) + return(VM_PAGER_NULL); + /* + * Allocate and initialize pager structs + */ + pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, M_WAITOK); + if (pager == VM_PAGER_NULL) + return(VM_PAGER_NULL); + devp = (dev_pager_t)malloc(sizeof *devp, M_VMPGDATA, M_WAITOK); + if (devp == DEV_PAGER_NULL) { + free((caddr_t)pager, M_VMPAGER); + return(VM_PAGER_NULL); + } + devp->devp_dev = dev; + devp->devp_npages = atop(round_page(size)); + pager->pg_handle = handle; + pager->pg_ops = &devicepagerops; + pager->pg_type = PG_DEVICE; + pager->pg_data = (caddr_t)devp; + /* + * Allocate object and vm_page structures to describe memory + */ + npages = devp->devp_npages; + object = devp->devp_object = vm_object_allocate(ptoa(npages)); + vm_object_enter(object, pager); + vm_object_setpager(object, pager, (vm_offset_t)0, FALSE); + devp->devp_pages = (vm_page_t) + kmem_alloc(kernel_map, npages*sizeof(struct vm_page)); + off = 0; + for (page = devp->devp_pages; + page < &devp->devp_pages[npages]; page++) { + vm_object_lock(object); + vm_page_init(page, object, off); + page->phys_addr = + pmap_phys_address((*mapfunc)(dev, off, nprot)); + page->wire_count = 1; + page->fictitious = TRUE; + PAGE_WAKEUP(page); + vm_object_unlock(object); + off += PAGE_SIZE; + } + /* + * Finally, put it on the managed list so other can find it. + */ + queue_enter(&dev_pager_list, devp, dev_pager_t, devp_list); +#ifdef DEBUG + if (dpagerdebug & DDB_ALLOC) + printf("dev_pager_alloc: pages %d@%x\n", + devp->devp_npages, devp->devp_pages); +#endif + } else { + /* + * vm_object_lookup() gains a reference and also + * removes the object from the cache. 
+ */ + devp = (dev_pager_t)pager->pg_data; + if (vm_object_lookup(pager) != devp->devp_object) + panic("dev_pager_setup: bad object"); + } +#ifdef DEBUG + if (dpagerdebug & DDB_ALLOC) { + printf("dev_pager_alloc: pager %x devp %x object %x\n", + pager, devp, object); + vm_object_print(object, FALSE); + } +#endif + return(pager); + +} + +void +dev_pager_dealloc(pager) + vm_pager_t pager; +{ + dev_pager_t devp = (dev_pager_t)pager->pg_data; + register vm_object_t object; + +#ifdef DEBUG + if (dpagerdebug & DDB_FOLLOW) + printf("dev_pager_dealloc(%x)\n", pager); +#endif + queue_remove(&dev_pager_list, devp, dev_pager_t, devp_list); + object = devp->devp_object; +#ifdef DEBUG + if (dpagerdebug & DDB_ALLOC) + printf("dev_pager_dealloc: devp %x object %x pages %d@%x\n", + devp, object, devp->devp_npages, devp->devp_pages); +#endif + while (!queue_empty(&object->memq)) + vm_page_remove((vm_page_t)queue_first(&object->memq)); + kmem_free(kernel_map, devp->devp_pages, + devp->devp_npages * sizeof(struct vm_page)); + free((caddr_t)devp, M_VMPGDATA); + pager->pg_data = 0; +} + +dev_pager_getpage(pager, m, sync) + vm_pager_t pager; + vm_page_t m; + boolean_t sync; +{ +#ifdef DEBUG + if (dpagerdebug & DDB_FOLLOW) + printf("dev_pager_getpage(%x, %x)\n", pager, m); +#endif + return(VM_PAGER_BAD); +} + +dev_pager_putpage(pager, m, sync) + vm_pager_t pager; + vm_page_t m; + boolean_t sync; +{ +#ifdef DEBUG + if (dpagerdebug & DDB_FOLLOW) + printf("dev_pager_putpage(%x, %x)\n", pager, m); +#endif + if (pager == VM_PAGER_NULL) + return; + panic("dev_pager_putpage called"); +} + +boolean_t +dev_pager_haspage(pager, offset) + vm_pager_t pager; + vm_offset_t offset; +{ +#ifdef DEBUG + if (dpagerdebug & DDB_FOLLOW) + printf("dev_pager_haspage(%x, %x)\n", pager, offset); +#endif + return(TRUE); +} +#endif diff --git a/usr/src/sys/vm/device_pager.h b/usr/src/sys/vm/device_pager.h new file mode 100644 index 0000000000..984a675a11 --- /dev/null +++ b/usr/src/sys/vm/device_pager.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 1990 University of Utah. + * Copyright (c) 1991 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * the Systems Programming Group of the University of Utah Computer + * Science Department. + * + * %sccs.include.redist.c% + * + * @(#)device_pager.h 7.1 (Berkeley) %G% + */ + +#ifndef _DEVICE_PAGER_ +#define _DEVICE_PAGER_ 1 + +/* + * Device pager private data. + */ +struct devpager { + queue_head_t devp_list; /* list of managed devices */ + dev_t devp_dev; /* devno of device */ + vm_page_t devp_pages; /* page structs for device */ + int devp_npages; /* size of device in pages */ + int devp_count; /* reference count */ + vm_object_t devp_object; /* object representing this device */ +}; +typedef struct devpager *dev_pager_t; + +#define DEV_PAGER_NULL ((dev_pager_t)0) + +#ifdef KERNEL + +void dev_pager_init(); +vm_pager_t dev_pager_alloc(); +void dev_pager_dealloc(); +boolean_t dev_pager_getpage(), dev_pager_putpage(); +boolean_t dev_pager_haspage(); + +struct pagerops devicepagerops = { + dev_pager_init, + dev_pager_alloc, + dev_pager_dealloc, + dev_pager_getpage, + dev_pager_putpage, + dev_pager_haspage +}; + +#endif + +#endif /* _DEVICE_PAGER_ */ diff --git a/usr/src/sys/vm/swap_pager.c b/usr/src/sys/vm/swap_pager.c new file mode 100644 index 0000000000..06763d0723 --- /dev/null +++ b/usr/src/sys/vm/swap_pager.c @@ -0,0 +1,874 @@ +/* + * Copyright (c) 1990 University of Utah. 
+ * Copyright (c) 1991 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * the Systems Programming Group of the University of Utah Computer + * Science Department. + * + * %sccs.include.redist.c% + * + * @(#)swap_pager.c 7.1 (Berkeley) %G% + */ + +/* + * Quick hack to page to dedicated partition(s). + * TODO: + * Add multiprocessor locks + * Deal with async writes in a better fashion + */ + +#include "swappager.h" +#if NSWAPPAGER > 0 + +#include "param.h" +#include "user.h" +#include "proc.h" +#include "buf.h" +#include "map.h" +#include "systm.h" +#include "specdev.h" +#include "vnode.h" +#include "malloc.h" +#include "queue.h" + +#include "../vm/vm_param.h" +#include "../vm/vm_pager.h" +#include "../vm/vm_page.h" +#include "../vm/vm_pageout.h" +#include "../vm/swap_pager.h" + +#define NSWSIZES 16 /* size of swtab */ +#define NPENDINGIO 64 /* max # of pending cleans */ +#define MAXDADDRS 64 /* max # of disk addrs for fixed allocations */ + +#ifdef DEBUG +int swpagerdebug = 0x100; +#define SDB_FOLLOW 0x001 +#define SDB_INIT 0x002 +#define SDB_ALLOC 0x004 +#define SDB_IO 0x008 +#define SDB_WRITE 0x010 +#define SDB_FAIL 0x020 +#define SDB_ALLOCBLK 0x040 +#define SDB_FULL 0x080 +#define SDB_ANOM 0x100 +#define SDB_ANOMPANIC 0x200 +#endif + +struct swpagerclean { + queue_head_t spc_list; + int spc_flags; + struct buf *spc_bp; + sw_pager_t spc_swp; + vm_offset_t spc_kva; + vm_page_t spc_m; +} swcleanlist[NPENDINGIO]; +typedef struct swpagerclean *swp_clean_t; + +#define SWP_CLEAN_NULL ((swp_clean_t)0) + +/* spc_flags values */ +#define SPC_FREE 0x00 +#define SPC_BUSY 0x01 +#define SPC_DONE 0x02 +#define SPC_ERROR 0x04 +#define SPC_DIRTY 0x08 + +struct swtab { + vm_size_t st_osize; /* size of object (bytes) */ + int st_bsize; /* vs. size of swap block (DEV_BSIZE units) */ +#ifdef DEBUG + u_long st_inuse; /* number in this range in use */ + u_long st_usecnt; /* total used of this size */ +#endif +} swtab[NSWSIZES+1]; + +#ifdef DEBUG +int swap_pager_pendingio; /* max pending async "clean" ops */ +int swap_pager_poip; /* pageouts in progress */ +int swap_pager_piip; /* pageins in progress */ +#endif + +queue_head_t swap_pager_inuse; /* list of pending page cleans */ +queue_head_t swap_pager_free; /* list of free pager clean structs */ +queue_head_t swap_pager_list; /* list of "named" anon regions */ + +void +swap_pager_init() +{ + register swp_clean_t spc; + register int i, bsize; + extern int dmmin, dmmax; + int maxbsize; + +#ifdef DEBUG + if (swpagerdebug & (SDB_FOLLOW|SDB_INIT)) + printf("swpg_init()\n"); +#endif + dfltpagerops = &swappagerops; + queue_init(&swap_pager_list); + + /* + * Initialize clean lists + */ + queue_init(&swap_pager_inuse); + queue_init(&swap_pager_free); + for (i = 0, spc = swcleanlist; i < NPENDINGIO; i++, spc++) { + queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list); + spc->spc_flags = SPC_FREE; + } + + /* + * Calculate the swap allocation constants. + */ + if (dmmin == 0) { + dmmin = DMMIN; + if (dmmin < CLBYTES/DEV_BSIZE) + dmmin = CLBYTES/DEV_BSIZE; + } + if (dmmax == 0) + dmmax = DMMAX; + + /* + * Fill in our table of object size vs. 
allocation size + */ + bsize = btodb(PAGE_SIZE); + if (bsize < dmmin) + bsize = dmmin; + maxbsize = btodb(sizeof(sw_bm_t) * NBBY * PAGE_SIZE); + if (maxbsize > dmmax) + maxbsize = dmmax; + for (i = 0; i < NSWSIZES; i++) { + swtab[i].st_osize = (vm_size_t) (MAXDADDRS * dbtob(bsize)); + swtab[i].st_bsize = bsize; +#ifdef DEBUG + if (swpagerdebug & SDB_INIT) + printf("swpg_init: ix %d, size %x, bsize %x\n", + i, swtab[i].st_osize, swtab[i].st_bsize); +#endif + if (bsize >= maxbsize) + break; + bsize *= 2; + } + swtab[i].st_osize = 0; + swtab[i].st_bsize = bsize; +} + +/* + * Allocate a pager structure and associated resources. + * Note that if we are called from the pageout daemon (handle == NULL) + * we should not wait for memory as it could resulting in deadlock. + */ +vm_pager_t +swap_pager_alloc(handle, size, prot) + caddr_t handle; + register vm_size_t size; + vm_prot_t prot; +{ + register vm_pager_t pager; + register sw_pager_t swp; + struct swtab *swt; + int waitok; + +#ifdef DEBUG + if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC)) + printf("swpg_alloc(%x, %x, %x)\n", handle, size, prot); +#endif + /* + * If this is a "named" anonymous region, look it up and + * return the appropriate pager if it exists. + */ + if (handle) { + pager = vm_pager_lookup(&swap_pager_list, handle); + if (pager != VM_PAGER_NULL) { + /* + * Use vm_object_lookup to gain a reference + * to the object and also to remove from the + * object cache. + */ + if (vm_object_lookup(pager) == VM_OBJECT_NULL) + panic("swap_pager_alloc: bad object"); + return(pager); + } + } + /* + * Pager doesn't exist, allocate swap management resources + * and initialize. + */ + waitok = handle ? M_WAITOK : M_NOWAIT; + pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, waitok); + if (pager == VM_PAGER_NULL) + return(VM_PAGER_NULL); + swp = (sw_pager_t)malloc(sizeof *swp, M_VMPGDATA, waitok); + if (swp == NULL) { +#ifdef DEBUG + if (swpagerdebug & SDB_FAIL) + printf("swpg_alloc: swpager malloc failed\n"); +#endif + free((caddr_t)pager, M_VMPAGER); + return(VM_PAGER_NULL); + } + size = round_page(size); + for (swt = swtab; swt->st_osize; swt++) + if (size <= swt->st_osize) + break; +#ifdef DEBUG + swt->st_inuse++; + swt->st_usecnt++; +#endif + swp->sw_osize = size; + swp->sw_bsize = swt->st_bsize; + swp->sw_nblocks = (btodb(size) + swp->sw_bsize - 1) / swp->sw_bsize; + swp->sw_blocks = (sw_blk_t) + malloc(swp->sw_nblocks*sizeof(*swp->sw_blocks), + M_VMPGDATA, M_NOWAIT); + if (swp->sw_blocks == NULL) { + free((caddr_t)swp, M_VMPGDATA); + free((caddr_t)pager, M_VMPAGER); +#ifdef DEBUG + if (swpagerdebug & SDB_FAIL) + printf("swpg_alloc: sw_blocks malloc failed\n"); + swt->st_inuse--; + swt->st_usecnt--; +#endif + return(FALSE); + } + bzero((caddr_t)swp->sw_blocks, + swp->sw_nblocks * sizeof(*swp->sw_blocks)); + swp->sw_poip = 0; + if (handle) { + vm_object_t object; + + swp->sw_flags = SW_NAMED; + queue_enter(&swap_pager_list, pager, vm_pager_t, pg_list); + /* + * Consistant with other pagers: return with object + * referenced. Can't do this with handle == NULL + * since it might be the pageout daemon calling. 
+ */ + object = vm_object_allocate(size); + vm_object_enter(object, pager); + vm_object_setpager(object, pager, 0, FALSE); + } else { + swp->sw_flags = 0; + queue_init(&pager->pg_list); + } + pager->pg_handle = handle; + pager->pg_ops = &swappagerops; + pager->pg_type = PG_SWAP; + pager->pg_data = (caddr_t)swp; + +#ifdef DEBUG + if (swpagerdebug & SDB_ALLOC) + printf("swpg_alloc: pg_data %x, %x of %x at %x\n", + swp, swp->sw_nblocks, swp->sw_bsize, swp->sw_blocks); +#endif + return(pager); +} + +void +swap_pager_dealloc(pager) + vm_pager_t pager; +{ + register int i; + register sw_blk_t bp; + register sw_pager_t swp; + struct swtab *swt; + int s; + +#ifdef DEBUG + /* save panic time state */ + if ((swpagerdebug & SDB_ANOMPANIC) && panicstr) + return; + if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC)) + printf("swpg_dealloc(%x)\n", pager); +#endif + /* + * Remove from list right away so lookups will fail if we + * block for pageout completion. + */ + swp = (sw_pager_t) pager->pg_data; + if (swp->sw_flags & SW_NAMED) { + queue_remove(&swap_pager_list, pager, vm_pager_t, pg_list); + swp->sw_flags &= ~SW_NAMED; + } +#ifdef DEBUG + for (swt = swtab; swt->st_osize; swt++) + if (swp->sw_osize <= swt->st_osize) + break; + swt->st_inuse--; +#endif + + /* + * Wait for all pageouts to finish and remove + * all entries from cleaning list. + */ + s = splbio(); + while (swp->sw_poip) { + swp->sw_flags |= SW_WANTED; + assert_wait((int)swp); + thread_block(); + } + splx(s); + (void) swap_pager_clean(VM_PAGE_NULL, B_WRITE); + + /* + * Free left over swap blocks + */ + for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++) + if (bp->swb_block) { +#ifdef DEBUG + if (swpagerdebug & (SDB_ALLOCBLK|SDB_FULL)) + printf("swpg_dealloc: blk %x\n", + bp->swb_block); +#endif + rmfree(swapmap, swp->sw_bsize, bp->swb_block); + } + /* + * Free swap management resources + */ + free((caddr_t)swp->sw_blocks, M_VMPGDATA); + free((caddr_t)swp, M_VMPGDATA); + free((caddr_t)pager, M_VMPAGER); +} + +swap_pager_getpage(pager, m, sync) + vm_pager_t pager; + vm_page_t m; + boolean_t sync; +{ +#ifdef DEBUG + if (swpagerdebug & SDB_FOLLOW) + printf("swpg_getpage(%x, %x, %d)\n", pager, m, sync); +#endif + return(swap_pager_io((sw_pager_t)pager->pg_data, m, B_READ)); +} + +swap_pager_putpage(pager, m, sync) + vm_pager_t pager; + vm_page_t m; + boolean_t sync; +{ + int flags; + +#ifdef DEBUG + if (swpagerdebug & SDB_FOLLOW) + printf("swpg_putpage(%x, %x, %d)\n", pager, m, sync); +#endif + if (pager == VM_PAGER_NULL) { + (void) swap_pager_clean(VM_PAGE_NULL, B_WRITE); + return; + } + flags = B_WRITE; + if (!sync) + flags |= B_ASYNC; + return(swap_pager_io((sw_pager_t)pager->pg_data, m, flags)); +} + +boolean_t +swap_pager_haspage(pager, offset) + vm_pager_t pager; + vm_offset_t offset; +{ + register sw_pager_t swp; + register sw_blk_t swb; + int ix; + +#ifdef DEBUG + if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK)) + printf("swpg_haspage(%x, %x) ", pager, offset); +#endif + swp = (sw_pager_t) pager->pg_data; + ix = offset / dbtob(swp->sw_bsize); + if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) { +#ifdef DEBUG + if (swpagerdebug & (SDB_FAIL|SDB_FOLLOW|SDB_ALLOCBLK)) + printf("swpg_haspage: %x bad offset %x, ix %x\n", + swp->sw_blocks, offset, ix); +#endif + return(FALSE); + } + swb = &swp->sw_blocks[ix]; + if (swb->swb_block) + ix = atop(offset % dbtob(swp->sw_bsize)); +#ifdef DEBUG + if (swpagerdebug & SDB_ALLOCBLK) + printf("%x blk %x+%x ", swp->sw_blocks, swb->swb_block, ix); + if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK)) + 
printf("-> %c\n", + "FT"[swb->swb_block && (swb->swb_mask & (1 << ix))]); +#endif + if (swb->swb_block && (swb->swb_mask & (1 << ix))) + return(TRUE); + return(FALSE); +} + +/* + * Scaled down version of swap(). + * Assumes that PAGE_SIZE < MAXPHYS; i.e. only one operation needed. + * BOGUS: lower level IO routines expect a KVA so we have to map our + * provided physical page into the KVA to keep them happy. + */ +swap_pager_io(swp, m, flags) + register sw_pager_t swp; + vm_page_t m; + int flags; +{ + register struct buf *bp; + register sw_blk_t swb; + register int s; + int ix; + boolean_t rv; + vm_offset_t kva, off; + swp_clean_t spc; + +#ifdef DEBUG + /* save panic time state */ + if ((swpagerdebug & SDB_ANOMPANIC) && panicstr) + return; + if (swpagerdebug & (SDB_FOLLOW|SDB_IO)) + printf("swpg_io(%x, %x, %x)\n", swp, m, flags); +#endif + + /* + * For reads (pageins) and synchronous writes, we clean up + * all completed async pageouts and check to see if this + * page is currently being cleaned. If it is, we just wait + * til the operation is done before continuing. + */ + if ((flags & B_ASYNC) == 0) { + s = splbio(); + while (swap_pager_clean(m, flags&B_READ)) { + swp->sw_flags |= SW_WANTED; + assert_wait((int)swp); + thread_block(); + } + splx(s); + } + /* + * For async writes (pageouts), we cleanup completed pageouts so + * that all available resources are freed. Also tells us if this + * page is already being cleaned. If it is, or no resources + * are available, we try again later. + */ + else if (swap_pager_clean(m, B_WRITE) || queue_empty(&swap_pager_free)) + return(VM_PAGER_FAIL); + + /* + * Determine swap block and allocate as necessary. + */ + off = m->offset + m->object->paging_offset; + ix = off / dbtob(swp->sw_bsize); + if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) { +#ifdef DEBUG + if (swpagerdebug & SDB_FAIL) + printf("swpg_io: bad offset %x+%x(%d) in %x\n", + m->offset, m->object->paging_offset, + ix, swp->sw_blocks); +#endif + return(VM_PAGER_FAIL); + } + swb = &swp->sw_blocks[ix]; + off = off % dbtob(swp->sw_bsize); + if (flags & B_READ) { + if (swb->swb_block == 0 || + (swb->swb_mask & (1 << atop(off))) == 0) { +#ifdef DEBUG + if (swpagerdebug & (SDB_ALLOCBLK|SDB_FAIL)) + printf("swpg_io: %x bad read: blk %x+%x, mask %x, off %x+%x\n", + swp->sw_blocks, + swb->swb_block, atop(off), + swb->swb_mask, + m->offset, m->object->paging_offset); +#endif + /* XXX: should we zero page here?? */ + return(VM_PAGER_FAIL); + } + } else if (swb->swb_block == 0) { + swb->swb_block = rmalloc(swapmap, swp->sw_bsize); + if (swb->swb_block == 0) { +#ifdef DEBUG + if (swpagerdebug & SDB_FAIL) + printf("swpg_io: rmalloc of %x failed\n", + swp->sw_bsize); +#endif + return(VM_PAGER_FAIL); + } +#ifdef DEBUG + if (swpagerdebug & (SDB_FULL|SDB_ALLOCBLK)) + printf("swpg_io: %x alloc blk %x at ix %x\n", + swp->sw_blocks, swb->swb_block, ix); +#endif + } + + /* + * Allocate a kernel virtual address and initialize so that PTE + * is available for lower level IO drivers. 
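+	 *
+	 * vm_pager_map_page() (in vm_pager.c) does roughly the following,
+	 * using a kernel submap reserved for pager IO -- the exact names
+	 * are only indicative:
+	 *
+	 *	kva = kmem_alloc_wait(pager_map, PAGE_SIZE);
+	 *	pmap_enter(vm_map_pmap(pager_map), kva, VM_PAGE_TO_PHYS(m),
+	 *		   VM_PROT_DEFAULT, TRUE);
+	 *
+	 * vm_pager_unmap_page() below undoes the mapping and frees the
+	 * kva once the IO is complete.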
+ */ + kva = vm_pager_map_page(m); + + /* + * Get a swap buffer header and perform the IO + */ + s = splbio(); + while (bswlist.av_forw == NULL) { +#ifdef DEBUG + if (swpagerdebug & SDB_ANOM) + printf("swpg_io: wait on swbuf for %x (%d)\n", + m, flags); +#endif + bswlist.b_flags |= B_WANTED; + sleep((caddr_t)&bswlist, PSWP+1); + } + bp = bswlist.av_forw; + bswlist.av_forw = bp->av_forw; + splx(s); + bp->b_flags = B_BUSY | (flags & B_READ); + bp->b_proc = &proc[0]; /* XXX (but without B_PHYS set this is ok) */ + bp->b_un.b_addr = (caddr_t)kva; + bp->b_blkno = swb->swb_block + btodb(off); + VHOLD(swapdev_vp); + bp->b_vp = swapdev_vp; + bp->b_dev = swapdev_vp->v_rdev; + bp->b_bcount = PAGE_SIZE; + if ((bp->b_flags & B_READ) == 0) + swapdev_vp->v_numoutput++; + + /* + * If this is an async write we set up additional buffer fields + * and place a "cleaning" entry on the inuse queue. + */ + if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) { +#ifdef DEBUG + if (queue_empty(&swap_pager_free)) + panic("swpg_io: lost spc"); +#endif + queue_remove_first(&swap_pager_free, + spc, swp_clean_t, spc_list); +#ifdef DEBUG + if (spc->spc_flags != SPC_FREE) + panic("swpg_io: bad free spc"); +#endif + spc->spc_flags = SPC_BUSY; + spc->spc_bp = bp; + spc->spc_swp = swp; + spc->spc_kva = kva; + spc->spc_m = m; +#ifdef DEBUG + m->pagerowned = 1; +#endif + bp->b_flags |= B_CALL; + bp->b_iodone = swap_pager_iodone; + s = splbio(); + swp->sw_poip++; + queue_enter(&swap_pager_inuse, spc, swp_clean_t, spc_list); + +#ifdef DEBUG + swap_pager_poip++; + if (swpagerdebug & SDB_WRITE) + printf("swpg_io: write: bp=%x swp=%x spc=%x poip=%d\n", + bp, swp, spc, swp->sw_poip); + if ((swpagerdebug & SDB_ALLOCBLK) && + (swb->swb_mask & (1 << atop(off))) == 0) + printf("swpg_io: %x write blk %x+%x\n", + swp->sw_blocks, swb->swb_block, atop(off)); +#endif + swb->swb_mask |= (1 << atop(off)); + /* + * XXX: Block write faults til we are done. + */ + m->page_lock = VM_PROT_WRITE; + m->unlock_request = VM_PROT_ALL; + pmap_copy_on_write(VM_PAGE_TO_PHYS(m)); + splx(s); + } +#ifdef DEBUG + if (swpagerdebug & SDB_IO) + printf("swpg_io: IO start: bp %x, db %x, va %x, pa %x\n", + bp, swb->swb_block+btodb(off), kva, VM_PAGE_TO_PHYS(m)); +#endif + VOP_STRATEGY(bp); + if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) { +#ifdef DEBUG + if (swpagerdebug & SDB_IO) + printf("swpg_io: IO started: bp %x\n", bp); +#endif + return(VM_PAGER_PEND); + } + s = splbio(); +#ifdef DEBUG + if (flags & B_READ) + swap_pager_piip++; + else + swap_pager_poip++; +#endif + while ((bp->b_flags & B_DONE) == 0) { + assert_wait((int)bp); + thread_block(); + } +#ifdef DEBUG + if (flags & B_READ) + --swap_pager_piip; + else + --swap_pager_poip; +#endif + rv = (bp->b_flags & B_ERROR) ? 
VM_PAGER_FAIL : VM_PAGER_OK; + bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY); + bp->av_forw = bswlist.av_forw; + bswlist.av_forw = bp; + if (bp->b_vp) + brelvp(bp); + if (bswlist.b_flags & B_WANTED) { + bswlist.b_flags &= ~B_WANTED; + thread_wakeup((int)&bswlist); + } + if ((flags & B_READ) == 0 && rv == VM_PAGER_OK) { + m->clean = 1; + pmap_clear_modify(VM_PAGE_TO_PHYS(m)); + } + splx(s); +#ifdef DEBUG + if (swpagerdebug & SDB_IO) + printf("swpg_io: IO done: bp %x, rv %d\n", bp, rv); + if ((swpagerdebug & SDB_FAIL) && rv == VM_PAGER_FAIL) + printf("swpg_io: IO error\n"); +#endif + vm_pager_unmap_page(kva); + return(rv); +} + +boolean_t +swap_pager_clean(m, rw) + vm_page_t m; + int rw; +{ + register swp_clean_t spc, tspc; + register int s; + +#ifdef DEBUG + /* save panic time state */ + if ((swpagerdebug & SDB_ANOMPANIC) && panicstr) + return; + if (swpagerdebug & SDB_FOLLOW) + printf("swpg_clean(%x, %d)\n", m, rw); +#endif + tspc = SWP_CLEAN_NULL; + for (;;) { + /* + * Look up and removal from inuse list must be done + * at splbio() to avoid conflicts with swap_pager_iodone. + */ + s = splbio(); + spc = (swp_clean_t) queue_first(&swap_pager_inuse); + while (!queue_end(&swap_pager_inuse, (queue_entry_t)spc)) { + if ((spc->spc_flags & SPC_DONE) && + swap_pager_finish(spc)) { + queue_remove(&swap_pager_inuse, spc, + swp_clean_t, spc_list); + break; + } + if (m && m == spc->spc_m) { +#ifdef DEBUG + if (swpagerdebug & SDB_ANOM) + printf("swpg_clean: %x on list, flags %x\n", + m, spc->spc_flags); +#endif + tspc = spc; + } + spc = (swp_clean_t) queue_next(&spc->spc_list); + } + + /* + * No operations done, thats all we can do for now. + */ + if (queue_end(&swap_pager_inuse, (queue_entry_t)spc)) + break; + splx(s); + + /* + * The desired page was found to be busy earlier in + * the scan but has since completed. + */ + if (tspc && tspc == spc) { +#ifdef DEBUG + if (swpagerdebug & SDB_ANOM) + printf("swpg_clean: %x done while looking\n", + m); +#endif + tspc = SWP_CLEAN_NULL; + } + spc->spc_flags = SPC_FREE; + vm_pager_unmap_page(spc->spc_kva); + queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list); +#ifdef DEBUG + if (swpagerdebug & SDB_WRITE) + printf("swpg_clean: free spc %x\n", spc); +#endif + } + /* + * If we found that the desired page is already being cleaned + * mark it so that swap_pager_iodone() will not set the clean + * flag before the pageout daemon has another chance to clean it. + */ + if (tspc && rw == B_WRITE) { +#ifdef DEBUG + if (swpagerdebug & SDB_ANOM) + printf("swpg_clean: %x on clean list\n", tspc); +#endif + tspc->spc_flags |= SPC_DIRTY; + } + splx(s); + +#ifdef DEBUG + if (swpagerdebug & SDB_WRITE) + printf("swpg_clean: return %d\n", tspc ? TRUE : FALSE); + if ((swpagerdebug & SDB_ANOM) && tspc) + printf("swpg_clean: %s of cleaning page %x\n", + rw == B_READ ? "get" : "put", m); +#endif + return(tspc ? TRUE : FALSE); +} + +swap_pager_finish(spc) + register swp_clean_t spc; +{ + vm_object_t object = spc->spc_m->object; + + /* + * Mark the paging operation as done. + * (XXX) If we cannot get the lock, leave it til later. + * (XXX) Also we are assuming that an async write is a + * pageout operation that has incremented the counter. + */ + if (!vm_object_lock_try(object)) + return(0); + +#ifdef DEBUG + spc->spc_m->pagerowned = 0; +#endif + + if (--object->paging_in_progress == 0) + thread_wakeup((int) object); + + /* + * XXX: this isn't even close to the right thing to do, + * introduces a variety of race conditions. 
+ * + * If dirty, vm_pageout() has attempted to clean the page + * again. In this case we do not do anything as we will + * see the page again shortly. Otherwise, if no error mark + * as clean and inform the pmap system. If error, mark as + * dirty so we will try again (XXX: could get stuck doing + * this, should give up after awhile). + */ + if ((spc->spc_flags & SPC_DIRTY) == 0) { + if (spc->spc_flags & SPC_ERROR) { + printf("swap_pager: clean of %x failed\n", + VM_PAGE_TO_PHYS(spc->spc_m)); + spc->spc_m->laundry = TRUE; + } else { + spc->spc_m->clean = TRUE; + pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m)); + } + } + /* + * XXX: allow blocked write faults to continue + */ + spc->spc_m->page_lock = spc->spc_m->unlock_request = VM_PROT_NONE; + PAGE_WAKEUP(spc->spc_m); + + vm_object_unlock(object); + return(1); +} + +swap_pager_iodone(bp) + register struct buf *bp; +{ + register swp_clean_t spc; + daddr_t blk; + int s; + +#ifdef DEBUG + /* save panic time state */ + if ((swpagerdebug & SDB_ANOMPANIC) && panicstr) + return; + if (swpagerdebug & SDB_FOLLOW) + printf("swpg_iodone(%x)\n", bp); +#endif + s = splbio(); + spc = (swp_clean_t) queue_first(&swap_pager_inuse); + while (!queue_end(&swap_pager_inuse, (queue_entry_t)spc)) { + if (spc->spc_bp == bp) + break; + spc = (swp_clean_t) queue_next(&spc->spc_list); + } +#ifdef DEBUG + if (queue_end(&swap_pager_inuse, (queue_entry_t)spc)) + panic("swpg_iodone: bp not found"); +#endif + + spc->spc_flags &= ~SPC_BUSY; + spc->spc_flags |= SPC_DONE; + if (bp->b_flags & B_ERROR) + spc->spc_flags |= SPC_ERROR; + spc->spc_bp = NULL; + blk = bp->b_blkno; + +#ifdef DEBUG + --swap_pager_poip; + if (swpagerdebug & SDB_WRITE) + printf("swpg_iodone: bp=%x swp=%x flags=%x spc=%x poip=%x\n", + bp, spc->spc_swp, spc->spc_swp->sw_flags, + spc, spc->spc_swp->sw_poip); +#endif + + spc->spc_swp->sw_poip--; + if (spc->spc_swp->sw_flags & SW_WANTED) { + spc->spc_swp->sw_flags &= ~SW_WANTED; + thread_wakeup((int)spc->spc_swp); + } + + bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY); + bp->av_forw = bswlist.av_forw; + bswlist.av_forw = bp; + if (bp->b_vp) + brelvp(bp); + if (bswlist.b_flags & B_WANTED) { + bswlist.b_flags &= ~B_WANTED; + thread_wakeup((int)&bswlist); + } +#if 0 + /* + * XXX: this isn't even close to the right thing to do, + * introduces a variety of race conditions. + * + * If dirty, vm_pageout() has attempted to clean the page + * again. In this case we do not do anything as we will + * see the page again shortly. Otherwise, if no error mark + * as clean and inform the pmap system. If error, mark as + * dirty so we will try again (XXX: could get stuck doing + * this, should give up after awhile). + */ + if ((spc->spc_flags & SPC_DIRTY) == 0) { + if (spc->spc_flags & SPC_ERROR) { + printf("swap_pager: clean of %x (block %x) failed\n", + VM_PAGE_TO_PHYS(spc->spc_m), blk); + spc->spc_m->laundry = TRUE; + } else { + spc->spc_m->clean = TRUE; + pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m)); + } + } + /* + * XXX: allow blocked write faults to continue + */ + spc->spc_m->page_lock = spc->spc_m->unlock_request = VM_PROT_NONE; + PAGE_WAKEUP(spc->spc_m); +#endif + + thread_wakeup((int) &vm_pages_needed); + splx(s); +} +#endif diff --git a/usr/src/sys/vm/swap_pager.h b/usr/src/sys/vm/swap_pager.h new file mode 100644 index 0000000000..0eef48dafb --- /dev/null +++ b/usr/src/sys/vm/swap_pager.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 1990 University of Utah. + * Copyright (c) 1991 The Regents of the University of California. 
+ * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * the Systems Programming Group of the University of Utah Computer + * Science Department. + * + * %sccs.include.redist.c% + * + * @(#)swap_pager.h 7.1 (Berkeley) %G% + */ + +#ifndef _SWAP_PAGER_ +#define _SWAP_PAGER_ 1 + +/* + * In the swap pager, the backing store for an object is organized as an + * array of some number of "swap blocks". A swap block consists of a bitmask + * and some number of contiguous DEV_BSIZE disk blocks. The minimum size + * of a swap block is: + * + * max(PAGE_SIZE, dmmin*DEV_BSIZE) [ 32k currently ] + * + * bytes (since the pager interface is page oriented), the maximum size is: + * + * min(#bits(swb_mask)*PAGE_SIZE, dmmax*DEV_BSIZE) [ 128k currently ] + * + * where dmmin and dmmax are left over from the old VM interface. The bitmask + * (swb_mask) is used by swap_pager_haspage() to determine if a particular + * page has actually been written; i.e. the pager copy of the page is valid. + * All swap blocks in the backing store of an object will be the same size. + * + * The reason for variable sized swap blocks is to reduce fragmentation of + * swap resources. Whenever possible we allocate smaller swap blocks to + * smaller objects. The swap block size is determined from a table of + * object-size vs. swap-block-size computed at boot time. + */ +typedef int sw_bm_t; /* pager bitmask */ + +struct swblock { + sw_bm_t swb_mask; /* bitmask of valid pages in this block */ + daddr_t swb_block; /* starting disk block for this block */ +}; +typedef struct swblock *sw_blk_t; + +/* + * Swap pager private data. + */ +struct swpager { + vm_size_t sw_osize; /* size of object we are backing (bytes) */ + int sw_bsize; /* size of swap blocks (DEV_BSIZE units) */ + int sw_nblocks;/* number of blocks in list (sw_blk_t units) */ + sw_blk_t sw_blocks; /* pointer to list of swap blocks */ + short sw_flags; /* flags */ + short sw_poip; /* pageouts in progress */ +}; +typedef struct swpager *sw_pager_t; + +#define SW_WANTED 0x01 +#define SW_NAMED 0x02 + +#ifdef KERNEL + +void swap_pager_init(); +vm_pager_t swap_pager_alloc(); +void swap_pager_dealloc(); +boolean_t swap_pager_getpage(), swap_pager_putpage(); +boolean_t swap_pager_haspage(); + +struct pagerops swappagerops = { + swap_pager_init, + swap_pager_alloc, + swap_pager_dealloc, + swap_pager_getpage, + swap_pager_putpage, + swap_pager_haspage +}; + +int swap_pager_iodone(); +boolean_t swap_pager_clean(); + +#endif + +#endif /* _SWAP_PAGER_ */ diff --git a/usr/src/sys/vm/vm_mmap.c b/usr/src/sys/vm/vm_mmap.c new file mode 100644 index 0000000000..230b280655 --- /dev/null +++ b/usr/src/sys/vm/vm_mmap.c @@ -0,0 +1,818 @@ +/* + * Copyright (c) 1988 University of Utah. + * Copyright (c) 1991 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * the Systems Programming Group of the University of Utah Computer + * Science Department. 
+ * + * %sccs.include.redist.c% + * + * from: Utah $Hdr: vm_mmap.c 1.3 90/01/21$ + * + * @(#)vm_mmap.c 7.1 (Berkeley) %G% + */ + +/* + * Mapped file (mmap) interface to VM + */ + +#include "param.h" +#include "systm.h" +#include "user.h" +#include "proc.h" +#include "vnode.h" +#include "specdev.h" +#include "file.h" +#include "mman.h" +#include "conf.h" + +#include "../vm/vm_param.h" +#include "../vm/vm_map.h" +#include "../vm/vm_pager.h" +#include "../vm/vm_prot.h" + +#ifdef DEBUG +int mmapdebug = 0; +#define MDB_FOLLOW 0x01 +#define MDB_SYNC 0x02 +#define MDB_MAPIT 0x04 +#endif + +/* ARGSUSED */ +getpagesize(p, uap, retval) + struct proc *p; + struct args *uap; + int *retval; +{ + + *retval = NBPG * CLSIZE; + return (0); +} + +/* ARGSUSED */ +sbrk(p, uap, retval) + struct proc *p; + struct args { + int incr; + } *uap; + int *retval; +{ + + /* Not yet implemented */ + return (EOPNOTSUPP); +} + +/* ARGSUSED */ +sstk(p, uap, retval) + struct proc *p; + struct args { + int incr; + } *uap; + int *retval; +{ + + /* Not yet implemented */ + return (EOPNOTSUPP); +} + +smmap(p, uap, retval) + register struct proc *p; + register struct args { + caddr_t addr; + int len; + int prot; + int flags; + int fd; + off_t pos; + } *uap; + int *retval; +{ + struct file *fp; + struct vnode *vp; + vm_offset_t addr; + vm_size_t size; + vm_prot_t prot; + caddr_t handle; + int mtype, error; + +#ifdef DEBUG + if (mmapdebug & MDB_FOLLOW) + printf("mmap(%d): addr %x len %x pro %x flg %x fd %d pos %x\n", + p->p_pid, uap->addr, uap->len, uap->prot, + uap->flags, uap->fd, uap->pos); +#endif + /* + * Make sure one of the sharing types is specified + */ + mtype = uap->flags & MAP_TYPE; + switch (mtype) { + case MAP_FILE: + case MAP_ANON: + break; + default: + return(EINVAL); + } + /* + * Address (if FIXED) and size must be page aligned + */ + size = (vm_size_t)uap->len; + addr = (vm_offset_t)uap->addr; + if ((size & page_mask) || + (uap->flags & MAP_FIXED) && (addr & page_mask)) + return(EINVAL); + /* + * Mapping file or named anonymous, get fp for validation + */ + if (mtype == MAP_FILE || uap->fd != -1) { + if ((unsigned)uap->fd >= NOFILE || + (fp = u.u_ofile[uap->fd]) == NULL) + return(EBADF); + } + /* + * If we are mapping a file we need to check various + * file/vnode related things. + */ + if (mtype == MAP_FILE) { + /* + * Obtain vnode and make sure it is of appropriate type + */ + if (fp->f_type != DTYPE_VNODE) + return(EINVAL); + vp = (struct vnode *)fp->f_data; + if (vp->v_type != VREG && vp->v_type != VCHR) + return(EINVAL); + /* + * Ensure that file protection and desired protection + * are compatible. Note that we only worry about writability + * if mapping is shared. 
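+		 *
+		 * For example, a file opened read-only may still be mapped
+		 * with PROT_WRITE as long as the mapping is private (changes
+		 * then go to anonymous copies and are never written back to
+		 * the file), but a MAP_SHARED mapping with PROT_WRITE
+		 * requires that the file was opened for writing.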
+ */ + if ((uap->prot & PROT_READ) && (fp->f_flag & FREAD) == 0 || + ((uap->flags & MAP_SHARED) && + (uap->prot & PROT_WRITE) && (fp->f_flag & FWRITE) == 0)) + return(EACCES); + handle = (caddr_t)vp; + } else if (uap->fd != -1) + handle = (caddr_t)fp; + else + handle = NULL; + /* + * Map protections to MACH style + */ + prot = VM_PROT_NONE; + if (uap->prot & PROT_READ) + prot |= VM_PROT_READ; + if (uap->prot & PROT_WRITE) + prot |= VM_PROT_WRITE; + if (uap->prot & PROT_EXEC) + prot |= VM_PROT_EXECUTE; + + error = vm_mmap(p->p_map, &addr, size, prot, + uap->flags, handle, (vm_offset_t)uap->pos); + if (error == 0) + *retval = (int) addr; + return(error); +} + +msync(p, uap, retval) + struct proc *p; + struct args { + char *addr; + int len; + } *uap; + int *retval; +{ + vm_offset_t addr, objoff, oaddr; + vm_size_t size, osize; + vm_prot_t prot, mprot; + vm_inherit_t inherit; + vm_object_t object; + boolean_t shared; + int rv; + +#ifdef DEBUG + if (mmapdebug & (MDB_FOLLOW|MDB_SYNC)) + printf("msync(%d): addr %x len %x\n", + p->p_pid, uap->addr, uap->len); +#endif + if (((int)uap->addr & page_mask) || (uap->len & page_mask)) + return(EINVAL); + addr = oaddr = (vm_offset_t)uap->addr; + osize = (vm_size_t)uap->len; + /* + * Region must be entirely contained in a single entry + */ + if (!vm_map_is_allocated(p->p_map, addr, addr+osize, TRUE)) + return(EINVAL); + /* + * Determine the object associated with that entry + * (object is returned locked on KERN_SUCCESS) + */ + rv = vm_region(p->p_map, &addr, &size, &prot, &mprot, + &inherit, &shared, &object, &objoff); + if (rv != KERN_SUCCESS) + return(EINVAL); +#ifdef DEBUG + if (mmapdebug & MDB_SYNC) + printf("msync: region: object %x addr %x size %d objoff %d\n", + object, addr, size, objoff); +#endif + /* + * Do not msync non-vnoded backed objects. + */ + if (object->internal || object->pager == vm_pager_null || + object->pager->pg_type != PG_VNODE) { + vm_object_unlock(object); + return(EINVAL); + } + objoff += oaddr - addr; + if (osize == 0) + osize = size; +#ifdef DEBUG + if (mmapdebug & MDB_SYNC) + printf("msync: cleaning/flushing object range [%x-%x)\n", + objoff, objoff+osize); +#endif + if (prot & VM_PROT_WRITE) + vm_object_page_clean(object, objoff, objoff+osize); + /* + * (XXX) + * Bummer, gotta flush all cached pages to ensure + * consistency with the file system cache. 
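+	 * Removing the pages forces later faults on this range back to
+	 * the vnode pager so that modifications made with write() become
+	 * visible to the mapping; the vm_object_page_clean() call above
+	 * (done for writable mappings) already pushed mapped-file
+	 * modifications the other way so that read() sees them.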
+ */ + vm_object_page_remove(object, objoff, objoff+osize); + vm_object_unlock(object); + return(0); +} + +munmap(p, uap, retval) + register struct proc *p; + register struct args { + caddr_t addr; + int len; + } *uap; + int *retval; +{ + vm_offset_t addr; + vm_size_t size; + +#ifdef DEBUG + if (mmapdebug & MDB_FOLLOW) + printf("munmap(%d): addr %x len %x\n", + p->p_pid, uap->addr, uap->len); +#endif + + addr = (vm_offset_t) uap->addr; + size = (vm_size_t) uap->len; + if ((addr & page_mask) || (size & page_mask)) + return(EINVAL); + if (size == 0) + return(0); + if (!vm_map_is_allocated(p->p_map, addr, addr+size, FALSE)) + return(EINVAL); + /* returns nothing but KERN_SUCCESS anyway */ + (void) vm_map_remove(p->p_map, addr, addr+size); + return(0); +} + +munmapfd(fd) +{ +#ifdef DEBUG + if (mmapdebug & MDB_FOLLOW) + printf("munmapfd(%d): fd %d\n", u.u_procp->p_pid, fd); +#endif + + /* + * XXX -- should vm_deallocate any regions mapped to this file + */ + u.u_pofile[fd] &= ~UF_MAPPED; +} + +mprotect(p, uap, retval) + struct proc *p; + struct args { + char *addr; + int len; + int prot; + } *uap; + int *retval; +{ + vm_offset_t addr; + vm_size_t size; + register vm_prot_t prot; + +#ifdef DEBUG + if (mmapdebug & MDB_FOLLOW) + printf("mprotect(%d): addr %x len %x prot %d\n", + p->p_pid, uap->addr, uap->len, uap->prot); +#endif + + addr = (vm_offset_t) uap->addr; + size = (vm_size_t) uap->len; + if ((addr & page_mask) || (size & page_mask)) + return(EINVAL); + /* + * Map protections + */ + prot = VM_PROT_NONE; + if (uap->prot & PROT_READ) + prot |= VM_PROT_READ; + if (uap->prot & PROT_WRITE) + prot |= VM_PROT_WRITE; + if (uap->prot & PROT_EXEC) + prot |= VM_PROT_EXECUTE; + + switch (vm_map_protect(p->p_map, addr, addr+size, prot, FALSE)) { + case KERN_SUCCESS: + return (0); + case KERN_PROTECTION_FAILURE: + return (EACCES); + } + return (EINVAL); +} + +/* ARGSUSED */ +madvise(p, uap, retval) + struct proc *p; + struct args { + char *addr; + int len; + int behav; + } *uap; + int *retval; +{ + + /* Not yet implemented */ + return (EOPNOTSUPP); +} + +/* ARGSUSED */ +mincore(p, uap, retval) + struct proc *p; + struct args { + char *addr; + int len; + char *vec; + } *uap; + int *retval; +{ + + /* Not yet implemented */ + return (EOPNOTSUPP); +} + +/* + * Internal version of mmap. + * Currently used by mmap, exec, and sys5 shared memory. + * Handle is: + * MAP_FILE: a vnode pointer + * MAP_ANON: NULL or a file pointer + */ +vm_mmap(map, addr, size, prot, flags, handle, foff) + register vm_map_t map; + register vm_offset_t *addr; + register vm_size_t size; + vm_prot_t prot; + register int flags; + caddr_t handle; /* XXX should be vp */ + vm_offset_t foff; +{ + register vm_pager_t pager; + boolean_t fitit; + vm_object_t object; + struct vnode *vp; + int type; + int rv = KERN_SUCCESS; + + if (size == 0) + return (0); + + if ((flags & MAP_FIXED) == 0) { + fitit = TRUE; + *addr = round_page(*addr); + } else { + fitit = FALSE; + (void) vm_deallocate(map, *addr, size); + } + + /* + * Lookup/allocate pager. All except an unnamed anonymous lookup + * gain a reference to ensure continued existance of the object. + * (XXX the exception is to appease the pageout daemon) + */ + if ((flags & MAP_TYPE) == MAP_ANON) + type = PG_DFLT; + else { + vp = (struct vnode *)handle; + if (vp->v_type == VCHR) { + type = PG_DEVICE; + handle = (caddr_t)vp->v_rdev; + } else + type = PG_VNODE; + } + pager = vm_pager_allocate(type, handle, size, prot); + if (pager == VM_PAGER_NULL) + return (type == PG_DEVICE ? 
EINVAL : ENOMEM); + /* + * Find object and release extra reference gained by lookup + */ + object = vm_object_lookup(pager); + vm_object_deallocate(object); + + /* + * Anonymous memory. + */ + if ((flags & MAP_TYPE) == MAP_ANON) { + rv = vm_allocate_with_pager(map, addr, size, fitit, + pager, (vm_offset_t)foff, TRUE); + if (rv != KERN_SUCCESS) { + if (handle == NULL) + vm_pager_deallocate(pager); + else + vm_object_deallocate(object); + goto out; + } + /* + * Don't cache anonymous objects. + * Loses the reference gained by vm_pager_allocate. + */ + (void) pager_cache(object, FALSE); +#ifdef DEBUG + if (mmapdebug & MDB_MAPIT) + printf("vm_mmap(%d): ANON *addr %x size %x pager %x\n", + u.u_procp->p_pid, *addr, size, pager); +#endif + } + /* + * Must be type MAP_FILE. + * Distinguish between character special and regular files. + */ + else if (vp->v_type == VCHR) { + rv = vm_allocate_with_pager(map, addr, size, fitit, + pager, (vm_offset_t)foff, FALSE); + /* + * Uncache the object and lose the reference gained + * by vm_pager_allocate(). If the call to + * vm_allocate_with_pager() was sucessful, then we + * gained an additional reference ensuring the object + * will continue to exist. If the call failed then + * the deallocate call below will terminate the + * object which is fine. + */ + (void) pager_cache(object, FALSE); + if (rv != KERN_SUCCESS) + goto out; + } + /* + * A regular file + */ + else { +#ifdef DEBUG + if (object == VM_OBJECT_NULL) + printf("vm_mmap: no object: vp %x, pager %x\n", + vp, pager); +#endif + /* + * Map it directly. + * Allows modifications to go out to the vnode. + */ + if (flags & MAP_SHARED) { + rv = vm_allocate_with_pager(map, addr, size, + fitit, pager, + (vm_offset_t)foff, FALSE); + if (rv != KERN_SUCCESS) { + vm_object_deallocate(object); + goto out; + } + /* + * Don't cache the object. This is the easiest way + * of ensuring that data gets back to the filesystem + * because vnode_pager_deallocate() will fsync the + * vnode. pager_cache() will lose the extra ref. + */ + if (prot & VM_PROT_WRITE) + pager_cache(object, FALSE); + else + vm_object_deallocate(object); + } + /* + * Copy-on-write of file. Two flavors. + * MAP_COPY is true COW, you essentially get a snapshot of + * the region at the time of mapping. MAP_PRIVATE means only + * that your changes are not reflected back to the object. + * Changes made by others will be seen. + */ + else { + vm_map_t tmap; + vm_offset_t off; + + /* locate and allocate the target address space */ + rv = vm_map_find(map, VM_OBJECT_NULL, (vm_offset_t)0, + addr, size, fitit); + if (rv != KERN_SUCCESS) { + vm_object_deallocate(object); + goto out; + } + tmap = vm_map_create(pmap_create(size), VM_MIN_ADDRESS, + VM_MIN_ADDRESS+size, TRUE); + off = VM_MIN_ADDRESS; + rv = vm_allocate_with_pager(tmap, &off, size, + TRUE, pager, + (vm_offset_t)foff, FALSE); + if (rv != KERN_SUCCESS) { + vm_object_deallocate(object); + vm_map_deallocate(tmap); + goto out; + } + /* + * (XXX) + * MAP_PRIVATE implies that we see changes made by + * others. To ensure that we need to guarentee that + * no copy object is created (otherwise original + * pages would be pushed to the copy object and we + * would never see changes made by others). We + * totally sleeze it right now by marking the object + * internal temporarily. + */ + if ((flags & MAP_COPY) == 0) + object->internal = TRUE; + rv = vm_map_copy(map, tmap, *addr, size, off, + FALSE, FALSE); + object->internal = FALSE; + /* + * (XXX) + * My oh my, this only gets worse... 
+ * Force creation of a shadow object so that + * vm_map_fork will do the right thing. + */ + if ((flags & MAP_COPY) == 0) { + vm_map_t tmap; + vm_map_entry_t tentry; + vm_object_t tobject; + vm_offset_t toffset; + vm_prot_t tprot; + boolean_t twired, tsu; + + tmap = map; + vm_map_lookup(&tmap, *addr, VM_PROT_WRITE, + &tentry, &tobject, &toffset, + &tprot, &twired, &tsu); + vm_map_lookup_done(tmap, tentry); + } + /* + * (XXX) + * Map copy code cannot detect sharing unless a + * sharing map is involved. So we cheat and write + * protect everything ourselves. Note we cannot + * use vm_object_pmap_copy() because that relies + * on the page copy_on_write bit which isn't + * always accurate with shared objects. + */ + vm_object_pmap_force_copy(object, (vm_offset_t)foff, + (vm_offset_t)foff+size); + vm_object_deallocate(object); + vm_map_deallocate(tmap); + if (rv != KERN_SUCCESS) + goto out; + } +#ifdef DEBUG + if (mmapdebug & MDB_MAPIT) + printf("vm_mmap(%d): FILE *addr %x size %x pager %x\n", + u.u_procp->p_pid, *addr, size, pager); +#endif + } + /* + * Correct protection (default is VM_PROT_ALL). + * Note that we set the maximum protection. This may not be + * entirely correct. Maybe the maximum protection should be based + * on the object permissions where it makes sense (e.g. a vnode). + * + * Changed my mind: leave max prot at VM_PROT_ALL. + */ + if (prot != VM_PROT_ALL) { + rv = vm_map_protect(map, *addr, *addr+size, prot, FALSE); + if (rv != KERN_SUCCESS) { + (void) vm_deallocate(map, *addr, size); + goto out; + } + } + /* + * Shared memory is also shared with children. + */ + if (flags & MAP_SHARED) { + rv = vm_inherit(map, *addr, size, VM_INHERIT_SHARE); + if (rv != KERN_SUCCESS) { + (void) vm_deallocate(map, *addr, size); + goto out; + } + } +out: +#ifdef DEBUG + if (mmapdebug & MDB_MAPIT) + printf("vm_mmap: rv %d\n", rv); +#endif + switch (rv) { + case KERN_SUCCESS: + return (0); + case KERN_INVALID_ADDRESS: + case KERN_NO_SPACE: + return (ENOMEM); + case KERN_PROTECTION_FAILURE: + return (EACCES); + default: + return (EINVAL); + } +} + +/* + * Internal bastardized version of MACHs vm_region system call. + * Given address and size it returns map attributes as well + * as the (locked) object mapped at that location. 
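+ *
+ * The object comes back locked only on KERN_SUCCESS; the caller is
+ * expected to unlock it when done, as msync() above does:
+ *
+ *	rv = vm_region(map, &addr, &size, &prot, &mprot,
+ *		       &inherit, &shared, &object, &objoff);
+ *	if (rv == KERN_SUCCESS) {
+ *		... examine object ...
+ *		vm_object_unlock(object);
+ *	}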
+ */ +vm_region(map, addr, size, prot, max_prot, inheritance, shared, object, objoff) + vm_map_t map; + vm_offset_t *addr; /* IN/OUT */ + vm_size_t *size; /* OUT */ + vm_prot_t *prot; /* OUT */ + vm_prot_t *max_prot; /* OUT */ + vm_inherit_t *inheritance; /* OUT */ + boolean_t *shared; /* OUT */ + vm_object_t *object; /* OUT */ + vm_offset_t *objoff; /* OUT */ +{ + vm_map_entry_t tmp_entry; + register + vm_map_entry_t entry; + register + vm_offset_t tmp_offset; + vm_offset_t start; + + if (map == VM_MAP_NULL) + return(KERN_INVALID_ARGUMENT); + + start = *addr; + + vm_map_lock_read(map); + if (!vm_map_lookup_entry(map, start, &tmp_entry)) { + if ((entry = tmp_entry->next) == &map->header) { + vm_map_unlock_read(map); + return(KERN_NO_SPACE); + } + start = entry->start; + *addr = start; + } else + entry = tmp_entry; + + *prot = entry->protection; + *max_prot = entry->max_protection; + *inheritance = entry->inheritance; + + tmp_offset = entry->offset + (start - entry->start); + *size = (entry->end - start); + + if (entry->is_a_map) { + register vm_map_t share_map; + vm_size_t share_size; + + share_map = entry->object.share_map; + + vm_map_lock_read(share_map); + (void) vm_map_lookup_entry(share_map, tmp_offset, &tmp_entry); + + if ((share_size = (tmp_entry->end - tmp_offset)) < *size) + *size = share_size; + + vm_object_lock(tmp_entry->object); + *object = tmp_entry->object.vm_object; + *objoff = tmp_entry->offset + (tmp_offset - tmp_entry->start); + + *shared = (share_map->ref_count != 1); + vm_map_unlock_read(share_map); + } else { + vm_object_lock(entry->object); + *object = entry->object.vm_object; + *objoff = tmp_offset; + + *shared = FALSE; + } + + vm_map_unlock_read(map); + + return(KERN_SUCCESS); +} + +/* + * Yet another bastard routine. + */ +vm_allocate_with_pager(map, addr, size, fitit, pager, poffset, internal) + register vm_map_t map; + register vm_offset_t *addr; + register vm_size_t size; + boolean_t fitit; + vm_pager_t pager; + vm_offset_t poffset; + boolean_t internal; +{ + register vm_object_t object; + register int result; + + if (map == VM_MAP_NULL) + return(KERN_INVALID_ARGUMENT); + + *addr = trunc_page(*addr); + size = round_page(size); + + /* + * Lookup the pager/paging-space in the object cache. + * If it's not there, then create a new object and cache + * it. + */ + object = vm_object_lookup(pager); + vm_stat.lookups++; + if (object == VM_OBJECT_NULL) { + object = vm_object_allocate(size); + vm_object_enter(object, pager); + } else + vm_stat.hits++; + object->internal = internal; + + result = vm_map_find(map, object, poffset, addr, size, fitit); + if (result != KERN_SUCCESS) + vm_object_deallocate(object); + else if (pager != vm_pager_null) + vm_object_setpager(object, pager, (vm_offset_t) 0, TRUE); + return(result); +} + +/* + * XXX: this routine belongs in vm_map.c. + * + * Returns TRUE if the range [start - end) is allocated in either + * a single entry (single_entry == TRUE) or multiple contiguous + * entries (single_entry == FALSE). + * + * start and end should be page aligned. 
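+ *
+ * For example, a range covered by two adjacent map entries is
+ * considered allocated when single_entry is FALSE but not when it is
+ * TRUE; msync() above uses the strict form, munmap() the relaxed one.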
+ */ +boolean_t +vm_map_is_allocated(map, start, end, single_entry) + vm_map_t map; + vm_offset_t start, end; + boolean_t single_entry; +{ + vm_map_entry_t mapent; + register vm_offset_t nend; + + vm_map_lock_read(map); + + /* + * Start address not in any entry + */ + if (!vm_map_lookup_entry(map, start, &mapent)) { + vm_map_unlock_read(map); + return (FALSE); + } + /* + * Find the maximum stretch of contiguously allocated space + */ + nend = mapent->end; + if (!single_entry) { + mapent = mapent->next; + while (mapent != &map->header && mapent->start == nend) { + nend = mapent->end; + mapent = mapent->next; + } + } + + vm_map_unlock_read(map); + return (end <= nend); +} + +#include "../vm/vm_page.h" + +/* + * Doesn't trust the COW bit in the page structure. + * vm_fault can improperly set it. + */ +void +vm_object_pmap_force_copy(object, start, end) + register vm_object_t object; + register vm_offset_t start; + register vm_offset_t end; +{ + register vm_page_t p; + + if (object == VM_OBJECT_NULL) + return; + + vm_object_lock(object); + p = (vm_page_t) queue_first(&object->memq); + while (!queue_end(&object->memq, (queue_entry_t) p)) { + if (start <= p->offset && p->offset < end) { + pmap_copy_on_write(VM_PAGE_TO_PHYS(p)); + p->copy_on_write = TRUE; + } + p = (vm_page_t) queue_next(&p->listq); + } + vm_object_unlock(object); +} diff --git a/usr/src/sys/vm/vm_pager.h b/usr/src/sys/vm/vm_pager.h new file mode 100644 index 0000000000..44db4357e3 --- /dev/null +++ b/usr/src/sys/vm/vm_pager.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 1990 University of Utah. + * Copyright (c) 1991 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * the Systems Programming Group of the University of Utah Computer + * Science Department. + * + * %sccs.include.redist.c% + * + * @(#)vm_pager.h 7.1 (Berkeley) %G% + */ + +/* + * Pager routine interface definition. + * For BSD we use a cleaner version of the internal pager interface. + */ + +#ifndef _VM_PAGER_ +#define _VM_PAGER_ + +#ifdef KERNEL +#include "types.h" +#include "queue.h" +#else +#include +#include +#endif + +struct pager_struct { + queue_head_t pg_list; /* links for list management */ + caddr_t pg_handle; /* external handle (vp, dev, fp) */ + int pg_type; /* type of pager */ + struct pagerops *pg_ops; /* pager operations */ + caddr_t pg_data; /* private pager data */ +}; +typedef struct pager_struct *vm_pager_t; + +#define vm_pager_null ((vm_pager_t) 0) /* XXX MACH compat */ +#define VM_PAGER_NULL ((vm_pager_t) 0) + +/* pager types */ +#define PG_DFLT -1 +#define PG_SWAP 0 +#define PG_VNODE 1 +#define PG_DEVICE 2 + +struct pagerops { + void (*pgo_init)(); /* initialize pager */ + vm_pager_t (*pgo_alloc)(); /* allocate pager */ + void (*pgo_dealloc)(); /* disassociate */ + int (*pgo_getpage)(); /* get (read) page */ + int (*pgo_putpage)(); /* put (write) page */ + boolean_t (*pgo_haspage)(); /* does pager have page? 
*/ +}; +#define PAGER_OPS_NULL ((struct pagerops *)0) + +/* + * get/put return values + * OK operation was successful + * BAD specified data was out of the accepted range + * FAIL specified data was in range, but doesn't exist + * PEND operations was initiated but not completed + */ +#define VM_PAGER_OK 0 +#define VM_PAGER_BAD 1 +#define VM_PAGER_FAIL 2 +#define VM_PAGER_PEND 3 + +#define VM_PAGER_ALLOC(h, s, p) (*(pg)->pg_ops->pgo_alloc)(h, s, p) +#define VM_PAGER_DEALLOC(pg) (*(pg)->pg_ops->pgo_dealloc)(pg) +#define VM_PAGER_GET(pg, m, s) (*(pg)->pg_ops->pgo_getpage)(pg, m, s) +#define VM_PAGER_PUT(pg, m, s) (*(pg)->pg_ops->pgo_putpage)(pg, m, s) +#define VM_PAGER_HASPAGE(pg, o) (*(pg)->pg_ops->pgo_haspage)(pg, o) + +#ifdef KERNEL +vm_pager_t vm_pager_allocate(); +void vm_pager_deallocate(); +int vm_pager_get(); +int vm_pager_put(); +boolean_t vm_pager_has_page(); + +vm_offset_t vm_pager_map_page(); +void vm_pager_unmap_page(); +vm_pager_t vm_pager_lookup(); +void vm_pager_sync(); + +extern struct pagerops *dfltpagerops; +#endif + +#endif /* _VM_PAGER_ */ diff --git a/usr/src/sys/vm/vm_unix.c b/usr/src/sys/vm/vm_unix.c new file mode 100644 index 0000000000..a9f954d258 --- /dev/null +++ b/usr/src/sys/vm/vm_unix.c @@ -0,0 +1,103 @@ +/* + * Copyright (c) 1988 University of Utah. + * Copyright (c) 1991 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * the Systems Programming Group of the University of Utah Computer + * Science Department. + * + * %sccs.include.redist.c% + * + * from: Utah $Hdr: vm_unix.c 1.1 89/11/07$ + * + * @(#)vm_unix.c 7.1 (Berkeley) %G% + */ + +/* + * Traditional sbrk/grow interface to VM + */ +#include "param.h" +#include "systm.h" +#include "user.h" +#include "proc.h" + +#include "../vm/vm_param.h" +#include "machine/vmparam.h" + +/* ARGSUSED */ +obreak(p, uap, retval) + struct proc *p; + struct args { + char *nsiz; + } *uap; + int *retval; +{ + vm_offset_t new, old; + int rv; + register int diff; + + old = (vm_offset_t)u.u_daddr; + new = round_page(uap->nsiz); + if ((int)(new - old) > u.u_rlimit[RLIMIT_DATA].rlim_cur) + return(ENOMEM); + old = round_page(old + ctob(u.u_dsize)); + diff = new - old; + if (diff > 0) { + rv = vm_allocate(p->p_map, &old, diff, FALSE); + if (rv != KERN_SUCCESS) { + uprintf("sbrk: grow failed, return = %d\n", rv); + return(ENOMEM); + } + u.u_dsize += btoc(diff); + } else if (diff < 0) { + diff = -diff; + rv = vm_deallocate(p->p_map, new, diff); + if (rv != KERN_SUCCESS) { + uprintf("sbrk: shrink failed, return = %d\n", rv); + return(ENOMEM); + } + u.u_dsize -= btoc(diff); + } + return(0); +} + +/* + * grow the stack to include the SP + * true return if successful. + */ +grow(sp) + unsigned sp; +{ + register int si; + + /* + * For user defined stacks (from sendsig). + */ + if (sp < (unsigned)u.u_maxsaddr) + return (0); + /* + * For common case of already allocated (from trap). + */ + if (sp >= USRSTACK-ctob(u.u_ssize)) + return (1); + /* + * Really need to check vs limit and increment stack size if ok. 
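+	 * si is the additional stack size needed: the clicks from
+	 * USRSTACK down to sp, less the current u_ssize, rounded up to a
+	 * click-cluster multiple by clrnd().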
+ */ + si = clrnd(btoc(USRSTACK-sp) - u.u_ssize); + if (u.u_ssize+si > btoc(u.u_rlimit[RLIMIT_STACK].rlim_cur)) + return (0); + u.u_ssize += si; + return (1); +} + +/* ARGSUSED */ +ovadvise(p, uap, retval) + struct proc *p; + struct args { + int anom; + } *uap; + int *retval; +{ + +} diff --git a/usr/src/sys/vm/vnode_pager.c b/usr/src/sys/vm/vnode_pager.c new file mode 100644 index 0000000000..ae215107a4 --- /dev/null +++ b/usr/src/sys/vm/vnode_pager.c @@ -0,0 +1,450 @@ +/* + * Copyright (c) 1990 University of Utah. + * Copyright (c) 1991 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * the Systems Programming Group of the University of Utah Computer + * Science Department. + * + * %sccs.include.redist.c% + * + * @(#)vnode_pager.c 7.1 (Berkeley) %G% + */ + +/* + * Page to/from files (vnodes). + * + * TODO: + * pageouts + */ +#include "vnodepager.h" +#if NVNODEPAGER > 0 + +#include "param.h" +#include "user.h" +#include "malloc.h" +#include "vnode.h" +#include "uio.h" +#include "mount.h" +#include "queue.h" + +#include "../vm/vm_param.h" +#include "../vm/vm_pager.h" +#include "../vm/vm_page.h" +#include "../vm/vnode_pager.h" + +queue_head_t vnode_pager_list; /* list of managed vnodes */ + +#ifdef DEBUG +int vpagerdebug = 0x00; +#define VDB_FOLLOW 0x01 +#define VDB_INIT 0x02 +#define VDB_IO 0x04 +#define VDB_FAIL 0x08 +#define VDB_ALLOC 0x10 +#define VDB_SIZE 0x20 +#endif + +void +vnode_pager_init() +{ +#ifdef DEBUG + if (vpagerdebug & VDB_FOLLOW) + printf("vnode_pager_init()\n"); +#endif + queue_init(&vnode_pager_list); +} + +/* + * Allocate (or lookup) pager for a vnode. + * Handle is a vnode pointer. + */ +vm_pager_t +vnode_pager_alloc(handle, size, prot) + caddr_t handle; + vm_size_t size; + vm_prot_t prot; +{ + register vm_pager_t pager; + register vn_pager_t vnp; + vm_object_t object; + struct vattr vattr; + struct vnode *vp; + +#ifdef DEBUG + if (vpagerdebug & (VDB_FOLLOW|VDB_ALLOC)) + printf("vnode_pager_alloc(%x, %x, %x)\n", handle, size, prot); +#endif + /* + * Pageout to vnode, no can do yet. + */ + if (handle == NULL) + return(VM_PAGER_NULL); + + /* + * Vnodes keep a pointer to any associated pager so no need to + * lookup with vm_pager_lookup. + */ + vp = (struct vnode *)handle; + pager = (vm_pager_t)vp->v_vmdata; + if (pager == VM_PAGER_NULL) { + /* + * Allocate pager structures + */ + pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, M_WAITOK); + if (pager == VM_PAGER_NULL) + return(VM_PAGER_NULL); + vnp = (vn_pager_t)malloc(sizeof *vnp, M_VMPGDATA, M_WAITOK); + if (vnp == VN_PAGER_NULL) { + free((caddr_t)pager, M_VMPAGER); + return(VM_PAGER_NULL); + } + /* + * And an object of the appropriate size + */ + if (VOP_GETATTR(vp, &vattr, u.u_cred) == 0) { + object = vm_object_allocate(round_page(vattr.va_size)); + vm_object_enter(object, pager); + vm_object_setpager(object, pager, 0, TRUE); + } else { + free((caddr_t)vnp, M_VMPGDATA); + free((caddr_t)pager, M_VMPAGER); + return(VM_PAGER_NULL); + } + /* + * Hold a reference to the vnode and initialize pager data. 
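+		 *
+		 * The reference taken with VREF() is what keeps the vnode
+		 * from being recycled while a mapping exists;
+		 * vnode_pager_dealloc() below gives it back with vrele().
+		 * Schematically (lifetime only, illustrative):
+		 *
+		 *	pager = vnode_pager_alloc((caddr_t)vp, size, prot);
+		 *		... vnode held for the life of the pager ...
+		 *	vnode_pager_dealloc(pager);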
+ */ + VREF(vp); + vnp->vnp_flags = 0; + vnp->vnp_vp = vp; + vnp->vnp_size = vattr.va_size; + queue_enter(&vnode_pager_list, pager, vm_pager_t, pg_list); + pager->pg_handle = handle; + pager->pg_type = PG_VNODE; + pager->pg_ops = &vnodepagerops; + pager->pg_data = (caddr_t)vnp; + vp->v_vmdata = (caddr_t)pager; + } else { + /* + * vm_object_lookup() will remove the object from the + * cache if found and also gain a reference to the object. + */ + object = vm_object_lookup(pager); + vnp = (vn_pager_t)pager->pg_data; + } + if (prot & VM_PROT_EXECUTE) + vp->v_flag |= VTEXT; /* XXX */ +#ifdef DEBUG + if (vpagerdebug & VDB_ALLOC) + printf("vnode_pager_setup: vp %x sz %x pager %x object %x\n", + vp, vnp->vnp_size, pager, object); +#endif + return(pager); +} + +void +vnode_pager_dealloc(pager) + vm_pager_t pager; +{ + register vn_pager_t vnp = (vn_pager_t)pager->pg_data; + register struct vnode *vp; + +#ifdef DEBUG + if (vpagerdebug & VDB_FOLLOW) + printf("vnode_pager_dealloc(%x)\n", pager); +#endif + if (vp = vnp->vnp_vp) { + vp->v_vmdata = NULL; + vp->v_flag &= ~VTEXT; +#if 0 + /* can hang if done at reboot on NFS FS */ + (void) VOP_FSYNC(vp, u.u_cred); +#endif + vrele(vp); + } + queue_remove(&vnode_pager_list, pager, vm_pager_t, pg_list); + free((caddr_t)vnp, M_VMPGDATA); + free((caddr_t)pager, M_VMPAGER); +} + +vnode_pager_getpage(pager, m, sync) + vm_pager_t pager; + vm_page_t m; + boolean_t sync; +{ + +#ifdef DEBUG + if (vpagerdebug & VDB_FOLLOW) + printf("vnode_pager_getpage(%x, %x)\n", pager, m); +#endif + return(vnode_pager_io((vn_pager_t)pager->pg_data, m, UIO_READ)); +} + +boolean_t +vnode_pager_putpage(pager, m, sync) + vm_pager_t pager; + vm_page_t m; + boolean_t sync; +{ + int err; + +#ifdef DEBUG + if (vpagerdebug & VDB_FOLLOW) + printf("vnode_pager_putpage(%x, %x)\n", pager, m); +#endif + if (pager == VM_PAGER_NULL) + return; + err = vnode_pager_io((vn_pager_t)pager->pg_data, m, UIO_WRITE); + if (err == VM_PAGER_OK) { + m->clean = TRUE; /* XXX - wrong place */ + pmap_clear_modify(VM_PAGE_TO_PHYS(m)); /* XXX - wrong place */ + } + return(err); +} + +boolean_t +vnode_pager_haspage(pager, offset) + vm_pager_t pager; + vm_offset_t offset; +{ + register vn_pager_t vnp = (vn_pager_t)pager->pg_data; + daddr_t bn; + int err; + +#ifdef DEBUG + if (vpagerdebug & VDB_FOLLOW) + printf("vnode_pager_haspage(%x, %x)\n", pager, offset); +#endif + + /* + * Offset beyond end of file, do not have the page + */ + if (offset >= vnp->vnp_size) { +#ifdef DEBUG + if (vpagerdebug & (VDB_FAIL|VDB_SIZE)) + printf("vnode_pager_haspage: pg %x, off %x, size %x\n", + pager, offset, vnp->vnp_size); +#endif + return(FALSE); + } + + /* + * Read the index to find the disk block to read + * from. If there is no block, report that we don't + * have this data. + * + * Assumes that the vnode has whole page or nothing. + */ + err = VOP_BMAP(vnp->vnp_vp, + offset / vnp->vnp_vp->v_mount->mnt_stat.f_bsize, + (struct vnode *)0, &bn); + if (err) { +#ifdef DEBUG + if (vpagerdebug & VDB_FAIL) + printf("vnode_pager_haspage: BMAP err %d, pg %x, off %x\n", + err, pager, offset); +#endif + return(TRUE); + } + return((long)bn < 0 ? FALSE : TRUE); +} + +/* + * (XXX) + * Lets the VM system know about a change in size for a file. + * If this vnode is mapped into some address space (i.e. we have a pager + * for it) we adjust our own internal size and flush any cached pages in + * the associated object that are affected by the size change. 
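+ *
+ * A filesystem is expected to call this whenever it changes the length of
+ * a file that may be mapped, e.g. from its truncate path (the callers are
+ * not part of this patch, so the call below is only illustrative):
+ *
+ *	vnode_pager_setsize(vp, (u_long)newlength);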
+ * + * Note: this routine may be invoked as a result of a pager put + * operation (possibly at object termination time), so we must be careful. + */ +vnode_pager_setsize(vp, nsize) + struct vnode *vp; + u_long nsize; +{ + register vn_pager_t vnp; + register vm_object_t object; + vm_pager_t pager; + + /* + * Not a mapped vnode + */ + if (vp == NULL || vp->v_type != VREG || vp->v_vmdata == NULL) + return; + /* + * Hasn't changed size + */ + pager = (vm_pager_t)vp->v_vmdata; + vnp = (vn_pager_t)pager->pg_data; + if (nsize == vnp->vnp_size) + return; + /* + * No object. + * This can happen during object termination since + * vm_object_page_clean is called after the object + * has been removed from the hash table, and clean + * may cause vnode write operations which can wind + * up back here. + */ + object = vm_object_lookup(pager); + if (object == VM_OBJECT_NULL) + return; + +#ifdef DEBUG + if (vpagerdebug & (VDB_FOLLOW|VDB_SIZE)) + printf("vnode_pager_setsize: vp %x obj %x osz %d nsz %d\n", + vp, object, vnp->vnp_size, nsize); +#endif + /* + * File has shrunk. + * Toss any cached pages beyond the new EOF. + */ + if (nsize < vnp->vnp_size) { + vm_object_lock(object); + vm_object_page_remove(object, + (vm_offset_t)nsize, vnp->vnp_size); + vm_object_unlock(object); + } + vnp->vnp_size = (vm_offset_t)nsize; + vm_object_deallocate(object); +} + +vnode_pager_umount(mp) + register struct mount *mp; +{ + register vm_pager_t pager, npager; + struct vnode *vp; + + pager = (vm_pager_t) queue_first(&vnode_pager_list); + while (!queue_end(&vnode_pager_list, (queue_entry_t)pager)) { + /* + * Save the next pointer now since uncaching may + * terminate the object and render pager invalid + */ + vp = ((vn_pager_t)pager->pg_data)->vnp_vp; + npager = (vm_pager_t) queue_next(&pager->pg_list); + if (mp == (struct mount *)0 || vp->v_mount == mp) + (void) vnode_pager_uncache(vp); + pager = npager; + } +} + +/* + * Remove vnode associated object from the object cache. + * + * Note: this routine may be invoked as a result of a pager put + * operation (possibly at object termination time), so we must be careful. + */ +boolean_t +vnode_pager_uncache(vp) + register struct vnode *vp; +{ + register vm_object_t object; + boolean_t uncached, locked; + vm_pager_t pager; + + /* + * Not a mapped vnode + */ + pager = (vm_pager_t)vp->v_vmdata; + if (pager == vm_pager_null) + return (TRUE); + /* + * Unlock the vnode if it is currently locked. + * We do this since uncaching the object may result + * in its destruction which may initiate paging + * activity which may necessitate locking the vnode. + */ + locked = VOP_ISLOCKED(vp); + if (locked) + VOP_UNLOCK(vp); + /* + * Must use vm_object_lookup() as it actually removes + * the object from the cache list. + */ + object = vm_object_lookup(pager); + if (object) { + uncached = (object->ref_count <= 1); + pager_cache(object, FALSE); + } else + uncached = TRUE; + if (locked) + VOP_LOCK(vp); + return(uncached); +} + +vnode_pager_io(vnp, m, rw) + register vn_pager_t vnp; + vm_page_t m; + enum uio_rw rw; +{ + struct uio auio; + struct iovec aiov; + vm_offset_t kva, foff; + int error, size; + +#ifdef DEBUG + if (vpagerdebug & VDB_FOLLOW) + printf("vnode_pager_io(%x, %x, %c): vnode %x\n", + vnp, m, rw == UIO_READ ? 
'R' : 'W', vnp->vnp_vp); +#endif + foff = m->offset + m->object->paging_offset; + /* + * Return failure if beyond current EOF + */ + if (foff >= vnp->vnp_size) { +#ifdef DEBUG + if (vpagerdebug & VDB_SIZE) + printf("vnode_pager_io: vp %x, off %d size %d\n", + vnp->vnp_vp, foff, vnp->vnp_size); +#endif + return(VM_PAGER_BAD); + } + if (foff + PAGE_SIZE > vnp->vnp_size) + size = vnp->vnp_size - foff; + else + size = PAGE_SIZE; + /* + * Allocate a kernel virtual address and initialize so that + * we can use VOP_READ/WRITE routines. + */ + kva = vm_pager_map_page(m); + aiov.iov_base = (caddr_t)kva; + aiov.iov_len = size; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_offset = foff; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_rw = rw; + auio.uio_resid = size; +#ifdef DEBUG + if (vpagerdebug & VDB_IO) + printf("vnode_pager_io: vp %x kva %x foff %x size %x", + vnp->vnp_vp, kva, foff, size); +#endif + if (rw == UIO_READ) + error = VOP_READ(vnp->vnp_vp, &auio, 0, u.u_cred); + else + error = VOP_WRITE(vnp->vnp_vp, &auio, 0, u.u_cred); +#ifdef DEBUG + if (vpagerdebug & VDB_IO) { + if (error || auio.uio_resid) + printf(" returns error %x, resid %x", + error, auio.uio_resid); + printf("\n"); + } +#endif + if (!error) { + register int count = size - auio.uio_resid; + + if (count == 0) + error = EINVAL; + else if (count != PAGE_SIZE && rw == UIO_READ) + bzero(kva + count, PAGE_SIZE - count); + } + vm_pager_unmap_page(kva); + return (error ? VM_PAGER_FAIL : VM_PAGER_OK); +} +#endif diff --git a/usr/src/sys/vm/vnode_pager.h b/usr/src/sys/vm/vnode_pager.h new file mode 100644 index 0000000000..4daa3dfcd6 --- /dev/null +++ b/usr/src/sys/vm/vnode_pager.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 1990 University of Utah. + * Copyright (c) 1991 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * the Systems Programming Group of the University of Utah Computer + * Science Department. + * + * %sccs.include.redist.c% + * + * @(#)vnode_pager.h 7.1 (Berkeley) %G% + */ + +#ifndef _VNODE_PAGER_ +#define _VNODE_PAGER_ 1 + +/* + * VNODE pager private data. + */ +struct vnpager { + int vnp_flags; /* flags */ + struct vnode *vnp_vp; /* vnode */ + vm_size_t vnp_size; /* vnode current size */ +}; +typedef struct vnpager *vn_pager_t; + +#define VN_PAGER_NULL ((vn_pager_t)0) + +#define VNP_PAGING 0x01 /* vnode used for pageout */ +#define VNP_CACHED 0x02 /* vnode is cached */ + +#ifdef KERNEL + +void vnode_pager_init(); +vm_pager_t vnode_pager_alloc(); +void vnode_pager_dealloc(); +int vnode_pager_getpage(), vnode_pager_putpage(); +boolean_t vnode_pager_haspage(); + +struct pagerops vnodepagerops = { + vnode_pager_init, + vnode_pager_alloc, + vnode_pager_dealloc, + vnode_pager_getpage, + vnode_pager_putpage, + vnode_pager_haspage +}; + +#endif + +#endif /* _VNODE_PAGER_ */ -- 2.20.1
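
The pieces above meet through the pager switch declared in vm_pager.h: when a
fault occurs on a mapped file, the generic VM layer hands the faulted page to
the object's pager, which for a vnode ends up in vnode_pager_getpage() and
finally vnode_pager_io().  Very roughly, and only as a sketch (the fault
handler and the generic vm_pager wrappers are not part of this patch, and the
helper name below is invented for illustration):

	static int
	fill_from_pager(object, m)
		vm_object_t object;
		vm_page_t m;
	{
		vm_pager_t pager = object->pager;

		if (pager == VM_PAGER_NULL)
			return (VM_PAGER_FAIL);
		/* VM_PAGER_OK: filled from the file; VM_PAGER_BAD: past EOF */
		return (vm_pager_get(pager, m, TRUE));
	}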